爲了將列表轉換爲二維透視表,之前自己寫過代碼,今天看到pandas直接有這個方法,感覺簡單順手多了,故重寫了社會特徵屬性的人口矩陣和距離矩陣分離的代碼,首先介紹一下pivot_table:
pandas.tools.pivot.pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',fill_value=None, margins=False, dropna=True)
其中values爲要聚合並顯示的數值列,index爲用作行標籤的列,columns爲用作列標籤的列,aggfunc爲聚合函數(默認取均值),例如下面這個例子:
# Demo frame with 12 rows: three label columns (A, B, C) built by repeating
# short lists, plus two columns (D, E) of random normal values.
df = pd.DataFrame(
    {
        'A': ['one', 'one', 'two', 'three'] * 3,
        'B': ['A', 'B', 'C'] * 4,
        'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
        'D': np.random.randn(12),
        'E': np.random.randn(12),
    }
)
# Pivot D (aggregated with the default aggfunc) with (A, B) pairs as rows
# and the values of C as columns.
pd.pivot_table(
    df,
    values='D',
    index=['A', 'B'],
    columns=['C'],
)
這樣就得到了df表中以D作爲數值(默認按均值聚合)、A和B爲行索引、C爲列的透視表。
下面是過剩通勤的社會特徵分類的修正代碼,換成pivot_table就簡單了很多:
# authors = Kanonpy
# coding=UTF-8
import pandas as pd
import numpy as np
import os
from scipy.optimize import linprog
# Distance table loaded from 'Distance.xlsx' (path relative to the working
# directory); it is later passed to commuteCalcu as its `dist` argument.
# NOTE(review): presumably rows/columns are labelled with the same workplace
# and residence identifiers as the survey data -- confirm against the workbook.
distance = pd.read_excel('Distance.xlsx')
# Survey records loaded from 'chuli.xls'; the loop at the bottom of the file
# filters and pivots this frame once per attribute category.
df = pd.read_excel('chuli.xls')
def commuteCalcu(pop, dist, name):
    """Report total and average commute distance for one population group.

    The two tables are first restricted to the row and column labels they
    have in common and put in the same label order, so that the
    element-wise product pairs every flow count with the distance of the
    same (row, column) cell.  The aligned tables are saved under a
    directory named after ``name`` and the statistics are printed.

    Args:
        pop: DataFrame of flow counts.  The caller in this file passes a
            pivot table of record counts (workplace rows, residence columns).
        dist: DataFrame of distances, labelled like ``pop``.  Labels are
            expected to be unique.
        name: Label of the group; used both in the printed messages and as
            the name of the output directory (relative to the working
            directory).

    Returns:
        None.  Results are printed and written to
        ``<name>/Population.xlsx`` and ``<name>/Distance.xlsx``.
    """
    # Keep only labels present in both tables.  Selecting both frames with
    # the SAME label lists also fixes their ordering, which a positional
    # multiplication silently depends on.
    shared_rows = pop.index.intersection(dist.index)
    shared_cols = pop.columns.intersection(dist.columns)
    pop = pop.loc[shared_rows, shared_cols]
    dist = dist.loc[shared_rows, shared_cols]

    out_dir = u'%s' % (name,)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
        print(u'creat %s file' % (name,))

    # Each table goes to the workbook that carries its own name.
    pop.to_excel(u'%s/Population.xlsx' % (name,))
    dist.to_excel(u'%s/Distance.xlsx' % (name,))

    pop_values = np.asarray(pop)
    total_pop = pop_values.sum()
    total_commute = (pop_values * np.asarray(dist)).sum()
    # float() avoids truncating integer division on Python 2; an empty or
    # all-zero group has no defined average, so report NaN instead of
    # dividing by zero.
    commute = float(total_commute) / total_pop if total_pop else float('nan')

    print(u'%s 總通勤距離爲 %s' % (name, total_commute))
    print(u'%s 通勤距離(ARC)爲 %s' % (name, commute))
    print(u'%s 人口總數爲 %s' % (name, total_pop))
# For every social attribute and each of its categories: select the matching
# survey records, count them per (workplace, residence) pair, and report the
# commute statistics for that sub-population.
for col in [u'性別', u'戶籍', u'職業', u'收入']:
    for i in df.groupby(col).size().index:
        species = df[df[col] == i]
        # `index`/`columns` are the current pivot_table keywords; the older
        # `rows`/`cols` spellings are no longer accepted by pandas.
        pt = pd.pivot_table(data=species, values=col,
                            index=u'工作地或學校地址', columns=u'居住小區',
                            aggfunc=np.size, fill_value=0)
        # Skip groups whose pivot table has too few rows + columns.
        if sum(pt.shape) > 10:
            commuteCalcu(pt, distance, u'%s中的%s' % (col, i))
        else:
            print('%s_%s is too small' % (col, i))
——————————————————————————————————————————————————
@Sugar_Lover