Python pandas:將街道、行政區級別的數據進行分組求和

# -*- coding: utf-8 -*-
import pandas as pd
'''
population|gender|age|district_O|street_O|district_D|street_D
01|10|1|區|鎮|縣|鎮
'''
def get_bus_station(src=None):
    """Aggregate the 2019-11 origin-destination flows by district and by street.

    Reads the pipe-separated, GBK-encoded ``gender_age.txt`` (no header row)
    and, for each administrative level (``district`` and ``street``):

    1. sums ``od`` over ``(gender, age, <level>_O, <level>_D)`` and writes the
       totals to ``od_<level>_gender_age_201911.csv``;
    2. keeps only the rows with ``3 < age < 14`` and writes them to
       ``od_<level>_age_gender_201911.csv``.

    The first three fields of every input row are named ``gender``, ``age``
    and ``od``; fields 3/5 are the origin/destination districts and fields
    4/6 the origin/destination streets.
    NOTE(review): the layout note above this function lists ``population``
    first, which does not match these column names - confirm against the data.

    Args:
        src: Directory prefix that is prepended verbatim to every file name,
            so it must already end with a path separator. When omitted, the
            module-level ``od_src`` is used.

    Raises:
        NameError: If ``src`` is omitted and ``od_src`` has not been defined.
    """
    base = od_src if src is None else src
    source_name = 'gender_age.txt'
    # (level name, positions of gender, age, od, origin, destination)
    levels = (
        ('district', [0, 1, 2, 3, 5]),
        ('street', [0, 1, 2, 4, 6]),
    )
    for level, cols in levels:
        origin = level + '_O'
        dest = level + '_D'
        totals_path = base + 'od_' + level + '_gender_age_201911.csv'
        filtered_path = base + 'od_' + level + '_age_gender_201911.csv'

        flows = pd.read_csv(base + source_name, encoding='gbk', usecols=cols,
                            sep='|', names=['gender', 'age', 'od', origin, dest])
        # The string 'sum' selects pandas' own group-wise sum; passing the
        # builtin ``sum`` callable is deprecated in recent pandas releases.
        totals = flows.groupby(['gender', 'age', origin, dest]).agg({'od': 'sum'})
        totals.to_csv(totals_path, index=True, header=True)

        # Re-read the file just written so the filtered output has exactly
        # the flat column layout and parsed dtypes of the CSV on disk.
        summed = pd.read_csv(totals_path)
        in_range = summed[(summed['age'] > 3) & (summed['age'] < 14)]
        in_range.to_csv(filtered_path, index=False)


#  ,01,06,1,區,街道,區,街道
def get_bus_station_202004(src=None):
    """Aggregate the 2020-04 origin-destination flows by district and by street.

    Reads the comma-separated ``flow_age_202004.csv`` (no header row; field 0
    is skipped) and, for each administrative level (``district`` and
    ``street``):

    1. sums ``od`` over ``(gender, age, <level>_O, <level>_D)`` and writes the
       totals to ``od_<level>_gender_age_202004.csv``;
    2. keeps only the rows with ``3 < age < 14`` and writes them to
       ``od_<level>_age_gender_202004.csv``.

    Fields 1-3 of every input row are named ``gender``, ``age`` and ``od``;
    fields 4/6 are the origin/destination districts and fields 5/7 the
    origin/destination streets.

    The age-filtered outputs are derived from the 2020-04 totals written in
    step 1. Earlier revisions re-read the ``..._201911.csv`` totals here, so
    the "202004" filtered files actually contained 2019-11 data.

    Args:
        src: Directory prefix that is prepended verbatim to every file name,
            so it must already end with a path separator. When omitted, the
            module-level ``od_src`` is used.

    Raises:
        NameError: If ``src`` is omitted and ``od_src`` has not been defined.
    """
    base = od_src if src is None else src
    source_name = 'flow_age_202004.csv'
    # (level name, positions of gender, age, od, origin, destination)
    levels = (
        ('district', [1, 2, 3, 4, 6]),
        ('street', [1, 2, 3, 5, 7]),
    )
    for level, cols in levels:
        origin = level + '_O'
        dest = level + '_D'
        totals_path = base + 'od_' + level + '_gender_age_202004.csv'
        filtered_path = base + 'od_' + level + '_age_gender_202004.csv'

        flows = pd.read_csv(base + source_name, usecols=cols, sep=',',
                            names=['gender', 'age', 'od', origin, dest])
        # The string 'sum' selects pandas' own group-wise sum; passing the
        # builtin ``sum`` callable is deprecated in recent pandas releases.
        totals = flows.groupby(['gender', 'age', origin, dest]).agg({'od': 'sum'})
        totals.to_csv(totals_path, index=True, header=True)

        # Re-read the 2020-04 totals just written (not the 2019-11 ones) so
        # the filtered output has the flat column layout of the CSV on disk.
        summed = pd.read_csv(totals_path)
        in_range = summed[(summed['age'] > 3) & (summed['age'] < 14)]
        in_range.to_csv(filtered_path, index=False)

if __name__ == "__main__":
    # Directory prefix shared by every read and write. File names are
    # concatenated onto it directly, so it has to end with a path separator.
    od_src = r"D:\201911\\"
    # Only the 2020-04 data set is processed when the file is run as a script.
    get_bus_station_202004()

如需數據或者幫忙處理數據請私聊我

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章