python_pandas_計算地鐵站點的進出站量和客流量

import os
import pandas as pd
# Switch the working directory to the raw OD data folder: the __main__ block
# passes the bare file names returned by os.listdir() straight to
# pd.read_csv(), so they are resolved relative to this directory.
os.chdir(r'D:\data\地鐵數據\地鐵\161718od')

'''
此文件用於計算地鐵站點每小時的進出站量，以及站點之間的 OD 客流量。
輸入文件的表頭（第一行）與數據示例（第二行）如下：
0,1,2,3,4,5,6,7,8
7,21,2016-02-07 15:57:02,268024,深大站,22,2016-02-07 16:30:07,260028,香梅北站
'''
# Compute the trip production (boardings) and attraction (alightings) of
# every single station, per date and hour.
def on_out(df, out_dir=None):
    """Append per-station hourly production/attraction counts to a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        Trip records. Must contain the columns ``date_origin``,
        ``hour_origin``, ``4`` (origin station), ``date_destination``,
        ``hour_destination`` and ``8`` (destination station).
    out_dir : str, optional
        Directory prefix (including the trailing path separator) that
        ``on_out_hour.csv`` is written into. Defaults to the module-level
        ``on_out_dir``.

    Notes
    -----
    Rows are appended without a header in the column order
    ``date, hour, station, traffic_production, traffic_attraction``.
    """
    keys = ['date_origin', 'hour_origin', '4']
    counts = ['traffic_production', 'traffic_attraction']

    production = (
        df.groupby(keys).size().rename('traffic_production').reset_index()
    )
    attraction = (
        df.groupby(['date_destination', 'hour_destination', '8'])
        .size()
        .rename('traffic_attraction')
        .reset_index()
    )
    # Give both tables the same key names before merging. Merging with
    # left_on/right_on and then keeping only the origin key columns loses the
    # date/hour/station of every key that occurs only on the destination side.
    attraction.columns = keys + ['traffic_attraction']

    merged = pd.merge(production, attraction, on=keys, how='outer')
    # A key missing on one side means "no trips", i.e. a count of zero.
    # Filling it also keeps the counts integral, so they are not written as
    # "2.0" in some batches and "2" in others.
    merged[counts] = merged[counts].fillna(0).astype(int)

    if out_dir is None:
        out_dir = on_out_dir
    merged[keys + counts].to_csv(
        out_dir + 'on_out_hour.csv', header=False, index=False, mode='a'
    )
# Compute the passenger flow between every pair of stations, per date and
# hour of departure.
def od(df, out_dir=None):
    """Append hourly origin-destination trip counts to a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        Trip records. Must contain the columns ``date_origin``,
        ``hour_origin``, ``4`` (origin station) and ``8`` (destination
        station).
    out_dir : str, optional
        Directory prefix (including the trailing path separator) that
        ``od_hour.csv`` is written into. Defaults to the module-level
        ``od_dir``.

    Notes
    -----
    Rows are appended without a header in the column order
    ``date_origin, hour_origin, origin, destination, od_count``.
    """
    if out_dir is None:
        out_dir = od_dir
    flows = (
        df.groupby(['date_origin', 'hour_origin', '4', '8'])
        .size()
        .rename('od_count')
        .reset_index()
    )
    # header=False is explicit: the file is opened in append mode once per
    # input file, so a default header would be repeated in the middle of the
    # data. This also matches the header-less output written by on_out().
    flows.to_csv(out_dir + 'od_hour.csv', header=False, index=False, mode='a')


if __name__ == '__main__':
    # Output directories; on_out() and od() read these as module globals.
    # Raw strings are used because '\d', '\o' etc. are invalid escape
    # sequences in ordinary literals. A raw string cannot end in a backslash,
    # so the trailing separator is appended separately.
    on_out_dir = r'D:\data\地鐵數據\地鐵\on_out_161718' + '\\'
    od_dir = r'D:\data\地鐵數據\地鐵\od_161718' + '\\'

    # os.listdir() returns entries in arbitrary order; sort them so the slice
    # below always skips the same files.
    # NOTE(review): the first 13 files are skipped, presumably because they
    # were handled by an earlier run -- confirm before re-running from scratch.
    files = sorted(os.listdir(r'D:\data\地鐵數據\地鐵\161718od\\'))[13:]

    for file in files:
        # Columns 2 and 6 hold the entry/exit timestamps, columns 4 and 8 the
        # origin/destination station names.
        df = pd.read_csv(file, usecols=[2, 4, 6, 8])
        # Timestamps look like '2016-02-07 15:57:02': characters 0-9 are the
        # date and 11-12 the hour (index 10 is the separating space, which
        # must not be included in the hour field).
        df['date_origin'] = df['2'].str[:10]
        df['hour_origin'] = df['2'].str[11:13]
        df['date_destination'] = df['6'].str[:10]
        df['hour_destination'] = df['6'].str[11:13]
        on_out(df)
        od(df)

如需數據示例或幫助，請私訊我。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章