python_pandas_計算深圳地鐵線路斷面客流量

import os

import pandas as pd

from utils.read_write import eachFile, readJson, writeCSV, writeOneJSON

# Switch the process working directory to the OD data folder
# (hard-coded, Windows-only path).
os.chdir(r'D:\data\地鐵數據\地鐵\161718od')

'''
此文件用於計算地鐵線路斷面客流量
計算步驟:1、找到最短路徑;
2、計算每個斷面的客流量(使用數學分析法,需要每個斷面的運行時間,將每個時間段的斷面人數算出來)
0,1,2,3,4,5,6,7,8
7,21,2016-02-07 15:57:02,268024,深大站,22,2016-02-07 16:30:07,260028,香梅北站
2016	6.28	11	碧頭-福田
2016	10.28	7	太安-西麗湖
2016	10.28	9	文錦-紅樹灣南
2017			
2018			
'''

def findLine(start, end):
    """Build the shortest-path lookup key for an origin/destination pair.

    Both station names are looked up in the module-level ``yearStop`` table
    (columns ``id``, ``name``, ``line``); the value in the third column of
    the first matching row is used as the line of each station.

    Args:
        start: Name of the boarding station.
        end: Name of the alighting station.

    Returns:
        ``'<start line>-<start>_<end line>-<end>'`` when both stations are
        present in ``yearStop``; the sentinel ``'a'`` when either station is
        missing.
    """
    start_rows = yearStop[yearStop['name'] == start]
    end_rows = yearStop[yearStop['name'] == end]
    # An unknown station on either side cannot yield a usable key. Returning
    # the sentinel here avoids stringifying an empty DataFrame into the key.
    if start_rows.empty or end_rows.empty:
        return 'a'
    start_line = start_rows.iat[0, 2]
    end_line = end_rows.iat[0, 2]
    return str(start_line) + '-' + start + '_' + str(end_line) + '-' + end

def addOne(split, start_day, start_hour):
    """Add one passenger to a section's counter for the given day and hour.

    The count is stored in the module-level ``routeSplitJson`` mapping as
    ``routeSplitJson[split][start_day][start_hour]``; missing levels are
    created on demand and a new counter starts at zero.

    Args:
        split: Section identifier (two station keys joined by ``'_'``).
        start_day: Day key of the trip.
        start_hour: Hour key of the trip.
    """
    per_day = routeSplitJson.setdefault(split, {})
    per_hour = per_day.setdefault(start_day, {})
    per_hour[start_hour] = per_hour.get(start_hour, 0) + 1


def routeSplit(df):
    """Count section passenger flow for every trip in an OD table.

    For each row the shortest path between origin and destination is looked
    up in the module-level ``minWays2017`` mapping (keyed by the value
    returned from ``findLine``). Every consecutive pair of nodes on
    ``origin -> intermediate stations -> destination`` is counted once via
    ``addOne`` under the trip's start day and start hour. Trips whose origin
    equals the destination are ignored.

    Trips with no stored path (missing key or empty path) are collected as
    ``[start, start_time, end]`` and handed to ``writeCSV`` together with
    the module-level ``saveDir`` after the loop.

    Args:
        df: DataFrame whose first three columns are, in order, the entry
            timestamp string, the origin station name and the destination
            station name.
    """
    noWays = []
    # Plain tuples give true positional access regardless of column labels;
    # integer indexing on an ``iterrows`` Series with non-integer labels
    # relies on a deprecated positional fallback.
    for row in df.itertuples(index=False, name=None):
        start_time, start, end = row[:3]
        if start == end:
            continue
        key = findLine(start, end)
        minStations = minWays2017.get(key)
        if not minStations:
            # Unknown OD pair or empty path: record it instead of crashing on
            # ``minStations[0]``, matching how routeSplit1 treats this case.
            noWays.append([start, start_time, end])
            continue
        start_day = start_time[:10]
        # NOTE(review): for 'YYYY-MM-DD HH:MM:SS' timestamps this slice keeps
        # the separating space (e.g. ' 15'); left unchanged so existing
        # output keys stay compatible.
        start_hour = start_time[10:13]
        key_parts = key.split('_')
        # key_parts[0] / key_parts[1] are the '<line>-<station>' endpoints.
        nodes = [key_parts[0]] + list(minStations) + [key_parts[1]]
        for left, right in zip(nodes, nodes[1:]):
            addOne(left + '_' + right, start_day, start_hour)
    writeCSV(noWays, saveDir)

if __name__ == '__main__':
    # Hard-coded Windows locations. Raw literals are used so that sequences
    # such as '\d', '\S', '\m' and '\s' are not parsed as (invalid) escape
    # sequences; the trailing separator is appended as a separate literal
    # because a raw string cannot end in a single backslash. The resulting
    # values are identical to the previous non-raw literals.
    min_route_floyd = r'D:\data\地鐵數據\SZ_Metro\min_way\min_route_floyd_key' '\\'
    src = r'D:\學習文件\項目文件\方輿地鐵分析系統\data\SZ_Metro_LineSUM_Stoppoint' '\\'
    # NOTE(review): this raw literal ends in two backslashes; left unchanged
    # to keep the runtime value exactly as before.
    od_dir = r'D:\data\地鐵數據\地鐵\161718od\\'
    jsonDir = r'D:\data\地鐵數據\SZ_Metro\min_way\min_subway_json' '\\'
    saveDir = r'D:\data\地鐵數據\地鐵\split' '\\'

    files = eachFile(od_dir)
    # NOTE(review): presumably this slice selects the 2017 OD files from the
    # listing -- confirm against the folder contents.
    file2017 = files[13:24]

    # Shortest paths and the station -> line table for 2017; both are read
    # as module-level names by findLine / routeSplit.
    minWays2017 = readJson(min_route_floyd + '2017.json')
    yearStop = pd.read_csv(
        src + 'SZ_Metro_LineSUM_Stoppoint_{}_GCJ02.csv'.format('2017'),
        names=['id', 'name', 'line'],
        encoding='gbk',
    )

    for file in file2017:
        # Columns 2, 4, 8: entry timestamp, origin name, destination name.
        df = pd.read_csv(od_dir + file, usecols=[2, 4, 8])
        # Single-file variant kept for manual runs:
        # df = pd.read_csv(od_dir+'Metro_Statistics_OD_20171002-20171008.txt', usecols=[2,4,8])
        # Fresh accumulator per input file; addOne writes into it.
        routeSplitJson = {}
        routeSplit(df)
        writeOneJSON(routeSplitJson, saveDir + file + '.json')


def findMinRoute(start, end):
    """Split the stored routes between two stations into sections.

    Reads ``<start>_<end>.json`` from the module-level ``jsonDir`` via
    ``readJson``. Each entry is expected to carry ``departure_stop`` and
    ``arrival_stop`` (each with a ``name``) and a ``via_stops`` list of
    objects with a ``name``.

    Args:
        start: Origin station name used in the file name.
        end: Destination station name used in the file name.

    Returns:
        A list of ``'<from>_<to>'`` section strings covering, for every
        route in the file, each consecutive pair on
        departure -> via stops -> arrival. A route without via stops
        contributes the single section ``'<departure>_<arrival>'``.
        (Previously the list was built and then discarded.)
    """
    file = start + '_' + end + '.json'
    minWays = readJson(jsonDir + file)
    routeSplitData = []
    for minWay in minWays:
        departure_stop = minWay['departure_stop']['name']
        arrival_stop = minWay['arrival_stop']['name']
        # ``via_stops`` may be empty or missing a value; treat both as
        # "no intermediate stops".
        via_names = [stop['name'] for stop in (minWay['via_stops'] or [])]
        nodes = [departure_stop] + via_names + [arrival_stop]
        routeSplitData.extend(
            left + '_' + right for left, right in zip(nodes, nodes[1:])
        )
    return routeSplitData

def routeSplit1(df):
    """Expand every trip of an OD table into per-section records.

    Columns are read by the labels ``'2'`` (entry timestamp), ``'4'``
    (origin name), ``'6'`` (exit timestamp) and ``'8'`` (destination name).
    The shortest path is looked up in the module-level ``minWays2017``
    mapping under ``'<origin>_<destination>'``.

    The previous implementation read these fields with ``getattr(row, '2')``
    on ``itertuples`` rows, which cannot work because non-identifier column
    names are renamed positionally; it also raised ``KeyError`` for unknown
    OD pairs and discarded its results.

    Args:
        df: DataFrame containing the columns ``'2'``, ``'4'``, ``'6'`` and
            ``'8'`` with string values.

    Returns:
        A tuple ``(routeSplitData, noWays)``. ``routeSplitData`` holds one
        ``[section, day, hour, 1]`` record per section of each trip;
        ``noWays`` holds ``[start, start_time, end, end_time]`` for trips
        without a stored path.
    """
    noWays = []
    routeSplitData = []
    trips = zip(df['2'], df['4'], df['6'], df['8'])
    for start_time, start, end_time, end in trips:
        start_day = start_time[:10]
        start_hour = start_time[10:13]
        end_hour = end_time[10:13]
        minStations = minWays2017.get(start + '_' + end)
        if not minStations:
            noWays.append([start, start_time, end, end_time])
            continue
        nodes = [start] + list(minStations) + [end]
        sections = [left + '_' + right for left, right in zip(nodes, nodes[1:])]
        for section in sections[:-1]:
            routeSplitData.append([section, start_day, start_hour, 1])
        # NOTE(review): as in the original, the final section is stamped with
        # the exit hour but still the start day -- confirm this is intended.
        routeSplitData.append([sections[-1], start_day, end_hour, 1])
    return routeSplitData, noWays

如需數據示例或需要幫忙,請私聊我。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章