計算公交車出行OD的整體思路如下:
本方法基於三類數據:公交車出行軌跡數據、設備線路對照表和乘客刷卡數據表。
公交車軌跡原始數據:包含公交車車牌號、定位時間和經緯度等信息;
乘客的刷卡數據:包含卡號、刷卡時間和交易終端號等信息。
公交車乘客出行OD提取規則:採用“聚類中心-站點”兩階段預測模型。該模型假設乘客的公共交通出行路徑選擇具有規律性,且上車站點與前一次下車站點通常位於相對鄰近的空間範圍內。首先對出行站點進行空間聚類,構建乘客出行鏈;然後利用基於部分匹配預測算法的變階馬爾科夫模型對出行鏈進行預測,進而提取出公交車乘客的完整出行OD。
數據處理部分請參考:
《python_根據規土委乘客刷卡數據_找出用戶的上車、下車站點經緯度》
# -*- coding: utf-8
import os
import pandas as pd
from utils.calculateDistance import calculator_gid
from utils.read_write import writeOneCSV
# Work from the data directory: the input files opened later in this script
# are referenced by bare relative names.
os.chdir(r'D:\data\\')
# Example target file "OD_-00.txt":
# each line holds origin grid id, destination grid id, OD trip count,
# e.g. "45581009024, 45581009024, 3"
def on_out_bus(name,group):
    """Emit one origin/destination grid row for every record of one group.

    For each record whose longitude and latitude are truthy, a row
    ``[name, origin_gid]`` is built and extended with a destination grid id:

    * the grid id of the next record in the group, when there is one;
    * otherwise, for the last record of a multi-record group, the grid id of
      the group's first record, unless it equals the origin grid id;
    * nothing at all for a group that holds a single record.

    The row is handed to ``writeOneCSV`` together with a path built from the
    module-level ``on_dir`` and two slices of the record's trade timestamp.

    :param name: group key, written as the first field of every row
        (the card id when called from ``get_bus_station``).
    :param group: DataFrame accessed by position: column 1 is the trade
        timestamp string, column 2 the longitude, column 3 the latitude.
    """
    record_count = group.shape[0]
    for pos in range(record_count):
        # NOTE(review): NaN is truthy, so this guard does not reject missing
        # coordinates; it only skips falsy values such as 0 or ''.
        if not group.iat[pos, 2] or not group.iat[pos, 3]:
            continue

        origin_lon, origin_lat = float(group.iat[pos, 2]), float(group.iat[pos, 3])
        origin_gid = calculator_gid(origin_lon, origin_lat)

        # Characters 0-9 and 11-12 of the timestamp name the output file;
        # presumably the date and the hour of 'YYYY-MM-DD HH:...' - confirm.
        trade_stamp = group.iat[pos, 1]
        day_part = trade_stamp[0:10]
        hour_part = trade_stamp[11:13]

        row = [name, origin_gid]

        if pos + 1 < record_count:
            # Destination is where the next record of this group starts.
            next_lon, next_lat = float(group.iat[pos + 1, 2]), float(group.iat[pos + 1, 3])
            row.append(calculator_gid(next_lon, next_lat))
        elif record_count != 1:
            # Last record of the group: link it back to the first record.
            first_lon, first_lat = float(group.iat[0, 2]), float(group.iat[0, 3])
            first_gid = calculator_gid(first_lon, first_lat)
            same_cell = origin_gid == first_gid
            if not same_cell:
                row.append(first_gid)

        target_path = on_dir + 'OD_' + day_part + '-' + hour_part + '.csv'
        writeOneCSV(row, target_path)
def get_bus_station():
    """Run ``on_out_bus`` once for every group in ``grouped_upstation``.

    ``grouped_upstation`` is read as a module-level global; each iteration
    yields a ``(key, sub-frame)`` pair that is forwarded unchanged.
    """
    for card_key, card_records in grouped_upstation:
        on_out_bus(card_key, card_records)
if __name__ == '__main__':
    # Grid table; column header as recorded in the original script:
    # FID,Id,XZQDM,XZQMC,lsq,Longitude,Latitude,GID
    rec_file_250 = 'deg.txt'
    # NOTE(review): `nets` is not referenced by any code visible in this file;
    # the read is kept so the script still requires deg.txt as before.
    nets = pd.read_table(rec_file_250, sep=',', encoding='gbk')

    # Boarding-record input. Alternative inputs left over from earlier runs:
    # all_card_up_stations_del.csv, up_stations_5_add.csv, add_up_stations.csv,
    # up_stations_5_.csv, up_stations_5_del.csv
    src = 'up_stations_5.csv'

    # Output directory, read as a module global by on_out_bus.
    # Backslashes are escaped explicitly: the former 'D:\data\\' relied on the
    # invalid escape sequence '\d', which newer Python versions warn about.
    # The runtime value is unchanged.
    on_dir = 'D:\\data\\'

    # Keep four columns of the input (positions 0, 2, 3, 4) and rename them.
    all_upstation = pd.read_csv(src, engine='python', usecols=[0, 2, 3, 4])
    colum = ['CARDID', 'TRADEDATE', 'longitude', 'latitude']
    all_upstation.columns = colum

    # Drop records that lack either coordinate.
    all_upstation = all_upstation.dropna(subset=['longitude', 'latitude'])

    # Optional CARDID range filters, disabled in the original script:
    # all_upstation = all_upstation.loc[~all_upstation.where(all_upstation['CARDID'] > '330975418').any(axis=1)]
    # all_upstation = all_upstation.loc[~all_upstation.where(all_upstation['CARDID'] < '328684975').any(axis=1)]

    # get_bus_station iterates this module-level grouping, one group per card.
    grouped_upstation = all_upstation.groupby("CARDID")
    get_bus_station()
如需數據示例和分析結果,請私聊我。