預處理:處理浮動車數據的習慣流程是
- 按照車牌或者日期+車牌分割原始數據
- 用 MATLAB 提取 OD 矩陣
分割
分割的程序基於c++,並且使用雙線程讀取加快速度
// ReadData.cpp : Defines the entry point for the console application.
#include "stdafx.h"
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <algorithm>
#include "time.h"
#include <windows.h>
#include<stdlib.h>
#include <iomanip>
#include <io.h>
#include <direct.h>
using namespace std;
// Shared state used by the two worker threads and by _tmain.

// Event the workers wait on before touching `fin` and set again once their read is done;
// created (and initially signalled) by _tmain for every input file.
HANDLE g_hEventRead;// handle
// Event the workers wait on before appending to an output file; created by _tmain
// in the same way as g_hEventRead.
HANDLE g_hEventWrite;// handle
// NOTE(review): the two handles below are never assigned anywhere in the visible source;
// they are only passed to CloseHandle in _tmain.
HANDLE g_hEventTreadEnd1;// handle
HANDLE g_hEventTreadEnd2;// handle
// Completion flags: reset to false by _tmain before the workers start, set to true by
// parseSignleLineRecordA1 / parseSignleLineRecordA2 respectively when they finish.
bool isThreadEnd1 = false;
bool isThreadEnd2 = false;
// NOTE(review): not referenced anywhere else in the visible source.
int ThreadWriteNum = 0;
// Input file currently being split; opened and closed by _tmain, read by both workers.
ifstream fin;
// Number of non-empty lines read from the current input file; reset per file by _tmain
// and written to the process log afterwards.
long int LineID = 0;
// First 8 characters of the current input file name (set by _tmain); used there as the
// name of the per-file output directory.
string dirName = "";
DWORD WINAPI parseSignleLineRecordA1(LPVOID lpParameter)
{
while (true)
{
WaitForSingleObject(g_hEventRead,INFINITE);
if(fin.eof())
{
printf("read file have finish!\n");
SetEvent(g_hEventRead);
break;
}
string ReadLine;
getline(fin,ReadLine);
SetEvent(g_hEventRead);
if(ReadLine.c_str() == NULL ||ReadLine == "")
{
printf("read file have finish!\n");
break;
}
LineID++;
int con[20],n=0; //con數組記錄逗號位置
int curLineLength = ReadLine.length();
string INDATE, INTIME, CARID1, CARID2,
Longitude, Latitude, Speed, Satellite,
StateFlag, BrakeFlag;
string carName;
int dotFirstPos = ReadLine.find_first_of(',', 0);
while(dotFirstPos != string::npos) //行末之前
{
con[n]= dotFirstPos;
n++;
dotFirstPos = ReadLine.find_first_of(',', dotFirstPos + 1);
}
if (n!=9) //不符合標準列數
{
printf("%s\na bad line record.\n",ReadLine.c_str());
continue;
}
else //符合標準列
{
INDATE = ReadLine.substr(0, con[0]);
if (INDATE != "20170301")
{
printf("%s\na bad line standard.\n", ReadLine.c_str());
continue;
}
INTIME = ReadLine.substr(con[0] + 1, con[1] - con[0] - 1);
CARID1 = ReadLine.substr(con[1] + 1, con[2] - con[1] - 1);
CARID2 = ReadLine.substr(con[2] + 1, con[3] - con[2] - 1);
if (CARID2.length() != 12)
{
printf("%s\na bad line standard.\n", ReadLine.c_str());
continue;
}
Longitude = ReadLine.substr(con[3] + 1, con[4] - con[3] - 1);
Latitude = ReadLine.substr(con[4] + 1, con[5] - con[4] - 1);
Speed = ReadLine.substr(con[5] + 1, con[6] - con[5] - 1);
Satellite = ReadLine.substr(con[6] + 1, con[7] - con[6] - 1);
StateFlag = ReadLine.substr(con[7] + 1, con[8] - con[7] - 1);
BrakeFlag = ReadLine.substr(con[8] + 1);
}
//打開寫文件
WaitForSingleObject(g_hEventWrite, INFINITE);
if (INTIME.length() == 6)
{
carName = CARID2.substr(2);
string WriteFile = "F:\\2017\\2017coda\\output\\split\\" + INDATE + "\\" + carName + ".txt"; //*****前兩位分文件名*****
ofstream fout;
fout.open(WriteFile, ios::app);
fout << INTIME.substr(0,2) << ";" << INTIME.substr(2, 2) << ";" << INTIME.substr(4, 2) << ";" << CARID1 << ";" << Longitude << ";" << Latitude << ";" << Speed << ";" << Satellite << ";" << StateFlag << ";" << BrakeFlag << endl;
fout.close();
}
else {
printf("%s\na bad line standard.\n", ReadLine.c_str());
continue;
}
SetEvent(g_hEventWrite);
}
isThreadEnd1 = true;
return NULL;
}
DWORD WINAPI parseSignleLineRecordA2(LPVOID lpParameter)
{
while (true)
{
WaitForSingleObject(g_hEventRead, INFINITE);
if (fin.eof())
{
printf("read file have finish!\n");
SetEvent(g_hEventRead);
break;
}
string ReadLine;
getline(fin, ReadLine);
SetEvent(g_hEventRead);
if (ReadLine.c_str() == NULL || ReadLine == "")
{
printf("read file have finish!\n");
break;
}
LineID++;
int con[20], n = 0; //con數組記錄逗號位置
int curLineLength = ReadLine.length();
string INDATE, INTIME, CARID1, CARID2,
Longitude, Latitude, Speed, Satellite,
StateFlag, BrakeFlag;
string carName;
int dotFirstPos = ReadLine.find_first_of(',', 0);
while (dotFirstPos != string::npos) //行末之前
{
con[n] = dotFirstPos;
n++;
dotFirstPos = ReadLine.find_first_of(',', dotFirstPos + 1);
}
if (n != 9) //不符合標準列數
{
printf("%s\na bad line record.\n", ReadLine.c_str());
continue;
}
else //符合標準列
{
INDATE = ReadLine.substr(0, con[0]);
if (INDATE != "20170301")
{
printf("%s\na bad line standard.\n", ReadLine.c_str());
continue;
}
INTIME = ReadLine.substr(con[0] + 1, con[1] - con[0] - 1);
CARID1 = ReadLine.substr(con[1] + 1, con[2] - con[1] - 1);
CARID2 = ReadLine.substr(con[2] + 1, con[3] - con[2] - 1);
if (CARID2.length() != 12)
{
printf("%s\na bad line standard.\n", ReadLine.c_str());
continue;
}
Longitude = ReadLine.substr(con[3] + 1, con[4] - con[3] - 1);
Latitude = ReadLine.substr(con[4] + 1, con[5] - con[4] - 1);
Speed = ReadLine.substr(con[5] + 1, con[6] - con[5] - 1);
Satellite = ReadLine.substr(con[6] + 1, con[7] - con[6] - 1);
StateFlag = ReadLine.substr(con[7] + 1, con[8] - con[7] - 1);
BrakeFlag = ReadLine.substr(con[8] + 1);
}
//打開寫文件
WaitForSingleObject(g_hEventWrite, INFINITE);
if (INTIME.length() == 6)
{
carName = CARID2.substr(2);
string WriteFile = "F:\\2017\\2017coda\\output\\split\\" + INDATE + "\\" + carName + ".txt"; //*****前兩位分文件名*****
ofstream fout;
fout.open(WriteFile, ios::app);
fout << INTIME.substr(0, 2) << ";" << INTIME.substr(2, 2) << ";" << INTIME.substr(4, 2) << ";" << CARID1 << ";" << Longitude << ";" << Latitude << ";" << Speed << ";" << Satellite << ";" << StateFlag << ";" << BrakeFlag << endl;
fout.close();
}
else {
printf("%s\na bad line standard.\n", ReadLine.c_str());
continue;
}
SetEvent(g_hEventWrite);
}
isThreadEnd2 = true;
return NULL;
}
int _tmain(int argc, _TCHAR* argv[])
{
clock_t start, finish;
start = clock();
LineID = 0;
#pragma region 清理過往數據
//system("del F:\\2017\\2017coda\\output\\split\\*.txt"); //*****建立子文件,結果文件夾清空*****(需手動操作)
//system("del F:\\2017\\2017coda\\output\\Process.txt"); //*****過程列表文件清空*****
cout<<"Remove Txt Success"<<endl;
#pragma endregion
#pragma region
std::stringstream stream;
string fileName = "F:\\2017\\2017coda\\input\\*.txt"; //*****分日期的原始輸入文件*****
_finddata_t fileInfo;
long handle = _findfirst(fileName.c_str(), &fileInfo);
do
{
LineID = 0;
string ReadFile;
stream << "F:\\2017\\2017coda\\input\\" << fileInfo.name; //*****分日期的原始輸入文件*****
stream >> ReadFile;
stream.clear();
stream.str("");
printf("%s is running.\n", ReadFile.c_str());
dirName = fileInfo.name;
dirName = dirName.substr(0,8);
string FileDir;
stream << "F:\\2017\\2017coda\\output\\split\\" << dirName; //*****存放輸出的文件夾(按日期新建文件夾)*****
stream >> FileDir;
stream.clear();
stream.str("");
mkdir(FileDir.data());
fin.open(ReadFile);
g_hEventRead = CreateEvent(NULL, FALSE, FALSE, "Read");
if (g_hEventRead)//判斷是否建立成功
{
if (ERROR_ALREADY_EXISTS == GetLastError())
{
cout << "only one instance can run!" << endl;
return 0;
}
}
SetEvent(g_hEventRead);//重點就是這個設定
g_hEventWrite = CreateEvent(NULL, FALSE, FALSE, "Write");
if (g_hEventWrite)//判斷是否建立成功
{
if (ERROR_ALREADY_EXISTS == GetLastError())
{
cout << "only one instance can run!" << endl;
return 0;
}
}
SetEvent(g_hEventWrite);//重點就是這個設定
isThreadEnd1 = false;
isThreadEnd2 = false;
//創建線程
HANDLE hThread1 = CreateThread(NULL, 0, parseSignleLineRecordA1, NULL, 0, NULL);
HANDLE hThread2 = CreateThread(NULL, 0, parseSignleLineRecordA2, NULL, 0, NULL);
while (!isThreadEnd1 || !isThreadEnd2)
{
}
fin.close();
CloseHandle(hThread1);
CloseHandle(hThread2);
CloseHandle(g_hEventRead);
CloseHandle(g_hEventWrite);
CloseHandle(g_hEventTreadEnd1);
CloseHandle(g_hEventTreadEnd2);
std::stringstream stream;
string WriteProcessFile;
stream << "F:\\2017\\2017coda\\output\\Process.txt"; //*****過程記錄文件路徑*****
stream >> WriteProcessFile;
stream.clear();
stream.str("");
ofstream fout;
fout.open(WriteProcessFile, ios::app);
fout << ReadFile << " Read Mission complete! " << "LineID count " << LineID << endl;
fout.close();
} while (_findnext(handle, &fileInfo) == 0);
#pragma endregion
finish = clock();
double duration = (double)(finish - start) / CLOCKS_PER_SEC;
printf( "%f seconds\n", duration );
system("pause");
return 0;
}
提取od
因爲熟悉 MATLAB,所以用它來提取 OD,但效率不是非常高
輸入爲分割後的結果:root\日期\車牌號.txt
輸出爲提取的od:root\日期.txt
%% Extract OD (origin-destination) pairs from the per-vehicle split files.
%% Adjust the input folder (TreeDir) and the output folder (OutDir) below.
%% State column: '0' = vacant, '1' = occupied; swap the two literals in the
%% "OD Pairs Mining" section if your data uses the opposite coding.
clear; %clc
hwait=waitbar(0,'Please wait>>>>>>>>>>>'); % use waitbar to check progress
TreeDir='F:\2017\2017coda\output\split\'; % input root: one sub-folder per date
OutDir='F:\2017\2017coda\output\od\';     % output folder: one <date>.txt per sub-folder
if ~exist(OutDir,'dir')
    mkdir(OutDir);
end
TreeDirs=dir(TreeDir);
MaxRow=10;                                 % number of columns in a split record
header={'hour','minute','second','car','long','lat','speed','Sate','State','Brake','LineID'};
rowFormat=[repmat('%s,',[1,MaxRow]),'%s\n'];
for nData=1:length(TreeDirs)
    dayName=TreeDirs(nData).name;
    % Skip plain files and the '.' / '..' entries by name instead of assuming
    % they are the first two items of the listing.
    if ~TreeDirs(nData).isdir || strcmp(dayName,'.') || strcmp(dayName,'..')
        continue;
    end
    ReadDir=[TreeDir,dayName,'\'];
    ReadDirs=dir([ReadDir,'*.txt']); % one file per vehicle
    WritePath=[OutDir,dayName,'.txt'];
    fid=fopen(WritePath,'w');
    if fid<0
        error('extractOD:openFailed','Cannot open output file %s',WritePath);
    end
    fprintf(fid,rowFormat,header{:});
    fclose(fid);
    CountDirs=length(ReadDirs);
    LineID=1;                              % running OD-pair id, restarts for every date
    waitbar(0,hwait,'Reading>>>>>>>>>>>');
    for i=1:CountDirs
        %=================Data Reading=================
        [~,stem]=fileparts(ReadDirs(i).name);
        carName=stem(1:min(10,numel(stem))); % vehicle id = first 10 characters of the file name
        vehiclePath=[ReadDir,ReadDirs(i).name];
        fid=fopen(vehiclePath);
        if fid<0
            warning('extractOD:openFailed','Skipping unreadable file %s',vehiclePath);
            Sort=cell(0,MaxRow);
        else
            Origin=textscan(fid,'%s%s%s%s%s%s%s%s%s%s','delimiter',';','CollectOutput',1);
            fclose(fid);
            Sort=Origin{1};
            clear Origin;
            %=================Data Ordering By Time=================
            if size(Sort,1)>1
                Sort=sortrows(Sort,[1,2,3]); % hour, minute, second (fixed-width text)
            end
        end
        %=================OD Pairs Mining=================
        row=size(Sort,1);
        hasOrigin=false;                     % true while a trip start is pending
        pair=cell(1,MaxRow); pair(:)={'0'};  % record of the pending trip start
        startTime=[0,0,0];
        result={}; r=1;
        for n=1:(row-1)
            occupiedNow=strcmp(Sort{n,9},'1');
            % A trip starts at the first occupied record seen while no trip is pending.
            if occupiedNow && ~hasOrigin
                pair(1,:)=Sort(n,:);
                startTime=str2double(Sort(n,1:3));
                hasOrigin=true;
            end
            % The end test is deliberately NOT an elseif: a trip opened on this very
            % record must be closable by the next one, otherwise its stale origin would
            % be attached to the following trip.
            if hasOrigin
                nextState=Sort{n+1,9};
                dropOff=occupiedNow && strcmp(nextState,'0');
                openAtEnd=strcmp(nextState,'1') && ((n+1)==row); % still occupied at the last record
                if dropOff || openAtEnd
                    endTime=str2double(Sort(n+1,1:3));
                    Pass=(endTime-startTime)*[3600;60;1]; % trip duration in seconds
                    if (Pass>=60) && (Pass<=18000) % keep trips of 60-18000 seconds
                        result(r,1:MaxRow)=pair(1,:);
                        result(r+1,1:MaxRow)=Sort(n+1,:);
                        result{r,4}=carName;
                        result{r+1,4}=carName;
                        odText=num2str(LineID); % od_ID shared by origin and destination rows
                        result{r,MaxRow+1}=odText;
                        result{r+1,MaxRow+1}=odText;
                        LineID=LineID+1;
                        r=r+2;
                    end
                    hasOrigin=false;
                end
            end
        end
        clear Sort;
        %=================Data Write=================
        if (r>1)
            fid=fopen(WritePath,'a');
            if fid<0
                error('extractOD:openFailed','Cannot open output file %s',WritePath);
            end
            for iwrite=1:r-1
                fprintf(fid,rowFormat,result{iwrite,:});
            end
            fclose(fid);
        end
        %=================Waitbar=================
        str=['Running ',dayName,' ',num2str(fix(i/CountDirs*100)),' %'];
        waitbar(i/CountDirs,hwait,str);
    end
end
close(hwait);