馬上就找工作了,正好認認真真把軟賽覆盤一下。
00 賽題描述
賽題本質上爲在大規模稀疏有向圖中找3-7的環路,即給定有向圖G = (V,E),尋找出所有滿足長度範圍在[3, 7]並滿足前後路徑權值浮動在一定範圍內(如A-B的權重爲X,B-C的權重爲Y,則需滿足0.2 <= Y/X <= 3)的簡單環路,數據滿足以下條件:
- 無自環
- 節點值爲32位的無符號整數
- 數據量最多爲200萬條
- 最大環數爲2000萬
要求輸出滿足:
- 第一行輸出環數量
- 第二行開始輸出所有環路
- 環路總體按照長度進行排序
- 環路以環路中最小節點爲起始
- 同一長度下按照字典序進行排序
賽題比較簡單,同熱身賽以運行時間爲評價指標,主要考察算法、程序優化以及面向鯤鵬處理器的特點(Cache、多核等)進行優化。
01 整體思路
由於賽題比較簡單,所以大部分選手的算法都大同小異。主要有兩個方向:
- 反向構建P3存路,正向四層判斷是否成環,即4+3的算法
- 反向構建P3不存路,正向六層利用反向獲得的信息進行剪枝,即6+3的算法
兩個方向對於不同的數據集效果不同,經測驗,在隨機圖中6+3性能優於4+3,而在完全圖和菊花圖中,4+3性能優於6+3。初賽的線上數據集6+3比4+3快一些,也是我所採用的算法,複賽線上數據集4+3比6+3快一些,不過相差不大。
程序主要包含以下3個部分:數據讀取以及正反向圖構建、多線程找環運動、結果輸出,以下將是3個部分的詳解以及其中用到的一些優化點講解。
02 數據讀取以及正反向圖構建
因爲賽題的輸入輸出數據量都比較大,所以IO時間至關重要。在數據讀取部分,主要利用了多線程以及數據結構上的優化。
1 數據讀取
//記錄所有數據
int* dataAll;
int dataAlln = 0;
char *buf = NULL;
int fd = open(testFile.c_str(),O_RDONLY);
if(fd < 0) {
cout << "open testFile error" << endl;
return false;
}
long dataSize = lseek(fd, 0, SEEK_END); //總的字符數
buf = (char *)mmap(NULL, dataSize, PROT_READ, MAP_PRIVATE, fd, 0);
dataAll = (int*)malloc(DATASIZE * 3 * sizeof(int));//存放所以數據
int* tmp = (int*)malloc(DATASIZE * sizeof(int));//存放不重複的數據
BitMap* NumMap = new BitMap();//用來記錄壓入狀態
int u = 0, v = 0, m = 0;
int n = 0;
int tmpn = 0;
while(n < dataSize){
__builtin_prefetch(buf + 1024, 0); //數據預取
u = 0;
v = 0;
m = 0;
while(*buf != ',') u = u * 10 + (*buf++ - '0'), ++n;
++buf;
++n;
while(*buf != ',') v = v * 10 + (*buf++ - '0'), ++n;
++buf;
++n;
//\r
while(*buf >= '0') m = m * 10 + (*buf++ - '0'), ++n;
while(*buf < '0') ++buf, ++n;
//壓入
*(dataAll + dataAlln++) = u;
*(dataAll + dataAlln++) = v;
*(dataAll + dataAlln++) = m;
//根據狀態確定是否壓入
if(!NumMap->test(u)){
*(tmp + tmpn++) = u;
NumMap->set(u);
}
if(!NumMap->test(v)){
*(tmp + tmpn++) = v;
NumMap->set(v);
}
}
這裏的主要優化點有:
- mmap讀取
- 爲了節省去重時間,利用bitmap把不重複的節點值記錄在tmp中,bitmap的實現如下:
// Bitmap layout: 32-bit words; index i lives in word (i >> SHIFT), bit (i & MASK).
#define WORD 32
#define SHIFT 5 // log2(WORD)
#define MASK 0x1F // WORD - 1 (31)
#define MAXN 2147483647
/**
 * Fixed-size bit set covering the indices 0..MAXN.
 *
 * The backing store is zero-filled at construction, so every bit starts
 * cleared, and it is released by the destructor. Indices must be
 * non-negative; no range checking is performed.
 */
class BitMap{
private:
    unsigned int *bitmap;
public:
    BitMap(){
        // calloc returns zero-filled storage, so test() never reads
        // indeterminate memory.
        bitmap = static_cast<unsigned int*>(std::calloc(1 + MAXN / WORD, sizeof(unsigned int)));
        if(bitmap == NULL) throw std::bad_alloc();
    }
    ~BitMap(){
        std::free(bitmap);
    }
    // The object owns its buffer; a shallow copy would free it twice.
    BitMap(const BitMap&) = delete;
    BitMap& operator=(const BitMap&) = delete;
    /** Sets bit i. */
    void set(int i){
        // Unsigned arithmetic keeps the shift defined for bit 31.
        bitmap[i >> SHIFT] |= (1u << (i & MASK));
    }
    /** Returns non-zero when bit i is set and 0 otherwise. */
    int test(int i){
        return (bitmap[i >> SHIFT] >> (i & MASK)) & 1u;
    }
};
2 建立映射及數據預處理
//轉字符串 第一位放長度
char *idsComma = new char[11 * DATASIZE];
//映射
unordered_map<int, int> graphmap;
//建立映射
int mapId = 0, id = 0;
graphmap.reserve(datasize);
for(int i = 0; i < tmpn; ++i){
id = tmp[i];
int2char(id,idsComma,mapId);
graphmap[id]=mapId++;
}
//多線程轉換
int splitNum = dataAlln / 3;
splitNum /= 4;
int start[5] = {0, splitNum * 3, 2 * splitNum * 3, 3 * splitNum * 3, dataAlln};
vector<thread> spTreads;
for(int i = 0; i < THREADNUM; i++){
spTreads.push_back(thread(toMapId,
start[i], start[i+1]));
}
for(auto iter = spTreads.begin(); iter != spTreads.end(); iter++){
iter->join();
}
//計算一次出入度
for(int i=0;i<dataAlln;i+=3){
u=dataAll[i];
v=dataAll[i+1];
outNum[u] += 2;
inNum[v] += 2;
}
這裏的主要優化點有:
- 提前建立節點到字符串的映射:idsComma中每個節點佔11個字節,第1個字節存放字符串的長度,其後存放該節點的十進制字符
- map預分配空間,避免擴容的開銷
- 多線程將dataAll中的實際值轉爲映射值
- 提前計算出入度,爲正反向圖的建立做準備
3 建立正反向圖
這裏以反向圖爲例,正向圖的建立方法同反向圖
//反向圖建圖
void buildMapBack(){
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
//根據出入度確定地址
int graphBackIdx = 0;
for(int i = 0; i < datasize; i++){
inAddr[i] = graphBackIdx;
graphBackIdx += inNum[i];
inNum[i] = 0;
}
//設置最大節點地址
inAddr[datasize] = graphBackIdx;
int u, v, m;
int pos;
for(int i=0;i<dataAlln;i+=3){
u=dataAll[i];
v=dataAll[i+1];
m=dataAll[i+2];
pos = inAddr[v] + inNum[v];
graph_back[pos]=u;
graph_back[pos + 1]=m;
inNum[v] += 2;
}
//對反向圖數據進行排序
for(int i = 0; i < datasize; ++i) {
int size = inNum[i];
if(size > 2){
int ssize = size >> 1;
vector<pair<int, int>> temp(ssize, make_pair(0, 0));
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
temp[vj] = make_pair(graph_back[inAddr[i] + vi], graph_back[inAddr[i] + vi + 1]);
}
sort(temp.begin(), temp.end(), [](pair<int, int>&a, pair<int, int>&b)->bool{
return a.first > b.first;
});
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
graph_back[inAddr[i] +vi] = temp[vj].first;
graph_back[inAddr[i] +vi + 1] = temp[vj].second;
}
}
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "buildMapBack Time: " << tPassed << endl;
#endif
}
爲了提高程序效率,正反向圖都採用數組來進行存儲。由於預先不知道需要分配多大的空間,採用類前向星來進行緊密存儲,首先根據出入度確定地址,然後依次放入映射後的節點值,最後對鄰接節點進行排序,這裏有一個較大的優化點爲:
- 由於題目要求環內以最小節點爲起始點,進行排序時,鄰節點從大到小排序,這樣可以遍歷到小於起始節點的節點,加快DFS效率
鯤鵬下測試,1963W環這部分耗時190ms。
03 多線程找環運動
找環部分爲了均衡各個線程,使用了atomic_flag原子操作,將每個起始節點的查找當做一次任務,利用搶佔式來爭奪資源,經測試,這種方法線下線上都是比較均衡的。
// Worker loop: claims start nodes in [sstart, eend) one at a time via the
// per-node atomic flag and searches all rings rooted at each claimed node.
// `id` selects this worker's private buffers (path, posCount, flagNode, ...).
inline void FindCycleThread(int sstart, int eend, int id){
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    // Write offsets of the five ring-length buffers before a root is searched.
    int before[5];
    for(int root = sstart; root < eend; ++root){
        // Another worker already owns this root.
        if(flag[root].test_and_set()) continue;
        // A node without incoming or outgoing edges cannot lie on a ring.
        if(inNum[root] == 0 || outNum[root] == 0) continue;
        // Remember where this root's rings will begin in each buffer.
        for(int d = 0; d < 5; ++d){
            before[d] = posCount[id][d];
            AnsSort[d][root] = path[id][d] + before[d];
        }
        // Backward marking first; forward search only if something was marked.
        dfsBack(root, id);
        if(pathNode[id][0]) dfsFur(root, id);
        // Restore the "unmarked" value (4) on every node dfsBack touched.
        const int touched = pathNode[id][0];
        for(int k = 1; k <= touched; ++k){
            flagNode[id][pathNode[id][k]] = 4;
        }
        flagNode[id][root] = 4;
        // Number of ints appended for this root, per ring length.
        for(int d = 0; d < 5; ++d){
            AnsSortCount[d][root] = posCount[id][d] - before[d];
        }
    }
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "FindCycleThread thread: " << id << " time: " << tPassed << endl;
#endif
}
因爲搶佔式資源競爭會導致直接輸出的結果是無序的,所以先用兩個數組AnsSort和AnsSortCount分別記錄以某個節點爲起始的3-7環分別的位置以及數量,便於輸出時合併。
031 反向DFS 3層構建P3
在構建P3時,需要記錄每個節點的位置(1、2、3)、哪些節點被記錄以及最後一個路徑權重這三個信息。
// Per-thread scratch state used by the ring search.
// flagNode: per-node mark written by dfsBack (1, 2 or 3) and read by dfsFur.
uint8_t flagNode[THREADNUM][DATASIZE];
// pathNode: list of nodes marked by dfsBack; element 0 holds the list length.
int pathNode[THREADNUM][DATASIZE];
// money: weight of the edge (node -> start) for nodes marked 1.
int money[THREADNUM][DATASIZE];
// Backward marking pass for one start node.
//
// Walks graph_back up to three hops from `start`, visiting only nodes whose
// id is greater than `start` (each loop breaks at the first neighbour
// <= start, which relies on the neighbour lists being stored largest-first).
// Nodes reached are marked in flagNode[id] with 1, 2 or 3 and appended to
// pathNode[id]; consecutive edge weights must pass checkMoney.
// `id` is the worker index selecting the per-thread arrays.
inline void dfsBack(int start, int id){
// Slot 0 of pathNode is reserved for the count, so entries start at 1.
int pathNodeCnt = 1;
int i = start;
flagNode[id][i] = 0;
int tk,tj,te;
int nomy1, nomy2, nomy3;
int k = inAddr[i];
for(;k<inAddr[i+1];k+=2){
tk = graph_back[k];
if(tk <= i) break;
nomy1 = graph_back[k + 1];
// First level: remember the weight of the edge tk -> start.
money[id][tk] = nomy1;
flagNode[id][tk] = 1;
pathNode[id][pathNodeCnt++] =(tk);
int j = inAddr[tk];
for(;j<inAddr[tk + 1];j+=2){
tj = graph_back[j];
if(tj <= i) break;
nomy2 = graph_back[j + 1];
if(checkMoney(nomy2, nomy1)) continue;
// Only lower a mark of 3 or 4 to 2; an existing mark of 1 or 2 is kept.
if(flagNode[id][tj] > 2){
pathNode[id][pathNodeCnt++] = (tj);
flagNode[id][tj] = 2;
}
int e = inAddr[tj];
for(;e<inAddr[tj+1];e+=2){
te = graph_back[e];
if(te <= i) break;
// Already carries a mark (0..3): nothing to do.
if(flagNode[id][te] <= 3) continue;
nomy3 = graph_back[e + 1];
if(checkMoney(nomy3, nomy2)) continue;
// Mark qualifying nodes; per the original note, a node that can be
// marked 1 or 2 necessarily also has a qualifying 3-hop path.
flagNode[id][te] = 3;
pathNode[id][pathNodeCnt++] = (te);
}
}
}
// Store the number of recorded nodes in slot 0.
pathNode[id][0] = pathNodeCnt - 1;
}
這裏的主要優化點有:
- 儘量使用小的數據結構進行存儲,如flagNode類型爲uint8_t,如換爲int,則大大降低運行效率
- 只用一個數組去判斷鄰接節點是否讀取完,如加入另一個記錄出入度的數組,則不利Cache,將大大降低運行效率
032 正向DFS 6層找環
正向的時候,根據反向得到的信息進行剪枝。
// Forward search for all rings of length 3..7 that start at `start`.
//
// Six nested loops walk the forward adjacency (graph / outAddr). Only nodes
// with id >= start are followed (loops break at the first smaller neighbour,
// relying on largest-first neighbour order), so `start` is the smallest node
// of every ring found. Consecutive edge weights must pass checkMoney, and the
// closing edge is checked against the first edge. From layer 5 on, nodes are
// skipped unless flagNode (filled by dfsBack) says they are close enough to
// `start`. Rings are appended to path[id][len - 3] and posCount[id][len - 3]
// is advanced; ansCount[id] is increased by the number of rings found.
inline void dfsFur(int start, int id){
// Layer 1: the start node itself.
int i = start;
// Nodes at layers 2..7.
int tj,tk,tl,tm,tn,to;
// Edge weights along the current path.
int nomy1, nomy2,nomy3,nomy4,nomy5,nomy6,nomy7;
// Rings found in this call, and a scratch write offset.
int num = 0, posk = 0;
// Layer 2
int it2 = outAddr[i];
for(;it2 < outAddr[i+1]; it2 += 2){
tj = graph[it2];
if(tj <= i) break;
nomy1 = graph[it2 + 1];
// Layer 3
int it3 = outAddr[tj];
for(; it3 < outAddr[tj+1]; it3 += 2){
tk = graph[it3];
if(tk <= i) break;
nomy2 = graph[it3 + 1];
// Weight constraint between edge 1 and edge 2.
if(checkMoney(nomy1, nomy2)) continue;
// Layer 4
int it4 = outAddr[tk];
for(; it4 < outAddr[tk + 1]; it4 += 2){
tl = graph[it4];
if(tl < i) break;
nomy3 = graph[it4 + 1];
if(checkMoney(nomy2, nomy3)) continue;
else if(tl == i){
if(checkMoney(nomy3, nomy1)) continue;
// Found a ring of length 3.
auto& pathk = path[id][0];
posk = posCount[id][0];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
posCount[id][0] += 3;
++num;
continue;
}
else if(tl == tj) continue;
// Layer 5
int it5 = outAddr[tl];
for(; it5 < outAddr[tl+1]; it5+=2){
tm = graph[it5];
if(tm < i) break;
nomy4 = graph[it5 + 1];
// Skip nodes dfsBack did not mark (mark > 3).
if(flagNode[id][tm] > 3 || checkMoney(nomy3, nomy4)) continue;
else if(tm == i){
if(checkMoney(nomy4, nomy1)) continue;
// Found a ring of length 4.
auto& pathk = path[id][1];
posk = posCount[id][1];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
posCount[id][1] += 4;
++num;
continue;
}
else if(tm == tj || tm == tk) continue;
// Layer 6
int it6 = outAddr[tm];
for(;it6 < outAddr[tm+1]; it6+=2){
tn = graph[it6];
if(tn < i) break;
nomy5 = graph[it6+1];
// Only nodes marked 0, 1 or 2 can still close a ring of length <= 7.
if(flagNode[id][tn] > 2 || checkMoney(nomy4, nomy5)) continue;
else if(tn == i){
if(checkMoney(nomy5, nomy1)) continue;
// Found a ring of length 5.
auto& pathk = path[id][2];
posk = posCount[id][2];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
posCount[id][2] += 5;
++num;
continue;
}
else if(tn == tj || tn == tk || tn == tl) continue;
// Layer 7
int it7 = outAddr[tn];
for(; it7 < outAddr[tn+1]; it7+=2){
to = graph[it7];
if(to < i) break;
nomy6 = graph[it7+1];
// Only the start (mark 0) or nodes marked 1 are accepted here.
if(flagNode[id][to] > 1 || checkMoney(nomy5, nomy6)) continue;
else if(to == i){
if(checkMoney(nomy6, nomy1)) continue;
// Found a ring of length 6.
auto& pathk = path[id][3];
posk = posCount[id][3];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
*(pathk + posk + 5) = tn;
posCount[id][3] += 6;
++num;
continue;
}
else if(to == tj || to == tk || to == tl || to == tm) continue;
else{
// The closing edge to -> start is not walked; its weight was
// recorded by dfsBack in money[id][to].
nomy7 = money[id][to];
if(checkMoney(nomy6, nomy7) || checkMoney(nomy7, nomy1)) continue;
// Found a ring of length 7.
auto& pathk = path[id][4];
posk = posCount[id][4];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
*(pathk + posk + 5) = tn;
*(pathk + posk + 6) = to;
posCount[id][4] += 7;
++num;
}
}
}
}
}
}
}
ansCount[id] += num;
}
這裏的主要優化點有:
- 將遞歸展開成for循環
- 不必用vis記錄遍歷過的節點,使用條件判斷
- 條件判斷的順序
- 提前取地址,如:
auto& pathk = path[id][4];
posk = posCount[id][4];
04 結果輸出
由於找環中使用了原子操作,所以在輸出之前首先要將答案重新排序,之後再輸出。
// Merges the per-thread ring buffers into final output order, converts them
// to text on THREADNUM threads and writes the result file.
//
// Output: first line is the total ring count, then one ring per line,
// grouped by length (3..7) and, within a length, by start node.
//
// @param resultFile path of the file to create/overwrite.
// @return 1 on success, 0 if a buffer could not be allocated or the file
//         could not be opened or written.
inline int storePredict(string resultFile){
#ifdef TEST
    timeval tStart, cTime, bTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    int resultC = 0;
    for(int i = 0; i < THREADNUM; ++i) resultC += ansCount[i];
    // 1. Gather all ring nodes in output order (sized generously).
    int* ansNode = (int*)malloc((size_t)MAXCIRCLES * 2 * MAXCHARNUM * sizeof(int));
    if(ansNode == NULL){
        cout << "storePredict: out of memory" << endl;
        return 0;
    }
    int charNum = 0;
    // lenNum[d] = index in ansNode where rings of length d + 3 begin.
    int lenNum[DEPTH] = {0};
    for(int i = 0; i < DEPTH; ++i){
        const int len = i + 3;
        lenNum[i] = charNum;
        for(int j = 0; j < datasize; ++j){
            const int depNum = AnsSortCount[i][j];
            if(depNum == 0) continue;
            const int* addrNum = AnsSort[i][j];
            // Neighbour lists are stored largest-first, so the rings of one
            // start node were found in reverse order: copy them back to front.
            for(int t = 0; t < depNum; t += len){
                int dst = charNum + depNum - t - len;
                for(int tt = 0; tt < len; ++tt){
                    ansNode[dst++] = *(addrNum + t + tt);
                }
            }
            charNum += depNum;
        }
    }
#ifdef TEST
    gettimeofday(&bTime, 0);
    bTime.tv_sec -= tStart.tv_sec;
    bTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * bTime.tv_sec + bTime.tv_usec;
    tPassed /= 1000;
    cout << "toInt Time: " << tPassed << endl;
#endif
    // 2. Split the node stream evenly; CombineChar copes with chunk
    //    boundaries that fall in the middle of a ring.
    const int JustNode = charNum / THREADNUM;
    int start[THREADNUM + 1];
    for(int i = 0; i < THREADNUM; ++i) start[i] = i * JustNode;
    start[THREADNUM] = charNum;
    // 3. One text buffer per conversion thread.
    char* charNode[THREADNUM] = {NULL};
    int charCount[THREADNUM] = {0};
    bool allocated = true;
    for(int i = 0; i < THREADNUM; ++i){
        charNode[i] = (char*)malloc((size_t)MAXCIRCLES * MAXCHARNUM * 12);
        if(charNode[i] == NULL) allocated = false;
    }
    int ok = 0;
    if(!allocated){
        cout << "storePredict: out of memory" << endl;
    }else{
        vector<thread> myTreads;
        for(int i = 0; i < THREADNUM; i++){
            myTreads.push_back(thread(CombineChar, ansNode, start[i], start[i+1], charNode[i], i, &charCount[i], lenNum));
        }
        for(auto iter = myTreads.begin(); iter != myTreads.end(); iter++){
            iter->join();
        }
        // 4. Write the count line followed by the per-thread chunks in order.
        FILE *fp = fopen(resultFile.c_str(), "w");
        if(fp == NULL){
            cout << "open resultFile error" << endl;
        }else{
            char buf[32];
            const int idx = snprintf(buf, sizeof(buf), "%d\n", resultC);
            bool wrote = fwrite(buf, sizeof(char), idx, fp) == (size_t)idx;
            for(int i = 0; i < THREADNUM; ++i){
                if(charCount[i] > 0 &&
                   fwrite(charNode[i], sizeof(char), charCount[i], fp) != (size_t)charCount[i]){
                    wrote = false;
                }
            }
            // fclose flushes; a failure here means data was lost.
            if(fclose(fp) != 0) wrote = false;
            ok = wrote ? 1 : 0;
        }
    }
    for(int i = 0; i < THREADNUM; ++i) free(charNode[i]);
    free(ansNode);
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "storePredict Time: " << tPassed << endl;
    cout<< "find : "<<resultC<<endl;
#endif
    return ok;
}
首先根據預存的AnsSort地址信息和AnsSortCount數量信息,得到排序好的結果,然後利用多線程進行字符串轉換,最後通過fwrite輸出。
這裏的主要優化點有:
- 在排序的時候將以某個節點爲起始節點的一系列環路同時排序(之前由於建圖鄰接點順序爲從大到小,所以得到以某一個節點的一系列某一長度的環路順序是反的)
- 字符串轉換時,充分考慮負載均衡
- 儘量不要多次使用fwrite進行輸出,當輸出次數很多時會嚴重降低程序效率
- mmap多線程在線下有效果,線上表現不如fwrite
05 寫在最後
由於賽題以程序運行時間爲唯一指標,所以代碼上應儘可能考慮程序的效率。這一點直接淘汰了Python和Java,導致清一色C++選手。從代碼的角度來說,爲了提高運行效率,可以看到,我們儘可能避免使用STL庫,比如用數組去代替vector、減少使用map等耗時的數據結構,其他方面去掉類、用for循環去代替迭代、用直接判斷去代替vis、用uint8_t去代替int,甚至有些大佬去定製數據結構,進行內存的對齊等,均能在一定程度上提高成績。於我而言,雖也曾有進決賽的機會,B榜同部分大佬無成績,然收穫亦是不小,繼續努力,再接再厲!
全部代碼如下:
#include <sys/time.h>
#include <bits/stdc++.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <fcntl.h>
// When defined, every stage prints its elapsed time in milliseconds.
#define TEST
// Original note: "maximum node ID". In this file it is used as the capacity
// of the per-node arrays and (times 3) of the raw record buffer.
#define DATASIZE 2000000
// Number of worker threads.
#define THREADNUM 4
// Number of distinct ring lengths (3..7).
#define DEPTH 5
// Maximum number of rings.
#define MAXCIRCLES 20000000
// Maximum number of ints (nodes) per ring.
#define MAXCHARNUM 7
// Bitmap parameters.
#define WORD 32
#define SHIFT 5 // shift by 5, i.e. divide by WORD
#define MASK 0x1F // 31 in hexadecimal
#define MAXN 2147483647
using namespace std;
// Number of distinct nodes (set by loadTestData).
int datasize;
// ansCount[t]: rings found by thread t (accumulated in dfsFur).
int ansCount[THREADNUM]={0};
// posCount[t][d]: current write offset into path[t][d].
int posCount[THREADNUM][DEPTH]={0};
// Per-node in/out slot counts (two ints per edge).
int inNum[DATASIZE];
int outNum[DATASIZE];
// Start offset of each node's slice in graph_back / graph. One extra slot is
// reserved because the builders write a sentinel at index `datasize` and the
// searches read [node + 1] as the end of a slice.
int inAddr[DATASIZE + 1];
int outAddr[DATASIZE + 1];
// Used to restore output order: for ring length d + 3 and start node v,
// AnsSort[d][v] points at v's first ring and AnsSortCount[d][v] is the
// number of ints stored for v.
int* AnsSort[DEPTH][DATASIZE] = {0};
int AnsSortCount[DEPTH][DATASIZE] = {0};
// Per-thread scratch state for the ring search.
uint8_t flagNode[THREADNUM][DATASIZE];
int pathNode[THREADNUM][DATASIZE];
int money[THREADNUM][DATASIZE];
// Node id -> decimal text table: 11 bytes per node, the first byte holds
// the length.
char *idsComma = new char[11 * DATASIZE];
// Forward and reverse adjacency storage (neighbour id, weight pairs).
int graph[DATASIZE * 2];
int graph_back[DATASIZE * 2];
// Original node id -> dense index.
unordered_map<int, int> graphmap;
// Result buffers, one per thread and ring length. Thread 1:
int *path13 = new int[3 * MAXCIRCLES];
int *path14 = new int[4 * MAXCIRCLES];
int *path15 = new int[5 * MAXCIRCLES];
int *path16 = new int[6 * MAXCIRCLES];
int *path17 = new int[7 * MAXCIRCLES];
// Thread 2
int *path23 = new int[3 * MAXCIRCLES];
int *path24 = new int[4 * MAXCIRCLES];
int *path25 = new int[5 * MAXCIRCLES];
int *path26 = new int[6 * MAXCIRCLES];
int *path27 = new int[7 * MAXCIRCLES];
// Thread 3
int *path33 = new int[3 * MAXCIRCLES];
int *path34 = new int[4 * MAXCIRCLES];
int *path35 = new int[5 * MAXCIRCLES];
int *path36 = new int[6 * MAXCIRCLES];
int *path37 = new int[7 * MAXCIRCLES];
// Thread 4
int *path43 = new int[3 * MAXCIRCLES];
int *path44 = new int[4 * MAXCIRCLES];
int *path45 = new int[5 * MAXCIRCLES];
int *path46 = new int[6 * MAXCIRCLES];
int *path47 = new int[7 * MAXCIRCLES];
// path[thread][ring length - 3] -> buffer above.
int *path[4][5] = {{path13, path14, path15, path16, path17},
{path23, path24, path25, path26, path27},
{path33, path34, path35, path36, path37},
{path43, path44, path45, path46, path47}};
/**
 * Fixed-size bit set covering the indices 0..MAXN.
 *
 * The backing store is zero-filled at construction, so every bit starts
 * cleared, and it is released by the destructor. Indices must be
 * non-negative; no range checking is performed.
 */
class BitMap{
private:
    unsigned int *bitmap;
public:
    BitMap(){
        // calloc returns zero-filled storage, so test() never reads
        // indeterminate memory.
        bitmap = static_cast<unsigned int*>(std::calloc(1 + MAXN / WORD, sizeof(unsigned int)));
        if(bitmap == NULL) throw std::bad_alloc();
    }
    ~BitMap(){
        std::free(bitmap);
    }
    // The object owns its buffer; a shallow copy would free it twice.
    BitMap(const BitMap&) = delete;
    BitMap& operator=(const BitMap&) = delete;
    /** Sets bit i. */
    void set(int i){
        // Unsigned arithmetic keeps the shift defined for bit 31.
        bitmap[i >> SHIFT] |= (1u << (i & MASK));
    }
    /** Returns non-zero when bit i is set and 0 otherwise. */
    int test(int i){
        return (bitmap[i >> SHIFT] >> (i & MASK)) & 1u;
    }
};
// Writes the decimal text of `v` into entry `p` of the table `s1`.
// Each entry is 11 bytes: byte 0 holds the digit count, bytes 1.. hold the
// digits, most significant first. No terminator is written.
void int2char(int v, char* s1, int p) {
    char* slot = s1 + 11 * p;
    if (v == 0) {
        slot[0] = 1;
        slot[1] = '0';
        return;
    }
    // Peel digits off least-significant first.
    char reversed[12] = "";
    int count = 0;
    for (int rest = v; rest; rest = rest / 10) {
        reversed[count++] = rest % 10 + '0';
    }
    // Store them in reading order behind the length byte.
    for (int k = 1; k <= count; ++k) {
        slot[k] = reversed[count - k];
    }
    slot[0] = count;
}
//記錄所有數據
int* dataAll;
int dataAlln = 0;
void buildMapFur(){
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
//根據出入度確定地址
int graphIdx = 0;
for(int i = 0; i < datasize; i++){
outAddr[i] = graphIdx;
graphIdx += outNum[i];
outNum[i] = 0;
}
outAddr[datasize] = graphIdx;
int u, v, m;
int pos;
for(int i=0;i<dataAlln;i+=3){
u=dataAll[i];
v=dataAll[i+1];
m=dataAll[i+2];
pos = outAddr[u] + outNum[u];
graph[pos]=v;
graph[pos + 1]=m;
outNum[u] += 2;
}
//對正向圖進行排序
for(int i = 0; i < datasize; ++i) {
int size = outNum[i];
if(size > 2){
int ssize = size >> 1;
vector<pair<int, int>> temp(ssize, make_pair(0, 0));
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
temp[vj] = make_pair(graph[outAddr[i] + vi], graph[outAddr[i] + vi + 1]);
}
sort(temp.begin(), temp.end(), [](pair<int, int>&a, pair<int, int>&b)->bool{
return a.first > b.first;
});
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
graph[outAddr[i] +vi] = temp[vj].first;
graph[outAddr[i] +vi + 1] = temp[vj].second;
}
}
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "buidMapFur Time: " << tPassed << endl;
#endif
}
//反向圖建圖
void buildMapBack(){
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
//根據出入度確定地址
int graphBackIdx = 0;
for(int i = 0; i < datasize; i++){
inAddr[i] = graphBackIdx;
graphBackIdx += inNum[i];
inNum[i] = 0;
}
//設置最大節點地址
inAddr[datasize] = graphBackIdx;
int u, v, m;
int pos;
for(int i=0;i<dataAlln;i+=3){
u=dataAll[i];
v=dataAll[i+1];
m=dataAll[i+2];
pos = inAddr[v] + inNum[v];
graph_back[pos]=u;
graph_back[pos + 1]=m;
inNum[v] += 2;
}
//對反向圖數據進行排序
for(int i = 0; i < datasize; ++i) {
int size = inNum[i];
if(size > 2){
int ssize = size >> 1;
vector<pair<int, int>> temp(ssize, make_pair(0, 0));
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
temp[vj] = make_pair(graph_back[inAddr[i] + vi], graph_back[inAddr[i] + vi + 1]);
}
sort(temp.begin(), temp.end(), [](pair<int, int>&a, pair<int, int>&b)->bool{
return a.first > b.first;
});
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
graph_back[inAddr[i] +vi] = temp[vj].first;
graph_back[inAddr[i] +vi + 1] = temp[vj].second;
}
}
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "buildMapBack Time: " << tPassed << endl;
#endif
}
inline void toMapId(int sstart, int eend){
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
for(int i=sstart;i<eend;i += 3){
dataAll[i]=graphmap[dataAll[i]];
dataAll[i+1]=graphmap[dataAll[i+1]];
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "toMapId thread = " << tPassed << endl;
#endif
}
// Reads the edge list, assigns dense node indices and builds both graphs.
//
// Each record is "from,to,weight" followed by one or more characters below
// '0' (e.g. "\r\n"). After parsing, the distinct node ids are sorted and
// numbered 0..datasize-1, the decimal text of every id is stored in
// idsComma, the records are rewritten to dense indices on THREADNUM threads,
// and the forward/reverse adjacency is built on two threads.
//
// @param testFile path of the input file.
// @return 1 on success; 0 (false) if the file cannot be opened or mapped,
//         memory is exhausted, an id exceeds MAXN, or the input does not
//         fit the fixed-size buffers.
inline int loadTestData(string testFile){
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    const int fd = open(testFile.c_str(), O_RDONLY);
    if(fd < 0){
        cout << "open testFile error" << endl;
        return false;
    }
    const long dataSize = lseek(fd, 0, SEEK_END); // total number of bytes
    char* mapBase = NULL;
    if(dataSize > 0){
        mapBase = (char*)mmap(NULL, dataSize, PROT_READ, MAP_PRIVATE, fd, 0);
    }
    // The mapping stays valid after the descriptor is closed.
    close(fd);
    if(dataSize < 0 || mapBase == (char*)MAP_FAILED){
        cout << "read testFile error" << endl;
        return false;
    }
    dataAll = (int*)malloc(DATASIZE * 3 * sizeof(int));
    int* tmp = (int*)malloc(DATASIZE * sizeof(int)); // distinct node ids
    if(dataAll == NULL || tmp == NULL){
        cout << "loadTestData: out of memory" << endl;
        free(dataAll);
        free(tmp);
        dataAll = NULL;
        if(mapBase != NULL) munmap(mapBase, dataSize);
        return false;
    }
    int tmpn = 0;
    bool parsedOk = true;
    {
        BitMap NumMap; // marks ids that are already in tmp
        const char* cur = mapBase;
        const char* const end = mapBase + dataSize;
        while(cur < end){
            __builtin_prefetch(cur + 1024, 0); // prefetch upcoming input
            // Unsigned accumulation: no signed overflow on malformed input.
            unsigned int u = 0, v = 0, m = 0;
            while(cur < end && *cur != ',') u = u * 10 + (unsigned int)(*cur++ - '0');
            if(cur >= end) break; // truncated record
            ++cur;
            while(cur < end && *cur != ',') v = v * 10 + (unsigned int)(*cur++ - '0');
            if(cur >= end) break; // truncated record
            ++cur;
            while(cur < end && *cur >= '0') m = m * 10 + (unsigned int)(*cur++ - '0');
            // Skip the line terminator without running past the mapping.
            while(cur < end && *cur < '0') ++cur;
            // Ids index the bitmap and the buffers are fixed-size.
            if(u > MAXN || v > MAXN || dataAlln + 3 > DATASIZE * 3){
                parsedOk = false;
                break;
            }
            *(dataAll + dataAlln++) = (int)u;
            *(dataAll + dataAlln++) = (int)v;
            *(dataAll + dataAlln++) = (int)m;
            // Record each id the first time it is seen. One index is kept
            // free because the graph builders write a sentinel offset at
            // index `datasize`.
            if(!NumMap.test((int)u)){
                if(tmpn >= DATASIZE - 1){ parsedOk = false; break; }
                *(tmp + tmpn++) = (int)u;
                NumMap.set((int)u);
            }
            if(!NumMap.test((int)v)){
                if(tmpn >= DATASIZE - 1){ parsedOk = false; break; }
                *(tmp + tmpn++) = (int)v;
                NumMap.set((int)v);
            }
        }
    }
    if(mapBase != NULL) munmap(mapBase, dataSize);
    if(!parsedOk){
        cout << "testFile exceeds supported size or id range" << endl;
        free(dataAll);
        free(tmp);
        dataAll = NULL;
        dataAlln = 0;
        return false;
    }
    sort(tmp, tmp + tmpn);
    datasize = tmpn;
    // Dense index = rank of the id in sorted order; also pre-render its text.
    int mapId = 0;
    graphmap.reserve(datasize);
    for(int i = 0; i < tmpn; ++i){
        const int id = tmp[i];
        int2char(id, idsComma, mapId);
        graphmap[id] = mapId++;
    }
    // Convert the records to dense indices in parallel; each thread gets a
    // whole number of triples and the last one takes the remainder.
    const int splitNum = (dataAlln / 3) / THREADNUM;
    vector<thread> spTreads;
    for(int i = 0; i < THREADNUM; i++){
        const int from = i * splitNum * 3;
        const int to = (i == THREADNUM - 1) ? dataAlln : (i + 1) * splitNum * 3;
        spTreads.push_back(thread(toMapId, from, to));
    }
    for(auto iter = spTreads.begin(); iter != spTreads.end(); iter++){
        iter->join();
    }
    // Count slots per node: two ints (neighbour, weight) per edge.
    for(int i = 0; i < dataAlln; i += 3){
        outNum[dataAll[i]] += 2;
        inNum[dataAll[i + 1]] += 2;
    }
    // Build the forward and reverse graphs concurrently.
    thread bulidMap1 = thread(buildMapFur);
    thread bulidMap2 = thread(buildMapBack);
    bulidMap1.join();
    bulidMap2.join();
    free(dataAll);
    free(tmp);
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "loadTestData Time: " << tPassed << endl;
#endif
    return 1;
}
// Returns true when two consecutive edge weights violate the allowed ratio,
// i.e. when mony2 is more than 5x mony1 or mony1 is more than 3x mony2.
// Arguments are widened to long long so the products cannot overflow for
// int-sized weights.
inline bool checkMoney(long long mony2, long long mony1){
    return (mony2 > 5 * mony1) ? true : (mony1 > 3 * mony2);
}
// Backward marking pass for one start node.
//
// Walks graph_back up to three hops from `start`, visiting only nodes whose
// id is greater than `start` (each loop breaks at the first neighbour
// <= start, which relies on the neighbour lists being stored largest-first).
// Nodes reached are marked in flagNode[id] with 1, 2 or 3 and appended to
// pathNode[id]; consecutive edge weights must pass checkMoney.
// `id` is the worker index selecting the per-thread arrays.
inline void dfsBack(int start, int id){
// Slot 0 of pathNode is reserved for the count, so entries start at 1.
int pathNodeCnt = 1;
int i = start;
flagNode[id][i] = 0;
int tk,tj,te;
int nomy1, nomy2, nomy3;
int k = inAddr[i];
for(;k<inAddr[i+1];k+=2){
tk = graph_back[k];
if(tk <= i) break;
nomy1 = graph_back[k + 1];
// First level: remember the weight of the edge tk -> start.
money[id][tk] = nomy1;
flagNode[id][tk] = 1;
pathNode[id][pathNodeCnt++] =(tk);
int j = inAddr[tk];
for(;j<inAddr[tk + 1];j+=2){
tj = graph_back[j];
if(tj <= i) break;
nomy2 = graph_back[j + 1];
if(checkMoney(nomy2, nomy1)) continue;
// Only lower a mark of 3 or 4 to 2; an existing mark of 1 or 2 is kept.
if(flagNode[id][tj] > 2){
pathNode[id][pathNodeCnt++] = (tj);
flagNode[id][tj] = 2;
}
int e = inAddr[tj];
for(;e<inAddr[tj+1];e+=2){
te = graph_back[e];
if(te <= i) break;
// Already carries a mark (0..3): nothing to do.
if(flagNode[id][te] <= 3) continue;
nomy3 = graph_back[e + 1];
if(checkMoney(nomy3, nomy2)) continue;
// Mark qualifying nodes; per the original note, a node that can be
// marked 1 or 2 necessarily also has a qualifying 3-hop path.
flagNode[id][te] = 3;
pathNode[id][pathNodeCnt++] = (te);
}
}
}
// Store the number of recorded nodes in slot 0.
pathNode[id][0] = pathNodeCnt - 1;
}
// Forward search for all rings of length 3..7 that start at `start`.
//
// Six nested loops walk the forward adjacency (graph / outAddr). Only nodes
// with id >= start are followed (loops break at the first smaller neighbour,
// relying on largest-first neighbour order), so `start` is the smallest node
// of every ring found. Consecutive edge weights must pass checkMoney, and the
// closing edge is checked against the first edge. From layer 5 on, nodes are
// skipped unless flagNode (filled by dfsBack) says they are close enough to
// `start`. Rings are appended to path[id][len - 3] and posCount[id][len - 3]
// is advanced; ansCount[id] is increased by the number of rings found.
inline void dfsFur(int start, int id){
// Layer 1: the start node itself.
int i = start;
// Nodes at layers 2..7.
int tj,tk,tl,tm,tn,to;
// Edge weights along the current path.
int nomy1, nomy2,nomy3,nomy4,nomy5,nomy6,nomy7;
// Rings found in this call, and a scratch write offset.
int num = 0, posk = 0;
// Layer 2
int it2 = outAddr[i];
for(;it2 < outAddr[i+1]; it2 += 2){
tj = graph[it2];
if(tj <= i) break;
nomy1 = graph[it2 + 1];
// Layer 3
int it3 = outAddr[tj];
for(; it3 < outAddr[tj+1]; it3 += 2){
tk = graph[it3];
if(tk <= i) break;
nomy2 = graph[it3 + 1];
// Weight constraint between edge 1 and edge 2.
if(checkMoney(nomy1, nomy2)) continue;
// Layer 4
int it4 = outAddr[tk];
for(; it4 < outAddr[tk + 1]; it4 += 2){
tl = graph[it4];
if(tl < i) break;
nomy3 = graph[it4 + 1];
if(checkMoney(nomy2, nomy3)) continue;
else if(tl == i){
if(checkMoney(nomy3, nomy1)) continue;
// Found a ring of length 3.
auto& pathk = path[id][0];
posk = posCount[id][0];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
posCount[id][0] += 3;
++num;
continue;
}
else if(tl == tj) continue;
// Layer 5
int it5 = outAddr[tl];
for(; it5 < outAddr[tl+1]; it5+=2){
tm = graph[it5];
if(tm < i) break;
nomy4 = graph[it5 + 1];
// Skip nodes dfsBack did not mark (mark > 3).
if(flagNode[id][tm] > 3 || checkMoney(nomy3, nomy4)) continue;
else if(tm == i){
if(checkMoney(nomy4, nomy1)) continue;
// Found a ring of length 4.
auto& pathk = path[id][1];
posk = posCount[id][1];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
posCount[id][1] += 4;
++num;
continue;
}
else if(tm == tj || tm == tk) continue;
// Layer 6
int it6 = outAddr[tm];
for(;it6 < outAddr[tm+1]; it6+=2){
tn = graph[it6];
if(tn < i) break;
nomy5 = graph[it6+1];
// Only nodes marked 0, 1 or 2 can still close a ring of length <= 7.
if(flagNode[id][tn] > 2 || checkMoney(nomy4, nomy5)) continue;
else if(tn == i){
if(checkMoney(nomy5, nomy1)) continue;
// Found a ring of length 5.
auto& pathk = path[id][2];
posk = posCount[id][2];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
posCount[id][2] += 5;
++num;
continue;
}
else if(tn == tj || tn == tk || tn == tl) continue;
// Layer 7
int it7 = outAddr[tn];
for(; it7 < outAddr[tn+1]; it7+=2){
to = graph[it7];
if(to < i) break;
nomy6 = graph[it7+1];
// Only the start (mark 0) or nodes marked 1 are accepted here.
if(flagNode[id][to] > 1 || checkMoney(nomy5, nomy6)) continue;
else if(to == i){
if(checkMoney(nomy6, nomy1)) continue;
// Found a ring of length 6.
auto& pathk = path[id][3];
posk = posCount[id][3];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
*(pathk + posk + 5) = tn;
posCount[id][3] += 6;
++num;
continue;
}
else if(to == tj || to == tk || to == tl || to == tm) continue;
else{
// The closing edge to -> start is not walked; its weight was
// recorded by dfsBack in money[id][to].
nomy7 = money[id][to];
if(checkMoney(nomy6, nomy7) || checkMoney(nomy7, nomy1)) continue;
// Found a ring of length 7.
auto& pathk = path[id][4];
posk = posCount[id][4];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
*(pathk + posk + 5) = tn;
*(pathk + posk + 6) = to;
posCount[id][4] += 7;
++num;
}
}
}
}
}
}
}
ansCount[id] += num;
}
// One claim flag per start node; the first worker to set it owns the node.
atomic_flag flag[DATASIZE] = {ATOMIC_FLAG_INIT};
// Worker loop: claims start nodes in [sstart, eend) one at a time via the
// per-node atomic flag and searches all rings rooted at each claimed node.
// `id` selects this worker's private buffers (path, posCount, flagNode, ...).
inline void FindCycleThread(int sstart, int eend, int id){
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    // Write offsets of the five ring-length buffers before a root is searched.
    int before[5];
    for(int root = sstart; root < eend; ++root){
        // Another worker already owns this root.
        if(flag[root].test_and_set()) continue;
        // A node without incoming or outgoing edges cannot lie on a ring.
        if(inNum[root] == 0 || outNum[root] == 0) continue;
        // Remember where this root's rings will begin in each buffer.
        for(int d = 0; d < 5; ++d){
            before[d] = posCount[id][d];
            AnsSort[d][root] = path[id][d] + before[d];
        }
        // Backward marking first; forward search only if something was marked.
        dfsBack(root, id);
        if(pathNode[id][0]) dfsFur(root, id);
        // Restore the "unmarked" value (4) on every node dfsBack touched.
        const int touched = pathNode[id][0];
        for(int k = 1; k <= touched; ++k){
            flagNode[id][pathNode[id][k]] = 4;
        }
        flagNode[id][root] = 4;
        // Number of ints appended for this root, per ring length.
        for(int d = 0; d < 5; ++d){
            AnsSortCount[d][root] = posCount[id][d] - before[d];
        }
    }
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "FindCycleThread thread: " << id << " time: " << tPassed << endl;
#endif
}
inline void FindCycle()
{
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
memset(flagNode, 4, THREADNUM * DATASIZE);
//初始化
vector<thread> myTreads;
for(int i = 0; i < THREADNUM; i++){
myTreads.push_back(thread(FindCycleThread,
0, datasize, i));
}
for(auto iter = myTreads.begin(); iter != myTreads.end(); iter++){
iter->join();
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "FindCycle Time: " << tPassed << endl;
#endif
}
// Renders the dense node indices res[sstart, eend) as text into `buf`.
//
// Writing starts at offset *num and *num is updated to the new end. Every
// node's text is copied from idsComma and followed by ',' — or by '\n' when
// it completes a ring of `len` nodes. `myLen` is the number of nodes of the
// current ring that precede `sstart` (non-zero when a chunk begins in the
// middle of a ring).
inline void toChar(int sstart, int eend, int* res, int len, char* buf, int* num, int myLen){
    int written = *(num);
    int inRing = myLen;
    for(int idx = sstart; idx < eend; ++idx){
        // Table entry: length byte followed by the digits.
        const char* entry = idsComma + 11 * res[idx];
        const int digits = entry[0];
        memcpy(buf + written, entry + 1, digits);
        written += digits;
        ++inRing;
        if(inRing == len){
            buf[written++] = '\n';
            inRing = 0;
        }else{
            buf[written++] = ',';
        }
    }
    *(num) = written;
}
// Converts the slice ansNode[sstart, eend) to text for one output thread.
//
// ansNode holds all rings grouped by length; lenNum[1..4] are the indices
// where the 4-, 5-, 6- and 7-node rings begin. The slice may span several
// groups and may begin or end inside a ring, so it is cut at the group
// boundaries and each piece is passed to toChar with the matching ring
// length and the position inside the ring it starts at.
// The number of characters produced is stored in *Num; `id` is used only
// for the timing message.
inline void CombineChar(int* ansNode, int sstart, int eend, char* buf, int id, int* Num, int* lenNum){
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    const int pos7 = lenNum[4];
    // Groups of ring length 3..6: [groupBegin[k], groupEnd[k]).
    const int groupBegin[4] = {0, lenNum[1], lenNum[2], lenNum[3]};
    const int groupEnd[4] = {lenNum[1], lenNum[2], lenNum[3], lenNum[4]};
    int thPos = 0;
    int myLen = 0;
    if(sstart < pos7){
        for(int k = 0; k < 4; ++k){
            // sstart == -1 means the slice has been fully consumed.
            const bool active = (k == 0) || (sstart != -1);
            if(!active || sstart >= groupEnd[k]) continue;
            const int ringLen = k + 3;
            myLen = (sstart - groupBegin[k]) % ringLen;
            if(eend <= groupEnd[k]){
                // The slice ends inside this group.
                toChar(sstart, eend, ansNode, ringLen, buf, &thPos, myLen);
                sstart = -1;
            }else{
                // Finish this group and continue with the next one.
                toChar(sstart, groupEnd[k], ansNode, ringLen, buf, &thPos, myLen);
                sstart = groupEnd[k];
            }
        }
    }
    // Whatever is left belongs to the 7-node rings.
    if(sstart != -1 && sstart >= pos7){
        myLen = (sstart - pos7) % 7;
        toChar(sstart, eend, ansNode, 7, buf, &thPos, myLen);
    }
    *(Num) = thPos;
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "CombineChar thread: " << id << " time: " << tPassed << endl;
#endif
}
// Merges the per-thread ring buffers into final output order, converts them
// to text on THREADNUM threads and writes the result file.
//
// Output: first line is the total ring count, then one ring per line,
// grouped by length (3..7) and, within a length, by start node.
//
// @param resultFile path of the file to create/overwrite.
// @return 1 on success, 0 if a buffer could not be allocated or the file
//         could not be opened or written.
inline int storePredict(string resultFile){
#ifdef TEST
    timeval tStart, cTime, bTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    int resultC = 0;
    for(int i = 0; i < THREADNUM; ++i) resultC += ansCount[i];
    // 1. Gather all ring nodes in output order (sized generously).
    int* ansNode = (int*)malloc((size_t)MAXCIRCLES * 2 * MAXCHARNUM * sizeof(int));
    if(ansNode == NULL){
        cout << "storePredict: out of memory" << endl;
        return 0;
    }
    int charNum = 0;
    // lenNum[d] = index in ansNode where rings of length d + 3 begin.
    int lenNum[DEPTH] = {0};
    for(int i = 0; i < DEPTH; ++i){
        const int len = i + 3;
        lenNum[i] = charNum;
        for(int j = 0; j < datasize; ++j){
            const int depNum = AnsSortCount[i][j];
            if(depNum == 0) continue;
            const int* addrNum = AnsSort[i][j];
            // Neighbour lists are stored largest-first, so the rings of one
            // start node were found in reverse order: copy them back to front.
            for(int t = 0; t < depNum; t += len){
                int dst = charNum + depNum - t - len;
                for(int tt = 0; tt < len; ++tt){
                    ansNode[dst++] = *(addrNum + t + tt);
                }
            }
            charNum += depNum;
        }
    }
#ifdef TEST
    gettimeofday(&bTime, 0);
    bTime.tv_sec -= tStart.tv_sec;
    bTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * bTime.tv_sec + bTime.tv_usec;
    tPassed /= 1000;
    cout << "toInt Time: " << tPassed << endl;
#endif
    // 2. Split the node stream evenly; CombineChar copes with chunk
    //    boundaries that fall in the middle of a ring.
    const int JustNode = charNum / THREADNUM;
    int start[THREADNUM + 1];
    for(int i = 0; i < THREADNUM; ++i) start[i] = i * JustNode;
    start[THREADNUM] = charNum;
    // 3. One text buffer per conversion thread.
    char* charNode[THREADNUM] = {NULL};
    int charCount[THREADNUM] = {0};
    bool allocated = true;
    for(int i = 0; i < THREADNUM; ++i){
        charNode[i] = (char*)malloc((size_t)MAXCIRCLES * MAXCHARNUM * 12);
        if(charNode[i] == NULL) allocated = false;
    }
    int ok = 0;
    if(!allocated){
        cout << "storePredict: out of memory" << endl;
    }else{
        vector<thread> myTreads;
        for(int i = 0; i < THREADNUM; i++){
            myTreads.push_back(thread(CombineChar, ansNode, start[i], start[i+1], charNode[i], i, &charCount[i], lenNum));
        }
        for(auto iter = myTreads.begin(); iter != myTreads.end(); iter++){
            iter->join();
        }
        // 4. Write the count line followed by the per-thread chunks in order.
        FILE *fp = fopen(resultFile.c_str(), "w");
        if(fp == NULL){
            cout << "open resultFile error" << endl;
        }else{
            char buf[32];
            const int idx = snprintf(buf, sizeof(buf), "%d\n", resultC);
            bool wrote = fwrite(buf, sizeof(char), idx, fp) == (size_t)idx;
            for(int i = 0; i < THREADNUM; ++i){
                if(charCount[i] > 0 &&
                   fwrite(charNode[i], sizeof(char), charCount[i], fp) != (size_t)charCount[i]){
                    wrote = false;
                }
            }
            // fclose flushes; a failure here means data was lost.
            if(fclose(fp) != 0) wrote = false;
            ok = wrote ? 1 : 0;
        }
    }
    for(int i = 0; i < THREADNUM; ++i) free(charNode[i]);
    free(ansNode);
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "storePredict Time: " << tPassed << endl;
    cout<< "find : "<<resultC<<endl;
#endif
    return ok;
}
// Entry point: load the edge list, search for rings, write the result file.
// Returns 0 on success and 1 if the input could not be loaded.
int main()
{
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    string testFile = "test_data_1963w.txt";
    //string testFile = "test_data_1890w.txt";
    //string testFile = "test_data_351.txt";
    //string testFile = "test_data_289.txt";
    //string testFile = "test_data_28.txt";
    //string testFile = "test_data_10000_60000.txt";
    //string testFile = "test_data_10000_40000.txt";
    //string testFile = "test_data_50000.txt";
    //string testFile = "test_data.txt";
    //string testFile = "test_data_Fu.txt";
    string resultFile = "re515.txt";
    //string testFile = "/data/test_data.txt";
    //string resultFile = "/projects/student/result.txt";
    // loadTestData returns 0 on failure; searching without a loaded graph
    // would be pointless.
    if(!loadTestData(testFile)){
        return 1;
    }
    FindCycle();
    storePredict(resultFile);
    //sleep(20);
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "main Time: " << tPassed << endl;
#endif
    return 0;
}