輸入:n 個待排序的數據,p 個處理器。
輸出:從小到大排序的數據。
算法思想:p 個處理器對自己的 n/p 個數據進行局部排序。每個處理器從局部有序的序列中選出 p 個數據作爲自己的代表元素並將它們送到 P0 中,P0 對這 p*p 個元素進行排序,然後從中選取 p-1 個主元並將它們廣播到所有的處理器中。然後每個處理器根據這 p-1 個主元把自己的局部有序序列劃分成 p 個部分,把第 i 個部分送給對應的處理器 Pi,然後再對自己收到的 p 個部分做 p 路歸併排序,最後把它們放到數組的對應位置上即可。
環境:
#include<cstdio>
#include<cmath>
#include<mpi.h>
#include<ctime>
#include<vector>
#include<iostream>
#include<algorithm>
#include<queue>
// Shorthand for a pair of ints; the preprocessor substitutes it textually wherever `pr` appears.
#define pr pair<int,int>
// Makes every name in namespace std visible unqualified for the rest of the file.
using namespace std;
// Sorts the half-open slice [beg, end) of vec in ascending order and then
// records numprocs regularly spaced samples of it:
//     pivots[i] = vec[beg + i * w]   for i in [0, numprocs).
// The caller must provide room for numprocs ints in pivots and guarantee that
// every sampled index is a valid index of vec.
inline void quicksort(int* vec, int beg, int end, int* pivots, int numprocs, int w)
{
    // Local sort of this slice only; elements outside [beg, end) are untouched.
    std::sort(vec + beg, vec + end);

    // Walk the slice with stride w, starting at its first element.
    int sample = beg;
    for (int k = 0; k < numprocs; ++k)
    {
        pivots[k] = vec[sample];
        sample += w;
    }
}
// Merges numprocs already-sorted runs stored in src into dst, ascending.
//
//   src        concatenated runs; run i occupies
//              src[rdispls[i]] .. src[rdispls[i] + recvcount[i] - 1]
//   dst        output buffer with room for at least totalnum ints
//   recvcount  length of each run
//   rdispls    start offset of each run inside src
//   numprocs   number of runs
//   totalnum   number of elements to emit (normally the sum of recvcount)
//
// Each output element is found by scanning the current head of every run, so
// the cost is O(totalnum * numprocs). On equal values the run with the lowest
// index wins. If totalnum is larger than the number of elements actually
// available, merging stops once every run is exhausted and the remaining
// slots of dst are left untouched.
inline void AnoSort(int* src, int* dst, int* recvcount, int* rdispls, int numprocs, int totalnum)
{
    if (numprocs <= 0)
        return;

    std::vector<int> taken(numprocs, 0); // elements already consumed from each run
    int written = 0;
    while (written < totalnum)
    {
        int best = -1;    // run holding the smallest head, -1 while none found
        int smallest = 0; // value of that head; only meaningful when best != -1
        for (int run = 0; run < numprocs; ++run)
        {
            if (taken[run] == recvcount[run])
                continue; // this run is exhausted
            const int candidate = src[rdispls[run] + taken[run]];
            if (best == -1 || candidate < smallest)
            {
                smallest = candidate;
                best = run;
            }
        }
        if (best == -1)
            break; // every run is exhausted: nothing left to merge

        dst[written++] = smallest;
        ++taken[best];
    }
}
// Alternative multi-way merge that uses a min-heap instead of a linear scan.
//
// src holds numprocs sorted runs; run i starts at src[rdispls[i]] and is
// recvcount[i] elements long. All elements are written to dst in ascending
// order. The heap stores (value, run index) pairs, so it never holds more
// than one entry per run.
inline void MergeSort(int* src, int* dst, int* recvcount, int* rdispls, int numprocs)
{
    priority_queue<pr, vector<pr>, greater<pr>> heap; // smallest value on top
    vector<int> consumed(numprocs, 0);                // elements already taken from each run

    // Seed the heap with the first element of every non-empty run.
    for (int run = 0; run < numprocs; ++run)
    {
        if (consumed[run] == recvcount[run])
            continue;
        heap.push(pr(src[rdispls[run] + consumed[run]], run));
        ++consumed[run];
    }

    // Repeatedly emit the global minimum and refill from the run it came from.
    int out = 0;
    while (!heap.empty())
    {
        const pr smallest = heap.top();
        heap.pop();
        dst[out] = smallest.first;
        ++out;

        const int run = smallest.second;
        if (consumed[run] == recvcount[run])
            continue; // that run has nothing left
        heap.push(pr(src[rdispls[run] + consumed[run]], run));
        ++consumed[run];
    }
}
// Collective over MPI_COMM_WORLD: selects the numprocs - 1 global pivots and
// broadcasts them to every rank.
//
//   pivots        this rank's numprocs sample values (input)
//   numprocs      number of ranks in MPI_COMM_WORLD
//   myid          this rank's id in MPI_COMM_WORLD; rank 0 acts as the root
//   final_pivots  output, room for numprocs - 1 ints; filled on every rank
//
// Rank 0 gathers all numprocs * numprocs samples, sorts them and takes the
// elements at indices numprocs, 2 * numprocs, ..., (numprocs - 1) * numprocs.
inline void choosepivots(int* pivots, int numprocs, int myid, int* final_pivots)
{
    const bool is_root = (myid == 0);

    // The receive buffer of MPI_Gather is only significant at the root, so
    // the other ranks do not allocate it at all.
    std::vector<int> gathered(is_root ? numprocs * numprocs : 0);
    MPI_Gather(pivots, numprocs, MPI_INT, gathered.data(), numprocs, MPI_INT, 0, MPI_COMM_WORLD);

    if (is_root)
    {
        std::sort(gathered.begin(), gathered.end());
        for (int i = 1; i < numprocs; ++i)
            final_pivots[i - 1] = gathered[i * numprocs];
    }

    // Publish the chosen pivots to all ranks.
    MPI_Bcast(final_pivots, numprocs - 1, MPI_INT, 0, MPI_COMM_WORLD);
}
// Parallel Sorting by Regular Sampling over MPI_COMM_WORLD.
//
//   vec  array of n ints; every rank passes its own array. Each rank sorts
//        only its own slice of vec in place. On rank 0 the array holds the
//        fully sorted sequence when the function returns.
//   n    number of elements in vec (must be the same on every rank)
//
// Steps: (1) local sort + regular sampling, (2) pivot selection on rank 0 and
// broadcast, (3) partition of the local slice by the pivots, (4) all-to-all
// exchange of the partitions, (5) multi-way merge of the received runs,
// (6) gather of the merged pieces into vec on rank 0.
//
// Rank 0 prints the elapsed time as a difference of clock() values (clock
// ticks of processor time, not seconds).
inline void PSRS(int* vec, int n)
{
    int myid, numprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);     // this rank's id
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs); // number of ranks

    const clock_t start = clock();

    if (n <= 0) // nothing to sort; avoid sampling from an empty array
    {
        if (myid == 0)
            std::cout << clock() - start << std::endl;
        return;
    }

    const int num = n / numprocs; // base slice length per rank
    const int beg = myid * num;   // slice start (inclusive)
    // Slice end (exclusive). The last rank also takes the n % numprocs
    // leftover elements so that no element is left out of the sort.
    const int end = (myid == numprocs - 1) ? n : beg + num;
    const int w = n / (numprocs * numprocs); // sampling stride

    // Step 1: local sort and selection of numprocs samples.
    std::vector<int> pivots(numprocs);
    quicksort(vec, beg, end, pivots.data(), numprocs, w);

    if (numprocs <= 1) // single process: the local sort is the whole job
    {
        std::cout << clock() - start << std::endl;
        return;
    }

    // Step 2: choose the numprocs - 1 global pivots and broadcast them.
    std::vector<int> final_pivots(numprocs - 1);
    choosepivots(pivots.data(), numprocs, myid, final_pivots.data());

    // Step 3: split the sorted local slice into numprocs consecutive parts.
    // Part i holds the elements <= final_pivots[i] that are not in an earlier
    // part; the last part holds everything that remains.
    std::vector<int> sendcount(numprocs), sdispls(numprocs);
    std::vector<int> recvcount(numprocs), rdispls(numprocs);
    int* const local_end = vec + end;
    int* cut = vec + beg;
    for (int i = 0; i < numprocs; ++i)
    {
        // The pivots are non-decreasing, so searching from the previous cut
        // finds the same position as searching the whole slice.
        int* const next_cut = (i == numprocs - 1)
                                  ? local_end
                                  : std::upper_bound(cut, local_end, final_pivots[i]);
        sdispls[i] = static_cast<int>(cut - vec);        // offset of part i in vec
        sendcount[i] = static_cast<int>(next_cut - cut); // length of part i
        cut = next_cut;
    }

    // Step 4: tell every rank how much it will receive, then exchange data.
    MPI_Alltoall(sendcount.data(), 1, MPI_INT, recvcount.data(), 1, MPI_INT, MPI_COMM_WORLD);

    int totalnum = 0; // number of elements this rank ends up with
    for (int i = 0; i < numprocs; ++i)
    {
        rdispls[i] = totalnum;
        totalnum += recvcount[i];
    }

    std::vector<int> result(totalnum);
    MPI_Alltoallv(vec, sendcount.data(), sdispls.data(), MPI_INT,
                  result.data(), recvcount.data(), rdispls.data(), MPI_INT, MPI_COMM_WORLD);

    // Step 5: merge the numprocs sorted runs that were received.
    // MergeSort(result.data(), sort_result.data(), recvcount.data(), rdispls.data(), numprocs)
    // is a heap-based alternative producing the same output.
    std::vector<int> sort_result(totalnum);
    AnoSort(result.data(), sort_result.data(), recvcount.data(), rdispls.data(), numprocs, totalnum);

    // Step 6: rank 0 learns every rank's element count, turns the counts into
    // offsets, and collects all merged pieces into vec.
    std::vector<int> num_idx(numprocs);
    MPI_Gather(&totalnum, 1, MPI_INT, num_idx.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);

    std::vector<int> finalpos(numprocs); // only filled in and used on rank 0
    if (myid == 0)
    {
        finalpos[0] = 0;
        for (int i = 1; i < numprocs; ++i)
            finalpos[i] = finalpos[i - 1] + num_idx[i - 1];
    }
    MPI_Gatherv(sort_result.data(), totalnum, MPI_INT,
                vec, num_idx.data(), finalpos.data(), MPI_INT, 0, MPI_COMM_WORLD);

    if (myid == 0)
        std::cout << clock() - start << std::endl;
}
// Benchmark driver: fills an array with pseudo-random ints and sorts it with
// PSRS across all ranks of MPI_COMM_WORLD.
//
// Every rank builds its own full-size array, seeded from the current time.
// PSRS sorts only this rank's slice of it, and rank 0's array receives the
// final result.
int main(int argc, char* argv[])
{
    // Initialise MPI before doing any real work.
    MPI_Init(&argc, &argv);

    const int n = 10000000; // number of elements to sort
    std::vector<int> vec(n); // owns the buffer, so it is released automatically

    srand(static_cast<unsigned>(time(nullptr)));
    for (int i = 0; i < n; i++)
    {
        //vec[i] = i + 1;
        vec[i] = rand();
    }

    PSRS(vec.data(), n); // sort

    MPI_Finalize();
    return 0;
}