並行計算利用mpi實現PSRS算法

輸入： $n$ 個待排序的數據， $P$ 個處理器。
輸出：從小到大排序的數據。
算法思想： $P$ 個處理器對自己的 $n/P$ 個數據進行局部排序。每個處理器從局部有序的序列中選出 $P$ 個數據作爲自己的代表元素並將它們送到 $P_0$ 中， $P_0$ 對這 $P^2$ 個元素進行排序，然後從中選取 $P-1$ 個主元並將它們廣播到所有的處理器中。然後每個處理器根據這 $P-1$ 個主元把自己的局部有序序列劃分成 $P$ 個部分，把每個部分送給對應的處理器，然後再對自己收到的部分做 $P$ 路歸併排序，最後把它們放到數組的對應位置上即可。
環境：VS2019
代碼：

#include<cstdio>
#include<cmath>
#include<mpi.h>
#include<ctime>
#include<vector>
#include<iostream>
#include<algorithm>
#include<queue>
#define pr pair<int,int>
using namespace std;

// Sorts vec[beg, end) in ascending order, then copies numprocs regularly
// spaced samples of the sorted slice into pivots: pivots[i] = vec[beg + i * w].
//   vec      - array holding the slice to sort (sorted in place)
//   beg, end - half-open index range of the slice
//   pivots   - output array with room for numprocs samples
//   numprocs - number of samples to take
//   w        - distance between consecutive samples
inline void quicksort(int* vec, int beg, int end, int* pivots, int numprocs, int w)
{
	int* const first = vec + beg;
	std::sort(first, vec + end);	// local sort of this slice only
	// One sample per process, taken every w elements from the start of the slice.
	for (int k = 0, offset = 0; k < numprocs; ++k, offset += w)
		pivots[k] = first[offset];
}

// Multi-way merge by linear scan: merges numprocs sorted runs stored in src
// into dst in ascending order.
//   src       - buffer holding the runs; run i occupies
//               src[rdispls[i]] .. src[rdispls[i] + recvcount[i] - 1]
//   dst       - output buffer with room for totalnum elements
//   recvcount - length of each run
//   rdispls   - start offset of each run inside src
//   numprocs  - number of runs
//   totalnum  - number of elements to emit (the caller passes the sum of recvcount)
inline void AnoSort(int* src, int* dst, int* recvcount, int* rdispls, int numprocs,int totalnum)
{
	std::vector<int> taken(numprocs, 0);	// elements already consumed from each run
	int smallest = 0;
	for (int out = 0; out < totalnum; ++out)
	{
		int best = -1;	// run that currently holds the smallest head element
		for (int run = 0; run < numprocs; ++run)
		{
			if (taken[run] == recvcount[run])
				continue;	// this run is exhausted
			const int head = src[rdispls[run] + taken[run]];
			// Strict '<' keeps the lowest-numbered run on ties.
			if (best < 0 || head < smallest)
			{
				smallest = head;
				best = run;
			}
		}
		dst[out] = smallest;
		++taken[best];
	}
}

// Alternative multi-way merge that uses a min-heap: merges numprocs sorted runs
// stored in src into dst in ascending order.
//   src       - buffer holding the runs; run i starts at src[rdispls[i]]
//               and has recvcount[i] elements
//   dst       - output buffer; receives every element of every run
//   recvcount - length of each run
//   rdispls   - start offset of each run inside src
//   numprocs  - number of runs
inline void MergeSort(int* src, int* dst, int* recvcount, int* rdispls, int numprocs)
{
	typedef std::pair<int, int> Entry;	// (value, index of the run it came from)
	std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> heap;
	std::vector<int> next(numprocs, 0);	// next unread position inside each run

	// Seed the heap with the first element of every non-empty run.
	for (int run = 0; run < numprocs; ++run)
	{
		if (next[run] == recvcount[run])
			continue;
		heap.push(Entry(src[rdispls[run] + next[run]], run));
		++next[run];
	}

	int* out = dst;
	while (!heap.empty())
	{
		const Entry smallest = heap.top();
		heap.pop();
		*out++ = smallest.first;
		// Refill from the run that just supplied the smallest value, if it has more.
		const int run = smallest.second;
		if (next[run] != recvcount[run])
		{
			heap.push(Entry(src[rdispls[run] + next[run]], run));
			++next[run];
		}
	}
}

// Gathers every rank's numprocs samples on rank 0, selects numprocs - 1 pivots
// there, and broadcasts the pivots to all ranks. Collective over MPI_COMM_WORLD.
//   pivots       - this rank's numprocs samples (input)
//   numprocs     - number of ranks in MPI_COMM_WORLD
//   myid         - this rank's id
//   final_pivots - output array with room for numprocs - 1 pivots; filled on every rank
inline void choosepivots(int* pivots, int numprocs, int myid,int *final_pivots)
{
	const int sample_total = numprocs * numprocs;
	std::vector<int> gathered(sample_total);	// receive buffer; only rank 0 reads it
	MPI_Gather(pivots, numprocs, MPI_INT, gathered.data(), numprocs, MPI_INT, 0, MPI_COMM_WORLD);
	if (myid == 0)
	{
		std::sort(gathered.begin(), gathered.end());
		// Take the sorted samples at indices P, 2P, ..., (P-1)P (P = numprocs).
		for (int k = 0; k + 1 < numprocs; ++k)
			final_pivots[k] = gathered[(k + 1) * numprocs];
	}
	// Rank 0 sends the chosen pivots; every other rank receives them.
	MPI_Bcast(final_pivots, numprocs - 1, MPI_INT, 0, MPI_COMM_WORLD);
}

// Sorts vec[0, n) in ascending order with Parallel Sorting by Regular Sampling.
//
// Collective: every rank of MPI_COMM_WORLD must call it (after MPI_Init) with
// the same n. Each rank reads its own slice straight out of vec, so all ranks
// are expected to hold the same array contents on entry.
// On return, rank 0's vec holds the fully sorted array; on the other ranks only
// the rank's own slice of vec has been sorted in place.
// Rank 0 prints the elapsed clock() ticks to stdout.
//   vec - array of n ints
//   n   - number of elements
inline void PSRS(int* vec, int n)
{
	int myid, numprocs;
	MPI_Comm_rank(MPI_COMM_WORLD, &myid);	// this rank's id
	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);	// total number of ranks
	if (n <= 0)	// nothing to sort; also keeps the sampling below from reading an empty array
		return;
	clock_t start = clock();

	int num = n / numprocs;	// base slice length per rank
	int beg = myid * num;	// slice start (inclusive)
	// Slice end (exclusive). The last rank also takes the n % numprocs leftover
	// elements; otherwise they would belong to no slice and never get sorted.
	int end = (myid == numprocs - 1) ? n : beg + num;
	int w = n / (numprocs * numprocs);	// sampling stride
	std::vector<int> pivots(numprocs);
	quicksort(vec, beg, end, pivots.data(), numprocs, w);	// local sort + regular samples
	if (numprocs <= 1)	// single rank: the local sort already sorted everything
	{
		clock_t finish = clock();
		std::cout << finish - start << std::endl;
		return;
	}
	std::vector<int> final_pivots(numprocs - 1);
	choosepivots(pivots.data(), numprocs, myid, final_pivots.data());	// pick pivots on rank 0 and broadcast

	// Partition the sorted local slice by the pivots: segment i goes to rank i.
	// sdispls are offsets into vec itself, because vec is the send buffer.
	std::vector<int> sendcount(numprocs), sdispls(numprocs);
	std::vector<int> recvcount(numprocs), rdispls(numprocs);
	sdispls[0] = beg;
	for (int i = 0; i < numprocs - 1; i++)
	{
		// Index of the first local element greater than pivot i.
		int cut = static_cast<int>(std::upper_bound(vec + beg, vec + end, final_pivots[i]) - vec);
		sendcount[i] = cut - sdispls[i];
		sdispls[i + 1] = cut;
	}
	sendcount[numprocs - 1] = end - sdispls[numprocs - 1];

	// Tell every rank how many elements it will receive from this one.
	MPI_Alltoall(sendcount.data(), 1, MPI_INT, recvcount.data(), 1, MPI_INT, MPI_COMM_WORLD);
	int totalnum = 0;
	for (int i = 0; i < numprocs; i++)
	{
		rdispls[i] = totalnum;	// incoming segments are stored back to back
		totalnum += recvcount[i];
	}
	// Buffers are never empty so data() is a valid pointer even if this rank receives nothing.
	const int bufsize = totalnum > 0 ? totalnum : 1;
	std::vector<int> result(bufsize);
	MPI_Alltoallv(vec, sendcount.data(), sdispls.data(), MPI_INT,
		result.data(), recvcount.data(), rdispls.data(), MPI_INT, MPI_COMM_WORLD);	// global exchange

	// Each received segment is already sorted, so a multi-way merge finishes the job.
	// MergeSort(result, sort_result, recvcount, rdispls, numprocs) is a heap-based alternative.
	std::vector<int> sort_result(bufsize);
	AnoSort(result.data(), sort_result.data(), recvcount.data(), rdispls.data(), numprocs, totalnum);

	std::vector<int> num_idx(numprocs);	// number of elements each rank ended up with (meaningful on rank 0)
	MPI_Gather(&totalnum, 1, MPI_INT, num_idx.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
	std::vector<int> finalpos(numprocs);	// where each rank's block lands in vec (meaningful on rank 0)
	if (myid == 0)
	{
		finalpos[0] = 0;
		for (int i = 1; i < numprocs; i++)
			finalpos[i] = finalpos[i - 1] + num_idx[i - 1];
	}
	MPI_Gatherv(sort_result.data(), totalnum, MPI_INT, vec, num_idx.data(), finalpos.data(), MPI_INT, 0, MPI_COMM_WORLD);
	if (myid == 0)
	{
		clock_t finish = clock();
		std::cout << finish - start << std::endl;
	}
}

int main(int argc, char* argv[])
{
	int n = 1e7;		//元素個數
	int* vec = new int[n];
	srand(time(0));
	for (int i = 0; i < n; i++)
	{
		//vec[i] = i + 1;
		vec[i] = rand();	//數組
	}
	MPI_Init(&argc, &argv);
	PSRS(vec, n);	//排序
	MPI_Finalize();

	return 0;
}

並行計算利用mpi實現PSRS算法

並行計算利用openmpi實現雅可比迭代法

並行計算利用mpi實現PSRS算法

並行計算利用mpi實現Cannon和fox算法

力扣 523. 連續的子數組和 hash+dp

力扣 155. 最小棧棧+思維

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結

並行計算 利用mpi實現PSRS算法

並行計算利用mpi實現PSRS算法