MPI並行計算學習筆記5——矩陣相乘(cannon算法的實現)

一 . 運行環境VS2017 + MPI

二 . 算法原理可見博客https://www.cnblogs.com/chihaoyuIsnotHere/p/10553617.html

三.  使用到的MPI關鍵技術:虛擬笛卡爾進程拓撲

四.  源代碼:頭文件Matrix.h與第三篇文章的類似,只需額外加上「+」運算符的重載(用於矩陣相加)

#include<cmath>
#include<ctime>
#include<iostream>
#include<vector>
#include"Matrix.h"
#include"mpi.h"

using namespace std;

// Dimension of the square matrices being multiplied (N x N).
const int N = 600;
// myid      : rank of this process in Cart_Comm_World.
// numprocs  : number of processes in MPI_COMM_WORLD.
// upRank/downRank/leftRank/rightRank : neighbour ranks filled in by MPI_Cart_shift.
// part      : number of matrix blocks along one dimension of the process grid.
// num       : dimension of one square matrix block (N / part).
int myid, numprocs,upRank,downRank,leftRank,rightRank,part,num;
// Rank of the process at grid coordinate (0,0); root of the scatter and gather calls.
int masterNode;
// Cartesian coordinates of this process inside Cart_Comm_World.
int coord[2];
// MPI_Wtime timestamps taken before and after the timed multiplication.
double start, finish;
// Two-dimensional periodic Cartesian communicator created in main.
MPI_Comm Cart_Comm_World;

// Integer square root rounded up: returns the truncated square root of x when x
// is a perfect square, and that value plus one otherwise.
int isqrt(int x)
{
	const int root = static_cast<int>(sqrt(x));
	return (root * root == x) ? root : root + 1;
}

// Creates an N x N matrix with ranCreate() on the process at grid coordinate
// (0,0) and distributes one num x num block to every process of Cart_Comm_World.
//
// A: receives this process's block; MPI_Scatter writes num*num doubles starting
//    at &A(0, 0).
//    NOTE(review): assumes Matrix keeps its elements contiguous from (0,0) --
//    confirm against Matrix.h.
void MatrixScatter(Matrix &A)
{
	// Send buffer. It stays empty on every process except the root; MPI_Scatter
	// only reads the send buffer on the root. std::vector releases the storage
	// automatically on every exit path.
	std::vector<double> memoryPool;

	if (coord[0] == 0 && coord[1] == 0)
	{
		Matrix tmp(N);
		tmp.ranCreate();
		//cout << tmp << endl;
		memoryPool.resize(static_cast<std::size_t>(N) * N);

		// Pack the matrix block by block (block row k, block column h) so that
		// the num*num values of each block are consecutive in the buffer, in
		// the order in which MPI_Scatter hands them out to ranks.
		std::size_t count = 0;
		for (int k = 0; k < part; ++k)
			for (int h = 0; h < part; ++h)
				for (int i = k * num; i < (k + 1)*num; ++i)
					for (int j = h * num; j < (h + 1)*num; ++j)
						memoryPool[count++] = tmp(i, j);
	}
	MPI_Scatter(memoryPool.data(), num*num, MPI_DOUBLE, &A(0, 0), num*num, MPI_DOUBLE, masterNode, Cart_Comm_World);
}

// Performs the initial alignment shifts before the multiply-and-shift loop.
// A: local block of the first operand; replaced in place by the block received.
// B: local block of the second operand; replaced in place by the block received.
// Also overwrites the globals leftRank/rightRank and upRank/downRank.
void cannonInit(Matrix &A, Matrix &B)
{
	const int row = coord[0];   // block row index of this process
	const int col = coord[1];   // block column index of this process
	const int blockSize = num * num;
	MPI_Status status;

	// Neighbours at distance `row` along dimension 1: the block of A is sent to
	// leftRank and replaced by the one arriving from rightRank.
	MPI_Cart_shift(Cart_Comm_World, 1, row, &leftRank, &rightRank);
	if (myid != leftRank)
	{
		MPI_Sendrecv_replace(&A(0, 0), blockSize, MPI_DOUBLE, leftRank, 0, rightRank, 0, Cart_Comm_World, &status);
	}

	// Neighbours at distance `col` along dimension 0: the block of B is sent to
	// upRank and replaced by the one arriving from downRank.
	MPI_Cart_shift(Cart_Comm_World, 0, col, &upRank, &downRank);
	if (myid != upRank)
	{
		MPI_Sendrecv_replace(&B(0, 0), blockSize, MPI_DOUBLE, upRank, 0, downRank, 0, Cart_Comm_World, &status);
	}
}

// Runs the multiply-and-shift loop on the local blocks, then gathers the
// per-process result blocks on masterNode and reassembles them into an N x N
// matrix there.
//
// A, B: local blocks, expected to have been aligned by cannonInit already. Both
//       are overwritten by every shift; after the `part` unit shifts on the
//       periodic grid each block is back on the process it started this
//       function on.
// Also overwrites the globals upRank/downRank and leftRank/rightRank.
void cannonSolver(Matrix &A,Matrix &B)
{
	// Direct (distance 1) neighbours along each grid dimension.
	MPI_Cart_shift(Cart_Comm_World, 0, 1, &upRank, &downRank);
	MPI_Cart_shift(Cart_Comm_World, 1, 1, &leftRank, &rightRank);

	// NOTE(review): the accumulation below relies on Matrix(num) starting out
	// zero-filled -- confirm against Matrix.h.
	Matrix C(num);
	MPI_Status status;
	for (int step = 0; step < part; ++step)
	{
		C = C + A * B;
		// Pass A one step to leftRank and B one step to upRank, receiving the
		// replacement blocks from rightRank and downRank.
		MPI_Sendrecv_replace(&A(0, 0), num*num, MPI_DOUBLE, leftRank, 0, rightRank, 0, Cart_Comm_World, &status);
		MPI_Sendrecv_replace(&B(0, 0), num*num, MPI_DOUBLE, upRank, 0, downRank, 0, Cart_Comm_World, &status);
	}

	// Receive buffer; only sized on the root because MPI_Gather ignores the
	// receive buffer elsewhere. std::vector frees it automatically.
	std::vector<double> memoryPool;
	if (myid == masterNode)
		memoryPool.resize(static_cast<std::size_t>(N) * N);
	MPI_Gather(&C(0, 0), num*num, MPI_DOUBLE, memoryPool.data(), num*num, MPI_DOUBLE, masterNode, Cart_Comm_World);
	if (myid == masterNode)
	{
		// Undo the block-by-block packing: blocks arrive in rank order, each as
		// num*num consecutive values, for block row k and block column h.
		Matrix result(N);
		std::size_t count = 0;
		for (int k = 0; k < part; ++k)
			for (int h = 0; h < part; ++h)
				for (int i = k * num; i < (k + 1)*num; ++i)
					for (int j = h * num; j < (h + 1)*num; ++j)
						result(i, j) = memoryPool[count++];
		//cout << result << endl;
	}
}

// Entry point. With a single process the N x N product is computed and timed
// serially. Otherwise the processes form a part x part periodic Cartesian grid,
// both operands are scattered in num x num blocks, and the product is computed
// with cannonInit + cannonSolver; the elapsed time is printed by masterNode.
//
// Returns 0 on success, 1 when the process count is not a perfect square or
// does not divide N evenly.
int main(int argc, char* argv[])
{
	MPI_Init(&argc, &argv);

	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

	if (numprocs == 1)
	{
		Matrix A(N), B(N);
		A.ranCreate();
		B.ranCreate();
		// Start the clock only after the random fill, which is expensive and
		// should not count towards the multiplication time.
		start = MPI_Wtime();
		Matrix result = A * B;
		//cout << A * B << endl;
		finish = MPI_Wtime();
		std::cout << finish - start << std::endl;
		MPI_Finalize();

		return 0;
	}

	// Number of blocks along one grid dimension. isqrt rounds up, so for a
	// process count that is not a perfect square part*part exceeds numprocs.
	part = isqrt(numprocs);

	// A grid larger than the communicator makes MPI_Cart_create erroneous, and
	// a block size that does not divide N would silently drop trailing rows and
	// columns. Every process evaluates the same condition, so all of them leave
	// together through MPI_Finalize.
	if (part * part != numprocs || N % part != 0)
	{
		int worldRank = 0;
		MPI_Comm_rank(MPI_COMM_WORLD, &worldRank);
		if (worldRank == 0)
			std::cerr << "The number of processes (" << numprocs
			          << ") must be a perfect square whose root divides N = " << N << std::endl;
		MPI_Finalize();
		return 1;
	}

	num = N / part;// dimension of one matrix block

	// Create the periodic 2-D Cartesian topology used for all communication.
	int dims[2] = { part,part };
	int periods[2] = { 1,1 };
	MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &Cart_Comm_World);
	MPI_Comm_rank(Cart_Comm_World, &myid);
	MPI_Cart_coords(Cart_Comm_World, myid, 2, coord);

	// The process at grid coordinate (0,0) acts as root for scatter and gather.
	int position[2] = { 0,0 };
	MPI_Cart_rank(Cart_Comm_World, position, &masterNode);

	Matrix A(num),B(num);
	MatrixScatter(A);

	MatrixScatter(B);
	start = MPI_Wtime();
	cannonInit(A, B);
	cannonSolver(A,B);
	finish = MPI_Wtime();
	if (myid == masterNode)
		std::cout << finish - start << std::endl;

	MPI_Comm_free(&Cart_Comm_World);
	MPI_Finalize();
	return 0;
}

五.運行結果(矩陣維度 N = 600;Cannon算法在問題規模較小的情況下仍能獲得不錯的加速比)

 注:個人PC僅有8個計算核心,因而9核模擬的效果稍差

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章