一. 運行環境:VS2017 + MPI
二. 算法原理可見博客:https://www.cnblogs.com/chihaoyuIsnotHere/p/10553617.html
三. 使用到的MPI關鍵技術:虛擬笛卡爾進程拓撲
四. 源代碼:頭文件Matrix.h與第三篇文章的類似,只需加上+號重載
#include<iostream>
#include"Matrix.h"
#include<cmath>
#include<ctime>
#include"mpi.h"
using namespace std;
// Side length of the full square matrices that are multiplied.
const int N = 600;

// Rank of this process inside Cart_Comm_World.
int myid;
// Number of processes in MPI_COMM_WORLD.
int numprocs;
// Source/destination ranks along grid dimension 0, as filled in by MPI_Cart_shift.
int upRank;
int downRank;
// Source/destination ranks along grid dimension 1, as filled in by MPI_Cart_shift.
int leftRank;
int rightRank;
// Number of matrix blocks (and processes) along one dimension of the grid.
int part;
// Side length of a single matrix block (N / part).
int num;
// Rank of the process at grid coordinate (0, 0); root of the scatter/gather.
int masterNode;
// Cartesian coordinates of this process in the process grid.
int coord[2];
// MPI_Wtime() timestamps taken around the timed section.
double start;
double finish;
// 2-D periodic Cartesian communicator created in main.
MPI_Comm Cart_Comm_World;
/**
 * Integer square root, rounded up.
 *
 * Truncates the floating-point square root of x to an int; if that value
 * squared is not exactly x, the next larger integer is returned instead.
 * For a perfect square this yields the exact root.
 *
 * @param x value to take the root of (expected to be non-negative)
 * @return the smallest-or-exact integer root as described above
 */
int isqrt(int x)
{
    const int truncated = static_cast<int>(std::sqrt(static_cast<double>(x)));
    return (truncated * truncated == x) ? truncated : truncated + 1;
}
/**
 * Creates a random N x N matrix on the process at grid coordinate (0, 0)
 * and scatters it block-wise over Cart_Comm_World.
 *
 * The root packs the matrix into a send buffer block by block (block rows
 * outermost, then block columns, each block in row-major order), so that
 * consecutive runs of num*num values form one block per process.
 *
 * @param A receives this process's num x num block. The data is written
 *          starting at &A(0, 0), so Matrix is assumed to store its elements
 *          contiguously -- TODO confirm against Matrix.h.
 */
void MatrixScatter(Matrix &A)
{
    const bool isRoot = (coord[0] == 0 && coord[1] == 0);
    const int blockElems = num * num;
    double *sendBuffer = nullptr;

    if (isRoot)
    {
        Matrix full(N);
        full.ranCreate();
        // Debug aid: print `full` here to inspect the generated matrix.
        sendBuffer = new double[N*N];
        double *cursor = sendBuffer;
        for (int blockRow = 0; blockRow < part; ++blockRow)
        {
            const int rowBegin = blockRow * num;
            for (int blockCol = 0; blockCol < part; ++blockCol)
            {
                const int colBegin = blockCol * num;
                for (int r = 0; r < num; ++r)
                {
                    for (int c = 0; c < num; ++c)
                    {
                        *cursor++ = full(rowBegin + r, colBegin + c);
                    }
                }
            }
        }
    }

    // The send buffer is only significant on the root; other ranks pass nullptr.
    MPI_Scatter(sendBuffer, blockElems, MPI_DOUBLE, &A(0, 0), blockElems, MPI_DOUBLE, masterNode, Cart_Comm_World);

    // delete[] on a null pointer is a no-op, so no guard is needed.
    delete[] sendBuffer;
}
/**
 * Performs the initial alignment (skew) of Cannon's algorithm.
 *
 * The local block of A is passed along grid dimension 1 by a distance equal
 * to this process's coord[0]; the local block of B is passed along grid
 * dimension 0 by a distance equal to coord[1]. A block is left untouched
 * when the computed partner is this process itself.
 *
 * Side effect: overwrites the globals leftRank/rightRank and upRank/downRank.
 *
 * @param A local block of the first operand, replaced in place
 * @param B local block of the second operand, replaced in place
 */
void cannonInit(Matrix &A, Matrix &B)
{
    const int row = coord[0];   // block row index of this process
    const int col = coord[1];   // block column index of this process
    const int blockElems = num * num;
    MPI_Status status;

    // Neighbours at distance `row` along dimension 1.
    MPI_Cart_shift(Cart_Comm_World, 1, row, &leftRank, &rightRank);
    if (leftRank != myid)
    {
        // Send our A block to leftRank and take over the one from rightRank.
        MPI_Sendrecv_replace(&A(0, 0), blockElems, MPI_DOUBLE, leftRank, 0, rightRank, 0, Cart_Comm_World, &status);
    }

    // Neighbours at distance `col` along dimension 0.
    MPI_Cart_shift(Cart_Comm_World, 0, col, &upRank, &downRank);
    if (upRank != myid)
    {
        // Send our B block to upRank and take over the one from downRank.
        MPI_Sendrecv_replace(&B(0, 0), blockElems, MPI_DOUBLE, upRank, 0, downRank, 0, Cart_Comm_World, &status);
    }
}
/**
 * Runs the multiply-and-shift phase of Cannon's algorithm and gathers the
 * result blocks on masterNode.
 *
 * In each of `part` rounds the local product A * B is added to the local
 * accumulator, then A is passed one step along grid dimension 1 and B one
 * step along grid dimension 0. Afterwards every process contributes its
 * accumulator block to an MPI_Gather rooted at masterNode, which reassembles
 * the blocks into an N x N matrix. That matrix is local to this function
 * and is not returned.
 *
 * Side effect: overwrites the globals upRank/downRank and leftRank/rightRank.
 *
 * @param A local block of the first operand (already aligned); modified in place
 * @param B local block of the second operand (already aligned); modified in place
 */
void cannonSolver(Matrix &A, Matrix &B)
{
    const int blockElems = num * num;
    const bool isMaster = (myid == masterNode);
    MPI_Status status;

    // Direct neighbours (distance 1) in both grid dimensions.
    MPI_Cart_shift(Cart_Comm_World, 0, 1, &upRank, &downRank);
    MPI_Cart_shift(Cart_Comm_World, 1, 1, &leftRank, &rightRank);

    // Accumulator for this process's result block.
    // NOTE(review): assumes Matrix(num) starts out as all zeros -- confirm in Matrix.h.
    Matrix C(num);
    for (int remaining = part; remaining > 0; --remaining)
    {
        C = C + A * B;
        MPI_Sendrecv_replace(&A(0, 0), blockElems, MPI_DOUBLE, leftRank, 0, rightRank, 0, Cart_Comm_World, &status);
        MPI_Sendrecv_replace(&B(0, 0), blockElems, MPI_DOUBLE, upRank, 0, downRank, 0, Cart_Comm_World, &status);
    }

    // Only the root needs a receive buffer for the gather.
    double *gathered = isMaster ? new double[N*N] : nullptr;
    MPI_Gather(&C(0, 0), blockElems, MPI_DOUBLE, gathered, blockElems, MPI_DOUBLE, masterNode, Cart_Comm_World);
    if (!isMaster)
    {
        return;
    }

    // Unpack: blocks arrive in rank order, each block stored row-major.
    Matrix result(N);
    const double *cursor = gathered;
    for (int blockRow = 0; blockRow < part; ++blockRow)
    {
        const int rowBegin = blockRow * num;
        for (int blockCol = 0; blockCol < part; ++blockCol)
        {
            const int colBegin = blockCol * num;
            for (int r = 0; r < num; ++r)
            {
                for (int c = 0; c < num; ++c)
                {
                    result(rowBegin + r, colBegin + c) = *cursor++;
                }
            }
        }
    }
    // Debug aid: print `result` here to inspect the product.
    delete[] gathered;
}
/**
 * Program entry point: times an N x N matrix multiplication.
 *
 * With a single process the product is computed directly. Otherwise the
 * processes are arranged in a part x part periodic Cartesian grid, two
 * random matrices are scattered block-wise, and Cannon's algorithm is run.
 * The elapsed time in seconds (matrix generation and scattering excluded)
 * is printed by one process.
 *
 * The parallel path requires the process count to be a perfect square and
 * N to be divisible by its root. Otherwise the Cartesian grid would need
 * more processes than exist, or the blocks would not cover the whole
 * matrix. In that case a message is printed and the program exits with
 * status 1.
 *
 * @return 0 on success, 1 if the process count is unsuitable
 */
int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    if (numprocs == 1)
    {
        Matrix A(N), B(N);
        A.ranCreate();
        B.ranCreate();
        // Random generation is expensive and must not be part of the timing.
        start = MPI_Wtime();
        Matrix result = A * B;
        finish = MPI_Wtime();
        std::cout << finish - start << std::endl;
        MPI_Finalize();
        return 0;
    }

    // Number of blocks along one grid dimension.
    part = isqrt(numprocs);

    // Every process sees the same numprocs, so all of them take this branch
    // together and can shut MPI down cleanly.
    if (part * part != numprocs || N % part != 0)
    {
        int worldRank = 0;
        MPI_Comm_rank(MPI_COMM_WORLD, &worldRank);
        if (worldRank == 0)
        {
            std::cerr << "Cannon's algorithm needs a perfect-square number of processes "
                      << "whose square root divides N = " << N
                      << "; got " << numprocs << " processes." << std::endl;
        }
        MPI_Finalize();
        return 1;
    }

    // Side length of one matrix block.
    num = N / part;

    // Build a 2-D periodic (wrap-around) Cartesian topology to simplify the shifts.
    int dims[2] = { part, part };
    int periods[2] = { 1, 1 };
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &Cart_Comm_World);
    MPI_Comm_rank(Cart_Comm_World, &myid);
    MPI_Cart_coords(Cart_Comm_World, myid, 2, coord);

    // The process at grid coordinate (0, 0) is the root for scatter/gather.
    int position[2] = { 0, 0 };
    MPI_Cart_rank(Cart_Comm_World, position, &masterNode);

    Matrix A(num), B(num);
    MatrixScatter(A);
    MatrixScatter(B);

    start = MPI_Wtime();
    cannonInit(A, B);
    cannonSolver(A, B);
    finish = MPI_Wtime();

    // Report from the same process that acts as root everywhere else.
    if (myid == masterNode)
        std::cout << finish - start << std::endl;

    MPI_Comm_free(&Cart_Comm_World);
    MPI_Finalize();
    return 0;
}
五. 運行結果(dim = 600,cannon算法在問題規模較小的情況下仍能獲得不錯的加速比)
注:個人PC僅有8個計算核心,因而使用9個進程模擬時效果稍差