我需要在MPI中对矩阵进行分解和重组(我使用的是MPICH),为此我按照另一个问题中的示例使用了MPI_Scatterv和MPI_Gatherv。对于小矩阵一切正常,但当矩阵尺寸增大(从800x800开始)时,程序会在执行到MPI_Gatherv时挂起。通过打印调试信息,我可以看到除了排名为0的进程(即Gatherv调用中的根进程)之外,其他每个进程都已经通过了Gatherv调用。有什么建议吗?代码如下:
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "mpi.h"
using namespace std;
// Message tags used by main() for the border (halo) exchange between
// neighbouring blocks. Each tag is named after the data the SENDER ships.
// Tags the message carrying the sender's first row.
#define TOP_ROW_TAG 1
// Tags the message carrying the sender's last row.
#define BOTTOM_ROW_TAG 2
// Tags the message that the receiver stores as its left border column
// (it carries the sender's last column).
#define LEFT_COL_TAG 3
// Tags the message that the receiver stores as its right border column
// (it carries the sender's first column).
#define RIGHT_COL_TAG 4
int main(int argc, char ** argv) {
int me, nproc, width, height, wloc, hloc;
double k, d,c, wdouble, hdouble, discr, delta_t, t;
char* initial, end;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm cart_top;
wdouble = atof(argv[1]);
hdouble = atof(argv[2]);
discr = atof(argv[3]);
k = atof(argv[4]);
d = atof(argv[5]);
c = atof(argv[6]);
delta_t = atof(argv[7]);
t = atof(argv[8]);
initial = argv[9];
end = argv[10];
double p = k/(d*c);
double dsc = delta_t/(discr*discr);
width = wdouble / discr;
height = hdouble / discr;
const int NPROWS=4; /* number of rows in _decomposition_ */
const int NPCOLS=4; /* number of cols in _decomposition_ */
const int BLOCKROWS = width/NPROWS; /* number of rows in _block_ */
const int BLOCKCOLS = height/NPCOLS;
const int dims[2] = {NPROWS, NPCOLS};
const int periods[2] = {0,0};
int* mycoords = new int[2];
int locsz = (width*height)/nproc;
double* T, *Tnew, *local, *locnew;
local = new double[BLOCKROWS*BLOCKCOLS];
locnew = new double[BLOCKROWS*BLOCKCOLS];
T = new double[width * height];
Tnew = new double[width * height];
ifstream infile;
infile.open(initial);
if(me==0) {
cout<<"BLOCKROWS: "<<BLOCKROWS;
cout<<"BLOCKCOLS: "<<BLOCKCOLS<<endl;
cout<<"width: "<<width;
cout<<"height: "<<height<<endl;
int idx, jdx, temp;
for (int i=0; i<width*height; i++) {
string currline;
getline(infile, currline);
idx = atoi(strtok(currline.c_str(), " "));
jdx = atoi(strtok(NULL, " "));
temp = atof(strtok(NULL, " "));
T[idx*height+jdx] = temp;
infile.close();
}
MPI_Datatype blocktype;
MPI_Datatype blocktype2;
MPI_Datatype coltype, coltype2;
MPI_Type_vector(BLOCKROWS, 1, BLOCKCOLS, MPI_DOUBLE, &coltype);
MPI_Type_create_resized( coltype, 0, sizeof(double), &coltype2);
MPI_Type_commit(&coltype2);
MPI_Type_vector(BLOCKROWS, BLOCKCOLS, height, MPI_DOUBLE, &blocktype2);
MPI_Type_create_resized( blocktype2, 0, sizeof(double), &blocktype);
MPI_Type_commit(&blocktype);
int disps[NPROWS*NPCOLS];
int counts[NPROWS*NPCOLS];
for (int ii=0; ii<NPROWS; ii++) {
for (int jj=0; jj<NPCOLS; jj++) {
disps[ii*NPCOLS+jj] = ii*height*BLOCKROWS+jj*BLOCKCOLS;
counts [ii*NPCOLS+jj] = 1;
}
}
int myrank, lb_i, lb_j, ub_i, ub_j;
lb_i=0;
lb_j=0;
ub_i=BLOCKROWS;
ub_j=BLOCKCOLS;
/*
0= left neighbor;
1= right neighbor;
2=top neighbor;
3=bottom neighbor;
*/
int neighs[4] = {};
double* leftcol, *rightcol, *myleftcol, *myrightcol, *toprow, *bottomrow;
leftcol = new double[BLOCKROWS];
rightcol= new double[BLOCKROWS];
myleftcol = new double[BLOCKROWS];
myrightcol= new double[BLOCKROWS];
toprow = new double[BLOCKCOLS];
bottomrow = new double[BLOCKCOLS];
//Create topology and get neighbor's rank
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart_top);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Comm_rank(cart_top, &myrank);
MPI_Cart_shift(cart_top, 0, -1, &myrank, &neighs[0]);
MPI_Cart_shift(cart_top, 0, 1, &myrank, &neighs[1]);
MPI_Cart_shift(cart_top, 1, 1, &myrank, &neighs[2]);
MPI_Cart_shift(cart_top, 1, -1, &myrank, &neighs[3]);
MPI_Scatterv(T, counts, disps, blocktype, local, BLOCKROWS*BLOCKCOLS,
MPI_DOUBLE, 0, cart_top);
double curr_t=0;
for(double curr_t = 0; curr_t < t; curr_t+=delta_t) {
MPI_Barrier(cart_top);
//Send border columns to neighbors
if(neighs[2] != MPI_PROC_NULL) {
MPI_Send(&local[BLOCKCOLS-1], 1, coltype2, neighs[2], LEFT_COL_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[3] != MPI_PROC_NULL) {
MPI_Send(local, 1, coltype2, neighs[3], RIGHT_COL_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[0] != MPI_PROC_NULL) {
MPI_Send(local, BLOCKCOLS, MPI_DOUBLE, neighs[0], TOP_ROW_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[1] != MPI_PROC_NULL) {
MPI_Send(&local[(BLOCKROWS-1)*BLOCKCOLS], BLOCKCOLS, MPI_DOUBLE, neighs[1], BOTTOM_ROW_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[3] != MPI_PROC_NULL) {
MPI_Recv(leftcol, BLOCKROWS, MPI_DOUBLE, neighs[3], LEFT_COL_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[2] != MPI_PROC_NULL) {
MPI_Recv(rightcol, BLOCKROWS, MPI_DOUBLE, neighs[2], RIGHT_COL_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[1] != MPI_PROC_NULL) {
MPI_Recv(bottomrow, BLOCKCOLS, MPI_DOUBLE, neighs[1], TOP_ROW_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[0] != MPI_PROC_NULL) {
MPI_Recv(toprow, BLOCKCOLS, MPI_DOUBLE, neighs[0], BOTTOM_ROW_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
MPI_Barrier(cart_top);
double* aux;
//cout<<" t in process "<<me<<" is " <<t<<endl;
int i, j;
MPI_Comm_rank(cart_top, &myrank);
MPI_Barrier(cart_top);
for(i=lb_i; i<ub_i; i++) {
for(j=lb_j; j<ub_j; j++) {
double curr,c1,c2,c3,c4;
curr = local[i*BLOCKCOLS+j];
c1 = i==0 ? toprow[j] : local[(i-1)*BLOCKCOLS+j];
c2 = i==BLOCKROWS-1 ? bottomrow[j] : local[(i+1)*BLOCKCOLS+j];
c3 = j==0 ? leftcol[i] : local[i*BLOCKCOLS+(j-1)];
c4 = j==BLOCKCOLS-1 ? rightcol[i] : local[i*BLOCKCOLS+(j+1)];
locnew[i*BLOCKCOLS+j] = curr*(1-4*dsc*p) + dsc*p*(c1+c2+c3+c4);
/*if(i==0) locnew[i*BLOCKCOLS+j] = toprow[j];
else if(i==BLOCKROWS-1) locnew[i*BLOCKCOLS+j] = bottomrow[j];
if(j==0) locnew[i*BLOCKCOLS+j] = leftcol[i];
else if(j==BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = rightcol[i];
if(i!=0 && i!=BLOCKROWS-1 && j!=0 && j!=BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = local[i*BLOCKCOLS+j];*/
/*if(i==0) locnew[i*BLOCKCOLS+j] = (double)5000;
else if(i==BLOCKROWS-1) locnew[i*BLOCKCOLS+j] = (double)5000;
if(j==0) locnew[i*BLOCKCOLS+j] = (double)5000;
else if(j==BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = (double)5000;
if(i!=0 && i!=BLOCKROWS-1 && j!=0 && j!=BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = local[i*BLOCKCOLS+j];*/
}
}
aux = local;
local = locnew;
locnew = aux;
MPI_Barrier(cart_top);
/* aux = T;
T=Tnew;
Tnew = aux;*/
}
MPI_Gatherv(local, BLOCKROWS*BLOCKCOLS, MPI_DOUBLE, Tnew, counts, disps, blocktype, 0,cart_top);
if(me == 0) {
ofstream outfile;
outfile.open(argv[10]);
for(int i=0; i<width; i++) {
for(int j=0; j<height; j++) {
outfile<< i<<" " <<j<<" "<<Tnew[i*height+j]<<endl;
}
}
outfile.close();
}
MPI_Finalize();
}