Question

我需要在MPI（使用MPICH）中对矩阵进行分解和重组，使用的是MPI_Scatterv和MPI_Gatherv，做法与另一个问题中的示例相同。对于小矩阵一切正常，但当矩阵变大（从800x800开始）时，程序在执行到MPI_Gatherv时挂起。通过打印调试信息可以看到，除了rank为0的进程（即Gatherv调用中的根进程）之外，其他所有进程都已经通过了对Gatherv的调用。有什么建议吗？代码如下：

#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>
#include "mpi.h"
using namespace std;

#define TOP_ROW_TAG 1
#define BOTTOM_ROW_TAG 2
#define LEFT_COL_TAG 3
#define RIGHT_COL_TAG 4

int main(int argc, char ** argv) {
    int me, nproc, width, height, wloc, hloc;
    double k, d,c, wdouble, hdouble, discr, delta_t, t;
    char* initial, end;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &me); 
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm cart_top;

    wdouble = atof(argv[1]);
    hdouble = atof(argv[2]);
    discr = atof(argv[3]);
    k = atof(argv[4]);
    d = atof(argv[5]);
    c = atof(argv[6]);
    delta_t = atof(argv[7]);
    t = atof(argv[8]);
    initial = argv[9];
    end = argv[10];
    double p = k/(d*c);
    double dsc = delta_t/(discr*discr);

    width = wdouble / discr;
    height = hdouble / discr; 
    const int NPROWS=4;  /* number of rows in _decomposition_ */
    const int NPCOLS=4;  /* number of cols in _decomposition_ */
    const int BLOCKROWS = width/NPROWS;  /* number of rows in _block_ */
    const int BLOCKCOLS = height/NPCOLS;

    const int dims[2] = {NPROWS, NPCOLS};
    const int periods[2] = {0,0};
    int* mycoords = new int[2];

    int locsz = (width*height)/nproc;

    double* T, *Tnew, *local, *locnew;
    local = new double[BLOCKROWS*BLOCKCOLS];
    locnew = new double[BLOCKROWS*BLOCKCOLS];
    T = new double[width * height];
    Tnew = new double[width * height];

    ifstream infile;
    infile.open(initial);
    if(me==0) {
        cout<<"BLOCKROWS: "<<BLOCKROWS;
        cout<<"BLOCKCOLS: "<<BLOCKCOLS<<endl;
        cout<<"width: "<<width;
        cout<<"height: "<<height<<endl;
        int idx, jdx, temp;
        for (int i=0; i<width*height; i++) {
            string currline;
            getline(infile, currline);
            idx = atoi(strtok(currline.c_str(), " "));
            jdx = atoi(strtok(NULL, " "));
            temp = atof(strtok(NULL, " "));
            T[idx*height+jdx] = temp;
            infile.close();
        }

        MPI_Datatype blocktype;
        MPI_Datatype blocktype2;
        MPI_Datatype coltype, coltype2;


        MPI_Type_vector(BLOCKROWS, 1, BLOCKCOLS, MPI_DOUBLE, &coltype);
        MPI_Type_create_resized( coltype, 0, sizeof(double), &coltype2);
        MPI_Type_commit(&coltype2);


        MPI_Type_vector(BLOCKROWS, BLOCKCOLS, height, MPI_DOUBLE, &blocktype2);
        MPI_Type_create_resized( blocktype2, 0, sizeof(double), &blocktype);
        MPI_Type_commit(&blocktype);

        int disps[NPROWS*NPCOLS];
        int counts[NPROWS*NPCOLS];
        for (int ii=0; ii<NPROWS; ii++) {
            for (int jj=0; jj<NPCOLS; jj++) {
                disps[ii*NPCOLS+jj] = ii*height*BLOCKROWS+jj*BLOCKCOLS;
                counts [ii*NPCOLS+jj] = 1;
            }
        }

        int myrank, lb_i, lb_j, ub_i, ub_j;
        lb_i=0;
        lb_j=0;
        ub_i=BLOCKROWS;
        ub_j=BLOCKCOLS;
        /*
           0= left neighbor;
           1= right neighbor;
           2=top neighbor;
           3=bottom neighbor;
         */
        int neighs[4] = {};
        double* leftcol, *rightcol, *myleftcol, *myrightcol, *toprow, *bottomrow;
        leftcol = new double[BLOCKROWS];
        rightcol= new double[BLOCKROWS];
        myleftcol = new double[BLOCKROWS];
        myrightcol= new double[BLOCKROWS];
        toprow = new double[BLOCKCOLS];
        bottomrow = new double[BLOCKCOLS];

        //Create topology and get neighbor's rank
        MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart_top);
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Comm_rank(cart_top, &myrank);

        MPI_Cart_shift(cart_top, 0, -1, &myrank, &neighs[0]);
        MPI_Cart_shift(cart_top, 0, 1, &myrank, &neighs[1]);
        MPI_Cart_shift(cart_top, 1, 1, &myrank, &neighs[2]);
        MPI_Cart_shift(cart_top, 1, -1, &myrank, &neighs[3]);
        MPI_Scatterv(T, counts, disps, blocktype, local, BLOCKROWS*BLOCKCOLS, 
                MPI_DOUBLE, 0, cart_top);
        double curr_t=0;
        for(double curr_t = 0; curr_t < t; curr_t+=delta_t) {
            MPI_Barrier(cart_top);

            //Send border columns to neighbors
            if(neighs[2] != MPI_PROC_NULL) {
                MPI_Send(&local[BLOCKCOLS-1], 1, coltype2, neighs[2], LEFT_COL_TAG+(int)(curr_t*1000), cart_top);
            }
            if(neighs[3] != MPI_PROC_NULL) {
                MPI_Send(local, 1, coltype2, neighs[3], RIGHT_COL_TAG+(int)(curr_t*1000), cart_top);
            }
            if(neighs[0] != MPI_PROC_NULL) {
                MPI_Send(local, BLOCKCOLS, MPI_DOUBLE, neighs[0], TOP_ROW_TAG+(int)(curr_t*1000), cart_top);
            }
            if(neighs[1] != MPI_PROC_NULL) {
                MPI_Send(&local[(BLOCKROWS-1)*BLOCKCOLS], BLOCKCOLS, MPI_DOUBLE, neighs[1], BOTTOM_ROW_TAG+(int)(curr_t*1000), cart_top);
            }

            if(neighs[3] != MPI_PROC_NULL) {
                MPI_Recv(leftcol, BLOCKROWS, MPI_DOUBLE, neighs[3], LEFT_COL_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
            }
            if(neighs[2] != MPI_PROC_NULL) {
                MPI_Recv(rightcol, BLOCKROWS, MPI_DOUBLE, neighs[2], RIGHT_COL_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
            }
            if(neighs[1] != MPI_PROC_NULL) {
                MPI_Recv(bottomrow, BLOCKCOLS, MPI_DOUBLE, neighs[1], TOP_ROW_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
            }
            if(neighs[0] != MPI_PROC_NULL) {
                MPI_Recv(toprow, BLOCKCOLS, MPI_DOUBLE, neighs[0], BOTTOM_ROW_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
            }

            MPI_Barrier(cart_top);
            double* aux;
            //cout<<" t in process "<<me<<" is " <<t<<endl;
            int i, j;
            MPI_Comm_rank(cart_top, &myrank);
            MPI_Barrier(cart_top);

            for(i=lb_i; i<ub_i; i++) {
                for(j=lb_j; j<ub_j; j++) {
                    double curr,c1,c2,c3,c4;
                    curr = local[i*BLOCKCOLS+j];
                    c1 = i==0 ? toprow[j] : local[(i-1)*BLOCKCOLS+j];
                    c2 = i==BLOCKROWS-1 ? bottomrow[j] : local[(i+1)*BLOCKCOLS+j];
                    c3 = j==0 ? leftcol[i] : local[i*BLOCKCOLS+(j-1)];
                    c4 = j==BLOCKCOLS-1 ? rightcol[i] : local[i*BLOCKCOLS+(j+1)];

                    locnew[i*BLOCKCOLS+j] = curr*(1-4*dsc*p) + dsc*p*(c1+c2+c3+c4);
                    /*if(i==0) locnew[i*BLOCKCOLS+j] = toprow[j];
                      else if(i==BLOCKROWS-1) locnew[i*BLOCKCOLS+j] = bottomrow[j];
                      if(j==0) locnew[i*BLOCKCOLS+j] = leftcol[i];
                      else if(j==BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = rightcol[i];
                      if(i!=0 && i!=BLOCKROWS-1 && j!=0 && j!=BLOCKCOLS-1)    locnew[i*BLOCKCOLS+j] = local[i*BLOCKCOLS+j];*/
                    /*if(i==0) locnew[i*BLOCKCOLS+j] = (double)5000;
                      else if(i==BLOCKROWS-1) locnew[i*BLOCKCOLS+j] = (double)5000;
                      if(j==0) locnew[i*BLOCKCOLS+j] = (double)5000;
                      else if(j==BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = (double)5000;
                      if(i!=0 && i!=BLOCKROWS-1 && j!=0 && j!=BLOCKCOLS-1)    locnew[i*BLOCKCOLS+j] = local[i*BLOCKCOLS+j];*/
                }
            }

            aux = local;
            local = locnew;
            locnew = aux;

            MPI_Barrier(cart_top);

            /*  aux = T;
                T=Tnew;
                Tnew = aux;*/

        }
        MPI_Gatherv(local, BLOCKROWS*BLOCKCOLS, MPI_DOUBLE, Tnew, counts, disps, blocktype, 0,cart_top);
        if(me == 0) {
            ofstream outfile;
            outfile.open(argv[10]);
            for(int i=0; i<width; i++) {
                for(int j=0; j<height; j++) {
                    outfile<< i<<" " <<j<<" "<<Tnew[i*height+j]<<endl;
                }

            }
            outfile.close();
        }
        MPI_Finalize();

    }

重构矩阵时MPI_Gatherv挂起

0 个答案: