MPI blocks and scatter

Posted: 2016-08-25 17:57:25

Tags: c mpi

I have a 2D matrix; suppose it is the following 4x6 matrix:

 1  2  3  4  5  6
 7  8  9 10 11 12
13 14 15 16 17 18
19 20 21 22 23 24

I want each of the 4 processes to get a 2x3 sub-matrix and place it into a 4x5 buffer.

Something like this:

0  0  0  0  0
0  1  2  3  0
0  7  8  9  0
0  0  0  0  0

0  0  0  0  0
0  4  5  6  0
0 10 11 12  0
0  0  0  0  0

0  0  0  0  0
0 13 14 15  0
0 19 20 21  0
0  0  0  0  0

0  0  0  0  0
0 16 17 18  0
0 22 23 24  0
0  0  0  0  0    

The extra rows are not a problem, because I can call MPI_Scatterv and point at the second row of each process's buffer, but the extra columns complicate things. Can this be done using only MPI datatypes and a single MPI_Scatterv call? If so, please give me some guidance. Here is my attempt so far:

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
dim[0] = dim[1] = sqrt(numtasks);
periods[0] = periods[1] = 0;
MPI_Cart_create(MPI_COMM_WORLD, 2, dim, periods, 1, &commCart);
MPI_Comm_rank(commCart, &taskid);

NPROWS = dim[0];
NPCOLS = dim[1];
blockRows = ROWS / NPROWS;
blockColumns = COLS / NPCOLS;

if (taskid == MASTER) {
    for(i=0;i<ROWS*COLS;i++){
        global[i]=i;
    }
}

float* local;
local = malloc(blockRows * (blockColumns+2) * sizeof (float));

for (i = 0; i < blockRows * (blockColumns+2); i++) {
    local[0][i] = 0;
    local[1][i] = 0;
}
MPI_Datatype type, resizedtype,column;
int sizes[2]    = {ROWS,COLS};  
int subsizes[2] = {blockRows,blockColumns}; 
int starts[2]   = {0,0};  

MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_FLOAT, &type);  
MPI_Type_create_resized(type, 0, blockColumns*sizeof(float), &resizedtype);
MPI_Type_commit(&resizedtype);

int *counts = malloc(numworkers*sizeof(int));
int *displs = malloc(numworkers*sizeof(int));
for(i=0;i<numworkers;i++){
    counts[i] = 1;
}

int disp = 0;
for(i=0;i<NPROWS;i++){
    for(j=0;j<NPCOLS;j++){
        displs[i*dim[0] + j] = disp;
        disp++;
    }
    disp += (blockColumns-1)*dim[0];
}

MPI_Scatterv(global, counts, displs, resizedtype,      
        &local[0][blockColumns], blockRows*blockColumns, MPI_FLOAT,  
        MASTER, commCart);

1 Answer

Answer 0 (score: 0)

I have had to guess at how you actually want your code to work (for example, the way local is currently defined and then used first as a 1D and then as a 2D array is not internally consistent). The key points to note are:

  1. You need two datatypes: one to pick out the correct elements of the global array on the send side, and one to store them into the correct locations in the local array on the receive side.
  2. The simplest approach is to resize the send type so that its extent is the same as a single MPI_FLOAT, which allows it to be placed anywhere; the displacements are then counted in units of floats (see the worked values below).
  3. On the receive side you just receive a single subarray, so no resizing is needed.
  4. I have not checked the code for full generality, but it seems to work for the particular case you describe, i.e. a 4x6 matrix with a 2x2 decomposition.
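
As a concrete check (worked out from the code below), for the 4x6 matrix split over a 2x2 process grid (blockRows = 2, blockColumns = 3) the send-side displacements, counted in floats, are 0, 3, 12 and 15: rank 0's block starts at element 0 of the global array, rank 1's at element 3, rank 2's at element 12 and rank 3's at element 15. The code computes these as i*blockRows*COLS + j*blockColumns.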

    #include <stdio.h>
    #include <stdlib.h>
    #include <math.h>
    
    #include <mpi.h>
    
    #define MASTER 0
    
    #define ROWS 4
    #define COLS 6
    
    int main(void)
    {
      int dim[2], periods[2], NPROWS, NPCOLS, blockRows, blockColumns;
      int numtasks, taskid, i, j;
    
      MPI_Comm commCart;
    
      MPI_Init(NULL, NULL);
      MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
      dim[0] = dim[1] = sqrt(numtasks);
      periods[0] = periods[1] = 0;
      MPI_Cart_create(MPI_COMM_WORLD, 2, dim, periods, 1, &commCart);
      MPI_Comm_rank(commCart, &taskid);
    
      NPROWS = dim[0];
      NPCOLS = dim[1];
      blockRows = ROWS / NPROWS;
      blockColumns = COLS / NPCOLS;
    
      float* global = NULL;   /* allocated and initialised only on the root */
      if (taskid == MASTER) {
        global = malloc(ROWS * COLS * sizeof (float));
        for(i=0;i<ROWS*COLS;i++){
          global[i]=i+1;
        }
      }
    
      float* local;
      local = malloc((blockRows+2) * (blockColumns+2) * sizeof (float));
    
      for (i = 0; i < (blockRows+2) * (blockColumns+2); i++) {
        local[i] = 0;
      }
    
    
      MPI_Datatype type, resizedtype;
      int sizes[2]    = {ROWS,COLS};
      int subsizes[2] = {blockRows,blockColumns};
      int starts[2]   = {0,0};
    
      /* Send type: a blockRows x blockColumns subarray of the ROWS x COLS global
         array, resized so that its extent is one float and the displacements can
         be given in units of floats */
      MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_FLOAT, &type);
      MPI_Type_create_resized(type, 0, sizeof(float), &resizedtype);
      MPI_Type_commit(&resizedtype);
    
      int *counts = malloc(numtasks*sizeof(int));
      int *displs = malloc(numtasks*sizeof(int));
      for(i=0;i<numtasks;i++){
        counts[i] = 1;
      }
    
      int disp = 0;
      for(i=0;i<NPROWS;i++){
        for(j=0;j<NPCOLS;j++){
          disp = i*blockRows*COLS+j*blockColumns;
          displs[i*dim[0] + j] = disp;
        }
      }
    
      MPI_Datatype localtype;
      int localsizes[2]    = {blockRows+2,blockColumns+2};
      int localsubsizes[2] = {blockRows,blockColumns};
      int localstarts[2]   = {1,1};
    
      /* Receive type: a blockRows x blockColumns subarray placed at offset (1,1)
         inside the (blockRows+2) x (blockColumns+2) halo buffer */
      MPI_Type_create_subarray(2, localsizes, localsubsizes, localstarts, MPI_ORDER_C, MPI_FLOAT, &localtype);
      MPI_Type_commit(&localtype);
    
    
      MPI_Scatterv(global, counts, displs, resizedtype,
                   &local[0], 1, localtype,
                   MASTER, commCart);
    
    
      for (i=0; i < (blockRows+2)*(blockColumns+2); i++)
        {
          printf("rank %d: local[%d] = %f\n", taskid, i, local[i]);
        }
    
      MPI_Finalize();
    }
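
To build and run this (assuming the source is saved as scatterv.c; the executable name matches the mpiexec command shown in the output below):

    mpicc -o scatterv scatterv.c -lm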
    

Here is the output, which looks like what you wanted:

    mpiexec -n 4 ./scatterv
    rank 0: local[0] = 0.000000
    rank 0: local[1] = 0.000000
    rank 0: local[2] = 0.000000
    rank 0: local[3] = 0.000000
    rank 0: local[4] = 0.000000
    rank 0: local[5] = 0.000000
    rank 0: local[6] = 1.000000
    rank 0: local[7] = 2.000000
    rank 0: local[8] = 3.000000
    rank 0: local[9] = 0.000000
    rank 0: local[10] = 0.000000
    rank 0: local[11] = 7.000000
    rank 0: local[12] = 8.000000
    rank 0: local[13] = 9.000000
    rank 0: local[14] = 0.000000
    rank 0: local[15] = 0.000000
    rank 0: local[16] = 0.000000
    rank 0: local[17] = 0.000000
    rank 0: local[18] = 0.000000
    rank 0: local[19] = 0.000000
    rank 1: local[0] = 0.000000
    rank 1: local[1] = 0.000000
    rank 1: local[2] = 0.000000
    rank 1: local[3] = 0.000000
    rank 1: local[4] = 0.000000
    rank 1: local[5] = 0.000000
    rank 1: local[6] = 4.000000
    rank 1: local[7] = 5.000000
    rank 1: local[8] = 6.000000
    rank 1: local[9] = 0.000000
    rank 1: local[10] = 0.000000
    rank 1: local[11] = 10.000000
    rank 1: local[12] = 11.000000
    rank 1: local[13] = 12.000000
    rank 1: local[14] = 0.000000
    rank 1: local[15] = 0.000000
    rank 1: local[16] = 0.000000
    rank 1: local[17] = 0.000000
    rank 1: local[18] = 0.000000
    rank 1: local[19] = 0.000000
    rank 2: local[0] = 0.000000
    rank 2: local[1] = 0.000000
    rank 2: local[2] = 0.000000
    rank 2: local[3] = 0.000000
    rank 2: local[4] = 0.000000
    rank 2: local[5] = 0.000000
    rank 2: local[6] = 13.000000
    rank 2: local[7] = 14.000000
    rank 2: local[8] = 15.000000
    rank 2: local[9] = 0.000000
    rank 2: local[10] = 0.000000
    rank 2: local[11] = 19.000000
    rank 2: local[12] = 20.000000
    rank 2: local[13] = 21.000000
    rank 2: local[14] = 0.000000
    rank 2: local[15] = 0.000000
    rank 2: local[16] = 0.000000
    rank 2: local[17] = 0.000000
    rank 2: local[18] = 0.000000
    rank 2: local[19] = 0.000000
    rank 3: local[0] = 0.000000
    rank 3: local[1] = 0.000000
    rank 3: local[2] = 0.000000
    rank 3: local[3] = 0.000000
    rank 3: local[4] = 0.000000
    rank 3: local[5] = 0.000000
    rank 3: local[6] = 16.000000
    rank 3: local[7] = 17.000000
    rank 3: local[8] = 18.000000
    rank 3: local[9] = 0.000000
    rank 3: local[10] = 0.000000
    rank 3: local[11] = 22.000000
    rank 3: local[12] = 23.000000
    rank 3: local[13] = 24.000000
    rank 3: local[14] = 0.000000
    rank 3: local[15] = 0.000000
    rank 3: local[16] = 0.000000
    rank 3: local[17] = 0.000000
    rank 3: local[18] = 0.000000
    rank 3: local[19] = 0.000000
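
One small addition that is not part of the answer above: in real code the derived datatypes and heap buffers should be released before MPI_Finalize, e.g.:

    /* cleanup (not in the original answer): free derived types and buffers */
    MPI_Type_free(&resizedtype);
    MPI_Type_free(&localtype);
    free(counts);
    free(displs);
    free(local);
    if (taskid == MASTER) free(global);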