I have the following code, which compiles and runs:
mpicc -std=c99 region.c
mpirun -n 4 region
$ mpirun -version
mpirun (Open MPI) 1.6.5
$ mpicc --version
gcc (Ubuntu 4.8.2-19ubuntu1) 4.8.2
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int rank,
    size,
    dims[2],
    coords[2],
    image_size[2] = {8,8},
    local_image_size[2];

MPI_Datatype border_row_t,
    border_col_t,
    subarray_type,
    recv_type;

unsigned char *image,
    *region,
    *local_region;

void create_types() {
    int starts[2] = {0, 0};
    MPI_Type_create_subarray(2, image_size, local_image_size, starts, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &subarray_type);
    MPI_Type_commit(&subarray_type);

    MPI_Type_vector(local_image_size[0], local_image_size[1], image_size[1], MPI_UNSIGNED_CHAR, &recv_type);
    MPI_Type_commit(&recv_type);
}

void distribute_image(){
    if (0 == rank) {
        MPI_Request request;
        int num_hor_segments = image_size[0] / local_image_size[0];
        int num_vert_segments = image_size[1] / local_image_size[1];
        int dest_rank = 0;
        for (int vert=0; vert<num_vert_segments; vert++) {
            for (int hor=0; hor<num_hor_segments; hor++) {
                MPI_Isend((image+(local_image_size[0]*hor)+(local_image_size[1]*image_size[1]*vert)), 1, subarray_type, dest_rank, 0, MPI_COMM_WORLD, &request);
                dest_rank++;
            }
        }
    }

    MPI_Status status;
    MPI_Recv(local_region, local_image_size[0]*local_image_size[1], MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD, &status);
}

void gather_region(){
    int counts[4]={1,1,1,1};
    int disps[4]={0,4,32,36};
    MPI_Gatherv(local_region, local_image_size[0]*local_image_size[1], MPI_UNSIGNED_CHAR, region, counts, disps, recv_type, 0, MPI_COMM_WORLD);

    if (0==rank) {
        printf("Actually returned:\n");
        for (int i=0; i<image_size[0]*image_size[1]; i++) {
            printf("%d\t", *(region+i));
            if ((i+1)%image_size[0]==0) printf("\n");
        }
    }
}

void init_mpi(int argc, char** argv){
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Dims_create(size, 2, dims);
}

void load_and_allocate_images(int argc, char** argv){
    if (rank == 0){
        image = (unsigned char*) malloc(sizeof(unsigned char*) * image_size[0] * image_size[1]);
        for (unsigned char i=0; i<image_size[0]*image_size[1]; i++) {
            image[i] = i;
            printf("%d\t", *(image+i));
            if((i+1)%image_size[0]==0) printf("\n");
        }
        printf("\n\n");
        region = (unsigned char*)calloc(sizeof(unsigned char), image_size[0]*image_size[1]);
    }

    local_image_size[0] = image_size[0]/dims[0];
    local_image_size[1] = image_size[1]/dims[1];

    int lsize = local_image_size[0]*local_image_size[1];
    int lsize_border = (local_image_size[0] + 2)*(local_image_size[1] + 2);
    local_region = (unsigned char*)calloc(sizeof(unsigned char), lsize_border);
}

void cleanup() {
    MPI_Type_free(&subarray_type);
    MPI_Type_free(&recv_type);
}

int main(int argc, char** argv){
    init_mpi(argc, argv);
    load_and_allocate_images(argc, argv);
    create_types();
    distribute_image();
    gather_region();
    cleanup();
    MPI_Finalize();
    exit(0);
}
When I run the gatherv with displacements of 0, 4, 32 and 36, I get the following:
Distributed vector:
0 1 2 3 4 5 6 7
8 9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63
Actually returned:
0 1 2 3 0 0 0 0
8 9 10 11 0 0 0 0
16 17 18 19 0 0 0 0
24 25 26 27 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
If I change the displacements to 0, 1, 32, 36, I get the following:
Distributed vector:
0 1 2 3 4 5 6 7
8 9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63
Actually returned:
0 1 2 3 0 0 0 0
8 9 10 11 0 0 0 0
16 17 18 19 0 0 0 0
24 25 26 27 4 5 6 7
0 0 0 0 12 13 14 15
0 0 0 0 20 21 22 23
0 0 0 0 28 29 30 31
0 0 0 0 0 0 0 0
Why does a displacement of 1 translate into 28 in the returned vector? This is confusing me.
Answer (score: 3)
Displacements in MPI_GATHERV are specified in units of the extent of the datatype. The datatype created by

MPI_Type_vector(local_image_size[0], local_image_size[1], image_size[1], MPI_UNSIGNED_CHAR, &recv_type);

has an extent of ((local_image_size[0]-1) * image_size[1] + local_image_size[1]) * extent(MPI_UNSIGNED_CHAR). Given the following:

local_image_size[0] = 4
local_image_size[1] = 4
image_size[1] = 8
extent(MPI_UNSIGNED_CHAR) = 1 byte

this gives recv_type an extent of (4-1) * 8 + 4 = 28 bytes. A displacement of 1 therefore designates a location 28 bytes past the beginning of the receive buffer.
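As a quick sanity check (a minimal sketch of my own, not part of the original answer), the extent can be queried directly with MPI_Type_get_extent once the type has been committed:

/* Sketch: query the lower bound and extent of recv_type.
 * Assumes recv_type was created and committed as in create_types(). */
MPI_Aint lb, extent;
MPI_Type_get_extent(recv_type, &lb, &extent);
if (rank == 0)
    printf("recv_type: lb = %ld, extent = %ld bytes\n", (long)lb, (long)extent);
/* With local_image_size = {4,4} and image_size[1] = 8 this reports an extent of 28. */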
The type can be "resized" by using MPI_Type_create_resized to force a different "visible" extent on it. The whole process of performing a 2D decomposition correctly is described in detail in this answer.
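As an illustration of that fix (a sketch under the assumption that the block-to-rank ordering is the one used in distribute_image(); resized_recv_type is a name introduced here), resizing recv_type to an extent of one unsigned char makes the displacements count single elements of the full 8x8 array, so the original displacements 0, 4, 32 and 36 then place each 4x4 block where intended:

/* Sketch: give recv_type a "visible" extent of 1 byte so that Gatherv
 * displacements are measured in single unsigned chars. */
MPI_Datatype resized_recv_type;   /* name introduced for this sketch */
MPI_Type_create_resized(recv_type, 0, sizeof(unsigned char), &resized_recv_type);
MPI_Type_commit(&resized_recv_type);

int counts[4] = {1, 1, 1, 1};
int disps[4]  = {0, 4, 32, 36};   /* start of each 4x4 block inside the 8x8 image */
MPI_Gatherv(local_region, local_image_size[0]*local_image_size[1], MPI_UNSIGNED_CHAR,
            region, counts, disps, resized_recv_type, 0, MPI_COMM_WORLD);

MPI_Type_free(&resized_recv_type);

Because the four 4x4 blocks do not overlap in region, writing past the 1-byte visible extent of each element is allowed, and the blocks interleave into the correct rows and columns of the gathered image.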