I have the following code, which compiles and runs:
mpicc -std=c99 region.c
mpirun -n 4 region
$ mpirun -version
mpirun (Open MPI) 1.6.5
$ mpicc --version
gcc (Ubuntu 4.8.2-19ubuntu1) 4.8.2
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int rank,
    size,
    dims[2],
    coords[2],
    image_size[2] = {8,8},
    local_image_size[2];

MPI_Datatype border_row_t,
    border_col_t,
    subarray_type,
    recv_type;

unsigned char *image,
    *region,
    *local_region;

void create_types() {
    int starts[2] = {0, 0};
    MPI_Type_create_subarray(2, image_size, local_image_size, starts, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &subarray_type);
    MPI_Type_commit(&subarray_type);

    MPI_Type_vector(local_image_size[0], local_image_size[1], image_size[1], MPI_UNSIGNED_CHAR, &recv_type);
    MPI_Type_commit(&recv_type);
}

void distribute_image(){
    if (0 == rank) {
        MPI_Request request;
        int num_hor_segments = image_size[0] / local_image_size[0];
        int num_vert_segments = image_size[1] / local_image_size[1];
        int dest_rank = 0;
        for (int vert=0; vert<num_vert_segments; vert++) {
            for (int hor=0; hor<num_hor_segments; hor++) {
                MPI_Isend((image+(local_image_size[0]*hor)+(local_image_size[1]*image_size[1]*vert)), 1, subarray_type, dest_rank, 0, MPI_COMM_WORLD, &request);
                dest_rank++;
            }
        }
    }

    MPI_Status status;
    MPI_Recv(local_region, local_image_size[0]*local_image_size[1], MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD, &status);
}

void gather_region(){
    int counts[4]={1,1,1,1};
    int disps[4]={0,4,32,36};
    MPI_Gatherv(local_region, local_image_size[0]*local_image_size[1], MPI_UNSIGNED_CHAR, region, counts, disps, recv_type, 0, MPI_COMM_WORLD);

    if (0==rank) {
        printf("Actually returned:\n");
        for (int i=0; i<image_size[0]*image_size[1]; i++) {
            printf("%d\t", *(region+i));
            if ((i+1)%image_size[0]==0) printf("\n");
        }
    }
}

void init_mpi(int argc, char** argv){
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Dims_create(size, 2, dims);
}

void load_and_allocate_images(int argc, char** argv){
    if (rank == 0){
        image = (unsigned char*) malloc(sizeof(unsigned char*) * image_size[0] * image_size[1]);
        for (unsigned char i=0; i<image_size[0]*image_size[1]; i++) {
            image[i] = i;
            printf("%d\t", *(image+i));
            if((i+1)%image_size[0]==0) printf("\n");
        }
        printf("\n\n");
        region = (unsigned char*)calloc(sizeof(unsigned char), image_size[0]*image_size[1]);
    }

    local_image_size[0] = image_size[0]/dims[0];
    local_image_size[1] = image_size[1]/dims[1];

    int lsize = local_image_size[0]*local_image_size[1];
    int lsize_border = (local_image_size[0] + 2)*(local_image_size[1] + 2);
    local_region = (unsigned char*)calloc(sizeof(unsigned char), lsize_border);
}

void cleanup() {
    MPI_Type_free(&subarray_type);
    MPI_Type_free(&recv_type);
}

int main(int argc, char** argv){
    init_mpi(argc, argv);
    load_and_allocate_images(argc, argv);
    create_types();
    distribute_image();
    gather_region();
    cleanup();
    MPI_Finalize();
    exit(0);
}
When I run the gatherv with displacements of 0, 4, 32 and 36, I get the following:
Distributed vector:
0 1 2 3 4 5 6 7
8 9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63
Actually returned:
0 1 2 3 0 0 0 0
8 9 10 11 0 0 0 0
16 17 18 19 0 0 0 0
24 25 26 27 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
If I change the displacements to 0, 1, 32, 36, I get the following:
Distributed vector:
0 1 2 3 4 5 6 7
8 9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63
Actually returned:
0 1 2 3 0 0 0 0
8 9 10 11 0 0 0 0
16 17 18 19 0 0 0 0
24 25 26 27 4 5 6 7
0 0 0 0 12 13 14 15
0 0 0 0 20 21 22 23
0 0 0 0 28 29 30 31
0 0 0 0 0 0 0 0
Why does a displacement of 1 translate into 28 in the returned vector? This is confusing me.
Answer (score: 3)
Displacements in MPI_GATHERV are specified in units of the extent of the datatype. The datatype created by

MPI_Type_vector(local_image_size[0], local_image_size[1], image_size[1], MPI_UNSIGNED_CHAR, &recv_type);

has an extent of ((local_image_size[0]-1) * image_size[1] + local_image_size[1]) * extent(MPI_UNSIGNED_CHAR). Given the following:

local_image_size[0] = 4
local_image_size[1] = 4
image_size[1] = 8
extent(MPI_UNSIGNED_CHAR) = 1 byte

this gives recv_type an extent of (4-1) * 8 + 4 = 28 bytes. A displacement of 1 therefore designates a location 28 bytes past the beginning of the receive buffer.
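As a quick sanity check (a minimal sketch of my own, not part of the original answer), the extent can be queried directly with MPI_Type_get_extent once the type has been committed:

/* Sketch: query the lower bound and extent of recv_type.
 * Assumes recv_type was created and committed as in create_types(). */
MPI_Aint lb, extent;
MPI_Type_get_extent(recv_type, &lb, &extent);
if (rank == 0)
    printf("recv_type: lb = %ld, extent = %ld bytes\n", (long)lb, (long)extent);
/* With local_image_size = {4,4} and image_size[1] = 8 this reports an extent of 28. */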
The type can be "resized" by using MPI_Type_create_resized to force a different "visible" extent on it. The whole process of performing a 2D decomposition correctly is described in detail in this answer.
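As an illustration of that fix (a sketch under the assumption that the block-to-rank ordering is the one used in distribute_image(); resized_recv_type is a name introduced here), resizing recv_type to an extent of one unsigned char makes the displacements count single elements of the full 8x8 array, so the original displacements 0, 4, 32 and 36 then place each 4x4 block where intended:

/* Sketch: give recv_type a "visible" extent of 1 byte so that Gatherv
 * displacements are measured in single unsigned chars. */
MPI_Datatype resized_recv_type;   /* name introduced for this sketch */
MPI_Type_create_resized(recv_type, 0, sizeof(unsigned char), &resized_recv_type);
MPI_Type_commit(&resized_recv_type);

int counts[4] = {1, 1, 1, 1};
int disps[4]  = {0, 4, 32, 36};   /* start of each 4x4 block inside the 8x8 image */
MPI_Gatherv(local_region, local_image_size[0]*local_image_size[1], MPI_UNSIGNED_CHAR,
            region, counts, disps, resized_recv_type, 0, MPI_COMM_WORLD);

MPI_Type_free(&resized_recv_type);

Because the four 4x4 blocks do not overlap in region, writing past the 1-byte visible extent of each element is allowed, and the blocks interleave into the correct rows and columns of the gathered image.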