Using MPI, I'm trying to distribute a 2D array of arbitrary size as evenly as possible over an arbitrary number of processes. For example, suppose we have a 10x10 array and 9 processes. What I want to achieve is the following:
 _______________________
|        |      |      |
|   P0   |  P1  |  P2  |
|  4x4   | 4x3  | 4x3  |
|________|______|______|
|   P3   |  P4  |  P5  |
|  3x4   | 3x3  | 3x3  |
|________|______|______|
|   P6   |  P7  |  P8  |
|  3x4   | 3x3  | 3x3  |
|________|______|______|
So far I've managed to create a suitable process grid and to compute the correct displacements into the global array (the first element of each subarray), but I'm failing badly when it comes to actually sending those subarrays. I've tried using both MPI_Type_vector and MPI_Type_create_subarray, but I must be missing something important, because I can't get MPI_Scatterv to work.
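For reference, this is roughly the pattern I am trying to generalize, written out for the uniform case only (a hypothetical 2x2 process grid on the same 10x10 array, so every block is 5x5): as far as I understand it, a single resized MPI_Type_vector describes one block, every count is 1, and the displacements are measured in chars.

#include <stdio.h>
#include <mpi.h>

#define ROWS 10
#define COLS 10
#define NPR  2   // process-grid rows for this sketch
#define NPC  2   // process-grid cols for this sketch

int main(int argc, char **argv) {
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != NPR*NPC) {
        if (rank == 0) fprintf(stderr, "run with exactly %d processes\n", NPR*NPC);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    const int br = ROWS/NPR, bc = COLS/NPC;   // uniform block size: 5x5
    char global[ROWS*COLS];
    char local[br*bc];
    if (rank == 0)
        for (int i = 0; i < ROWS*COLS; i++) global[i] = (char)i;

    // One block = br rows of bc chars, consecutive rows COLS chars apart in the
    // global array; resize the extent to one char so displacements count chars.
    MPI_Datatype tmp, blocktype;
    MPI_Type_vector(br, bc, COLS, MPI_CHAR, &tmp);
    MPI_Type_create_resized(tmp, 0, sizeof(char), &blocktype);
    MPI_Type_commit(&blocktype);
    MPI_Type_free(&tmp);

    int counts[NPR*NPC], disps[NPR*NPC];
    for (int i = 0; i < NPR; i++)
        for (int j = 0; j < NPC; j++) {
            counts[i*NPC + j] = 1;                  // exactly one block per process
            disps[i*NPC + j]  = i*br*COLS + j*bc;   // offset of the block's first element
        }

    MPI_Scatterv(global, counts, disps, blocktype,
                 local, br*bc, MPI_CHAR,            // receive side: plain contiguous buffer
                 0, MPI_COMM_WORLD);

    printf("P%d received elements %d..%d\n", rank, (int)local[0], (int)local[br*bc - 1]);

    MPI_Type_free(&blocktype);
    MPI_Finalize();
    return 0;
}

This works because every block has the same shape, so one send-side datatype fits all of them; the part I am unsure about is what that datatype should look like once the blocks have different shapes, which is what my code attempts.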
Here is my code so far; keep in mind that I need some nasty nested loops for the 2D array handling:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

// This will be command-line arguments
#define COLS 10
#define ROWS 10

int main(int argc, char **argv) {
    int p, rank, i, j, proc;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Let MPI decide the topology
    int dims[2] = {0, 0};
    MPI_Dims_create(p, 2, dims);
    int periods[2] = {0, 0};     // non-periodic topology
    int my_coords[2];
    MPI_Comm comm_2D;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &comm_2D);
    MPI_Cart_coords(comm_2D, rank, 2, my_coords);

    // Prepare the arrays and necessary info
    char global_matrix[ROWS*COLS];
    const int NPROWS = dims[0];  // Number of 'block' rows
    const int NPCOLS = dims[1];  // Number of 'block' cols
    int *num_rows;               // Array containing the number of rows for the i-th process
    int *num_cols;               // As before
    num_rows = (int *) malloc(p * sizeof(int));
    num_cols = (int *) malloc(p * sizeof(int));

    if (rank == 0) {
        // Fill global matrix
        for (i=0; i<ROWS*COLS; i++) {
            global_matrix[i] = (char)i;
        }
        // Calculate the number of rows/cols for each process
        for (i=0; i<p; i++) {
            num_rows[i] = ROWS/NPROWS;
            num_cols[i] = COLS/NPCOLS;
        }
        for (i=0; i<(ROWS%NPROWS); i++) {
            for (j=0; j<NPCOLS; j++) {
                num_rows[i*NPCOLS+j]++;
            }
        }
        for (i=0; i<(COLS%NPCOLS); i++) {
            for (j=0; j<NPROWS; j++) {
                num_cols[i+NPROWS*j]++;
            }
        }
    }

    // Inform each process about its local matrix size
    MPI_Bcast(num_rows, p, MPI_INT, 0, comm_2D);
    MPI_Bcast(num_cols, p, MPI_INT, 0, comm_2D);

    // Define and initialize each local matrix
    char local_matrix[num_rows[rank]*num_cols[rank]];
    for (i=0; i<num_rows[rank]*num_cols[rank]; i++) {
        local_matrix[i] = 0;
    }

    // Preparing for the Scatterv. Calculate displacements and number
    // of elements to send to each process
    int *disps = NULL;
    int *counts = NULL;
    if (rank == 0) {
        disps = (int *) malloc(p * sizeof(int));
        counts = (int *) malloc(p * sizeof(int));
        for (i=0; i<NPROWS; i++) {
            for (j=0; j<NPCOLS; j++) {
                if (j == 0) {
                    // First block of the 'blockrow'
                    disps[i*NPCOLS+j] = i*COLS*num_rows[i*NPCOLS+j] + j*num_cols[i*NPCOLS+j];
                } else {
                    // Rest of the blocks
                    disps[i*NPCOLS+j] = disps[i*NPCOLS+j - 1] + num_cols[i*NPCOLS+j - 1];
                }
                // This is VERY important and I'm not sure about it.
                counts[i*NPCOLS+j] = 1; // 1 element to each process??
            }
        }
    }

    // Preparing the Datatypes for the Scatterv operation
    MPI_Datatype tmp_matrix_t, global_matrix_t, local_matrix_t;
    MPI_Type_vector(ROWS, 1, COLS, MPI_CHAR, &tmp_matrix_t);
    MPI_Type_create_resized(tmp_matrix_t, 0, sizeof(char), &global_matrix_t);
    MPI_Type_commit(&global_matrix_t);
    MPI_Type_free(&tmp_matrix_t);
    MPI_Type_vector(num_rows[rank], 1, num_cols[rank], MPI_CHAR, &tmp_matrix_t);
    MPI_Type_create_resized(tmp_matrix_t, 0, sizeof(char), &local_matrix_t);
    MPI_Type_commit(&local_matrix_t);
    MPI_Type_free(&tmp_matrix_t);

    // Doesn't work as expected
    MPI_Scatterv(global_matrix, counts, disps, global_matrix_t,
                 local_matrix, 1, local_matrix_t,   // receiving 1 element??
                 0, comm_2D);

    // Testing/printing results
    MPI_Barrier(comm_2D);
    for (proc=0; proc<p; proc++) {
        if (proc == rank) {
            if (rank == 0) {
                printf("Global matrix:\n");
                for (i=0; i<ROWS; i++) {
                    printf("G: ");
                    for (j=0; j<COLS; j++) {
                        printf("%3d ", (int)global_matrix[i*COLS+j]);
                    }
                    printf("\n");
                }
            }
            printf("Local matrix P%d:\n", rank);
            for (i=0; i<num_rows[rank]; i++) {
                printf("L%d: ", rank);
                for (j=0; j<num_cols[rank]; j++) {
                    printf("%3d ", (int)local_matrix[i*num_cols[rank]+j]);
                }
                printf("\n");
            }
        }
    }

    MPI_Finalize();
    return 0;
}
EDIT:
Looking again at the code in this answer by @Jonathan Dursi, https://stackoverflow.com/a/7587133/4573730, I'm starting to think that the result I want cannot be obtained with a single MPI_Scatterv alone. Right now I think the best approach is probably either to fall back on point-to-point communication, or to use MPI_Scatterv with four different communicators (since there are at most four different subarray sizes), so that within each communicator the send/receive buffers all have the same size.
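A rough, untested sketch of the point-to-point idea is below (the num_rows/num_cols bookkeeping mirrors my code above, using the same kind of uneven split where the first remainder block-rows/columns get one extra row/column): rank 0 builds one MPI_Type_vector per destination that matches that process's block inside the global array and sends it, while every receiver takes its block as a plain contiguous buffer.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

#define ROWS 10
#define COLS 10

int main(int argc, char **argv) {
    int p, rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int dims[2] = {0, 0};
    MPI_Dims_create(p, 2, dims);
    const int NPROWS = dims[0], NPCOLS = dims[1];

    // Every rank computes the uneven split locally, so no broadcast is needed:
    // the first ROWS%NPROWS block-rows (COLS%NPCOLS block-cols) get one extra line.
    int *num_rows = malloc(p * sizeof(int));
    int *num_cols = malloc(p * sizeof(int));
    for (int i = 0; i < NPROWS; i++)
        for (int j = 0; j < NPCOLS; j++) {
            num_rows[i*NPCOLS + j] = ROWS/NPROWS + (i < ROWS%NPROWS ? 1 : 0);
            num_cols[i*NPCOLS + j] = COLS/NPCOLS + (j < COLS%NPCOLS ? 1 : 0);
        }

    char global[ROWS*COLS];
    char *local = malloc(num_rows[rank] * num_cols[rank]);

    if (rank == 0) {
        for (int i = 0; i < ROWS*COLS; i++) global[i] = (char)i;

        int row0 = 0;
        for (int i = 0; i < NPROWS; i++) {
            int col0 = 0;
            for (int j = 0; j < NPCOLS; j++) {
                int dest = i*NPCOLS + j;
                if (dest == 0) {
                    // Root keeps its own block: copy it out row by row
                    for (int r = 0; r < num_rows[0]; r++)
                        memcpy(local + r*num_cols[0], global + r*COLS, num_cols[0]);
                } else {
                    // A datatype matching exactly this block inside the global array
                    MPI_Datatype block;
                    MPI_Type_vector(num_rows[dest], num_cols[dest], COLS, MPI_CHAR, &block);
                    MPI_Type_commit(&block);
                    MPI_Send(global + row0*COLS + col0, 1, block, dest, 0, MPI_COMM_WORLD);
                    MPI_Type_free(&block);
                }
                col0 += num_cols[dest];
            }
            row0 += num_rows[i*NPCOLS];
        }
    } else {
        // The block arrives as a plain contiguous buffer
        MPI_Recv(local, num_rows[rank]*num_cols[rank], MPI_CHAR, 0, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

    printf("P%d has a %dx%d block starting with %d\n",
           rank, num_rows[rank], num_cols[rank], (int)local[0]);

    free(local); free(num_rows); free(num_cols);
    MPI_Finalize();
    return 0;
}

Compared with the four-communicator idea this keeps a single communicator at the cost of p-1 sends from the root, which seems acceptable for the array sizes I'm dealing with, but I'd still prefer a collective solution if one exists.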