Question

我正在尝试使用MPI库在C中编写程序，其中主进程创建一个2D数组并将其行分发给其他进程。矩阵的维度为p*p，其中p是进程数。

以下是代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/*
 * Allocate an nrows x ncols matrix as an array of row pointers and fill it
 * with consecutive integers in row-major order: element [r][c] = r*ncols + c.
 *
 * Every row is a separate malloc() allocation, so the rows are not guaranteed
 * to be adjacent in memory. The caller owns the returned pointer array and
 * each row. On allocation failure a message is printed and the process exits
 * with status 1.
 */
int **createMatrix (int nrows, int ncols) {
    int **matrix;
    int row, col;

    if (( matrix = malloc(nrows * sizeof *matrix)) == NULL) {
        printf("Malloc error");
        exit(1);
    }

    for (row=0; row<nrows; row++) {
        if (( matrix[row] = malloc(ncols * sizeof **matrix)) == NULL) {
            printf("Malloc error 2");
            exit(1);
        }
    }

    /* The first index selects one of the nrows row pointers and the second
       index selects one of the ncols ints inside that row; using the bounds
       the other way round overruns the buffers whenever nrows != ncols. */
    for (row=0; row<nrows; row++) {
        for (col=0; col<ncols; col++) {
            matrix[row][col] = ((row*ncols) + col);
        }
    }

    return matrix;
}

/*
 * Print the first nElements ints of row on one line of stdout, each value
 * followed by a single space, and finish the line with a newline.
 */
void printArray (int *row, int nElements) {
    const int *cursor = row;
    int remaining = nElements;

    while (remaining > 0) {
        printf("%d ", *cursor);
        cursor++;
        remaining--;
    }
    printf("\n");
}

/*
 * Print a matrix stored as an array of row pointers: one output line per
 * row, delegating the formatting of each row to printArray().
 */
void printMatrix (int **matrix, int nrows, int ncols) {
    int r = 0;

    while (r < nrows) {
        printArray(matrix[r], ncols);
        r++;
    }
}

/*
 * Entry point: rank 0 builds a p x p matrix (p = number of MPI processes)
 * with createMatrix() and the program then calls MPI_Scatter so that every
 * rank receives p ints into procRow and prints them.
 *
 * NOTE(review): the scatter call below does not distribute the rows as
 * intended; see the notes next to the matrix declaration and the call.
 */
int main (int argc, char **argv) {

    if (MPI_Init(&argc, &argv) != MPI_SUCCESS) {
        perror("Error initializing MPI");
        exit(1);
    }

    int p, id;
    MPI_Comm_size(MPI_COMM_WORLD, &p); // Get number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &id); // Get own ID

    // Declared without an initializer; only rank 0 assigns it in the branch below.
    int **matrix;

    if (id == 0) {
        matrix = createMatrix(p, p); // Master process creates matrix
        printf("Initial matrix:\n");
        printMatrix(matrix, p, p);
    }

    int *procRow = malloc(sizeof(int) * p); // received row will contain p integers
    if (procRow == NULL) {
        perror("Error in malloc 3");
        exit(1);
    }

    // NOTE(review): *matrix is matrix[0], the address of the first row only.
    // createMatrix() allocates each row with its own malloc(), so the p*p ints
    // the root sends starting at that address are not guaranteed to be the
    // remaining rows. On every rank other than 0, evaluating *matrix also
    // dereferences the pointer that was never assigned.
    if (MPI_Scatter(*matrix, p, MPI_INT, // send one row, which contains p integers
                    procRow, p, MPI_INT, // receive one row, which contains p integers
                    0, MPI_COMM_WORLD) != MPI_SUCCESS) {

        perror("Scatter error");
        exit(1);
    }

    printf("Process %d received elements: ", id);
    printArray(procRow, p);

    MPI_Finalize();

    return 0;
}

运行此代码时收到的输出是

$ mpirun -np 4 test
Initial matrix:
0 1 2 3 
4 5 6 7 
8 9 10 11 
12 13 14 15 
Process 0 received elements: 0 1 2 3 
Process 1 received elements: 1 50 32 97 
Process 2 received elements: -1217693696 1 -1217684120 156314784 
Process 3 received elements: 1 7172196 0 0

进程0显然接收到了正确的输入，但其他进程显示的是我无法理解的数字。另请注意，进程1和进程3输出的数字在程序的多次运行中保持一致，而进程2输出的数字每次运行都会变化。

在我看来，是我的内存分配或指针使用有问题，但我对C语言编程还很陌生。有人能解释一下这个输出是如何产生的、为什么会产生吗？其次，显然我也想知道如何解决这个问题 :) 提前感谢！

Answer 1

我认为你从根本上误解了分散操作的作用以及MPI如何分配和使用内存。

MPI_Scatter获取一个源数组并将其分成若干部分，向MPI通信器的每个成员发送其中唯一的一部分。在您的示例中，矩阵需要被分配为线性内存中连续的p*p个元素，这样才能向每个进程发送p个值。而您的源“矩阵”是一个指针数组：无法保证各行在内存中按顺序连续排列，MPI_Scatter也不知道如何遍历传给它的指针数组。因此，该调用只是从您通过矩阵指针解引用传入的第一行开始读取，并越过该行的末尾继续读，把内存中紧随其后的任何内容都当作数据。这就是为什么第一行之后的数据所对应的进程会收到垃圾值。

所有MPI数据复制例程都要求源内存和目标内存是“扁平”的线性数组。多维C数组应按行主序（row-major order）存储，而不是像这里一样存储为指针数组。下面是对您的示例做的一个简单粗暴的修改，用来演示scatter调用能够正常工作：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/*
 * Allocate an nrows x ncols matrix as ONE contiguous block of ints laid out
 * row by row, and fill it with 1, 2, ..., nrows*ncols.
 *
 * Because the storage is a single flat allocation, the returned pointer can
 * be handed directly to routines that expect a linear buffer. The caller
 * owns the buffer and releases it with free(). On allocation failure a
 * message is printed and the process exits with status 1.
 */
int *createMatrix (int nrows, int ncols) {
    int *matrix;
    /* Do the multiplication in size_t so a large nrows*ncols cannot overflow
       int before it is widened for malloc(). */
    size_t count = (size_t)nrows * (size_t)ncols;
    size_t h;

    if (( matrix = malloc(count * sizeof *matrix)) == NULL) {
        printf("Malloc error");
        exit(1);
    }

    for (h=0; h<count; h++) {
        matrix[h] = (int)(h + 1);
    }

    return matrix;
}

/*
 * Write nElements ints starting at row to stdout on a single line; every
 * value is followed by one space and the line is closed with a newline.
 */
void printArray (int *row, int nElements) {
    const int *cur;
    const int *end;

    if (nElements > 0) {
        end = row + nElements;
        for (cur = row; cur != end; ++cur) {
            printf("%d ", *cur);
        }
    }
    printf("\n");
}

/*
 * Entry point: rank 0 builds a flat p x p matrix (p = number of MPI
 * processes), MPI_Scatter hands p consecutive ints (one row) to every rank,
 * and each rank prints the row it received.
 *
 * Returns 0 on success. A failure after MPI_Init aborts the whole job with
 * MPI_Abort so the other ranks are not left waiting inside the collective.
 */
int main (int argc, char **argv) {

    if (MPI_Init(&argc, &argv) != MPI_SUCCESS) {
        /* MPI is not usable yet, so a plain exit is the only option here.
           MPI calls do not set errno, hence no perror(). */
        fprintf(stderr, "Error initializing MPI\n");
        exit(1);
    }

    int p, id;
    MPI_Comm_size(MPI_COMM_WORLD, &p); // Get number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &id); // Get own ID

    /* Only the root owns a send buffer. The other ranks pass NULL, which is
       fine because MPI_Scatter's sendbuf is significant only at the root. */
    int *matrix = NULL;

    if (id == 0) {
        matrix = createMatrix(p, p); // Master process creates matrix
        printf("Initial matrix:\n");
        printArray(matrix, p*p);
    }

    int *procRow = malloc(sizeof *procRow * p); // received row will contain p integers
    if (procRow == NULL) {
        perror("Error in malloc 3");
        MPI_Abort(MPI_COMM_WORLD, 1);
        exit(1); // fallback in case MPI_Abort returns
    }

    if (MPI_Scatter(matrix, p, MPI_INT, // send one row, which contains p integers
                procRow, p, MPI_INT, // receive one row, which contains p integers
                0, MPI_COMM_WORLD) != MPI_SUCCESS) {

        fprintf(stderr, "Scatter error\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
        exit(1); // fallback in case MPI_Abort returns
    }

    printf("Process %d received elements: ", id);
    printArray(procRow, p);

    free(procRow);
    free(matrix); // NULL on non-root ranks; free(NULL) is a no-op

    MPI_Finalize();

    return 0;
}

其运行结果如下：

$ mpicc -o scatter scatter.c 
$ mpiexec -np 4 scatter
Initial matrix:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 
Process 0 received elements: 1 2 3 4 
Process 1 received elements: 5 6 7 8 
Process 2 received elements: 9 10 11 12 
Process 3 received elements: 13 14 15 16

也就是说，当您传入存储在线性内存中的数据时，它就能正常工作。等效的行主序数组可以按如下方式静态分配：

/* Statically allocated 4x4 array: all 16 ints occupy one contiguous block,
   stored row after row (row-major), unlike an array of separately
   allocated row pointers. */
int matrix[4][4] = { {  1,  2,  3,  4 }, 
                     {  5,  6,  7,  8 },
                     {  9, 10, 11, 12 },
                     { 13, 14, 15, 16 } };

注意静态分配的二维数组与代码动态分配的指针数组之间的区别。尽管它们看起来很相似，但它们并不是完全相同的。

2D数组和malloc的MPI_Scatter

1 个答案: