Question

我有一个动态的2d数组，它被分配在连续的内存空间中，但是如果我尝试使用MPI_Scatter将这个数组分散到两个MPI进程中会导致分段错误，整个代码都粘贴在这里：

dynamic_2d_array.h

#ifndef _DYNAMIC_2D_ARRAY_H_
#define _DYNAMIC_2D_ARRAY_H_

#include <stdio.h>
#include <stdlib.h>

//typedef double real;
typedef float real;

real **allocate_dynamic_2d_array(int nrows, int ncols);
void free_dynamic_2d_array(real** array_dynamic);
void print_matrix(real** array_dynamic, int nrows, int ncols, char* fmt_string);

real** allocate_dynamic_2d_array(int nrows, int ncols) {
    /* here is the method to correct the non-contiguous memory problem */
    int i;
    real** array_dynamic=(real**)malloc(nrows*sizeof(real*));
    real* data=(real*)malloc(nrows*ncols*sizeof(real));
    for (i=0; i<nrows; i++){
        array_dynamic[i]=&(data[ncols*i]);
    }
    return array_dynamic;
}

void free_dynamic_2d_array(real** array_dynamic){
    free((void*)array_dynamic[0]);
    free((void*)array_dynamic);
}

void print_matrix(real** array_dynamic, int nrows, int ncols, char* fmt_string) {
//void print_matrix(real array_dynamic[][4], int nrows, int ncols, char* fmt_string) {
    int i,j;
    for (i = 0; i < nrows; i++){
        for (j = 0; j < ncols; j++){
            printf(fmt_string, array_dynamic[i][j]);
        }
        printf("\n");
    }
}

#endif // #ifndef _DYNAMIC_2D_ARRAY_H_

这是scatter_mat.h：

#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "dynamic_2d_array.h"

int main(int argc, char ** argv)
{
    MPI_Init(&argc, &argv);
    int rank, psize, root = 0;
    int i,j;
    int ncols;
    int M=48,N=3;
    real *sub_mat;
    real **A;

    MPI_Comm_size(MPI_COMM_WORLD, &psize);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank==0) {
        A = allocate_dynamic_2d_array(M,N);        
        printf("before scatter:\n");
        int idx=0;
        for (i=0;i<M;i++){
            for (j=0;j<N;j++) {
                A[i][j]=idx++;
                printf("%4.1f\t",A[i][j]);
            }
            printf("\n");
        }
    }

    ncols = M/psize;
    sub_mat = (real*)malloc(N*ncols*sizeof(real));

    MPI_Scatter(&(A[0][0]),N*ncols,MPI_FLOAT,sub_mat,N*ncols,MPI_FLOAT,root,MPI_COMM_WORLD);

    for (i=0;i<ncols*N;i++)
        printf("%3.1f\t",sub_mat[i]);
    printf("\n");

    if (rank==0) {
        free_dynamic_2d_array(A); 
    }

    free(sub_mat);
    MPI_Finalize();
    return 0;
}

如果我使用mpicc编译它并使用mpirun -np 2运行它，它将导致第1级的分段错误，更令我困惑的是它使用调试模式（mpicc -g）运行良好然而在发布时崩溃模式，我想一定有一些内存访问问题，但我无法弄清楚，有人可以给出一些建议吗？

以下是MPI编译器信息：

$ mpiexec --version
mpiexec (OpenRTE) 1.6.2

Report bugs to http://www.open-mpi.org/community/help/
$ mpicc -show
icc -I/usr/local/packages/openmpi/1.6.2/Intel-13.0.0/include -L/usr/local/packages/openmpi/1.6.2/Intel-13.0.0/lib -lmpi -ldl -lm -Wl,--export-dynamic -lrt -lnsl -libverbs -libumad -lpthread -lutil

非常感谢！

Answer 1

问题是您在所有级别中解除引用&(A[0][0])但它仅在根级别中分配。将NULL作为第一个参数传递给除{1>之外的所有等级中的MPI_Scatter。

您还应将ncols重命名为nrows（语义上）。

MPI_Scatter动态2d数组行导致分段错误

1 个答案: