Memory problem in MPI

Date: 2017-05-11 07:28:55

Tags: c memory mpi

First of all, I apologize for my very bad English. I wrote a code in serial and in parallel that solves the 2D Laplace equation. The number of nodes is read from the user (in my code I read the number of nodes, not the size of the rectangle), and a two-dimensional matrix is defined from it. In the serial code I can enter a large number of nodes (more than 1000), but in the parallel code I can only enter up to 80 nodes; if I enter a larger number, I get the error below. At first I defined the matrix normally; when I instead define the 2D array and allocate its memory with malloc, I get the same error for any number of nodes (even fewer than 80).

[handicraft-ThinkPad:03040] *** Process received signal ***
[handicraft-ThinkPad:03040] Signal: Segmentation fault (11)
[handicraft-ThinkPad:03040] Signal code: Address not mapped (1)
[handicraft-ThinkPad:03040] Failing at address: 0x1f4
[handicraft-ThinkPad:03040] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fd1a1728390]
[handicraft-ThinkPad:03040] [ 1] l[0x4010e0]
[handicraft-ThinkPad:03040] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fd1a136e830]
[handicraft-ThinkPad:03040] [ 3] l[0x400b29]
[handicraft-ThinkPad:03040] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 3040 on node handicraft-ThinkPad exited on signal 11 (Segmentation fault).

Where is the problem, and how can I use as many nodes in the parallel code as in the serial code? My code is here:

#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
float **floatalloc2d(int n, int m);
int main()
{
    int rank, size;
    double start_t, end_t;
    MPI_Init(NULL, NULL);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    float k, b_left, b_right, b_up, b_down;
    int l_type, u_type, r_type, d_type, i, j, n, flag;

    //scan data from user
    if (rank == 0)
    {
        printf("Enter number of node: \n");
        scanf("%d", &n);
        printf("Enter the k factor: \n");
        scanf("%f", &k);
        printf("Enter type of left boundary conditions: 0 for dirichlet and 1 for Neumann \n");
        scanf("%d", &l_type);
        printf("Enter left boundary conditions:\n");
        scanf("%f", &b_left);
    }
    //calculate the time
    start_t = MPI_Wtime();
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&k, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);

    int cond = 0, dx = 1, dy = 1, step = n/size, snd_num = step*n, rcv_num = step*n;

    //float t1[n][n],t2[n][n],t3[step][n],t4[step][n];
    float error;
    float** t1 = floatalloc2d(n, n);
    float** t2 = floatalloc2d(n, n);
    float** t3 = floatalloc2d(step, n);
    float** t4 = floatalloc2d(step, n);
    //compute with Gauss-Seidel
    for (int z = 0; z < 1000; z++)
    {
        //send data to all processes
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Scatter(t1, snd_num, MPI_FLOAT, t3, rcv_num, MPI_FLOAT, 0, MPI_COMM_WORLD);
        //compute in each process
        for (i = 1; i < (step-1); i++)
        {
            for (j = 1; j < (n-1); j++)
            {
                t4[i][j] = 0.25*(t3[i-1][j]+t3[i+1][j]+t3[i][j-1]+t3[i][j+1]);
                error = fabs(t4[i][j]-t3[i][j]);
                t3[i][j] = t4[i][j];
                //cout<<i<<","<<j<<":  ";
                //cout<<"error= "<<error<<"\n";
            }
        }
        //collect data from all processes
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Gather(&t3, snd_num, MPI_FLOAT, &t1, rcv_num, MPI_FLOAT, 0, MPI_COMM_WORLD);
        //review
    }
    end_t = MPI_Wtime();
    MPI_Finalize();
}
float **floatalloc2d(int n, int m) {
    float *data = (float *)malloc(n*m*sizeof(float));
    float **array = (float **)malloc(n*sizeof(float *));
    for (int i = 0; i < n; i++)
        array[i] = &(data[i*m]);

    return array;
}

Thank you very much for your answers.

1 Answer:

Answer 0: (score: 0)

You are trying to scatter the elements of the matrix, but t1 is just an array of pointers, so the two do not match. You should use a contiguous data structure for the matrix. An example of how to do that is given by this answer:
float **floatalloc2d(int n, int m) {
    float *data = (float *)malloc(n*m*sizeof(float));
    float **array = (float **)malloc(n*sizeof(float *));
    for (int i = 0; i < n; i++)
        array[i] = &(data[i*m]);

    return array;
}

void floatfree2d(float **array) {
    free(array[0]);
    free(array);
}

float** t1 = floatalloc2d(n, n);

...

MPI_Scatter(t1[0],snd_num,MPI_FLOAT,t3[0],rcv_num,MPI_FLOAT,0,MPI_COMM_WORLD);
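
The same pointer mismatch applies to the MPI_Gather call: pass the data pointers t3[0] and t1[0] rather than &t3 and &t1. Below is a minimal, self-contained sketch of the corrected scatter/compute/gather pattern, not the asker's full program; the matrix size n = 8, the placeholder update, and the divisibility check are assumptions chosen only so the example runs on its own.

/* Sketch: scatter a contiguous n x n matrix by blocks of rows,
 * do a trivial update, and gather it back on rank 0.
 * Assumes n is divisible by the number of processes. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

float **floatalloc2d(int n, int m) {
    float *data = (float *)malloc(n*m*sizeof(float));
    float **array = (float **)malloc(n*sizeof(float *));
    for (int i = 0; i < n; i++)
        array[i] = &(data[i*m]);
    return array;
}

void floatfree2d(float **array) {
    free(array[0]);
    free(array);
}

int main(void) {
    int rank, size, n = 8;               /* example size (assumption) */
    MPI_Init(NULL, NULL);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (n % size != 0) {                 /* keep the example simple */
        if (rank == 0) fprintf(stderr, "run with a process count that divides %d\n", n);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int step = n / size;                 /* rows per process */
    float **t1 = floatalloc2d(n, n);     /* full matrix (only rank 0 fills it) */
    float **t3 = floatalloc2d(step, n);  /* local block of rows */

    if (rank == 0)
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++)
                t1[i][j] = (float)(i*n + j);

    /* Pass the contiguous data pointers t1[0] / t3[0], not the pointer arrays. */
    MPI_Scatter(t1[0], step*n, MPI_FLOAT, t3[0], step*n, MPI_FLOAT, 0, MPI_COMM_WORLD);

    for (int i = 0; i < step; i++)       /* placeholder "computation" */
        for (int j = 0; j < n; j++)
            t3[i][j] += 1.0f;

    MPI_Gather(t3[0], step*n, MPI_FLOAT, t1[0], step*n, MPI_FLOAT, 0, MPI_COMM_WORLD);

    if (rank == 0)
        printf("t1[0][0] after gather: %f\n", t1[0][0]);

    floatfree2d(t3);
    floatfree2d(t1);
    MPI_Finalize();
    return 0;
}

Compile with mpicc and run with, for example, mpirun -np 4 ./a.out.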