Parallel programming in C, Signal: Segmentation fault: 11 (11), Signal code: Address not mapped (1)

Time: 2017-02-10 21:53:49

Tags: c parallel-processing mpi vi

I am currently doing some simple parallel programming with MPI. I get no errors during compilation, but I hit an error at runtime that I cannot figure out. Please help! Thank you! The source code is below:

#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "matrix.h"
#define MIN(X,Y) (((X) < (Y)) ? (X) : (Y)) //IMPORTANT!!

int master = 0;
int numsent, i;
int nrows, ncols;
double *A, *x, *b, *buffer;
int rowidx;
int sender;
double ans;


int main(int argc, char *argv[])
{
    int myid;
    int nproc;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    /* CODING */

    MPI_Status stat; // IMPORTANT!!

    //master_stage1: master obtain the matrix A and vector X
    if(myid == master)
    {
        printf("What is the number of rows of matrix A:\n");
        scanf("%d", &nrows);
        printf("what is the number of columns of matrix A:\n");
        scanf("%d", &ncols);

        //printf("nrows = %d, ncols = %d\n", nrows, ncols);//text



        A = (double*)malloc(nrows*ncols*sizeof(double));
        b = (double*)malloc(nrows*sizeof(double));
        ObtainMatrixAndVector(nrows, ncols, A, x, b);
    }

    //master_stage2:bcast x, ncols, nrows, and p2p sent rows of A
    MPI_Bcast(&ncols, 1, MPI_INT, master, MPI_COMM_WORLD);
    MPI_Bcast(&nrows, 1, MPI_INT, master, MPI_COMM_WORLD);

    x = (double*)malloc(ncols*sizeof(double));
    MPI_Bcast(x, ncols, MPI_DOUBLE, master, MPI_COMM_WORLD);

    if(myid == master)
    {
        numsent = 0;
        for(i = 1; i <= MIN(nrows, nproc - 1); i++)
        {
            MPI_Send(&A[(i - 1)*ncols], ncols, MPI_DOUBLE, i, i, MPI_COMM_WORLD);
            numsent++;
        }

        //master_stage3: receiving
        for(i = 0; i <= nrows; i++)
        {
            MPI_Recv(&ans, 1, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
            sender = stat.MPI_SOURCE;
            rowidx = stat.MPI_TAG;
            b[rowidx-1] = ans;

            if(numsent < nrows)
            {
                MPI_Send(&A[numsent*ncols], ncols, MPI_DOUBLE, sender, numsent+1, MPI_COMM_WORLD);
                numsent++;
            }
            else
                MPI_Send(buffer, ncols, MPI_DOUBLE, sender, 0, MPI_COMM_WORLD);
        }
    }

    //Jobs Done by workers
    buffer = (double*)malloc(ncols*sizeof(double));
    while(1)
    {
        if(myid > nrows)
            break;
        else
        {
            MPI_Recv(buffer, ncols, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);

            rowidx = stat.MPI_TAG;
            if(rowidx == 0)
                break;
            ans = 0.0;
            for(i = 0; i < ncols; i++)
                ans += buffer[i] * x[i];
            MPI_Send(&ans, 1, MPI_DOUBLE, master, rowidx, MPI_COMM_WORLD);

        }
    }
    if(myid == master)
    {
        for(i = 0; i < nrows; i++)
            printf("%f\n", b[i]);
    } 

    /* CODING */
    MPI_Finalize();
}

The matrix.c file:

#include "matrix.h"

void ObtainMatrixAndVector(int m, int n, double *A, double *x, double *b)
{
// m: number of rows of matrix A
// n: number of columns of matrix A
// A: matrix of mxn
// x: vector of nx1
// b: vector of mx1 (containing exact solution for comparison purpose)
//
   int i, j;
   for (i = 0; i < m; i++) {
       x[i] = i + 1;
       for (j = 0; j < n; j++) {
           A[i*n+j] = 1.0/(i+j+1);  // Hilbert matrix
       }
   }

   // exact solution b = A*x
   for (i = 0; i < m; i++) {
       b[i] = 0.0;
       for (j = 0; j < n; j++) {
           b[i] += x[j]*A[i*n+j];
       }
   }
}

matrix.h:

#ifndef matrix_h
#define matrix_h

void ObtainMatrixAndVector(int m, int n, double *A, double *x, double *b);

#endif /* matrix_h */

The error:

[Nicks-MAC:02138] *** Process received signal ***
[Nicks-MAC:02138] Signal: Segmentation fault: 11 (11)
[Nicks-MAC:02138] Signal code: Address not mapped (1)
[Nicks-MAC:02138] Failing at address: 0x0
[Nicks-MAC:02138] [ 0] 0   libsystem_platform.dylib            0x00007fffbf27bbba _sigtramp + 26
[Nicks-MAC:02138] [ 1] 0   a.out                               0x0000000106daf0eb x + 4147
[Nicks-MAC:02138] [ 2] 0   a.out                               0x0000000106dad7a1 main + 321
[Nicks-MAC:02138] [ 3] 0   libdyld.dylib                       0x00007fffbf06e255 start + 1
[Nicks-MAC:02138] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 0 on node Nicks-MAC exited on signal 11 (Segmentation fault: 11).
--------------------------------------------------------------------------

Thank you sooooooo much, guys!

2 answers:

Answer 0: (score: 1)

There are a number of errors in your code; a combined sketch of the fixes follows this list.

  • You fail to allocate x on the master before the call to ObtainMatrixAndVector. Allocate it on the master first. However, you must then also make the other allocation of x conditional, so that only non-master ranks execute it!

  • Similarly, you fail to allocate buffer before the main master section. Move that allocation above this section.

  • You execute the worker code unconditionally. The master should not execute the worker code.

  • Your for(i = 0; i <= nrows; i++) here is off by one; it should be i < nrows.
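
Putting those fixes together, a minimal sketch of the corrected control flow (hedged: it reuses the question's global variable names and is one reasonable arrangement, not a definitive rewrite):

if(myid == master)
{
    /* read nrows and ncols as before, then allocate everything the master touches */
    A = (double*)malloc(nrows*ncols*sizeof(double));
    b = (double*)malloc(nrows*sizeof(double));
    x = (double*)malloc(ncols*sizeof(double));      /* was missing before ObtainMatrixAndVector */
    ObtainMatrixAndVector(nrows, ncols, A, x, b);
}

MPI_Bcast(&ncols, 1, MPI_INT, master, MPI_COMM_WORLD);
MPI_Bcast(&nrows, 1, MPI_INT, master, MPI_COMM_WORLD);

if(myid != master)                                  /* the master already allocated x */
    x = (double*)malloc(ncols*sizeof(double));
MPI_Bcast(x, ncols, MPI_DOUBLE, master, MPI_COMM_WORLD);

buffer = (double*)malloc(ncols*sizeof(double));     /* needed before the master sends it */

if(myid == master)
{
    /* distribution and receive loops as before, but with i < nrows, not i <= nrows */
}
else
{
    /* worker loop runs only on non-master ranks */
}

free(A); free(b); free(x); free(buffer);            /* free(NULL) is a no-op for pointers a rank never allocated */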

I am not sure I caught everything. You should also free the allocated memory. In general, your work-distribution code is quite clever and not necessarily bad, but for a static workload on a homogeneous system, a static distribution would be a better fit. Consider using MPI_Scatterv / MPI_Gatherv instead of sending individual messages; that will reduce communication overhead.
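
For illustration, here is a sketch of that static distribution, under the assumption that nrows, ncols and x have already been broadcast as above; Alocal and blocal are names introduced for this sketch, and A and b may stay NULL on worker ranks (that is valid for the root-only buffers of MPI_Scatterv / MPI_Gatherv):

int *counts = (int*)malloc(nproc*sizeof(int));      /* elements of A per rank */
int *displs = (int*)malloc(nproc*sizeof(int));
int base = nrows/nproc, rem = nrows%nproc, offset = 0;
for(int r = 0; r < nproc; r++)
{
    int rows_r = base + (r < rem ? 1 : 0);          /* spread the remainder over the low ranks */
    counts[r] = rows_r*ncols;
    displs[r] = offset;
    offset += counts[r];
}
int myrows = counts[myid]/ncols;
double *Alocal = (double*)malloc(counts[myid]*sizeof(double));
double *blocal = (double*)malloc(myrows*sizeof(double));

/* one collective instead of nrows point-to-point messages */
MPI_Scatterv(A, counts, displs, MPI_DOUBLE,
             Alocal, counts[myid], MPI_DOUBLE, master, MPI_COMM_WORLD);

for(int i = 0; i < myrows; i++)
{
    blocal[i] = 0.0;
    for(int j = 0; j < ncols; j++)
        blocal[i] += Alocal[i*ncols + j]*x[j];
}

/* reuse counts/displs in units of rows for the result vector */
for(int r = 0; r < nproc; r++) { counts[r] /= ncols; displs[r] /= ncols; }
MPI_Gatherv(blocal, myrows, MPI_DOUBLE,
            b, counts, displs, MPI_DOUBLE, master, MPI_COMM_WORLD);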

Answer 1: (score: 0)

I think I see the error:

      ObtainMatrixAndVector(nrows, ncols, A, x, b);
}

//master_stage2:bcast x, ncols, nrows, and p2p sent rows of A
MPI_Bcast(&ncols, 1, MPI_INT, master, MPI_COMM_WORLD);
MPI_Bcast(&nrows, 1, MPI_INT, master, MPI_COMM_WORLD);

x = (double*)malloc(ncols*sizeof(double));

You use the pointer to the array x before allocating memory for it.

Try this:

A = (double*)malloc(nrows*ncols*sizeof(double));
b = (double*)malloc(nrows*sizeof(double));
x = (double*)malloc(ncols*sizeof(double));
ObtainMatrixAndVector(nrows, ncols, A, x, b);