Segmentation fault in C calling MPI_Finalize()

Asked: 2015-09-30 17:59:09

Tags: c++ segmentation-fault mpi

I am having a problem with MPI_Finalize. I get a segmentation fault at the MPI_Finalize(); call at the end of my program. The output is:

A matrix =
48.3962 65.3245 15.0385 
72.383 25.8898 46.0265 

B matrix =

15.4881 50.6507 6.74602 71.0055 
12.2209 77.5441 61.5452 31.5127 
46.8515 89.4849 70.0342 57.3195 

gather returned: 0
C matrix =
2252.46 8862.55 5400.1 6356.94 
gather returned: 0
3593.88 9792.53 5305.12 8593.66 

time=7.6e-05 seconds
[dmc:13852] *** Process received signal ***
[dmc:13852] Signal: Segmentation fault (11)
[dmc:13852] Signal code:  (128)
[dmc:13852] Failing at address: (nil)
[dmc:13852] [ 0] /lib64/libpthread.so.0(+0xf850)[0x7fcba45e8850]
[dmc:13852] [ 1] /opt/asn/apps/openmpi_1.8.3_intel/lib/libopen-pal.so.6(opal_memory_ptmalloc2_int_free+0x1a9)[0x7fcba3b74a79]
[dmc:13852] [ 2] /opt/asn/apps/openmpi_1.8.3_intel/lib/libopen-pal.so.6(opal_memory_ptmalloc2_free+0xad)[0x7fcba3b765fd]
[dmc:13852] [ 3] /opt/asn/apps/openmpi_1.8.3_intel/lib/openmpi/mca_btl_openib.so(+0x27e14)[0x7fcb8f5cbe14]
[dmc:13852] [ 4] /opt/asn/apps/openmpi_1.8.3_intel/lib/openmpi/mca_btl_openib.so(mca_btl_openib_finalize+0x484)[0x7fcb8f5b0724]
[dmc:13852] [ 5] /opt/asn/apps/openmpi_1.8.3_intel/lib/libmpi.so.1(+0x8c246)[0x7fcba4d9e246]
[dmc:13852] [ 6] /opt/asn/apps/openmpi_1.8.3_intel/lib/libopen-pal.so.6(mca_base_framework_close+0x63)[0x7fcba3b14ac3]
[dmc:13852] [ 7] /opt/asn/apps/openmpi_1.8.3_intel/lib/libopen-pal.so.6(mca_base_framework_close+0x63)[0x7fcba3b14ac3]
[dmc:13852] [ 8] /opt/asn/apps/openmpi_1.8.3_intel/lib/libmpi.so.1(ompi_mpi_finalize+0x60e)[0x7fcba4d5e21e]
[dmc:13852] [ 9] ./mm_mult_mpi_gnu[0x4036ba]
[dmc:13852] [10] /lib64/libc.so.6(__libc_start_main+0xe6)[0x7fcba427bc36]
[dmc:13852] [11] ./mm_mult_mpi_gnu[0x402859]
[dmc:13852] *** End of error message ***

Here is my program:

using namespace std;
#include <iostream>
#include <iomanip>
#include <sstream>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include "mpi.h"
#include <cmath>

#define MX_SZ 320
#define SEED 2397           /* random number seed */
#define MAX_VALUE  100.0    /* maximum size of array elements A, and B */

/* copied from mpbench */
#define TIMER_CLEAR     (tv1.tv_sec = tv1.tv_usec = tv2.tv_sec = tv2.tv_usec = 0)
#define TIMER_START     gettimeofday(&tv1, (struct timezone*)0)
#define TIMER_ELAPSED   ((tv2.tv_usec-tv1.tv_usec)+((tv2.tv_sec-tv1.tv_sec)*1000000))
#define TIMER_STOP      gettimeofday(&tv2, (struct timezone*)0)
struct timeval tv1,tv2;

/*
This declaration facilitates the creation of a two dimensional 
dynamically allocated arrays (i.e. the lxm A array, the mxn B
array, and the lxn C array).  It allows pointer arithmetic to 
be applied to a single data stream that can be dynamically allocated.
To address the element at row x, and column y you would use the
following notation:  A(x,y),B(x,y), or C(x,y), respectively.
Note that this differs from the normal C notation if A were a
two dimensional array of A[x][y] but is still very descriptive
of the data structure.
*/
float *a,*b,*c,*rbuf,*sbuf;
#define A(i,j) *(a+i*dim_m+j)
#define B(i,j) *(b+i*dim_n+j)
#define C(i,j) *(c+i*dim_n+j)
#define RBUF(i,j) *(rbuf+i*dim_m+j)
#define SBUF(i,j) *(sbuf+i*dim_n+j)


/*
   Routine to retrieve the data size of the numbers array from the 
   command line or by prompting the user for the information
*/
void get_index_size(int argc,char *argv[],int *dim_l,int *dim_m,int *dim_n) {
   if(argc!=2 && argc!=4) {
      cout<<"usage:  mm_mult_serial [l_dimension] <m_dimension n_dimmension>"
           << endl;
      exit(1);
   }
   else {
      if (argc == 2) {
         *dim_l = *dim_n = *dim_m = atoi(argv[1]);
      }
      else {
         *dim_l = atoi(argv[1]);
         *dim_m = atoi(argv[2]);
         *dim_n = atoi(argv[3]);
      }
   }
   if (*dim_l<=0 || *dim_n<=0 || *dim_m<=0) {
      cout<<"Error: number of rows and/or columns must be greater than 0"
          << endl;
      exit(1);
   }
}

/*
   Routine that fills the number matrix with Random Data with values
   between 0 and MAX_VALUE
   This simulates in some way what might happen if there was a 
   single sequential data acquisition source such as a single file
*/
void fill_matrix(float *array,int dim_m,int dim_n)
{
   int i,j;
   for(i=0;i<dim_m;i++) {
      for (j=0;j<dim_n;j++) {
         array[i*dim_n+j]=drand48()*MAX_VALUE;
      }
   }
}

/*
   Routine that outputs the matrices to the screen 
*/
void print_matrix(float *array,int dim_m,int dim_n)
{
   int i,j;
   for(i=0;i<dim_m;i++) {
      for (j=0;j<dim_n;j++) {
         cout << array[i*dim_n+j] << " ";
      }
      cout << endl;
   }
}

/*
   MAIN ROUTINE: summation of a number list
*/

int main( int argc, char *argv[])
{
   float dot_prod;
   int dim_l,dim_n,dim_m;
   int i,j,k;
   int id,p;
   int row_per_proc;
   int remain_rows;
   int error;
   MPI::Status status;
   MPI::Init(argc, argv);

   id = MPI::COMM_WORLD.Get_rank();
   p = MPI::COMM_WORLD.Get_size();

   if ( id == 0 ) {
      /* 
      get matrix sizes
      */
      get_index_size(argc,argv,&dim_l,&dim_m,&dim_n);

      row_per_proc = ceil(float(dim_l)/p);
      if (dim_l%p > 0) {
         remain_rows = dim_l - (row_per_proc*(p-1));
      } else {
         remain_rows = 0;
      }
   }

   // allocate space for b in all processes since it is shared
   b = new (nothrow) float[dim_m*dim_n];
   if (b==0) {
      cout << "ERROR: Insufficient Memory" << endl;
      MPI_Finalize();
      exit(1);
   }

   if (id == 0) {
      // allocate from heap space for a and c
      a = new (nothrow) float[dim_l*dim_m];
      c = new (nothrow) float[row_per_proc*p*dim_n];
      if (a==0 || c==0) {
         cout << "ERROR: Insufficient Memory" << endl;
         MPI_Finalize();
         exit(1);
      }

      /*
         initialize numbers matrix with random data
      */ 
      srand48(SEED);
      fill_matrix(a,dim_l,dim_m);
      fill_matrix(b,dim_m,dim_n);

      /*
        output numbers matrix
      */
      cout << "A matrix =" << endl;
      print_matrix(a,dim_l,dim_m);
      cout << endl;

      cout << "B matrix =" << endl;
      print_matrix(b,dim_m,dim_n);
      cout << endl;
   }
   /*
     broadcast variables
   */
   MPI_Bcast(&p, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&dim_l, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&dim_m, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&dim_n, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&row_per_proc, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&remain_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);

   /* 
     process 0 sends rows to workers
   */
   rbuf = new float[row_per_proc*dim_m];
   MPI_Scatter(a, row_per_proc*dim_m, MPI_FLOAT, rbuf, row_per_proc*dim_m, MPI_FLOAT, 0, MPI_COMM_WORLD);

   /*
     send matrix B to all other processes
   */
   MPI_Bcast(b, dim_m*dim_n, MPI_FLOAT, 0, MPI_COMM_WORLD);

   /*
   Start recording the execution time
   */
   if (id == 0) {
      TIMER_CLEAR;
      TIMER_START;
   }

   // multiply local part of matrix
   sbuf = new float[row_per_proc*dim_n];
   for (i=0; i<row_per_proc; i++) {
      for (j=0; j<dim_n; j++) {
         dot_prod = 0.0;
         for (k=0; k<dim_m; k++) {
            dot_prod += RBUF(i,k)*B(k,j);
         }
         SBUF(i,j) = dot_prod;
      }
   }

   // send the calculated values back to process 0
   error = MPI_Gather(sbuf, row_per_proc*dim_n, MPI_FLOAT, c, row_per_proc*dim_n, MPI_FLOAT, 0, MPI_COMM_WORLD);
   cout << "gather returned: " << error << endl;

   if (id == 0) {
      /*
         stop recording the execution time
      */ 
      TIMER_STOP;

      cout << "C matrix =" << endl;
      print_matrix(c,dim_l,dim_n);
      cout << endl;
      cout << "time=" << setprecision(8) <<  TIMER_ELAPSED/1000000.0  << " seconds" << endl;
   }

   // return allocated memory
   delete a; delete b; delete c; delete rbuf; delete sbuf;
   error = MPI_Finalize();
   cout << "finalize returned: " << error << endl;
}

1 Answer:

Answer 0 (score: 2):

You create a and c only on rank 0:

   if (id == 0) {
      // allocate from heap space for a and c
      a = new (nothrow) float[dim_l*dim_m];
      c = new (nothrow) float[row_per_proc*p*dim_n];

Therefore, you should also delete them only on rank 0 (and since these arrays were allocated with new[], release them with delete[] rather than delete):

   if (id == 0) {
      /*
         stop recording the execution time
      */ 
      TIMER_STOP;

      cout << "C matrix =" << endl;
      print_matrix(c,dim_l,dim_n);
      cout << endl;
      cout << "time=" << setprecision(8) <<  TIMER_ELAPSED/1000000.0  << " seconds" << endl;
      delete[] a; delete[] c;
   }

   // return allocated memory
   delete[] b; delete[] rbuf; delete[] sbuf;

An additional note: you are calling MPI_Finalize when the allocation fails on rank 0. In this case it is better to call MPI_Abort, or to broadcast the result of this check, to notify all processes of the failure and allow a proper termination. With your code as it is now, the program would likely hang indefinitely when it runs into this error.
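
For illustration, a minimal sketch of the MPI_Abort variant, applied to the allocation of b from the question (the error code 1 is arbitrary; any non-zero value works):

   b = new (nothrow) float[dim_m*dim_n];
   if (b == 0) {
      cout << "ERROR: Insufficient Memory" << endl;
      // MPI_Abort tears down every process in the communicator, so no
      // rank is left blocking in a later collective call such as
      // MPI_Bcast or MPI_Gather.
      MPI_Abort(MPI_COMM_WORLD, 1);
   }

And a sketch of the broadcast variant for the rank-0-only allocations, where every rank learns about the failure and can still shut down cleanly through MPI_Finalize (alloc_ok is a hypothetical helper flag, not in the original code):

   int alloc_ok = 1;
   if (id == 0) {
      a = new (nothrow) float[dim_l*dim_m];
      c = new (nothrow) float[row_per_proc*p*dim_n];
      alloc_ok = (a != 0 && c != 0);
   }
   // Every rank receives the outcome of rank 0's allocation ...
   MPI_Bcast(&alloc_ok, 1, MPI_INT, 0, MPI_COMM_WORLD);
   if (!alloc_ok) {
      // ... so all ranks reach MPI_Finalize and terminate properly.
      MPI_Finalize();
      exit(1);
   }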