free(): invalid pointer during MPI_Finalize()

Time: 2018-07-17 12:12:05

Tags: c++ mpi

I have the following vectors:

std::vector<double**> blocks(L);
std::vector<double**> localblocks(blocks_local);
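
Note that each element of these vectors is only a double**, i.e. a pointer-sized handle, not the N*N doubles of a block itself. A tiny standalone illustration of that (sizes assume a typical 64-bit platform, not anything from the code above):

#include <vector>
#include <iostream>

int main()
{
    std::vector<double**> blocks(4);                              // four pointer-sized handles, all null
    std::cout << sizeof(blocks[0]) << std::endl;                  // one element: just a pointer (8 bytes on most 64-bit systems)
    std::cout << blocks.size() * sizeof(double**) << std::endl;   // total storage owned by the vector itself
    return 0;
}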

Then I use a send to distribute the data that lives on rank 0 to the other ranks (I believe that is the correct terminology):

for(i=1;i<numnodes-1;i++)
{
    for(j=0;j<blocks_local;j++)
    {
        MPI_Send(blocks[i*blocks_local+j],N*N,MPI_DOUBLE,i,j,MPI_COMM_WORLD);
    }
}

So far the code runs fine: no errors. Then, on the remaining ranks, the following code runs:

for(i=0;i<blocks_local;i++)
{
    MPI_Recv(&localblocks[i],N*N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);
}

This is where I get the invalid pointer error.
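
For reference, the first argument to MPI_Recv has to point at storage large enough for the requested count of elements. A minimal sketch of receiving blocks of N*N doubles into contiguous per-block buffers (the names ReceiveBlocks and recv are mine, purely for illustration, not the code above) could look like this:

#include <mpi.h>
#include <vector>

// Illustrative sketch only: receive `count` blocks of N*N doubles from rank 0,
// each into its own contiguous buffer of exactly N*N doubles.
std::vector<std::vector<double>> ReceiveBlocks(int count, int N)
{
    std::vector<std::vector<double>> recv(count, std::vector<double>(N * N));
    MPI_Status status;
    for (int i = 0; i < count; ++i)
    {
        // recv[i].data() really does point at N*N doubles, matching the count argument.
        MPI_Recv(recv[i].data(), N * N, MPI_DOUBLE, 0, i, MPI_COMM_WORLD, &status);
    }
    return recv;
}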

The full output is:

6.8297e-05
3.6895e-05
4.3906e-05
4.4463e-05 << these just show the time it takes for each process to complete, and that the program has exited successfully.

free(): invalid pointer
[localhost:16841] *** Process received signal ***
[localhost:16841] Signal: Aborted (6)
[localhost:16841] Signal code:  (-6)
free(): invalid pointer
free(): invalid pointer
[localhost:16842] *** Process received signal ***
[localhost:16842] Signal: Aborted (6)
[localhost:16842] Signal code:  (-6)
[localhost:16840] *** Process received signal ***
[localhost:16840] Signal: Aborted (6)
[localhost:16840] Signal code:  (-6)
[localhost:16841] [ 0] /lib64/libpthread.so.0(+0x11fc0)[0x7fb761c10fc0]
[localhost:16841] [ 1] [localhost:16840] [ 0] [localhost:16842] [ 0] /lib64/libc.so.6(gsignal+0x10b)[0x7fb761876f2b]
[localhost:16841] [ 2] /lib64/libpthread.so.0(+0x11fc0)[0x7fb0f377cfc0]
[localhost:16840] [ 1] /lib64/libpthread.so.0(+0x11fc0)[0x7f0ec17e9fc0]
[localhost:16842] [ 1] /lib64/libc.so.6(gsignal+0x10b)[0x7fb0f33e2f2b]
[localhost:16840] /lib64/libc.so.6(abort+0x12b)[0x7fb761861561]
/lib64/libc.so.6(gsignal+0x10b)[0x7f0ec144ff2b]
[localhost:16842] [ 2] [localhost:16841] [ 3] [ 2] /lib64/libc.so.6(abort+0x12b)/lib64/libc.so.6(abort+0x12b)[0x7fb0f33cd561]
[localhost:16840] [ 3] [0x7f0ec143a561]
[localhost:16842] [ 3] /lib64/libc.so.6(+0x79917)[0x7fb7618b9917]
[localhost:16841] /lib64/libc.so.6(+0x79917)[0x7fb0f3425917]
[ 4] [localhost:16840] [ 4] /lib64/libc.so.6(+0x79917)[0x7f0ec1492917]
/lib64/libc.so.6(+0x7fdec)[0x7fb7618bfdec]
[localhost:16841] [ 5] [localhost:16842] [ 4] /lib64/libc.so.6(+0x7fdec)[0x7fb0f342bdec]
[localhost:16840] [ 5] /lib64/libc.so.6(+0x8157c)[0x7fb7618c157c]
[localhost:16841] [ 6] /lib64/libc.so.6(+0x7fdec)[0x7f0ec1498dec]
[localhost:16842] [ 5] /lib64/libc.so.6(+0x8157c)[0x7fb0f342d57c]
[localhost:16840] [ 6] /usr/lib64/openmpi/lib/libopen-pal.so.20(+0x4ffb2)[0x7fb76134dfb2]
[localhost:16841] [ 7] /lib64/libc.so.6(+0x8157c)[0x7f0ec149a57c]
[localhost:16842] [ 6] /usr/lib64/openmpi/lib/libopen-pal.so.20(+0x4ffb2)[0x7fb0f2eb9fb2]
[localhost:16840] /usr/lib64/openmpi/lib/libmpi.so.20(ompi_win_finalize+0x1c1)[0x7fb7627a9881]
[localhost:16841] [ 8] /usr/lib64/openmpi/lib/libopen-pal.so.20(+0x4ffb2)[0x7f0ec0f26fb2]
[localhost:16842] [ 7] [ 7] /usr/lib64/openmpi/lib/libmpi.so.20(ompi_win_finalize+0x1c1)[0x7fb0f4315881]
[localhost:16840] [ 8] /usr/lib64/openmpi/lib/libmpi.so.20(ompi_win_finalize+0x1c1)[0x7f0ec2382881]
[localhost:16842] [ 8] /usr/lib64/openmpi/lib/libmpi.so.20(ompi_mpi_finalize+0x2f1)[0x7fb7627a7711]
[localhost:16841] [ 9] ./a.out[0x408c75]
/usr/lib64/openmpi/lib/libmpi.so.20(ompi_mpi_finalize+0x2f1)[0x7fb0f4313711]
[localhost:16840] [ 9] ./a.out[0x408c75]
/usr/lib64/openmpi/lib/libmpi.so.20(ompi_mpi_finalize+0x2f1)[0x7f0ec2380711]
[localhost:16842] [ 9] [localhost:16841] [10] /lib64/libc.so.6(__libc_start_main+0xeb)./a.out[0x408c75]
[localhost:16842] [localhost:16840] [10] [0x7fb76186318b]
[localhost:16841] [11] ./a.out[0x40896a]
[localhost:16841] *** End of error message ***
/lib64/libc.so.6(__libc_start_main+0xeb)[0x7fb0f33cf18b]
[localhost:16840] [11] ./a.out[0x40896a]
[localhost:16840] *** End of error message ***
[10] /lib64/libc.so.6(__libc_start_main+0xeb)[0x7f0ec143c18b]
[localhost:16842] [11] ./a.out[0x40896a]
[localhost:16842] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 2 with PID 0 on node localhost exited on signal 6 (Aborted).
--------------------------------------------------------------------------

I have removed my cleanup code, so this free must be coming from MPI itself, and I am not sure how to fix this.
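
As far as I can tell, an abort inside MPI_Finalize() is often just the point where glibc notices damaged heap metadata, not necessarily where the damage was done. A minimal non-MPI sketch of one way that failure mode can arise (writing N*N doubles through the address of a single pointer-sized vector slot, purely for illustration):

#include <cstring>
#include <vector>

int main()
{
    const int N = 10;
    std::vector<double**> localblocks(5);        // five pointer-sized slots (typically 40 bytes of storage in total)
    std::vector<double> payload(N * N, 1.0);     // 800 bytes of data

    // Copies 800 bytes starting at an allocation that only holds ~40 bytes:
    // the write runs past the end of the vector's heap block and can corrupt
    // the allocator's bookkeeping.
    std::memcpy(&localblocks[0], payload.data(), N * N * sizeof(double));

    // Nothing fails here; heap corruption typically only surfaces later, when
    // some free()/delete walks the damaged metadata (for example a destructor,
    // or cleanup performed inside a library call).
    return 0;
}

For reference, the full BlockMatVecMultiplication function is: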

void BlockMatVecMultiplication(int mynode, int numnodes,int N,int L, std::vector<double **> blocks,double *x,double* y)
{
    int i,j;
    int local_offset,blocks_local,last_blocks_local;
    int *count;
    int *displacements;

    // The number of blocks each processor is dealt
    blocks_local = L/numnodes;
    double *temp = new double[N*blocks_local];
    std::vector<double**> localblocks(blocks_local);

    // the offset
    local_offset = mynode*blocks_local;

    MPI_Status status;

    if(mynode == (numnodes-1))
    {
        blocks_local = L-blocks_local*(numnodes-1);
    }
    /* Distribute the blocks across the processes */

    // At this point node 0 has the matrix. So we only need
    // to distribute among the remaining nodes, using the 
    // last node as a cleanup.
    if(mynode ==0)
    {
        // This deals the matrix between processes 1 to numnodes -2
        for(i=1;i<numnodes-1;i++)
        {
            for(j=0;j<blocks_local;j++)
            {
                MPI_Send(blocks[i*blocks_local+j],N*N,MPI_DOUBLE,i,j,MPI_COMM_WORLD);
            }
        }
        // Here we use the last process to "clean up". For small N 
        // the load is poorly balanced.
        last_blocks_local = L- blocks_local*(numnodes-1);
        for(j=0;j<last_blocks_local;j++)
        {
            MPI_Send(blocks[(numnodes-1)*blocks_local+j],N*N,MPI_DOUBLE,numnodes-1,j,MPI_COMM_WORLD);
        }
    }
    else
    {
        /* This code allows the other processes to obtain the chunks of data
           sent by process 0. */
        /* blocks_local has a different value on the last processor, remember */
        for(i=0;i<blocks_local;i++)
        {
            MPI_Recv(&localblocks[i],N*N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);
        }
    }
}

The function above is called from the following code:

#include <iostream>
#include <iomanip>
#include <mpi.h>
#include "SCmathlib.h"
#include "SCchapter7.h"

using namespace std;

int main(int argc, char * argv[])
{

    int i,j, N = 10,L=10;
    double **A,*x,*y;
    int totalnodes,mynode;
    std::vector<double**> blocks;
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &totalnodes);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynode);

    //output variable
    y = new double[N];

    //input variable
    x = new double[N];

    for(i=0;i<N;i++)
    {
        x[i] = 1.0;
    }

    // forms identity matrix on node 0
    if(mynode==0)
    {
        for(j=0;j<L;j++)
        {
            A = CreateMatrix(N,N);
            // fills the block
            for(i=0;i<N;i++)
            {
                A[i][i] = 1.0;
            }
            blocks.push_back(A);
        }
    }

    double start = MPI_Wtime();
    BlockMatVecMultiplication(mynode,totalnodes,N,L,blocks,x,y);
    double end = MPI_Wtime();
    if(mynode==0)
    {
        for(i=0;i<L;i++)
        {
            //DestroyMatrix(blocks[i],N,N);
        }
        //delete[] x;
        //delete[] y;
    }
    std::cout << end- start << std::endl;
    MPI_Finalize();
}

The includes just provide basic matrix functionality. The following function creates a matrix:

double ** CreateMatrix(int m, int n){
  double ** mat;
  mat = new double*[m];
  for(int i=0;i<m;i++){
    mat[i] = new double[n];
    for(int j=0;j<n;j++)
      mat[i][j] = 0.0;
  }
  return mat;
} 
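
One thing worth noting about this layout: CreateMatrix allocates every row with its own new[], so the N*N doubles of a block are not a single contiguous allocation, and the double** it returns points at an array of row pointers rather than at the matrix data. A hedged sketch of a contiguous variant (a hypothetical CreateContiguousMatrix helper, not part of SCmathlib.h) whose storage would line up with a single send or receive of N*N doubles:

// Hypothetical helper, not from SCmathlib.h: an m x n matrix whose entries live
// in one contiguous block, while mat[i][j] indexing still works.
double** CreateContiguousMatrix(int m, int n)
{
    double** mat = new double*[m];
    mat[0] = new double[m * n]();    // one contiguous, zero-initialized block of m*n doubles
    for (int i = 1; i < m; ++i)
        mat[i] = mat[0] + i * n;     // row pointers into that single block
    return mat;
}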
