I have the following vectors:
std::vector<double**> blocks(L);
std::vector<double**> localblocks(blocks_local);
Then I use MPI_Send to send the data held on rank 0 to the other ranks (I think that is the correct terminology):
for(i=1;i<numnodes-1;i++)
{
  for(j=0;j<blocks_local;j++)
  {
    MPI_Send(blocks[i*blocks_local+j],N*N,MPI_DOUBLE,i,j,MPI_COMM_WORLD);
  }
}
Up to this point the code runs fine: no errors. Then, on the remaining ranks, the following code runs:
for(i=0;i<blocks_local;i++)
{
  MPI_Recv(&localblocks[i],N*N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);
}
This is where I get an invalid pointer error.
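To be explicit about the types involved (this is just my own reading of my declarations, in case the mismatch matters), here is a self-contained sketch:

#include <vector>

int main()
{
  int blocks_local = 2;                              // example value only
  std::vector<double**> localblocks(blocks_local);
  double**  one_block   = localblocks[0];            // an element of the vector is a double**
  double*** recv_buffer = &localblocks[0];           // this is the pointer I hand to MPI_Recv,
                                                     // which expects room for N*N doubles
  (void)one_block; (void)recv_buffer;
  return 0;
}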
The full output is:
6.8297e-05
3.6895e-05
4.3906e-05
4.4463e-05 << these just show the time it takes for each process to complete. It shows that the program has exited successfully.
free(): invalid pointer
[localhost:16841] *** Process received signal ***
[localhost:16841] Signal: Aborted (6)
[localhost:16841] Signal code: (-6)
free(): invalid pointer
free(): invalid pointer
[localhost:16842] *** Process received signal ***
[localhost:16842] Signal: Aborted (6)
[localhost:16842] Signal code: (-6)
[localhost:16840] *** Process received signal ***
[localhost:16840] Signal: Aborted (6)
[localhost:16840] Signal code: (-6)
[localhost:16841] [ 0] /lib64/libpthread.so.0(+0x11fc0)[0x7fb761c10fc0]
[localhost:16841] [ 1] [localhost:16840] [ 0] [localhost:16842] [ 0] /lib64/libc.so.6(gsignal+0x10b)[0x7fb761876f2b]
[localhost:16841] [ 2] /lib64/libpthread.so.0(+0x11fc0)[0x7fb0f377cfc0]
[localhost:16840] [ 1] /lib64/libpthread.so.0(+0x11fc0)[0x7f0ec17e9fc0]
[localhost:16842] [ 1] /lib64/libc.so.6(gsignal+0x10b)[0x7fb0f33e2f2b]
[localhost:16840] /lib64/libc.so.6(abort+0x12b)[0x7fb761861561]
/lib64/libc.so.6(gsignal+0x10b)[0x7f0ec144ff2b]
[localhost:16842] [ 2] [localhost:16841] [ 3] [ 2] /lib64/libc.so.6(abort+0x12b)/lib64/libc.so.6(abort+0x12b)[0x7fb0f33cd561]
[localhost:16840] [ 3] [0x7f0ec143a561]
[localhost:16842] [ 3] /lib64/libc.so.6(+0x79917)[0x7fb7618b9917]
[localhost:16841] /lib64/libc.so.6(+0x79917)[0x7fb0f3425917]
[ 4] [localhost:16840] [ 4] /lib64/libc.so.6(+0x79917)[0x7f0ec1492917]
/lib64/libc.so.6(+0x7fdec)[0x7fb7618bfdec]
[localhost:16841] [ 5] [localhost:16842] [ 4] /lib64/libc.so.6(+0x7fdec)[0x7fb0f342bdec]
[localhost:16840] [ 5] /lib64/libc.so.6(+0x8157c)[0x7fb7618c157c]
[localhost:16841] [ 6] /lib64/libc.so.6(+0x7fdec)[0x7f0ec1498dec]
[localhost:16842] [ 5] /lib64/libc.so.6(+0x8157c)[0x7fb0f342d57c]
[localhost:16840] [ 6] /usr/lib64/openmpi/lib/libopen-pal.so.20(+0x4ffb2)[0x7fb76134dfb2]
[localhost:16841] [ 7] /lib64/libc.so.6(+0x8157c)[0x7f0ec149a57c]
[localhost:16842] [ 6] /usr/lib64/openmpi/lib/libopen-pal.so.20(+0x4ffb2)[0x7fb0f2eb9fb2]
[localhost:16840] /usr/lib64/openmpi/lib/libmpi.so.20(ompi_win_finalize+0x1c1)[0x7fb7627a9881]
[localhost:16841] [ 8] /usr/lib64/openmpi/lib/libopen-pal.so.20(+0x4ffb2)[0x7f0ec0f26fb2]
[localhost:16842] [ 7] [ 7] /usr/lib64/openmpi/lib/libmpi.so.20(ompi_win_finalize+0x1c1)[0x7fb0f4315881]
[localhost:16840] [ 8] /usr/lib64/openmpi/lib/libmpi.so.20(ompi_win_finalize+0x1c1)[0x7f0ec2382881]
[localhost:16842] [ 8] /usr/lib64/openmpi/lib/libmpi.so.20(ompi_mpi_finalize+0x2f1)[0x7fb7627a7711]
[localhost:16841] [ 9] ./a.out[0x408c75]
/usr/lib64/openmpi/lib/libmpi.so.20(ompi_mpi_finalize+0x2f1)[0x7fb0f4313711]
[localhost:16840] [ 9] ./a.out[0x408c75]
/usr/lib64/openmpi/lib/libmpi.so.20(ompi_mpi_finalize+0x2f1)[0x7f0ec2380711]
[localhost:16842] [ 9] [localhost:16841] [10] /lib64/libc.so.6(__libc_start_main+0xeb)./a.out[0x408c75]
[localhost:16842] [localhost:16840] [10] [0x7fb76186318b]
[localhost:16841] [11] ./a.out[0x40896a]
[localhost:16841] *** End of error message ***
/lib64/libc.so.6(__libc_start_main+0xeb)[0x7fb0f33cf18b]
[localhost:16840] [11] ./a.out[0x40896a]
[localhost:16840] *** End of error message ***
[10] /lib64/libc.so.6(__libc_start_main+0xeb)[0x7f0ec143c18b]
[localhost:16842] [11] ./a.out[0x40896a]
[localhost:16842] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 2 with PID 0 on node localhost exited on signal 6 (Aborted).
--------------------------------------------------------------------------
I have already removed (commented out) my own cleanup code, i.e. the DestroyMatrix / delete[] calls in main below, so this free() must be coming from MPI, and I am not sure how to fix it. The full function is:
void BlockMatVecMultiplication(int mynode, int numnodes, int N, int L, std::vector<double **> blocks, double *x, double *y)
{
  int i,j;
  int local_offset, blocks_local, last_blocks_local;
  int *count;
  int *displacements;

  // The number of rows each processor is dealt
  blocks_local = L/numnodes;
  double *temp = new double[N*blocks_local];
  std::vector<double**> localblocks(blocks_local);

  // the offset
  local_offset = mynode*blocks_local;

  MPI_Status status;

  if(mynode == (numnodes-1))
  {
    blocks_local = L - blocks_local*(numnodes-1);
  }

  /* Distribute the blocks across the processes */
  // At this point node 0 has the matrix. So we only need
  // to distribute among the remaining nodes, using the
  // last node as a cleanup.
  if(mynode == 0)
  {
    // This deals the matrix between processes 1 to numnodes-2
    for(i=1;i<numnodes-1;i++)
    {
      for(j=0;j<blocks_local;j++)
      {
        MPI_Send(blocks[i*blocks_local+j],N*N,MPI_DOUBLE,i,j,MPI_COMM_WORLD);
      }
    }
    // Here we use the last process to "clean up". For small N
    // the load is poorly balanced.
    last_blocks_local = L - blocks_local*(numnodes-1);
    for(j=0;j<last_blocks_local;j++)
    {
      MPI_Send(blocks[(numnodes-1)*blocks_local+j],N*N,MPI_DOUBLE,numnodes-1,j,MPI_COMM_WORLD);
    }
  }
  else
  {
    /* This code allows the other processes to obtain the chunks of data
       sent by process 0. */
    /* rows_local has a different value on the last processor, remember */
    for(i=0;i<blocks_local;i++)
    {
      MPI_Recv(&localblocks[i],N*N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);
    }
  }
}
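To make the intended split concrete (this is just my own understanding of the code above): with L = 10 blocks and 4 processes, blocks_local = 10/4 = 2, so rank 0 keeps blocks 0-1 for itself, ranks 1 and 2 are each sent 2 blocks, and the last rank is sent L - blocks_local*(numnodes-1) = 10 - 2*3 = 4 blocks.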
The function above is called from the code below:
#include <iostream>
#include <iomanip>
#include <mpi.h>
#include "SCmathlib.h"
#include "SCchapter7.h"

using namespace std;

int main(int argc, char * argv[])
{
  int i,j, N = 10, L = 10;
  double **A, *x, *y;
  int totalnodes, mynode;
  std::vector<double**> blocks;

  MPI_Init(&argc,&argv);
  MPI_Comm_size(MPI_COMM_WORLD, &totalnodes);
  MPI_Comm_rank(MPI_COMM_WORLD, &mynode);

  // output variable
  y = new double[N];
  // input variable
  x = new double[N];
  for(i=0;i<N;i++)
  {
    x[i] = 1.0;
  }

  // forms identity matrix on node 0
  if(mynode==0)
  {
    for(j=0;j<L;j++)
    {
      A = CreateMatrix(N,N);
      // fills the block
      for(i=0;i<N;i++)
      {
        A[i][i] = 1.0;
      }
      blocks.push_back(A);
    }
  }

  double start = MPI_Wtime();
  BlockMatVecMultiplication(mynode,totalnodes,N,L,blocks,x,y);
  double end = MPI_Wtime();

  if(mynode==0)
  {
    for(i=0;i<L;i++)
    {
      //DestroyMatrix(blocks[i],N,N);
    }
    //delete[] x;
    //delete[] y;
  }

  std::cout << end - start << std::endl;

  MPI_Finalize();
}
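In case it matters: the job is launched with mpirun against Open MPI (as the library paths in the backtrace show), using four processes, which matches the four timings in the output above.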
The included headers (SCmathlib.h, SCchapter7.h) only provide basic matrix functionality. The following function creates a matrix:
double ** CreateMatrix(int m, int n){
  double ** mat;
  mat = new double*[m];
  for(int i=0;i<m;i++){
    mat[i] = new double[n];
    for(int j=0;j<n;j++)
      mat[i][j] = 0.0;
  }
  return mat;
}
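For completeness, the DestroyMatrix I commented out in main is, to the best of my recollection, just the matching deallocation (sketched from memory, so treat the body as an assumption rather than the exact library code):

void DestroyMatrix(double ** mat, int m, int n){
  // frees the m row arrays allocated by CreateMatrix, then the array of row pointers itself
  for(int i=0;i<m;i++)
    delete[] mat[i];
  delete[] mat;
}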