I am trying to use MPI remote memory access (RMA) from C++ threads. This seems to work fine if the number of writes is small, but it fails if I try to communicate often. The source below exhibits the following behavior:
(i) if compiled with #define USE_THREADS false, the program runs fine regardless of the value set via MY_BOUND;
(ii) if compiled with #define USE_THREADS true, the program runs fine if executed with only two processes (mpiexec -n 2 ...), i.e. with only one thread doing communication, regardless of the value set via MY_BOUND;
(iii) if compiled with #define USE_THREADS true, the program runs fine if the value set via MY_BOUND is low, e.g. 10 or 100, but crashes if the value set via MY_BOUND is large, e.g. 100000.
The error I obtain in the third case, for large values of MY_BOUND, is:
[...] *** Process received signal ***
[...] Signal: Segmentation fault: 11 (11)
[...] Signal code: Address not mapped (1)
[...] Failing at address: 0x0
[...] *** End of error message ***
--------------------------------------------------------------------------
mpiexec noticed that process rank 0 with PID 0 on node ... exited on signal 11 (Segmentation fault: 11).
--------------------------------------------------------------------------
Attaching a debugger to the process reveals the following:
thread #4: tid = 0x5fa1e, 0x000000010e500de7 mca_osc_pt2pt.so`ompi_osc_pt2pt_sync_pscw_peer + 87, stop reason = EXC_BAD_ACCESS (code=1, address=0x0)
* frame #0: 0x000000010e500de7 mca_osc_pt2pt.so`ompi_osc_pt2pt_sync_pscw_peer + 87
frame #1: 0x000000010e4fd2a2 mca_osc_pt2pt.so`osc_pt2pt_incoming_post + 50
frame #2: 0x000000010e4fabf1 mca_osc_pt2pt.so`ompi_osc_pt2pt_process_receive + 961
frame #3: 0x000000010e4f4957 mca_osc_pt2pt.so`component_progress + 279
frame #4: 0x00000001059f3de4 libopen-pal.20.dylib`opal_progress + 68
frame #5: 0x000000010e4fc9dd mca_osc_pt2pt.so`ompi_osc_pt2pt_complete + 765
frame #6: 0x0000000105649a00 libmpi.20.dylib`MPI_Win_complete + 160
There may be some simple mistake in my implementation, but I am having a hard time finding it, possibly due to my insufficient understanding of MPI, C++ and threads. Any thoughts, comments or feedback? Thanks, and happy new year.
My setup is GCC 4.9 with OpenMPI 2.0.1 (compiled with --enable-mpi-thread-multiple).
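(Side note: to rule out a build problem, a minimal standalone check, separate from the program below, can print the thread level that MPI_Init_thread actually grants; this is just a verification sketch, not part of the program under discussion.)

#include <iostream>
#include "mpi.h"

// Request MPI_THREAD_MULTIPLE and print the level the library grants.
int main(int argc, char** argv) {
    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    std::cout << "requested " << MPI_THREAD_MULTIPLE
              << ", provided " << provided << std::endl;
    MPI_Finalize();
    return 0;
}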
UPDATE: Compiling OpenMPI with --enable-debug and debugging again, I found the following:
* thread #4: tid = 0x93d0d, 0x0000000111521b1e mca_osc_pt2pt.so`ompi_osc_pt2pt_sync_array_peer(rank=1, peers=0x0000000000000000, nranks=1, peer=0x0000000000000000) + 51 at osc_pt2pt_sync.c:61, stop reason = EXC_BAD_ACCESS (code=1, address=0x0)
frame #0: 0x0000000111521b1e mca_osc_pt2pt.so`ompi_osc_pt2pt_sync_array_peer(rank=1, peers=0x0000000000000000, nranks=1, peer=0x0000000000000000) + 51 at osc_pt2pt_sync.c:61
58 int mid = nranks / 2;
59
60 /* base cases */
-> 61 if (0 == nranks || (1 == nranks && peers[0]->rank != rank)) {
62 if (peer) {
63 *peer = NULL;
64 }
The pointers do not appear to be NULL in the calling stack frames. Maybe another thread is updating the variable once the debugged thread has crashed? Could this point to a locking-related problem? (A sketch of the experiment I have in mind is after the source listing below.)
#include <iostream>
#include <vector>
#include <thread>
#include <cstdlib> // for exit()
#include "mpi.h"

#define MY_BOUND 100000
#define USE_THREADS true

void FinalizeMPI();
void InitMPI(int, char**);
void NoThreads(int, int);

// Each thread on rank 0 talks to exactly one remote rank (ThreadID+1)
// through its own group, communicator and window.
void ThreadFunction(int ThreadID, int Bound) {
    std::cout << "test " << ThreadID << std::endl;

    MPI_Group GroupAll, myGroup, destGroup;
    MPI_Comm myComm;
    MPI_Win myWin;

    MPI_Comm_group(MPI_COMM_WORLD, &GroupAll);
    int ranks[2]{0, ThreadID+1};
    MPI_Group_incl(GroupAll, 2, ranks, &myGroup);
    MPI_Comm_create_group(MPI_COMM_WORLD, myGroup, 0, &myComm);
    MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, myComm, &myWin);

    int destrank = 1;
    MPI_Group_incl(myGroup, 1, &destrank, &destGroup);

    std::cout << "Objects created" << std::endl;

    std::vector<int> data(5, 1);
    for(int m=0;m<Bound;++m) {
        // One PSCW access epoch per iteration.
        MPI_Win_start(destGroup, 0, myWin);
        MPI_Put(&data[0], 5, MPI_INT, 1, 0, 5, MPI_INT, myWin);
        MPI_Win_complete(myWin);
    }

    MPI_Group_free(&destGroup);
    MPI_Win_free(&myWin);
    MPI_Comm_free(&myComm);
    MPI_Group_free(&myGroup);
    MPI_Group_free(&GroupAll);
}
void WithThreads(int comm_size, int Bound) {
    std::vector<std::thread*> Threads(comm_size-1, NULL);
    for(int k=0;k<comm_size-1;++k) {
        Threads[k] = new std::thread(ThreadFunction, k, Bound);
    }
    for(int k=0;k<comm_size-1;++k) {
        Threads[k]->join();
    }
    std::cout << "done" << std::endl;
}
int main(int argc, char** argv) {
    InitMPI(argc, argv);

    int rank, comm_size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

    if(comm_size<2) {
        FinalizeMPI();
        return 0;
    }

    int Bound = MY_BOUND;

    if(rank==0) {
        // Origin side: issue the MPI_Put calls, with or without threads.
        if(USE_THREADS) {
            WithThreads(comm_size, Bound);
        } else {
            NoThreads(comm_size, Bound);
        }
    } else {
        // Target side: expose a window of 5 ints and match every access
        // epoch opened by rank 0 with a post/wait exposure epoch.
        MPI_Group GroupAll;
        MPI_Comm_group(MPI_COMM_WORLD, &GroupAll);

        std::vector<int> tmp(5,0);

        MPI_Group myGroup, destinationGroup;
        MPI_Comm myComm;
        MPI_Win myWin;

        int ranks[2]{0, rank};
        MPI_Group_incl(GroupAll, 2, ranks, &myGroup);
        MPI_Comm_create_group(MPI_COMM_WORLD, myGroup, 0, &myComm);
        MPI_Win_create(&tmp[0], 5*sizeof(int), sizeof(int), MPI_INFO_NULL, myComm, &myWin);

        int destrank = 0;
        MPI_Group_incl(myGroup, 1, &destrank, &destinationGroup);

        for(int m=0;m<Bound;++m) {
            MPI_Win_post(destinationGroup, 0, myWin);
            MPI_Win_wait(myWin);
        }

        std::cout << " Rank " << rank << ":";
        for(auto& e : tmp) {
            std::cout << " " << e;
        }
        std::cout << std::endl;

        MPI_Win_free(&myWin);
        MPI_Comm_free(&myComm);
        MPI_Group_free(&myGroup);
        MPI_Group_free(&GroupAll);
    }

    FinalizeMPI();
    return 0;
}
void FinalizeMPI() {
    int flag;
    MPI_Finalized(&flag);
    if(!flag)
        MPI_Finalize();
}

void InitMPI(int argc, char** argv) {
    int flag;
    MPI_Initialized(&flag);
    if(!flag) {
        int provided_Support;
        MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided_Support);
        if(provided_Support!=MPI_THREAD_MULTIPLE) {
            exit(0);
        }
    }
}
// Single-threaded variant: rank 0 communicates with each remote rank in
// turn, still using one group/communicator/window per remote rank.
void NoThreads(int comm_size, int Bound) {
    MPI_Group GroupAll;
    MPI_Comm_group(MPI_COMM_WORLD, &GroupAll);

    std::vector<MPI_Group> myGroups(comm_size-1);
    std::vector<MPI_Comm> myComms(comm_size-1);
    std::vector<MPI_Win> myWins(comm_size-1);
    std::vector<MPI_Group> destGroups(comm_size-1);

    for(int k=1;k<comm_size;++k) {
        int ranks[2]{0, k};
        MPI_Group_incl(GroupAll, 2, ranks, &myGroups[k-1]);
        MPI_Comm_create_group(MPI_COMM_WORLD, myGroups[k-1], 0, &myComms[k-1]);
        MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, myComms[k-1], &myWins[k-1]);
        int destrank = 1;
        MPI_Group_incl(myGroups[k-1], 1, &destrank, &destGroups[k-1]);
    }

    std::vector<int> data(5, 1);
    for(int k=0;k<comm_size-1;++k) {
        for(int m=0;m<Bound;++m) {
            MPI_Win_start(destGroups[k], 0, myWins[k]);
            MPI_Put(&data[0], 5, MPI_INT, 1, 0, 5, MPI_INT, myWins[k]);
            MPI_Win_complete(myWins[k]);
        }
    }

    for(int k=0;k<comm_size-1;++k) {
        MPI_Win_free(&myWins[k]);
        MPI_Comm_free(&myComms[k]);
        MPI_Group_free(&myGroups[k]);
    }

    MPI_Group_free(&GroupAll);
}
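To test the locking hypothesis from the update, the experiment I have in mind is the sketch below: serialize the per-thread PSCW epochs of ThreadFunction with a global mutex, so that at most one thread is inside MPI_Win_start/MPI_Put/MPI_Win_complete at a time. The names SerializedEpochLoop and rma_mutex are placeholders of mine, not part of the program above; if the crash disappears with the mutex in place, that would hint at concurrent access inside the osc/pt2pt code path rather than at the program's RMA logic.

#include <mutex>
#include <vector>
#include "mpi.h"

// Hypothetical global mutex, introduced only for this experiment.
std::mutex rma_mutex;

// Serialized variant of the epoch loop in ThreadFunction: the lock ensures
// only one thread at a time runs a start/put/complete access epoch.
// myWin and destGroup are the objects created in ThreadFunction above.
void SerializedEpochLoop(MPI_Win myWin, MPI_Group destGroup, int Bound) {
    std::vector<int> data(5, 1);
    for(int m=0;m<Bound;++m) {
        std::lock_guard<std::mutex> lock(rma_mutex);
        MPI_Win_start(destGroup, 0, myWin);
        MPI_Put(&data[0], 5, MPI_INT, 1, 0, 5, MPI_INT, myWin);
        MPI_Win_complete(myWin);
    }
}

ThreadFunction would then call SerializedEpochLoop(myWin, destGroup, Bound) instead of running its own epoch loop; the target side of the program is unchanged.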