我有以下代码:
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <mpi.h>
/* Rank of this process within MPI_COMM_WORLD; filled in by main(). */
static int rank;
/* Number of processes in MPI_COMM_WORLD; filled in by main(). */
static int size;
/* Default payload that main() copies into the transfer buffer. */
char msg[] = "This is a test message";
/*
 * Parse a command-line argument as a strictly positive int.
 *
 * Returns 0 and stores the value in *out when text is a complete decimal
 * number in [1, INT_MAX]; returns -1 and leaves *out untouched otherwise.
 */
static int parse_positive_int(const char *text, int *out) {
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < 1 || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Two-rank point-to-point throughput test.
 *
 * Usage: <prog> [run [len]]
 *   run  number of nonblocking transfers to post (default 1)
 *   len  bytes per transfer (default: sizeof msg, terminator included)
 *
 * Rank 0 posts `run` MPI_Isend calls to rank 1, rank 1 posts the matching
 * MPI_Irecv calls, both wait for all requests with MPI_Waitall, and rank 0
 * prints the measured throughput.
 *
 * Returns 0 on success and -1 when the job does not have exactly two ranks,
 * an argument is not a positive integer, or memory cannot be allocated.
 */
int main(int argc, char **argv) {
    enum { HOSTNAME_MAX = 512 };

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 2) {
        fprintf(stderr, "This test requires exactly 2 tasks (has: %d).\n", size);
        MPI_Finalize();
        return -1;
    }

    /* Reject zero, negative, or non-numeric values: they are used as array
     * sizes and message counts below. */
    int run = 1;
    if (argc > 1 && parse_positive_int(argv[1], &run) != 0) {
        fprintf(stderr, "Invalid run count '%s': expected a positive integer.\n", argv[1]);
        MPI_Finalize();
        return -1;
    }
    int len = (int)sizeof msg;
    if (argc > 2 && parse_positive_int(argv[2], &len) != 0) {
        fprintf(stderr, "Invalid message length '%s': expected a positive integer.\n", argv[2]);
        MPI_Finalize();
        return -1;
    }

    /* Heap allocations instead of VLAs: run and len come from the command
     * line and may be far too large for the stack. calloc also checks the
     * count * size multiplication for overflow. */
    char *buf = calloc((size_t)len, 1);
    MPI_Status *statusArray = calloc((size_t)run, sizeof *statusArray);
    MPI_Request *reqArray = calloc((size_t)run, sizeof *reqArray);
    if (buf == NULL || statusArray == NULL || reqArray == NULL) {
        fprintf(stderr, "Out of memory (run=%d, len=%d).\n", run, len);
        free(reqArray);
        free(statusArray);
        free(buf);
        MPI_Finalize();
        return -1;
    }

    /* Same bytes strncpy(buf, msg, len) would produce: the message truncated
     * to len, or followed by zero padding when len is larger. */
    size_t copy_len = (size_t)len < sizeof msg ? (size_t)len : sizeof msg;
    memcpy(buf, msg, copy_len);

    double start = MPI_Wtime();
    for (int i = 0; i < run; i++) {
        if (!rank) {
            MPI_Isend(buf, len, MPI_CHAR, 1, 0, MPI_COMM_WORLD, &reqArray[i]);
            printf("mpi_isend for run %d\n", i);
        } else {
            /* NOTE(review): every pending receive targets the same buffer.
             * The payload is never read afterwards, but overlapping receive
             * buffers may not be permitted by MPI - confirm against the
             * standard before relying on this. */
            MPI_Irecv(buf, len, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &reqArray[i]);
            printf("mpi_irecv for run %d\n", i);
        }
    }

    char name[HOSTNAME_MAX];
    if (gethostname(name, sizeof name) != 0) {
        snprintf(name, sizeof name, "%s", "unknown");
    }
    /* gethostname may leave the buffer unterminated when the name is truncated. */
    name[sizeof name - 1] = '\0';

    printf("host: %s has rank %d\n", name, rank);
    printf("Reached here! for host %s before MPI_Waitall \n", name);

    /* Both ranks wait on their own request array; only the log text differs. */
    printf("calling mpi_waitall for %s side which is %s\n",
           rank ? "receiving" : "sending", name);
    MPI_Waitall(run, reqArray, statusArray);
    printf("finished waiting! for host %s\n", name);

    double end = MPI_Wtime();
    if (!rank) {
        printf("Throughput: %.4f Gbps\n", 1e-9 * len * 8 * run / (end - start));
    }

    free(reqArray);
    free(statusArray);
    free(buf);
    MPI_Finalize();
    return 0;
}
发送方在调用 `MPI_Waitall` 之前发生了段错误。错误消息如下:
[host1:27679] *** Process received signal ***
[host1:27679] Signal: Segmentation fault (11)
[host1:27679] Signal code: Address not mapped (1)
[host1:27679] Failing at address: 0x8
[host1:27679] [ 0] /lib64/libpthread.so.0() [0x3ce7e0f500]
[host1:27679] [ 1] /usr/lib64/openmpi/mca_btl_openib.so(+0x21dc7) [0x7f46695c1dc7]
[host1:27679] [ 2] /usr/lib64/openmpi/mca_btl_openib.so(+0x1cbe1) [0x7f46695bcbe1]
[host1:27679] [ 3] /lib64/libpthread.so.0() [0x3ce7e07851]
[host1:27679] [ 4] /lib64/libc.so.6(clone+0x6d) [0x3ce76e811d]
[host1:27679] *** End of error message ***
我怀疑是 `MPI_Request` 数组的用法有问题。有人能指出问题所在吗?
谢谢!
答案 0 :(得分:2)
我运行你的程序没有遇到任何问题(只有一个因未包含 `unistd.h` 而产生的警告)。问题可能出在你的 Open MPI 配置上。你使用的机器有 InfiniBand 网络吗?如果没有,建议改为只使用默认的 TCP 实现,你遇到的问题可能与此有关。
如果你想指定你只使用tcp,你应该这样运行:
mpirun --mca btl tcp,self -n 2 <prog_name> <prog_args>
这样可以确保 Open MPI 不会意外检测到 openib,并在不该使用它的时候使用它。
另一方面,如果你确实打算使用 InfiniBand,那么你可能发现了 Open MPI 的某个问题。不过我觉得这种可能性不大,因为你的程序并没有做什么特别复杂的事情。