#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char *argv[])
{
int myid, numprocs, number_of_completed_operation;
char message = 'a';
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Request* requests = (MPI_Request*)malloc((numprocs - 1)*sizeof(MPI_Request));
MPI_Status* statuses = (MPI_Status*)malloc(sizeof(MPI_Status)*(numprocs - 1));
int* indices = (int *)malloc((numprocs - 1)*sizeof(int));
char* buf = (char *)malloc((numprocs - 1)*sizeof(char));
if (myid != numprocs - 1)
{//worker
printf("***this is sender %d\n", myid);
MPI_Send(&message, 1, MPI_CHAR, numprocs - 1, 110, MPI_COMM_WORLD);
printf("*.*sender %d is done\n", myid);
}
else if (myid == numprocs - 1)
{
//master
int number_of_left_messages = numprocs - 1;//有numprocs-1个信息到来
int i;
for (i = 0; i < numprocs - 1; i++)
{
MPI_Irecv(&buf+i, 1, MPI_CHAR,i, 110, MPI_COMM_WORLD, &requests[i]);
}
MPI_Waitsome(numprocs - 1, requests, &number_of_completed_operation, indices, statuses);
number_of_left_messages = number_of_left_messages - number_of_completed_operation;
printf("number of completed operation is %d\n", number_of_left_messages);
printf("left message amount is %d\n", number_of_left_messages);
int j;
for (j = 0; j <numprocs - 1; j++)
{
printf("-------------\n");
printf("index is %d\n",indices[j]);
printf("source is %d\n", statuses[j].MPI_SOURCE);
//printf("good\n");
printf("--------====\n");
}
while (number_of_left_messages > 0)
{
MPI_Waitsome(numprocs - 1, requests, &number_of_completed_operation, indices, statuses);
printf("number of completed operation is %d\n", number_of_completed_operation);
for (j = 0; j <numprocs - 1; j++)
{
printf("-------------\n");
printf("index is %d\n", indices[j]);
printf("source is %d\n", statuses[j].MPI_SOURCE);
printf("--------====\n");
}
number_of_left_messages = number_of_left_messages - number_of_completed_operation;
printf("left message amount is %d\n", number_of_left_messages);
逻辑很简单,我将最后的进程设置为主进程,所有其他进程都是工作进程,工作者向主进程发送消息,主进程使用waitsome函数进行接收。
当我将进程数设置为4或更大时,系统显示错误如下:
[soit-mpi-pro-1:12197] *** An error occurred in MPI_Waitsome
[soit-mpi-pro-1:12197] *** reported by process [140533176729601,140531329925123]
[soit-mpi-pro-1:12197] *** on communicator MPI_COMM_WORLD
[soit-mpi-pro-1:12197] *** MPI_ERR_REQUEST: invalid request
[soit-mpi-pro-1:12197] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[soit-mpi-pro-1:12197] *** and potentially your MPI job)
答案 0 :(得分:2)
看起来问题出在您对MPI_Irecv的调用上。请删除buf之前多余的&(您传入的是一个双重指针,而不是一个指针)。
MPI_Irecv(buf+i, 1, MPI_CHAR,i, 110, MPI_COMM_WORLD, &requests[i]);
当我解决这个问题时,添加结束括号并调用MPI_Finalize()
,并删除一堆额外的输出,我在运行程序时没有任何问题:
$ mpiexec -n 8 ./a.out
***this is sender 3
*.*sender 3 is done
***this is sender 4
*.*sender 4 is done
***this is sender 5
*.*sender 5 is done
***this is sender 6
*.*sender 6 is done
***this is sender 0
*.*sender 0 is done
***this is sender 1
*.*sender 1 is done
***this is sender 2
*.*sender 2 is done
number of completed operation is 1
left message amount is 6
number of completed operation is 1
left message amount is 5
number of completed operation is 1
left message amount is 4
number of completed operation is 1
left message amount is 3
number of completed operation is 1
left message amount is 2
number of completed operation is 1
left message amount is 1
number of completed operation is 1
left message amount is 0
我不知道它是否得到了正确的答案,但这是一个不同的问题。
答案 1 :(得分:1)
您传给MPI_Irecv的是指针buf本身的地址加上偏移量,而不是buf的值加上偏移量。收到消息后,它会覆盖附近一个或多个栈变量的最低字节(在x86 / x64之类的小端系统上),根据栈布局的不同,这些变量可能包括requests和statuses。因此MPI_Waitsome接收到的指针并不指向请求数组的开头,而是指向它之前、之后或中间的某个位置,于是某些请求句柄无效,MPI_Waitsome就会报错。在大端系统上,被覆盖的是地址的最高字节,这更可能导致无效地址和段错误。
请使用buf+i(如Wesley Bland的答案所述)或使用&buf[i]。使用第一种形式还是第二种形式,我通常认为只是个人喜好问题。