Question

我有很多从节点可能会也可能不会向主节点发送消息。所以目前主节点无法知道预期的MPI_Recv数量。由于效率原因，从节点必须向主节点发送最少数量的消息。

我找到了一个很巧妙的技巧（a cool trick）：发送方在不再有消息要发送时，额外发送一条“完成”消息。不幸的是，这个技巧似乎不适用于我的情况，因为我这里发送者的数量是可变的。关于如何解决这个问题，有什么想法吗？谢谢！

/* Snippet from the question (pseudo-code, not compilable as-is): rank 0 loops
 * on MPI_Recv until it sees a message tagged DONE; every other rank sends at
 * most one data message. */
if(rank == 0){ //MASTER NODE

    while (1) {

        /* Accepts a message from any rank with any tag; the tag is inspected
         * below to tell data apart from the DONE marker.
         * NOTE(review): the receive count here is 10, while the senders below
         * send 64 MPI_INTs; a message longer than the receive count is a
         * truncation error in MPI. */
        MPI_Recv(&buffer, 10, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);

        /* Leaves the loop on the first DONE message, whoever sent it. */
        if (status.MPI_TAG == DONE) break;


        /* Do stuff */
    }

}else{ //MANY SLAVE NODES

    /* A worker may or may not send, so the master cannot know in advance how
     * many data messages (tag 1) to expect. */
    if(some conditions){
        MPI_Send(&buffer, 64, MPI_INT, root, 1, MPI_COMM_WORLD);
    }

}


/* NOTE(review): this barrier is outside the if/else, so rank 0 must reach it
 * too, but rank 0 only leaves its receive loop after a DONE message, and DONE
 * is sent only after the barrier. The workers therefore wait in the barrier
 * while the master waits in MPI_Recv. */
MPI_Barrier(MPI_COMM_WORLD);
/* NOTE(review): executed by every rank (including the master itself), and it
 * passes a NULL buffer together with a count of 1. */
MPI_Send(NULL, 1, MPI_INT, root, DONE, MPI_COMM_WORLD);

这样不起作用：程序似乎仍然阻塞在 MPI_Recv 上。

Answer 1

更简单、更优雅的做法是使用 MPI_IBARRIER。让每个工作进程先完成它需要的所有发送，发送完毕后再调用 MPI_IBARRIER。在主进程上，循环等待一个使用 MPI_ANY_SOURCE 的 MPI_IRECV 和 MPI_IBARRIER。一旦 MPI_IBARRIER 完成，就说明所有进程都已发送完毕，此时可以取消 MPI_IRECV 并继续执行。伪代码大致如下：

/* Pseudo-code: termination detection with a non-blocking barrier.
 * `requests`, `request`, `index`, `status` and `cancelled` are assumed to be
 * declared by the surrounding program (MPI_Request / int / MPI_Status / int). */
if (master) {
  /* Start the barrier. Each process will join when it's done. */
  MPI_Ibarrier(MPI_COMM_WORLD, &requests[0]);

  do {
    /* Post a receive for the next message from any worker. */
    MPI_Irecv(..., MPI_ANY_SOURCE, &requests[1]);

    /* If the index that finished is 1, we received a message.
     * Otherwise, we finished the barrier and we're done.
     * MPI_Waitany reports a single status, so the "ignore" constant
     * is MPI_STATUS_IGNORE (not the array form MPI_STATUSES_IGNORE). */
    MPI_Waitany(2, requests, &index, MPI_STATUS_IGNORE);

    if (index == 1) {
      /* Do the work on the message that just arrived. */
    }
  } while (index == 1);

  /* The barrier finished while a receive was still posted. Cancel it, then
   * complete the request: a cancelled request must still be waited on, and
   * the cancel fails if a message was matched at the same moment the
   * barrier completed. */
  MPI_Cancel(&requests[1]);
  MPI_Wait(&requests[1], &status);
  MPI_Test_cancelled(&status, &cancelled);
  if (!cancelled) {
    /* A final message was received after all; do the work on it too. */
  }
} else {
  /* Keep sending work back to the master until we're done.
   * A synchronous send only returns once the master has matched the
   * message, so by the time every worker has joined the barrier no
   * message can still be in flight. A plain MPI_Send may return as soon
   * as the data is buffered, which would let the barrier complete before
   * the master has received everything. */
  while( ...work is to be done... ) {
    MPI_Ssend(...);
  }

  /* When we finish, join the Ibarrier. Note that
   * you can't use an MPI_Barrier here because it
   * has to match with the MPI_Ibarrier above. */
  MPI_Ibarrier(MPI_COMM_WORLD, &request);
  MPI_Wait(&request, MPI_STATUS_IGNORE);
}

Answer 2

1- 您调用 MPI_Barrier 的位置不对，应该在 MPI_Send 之后调用。2- 只有当根进程从所有其他进程（共 size-1 个）都收到 DONE 之后，它才会退出循环。

经过一些修改后的代码：

#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Demo: the root rank receives an unknown number of data messages and stops
 * once every other rank has sent its DONE marker.
 *
 * Each non-root rank sends one data message (value 66, tag DATA_TAG) followed
 * by one DONE message (value 55, tag DONE_TAG). The root counts DONE messages
 * and leaves its receive loop after size - 1 of them.
 *
 * Returns 0 after MPI_Finalize.
 */
int main(int argc, char** argv)
{
    /* Tags used to tell payload messages apart from the end-of-stream marker. */
    enum { DATA_TAG = 1, DONE_TAG = 888 };
    const int root = 0;

    MPI_Init(&argc, &argv);

    int size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int buffer = 77;
    printf("here is rank %d with size=%d\n", rank, size);
    fflush(stdout);

    if (rank == root) { /* MASTER NODE */
        MPI_Status status;
        int num_of_DONE = 0;

        /* Checking the count before receiving (rather than only after) means
         * a run with a single process, where nobody will ever send, does not
         * block forever in MPI_Recv. */
        while (num_of_DONE < size - 1) {
            MPI_Recv(&buffer, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                     MPI_COMM_WORLD, &status);
            printf("root recev %d from %d with tag = %d\n",
                   buffer, status.MPI_SOURCE, status.MPI_TAG);
            fflush(stdout);

            if (status.MPI_TAG == DONE_TAG) {
                num_of_DONE++;
            } else {
                /* Do stuff with the data message */
            }

            printf("num_of_DONE=%d\n", num_of_DONE);
            fflush(stdout);
        }
    } else { /* MANY SLAVE NODES */
        /* Stand-in for "some condition": a real worker may skip this send. */
        if (1) {
            buffer = 66;
            MPI_Send(&buffer, 1, MPI_INT, root, DATA_TAG, MPI_COMM_WORLD);
            printf("rank %d sent data.\n", rank);
            fflush(stdout);
        }

        /* Always tell the root that this rank has nothing more to send. */
        buffer = 55;
        MPI_Send(&buffer, 1, MPI_INT, root, DONE_TAG, MPI_COMM_WORLD);
    }

    /* The barrier comes after all sends, so the root can reach it once it has
     * collected every DONE message. */
    MPI_Barrier(MPI_COMM_WORLD);
    printf("rank %d done.\n", rank);
    fflush(stdout);
    MPI_Finalize();
    return 0;
}

输出：

    hosam@hosamPPc:~/Desktop$ mpicc -o aa aa.c
    hosam@hosamPPc:~/Desktop$ mpirun -n 3 ./aa
here is rank 2 with size=3
here is rank 0 with size=3
rank 2 sent data.
here is rank 1 with size=3
rank 1 sent data.
root recev 66 from 1 with tag = 1
num_of_DONE=0
root recev 66 from 2 with tag = 1
num_of_DONE=0
root recev 55 from 2 with tag = 888
num_of_DONE=1
root recev 55 from 1 with tag = 888
num_of_DONE=2
rank 0 done.
rank 1 done.
rank 2 done.

MPI：当预期的MPI_Recv数量未知时该怎么办

2 个答案: