Question

我有一个索引数组，希望每个工作进程（worker）根据这些索引执行一些操作。数组的大小可能大于进程（rank）的总数，所以我的第一个问题是：除了主从式（master-worker）负载均衡之外，是否还有其他方式？我希望负载是均衡的，并且希望把各个索引分配给各个进程。

我在考虑使用 master-worker 方式：由主进程（rank 0）把各个索引分配给其他进程。但是当我用 3 个进程和 15 个索引运行代码时，程序在 while 循环中发送索引 4 时卡住了。不知道是否有人可以帮我找出问题所在？

// Fragment of a larger MPI program: pCurrentID and size are defined outside
// this excerpt (presumably the calling rank and the communicator size --
// TODO confirm against the enclosing code).
// Rank 0 hands out the indices 0 .. nindices-1 one at a time; every other
// rank receives an index, processes it and reports back to rank 0.
if(pCurrentID == 0) { // Master
   MPI_Status status;

   int nindices = 15;        // number of indices to distribute
   int mesg[1] = {0};        // next index to hand out (later the -1 end signal)
   int initial_id = 0;       // number of workers that already got a first index
   int recv_mesg[1] = {0};   // buffer for the completion message of a worker

   // Send one initial index (tag 1) to each worker, ranks 1 .. size-1.
   // NOTE(review): when nindices < size - 1, initial_id stops advancing once
   // it reaches nindices while the loop condition stays true, so this loop
   // never terminates.
   while (initial_id < size - 1) {
     if (initial_id < nindices) {
       MPI_Send(mesg, 1, MPI_INT, initial_id + 1, 1, MPI_COMM_WORLD);
       mesg[0] += 1;
       ++initial_id;
     }
   }

   // Hand out the remaining indices dynamically: wait for a tag-1 message
   // from any worker, receive it, and reply to that worker with the next
   // index.
   while (mesg[0] != nindices) {
     MPI_Probe(MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status);
     int isource = status.MPI_SOURCE;
     MPI_Recv(recv_mesg, 1, MPI_INT, isource, 1, MPI_COMM_WORLD, &status);
     MPI_Send(mesg, 1, MPI_INT, isource, 1, MPI_COMM_WORLD);
     mesg[0] += 1;
   }

   // Send the end signal (-1, tag 0) to every worker.
   // NOTE(review): the completion messages for the last indices handed out
   // are not received anywhere in this fragment.
   for (int rank = 1; rank < size; ++rank) {
     mesg[0] = -1;
     MPI_Send(mesg, 1, MPI_INT, rank, 0, MPI_COMM_WORLD);
   }
 } else { 
   MPI_Status status;
   int id[1] = {0};
   // Wait for the next message (any tag), then receive it from rank 0 using
   // the tag that was probed: tag 1 carries an index, tag 0 the end signal.
   MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
   int itag = status.MPI_TAG;
   MPI_Recv(id, 1, MPI_INT, 0, itag, MPI_COMM_WORLD, &status);
   
   int jfrag = id[0];
   // A negative value is the end signal.
   // NOTE(review): no enclosing loop is visible in this fragment, so this
   // `break` has nothing to break out of and the worker handles a single
   // message; the master's MPI_Probe then waits for requests that never
   // arrive.
   if (jfrag < 0) break;
   // do something
   // Report completion to the master (tag 1), which also requests more work.
   MPI_Send(id, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
 }

Answer 1

> 我有一个索引数组，希望每个工作进程（worker）根据这些索引执行一些操作。数组的大小可能大于进程（rank）的总数，所以我的第一个问题是：除了主从式（master-worker）负载均衡之外，是否还有其他方式？我希望负载是均衡的，并且希望把各个索引分配给各个进程。

没有；不过，如果处理每个数组索引所需的时间大致相同，您可以直接把数组分发（scatter，例如使用 MPI_Scatter 或 MPI_Scatterv）到各个进程。

> 我在考虑使用 master-worker 方式：由主进程（rank 0）把各个索引分配给其他进程。但是当我用 3 个进程和 15 个索引运行代码时，程序在 while 循环中发送索引 4 时卡住了。不知道是否有人可以帮我找出问题所在？

正如评论中已经指出的，问题在于工作进程（worker）一侧缺少一个向主进程（master）反复请求工作的循环。

负载均衡器可以实现如下：

1. master 首先向各个 worker 发送初始的迭代（索引）；
2. 每个 worker 等待来自 master 的消息；
3. 之后，master 以 MPI_ANY_SOURCE 调用 MPI_Recv，等待任意一个 worker 请求工作；
4. worker 完成第一次迭代后，把自己的 rank 发送给 master，通知 master 发送新的迭代；
5. master 读取第 4 步中 worker 发送的 rank，检查数组中是否还有新的索引：如果还有有效索引，就把它发送给该 worker；否则发送一条特殊消息，通知该 worker 已没有更多工作可做。这条消息可以是例如 -1；
6. worker 收到特殊消息后停止工作；
7. 当所有 worker 都收到特殊消息后，master 停止工作。

这种方法的一个例子：

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

/*
 * Dynamic master-worker load balancing over MPI.
 *
 * Rank 0 (the master) hands out the indices 0 .. max_index-1 one at a time.
 * Every other rank (a worker) processes the index it was given and then
 * sends its own rank to the master to ask for the next one. Once no index
 * is left, the master answers with work_is_done (-1) and the worker stops.
 * The master stops after every worker has been sent work_is_done.
 *
 * Message tags: the first message to worker i uses tag i; every later
 * request and reply uses tag 1.
 *
 * Returns 0 after MPI_Finalize.
 */
int main(int argc,char *argv[]){
    MPI_Init(&argc,&argv); // Initialize the MPI environment
    int rank;
    int size;
    MPI_Status status;
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);

    int work_is_done = -1; // sentinel: never a valid index
    if(rank == 0){
       int max_index = 10;               // indices 0 .. max_index-1 are handed out
       int index_simulator = 0;          // next index to hand out
       int processes_finishing_work = 0; // workers already told to stop

       // Send statically the first iterations. A worker for which no index
       // is left (more workers than indices) is told to stop right away
       // instead of being given an out-of-range index.
       for(int i = 1; i < size; i++){
           if(index_simulator < max_index){
              MPI_Send(&index_simulator, 1, MPI_INT, i, i, MPI_COMM_WORLD);
              index_simulator++;
           }
           else{
              MPI_Send(&work_is_done, 1, MPI_INT, i, i, MPI_COMM_WORLD);
              processes_finishing_work++;
           }
       }

       // Serve requests until every worker has been told to stop. The
       // condition is tested first so that a run with a single process
       // (no workers) does not block in MPI_Recv.
       while(processes_finishing_work < size - 1){
          int process_that_wants_work = 0;
          MPI_Recv(&process_that_wants_work, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status);
          if(index_simulator < max_index){
             MPI_Send(&index_simulator, 1, MPI_INT, process_that_wants_work, 1, MPI_COMM_WORLD);
             index_simulator++;
          }
          else{ // send special message
             MPI_Send(&work_is_done, 1, MPI_INT, process_that_wants_work, 1, MPI_COMM_WORLD);
             processes_finishing_work++;
          }
       }
    }
    else{
        int index_to_work = 0;
        // The first assignment arrives with this worker's rank as the tag.
        MPI_Recv(&index_to_work, 1, MPI_INT, 0, rank, MPI_COMM_WORLD, &status);

        while(index_to_work != work_is_done){
           // Work with the iteration index_to_work here.

           // Ask the master for more work by sending our rank, then wait
           // for either the next index or the stop message.
           MPI_Send(&rank, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
           MPI_Recv(&index_to_work, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
        }
    }
    printf("Process {%d} -> I AM OUT\n", rank);
    MPI_Finalize();
    return 0;
}

您可以从两个方面改进上述方法：减少 1) 发送的消息数量和 2) 等待消息的时间。对于前者，可以尝试分块策略（即每次 MPI 通信发送多个索引）。对于后者，可以尝试使用非阻塞 MPI 通信，或者让每个进程使用两个线程：一个负责接收／发送工作，另一个负责实际执行工作。这种多线程方法还能让主进程也参与处理数组索引，但会使实现显著复杂化。

动态负载均衡 master-worker

1 个答案: