Question

我正在并行运行MPI实例。在某些时候，每个实例都有一个包含100个排名值的列表。我现在想从所有实例中收集前100个值。

如何在MPI中完成？有专门的功能吗？

谢谢！

Answer 1

如果您想把每个实例各自的最大值都收集起来，那么 MPI_Gather() 是正确的选择。

如果你想收集所有实例中最大的 100 个值（即总共只取前 100 个值，而不是 n * 100 个值），那么我认为没有“原生”的方法可以做到这一点。（注：你写的是 list，我假定你实际指的是数组 array。）

话虽如此，您可以使用MPI_Op_create()来创建一个在两个数组上运行的运算符，然后使用先前定义的运算符调用MPI_Reduce()。

Answer 2

Gilles 的建议非常优雅，因此我想编写一段简单的示例代码，因为这对正在学习 MPI 用户自定义操作的人来说是一个很好的练习。

请注意，我滥用了用户自定义操作中“len”参数的含义。该参数本应表示要执行的归约次数，而不是每次归约的数据大小。换句话说，len = 5 应该意味着每个进程上有 5 个独立的列表需要归并，而不是每个进程有一个长度为 5 的列表。要解决此问题，需要定义一个能容纳完整列表的新 MPI 数据类型（例如使用 MPI_Type_contiguous），但我目前还没能让它正常工作。

然而，即使是技术上不正确的代码也说明了基本方法。

3个进程的长度为5的列表的示例输出为：

private readonly BackgroundWorker worker = new BackgroundWorker { WorkerReportsProgress = true };

// Constructor: calls InitializeComponent() and then attaches the two handlers
// defined below to the `worker` field's DoWork and ProgressChanged events.
// Nothing in this snippet starts the worker.
public MainWindow()
{
    InitializeComponent();

    worker.DoWork += worker_DoWork;
    worker.ProgressChanged += worker_ProgressChanged;
}

// DoWork handler registered on `worker`; a placeholder for the long-running
// job that reports its progress back through worker.ReportProgress().
private void worker_DoWork(object sender, DoWorkEventArgs doWorkEventArgs)
{
    // Do some long process, break it up into a loop so you can periodically
    //  call worker.ReportProgress()

    // NOTE(review): `i` is not declared anywhere in this snippet - presumably
    // it is the counter of the loop described above; confirm before reuse.
    worker.ReportProgress(i);  // Pass back some meaningful value
}

// ProgressChanged handler registered on `worker`: writes the reported
// percentage into prgBar.Value, capped at 100.
private void worker_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
    // prgBar is not declared in this snippet - presumably a progress-bar
    // control defined elsewhere; confirm.
    prgBar.Value = Math.Min(e.ProgressPercentage, 100);
}

这是代码。

rank 0, mysortedlist[0] = 12
rank 0, mysortedlist[1] = 9
rank 0, mysortedlist[2] = 6
rank 0, mysortedlist[3] = 3
rank 0, mysortedlist[4] = 0

rank 2, mysortedlist[0] = 14
rank 2, mysortedlist[1] = 11
rank 2, mysortedlist[2] = 8
rank 2, mysortedlist[3] = 5
rank 2, mysortedlist[4] = 2

rank 1, mysortedlist[0] = 13
rank 1, mysortedlist[1] = 10
rank 1, mysortedlist[2] = 7
rank 1, mysortedlist[3] = 4
rank 1, mysortedlist[4] = 1

rank 0, sortedlist[0] = 14
rank 0, sortedlist[1] = 13
rank 0, sortedlist[2] = 12
rank 0, sortedlist[3] = 11
rank 0, sortedlist[4] = 10

Answer 3

这里是适用于多个列表的完整代码，即 MPI_Reduce() 的“count”参数被正确地解释为独立列表的数量，而不是每个列表的长度。尽管列表长度 N 在 main 中是常量，但归约操作更为通用，它根据列表数据类型的 extent 计算出长度。

这里是4个进程中3个长度为5的列表的输出：

dsh@laptop> mpirun -n 4 ./mergesortlist

rank 1, mysortedlist[0] =  17 117 217
rank 1, mysortedlist[1] =  13 113 213
rank 1, mysortedlist[2] =   9 109 209
rank 1, mysortedlist[3] =   5 105 205
rank 1, mysortedlist[4] =   1 101 201

rank 2, mysortedlist[0] =  18 118 218
rank 2, mysortedlist[1] =  14 114 214
rank 2, mysortedlist[2] =  10 110 210
rank 2, mysortedlist[3] =   6 106 206
rank 2, mysortedlist[4] =   2 102 202

rank 3, mysortedlist[0] =  19 119 219
rank 3, mysortedlist[1] =  15 115 215
rank 3, mysortedlist[2] =  11 111 211
rank 3, mysortedlist[3] =   7 107 207
rank 3, mysortedlist[4] =   3 103 203

rank 0, mysortedlist[0] =  16 116 216
rank 0, mysortedlist[1] =  12 112 212
rank 0, mysortedlist[2] =   8 108 208
rank 0, mysortedlist[3] =   4 104 204
rank 0, mysortedlist[4] =   0 100 200

rank 0, sortedlist[0] =  19 119 219
rank 0, sortedlist[1] =  18 118 218
rank 0, sortedlist[2] =  17 117 217
rank 0, sortedlist[3] =  16 116 216
rank 0, sortedlist[4] =  15 115 215

以及代码：

#include <stdio.h>
#include <stdlib.h>

#include <mpi.h>

#define NUMLIST 3 // Number of distinct lists (of integers)
#define N 5       // Length of each list

void mergesortlist(void *vinvec, void *vinoutvec, int *n, MPI_Datatype *type);
void mergelist(int *merge, int *a, int *b, int n);

int main(void)
{
  int i, ilist;

  // local sorted list

  int mysortedlist[NUMLIST][N];

  // global sorted list

  int sortedlist[NUMLIST][N];

  MPI_Comm comm;

  MPI_Datatype MPI_LIST;
  MPI_Op MPI_MERGELIST;

  int size, rank;

  comm = MPI_COMM_WORLD;

  MPI_Init(NULL, NULL);

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  // Define datatype appropriate for a single array of N integers

  MPI_Type_contiguous(N, MPI_INT, &MPI_LIST);
  MPI_Type_commit(&MPI_LIST);

  // Register new reduction operation to merge two sorted lists

  MPI_Op_create(mergesortlist, 1, &MPI_MERGELIST);

  // Generate sorted lists on each rank

  for (i=0; i < N; i++)
    {
      for (ilist=0; ilist < NUMLIST; ilist++)
      {
        mysortedlist[ilist][i] = rank+size*(N-i-1) + 100*ilist;
        sortedlist[ilist][i] = -1;
      }
    }

  for (i=0; i < N; i++)
   {
     printf("rank %d, mysortedlist[%d] =", rank, i);

     for (ilist=0; ilist < NUMLIST; ilist++)
       {
         printf(" %3d", mysortedlist[ilist][i]);
       }
     printf("\n");
    }

  printf("\n");

  // Perform reduction to rank 0

  MPI_Reduce(mysortedlist, sortedlist, NUMLIST, MPI_LIST, MPI_MERGELIST,
         0, comm);

  if (rank == 0)
    {
      for (i=0; i < N; i++)
      {
        printf("rank %d, sortedlist[%d] =", rank, i);

        for (ilist=0; ilist < NUMLIST; ilist++)
        {
          printf(" %3d", sortedlist[ilist][i]);
        }
        printf("\n");
      }

      printf("\n");
    }

  MPI_Finalize();

  return 0;
}


// User-defined MPI reduction function (signature of MPI_User_function).
//
// Treats both buffers as *n consecutive lists of ints, each sorted in
// descending order, and overwrites every list in vinoutvec with the largest
// entries of the merge of that list with the matching list in vinvec.
//
//   vinvec     input buffer, read only
//   vinoutvec  input/output buffer, receives the merged lists
//   n          number of lists (the "count" of the reduction)
//   type       datatype of one list; its extent determines the list length
//
// Aborts the MPI job if the temporary merge buffer cannot be allocated.
void mergesortlist(void *vinvec, void *vinoutvec, int *n, MPI_Datatype *type)
{
  MPI_Aint lb, listextent, intextent;

  int i, ilist;
  int nvec, nlist;

  int *invec    = (int *) vinvec;
  int *inoutvec = (int *) vinoutvec;

  // the count is the number of individual lists

  nlist = *n;

  // Infer length of each list from the extents
  // Should really check "type" is valid, i.e. a contiguous block of ints

  MPI_Type_get_extent(MPI_INT, &lb, &intextent);
  MPI_Type_get_extent(*type, &lb, &listextent);

  nvec = (int) (listextent/intextent);

  // Nothing to merge: avoid a zero-sized (or negative) allocation request

  if (nlist <= 0 || nvec <= 0)
    {
      return;
    }

  // Need a temporary as "mergelist" does not work in-place

  int *mergevec = (int *) malloc(nvec*sizeof(int));

  // A reduction function has no way to return an error; MPI_Abort is the
  // one MPI call permitted here for that purpose.

  if (mergevec == NULL)
    {
      fprintf(stderr, "mergesortlist: failed to allocate %d ints\n", nvec);
      MPI_Abort(MPI_COMM_WORLD, 1);
      return;
    }

  // Merge each of the "nlist" lists in turn

  for (ilist=0; ilist < nlist; ilist++)
    {
      mergelist(mergevec, &invec[ilist*nvec], &inoutvec[ilist*nvec], nvec);

      // Copy the merged result back over the in/out list

      for (i=0; i < nvec; i++)
        {
          inoutvec[ilist*nvec+i] = mergevec[i];
        }
    }

  free(mergevec);
}


// Write the n largest entries of two descending-sorted arrays into merge,
// also in descending order.
//
//   merge  output array with room for n ints; must not overlap a or b
//   a, b   input arrays of n ints each, sorted largest first
//   n      number of entries to produce
//
// When the two candidates are equal, the one from b is taken.
void mergelist(int *merge, int *a, int *b, int n)
{
  int taken_a = 0;   // entries consumed from a so far
  int taken_b = 0;   // entries consumed from b so far
  int out = 0;       // next free slot in merge

  // taken_a + taken_b == out < n inside the loop, so neither index can
  // run past the end of its array.
  while (out < n)
    {
      const int head_a = a[taken_a];
      const int head_b = b[taken_b];

      if (head_a > head_b)
        {
          merge[out] = head_a;
          taken_a++;
        }
      else
        {
          merge[out] = head_b;
          taken_b++;
        }

      out++;
    }
}

从MPI中的每个节点列表中收集全局top-k

3 个答案: