I am trying to gather strings of different lengths from all processes (including the root) into a single string (character array) on the root process. This is the prototype of MPI_Gatherv:
int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, const int *recvcounts, const int *displs,
MPI_Datatype recvtype, int root, MPI_Comm comm)
I cannot work out how to set up some of the arguments, such as recvbuf, recvcounts and displs. Could anyone provide a source code example of this in C?
Answer 0 (score: 8)
As has already been pointed out, there are lots of examples of using MPI_Gatherv, including here on Stack Overflow; an answer that starts off by describing how scatter and gather work, and then how the scatterv/gatherv variants extend them, can be found here.
Crucially, for the simpler Gather operation, where every chunk is the same size, the MPI library can easily precompute where each chunk should land in the final assembled array; in the more general gatherv operation, where that is less obvious, you have the option, and in fact the obligation, to spell out exactly where each item should start.
The only extra complication here is that you are dealing with strings, so you probably do not want everything jammed together back to back; you will want extra padding for the spaces between words, and of course a null terminator at the end.
So, say you have five processes that each want to send a string:
Rank 0: "Hello" (len=5)
Rank 1: "world!" (len=6)
Rank 2: "Bonjour" (len=7)
Rank 3: "le" (len=2)
Rank 4: "monde!" (len=6)
You want to combine these into the single global string:
Hello world! Bonjour le monde!\0
          111111111122222222223
0123456789012345678901234567890
recvcounts={5,6,7,2,6}; /* just the lengths */
displs = {0,6,13,21,24}; /* cumulative sum of len+1 for padding */
You can see that the first displacement is 0, and that displacement i is the sum of (recvcounts[j] + 1) over j = 0..i-1:
i    count[i]   count[i]+1   displ[i]   displ[i]-displ[i-1]
------------------------------------------------------------
0       5           6            0
1       6           7            6              6
2       7           8           13              7
3       2           3           21              8
4       6           7           24              3
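If it helps to sanity-check that arithmetic, here is a minimal standalone sketch (plain C, no MPI, with the example lengths hard-coded) of the same prefix-sum rule:
#include <stdio.h>

int main(void) {
    const int n = 5;
    const int recvcounts[5] = {5, 6, 7, 2, 6};   /* the string lengths above */
    int displs[5];

    /* displs[0] = 0; each later displacement adds the previous count plus
     * one extra slot for the space (or final '\0') after that word.      */
    displs[0] = 0;
    for (int i = 1; i < n; i++)
        displs[i] = displs[i-1] + recvcounts[i-1] + 1;

    for (int i = 0; i < n; i++)
        printf("i=%d  count=%d  displ=%d\n", i, recvcounts[i], displs[i]);
    return 0;
}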
That leads directly to the following implementation:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mpi.h"
#define nstrings 5
const char *const strings[nstrings] = {"Hello","world!","Bonjour","le","monde!"};
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/* Everyone gets a string */
int myStringNum = rank % nstrings;
char *mystring = (char *)strings[myStringNum];
int mylen = strlen(mystring);
printf("Rank %d: %s\n", rank, mystring);
/*
* Now, we Gather the string lengths to the root process,
* so we can create the buffer into which we'll receive the strings
*/
const int root = 0;
int *recvcounts = NULL;
/* Only root has the received data */
if (rank == root)
recvcounts = malloc( size * sizeof(int)) ;
MPI_Gather(&mylen, 1, MPI_INT,
recvcounts, 1, MPI_INT,
root, MPI_COMM_WORLD);
/*
* Figure out the total length of string,
* and displacements for each rank
*/
int totlen = 0;
int *displs = NULL;
char *totalstring = NULL;
if (rank == root) {
displs = malloc( size * sizeof(int) );
displs[0] = 0;
totlen += recvcounts[0]+1;
for (int i=1; i<size; i++) {
totlen += recvcounts[i]+1; /* plus one for space or \0 after words */
displs[i] = displs[i-1] + recvcounts[i-1] + 1;
}
/* allocate string, pre-fill with spaces and null terminator */
totalstring = malloc(totlen * sizeof(char));
for (int i=0; i<totlen-1; i++)
totalstring[i] = ' ';
totalstring[totlen-1] = '\0';
}
/*
* Now we have the receive buffer, counts, and displacements, and
* can gather the strings
*/
MPI_Gatherv(mystring, mylen, MPI_CHAR,
totalstring, recvcounts, displs, MPI_CHAR,
root, MPI_COMM_WORLD);
if (rank == root) {
printf("%d: <%s>\n", rank, totalstring);
free(totalstring);
free(displs);
free(recvcounts);
}
MPI_Finalize();
return 0;
}
Running this gives:
$ mpicc -o gatherstring gatherstring.c -Wall -std=c99
$ mpirun -np 5 ./gatherstring
Rank 0: Hello
Rank 3: le
Rank 4: monde!
Rank 1: world!
Rank 2: Bonjour
0: <Hello world! Bonjour le monde!>
Answer 1 (score: 0)
MPI_Gather+MPI_Gatherv requires computing displacements for all ranks, which I think is unnecessary when your strings all have roughly similar lengths. Instead, you can use MPI_Allreduce+MPI_Gather with a padded receive buffer for the strings. The padding is sized from the longest string, which is found with MPI_Allreduce. Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <mpi.h>
int main(int argc, char** argv) {
MPI_Init(NULL, NULL);
int rank;
int nranks;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nranks);
srand(time(NULL) + rank);
int my_len = (rand() % 10) + 1; // str_len \in [1, 10]
int my_char = (rand() % 26) + 65; // str_char \in [65, 90] = [A, Z]
char my_str[my_len + 1];
memset(my_str, my_char, my_len);
my_str[my_len] = '\0';
printf("rank %d of %d has string=%s with size=%zu\n",
rank, nranks, my_str, strlen(my_str));
int max_len = 0;
MPI_Allreduce(&my_len, &max_len, 1,
MPI_INT, MPI_MAX, MPI_COMM_WORLD);
// + 1 to account for the null terminator '\0' at the end
char my_str_padded[max_len + 1];
memset(my_str_padded, '\0', max_len + 1);
memcpy(my_str_padded, my_str, my_len);
char *all_str = NULL;
if(!rank) {
int all_len = (max_len + 1) * nranks;
all_str = malloc(all_len * sizeof(char));
memset(all_str, '\0', all_len);
}
MPI_Gather(my_str_padded, max_len + 1, MPI_CHAR,
all_str, max_len + 1, MPI_CHAR, 0, MPI_COMM_WORLD);
if(!rank) {
char *str_idx = all_str;
int rank_idx = 0;
while(*str_idx) {
printf("rank %d sent string=%s with size=%zu\n",
rank_idx, str_idx, strlen(str_idx));
str_idx = str_idx + max_len + 1;
rank_idx++;
}
}
MPI_Finalize();
return(0);
}
Keep in mind that there is a trade-off between MPI_Gather+MPI_Gatherv, which uses displacements, and MPI_Allreduce+MPI_Gather, which uses padding: the former spends extra time computing the displacements, while the latter needs extra storage to pad the receive buffers.
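To put a rough number on the storage side of that trade-off, here is a small standalone sketch (hypothetical lengths, not taken from either answer) comparing the root-side buffer sizes of the two approaches:
#include <stdio.h>

int main(void) {
    const int n = 4;
    const int len[4] = {1000000, 10, 10, 10};  /* one long string, three short ones */

    int sum = 0, max = 0;
    for (int i = 0; i < n; i++) {
        sum += len[i];
        if (len[i] > max) max = len[i];
    }

    /* Gatherv: one slot per character plus one separator/terminator per string. */
    printf("Gatherv buffer      : %d bytes\n", sum + n);
    /* Padded Gather: every rank is padded out to the longest string plus '\0'.  */
    printf("padded Gather buffer: %d bytes\n", n * (max + 1));
    return 0;
}
With strings of similar length the two sizes come out close; with one long outlier, as in this made-up case, the padded buffer can be several times larger.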
I also benchmarked both approaches with large string buffers and did not find any significant difference in runtime.