Question

我想将矩阵分区为块（不是条带），然后使用MPI_Scatter分配这些块。

我提出了有效的解决方案，但我认为它远非“最佳实践”。我有8x8矩阵，填充0到63之间的数字。然后我将它分成4个4x4块，使用MPI_Type_vector并通过MPI_Send分配它，但这需要一些额外的计算，因为我必须计算大矩阵中每个块的偏移量。 / p>

如果我使用散点图，则第一个（左上角）块传输正常，但其他块不是（块的开始错误偏移）。

那么可以使用MPI_Scatter传输矩阵块，或者进行所需分解的最佳方法是什么？

这是我的代码：

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define SIZE 8


int main(void) {

        MPI_Init(NULL, NULL);
        int p, rank;
        MPI_Comm_size(MPI_COMM_WORLD, &p);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        char i;

        char a[SIZE*SIZE];
        char b[(SIZE/2)*(SIZE/2)];

        MPI_Datatype columntype;
        MPI_Datatype columntype2;

        MPI_Type_vector(4, 4, SIZE, MPI_CHAR, &columntype2);
        MPI_Type_create_resized( columntype2, 0, sizeof(MPI_CHAR), &columntype );
        MPI_Type_commit(&columntype);

        if(rank == 0) {
                for( i = 0; i < SIZE*SIZE; i++) {
                        a[i] = i;
                }

                for(int rec=0; rec < p; rec++) {
                        int offset = (rec%2)*4 + (rec/2)*32;
                      MPI_Send (a+offset, 1, columntype, rec, 0, MPI_COMM_WORLD);
                }
        }
        MPI_Recv (b, 16, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        //MPI_Scatter(&a, 1, boki, &b, 16, MPI_CHAR , 0, MPI_COMM_WORLD);

        printf("rank= %d  b= \n%d %d %d %d\n%d %d %d %d\n%d %d %d %d\n%d %d %d %d\n", rank, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);

        MPI_Finalize();

        return 0;
}

Answer 1

你所拥有的几乎是“最佳实践”;在你习惯它之前，它只是有点混乱。

但有两件事：

首先，要小心：我假设sizeof(MPI_CHAR)是4个字节，而不是1. MPI_CHAR是一个（整数）常量，它描述（到MPI库）一个字符。您可能需要sizeof(char)或SIZE/2*sizeof(char)或其他任何方便的内容。但是调整大小的基本思路是正确的。

其次，我认为你仍然坚持使用MPI_Scatterv，因为没有简单的方法可以使每个块之间的偏移量相同。也就是说，第一个块中的第一个元素位于a[0]，第二个元素位于a[SIZE/2]（大小/ 2的跳跃），下一个位于a[SIZE*(SIZE/2)]（{{1}的跳跃}}）。因此，您需要能够手动生成偏移量。

以下似乎对我有用（当“大小”表示“行数”与“列数”等时，我将其概括一点以使其更清晰）：

(SIZE-1)*(SIZE/2)

运行：

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define COLS  12
#define ROWS  8

int main(int argc, char **argv) {

    MPI_Init(&argc, &argv);
    int p, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    char i;

    char a[ROWS*COLS];
    const int NPROWS=2;  /* number of rows in _decomposition_ */
    const int NPCOLS=3;  /* number of cols in _decomposition_ */
    const int BLOCKROWS = ROWS/NPROWS;  /* number of rows in _block_ */
    const int BLOCKCOLS = COLS/NPCOLS; /* number of cols in _block_ */

    if (rank == 0) {
        for (int ii=0; ii<ROWS*COLS; ii++) {
            a[ii] = (char)ii;
        }
    }

    if (p != NPROWS*NPCOLS) {
        fprintf(stderr,"Error: number of PEs %d != %d x %d\n", p, NPROWS, NPCOLS);
        MPI_Finalize();
        exit(-1);
    }
    char b[BLOCKROWS*BLOCKCOLS];
    for (int ii=0; ii<BLOCKROWS*BLOCKCOLS; ii++) b[ii] = 0;

    MPI_Datatype blocktype;
    MPI_Datatype blocktype2;

    MPI_Type_vector(BLOCKROWS, BLOCKCOLS, COLS, MPI_CHAR, &blocktype2);
    MPI_Type_create_resized( blocktype2, 0, sizeof(char), &blocktype);
    MPI_Type_commit(&blocktype);

    int disps[NPROWS*NPCOLS];
    int counts[NPROWS*NPCOLS];
    for (int ii=0; ii<NPROWS; ii++) {
        for (int jj=0; jj<NPCOLS; jj++) {
            disps[ii*NPCOLS+jj] = ii*COLS*BLOCKROWS+jj*BLOCKCOLS;
            counts [ii*NPCOLS+jj] = 1;
        }
    }

    MPI_Scatterv(a, counts, disps, blocktype, b, BLOCKROWS*BLOCKCOLS, MPI_CHAR, 0, MPI_COMM_WORLD);
    /* each proc prints it's "b" out, in order */
    for (int proc=0; proc<p; proc++) {
        if (proc == rank) {
            printf("Rank = %d\n", rank);
            if (rank == 0) {
                printf("Global matrix: \n");
                for (int ii=0; ii<ROWS; ii++) {
                    for (int jj=0; jj<COLS; jj++) {
                        printf("%3d ",(int)a[ii*COLS+jj]);
                    }
                    printf("\n");
                }
            }
            printf("Local Matrix:\n");
            for (int ii=0; ii<BLOCKROWS; ii++) {
                for (int jj=0; jj<BLOCKCOLS; jj++) {
                    printf("%3d ",(int)b[ii*BLOCKCOLS+jj]);
                }
                printf("\n");
            }
            printf("\n");
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Finalize();

    return 0;
}

MPI分区矩阵成块

1 个答案: