Cannon's algorithm using MPI

Date: 2018-12-09 17:57:42

Tags: c mpi ipc matrix-multiplication

I want to implement Cannon's algorithm in C with MPI, using a Cartesian communicator for the shifts (with its default wraparound behavior) and sending 2D blocks of the two matrices between the processes.

I have tried to follow some tutorials found online, but I realized none of them did what I wanted: use both 2D blocks and a Cartesian communicator.

EDIT: I managed to get past that error after realizing I was using the proc_grid_size variable the wrong way: I had confused the size of the process grid with the block size and was indexing into unallocated memory.

I am running with 25 processes and, as input, two 10 * 10 matrices stored in two different files.

I am currently trying to implement the shift operations using the MPI_Cart_shift function, but I do not know how to send the blocks over to the neighbors.

This is my current implementation of this specific part, which does not work (the application just hangs):

MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
    block_size * block_size, MPI_INT,
    0, MPI_COMM_WORLD);

MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
    block_size * block_size, MPI_INT,
    0, MPI_COMM_WORLD);

int nlocal;
int npes, dims[2], periods[2];
int myrank, my2drank, mycoords[2];
int uprank, downrank, leftrank, rightrank, coords[2];
int shiftsource, shiftdest;
MPI_Status status;
MPI_Comm comm_2d;

// Get the communicator related information
MPI_Comm_size(MPI_COMM_WORLD, &npes);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

// Set up the Cartesian topology 
dims[0] = dims[1] = proc_matrix_size;//sqrt(npes);

// Set the periods for wraparound connections 
periods[0] = periods[1] = 1;

// Create the Cartesian topology, with rank reordering 
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);

// Get the rank and coordinates with respect to the new topology
MPI_Comm_rank(comm_2d, &my2drank);
MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);


// Compute ranks of the up and left shifts
// Get row neighbors (direction = 1, displacement = 1)
MPI_Cart_shift(comm_2d, 1, 1, &leftrank, &rightrank);
// Get column neighbors (direction = 0, displacement = 1)
MPI_Cart_shift(comm_2d, 0, 1, &uprank, &downrank);

// Determine the dimension of the local matrix block 
nlocal = block_size;// n / dims[0];
MPI_Cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(a[0][0]), 1, subarrtype,
    shiftdest, 1, shiftsource, 1, comm_2d, &status);
MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(b[0][0]), 1, subarrtype,
    shiftdest, 1, shiftsource, 1, comm_2d, &status);

After killing the application, I found that the root process was the only one left hanging:

F:\Facultate\AN_4\PDC\Labs\MPI\Cannon\x64\Release>mpiexec -np 25 Cannon.exe a.txt b.txt
mpiexec aborting job...

job aborted:
[ranks] message

[0] job aborted by user

[1-24] terminated

---- error analysis -----

[0] on DESKTOP-JB1815M
ctrl-c was hit. job aborted by the user.

---- error analysis -----

Solved code for the original question:

int malloc2D(int ***array, int n, int m) {
    int i;
    /* allocate the n*m contiguous items */
    int *p = (int*) calloc(n*m, sizeof(int));
    if (!p) return -1;

    /* allocate the row pointers into the memory */
    (*array) = (int**) calloc(n, sizeof(int*));
    if (!(*array)) {
        free(p);
        return -1;
    }

    /* set up the pointers into the contiguous memory */
    for (i = 0; i < n; i++)
        (*array)[i] = &(p[i*m]);

    return 0;
}

int free2D(int ***array) {
    /* free the memory - the first element of the array is at the start */
    free(&((*array)[0][0]));

    /* free the pointers into the memory */
    free(*array);

    return 0;
}

int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);

    if (argc != 3) {
        fprintf(stderr, "Not enough arguments passed! Make sure you pass 2 filenames.\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Find out rank, size
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Declare file pointers
    FILE* fa = NULL;
    FILE* fb = NULL;

    // Declare matrix pointers
    int **A = NULL;
    int **B = NULL;
    int **C = NULL;

    // Declare matrix dimensions
    int ma = 0, na = 0;
    int mb = 0, nb = 0;

    // Nr of processes on each line/column in process mesh
    int proc_matrix_size = (int)sqrt(world_size);

    // Single value for quadratic matrix size
    int n = 0;

    // Nr of elements on each line/column in local matrix
    // of each process
    int block_size = 0;

    // Open files and read matrices
    if (world_rank == 0)
    {
        fa = fopen(argv[1], "r");
        fb = fopen(argv[2], "r");

        // Read matrix dimensions
        fscanf(fa, "%d %d\n", &ma, &na);
        fscanf(fb, "%d %d\n", &mb, &nb);

        // Check if matrices are quadratic
        if ((ma != na) || (na != mb) || (mb != nb))
        {
            printf("Invalid matrices dimensions\n");
            return 0;
        }

        n = na;

        // Check if sqrt(nr_processes) divides matrix dimension
        if ((n % proc_matrix_size != 0) || (world_size % proc_matrix_size != 0))
        {
            printf("Number of processes does not fit matrix size\n");
            return 0;
        }

        block_size = n / proc_matrix_size;

        malloc2D(&A, n, n);
        malloc2D(&B, n, n);
        malloc2D(&C, n, n);

        // Read matrices A & B from file
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                fscanf(fa, "%d ", &A[i][j]);
                fscanf(fb, "%d ", &B[i][j]);
            }
            fscanf(fa, "\n");
        }

        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }
    else {
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }

    /*
        Divide matrices in blocks and send each block to the corresponding process
    */

    // Declare global pointers to matrices
    int *globalAptr = NULL;
    int *globalBptr = NULL;
    int *globalCptr = NULL;

    // Declare global return pointers
    int *globalA2ptr = NULL;
    int *globalB2ptr = NULL;

    int **A2 = NULL;
    int **B2 = NULL;

    // Declare local matrix pointers
    int **a = NULL;
    int **b = NULL;
    int **c = NULL;

    malloc2D(&A2, n, n);
    malloc2D(&B2, n, n);

    if (world_rank == 0)
    {
        globalAptr = &(A[0][0]);
        globalBptr = &(B[0][0]);
        globalA2ptr = &(A2[0][0]);
        globalB2ptr = &(B2[0][0]);
        globalCptr = &(C[0][0]);
    }

    malloc2D(&a, block_size, block_size);
    malloc2D(&b, block_size, block_size);
    malloc2D(&c, block_size, block_size);

    // Sizes of input global matrix
    int sizes[2] = { n, n };

    // Sizes of each block
    int subsizes[2] = { block_size, block_size };

    // Beginning of current block
    int starts[2] = { 0, 0 };

    // Declare subarray type
    MPI_Datatype type, subarrtype;
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
    MPI_Type_create_resized(type, 0, block_size * sizeof(int), &subarrtype);
    MPI_Type_commit(&subarrtype);

    // Scatter the A and B to all processes
    int* sendcounts = (int*)malloc(proc_matrix_size * proc_matrix_size * sizeof(int));
    int* displs = (int*)malloc(proc_matrix_size * proc_matrix_size * sizeof(int));

    if (world_rank == 0)
    {
        for (int i = 0; i < proc_matrix_size * proc_matrix_size; i++)
            sendcounts[i] = 1;
        int disp = 0;
        for (int i = 0; i < proc_matrix_size; i++) {
            for (int j = 0; j < proc_matrix_size; j++) {
                displs[i * proc_matrix_size + j] = disp;
                disp += 1;
            }
            disp += ((n / proc_matrix_size) - 1) * proc_matrix_size;
        }
    }

    MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
        block_size * block_size, MPI_INT,
        0, MPI_COMM_WORLD);

    MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
        block_size * block_size, MPI_INT,
        0, MPI_COMM_WORLD);

    for (int i = 0; i < block_size; i++) {
        for (int j = 0; j < block_size; j++) {
            a[i][j] = 10 + a[i][j];
            b[i][j] = 10 + b[i][j];
        }
    }

    // It all goes back to process 0
    MPI_Gatherv(&(a[0][0]), block_size * block_size, MPI_INT,
        globalA2ptr, sendcounts, displs, subarrtype,
        0, MPI_COMM_WORLD);

    MPI_Gatherv(&(b[0][0]), block_size * block_size, MPI_INT,
        globalB2ptr, sendcounts, displs, subarrtype,
        0, MPI_COMM_WORLD);

    MPI_Finalize();

    return 0;
}

OLD: I would like to mention that, for the moment, I am trying to send the blocks over the default communicator, and I plan to implement the shift operations and the Cartesian communicator after I manage to send the matrix blocks.

I need help with the Scatterv function, which throws the following error:

job aborted:
[ranks] message

[0] fatal error
Fatal error in MPI_Scatterv: Invalid count, error stack:
MPI_Scatterv(sbuf=0x0000029262048D40, scnts=0x00000292620482B0, displs=0x0000029262048250, dtype=USER, rbuf=0x000002926203ED30, rcount=25, MPI_INT, root=0, MPI_COMM_WORLD) failed
Negative count, value is -1912594387

[1-7] terminated

This is the code I have written so far:

#include "stdafx.h"
#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include <assert.h>
#include <cstdlib>
#include <math.h> 

int malloc2D(int ***array, int n, int m) {
    int i;
    /* allocate the n*m contiguous items */
    int *p = (int*) malloc(n*m * sizeof(int));
    if (!p) return -1;

    /* allocate the row pointers into the memory */
    (*array) = (int**) malloc(n * sizeof(int*));
    if (!(*array)) {
        free(p);
        return -1;
    }

    /* set up the pointers into the contiguous memory */
    for (i = 0; i<n; i++)
        (*array)[i] = &(p[i*m]);

    return 0;
}

int free2D(int ***array) {
    /* free the memory - the first element of the array is at the start */
    free(&((*array)[0][0]));

    /* free the pointers into the memory */
    free(*array);

    return 0;
}
int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);

    if (argc != 3) {
        fprintf(stderr, "Not enough arguments passed! Make sure you pass 2 filenames.\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Find out rank, size
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Declare file pointers
    FILE* fa = NULL;
    FILE* fb = NULL;

    // Declare matrix pointers
    int **A = NULL;
    int **B = NULL;
    int **C = NULL;

    // Declare matrix dimensions
    int ma = 0, na = 0;
    int mb = 0, nb = 0;

    // Nr of processes on each line/column in process mesh
    int proc_grid_size = (int)sqrt(world_size);

    // Single value for quadratic matrix size
    int n = 0;

    // Nr of elements on each line/column in local matrix
    // of each process
    int block_size = 0;

    // Open files and read matrices
    if (world_rank == 0)
    {
        fa = fopen(argv[1], "r");
        fb = fopen(argv[2], "r");

        // Read matrix dimensions
        fscanf(fa, "%d %d\n", &ma, &na);
        fscanf(fb, "%d %d\n", &mb, &nb);

        // Check if matrices are quadratic
        if ((ma != na) || (na != mb) || (mb != nb))
        {
            printf("Invalid matrices dimensions\n");
            return 0;
        }

        n = na;

        // Check if sqrt(nr_processes) divides matrix dimension 
        if ((n % proc_grid_size != 0) || (world_size % proc_grid_size != 0))
        { 
            printf("Number of processes does not fit matrix size\n");
            return 0;
        }

        block_size = n / proc_grid_size;

        // Initialize matrices
        A = (int**)calloc(n, sizeof(int*));
        B = (int**)calloc(n, sizeof(int*));
        //C = (int**)calloc(n, sizeof(int*));
        for (int i = 0; i < n; i++)
        {
            A[i] = (int*)calloc(n, sizeof(int));
            B[i] = (int*)calloc(n, sizeof(int));
            //C[i] = (int*)calloc(n, sizeof(int));
        }

        // Read matrix A from file
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                fscanf(fa, "%d ", &A[i][j]);
                printf("%d ", A[i][j]);
            }
            fscanf(fa, "\n");
            printf("\n");
        }

        // Read matrix B from file
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                fscanf(fb, "%d ", &B[i][j]);
                printf("%d ", B[i][j]);
            }           
            fscanf(fb, "\n");
            printf("\n");
        }

        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }
    else {
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }


    /* 
        Divide matrices in blocks and send each block to the corresponding process
    */

    // Sizes of input global matrix
    int sizes[2] = { n, n };

    // Sizes of each block
    int subsizes[2] = { block_size, block_size };

    // Beginning of current block
    int starts[2] = { 0,0 };        

    // Declare subarray type
    MPI_Datatype type, subarrtype;
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
    MPI_Type_create_resized(type, 0, block_size * sizeof(int), &subarrtype);
    MPI_Type_commit(&subarrtype);

    // Declare global pointers to matrices
    int *globalAptr = NULL;
    int *globalBptr = NULL;

    int **A2 = NULL;
    int **B2 = NULL;

    malloc2D(&A2, n, n);
    malloc2D(&B2, n, n);

    // Declare global return pointers
    int *globalA2ptr = NULL;
    int *globalB2ptr = NULL;

    if (world_rank == 0)
    {
        globalAptr = &(A[0][0]);
        globalBptr = &(B[0][0]);
        globalA2ptr = &(A2[0][0]);
        globalB2ptr = &(B2[0][0]);
    }

    // Declare local matrix pointers
    int **a = NULL;
    int **b = NULL;

    malloc2D(&a, block_size, block_size);
    malloc2D(&b, block_size, block_size);

    // Scatter the A and B to all processes
    int* sendcounts = (int*)malloc(proc_grid_size * proc_grid_size * sizeof(int));
    int* displs = (int*)malloc(proc_grid_size * proc_grid_size * sizeof(int));

    if (world_rank == 0) 
    {
        for (int i = 0; i < proc_grid_size * proc_grid_size; i++) 
            sendcounts[i] = 1;
        int disp = 0;
        for (int i = 0; i < proc_grid_size; i++) {
            for (int j = 0; j < proc_grid_size; j++) {
                displs[i * proc_grid_size + j] = disp;
                disp += 1;
            }
            disp += ((block_size) - 1) * proc_grid_size;
        }

        for (int i = 0; i < proc_grid_size * proc_grid_size; i++)
        {
            printf("Send cound: %d\n", sendcounts[i]);
        }

    }
    MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
        block_size * block_size, MPI_INT,
        0, MPI_COMM_WORLD);

    MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
        block_size * block_size, MPI_INT,
        0, MPI_COMM_WORLD);
    // Now each processor has its local array, and can process it
    for (int i = 0; i < block_size; i++) {
        for (int j = 0; j < block_size; j++) {
            a[i][j] = 10 + a[i][j];
            b[i][j] = 10 + b[i][j];
        }
    }

    // It all goes back to process 0
    MPI_Gatherv(&(a[0][0]), block_size * block_size, MPI_INT,
        globalA2ptr, sendcounts, displs, subarrtype,
        0, MPI_COMM_WORLD);

    MPI_Gatherv(&(b[0][0]), block_size * block_size, MPI_INT,
        globalB2ptr, sendcounts, displs, subarrtype,
        0, MPI_COMM_WORLD);


    MPI_Finalize();

    return 0;

}

Thank you very much!

0 Answers:

No answers yet