分段错误-使用MPI Send / Recv在C中进行矩阵乘法

时间:2018-07-30 23:57:24

标签: c parallel-processing mpi matrix-multiplication

我正在用MPI在C语言中做一些矩阵乘法。

我的测试矩阵的格式为:

A(i,j)= 2i + j

B(i,j)= i + 3j

我想使用MPI_Bcast将矩阵A广播到所有级别,并使用MPI_Send,RECV作为矩阵B并计算结果矩阵。

从我注意到的错误来看,似乎是在重新组合结果之间,因为主线程的MPI发送和接收工作正常,但此后立即产生了段错误11。

我以前曾尝试广播A矩阵并发送B矩阵,但是由于段错误也无法正常工作。现在,我尝试同时使用MPI_Send发送它们,并得到以下错误。它说MPI_Send中发生错误,我已经检查了所有内容,但无法解决。我在代码的许多地方添加了printf行,以知道到达的位置。直到 printf("%d\n", a[0][0]); 然后停止。

关于我的评论,我更改了allocMatrix的方式。

更新的代码:

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>

//Matrix A's x and y dimensions
#define aX 3
#define aY 2

//Matrix A's i and j values
#define aI 2
#define aJ 1

//Matrix B's x and y dimensions
#define bX 2
#define bY 2

//MatrixB's i and j values
#define bI 1
#define bJ 3


int** allocMatrix(int sizeX, int sizeY);
int** createMatrix(int sizeX, int sizeY, int i, int j);
void printMatrix(int** matrix, int sizeX, int sizeY);

int main (int argc, char *argv[])
{
    MPI_Status stat;
    int rank, numranks;
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numranks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);


    int** a = allocMatrix(aX, aY);
    int** b;
    if (rank == 0) {
        a = createMatrix(aX, aY, aI, aJ);
        b = createMatrix(bX, bY, bI, bJ);
    }
//  MPI_Bcast(&a, aX * aY, MPI_INT, 0, MPI_COMM_WORLD);

/*  int** bSub;
    int *counts = (int *) malloc(bY * sizeof(int));
    int *displacements = (int *) malloc(bY * sizeOf(int));
    int offset = 0;
    for(int y = 0; y < bY; y++){
        displacements[i] = offset;
        offset += b[y]
    }
    MPI_Scatterv(b, bX * bY, MPI_INT, bSub,  
*/

    if (rank == 0){
        int rowCount = bY / (numranks - 1);
        int displacement = 0;
        int i;
        for(i = 1; i < numranks; i++){
            int** resultSub;
            printf("Recieved from rank %d\n", i);

            MPI_Recv(&displacement, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
            MPI_Recv(&rowCount, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
            MPI_Recv(&resultSub, rowCount * bX, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
            for(y = displacement; y < displacement + rowCount; y++){
                for(x = 0; x < bX; x++){
                    result[y][x] = resultSub[y - displacement][x];
                }
            }
        }
        printf("Result:\n");

        for(y = 0; y < aY; y++){
            for(int x = 0; x < bX; x++){
                printf("%d ", result[y][x]);
            }
            printf("\n");
        }
    } else {
        int displacement;
        int rowCount;
        printf("Rank %d:\n", rank);
        MPI_Recv(&a, aX* aY, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        printf("Rank %d got a\n", rank);
        printf("%d\n", a[0][0]);
        MPI_Recv(&displacement, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        printf("Rank %d received displacement\n", rank);
        MPI_Recv(&rowCount, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        int** bSub = allocMatrix(aX,rowCount);
        printf("Rank %d received rowCount\n", rank);
        MPI_Recv(&bSub, rowCount * bX, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        printf("Rank %d received bSub\n", rank);
        int** result = allocMatrix(bX, rowCount);
        int x, y, j;
        printf("Rank %d starting\n", rank);
        for(y = 0; y < rowCount; y++){
            for(x = 0; x < aX; x++){
                int sum = 0;
                printf("Rank %d X: %d, Y: %d\n", rank, x, y);
                 for(j = 0; j < bX; j++){
                    printf("Rank %d J: %d\n", rank, j);
                    sum += a[j][x] * bSub[y][j];
                }
                result[y][x] = sum;     
            }
        }
        printf("Rank %d ending\n", rank);
        MPI_Send(&displacement, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        MPI_Send(&rowCount, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        MPI_Send(&result, bX * rowCount, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
//  printMatrix(a, aX, aY);
//  printMatrix(b, bX, bY); 
    MPI_Finalize();
}

int** allocMatrix(int sizeX, int sizeY){
    /*int** matrix = malloc(sizeY * sizeof(*matrix));
    int y;
    for(y = 0; y < sizeY; y++){
        matrix[y] = malloc(sizeX * sizeof(*matrix[y]));
    }
    return matrix;*/
    int *data = (int *)malloc(sizeX * sizeY * sizeof(int));
    int **matrix =(int **)malloc(sizeY * sizeof(int*));
    int i;
    for(int i = 0; i < sizeY; i++){
        matrix[i] = &(data[sizeX * i]);
    }
int** createMatrix(int sizeX, int sizeY, int i, int j){
    int** matrix = allocMatrix(sizeX, sizeY);
    int x, y;
    for(y = 0; y < sizeY; y++){
        for(x = 0; x < sizeX; x++){
            matrix[y][x] = i * x + j * y;
        }
    }   
    return matrix;
}

void printMatrix(int** matrix, int sizeX, int sizeY){
        int x, y;
    for(y = 0; y < sizeY; y++){
                for(x = 0; x < sizeX; x++){
                        printf("%d ", matrix[y][x]);
                }
                printf("\n");
        }
}

我现在收到的错误:

Rank 15:
Rank 1:
Rank 7:
Rank 13:
Rank 16:
Rank 4:
Rank 2:
Rank 8:
Rank 17:
Rank 10:
Rank 6:
Rank 11:
Rank 5:
Rank 12:
Rank 9:
Rank 14:
[zarya:46623] *** Process received signal ***
[zarya:46623] Signal: Segmentation fault (11)
[zarya:46623] Signal code: Address not mapped (1)
[zarya:46623] Failing at address: 0x200000000
Rank 3:
Rank 1 got a
[zarya:46623] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7ff4f72bf390]
[zarya:46623] [ 1] ./mm.cx[0x400f1c]
[zarya:46623] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7ff4f6f04830]
[zarya:46623] [ 3] ./mm.cx[0x400979]
[zarya:46623] *** End of error message ***
[winston:06305] *** Process received signal ***
[winston:06305] Signal: Segmentation fault (11)
[winston:06305] Signal code: Address not mapped (1)
[winston:06305] Failing at address: 0x200000000
Rank 2 got a
[winston:06305] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fe4536d1390]
[winston:06305] [ 1] ./mm.cx[0x400f1c]
[winston:06305] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fe453316830]
[winston:06305] [ 3] ./mm.cx[0x400979]
[winston:06305] *** End of error message ***
[roadhog:16798] *** Process received signal ***
[roadhog:16798] Signal: Segmentation fault (11)
[roadhog:16798] Signal code: Address not mapped (1)
[roadhog:16798] Failing at address: 0x200000000
Rank 3 got a
[roadhog:16798] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f7216429390]
[roadhog:16798] [ 1] ./mm.cx[0x400f1c]
[roadhog:16798] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f721606e830]
[roadhog:16798] [ 3] ./mm.cx[0x400979]
[roadhog:16798] *** End of error message ***
[orisa:32932] *** Process received signal ***
[orisa:32932] Signal: Segmentation fault (11)
[orisa:32932] Signal code: Address not mapped (1)
[orisa:32932] Failing at address: 0x200000000
Rank 4 got a
[orisa:32932] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f18ce4d7390]
[orisa:32932] [ 1] ./mm.cx[0x400f1c]
[orisa:32932] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f18ce11c830]
[orisa:32932] [ 3] ./mm.cx[0x400979]
[orisa:32932] *** End of error message ***
[hanzo:43879] *** Process received signal ***

一些随机垃圾。

0 个答案:

没有答案