我正在用MPI在C语言中做一些矩阵乘法。
我的测试矩阵的格式为:
A(i,j)= 2i + j
B(i,j)= i + 3j
我想使用MPI_Bcast将矩阵A广播到所有进程(rank),并使用MPI_Send/MPI_Recv分发矩阵B,然后计算结果矩阵。
从我观察到的错误来看,问题似乎出在重新组合结果的阶段:主进程(rank 0)的MPI发送和接收工作正常,但此后立即出现了段错误(信号11)。
我之前曾尝试广播矩阵A并发送矩阵B,但同样因为段错误而无法正常工作。现在,我改为用MPI_Send同时发送这两个矩阵,并得到了下面的错误。错误信息显示问题出在MPI_Send中,我已经检查了所有地方,但仍无法解决。我在代码的许多位置添加了printf语句,以便了解程序执行到了哪里。程序一直执行到
printf("%d\n", a[0][0]);
然后停止。
针对评论中提出的意见,我更改了allocMatrix的实现方式。
更新的代码:
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
// Matrix A's dimensions: aX is passed as sizeX (ints per row) and aY as
// sizeY (number of rows) when A is allocated.
#define aX 3
#define aY 2
// Coefficients used to fill A: createMatrix() receives aI as `i` and aJ as
// `j`, and stores i * x + j * y at row y, column x.
#define aI 2
#define aJ 1
// Matrix B's dimensions: bX is the number of ints per row, bY the number of
// rows.
#define bX 2
#define bY 2
// Coefficients used to fill B, with the same meaning as aI / aJ.
#define bI 1
#define bJ 3
// Allocates storage for a matrix of sizeY rows with sizeX ints in each row.
int** allocMatrix(int sizeX, int sizeY);
// Allocates a sizeY-by-sizeX matrix and sets the element at row y, column x
// to i * x + j * y.
int** createMatrix(int sizeX, int sizeY, int i, int j);
// Prints the matrix to stdout, one row per line.
void printMatrix(int** matrix, int sizeX, int sizeY);
int main (int argc, char *argv[])
{
MPI_Status stat;
int rank, numranks;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &numranks);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int** a = allocMatrix(aX, aY);
int** b;
if (rank == 0) {
a = createMatrix(aX, aY, aI, aJ);
b = createMatrix(bX, bY, bI, bJ);
}
// MPI_Bcast(&a, aX * aY, MPI_INT, 0, MPI_COMM_WORLD);
/* int** bSub;
int *counts = (int *) malloc(bY * sizeof(int));
int *displacements = (int *) malloc(bY * sizeOf(int));
int offset = 0;
for(int y = 0; y < bY; y++){
displacements[i] = offset;
offset += b[y]
}
MPI_Scatterv(b, bX * bY, MPI_INT, bSub,
*/
if (rank == 0){
int rowCount = bY / (numranks - 1);
int displacement = 0;
int i;
for(i = 1; i < numranks; i++){
int** resultSub;
printf("Recieved from rank %d\n", i);
MPI_Recv(&displacement, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
MPI_Recv(&rowCount, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
MPI_Recv(&resultSub, rowCount * bX, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
for(y = displacement; y < displacement + rowCount; y++){
for(x = 0; x < bX; x++){
result[y][x] = resultSub[y - displacement][x];
}
}
}
printf("Result:\n");
for(y = 0; y < aY; y++){
for(int x = 0; x < bX; x++){
printf("%d ", result[y][x]);
}
printf("\n");
}
} else {
int displacement;
int rowCount;
printf("Rank %d:\n", rank);
MPI_Recv(&a, aX* aY, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
printf("Rank %d got a\n", rank);
printf("%d\n", a[0][0]);
MPI_Recv(&displacement, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
printf("Rank %d received displacement\n", rank);
MPI_Recv(&rowCount, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
int** bSub = allocMatrix(aX,rowCount);
printf("Rank %d received rowCount\n", rank);
MPI_Recv(&bSub, rowCount * bX, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
printf("Rank %d received bSub\n", rank);
int** result = allocMatrix(bX, rowCount);
int x, y, j;
printf("Rank %d starting\n", rank);
for(y = 0; y < rowCount; y++){
for(x = 0; x < aX; x++){
int sum = 0;
printf("Rank %d X: %d, Y: %d\n", rank, x, y);
for(j = 0; j < bX; j++){
printf("Rank %d J: %d\n", rank, j);
sum += a[j][x] * bSub[y][j];
}
result[y][x] = sum;
}
}
printf("Rank %d ending\n", rank);
MPI_Send(&displacement, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&rowCount, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&result, bX * rowCount, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
// printMatrix(a, aX, aY);
// printMatrix(b, bX, bY);
MPI_Finalize();
}
/*
 * Allocates a matrix of sizeY rows with sizeX ints in each row.
 *
 * All elements live in one contiguous block laid out row by row; the returned
 * table of row pointers indexes into that block. matrix[y][x] therefore works
 * as usual, and &matrix[0][0] can be handed to MPI as a single buffer of
 * sizeX * sizeY ints.
 *
 * Elements are left uninitialized.
 *
 * Returns NULL if either dimension is not positive, if the byte count would
 * overflow size_t, or if memory cannot be obtained.
 * Release with: free(matrix[0]); free(matrix);
 */
int** allocMatrix(int sizeX, int sizeY){
    if (sizeX <= 0 || sizeY <= 0) {
        return NULL;
    }

    size_t cols = (size_t)sizeX;
    size_t rows = (size_t)sizeY;

    /* Reject sizes whose total byte count does not fit in size_t. */
    if (cols > (size_t)-1 / sizeof(int) / rows) {
        return NULL;
    }

    int *data = malloc(rows * cols * sizeof *data);
    int **matrix = malloc(rows * sizeof *matrix);
    if (data == NULL || matrix == NULL) {
        free(data);
        free(matrix);
        return NULL;
    }

    /* Row y starts cols * y ints into the shared element block. */
    for (size_t y = 0; y < rows; y++) {
        matrix[y] = &data[cols * y];
    }
    return matrix;
}
/*
 * Builds a matrix of sizeY rows with sizeX ints per row in which the element
 * at row y, column x equals i * x + j * y.
 *
 * Storage comes from allocMatrix(). Returns NULL, without touching any
 * element, when allocMatrix() yields NULL.
 */
int** createMatrix(int sizeX, int sizeY, int i, int j){
    int** matrix = allocMatrix(sizeX, sizeY);
    if (matrix == NULL) {
        return NULL;
    }

    for (int y = 0; y < sizeY; y++) {
        for (int x = 0; x < sizeX; x++) {
            matrix[y][x] = i * x + j * y;
        }
    }
    return matrix;
}
/*
 * Writes the matrix to stdout: sizeY lines, each holding the sizeX elements
 * of one row, every element followed by a single space.
 */
void printMatrix(int** matrix, int sizeX, int sizeY){
    int row = 0;
    while (row < sizeY) {
        int col = 0;
        while (col < sizeX) {
            printf("%d ", matrix[row][col]);
            ++col;
        }
        printf("\n");
        ++row;
    }
}
我现在收到的错误:
Rank 15:
Rank 1:
Rank 7:
Rank 13:
Rank 16:
Rank 4:
Rank 2:
Rank 8:
Rank 17:
Rank 10:
Rank 6:
Rank 11:
Rank 5:
Rank 12:
Rank 9:
Rank 14:
[zarya:46623] *** Process received signal ***
[zarya:46623] Signal: Segmentation fault (11)
[zarya:46623] Signal code: Address not mapped (1)
[zarya:46623] Failing at address: 0x200000000
Rank 3:
Rank 1 got a
[zarya:46623] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7ff4f72bf390]
[zarya:46623] [ 1] ./mm.cx[0x400f1c]
[zarya:46623] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7ff4f6f04830]
[zarya:46623] [ 3] ./mm.cx[0x400979]
[zarya:46623] *** End of error message ***
[winston:06305] *** Process received signal ***
[winston:06305] Signal: Segmentation fault (11)
[winston:06305] Signal code: Address not mapped (1)
[winston:06305] Failing at address: 0x200000000
Rank 2 got a
[winston:06305] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fe4536d1390]
[winston:06305] [ 1] ./mm.cx[0x400f1c]
[winston:06305] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fe453316830]
[winston:06305] [ 3] ./mm.cx[0x400979]
[winston:06305] *** End of error message ***
[roadhog:16798] *** Process received signal ***
[roadhog:16798] Signal: Segmentation fault (11)
[roadhog:16798] Signal code: Address not mapped (1)
[roadhog:16798] Failing at address: 0x200000000
Rank 3 got a
[roadhog:16798] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f7216429390]
[roadhog:16798] [ 1] ./mm.cx[0x400f1c]
[roadhog:16798] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f721606e830]
[roadhog:16798] [ 3] ./mm.cx[0x400979]
[roadhog:16798] *** End of error message ***
[orisa:32932] *** Process received signal ***
[orisa:32932] Signal: Segmentation fault (11)
[orisa:32932] Signal code: Address not mapped (1)
[orisa:32932] Failing at address: 0x200000000
Rank 4 got a
[orisa:32932] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f18ce4d7390]
[orisa:32932] [ 1] ./mm.cx[0x400f1c]
[orisa:32932] [ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f18ce11c830]
[orisa:32932] [ 3] ./mm.cx[0x400979]
[orisa:32932] *** End of error message ***
[hanzo:43879] *** Process received signal ***
一些随机垃圾。