我正在使用MPI在C中进行一些矩阵乘法。 它工作正常,直到我试图超过15x15,我无法弄清楚为什么......
从我注意到的错误似乎主要发生在我看到“Process#sending ...”print之后,当从属进程将数据发送回主进程时就会发生这种情况。
错误讯息:
[LEC-B125N4J:12183] *** Process received signal ***
[LEC-B125N4J:12183] Signal: Segmentation fault (11)
[LEC-B125N4J:12183] Signal code: Address not mapped (1)
代码:
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <mpi.h>
//#define DIM 1000
#define DIM 15
/*
* Statically allocate the matrices to make the rows
* sequentially placed in memory. (This eases the task
* of distributing the problem among the slaves.)
* Make the matrices global to allow for larger
* dimensions.
*/
int A[DIM][DIM];
int B[DIM][DIM];
int C[DIM][DIM];
int D[DIM][DIM];
int correct_result(int A[DIM][DIM], int B[DIM][DIM])
{
int i,j;
for (i=0; i<DIM; ++i)
for (j=0; j<DIM; ++j)
if (A[i][j] != B[i][j])
return 0;
return 1;
}
int main (argc, argv)
int argc;
char *argv[];
{
int rank=0, size;
int i, j, k;
int time1;
volatile int tmp;
int iOffset = 0;
int iProblemSize = 0;
MPI_Init(&argc, &argv); /* starts MPI */
MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* get current process id */
MPI_Comm_size(MPI_COMM_WORLD, &size); /* get number of processes */
iProblemSize = (DIM / (size - 1));
if(rank == 0) { //Master
printf("Number of processes: %d (1 Master and %d slaves) - DIM: %d\n", size, (size - 1), DIM);
//Fill matrices A and B with random numbers
srand(timer(NULL));
for(i=0; i<DIM; ++i)
{
for (j=0; j<DIM; ++j)
{
A[i][j] = random() % 100 - 50;
B[i][j] = random() % 100 - 50;
C[i][j] = 0;
}
}
}
MPI_Bcast(B, (DIM * DIM), MPI_INT, 0, MPI_COMM_WORLD);
if(rank == 0) { //Master
/* Calculate the true answer */
for (i=0; i<DIM; ++i)
for (k=0; k<DIM; ++k)
for (j=0; j<DIM; ++j)
D[i][j] += A[i][k] * B[k][j];
time1 = timer();
//Send pieces of A to the slaves
iOffset = 0;
for(i = 1; i < size; i++) {
MPI_Send(A[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD);
iOffset += iProblemSize;
/*for(j = 0; j < iProblemSize; j++) {
MPI_Send(A[iOffset + j], DIM, MPI_INT, i, 0, MPI_COMM_WORLD);
}
iOffset += iProblemSize;*/
}
//Take care of leftovers if needed (if uneven number of slaves)
if((size - 1) % DIM != 0) {
for(i = iOffset; i < DIM; i++) {
for(k = 0; k < DIM; k++) {
for(j = 0; j < DIM; j++) {
C[i][j] += A[i][k] * B[k][j];
}
}
}
}
//Gather the results from the slaves
iOffset = 0;
for(i = 1; i < size; i++) {
MPI_Recv(C[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD, NULL);
iOffset += iProblemSize;
printf("Received from %d!\n", i);
}
printf("All received!\n");
/* Error checking */
time1 = timer() - time1;
printf ("Your calculation is %scorrect.\n", correct_result(C,D) ? "" : "not ");
printf ("Total runtime: %f seconds\n", time1/1000000.0);
}
else { //Slaves
MPI_Recv(A, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
/*for(j = 0; j < iProblemSize; j++) {
MPI_Recv(A[j], DIM, MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
}*/
//Do the calculations for C
//printf("Process %d doing calculations...\n", rank);
for (i = 0; i < (iProblemSize * DIM); ++i) {
for (k = 0; k < DIM; ++k) {
for (j = 0; j < DIM; ++j) {
C[i][j] += A[i][k] * B[k][j];
}
//printf("\n");
}
}
//printf("Process %d finished doing the calculations!\n", rank);
//Send the result to the master
printf("Process %d sending...\n", rank);
MPI_Send(C, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD);
printf("Process %d finished sending!\n", rank);
}
MPI_Finalize();
return 0;
}
答案 0 :(得分:0)
好的,我终于修复了错误。 当奴隶进行计算时,问题出在循环中......
for (i = 0; i < (iProblemSize * DIM); ++i) {
应该是
for (i = 0; i < iProblemSize; ++i) {
:)