I'm trying to compute an NxN matrix product with OpenMPI and C. Everything runs as expected except for MPI_Bcast(). As I understand it, the MASTER has to broadcast matrix_2 to the rest of the WORKER processes; meanwhile, when the WORKERS reach MPI_Bcast() they should wait there until the chosen process (MASTER in this case) performs the broadcast.
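To make my understanding concrete, this is the broadcast behaviour I am expecting, as a minimal standalone sketch (everything here, including the size n, is just a placeholder and not my actual program, which follows below):

/* Minimal sketch of the expected MPI_Bcast behaviour: every rank calls
 * MPI_Bcast on its own copy of the buffer, and the root's contents are
 * copied into the other ranks' buffers. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
    int rank, n = 4;                         /* placeholder matrix size */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int *buf = malloc(sizeof(int) * n * n);  /* dynamically allocated, like matrix_2 */
    if (rank == 0)
        for (int i = 0; i < n * n; i++) buf[i] = i;

    /* Workers block here until root 0 has broadcast its data. */
    MPI_Bcast(buf, n * n, MPI_INT, 0, MPI_COMM_WORLD);

    printf("rank %d sees buf[0] = %d\n", rank, buf[0]);
    free(buf);
    MPI_Finalize();
    return 0;
}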
The error I get is a segmentation fault with "address not mapped", so it must be related to the dynamic allocation of the matrices. What I do is send a slice of matrix_1 to each process, and each process then performs its partial multiply-and-add against the previously broadcast matrix_2.

I know the error has to be in MPI_Bcast(), because when I comment it out the program finishes correctly (although obviously without computing the product). There must be something I'm missing. I'm attaching the code and the error message I get. Thanks in advance.
CODE
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* MACROS */
#define MASTER_TO_SLAVE_TAG 1
#define SLAVE_TO_MASTER_TAG 4
#define MASTER 0
#define WORKER 1

int *matrix_1;
int *matrix_2;
int *result;
double start_time;
double end_time;
int procID;
int numProc;
int size, numRows, from, to;
int i,j,k;
MPI_Status status;
MPI_Request request;

void addressMatrixMemory(int);

int main(int argc, char *argv[]){
    size = atoi(argv[1]);

    MPI_Init (&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &procID);
    MPI_Comm_size(MPI_COMM_WORLD, &numProc);

    addressMatrixMemory(size);

    /* MASTER starts. */
    if(procID == MASTER){
        start_time = MPI_Wtime();
        for(i = 1; i < numProc; i++){
            numRows = size/(numProc - 1);
            from = (i - 1) * numRows;
            if(((i + 1) == numProc) && ((size % (numProc - 1))) != 0){
                to = size;
            } else {
                to = from + numRows;
            }
            MPI_Isend(&from, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &request);
            MPI_Isend(&to, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &request);
            MPI_Isend(matrix_1, (to - from) * size, MPI_INT, i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &request);
        }
    }

    MPI_Bcast(&matrix_2, size * size, MPI_INT, MASTER, MPI_COMM_WORLD);

    /* WORKERS task */
    if(procID >= WORKER){
        int row, col;
        int *matrix = malloc(sizeof(matrix_1[0])*size*size);

        MPI_Recv(&from, 1, MPI_INT, MASTER, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
        MPI_Recv(&to, 1, MPI_INT, MASTER, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
        MPI_Recv(matrix, (to - from) * size, MPI_INT, MASTER, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);

        for(row = from; row < to; row++){
            for(col = 0; col < size; col++){
                result[row * size + col] = 0;
                for(k = 0; k < size; k++);
                    result[row * size + col] += matrix[row * size + k] * matrix_2[k * size + col];
            }
        }

        MPI_Isend(&from, 1, MPI_INT, MASTER, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &request);
        MPI_Isend(&to, 1, MPI_INT, MASTER, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &request);
        MPI_Isend(&result[from], (to - from) * size, MPI_INT, MASTER, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD, &request);
    }

    /* MASTER gathers WORKERS job. */
    if(procID == MASTER){
        for(i = 1; i < numProc; i++){
            MPI_Recv(&from, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);
            MPI_Recv(&to, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &status);
            MPI_Recv(&result[from], (to - from) * size, MPI_INT, i, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD, &status);
        }
        end_time = MPI_Wtime();
        printf("\nRunning Time = %f\n\n", end_time - start_time);
    }

    MPI_Finalize();

    free(matrix_1);
    free(matrix_2);
    free(result);
    return EXIT_SUCCESS;
}

void addressMatrixMemory(int n){
    matrix_1 = malloc(sizeof(matrix_1[0])*n*n);
    matrix_2 = malloc(sizeof(matrix_2[0])*n*n);
    result = malloc(sizeof(result[0])*n*n);

    /* Matrix init with values between 1 and 100. */
    srand(time(NULL));
    int r = rand() % 100 + 1;
    int i;
    for(i = 0; i < n*n; i++){
        matrix_1[i] = r;
        r = rand() % 100 + 1;
        matrix_2[i] = r;
        r = rand() % 100 + 1;
    }
}
ERROR MESSAGE
[tuliansPC:28270] *** Process received signal ***
[tuliansPC:28270] Signal: Segmentation fault (11)
[tuliansPC:28270] Signal code: Address not mapped (1)
[tuliansPC:28270] Failing at address: 0x603680
[tuliansPC:28270] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x10340) [0x7f0a98ce0340]
[tuliansPC:28270] [ 1] /lib/x86_64-linux-gnu/libc.so.6(+0x97ffe) [0x7f0a9899fffe]
[tuliansPC:28270] [ 2] /usr/lib/libmpi.so.1(opal_convertor_pack+0x129) [0x7f0a98fef779]
[tuliansPC:28270] [ 3] /usr/lib/openmpi/lib/openmpi/mca_btl_sm.so(mca_btl_sm_prepare_src+0x1fd) [0x7f0a923c385d]
[tuliansPC:28270] [ 4] /usr/lib/openmpi/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_send_request_start_rndv+0x1dc) [0x7f0a93245c9c]
[tuliansPC:28270] [ 5] /usr/lib/openmpi/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_isend+0x8ec) [0x7f0a9323856c]
[tuliansPC:28270] [ 6] /usr/lib/openmpi/lib/openmpi/mca_coll_tuned.so(ompi_coll_tuned_bcast_intra_generic+0x3fc) [0x7f0a914f49fc]
[tuliansPC:28270] [ 7] /usr/lib/openmpi/lib/openmpi/mca_coll_tuned.so(ompi_coll_tuned_bcast_intra_pipeline+0xbc) [0x7f0a914f4d5c]
[tuliansPC:28270] [ 8] /usr/lib/openmpi/lib/openmpi/mca_coll_tuned.so(ompi_coll_tuned_bcast_intra_dec_fixed+0x134) [0x7f0a914ec7a4]
[tuliansPC:28270] [ 9] /usr/lib/openmpi/lib/openmpi/mca_coll_sync.so(mca_coll_sync_bcast+0x64) [0x7f0a917096a4]
[tuliansPC:28270] [10] /usr/lib/libmpi.so.1(MPI_Bcast+0x13d) [0x7f0a98f5678d]
[tuliansPC:28270] [11] ej5Exec() [0x400e8c]
[tuliansPC:28270] [12] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5) [0x7f0a98929ec5]
[tuliansPC:28270] [13] ej5Exec() [0x400ac9]
[tuliansPC:28270] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 28270 on node tuliansPC exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
Answer (score: 0)
Let's start with the first problem that jumps out: you are not using non-blocking communication correctly. MPI_Isend is a non-blocking send, which means that when you call MPI_Isend, all you are doing is telling MPI about a message you would like to send at some point in the future. It may or may not be sent at that moment. To guarantee that the data is actually sent, you need to complete the call with something like MPI_Wait. Usually, when people use non-blocking calls (MPI_Isend), they don't mix them with blocking calls (MPI_Recv). If you use all non-blocking calls, you can complete them all with a single function, MPI_Waitall.
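As a rough illustration of that pattern (an assumed sketch, not your program: the size n, the tag, and the variable names are placeholders), every request returned by a non-blocking call is kept and completed before the buffers are reused or freed:

/* Sketch: all communication is non-blocking, and every request is
 * completed (MPI_Waitall on the sender, MPI_Wait on the receivers)
 * before the buffer is touched again. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
    int rank, nprocs, n = 8;                /* n is just a placeholder message size */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    int *data = malloc(sizeof(int) * n);

    if (rank == 0) {
        for (int i = 0; i < n; i++) data[i] = i;
        /* One request per MPI_Isend, all completed together. */
        MPI_Request *reqs = malloc(sizeof(MPI_Request) * (nprocs > 1 ? nprocs - 1 : 1));
        for (int dest = 1; dest < nprocs; dest++)
            MPI_Isend(data, n, MPI_INT, dest, 0, MPI_COMM_WORLD, &reqs[dest - 1]);
        /* Only after this returns is it safe to reuse or free 'data'. */
        MPI_Waitall(nprocs - 1, reqs, MPI_STATUSES_IGNORE);
        free(reqs);
    } else {
        MPI_Request req;
        MPI_Irecv(data, n, MPI_INT, 0, 0, MPI_COMM_WORLD, &req);
        MPI_Wait(&req, MPI_STATUS_IGNORE);  /* complete the receive as well */
    }

    free(data);
    MPI_Finalize();
    return 0;
}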
Try fixing those things first and see if that solves your problem. Just because you commented out the collective doesn't mean the problems weren't caused somewhere else. MPI programs can be hard to debug precisely because of odd behavior like this.