I am running my matrix multiplication code on my cluster and I am fairly sure the multiplication itself works. I even used Scatterv, and it scatters perfectly... But I dynamically allocate my result matrix on each process with sendcount[world_rank] rows and z columns, and I checked that those numbers are correct... yet only one process seems to actually get the array; the rest just print an address or something.
Here is the code where I do the matrix multiplication:
count = 0;
while(count < z){
    for (i = 0; i < sendcount[world_rank]; i++){
        for(j = 0; j < y; j++){
            //printf("matrix1[%d][%d] * matrix2[%d][%d] = ", i, j, j, count);
            resultmatrix[i][count] += matrix1[i][j] * matrix2[j][count];
            //printf("%d ", resultmatrix[i][count]);
        }
        //printf("rank: %d, i: %d count: %d resultmatrix: %d\n", world_rank, i, count, resultmatrix[i][count]);
    }
    count++;
    //printf("\n");
}
When I print the result matrix on every process to test it, only my root process prints actual values... Could this be a problem with my cluster, or am I missing something?
Here is my full code:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[]){
    int world_size, world_rank, i, j, tag = 777;
    int root = 0;
    int count = 0;
    int x = atoi(argv[1]);
    int y = atoi(argv[2]);
    int z = atoi(argv[3]);
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Status status;
    int temp1_size = x*y;
    int temp2_size = y*z;
    //scatter v info
    int sum = 0;
    int rem1 = x%world_size;
    int** matrix1;
    int** matrix2;
    int** resultmatrix;
    int* tempM1;
    int* tempM2;
    int* recM1;
    int* sendcount = (int*)malloc(sizeof(int) * world_size);
    int* displs = (int*)malloc(sizeof(int) * world_size);
    for(i = 0; i < world_size; i++){
        sendcount[i] = (x/world_size)*y;
        if(rem1 > 0){
            sendcount[i] += y;
            rem1--;
        }
        displs[i] = sum;
        sum += sendcount[i];
    }
    //to send my arrays over... first by bcast other by scatter...
    tempM1 = (int*)malloc(sizeof(int) * temp1_size);
    tempM2 = (int*)malloc(sizeof(int) * temp2_size);
    // first is for 1d other is for matrix1;
    //the array that will hold out scattered matrix1... 1d array...
    recM1 = (int*)malloc(sizeof(int) * 500);
    // matrix one at every process after scatter...
    if (world_rank == root){
        FILE* Matrix1;
        FILE* Matrix2;
        Matrix1 = fopen("Matrix1.txt", "r");
        Matrix2 = fopen("Matrix2.txt", "r");
        int count = 0;
        while (count < temp1_size){
            fscanf(Matrix1, "%d", &tempM1[count]);
            count++;
        }
        //printf("count: %d, temp1_size: %d\n", count, temp1_size);
        count = 0;
        while(count < temp2_size){
            fscanf(Matrix2, "%d", &tempM2[count]);
            count++;
        }
        fclose(Matrix1);
        fclose(Matrix2);
        /*for(i = 0; i < world_size; i++)
            printf("sendcount: %d, displs: %d\n", sendcount[i], displs[i]);*/
    }
    MPI_Bcast(tempM2, temp2_size, MPI_INT, root, MPI_COMM_WORLD);
    MPI_Scatterv(tempM1, sendcount, displs, MPI_INT, recM1, 500, MPI_INT, root, MPI_COMM_WORLD);
    for(i = 0; i < world_size; i++){
        sendcount[i] /= y;
        //printf("%d at i: %d\n", sendcount[i], i);
    }
    matrix1 = (int**)malloc(sizeof(int*) * sendcount[world_rank]);
    for(i = 0; i < sendcount[world_rank]; i++){
        matrix1[i] = (int*)malloc(sizeof(int) * y);
    }
    printf("%d\n", matrix1[0][0]);
    // allocating 2d array which is my matrix 2...
    matrix2 = (int**)malloc(sizeof(int*) * y);
    for(i = 0; i < y; i++){
        matrix2[i] = (int*)malloc(sizeof(int) * z);
    }
    printf("%d\n", matrix2[0][0]);
    // my result matrix which consists of my sendcount[world_rank] and z :)
    resultmatrix = (int**)malloc(sizeof(int*) * x);
    for(i = 0; i < x; i++){
        resultmatrix[i] = (int*)malloc(sizeof(int) * z);
    }
    printf("%d\n", resultmatrix[0][0]);
    count = 0;
    for(i = 0; i < sendcount[world_rank]; i++){
        for(j = 0; j < y; j++){
            matrix1[i][j] = recM1[count];
            count++;
            //printf("%d ", matrix1[i][j]);
        }
        //printf("\n");
        //printf("\n");
    }
    //printf("\n");
    free(recM1);
    count = 0;
    for(i = 0; i < y; i++){
        for(j = 0; j < z; j++){
            matrix2[i][j] = tempM2[count];
            count++;
            //printf("%d ", matrix2[i][j]);
        }
        //printf("\n");
    }
    //printf("\n");
    free(tempM2);
    free(tempM1);
    //printf("%d\n", resultmatrix[0][0]);
    count = 0;
    while(count < z){
        for (i = 0; i < sendcount[world_rank]; i++){
            for(j = 0; j < y; j++){
                //printf("matrix1[%d][%d] * matrix2[%d][%d] = ", i, j, j, count);
                resultmatrix[i][count] += matrix1[i][j] * matrix2[j][count];
                //printf("%d ", resultmatrix[i][count]);
            }
            //printf("rank: %d, i: %d count: %d resultmatrix: %d\n", world_rank, i, count, resultmatrix[i][count]);
        }
        count++;
        //printf("\n");
    }
    /*for(i = 0; i < sendcount[world_rank]; i++){
        for(j = 0; j < z; j++){
            printf("%d ", resultmatrix[i][j]);
        }
        printf("\n");
    } */
    //int khara = world_rank * scattered_rows;
    //while(i < scattered_rows){
    /*if(world_rank!=0){
        for (i = 0; i < sendcount[world_rank]; i++){
            MPI_Send(resultmatrix[i], z, MPI_INT, root, tag, MPI_COMM_WORLD);
        }
    }*/
    //MPI_Gather(resultmatrix, z*scattered_rows, MPI_INT, global_result, x * z, MPI_INT, root, MPI_COMM_WORLD);
    //i++;
    //khara++;
    //}
    /*if(world_rank == root){
        int rank = 1;
        for(j = 0; j < sendcount[world_rank]; j++){
            global_result[j] = resultmatrix[j];
        }
        count = sendcount[world_rank];
        while(rank < world_size){
            for(i = 0; i < sendcount[rank]; i++){
                MPI_Recv(global_result[count++], z, MPI_INT, rank, tag, MPI_COMM_WORLD, &status);
            }
            rank++;
        }*/
    /*while (rank < world_size){
        for(i = scattered_rows i = 0; i < sendcount; i += scattered_rows){
            rank++;
            for(j = 0; j < scattered_rows; j++){
                printf("hello!!!\n");
                MPI_Recv(global_result[count++], z, MPI_INT, rank, tag, MPI_COMM_WORLD, &status);
            }
        }*/
    /*for(i = 0; i < x; i++){
        printf("\n");
        for(j = 0; j < z; j++){
            printf("%d ", global_result[i][j]);
        }
    }*/
    //printf("\nx = %d, y = %d, z= %d", x,y,z);
    //}
    free(matrix1);
    free(matrix2);
    MPI_Finalize();
    return 0;
}
Answer 0 (score: 0)
You allocate memory for resultmatrix[i], but you never initialize it. That means its contents will be indeterminate, and when you then use that data in the statement
resultmatrix[i][count] += ...;
you read and modify that indeterminate data, which gives you undefined behavior.
In the allocation loop you can initialize the allocated memory using, for example, memset:
for(i = 0; i < x; i++){
    resultmatrix[i] = malloc(sizeof(int) * z);
    memset(resultmatrix[i], 0, sizeof(int) * z);
}
[Note that I removed the cast of malloc; in C you should not do that.]
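As a side note, memset is declared in <string.h>, so that header would need to be included. Alternatively, calloc returns zero-initialized memory, so a minimal sketch of the same allocation loop without a separate memset call could look like this:

for(i = 0; i < x; i++){
    /* calloc zero-fills the allocation, so every resultmatrix[i][j] starts at 0 */
    resultmatrix[i] = calloc(z, sizeof(int));
}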