Question

我正在我的群集上运行乘法矩阵代码，我确信它的工作正常。我甚至使用了Scatterv并且它完美地分散......但是我在每个进程上动态分配了我的结果矩阵，其大小为sendcount [world_rank]行和z列，我检查了这些数字对于点是正确的...只分配了一个进程数组和其余的只是打印一个地址或其他东西。

我的代码在这里执行矩阵乘法：

count = 0;
while(count < z){

    for (i = 0; i < sendcount[world_rank]; i++){

        for(j = 0; j < y; j++){
            //printf("matrix1[%d][%d] * matrix2[%d][%d] = ", i, j, j, count);
            resultmatrix[i][count] += matrix1[i][j] * matrix2[j][count];
            //printf("%d ", resultmatrix[i][count]);

        }
        //printf("rank: %d, i: %d count: %d resultmatrix: %d\n", world_rank, i, count, resultmatrix[i][count]);

    }
    count++;
    //printf("\n");
}

当我在每个进程中打印结果矩阵进行测试时，只有我的根进程打印实际值... 这可能是我的群集中的一个问题，还是我错过了什么？

以下是我的全部代码：

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[]){

int world_size, world_rank, i, j, tag = 777;
int root = 0;
int count = 0;

int x = atoi(argv[1]);
int y = atoi(argv[2]);
int z = atoi(argv[3]);


MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Status status;
int temp1_size = x*y;
int temp2_size = y*z;
//scatter v info
int sum = 0;
int rem1 = x%world_size;
int** matrix1;
int** matrix2;
int** resultmatrix;
int* tempM1;
int* tempM2;
int* recM1;
int* sendcount = (int*)malloc(sizeof(int) * world_size);
int* displs = (int*)malloc(sizeof(int) * world_size);

for(i = 0; i < world_size; i++){

    sendcount[i] = (x/world_size)*y;
    if(rem1 > 0){
        sendcount[i] += y;
        rem1--;
    }
    displs[i] = sum;
    sum += sendcount[i];
}
//to send my arrays over... first by bcast other by scatter...
tempM1 = (int*)malloc(sizeof(int) * temp1_size);
tempM2 = (int*)malloc(sizeof(int) * temp2_size);
// first is for 1d other is for matrix1;
//the array that will hold out scattered matrix1... 1d array...
recM1 = (int*)malloc(sizeof(int) * 500);
// matrix one at every process after scatter...




if (world_rank == root){

    FILE* Matrix1;
    FILE* Matrix2;

    Matrix1 = fopen("Matrix1.txt", "r");
    Matrix2 = fopen("Matrix2.txt", "r");
    int count = 0;
    while (count < temp1_size){
        fscanf(Matrix1, "%d", &tempM1[count]);
        count++;
    }
    //printf("count: %d, temp1_size: %d\n", count, temp1_size);
    count = 0;
    while(count < temp2_size){
        fscanf(Matrix2, "%d", &tempM2[count]);
        count++;
    }
    fclose(Matrix1);
    fclose(Matrix2);

    /*for(i = 0; i < world_size; i++)
        printf("sendcount: %d, displs: %d\n", sendcount[i], displs[i]);*/

}
MPI_Bcast(tempM2, temp2_size, MPI_INT, root, MPI_COMM_WORLD);
MPI_Scatterv(tempM1, sendcount, displs, MPI_INT, recM1, 500, MPI_INT, root, MPI_COMM_WORLD);

for(i = 0; i < world_size; i++){
    sendcount[i] /= y;
    //printf("%d at i: %d\n", sendcount[i], i);
}

matrix1 = (int**)malloc(sizeof(int*) * sendcount[world_rank]);
for(i = 0; i < sendcount[world_rank]; i++){
    matrix1[i] = (int*)malloc(sizeof(int) * y);
}
printf("%d\n", matrix1[0][0]);
// allocating 2d array which is my matrix 2...
matrix2 = (int**)malloc(sizeof(int*) * y);
for(i = 0; i < y; i++){
    matrix2[i] = (int*)malloc(sizeof(int) * z);
}
printf("%d\n", matrix2[0][0]);
// my result matrix which consists of my sendcount[world_rank] and z :)
resultmatrix = (int**)malloc(sizeof(int*) * x);
for(i = 0; i < x; i++){
    resultmatrix[i] = (int*)malloc(sizeof(int) * z);
}
printf("%d\n", resultmatrix[0][0]);

count = 0;
for(i = 0; i < sendcount[world_rank]; i++){
    for(j = 0; j < y; j++){
        matrix1[i][j] = recM1[count];
        count++;
        //printf("%d ", matrix1[i][j]);
    }
    //printf("\n");
    //printf("\n");
}
//printf("\n");
free(recM1);
count = 0;
for(i = 0; i < y; i++){
    for(j = 0; j < z; j++){
        matrix2[i][j] = tempM2[count];
        count++;
        //printf("%d ", matrix2[i][j]);
    }
    //printf("\n");
}
//printf("\n");
free(tempM2);
free(tempM1);
//printf("%d\n", resultmatrix[0][0]);
count = 0;
while(count < z){

    for (i = 0; i < sendcount[world_rank]; i++){

        for(j = 0; j < y; j++){
            //printf("matrix1[%d][%d] * matrix2[%d][%d] = ", i, j, j, count);
            resultmatrix[i][count] += matrix1[i][j] * matrix2[j][count];
            //printf("%d ", resultmatrix[i][count]);

        }
        //printf("rank: %d, i: %d count: %d resultmatrix: %d\n", world_rank, i, count, resultmatrix[i][count]);

    }
    count++;
    //printf("\n");
}
/*for(i = 0; i < sendcount[world_rank]; i++){
    for(j = 0; j < z; j++){
        printf("%d ", resultmatrix[i][j]);
    }
    printf("\n");
}   */
    //int khara = world_rank * scattered_rows;
    //while(i < scattered_rows){
    /*if(world_rank!=0){
        for (i = 0; i < sendcount[world_rank]; i++){
            MPI_Send(resultmatrix[i], z, MPI_INT, root, tag, MPI_COMM_WORLD);
        }
    }*/
        //MPI_Gather(resultmatrix, z*scattered_rows, MPI_INT, global_result, x * z, MPI_INT, root, MPI_COMM_WORLD);
        //i++;
        //khara++;
    //}

/*if(world_rank == root){
    int rank = 1;
    for(j = 0; j < sendcount[world_rank]; j++){
            global_result[j] = resultmatrix[j];
        }
    count = sendcount[world_rank];
    while(rank < world_size){
        for(i = 0; i < sendcount[rank]; i++){
            MPI_Recv(global_result[count++], z, MPI_INT, rank, tag, MPI_COMM_WORLD, &status);

        }
        rank++;
    }*/

    /*while (rank < world_size){
    for(i = scattered_rows i = 0; i < sendcount; i += scattered_rows){
        rank++;
        for(j = 0; j < scattered_rows; j++){
            printf("hello!!!\n");
            MPI_Recv(global_result[count++], z, MPI_INT, rank, tag, MPI_COMM_WORLD, &status);
        }
    }*/


    /*for(i = 0; i < x; i++){
        printf("\n");
        for(j = 0; j < z; j++){
            printf("%d ", global_result[i][j]);
        }
    }*/
    //printf("\nx = %d, y = %d, z= %d", x,y,z);

//}
free(matrix1);
free(matrix2);
MPI_Finalize();
return 0;
}

Answer 1

您为resultmatrix[i]分配内存，但不进行初始化。这意味着它的内容将是 indeterminate ，并且当您在语句中使用数据时

resultmatrix[i][count] += ...;

您将阅读并修改该不确定数据，您将获得undefined behavior。

在分配循环中，您可以使用例如memset初始化已分配的内存：

for(i = 0; i < x; i++){
    resultmatrix[i] = malloc(sizeof(int) * z);
    memset(resultmatrix[i], 0, sizeof(int) * z);
}

[请注意，我删除了malloc，in C you should not do that]

结果的演员表

为什么我的大学集群上的所有进程都没有分配我的2d结果矩阵数组？

1 个答案: