MPI意外死锁

时间:2013-12-17 16:52:03

标签: c mpi

我希望有人可以帮助我。我的代码:

/*
 * Distributes the matrix dimensions and the contents of MA and MB from
 * rank 0 to the other ranks using blocking MPI_Send / MPI_Recv.
 *
 * myid:     rank of the calling process.
 * numprocs: number of processes the send loop iterates over.
 *
 * Rank 0 only sends; every other rank only receives (three messages each:
 * meta, MA, MB, all with the same TAG).
 */
void process(int myid, int numprocs)
{
    int i,j, anzahl, rest;
    MPI_Status stat;

    // Three ints: {ASpalten, AZeilen, BSpalten}, so the dimensions travel in one message.
    meta = (int *)malloc(3 * sizeof(int));
    if(myid == 0)
    {
        meta[0] = ASpalten;
        meta[1] = AZeilen;
        meta[2] = BSpalten;

        // NOTE(review): the loop starts at i = 0, so rank 0 also sends to itself,
        // but this branch never posts a matching MPI_Recv for rank 0.
        for (i = 0; i < numprocs; i++) // master node sends meta, matrix A and matrix B to each rank
        {     
            MPI_Send(&meta[0], 3, MPI_INT, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug04\n", myid);
            MPI_Send(&MA[0], ASpalten*AZeilen, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug05\n", myid);
            MPI_Send(&MB[0], ASpalten*BSpalten, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug06\n", myid);
        }
   }
   else
   {
       // Receive the dimensions first; the buffer sizes below depend on them.
       MPI_Recv(meta, 3, MPI_INT, 0, TAG, MPI_COMM_WORLD, &stat);
       printf("%d: debug01\n", myid);
       ASpalten = meta[0];
       AZeilen = meta[1];
       BSpalten=meta[2];
       printf("%d: debug02\n", myid);
       // Allocate the local buffers for A, B and MR with the received dimensions.
       MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
       MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
       MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
       // Receive order must match the send order on rank 0: A first, then B.
       MPI_Recv(MA, ASpalten*AZeilen, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
       MPI_Recv(MB, ASpalten*BSpalten, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
       printf("%d: debug03\n", myid);
       // printf("%d: %f\n", myid, *(MA + _index(1, 1, ASpalten))); // works
    }

数据类型:

// Global matrix state used by process(). German: Spalten = columns, Zeilen = rows.
int ASpalten; // columns of A; MB is also allocated with ASpalten*BSpalten elements
int AZeilen;  // rows of A
int BSpalten; // columns of B
int *meta; // holds {ASpalten, AZeilen, BSpalten}; used to transfer meta data in 1 send
double *MA; // Matrix A, ASpalten*AZeilen elements
double *MB; // Matrix B, ASpalten*BSpalten elements

该程序应该使用MPI将2个矩阵相乘。我的示例矩阵表明代码基本是正确的,我也可以运行最大约130 * 90的矩阵(可能更大,也可能更小),但无论如何,只要规模继续增大,就会出现死锁:控制台只打印出"debug04",然后就停住了。如果有人知道我的程序出了什么问题,我将非常感激。我已经尝试使用MPI_INTEGER代替MPI_INT,但没有区别。任何帮助都将不胜感激。下面是使用非常小的矩阵时控制台的输出(PS:我已经尝试以不同的顺序执行我的测试用例,并修改了现有的测试用例):

  

Testcase1 MPI:
  0:debug04
  0:debug05
  0:debug06
  0:debug04
  1:debug01
  1:debug02
  0:debug05
  1:debug03
  1:debugx1
  0:debug06
  0:debug04 ......

1 个答案:

答案 0 :(得分:1)

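看起来进程0在向自己(proc 0)发送消息,而proc 0从未接收这些消息:它只执行发送分支,没有对应的MPI_Recv。消息较小时,MPI_Send可能先把数据缓冲起来并立即返回,所以小矩阵可以正常运行;消息较大时,MPI_Send会一直阻塞到出现匹配的接收为止,于是发生死锁。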

我把发送循环改为从 i=1 开始:

  for(i=1;i<numprocs;i++)

这样就消除了死锁。

#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"


// Global matrix state shared by process() and main(). German: Spalten = columns, Zeilen = rows.
int ASpalten; // columns of A; MB is also allocated with ASpalten*BSpalten elements
int AZeilen;  // rows of A
int BSpalten; // columns of B
int *meta; // holds {ASpalten, AZeilen, BSpalten}; used to transfer meta data in 1 send
double *MA; // Matrix A, ASpalten*AZeilen elements
double *MB; // Matrix B, ASpalten*BSpalten elements
double *MR; // presumably the result matrix (not B); allocated with AZeilen*BSpalten elements

void process(int myid, int numprocs){
    int i,j, anzahl, rest;
    int TAG=0;
    MPI_Status stat;
    meta=(int*)malloc(3*sizeof(int));
    if(myid == 0)
    {meta[0]=ASpalten;
    meta[1]=AZeilen;
    meta[2]=BSpalten;
    for (i=1; i<numprocs; i++)//masternode distributes matrix A to every single core
    {     
        MPI_Send(&meta[0], 3, MPI_INT, i, TAG, MPI_COMM_WORLD);
        printf("%d: debug04\n", myid);
        MPI_Send(&MA[0], ASpalten*AZeilen, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
        printf("%d: debug05\n", myid);
        MPI_Send(&MB[0], ASpalten*BSpalten, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
        printf("%d: debug06\n", myid);
    }
    }
    else
    {
        MPI_Recv(meta, 3, MPI_INT, 0, TAG, MPI_COMM_WORLD, &stat);
        printf("%d: debug01\n", myid);
        ASpalten=meta[0];
        AZeilen=meta[1];
        BSpalten=meta[2];
        printf("%d: debug02\n", myid);
        MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
        MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
        MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
        MPI_Recv(MA, ASpalten*AZeilen, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
        MPI_Recv(MB, ASpalten*BSpalten, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
        printf("%d: debug03\n", myid);
        // printf("%d: %f\n", myid, *(MA + _index(1, 1, ASpalten))); //funktioniert
    }
}

int main(int argc,char *argv[])
{
    int rank, size;


    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    ASpalten=130;
    AZeilen=90;
    BSpalten=200;
    if(rank==0){

    }

    MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
    MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
    MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
    process(rank,size);
    MPI_Finalize();
    return 0;
}

再见,

弗朗西斯