放入循环时MPI_Revc失败

时间:2014-02-26 09:06:01

标签: c mpi cluster-computing

我试图在4节点集群中运行给定代码。这里必须对变量'dir'的一系列值进行计算。因此,主节点(节点0)发送输入值并在计算之后在循环中接收输出值。但是输出似乎只对变量'dir'的第一个初始化值是正确的。对于其余的值,输出不正确,它也会在两者之间中止,并显示以下错误。

[8640-n0:1173] *** An error occurred in MPI_Recv
[8640-n0:1173] *** on communicator MPI_COMM_WORLD
[8640-n0:1173] *** MPI_ERR_RANK: invalid rank
[8640-n0:1173] *** MPI_ERRORS_ARE_FATAL: your MPI job will now abort
[8640-n1][[21243,1],1][btl_tcp_frag.c:215:mca_btl_tcp_frag_recv] mca_btl_tcp_frag_recv: readv failed: Connection reset by peer (104)

--------------------------------------------------------------------------
mpirun has exited due to process rank 0 with PID 1173 on
node 8640-n0 exiting improperly. There are two reasons this could occur:

1. this process did not call "init" before exiting, but others in
the job did. This can cause a job to hang indefinitely while it waits
for all processes to call "init". By rule, if one process calls "init",
then ALL processes must call "init" prior to termination.

2. this process called "init", but exited without calling "finalize".
By rule, all processes that call "init" MUST call "finalize" prior to
exiting or it will be considered an "abnormal termination"

This may have caused other processes in the application to be 
terminated by signals sent by mpirun (as reported here).

基于mpi的并行代码是:

       #include <stdio.h>
#include <sys/time.h>
#include <math.h>
#include <complex.h>
#include <fftw3.h>
#include "mpi.h"
#define FROM_MASTER 1  /* setting a message type */ 
#define FROM_WORKER 2  /* setting a message type */ 
#define num_sampl 1024
#define num_sensors 8
int main(int argc,char *argv[])
{
    int i,j,k,id,dest,count,limit,offset=0,xftcount;
    fftwf_complex jfi_b,xft[num_sampl/2][num_sensors], X[num_sampl][num_sensors],Y_b[num_sampl/2][1],out[num_sampl/2+1];
    fftwf_plan p1;
    float x[num_sampl][num_sensors],in[num_sampl],y_b[num_sampl/2][1],Pmax;
    float mv[num_sampl][num_sensors], n[num_sampl][num_sensors], a[num_sampl][1], s_psi[181], qb[181],P[181],Plog[181];
    float dir=-90,d=0.003,c=1500,f=200000,N=1024,M=8;
    float fs = 3*f;
    float Om = (2 * M_PI * f)/fs;
    float del_s = (d * fs)/c;
    struct timeval start,end,total;
    int rank,numprocs;
    MPI_Status status;
    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&numprocs);    

    count=181/(numprocs-1);
    xftcount=(num_sampl/2)*num_sensors;

    p1= fftwf_plan_dft_r2c_1d(num_sampl,in,out,FFTW_ESTIMATE);

    for(i=0;i<num_sampl;i++)
        in[i]=0;
    for(i=0;i<num_sampl;i++)
         a[i][0]= 1/M;
    printf("iam process %d\n",rank);
    if(rank==0)
    {
        printf("dir=%f\n",dir);
do{
//Compute Rotational Fourier Transform Operator xft
        jfi_b= -I * 2 * M_PI * del_s * sin(dir * M_PI/180) / N;
//      printf("jfi_b(r)==%f jfi_b(i)==%f\n",creal(jfi_b),cimag(jfi_b));
        for(i=0;i<num_sampl/2;i++)
            for(j=0;j<num_sensors;j++){
                xft[i][j]=i * j;
                xft[i][j]=xft[i][j] * jfi_b;
                xft[i][j]=cexp(xft[i][j]);
//              printf("%d%d..xft(r)==%f xft(i)==%f\n",i,j,creal(xft[i][j]),cimag(xft[i][j]));
                }

        for(i=-90,j=0;i<=90,j<181;i++,j++)
        {
            s_psi[j] = sin((i * M_PI)/180);
//          printf("%f\n",s_psi[j]);
        }
//Distributing data s_psi[]
        for(dest=1;dest<numprocs;dest++)
        {
            MPI_Send(&xft[0][0],xftcount,MPI_DOUBLE,dest,FROM_MASTER,MPI_COMM_WORLD);
            MPI_Send(&offset,1,MPI_INT,dest,FROM_MASTER,MPI_COMM_WORLD);
            if(dest==1)
            {
                MPI_Send(&s_psi[offset],count+1,MPI_FLOAT,dest,FROM_MASTER,MPI_COMM_WORLD);
                offset=offset+count+1;
            }
            else
            {
                MPI_Send(&s_psi[offset],count,MPI_FLOAT,dest,FROM_MASTER,MPI_COMM_WORLD);
                offset=offset+count;
            }
        }
        gettimeofday(&start,NULL);
        for (dest=1; dest<numprocs; dest++)
        {
            MPI_Recv(&offset,1,MPI_INT,dest,FROM_WORKER,MPI_COMM_WORLD,&status);    
            if(dest==1)
                MPI_Recv(&P[offset],count+1,MPI_FLOAT,dest,FROM_WORKER,MPI_COMM_WORLD,&status);     
            else        
                MPI_Recv(&P[offset],count,MPI_FLOAT,dest,FROM_WORKER,MPI_COMM_WORLD,&status);
        }
        Pmax = P[0];
                for(i=1;i<181;i++)
                        if(P[i] > Pmax)
                                Pmax=P[i];
//          printf("Pmax=%f\n",Pmax);
                for(i=0;i<181;i++)
                {
                    P[i]=P[i]/Pmax;   
//          printf("P[i]/Pmax=%f\n",P[i]);
            Plog[i]=10 * log10(P[i]);                                            
//          printf("Plog[%d]=%f\n",i,Plog[i]);
        }
        gettimeofday(&end,NULL);
        timersub(&end,&start,&total);
        for(i=0;i<181;i++)
            printf("Plog[%d]=%f\n",i,Plog[i]);
        printf("time consumed=%ds %dus\n",total.tv_sec,total.tv_usec);
        dir=dir+90;
}while(dir<=90);
    }

    if(rank>0)
    {
do{     
        MPI_Recv(&xft[0][0],xftcount,MPI_DOUBLE,0,FROM_MASTER,MPI_COMM_WORLD,&status);
        MPI_Recv(&offset,1,MPI_INT,0,FROM_MASTER,MPI_COMM_WORLD,&status);
        if(rank==1)
        {
            MPI_Recv(&s_psi[offset],count+1,MPI_FLOAT,0,FROM_MASTER,MPI_COMM_WORLD,&status);
            limit=offset+count+1;
        }
        else
        {
            MPI_Recv(&s_psi[offset],count,MPI_FLOAT,0,FROM_MASTER,MPI_COMM_WORLD,&status);
            limit=offset+count;
        }
        for (id=offset;id<limit;id++)     //180 directions
        {
            qb[id] = del_s * s_psi[id];
//          printf("qb==%f\n",qb[id]);

            for(i=0;i<num_sampl;i++)
                for(j=0;j<num_sensors;j++)
                {
                    mv[i][j]=j;
//                  printf("mv==%f\n",mv[i][j]);
                    mv[i][j]=mv[i][j] * qb[id];
//                  printf("mv==%f\n",mv[i][j]);
                    X[i][j]=0;
                }
            for(i=0;i<num_sampl;i++)
                for(j=0;j<num_sensors;j++)
                {
                    n[i][j]=i;
//                  printf("n==%f\n",n[i][j]);
                    n[i][j]+=mv[i][j];
//                  printf("n==%f\n",n[i][j]);
                    n[i][j]=n[i][j]*Om;
//                  printf("n==%f\n",n[i][j]);
                    x[i][j]=sin(n[i][j]);
//                  printf("n==%f\n",x[i][j]);
                }
//FFT computation coloumn wise  
            for(j=0;j<num_sensors;j++)
            {
                for(i=0;i<num_sampl;i++)
                {
                    in[i]=x[i][j];
//              if(id==180)
//                  printf("col 1 in[] %f\n",in[i]);
                }
                fftwf_execute(p1);
                for(i=0;i<num_sampl/2;i++)
                        {
                            X[i][j] = out[i];
//                      if(id==180)
//                      printf("col 1 out[r]=%f\tout[i]=%f\n",creal(out[i]),cimag(out[i]));
//                  printf("X[r]=%f\tX[i]=%f\n",creal(X[i][j]),cimag(X[i][j]));
                }

            }   
//Computing X*XFT-Formation of beam with weighting function

            for(i=0;i<num_sampl/2;i++)
                for(j=0;j<num_sensors;j++)
                {
                    X[i][j]=X[i][j] * xft[i][j];    
//              if(id==180)
//                  printf("X*xft(%d%d)...X[r]=%f\tX[i]=%f\n",i,j,creal(X[i][j]),cimag(X[i][j]));
                }               

            for(i=0;i<num_sampl/2;i++)
            {
                Y_b[i][0]=0;
                for(j=0;j<num_sensors;j++)
                    Y_b[i][0]+=X[i][j] * a[j][0];
//              if(id==180)
//                  printf("Y_b(%d)...Y_b[r]=%f\tY_b[i]=%f\n",i,creal(Y_b[i][0]),cimag(Y_b[i][0]));
            }
//Find absolute values
            P[id]=0;
            for(i=0;i<num_sampl/2;i++)
            {
                y_b[i][0]=cabs(Y_b[i][0]);
                y_b[i][0]=pow(y_b[i][0],2);
                P[id]+=y_b[i][0];
//          if(id==180)
//              printf("y_b(%d)=%f\n",i,y_b[i][0]);
            }   
//          printf("P(%d)=%f\n",id,P[id]);
        }
        MPI_Send(&offset,1,MPI_INT,0,FROM_WORKER,MPI_COMM_WORLD);
        if(rank==1)
            MPI_Send(&P[offset],count+1,MPI_FLOAT,0,FROM_WORKER,MPI_COMM_WORLD);    
        else
            MPI_Send(&P[offset],count,MPI_FLOAT,0,FROM_WORKER,MPI_COMM_WORLD);
dir=dir+90;
}while(dir<=90);
    }   
    fftwf_destroy_plan(p1);     
    MPI_Finalize();
    return 0;
}

我只在MPI_Recv函数中输入了正确的等级。你能帮我解决这个问题。

0 个答案:

没有答案