MPI程序无法按预期工作

时间:2014-08-01 23:50:35

标签: c mpi

我开发了一个MPI程序,它以环形方式循环发送和接收消息。消息传递由进程0启动:它发送给进程1,并从进程p-1接收。其他进程从各自的左邻居接收消息,再发送给各自的右邻居。请注意,进程p-1的右邻居是进程0。

以下是我的代码:

#include <stdio.h>
#include "mpi.h"
#include <stdlib.h>


/* cycle
   In this program a message is sent around a circle of processes 0 - (p-1),
   where p-1 sends to 0.
*/



/*
 * Ring test driver.
 *
 * After MPI_Init every rank reads the message count from argv[1] and the
 * message size from argv[2], then runs max_msgs iterations of a blocking
 * MPI_Send followed by a blocking MPI_Recv.  Rank 0 additionally measures
 * the wall-clock time with MPI_Wtime and prints a bandwidth figure.
 *
 * Returns 0 on every path, including the argument-error paths, which jump
 * straight to MPI_Finalize.
 */
int main(int argc, char **argv)
    {

    double startwtime, endwtime;
    float  elapsed_time, bandwidth;

    int my_id, next_id; /* process id-s */
    int p;      /* number of processes */
    char* message;  /* storage for the message */
    int i, k, max_msgs, msg_size, v;
    MPI_Status status;  /* return status for receive */


    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &my_id );
    MPI_Comm_size( MPI_COMM_WORLD, &p );

    /* Two command-line parameters are required: message count and size. */
    if (argc < 3)
    {
    fprintf (stderr, "need msg count and msg size as params\n");
    goto EXIT;
    }

    /* Both parameters must parse as decimal integers. */
    if ((sscanf (argv[1], "%d", &max_msgs) < 1) ||
                (sscanf (argv[2], "%d", &msg_size) < 1))
    {
    fprintf (stderr, "need msg count and msg size as params\n");
    goto EXIT;
    }

    /* NOTE(review): message is re-pointed at the single char `yahoo` right
       after the malloc, so the msg_size-byte allocation is no longer
       referenced, while the sends/receives below still pass msg_size as the
       element count. */
    message = (char*)malloc (msg_size);
    char yahoo = 'C';
    message =&yahoo;


    /* v is assigned here but not read again anywhere in this function. */
    if (argc > 3) v=1; else v=0;           /*are we in verbose mode*/

    /* don't start timer until everybody is ok */
    MPI_Barrier(MPI_COMM_WORLD); 
/* Iteration counter used by whichever of the three branches below runs. */
int t=0;
    if( my_id == 0 ) {
    startwtime = MPI_Wtime();

        // do max_msgs times:
        //   send message of size msg_size chars to process 1
        //   receive message of size msg_size chars from process p-1
while(t<max_msgs) {
MPI_Send((char *) message, msg_size, MPI_CHAR, my_id+1, 0, MPI_COMM_WORLD);     
printf("Message ,source and destination ids:%c,%d,%d \n ",*message, 0 ,my_id+1);
fflush(stdout);
/* NOTE(review): my_id is 0 in this branch, so the source rank passed here
   is -1, whereas the comment above says the message comes from p-1. */
MPI_Recv((char *) message, msg_size, MPI_CHAR, my_id-1, 0, MPI_COMM_WORLD, &status);
printf("Message,source and destination ids:%c,%d,%d \n",*message, my_id-1,
0);
fflush(stdout);
t++;
}
        /* NOTE(review): only rank 0 executes this barrier; every rank, rank 0
           included, also executes the barrier after the if/else chain, so
           rank 0 calls MPI_Barrier one more time than the other ranks. */
        MPI_Barrier(MPI_COMM_WORLD); 
    endwtime = MPI_Wtime(); 
    elapsed_time = endwtime-startwtime;
    /* Bytes per second, counting 2 * max_msgs * msg_size bytes in total. */
    bandwidth = 2.0 * max_msgs * msg_size / (elapsed_time); 
        printf("Number, size of messages: %3d , %3d \n", max_msgs, msg_size);
        fflush(stdout);
        printf("Wallclock time     = %f seconds\n", elapsed_time );           
        fflush(stdout);
        printf("Bandwidth          = %f bytes per second\n", bandwidth);           
        fflush(stdout);
    } else if( my_id == p-1 ) {

        // do max_msgs times:
        //   receive message of size msg_size from process to the left
        //   send message of size msg_size to process to the right (p-1 sends to 0)
    /* NOTE(review): the loop sends first and receives second, the opposite
       of the order listed in the comment above. */
    while(t<max_msgs) {
        MPI_Send((char *) message, msg_size, MPI_CHAR, 0, 0, MPI_COMM_WORLD);       
        MPI_Recv((char *) message, msg_size, MPI_CHAR, my_id-1, 0, MPI_COMM_WORLD, &status);
        t++;
    }
    } else {
        /* Ranks 1 .. p-2: send to my_id+1, then receive from my_id-1. */
        while(t<max_msgs) {
        MPI_Send((char *) message, msg_size, MPI_CHAR, my_id+1, 0, MPI_COMM_WORLD);         
        MPI_Recv((char *) message, msg_size, MPI_CHAR, my_id-1, 0, MPI_COMM_WORLD, &status);
        t++;
        }
          }

      /* Executed by every rank once its branch has finished. */
      MPI_Barrier(MPI_COMM_WORLD);      

/* Common exit path; the argument-error checks jump here directly. */
EXIT:
    MPI_Finalize();
    return 0;
}

我无法得到正确的源进程和目标进程ID,而且程序运行一两次之后就会陷入无限循环(卡住不动)。可能是哪里出了问题?

1 个答案:

答案 0 :(得分:3)

一些注释

您的代码的第一个问题是:在某些MPI实现上它有可能发生死锁。所有进程都先调用阻塞的MPI_Send,如果实现没有为消息提供缓冲,就没有任何进程能够执行到MPI_Recv。详细信息请参阅相关的注意事项(原文此处为链接)。

撇开死锁问题不谈,这段代码还有两个问题。第一个问题出在

if( my_id == 0 ) {

这个条件分支里。其中的

MPI_Recv((char *) message, msg_size, MPI_CHAR, my_id-1, 0, MPI_COMM_WORLD, &status);

把接收的源进程设成了my_id-1;由于此时my_id为0,源进程就是PE -1,这是行不通的。您应该从p-1接收。

MPI_Barrier的调用也不对。我得想一想……明白了。您在my_id==0分支内部调用了一次MPI_Barrier,而其他分支内部没有调用;之后所有进程又在条件语句之外调用了一次。这会产生如下的执行顺序:

1. PE 0 调用 barrier(在分支内)
2. 所有条件分支执行结束
3. PE 1 到 p-1 调用 barrier(在分支外)
4. PE 0 再次调用 barrier
5. 程序卡死

修正这两个问题之后的代码如下:

#include <stdio.h>
#include "mpi.h"
#include <stdlib.h>


/* cycle
   In this program a message is sent around a circle of processes 0 - (p-1),
   where p-1 sends to 0.
*/



/*
 * Ring message-passing benchmark.
 *
 * Usage: <program> <msg count> <msg size>
 *
 * Every rank sends a msg_size-byte message to its right neighbour
 * ((my_id + 1) % p) and then receives one from its left neighbour
 * ((my_id + p - 1) % p), repeating this max_msgs times.  Rank 0 times the
 * whole exchange with MPI_Wtime and prints the elapsed time and bandwidth.
 *
 * Parameters:
 *   argv[1]  number of messages per rank (must not be negative)
 *   argv[2]  message size in bytes       (must not be negative)
 *
 * Returns 0 on every path.  Argument errors and a process count below 2 are
 * reported on stderr and skip straight to MPI_Finalize.
 */
int main(int argc, char **argv)
{
    double startwtime = 0.0;    /* only meaningful on rank 0 */
    int my_id;                  /* rank of this process */
    int p;                      /* number of processes */
    int right, left;            /* ring neighbours of this rank */
    int max_msgs, msg_size;
    int i, t;
    const char *tag;            /* "*" for interior ranks, "" for 0 and p-1 */
    char *message = NULL;       /* storage for the message */
    MPI_Status status;          /* return status for receive */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    if (argc < 3) {
        fprintf(stderr, "need msg count and msg size as params\n");
        goto EXIT;
    }

    if ((sscanf(argv[1], "%d", &max_msgs) < 1) ||
        (sscanf(argv[2], "%d", &msg_size) < 1)) {
        fprintf(stderr, "need msg count and msg size as params\n");
        goto EXIT;
    }

    /* A negative size would become a huge malloc request and an invalid
       MPI element count. */
    if (max_msgs < 0 || msg_size < 0) {
        fprintf(stderr, "msg count and msg size must not be negative\n");
        goto EXIT;
    }

    /* With a single process there is no neighbour: rank 0 would address
       the non-existent rank 1. */
    if (p < 2) {
        fprintf(stderr, "need at least 2 processes to form a ring\n");
        goto EXIT;
    }

    fprintf(stdout, "my_id: %d, max_msgs: %d\n", my_id, max_msgs);

    /* Allocate the full msg_size bytes that MPI_Send/MPI_Recv will touch.
       At least one byte is requested so that a zero-length message does not
       depend on what malloc(0) returns. */
    message = malloc(msg_size > 0 ? (size_t)msg_size : 1);
    if (message == NULL) {
        fprintf(stderr, "my_id: %d, cannot allocate %d bytes\n", my_id, msg_size);
        /* NOTE(review): if only some ranks fail here, the remaining ranks
           will wait for them; aborting the whole job would be more robust. */
        goto EXIT;
    }
    for (i = 0; i < msg_size; i++) {
        message[i] = 'C';
    }

    right = (my_id + 1) % p;        /* p-1 wraps around to 0 */
    left  = (my_id + p - 1) % p;    /* 0 wraps around to p-1 */
    tag   = (my_id != 0 && my_id != p - 1) ? "*" : "";

    /* don't start timer until everybody is ok */
    MPI_Barrier(MPI_COMM_WORLD);

    fprintf(stdout, "my_id: %d, barrier 1\n", my_id);

    if (my_id == 0) {
        startwtime = MPI_Wtime();
    }

    /* Every rank sends before it receives, so the exchange only completes
       if MPI_Send can return before the matching receive is posted.  Some
       MPI implementations do not buffer (large) messages, in which case
       this pattern can deadlock. */
    for (t = 0; t < max_msgs; t++) {
        printf("%sPE %d about to send\n ", tag, my_id);
        fflush(stdout);
        MPI_Send(message, msg_size, MPI_CHAR, right, 0, MPI_COMM_WORLD);
        printf("%sPE %d completed send\n ", tag, my_id);
        fflush(stdout);

        printf("%sPE %d about to recv\n ", tag, my_id);
        fflush(stdout);
        MPI_Recv(message, msg_size, MPI_CHAR, left, 0, MPI_COMM_WORLD, &status);
        printf("%sPE %d completed recv\n ", tag, my_id);
        fflush(stdout);
    }

    /* Exactly one barrier per rank after the loop: a collective call must
       be made the same number of times by every rank. */
    MPI_Barrier(MPI_COMM_WORLD);

    if (my_id == 0) {
        double elapsed_time = MPI_Wtime() - startwtime;
        /* Avoid dividing by zero if the timer did not advance. */
        double bandwidth = (elapsed_time > 0.0)
                               ? 2.0 * max_msgs * msg_size / elapsed_time
                               : 0.0;

        printf("Number, size of messages: %3d , %3d \n", max_msgs, msg_size);
        fflush(stdout);
        printf("Wallclock time     = %f seconds\n", elapsed_time);
        fflush(stdout);
        printf("Bandwidth          = %f bytes per second\n", bandwidth);
        fflush(stdout);
    }

EXIT:
    free(message);      /* still NULL on the early-exit paths; free(NULL) is a no-op */
    MPI_Finalize();
    return 0;
}

要么在每一个条件分支里都调用一次barrier,要么在所有分支里都不调用。

可以运行的示例(仍有可能死锁)

> mpirun -n 3 ./a.out 1 1

my_id: 0, max_msgs: 1
my_id: 1, max_msgs: 1
my_id: 2, max_msgs: 1
my_id: 0, barrier 1
PE 0 about to send
 PE 0 completed send
 my_id: 1, barrier 1
*PE 1 about to send
 my_id: 2, barrier 1
PE 2 about to send
 PE 2 completed send
 PE 2 about to recv
 PE 0 about to recv
 PE 0 completed recv
 *PE 1 completed send
 *PE 1 about to recv
 *PE 1 completed recv
 PE 2 completed recv
 Number, size of messages:   1 ,   1 
Wallclock time     = 0.000050 seconds
Bandwidth          = 40136.878906 bytes per second

示例输出

(示例输出即上文 mpirun 命令之后的内容。)