分段错误MPI创建结构

时间:2014-09-29 15:20:37

标签: c debugging struct runtime-error mpi

我正在编写一个解决N-queens问题的MPI程序,过程0应该部分解决问题并让其他进程完成它。我的程序编译,但它在创建结构类型和尝试发送结构后显示我的分段错误。 这发生在spawn_processes函数(由进程0执行,因此段错误发生在进程0中),同时尝试发送subProblemType

这是我的代码:

#include <stdio.h>
#include <mpi.h>


#define MAXBOARDSIZE 8

static int board_size=8;
typedef struct boardplus{
  int size; // board size
  int x; // this is where we need to restart
  int y; // idem
  int board[MAXBOARDSIZE]; // the actual board, padded to largest instance
} subproblem;




#define  INIT   1  // Message to client:  subproblem
#define  DATA   2  // Message from client with results
#define  EXIT   4  // Message from client with CPU time
                   // Also to client, giving permission to exit
static long int N_solutions;
int solution_count;

void spawn_processes(int rows[board_size],int y){
    //printf("_______________________");
    subproblem subP;
    int col,//column number to start from
        count,//number of solutions recieved from a worker
        nProc,//total number of processes
        proc,
        nActive,i;// number of active processes


    MPI_Status status;
    MPI_Datatype subProblemType;
    MPI_Datatype type[4]={MPI_INT,MPI_INT,MPI_INT,MPI_INT};
    int block_len[4]={1,1,1,MAXBOARDSIZE};
    MPI_Aint disp[4];


    MPI_Address(&subP,disp);
    MPI_Address(&subP.x,disp+1);
    MPI_Address(&subP.y,disp+2);
    MPI_Address(&subP.board,disp+3);
    int base=disp[0];
    for(i=0;i<4;i++) disp[i]-=base;

    MPI_Type_create_struct(4,block_len,disp,type,&subProblemType);
    MPI_Type_commit(&subProblemType);
    MPI_Comm_size(MPI_COMM_WORLD,&nProc);
    subP.size=board_size;
    subP.y=y;
    //subP.board=rows;
    for(i=0;i<y;i++)subP.board[i]=rows[i];

    printf("spawning processes  ....\n");

    for(col=0,proc=1;proc<nProc && col<board_size;proc++, col++){
        printf("sending to process %d \n ",proc);
        fflush(stdout);
       subP.x=col;
       MPI_Send(&subP,1,subProblemType,proc,INIT,MPI_COMM_WORLD);
    }

    nActive=proc-1;
    // Receive back results and send out new problems
    while(col<board_size){
        MPI_Recv(&count,1,MPI_INT,MPI_ANY_SOURCE,DATA,MPI_COMM_WORLD,&status);
        proc=status.MPI_SOURCE;
        printf("recieved from process %d, found %d solutions \n",proc,count);
        N_solutions+=count;
        subP.x=col++;
        MPI_Send(&subP,1,subProblemType,proc,INIT,MPI_COMM_WORLD);
    }

    // Finally, receive back pending results and send termination
    // indication (message with size of zero).
    subP.size=0;
    while(nActive>0){
        MPI_Recv(&count,1,MPI_INT,MPI_ANY_SOURCE,DATA,MPI_COMM_WORLD,&status);
        proc=status.MPI_SOURCE;
        printf("recieved from process %d, found %d solutions \n",proc,count);
        --nActive;
        N_solutions+=count;
        //send a subproblem with size=0 (termination messages)
        MPI_Send(&subP,1,subProblemType,proc,INIT,MPI_COMM_WORLD);
    }

    for (proc = 1; proc < nProc; proc++)  
        MPI_Send(&proc, 0, MPI_INT, proc, EXIT, MPI_COMM_WORLD);


}




void process_queens(int my_id){

    int root=0;
    subproblem subP;
    MPI_Status status;
    int rows[board_size];
    int x,y,i;  

    MPI_Datatype subProblemType;
    MPI_Datatype type[4]={MPI_INT,MPI_INT,MPI_INT,MPI_INT};
    int block_len[4]={1,1,1,MAXBOARDSIZE};
    MPI_Aint disp[4];


    MPI_Address(&subP,disp);
    MPI_Address(&subP.x,disp+1);
    MPI_Address(&subP.y,disp+2);
    MPI_Address(&subP.board,disp+3);
    int base=disp[0];

    for(i=0;i<4;i++) disp[i]-=base;

    MPI_Type_create_struct(4,block_len,disp,type,&subProblemType);
    MPI_Type_commit(&subProblemType);

    printf("process %d waiting to recieve a task\n",my_id);
    fflush(stdout);
    MPI_Recv(&subP,1,subProblemType,root,INIT,MPI_COMM_WORLD,&status);

    while(subP.size>0){
    x=subP.x;
    y=subP.y;
    for(i=0;i<y;i++)rows[i]=subP.board[i];
    //rows=subP.board;


    if(is_safe(rows,x,y)){
    rows[y]=x;
    n_queens_solver(rows,y+1);
    }
    MPI_Send(&N_solutions,1,MPI_INT,root,DATA,MPI_COMM_WORLD);
    }

    // Final hand-shake:  get permission to terminate
   MPI_Recv(&N_solutions, 0, MPI_INT, 0, EXIT, MPI_COMM_WORLD, &status);

}

int is_safe(int rows[board_size], int x, int y)  
{
    int i;
    if (y == 0)
            return 1;
    for (i=0; i < y; ++i) {
       if (rows[i] == x || rows[i] == x + y - i || rows[i] == x - y +i)
            return 0;
    } 
    return 1;
}



void n_queens_solver(int rows[board_size], int y) 
{
    int x;

    for (x=0; x < board_size; ++x) {
        if (is_safe(rows, x, y)) {
            rows[y] = x;
            if (y == board_size-1) {

        ++N_solutions;
        }
            else
              n_queens_solver(rows, y+1);
        }
    }
}


void n_queens_expander(int rows[board_size], int y, int expand_levels)
{
  int x;
 if(y == expand_levels-1){

    spawn_processes(rows,y);

    }
else{
     for (x=0; x < board_size; ++x) {
    if (is_safe(rows, x, y)) 
      {
    rows[y] = x;
    n_queens_expander(rows, y+1, expand_levels-1);
      }
  }
  }
}



int main(int argc,char *argv[]) {

int rows[board_size];
//int expand_levels=1;
int numproc,my_id;
MPI_Status status;
MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&numproc);
    MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
    //printf("number of processes:%d \n",numproc);
    if(my_id==0){
        //printf("process 0 starting...\n");
        n_queens_expander(rows,0,1);
    }
    else{

        process_queens(my_id);
}
MPI_Finalize();

return 0;
}

3 个答案:

答案 0 :(得分:0)

虽然我不熟悉MPI,但是预感和一些快速的谷歌搜索建议您需要在结构中声明该数组的类型,而不是仅仅将其作为整数传递。*将其作为整数传递可能只是将指向数组中第一项的指针发送到远程计算机,这可能是远程计算机上的一些虚假内存地址,当远程计算机使用该地址时会导致段错误。

看一下这个答案:Creating an MPI_Datatype for a structure containing pointers。你可能能够解决这个基本想法。注意 - 从简短的略读中,Hristo Iliev在该主题中的回答可能是结构上更好的方法。

*我假设这就是MPI_Datatype type[4]={MPI_INT,MPI_INT,MPI_INT,MPI_INT};行的含义

答案 1 :(得分:0)

你有这个:

static int board_size=8;
typedef struct boardplus{
  int size; // board size
  int x; // this is where we need to restart
  int y; // idem
 int board[MAXBOARDSIZE]; // the actual board, padded to largest instance
} subproblem;

并制作如下类型:

MPI_Datatype subProblemType;
MPI_Datatype type[4]={MPI_INT,MPI_INT,MPI_INT,MPI_INT};
int block_len[4]={1,1,1,MAXBOARDSIZE};
MPI_Aint disp[4];


MPI_Address(&subP,disp);
MPI_Address(&subP.x,disp+1);
MPI_Address(&subP.y,disp+2);
MPI_Address(&subP.board,disp+3);
int base=disp[0];
for(i=0;i<4;i++) disp[i]-=base;

MPI_Type_create_struct(4,block_len,disp,type,&subProblemType);

我对你的位移修正持怀疑态度。更常见的是,处理MPI_Address类型的方法是使用MPI_BOTTOM作为缓冲区。因此,不是发送subP,您的发送将如下所示:

MPI_Send(MPI_BOTTOM,1,subProblemType,proc,INIT,MPI_COMM_WORLD);

答案 2 :(得分:0)

你的错误是对的......

MPI_Address(&subP,disp);
MPI_Address(&subP.x,disp+1);
MPI_Address(&subP.y,disp+2);
MPI_Address(&subP.board,disp+3);
int base=disp[0]; // <--------------------- HERE
for(i=0;i<4;i++) disp[i]-=base;

在LP64系统(包括运行OS X,FreeBSD,Solaris或Linux的64位x86系统)上,MPI_Aint长度为8个字节,而int长度仅为4个字节。 subP是一个堆栈变量,x64上主线程的堆栈位于虚拟地址空间的高位,因此在base的赋值中发生截断,计算出的位移与实际的。

解决方案:base应为MPI_Aint类型。

解决方案2:

for(i=1;i<4;i++) disp[i]-=disp[0];
disp[0] = 0;

所有排名都存在同样的问题,但由于主人崩溃而不发送数据,工作人员MPI_Recv从未写入内存,因此不会发生段错误。

请记住始终使用-Wall进行编译,并注意编译器生成的警告消息。