Question

我是MPI的新手，不确定这是否是正确的方法，甚至不确定是否应该以这种方式使用MPI。我的问题如下：

我有一个指针数组，其元素指向用户定义的结构。根据每个进程中发生的情况，数组的元素可以是NULL，也可以是指向用户定义结构实例的指针。现在我需要让各进程上数组的对应元素通过MPI相互通信。这是有问题的，因为在某些进程上这些元素并不存在。

我应该详细说明：该结构有一个函数指针，指向需要进行通信的函数。如果元素存在，则调用该函数；如果元素不存在，则不调用。

我的想法：为数组的每个元素创建一个专用的MPI通信器，其中包含该元素不为NULL的所有进程。然后在通信时为相应的元素使用这个通信器。

我可以创建一个MPI通信器的“数组”，一个用于数组的每个元素吗？然后为每个元素引用MPI_COMM_ARRAY [i]？或者我完全处于死胡同，不应该使用NULL作为数组条目？什么是“干净”的编码方式？

下面是我目前代码的简化版本。如果某个单元（cell）恰好存在于所有进程中，它就能正常工作；否则就会失败。示例代码：

#include <stdio.h> 
#include <stdlib.h>
#include <mpi.h>


/* Forward declarations. Cells are passed around as void * and cast back
   to struct Cell * inside each function. */

/* Allocates a new cell; returns it as an opaque pointer. */
void * createcell();
/* Cell method implementations, installed into the struct's function
   pointers by createcell(). */
void Cell_givenumberofvertices(void * _self, int * NbOfVertices);
void Cell_givenumberofvertices_parallel(void * _self, int * NbOfVertices);
void Cell_addvertex(void * _self);
/* Public wrappers that dispatch through the cell's function pointers. */
void addvertex(void * _self);
void getnumberofvertices(void * _self, int * NbOfVertices);


/*
 * One grid cell. Besides its vertex counter the struct carries two
 * function pointers that act as the cell's "methods"; both receive the
 * cell itself as an opaque void * first argument.
 */
struct Cell{
  /* Number of vertices recorded in this cell on the local process. */
  unsigned int NbOfVertices;
  /* Writes a vertex count for the cell into *count. */
  void (* givenumberofvertices)(void * cell, int * count);
  /* Registers one more vertex in the cell. */
  void (* addvertex)(void * cell);
};

/*
 * Allocate a new, empty cell and install its method pointers.
 *
 * The vertex-count query is bound to the serial implementation when the
 * program runs on a single process and to the MPI implementation
 * otherwise.
 *
 * Returns the new cell as an opaque pointer, or NULL if the allocation
 * failed. The caller owns the memory and releases it with free().
 */
void * createcell(){
  struct Cell * self = calloc(1, sizeof *self);
  int world_size = 1;

  if(!self) return NULL;

  MPI_Comm_size(MPI_COMM_WORLD,&world_size);

  self->NbOfVertices = 0;
  self->addvertex = Cell_addvertex;

  /* MPI_COMM_WORLD always contains at least one process, so a size of 0
     never occurs; the serial case is a size of 1. */
  if(world_size<=1) self->givenumberofvertices = Cell_givenumberofvertices;
  else self->givenumberofvertices = Cell_givenumberofvertices_parallel;

  return self;
}

void Cell_givenumberofvertices(void * _self, int * NbOfVertices){
   struct Cell * self = _self;
   * NbOfVertices = self->NbOfVertices;
   return;
}

void Cell_givenumberofvertices_parallel(void * _self, int * NbOfVertices){
  struct Cell * self = _self;
  int world_size, world_rank;
  int i;
  int * NbVertxOnProcess;
  int totalnumberofvertices=0;

  MPI_Comm_size(MPI_COMM_WORLD,&world_size);
  MPI_Comm_rank(MPI_COMM_WORLD,&world_rank);
  NbVertxOnProcess = (int *) malloc(world_size*sizeof(int));

  MPI_Gather(&(self->NbOfVertices),1,MPI_UNSIGNED,NbVertxOnProcess,1,MPI_INT,0,MPI_COMM_WORLD);

  for(i=0;i<world_size;i++) totalnumberofvertices+=NbVertxOnProcess[i];

   * NbOfVertices = totalnumberofvertices;
   return;
}

/* Cell method: bump the cell's local vertex counter by one. */
void Cell_addvertex(void * _self){
  struct Cell * cell = _self;

  cell->NbOfVertices += 1;
}

 /* Public wrapper: invoke the add-vertex method stored in the cell. */
 void addvertex(void * _self){
   struct Cell * cell = _self;

   (*cell->addvertex)(cell);
 }

 /* Public wrapper: invoke the vertex-count method stored in the cell,
    which writes its result into *NbOfVertices. */
 void getnumberofvertices(void * _self, int * NbOfVertices){
   struct Cell * cell = _self;

   (*cell->givenumberofvertices)(cell, NbOfVertices);
 }



int main(int argc, char *argv[]) {
  void ** cells;
  int i,j;
  const int numberofcells = 100;
  const int numberofvertices = 100;
  const float domainlength = 115.4;
  float grid[numberofcells];
  float vertexcoordinates[numberofvertices];
  int world_rank;

  MPI_Init(NULL,NULL);

  /* create array of Cell pointers */
  cells = (void **) calloc(numberofcells,sizeof(void *));

  /* create grid */
  for(i=0;i<numberofcells;i++){
    grid[i]=domainlength/numberofcells*(i+1);
  }
  /* generate random vertex coordinates */
  MPI_Comm_rank(MPI_COMM_WORLD,&world_rank);
  srand((unsigned int) world_rank);
  for(i=0;i<numberofvertices;i++){
    vertexcoordinates[i]=((float)rand()/(float)(RAND_MAX)) * domainlength;
  }
  /* find the cell the vertex is in */
  for(i=0;i<numberofvertices;i++){
    for(j=0;j<numberofcells;j++){
      float lb, ub;
      if(j==0) lb=0.0;
      else lb=grid[j-1];
      ub = grid[j];
      if(lb<vertexcoordinates[i]&&vertexcoordinates[i]<ub){
         if(!cells[j]){
          cells[j]=createcell();
         }
        addvertex(cells[j]);
      }
    }
  }

  for(i=0;i<numberofcells;i++){
     if(cells[i]){
      int NbVertxInCell;
      getnumberofvertices(cells[i], &NbVertxInCell);
      printf("%i vertices in cell number %i \n",NbVertxInCell,i);
    }
  }
  MPI_Finalize();
  return 0;
}

Answer 1

我仍然无法全面了解您要实现的目标，也不了解此类设计的理由。

无论如何，这里有一些想法。

首先，请注意，在调用集合操作时，来自通信器的所有任务都应该调用它，否则某些任务可能会挂起。

其次，在Cell_givenumberofvertices_parallel()中，您可以将MPI_Gather()替换为MPI_Reduce()。由于所有进程（rank）都会在主循环中打印结果，我相信您真正需要的是MPI_Allreduce()。

我还认为您需要在struct Cell中添加一个MPI_Comm类型的comm字段，否则并非所有进程（rank）都会调用MPI_COMM_WORLD上的集合操作，程序就会挂起。

第三，我认为不需要函数指针。

如果一个单元格只在一个任务上，那么comm应该是MPI_COMM_SELF，你可以使用MPI_Allreduce()，所以不需要两个子程序。

最后但并非最不重要的是，我没有看到函数如何随时间返回不同的值，因此顶点的总数可以在初始化时计算，并存储为新字段。

例如，可能是

/* Suggested cell layout: plain data only, no function pointers. */
struct Cell{
  /* vertices of this cell held by the local process */
  unsigned int localNbOfVertices;
  /* vertices of this cell summed over all processes */
  unsigned int totalNbOfVertices;
  /* communicator associated with this cell */
  MPI_Comm comm;
};

在您的主循环中，您可以

  /* Every process runs every iteration, so both collectives below are
     called by all ranks of MPI_COMM_WORLD whether or not the cell exists
     locally. NOTE: cells[i]-> requires cells to be declared as
     struct Cell ** rather than void **. */
  for(i=0;i<numberofcells;i++){
     unsigned local=0, total;
     MPI_Comm comm;
     /* a process without the cell contributes 0 to the sum */
     if(cells[i]) local=cells[i]->localNbOfVertices;
     MPI_Allreduce(&local, &total, 1, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD);
     if(cells[i]) cells[i]->totalNbOfVertices = total;
     printf("%u vertices in cell number %i \n",total,i);
     /* owners share color 0 and end up in one communicator; the others
        pass MPI_UNDEFINED and receive MPI_COMM_NULL in comm */
     if(cells[i]) MPI_Comm_split(MPI_COMM_WORLD, 0, world_rank, &cells[i]->comm); else MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, world_rank, &comm);
  }

mpi communicator用于结构数组的每个元素

1 个答案: