Partitioning a graph with ParMETIS

Time: 2019-02-06 14:10:39

Tags: c++ parallel-processing mpi csr metis

I am trying to partition my 8-node unweighted, undirected graph with ParMETIS on a 4-core laptop. I have the following code:

#include <cstdio>    // printf
#include <cstdlib>
#include "parmetis.h"

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int np = 4;
  idx_t xadj_[3];    // local CSR pointer: 2 vertices per rank
  idx_t adjncy_[5];  // local adjacency list (global vertex ids)
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (rank == 0)
  {
      xadj_[0] = 0;
      xadj_[1] = 2;
      xadj_[2] = 5;
      adjncy_[0] = 4;
      adjncy_[1] = 1;
      adjncy_[2] = 0;
      adjncy_[3] = 5;
      adjncy_[4] = 2;
  }
    if (rank == 1)
  {
      xadj_[0] = 0;
      xadj_[1] = 3;
      xadj_[2] = 5;
      adjncy_[0] = 1;
      adjncy_[1] = 6;
      adjncy_[2] = 3;
      adjncy_[3] = 2;
      adjncy_[4] = 7;
  }
    if (rank == 2)
  {
      xadj_[0] = 0;
      xadj_[1] = 2;
      xadj_[2] = 5;
      adjncy_[0] = 5;
      adjncy_[1] = 0;
      adjncy_[2] = 6;
      adjncy_[3] = 1;
      adjncy_[4] = 4;
  }
    if (rank == 3)
  {
      xadj_[0] = 0;
      xadj_[1] = 3;
      xadj_[2] = 5;
      adjncy_[0] = 7;
      adjncy_[1] = 2;
      adjncy_[2] = 5;
      adjncy_[3] = 3;
      adjncy_[4] = 6;
  }
  idx_t *xadj = xadj_;
  idx_t *adjncy = adjncy_;

  idx_t vtxdist_[] = {0,2,4,6,8};  // rank r owns global vertices [vtxdist[r], vtxdist[r+1])
  idx_t *vtxdist = vtxdist_;

  idx_t *vwgt = NULL;

  idx_t *adjwgt = NULL;

  idx_t wgtflag_[] = {0};
  idx_t *wgtflag = wgtflag_;

  idx_t numflag_[] = {0};
  idx_t *numflag = numflag_;

  idx_t ncon_[] = {1};
  idx_t *ncon = ncon_;

  idx_t nparts_[] = {np};
  idx_t *nparts = nparts_;

  real_t *tpwgts = new real_t[np*ncon[0]];                 // uniform target part weights
  for (int i = 0; i < np*ncon[0]; i++) tpwgts[i] = 1.0/np;

  real_t ubvec_[] = {1.05};
  real_t *ubvec = ubvec_;

  idx_t options_[] ={0, 0, 0};
  idx_t *options =options_;

  idx_t *edgecut;
  idx_t part[8];

  MPI_Comm comm_val=MPI_COMM_WORLD;
  MPI_Comm *comm=&comm_val;
  ParMETIS_V3_PartKway(vtxdist,xadj,adjncy, vwgt, adjwgt, wgtflag, numflag, ncon, nparts, tpwgts, ubvec, options, edgecut, part, comm);
  MPI_Barrier(comm_val);
  printf("Processor %d --- %d\n", rank,*edgecut);
    for (int i = rank*2 ; i < rank*2+2; i++)
    {
      printf("%d\n",part[i]);
    }
  MPI_Finalize();
  return 0;
}

[Image: the 8-vertex test graph, a 2×4 grid]
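To spell out the distributed CSR layout, here is how rank 0's arrays decode (the other ranks follow the same pattern):

// Rank 0 owns global vertices 0 and 1 (vtxdist[0] = 0 .. vtxdist[1] = 2).
// xadj_   = {0, 2, 5}: neighbors of local vertex 0 are adjncy_[0..1],
//                      neighbors of local vertex 1 are adjncy_[2..4].
// adjncy_ = {4, 1,  0, 5, 2}: vertex 0 is adjacent to 4 and 1,
//                             vertex 1 is adjacent to 0, 5 and 2.
// adjncy_ stores global vertex ids; xadj_ indexes only local vertices.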

For each rank (core) I set up the distributed CSR arrays and try to read back the result, but I get:

Processor 0 --- 6
0
0
Processor 1 --- 6
0
0
Processor 2 --- 6
2101207184
22080
Processor 3 --- 6
1904762080
22069

What am I doing wrong? Is it something about shared memory, or that every core has its own part[8]? Why do I get such strange output?

1 Answer:

Answer 0 (score: 0):

I found the error. I had misunderstood one thing: with 8 nodes and 4 cores, each core holds only its local slice of the partition vector, i.e. part[0] .. part[vtxdist[rank+1] - vtxdist[rank] - 1]. For example, with vtxdist = [0,1,4] and 2 cores, the first core (rank = 0) holds just part[0], while the second core (rank = 1) holds part[0], part[1] and part[2] (for global vertices 1, 2 and 3).
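In other words, part must be allocated per rank with one entry per local vertex. A minimal sketch of the sizing and the local-to-global mapping (variable names are mine, not ParMETIS API):

// Number of vertices owned by this rank: a half-open range of vtxdist.
idx_t nlocal = vtxdist[rank + 1] - vtxdist[rank];

// The partition vector needs one slot per local vertex only.
idx_t *part = new idx_t[nlocal];

// After ParMETIS_V3_PartKway, part[i] holds the partition assigned to
// global vertex vtxdist[rank] + i.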

#include <cstdio>    // printf
#include <cstdlib>
#include "parmetis.h"

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  idx_t np = 4;
  idx_t xadj_[3];
  idx_t adjncy_[5];
  int rank, npes;    // MPI_Comm_rank/MPI_Comm_size expect plain int
  MPI_Comm comm;
  MPI_Comm_dup(MPI_COMM_WORLD, &comm);
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &rank);
    if (rank == 0)
  {
      xadj_[0] = 0;
      xadj_[1] = 2;
      xadj_[2] = 5;
      adjncy_[0] = 4;
      adjncy_[1] = 1;
      adjncy_[2] = 0;
      adjncy_[3] = 5;
      adjncy_[4] = 2;
  }
    if (rank == 1)
  {
      xadj_[0] = 0;
      xadj_[1] = 3;
      xadj_[2] = 5;
      adjncy_[0] = 1;
      adjncy_[1] = 6;
      adjncy_[2] = 3;
      adjncy_[3] = 2;
      adjncy_[4] = 7;
  }
    if (rank == 2)
  {
      xadj_[0] = 0;
      xadj_[1] = 2;
      xadj_[2] = 5;
      adjncy_[0] = 5;
      adjncy_[1] = 0;
      adjncy_[2] = 6;
      adjncy_[3] = 1;
      adjncy_[4] = 4;
  }
    if (rank == 3)
  {
      xadj_[0] = 0;
      xadj_[1] = 3;
      xadj_[2] = 5;
      adjncy_[0] = 7;
      adjncy_[1] = 2;
      adjncy_[2] = 5;
      adjncy_[3] = 3;
      adjncy_[4] = 6;
  }
  idx_t *xadj = xadj_;
  idx_t *adjncy = adjncy_;

  idx_t vtxdist_[] = {0,2,4,6,8};  // rank r owns global vertices [vtxdist[r], vtxdist[r+1])
  idx_t *vtxdist = vtxdist_;

  idx_t *vwgt = NULL;

  idx_t *adjwgt = NULL;

  idx_t wgtflag = 0;

  idx_t numflag = 0;

  idx_t ncon_[] = {1};
  idx_t *ncon = ncon_;

  idx_t nparts_[] = {np};
  idx_t *nparts = nparts_;

  real_t *tpwgts = new real_t[np*ncon[0]];                 // uniform target part weights
  for (int i = 0; i < np*ncon[0]; i++) tpwgts[i] = 1.0/np;

  real_t ubvec_[] = {1.05};
  real_t *ubvec = ubvec_;

  idx_t options_[] ={0, 0, 0};
  idx_t *options =options_;

  idx_t edgecut;
  idx_t *part = new idx_t[vtxdist[rank+1] - vtxdist[rank]];  // one slot per local vertex

  ParMETIS_V3_PartKway(vtxdist,xadj,adjncy, vwgt, adjwgt, &wgtflag, &numflag, ncon, nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
  int rnvtxs,i,penum;
  MPI_Status status;
  if (rank == 0) {
    idx_t count = 0;
    for (i=0; i<vtxdist[1]; i++)
    {
      printf("part[%"PRIDX"] = %"PRIDX"\n", count, part[i]);
      count++;
    }
    for (penum=1; penum<npes; penum++) {
      rnvtxs = vtxdist[penum+1]-vtxdist[penum];
      idx_t *rpart = new idx_t[rnvtxs];
      // IDX_T is the MPI datatype matching idx_t, defined by parmetis.h
      MPI_Recv(rpart, rnvtxs, IDX_T, penum, 1, comm, &status);

      for (i=0; i<rnvtxs; i++)
        {
          printf("part[%"PRIDX"] = %"PRIDX"\n", count, rpart[i]);
          count++;
        }
      delete[] rpart;
    }
  }
  else
    MPI_Send(part, vtxdist[rank+1]-vtxdist[rank], IDX_T, 0, 1, comm);

  MPI_Finalize();
  return 0;
}
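The point-to-point gather above can also be done with a single collective. A minimal sketch using MPI_Gatherv (the allparts buffer and recvcounts/displs arrays are my own names, not ParMETIS API):

// Gather all local part arrays onto rank 0 with one collective call.
idx_t nlocal = vtxdist[rank + 1] - vtxdist[rank];
idx_t *allparts = (rank == 0) ? new idx_t[vtxdist[npes]] : NULL;

// recvcounts/displs come straight from vtxdist; MPI wants plain int.
int *recvcounts = new int[npes];
int *displs = new int[npes];
for (int p = 0; p < npes; p++) {
  recvcounts[p] = (int)(vtxdist[p + 1] - vtxdist[p]);
  displs[p] = (int)vtxdist[p];
}
MPI_Gatherv(part, (int)nlocal, IDX_T,
            allparts, recvcounts, displs, IDX_T, 0, comm);
// On rank 0, allparts[v] now holds the partition of global vertex v.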

I build it with the following Makefile:

ParMETIS_INCLUDES = /home/user/Documents/parmetis/include
METIS_INCLUDES = /home/user/Documents/metis/include
ParMETIS_LIBS = /home/user/Documents/parmetis/lib
METIS_LIBS = /home/user/Documents/metis/lib

INCLUDES = -I${ParMETIS_INCLUDES} -I${METIS_INCLUDES}
LFLAGS =  -L${ParMETIS_LIBS} -L${METIS_LIBS}

CC = mpic++

par: par.cpp
    ${CC}  -Wall -g $(INCLUDES) -o par.out par.cpp $(LFLAGS) -lparmetis -lmetis

clean:
    rm *.o *.out *~

and run it with:

mpiexec -np 4 ./par.out

I get:

part[0] = 0
part[1] = 0
part[2] = 3
part[3] = 1
part[4] = 2
part[5] = 2
part[6] = 3
part[7] = 1

As a cross-check, with the same graph written out as a METIS test file:

8 10
2 5
1 6 3
2 7 4
3 8
1 6
5 2 7
6 3 8
7 4
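(For reference, this file is in the METIS graph format: the header line gives the vertex and edge counts, 8 10, and line i lists the 1-based neighbors of vertex i.) A minimal sketch of a reader that loads such an unweighted file into the 0-based CSR arrays used above (my own helper, not part of the METIS API):

#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include "metis.h"   // for idx_t

// Read an unweighted METIS-format graph into 0-based CSR arrays.
bool read_metis_graph(const char *path,
                      std::vector<idx_t> &xadj,
                      std::vector<idx_t> &adjncy)
{
  std::ifstream in(path);
  if (!in) return false;

  std::string line;
  std::getline(in, line);                // header: "nvtxs nedges"
  std::istringstream header(line);
  idx_t nvtxs = 0, nedges = 0;
  header >> nvtxs >> nedges;

  xadj.assign(1, 0);                     // xadj[0] = 0
  for (idx_t v = 0; v < nvtxs; v++) {
    std::getline(in, line);              // neighbor list of vertex v+1
    std::istringstream ls(line);
    idx_t nbr;
    while (ls >> nbr)
      adjncy.push_back(nbr - 1);         // file is 1-based
    xadj.push_back((idx_t)adjncy.size());
  }
  return (idx_t)adjncy.size() == 2 * nedges;  // each edge listed twice
}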

run with the options:

mpiexec -np 4 ./parmetis test.graph 1 4 1 1 6 1

I get:

0
0
1
3
2
2
1
3

If I add the call:

ParMETIS_V3_RefineKway(vtxdist,xadj,adjncy, vwgt, adjwgt, &wgtflag, &numflag, ncon, nparts, tpwgts, ubvec, options, &edgecut, part, &comm);

after ParMETIS_V3_PartKway, I get:

part[0] = 0
part[1] = 0
part[2] = 1
part[3] = 1
part[4] = 2
part[5] = 2
part[6] = 3
part[7] = 3
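For completeness, the combined call sequence looks like this (a sketch based on the program above; note that, per the ParMETIS manual, ParMETIS_V3_RefineKway takes its initial partition from the current distribution of the graph across the processors, which is why the refined result keeps each rank's two vertices together):

// Initial k-way partition; the result is written into part[].
ParMETIS_V3_PartKway(vtxdist, xadj, adjncy, vwgt, adjwgt,
                     &wgtflag, &numflag, ncon, nparts,
                     tpwgts, ubvec, options, &edgecut, part, &comm);
if (rank == 0) printf("edge cut after PartKway:   %" PRIDX "\n", edgecut);

// Refinement pass with the identical argument list; the improved
// partition overwrites part[] and edgecut is updated.
ParMETIS_V3_RefineKway(vtxdist, xadj, adjncy, vwgt, adjwgt,
                       &wgtflag, &numflag, ncon, nparts,
                       tpwgts, ubvec, options, &edgecut, part, &comm);
if (rank == 0) printf("edge cut after RefineKway: %" PRIDX "\n", edgecut);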