输出不如预期mpi_gather

时间:2016-12-26 11:04:59

标签: fortran mpi

收集矩阵(4x4全局矩阵)后,我没有按预期获得输出。我正在生成一个全局矩阵:

global =  [0           1           2           3
           1           2           3           4
           2           3           4           5
           3           4           5           6 ]

尝试将其拆分为2X2子矩阵并收集使用 mpi_cart_create创建的2D拓扑。我正在打印并打印矩阵。我期待在 global_recv 中再次使用相同的矩阵。但在收集完我的矩阵之后会收集这样的信息:

global_recv = [ 0           2           2           4
                1           3           3           5
                1           3           3           5
                2           4           4           6]

我必须做出哪些改变以及我失踪的地方?我先尝试使用 mpi_gahterv ,但没有成功。我也希望使用 mpi_gatherv 来实现相同的功能。以下是代码:

PROGRAM MAIN
    implicit none
    include "mpif.h"
    integer, parameter:: nx = 4         ! global number of rows 
    integer, parameter:: ny = 4         ! global number of columns                           
    integer, parameter:: Root = 0
    integer global(nx,ny),global_recv(nx,ny)
    integer,allocatable::loc(:,:)       ! local matrix 
    integer rows,cols                   ! rows and columns in local matrix 
    integer,allocatable::counts(:),displs(:)
    integer myid,numprocs
    integer comm2d,ierr,req
    integer sx, ex, sy, ey
    integer dims(2),coord(2)
    logical periods(2)
    integer status(MPI_STATUS_SIZE)
    data periods/2*.false./
    integer i,j
! Initialize mpi
    CALL MPI_INIT( ierr )
    CALL MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
    CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
! Get a new communicator for a decomposition of the domain.  
! Let MPI find a "good" decomposition
    dims(1) = 0
    dims(2) = 0
    CALL MPI_DIMS_CREATE(numprocs,2,dims,ierr)
    CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true.,   &
                         comm2d,ierr)
! Get my position in this communicator
    CALL MPI_COMM_RANK(comm2d,myid,ierr)
    if (myid==Root) then
        print *,'dimensions:',dims(1),dims(2)
    endif
! Compute the decomposition
    CALL fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
    rows = ex-sx+1 
    cols = ey-sy+1  
! Initialize the a matrix
   do  i=1,nx
    do j=1,ny
       global(i,j) = (i-1)+(j-1)
     enddo
   enddo
! print global matrix 
   if (myid.EQ.Root) then
   print *, 'Global matrix :'
      do i=1,nx
         print *, global(i,:)
      enddo
    endif  
! get local matrix 
   allocate(loc(rows,cols))     
   loc = global(sx:ex,sy:ey)
   print *, myid,loc
! Build counts and displs for mpi_gatherv
   allocate(counts(numprocs),displs(numprocs))
   !dx = rows + (cols-1)*size(M,1);
   displs(1) = 0
   do j=1,numprocs
     counts(j) = rows*cols
    if((j-1).ne.0) displs(j) = displs(j-1) + counts(j-1)
  enddo

! Recieve the results using mpi_gather    
!   CALL MPI_GATHERV(loc,cols,MPI_INT,     &   
!                   b,counts,displs,MPI_INT,root,   &
!                   MPI_COMM_WORLD,ierr)         

    CALL MPI_GATHER(loc,rows*cols,MPI_INT, &  
                     global_recv,rows*cols,MPI_INT,   &  
                     Root, comm2d, ierr)
!      print the results
    if (myid.EQ.Root) then
    print *, 'Global recieved matrix:'
      do i=1,nx
         print *, global_recv(i,:)
      enddo
    endif
!      Cleanup goes here.
      CALL MPI_COMM_FREE( comm2d, ierr )
      CALL MPI_FINALIZE(ierr)

      STOP
      END
!******************************************************* 
      subroutine fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      integer   comm2d
      integer   nx,ny,sx,ex,sy,ey
      integer   dims(2),coords(2),ierr
      logical   periods(2)
! Get (i,j) position of a processor from Cartesian topology.
      CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
! Decomposition in first (ie. X) direction
      CALL MPE_DECOMP1D(nx,dims(1),coords(1),sx,ex)
! Decomposition in second (ie. Y) direction
      CALL MPE_DECOMP1D(ny,dims(2),coords(2),sy,ey)
      return
      end
!*********************************************************************
      SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
      integer n,numprocs,myid,s,e,nlocal,deficit
      nlocal  = n / numprocs
      s       = myid * nlocal + 1
      deficit = mod(n,numprocs)
      s       = s + min(myid,deficit)
! Give one more slice to processors
      if (myid .lt. deficit) then
          nlocal = nlocal + 1
      endif
      e = s + nlocal - 1
      if (e .gt. n .or. myid .eq. numprocs-1) e = n
      return
      end

0 个答案:

没有答案