Updating a matrix in MPI Fortran

Asked: 2016-12-13 10:57:44

Tags: parallel-processing fortran mpi

I am running the following MPI Fortran code, in which I generate a matrix on each processor. Each matrix value is then incremented by 1 and the updated piece is sent to the root processor. Finally, the full matrix is printed after assembly. My problem is on the root processor: the matrix does not get updated. Why is that? Run the code with four processors to better understand my problem.


I am generating the matrix as follows:

    PROGRAM MAIN
    include "mpif.h"
    parameter (nx = 4)
    parameter (ny = 4)
    parameter (tsteps = 5)
    real*8    a(nx,ny),b(nx,ny)
    integer   rows,cols
    integer   myid, myid1,Root,source,numprocs
    integer   it,comm2d,ierr,req
    integer   sx, ex, sy, ey
    integer   dims(2),coord(2)
    logical   periods(2)
    integer status(MPI_STATUS_SIZE)
    data periods/2*.false./

    Root = 0
    CALL MPI_INIT( ierr )
    CALL MPI_COMM_RANK(MPI_COMM_WORLD,myid1,ierr)
    CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
c       Get a new communicator for a decomposition of the domain.  
c       Let MPI find a "good" decomposition
    dims(1) = 0
    dims(2) = 0
    CALL MPI_DIMS_CREATE(numprocs,2,dims,ierr)
    if (myid1.EQ.Root) then
        print *,'dimensions:',dims(1),dims(2)
    endif
    CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true.,
     &                    comm2d,ierr)
c       Get my position in this communicator
c       CALL MPI_COMM_RANK(comm2d,myid,ierr)
c       Compute the decomposition
    CALL fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
    rows = ex-sx+1 
    cols = ey-sy+1  
c       Initialize the a matrix
    do  i= sx,ex
        do j=sy,ey
          a(i,j) = (i-1)+(j-1)
        enddo
    enddo    
    do it = 1,tsteps 
       do  i= sx,ex
           do j=sy,ey
              a(i,j) = a(i,j)+1
           enddo
       enddo
C     Send the results to the root processor
    call MPI_ISEND(sx,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(ex,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(sy,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(ey,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
     &                   Root,1,comm2d,req,ierr )
c    Receive the results from the other processors
    if (myid1.EQ.Root) then
       do source = 0,numprocs-1
          call MPI_RECV(sx,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(ex,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(sy,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(ey,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION, 
     &                   source,1,comm2d,status,ierr)
          a(sx:ex,sy:ey) = a(sx:ex,sy:ey) 
          call MPI_Wait(req, status, ierr) 
       enddo
       endif
       if (myid1.EQ.Root) then
c      print the results
       print *, 'time step=',it
        do 90 i=1,nx
          do 80 j = 1,ny
             write(*,70)a(i,j)
  70        format(2x,f8.2,$)
  80      continue
          print *, ' '
  90    continue      
       endif
     enddo
C      Cleanup goes here.
      CALL MPI_Comm_free( comm2d, ierr )
30    CALL MPI_FINALIZE(ierr)

      STOP
      END
C******************************************************* 
      subroutine fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      integer   comm2d
      integer   nx,ny,sx,ex,sy,ey
      integer   dims(2),coords(2),ierr
      logical   periods(2)
c Get (i,j) position of a processor from Cartesian topology.
      CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
C Decomposition in first (ie. X) direction
      CALL MPE_DECOMP1D(nx,dims(1),coords(1),sx,ex)
C Decomposition in second (ie. Y) direction
      CALL MPE_DECOMP1D(ny,dims(2),coords(2),sy,ey)
      return
      end
c********************************************************************* 
      SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
      integer n,numprocs,myid,s,e,nlocal,deficit
      nlocal  = n / numprocs
      s       = myid * nlocal + 1
      deficit = mod(n,numprocs)
      s       = s + min(myid,deficit)
C Give one more slice to processors
      if (myid .lt. deficit) then
          nlocal = nlocal + 1
      endif
      e = s + nlocal - 1
      if (e .gt. n .or. myid .eq. numprocs-1) e = n
      return
      end

I update the matrix A by adding 1 inside the loop, send and receive the pieces of A, and print the result to the screen. The printed matrix shows no update in A(1:2,1:2).


1 Answer:

Answer 0 (score: 0):

I found the error in my code. The indices sx, ex, sy, ey were being overwritten on the root while it was sending to Root, so the matrix was not getting updated. I have corrected the code; a short sketch of the fix comes first, then the full listing.
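The essence of the fix, as a minimal self-contained sketch (a toy two-integer exchange, not the original decomposition; the names rx, ry, the tags, and the rank-derived values are illustrative only): the root must receive into variables distinct from the ones its own pending nonblocking sends still reference, and every MPI_ISEND handle should eventually be completed, e.g. with MPI_WAITALL.

    C     Minimal sketch: receive into separate variables on the root
    C     and complete all nonblocking sends.  Illustrative only.
          PROGRAM FIX_SKETCH
          include "mpif.h"
          integer reqs(2), stats(MPI_STATUS_SIZE,2)
          integer myid, numprocs, source, ierr
          integer sx, sy, rx, ry

          call MPI_INIT(ierr)
          call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
          call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)

          sx = myid + 1
          sy = myid + 2
    C     Every rank (the root included) sends its two values to
    C     rank 0, keeping one request handle per MPI_ISEND.
          call MPI_ISEND(sx,1,MPI_INTEGER,0,1,MPI_COMM_WORLD,
         &               reqs(1),ierr)
          call MPI_ISEND(sy,1,MPI_INTEGER,0,2,MPI_COMM_WORLD,
         &               reqs(2),ierr)

          if (myid .eq. 0) then
    C        Receive into rx, ry: the root's own sx, sy stay intact
    C        while its sends to itself are still pending.
             do source = 0, numprocs-1
                call MPI_RECV(rx,1,MPI_INTEGER,source,1,
         &                    MPI_COMM_WORLD,MPI_STATUS_IGNORE,ierr)
                call MPI_RECV(ry,1,MPI_INTEGER,source,2,
         &                    MPI_COMM_WORLD,MPI_STATUS_IGNORE,ierr)
                print *, 'from rank', source, ':', rx, ry
             enddo
          endif

    C     Complete both sends before sx, sy are reused.
          call MPI_WAITALL(2,reqs,stats,ierr)
          call MPI_FINALIZE(ierr)
          END

With the handles collected in an array, MPI_WAITALL both completes the transfers and releases the requests; reusing a single handle for several MPI_ISENDs, as in the listings, leaks all but the last request.

The full corrected code: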

    PROGRAM MAIN
    include "mpif.h"
    parameter (nx = 4)
    parameter (ny = 4)
    parameter (tsteps = 5)
    real*8    a(nx,ny),b(nx,ny)
    integer   rows,cols
    integer   myid, myid1,Root,source,numprocs
    integer   it,comm2d,ierr,req
    integer   sx, ex, sy, ey
    integer   sx0, ex0, sy0, ey0
    integer   dims(2),coord(2)
    logical   periods(2)
    integer status(MPI_STATUS_SIZE)
    data periods/2*.false./

    Root = 0
    CALL MPI_INIT( ierr )
    CALL MPI_COMM_RANK(MPI_COMM_WORLD,myid1,ierr)
    CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
c       Get a new communicator for a decomposition of the domain.  
c       Let MPI find a "good" decomposition
    dims(1) = 0
    dims(2) = 0
    CALL MPI_DIMS_CREATE(numprocs,2,dims,ierr)
    if (myid1.EQ.Root) then
        print *,'dimensions:',dims(1),dims(2)
    endif
    CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true.,
     &                    comm2d,ierr)
c       Get my position in this communicator
c       CALL MPI_COMM_RANK(comm2d,myid,ierr)
c       Compute the decomposition
    CALL fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
    rows = ex-sx+1 
    cols = ey-sy+1  
c       Initialize the a matrix
    do  i= sx,ex
        do j=sy,ey
          a(i,j) = (i-1)+(j-1)
        enddo
    enddo    
    do it = 1,tsteps 
       do  i= sx,ex
           do j=sy,ey
              a(i,j) = a(i,j)+1
           enddo
       enddo
C     Send the results to the root processor
    call MPI_ISEND(sx,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(ex,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(sy,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(ey,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
     &                   Root,1,comm2d,req,ierr )
c    Receive the results from the other processors
    if (myid1.EQ.Root) then
       do source = 0,numprocs-1
          call MPI_RECV(sx0,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(ex0,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(sy0,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(ey0,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
c         The receive count must match the sender's block size; it is
c         computed from the received extents, not from the root's own
c         rows/cols (those differ for uneven decompositions).
          call MPI_RECV(a(sx0:ex0,sy0:ey0),
     &                   (ex0-sx0+1)*(ey0-sy0+1),MPI_DOUBLE_PRECISION,
     &                   source,1,comm2d,status,ierr)
          a(sx0:ex0,sy0:ey0) = a(sx0:ex0,sy0:ey0) 
          call MPI_Wait(req, status, ierr) 
       enddo
       endif
       if (myid1.EQ.Root) then
c      print the results
       print *, 'time step=',it
        do 90 i=1,nx
          do 80 j = 1,ny
             write(*,70)a(i,j)
  70        format(2x,f8.2,$)
  80      continue
          print *, ' '
  90    continue      
       endif
     enddo
C      Cleanup goes here.
      CALL MPI_Comm_free( comm2d, ierr )
30    CALL MPI_FINALIZE(ierr)

      STOP
      END
C******************************************************* 
      subroutine fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      integer   comm2d
      integer   nx,ny,sx,ex,sy,ey
      integer   dims(2),coords(2),ierr
      logical   periods(2)
c Get (i,j) position of a processor from Cartesian topology.
      CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
C Decomposition in first (ie. X) direction
      CALL MPE_DECOMP1D(nx,dims(1),coords(1),sx,ex)
C Decomposition in second (ie. Y) direction
      CALL MPE_DECOMP1D(ny,dims(2),coords(2),sy,ey)
      return
      end
c********************************************************************* 
      SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
      integer n,numprocs,myid,s,e,nlocal,deficit
      nlocal  = n / numprocs
      s       = myid * nlocal + 1
      deficit = mod(n,numprocs)
      s       = s + min(myid,deficit)
C Give one more slice to processors
      if (myid .lt. deficit) then
          nlocal = nlocal + 1
      endif
      e = s + nlocal - 1
      if (e .gt. n .or. myid .eq. numprocs-1) e = n
      return
      end
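
As a closing aside: for this assemble-on-root pattern, a collective is usually more idiomatic than hand-rolled sends and receives. Below is a minimal sketch using MPI_GATHERV, under the simplifying assumption that each rank owns exactly one column of the 4x4 matrix (run it with 4 processes); the buffer name acol and the rank-valued fill are illustrative, not taken from the program above.

    C     Sketch: gather one matrix column per rank onto the root with
    C     MPI_GATHERV.  Assumes numprocs = ny = 4; illustrative only.
          PROGRAM GATHER_SKETCH
          include "mpif.h"
          parameter (nx = 4, ny = 4)
          real*8  a(nx,ny), acol(nx)
          integer counts(ny), displs(ny)
          integer myid, numprocs, ierr, i, j

          call MPI_INIT(ierr)
          call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
          call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)

    C     Each rank fills its own column with its rank number.
          do i = 1, nx
             acol(i) = dble(myid)
          enddo

    C     Column j is contiguous in column-major storage: nx elements
    C     starting at offset (j-1)*nx of the receive buffer.
          do j = 1, numprocs
             counts(j) = nx
             displs(j) = (j-1)*nx
          enddo

          call MPI_GATHERV(acol,nx,MPI_DOUBLE_PRECISION,
         &                 a,counts,displs,MPI_DOUBLE_PRECISION,
         &                 0,MPI_COMM_WORLD,ierr)

          if (myid .eq. 0) then
             do i = 1, nx
                write(*,'(4f8.2)') (a(i,j), j=1,ny)
             enddo
          endif
          call MPI_FINALIZE(ierr)
          END

The collective removes all of the index bookkeeping and request management above. For genuine 2-D blocks one would describe each block with MPI_TYPE_CREATE_SUBARRAY and gather those instead, which is beyond this sketch.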