Question

我建立了一个使用MPI_PACK，MPI_SEND，MPI_RECV分发fortran派生类型的最小示例，并且还交换了它们的边界以测试{{1} }}派生类型。

该代码可以正常工作，但是如果我在代码中间放置MPI_SENDRECV语句，则会显示一些奇怪的行为，这归因于某些内存损坏，而在MPI_PACKED下该代码可以正常工作代码末尾的语句。 deallocate语句在deallocate脚本的左侧标记为dellocate。

代码流是

1）(*)整个派生类型。

2）分发main，MPI_PACK和MPI_SEND，以恢复派生类型结构。

3）MPI_RECV分布式本地派生类型的边界。

4）使用MPI_UNPACK

在相邻处理器之间交换边界

我已经输入了与测试完全相同的代码，因此它们可以像MPI_PACK一样很好地进行编译，并且该问题可以完全重现。以下结果是MPI_SENDRECV的输出。

mpif90 mod_data_structure.f90 main.f90 -o main

mpirun -np 2 main

在代码中间插入module mod_data_structure implicit none type type_cell real(selected_real_kind(15,307)):: xc(2) real(selected_real_kind(15,307)):: values_c(8) integer :: flag_boundary end type type_cell type type_cell_list type(type_cell) :: cell(13,13) end type type_cell_list type type_cell_list_local type(type_cell),allocatable :: cell(:,:) end type type_cell_list_local end module mod_data_structure，输出的一部分如下所示，该代码按我的预期工作。

program main
  use MPI
  use mod_data_structure
  implicit none

  integer,parameter          :: nxmax = 9, nymax = 9, nbc = 2
  integer                    :: i, j, k, ii, jj
  type(type_cell_list)       :: A
  type(type_cell_list_local) :: A_local
  type(type_cell)            :: acell
  character(len=20)          :: write_fmt

  ! MPI variables
  integer                    :: n_proc, my_id, ierr, source, dest
  integer                    :: tag, tag_send, tag_recv
  integer                    :: status ( MPI_STATUS_SIZE ), &
                                status_l ( MPI_STATUS_SIZE ), &
                                status_r ( MPI_STATUS_SIZE )
  integer,allocatable        :: local_size(:), local_start(:)
  real(selected_real_kind(15,307)):: tmp
  character,allocatable      :: buffer(:), buffer_l(:), buffer_lg(:), buffer_r(:), buffer_rg(:) 
  integer                    :: bufsize, bufsize_gc
  integer                    :: left_proc, right_proc
  integer                    :: DBL_SIZE, INT_SIZE, position_local
  integer                    :: position_l, position_r
  integer,allocatable        :: position(:)

  call MPI_INIT ( ierr )
  call MPI_COMM_RANK ( MPI_COMM_WORLD, my_id,  ierr )
  call MPI_COMM_SIZE ( MPI_COMM_WORLD, n_proc, ierr )

  call MPI_PACK_SIZE(1,MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,DBL_SIZE,ierr)
  call MPI_PACK_SIZE(1,MPI_INTEGER         ,MPI_COMM_WORLD,INT_SIZE,ierr)

  ! Construct the derived data types
  if ( my_id .eq. 0 ) then

    do i = 1,nxmax+2*nbc
      do j = 1,nymax+2*nbc
        A%cell(i,j)%flag_boundary = 0
        do k =  1,8
          A%cell(i,j)%values_c(k) = 0.d0
        enddo
        do k = 1,2
          A%cell(i,j)%xc(k) = 0.d0
        enddo
      enddo
    enddo

    do i = 1+nbc,nxmax+nbc
      do j = 1+nbc,nymax+nbc
        ii = i - nbc
        jj = j - nbc
        A%cell(i,j)%flag_boundary = 10*ii + jj
        do k = 1,8
          A%cell(i,j)%values_c(k) = 1.d1*ii + jj + 0.1d0*k
        enddo
        do k = 1,2
          A%cell(i,j)%xc(k) = 1.d1*ii + jj + 0.1d0*k
        enddo
      enddo
    enddo

    write(write_fmt, '(a,i,a)') '(',nymax+2*nbc,'i3)'
    write(*,*) 'my_id ', my_id
    write(*,*) 'Total flag_boundary'
    do i = 1,nxmax+2*nbc
      write(*,write_fmt) A%cell(i,:)%flag_boundary
    enddo
    write(*,*) ' '

  endif

  !*** Test MPI_PACK and MPI_SEND / MPI_RECV
  ! Prepare for the distribution
  allocate ( local_size(n_proc), local_start(n_proc), position(n_proc) )
  local_size  = 0
  local_start = 1

  tmp  = (nymax+2*nbc) / n_proc
  ! 'local_size'
  do i = 1,n_proc-1
    local_size(i) = ceiling(tmp)
  enddo
  local_size(n_proc) = nymax + 2*nbc - (n_proc - 1)*ceiling(tmp)

  allocate ( A_local%cell(nxmax+2*nbc,local_size(my_id+1)) )  ! ###

  ! 'local_start'
  do i = 1,n_proc-1
    local_start(i+1:n_proc) = local_start(i+1:n_proc) + local_size(i)
  enddo

  ! allocate 'buffer'
  bufsize = maxval(local_size) * ( nxmax + 2*nbc ) * ( (8+2)*DBL_SIZE + (1)*INT_SIZE )
  allocate ( buffer(bufsize) )

  position = 0
  if ( my_id .eq. 0 ) then

    ! Assign 'A_local' for 'my_id .eq. 0' itself
    do j = 1, local_size(my_id+1)
      do i = 1, nxmax+2*nbc
        A_local%cell(i,j) = A%cell(i,j)
      enddo
    enddo

    do k = 2, n_proc ! w/o 'my_id .eq. 0' itself
      do j = local_start(k), local_start(k) + local_size(k) - 1
        do i = 1,nxmax+2*nbc
          acell = A%cell(i,j)
          call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
          call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
          call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
        enddo
      enddo

      dest = k-1 ! ###
      tag  = k-1
      call MPI_SEND (buffer, bufsize, MPI_PACKED, dest, tag, MPI_COMM_WORLD, ierr )

    enddo

  else ! ( my_id .ne. 0 ) then

    source = 0
    tag    = my_id
    call MPI_RECV (buffer, bufsize, MPI_PACKED, source, tag, MPI_COMM_WORLD, status, ierr )

    position_local = 0
    do j = 1, local_size(my_id+1)
      do i = 1, nxmax+2*nbc
        call MPI_UNPACK (buffer, bufsize, position_local, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer, bufsize, position_local, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer, bufsize, position_local, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)

        A_local%cell(i,j) = acell 
      enddo
    enddo

  endif

(*)!deallocate ( buffer ) 

  do k = 1,n_proc
    if ( my_id .eq. (k-1) ) then
      write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i3)'
      write(*,*) ' Before MPI_SENDRECV'
      write(*,*) 'my_id ', my_id
      write(*,*) 'cols  ', local_size(my_id+1)
      do i = 1,nxmax+2*nbc
        write(*,write_fmt) A_local%cell(i,:)%flag_boundary
      enddo
      write(*,*) ' '
    endif
    !call MPI_BARRIER ( MPI_COMM_WORLD, ierr )    
  enddo

  ! Test MPI_SENDRECV 
  bufsize_gc = nbc * ( nxmax + 2*nbc ) * ( (8+2)*DBL_SIZE + (1)*INT_SIZE )
  allocate ( buffer_l(bufsize_gc), buffer_lg(bufsize_gc), buffer_r(bufsize_gc), buffer_rg(bufsize_gc) )

  ! 'left_proc'
  if ( my_id .eq. 0 ) then
    left_proc = MPI_PROC_NULL
  else ! ( my_id .ne. 0 ) then
    left_proc = my_id - 1
  endif

  ! 'right_proc'
  if ( my_id .eq. n_proc-1 ) then
    right_proc = MPI_PROC_NULL
  else ! ( my_id .ne. n_proc - 1 )
    right_proc = my_id + 1
  endif

  ! pack 'buffer_l' & 'buffer_r'
  position_l = 0
  do j = 1,nbc
    do i = 1,nxmax+2*nbc
      acell = A_local%cell(i,j)
      call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
    enddo
  enddo

  position_r = 0
  do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)-nbc+nbc
    do i = 1,nxmax+2*nbc
      acell = A_local%cell(i,j)
      call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
    enddo
  enddo

  tag_send = my_id
  tag_recv = right_proc
  call MPI_SENDRECV (buffer_l,  bufsize_gc, MPI_PACKED, left_proc,  0, &
                     buffer_rg, bufsize_gc, MPI_PACKED, right_proc, 0, &
                     MPI_COMM_WORLD, status_l, ierr )

  tag_send = my_id
  tag_recv = left_proc
  call MPI_SENDRECV (buffer_r,  bufsize_gc, MPI_PACKED, right_proc,  0, &
                     buffer_lg, bufsize_gc, MPI_PACKED, left_proc,   0, &
                     MPI_COMM_WORLD, status_r, ierr )

  ! fill left boundary
  position_l = 0 
  do j = 1,nbc
    do i = 1,nxmax+2*nbc
      call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)

      A_local%cell(i,j) = acell
    enddo
  enddo

  ! fill right boundary
  position_r = 0
  do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)-nbc+nbc
    do i = 1,nxmax+2*nbc
      call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)

      A_local%cell(i,j) = acell
    enddo
  enddo

  do k = 1,n_proc
    if ( my_id .eq. (k-1) ) then
      write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i3)'
      write(*,*) ' After MPI_SENDRECV'
      write(*,*) 'my_id ', my_id
      write(*,*) 'cols  ', local_size(my_id+1)
      do i = 1,nxmax+2*nbc
        write(*,write_fmt) A_local%cell(i,:)%flag_boundary
      enddo
      write(*,*) ' '
    endif
    !call MPI_BARRIER ( MPI_COMM_WORLD, ierr )    
  enddo

(*)deallocate ( buffer )
  deallocate ( buffer_l, buffer_lg, buffer_r, buffer_rg )

  call MPI_FINALIZE ( ierr )

end program

但是，如果我在代码中间找到deallocate(buffer)，则输出的相同部分看起来像这样。

  After MPI_SENDRECV
 my_id            0
 cols             6
  0  0  0  0  0  0
  0  0  0  0  0  0
  0  0 11 12 15 16
  0  0 21 22 25 26
  0  0 31 32 35 36
  0  0 41 42 45 46
  0  0 51 52 55 56
  0  0 61 62 65 66
  0  0 71 72 75 76
  0  0 81 82 85 86
  0  0 91 92 95 96
  0  0  0  0  0  0
  0  0  0  0  0  0

如果我更改deallocate(buffer)格式以显示更多的整数位数，则它们是10位数的整数，类似于After MPI_SENDRECV my_id 0 cols 6 0 0 0 0 0 0 ****** 0 0 0 0 ****** 11 12 15 16 ****** 21 22 25 26 ****** 31 32 35 36 ****** 41 42 45 46 ****** 51 52 55 56 ****** 61 62 65 66 ****** 71 72 75 76 ****** 81 82 85 86 0 0 91 92 95 96 0 0 0 0 0 0 0 0 0 0 0 0。

我在Segmentation Fault using MPI_Sendrecv with a 2D contiguous array处已经看到过这种问题，但是对于将write变量声明放到我在其余代码中不会使用的原因，尚无明确答案代码中间出现了这样的问题。

此问题源自何处？

Answer 1

我不确定我是否会回答这个问题，但是我对派生类型的实际经验是，使用不同的MPI实现处理它们的最安全方法是不使用任何高级MPI构造并使所有派生类型继续工作Fortran方面。

例如，我将编写git+https://github.com/mixmaxhq/mongo-cursor-pagination.git#master函数来打包和扩展您的数据类型：

pure

然后仅使用MPI_send和MPI_recv为MPI comms写两个包装，像这样用于标量：

integer, parameter :: TYPE_CELL_BUFSIZE = 11

pure function type_cell_pack(this) result(buffer)
   class(type_cell), intent(in) :: this
   real(real64) :: buffer(TYPE_CELL_BUFSIZE)

   buffer(1:8) = this%values_c
   buffer(9:10) = this%xc

   ! It will be faster to not use a separate MPI command for this only
   buffer(11) = real(this%flag_boundary,real64)

end function type_cell_pack

pure type(type_cell) function type_cell_unpack(buffer) result(this)
   real(real64), intent(in) :: buffer(TYPE_CELL_BUFSIZE)

   this%values_c = buffer(1:8)
   this%xc = buffer(9:10)
   this%flag_boundary = nint(buffer(11))

end function type_cell_unpack

以下是数组数量：

subroutine type_cell_send_scalar(this,fromCpu,toCpu,mpiWorld)
   type(type_cell), intent(inout) :: this
   integer, intent(in) :: fromCpu,toCpu,mpiWorld

   real(real64) :: mpibuf(TYPE_CELL_BUFSIZE)

   if (cpuid==fromCpu) then 
      mpibuf = type_cell_pack(this)
      call mpi_send(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
   elseif (cpuid==toCpu) then 
      call mpi_recv(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
      this = type_cell_unpack(mpibuf)
   endif

end subroutine type_cell_send_scalar

使用fortran90 deallocate语句导致内存损坏

1 个答案: