为什么MPI会让irecv无法正常工作?

时间:2012-11-17 01:17:55

标签: segmentation-fault fortran mpi

我有一组用户定义的类型。两个处理器中的每一个都唯一地写入阵列的一部分,最后,我想要组合阵列。为什么我的代码会产生分段错误?

program type_derived_recv_send
  use nodeinfo
  implicit none
  include 'mpif.h'
  integer(4) :: ierr
  integer(4) :: istatus1(MPI_STATUS_SIZE)
  integer(4) :: istatus2(MPI_STATUS_SIZE)
  integer(4) :: i,j,k,l
  integer(4) :: iblock(8)
  integer(4) :: idisp(8)
  integer(4) :: itype(8)
  integer(4) :: inewtype
  integer(4) :: iextent
  integer(4) :: itag, isend, irecv
  integer(4) :: n,m,p,q

    TYPE :: newXYZ
        real :: x, u
        real :: y, v
        real :: z, w
        real,dimension(3) :: uvw     
    END TYPE

    TYPE (newXYZ) :: xyz, xyz2

    TYPE (newXYZ), allocatable :: xyzArray(:,:,:,:) 
    n = 1
    m = 1 
    p = 1
    q = 2

    allocate( xyzArray(m,n,p,q) )


  call MPI_INIT(ierr)
  icomm = MPI_COMM_WORLD
  call MPI_COMM_SIZE(icomm,nproc,ierr)
  call MPI_COMM_RANK(icomm,iproc,ierr)

do l=1,q
    do k=1,p
        do j=1,n
            do i=1,m 
                    xyzArray(i,j,k,m)%x = 0
                    xyzArray(i,j,k,m)%y = 0
                    xyzArray(i,j,k,m)%z = 0
                    xyzArray(i,j,k,m)%u = 0
                    xyzArray(i,j,k,m)%v = 0
                    xyzArray(i,j,k,m)%w = 0
                    xyzArray(i,j,k,m)%uvw = (/0,0,0/)
end do
end do
end do
end do

    if (iproc == 1) then
        xyzArray(1,1,1,2)%x = 1.1
        xyzArray(1,1,1,2)%y = 2.1
        xyzArray(1,1,1,2)%z = 3.1
        xyzArray(1,1,1,2)%u = 4.1
        xyzArray(1,1,1,2)%v = 5.1
        xyzArray(1,1,1,2)%w = 6.1
        xyzArray(1,1,1,2)%uvw = (/10.1,10.1,10.1/)
    else
        xyzArray(1,1,1,1)%x = 0.1
        xyzArray(1,1,1,1)%y = 0.1       
        xyzArray(1,1,1,1)%z = 0.1
        xyzArray(1,1,1,1)%u = 0.1
        xyzArray(1,1,1,1)%v = 0.1       
        xyzArray(1,1,1,1)%w = 0.1 
        xyzArray(1,1,1,1)%uvw = (/0.1,0.1,0.1/)
    endif

    itag = 1
    xyz = xyzArray(1,1,1,2)
    xyz2 = xyzArray(1,1,1,1)

    if ( iproc == 1) call MPI_ISEND(xyzArray(1,1,1,2),36,MPI_INTEGER,0,1,icomm,isend,ierr)
    if ( iproc == 0) call MPI_IRECV(xyzArray(1,1,1,2),36,MPI_INTEGER,1,1,icomm,irecv,ierr)
    call MPI_WAIT(isend,istatus1,ierr)
    call MPI_WAIT(irecv,istatus2,ierr)

     if ( iproc == 0 )write(6,*)'iproc = ',iproc
     if ( iproc == 0 ) write(6,*)xyzArray

     if ( iproc == 1 )write(6,*)'iproc = ',iproc
     if ( iproc == 1 ) write(6,*)xyzArray

  call MPI_FINALIZE(ierr)
end program type_derived_recv_send

输出:

--------------------------------------------------------------------------
 newtype extent =           36
 xyz =    0.000000       0.000000       0.000000       0.000000       0.000000       0.000000       0.000000       0.000000       0.000000    
 xyz2 =   0.1000000      0.1000000      0.1000000      0.1000000      0.1000000      0.1000000      0.1000000      0.1000000      0.1000000    
[west0334:09950] *** Process received signal ***
[west0334:09950] Signal: Segmentation fault (11)
[west0334:09950] Signal code: Address not mapped (1)
[west0334:09950] Failing at address: 0x40
[west0334:09950] [ 0] /lib64/libpthread.so.0 [0x3e00e0eb70]
[west0334:09950] [ 1] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi.so.0(ompi_request_wait+0x17) [0x2b33f8628607]
[west0334:09950] [ 2] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi.so.0(PMPI_Wait+0x88) [0x2b33f86566e8]
[west0334:09950] [ 3] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi_f77.so.0(PMPI_WAIT+0x3a) [0x2b33f83eac8a]
[west0334:09950] [ 4] ./type_derived_recv_send.x(MAIN__+0x730) [0x401cd0]
 newtype extent =           36
 xyz =    1.100000       4.100000       2.100000       5.100000       3.100000       6.100000       10.10000       10.10000       10.10000    
 xyz2 =    0.000000       0.000000       0.000000       0.000000       0.000000       0.000000       0.000000       0.000000       0.000000    
[west0334:09951] *** Process received signal ***
[west0334:09951] Signal: Segmentation fault (11)
[west0334:09951] Signal code: Address not mapped (1)
[west0334:09951] Failing at address: 0x40
[west0334:09950] [ 5] ./type_derived_recv_send.x(main+0xe) [0x4049ae]
[west0334:09950] [ 6] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3e0021d994]
[west0334:09950] [ 7] ./type_derived_recv_send.x [0x4014e9]
[west0334:09950] *** End of error message ***
[west0334:09951] [ 0] /lib64/libpthread.so.0 [0x3e00e0eb70]
[west0334:09951] [ 1] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi.so.0(ompi_request_wait+0x17) [0x2b73c1aca607]
[west0334:09951] [ 2] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi.so.0(PMPI_Wait+0x88) [0x2b73c1af86e8]
[west0334:09951] [ 3] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi_f77.so.0(PMPI_WAIT+0x3a) [0x2b73c188cc8a]
[west0334:09951] [ 4] ./type_derived_recv_send.x(MAIN__+0x748) [0x401ce8]
[west0334:09951] [ 5] ./type_derived_recv_send.x(main+0xe) [0x4049ae]
[west0334:09951] [ 6] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3e0021d994]
[west0334:09951] [ 7] ./type_derived_recv_send.x [0x4014e9]
[west0334:09951] *** End of error message ***
Nov 16 20:17:14 2012 9939 4 7.06 handleTSRegisterTerm(): TS reports task <0> pid <9950> on host<west0334> killed or core dumped
Job  /lsf/7.0/linux2.6-glibc2.3-x86_64/bin/openmpi_wrapper ./type_derived_recv_send.x

2 个答案:

答案 0 :(得分:4)

错误在于:

if ( iproc == 1) call MPI_ISEND(xyzArray(1,1,1,2),36,MPI_INTEGER,0,1,icomm,isend,ierr)
if ( iproc == 0) call MPI_IRECV(xyzArray(1,1,1,2),36,MPI_INTEGER,1,1,icomm,irecv,ierr)
call MPI_WAIT(isend,istatus1,ierr)
call MPI_WAIT(irecv,istatus2,ierr)

irecv请求句柄在排名0中保持未初始化状态,而isend请求句柄仍保留在排名1中。这里有两个选项 - 将MPI_WAIT调用与其相应的非阻塞MPI操作分组,或者在此之前将isendirecv初始化为MPI_REQUEST_NULL

解决方案1:

if ( iproc == 1) then
   call MPI_ISEND(xyzArray(1,1,1,2),36,MPI_INTEGER,0,1,icomm,isend,ierr)
   call MPI_WAIT(isend,istatus1,ierr)
end if
if ( iproc == 0) then
   call MPI_IRECV(xyzArray(1,1,1,2),36,MPI_INTEGER,1,1,icomm,irecv,ierr)
   call MPI_WAIT(irecv,istatus2,ierr)
end if

解决方案2(简化):

ireq = MPI_REQUEST_NULL
if ( iproc == 1) call MPI_ISEND(xyzArray(1,1,1,2),36,MPI_INTEGER,0,1,icomm,ireq,ierr)
if ( iproc == 0) call MPI_IRECV(xyzArray(1,1,1,2),36,MPI_INTEGER,1,1,icomm,ireq,ierr)
call MPI_WAIT(ireq,istatus,ierr)

您实际上不需要两个单独的请求句柄以及两个单独的状态数组。使用空句柄初始化ireq时,此代码也可以使用超过2个MPI进程正常运行。

请注意,为了便于移植,您应该使用MPI_TYPE_CREATE_STRUCT构建派生结构数据类型,并使用它来发送和接收TYPE(newXYZ)的元素。

答案 1 :(得分:0)

为什么要发送一个带有整数MPI数据类型的9 real的类型?为什么不发送9 MPI_REAL或36 MPI_BYTE