我想用累积(MPI RMA)写入一个可以通过窗口访问的数组。为了便于检查,我写了每个单元格的数组索引。问题是,开始似乎总是正确的,但最终有时会出现错误的值:而不是其位置值,最后的条目被设置为最终索引。
我的代码:
Integer.class
我定义了一个新的MPI数据类型prec_mpi并使用了三个嵌套循环,因为我需要这个模式用于另一个(更大的)程序。使用整数来写入这个双精度数组也是有意的。
我输出了一个错误的输出,例如如下:
PROGRAM main
USE mpi
IMPLICIT NONE
REAL*8, PARAMETER :: prec = 1d-13
REAL(kind(prec)),ALLOCATABLE,DIMENSION(:) :: speicher1D
INTEGER :: stelle, ierror
INTEGER :: ix,iy,iz,nz,i
!--------------------------------------------------------------
!---------------MPI-Variablen----------------------------------
!--------------------------------------------------------------
INTEGER :: myRank = 0
INTEGER :: numProcs = 1
INTEGER, PARAMETER :: masterProc = 0
INTEGER :: s3Win1D,s3Win3D
INTEGER :: startIndex, endIndex
INTEGER(KIND=MPI_ADDRESS_KIND) :: sizeOfSchritt3D1
INTEGER(KIND=MPI_ADDRESS_KIND) :: s3TargetDisp
INTEGER :: prec_mpi, sizeOfprec_mpi
!--------------------------------------------------------------
!--------------MAIN-PROGRAM------------------------------------
!--------------------------------------------------------------
!----MPI-Initialisierung----
CALL MPI_INIT(ierror)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, myRank,ierror)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numProcs, ierror)
!Create new MPI-datatype
CALL MPI_TYPE_CREATE_F90_REAL(KIND(prec), MPI_UNDEFINED, prec_mpi,ierror)
CALL MPI_TYPE_SIZE(prec_mpi, sizeOfprec_mpi,ierror)
nz = 3
sizeOfSchritt3D1 = sizeOfprec_mpi*(nz*2+1)**3
ALLOCATE(speicher1D((nz*2+1)**3))
!------------MPI-RMA-Initialisierung--------------------------
CALL MPI_WIN_CREATE(speicher1D, sizeOfSchritt3D1, sizeOfprec_mpi,MPI_INFO_NULL,MPI_COMM_WORLD,s3Win1D, ierror)
!Determine each proc's area
startIndex = (nz*2+1)/numProcs*myRank-nz
endIndex = (nz*2+1)/numProcs*(myRank+1)-nz-1
IF(myRank+1 .EQ. numProcs) THEN
endIndex = nz
END IF
speicher1D = 0.0d0
CALL MPI_BARRIER(MPI_COMM_WORLD,ierror)
CALL MPI_WIN_FENCE(0,s3Win1D, ierror)
DO iz=startIndex, endIndex
DO iy=-nz,nz
DO ix=-nz,nz
stelle = 1 + (iz+nz)*(2*nz+1)**2 + (iy+nz)*2*nz+1)+ (ix+nz)
s3targetDisp = stelle-1
!WRITE(*,*) "Rank", myRank,"s3targetDisp: ",s3targetDisp, "index: ",stelle
!1D
CALL MPI_ACCUMULATE(REAL(stelle,kind(prec)), 1, prec_mpi, masterProc, s3targetDisp,1,prec_mpi, MPI_SUM,s3Win1D, ierror)
END DO
END DO
END DO
CALL MPI_WIN_FENCE(0,s3Win1D, ierror)
CALL MPI_BARRIER(MPI_COMM_WORLD, ierror)
DO i = 1,(nz*2+1)**3
IF(myRank .EQ. masterProc) THEN
WRITE(*,*) "Index: ",i,", entry: ",speicher1D(i)
END IF
END DO
DEALLOCATE(speicher1D)
CALL MPI_WIN_FREE(s3Win1D, ierror)
CALL MPI_FINALIZE(ierror)
END PROGRAM main
为什么条目会有这样的跳跃(在这种情况下是在索引275之后)?当我用嵌套循环中的[...]
Index: 250 , entry: 250.000000000000
Index: 251 , entry: 251.000000000000
Index: 252 , entry: 252.000000000000
Index: 253 , entry: 253.000000000000
Index: 254 , entry: 254.000000000000
Index: 255 , entry: 255.000000000000
Index: 256 , entry: 256.000000000000
Index: 257 , entry: 257.000000000000
Index: 258 , entry: 258.000000000000
Index: 259 , entry: 259.000000000000
Index: 260 , entry: 260.000000000000
Index: 261 , entry: 261.000000000000
Index: 262 , entry: 262.000000000000
Index: 263 , entry: 263.000000000000
Index: 264 , entry: 264.000000000000
Index: 265 , entry: 265.000000000000
Index: 266 , entry: 266.000000000000
Index: 267 , entry: 267.000000000000
Index: 268 , entry: 268.000000000000
Index: 269 , entry: 269.000000000000
Index: 270 , entry: 270.000000000000
Index: 271 , entry: 271.000000000000
Index: 272 , entry: 272.000000000000
Index: 273 , entry: 273.000000000000
Index: 274 , entry: 274.000000000000
Index: 275 , entry: 275.000000000000
Index: 276 , entry: 302.000000000000
Index: 277 , entry: 303.000000000000
Index: 278 , entry: 304.000000000000
Index: 279 , entry: 305.000000000000
Index: 280 , entry: 306.000000000000
Index: 281 , entry: 308.000000000000
Index: 282 , entry: 310.000000000000
Index: 283 , entry: 313.000000000000
Index: 284 , entry: 316.000000000000
Index: 285 , entry: 319.000000000000
Index: 286 , entry: 322.000000000000
Index: 287 , entry: 325.000000000000
Index: 288 , entry: 329.000000000000
Index: 289 , entry: 332.000000000000
Index: 290 , entry: 335.000000000000
Index: 291 , entry: 338.000000000000
Index: 292 , entry: 341.000000000000
Index: 293 , entry: 342.000000000000
Index: 294 , entry: 343.000000000000
Index: 295 , entry: 343.000000000000
Index: 296 , entry: 343.000000000000
Index: 297 , entry: 343.000000000000
Index: 298 , entry: 343.000000000000
Index: 299 , entry: 343.000000000000
Index: 300 , entry: 343.000000000000
[...]
Index: 341 , entry: 343.000000000000
Index: 342 , entry: 343.000000000000
Index: 343 , entry: 343.000000000000
检查时,stelle和s3TargetDisp的值似乎没问题。也许这是同步的一些问题......但是在嵌套循环中只是累积(没有混合RMA /本地)并且有一个关闭栅栏。