我写了一个简单的测试程序,以便在2d域(使用Fortran)中使用MPI实现FFTW。域为'Ny x Nx'宽,并在第二个('x')索引中分区。
在(我认为)正确地声明并分配了变量和计划之后,我先调用fftw_mpi的r2c_2d函数,再用fftw_mpi的c2r_2d函数把其输出变换回来,以检查能否得到原始输入。r2c_2d部分似乎工作正常。但是,用c2r_2d函数把输出变换回来之后,即使不考虑归一化因子,我得到的也不是原始输入:结果数组在索引(:,j)处出现“零”,其中j为“Ny / 2”的倍数。我究竟做错了什么?谢谢!
以下是代码摘录:
! Round-trip test of a distributed 2D real FFT with FFTW-MPI: fill a real
! array with random numbers, run the r2c plan followed by the c2r plan, and
! print row 1 of the array before and after so the two can be compared.
Program TEST
use, intrinsic :: iso_c_binding
Implicit none
include 'mpif.h'
include 'fftw3-mpi.f03'
! Global transform sizes passed to the FFTW size queries and planners below.
Integer*8,parameter :: nx=16, ny=16
!MPI
! ipe and npe receive the results of mpi_comm_rank and mpi_comm_size.
! NOTE(review): these are integer*8; mpif.h routines normally expect
! default-kind integers -- confirm against the MPI implementation in use.
integer*8 :: ipe,npe
! NOTE(review): mpi_realtype and istat are declared but never referenced.
integer*8 ::mpi_realtype,icomm=mpi_comm_world,istat(mpi_status_size),ierr
! FFTW VARIABLES DECLARATION
! p1/p2 hold the forward and backward plans; cdatar/cdatac are the raw
! buffers returned by the FFTW allocators.
type(C_PTR) :: p1, p2, cdatar, cdatac
! Local sizes/offsets returned by fftw_mpi_local_size_2d for the two layouts.
integer(C_INTPTR_T) :: alloc_local, local_L, local_L_offset, local_M, local_M_offset
real(C_DOUBLE), pointer :: faux(:,:) ! real input 2d function
complex(C_DOUBLE), pointer :: gaux(:,:) ! complex output of 2d FFTW (transposed)
! MPI initialization
call mpi_init(ierr)
call mpi_comm_rank(icomm,ipe,ierr)
call mpi_comm_size(icomm,npe,ierr)
! FFTW ALLOCATIONS AND PLANS
call fftw_mpi_init()
! Size query for the complex array, with (ny/2+1) as the first argument;
! the buffer is then viewed as gaux(nx, local_L).
alloc_local = fftw_mpi_local_size_2d(ny/2+1,nx &
,MPI_COMM_WORLD, local_L, local_L_offset)
cdatac = fftw_alloc_complex(alloc_local)
call c_f_pointer(cdatac, gaux, [nx,local_L]) !transposed
! Size query for the real array; 2*alloc_local reals are allocated.
alloc_local = fftw_mpi_local_size_2d(nx,ny/2+1, MPI_COMM_WORLD, &
local_M, local_M_offset)
cdatar = fftw_alloc_real(2*alloc_local)
! NOTE(review): faux is shaped [ny,local_M], but FFTW's MPI r2c/c2r layout
! pads the first (fastest) dimension to 2*(ny/2+1). Mapping the buffer without
! that padding looks like the cause of the incorrect round-trip output.
call c_f_pointer(cdatar, faux, [ny,local_M])
! Create plans
! Forward plan writes transposed output; backward plan reads transposed input.
p1 = fftw_mpi_plan_dft_r2c_2d(nx,ny,faux,gaux, MPI_COMM_WORLD, &
ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_OUT))
p2 = fftw_mpi_plan_dft_c2r_2d(nx,ny,gaux,faux, MPI_COMM_WORLD, &
ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_IN))
! EXECUTE FFTW
! The input is filled only after both plans have been created.
call random_number(faux)
print *, "real input:", real(faux(1,:))
call fftw_mpi_execute_dft_r2c(p1,faux,gaux)
call fftw_mpi_execute_dft_c2r(p2, gaux, faux)
! Divide by nx*ny so the output is comparable with the input
! (the round trip is not normalised).
print *, "real output:", real(faux(1,:))/(nx*ny)
call fftw_destroy_plan(p1)
call fftw_destroy_plan(p2)
call mpi_finalize(ierr)
End Program TEST
答案 0 :(得分:3)
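问题是由fftw所需的填充(padding)引起的:
虽然实数数据在概念上是 $n_0 \times n_1 \times n_2 \times \cdots \times n_{d-1}$,但它在物理上存储为 $n_0 \times n_1 \times n_2 \times \cdots \times [2(n_{d-1}/2+1)]$ 的数组,其中最后一个维度经过填充,使其与复数输出的大小相同。这与原位(in-place)串行r2c / c2r接口非常相似(参见“实数数据的多维DFT”),但在MPI中,即使对于非原位(out-of-place)数据也需要填充。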
因此,16x16变换的输入数组实际上是16x18的数组。每行末尾多出的两个数的值在实空间中没有意义。然而,这些多出的数不能被忽略,因为C指针要被转换为Fortran二维数组:
call c_f_pointer(cdatar, faux, [2*(ny/2+1),local_M])
额外的数字仍会打印在每行的末尾。可以对数组进行切片以避免打印这些毫无价值的值:
print *, "real input:", real(faux(1:ny,:))
...
print *, "real output:", real(faux(1:ny,:))/(nx*ny)
以下是基于您的代码以及问题“How to do a fftw3 MPI "transposed" 2D transform if possible at all?”整理的完整代码。可以用以下命令编译:mpif90 main.f90 -o main -I/usr/include -L/usr/lib -lfftw3_mpi -lfftw3 -lm
并用以下命令运行:mpirun -np 2 main
!> Round-trip test of a distributed 2D real FFT with FFTW-MPI.
!!
!! A real array is filled with random numbers, transformed with an r2c plan
!! (transposed output) and transformed back with a c2r plan (transposed
!! input). The unpadded part of the local real array is printed before and
!! after; the second print is divided by nx*ny because the round trip is
!! not normalised.
program TEST
  use, intrinsic :: iso_c_binding
  implicit none
  include 'mpif.h'
  include 'fftw3-mpi.f03'

  ! Global transform sizes, declared with the integer kind the FFTW-MPI
  ! Fortran interface uses for dimensions.
  integer(C_INTPTR_T), parameter :: nx = 4, ny = 8

  ! MPI bookkeeping. mpif.h routines take default-kind integers, so these
  ! must not be integer*8.
  integer :: ipe, npe, ierr

  ! FFTW handles: p1/p2 are the forward/backward plans, cdatar/cdatac the
  ! raw buffers behind faux/gaux.
  type(C_PTR) :: p1, p2, cdatar, cdatac
  integer(C_INTPTR_T) :: alloc_local, local_L, local_L_offset
  integer(C_INTPTR_T) :: local_M, local_M_offset
  real(C_DOUBLE), pointer :: faux(:,:)            ! real data, padded first dimension
  complex(C_DOUBLE_COMPLEX), pointer :: gaux(:,:) ! complex spectrum (transposed)

  ! MPI initialization
  call mpi_init(ierr)
  call mpi_comm_rank(MPI_COMM_WORLD, ipe, ierr)
  call mpi_comm_size(MPI_COMM_WORLD, npe, ierr)

  ! FFTW allocations
  call fftw_mpi_init()

  ! Complex array in its transposed layout, viewed as gaux(nx, local_L).
  alloc_local = fftw_mpi_local_size_2d(ny/2+1, nx, MPI_COMM_WORLD, &
                                       local_L, local_L_offset)
  cdatac = fftw_alloc_complex(alloc_local)
  call c_f_pointer(cdatac, gaux, [nx, local_L])

  ! Real array: alloc_local counts complex elements, so twice as many reals
  ! are allocated. The first dimension carries the r2c padding 2*(ny/2+1);
  ! only rows 1:ny hold meaningful data.
  alloc_local = fftw_mpi_local_size_2d(nx, ny/2+1, MPI_COMM_WORLD, &
                                       local_M, local_M_offset)
  cdatar = fftw_alloc_real(2*alloc_local)
  call c_f_pointer(cdatar, faux, [2*(ny/2+1), local_M])

  ! Create the plans before filling the input: FFTW_MEASURE planning may
  ! overwrite the arrays.
  p1 = fftw_mpi_plan_dft_r2c_2d(nx, ny, faux, gaux, MPI_COMM_WORLD, &
                                ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_OUT))
  p2 = fftw_mpi_plan_dft_c2r_2d(nx, ny, gaux, faux, MPI_COMM_WORLD, &
                                ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_IN))

  ! Execute the forward and backward transforms. The padding rows are also
  ! filled with random numbers, which is harmless because they are ignored.
  call random_number(faux)
  print *, "real input:", real(faux(1:ny,:))
  call fftw_mpi_execute_dft_r2c(p1, faux, gaux)
  call fftw_mpi_execute_dft_c2r(p2, gaux, faux)
  print *, "real output:", real(faux(1:ny,:))/(nx*ny)

  ! Release plans and buffers, then shut down FFTW-MPI and MPI.
  call fftw_destroy_plan(p1)
  call fftw_destroy_plan(p2)
  nullify(faux, gaux)
  call fftw_free(cdatar)
  call fftw_free(cdatac)
  call fftw_mpi_cleanup()
  call mpi_finalize(ierr)
end program TEST