Question

我写了一个简单的测试程序，以便在2d域（使用Fortran）中使用MPI实现FFTW。域为'Ny x Nx'宽，并在第二个（'x'）索引中分区。

在适当（我相信？）声明和分配变量和计划之后，我调用fftw_mpi r2c_2d函数，然后我用fftw_mpi c2r_2d将其输出转换回来，以检查我是否得到原始输入。 r2c_2d部分似乎工作正常。但是，在使用c2r_2d函数转换输出（除了标准化）后，我没有获得原始输入：结果向量在索引（：，j）处显示“零”，j对应于“Ny / 2”的倍数。我究竟做错了什么？谢谢！

以下是代码摘录：

Program TEST

use, intrinsic :: iso_c_binding

Implicit none

include 'mpif.h'
include 'fftw3-mpi.f03'

Integer*8,parameter :: nx=16, ny=16

!MPI
integer*8 :: ipe,npe
integer*8 ::mpi_realtype,icomm=mpi_comm_world,istat(mpi_status_size),ierr

! FFTW VARIABLES DECLARATION
type(C_PTR)           :: p1, p2, cdatar, cdatac
integer(C_INTPTR_T)   :: alloc_local, local_L, local_L_offset, local_M, local_M_offset
real(C_DOUBLE), pointer :: faux(:,:)   ! real input 2d function
complex(C_DOUBLE), pointer :: gaux(:,:) ! complex output of 2d FFTW (transposed)


! MPI initialization
call mpi_init(ierr)

call mpi_comm_rank(icomm,ipe,ierr)
call mpi_comm_size(icomm,npe,ierr)


! FFTW ALLOCATIONS AND PLANS

call fftw_mpi_init()


alloc_local = fftw_mpi_local_size_2d(ny/2+1,nx &
    ,MPI_COMM_WORLD, local_L, local_L_offset)


cdatac = fftw_alloc_complex(alloc_local)

call c_f_pointer(cdatac, gaux, [nx,local_L]) !transposed

alloc_local = fftw_mpi_local_size_2d(nx,ny/2+1, MPI_COMM_WORLD, &
    local_M, local_M_offset)


cdatar = fftw_alloc_real(2*alloc_local)

call c_f_pointer(cdatar, faux, [ny,local_M])

! Create plans

p1 = fftw_mpi_plan_dft_r2c_2d(nx,ny,faux,gaux, MPI_COMM_WORLD, &
        ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_OUT))


p2 = fftw_mpi_plan_dft_c2r_2d(nx,ny,gaux,faux, MPI_COMM_WORLD, &
        ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_IN))

! EXECUTE FFTW

call random_number(faux)


print *, "real input:", real(faux(1,:))

call fftw_mpi_execute_dft_r2c(p1,faux,gaux)

call fftw_mpi_execute_dft_c2r(p2, gaux, faux)

print *, "real output:", real(faux(1,:))/(nx*ny)

call fftw_destroy_plan(p1)
call fftw_destroy_plan(p2)


call  mpi_finalize(ierr)


End Program TEST

Answer 1

问题是由fftw：

所需的padding引起的

虽然实际数据在概念上是n0×n1×n2×...×nd-1，但它在物理上存储为n0×n1×n2×...×[2（nd-1/2 + 1）]数组，其中最后一个维度已经填充，使其与复杂输出的大小相同。这很像就地串行r2c / c2r接口（参见实际数据的多维DFT），但在MPI中，即使对于非现场数据也需要填充。

因此，16x16变换的输入数组因此是16x18阵列。在每行末尾的额外两个数字的值在真实空间中是无意义的。然而，这些额外的数字不能忘记，因为c指针被强制转换为fortran 2D数组：

call c_f_pointer(cdatar, faux, [2*(ny/2+1),local_M])

额外的数字仍会打印在每行的末尾。可以对数组进行切片以避免打印这些毫无价值的值：

print *, "real input:", real(faux(1:ny,:))
...
print *, "real output:", real(faux(1:ny,:))/(nx*ny)

以下是基于您和How to do a fftw3 MPI "transposed" 2D transform if possible at all?的完整代码。它可以由mpif90 main.f90 -o main -I/usr/include -L/usr/lib -lfftw3_mpi -lfftw3 -lm编译并由mpirun -np 2 main运行。

Program TEST

use, intrinsic :: iso_c_binding

Implicit none

include 'mpif.h'
include 'fftw3-mpi.f03'

Integer*8,parameter :: nx=4, ny=8

!MPI
integer*8 :: ipe,npe
integer*8 ::mpi_realtype,icomm=mpi_comm_world,istat(mpi_status_size),ierr

! FFTW VARIABLES DECLARATION
type(C_PTR)           :: p1, p2, cdatar, cdatac
integer(C_INTPTR_T)   :: alloc_local, local_L, local_L_offset, local_M, local_M_offset
real(C_DOUBLE), pointer :: faux(:,:)   ! real input 2d function
complex(C_DOUBLE), pointer :: gaux(:,:) ! complex output of 2d FFTW (transposed)


! MPI initialization
call mpi_init(ierr)

call mpi_comm_rank(icomm,ipe,ierr)
call mpi_comm_size(icomm,npe,ierr)


! FFTW ALLOCATIONS AND PLANS

call fftw_mpi_init()


alloc_local = fftw_mpi_local_size_2d(ny/2+1,nx &
    ,MPI_COMM_WORLD, local_L, local_L_offset)


cdatac = fftw_alloc_complex(alloc_local)

call c_f_pointer(cdatac, gaux, [nx,local_L]) !transposed

alloc_local = fftw_mpi_local_size_2d(nx,ny/2+1, MPI_COMM_WORLD, &
    local_M, local_M_offset)


cdatar = fftw_alloc_real(2*alloc_local)

call c_f_pointer(cdatar, faux, [2*(ny/2+1),local_M])

! Create plans

p1 = fftw_mpi_plan_dft_r2c_2d(nx,ny,faux,gaux, MPI_COMM_WORLD, &
        ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_OUT))


p2 = fftw_mpi_plan_dft_c2r_2d(nx,ny,gaux,faux, MPI_COMM_WORLD, &
        ior(FFTW_MEASURE, FFTW_MPI_TRANSPOSED_IN))

! EXECUTE FFTW

call random_number(faux)


print *, "real input:", real(faux(1:ny,:))

call fftw_mpi_execute_dft_r2c(p1,faux,gaux)

call fftw_mpi_execute_dft_c2r(p2, gaux, faux)

print *, "real output:", real(faux(1:ny,:))/(nx*ny)

call fftw_destroy_plan(p1)
call fftw_destroy_plan(p2)


call  mpi_finalize(ierr)


End Program TEST

通过fftw_mpi_r2c_2d和fftw_mpi_c2r_2d

1 个答案: