使用OpenMP,FFTW计算速度太慢

时间:2018-03-18 08:20:57

标签: fortran openmp fftw

我使用gfortran -O -mcmodel=medium test.f90 -lfftw3_omp -lfftw3 -lm -qopenmp编译了以下程序,并将fftw3.f添加到/usr/lib。我使用ulimit -s unlimited to run ./out因为我得到了堆栈溢出(Why Segmentation fault is happening in this openmp code?)。

program main 


Implicit none
include  'omp_lib.h'
include 'fftw3.f'
Integer i, j, k, iter
Integer,Parameter :: Nx    =381
Integer,Parameter :: Ny    =129 
Integer,Parameter :: Nz    =129
Integer, parameter :: N1 =  Ny-1
Integer, parameter :: N2 =  Nz-1
double precision in
dimension in(N1,N2)
double complex out
dimension out(N1/2 + 1, N2)
integer*8 plan
real*8    U(-2:Nx+2,-2:Ny+2,-2:Nz+2)
real*8    V(-2:Nx+2,-2:Ny+2,-2:Nz+2)
real*8    W(-2:Nx+2,-2:Ny+2,-2:Nz+2)
Real*8  ky_vis(0:Ny)
Real*8  kz_vis(0:Nz)
Complex*16  F_F1(-2:Nx+2,-2:Ny+2,-2:Nz+2)
Real*8,Parameter :: pi  = 3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825d0
Real*8,Parameter :: Ly  = 1.0d0 * 2.0d0 * pi
Real*8,Parameter :: Lz  = 1.0d0 * 2.0d0 * pi
Real*8 c1_Nfft
Integer,Parameter :: NyFFT = (Ny-1 + 2) / 2



c1_Nfft = 1.0d0 / dsqrt( Dble(Ny-1) * Dble(Nz-1) )

U= reshape((/(i, i=1,(Nx+5)*(Ny+5)*(Nz+5))/),shape(U))
V =  reshape((/(i, i=1,(Nx+5)*(Ny+5)*(Nz+5))/),shape(V))
W =  reshape((/(i, i=1,(Nx+5)*(Ny+5)*(Nz+5))/),shape(W))

   !$OMP PARALLEL DO PRIVATE(j) SHARED(ky_vis)
    Do j = 1, (Ny-1)/2+1   
      ky_vis(j) = ( 2.0d0 * pi /Ly ) * Dble(        j-1 )
    End Do
    !$OMP End PARALLEL DO

    !$OMP PARALLEL DO PRIVATE(j) SHARED(ky_vis)
    Do j=(Ny-1)/2+2,Ny-1
      ky_vis(j) = ( 2.0d0 * pi /Ly ) * Dble((Ny-1)-(j-1))
    end do
    !$OMP End PARALLEL DO

     !$OMP PARALLEL DO PRIVATE(k) SHARED(kz_vis)
    Do k = 1, (Nz-1)/2+1   
      kz_vis(k) = ( 2.0d0 * pi /Lz ) * Dble(        k-1 )
    End Do
    !$OMP End PARALLEL DO

    !$OMP PARALLEL DO PRIVATE(j) SHARED(ky_vis)
    Do k=(Nz-1)/2+2,Nz-1
      kz_vis(k) = ( 2.0d0 * pi  /Lz ) * Dble((Nz-1)-(k-1))
    end do
    !$OMP End PARALLEL DO

!-----------FFTW----------------        
    Do i = 1, Nx-1 ! --- \8F\87\95ϊ\B7 ---
!          !$OMP PARALLEL DO PRIVATE(i,j,k) SHARED(Real_F1)
      Do k = 1, Nz-1
      Do j = 1, Ny-1
        in(j,k) = U(i,j,k)
      End Do; End Do
!         !$OMP END PARALLEL DO



     call dfftw_plan_dft_r2c_2d(plan,N1,N2,in,out,FFTW_ESTIMATE)
     call dfftw_execute_dft_r2c(plan, in, out)
     call dfftw_destroy_plan(plan)

!          !$OMP PARALLEL DO PRIVATE(j,k) SHARED(F_F1)
      Do j = 1, NyFFT
      Do k = 1, Nz-1
        F_F1(i,j,k) = out(j,k) * c1_Nfft
      End Do; End Do
!          !$OMP END PARALLEL DO
    End Do

!-----------IFFTW----------------  

    Do i = 1, Nx-1 !

      Do j = 1, NyFFT
      Do k = 1, Nz-1
        out(j,k) = - (  ky_vis(j) * ky_vis(j)                    &
                             + kz_vis(k) * kz_vis(k) ) * F_F1(i,j,k)
      End Do; End Do



      call dfftw_plan_dft_c2r_2d(plan,N1,N2,out,in,FFTW_ESTIMATE)
      call dfftw_execute_dft_c2r(plan, out, in)
      call dfftw_destroy_plan(plan)

!          !$OMP PARALLEL DO PRIVATE(j,k) SHARED(ddUdx)
      Do k = 1, Nz-1
      Do j = 1, Ny-1
        U(i,j,k) = U(i,j,k) + in(j,k) * c1_Nfft
      End Do; End Do
!          !$OMP END PARALLEL DO
    End Do

 end program

我使用system_clock检查FFTW执行时间,发现实际上计算速度太慢。有没有更好的方法让它运行得更快?我在Ubuntu 17.04上使用gfortran,CPU是AMD1950x,内存是64G。

1 个答案:

答案 0 :(得分:0)

首先,正如我之前在评论中指出的那样,你应该从不反复做

 do ... !your loop
   call dfftw_plan_dft_r2c_2d(plan,N1,N2,in,out,FFTW_ESTIMATE)
   call dfftw_execute_dft_r2c(plan, in, out)
   call dfftw_destroy_plan(plan)
end do

在循环中多次提供N1N2是不变的。 FFTW_ESTIMATE并不是一场大灾难,但它仍然很慢。您应该将计划一次重新使用。创建FFTW计划很慢。

 call dfftw_plan_dft_r2c_2d(plan,N1,N2,in,out,FFTW_ESTIMATE)

 do .... !your loop
   call dfftw_execute_dft_r2c(plan, in, out)
 end do

 call dfftw_destroy_plan(plan)

其次,您必须首先指示FFTW使用OpenMP线程。这一切都在手册中,我将在我的代码中展示我是如何做到的:

  use iso_c_binding
  integer(c_int) :: nthreads
  integer(c_int) :: error

  !$ nthreads = omp_get_num_threads()

  error =  fftw_init_threads()

  if (error==0) then
    write(*,*) "Error when initializing FFTW for threads."
  else
    call fftw_plan_with_nthreads(nthreads)
  end if

这使用现代的FFTW Fortran接口,而不是您传统的Fortran 90接口(是的Fortran 90已经过时了)。但它在遗留界面中应该相当类似,例如

    integer :: nthreads, error

    !$ nthreads = omp_get_num_threads()

    call dfftw_init_threads(error)

    call dfftw_plan_with_nthreads(nthreads)