具有多个同时减少的Fortran OpenMP会导致seg错误

时间:2017-08-25 16:19:11

标签: fortran openmp reduction

我曾经使用过intel openMP教程。我写了pi程序缩减,现在正在使用openMP处理fortran代码。我想用减少条款一次总计4个数量。代码看起来像这样:

call omp_set_num_threads(num_threads)
write(*,*) "number of parallel threads"
write(*,*) num_threads


N_init = 1200
N_t    = 1250

filename = 'POD_input/POD_avg.dat'

io = 0
nCell = 0
open(UNIT = 10, FILE = filename, STATUS = 'OLD', form = 'formatted')
    do
        read(10,*, end=67) nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense
        nCell = nCell + 1
    end do
67 close(10)

allocate(eig(nCell))
allocate(wr(nCell))
allocate(wi(nCell))
allocate(work(4*nCell))

allocate(R_Corr(nCell, nCell))
allocate(U_Corr(nCell, nCell))
allocate(V_Corr(nCell, nCell))
allocate(P_Corr(nCell, nCell))

allocate(R_Tot(nCell, nCell))
allocate(U_Tot(nCell, nCell))
allocate(V_Tot(nCell, nCell))
allocate(P_Tot(nCell, nCell))

allocate(R_Fin(nCell, nCell))
allocate(U_Fin(nCell, nCell))
allocate(V_Fin(nCell, nCell))
allocate(P_Fin(nCell, nCell))

allocate(x(nCell))
allocate(y(nCell))
allocate(A(nCell))

allocate(Rho(nCell))
allocate(U(nCell))
allocate(V(nCell))
allocate(P(nCell))

allocate(R_x(nCell))
allocate(U_x(nCell))
allocate(V_x(nCell))
allocate(P_x(nCell))

allocate(R_c(nCell))
allocate(U_c(nCell))
allocate(V_c(nCell))
allocate(P_c(nCell))

allocate(R_av(nCell))
allocate(U_av(nCell))
allocate(V_av(nCell))
allocate(P_av(nCell))


open(UNIT = 10, FILE = filename, STATUS = 'OLD', form = 'formatted')
    do iCell = 1, nCell
        read(10,*)  x(iCell), y(iCell), A(iCell), nonsense, nonsense, nonsense, nonsense, R_av(iCell), U_av(iCell), V_av(iCell), P_av(iCell)
    end do
close(10)

filename = 'POD_output/POD_Mesh.dat'

open(UNIT = 10, FILE = filename, STATUS = 'unknown', form = 'unformatted', access='stream')
    write(10) nCell
    write(10) x(:)
    write(10) y(:)
close(10)

R_Tot = 0.0_dp
U_Tot = 0.0_dp
V_Tot = 0.0_dp
P_Tot = 0.0_dp

write(*,*) "begin correlation"

!$OMP PARALLEL DO REDUCTION(+:R_Tot, U_Tot, V_Tot, P_Tot) private(i, j, nonsense, filename, num, iCell, iTime, R_x, R_C, R_Corr, U_x, U_C, U_Corr, V_x, V_C, V_Corr, P_x, P_C, P_Corr)
   do iTime = N_init,N_t
       write(*,*) "inside loop"
       filename = 'POD_input/POD_input.'
       write(num,'(I6.6)') iTime
       filename = trim(adjustl(filename))//trim(adjustl(num))//trim(adjustl('.dat'))
       ! Read file

       write(*,*) "read file"
       open(UNIT = 10, FILE = filename, STATUS = 'OLD', form = 'formatted')
           do iCell = 1, nCell
               read(10,*) nonsense, nonsense, nonsense, R_x(iCell), U_x(iCell), V_x(iCell), P_x(iCell)
           end do
       close(10)

       R_x = R_x-R_av
       U_x = U_x-U_av
       V_x = V_x-V_av
       P_x = P_x-P_av

       R_C(:) = R_x(:)*sqrt(A(:))
       U_C(:) = U_x(:)*sqrt(A(:))
       V_C(:) = V_x(:)*sqrt(A(:))
       P_C(:) = P_x(:)*sqrt(A(:))


       do i = 1, nCell
           do j =1, nCell
               R_Corr(i,j) = R_C(i)*R_C(j)
               U_Corr(i,j) = U_C(i)*U_C(j)
               V_Corr(i,j) = V_C(i)*V_C(j)
               P_Corr(i,j) = P_C(i)*P_C(j)
           end do
       end do


       R_Tot =  R_Tot + R_Corr
       U_Tot =  U_Tot + U_Corr
       V_Tot =  V_Tot + V_Corr
       P_Tot =  P_Tot + P_Corr
   end do
!$OMP END PARALLEL DO 

即使使用1个线程运行,我也会收到分段错误错误。我该怎么做才能调试或修复它。谢谢。

编辑:包含更多代码以便于协助

1 个答案:

答案 0 :(得分:0)

我曾经遇到过类似的问题。使用的数组可能很大,因此堆栈大小不足以容纳这些数据。我建议你增加纸叠大小。我使用的是Linux,通常是通过cmd ulimit -s unlimited来完成的。

此外,当在openMP中使用reduce子句时,每个线程都会创建reduce子句所针对的变量的私有副本。这些数据放置在专用堆栈上,该专用堆栈的大小通常限制为几MB。要覆盖此限制,假设您是Linux用户,则可以运行例如cmd export OMP_STACKSIZE=50m将私有堆栈大小设置为50MB。可以根据硬件限制和阵列大小来更改50m值。

重要的一点是,对于大数组,归约子句可能根本不起作用。