我曾经使用过intel openMP教程。我写了pi程序缩减,现在正在使用openMP处理fortran代码。我想用减少条款一次总计4个数量。代码看起来像这样:
call omp_set_num_threads(num_threads)
write(*,*) "number of parallel threads"
write(*,*) num_threads
N_init = 1200
N_t = 1250
filename = 'POD_input/POD_avg.dat'
io = 0
nCell = 0
open(UNIT = 10, FILE = filename, STATUS = 'OLD', form = 'formatted')
do
read(10,*, end=67) nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense, nonsense
nCell = nCell + 1
end do
67 close(10)
allocate(eig(nCell))
allocate(wr(nCell))
allocate(wi(nCell))
allocate(work(4*nCell))
allocate(R_Corr(nCell, nCell))
allocate(U_Corr(nCell, nCell))
allocate(V_Corr(nCell, nCell))
allocate(P_Corr(nCell, nCell))
allocate(R_Tot(nCell, nCell))
allocate(U_Tot(nCell, nCell))
allocate(V_Tot(nCell, nCell))
allocate(P_Tot(nCell, nCell))
allocate(R_Fin(nCell, nCell))
allocate(U_Fin(nCell, nCell))
allocate(V_Fin(nCell, nCell))
allocate(P_Fin(nCell, nCell))
allocate(x(nCell))
allocate(y(nCell))
allocate(A(nCell))
allocate(Rho(nCell))
allocate(U(nCell))
allocate(V(nCell))
allocate(P(nCell))
allocate(R_x(nCell))
allocate(U_x(nCell))
allocate(V_x(nCell))
allocate(P_x(nCell))
allocate(R_c(nCell))
allocate(U_c(nCell))
allocate(V_c(nCell))
allocate(P_c(nCell))
allocate(R_av(nCell))
allocate(U_av(nCell))
allocate(V_av(nCell))
allocate(P_av(nCell))
open(UNIT = 10, FILE = filename, STATUS = 'OLD', form = 'formatted')
do iCell = 1, nCell
read(10,*) x(iCell), y(iCell), A(iCell), nonsense, nonsense, nonsense, nonsense, R_av(iCell), U_av(iCell), V_av(iCell), P_av(iCell)
end do
close(10)
filename = 'POD_output/POD_Mesh.dat'
open(UNIT = 10, FILE = filename, STATUS = 'unknown', form = 'unformatted', access='stream')
write(10) nCell
write(10) x(:)
write(10) y(:)
close(10)
R_Tot = 0.0_dp
U_Tot = 0.0_dp
V_Tot = 0.0_dp
P_Tot = 0.0_dp
write(*,*) "begin correlation"
!$OMP PARALLEL DO REDUCTION(+:R_Tot, U_Tot, V_Tot, P_Tot) private(i, j, nonsense, filename, num, iCell, iTime, R_x, R_C, R_Corr, U_x, U_C, U_Corr, V_x, V_C, V_Corr, P_x, P_C, P_Corr)
do iTime = N_init,N_t
write(*,*) "inside loop"
filename = 'POD_input/POD_input.'
write(num,'(I6.6)') iTime
filename = trim(adjustl(filename))//trim(adjustl(num))//trim(adjustl('.dat'))
! Read file
write(*,*) "read file"
open(UNIT = 10, FILE = filename, STATUS = 'OLD', form = 'formatted')
do iCell = 1, nCell
read(10,*) nonsense, nonsense, nonsense, R_x(iCell), U_x(iCell), V_x(iCell), P_x(iCell)
end do
close(10)
R_x = R_x-R_av
U_x = U_x-U_av
V_x = V_x-V_av
P_x = P_x-P_av
R_C(:) = R_x(:)*sqrt(A(:))
U_C(:) = U_x(:)*sqrt(A(:))
V_C(:) = V_x(:)*sqrt(A(:))
P_C(:) = P_x(:)*sqrt(A(:))
do i = 1, nCell
do j =1, nCell
R_Corr(i,j) = R_C(i)*R_C(j)
U_Corr(i,j) = U_C(i)*U_C(j)
V_Corr(i,j) = V_C(i)*V_C(j)
P_Corr(i,j) = P_C(i)*P_C(j)
end do
end do
R_Tot = R_Tot + R_Corr
U_Tot = U_Tot + U_Corr
V_Tot = V_Tot + V_Corr
P_Tot = P_Tot + P_Corr
end do
!$OMP END PARALLEL DO
即使使用1个线程运行,我也会收到分段错误错误。我该怎么做才能调试或修复它。谢谢。
编辑:包含更多代码以便于协助
答案 0 :(得分:0)
我曾经遇到过类似的问题。使用的数组可能很大,因此堆栈大小不足以容纳这些数据。我建议你增加纸叠大小。我使用的是Linux,通常是通过cmd ulimit -s unlimited
来完成的。
此外,当在openMP中使用reduce子句时,每个线程都会创建reduce子句所针对的变量的私有副本。这些数据放置在专用堆栈上,该专用堆栈的大小通常限制为几MB。要覆盖此限制,假设您是Linux用户,则可以运行例如cmd export OMP_STACKSIZE=50m
将私有堆栈大小设置为50MB。可以根据硬件限制和阵列大小来更改50m值。
重要的一点是,对于大数组,归约子句可能根本不起作用。