我编写了以下代码,然后尝试使用OpenMP来并行化它。但是,在使用f2py编译以下OpenMP代码之后,Python在运行时总会生成某些错误。没有错误消息,只有数字有点关闭,每当我用f2py编译它并在Python中运行时,它就会杀死内核。
我想知道这是否与我的并行区域有关。我总是对哪些变量变为私有而感到困惑,所以任何人都可以观察到任何错误吗?
subroutine simulate_omp(m,nt,s0,l,d,a,numthreads,x,y,psi)
!Pedestrian motion model
!input variables:
!n = m^2 = number of students
!nt: number of time steps
!s0: student speed
!l: initial spacing between students
!d: student motion influence by all other students within distance <= d
!a: noise amplitude
!numthreads: number of threads to use in parallel regions
!output variables:
!x,y: all m^2 student paths from i=1 to nt+1
!psi: synchronization parameter, store at all nt+1 times (including initial
condition)
use omp_lib
implicit none
integer, intent(in) :: m,nt,numthreads
real(kind=8), intent(in) :: s0,l,d,a
real(kind=8), dimension(m*m,nt+1), intent(out) :: x,y
real(kind=8), dimension(nt+1), intent(out) :: psi
real(kind=8), dimension(m*m,nt+1) :: xtemp,ytemp,u,v
real(kind=8), dimension(m*m,nt) :: usum,vsum,umean,vmean
real(kind=8) :: r(m*m)
real(kind=8),parameter :: pi = 4*atan(1.0_8)
integer :: i1,j1,k1,i2,j2,k2,count
!$call omp_set_num_threads(numthreads)
! initialize student positions
x = 0.d0
y = 0.d0
k1 = 0
do i1 = 1,m
do j1=1,m
k1 = k1 + 1
x(k1,1) = (j1-1)*l/2 - (m-1)*l/4
y(k1,1) = (i1-1)*l/2 - (m-1)*l/4
end do
end do
x(:,1) = x(:,1)/(m-1)
y(:,1) = y(:,1)/(m-1)
! initialize
xtemp(:,1) = x(:,1)
ytemp(:,1) = y(:,1)
call random_number(r)
u(:,1) = s0*cos(r*2*pi-pi)
v(:,1) = s0*sin(r*2*pi-pi)
psi(1) = sqrt(sum(u(:,1))**2+sum(v(:,1)**2))/dble(m)/dble(m)/s0
do i2 = 1,nt
!$OMP parallel do private(j2,k2,l)
do j2 = 1,m*m
usum(j2,i2) = 0
vsum(j2,i2) = 0
count = 0
!$OMP parallel do reduction(+:usum,vsum,count)
do k2 = 1,m*m
if ((xtemp(k2,i2)-xtemp(j2,i2))**2+(ytemp(k2,i2)-ytemp(j2,i2))**2<=d**2)
then
usum(j2,i2) = usum(j2,i2)+u(k2,i2)
vsum(j2,i2) = vsum(j2,i2)+v(k2,i2)
count = count+1
end if
end do
!$OMP end parallel do
umean(j2,i2) = usum(j2,i2)/dble(count)
vmean(j2,i2) = vsum(j2,i2)/dble(count)
u(j2,i2+1) = s0*cos(atan(vmean(j2,i2)/umean(j2,i2))+a*(r(j2)*2*pi-pi))
v(j2,i2+1) = s0*sin(atan(vmean(j2,i2)/umean(j2,i2))+a*(r(j2)*2*pi-pi))
xtemp(j2,i2+1) = xtemp(j2,i2)+u(j2,i2+1)
ytemp(j2,i2+1) = ytemp(j2,i2)+v(j2,i2+1)
! boundary conditions
if (xtemp(j2,i2+1)>l) then
xtemp(j2,i2+1) = xtemp(j2,i2+1)-2*l
else
if (xtemp(j2,i2+1)<-l) then
xtemp(j2,i2+1) = xtemp(j2,i2+1)+2*l
end if
end if
if (ytemp(j2,i2+1)>l) then
ytemp(j2,i2+1) = ytemp(j2,i2+1)-2*l
else
if (ytemp(j2,i2+1)<-l) then
ytemp(j2,i2+1) = ytemp(j2,i2+1)+2*l
end if
end if
end do
!$OMP end parallel do
psi(i2+1) = sqrt(sum(u(:,i2+1))**2+sum(v(:,i2+1))**2)/dble(m)/dble(m)/s0
end do
x(:,1:nt+1) = xtemp(:,1:nt+1)
y(:,1:nt+1) = ytemp(:,1:nt+1)
end subroutine simulate_omp
答案 0 :(得分:0)
参数l使用intent(in)声明,并且不在循环中修改,因此不需要将其声明为private。以下是没有外部并行循环的建议:
subroutine simulate_omp(m,nt,s0,l,d,a,numthreads,x,y,psi)
!Pedestrian motion model
!input variables:
!n = m^2 = number of students
!nt: number of time steps
!s0: student speed
!l: initial spacing between students
!d: student motion influence by all other students within distance <= d
!a: noise amplitude
!numthreads: number of threads to use in parallel regions
!output variables:
!x,y: all m^2 student paths from i=1 to nt+1
!psi: synchronization parameter, store at all nt+1 times (including initial
condition)
use omp_lib
implicit none
integer, intent(in) :: m,nt,numthreads
real(kind=8), intent(in) :: s0,l,d,a
real(kind=8), dimension(m*m,nt+1), intent(out) :: x,y
real(kind=8), dimension(nt+1), intent(out) :: psi
real(kind=8), dimension(m*m,nt+1) :: xtemp,ytemp,u,v
real(kind=8), dimension :: usum,vsum,umean,vmean
real(kind=8) :: r(m*m)
real(kind=8),parameter :: pi = 4*atan(1.0_8)
integer :: i1,j1,k1,i2,j2,k2,count
!$call omp_set_num_threads(numthreads)
! initialize student positions
x = 0.d0
y = 0.d0
k1 = 0
do i1 = 1,m
do j1=1,m
k1 = k1 + 1
x(k1,1) = (j1-1)*l/2 - (m-1)*l/4
y(k1,1) = (i1-1)*l/2 - (m-1)*l/4
end do
end do
x(:,1) = x(:,1)/(m-1)
y(:,1) = y(:,1)/(m-1)
! initialize
xtemp(:,1) = x(:,1)
ytemp(:,1) = y(:,1)
call random_number(r)
u(:,1) = s0*cos(r*2*pi-pi)
v(:,1) = s0*sin(r*2*pi-pi)
psi(1) = sqrt(sum(u(:,1))**2+sum(v(:,1)**2))/dble(m)/dble(m)/s0
do i2 = 1,nt
do j2 = 1,m*m
usum = 0
vsum = 0
count = 0
!$OMP parallel do private(k2), reduction(+:usum,vsum,count)
do k2 = 1,m*m
if ((xtemp(k2,i2)-xtemp(j2,i2))**2+(ytemp(k2,i2)-ytemp(j2,i2))**2<=d**2) then
usum = usum+u(k2,i2)
vsum = vsum+v(k2,i2)
count = count+1
end if
end do
!$OMP end parallel do
umean = usum/dble(count)
vmean = vsum/dble(count)
u(j2,i2+1) = s0*cos(atan(vmean/umean)+a*(r(j2)*2*pi-pi))
v(j2,i2+1) = s0*sin(atan(vmean/umean)+a*(r(j2)*2*pi-pi))
xtemp(j2,i2+1) = xtemp(j2,i2)+u(j2,i2+1)
ytemp(j2,i2+1) = ytemp(j2,i2)+v(j2,i2+1)
! boundary conditions
if (xtemp(j2,i2+1)>l) then
xtemp(j2,i2+1) = xtemp(j2,i2+1)-2*l
else
if (xtemp(j2,i2+1)<-l) then
xtemp(j2,i2+1) = xtemp(j2,i2+1)+2*l
end if
end if
if (ytemp(j2,i2+1)>l) then
ytemp(j2,i2+1) = ytemp(j2,i2+1)-2*l
else
if (ytemp(j2,i2+1)<-l) then
ytemp(j2,i2+1) = ytemp(j2,i2+1)+2*l
end if
end if
end do
psi(i2+1) = sqrt(sum(u(:,i2+1))**2+sum(v(:,i2+1))**2)/dble(m)/dble(m)/s0
end do
x(:,1:nt+1) = xtemp(:,1:nt+1)
y(:,1:nt+1) = ytemp(:,1:nt+1)
end subroutine simulate_omp
您可以计时并将其与使用私有(j2,k2,umean,vmean,usum,vsum,count),shared(u,v,xtemp,ytemp)并行化的外循环进行比较。确保后面的测试将OMP_NESTED设置为true。