Question

我编写了以下代码，然后尝试使用OpenMP来并行化它。但是，在使用f2py编译以下OpenMP代码之后，Python在运行时总会生成某些错误。没有错误消息，只有数字有点关闭，每当我用f2py编译它并在Python中运行时，它就会杀死内核。

我想知道这是否与我的并行区域有关。我总是对哪些变量变为私有而感到困惑，所以任何人都可以观察到任何错误吗？

subroutine simulate_omp(m,nt,s0,l,d,a,numthreads,x,y,psi)
!Pedestrian motion model
!input variables:
!n = m^2 = number of students
!nt: number of time steps
!s0: student speed
!l: initial spacing between students
!d: student motion influence by all other students within distance <= d
!a: noise amplitude
!numthreads: number of threads to use in parallel regions
!output variables:
!x,y: all m^2 student paths from i=1 to nt+1
!psi: synchronization parameter, store at all nt+1 times (including initial 
condition)
use omp_lib
implicit none
integer, intent(in) :: m,nt,numthreads
real(kind=8), intent(in) :: s0,l,d,a
real(kind=8), dimension(m*m,nt+1), intent(out) :: x,y
real(kind=8), dimension(nt+1), intent(out) :: psi
real(kind=8), dimension(m*m,nt+1) :: xtemp,ytemp,u,v
real(kind=8), dimension(m*m,nt) :: usum,vsum,umean,vmean
real(kind=8) :: r(m*m)
real(kind=8),parameter :: pi = 4*atan(1.0_8)
integer :: i1,j1,k1,i2,j2,k2,count

!$call omp_set_num_threads(numthreads)
! initialize student positions
x = 0.d0
y = 0.d0
k1 = 0
do i1 = 1,m
  do j1=1,m
    k1 = k1 + 1
    x(k1,1) = (j1-1)*l/2 - (m-1)*l/4
    y(k1,1) = (i1-1)*l/2 - (m-1)*l/4
  end do
end do
x(:,1) = x(:,1)/(m-1)
y(:,1) = y(:,1)/(m-1)

! initialize
xtemp(:,1) = x(:,1)
ytemp(:,1) = y(:,1)
call random_number(r)
u(:,1) = s0*cos(r*2*pi-pi)
v(:,1) = s0*sin(r*2*pi-pi)
psi(1) = sqrt(sum(u(:,1))**2+sum(v(:,1)**2))/dble(m)/dble(m)/s0

do i2 = 1,nt
  !$OMP parallel do private(j2,k2,l)
  do j2 = 1,m*m
    usum(j2,i2) = 0
    vsum(j2,i2) = 0
    count = 0
    !$OMP parallel do reduction(+:usum,vsum,count)
    do k2 = 1,m*m
      if ((xtemp(k2,i2)-xtemp(j2,i2))**2+(ytemp(k2,i2)-ytemp(j2,i2))**2<=d**2) 
then
        usum(j2,i2) = usum(j2,i2)+u(k2,i2)
        vsum(j2,i2) = vsum(j2,i2)+v(k2,i2)
        count = count+1
      end if
    end do
    !$OMP end parallel do
    umean(j2,i2) = usum(j2,i2)/dble(count)
    vmean(j2,i2) = vsum(j2,i2)/dble(count)
    u(j2,i2+1) = s0*cos(atan(vmean(j2,i2)/umean(j2,i2))+a*(r(j2)*2*pi-pi))
    v(j2,i2+1) = s0*sin(atan(vmean(j2,i2)/umean(j2,i2))+a*(r(j2)*2*pi-pi))
    xtemp(j2,i2+1) = xtemp(j2,i2)+u(j2,i2+1)
    ytemp(j2,i2+1) = ytemp(j2,i2)+v(j2,i2+1)

    ! boundary conditions
    if (xtemp(j2,i2+1)>l) then
      xtemp(j2,i2+1) = xtemp(j2,i2+1)-2*l
    else
      if (xtemp(j2,i2+1)<-l) then
        xtemp(j2,i2+1) = xtemp(j2,i2+1)+2*l
      end if
    end if
    if (ytemp(j2,i2+1)>l) then
      ytemp(j2,i2+1) = ytemp(j2,i2+1)-2*l
    else
      if (ytemp(j2,i2+1)<-l) then
        ytemp(j2,i2+1) = ytemp(j2,i2+1)+2*l
      end if
    end if
  end do
  !$OMP end parallel do
  psi(i2+1) = sqrt(sum(u(:,i2+1))**2+sum(v(:,i2+1))**2)/dble(m)/dble(m)/s0
end do
x(:,1:nt+1) = xtemp(:,1:nt+1)
y(:,1:nt+1) = ytemp(:,1:nt+1)

end subroutine simulate_omp

Answer 1

参数l使用intent（in）声明，并且不在循环中修改，因此不需要将其声明为private。以下是没有外部并行循环的建议：

subroutine simulate_omp(m,nt,s0,l,d,a,numthreads,x,y,psi)
!Pedestrian motion model
!input variables:
!n = m^2 = number of students
!nt: number of time steps
!s0: student speed
!l: initial spacing between students
!d: student motion influence by all other students within distance <= d
!a: noise amplitude
!numthreads: number of threads to use in parallel regions
!output variables:
!x,y: all m^2 student paths from i=1 to nt+1
!psi: synchronization parameter, store at all nt+1 times (including initial 
condition)
use omp_lib
implicit none
integer, intent(in) :: m,nt,numthreads
real(kind=8), intent(in) :: s0,l,d,a
real(kind=8), dimension(m*m,nt+1), intent(out) :: x,y
real(kind=8), dimension(nt+1), intent(out) :: psi
real(kind=8), dimension(m*m,nt+1) :: xtemp,ytemp,u,v
real(kind=8), dimension :: usum,vsum,umean,vmean
real(kind=8) :: r(m*m)
real(kind=8),parameter :: pi = 4*atan(1.0_8)
integer :: i1,j1,k1,i2,j2,k2,count

!$call omp_set_num_threads(numthreads)
! initialize student positions
x = 0.d0
y = 0.d0
k1 = 0
do i1 = 1,m
  do j1=1,m
    k1 = k1 + 1
    x(k1,1) = (j1-1)*l/2 - (m-1)*l/4
    y(k1,1) = (i1-1)*l/2 - (m-1)*l/4
  end do
end do
x(:,1) = x(:,1)/(m-1)
y(:,1) = y(:,1)/(m-1)

! initialize
xtemp(:,1) = x(:,1)
ytemp(:,1) = y(:,1)
call random_number(r)
u(:,1) = s0*cos(r*2*pi-pi)
v(:,1) = s0*sin(r*2*pi-pi)
psi(1) = sqrt(sum(u(:,1))**2+sum(v(:,1)**2))/dble(m)/dble(m)/s0

do i2 = 1,nt
  do j2 = 1,m*m
    usum = 0
    vsum = 0
    count = 0
    !$OMP parallel do private(k2), reduction(+:usum,vsum,count)
    do k2 = 1,m*m
      if ((xtemp(k2,i2)-xtemp(j2,i2))**2+(ytemp(k2,i2)-ytemp(j2,i2))**2<=d**2) then
        usum = usum+u(k2,i2)
        vsum = vsum+v(k2,i2)
        count = count+1
      end if
    end do
    !$OMP end parallel do
    umean = usum/dble(count)
    vmean = vsum/dble(count)
    u(j2,i2+1) = s0*cos(atan(vmean/umean)+a*(r(j2)*2*pi-pi))
    v(j2,i2+1) = s0*sin(atan(vmean/umean)+a*(r(j2)*2*pi-pi))
    xtemp(j2,i2+1) = xtemp(j2,i2)+u(j2,i2+1)
    ytemp(j2,i2+1) = ytemp(j2,i2)+v(j2,i2+1)

    ! boundary conditions
    if (xtemp(j2,i2+1)>l) then
      xtemp(j2,i2+1) = xtemp(j2,i2+1)-2*l
    else
      if (xtemp(j2,i2+1)<-l) then
        xtemp(j2,i2+1) = xtemp(j2,i2+1)+2*l
      end if
    end if
    if (ytemp(j2,i2+1)>l) then
      ytemp(j2,i2+1) = ytemp(j2,i2+1)-2*l
    else
      if (ytemp(j2,i2+1)<-l) then
        ytemp(j2,i2+1) = ytemp(j2,i2+1)+2*l
      end if
    end if
  end do
  psi(i2+1) = sqrt(sum(u(:,i2+1))**2+sum(v(:,i2+1))**2)/dble(m)/dble(m)/s0
end do
x(:,1:nt+1) = xtemp(:,1:nt+1)
y(:,1:nt+1) = ytemp(:,1:nt+1)

end subroutine simulate_omp

您可以计时并将其与使用私有（j2，k2，umean，vmean，usum，vsum，count），shared（u，v，xtemp，ytemp）并行化的外循环进行比较。确保后面的测试将OMP_NESTED设置为true。

使用OpenMP从Fortran子例程中导致错误的结果和崩溃

1 个答案: