Question

我在Fortran代码中的数据数组中发现了一些NaN，并且可能已将问题隔离到OpenMP do循环中。当该循环运行时，NaNs导致sp％ptl（i）％ph（6）：

!$OMP PARALLEL DO &
!$OMP PRIVATE( ITH, I )
do ith=1, sml_nthreads
  do i=i_beg(ith), i_end(ith)
    if(sp%ptl(i)%ph(3) >= 2pi .or. sp%ptl(i)%ph(3)< 0D0 ) then
      sp%ptl(i)%ph(3) = modulo(sp%ptl(i)%ph(3),2pi)
    endif
  enddo
enddo

但是，如果我运行相同但添加一行将0D0添加到虚拟变量，则NaN在sp％ptl（i）％ph（6）中消失：

!$OMP PARALLEL DO &
!$OMP PRIVATE( ITH, I )
do ith=1, sml_nthreads
  do i=i_beg(ith), i_end(ith)
    if(sp%ptl(i)%ph(3) >= 2pi .or. sp%ptl(i)%ph(3)< 0D0 ) then
      sp%ptl(i)%ph(3) = modulo(sp%ptl(i)%ph(3),2pi)
    endif

    tmp = tmp + 0D0
  enddo
enddo

当然，实际代码还有很多，这不是一个最小的工作示例。我的问题是为什么在do循环中添加任何行会导致sp％ptl（i）％ph（6）永远不会得到NaN＆＃39; s？在OpenMP内部只有一个if语句do循环一个坏主意？现在对我来说这是一个令人困惑的问题。这是如何运作的。

UPDATE 这是最小的例子，还没有像更大的代码那样工作，因为它没有NaN，而是在数组ptl的随机点中相当大的数字，但至少显示了基本的工作流程。我编译与更大的代码库（英特尔编译器，18.0.1.163）相同，然后使用srun -n1 -c24运行它。我用更大的代码库做了一些进一步的测试，并发现重新编译这里用＃34; mymod＆＃34;表示的子程序。没有优化（即-O0-g -C）使NaN消失。

UPDATE 2 没关系大数字，我只是忘记了ptl（i）％ph的初始化（现在在推送开始时添加），现在它已添加我从未得到NaN或这个最小的例子中的大数字（仍然存在于具有优化的较大代码中）。

module mymod
    integer, parameter :: ptl_nphase=8
    integer, parameter :: num=1000
    integer, parameter :: sml_nthreads=24
    type ptl_type
        real(8) :: ph(ptl_nphase)
    end type ptl_type
contains
    logical function is_nan(a)
        implicit none
        real (8) :: a

        is_nan = .not. ( a > 1D0 .or. a < 2D0 )

    end function is_nan

    subroutine split_indices(total,num_pieces,ibeg,iend)
        implicit none

        integer :: total
        integer :: num_pieces
        integer :: ibeg(num_pieces), iend(num_pieces)
        integer :: itmp1, itmp2, ioffset, i

        if (num_pieces > 0) then
            itmp1 = total/num_pieces
            itmp2 = mod(total,num_pieces)
            ioffset = 0
            do i=1,itmp2
            ibeg(i) = ioffset + 1
            iend(i) = ioffset + (itmp1+1)
            ioffset = iend(i)
            enddo
            do i=itmp2+1,num_pieces
            ibeg(i) = ioffset + 1
            if (ibeg(i) > total) then
                iend(i) = ibeg(i) - 1
            else
                iend(i) = ioffset + itmp1
                ioffset = iend(i)
            endif
            enddo
        endif
    end subroutine split_indices


    subroutine calc_source(ptl,icycle)
        implicit none
        type(ptl_type) :: ptl(num)
        integer :: ith, i, i_beg(sml_nthreads), i_end(sml_nthreads)
        integer :: icycle

        call split_indices(num, sml_nthreads, i_beg, i_end)
        !$OMP PARALLEL DO &
        !$OMP PRIVATE( ITH, I )
        do ith=1, sml_nthreads
        do i=i_beg(ith), i_end(ith)
        ptl(i)%ph(6) = ptl(i)%ph(6) + 1D0
        enddo
        enddo

        if (icycle==1) then
            !$OMP PARALLEL DO &
            !$OMP PRIVATE( ITH, I )
            do ith=1, sml_nthreads
                do i=i_beg(ith), i_end(ith)
                    ptl(i)%ph(7) = ptl(i)%ph(6)
                enddo
            enddo
        endif

    end subroutine calc_source


    subroutine push1(ptl)
        implicit none
        type(ptl_type) :: ptl(num)
        integer :: ith, i, i_beg(sml_nthreads), i_end(sml_nthreads)
        real(8) :: arr1(5)

        call split_indices(num, sml_nthreads, i_beg, i_end)
        !$OMP PARALLEL DO &
        !$OMP PRIVATE( ITH, I )
        do ith=1, sml_nthreads
            do i=i_beg(ith), i_end(ith)
                call random_number(arr1)
                ptl(i)%ph(1:5) = ptl(i)%ph(1:5) + arr1
            enddo
        enddo

    end subroutine push1


    subroutine push(ptl)
        implicit none
        type(ptl_type) :: ptl(num)
        integer :: icycle
        integer :: ith, i, i_beg(sml_nthreads), i_end(sml_nthreads)

        call split_indices(num, sml_nthreads, i_beg, i_end)
        !$OMP PARALLEL DO &
        !$OMP PRIVATE( ITH, I )
        do ith=1, sml_nthreads
            do i=i_beg(ith), i_end(ith)
               ptl(i)%ph(:) = 0D0
            enddo
        enddo

        do icycle=1,100
            call calc_source(ptl,icycle)

            call push1(ptl)

            call split_indices(num, sml_nthreads, i_beg, i_end)
            !$OMP PARALLEL DO &
            !$OMP PRIVATE( ITH, I )
            do ith=1, sml_nthreads
                do i=i_beg(ith), i_end(ith)
                    ptl(i)%ph(3) = modulo(ptl(i)%ph(3),6.28)
                enddo
            enddo

        enddo

    end subroutine push

end module mymod


program main
    use mymod
    implicit none
    type(ptl_type) :: ptl(num)
    integer :: ith, i, i_beg(sml_nthreads), i_end(sml_nthreads)

    call push(ptl)

    !check for nan
    call split_indices(num, sml_nthreads, i_beg, i_end)
    !$OMP PARALLEL DO &
    !$OMP PRIVATE( ITH, I )
    do ith=1, sml_nthreads
        do i=i_beg(ith), i_end(ith)
!            if (is_nan(ptl(i)%ph(6))) then
                !print *,'is_nan',i
                print *,ptl(i)%ph(6)
!            endif
        enddo
    enddo

end program main

Answer 1

我找到了一个解决方案，但仍然没有理解为什么，仍然无法生成一个最小的工作示例（我的简单示例从未在较大的代码中复制该问题）。这与优化有关，以及如何内联和向量化循环，但在查看优化报告时，使用tmp = tmp + 0D0行编译时没有明显的区别。

所以我决定开始删除代码。如果我删除了对calc_source的调用，那么NaN就会消失。当我把它放回去，并且只使用了一个空白的calc_source子程序时，NaNs又回来了。我将calc_source移动到与push相同的文件中，并且NaN已经消失了。

使用if语句的OpenMP循环仅导致NaN

1 个答案: