在fortran中意外放慢了意图(out)

时间:2018-02-22 06:47:43

标签: arrays fortran gfortran subroutine

我在Fortran遇到了一个奇怪的问题,我认为这是由不必要的数组副本引起的。当我在一个过程中有一个本地数组时代码很快。但是一旦我更改子例程以将 list 作为 intent(out)参数(或任何其他方法!)返回,代码就会减慢50倍!

示意图,代码看起来像这样(完整代码最后附加):

module

   type sys
      integer           :: num_coor
      real, allocatable :: pos(:,:)   !--> shape will be (2,num_coor)
      ...
   contains
      procedure :: look
      procedure :: change
      ...
   end type

contains

   subroutine look(obj,j)
   class(sys), intent(in) :: obj
   integer,    intent(in) :: j
   integer                :: n, list(100) !*** problem when intent(out) ***
   n = 0
   do k = 1, num_coor
      if( k close to j) then
         n = n + 1; list(n) = k        ! usually n<~70
      end if
   end do
   end subroutine

   ...
end module

主程序如下:

do
   select random j
   call obj%look(j)
   call obj%change(j)
end do

在这种形式下,每个周期大约需要 50纳秒才能完成,但是当我希望访问数组 list 时,它是在类型绑定过程中创建的 >看看,从主要区块(到目前为止我尝试过的任何方式)完成一个周期需要 1500纳秒

我想知道是否有人遇到类似的问题,并有任何建议。 如果有人有兴趣,我可以发送原始代码。

P.S。我尝试过使用gfortran 6.4.0,7.3.0和ifort 14和-O3等进行编译。

如果您想尝试,完整代码就在这里。 您可以跳过大部分内容,只关注“!&lt; --- here”指定的部分。

module mover
private
public  keeper, g_pp2d, pr 

   integer, parameter :: sp = kind(1.0), &
                         dp = kind(1.0d0), &
                         pr = sp
   type pp2d                            
      integer         :: nop
      real(pr), &
      allocatable     :: pos(:,:)
      real(pr)        :: lx
      real(pr)        :: ly
   contains 
      procedure       :: load => load 
      procedure       :: fill_random => fill_random
   end type pp2d

   type keeper
      integer         :: cap
      integer         :: noe
      integer, &
      allocatable     :: elm(:)
   contains
      procedure       :: init => init_keeper 
      procedure       :: add  => add_to_keeper
      procedure       :: pop  => pop_from_keeper
      procedure       :: ext  => extend_keeper     
      procedure       :: show => print_keeper   
   end type keeper 

   interface keeper
      procedure construct_keeper
   end interface


   integer, parameter :: nbbsk(2,8) = &
                         reshape( (/1,0,0,1,-1,0,0,-1, &
                                  1,-1,1,1,-1,1,-1,-1/), &
                                  (/2,8/) )
   integer, parameter :: med1(-1:1,-1:1) = reshape( &   
                             [8,4,5,3,0,1,7,2,6], [3,3])
   real(pr)           :: med2(2,-1:1,-1:1)       

   type, &
    extends(keeper)   :: basket
     integer          :: su(0:8)
     integer          :: cc(2)
     real(pr)         :: llim(2)
     real(pr)         :: ulim(2)
   end type basket

   type, &
    extends(pp2d)     :: g_pp2d 
      integer         :: nox
      integer         :: noy
      real(pr)        :: wx
      real(pr)        :: wy
      integer         :: nob
      type(basket), &
      allocatable     :: bsk(:)
      integer, &
      allocatable     :: bop(:)
      integer, &
      allocatable     :: kop(:)
      real(pr)        :: ll(2), &
                         ww(2)          
      integer         :: noxy(2) 
   contains
      procedure       :: ticks => make_ticks
      procedure       :: homogen => homogeneous_rep   
      procedure       :: absolute => absolute_rep   
      procedure       :: move_tiny => move_tiny_delta
      procedure       :: cube1 => selector_cube_1    !<------- here
      procedure       :: cube2 => selector_cube_2    !<------- here
   end type g_pp2d
contains

   subroutine init_keeper(kpr,cap)
   implicit none
   class(keeper), intent(inout) :: kpr
   integer,       intent(in)    :: cap
   kpr%cap = cap
   kpr%noe = 0
   allocate(kpr%elm(cap))
   end subroutine init_keeper

   function construct_keeper(cap)
   implicit none
   integer,       intent(in) :: cap
   type(keeper)             :: &
                  construct_keeper
   call construct_keeper%init(cap)
   end function construct_keeper

   subroutine add_to_keeper(kpr,k) 
   implicit none
   class(keeper), intent(inout) :: kpr
   integer,       intent(in)    :: k
   integer                      :: kk   
   kk = kpr%noe + 1
   if( kk>kpr%cap ) then
         call kpr%ext()
   end if
   kpr%noe = kk
   kpr%elm(kk) = k
   end subroutine add_to_keeper

   subroutine extend_keeper(kpr)
   implicit none
   class(keeper), intent(inout) :: kpr
   integer,       allocatable   :: buf(:)
   allocate( buf, source=[kpr%elm,0] )
   deallocate(kpr%elm)
   call move_alloc(buf,kpr%elm)
   kpr%cap = kpr%cap + 1
   end subroutine extend_keeper

   subroutine pop_from_keeper(kpr,k,l)
   implicit none
   class(keeper), intent(inout) :: kpr
   integer,       intent(in)    :: k
   integer,       intent(out)   :: l
   if( k<kpr%noe ) then
      l = kpr%elm(kpr%noe)
      kpr%elm(k) = l
   else
      l = kpr%elm(k) 
   end if
   kpr%noe = kpr%noe - 1
   end subroutine pop_from_keeper

   subroutine print_keeper(kpr)
   implicit none
   class(keeper), intent(in) :: kpr
   write(*,'(3x,i6,"/",i6,12x,100i7)') &
       kpr%noe, kpr%cap, kpr%elm(1:kpr%noe) 
   end subroutine print_keeper 

   subroutine fill_random(pos, nop, lx, ly)
   implicit none
   class(pp2d), intent(inout) :: pos
   integer,     intent(in)    :: nop
   real(pr),    intent(in)    :: lx, ly
   pos%nop = nop
   pos%lx = lx
   pos%ly = ly
   allocate(pos%pos(2,0:nop-1))
   call random_number(pos%pos)
   pos%pos(1,:) = pos%pos(1,:)*lx 
   pos%pos(2,:) = pos%pos(2,:)*ly
   end subroutine fill_random

   subroutine load(pos, fname)
   implicit none
   class(pp2d),      intent(inout) :: pos
   character(len=*), intent(in)    :: fname
   integer                         :: u, i
   open( newunit=u, file=fname )
      read(u,*)
      read(u,*)
      read(u,*)
      read(u,*) pos%nop
      allocate(pos%pos(2,0:pos%nop-1))
      read(u,*) pos%lx, pos%ly
      do i = 0, pos%nop-1
          read(u,*) pos%pos(:,i)
      end do
   close(u)
   end subroutine load

   subroutine make_ticks(pos, wth)
   implicit none
   class(g_pp2d), intent(inout) :: pos
   real(pr),      intent(in)    :: wth 
   integer                      :: cx, cy, bl, at, &
                                   i, j, k, boc
   pos%nox = floor(pos%lx/wth)
   pos%noy = floor(pos%ly/wth)
   pos%ll = [pos%lx,pos%ly]
   pos%noxy = [pos%nox-1,pos%noy-1]
   pos%wx = pos%lx/pos%nox
   pos%wy = pos%ly/pos%noy
   pos%ww = [pos%wx,pos%wy]      
   pos%nob = pos%nox * pos%noy
   if( .not. allocated(pos%bsk) ) then 
      allocate(pos%bsk(0:pos%nob-1))
      allocate(pos%bop(0:pos%nop-1))
      allocate(pos%kop(0:pos%nop-1))
   else
      deallocate(pos%bsk)
      allocate(pos%bsk(0:pos%nob-1))
   end if
   boc = pos%nop/pos%nob + 1
   do cy = 0, pos%noy-1
      do cx = 0, pos%nox-1
         bl = cy*pos%nox + cx
         call pos%bsk(bl)%init(boc)
         pos%bsk(bl)%cc = (/cx,cy/)
         pos%bsk(bl)%llim = (/ cx   *pos%wx, cy   *pos%wy/)
         pos%bsk(bl)%ulim = (/(cx+1)*pos%wx,(cy+1)*pos%wy/)
         pos%bsk(bl)%su(0) = bl      
         do k = 1, 8
            i = mod(cx+nbbsk(1,k),pos%nox)
            j = mod(cy+nbbsk(2,k),pos%noy)
            if(i<0) i = i + pos%nox
            if(j<0) j = j + pos%noy
            pos%bsk(bl)%su(k) =  j*pos%nox + i
         end do
      end do
   end do
   do at = 0, pos%nop-1
      cx = floor(pos%pos(1,at)/pos%wx)
      cy = floor(pos%pos(2,at)/pos%wy)
      bl = cy*pos%nox + cx
      call pos%bsk(bl)%add(at)
      pos%bop(at) = bl
      pos%kop(at) = pos%bsk(bl)%noe
   end do
   do i= -1, 1
      do j= -1, 1
         med2(:,i,j) = [i,j]*pos%ww
      end do
   end do
   end subroutine make_ticks

   subroutine homogeneous_rep(pos)   
   implicit none
   class(g_pp2d), intent(inout) :: pos
   integer                      :: at, bl
   do at = 0, pos%nop-1
      bl = pos%bop(at) 
      pos%pos(:,at) = pos%pos(:,at) - pos%bsk(bl)%llim
   end do
   end subroutine homogeneous_rep

   subroutine absolute_rep(pos)   
   implicit none
   class(g_pp2d), intent(inout) :: pos
   integer                      :: at, bl
   do at = 0, pos%nop-1
      bl = pos%bop(at) 
      pos%pos(:,at) = pos%pos(:,at) + pos%bsk(bl)%llim
   end do
   end subroutine absolute_rep


   subroutine move_tiny_delta(pos, at, dir, delta) 
   implicit none
   class(g_pp2d), intent(inout) :: pos
   integer,       intent(in)    :: at, dir
   real(pr),      intent(in)    :: delta
   integer                      :: bl, cc(2),  &
                                   newbl, side_eff
   bl = pos%bop(at)
   pos%pos(dir,at) =  pos%pos(dir,at) + delta
   if( delta >= 0.0_pr ) then
      if( pos%pos(dir,at)>=pos%ww(dir) ) then
         pos%pos(dir,at) = pos%pos(dir,at) - pos%ww(dir)
         newbl = pos%bsk(bl)%su(dir)
         call pos%bsk(bl)%pop( pos%kop(at), side_eff )
         pos%kop( side_eff ) = pos%kop(at)
         call pos%bsk(newbl)%add( at )
         pos%bop( at ) = newbl
         pos%kop( at ) = pos%bsk(newbl)%noe
      end if
   else
      if( pos%pos(dir,at)<0.0_pr ) then
         pos%pos(dir,at) = pos%pos(dir,at) + pos%ww(dir)
         newbl = pos%bsk(bl)%su(dir+2)
         call pos%bsk(bl)%pop( pos%kop(at), side_eff )
         pos%kop( side_eff ) = pos%kop(at)
         call pos%bsk(newbl)%add( at )
         pos%bop( at ) = newbl
         pos%kop( at ) = pos%bsk(newbl)%noe
      end if
   end if
   end subroutine move_tiny_delta


   subroutine selector_cube_1(pos,at,wth)  !<------------- here
   implicit none
   class(g_pp2d), intent(in) :: pos
   integer,       intent(in) :: at
   real(pr),      intent(in) :: wth
   integer                   :: i, j, k, &
                                l, m, bl, bbl
   real(pr)                  :: dr(2), c1(2), &
                                c2(2), delta
   integer                   :: n, x(100) !<------------- here
   n = 0
   c1 = -[wth,wth]
   c2 = +[wth,wth]
   bl = pos%bop(at)
   do i= -1, 1
      do j= -1, 1
         k = med1(i,j)
         bbl = pos%bsk(bl)%su(k) 
         do l= 1, pos%bsk(bbl)%noe
            m = pos%bsk(bbl)%elm(l)
            dr = pos%pos(:,m) - pos%pos(:,at) + med2(:,i,j) 
            if( all(dr>c1) .and. all(dr<c2) ) then
               delta = dr(1)*dr(1) + dr(2)*dr(2) 
               n = n+1     
               x(n) = m           !<------------------ here
            end if
         end do     
      end do
   end do
   end subroutine selector_cube_1

   subroutine selector_cube_2(pos,at,wth,x) !<----------- here 
   implicit none
   class(g_pp2d), intent(in) :: pos
   integer,       intent(in) :: at
   real(pr),      intent(in) :: wth
   integer                   :: i, j, k, &
                                l, m, bl, bbl
   real(pr)                  :: dr(2), c1(2), &
                                c2(2), delta
   integer                   :: n
   integer, intent(out)    :: x(100) !<---------------- here
   n = 0
   c1 = -[wth,wth]
   c2 = +[wth,wth]
   bl = pos%bop(at)
   do i= -1, 1
      do j= -1, 1
         k = med1(i,j)
         bbl = pos%bsk(bl)%su(k) 
         do l= 1, pos%bsk(bbl)%noe
            m = pos%bsk(bbl)%elm(l)
            dr = pos%pos(:,m) - pos%pos(:,at) + med2(:,i,j) 
            if( all(dr>c1) .and. all(dr<c2) ) then
               delta = dr(1)*dr(1) + dr(2)*dr(2) 
               n = n+1
               x(n) = m
            end if
         end do     
      end do
   end do
   end subroutine selector_cube_2
end module mover





program main
use mover
implicit none
type(g_pp2d) :: pos
real(pr) :: wth = 2.5_pr, delta, dmax = 0.1_pr, &
             rand_pr
integer :: dir, step, steps = 10**5, &
           at, i
integer(8) :: start_time, end_time, count_rate
integer :: x(100)
! initiate 
call pos%fill_random(1000,20.0,20.0)
call pos%ticks(wth)
call pos%homogen()
! run
call system_clock(start_time,count_rate)
do step = 1, steps
   call random_number(rand_pr)
   at = floor(rand_pr*pos%nop)
   call random_number(rand_pr)
   if(rand_pr<0.5_pr) then
      dir = 1
   else
      dir = 2
   end if
   call random_number(rand_pr)
   delta = (2*rand_pr-1.0_pr) * dmax
   !   *********************************************
   !      if changed to cube2, the code drastically slows down  
   !-- --------------------------------------------------------------
   !call pos%cube1(at,wth)            !<---------------- here
   call pos%cube2(at,wth, x )        !<---------------- here
   !-- -------------------------------------------------------------
   call pos%move_tiny(at,dir,delta)
end do
call system_clock(end_time)
call pos%absolute()            
! report
write(*,'(3x,a,i7,a)') "Performance (count rate = ", count_rate, "):"
write(*,'(14x,f12.6,6x,a)') &
            (real(end_time-start_time)/count_rate)*10**9/steps, &
                                          "Nano Sec (per cycle)"
end program main

0 个答案:

没有答案