我发现幂运算 a**b 比 dble(a)**dble(b) 快,其中 a 和 b 都是单精度浮点数(real)。这是为什么?我的编译器信息如下:
COLLECT_GCC=gfortran
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/lto-wrapper
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man
--infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla
--enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release
--with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions
--enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto
--enable-plugin --enable-initfini-array --disable-libgcj --with-isl=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/isl-install
--with-cloog=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/cloog-install
--enable-gnu-indirect-function --with-tune=generic --with-arch_32=x86-64 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.8.5 20150623 (Red Hat 4.8.5-4) (GCC)
这是我的测试代码:
!> Benchmark kernels: raise every element of the module array `a` to the
!> constant power `b`, once in default (single) precision and once after
!> promoting both operands to double precision.
module func
  implicit none

  !> Exponent applied by every kernel (default real).
  real, parameter :: b = 1.33
  !> Input bases; the caller fills this array before invoking a kernel.
  real :: a(1000000)

contains

  !> Power calculation in single precision: c(i) = a(i)**b.
  !>
  !> Only the first min(size(c), size(a)) elements of `c` are assigned, so a
  !> result array shorter than `a` is never indexed out of bounds.
  subroutine power(c)
    real, intent(out) :: c(:)
    integer :: i

    do i = 1, min(size(c), size(a))
      c(i) = a(i)**b
    end do
  end subroutine power

  !> Power calculation in double precision: both operands are converted with
  !> dble() before exponentiation, and the double result is converted back to
  !> default real on assignment to `c`.
  !>
  !> Only the first min(size(c), size(a)) elements of `c` are assigned.
  subroutine hp_power(c)
    real, intent(out) :: c(:)
    integer :: i

    do i = 1, min(size(c), size(a))
      c(i) = dble(a(i))**dble(b)
    end do
  end subroutine hp_power

end module func
!> Times `power` and `hp_power` once each with cpu_time and prints the elapsed
!> CPU time together with one sample element of each result.
program compare
  use func, only: a, power, hp_power
  implicit none

  integer :: i
  real :: c1(1000000), c2(1000000)
  real :: start_T, end_T

  ! Initialise the input array with a constant base.
  do i = 1, 1000000
    a(i) = 3.   ! alternative, varying input: (3.+i*0.000001)
  end do

  ! Single-precision kernel.
  call cpu_time(start_T)
  call power(c1)
  call cpu_time(end_T)
  write(*,*) "The power running time is :,", end_T-start_T, 's', 'c(5):',c1(5)

  ! Double-precision kernel (result stored back into a default-real array).
  call cpu_time(start_T)
  call hp_power(c2)
  call cpu_time(end_T)
  write(*,*) "The hp_power running time is :,", end_T-start_T, 's', 'c(5):',c2(5)
end program compare
答案 0 :(得分:3)
请看下面这个根据你的代码改写的版本(使用 Intel 编译器编译)。我添加了第三个例程,它把结果存储在 double 类型的数组中:
!> Benchmark kernels: raise every element of the module array `a` to the
!> constant power `b` in single precision, in double precision with a
!> single-precision result, and in double precision with a double result.
module func
  implicit none

  !> Kind of the values produced by dble(); private so that it cannot clash
  !> with names declared by code that uses this module.
  integer, parameter, private :: dp = kind(1.0d0)

  !> Exponent applied by every kernel (default real).
  real, parameter :: b = 1.33
  !> Input bases; the caller fills this array before invoking a kernel.
  real :: a(1000000)

contains

  !> Power calculation in single precision: c(i) = a(i)**b.
  !>
  !> Only the first min(size(c), size(a)) elements of `c` are assigned, so a
  !> result array shorter than `a` is never indexed out of bounds.
  subroutine power(c)
    real, intent(out) :: c(:)
    integer :: i

    do i = 1, min(size(c), size(a))
      c(i) = a(i)**b
    end do
  end subroutine power

  !> Power calculation in double precision: both operands are converted with
  !> dble() before exponentiation, and the double result is converted back to
  !> default real on assignment to `c`.
  !>
  !> Only the first min(size(c), size(a)) elements of `c` are assigned.
  subroutine hp_power(c)
    real, intent(out) :: c(:)
    integer :: i

    do i = 1, min(size(c), size(a))
      c(i) = dble(a(i))**dble(b)
    end do
  end subroutine hp_power

  !> Power calculation in double precision with a double-precision result:
  !> same expression as hp_power, but `c` is double precision, so the value
  !> is stored without conversion back to default real.
  !>
  !> Only the first min(size(c), size(a)) elements of `c` are assigned.
  subroutine hp2_power(c)
    real(dp), intent(out) :: c(:)
    integer :: i

    do i = 1, min(size(c), size(a))
      c(i) = dble(a(i))**dble(b)
    end do
  end subroutine hp2_power

end module func
!> Times the three kernels from module `func` over repeated runs using
!> system_clock and prints the mean elapsed time of each, in seconds, in the
!> order power, hp_power, hp2_power.
program compare
  use func, only: a, power, hp_power, hp2_power
  implicit none

  !> Number of timed repetitions averaged for each kernel.
  integer, parameter :: n_repeats = 100

  integer :: i, x
  integer :: tick_start, tick_end, ticks_per_second
  real :: c1_, c1_acc, c2_, c2_acc, c3_, c3_acc
  real :: c1(1000000), c2(1000000)
  real(kind(1.0d0)) :: c3(1000000)

  ! Every accumulator must start at zero; c3_acc was previously left
  ! uninitialised, which made the reported hp2_power mean undefined.
  c1_acc = 0.0
  c2_acc = 0.0
  c3_acc = 0.0

  ! Initialise the input array with a constant base.
  do i = 1, 1000000
    a(i) = 3.   ! alternative, varying input: (3.+i*0.000001)
  end do

  ! The clock rate is queried once and reused for every measurement.
  call system_clock(count_rate=ticks_per_second)
  if (ticks_per_second <= 0) stop 'compare: no system clock available'

  do x = 1, n_repeats
    ! Single-precision kernel.
    call system_clock(tick_start)
    call power(c1)
    call system_clock(tick_end)
    c1_ = real(tick_end - tick_start)/ticks_per_second
    !write(*,*) "The power running time is :,", c1_, 's', 'c(5):',c1(5)
    c1_acc = c1_acc + c1_

    ! Double-precision kernel, result stored in a default-real array.
    call system_clock(tick_start)
    call hp_power(c2)
    call system_clock(tick_end)
    c2_ = real(tick_end - tick_start)/ticks_per_second
    !write(*,*) "The hp_power running time is :,", c2_, 's', 'c(5):',c2(5)
    c2_acc = c2_acc + c2_

    ! Double-precision kernel, result stored in a double-precision array.
    call system_clock(tick_start)
    call hp2_power(c3)
    call system_clock(tick_end)
    c3_ = real(tick_end - tick_start)/ticks_per_second
    !write(*,*) "The hp2_power running time is :,", c3_, 's', 'c(5):',c3(5)
    c3_acc = c3_acc + c3_
  end do

  ! Mean time per call for each kernel.
  c1_ = c1_acc / real(n_repeats)
  c2_ = c2_acc / real(n_repeats)
  c3_ = c3_acc / real(n_repeats)

  write (*,*) c1_
  write(*,*) c2_
  write(*,*) c3_
end program compare
在调试(Debug)配置下,我得到了这样的结果:
power -> 2.0639971E-02
hp_power -> 2.7769983E-02
hp2_power -> 2.7449980E-02
在发布(Release,开启优化)配置下,结果如下:
power -> 6.7950045E-03
hp_power -> 6.8100006E-03
hp2_power -> 1.6954981E-02
原因在于:开启优化后,如果结果最终存储在 real 类型的数组中,编译器会省略到 double 的转换。而在 hp2_power 中,结果存储在 real*8(double)类型的数组中,因此无法应用这一优化(这就是该函数耗时不同的原因)。
我得到的结果始终一致。在调试配置下,power 总是比 hp_power 快,而 hp_power 与 hp2_power 几乎相同;在发布配置下,hp2_power 总是较慢,而 power 与 hp_power 非常接近。
由于其他因素可能干扰计时,你需要重复测量很多次才能得到可靠的结论。查看各次重复的单独耗时会发现,时间略有波动,power 和 hp_power 的耗时有时完全相同。