我正在尝试实现 Neumaier 算法,用于对通过 MPI 调用收集到的数组中的双精度数求和。之所以需要这样做,是因为被求和的值是阻力系数,通常需要比较到 10^-5 量级的小数位。然而,与代码串行版本得到的结果相比,数值仍然相差很大,似乎算法本身存在某种问题。下面的子程序中我有什么地方做错了吗?
!
! ----------------------------------------------------------------------
!
! SUBROUTINE dmpisum
!
!
!> Gather one double-precision value from every rank of MPI_COMM_WORLD onto
!> rank 0 and add the gathered values there with Neumaier's compensated
!> (improved Kahan-Babuska) summation.
!>
!> This routine contains a collective MPI call, so every rank of
!> MPI_COMM_WORLD has to call it.
!>
!> Arguments:
!>   num   (in)  this rank's contribution to the sum.
!>   total (out) the compensated sum on the process where myid == 0;
!>               0.0d0 on every other process (the result is not broadcast).
!>   nproc (in)  length of the receive buffer; it must equal the number of
!>               ranks in MPI_COMM_WORLD, otherwise the gather writes outside
!>               num_r or leaves part of it unused.
!>
!> Notes:
!>   * myid and the MPI constants are not declared here; presumably they are
!>     provided by the shared_data module -- confirm against that module.
!>   * The status returned in ierr is not inspected.
!>   * Only the nproc gathered values are summed with compensation. Rounding
!>     error already contained in each rank's num is not affected by this
!>     routine.
subroutine dmpisum(num,total,nproc)
  use shared_data
  implicit none
  !
  integer(kind=4), intent(in)  :: nproc
  real(kind=8),    intent(in)  :: num
  real(kind=8),    intent(out) :: total
  !
  integer(kind=4) :: ierr, i
  real(kind=8)    :: c
  ! The correction term is algebraically zero, so a compiler allowed to use
  ! value-unsafe floating-point optimisation may simplify or reassociate it
  ! away. Staging every rounding step through volatile temporaries is
  ! intended to force each operation to be carried out exactly as written.
  real(kind=8), volatile :: t, err_term
  real(kind=8), dimension(nproc) :: num_r
  !
  total = 0.0d0
  num_r(:) = 0.0d0
  !
  ! Collect one value per rank into num_r on rank 0.
  !
  call MPI_GATHER(num, 1, MPI_DOUBLE_PRECISION, num_r, 1, &
                  MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
  !
  ! Neumaier summation: total carries the running (rounded) sum, c
  ! accumulates the low-order bits lost by each addition.
  !
  if (myid == 0) then
    total = num_r(1)
    c = 0.0d0
    !
    do i = 2, nproc
      t = total + num_r(i)
      if (abs(total) >= abs(num_r(i))) then
        ! Low-order digits of num_r(i) were lost in t.
        err_term = total - t
        err_term = err_term + num_r(i)
      else
        ! Low-order digits of total were lost in t.
        err_term = num_r(i) - t
        err_term = err_term + total
      end if
      c = c + err_term
      total = t
    end do
    !
    ! Apply the accumulated correction once, at the very end.
    total = total + c
  end if
  !
end subroutine dmpisum
所有进程都会调用 dmpisum(该调用并不在“仅主进程执行”的条件分支内)。