这是对问题36182486,41421437和其他几个问题的跟进。我想通过使用多个处理器并行处理各个元素来加速偏差和质量矩阵的组装以进行FEM计算。这个小MWE显示了操作的内容。
!! compile with gfortran -fopenmp -o FEMassembly FEMassembly.f90
Program FEMassembly
use, intrinsic :: iso_c_binding
implicit none
real (c_double) :: arrayM(3,3)=reshape((/2.d0,1.d0,1.d0,1.d0,&
&2.d0,1.d0,1.d0,1.d0,2.d0/),(/3,3/)) ! contrib from one element
integer (c_int) :: ke,ne=4,kx,nx=6,nodes(3)
real (c_double) :: L(6,6)
integer (c_int) :: t(4,3)=reshape((/1,2,5,6,2,3,4,5,4,5,2,3/),(/4,3/))
!! first, no OMP
do ke=1,ne ! for each triangular element
nodes=t(ke,:)
L(nodes,nodes)=L(nodes,nodes)+arrayM
end do
print *,'L no OMP'
write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx)
L=0
!$omp parallel do private (nodes)
do ke=1,ne ! for each triangular element
nodes=t(ke,:)
!! !$omp atomic
L(nodes,nodes)=L(nodes,nodes)+arrayM
!! !$omp end atomic
end do
!$omp end parallel do
print *,'L with OMP and race'
write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx)
End Program FEMassembly
随着原子指令的注释,数组L包含几个错误的值,可能是因为我试图用原子指令避免的竞争条件。结果是:
L no OMP
2. 1. 0. 1. 0. 0.
1. 6. 1. 2. 2. 0.
0. 1. 4. 0. 2. 1.
1. 2. 0. 4. 1. 0.
0. 2. 2. 1. 6. 1.
0. 0. 1. -0. 1. 2.
L with OMP and race
2. 1. 0. 1. 0. 0.
1. 6. 1. 2. 2. 0.
0. 1. 2. 0. 2. 1.
1. 2. 0. 4. 1. 0.
0. 2. 2. 1. 6. 1.
0. 0. 1. 0. 1. 2.
如果取消注释“atomic”指令,编译器将返回错误: 错误:!$ OMP ATOMIC语句必须在(1)处设置内部类型的标量变量 其中(1)指向L行(节点,节点)中的arrayM .....
我希望实现的是每个元素(这里是平凡的数组M)的耗时贡献并行发生,但由于几个线程处理相同的矩阵元素,因此必须采取措施使总和出现在有序的时尚。任何人都可以建议一种方法吗?
答案 0 :(得分:2)
在Fortran中,最简单的方法是使用缩减。这是因为OpenMP for Fortran支持减少数组。以下是我认为你要做的事情,但是因为
而需要一点点盐有时这么小的阵列很难找到竞争条件
!! compile with gfortran -fopenmp -o FEMassembly FEMassembly.f90
Program FEMassembly
use, intrinsic :: iso_c_binding
Use omp_lib, Only : omp_get_num_threads
implicit none
real (c_double) :: arrayM(3,3)=reshape((/2.d0,1.d0,1.d0,1.d0,&
&2.d0,1.d0,1.d0,1.d0,2.d0/),(/3,3/)) ! contrib from one element
integer (c_int) :: ke,ne=4,nodes(3)
real (c_double) :: L(6,6)
integer (c_int) :: t(4,3)=reshape((/1,2,5,6,2,3,4,5,4,5,2,3/),(/4,3/))
! Not declared in original program
Integer :: nx, kx
! Not set in original program
nx = Size( L, Dim = 1 )
!$omp parallel default( none ) private ( ke, nodes ) shared( ne, t, L, arrayM )
!$omp single
Write( *, * ) 'Working on ', omp_get_num_threads(), ' threads'
!$omp end single
!$omp do reduction( +:L )
do ke=1,ne ! for each triangular element
nodes=t(ke,:)
L(nodes,nodes)=L(nodes,nodes)+arrayM
end do
!$omp end do
!$omp end parallel
write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx)
End Program FEMassembly