我正在编写 Fortran 代码来执行矩阵-向量乘法。这里的乘法是"即时"(on the fly)完成的,也就是说矩阵元素不会预先存储。代码在串行情况下运行正常,但当我使用 OpenMP 将其并行化之后,却无法得到正确的输出。有人能指出我哪里做错了吗?
!> Shared problem data for the on-the-fly matrix-vector product.
!>
!> N, M       : problem sizes. M is the length of `s` as iterated by
!>              Mv_onthefly; N is the number of bit positions scanned there,
!>              and N/2 is the width of each half-word used by `lookup`.
!> Jeven_*,
!> Jodd_*     : lookup tables indexed by the half-words extracted with
!>              ibits(a, 0, N/2) and ibits(a, N/2, N/2). Those values can be
!>              0, so the tables must be allocated with a lower bound of 0
!>              (e.g. allocate(Jeven_A(0:2**(N/2)-1))).
!> s          : table of bit patterns, one per basis index k.
module module1
  implicit none
  integer :: N, M
  integer, allocatable :: Jeven_A(:), Jeven_B(:)
  integer, allocatable :: Jodd_A(:), Jodd_B(:)
  integer, allocatable :: s(:)
end module module1
!> Translate the bit pattern `a` into an index `sa` using the tables in
!> module1.
!>
!> The pattern is split into two half-words of N/2 bits each: the low
!> half-word indexes the "_A" table and the next half-word indexes the "_B"
!> table; `sa` is the sum of the two table entries.
!>
!> a        (in)  bit pattern to look up
!> Pswitch  (in)  0 selects the Jeven tables, 1 selects the Jodd tables;
!>                for any other value `sa` is left unassigned
!> sa       (out) resulting index
subroutine lookup(a, Pswitch, sa)
  use module1
  implicit none
  integer, intent(in)  :: a, Pswitch
  integer, intent(out) :: sa
  integer :: half, lo_bits, hi_bits

  half = N/2
  lo_bits = ibits(a, 0, half)       ! bits 0 .. N/2-1
  hi_bits = ibits(a, half, half)    ! bits N/2 .. 2*(N/2)-1

  select case (Pswitch)
  case (0)
    sa = Jeven_A(lo_bits) + Jeven_B(hi_bits)
  case (1)
    sa = Jodd_A(lo_bits) + Jodd_B(hi_bits)
  end select
end subroutine lookup
!> Matrix-vector product computed on the fly (matrix elements are generated
!> inside the loop rather than stored).
!>
!> Reads column `vni` of V as the input vector, accumulates the product in a
!> work array, and writes the result to column `vno` of V.
!>
!> V    (inout) M-by-3 complex work array holding the vectors
!> vni  (in)    column of V used as input
!> vno  (in)    column of V overwritten with the result
!>
!> Parallelisation: the outer loop over k scatters into Vvno(sb) for
!> arbitrary sb, so Vvno is combined across threads with an array REDUCTION.
!> The input copy Vvni is only read and must stay SHARED: listing it in a
!> PRIVATE clause would give every thread its own uninitialised copy.
!>
!> NOTE(review): J1 and J2 are not declared in this routine or in the visible
!> part of module1; they must be made available (e.g. as module variables or
!> named constants) for this routine to compile -- confirm where they live.
subroutine Mv_onthefly(V, vni, vno)
  use module1
  implicit none
  integer, intent(in) :: vni, vno
  complex, dimension(1:M,1:3), intent(inout) :: V
  integer :: sb, i, j, k, a, b
  complex, allocatable :: Vvni(:), Vvno(:)

  allocate(Vvni(1:M), Vvno(1:M))
  Vvni(:) = V(:, vni)
  Vvno(:) = (0.0, 0.0)

  ! Only the loop indices and per-iteration scalars are private; everything
  ! else (Vvni, s, N, M, the lookup tables) is read-only and shared.
  !$omp parallel do default(shared) private(k, i, j, a, b, sb) reduction(+:Vvno)
  do k = 1, M
    a = s(k)
    do i = 0, N-1
      do j = 0, N-1
        ! NOTE(review): for i == j the mask 2**i + 2**j equals 2**(i+1),
        ! i.e. a single, different bit is flipped -- confirm this is intended.
        b = ieor(a, 2**i + 2**j)
        ! Per the original author: lookup returns the index sb that
        ! corresponds to pattern b in the array s.
        call lookup(b, 0, sb)
        Vvno(sb) = Vvno(sb) - J1*Vvni(k)
      end do
    end do
    ! Diagonal contribution, applied once per bit position.
    do i = 0, N-1
      Vvno(k) = Vvno(k) - J2*Vvni(k)
    end do
  end do
  !$omp end parallel do

  V(:, vno) = Vvno
  deallocate(Vvni, Vvno)
end subroutine Mv_onthefly