我想在OpenMP(2核心)中实现以下顺序代码,用于并行计算。
我应该使用OpenMP进行哪些修改?如果你帮助我,我将不胜感激。提前致谢
program POTENTIAL
parameter (imx=201, jmx=201)
common /pars/ imax,jmax,kmax,kout
common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)
common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)
data rl2,rl2allow /1.,1.E-7/
c..Read the input data, generate the grid data and initialize the solution
call INIT
c..Start the iterative solution loop
k = 0
DO WHILE (k .lt. kmax .and. rl2 .gt. rl2allow)
k = k + 1
call BC
c..Point iterative solutions
call SOR
c..Line iterative solution
c call SLOR
c..Update phi_k array and evaluate the residual
rl2=0.
do j = 2,jmax-1
do i = 2,imax-1
rl2 = rl2 + (phi_kp1(i,j) - phi_k(i,j))**2
phi_k(i,j) = phi_kp1(i,j)
enddo
enddo
rl2 = SQRT(rl2/((imax-2)*(jmax-2)))
print*, 'Residual@k=',k,rl2
c..Output intermediate solutions
if( mod(k,kout).eq.0 .and. k.ne.kmax) call IO(k)
ENDDO
call IO(k)
stop
end
subroutine SOR
parameter (imx=201, jmx=201)
common /pars/ imax,jmax,kmax,kout
common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)
common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)
beta2 = (dr/dth)**2
c..Solve for phi^k+1
do i = 2,imax-1
c1 = beta2/r(i)**2
c2 = 0.5*dr/r(i)
c3 = 0.5/(1.+c1)
do j = 2,jmax-1
c..Implement the point Jacobi, Gauss-Seidel and SOR methods
phi_kp1(i,j) = c3*((1.-c2)*phi_k(i-1,j) + (1.+c2)*phi_k(i+1,j)
> + c1*(phi_k(i,j-1) + phi_k(i,j+1)) )
enddo
enddo
return
end
subroutine INIT
parameter (imx=201, jmx=201)
common /pars/ imax,jmax,kmax,kout
common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)
common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)
data r1,r2/1.0,5.0/, imax,jmax/101,91/
c..Read input parameters like imax, jmax
print*, 'Enter kmax and kout :'
read(*,*) kmax, kout
pi = 4.*ATAN(1.)
dr = (r2-r1)/float(imax-1)
dth = pi/float(jmax-1)
c..Grid generation
do j=1,jmax
do i=1,imax
r(i) = r1 + dr*(i-1)
th(j) = dth*(j-1)
x(i,j) = r(i)*COS(th(j))
y(i,j) = r(i)*SIN(th(j))
c...initialize the phi arrays
phi_k(i,j) = r(i)*COS(th(j))
phi_kp1(i,j) = 0.
enddo
enddo
call BC
call IO(1)
return
end
c-------------------------------------------------------------------
subroutine BC
parameter (imx=201, jmx=201)
common /pars/ imax,jmax,kmax,kout
common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)
common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)
do i = 1,imax
phi_k(i,1) = phi_k(i,2)
phi_kp1(i,1) = phi_k(i,2)
phi_k(i,jmax) = phi_k(i,jmax-1)
phi_kp1(i,jmax) = phi_k(i,jmax-1)
enddo
do j = 1,jmax
phi_k(1,j) = phi_k(2,j)
phi_kp1(1,j) = phi_k(2,j)
phi_k(imax,j) = r(imax)*COS(th(j))
phi_kp1(imax,j) = phi_k(imax,j)
enddo
return
end
c-------------------------------------------------------------------
subroutine VELOCITY(vx,vy)
parameter (imx=201, jmx=201)
common /pars/ imax,jmax,kmax,kout
common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)
common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)
dimension vx(imx,jmx),vy(imx,jmx)
do j = 1,jmax
do i = 1,imax
if(i .eq. 1) then
vr = (phi_k(2,j)-phi_k(1,j))/dr
elseif(i .eq. imax) then
vr = (phi_k(imax,j)-phi_k(imax-1,j))/dr
else
vr = 0.5*(phi_k(i+1,j)-phi_k(i-1,j))/dr
endif
if(j .eq. 1) then
vth = (phi_k(i,2)-phi_k(i,1))/(dth*r(i))
elseif(j .eq. jmax) then
vth = (phi_k(i,jmax)-phi_k(i,jmax-1))/(dth*r(i))
else
vth = 0.5*(phi_k(i,j+1)-phi_k(i,j-1))/(dth*r(i))
endif
vx(i,j) = vr*COS(th(j)) - vth*SIN(th(j))
vy(i,j) = vr*SIN(th(j)) + vth*COS(th(j))
enddo
enddo
return
end
c-------------------------------------------------------------------
subroutine IO(k)
c..Output solution in tecplot format
parameter (imx=201, jmx=201)
common /pars/ imax,jmax,kmax,kout
common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)
common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)
dimension vx(imx,jmx),vy(imx,jmx)
character fname*32,string*8,ext*5
call VELOCITY(vx,vy)
write(string,'(f8.5)') float(k)/100000
read(string,'(3x,a5)') ext
fname = 'vars-'//ext//'.tec'
open(1,file=fname,form='formatted')
write(1,*) ' variables="x","y","phi","u","v" '
write(1,*) ' zone i=',imax, ', j=',jmax
do j = 1,jmax
do i = 1,imax
write(1,*) x(i,j),y(i,j),phi_k(i,j),vx(i,j),vy(i,j)
enddo
enddo
close(1)
return
end
c-------------------------------------------------------------------
SUBROUTINE THOMAS(mx,il,iu,A,B,C,R)
c............................................................
c Solution of a tridiagonal system of n equations of the form
c A(i)*x(i-1) + B(i)*x(i) + C(i)*x(i+1) = R(i) for i=il,iu
c the solution X(i) is stored in F(i)
c A(il-1) and C(iu+1) are not used.
c A,B,C,R are arrays to be provided by the user
c............................................................
dimension a(mx),b(mx),c(mx),r(mx),x(mx)
x(il)=c(il)/b(il)
r(il)=r(il)/b(il)
do i=il+1,iu
z=1./(b(i)-a(i)*x(i-1))
x(i)=c(i)*z
r(i)=(r(i)-a(i)*r(i-1))*z
enddo
do i=iu-1,il,-1
r(i)=r(i)-x(i)*r(i+1)
enddo
return
end
答案 0 :(得分:1)
查看代码的迭代部分(第14-37行),我们观察到有两个在循环中执行代码的调用(BC和SOR)。这些是放置OpenMP指令的潜在位置。在这两个例程中,循环不会公开阻止并行执行的数据依赖项,因此您可以应用以下转换。
对于BC例程(如下),您可以使用两次!$ OMP PARALLEL DO在可用线程中分配两个循环的工作(每个循环一个指令)。
subroutine BC
parameter (imx=201, jmx=201)
common /pars/ imax,jmax,kmax,kout
common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)
common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)
!$OMP PARALLEL DO SHARED(IMAX,PHI_K,PHI_KP1)
do i = 1,imax
phi_k(i,1) = phi_k(i,2)
phi_kp1(i,1) = phi_k(i,2)
phi_k(i,jmax) = phi_k(i,jmax-1)
phi_kp1(i,jmax) = phi_k(i,jmax-1)
enddo
!$OMP END PARALLEL DO
!$OMP PARALLEL DO SHARED(JMAX,PHI_K,PHI_KP1)
do j = 1,jmax
phi_k(1,j) = phi_k(2,j)
phi_kp1(1,j) = phi_k(2,j)
phi_k(imax,j) = r(imax)*COS(th(j))
phi_kp1(imax,j) = phi_k(imax,j)
enddo
!$OMP END PARALLEL DO
return
end
并且类似地对于SOR,您可以在线程之间分配外部循环的工作。在这种情况下,每个线程将需要时间变量(c1,c2,c3)和迭代变量(j)的私有副本。
subroutine SOR
parameter (imx=201, jmx=201)
common /pars/ imax,jmax,kmax,kout
common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)
common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)
beta2 = (dr/dth)**2
c..Solve for phi^k+1
C$OMP PARALLEL DO PRIVATE(c1,c2,c3,j) SHARED(IMAX,JMAX,PHI_KP1,PHI_K)
do i = 2,imax-1
c1 = beta2/r(i)**2
c2 = 0.5*dr/r(i)
c3 = 0.5/(1.+c1)
do j = 2,jmax-1
c..Implement the point Jacobi, Gauss-Seidel and SOR methods
phi_kp1(i,j) = c3*((1.-c2)*phi_k(i-1,j) + (1.+c2)*phi_k(i+1,j)
> + c1*(phi_k(i,j-1) + phi_k(i,j+1)) )
enddo
enddo
C$OMP END PARALLEL DO
return
end
您可能希望浏览其余代码并查看进一步的并行化机会。您可以使用分析器(gprof,TAU,Vampir,Paraver)来识别应用程序热点并将重点放在那里。