我正在学习OpenMP和MPI的并行化。现在,我正在编写一个简单的高斯消除程序,没有旋转。 顺序代码如下:
program ge
implicit none
integer, PARAMETER::n = 4
real, dimension (n,n+1)::A
integer::i,j,k
real start, finish
print *
print *, "Basic gaussian elimination"
print *, "By Jose Ortega Valiente"
print *
print *, "Square matrix dimension = ", n
call cpu_time(start)
!matrix A shape
DO i = 1, n
DO j = 1, n
A(i,j) = i + j
END DO
A(i,i)=A(i,i)*100
END DO
! calcualte RHS (B)
do i = 1, n
A(i, n+1) = 0.0
do j = 1, n
A(i, n+1) = A(i, n+1) + A(i,j)
end do
end do
!let us print the matrix A|RHS
print*
print*,"Matrix A"
DO, i = 1, n
write(*,"100g15.5") ( A(i,j), j=1,n+1 )
END DO
!Basic Gaussian Elimination
DO k = 1, n
DO j = k+1, n
A(k,j) = A(k,j)/A(k,k)
END DO
A(k,n+1) = A(k,n+1)/A(k,k)
A(k,k)=1.0
DO i = k+1, n
DO j = k+1, n-1
A(i,j) = A(i,j) - A(i,k)*A(k,j)
END DO
A(i,n+1) = A(i,n+1) - A(i,k)*A(k,n+1)
A(i,k) = 0.0
END DO
END DO
!Let us print the result matrix A|RHS
print*
print*, "Matrix A result"
DO, i = 1, n
write(*,"100g15.5") ( A(i,j), j=1,n+1 )
END DO
call cpu_time(finish)
print *, ""
print *, ""
print *, "It took: ", finish-start
end program ge
我没有问题做这个程序的OpenMP版本,但我不知道如何开始使用MPI版本。
OpenMP版本就在这里,以防有人对此感兴趣:
program ge
implicit none
include 'omp_lib.h'
integer, PARAMETER::n = 4
real, dimension (n,n+1)::A
integer::i,j,k
real start, finish
integer :: my_rank, p
!$omp parallel private( my_rank )
!$ p = omp_get_num_threads()
!$omp single
print *
print *, "Basic gaussian elimination"
print *, "By Jose Ortega Valiente"
print *
print *, "Square matrix dimension = ", n
print *, "Number of threads available: ", p
!$omp end single
start = omp_get_wtime()
!matrix A shape
!$omp do reduction (+:A)
DO i = 1, n
DO j = 1, n
A(i,j) = i + j
END DO
A(i,i)=A(i,i)*100
END DO
!omp end do
!The RHS is very simple - No difference in parallel
! calculate RHS (B)
!$omp single
do i = 1, n
A(i, n+1) = 0.0
do j = 1, n
A(i, n+1) = A(i, n+1) + A(i,j)
end do
end do
!let us print the matrix A and the RHS
print*
print*,"Matrix A"
DO, i = 1, n
write(*,"100g15.5") ( A(i,j), j=1,n+1 )
END DO
!$omp end single
!Basic Gaussian Elimination
!Outermost loop (k), iterations depend on each other
DO k = 1+my_rank, n
!Inner loop (j), order doesn't affect the result
!Dividing the whole row by the element A(k,j)
!$omp do
DO j = k+1, n
A(k,j) = A(k,j)/A(k,k)
END DO
!$omp end do
A(k,n+1) = A(k,n+1)/A(k,k)
A(k,k)=1.0
!Inner loop (i), order doesn't affect the result
!Eliminating column k
!$omp do
DO i = k+1, n
DO j = k+1, n-1
A(i,j) = A(i,j) - A(i,k)*A(k,j)
END DO
A(i,n+1) = A(i,n+1) - A(i,k)*A(k,n+1)
A(i,k) = 0.0
END DO
!$omp end do
END DO
!$omp end parallel
!Let us print the results (matrix A|RHS)
print*
print*, "Matrix A result"
DO, i = 1, n
write(*,"100g15.5") ( A(i,j), j=1,n+1 )
END DO
finish = omp_get_wtime()
print *, ""
print *, ""
print *, "It took: ", finish-start
end program ge
我的问题不是代码,而是方法。谁能帮我理解怎么做?我的程序效果不好,我认为这是因为我真的不知道如何处理它:
program ge
implicit NONE
include 'mpif.h'
integer, PARAMETER::n = 4
real, dimension (n,n)::A
real, dimension (n)::B,X
integer::i,j
integer:: norm,row,col
real multiplier
real start, finish
integer ierr, taskid, numtasks, stat
call MPI_INIT( ierr )
call MPI_COMM_RANK( MPI_COMM_WORLD, taskid, ierr )
call MPI_COMM_SIZE( MPI_COMM_WORLD, numtasks, ierr )
if (taskid == 0) then
print *
print *, "Basic gaussian elimination"
print *, "By Jose Ortega Valiente"
print *
print *, "Square matrix dimension = ", n
print *, "Number of Nodes = ", numtasks
end if
call cpu_time(start)
!matrix A shape
DO i = 1, n
DO j = 1, n
A(i,j) = i + j
END DO
A(i,i)=A(i,i)*100
END DO
! calculate RHS (B) - no real difference with only 1 node
if (taskid == 0) then
do i = 1, n
B(i) = 0.0
do j = 1, n
B(i) = B(i) + A(i,j)
X(i) = 0 !just to initialize
end do
end do
end if
!let us print the matrix A and B
if (taskid == 0) then
print*, ""
print*,"Matrix A"
DO i = 1, n
write(*,"100g15.5") ( A(i,j), j=1,n )
END DO
print*, ""
print*, "Vector B result"
DO i = 1, n
print *, B(i)
END DO
end if
!Basic Gaussian Elimination
DO norm = 0, n-1
! Broadcast A(norm) row and B(norm)
call MPI_Bcast(A(norm,0), n, MPI_REAL, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(B(norm), MPI_REAL, 0, MPI_COMM_WORLD, ierr)
! Send data from Node 0 to other Nodes
if (taskid == 0) then
DO i=1, numtasks
DO row=norm+1+i, n, numtasks
DO j=1, n
call MPI_SEND(A(row,j),n,MPI_REAL,i,0,MPI_COMM_WORLD,ierr)
END DO
call MPI_SEND(B(row),1,MPI_REAL,i,0,MPI_COMM_WORLD,ierr)
END DO
END DO
DO row=norm+1, n, numtasks
multiplier = A(row,norm)/A(norm,norm)
DO col=norm, n
A(row,col) = A(row,col) - A(norm,col)*multiplier
END DO
B(row) = B(row) - B(norm)*multiplier
END DO
!recieve the updated data drom other Nodes
DO i=1, numtasks
DO j=1, n
call MPI_RECV(A(row,j),n,MPI_REAL,i,1,MPI_COMM_WORLD,stat, ierr)
END DO
call MPI_RECV(B(row),1,MPI_REAL,i,1,MPI_COMM_WORLD,stat, ierr)
END DO
else !recieve data from process 0
DO row=norm+1+taskid, n, numtasks
DO j=1,n
call MPI_RECV(A(row,j),n,MPI_REAL,0,0,MPI_COMM_WORLD,stat, ierr)
END DO
call MPI_RECV(B(row),1,MPI_REAL,0,0,MPI_COMM_WORLD,stat, ierr)
multiplier = A(row,norm)/A(norm,norm)
DO col=norm, n
A(row,col) = A(row,col)-A(norm,col)*multiplier
END DO
B(row) = B(row) - B(norm)*multiplier
!Send back the results
DO j=1, n
call MPI_RECV(A(row,j),n,MPI_REAL,0,1,MPI_COMM_WORLD,stat, ierr)
END DO
call MPI_RECV(B(row),1,MPI_REAL,0,1,MPI_COMM_WORLD,stat, ierr)
END DO
end if
END DO
!Back substitution
if (taskid == 0) then
DO row=n-1, 0, --1
X(row) = B(row)
DO col=n-1, row, --1
X(row) = X(row) - A(row,col)*X(col)
END DO
X(row) = X(row)/A(row,row)
END DO
end if
call cpu_time(finish)
!Let us print the result matrix A and B
if (taskid == 0) then
print*, ""
print*, "Matrix A result"
DO i = 1, n
write(*,"100g15.5") ( A(i,j), j=1,n )
END DO
print*, ""
print*, "Vector B result"
DO i = 1, n
print *, B(i)
END DO
print*, ""
print*, "Vector X result"
DO i = 1, n
print *, X(i)
END DO
print *, ""
print *, ""
print *, "It took: ", finish-start
end if
call MPI_FINALIZE(ierr)
结束程序ge