MPI_GATHERV (Fortran) 从二维子矩阵创建一个新的二维矩阵

时间:2021-07-28 17:34:36

标签: arrays matrix fortran mpi

我正在尝试将行数不同但列数相同的子二维数组收集到全局二维数组中。例如,假设使用 2 个 MPI 进程,第一个进程(即 rank == 0)有:

local = [11,12,13,14]

,第二个过程(即 rank == 1)有:

local = [21,22,23,24
         31,32,33,34]

然后,我想将这两个数组连接成一个二维数组:

global = [11,12,13,14
          21,22,23,24
          31,32,33,34]

由于每个“本地”数组都有不同的行数,我(可能)想使用 mpi_gatherv(或 mpi_allgatherv)。我在这里发现了相同的问题:Using Gatherv for 2d Arrays in FortranUsing MPI_gatherv to create a new matrix from other smaller matrices,但我还是不太明白。所以,请教我。这是我的示例代码:

program main
use mpi
implicit none
   
integer :: i, j
integer :: rank, npro, ierr
integer, allocatable :: local(:,:)
integer, allocatable :: global(:,:), displs(:), counts(:)
integer :: loc_size(2), glob_size(2), starts(2) 
integer :: newtype, int_size, resizedtype
integer(kind=MPI_ADDRESS_KIND) :: extent, begin
! End of local variables ==================================================!

call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, npro, ierr)

! I will set local 2D arrays as: [1,4] for rank #0, and [2,4] for rank #1
! then, the global 2D array will be [3,4] (assuming I use 2 processes)
loc_size  = [rank+1,4]      ! [1,4], [2,4]
glob_size = [3,4]           ! I will use npro = 2

! allocate local and global arrays
allocate(local(loc_size(1),   loc_size(2))) ! [1,4], [2,4]
allocate(global(glob_size(1), glob_size(2)))! [3,4] ! if npro = 2

! set "local" array
!       rank = 0: [11, 12, 13, 14]
!       rank = 1: [21, 22, 23, 24
!                  31, 32, 33, 34]
if(rank == 0) then
   do j=1,4
      local(1,j) = 10 + j ! [11,12,13,14]
   end do
else if(rank == 1) then
   do i=1,2
      do j=1,4
         local(i,j) = (i+1)*10 + j ! [21,22,23,24; 31,32,33,34]
      end do
   end do
end if


! create a 2D subarray and set as "newtype" 
starts    = [0,0]              ! array start location
call MPI_Type_create_subarray(2, glob_size, loc_size, starts, &
&                             MPI_ORDER_FORTRAN, MPI_INTEGER, &
&                             newtype, ierr)

! get MPI_INTEGER type size in byte
! I don't quite understand the following processes...
! So, please comment on each step if possible...
call MPI_Type_size(MPI_INTEGER, int_size, ierr)
begin  =  0
extent =  (rank+1) * int_size ! rank 0 = 4 byte; rank 1 = 8 byte (am I doing correct here?)
call MPI_Type_create_resized(newtype, begin, extent, resizedtype, ierr) ! I dont' quite understand this process
call MPI_Type_commit(resizedtype, ierr)

! allocate index for mpi_gatherv
allocate(displs(npro)) ! [2], index for mpi_gatherv
allocate(counts(npro)) ! [2], index for mpi_gatherv

counts = [1,1]
do i =  1,npro
   displs(i) = (i-1) ! [0,1]
end do

call MPI_Gatherv(local, 1, MPI_INTEGER,               &
&                global, counts, displs, resizedtype, &
&                0, MPI_COMM_WORLD, ierr)

if(rank == 0) then
   do i=1,3
      write(*,*) (global(i,j), j=1,4)
   end do
end if

call MPI_Finalize(ierr)
end program main

提前致谢。

1 个答案:

答案 0 :(得分:1)

我认为如果您更改存储顺序会容易得多(即具有 rank "i" 初始化固定长度的 "i+1" 列),但以下代码似乎适用于您当前拥有的内容。我打开了调试输出,将列数更改为 4,在 3 个进程上运行(因此全局行数 = 1+2+3 = 6)并确保使用唯一数据初始化本地数组。

重要的一点是,您需要为每个等级使用不同的发送类型,因为步幅不同(因为本地数组的维度不同)。也许有更简单的方法来做到这一点(不改变存储顺序),但至少这似乎有效。

请注意,注释不再与实际代码相关!

program main
use mpi
implicit none
   
integer :: i, j
integer :: rank, npro, ierr
integer, allocatable :: local(:,:)
integer, allocatable :: global(:,:), displs(:), counts(:)
integer :: loc_size(2), glob_size(2), starts(2) 
integer :: newtype, int_size, stype, resizedstype, rtype, resizedrtype
integer(kind=MPI_ADDRESS_KIND) :: extent, begin
! End of local variables ==================================================!

call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, npro, ierr)

! I will set local 2D arrays as: [1,3] for rank #0, and [2,3] for rank #1
! then, the global 2D array will be [3,3] (assuming I use 2 processes)
loc_size  = [rank+1,4]      ! [1,3], [2,3]
glob_size = [6,4]           ! I will use npro = 3

! allocate local and global arrays
allocate(local(loc_size(1),   loc_size(2))) ! [1,3], [2,3]
allocate(global(glob_size(1), glob_size(2)))! [3,3] ! if npro = 2

! set "local" array
!       rank = 0: [0, 0, 0]
!       rank = 1: [1, 1, 1
!                  1, 1, 1]
do i=1,rank+1
   do j=1,4
      local(i,j) = 10*rank+4*(i-1)+j
   end do
end do

! check the local array 
 do i=1,rank+1
    write(*,*) 'rank = ', rank, 'local = ', (local(i,j), j=1,4)
 end do

! create a 2D subarray and set as send type stype 
loc_size= [1,4]
starts    = [0,0]              ! array start location
glob_size=[rank+1,4]
call MPI_Type_create_subarray(2, glob_size, loc_size, starts, &
&                             MPI_ORDER_FORTRAN, MPI_INTEGER, &
&                             stype, ierr)

! get MPI_INTEGER type size in byte
call MPI_Type_size(MPI_INTEGER, int_size, ierr)
begin  =  0
extent =  int_size

call MPI_Type_create_resized(stype, begin, extent, resizedstype, ierr)
call MPI_Type_commit(resizedstype, ierr)

 ! create a 2D subarray and set as receive type rtype 
loc_size=[1,4]
starts    = [0,0]              ! array start location
glob_size=[6,4]
call MPI_Type_create_subarray(2, glob_size, loc_size, starts, &
&                             MPI_ORDER_FORTRAN, MPI_INTEGER, &
&                             rtype, ierr)

! get MPI_INTEGER type size in byte
! I don't quite understand the following processes...
! So, please comment on each step if possible...
call MPI_Type_size(MPI_INTEGER, int_size, ierr)
begin  =  0
extent =  int_size

call MPI_Type_create_resized(rtype, begin, extent, resizedrtype, ierr)
call MPI_Type_commit(resizedrtype, ierr)

! allocate index for mpi_gatherv
allocate(displs(npro)) ! [2], index for mpi_gatherv
allocate(counts(npro)) ! [2], index for mpi_gatherv

counts = [1,2,3]
displs = [0,1,3]
call MPI_Gatherv(local, rank+1, resizedstype,               &
&                global, counts, displs, resizedrtype, &
&                0, MPI_COMM_WORLD, ierr)

if(rank == 0) then
   do i=1,6
      write(*,*) (global(i,j), j=1,4)
   end do
end if

call MPI_Finalize(ierr)
end program main

如果我运行 3 个进程,我会得到合理的结果:

 rank =            0 local =            1           2           3           4
 rank =            1 local =           11          12          13          14
 rank =            1 local =           15          16          17          18
 rank =            2 local =           21          22          23          24
 rank =            2 local =           25          26          27          28
 rank =            2 local =           29          30          31          32
           1           2           3           4
          11          12          13          14
          15          16          17          18
          21          22          23          24
          25          26          27          28
          29          30          31          32