在Fortran中使用Gatherv for 2d Arrays

时间:2017-05-27 21:13:13

标签: arrays fortran mpi fortran90 column-major-order

我在不同进程上有一些大小=(2,9)的2d数组,我想在根进程上使用MPI_Gatherv在size =(2 * nProcs,9)的全局数组中连接。为此,我试图改编这篇文章:Sending 2D arrays in Fortran with MPI_Gather

但我真的明白他们在做什么,我的例子不起作用:

program testing
    use mpi
    implicit none
    integer(4), allocatable :: local(:,:)
    integer(4), allocatable :: global(:,:), displs(:), counts(:)
    integer(4)              :: me, nProcs, ierr, i 
    integer(4), parameter   :: root = 3
    integer                 :: loc_size(2), glob_size(2), newtype, int_size, &
        resizedtype, starts(2)

    integer(kind=MPI_ADDRESS_KIND) :: extent, begin

    call MPI_Init(ierr)
    call MPI_Comm_rank(MPI_COMM_WORLD, me, ierr)
    call MPI_Comm_size(MPI_COMM_WORLD, nProcs, ierr)

    loc_size  = [2,9]
    glob_size = [2*nProcs, 9]
    starts    = [0, 0]

    allocate(local(loc_size(1),   loc_size(2)))
    allocate(global(glob_size(1), glob_size(2)))
    allocate(displs(nProcs))
    allocate(counts(nProcs))

    call create_loc(me, local)

    do i = 0,nProcs-1
        if(me ==  i) then
            call print_mtx(me, local)
        endif
        call MPI_Barrier(MPI_COMM_WORLD, ierr)
    enddo

    call MPI_Type_create_subarray(2, glob_size, loc_size, starts, &
        MPI_ORDER_FORTRAN, MPI_INTEGER4, &
        newtype, ierr)

    call MPI_Type_size(MPI_INTEGER4, int_size, ierr)
    extent =  2*9 *  int_size
    begin  =  0
    call MPI_Type_create_resized(newtype, begin, extent, resizedtype, ierr)
    call MPI_Type_commit(resizedtype, ierr)

    counts =  1

    do i =  1,nProcs
        displs(i) = (i-1) * 2 * 9 
    enddo

    call MPI_Gatherv(local, 2*9, MPI_INTEGER4, &
        global, counts, displs, resizedtype, &
        root, MPI_COMM_WORLD, ierr)

    if(me ==  root) then
        write (*,*) "##########################"
    endif
    call MPI_Barrier(MPI_COMM_WORLD, ierr)
    if(me ==  root) then
        call print_mtx(me, global)
    endif
    call MPI_Barrier(MPI_COMM_WORLD, ierr)
    call MPI_Finalize(ierr)

contains
    subroutine create_loc(me, arr)
        implicit none
        integer(4), intent(in)  :: me
        integer(4),allocatable  :: arr(:,:)
        integer(4)              :: i

        do i =  1,9
            arr(1,i) = 20 * me + i
            arr(2,i) = 20 * me + i + 10
        enddo
    end subroutine create_loc

    subroutine print_mtx(me, mtx)
        implicit none
        integer(4), intent(in)  :: me, mtx(:,:)
        integer(4)              :: i, j

        do i =  1,size(mtx,2)
            write (unit=6, fmt="(A,I2,A)", advance='no') '[',me, ']' 
            do j =  1,size(mtx,1)
                write(unit=6,fmt="(I,A)",advance='no') mtx(j,i), ";  "
            enddo
            write (*,*) " "
        enddo
        write (*,*) " "
    end subroutine print_mtx


end program testing

输出:

[ 0]           1;            11;    
[ 0]           2;            12;    
[ 0]           3;            13;    
[ 0]           4;            14;    
[ 0]           5;            15;    
[ 0]           6;            16;    
[ 0]           7;            17;    
[ 0]           8;            18;    
[ 0]           9;            19;    

[ 1]          21;            31;    
[ 1]          22;            32;    
[ 1]          23;            33;    
[ 1]          24;            34;    
[ 1]          25;            35;    
[ 1]          26;            36;    
[ 1]          27;            37;    
[ 1]          28;            38;    
[ 1]          29;            39;    

[ 2]          41;            51;    
[ 2]          42;            52;    
[ 2]          43;            53;    
[ 2]          44;            54;    
[ 2]          45;            55;    
[ 2]          46;            56;    
[ 2]          47;            57;    
[ 2]          48;            58;    
[ 2]          49;            59;    

[ 3]          61;            71;    
[ 3]          62;            72;    
[ 3]          63;            73;    
[ 3]          64;            74;    
[ 3]          65;            75;    
[ 3]          66;            76;    
[ 3]          67;            77;    
[ 3]          68;            78;    
[ 3]          69;            79;    

 ##########################
[ 3]           1;            11;   -2111771071;         32765;   -2111771061;         32765;   -2111771047;         32765;    
[ 3]           2;            12;   -2111771013;         32765;   -2111770992;         32765;   -2111770934;         32765;    
[ 3]           3;            13;   -2111769952;         32765;   -2111769932;         32765;   -2111769910;         32765;    
[ 3]           4;            14;   -2111769772;         32765;   -2111769691;         32765;   -2111769647;         32765;    
[ 3]           5;            15;   -2111769585;         32765;   -2111769533;         32765;   -2111769511;         32765;    
[ 3]           6;            16;   -2111769426;         32765;   -2111769398;         32765;   -2111769319;         32765;    
[ 3]           7;            17;   -2111769242;         32765;   -2111769178;         32765;   -2111769145;         32765;    
[ 3]           8;            18;   -2111769061;         32765;   -2111768963;         32765;   -2111768932;         32765;    
[ 3]           9;            19;   -2111768793;         32765;   -2111768613;         32765;   -2111768596;         32765;    

由于第一个进程上的数组传递得很好,我最好的猜测是,它与位移有关,但我无法解决它。

在上面提到的帖子中,他们创建了一个这样的新类型:

call MPI_Type_size(MPI_CHARACTER, charsize, ierr)
extent = localsize*charsize
begin  = 0
call MPI_Type_create_resized(newtype, begin, extent, resizedtype, ierr)
call MPI_Type_commit(resizedtype, ierr)

MPI_Type_create_resized做了什么,为什么有必要?

他们设置了extent = localsize * charsize(不是localsize ** 2 * charsize)和count = 1,但是子数组是3x3,而不是3x1。他们如何仍然发送3x3矩阵?

如何修复我的示例程序?

1 个答案:

答案 0 :(得分:1)

您的计划中有两个问题:

替换

extent =  2*9 *  int_size

extent =  2 *  int_size

并替换

displs(i) = (i-1) * 2 * 9 

displs(i) = (i-1)

你应该没问题

 ##########################
[ 3]       1;        11;        21;        31;        41;        51;        61;        71;    
[ 3]       2;        12;        22;        32;        42;        52;        62;        72;    
[ 3]       3;        13;        23;        33;        43;        53;        63;        73;    
[ 3]       4;        14;        24;        34;        44;        54;        64;        74;    
[ 3]       5;        15;        25;        35;        45;        55;        65;        75;    
[ 3]       6;        16;        26;        36;        46;        56;        66;        76;    
[ 3]       7;        17;        27;        37;        47;        57;        67;        77;    
[ 3]       8;        18;        28;        38;        48;        58;        68;        78;    
[ 3]       9;        19;        29;        39;        49;        59;        69;        79;    

一般来说,我认为MPI_Type_create_subarray不适合散布/聚集,在这种情况下,您只需使用MPI_Type_vector

call MPI_Type_vector(9, 2, 8, MPI_INTEGER4, newtype, ierr)

(请注意,您仍然需要MPI_Type_create_resized

最后但同样重要的是,您现在不需要MPI_Gatherv

call MPI_Gather(local, 2*9, MPI_INTEGER4, &
    global, 1, resizedtype, &
    root, MPI_COMM_WORLD, ierr)

这里还不错