MPI:如何正确使用MPI_Win_allocate_shared

时间:2016-05-14 00:24:07

标签: mpi mpi-rma

我想在进程之间使用共享内存。我尝试了MPI_Win_allocate_shared但是当我执行程序时它给了我一个奇怪的错误:

第592行./src/mpid/ch3/include/mpid_rma_shm.h中的断言失败:local_target_rank >= 0 internal ABORT

这是我的源代码:

# include <stdlib.h>
# include <stdio.h>
# include <time.h>

# include "mpi.h"

int main ( int argc, char *argv[] );
void pt(int t[], int s);

int main ( int argc, char *argv[] )
{
    int rank, size, shared_elem = 0, i;
    MPI_Init ( &argc, &argv );
    MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
    MPI_Comm_size ( MPI_COMM_WORLD, &size );
    MPI_Win win;
    int *shared;

    if (rank == 0) shared_elem = size;
    MPI_Win_allocate_shared(shared_elem*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &shared, &win);
    if(rank==0)
    {
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, win);
        for(i = 0; i < size; i++)
        {
            shared[i] = -1;
        }
        MPI_Win_unlock(0,win);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    int *local = (int *)malloc( size * sizeof(int) );
    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
    for(i = 0; i < 10; i++)
    {
        MPI_Get(&(local[i]), 1, MPI_INT, 0, i,1, MPI_INT, win);
    }
    printf("processus %d (avant): ", rank);
    pt(local,size);
    MPI_Win_unlock(0,win);

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win);

    MPI_Put(&rank, 1, MPI_INT, 0, rank, 1, MPI_INT, win);

    MPI_Win_unlock(0,win);

    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
    for(i = 0; i < 10; i++)
    {
        MPI_Get(&(local[i]), 1, MPI_INT, 0, i,1, MPI_INT, win);
    }
    printf("processus %d (apres): ", rank);
    pt(local,size);
    MPI_Win_unlock(0,win);


    MPI_Win_free(&win);
    MPI_Free_mem(shared);
    MPI_Free_mem(local);
    MPI_Finalize ( );

    return 0;
}

/*
 * Print the first s elements of t on a single line of standard output.
 * Each value is followed by one space and the line ends with a newline.
 * Nothing but the newline is printed when s is zero or negative.
 */
void pt(int t[],int s)
{
    int idx;

    for (idx = 0; idx < s; ++idx)
    {
        printf("%d ", t[idx]);
    }
    printf("\n");
}

我得到以下结果:

processus 0 (avant): -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 
processus 0 (apres): 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 
processus 4 (avant): 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 
processus 4 (apres): 0 -1 -1 -1 4 -1 -1 -1 -1 -1 
Assertion failed in file ./src/mpid/ch3/include/mpid_rma_shm.h at line 592: local_target_rank >= 0
internal ABORT - process 5
Assertion failed in file ./src/mpid/ch3/include/mpid_rma_shm.h at line 592: local_target_rank >= 0
internal ABORT - process 6
Assertion failed in file ./src/mpid/ch3/include/mpid_rma_shm.h at line 592: local_target_rank >= 0
internal ABORT - process 9

有人可以帮我弄清楚出了什么问题,以及这个错误意味着什么吗?非常感谢。

2 个答案:

答案 0 :(得分:3)

MPI_Win_allocate_shared背离了MPI高度抽象的本质。它暴露了底层的内存组织,允许程序绕过开销大(而且常常令人困惑)的MPI RMA操作,在具备共享内存的系统上直接使用共享内存。MPI通常面向各进程(rank)不共享物理内存地址空间的分布式内存(distributed-memory)环境,但如今典型的HPC系统由许多互连的共享内存(shared-memory)节点组成。因此,在同一节点上运行的进程可以附加到同一个共享内存段,通过共享数据而不是消息传递进行通信。

MPI提供了一个通信器拆分操作,可以把进程(rank)划分成若干子组,使每个子组内的进程能够共享内存:

MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, key, info, &newcomm);

在典型的集群上,这基本上是按进程所在的节点对它们进行分组。拆分完成后,就可以在每个newcomm内的进程上执行共享内存窗口分配。请注意,对于多节点集群作业,这会产生多个相互独立的newcomm通信器,从而产生多个共享内存窗口。一个节点上的进程不会(也不应该)看到其他节点上的共享内存窗口。

从这个角度看,MPI_Win_allocate_shared是对各操作系统特有的共享内存分配机制的一层平台无关的封装。

答案 1 :(得分:0)

此代码和用法存在一些问题。其中一些在@Hristolliev的回答中提及。

  1. 您必须让所有进程运行在同一节点上(这样通信器才是节点内的),或者使用MPI_Comm_split_type按共享内存拆分通信器。
  2. 您需要使用至少10个进程运行此代码。
  3. 第三,local应该使用free()来释放。
  4. 您应该从查询中获取shared指针。
  5. 你应该解除分配shared(我认为这是由Win_free处理的)
  6. 这是结果代码:

    # include <stdlib.h>
    # include <stdio.h>
    # include <time.h>

    # include "mpi.h"

    int main ( int argc, char *argv[] );
    void pt(int t[], int s);

    int main ( int argc, char *argv[] )
    {
        int rank, size, shared_elem = 0, i;
        MPI_Init ( &argc, &argv );
        MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
        MPI_Comm_size ( MPI_COMM_WORLD, &size );
        MPI_Win win;
        int *shared;

        // if (rank == 0) shared_elem = size;
        // MPI_Win_allocate_shared(shared_elem*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &shared, &win);
        if (rank == 0)
        {
            MPI_Win_allocate_shared(size, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &shared, &win);
        }
        else
        {
            int disp_unit;
            MPI_Aint ssize;
            MPI_Win_allocate_shared(0, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &shared, &win);
            MPI_Win_shared_query(win, 0, &ssize, &disp_unit, &shared);
        }
        if(rank==0)
        {
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, win);
            for(i = 0; i < size; i++)
            {
                shared[i] = -1;
            }
            MPI_Win_unlock(0,win);
        }
        MPI_Barrier(MPI_COMM_WORLD);
        int *local = (int *)malloc( size * sizeof(int) );
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
        for(i = 0; i < 10; i++)
        {
            MPI_Get(&(local[i]), 1, MPI_INT, 0, i,1, MPI_INT, win);
        }
        printf("processus %d (avant): ", rank);
        pt(local,size);
        MPI_Win_unlock(0,win);

        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win);

        MPI_Put(&rank, 1, MPI_INT, 0, rank, 1, MPI_INT, win);

        MPI_Win_unlock(0,win);

        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
        for(i = 0; i < 10; i++)
        {
            MPI_Get(&(local[i]), 1, MPI_INT, 0, i,1, MPI_INT, win);
        }
        printf("processus %d (apres): ", rank);
        pt(local,size);
        MPI_Win_unlock(0,win);

        MPI_Win_free(&win);
        // MPI_Free_mem(shared);
        free(local);
        // MPI_Free_mem(local);
        MPI_Finalize ( );

        return 0;
    }

    void pt(int t[],int s)
    {
        int i = 0;
        while(i < s)
        {
            printf("%d ",t[i]);
            i++;
        }
        printf("\n");
    }