MPI_ERR_RANK:在集群上运行时出现无效的进程号(rank)

时间:2018-10-10 21:29:37

标签: c mpi openmpi bucket-sort

我正在为一门课程做一个项目,使用了在网上找到的串行(sequential)桶排序代码,并尝试用 OpenMPI 把它改成并行版本。该代码将在集群系统上运行。当我测试它时,它给出了以下错误:

  

“[cluster:5379] *** MPI_Send 中发生错误 [cluster:5379] *** 在通信器 MPI_COMM_WORLD 上 [cluster:5379] *** MPI_ERR_RANK:无效的进程号(rank) [cluster:5379] *** MPI_ERRORS_ARE_FATAL:您的 MPI 作业现在将中止”

有人可以建议我如何解决它吗?

ps。我的编码能力很差,因此我可能无法回答一些问题。

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"

/* One bucket used by bucketSort(): a value array and its fill level. */
struct bucket
{
    int count;   /* number of entries currently stored in value */
    int *value;  /* storage for the entries placed in this bucket */
};

/*
 * Comparison callback for qsort() over int elements.
 *
 * Both arguments point to int values. Returns -1 when *first is smaller
 * than *second, 1 when it is larger, and 0 when the two are equal.
 */
int compareIntegers(const void* first, const void* second)
{
    const int lhs = *(const int *)first;
    const int rhs = *(const int *)second;

    if (lhs < rhs)
    {
        return -1;
    }
    if (lhs > rhs)
    {
        return 1;
    }
    return 0;
}

void bucketSort(int array[], int n)
{
    struct bucket buckets[3];
    int i, j, k;
    for (i = 0; i < 3; i++)
    {
        buckets[i].count = 0;
        buckets[i].value = (int*)malloc(sizeof(int) * n);
    }

    for (i = 0; i < n; i++)
    {
        if (array[i] < 0)
        {
            buckets[0].value[buckets[0].count++] = array[i];
        }
        else if (array[i] > 10)
        {
            buckets[2].value[buckets[2].count++] = array[i];
        }
        else
        {
            buckets[1].value[buckets[1].count++] = array[i];
        }
    }
    for (k = 0, i = 0; i < 3; i++)
    {
        // now using quicksort to sort the elements of buckets
        qsort(buckets[i].value, buckets[i].count, sizeof(int), &compareIntegers);
        for (j = 0; j < buckets[i].count; j++)
        {
            array[k + j] = buckets[i].value[j];
        }
        k += buckets[i].count;
        free(buckets[i].value);
    }

}

/*
 * Program entry point as posted in the question.
 *
 * Intended flow: rank 0 reads an element count and fills the array with
 * rand() values, rank 1 sorts the data with bucketSort(), and rank 2 prints
 * the result. The review notes below mark the places where the code does not
 * do that.
 *
 * NOTE(review): the parameters are declared as (argv, argc); the standard
 * signature is main(int argc, char *argv[]).
 */
int main(char *argv[], int argc)
{
    int array[1000000];
    int i = 0, j, k, n;     // only i is initialised; j, k and n start out indeterminate
    int num;
    // MPI-related state
    int numProc, rank;
    char procName[MPI_MAX_PROCESSOR_NAME];  // not used below
    int nameLen;                            // not used below
    int chunksize;                          // not used below
    double start, end;
    int msgtag;

    // MPI set-up
    MPI_Status stat;
    start = MPI_Wtime();    // timer start; NOTE(review): called before MPI_Init — confirm this is allowed
    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank); // rank of this process within MPI_COMM_WORLD
    MPI_Comm_size(MPI_COMM_WORLD, &numProc); // number of processes; not checked before sending to ranks 1 and 2
    msgtag = 1234;

    if (rank == 0)
    {
        printf("Enter number of element to be sort: ");
        scanf("%d", &num);

        for (i = 0; i < num; i++) // fill the first num elements with random values
        {
            array[i] = rand();
        }

        n = i;  // n is assigned on rank 0 only
        printf("\nBefore Sorting\n");
        for (j = 0; j < i; j++)
        {
            printf("%d ", array[j]);
        }
        // After the loop j == i, so &array[j] is the element just past the
        // generated values rather than the start of the data.
        // NOTE(review): destination rank 1 presumably does not exist when the
        // job runs with a single process, which would explain MPI_ERR_RANK —
        // confirm how the job was launched.
        MPI_Send(&array[j], j, MPI_INT, 1, msgtag, MPI_COMM_WORLD);
    }

    if (rank == 1)
    {
        // j and n have not been assigned on this rank before they are used here.
        MPI_Recv(&array[j], j, MPI_INT, 0, msgtag, MPI_COMM_WORLD, &stat);
        bucketSort(array, n);
        MPI_Send(&array, n, MPI_INT, 2, msgtag, MPI_COMM_WORLD);
    }

    if (rank == 2)
    {
        // n has not been assigned on this rank before it is used here.
        MPI_Recv(&array, n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
        printf("\nAfter Sorting\n");
        // i is still 0 on this rank, so this loop prints nothing.
        for (k = 0; k < i; k++)
        {
            printf("%d ", array[k]);
        }
    }
    // MPI shutdown
    MPI_Finalize();
    end = MPI_Wtime();  // timer end; NOTE(review): called after MPI_Finalize — confirm this is allowed
    double time_spent = end - start;
    printf("\ntime used for this program was %f Sec.", time_spent);

    return 0;
}

1 个答案:

答案 0 :(得分:0)

您的代码中有很多错误。希望截止日期是星期一……

第一:

int main(int argc, char *argv[])

将比int main(char *argv[], int argc)

更好

第二

进程0是用来读取要生成的元素数的进程。
然后它必须将其广播到所有其他进程,否则其他进程在变量num中将具有未定义的数字,对吗?

因此

// Rank 0 reads the element count, generates the data and prints it.
if (rank == 0)
{
        printf("Enter number of element to be sort: ");
        fflush(stdout);     // flush so the prompt is written before scanf waits for input
        scanf("%d", &num);
        for (i = 0; i < num; i++) // fill the first num elements with random values
    {
        array[i] = rand();
    }
        n = num;
        printf("\nBefore Sorting (%i)\n", n);
    for (j = 0; j < n; j++)
    {
        printf("%d ", array[j]);
    }
        fflush(stdout);
}
// Executed by every rank: copies rank 0's value of n to all processes.
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

第三:

避免在循环结束后继续使用循环变量的值。我明白,在执行完

// When this loop finishes, j is equal to n.
for (j = 0; j < n; j++)
{
    printf("%d ", array[j]);
}

你有

j=n

但这样写并不清晰……

第四

MPI_Send 或 MPI_Recv 的第一个参数是要发送(或接收)的第一个元素的地址。所以如果传入

MPI_Send(&array[j], j, MPI_INT, 1, msgtag, MPI_COMM_WORLD);

并且由于j=n(请参见上面的备注),我想您将无法获得想要的东西。

您需要的是

MPI_Send(&array[0], n, MPI_INT, 1, msgtag, MPI_COMM_WORLD);

第五

MPI_Barrier是您的朋友。输出是一项关键操作,因此在进行输出操作之前,您可以(可选)确保所有进程都已达到这一点。

// Original version: rank 2 receives and prints inside a single block.
if (rank == 2)
{
    MPI_Recv(&array, n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
    printf("\nAfter Sorting\n");
    // The loop bound here is i, not the element count n.
    for (k = 0; k < i; k++)
    {
        printf("%d ", array[k]);
    }
}

改为

// Rank 2 receives n sorted values from rank 1, starting at array[0].
if (rank == 2)
{
    MPI_Recv(&(array[0]), n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
}
// Every rank waits here before rank 2 starts printing.
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 2)
{
    printf("\nAfter Sorting\n");
    for (k = 0; k < n; k++)
    {
        printf("%d ", array[k]);
    }
}
    // Second synchronisation point, reached by every rank after the output step.
    MPI_Barrier(MPI_COMM_WORLD);

结论:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"

/* A single bucket for bucketSort(): how many values it holds and where. */
struct bucket
{
    int count;   /* how many elements of value are in use */
    int *value;  /* buffer holding the elements assigned to this bucket */
};

/*
 * qsort() comparator for arrays of int.
 *
 * first and second each point to an int. The result is -1, 0 or 1 when
 * *first is respectively less than, equal to or greater than *second.
 */
int compareIntegers(const void* first, const void* second)
{
    const int *a = (const int *)first;
    const int *b = (const int *)second;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (*a > *b) - (*a < *b);
}

/*
 * Sorts array[0..n-1] into ascending order, in place.
 *
 * The values are split into three buckets (negative values, values from 0
 * to 10, values above 10). Each bucket is sorted with qsort() and the
 * buckets are copied back to array in that order.
 *
 * array: the elements to sort; left untouched when it is NULL.
 * n:     number of elements; nothing is done when n <= 0.
 *
 * If the scratch buffers cannot be allocated, the array is sorted directly
 * with qsort() instead, so the caller still receives sorted data.
 */
void bucketSort(int array[], int n)
{
    struct bucket buckets[3];
    int i, j, k;
    int allocated = 1;

    /* A non-positive n would otherwise turn into a bogus allocation size. */
    if (array == NULL || n <= 0)
    {
        return;
    }

    /* Each bucket is sized for all n values, so no bucket can overflow. */
    for (i = 0; i < 3; i++)
    {
        buckets[i].count = 0;
        buckets[i].value = malloc(sizeof *buckets[i].value * (size_t)n);
        if (buckets[i].value == NULL)
        {
            allocated = 0;
        }
    }

    if (!allocated)
    {
        /* Out of memory: release whatever was obtained and sort in place. */
        for (i = 0; i < 3; i++)
        {
            free(buckets[i].value);
        }
        qsort(array, (size_t)n, sizeof array[0], compareIntegers);
        return;
    }

    for (i = 0; i < n; i++)
    {
        if (array[i] < 0)
        {
            buckets[0].value[buckets[0].count++] = array[i];
        }
        else if (array[i] > 10)
        {
            buckets[2].value[buckets[2].count++] = array[i];
        }
        else
        {
            buckets[1].value[buckets[1].count++] = array[i];
        }
    }

    /* k is the write position in array for the bucket being copied back. */
    for (k = 0, i = 0; i < 3; i++)
    {
        qsort(buckets[i].value, (size_t)buckets[i].count, sizeof(int), compareIntegers);
        for (j = 0; j < buckets[i].count; j++)
        {
            array[k + j] = buckets[i].value[j];
        }
        k += buckets[i].count;
        free(buckets[i].value);
    }
}

/*
 * Three-process demo: rank 0 generates the data, rank 1 sorts it with
 * bucketSort(), and rank 2 prints the sorted result.
 *
 * The job must be started with at least 3 MPI processes (for example
 * "mpirun -np 3"); the program sends to ranks 1 and 2, and sending to a rank
 * that does not exist is reported as MPI_ERR_RANK.
 *
 * Returns 0 on success. Returns EXIT_FAILURE when fewer than 3 processes are
 * available or when the element count entered on rank 0 is not a number in
 * the range 0..MAX_ELEMENTS.
 */
int main(int argc, char *argv[])
{
    enum { MAX_ELEMENTS = 1000000 };
    /* Static storage: a buffer this large should not live on the stack. */
    static int array[MAX_ELEMENTS];
    int i;
    int n = 0;
    int numProc, rank;
    const int msgtag = 1234;
    double start, end;
    MPI_Status stat;

    MPI_Init(&argc, &argv);
    start = MPI_Wtime();    /* timer start, taken once MPI is initialised */

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);    /* rank of this process */
    MPI_Comm_size(MPI_COMM_WORLD, &numProc); /* number of processes */

    /* Ranks 0, 1 and 2 all take part, so fewer than 3 processes cannot work. */
    if (numProc < 3)
    {
        if (rank == 0)
        {
            fprintf(stderr, "This program needs at least 3 MPI processes, but was started with %d.\n", numProc);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0)
    {
        printf("Enter number of element to be sort: ");
        fflush(stdout);
        if (scanf("%d", &n) != 1 || n < 0 || n > MAX_ELEMENTS)
        {
            fprintf(stderr, "Invalid element count; expected a number between 0 and %d.\n", (int)MAX_ELEMENTS);
            n = -1;     /* broadcast below so that every rank stops together */
        }
        else
        {
            for (i = 0; i < n; i++)
            {
                array[i] = rand();
            }
            printf("\nBefore Sorting\n");
            for (i = 0; i < n; i++)
            {
                printf("%d ", array[i]);
            }
            fflush(stdout);
        }
    }

    /* Only rank 0 knows the count; share it with every other rank. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if (n < 0)
    {
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0)
    {
        MPI_Send(array, n, MPI_INT, 1, msgtag, MPI_COMM_WORLD);
    }
    else if (rank == 1)
    {
        MPI_Recv(array, n, MPI_INT, 0, msgtag, MPI_COMM_WORLD, &stat);
        bucketSort(array, n);
        MPI_Send(array, n, MPI_INT, 2, msgtag, MPI_COMM_WORLD);
    }
    else if (rank == 2)
    {
        MPI_Recv(array, n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
    }

    /* Let every rank reach this point before the result is printed. */
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 2)
    {
        printf("\nAfter Sorting\n");
        for (i = 0; i < n; i++)
        {
            printf("%d ", array[i]);
        }
        fflush(stdout);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    end = MPI_Wtime();      /* timer end, taken while MPI is still active */
    MPI_Finalize();

    /* Printed by every rank, each reporting its own elapsed time. */
    printf("\ntime used for this program was %f Sec.", end - start);

    return 0;
}

运行

mpirun -np 3 test_mpi.exe

输出

Enter number of element to be sort: 10

Before Sorting
1804289383 846930886 1681692777 1714636915 1957747793 424238335 719885386 1649760492 596516649 1189641421
After Sorting

424238335 596516649 719885386 846930886 1189641421 1649760492 1681692777 1714636915 1804289383 1957747793
time used for this program was 2.271976 Sec.time used for this program was 2.281183 Sec.
time used for this program was 2.277746 Sec.