mpi i / o文件错过了随机行

时间:2015-06-28 09:05:44

标签: io parallel-processing mpi

我正在处理MPI I / O问题。等级0从参数文件中读取位置,然后发送到等级1,2,3。所有这些过程(1,2,3)将根据Rank 0给出的位置从读取文件中获取文本并写入不同的写作文件中的行。当我在一台计算机上运行程序时,一切正常。但是当我使用2台计算机时(仍有4个进程,服务器上排名0,1而客户端排名为1,2),输出文件的一些随机行丢失了!这是我的代码

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

//define the message
#define MSG_MISSION_COMPLETE 78
#define MSG_EXIT 79

//define a structural message of MPI
int array_of_blocklengths[3] = { 1, 1, 1 };
MPI_Aint array_of_displacements[3] = { 0, sizeof(float), sizeof(float) + sizeof(int) };
MPI_Datatype array_of_types[3] = {MPI_FLOAT, MPI_FLOAT, MPI_INT};

MPI_Datatype location;

int master();
int slave(MPI_File fhr, MPI_File fhw);                                  
int main(int argc, char* argv[])
{
    int rank;

    MPI_File fhr, fhw;
    char read[] = "./sharedReadSample1.txt";
    char write[] = "./sharedWriteSample1.txt";
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("%d is speaking\n", rank);

    MPI_File_open(MPI_COMM_WORLD, read, MPI_MODE_RDONLY, MPI_INFO_NULL, &fhr);
    MPI_File_open(MPI_COMM_WORLD, write, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fhw);

    if (rank == 0)//rank 0, dispatch the tasks
        master();
    else//other processes
        slave(fhr, fhw);

    MPI_Finalize();
    printf("%d said byebye\n", rank);

    MPI_File_close(&fhr);
    MPI_File_close(&fhw);
    return 0;
}


int master()//master, read the parameters, send them to other slave processes, get the message of task finishing, arrange next task to the slave who completed the task
{
    int i, size, firstmsg, nslave;

    int buf[256];
    struct{
        float pause;//pause time
        int stand;//starting position in the file
        int offset;//offset
    }buf_str[10000] = { {0.0,0,0} };
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    nslave = size - 1;//the number of slaves

    FILE* fp;
    FILE* fpm;//for log

    fp = fopen("sharedAttributeSample1.txt", "rb");
    if (fp == NULL)
    {
        printf("The file was not opened\n");
        getchar();
        //send a quit message to slaves, use the tag to tell them(>10000)
        for (i = 10000; i < 10000 + nslave; i++)
        {
            buf[0] = MSG_EXIT;
            MPI_Send(&buf[0], 1, MPI_INT, i - 10000 + 1, i, MPI_COMM_WORLD);
        }
        return 0;
    }
    else
        printf("The file was opened\n");

    fpm = fopen("./logs/log_master.txt","wb");
    if (fpm == NULL)
        printf("master log system failed to load!\n");

    for (i = 0; i < 10000;i++)
    {
        fscanf(fp,"%f,%d,%d", &buf_str[i].pause, &buf_str[i].stand, &buf_str[i].offset);
    }

    MPI_Status status;

    MPI_Type_struct(3, array_of_blocklengths, array_of_displacements, array_of_types, &location);
    MPI_Type_commit(&location);

    for (i = 0; i < nslave; i++)
    {
        MPI_Send(&buf_str[i], 1, location, i+1, i, MPI_COMM_WORLD);
        fprintf(fpm, "initial message %d sent\n",i);
    }

    for (i = nslave; i < 10000; i++)
    {
        MPI_Recv(buf, 256, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);//receive messages from slaves
        fprintf(fpm, "task %d complete massage received\n",status.MPI_TAG);

        if (buf[0] == MSG_MISSION_COMPLETE)//send next task
        {
            firstmsg = status.MPI_SOURCE;
            fprintf(fpm, "task %d  is sent to %d \n", i, firstmsg);
            MPI_Send(&buf_str[i], 1, location, firstmsg, i, MPI_COMM_WORLD);
        }
    }

    for (i = 10000; i < 10000+nslave; i++)//send quitting message
    {
        buf[0] = MSG_EXIT;
        MPI_Send(&buf_str[0], 1, location, i-10000+1, i, MPI_COMM_WORLD);
    }

    fclose(fp);
    fclose(fpm);
    return 0;
}

int slave(MPI_File fhr, MPI_File fhw)
{
    struct{
        float pause;
        int stand;
        int offset;
    }buf_str;

    char buf[256];
    int buf_s[256];

    int rank, size, nslave, i=0;
    char name[30];

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    nslave = size - 1;

    FILE* fps[nslave];

    //open their own logging pointers
    for(i=0;i<nslave;i++)
    {
        if(i == rank-1)
        {
            sprintf(name,"./logs/logfile_slave%d",i+1);
            fps[i] = fopen(name, "w");
            if(fps[i] == NULL)
                printf("failed to open logfile of slave %d\n", i+1);
            break;
        }
    }

    MPI_Status status;
    MPI_Status status_read;
    MPI_Status status_write;

    MPI_Type_struct(3, array_of_blocklengths, array_of_displacements, array_of_types, &location);
    MPI_Type_commit(&location);

    while (1)
    {
        //receive the message from master
        MPI_Recv(&buf_str, 1, location, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        fprintf(fps[i], "process %d message %d received\n",rank,status.MPI_TAG);

        if (status.MPI_TAG < 10000){//if it is a task 

            sleep(buf_str.pause);//sleep, to simulate a computing process
            fprintf(fps[i], "process %d sleep for %f seconds\n", rank, buf_str.pause);
            //read from the position given
            MPI_File_read_at(fhr, buf_str.stand, buf, buf_str.offset, MPI_CHAR, &status_read);
            buf[buf_str.offset] = '\n';//need a \n 
            MPI_File_write_at(fhw, status.MPI_TAG*(buf_str.offset+1), buf, buf_str.offset+1, MPI_CHAR, &status_write);

            fprintf(fps[i], "%d has done task %d\n", rank, status.MPI_TAG);
            //send task complete message to master
            buf_s[0] = MSG_MISSION_COMPLETE;
            MPI_Send(&buf_s, 1, MPI_INT, 0, status.MPI_TAG, MPI_COMM_WORLD);
        }

        else
            break;
    }

    fclose(fps[i]);
    return 0;
}

0 个答案:

没有答案