我正在处理MPI I / O问题。等级0从参数文件中读取位置,然后发送到等级1,2,3。所有这些过程(1,2,3)将根据Rank 0给出的位置从读取文件中获取文本并写入不同的写作文件中的行。当我在一台计算机上运行程序时,一切正常。但是当我使用2台计算机时(仍有4个进程,服务器上排名0,1而客户端排名为1,2),输出文件的一些随机行丢失了!这是我的代码
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
//define the message
#define MSG_MISSION_COMPLETE 78
#define MSG_EXIT 79
//define a structural message of MPI
int array_of_blocklengths[3] = { 1, 1, 1 };
MPI_Aint array_of_displacements[3] = { 0, sizeof(float), sizeof(float) + sizeof(int) };
MPI_Datatype array_of_types[3] = {MPI_FLOAT, MPI_FLOAT, MPI_INT};
MPI_Datatype location;
int master();
int slave(MPI_File fhr, MPI_File fhw);
int main(int argc, char* argv[])
{
int rank;
MPI_File fhr, fhw;
char read[] = "./sharedReadSample1.txt";
char write[] = "./sharedWriteSample1.txt";
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
printf("%d is speaking\n", rank);
MPI_File_open(MPI_COMM_WORLD, read, MPI_MODE_RDONLY, MPI_INFO_NULL, &fhr);
MPI_File_open(MPI_COMM_WORLD, write, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fhw);
if (rank == 0)//rank 0, dispatch the tasks
master();
else//other processes
slave(fhr, fhw);
MPI_Finalize();
printf("%d said byebye\n", rank);
MPI_File_close(&fhr);
MPI_File_close(&fhw);
return 0;
}
int master()//master, read the parameters, send them to other slave processes, get the message of task finishing, arrange next task to the slave who completed the task
{
int i, size, firstmsg, nslave;
int buf[256];
struct{
float pause;//pause time
int stand;//starting position in the file
int offset;//offset
}buf_str[10000] = { {0.0,0,0} };
MPI_Comm_size(MPI_COMM_WORLD, &size);
nslave = size - 1;//the number of slaves
FILE* fp;
FILE* fpm;//for log
fp = fopen("sharedAttributeSample1.txt", "rb");
if (fp == NULL)
{
printf("The file was not opened\n");
getchar();
//send a quit message to slaves, use the tag to tell them(>10000)
for (i = 10000; i < 10000 + nslave; i++)
{
buf[0] = MSG_EXIT;
MPI_Send(&buf[0], 1, MPI_INT, i - 10000 + 1, i, MPI_COMM_WORLD);
}
return 0;
}
else
printf("The file was opened\n");
fpm = fopen("./logs/log_master.txt","wb");
if (fpm == NULL)
printf("master log system failed to load!\n");
for (i = 0; i < 10000;i++)
{
fscanf(fp,"%f,%d,%d", &buf_str[i].pause, &buf_str[i].stand, &buf_str[i].offset);
}
MPI_Status status;
MPI_Type_struct(3, array_of_blocklengths, array_of_displacements, array_of_types, &location);
MPI_Type_commit(&location);
for (i = 0; i < nslave; i++)
{
MPI_Send(&buf_str[i], 1, location, i+1, i, MPI_COMM_WORLD);
fprintf(fpm, "initial message %d sent\n",i);
}
for (i = nslave; i < 10000; i++)
{
MPI_Recv(buf, 256, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);//receive messages from slaves
fprintf(fpm, "task %d complete massage received\n",status.MPI_TAG);
if (buf[0] == MSG_MISSION_COMPLETE)//send next task
{
firstmsg = status.MPI_SOURCE;
fprintf(fpm, "task %d is sent to %d \n", i, firstmsg);
MPI_Send(&buf_str[i], 1, location, firstmsg, i, MPI_COMM_WORLD);
}
}
for (i = 10000; i < 10000+nslave; i++)//send quitting message
{
buf[0] = MSG_EXIT;
MPI_Send(&buf_str[0], 1, location, i-10000+1, i, MPI_COMM_WORLD);
}
fclose(fp);
fclose(fpm);
return 0;
}
int slave(MPI_File fhr, MPI_File fhw)
{
struct{
float pause;
int stand;
int offset;
}buf_str;
char buf[256];
int buf_s[256];
int rank, size, nslave, i=0;
char name[30];
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
nslave = size - 1;
FILE* fps[nslave];
//open their own logging pointers
for(i=0;i<nslave;i++)
{
if(i == rank-1)
{
sprintf(name,"./logs/logfile_slave%d",i+1);
fps[i] = fopen(name, "w");
if(fps[i] == NULL)
printf("failed to open logfile of slave %d\n", i+1);
break;
}
}
MPI_Status status;
MPI_Status status_read;
MPI_Status status_write;
MPI_Type_struct(3, array_of_blocklengths, array_of_displacements, array_of_types, &location);
MPI_Type_commit(&location);
while (1)
{
//receive the message from master
MPI_Recv(&buf_str, 1, location, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
fprintf(fps[i], "process %d message %d received\n",rank,status.MPI_TAG);
if (status.MPI_TAG < 10000){//if it is a task
sleep(buf_str.pause);//sleep, to simulate a computing process
fprintf(fps[i], "process %d sleep for %f seconds\n", rank, buf_str.pause);
//read from the position given
MPI_File_read_at(fhr, buf_str.stand, buf, buf_str.offset, MPI_CHAR, &status_read);
buf[buf_str.offset] = '\n';//need a \n
MPI_File_write_at(fhw, status.MPI_TAG*(buf_str.offset+1), buf, buf_str.offset+1, MPI_CHAR, &status_write);
fprintf(fps[i], "%d has done task %d\n", rank, status.MPI_TAG);
//send task complete message to master
buf_s[0] = MSG_MISSION_COMPLETE;
MPI_Send(&buf_s, 1, MPI_INT, 0, status.MPI_TAG, MPI_COMM_WORLD);
}
else
break;
}
fclose(fps[i]);
return 0;
}