Question

我有一个 C 程序，它读取一个非常大的文件（5GB 到 65GB 不等），对文件中的数据进行转置，然后把转置后的数据写到其他文件中。由于转置的缘故，结果文件总共大约是输入文件的 30 倍。我使用的是 Open MPI，因此每个进程都会写入自己的结果文件。

每个进程都能以非常快的速度把最初约 18 GB 的数据写入各自的结果文件。但是到了这个阶段之后，程序就慢得几乎停滞，top 命令输出中的 %CPU 也从约 100% 骤降到 0.3%。

有人知道这是什么原因吗？我是不是碰到了某种系统限制？

代码：

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>


unsigned long long impute_len=0;

void write_results(unsigned long long, unsigned long long, int);



void main(int argc, char **argv){

    // the impute output
    impute_fp=fopen("infile.txt", "r");

    // find input file length
    fseek(impute_fp, 0, SEEK_END);
    impute_len=ftell(impute_fp);


    //mpi magic - hopefully!
    MPI_Status status;
    unsigned long long proc_id, ierr, num_procs, tot_recs, recs_per_proc, 
        root_recs, start_byte, end_byte, start_recv, end_recv; 


    // Now replicte this process to create parallel processes.
    ierr = MPI_Init(&argc, &argv);


    //find out process ID, and how many processes were started. 
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &proc_id);
    ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

    if(proc_id == 0){

        tot_recs = impute_len/54577;    //54577 is length of each line
        recs_per_proc = tot_recs/num_procs;

        if(tot_recs % num_procs != 0){
            recs_per_proc=recs_per_proc+1;
            root_recs = tot_recs-(recs_per_proc*(num_procs-1));

        }else{
            root_recs = recs_per_proc;
        }


        //distribute a portion to each child process 
        int z=0;
        for(int x=1; x<num_procs; x++){
            start_byte = ((root_recs*54577))+(z*(recs_per_proc*54577));
            end_byte = ((root_recs*54577))+((z+1)*(recs_per_proc*54577));

            ierr = MPI_Send(&start_byte, 1 , MPI_UNSIGNED_LONG_LONG, x, 0, MPI_COMM_WORLD);

            ierr = MPI_Send(&end_byte, 1 , MPI_UNSIGNED_LONG_LONG, x, 0, MPI_COMM_WORLD);

            z++;
        }


        //root proc bit of work
        write_results(0, (root_recs*54577), proc_id);


    }else{
        //must be a slave process

        ierr = MPI_Recv(&start_recv, 1, MPI_UNSIGNED_LONG_LONG, 0, 0, MPI_COMM_WORLD, &status);

        ierr = MPI_Recv(&end_recv, 1, MPI_UNSIGNED_LONG_LONG, 0, 0, MPI_COMM_WORLD, &status);

        //Write my portion of file
        write_results(start_recv, end_recv, proc_id);

    }

    ierr = MPI_Finalize();
    fclose(impute_fp);

}


/*
 * Writes out the transposed data for one slice of the input.
 *
 * main() calls this with `start` and `end` as byte offsets into the input
 * file and `proc_id` as the caller's MPI rank.
 *
 * NOTE(review): the body is elided in this listing - only the placeholder
 * line and the trailing fclose(results_fp) are shown, and results_fp is not
 * declared anywhere in the visible code. Whether `end` is inclusive or
 * exclusive cannot be determined from here; confirm against the real body.
 */
void write_results(unsigned long long start, unsigned long long end, int proc_id){  

    **logic to write out transposed data here

    }

    fclose(results_fp);

}

Open MPI 程序一开始运行得非常快，但很快就大幅减速

0 个答案: