在pthread上运行MPI比在主线程上运行要慢

时间:2017-03-31 17:35:24

标签: c++ c mpi

我正在运行下面的 MPI 代码:由一个新创建的线程执行 MPI 函数,而主线程只是阻塞等待。问题是,这样测得的时间比直接在主线程(第一个线程)上运行 MPI 函数要差得多,后者见第二段代码。

#include <stdio.h>
#include <mpi.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
#include <pthread.h>

/* Handle of the thread created by main(). */
pthread_t _comm_thread;

#define MILLION 1000000

/*
 * Timer helpers. They expand to brace blocks (not do/while(0)) because the
 * call sites in this file invoke them without a trailing semicolon.
 */

/* Store the current wall-clock time in `timer` (a struct timeval). */
#define START_TIMER(timer) { gettimeofday(&(timer), NULL); }
#define STOP_TIMER(timer) { gettimeofday(&(timer), NULL); }

/* Add the microseconds elapsed between timer1 and timer2 to `total`. */
#define TIME_DIFF(timer1, timer2, total) { \
  long long sec_diff = (timer2).tv_sec - (timer1).tv_sec; \
  long long usec_diff = (timer2).tv_usec - (timer1).tv_usec; \
  (total) += (sec_diff * MILLION) + usec_diff; \
}

#define KILO 1024

/* Reset by receive() before each payload size; not read in the code shown. */
short done_flag = 0;
/* Not referenced in the code shown. */
short finished = 0;

/* Number of messages exchanged for every payload size. */
int num_messages = 500;

/* Payload sizes, in bytes, exercised by the benchmark (1 B up to 1 MiB). */
int payload_sizes[] = {
    1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,
    2 * KILO, 4 * KILO, 8 * KILO, 16 * KILO, 32 * KILO, 64 * KILO,
    128 * KILO, 256 * KILO, 512 * KILO, KILO * KILO
};

/*
 * Derived from the table so that editing payload_sizes (for example,
 * shrinking it to a single entry) can never leave the count out of sync
 * and cause out-of-bounds reads.
 */
int num_payload_sizes = (int)(sizeof payload_sizes / sizeof payload_sizes[0]);

/* Number of payload sizes the receiving rank has completed so far. */
int loops = 0;

/*
 * Rank-0 side of the benchmark: for every entry of payload_sizes, send
 * num_messages messages of that size to rank 1 and print the mean time
 * spent inside MPI_Send, in microseconds.
 */
static void run_sender(void)
{
    for (int i = 0; i < num_payload_sizes; ++i) {
        struct timeval start, stop;
        long long total = 0;
        char *payload = (char *)malloc((size_t)payload_sizes[i]);

        if (payload == NULL) {
            fprintf(stderr, "out of memory allocating %d bytes\n", payload_sizes[i]);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            exit(EXIT_FAILURE);
        }

        done_flag = 0;
        for (int j = 0; j < num_messages; ++j) {
            /* Only the MPI_Send call sits between the two timestamps. */
            START_TIMER(start)
            MPI_Send(payload, payload_sizes[i], MPI_BYTE, 1, 0, MPI_COMM_WORLD);
            STOP_TIMER(stop)
            /* Zero-byte acknowledgement from rank 1 before the next send. */
            MPI_Recv(payload, 0, MPI_BYTE, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            TIME_DIFF(start, stop, total)
        }
        printf("Payload size: %d, time : %lld usec\n", payload_sizes[i], total / num_messages);
        free(payload);
    }
}

/*
 * Receiving side (every rank other than 0): probe each incoming message
 * to learn its size, receive it from rank 0, and answer with a zero-byte
 * acknowledgement. Stops after num_messages messages have been handled
 * for each of the num_payload_sizes payload sizes (tracked in `loops`).
 */
static void run_receiver(void)
{
    int cnt = 0;

    while (loops < num_payload_sizes) {
        MPI_Status stat;
        int size = 0;

        MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
        MPI_Get_count(&stat, MPI_BYTE, &size);

        /* Never request 0 bytes, so that NULL always means failure. */
        char *buffer = (char *)malloc(size > 0 ? (size_t)size : 1);
        if (buffer == NULL) {
            fprintf(stderr, "out of memory allocating %d bytes\n", size);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            exit(EXIT_FAILURE);
        }

        MPI_Recv(buffer, size, MPI_BYTE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(NULL, 0, MPI_BYTE, 0, 0, MPI_COMM_WORLD);

        /* Release on every iteration, including the last one. */
        free(buffer);

        if (++cnt == num_messages) {
            loops++;
            cnt = 0;
        }
    }
}

/*
 * Benchmark driver with a pthread start-routine signature.
 *
 * Initializes MPI requesting MPI_THREAD_FUNNELED, runs the sender on rank 0
 * and the receiver on any other rank, then finalizes MPI.
 *
 * args: unused.
 * Returns NULL.
 */
void *receive(void *args)
{
    int provided = 0;
    int my_proc = 0;

    (void)args;

    MPI_Init_thread(NULL, NULL, MPI_THREAD_FUNNELED, &provided);
    if (provided < MPI_THREAD_FUNNELED) {
        /* Report, but keep going: the benchmark only calls MPI from this thread. */
        fprintf(stderr,
                "warning: MPI thread level %d is below the requested MPI_THREAD_FUNNELED\n",
                provided);
    }
    MPI_Comm_rank(MPI_COMM_WORLD, &my_proc);

    if (my_proc == 0) {
        run_sender();
    } else {
        run_receiver();
    }

    MPI_Finalize();
    return NULL;
}
/* Set to 1 by nothing() once its thread is running. */
int started = 0;

/*
 * Minimal thread body: raise the `started` flag and terminate the calling
 * thread right away.
 *
 * args: unused.
 * Does not return; the thread exits with a null exit value.
 */
void *nothing(void *args)
{
    (void)args;
    started = 1;
    pthread_exit(NULL);
}
/*
 * Run the MPI benchmark (receive) on a newly created thread while the main
 * thread only blocks in pthread_join until it finishes.
 *
 * Returns 0 on success, EXIT_FAILURE if the thread cannot be created or
 * joined.
 */
int main(void)
{
    int rc = pthread_create(&_comm_thread, NULL, receive, NULL);
    if (rc != 0) {
        /* Without a valid thread handle, joining would be undefined. */
        fprintf(stderr, "pthread_create failed with error %d\n", rc);
        return EXIT_FAILURE;
    }

    rc = pthread_join(_comm_thread, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join failed with error %d\n", rc);
        return EXIT_FAILURE;
    }

    return 0;
}

这是第二段(较快的)代码:

#include <stdio.h>
#include <mpi.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
#include <pthread.h>

/* Handle of the thread created by main(). */
pthread_t _comm_thread;

#define MILLION 1000000

/*
 * Timer helpers. They expand to brace blocks (not do/while(0)) because the
 * call sites in this file invoke them without a trailing semicolon.
 */

/* Store the current wall-clock time in `timer` (a struct timeval). */
#define START_TIMER(timer) { gettimeofday(&(timer), NULL); }
#define STOP_TIMER(timer) { gettimeofday(&(timer), NULL); }

/* Add the microseconds elapsed between timer1 and timer2 to `total`. */
#define TIME_DIFF(timer1, timer2, total) { \
  long long sec_diff = (timer2).tv_sec - (timer1).tv_sec; \
  long long usec_diff = (timer2).tv_usec - (timer1).tv_usec; \
  (total) += (sec_diff * MILLION) + usec_diff; \
}

#define KILO 1024

/* Reset by receive() before each payload size; not read in the code shown. */
short done_flag = 0;
/* Not referenced in the code shown. */
short finished = 0;

/* Number of messages exchanged for every payload size. */
int num_messages = 500;

/* Payload sizes, in bytes, exercised by the benchmark (1 B up to 1 MiB). */
int payload_sizes[] = {
    1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,
    2 * KILO, 4 * KILO, 8 * KILO, 16 * KILO, 32 * KILO, 64 * KILO,
    128 * KILO, 256 * KILO, 512 * KILO, KILO * KILO
};

/*
 * Derived from the table so that editing payload_sizes (for example,
 * shrinking it to a single entry) can never leave the count out of sync
 * and cause out-of-bounds reads.
 */
int num_payload_sizes = (int)(sizeof payload_sizes / sizeof payload_sizes[0]);

/* Number of payload sizes the receiving rank has completed so far. */
int loops = 0;

/*
 * Rank-0 side of the benchmark: for every entry of payload_sizes, send
 * num_messages messages of that size to rank 1 and print the mean time
 * spent inside MPI_Send, in microseconds.
 */
static void run_sender(void)
{
    for (int i = 0; i < num_payload_sizes; ++i) {
        struct timeval start, stop;
        long long total = 0;
        char *payload = (char *)malloc((size_t)payload_sizes[i]);

        if (payload == NULL) {
            fprintf(stderr, "out of memory allocating %d bytes\n", payload_sizes[i]);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            exit(EXIT_FAILURE);
        }

        done_flag = 0;
        for (int j = 0; j < num_messages; ++j) {
            /* Only the MPI_Send call sits between the two timestamps. */
            START_TIMER(start)
            MPI_Send(payload, payload_sizes[i], MPI_BYTE, 1, 0, MPI_COMM_WORLD);
            STOP_TIMER(stop)
            /* Zero-byte acknowledgement from rank 1 before the next send. */
            MPI_Recv(payload, 0, MPI_BYTE, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            TIME_DIFF(start, stop, total)
        }
        printf("Payload size: %d, time : %lld usec\n", payload_sizes[i], total / num_messages);
        free(payload);
    }
}

/*
 * Receiving side (every rank other than 0): probe each incoming message
 * to learn its size, receive it from rank 0, and answer with a zero-byte
 * acknowledgement. After every num_messages messages it prints a progress
 * line, and it stops once num_payload_sizes such batches are done
 * (tracked in `loops`).
 */
static void run_receiver(void)
{
    /* Stays 0: the receiver-side timers are disabled, so the printed time is 0. */
    long long total = 0;
    int cnt = 0;

    while (loops < num_payload_sizes) {
        MPI_Status stat;
        int size = 0;

        MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
        MPI_Get_count(&stat, MPI_BYTE, &size);

        /* Never request 0 bytes, so that NULL always means failure. */
        char *buffer = (char *)malloc(size > 0 ? (size_t)size : 1);
        if (buffer == NULL) {
            fprintf(stderr, "out of memory allocating %d bytes\n", size);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            exit(EXIT_FAILURE);
        }

        MPI_Recv(buffer, size, MPI_BYTE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(NULL, 0, MPI_BYTE, 0, 0, MPI_COMM_WORLD);

        /* Release on every iteration, including the last one. */
        free(buffer);

        if (++cnt == num_messages) {
            printf("Time : %lld usec loops%d\n", total / cnt, loops);
            loops++;
            cnt = 0;
        }
    }
}

/*
 * Benchmark driver with a pthread start-routine signature.
 *
 * Initializes MPI requesting MPI_THREAD_FUNNELED, runs the sender on rank 0
 * and the receiver on any other rank, then finalizes MPI.
 *
 * args: unused.
 * Returns NULL.
 */
void *receive(void *args)
{
    int provided = 0;
    int my_proc = 0;

    (void)args;

    MPI_Init_thread(NULL, NULL, MPI_THREAD_FUNNELED, &provided);
    if (provided < MPI_THREAD_FUNNELED) {
        /* Report, but keep going: the benchmark only calls MPI from this thread. */
        fprintf(stderr,
                "warning: MPI thread level %d is below the requested MPI_THREAD_FUNNELED\n",
                provided);
    }
    MPI_Comm_rank(MPI_COMM_WORLD, &my_proc);

    if (my_proc == 0) {
        run_sender();
    } else {
        run_receiver();
    }

    MPI_Finalize();
    return NULL;
}
/*
 * `started` is written by the helper thread and read by main(), so every
 * access is made under started_lock; started_cond announces the change.
 */
static pthread_mutex_t started_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t started_cond = PTHREAD_COND_INITIALIZER;
int started = 0;

/*
 * Helper thread body: announce that the thread is running by setting
 * `started` to 1, then terminate the thread right away.
 *
 * args: unused.
 * Does not return; the thread exits with a null exit value.
 */
void *nothing(void *args)
{
    (void)args;

    pthread_mutex_lock(&started_lock);
    started = 1;
    pthread_cond_signal(&started_cond);
    pthread_mutex_unlock(&started_lock);

    pthread_exit(NULL);
}

/*
 * Start the do-nothing helper thread, wait until it has started, then run
 * the MPI benchmark (receive) directly on the main thread.
 *
 * Returns 0 on success, EXIT_FAILURE if the helper thread cannot be
 * created or joined.
 */
int main(void)
{
    int rc = pthread_create(&_comm_thread, NULL, nothing, NULL);
    if (rc != 0) {
        /* Without this check the wait below would never finish. */
        fprintf(stderr, "pthread_create failed with error %d\n", rc);
        return EXIT_FAILURE;
    }

    /* Blocking wait in place of a spin on an unsynchronized flag. */
    pthread_mutex_lock(&started_lock);
    while (started == 0) {
        pthread_cond_wait(&started_cond, &started_lock);
    }
    pthread_mutex_unlock(&started_lock);

    receive(NULL);

    rc = pthread_join(_comm_thread, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join failed with error %d\n", rc);
        return EXIT_FAILURE;
    }

    return 0;
}

那么您如何解释这种行为?我很困惑。谢谢。

0 个答案:

没有答案