为什么我的扫描程序代码使用Pthread或OpenMP这么慢?

时间:2015-04-05 07:44:18

标签: multithreading parallel-processing pthreads openmp

我想扫描一个数组并获得扫描结果。

在我的代码中,我使用shuffle函数创建一个无序数组。然后扫描它得到一些数字(大于60000)。我将数组拆分为threadnum-part,每个线程都得到一个部分来处理。似乎不同线程之间没有共享内存。

那么,为什么两个并行代码是如此之慢,因为它太慢,我认为填充的东西可能不是主要原因。谁能给我一些提示?我是并行编程的初学者,谢谢。

这是我的测试代码,包含三部分:序列化/ Pthread / OpenMP。你可以复制它并在你自己的机器上进行测试。

序列化代码:

#include <stdlib.h>
#include <stdio.h>
#include <time.h>

#define N (65532)
#define threadnum 2
typedef struct{
    int *mydata;
    int *myres;
    int val;
    int datalen;
    int reslen;
    int tid;
}arg_t;
int randint(int i, int k){
   int ret;

   if(i> k){
        int t= i;i=k;k=t;
   }
    ret = i + rand()%(k-i+1);

    return ret;
}
void shuffle(int *org,int n){

    int j=0,i;
    for(i = n-1;i !=0;i--){
        j = randint(0,i);

        int t = org[j];org[j] = org[i];org[i] =t;
    }

}

void scan(arg_t *arg){
    int i;

    arg->reslen=0;
    for(i=0;i<arg->datalen;i++){
        //if(arg->mydata[i] > arg->val){
            arg->myres[arg->reslen] = arg->mydata[i];
            arg->reslen +=(arg->mydata[i] > arg->val);
        //}
    }
}
int main(){

    struct timeval begin,end;
    int i,A[N],*res,reslen,perthread;
    double diff_usec;
    arg_t args[threadnum];

    for(i=0;i<N;i++){
        A[i] = i+1;
    }
    shuffle(A,N);

    gettimeofday(&begin,NULL);
    res = malloc(sizeof(int)*688); 

    reslen = 0;
    for(i=0;i<N;i++){
        //if(arg->mydata[i] > arg->val){
            res[reslen] =A[i];
            reslen +=(A[i] > 60000);
        //}
    }
    gettimeofday(&end,NULL);
    diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
    printf("\n%.4lf %d\n",diff_usec,reslen);
    return 0;
}

Pthread代码:

#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <pthread.h>

#define N 65532
#define threadnum 2
typedef struct{
    int *mydata;
    int *myres;
    int val;
    int datalen;
    int reslen;
    int tid;
}arg_t;
int randint(int i, int k){
   int ret;

   if(i> k){
        int t= i;i=k;k=t;
   }
    ret = i + rand()%(k-i+1);

    return ret;
}
void shuffle(int *org,int n){

    int j=0,i;
    for(i = n-1;i !=0;i--){
        j = randint(0,i);

        int t = org[j];org[j] = org[i];org[i] =t;
    }

}

void scan(arg_t *arg){
    int i;

    arg->reslen=0;
    for(i=0;i<arg->datalen;i++){
        //if(arg->mydata[i] > arg->val){
            arg->myres[arg->reslen] = arg->mydata[i];
            arg->reslen +=(arg->mydata[i] > arg->val);
        //}
    }
}
int get_time(struct timespec *begin , struct timespec *end){
    return 1000 * (end -> tv_sec - begin -> tv_sec) + (end -> tv_sec - begin->tv_sec)/1000000;     
}
int main(){

    struct timeval begin,end;
    //struct timespec begin,end;
    int i,A[N],*res,reslen,perthread;
    arg_t args[threadnum];
    double diff_usec;
    pthread_t tid[threadnum];

    for(i=0;i<N;i++){
        A[i] = i+1;
    }
    shuffle(A,N);


    gettimeofday(&begin,NULL);
    //clock_gettime(CLOCK_REALTIME,&begin);
    perthread = N/threadnum;
    for(i=0;i<threadnum;i++){
        args[i].mydata = A+i*perthread;
        args[i].myres  = malloc(sizeof(int)*perthread); 
        args[i].datalen = (i == threadnum-1)?(N-(threadnum-1)*perthread):perthread;
        args[i].val = 60000;
        args[i].tid = i;
        pthread_create(&tid[i],NULL,scan,(void*)&args[i]);
    }

    reslen =0;
    for(i=0;i<threadnum;i++){
        pthread_join(tid[i],NULL);
        reslen += args[i].reslen;
    }
    res = malloc(sizeof(int)*reslen);
    reslen=0;
    for(i=0;i<threadnum;i++){
        memcpy(res+reslen,args[i].myres,args[i].reslen);
        reslen += args[i].reslen;
    }
    //clock_gettime(CLOCK_REALTIME,&end);
    gettimeofday(&end,NULL);
    diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
    //printf("\n%dms %d\n",get_time(&begin,&end),reslen);
    printf("\n%.4lfms %d\n",diff_usec,reslen);
    return 0;
}

OpenMP代码:

#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include <time.h>

#define N 65532
#define threadnum 16
typedef struct{
    int *mydata;
    int *myres;
    int val;
    int datalen;
    int reslen;
}arg_t;
int get_time(struct timespec *begin , struct timespec *end){
    return 1000 * (end -> tv_sec - begin -> tv_sec) + (end -> tv_sec - begin->tv_sec)/1000000;     
}
int randint(int i, int k){
   int ret;

   if(i> k){
        int t= i;i=k;k=t;
   }
    ret = i + rand()%(k-i+1);

    return ret;
}
void shuffle(int *org,int n){

    int j=0,i;
    for(i = n-1;i !=0;i--){
        j = randint(0,i);

        int t = org[j];org[j] = org[i];org[i] =t;
    }

}

void scan(arg_t *arg){
    int i;

    arg->reslen=0;
    for(i=0;i<arg->datalen;i++){
        //if(arg->mydata[i] > arg->val){
            arg->myres[arg->reslen] = arg->mydata[i];
            arg->reslen +=(arg->mydata[i] > arg->val);
        //}
    }
}
int main(){
    struct timeval begin,end;    
    //struct timespec begin,end;
    int i,A[N],*res,reslen,perthread;
    double diff_usec;
    arg_t args[threadnum];

    for(i=0;i<N;i++){
        A[i] = i+1;
    }
    shuffle(A,N);

    gettimeofday(&begin,NULL);
    //clock_gettime(CLOCK_REALTIME,&begin);
    perthread = N/threadnum;
    for(i=0;i<threadnum;i++){
        args[i].mydata = A+i*perthread;
        args[i].myres  = malloc(sizeof(int)*perthread); 
        args[i].datalen = (i == threadnum-1)?(N-(threadnum-1)*perthread):perthread;
        args[i].val = 60000;
    }
    omp_set_num_threads(threadnum);
    #pragma omp parallel 
    {
        scan(&args[omp_get_thread_num()]);
    }

    reslen =0;
    for(i=0;i<threadnum;i++){
        reslen += args[i].reslen;
    }
    res = malloc(sizeof(int)*reslen);
    reslen=0;
    for(i=0;i<threadnum;i++){
        memcpy(res+reslen,args[i].myres,args[i].reslen);
        reslen += args[i].reslen;
    }
    gettimeofday(&end,NULL);
    //clock_gettime(CLOCK_REALTIME,&end);
    diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
    printf("\n%.4lfms %d\n",diff_usec,reslen);
    return 0;
}

0 个答案:

没有答案