我想扫描一个数组并获得扫描结果。
在我的代码中,我使用shuffle函数创建一个无序数组。然后扫描它得到一些数字(大于60000)。我将数组拆分为threadnum-part,每个线程都得到一个部分来处理。似乎不同线程之间没有共享内存。
那么,为什么两个并行代码是如此之慢,因为它太慢,我认为填充的东西可能不是主要原因。谁能给我一些提示?我是并行编程的初学者,谢谢。
这是我的测试代码,包含三部分:序列化/ Pthread / OpenMP。你可以复制它并在你自己的机器上进行测试。
序列化代码:
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#define N (65532)
#define threadnum 2
typedef struct{
int *mydata;
int *myres;
int val;
int datalen;
int reslen;
int tid;
}arg_t;
int randint(int i, int k){
int ret;
if(i> k){
int t= i;i=k;k=t;
}
ret = i + rand()%(k-i+1);
return ret;
}
void shuffle(int *org,int n){
int j=0,i;
for(i = n-1;i !=0;i--){
j = randint(0,i);
int t = org[j];org[j] = org[i];org[i] =t;
}
}
void scan(arg_t *arg){
int i;
arg->reslen=0;
for(i=0;i<arg->datalen;i++){
//if(arg->mydata[i] > arg->val){
arg->myres[arg->reslen] = arg->mydata[i];
arg->reslen +=(arg->mydata[i] > arg->val);
//}
}
}
int main(){
struct timeval begin,end;
int i,A[N],*res,reslen,perthread;
double diff_usec;
arg_t args[threadnum];
for(i=0;i<N;i++){
A[i] = i+1;
}
shuffle(A,N);
gettimeofday(&begin,NULL);
res = malloc(sizeof(int)*688);
reslen = 0;
for(i=0;i<N;i++){
//if(arg->mydata[i] > arg->val){
res[reslen] =A[i];
reslen +=(A[i] > 60000);
//}
}
gettimeofday(&end,NULL);
diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
printf("\n%.4lf %d\n",diff_usec,reslen);
return 0;
}
Pthread代码:
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <pthread.h>
#define N 65532
#define threadnum 2
typedef struct{
int *mydata;
int *myres;
int val;
int datalen;
int reslen;
int tid;
}arg_t;
int randint(int i, int k){
int ret;
if(i> k){
int t= i;i=k;k=t;
}
ret = i + rand()%(k-i+1);
return ret;
}
void shuffle(int *org,int n){
int j=0,i;
for(i = n-1;i !=0;i--){
j = randint(0,i);
int t = org[j];org[j] = org[i];org[i] =t;
}
}
void scan(arg_t *arg){
int i;
arg->reslen=0;
for(i=0;i<arg->datalen;i++){
//if(arg->mydata[i] > arg->val){
arg->myres[arg->reslen] = arg->mydata[i];
arg->reslen +=(arg->mydata[i] > arg->val);
//}
}
}
int get_time(struct timespec *begin , struct timespec *end){
return 1000 * (end -> tv_sec - begin -> tv_sec) + (end -> tv_sec - begin->tv_sec)/1000000;
}
int main(){
struct timeval begin,end;
//struct timespec begin,end;
int i,A[N],*res,reslen,perthread;
arg_t args[threadnum];
double diff_usec;
pthread_t tid[threadnum];
for(i=0;i<N;i++){
A[i] = i+1;
}
shuffle(A,N);
gettimeofday(&begin,NULL);
//clock_gettime(CLOCK_REALTIME,&begin);
perthread = N/threadnum;
for(i=0;i<threadnum;i++){
args[i].mydata = A+i*perthread;
args[i].myres = malloc(sizeof(int)*perthread);
args[i].datalen = (i == threadnum-1)?(N-(threadnum-1)*perthread):perthread;
args[i].val = 60000;
args[i].tid = i;
pthread_create(&tid[i],NULL,scan,(void*)&args[i]);
}
reslen =0;
for(i=0;i<threadnum;i++){
pthread_join(tid[i],NULL);
reslen += args[i].reslen;
}
res = malloc(sizeof(int)*reslen);
reslen=0;
for(i=0;i<threadnum;i++){
memcpy(res+reslen,args[i].myres,args[i].reslen);
reslen += args[i].reslen;
}
//clock_gettime(CLOCK_REALTIME,&end);
gettimeofday(&end,NULL);
diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
//printf("\n%dms %d\n",get_time(&begin,&end),reslen);
printf("\n%.4lfms %d\n",diff_usec,reslen);
return 0;
}
OpenMP代码:
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include <time.h>
#define N 65532
#define threadnum 16
typedef struct{
int *mydata;
int *myres;
int val;
int datalen;
int reslen;
}arg_t;
int get_time(struct timespec *begin , struct timespec *end){
return 1000 * (end -> tv_sec - begin -> tv_sec) + (end -> tv_sec - begin->tv_sec)/1000000;
}
int randint(int i, int k){
int ret;
if(i> k){
int t= i;i=k;k=t;
}
ret = i + rand()%(k-i+1);
return ret;
}
void shuffle(int *org,int n){
int j=0,i;
for(i = n-1;i !=0;i--){
j = randint(0,i);
int t = org[j];org[j] = org[i];org[i] =t;
}
}
void scan(arg_t *arg){
int i;
arg->reslen=0;
for(i=0;i<arg->datalen;i++){
//if(arg->mydata[i] > arg->val){
arg->myres[arg->reslen] = arg->mydata[i];
arg->reslen +=(arg->mydata[i] > arg->val);
//}
}
}
int main(){
struct timeval begin,end;
//struct timespec begin,end;
int i,A[N],*res,reslen,perthread;
double diff_usec;
arg_t args[threadnum];
for(i=0;i<N;i++){
A[i] = i+1;
}
shuffle(A,N);
gettimeofday(&begin,NULL);
//clock_gettime(CLOCK_REALTIME,&begin);
perthread = N/threadnum;
for(i=0;i<threadnum;i++){
args[i].mydata = A+i*perthread;
args[i].myres = malloc(sizeof(int)*perthread);
args[i].datalen = (i == threadnum-1)?(N-(threadnum-1)*perthread):perthread;
args[i].val = 60000;
}
omp_set_num_threads(threadnum);
#pragma omp parallel
{
scan(&args[omp_get_thread_num()]);
}
reslen =0;
for(i=0;i<threadnum;i++){
reslen += args[i].reslen;
}
res = malloc(sizeof(int)*reslen);
reslen=0;
for(i=0;i<threadnum;i++){
memcpy(res+reslen,args[i].myres,args[i].reslen);
reslen += args[i].reslen;
}
gettimeofday(&end,NULL);
//clock_gettime(CLOCK_REALTIME,&end);
diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
printf("\n%.4lfms %d\n",diff_usec,reslen);
return 0;
}