我写了以下代码。当我使用单线程(代码中aa = 1)时,需要11秒。但是对于aa = 2,需要190秒。但是我的机器有4个cpu。你能帮助我吗?我正在编译
gcc Thread.c -lpthread -O3
#include<stdio.h>
#include<pthread.h>
#include <time.h>
#include<stdlib.h>
#define N 100
int aa;
unsigned long long *reg, *b;
#define LIMIT 5000000
void *abc(void* arg){
srand(time(NULL));
unsigned long long i,l;
unsigned char z,j;
int *limit_ptr=(int *)arg;
int i1=*limit_ptr;
printf("i1=%d\n",i1);
for(l=0;l<LIMIT;l++){
for(i=0;i<N;i++)
z=(((double) N)*rand()/(RAND_MAX+1.0));;
z = (((double) N)*rand()/(RAND_MAX+1.0));;
if(z==0)
reg[i1]++;
}
}
int main(){
int l;
aa=2;
printf("%d \n\n",aa);
reg = (unsigned long long *) malloc(aa * sizeof(unsigned long long ));
b = (unsigned long long *) malloc(aa * sizeof(unsigned long long ));
struct timespec start, finish;
double elapsed;
clock_gettime(CLOCK_MONOTONIC, &start);
for(l=0;l<aa;l++)
reg[l]=0;
pthread_t tid[aa];
int j1;
unsigned long long cc=0;
for(j1=0;j1<aa;j1++)
b[j1]=j1;
for(j1=0;j1<aa;j1++){
printf("%d\n", j1);
pthread_create(&tid[j1],NULL,abc,&b[j1]);
}
//
for(j1=0;j1<aa;j1++){
pthread_join(tid[j1],NULL);
printf("%d\n", j1);
}
for(j1=0;j1<aa;j1++)
cc=cc+reg[j1];
printf("%0.10f %llu\n", (double)cc/(aa*LIMIT),cc);
clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed = (finish.tv_sec - start.tv_sec);
elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
printf("Time=%0.10f\n", elapsed);
pthread_exit(NULL);
}
根据建议,我更改了代码。虽然现在执行时间还可以,但随机数生成并不好。代码中数组reg的值是相同的。请帮我。
#include<stdio.h>
#include<pthread.h>
#include <time.h>
#include<stdlib.h>
#include <sched.h>
#define N 100
#define aa 4
unsigned long long *reg, *b;
#define LIMIT 5000000
int rgen()
{
int xi;
int seed;
struct drand48_data drand_buf;
double x;
static i;
if (i==0){
seed = time(NULL);
srand48_r(seed, &drand_buf);
i = 10;
}
drand48_r (&drand_buf, &x);
xi = (int) (x * 100);
return xi;
}
void *abc(void* arg){
int l,i, z;
int *limit_ptr = (int *) arg;
int i1 = *limit_ptr;
printf("i1=%d\n",i1);
for(l=0; l<LIMIT; l++){
for(i=0; i<N; i++)
z= rgen(i1);
if(z==0)
reg[i1]++;
}
}
int main(){
int l;
printf("%d \n\n",aa);
reg = (unsigned long long *) malloc(aa * sizeof(unsigned long long ));
b = (unsigned long long *) malloc(aa * sizeof(unsigned long long ));
struct timespec start, finish;
double elapsed;
clock_gettime(CLOCK_MONOTONIC, &start);
for(l=0;l<aa;l++)
reg[l]=0;
pthread_t tid[aa];
int j1;
unsigned long long cc=0;
for(j1=0;j1<aa;j1++)
b[j1]=j1;
for(j1=0;j1<aa;j1++){
printf("%d\n", j1);
pthread_create(&tid[j1],NULL,abc,&b[j1]);
}
//
printf("created threads\n");
for(j1=0;j1<aa;j1++){
pthread_join(tid[j1],NULL);
printf("%d\n", j1);
}
for(j1=0;j1<aa;j1++)
cc=cc+reg[j1];
printf("\n");
for(j1=0;j1<aa;j1++)
printf("%llu ", reg[j1]);
printf("\n");
printf("%0.10f %llu\n", (double)cc/(aa*LIMIT),cc);
clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed = (finish.tv_sec - start.tv_sec);
elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
printf("Time=%0.10f\n", elapsed);
pthread_exit(NULL);
}
答案 0 :(得分:1)
我必须同意EOF:问题是对rand()
的调用。如果他们被time(NULL)
取代,您将得到您期望的结果。
#include<stdio.h>
#include<pthread.h>
#include <time.h>
#include<stdlib.h>
#define N 100
#define LIMIT 5000000
static unsigned long long *reg;
void *abc(void *arg)
{
//srand(time(NULL));
unsigned long long i, l, k;
unsigned char z;
int *limit_ptr = (int *) arg;
int i1 = *limit_ptr;
printf("Argument: i1=%d\n", i1);
k = 0;
for (l = 0; l < LIMIT; l++) {
for (i = 0; i < N; i++){
z = (((double) N) * time(NULL) / (RAND_MAX + 1.0));
k++;
}
z = (((double) N) * time(NULL) / (RAND_MAX + 1.0));
if (z == 0)
reg[i1]++;
}
printf("Loops of %d: %lld\n",i1, k);
// normal would be pthread_exit() but the linear timings would need
// their own function/ extra argument/needless complexity
return NULL;
}
int main(int argc, char **argv)
{
int l;
struct timespec start, finish;
double elapsed_threaded, elapsed_linear;
int j1;
unsigned long long cc = 0;
int num_threads;
unsigned long long *b;
pthread_attr_t attr;
if(argc > 1)
num_threads = atoi(argv[1]); // don't use atoi in production
else
num_threads = 2;
printf("# of threads: %d\n", num_threads);
// checks omitted here
reg = (unsigned long long *) malloc(num_threads * sizeof(unsigned long long));
b = (unsigned long long *) malloc(num_threads * sizeof(unsigned long long));
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
for (l = 0; l < num_threads; l++)
reg[l] = 0;
pthread_t tid[num_threads];
for (j1 = 0; j1 < num_threads; j1++)
b[j1] = j1;
clock_gettime(CLOCK_MONOTONIC, &start);
for (j1 = 0; j1 < num_threads; j1++) {
printf("Thread #%d started\n", j1);
// Check omitted
pthread_create(&tid[j1], &attr, abc, &b[j1]);
}
for (j1 = 0; j1 < num_threads; j1++) {
// Check omitted
pthread_join(tid[j1], NULL);
printf("Thread #%d joined\n", j1);
}
clock_gettime(CLOCK_MONOTONIC, &finish);
for (j1 = 0; j1 < num_threads; j1++)
cc = cc + reg[j1];
printf("%0.10f %llu\n", (double) cc / (num_threads * LIMIT), cc);
elapsed_threaded = (finish.tv_sec - start.tv_sec);
elapsed_threaded += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
clock_gettime(CLOCK_MONOTONIC, &start);
for (j1 = 0; j1 < num_threads; j1++) {
abc(&b[j1]);
}
clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed_linear = (finish.tv_sec - start.tv_sec);
elapsed_linear += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
printf("Time threaded: %0.10f\nTime linear: %0.10f\nRelation %0.10f\n",
elapsed_threaded,elapsed_linear, elapsed_threaded/elapsed_linear);
pthread_exit(NULL);
}
我稍微清理了一下(明确地设置PTHREAD_CREATE_JOINABLE
可能是多余的,但检查你的pthread实现是否确定)。
使用gcc -W -Wall -O3 -g3 thread.c -o thread -lpthread
结果
1 thread:
Time threaded: 1.3713400890
Time linear: 1.3394284740
Relation 1.0238247996
2 threads:
Time threaded: 1.4989349930
Time linear: 2.6568704750
Relation 0.5641731530
3 threads:
Time threaded: 1.8101585490
Time linear: 3.9965480710
Relation 0.4529305082
4 threads:
Time threaded: 1.3974386710
Time linear: 5.3152423440
Relation 0.2629115627
5 threads:
Time threaded: 1.7717215210
Time linear: 6.6438871330
Relation 0.2666694189
您现在可能会猜到我的CPU有多少核心。
PS:请执行所有检查,即使您正在进行实验,尤其是,如果您正在进行实验。