当使用多个线程时,C中的多线程编程速度很慢

时间:2016-06-22 18:54:51

标签: c multithreading pthreads

我写了以下代码。当我使用单线程(代码中aa = 1)时,需要11秒。但是对于aa = 2,需要190秒。但是我的机器有4个cpu。你能帮助我吗?我正在编译

gcc Thread.c -lpthread -O3

#include<stdio.h>
#include<pthread.h>
#include <time.h>
#include<stdlib.h>



#define N 100

int aa;


unsigned long long *reg, *b;
#define  LIMIT 5000000




void *abc(void* arg){
       srand(time(NULL));

       unsigned long long i,l;
       unsigned char   z,j;
       int *limit_ptr=(int *)arg;
       int i1=*limit_ptr;
       printf("i1=%d\n",i1);
       for(l=0;l<LIMIT;l++){
           for(i=0;i<N;i++)
               z=(((double) N)*rand()/(RAND_MAX+1.0));;

           z = (((double) N)*rand()/(RAND_MAX+1.0));;

           if(z==0)
               reg[i1]++;

        }           

}

int main(){

    int l;
    aa=2;
    printf("%d  \n\n",aa);
    reg = (unsigned long long *) malloc(aa * sizeof(unsigned long long ));
    b = (unsigned long long *) malloc(aa * sizeof(unsigned long long ));

    struct timespec start, finish;
    double elapsed;
    clock_gettime(CLOCK_MONOTONIC, &start);

    for(l=0;l<aa;l++)
        reg[l]=0;
    pthread_t tid[aa];

    int j1;
    unsigned long long  cc=0;



    for(j1=0;j1<aa;j1++)
        b[j1]=j1;


    for(j1=0;j1<aa;j1++){

        printf("%d\n", j1);
        pthread_create(&tid[j1],NULL,abc,&b[j1]);
    }
//    

    for(j1=0;j1<aa;j1++){
        pthread_join(tid[j1],NULL);
        printf("%d\n", j1);
    }

    for(j1=0;j1<aa;j1++)    
        cc=cc+reg[j1]; 


    printf("%0.10f   %llu\n", (double)cc/(aa*LIMIT),cc);

    clock_gettime(CLOCK_MONOTONIC, &finish);
    elapsed = (finish.tv_sec - start.tv_sec);
    elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;  

    printf("Time=%0.10f\n", elapsed);  
    pthread_exit(NULL);

}

根据建议,我更改了代码。虽然现在执行时间还可以,但随机数生成并不好。代码中数组reg的值是相同的。请帮我。

#include<stdio.h>
#include<pthread.h>
#include <time.h>
#include<stdlib.h>
#include <sched.h>


#define N 100
#define aa 4



unsigned long long *reg, *b;
#define  LIMIT 5000000

int rgen()
{
    int xi;
    int seed;
    struct drand48_data drand_buf;
    double x;
    static i;

    if (i==0){
        seed = time(NULL);
        srand48_r(seed, &drand_buf);
        i = 10;
    }

    drand48_r (&drand_buf, &x);
    xi = (int) (x * 100);      

    return xi;
}


void *abc(void* arg){
       int l,i, z;
       int *limit_ptr = (int *) arg;
       int i1 = *limit_ptr;

       printf("i1=%d\n",i1);
       for(l=0; l<LIMIT; l++){
                 for(i=0; i<N; i++)
                     z= rgen(i1);

           if(z==0)
               reg[i1]++;

        }           

}

int main(){

int l;

printf("%d  \n\n",aa);
reg = (unsigned long long *) malloc(aa * sizeof(unsigned long long ));
b = (unsigned long long *) malloc(aa * sizeof(unsigned long long ));

struct timespec start, finish;
double elapsed;
clock_gettime(CLOCK_MONOTONIC, &start);

for(l=0;l<aa;l++)
    reg[l]=0;
pthread_t tid[aa];

int j1;
unsigned long long  cc=0;



for(j1=0;j1<aa;j1++)
  b[j1]=j1;


for(j1=0;j1<aa;j1++){

    printf("%d\n", j1);
    pthread_create(&tid[j1],NULL,abc,&b[j1]);
}
//    
  printf("created threads\n"); 
  for(j1=0;j1<aa;j1++){
   pthread_join(tid[j1],NULL);
   printf("%d\n", j1);
   }

for(j1=0;j1<aa;j1++)    
  cc=cc+reg[j1]; 

printf("\n");
for(j1=0;j1<aa;j1++)   
   printf("%llu   ", reg[j1]);

printf("\n");

printf("%0.10f   %llu\n", (double)cc/(aa*LIMIT),cc);

clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed = (finish.tv_sec - start.tv_sec);
elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;  

printf("Time=%0.10f\n", elapsed);  
pthread_exit(NULL);


}

1 个答案:

答案 0 :(得分:1)

我必须同意EOF:问题是对rand()的调用。如果他们被time(NULL)取代,您将得到您期望的结果。

#include<stdio.h>
#include<pthread.h>
#include <time.h>
#include<stdlib.h>

#define N 100
#define LIMIT 5000000

static unsigned long long *reg;

void *abc(void *arg)
{
  //srand(time(NULL));
  unsigned long long i, l, k;
  unsigned char z;
  int *limit_ptr = (int *) arg;
  int i1 = *limit_ptr;

  printf("Argument: i1=%d\n", i1);
  k = 0;

  for (l = 0; l < LIMIT; l++) {
    for (i = 0; i < N; i++){
      z = (((double) N) * time(NULL) / (RAND_MAX + 1.0));
      k++;
    }
    z = (((double) N) * time(NULL) / (RAND_MAX + 1.0));
    if (z == 0)
      reg[i1]++;
  }
  printf("Loops of %d: %lld\n",i1, k);
  // normal would be pthread_exit() but the linear timings would need
  // their own function/ extra argument/needless complexity
  return NULL;
}

int main(int argc, char **argv)
{
  int l;

  struct timespec start, finish;
  double elapsed_threaded, elapsed_linear;

  int j1;
  unsigned long long cc = 0;

  int num_threads;

  unsigned long long *b;

  pthread_attr_t attr;

  if(argc > 1)
    num_threads = atoi(argv[1]); // don't use atoi in production
  else
    num_threads = 2;

  printf("# of threads: %d\n", num_threads);
  // checks omitted here
  reg = (unsigned long long *) malloc(num_threads * sizeof(unsigned long long));
  b = (unsigned long long *) malloc(num_threads * sizeof(unsigned long long));

  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

  for (l = 0; l < num_threads; l++)
    reg[l] = 0;
  pthread_t tid[num_threads];

  for (j1 = 0; j1 < num_threads; j1++)
    b[j1] = j1;

  clock_gettime(CLOCK_MONOTONIC, &start);
  for (j1 = 0; j1 < num_threads; j1++) {
    printf("Thread #%d started\n", j1);
    // Check omitted
    pthread_create(&tid[j1], &attr, abc, &b[j1]);
  }
  for (j1 = 0; j1 < num_threads; j1++) {
    // Check omitted
    pthread_join(tid[j1], NULL);
    printf("Thread #%d joined\n", j1);
  }
  clock_gettime(CLOCK_MONOTONIC, &finish);

  for (j1 = 0; j1 < num_threads; j1++)
    cc = cc + reg[j1];

  printf("%0.10f   %llu\n", (double) cc / (num_threads * LIMIT), cc);

  elapsed_threaded = (finish.tv_sec - start.tv_sec);
  elapsed_threaded += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;

  clock_gettime(CLOCK_MONOTONIC, &start);
  for (j1 = 0; j1 < num_threads; j1++) {
    abc(&b[j1]);
  }
  clock_gettime(CLOCK_MONOTONIC, &finish);
  elapsed_linear = (finish.tv_sec - start.tv_sec);
  elapsed_linear += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;

  printf("Time threaded: %0.10f\nTime linear:   %0.10f\nRelation %0.10f\n",
           elapsed_threaded,elapsed_linear, elapsed_threaded/elapsed_linear);
  pthread_exit(NULL);
}

我稍微清理了一下(明确地设置PTHREAD_CREATE_JOINABLE可能是多余的,但检查你的pthread实现是否确定)。

使用gcc -W -Wall -O3 -g3 thread.c -o thread -lpthread

编译

结果

1 thread:
Time threaded: 1.3713400890
Time linear:   1.3394284740
Relation 1.0238247996

2 threads:
Time threaded: 1.4989349930
Time linear:   2.6568704750
Relation 0.5641731530

3 threads:
Time threaded: 1.8101585490
Time linear:   3.9965480710
Relation 0.4529305082

4 threads:
Time threaded: 1.3974386710
Time linear:   5.3152423440
Relation 0.2629115627

5 threads:
Time threaded: 1.7717215210
Time linear:   6.6438871330
Relation 0.2666694189

您现在可能会猜到我的CPU有多少核心。

PS:请执行所有检查,即使您正在进行实验,尤其是,如果您正在进行实验。