在c ++中并行化素生成器

时间:2014-05-15 15:45:06

标签: c multithreading parallel-processing openmp primes

这是一个主要的生成器程序,当处理器int numProc的数量为1时它工作正常,但如果用户认为有多个处理器,我想使用并行化的质数版本,以便生成分发给许多进程。

注意:我已在int main中取出标志处理和菜单显示代码以节省空间

代码:

#define NUMITERS 1
#define MAXSIZE 500000
#define MAXNUMPROCS 64
#define PRIME(num) (2*(num) + 3)
#define NUM(prime) (((prime) - 3)/2)

#define TRUE 1
#define FALSE 0

int lastPrime, count;      /* Last Prime and Number of Primes Found */
int size = 100000;           /* Number of numbers to test for prime */
int numProcs = 5;          /* Number of processors */
FILE *out = NULL;          /* File to output primes to */
char *flags;               /* Array of primes (odd numbers only) 
                              i.e. flags[0] corresponds to 3
                              flags[1] corresponds to 5
                              flags[n] corresponds to 2*n+3
                              flags[i] is TRUE if i is a prime */

void primes(void);              /* procedure prototype */
void parallelPrimes(void);      /* procedure prototype */

int main(int argc, char *argv[])
{
    clock_t t;
t = clock();
    int i, opt;
    /* MENU DISPLAY and flag operations here */

    if (numProcs == 1) 
      primes();   /* Call  primes routine */
    else
      parallelPrimes(); /* call multi-threaded/parallelized

    /* print out all of the primes found */
    if (out != NULL) {
      int i;
      fprintf(out, "2\n");
      for (i = 0; i < size; ++i)
    if (flags[i])
      fprintf(out, "%d\n", PRIME(i));
    }

    free(flags);
    printf(" Number of primes = %d, largest prime = %d\n", count, lastPrime);
    t = clock() - t;
printf ("It took me %d clicks (%f seconds).\n",t,((float)t)/CLOCKS_PER_SEC);
}

void primes()
{
    int i,iter, prime, div1, div2, rem;
    for (iter=0; iter < NUMITERS; ++iter)      
        {
            count = 0;
            lastPrime = 0;

            for (i=0; i < size; ++i) {    /* For every odd number */
            prime = PRIME(i);              
                div1=1;
                do {                            
                    div1 += 2;            /* Divide by 3, 5, 7, ... */
                    div2 = prime / div1;  /* Find the dividend */
                    rem = prime % div1;   /* Find remainder */
                } while (rem != 0 && div1 <= div2); 

                if (rem != 0 || div1 == prime) {
                    /* prime is really a prime */
                    flags[i] = TRUE;
                    count++;                   
                    lastPrime = prime;
                } else {
                    /* prime is not a prime */
                    flags[i] = FALSE;         
                }
            }
        }
}

到目前为止,我已尝试在以下函数中使用#include<omp.h>

void parallelPrimes()
{

    int i;
    int iter, prime;
    int div1, div2, rem;

    for (iter=0; iter < NUMITERS; ++iter)      
        /*  
           Don't parallelize this loop??? */
        {
            count = 0;
            lastPrime = 0;
            omp_set_num_threads(numProcs);
            #pragma omp parallel
            int ID = omp_get_thread_num();
                    #pragma omp for
                    for (i=0; i < size; ++i) {    /* For every odd number */
                            prime = PRIME(i);              
                /* Keep searching for divisor until rem == 0 (i.e. non prime),
                   or we've reached the sqrt of prime (when div1 > div2) */

                            div1=1;
                            do {                            
                               div1 += 2;            /* Divide by 3, 5, 7, ... */
                               div2 = prime / div1;  /* Find the dividend */
                               rem = prime % div1;   /* Find remainder */
                                } while (rem != 0 && div1 <= div2); 

                           if (rem != 0 || div1 == prime) {
                           /* prime is really a prime */

                               flags[i] = TRUE;
                               count++;                   
                               lastPrime = prime;
                           } else {
                                /* prime is not a prime */
                               flags[i] = FALSE;         
                                  }
                           }
               }
    }

但它实际上比非并行化函数primes()慢,我是否在这个函数中错误地实现了并行性?

1 个答案:

答案 0 :(得分:2)

[编辑]我建议查看TBB的例子&#39; primes&#39;。但我承认,将它移植到OpenMP并不是一件容易的事。

它使用parallel_reduce(用于计算素数)与自定义SieveRange,但它仍然可以转换为omp parallel,因为范围本质上是:

    NumberType middle = r.my_begin + (r.my_end-r.my_begin+r.my_stride-1)/2;
    middle = middle/my_stride*my_stride;

即。为通常的parallel_for方法增加了一个步伐。

另一个障碍是Sieve仿函数持有Multiplies类的私有副本,应该重新编写以匹配openmp方法。

E.g。 ParallelCountPrimes()可以使用这个并行结构重新编写:

    #pragma omp parallel
    {
        Multiples multiples(n); // per-thread copy, needs reworking to remain 'uninitialized'
        NumberType m = multiples.m;
        #pragma omp for
        for(NumberType i = multiples.m; i < n; i += multiples.m*CHUNKSIZE) {
            if( !multiples.is_initialized() )
                multiples.initialize( i ); // initialize by index of the first iteration
            NumberType window_size = m;
            NumberType end = i+multiples.m*CHUNKSIZE;
            if(end>n) end = n;
            for( NumberType j=i; j<end; j+=window_size ) { 
                if( j+window_size>end ) 
                    window_size = end-j;
                multiples.find_primes_in_window( j, window_size );
            }
        }                
    }