Question

我正在使用BSP在C中实现Erastothenes Sieve的并行算法。

我的代码编译并执行但不打印素数。当我这样做./bspsieve 2 1000例如我得到的唯一输出是 “它花了0.000371秒为2的proc 0”。虽然它应该打印所有找到的素数！奇怪的是，算法看起来确实有效。如果我在上面的例子中使用更大的上限，则需要更长的时间。当我分配更多处理器时，花费的时间更少。所以在某个地方可能是一个愚蠢的错误，但我在C上遇到了严重的问题而且我正在远程计算机上工作，所以我不确定那些工具......

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mcbsp.h>


/*
Note: To compile, this file has to be in the same folder as mcbsp.h and you need the 2 following commands:
gcc -Iinclude/ -pthread -c -o bspsieve.o bspsieve.c
gcc -o bspsieve bspsieve.o lib/libmcbsp1.1.0.a -lpthread -lrt

*/

  int procs;
  int upperbound;
  int *primes;

//SPMD function
void bspSieve(){
    bsp_begin(procs);

    int p = bsp_nprocs(); // p = number of procs obtained 
    int s = bsp_pid();  // s = proc number

    float blocksize;    // block size to be used, note last proc has a different size!
    if( s != p-1){
        blocksize = ceil(upperbound/p); 
    } else {
        blocksize = upperbound - (p-1)*ceil(upperbound/p);
    }

    // Initialize start time and end time, set start time to now.   
    double start_time,end_time;
    start_time = bsp_time();

    // Create vector that has block of candidates
    int *blockvector;
    blockvector = (int *)malloc(blocksize*sizeof(int));
    int q;
    for(q = 0; q<blocksize; q++){
    //List contains the integers from s*blocksize till blocksize + s*blocksize
      blockvector[q] = q + s*blocksize;
    } 

    //We neglect the first 2 'primes' in processor 0.
     if(s == 0){
        blockvector[0] = 0;
        blockvector[1] = 0;
     }

    // We are using the block distribution. We assume that n is large enough  to
    // assure that n/p is larger than sqrt(n). This means that we will always find the
    // sieving prime in the first block, and so have to broadcast from the first 
    // processor to the others.
     long sieving_prime;
     int i;
     bsp_push_reg( &sieving_prime,sizeof(long) );
     bsp_sync();

     for(i = 2; i * i < upperbound; i++) {
       //Part 1: if first processor, get the newest sieving prime, broadcast. Search for newest prime starting from i.
        if(s == 0){
       int findPrimeNb;
       for(findPrimeNb = i; findPrimeNb < blocksize; findPrimeNb++) {
          if( blockvector[findPrimeNb] != 0) {
                 sieving_prime  = blockvector[findPrimeNb];
                 //broadcast
         int procNb;
         for(procNb = 0; procNb < p; ++procNb){
                     bsp_put(procNb, &sieving_prime,&sieving_prime,0,sizeof(long));
        }
             break;
          }
           }
    }
        bsp_sync();

    //Part 2: Sieve using the sieving prime
    int sievingNb;
    for(sievingNb = 0; sievingNb < blocksize; sievingNb++){
       //check if element is multiple of sieving prime, if so, pcross out (put to zero)
       if( blockvector[sievingNb] % sieving_prime == 0){
          blockvector[sievingNb] = 0;
           }
    }

     }

     //part 3: get local primes to central area
     int transferNb;
     long transferPrime;
     for(transferNb = 0; transferNb < blocksize; transferNb++){
        transferPrime = blockvector[transferNb];
        primes[transferPrime] = transferPrime;
     }

     // take the end time.
     end_time = bsp_time();

   //Print amount of taken time, only processor 0 has to do this.
   if( s == 0 ){
      printf("It took : %.6lf seconds for proc %d out of %d. \n", end_time-start_time, bsp_pid(), bsp_nprocs());
      fflush(stdout);
   }
   bsp_pop_reg(&sieving_prime);
   bsp_end();
}



int main(int argc, char **argv){

    if(argc != 3) {
    printf( "Usage: %s <proc count> <upper bound> <n", argv[ 0 ] );
    exit(1);
    }
    //retrieve parameters
    procs = atoi( argv[ 1 ] );
    upperbound = atoi( argv[ 2 ] );

    primes = (int *)malloc(upperbound*sizeof(int));

    // init and call parallel part
    bsp_init(bspSieve, argc, argv); 
    bspSieve();


   //Print all non zeros of candidates, these are the primes.
   // Primes only go to p*p <= n
   int i;
   for(i = 0; i*i <= upperbound; i++) {
        if(primes[i] > 0) {
        printf("%d, ",primes[i]);
    }
    }
    return 0;
}

Answer 1

printf不会自动包含尾随换行符，并且通常不会刷新输出缓冲区，直到它输出换行符;所以可能你只需要添加一个

printf("\n");

在您的计划结束时，就在return 0;。

之前

或者，或者另外，如果您想要看到输出（如果BSP允许），您可以添加

fflush(stdout);

在printf("%d, ",primes[i]);之后，显式刷新输出缓冲区。

BSP实施Sieve Of Eratosthenes不打印C

1 个答案: