Question

编辑编辑：在第二条评论中使用该程序后，这是我的问题，原帖在下面。

    > ./bspsieve 8 10                           processors to use: 8
upper bound: 10
It took : 0.000076 seconds for proc 0 out of 8.
*** glibc detected *** ./bspsieve: munmap_chunk(): invalid pointer: 0x000000000060d040 ***
======= Backtrace: =========
/lib64/libc.so.6(+0x75218)[0x7fbacd7c2218]
./bspsieve[0x4015f7]
./bspsieve[0x401728]
/lib64/libc.so.6(__libc_start_main+0xe6)[0x7fbacd76bbc6]
./bspsieve[0x401119]
======= Memory map: ========
00400000-0040b000 r-xp 00000000 00:1a 5278668                            /vsc-mounts/leuven-user/gst/guest043/ParCoSieve/MulticoreBSP-for-C/bspsieve
0060b000-0060c000 r--p 0000b000 00:1a 5278668                            /vsc-mounts/leuven-user/gst/guest043/ParCoSieve/MulticoreBSP-for-C/bspsieve
0060c000-0060d000 rw-p 0000c000 00:1a 5278668                            /vsc-mounts/leuven-user/gst/guest043/ParCoSieve/MulticoreBSP-for-C/bspsieve
0060d000-0062e000 rw-p 00000000 00:00 0                                  [heap]
7fba48000000-7fba48021000 rw-p 00000000 00:00 0
7fba48021000-7fba4c000000 ---p 00000000 00:00 0
7fba4d26e000-7fba4d284000 r-xp 00000000 08:06 7531397                    /lib64/libgcc_s.so.1
7fba4d284000-7fba4d483000 ---p 00016000 08:06 7531397                    /lib64/libgcc_s.so.1
7fba4d483000-7fba4d484000 r--p 00015000 08:06 7531397                    /lib64/libgcc_s.so.1
7fba4d484000-7fba4d485000 rw-p 00016000 08:06 7531397                    /lib64/libgcc_s.so.1
7fbacd74d000-7fbacd8a2000 r-xp 00000000 08:06 7531274                    /lib64/libc-2.11.1.so
7fbacd8a2000-7fbacdaa1000 ---p 00155000 08:06 7531274                    /lib64/libc-2.11.1.so
7fbacdaa1000-7fbacdaa5000 r--p 00154000 08:06 7531274                    /lib64/libc-2.11.1.so
7fbacdaa5000-7fbacdaa6000 rw-p 00158000 08:06 7531274                    /lib64/libc-2.11.1.so
7fbacdaa6000-7fbacdaab000 rw-p 00000000 00:00 0
7fbacdaab000-7fbacdb00000 r-xp 00000000 08:06 7531290                    /lib64/libm-2.11.1.so
7fbacdb00000-7fbacdcff000 ---p 00055000 08:06 7531290                    /lib64/libm-2.11.1.so
7fbacdcff000-7fbacdd00000 r--p 00054000 08:06 7531290                    /lib64/libm-2.11.1.so
7fbacdd00000-7fbacdd01000 rw-p 00055000 08:06 7531290                    /lib64/libm-2.11.1.so
7fbacdd01000-7fbacdd09000 r-xp 00000000 08:06 7531329                    /lib64/librt-2.11.1.so
7fbacdd09000-7fbacdf08000 ---p 00008000 08:06 7531329                    /lib64/librt-2.11.1.so
7fbacdf08000-7fbacdf09000 r--p 00007000 08:06 7531329                    /lib64/librt-2.11.1.so
7fbacdf09000-7fbacdf0a000 rw-p 00008000 08:06 7531329                    /lib64/librt-2.11.1.so
7fbacdf0a000-7fbacdf21000 r-xp 00000000 08:06 7531435                    /lib64/libpthread-2.11.1.so
7fbacdf21000-7fbace121000 ---p 00017000 08:06 7531435                    /lib64/libpthread-2.11.1.so
7fbace121000-7fbace122000 r--p 00017000 08:06 7531435                    /lib64/libpthread-2.11.1.so
7fbace122000-7fbace123000 rw-p 00018000 08:06 7531435                    /lib64/libpthread-2.11.1.so
7fbace123000-7fbace127000 rw-p 00000000 00:00 0
7fbace127000-7fbace146000 r-xp 00000000 08:06 17398545                   /lib64/ld-2.11.1.so
7fbace30b000-7fbace30f000 rw-p 00000000 00:00 0
7fbace343000-7fbace345000 rw-p 00000000 00:00 0
7fbace345000-7fbace346000 r--p 0001e000 08:06 17398545                   /lib64/ld-2.11.1.so
7fbace346000-7fbace347000 rw-p 0001f000 08:06 17398545                   /lib64/ld-2.11.1.so
7fbace347000-7fbace348000 rw-p 00000000 00:00 0
7fff10a45000-7fff10a5a000 rw-p 00000000 00:00 0                          [stack]
7fff10ae3000-7fff10ae4000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
Aborted (core dumped)

================= 原帖

我正在尝试使用BSP（http://en.wikipedia.org/wiki/Bulk_synchronous_parallel）库在C中实现一个简单的并行版Erastothenes。

我对C和BSP都缺乏经验。到目前为止，我有两个问题： 1）编译完成后（由指令完成）并尝试使用./bspsieve 200运行程序，我得到“Segmentation fault（core dumped）”

2）我做错了其他任何事情？我不是在寻找一个好的算法，只是一个能产生预期结果的算法。

    #include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mcbsp.h>


/*
Note: To compile, this file has to be in the same folder as mcbsp.h and you need the 2 following commands:
gcc -Iinclude/ -pthread -c -o bspsieve.o bspsieve.c
gcc -o bspsieve bspsieve.o lib/libmcbsp1.1.0.a -lpthread -lrt

*/

  int procs;
  float upperbound;
  int *primes;

//SPMD function
void bspSieve(){
    bsp_begin(procs);

    float p = bsp_nprocs(); // p = number of procs obtained 
    int s = bsp_pid();  // s = proc number

    float blocksize;    // block size to be used, note last proc has a different size!
    if( s != p){
        blocksize = ceil(upperbound/p); 
    } else {
        blocksize = upperbound - (p-1)*ceil(upperbound/p);
    }

    // Initialize start time and end time, set start time to now.   
    double start_time,end_time;
    start_time = bsp_time();

    // Create vector that has block of candidates
    int *blockvector;
    blockvector = (int *)malloc(blocksize*sizeof(int));
    int q;
    for(q = 0; q<blocksize; q++){
    //List contains the integers from s*blocksize till blocksize + s*blocksize
      blockvector[q] = q + s*blocksize;
    } 

    //We neglect the first 2 'primes' in processor 0.
     if(s == 0){
        blockvector[0] = 0;
        blockvector[1] = 0;
     }

    // We are using the block distribution. We assume that n is large enough  to
    // assure that n/p is larger than sqrt(n). This means that we will always find the
    // sieving prime in the first block, and so have to broadcast from the first 
    // processor to the others.
     long sieving_prime;
     int i;
     bsp_push_reg( &sieving_prime,sizeof(long) );
     for(i = 2; i * i < upperbound; i++) {
       //Part 1: if first processor, get the newest sieving prime, broadcast. Search for newest prime starting from i.
        if(s == 0){
       int findPrimeNb;
       for(findPrimeNb = i; findPrimeNb <= blocksize; findPrimeNb++) {
          if( blockvector[findPrimeNb] != 0) {
                 sieving_prime  = blockvector[findPrimeNb];
                 //broadcast
         int procNb;
         for(procNb = 0; procNb < p; ++procNb){
                     bsp_put(procNb, &sieving_prime,&sieving_prime,0,sizeof(long));
        }
             break;
          }
           }
    }
        bsp_sync();

    //Part 2: Sieve using the sieving prime
    int sievingNb;
    for(sievingNb = 0; sievingNb < blocksize; sievingNb++){
       //check if element is multiple of sieving prime, if so, pcross out (put to zero)
       if( blockvector[sievingNb] % sieving_prime == 0){
          blockvector[sievingNb] = 0;
           }
    }

     }

     //part 3: get local primes to central area
     int transferNb;
     long transferPrime;
     for(transferNb = 0; transferNb < blocksize; transferNb++){
        transferPrime = blockvector[transferNb];
        primes[transferPrime] = transferPrime;
     }

     // take the end time.
     end_time = bsp_time();

   //Print amount of taken time, only processor 0 has to do this.
   if( s == 0 ){
      printf("It took : %.6lf seconds for proc %d out of %d. \n", end_time-start_time, bsp_pid(), bsp_nprocs());
      fflush(stdout);
   }

   bsp_end();
}



int main(int argc, char **argv){

    primes = (int *)malloc(upperbound*sizeof(int));

    if(argc != 2) {
    printf( "processors to use: %s ", argv[ 0 ] );
    printf( "upper bound: %s ", argv[ 1 ] );
    }
    //retrieve parameters
    procs = atoi( argv[ 1 ] );
    upperbound = atoi( argv[ 2 ] );

    // init and call parallel part
    bsp_init(bspSieve, argc, argv); 
    bspSieve();


   //Print all non zeros of candidates, these are the primes.
   // Primes only go to p*p <= n
   int i;
   for(i = 0; i*i <= upperbound; i++) {
        if(primes[i] > 0) {
        printf("%d, ",primes[i]);
    }
    }
    return 0;
}

编辑：我已经完成了以下操作来修复数学家1975注意到的错误，但是我仍然遇到了分段错误。

    int main(int argc, char **argv){

    if(argc != 2) {
    printf( "processors to use: %s ", argv[ 0 ] );
    printf( "upper bound: %s ", argv[ 1 ] );
    }
    //retrieve parameters
    procs = atoi( argv[ 1 ] );
    upperbound = atoi( argv[ 2 ] );

    primes = (int *)malloc(upperbound*sizeof(int));
    // init and call parallel part
    bsp_init(bspSieve, argc, argv); 
    bspSieve();

   //Print all non zeros of candidates, these are the primes.
   // Primes only go to p*p <= n
   int i;
   for(i = 0; i*i <= upperbound; i++) {
        if(primes[i] > 0) {
        printf("%d, ",primes[i]);
    }
    }
    return 0;
}

Answer 1

主要

中的循环出现问题

primes = (int *)malloc(upperbound*sizeof(int)); <-- upperbound determines size of                            
                                                    memory allocated


upperbound = atoi( argv[ 2 ] );  <-- changing array limit

// init and call parallel part
bsp_init(bspSieve, argc, argv); 
bspSieve();



int i;
for(i = 0; i*i <= upperbound; i++) {   <-- Array limit not necessarily true

因为您更改了数组大小限制，但在循环中使用它来确定数组边界。这取决于upperbound对您的更改可能允许访问您尚未分配的内存，这可能会给您一个段错误。

Answer 2

以下是您的代码的更正版本。我使用的调试工具是valgrind。安装它，使用-g编译bspsieve，并使用valgrind 8 30 ./bspsieve，您将被告知有关任何内存错误的信息。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mcbsp.h>

/*
Compile using
gcc -Wall -g -o bspsieve bspsieve.c lib/libmcbsp1.1.0.a -pthread -lrt -lm -Iinclude
*/

int procs;
float upperbound;
int *primes;

//////////////// //////////////// BSPSIEVE

//SPMD function
void
bspSieve ()
{
    bsp_begin (procs);

    float p = bsp_nprocs ();    // p = number of procs obtained 
    int s = bsp_pid (); // s = proc number

    float blocksize;    // block size to be used, note last proc has a different size!
    if (s != p-1) {
        blocksize = ceil (upperbound / p);
    }
    else {
        blocksize = upperbound - (p - 1) * ceil (upperbound / p);
    }

    // Initialize start time and end time, set start time to now.   
    double start_time, end_time;
    start_time = bsp_time ();

    // Create vector that has block of candidates
    int *blockvector;
    blockvector = (int *) malloc (blocksize * sizeof (int));
    int q;
    for (q = 0; q < blocksize; q++) {
        //List contains the integers from s*blocksize till blocksize + s*blocksize
        blockvector[q] = q + s * blocksize;
    }

    //We neglect the first 2 'primes' in processor 0.
    if (s == 0) {
        blockvector[0] = 0;
        blockvector[1] = 0;
    }

    // We are using the block distribution. We assume that n is large enough  to
    // assure that n/p is larger than sqrt(n). This means that we will always find the
    // sieving prime in the first block, and so have to broadcast from the first 
    // processor to the others.
    long sieving_prime;
    int i;
    bsp_push_reg (&sieving_prime, sizeof (long));
    bsp_sync ();
    for (i = 2; i * i < upperbound; i++) {
        //Part 1: if first processor, get the newest sieving prime, broadcast. Search for newest prime starting from i.
        if (s == 0) {
            int findPrimeNb;
            for (findPrimeNb = i; findPrimeNb <= blocksize; findPrimeNb++) {
                if (blockvector[findPrimeNb] != 0) {
                    sieving_prime = blockvector[findPrimeNb];
                    //broadcast
                    int procNb;
                    for (procNb = 0; procNb < p; ++procNb) {
                        bsp_put (procNb, &sieving_prime, &sieving_prime, 0,
                               sizeof (long));
                    }
                    break;
                }
            }
        }
        bsp_sync ();

        //Part 2: Sieve using the sieving prime
        int sievingNb;
        for (sievingNb = 0; sievingNb < blocksize; sievingNb++) {
            //check if element is multiple of sieving prime, if so, pcross out (put to zero)
                    if (blockvector[sievingNb] != sieving_prime)
            if (blockvector[sievingNb] % sieving_prime == 0) {
                blockvector[sievingNb] = 0;
            }
        }

    }

    //part 3: get local primes to central area
    int transferNb;
    long transferPrime;
    for (transferNb = 0; transferNb < blocksize; transferNb++) {
        transferPrime = blockvector[transferNb];
        primes[transferPrime] = transferPrime;
    }

    // take the end time.
    end_time = bsp_time ();

    //Print amount of taken time, only processor 0 has to do this.
    if (s == 0) {
        printf ("It took : %.6lf seconds for proc %d out of %d. \n",
            end_time - start_time, bsp_pid (), bsp_nprocs ());
        fflush (stdout);
    }

    bsp_end ();
}

//////////////// //////////////// //////////////// //////////////// MAIN

int
main (int argc, char **argv)
{

    if (argc != 3) {
            printf("Usage: %s <processor count> <upper bound>\n", argv[0]);
            exit(1);
    }

    printf ("processors to use: %s \n", argv[1]);
    printf ("upper bound: %s \n", argv[2]);     //retrieve parameters

    procs = atoi (argv[1]);
    upperbound = atoi (argv[2]);

    primes = (int *) malloc (upperbound * sizeof (int));
    // init and call parallel part
    bsp_init (bspSieve, argc, argv);
    bspSieve ();

    //Print all non zeros of candidates, these are the primes.
    // DO Primes only go to p*p <= n? The rest of the numbers seem to be prime
    int i;
    for (i = 0; i < upperbound; i++) {
        if (primes[i] > 0) {
            printf ("%d, ", primes[i]);
        }
    }
    printf("\n");
    return 0;
}

Brat Sieve of Eratosthenes C实现给出了分段故障（核心转储）

2 个答案: