多线程分割故障

时间:2015-10-04 21:00:25

标签: c

我试图通过计算出数字的倍数来计算素数。这样做足够了,你只剩下素数。左 例如:https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes

问题是这只适用于素数到10.000。当尝试计算更多将导致分段错误(核心转储)。 然而,在线程数量和您尝试计算的素数之间存在一些折衷。例如:NROF_SIEVE = 4000和NROF_THREADS = 10.有时候工作。但是:NROF_SIEVE = 4000和NROF_THREADS = 20.不起作用并导致分段错误。

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>     // for usleep()
#include <time.h>       // for time()
#include <pthread.h>
#include <errno.h>

#include "prime.h"

typedef unsigned long long  MY_TYPE;
static pthread_mutex_t      mutex          = PTHREAD_MUTEX_INITIALIZER;

// create a bitmask where bit at position n is set
#define BITMASK(n)          (((MY_TYPE) 1) << (n))

// check if bit n in v is set
#define BIT_IS_SET(v,n)     (((v) & BITMASK(n)) == BITMASK(n))

// set bit n in v
#define BIT_SET(v,n)        ((v) =  (v) |  BITMASK(n))

// clear bit n in v
#define BIT_CLEAR(v,n)      ((v) =  (v) & ~BITMASK(n))

static void rsleep (int t);
static void * my_thread (void * arg);
static unsigned long long get_next_parameter (unsigned long long current);

int main (void)
{
    // TODO: start threads generate all primes between 2 and NROF_SIEVE and output the results
    // (see thread_malloc_free_test() and thread_mutex_test() how to use threads and mutexes,
    //  see bit_test() how to manipulate bits in a large integer)

    unsigned long long i = 0;
    for (i = 0; i <= NROF_SIEVE/64; i++) {
        buffer[i] = ~0;
    }
    BIT_CLEAR(buffer[0], 0);
    BIT_CLEAR(buffer[0], 1);

    unsigned long long *       parameter;
    pthread_t   thread_id[NROF_THREADS];
    unsigned long long current = 2;

    // start the first NROF_THREADS
    for (i = 0; i < NROF_THREADS; i++) {
        parameter = malloc(sizeof(MY_TYPE));
        * parameter = current;
        pthread_create (&thread_id[i], NULL, my_thread, parameter);
        current++;
        rsleep(5);
    }

    /*
    while (1) {
        if (current >= NROF_SIEVE) {
            //exit the loop since we are done
            break;
        } else if (BIT_IS_SET(buffer[current/64], current % 64)){
            // eliminate these multiples
            * parameter = current;
            // start the thread
            pthread_create (&thread_id, NULL, my_thread, parameter);
            rsleep (3);
            // wait for the thread, and we are interested in the return value
            pthread_join (thread_id, NULL);
            current++;
        } else {
            // not yet a one found so increase
            current++;
        }
    }
    */ 

    unsigned long long o = 0;
    while (1) {
        pthread_join (thread_id[o], NULL);
        pthread_mutex_lock (&mutex);
        parameter = malloc(sizeof(MY_TYPE));
        * parameter = get_next_parameter (current);
        if (* parameter == 0) { //no more elements to process
            break;
        }
        current++;
        pthread_create (&thread_id[o], NULL, my_thread, parameter);
        pthread_mutex_unlock (&mutex);
        rsleep(5);
        o = (o + 1) % NROF_THREADS;
    }
    pthread_mutex_unlock (&mutex);

    // join with the last threads
    for (i = 0; i < NROF_THREADS; i++) {
        pthread_join (thread_id[i], NULL);
    }

    // print the prime numbers
    for (i = 2; i <= NROF_SIEVE; i++) {
        if (BIT_IS_SET(buffer[i/64], i % 64)) {
            fprintf(stdout, "%lld\n", i);
        }
    }
    return (0);
}

/*
* Thread method to strike out all the multiples of the arg
* parameter. Put the results in the buffer[]. Has to be with a `mutex protecting in from other threads enter this critical section`
*/
static void * my_thread (void * arg) {
    unsigned long long * argi;
    argi = (unsigned long long *) arg;   // cast it to a int pointer
    unsigned long long local_current = * argi;
    unsigned long long helper = local_current;
    free(arg);
    while (local_current <= NROF_SIEVE) {
        local_current = local_current + helper;
        pthread_mutex_lock (&mutex);
        BIT_CLEAR(buffer[local_current/64], local_current % 64);
        pthread_mutex_unlock (&mutex);
    }
    return NULL;
}

unsigned long long get_next_parameter (unsigned long long current) {
    while (current <= NROF_SIEVE) {
        if (BIT_IS_SET(buffer[current/64], current % 64)) {
            return current;
        } else {
            current++;
        }
    }
    return 0;
}

/*
 * rsleep(int t)
 *
 * The calling thread will be suspended for a random amount of time between 0 and t microseconds
 * At the first call, the random generator is seeded with the current time
 */
static void rsleep (int t)
{
    static bool first_call = true;

    if (first_call == true)
    {
        srandom (time (NULL) % getpid());
        first_call = false;
    }
    usleep (random () % t);
}

标题文件:

/**
 * NROF_SIEVE: size of the sieve
 * (value must be between 1 and 15485864)
 */
#define NROF_SIEVE         40

/**
 * NROF_THREADS: number of threads that will run in parallel
 */
#define NROF_THREADS        50

/**
 * buffer[]: datastructure of the sieve; each number is represented by one bit
 */
static unsigned long long   buffer [(NROF_SIEVE/64) + 1];

1 个答案:

答案 0 :(得分:3)

分段错误通常是由无效的内存访问引起的。在足够现代版本的Clang或GCC上使用和/或重新编译您的程序。

这里有一个有意义的回溯中止了很多:

$ gcc -Wall -g prime.c -fsanitize={undefined,address} -pthread
$ ./a.out
prime.c:121:9: runtime error: index 1 out of bounds for type 'long long unsigned int [1]'
prime.c:121:9: runtime error: index 1 out of bounds for type 'long long unsigned int [1]'
prime.c:121:9: runtime error: load of address 0x0000006043e8 with insufficient space for an object of type 'long long unsigned int'
0x0000006043e8: note: pointer points here
 20 8a a2 ec  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00
              ^ 
=================================================================
==3351==ERROR: AddressSanitizer: global-buffer-overflow on address 0x0000006043e8 at pc 0x0000004018cb bp 0x7fc153ae0e30 sp 0x7fc153ae0e20
READ of size 8 at 0x0000006043e8 thread T31
    #0 0x4018ca in my_thread /tmp/so/prime.c:121
    #1 0x7fc1660064a3 in start_thread (/usr/lib/libpthread.so.0+0x74a3)
    #2 0x7fc165d4413c in clone (/usr/lib/libc.so.6+0xe913c)

0x0000006043e8 is located 0 bytes to the right of global variable 'buffer' defined in 'prime.h:15:29' (0x6043e0) of size 8
0x0000006043e8 is located 56 bytes to the left of global variable 'mutex' defined in 'prime.c:12:29' (0x604420) of size 40
SUMMARY: AddressSanitizer: global-buffer-overflow /tmp/so/prime.c:121 my_thread

第121行包含buffer[local_current/64]

static void * my_thread (void * arg) {
    unsigned long long * argi;
    argi = (unsigned long long *) arg;   // cast it to a int pointer
    unsigned long long local_current = * argi;
    unsigned long long helper = local_current;
    free(arg);
    while (local_current <= NROF_SIEVE) {
        local_current = local_current + helper;
        pthread_mutex_lock (&mutex);
        BIT_CLEAR(buffer[local_current/64], local_current % 64);
        pthread_mutex_unlock (&mutex);
    }
    return NULL;
}

这似乎表明您的local_current变得大于NROF_SIEVE,如果helper > 0确实如此。有点可疑......

使用调试器(gdb),您可以设置(条件)断点,检查变量等。一个例子:

$ gdb -q ./a.out 
Reading symbols from ./a.out...done.
(gdb) break prime.c:121 if local_current >= 64
Breakpoint 1 at 0x4017e6: file prime.c, line 121.
(gdb) run
Starting program: /tmp/so/a.out 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib/libthread_db.so.1".
[Switching to Thread 0x7fffe21de700 (LWP 4118)]

Breakpoint 1, my_thread (arg=0x60200000ebd0) at prime.c:121
121             BIT_CLEAR(buffer[local_current/64], local_current % 64);
(gdb) print local_current
$1 = 70
(gdb) step
__ubsan::__ubsan_handle_out_of_bounds (Data=0x602ea0, Index=1) at /build/gcc-multilib/src/gcc-5.2.0/libsanitizer/ubsan/ubsan_handlers.cc:227
227     /build/gcc-multilib/src/gcc-5.2.0/libsanitizer/ubsan/ubsan_handlers.cc: No such file or directory.
(gdb) next
prime.c:121:9: runtime error: index 1 out of bounds for type 'long long unsigned int [1]'
228     in /build/gcc-multilib/src/gcc-5.2.0/libsanitizer/ubsan/ubsan_handlers.cc
(gdb) info threads 
  Id   Target Id         Frame 
  40   Thread 0x7fffdf9d9700 (LWP 4128) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  39   Thread 0x7fffe01da700 (LWP 4127) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  38   Thread 0x7fffe09db700 (LWP 4125) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  37   Thread 0x7fffe11dc700 (LWP 4124) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  36   Thread 0x7fffe19dd700 (LWP 4123) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
* 35   Thread 0x7fffe21de700 (LWP 4118) "a.out" __ubsan::__ubsan_handle_out_of_bounds (Data=0x602ea0, Index=1)
    at /build/gcc-multilib/src/gcc-5.2.0/libsanitizer/ubsan/ubsan_handlers.cc:227
  34   Thread 0x7fffe29df700 (LWP 4117) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  33   Thread 0x7fffe31e0700 (LWP 4116) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  32   Thread 0x7fffe39e1700 (LWP 4115) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  9    Thread 0x7fffef1f8700 (LWP 4092) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  7    Thread 0x7ffff01fa700 (LWP 4089) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  6    Thread 0x7ffff09fb700 (LWP 4088) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  4    Thread 0x7ffff19fd700 (LWP 4086) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  1    Thread 0x7ffff7fac7c0 (LWP 4080) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
(gdb) continue
Continuing.
prime.c:121:9: runtime error: index 1 out of bounds for type 'long long unsigned int [1]'
prime.c:121:9: runtime error: load of address 0x0000006043e8 with insufficient space for an object of type 'long long unsigned int'
0x0000006043e8: note: pointer points here
 20 8a a2 a8  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00
              ^ 
=================================================================
==4080==ERROR: AddressSanitizer: global-buffer-overflow on address 0x0000006043e8 at pc 0x0000004018cb bp 0x7fffe21dde30 sp 0x7fffe21dde20

祝你的作业好运!