Question

我在学校有一项任务：在 2 分钟内统计不超过 10^18 的素数个数，并且内存使用不得超过 2 GB。我的第一次尝试是实现一个分段筛，并做了以下优化：

使用 32 位整数按位存储筛中的素数标记（位数组）
不存储偶数（筛中只表示奇数）
将筛子分成尺寸为sqrt（n）的部分
在标记合数的同时对其计数，这样就不必再遍历一次筛子了
使用动态内存分配来存储不超过 sqrt(n) 的初始素数（为此我用 C 实现了一个存放素数的队列）

问题是，在我的电脑上（配置相当不错）统计 10^9 以内的素数就需要 13 秒，照这样算，10^18 要花上好几天。

我的问题是：我是否遗漏了某些优化，或者有没有更好、更快的方法来统计不超过某个数的素数个数？代码如下：

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/*
 * Fixed-width integer types (int8_t .. int64_t, uint8_t .. uint64_t) come
 * from the standard header. Declaring these reserved names by hand clashes
 * with the definitions supplied by system headers on platforms where the
 * 64-bit types are not `long long`.
 */
#include <stdint.h>

/* Number of bits held by one element of the uint32_t bit arrays below. */
#define  SIZE 32 
/* NOTE(review): DEBUG is defined but not referenced in the visible code. */
#define DEBUG

/* ANSI escape sequences used to colour the program's terminal output. */
#define  KRED "\x1B[31m"
#define  KGRN "\x1B[32m"
#define  KYEL "\x1B[33m"
#define  KBLU "\x1B[34m"
#define  KMAG "\x1B[35m"
#define  KCYN "\x1B[36m"
#define  KWHT "\x1B[37m"
/* Escape sequence that restores the terminal's default attributes. */
#define RESET "\033[0m"

/* One cell of the singly linked list that backs the queue. */
typedef struct node {
    uint64_t     data;  /* value carried by this cell */
    struct node *next;  /* following cell in the list */
} Node;

/* FIFO container: cells are appended at `last` and removed from `first`. */
typedef struct queue {
    Node    *first;     /* head of the list */
    Node    *last;      /* tail of the list */
    uint32_t size;      /* number of cells currently linked */
} Queue;

/* Queue model */
/**
 * Append `value` to the tail of `queue`.
 *
 * @param queue  queue to extend; must point to an initialised Queue.
 * @param value  value to store (converted to the node's unsigned 64-bit
 *               data field).
 * @return 1 on success, 0 if the node could not be allocated, in which case
 *         the queue is left untouched.
 */
uint8_t enqueue(Queue* queue, int64_t value) {
    /* sizeof *node reserves a whole Node; sizeof(Node*) would only reserve
     * room for a pointer and every write to the node would overrun it. */
    Node *node = malloc(sizeof *node);

    if (node == NULL)
        return 0;

    node->data = (uint64_t)value;
    node->next = NULL;  /* new tail: list walks rely on the NULL terminator */

    if (queue->first == NULL)
        queue->first = node;        /* empty queue: node is also the head */
    else
        queue->last->next = node;

    queue->last = node;
    queue->size++;
    return 1;
}

/**
 * Remove the node at the head of `queue` and return its value.
 *
 * @param queue  queue to pop from; must point to an initialised Queue.
 * @return the removed value, or 0 when the queue is empty. A stored 0 is
 *         indistinguishable from "empty"; check the size first if that
 *         matters.
 */
uint64_t dequeue(Queue* queue) {
    Node *node = queue->first;
    uint64_t save_data;

    /* Test for emptiness before touching the head node. */
    if (queue->size == 0 || node == NULL)
        return 0;

    save_data    = node->data;
    queue->first = node->next;
    queue->size--;

    /* Once the last node is gone neither end may keep pointing at it. */
    if (queue->size == 0)
        queue->first = queue->last = NULL;

    free(node);
    return save_data;
}

/* Return the node currently at the head of `queue` without unlinking it. */
Node* queue_peek(Queue* queue) {
    Node *head = queue->first;

    return head;
}

/* Return the value of the queue's `size` counter. */
uint32_t queue_size(Queue* queue) {
    const uint32_t length = queue->size;

    return length;
}

/**
 * Allocate an empty queue.
 *
 * @return a heap-allocated Queue with no nodes and size 0, or NULL if the
 *         allocation failed. The caller owns the returned queue.
 */
Queue* init_queue(void) {
    /* sizeof *queue reserves the whole struct; sizeof(Queue*) would only
     * reserve room for a pointer. */
    Queue *queue = malloc(sizeof *queue);

    if (queue == NULL)
        return NULL;

    queue->first = NULL;
    queue->last  = NULL;
    queue->size  = 0;

    return queue;
}

/* Working with bit arrays functions */
/* Return how many bits of `nbr` are 1. */
uint8_t count_set_bits(uint64_t nbr) {
    uint8_t ones;

    /* Each pass clears the lowest set bit, so the loop runs once per 1-bit. */
    for (ones = 0; nbr != 0; ones++)
        nbr &= nbr - 1;

    return ones;
}

/* Return 1 if bit number `position` of the bit array is set, 0 otherwise. */
uint8_t get_bit(uint32_t array[], uint32_t position) {
    const uint32_t word  = array[position / SIZE];
    const uint32_t shift = position % SIZE;

    return (uint8_t)((word >> shift) & 1U);
}

/* Set bit number `position` of the bit array to 0. */
void clear_bit(uint32_t array[], uint32_t position) {
    uint32_t *word      = &array[position / SIZE];
    const uint32_t flag = 1U << (position % SIZE);

    *word = *word & ~flag;
}

/* Set bit number `position` of the bit array to 1. */
void set_bit(uint32_t array[], uint32_t position) {
    const uint32_t word_index = position / SIZE;
    const uint32_t bit_index  = position % SIZE;

    array[word_index] = array[word_index] | (1U << bit_index);
}

/* Solve the problem */
/**
 * Build the queue of base primes used to sieve the larger segments.
 *
 * Runs an odd-only sieve over the range up to (uint64_t)sqrt(limit). Bit i
 * of the scratch bitmap stands for the odd number 2*i + 1, and a set bit
 * means "not prime" (reversed logic, so the zero-filled bitmap from calloc
 * needs no further initialisation).
 *
 * @param limit  upper bound of the overall prime count.
 * @return a newly allocated queue holding 2 (always queued) followed by
 *         every odd prime <= (uint64_t)sqrt(limit) in ascending order, or
 *         NULL if memory ran out. The caller owns the queue.
 */
Queue* initial_sieve(uint64_t limit) {
    const uint64_t root = (uint64_t)sqrt((double)limit);
    Queue    *queue     = init_queue();
    uint32_t *composite = calloc(root / SIZE + 1, sizeof *composite);
    uint8_t   ok;

    if (queue == NULL || composite == NULL)
        goto fail;

    set_bit(composite, 0);      /* bit 0 stands for 1, which is not prime */
    ok = enqueue(queue, 2);     /* the only even prime is queued by hand */

    for (uint64_t number = 3; ok && number <= root; number += 2) {
        if (get_bit(composite, number / 2) != 0)
            continue;           /* already crossed out by a smaller prime */

        ok = enqueue(queue, number);

        /* Cross out odd multiples starting at number^2; smaller multiples
         * have a smaller prime factor and were handled earlier. */
        for (uint64_t multiple = number * number; multiple <= root; multiple += number * 2)
            set_bit(composite, multiple / 2);
    }

    if (!ok)
        goto fail;

    free(composite);            /* scratch bitmap is no longer needed */
    return queue;

fail:
    free(composite);
    if (queue != NULL) {
        while (queue_size(queue) > 0)
            dequeue(queue);
        free(queue);
    }
    return NULL;
}

/* Last number of the segment that begins at `start`: the segment holds at
 * most `width` numbers and never reaches past `limit`. Requires
 * start <= limit and width >= 1; written so start + width cannot wrap. */
static uint64_t segment_end(uint64_t start, uint64_t width, uint64_t limit)
{
    return (limit - start < width) ? limit : start + (width - 1);
}

/* Cross out the odd composites of one segment and return how many distinct
 * ones were found. Bit i of `sieve` stands for the odd number
 * first_odd + 2*i; `span` is the (even) distance from first_odd to the last
 * odd candidate. `odd_prime` is the list of odd base primes, ascending. */
static uint64_t mark_segment(uint32_t *sieve, const Node *odd_prime,
                             uint64_t first_odd, uint64_t span)
{
    const uint64_t last_odd = first_odd + span;
    uint64_t marked = 0;

    for (; odd_prime != NULL; odd_prime = odd_prime->next) {
        const uint64_t p = odd_prime->data;

        /* p*p > last_odd: neither this prime nor any larger one can be the
         * smallest factor of a candidate in this segment. */
        if (p > last_odd / p)
            break;

        /* Distance from first_odd to the next multiple of p. first_odd is
         * odd, so an odd distance lands on an even multiple: skip it. */
        uint64_t offset = (p - first_odd % p) % p;
        if (offset & 1)
            offset += p;

        for (; offset <= span; offset += 2 * p) {
            const uint32_t position = (uint32_t)(offset / 2);

            if (get_bit(sieve, position) == 0) {
                set_bit(sieve, position);
                marked++;
            }
        }
    }

    return marked;
}

/**
 * Count the primes in [0, limit] with a segmented, odd-only sieve.
 *
 * The base primes up to delta = (uint64_t)sqrt(limit) come from
 * initial_sieve(); the numbers in (delta, limit] are then sieved in
 * consecutive, non-overlapping segments of `delta` numbers each, reusing a
 * single bitmap. Progress information is printed to stdout.
 *
 * NOTE(review): delta is computed with floating-point sqrt(), exactly as
 * initial_sieve() does, so both agree on the boundary; for limits too large
 * to be represented exactly as a double the root may be off by one.
 *
 * @param limit  inclusive upper bound.
 * @return the number of primes <= limit (0 for limit < 2). Terminates the
 *         program with EXIT_FAILURE if memory cannot be allocated.
 */
uint64_t count_primes(uint64_t limit) {
    uint64_t start, end, delta;
    uint64_t non_primes_counter = 0;
    uint64_t odd_candidates = 0;
    uint64_t initial_size, total;
    uint32_t *current_sieve;
    size_t sieve_words;
    const Node *odd_primes;
    Queue* queue;

    if (limit < 2)
        return 0;   /* no primes; also keeps the unconditional 2 out of the count */

    delta       = (uint64_t)sqrt((double)limit);    /* >= 1 because limit >= 2 */
    sieve_words = (size_t)(delta / SIZE + 1);

    queue         = initial_sieve(limit);
    current_sieve = calloc(sieve_words, sizeof *current_sieve);
    if (queue == NULL || current_sieve == NULL) {
        fprintf(stderr, "count_primes: out of memory\n");
        exit(EXIT_FAILURE);
    }

    initial_size = queue->size;
    /* Skip 2: only odd numbers are represented in the segment bitmap. */
    odd_primes = queue->first != NULL ? queue->first->next : NULL;

    start = delta + 1;      /* everything <= delta is covered by the queue */
    end   = segment_end(start, delta, limit);

    printf("Limits: %llu -> %llu\n", (unsigned long long)start, (unsigned long long)end);
    for (;;) {
        const uint64_t first_odd = start | 1;

        if (first_odd <= end) {
            /* Round the distance down to even so it ends on an odd number. */
            const uint64_t span = (end - first_odd) & ~(uint64_t)1;

            memset(current_sieve, 0, sieve_words * sizeof *current_sieve);
            non_primes_counter += mark_segment(current_sieve, odd_primes, first_odd, span);
            odd_candidates     += span / 2 + 1;
        }

        if (end == limit)
            break;

        /* Segments are disjoint: the next one begins right after this one. */
        start = end + 1;
        end   = segment_end(start, delta, limit);
    }

    total = odd_candidates - non_primes_counter;

    printf("%sTotal composites and initial size: %llu %llu %s\n", KCYN,
           (unsigned long long)non_primes_counter, (unsigned long long)initial_size, RESET);
    printf("%sTotal primes: %llu %s\n", KCYN, (unsigned long long)total, RESET);

    free(current_sieve);
    while (queue_size(queue) > 0)
        dequeue(queue);
    free(queue);

    return initial_size + total;
}

/* Main */
/**
 * Entry point: `program LIMIT` prints the number of primes <= LIMIT and the
 * CPU time spent computing it.
 *
 * LIMIT must be a non-negative decimal integer that fits in an unsigned
 * long long. Returns 0 on success and EXIT_FAILURE when the argument is
 * missing or malformed.
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        printf("Invalid number of parameters\n");
        printf("Program will exit now.\n");
        return EXIT_FAILURE;
    }

    const char *text = argv[1];
    char *tail = NULL;

    errno = 0;
    const unsigned long long parsed = strtoull(text, &tail, 10);

    /* strtoull silently negates "-5", so a minus sign is rejected by hand;
     * tail checks catch empty input and trailing garbage, errno overflow. */
    if (tail == text || *tail != '\0' || errno == ERANGE || strchr(text, '-') != NULL) {
        printf("Invalid limit: %s\n", text);
        printf("Program will exit now.\n");
        return EXIT_FAILURE;
    }

    const clock_t begin  = clock();
    const uint64_t found = count_primes((uint64_t)parsed);

    printf("%sNumber of primes found up to %s%s: %s%llu.\n%s", KWHT, KCYN, argv[1], KYEL, (unsigned long long)found, RESET);

    const clock_t end    = clock();
    const double elapsed = (double)(end - begin) / CLOCKS_PER_SEC;  /* CPU seconds */

    printf("%sTotal time elapsed since the starting of the program: %s%lf seconds.\n%s", KWHT, KYEL, elapsed, RESET);  
    return 0;
}

谢谢，马库斯

Answer 1

你需要计算素数的数量，而不是全部找到它们（它们太多了）。这称为Prime-counting function。

在数学中，素数计数函数是统计小于或等于某个实数 x 的素数个数的函数，记作 π(x)。

计算这个函数的方法有很多，各种方法的比较可参见此 Wolfram 页面。要在两分钟内完成这项计算似乎相当困难。

正如评论中提到的，还有一个很棒的answer at math.stackexchange，我认为这会有所帮助。

计算素数最多18位数优化

1 个答案: