Question

我一直在尝试在C for Linux中实现MPSC循环缓冲区。这是缓冲结构：

typedef struct mpsc_buffer_s {
    sem_t semaphore;
    unsigned char cache_pad_1[CACHE_LINE - sizeof(sem_t)];
    uint64_t write_pos;
    unsigned char cache_pad_2[CACHE_LINE - sizeof(uint64_t)];
    size_t size;
    unsigned char cache_pad_3[CACHE_LINE - sizeof(size_t)];
    uint64_t read_pos;
    unsigned char cache_pad_4[CACHE_LINE - sizeof(uint64_t)];
    void **buffer;
} mpsc_buffer_t  __attribute__ ((__aligned__(CACHE_LINE)));

以下是相关功能：

mpsc_buffer_t* init_mpsc_buffer(size_t size) {
    mpsc_buffer_t *new_buffer;
    // allocation and init
    posix_memalign((void**) &new_buffer, CACHE_LINE, sizeof(mpsc_buffer_t));
    new_buffer->size = size;
    new_buffer->read_pos = 0;
    new_buffer->write_pos = 0;
    int rc = sem_init(&new_buffer->semaphore, 0, 0);
    ABORT_ON_ERR(rc, "Semaphore init failed");
    // allocating internal pointers buffer
    new_buffer->buffer = malloc(new_buffer->size * sizeof(void*));
    memset(new_buffer->buffer, 0, new_buffer->size * sizeof(void*));
    return new_buffer;
}
void add_to_buffer(mpsc_buffer_t *buffer, void *element) {
    // get next address to write into
    uint64_t write_pos = __sync_fetch_and_add(&buffer->write_pos, 1) %   buffer->size;
    //spin lock until the address is free
    while(!__sync_bool_compare_and_swap(&(buffer->buffer[write_pos]), NULL, element));
    // increment semaphore
    int rc = sem_post(&buffer->semaphore);
    ABORT_ON_ERR(rc, "Semaphore unlock failed");
}

void* get_from_buffer(mpsc_buffer_t *buffer) {
    int rc = sem_wait(&buffer->semaphore);
    ABORT_ON_ERR(rc, "Semaphore wait failed");

    uint64_t read_pos = buffer->read_pos % buffer->size;
    void *element = buffer->buffer[read_pos];
    if(!element) {
        error_print("cannot get NULL stuff - read_pos %u", read_pos);
    }
    buffer->buffer[read_pos] = NULL;
    buffer->read_pos++;
    return element;
}

我使用这种缓冲区来传递指针。很明显，我不发送NULL指针。

当我将生产者数量从2增加到3时，会出现一个野生错误：然后，消费者开始读取NULL值。由于我没有主动发送NULL指针，这意味着消费者线程获得正信号量，但随后从读取位置读取NULL值。

另一方面，缓冲区中的某些指针未被清除，从而导致潜在的死锁。

算法中是否存在逻辑错误，或者这些问题可能与我看不到的缓存机制有关？

Answer 1

在递增写入索引和分配条目指针之间存在竞争条件。

考虑生产者A递增写入索引但是用完时间片的情况。同时，生产者B再次递增写入索引，填充 next 条目 - 记住，A还没有填充其条目 - 并增加信号量。现在，如果消费者C在A之前被唤醒，它完全有理由相信A已经填充了它的条目，并抓住了它。因为尚未填充，所以它为NULL。

换句话说：

Producer A     Producer B     Consumer C

write_pos++
               write_pos++
               sets buffer[]
               sem_post()
                              sem_wait()
                              read_pos++
                              uses buffer[]
sets buffer[]
sem_post()
                              sem_wait()
                              read_pos++
                              uses buffer[]

生产者越多，看到上述情景的概率就越高。

解决方案很简单：添加一个write_pos2计数器，用于序列化编写器，以便它们以正确的顺序发布信号量。

考虑以下示例程序：

#define  _POSIX_C_SOURCE 200809L
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

typedef struct {
    sem_t              semaphore;
    uint64_t           size;
    volatile uint64_t  wrnext;  /* Next free write slot */
    volatile uint64_t  wrindex; /* Write index, second half */
    volatile uint64_t  rdindex; /* Read index */
    void              *entry[];
} cbuffer;

static cbuffer *cbuffer_destroy(cbuffer *const cbuf)
{
    if (cbuf) {
        cbuf->size = 0;
        cbuf->wrnext = 0;
        cbuf->wrindex = 0;
        cbuf->rdindex = 0;
        sem_destroy(&cbuf->semaphore);
        free(cbuf);
    }
    return NULL;
}

static cbuffer *cbuffer_create(const size_t size)
{
    cbuffer *cbuf;

    if (size < 2) {
        errno = EINVAL;
        return NULL;
    }

    cbuf = malloc(sizeof *cbuf + size * sizeof cbuf->entry[0]);
    if (!cbuf) {
        errno = ENOMEM;
        return NULL;
    }

    memset(cbuf->entry, 0, size * sizeof cbuf->entry[0]);

    sem_init(&cbuf->semaphore, 0, 0);
    cbuf->size = size;
    cbuf->wrnext = 0;
    cbuf->wrindex = 0;
    cbuf->rdindex = 0;

    return cbuf;
}

static void cbuffer_add(cbuffer *const cbuf, void *const entry)
{
    uint64_t wrnext;

    /* Get next nose value. */
    wrnext = __sync_fetch_and_add(&cbuf->wrnext, (uint64_t)1);

    /* Spin while buffer full. */
    while (!__sync_bool_compare_and_swap(&cbuf->entry[wrnext % cbuf->size], NULL, entry))
        ;

    /* Spin until we can update the head to match next. */
    while (!__sync_bool_compare_and_swap(&cbuf->wrindex, wrnext, wrnext + (uint64_t)1))
        ;

    /* TODO: check for -1 and errno == EOVERFLOW */
    sem_post(&cbuf->semaphore);
}

static void *cbuffer_get(cbuffer *const cbuf)
{
    uint64_t rdindex;

    /* Get the index of the oldest entry. */
    rdindex = __sync_fetch_and_add(&cbuf->rdindex, (uint64_t)1);

    sem_wait(&cbuf->semaphore);

    /* Pop entry. */
    return __sync_fetch_and_and(&cbuf->entry[rdindex % cbuf->size], NULL);
}

static volatile int done = 0;
static cbuffer     *cb = NULL;

void *consumer_thread(void *payload)
{
    const long    id = (long)payload;
    unsigned long count = 0UL;
    void         *entry;

    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);

    while (1) {
        if (done)
            return NULL;

        entry = cbuffer_get(cb);
        count++;

        if (!entry) {
            printf("Consumer %ld: NULL pointer at %lu encountered!\n", id, count);
            fflush(stderr);
            done = 1;
            return NULL;
        }

    }
}

void *producer_thread(void *payload __attribute__((unused)))
{
    unsigned long count = 0UL;

    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);

    while (1) {

        if (done)
            return NULL;

        cbuffer_add(cb, (void *)(256UL + (count & 255UL)));

    }
}

int main(int argc, char *argv[])
{
    pthread_attr_t  attrs;
    pthread_t      *producer_id;
    pthread_t      *consumer_id;
    sigset_t        blocked;
    siginfo_t       info;
    struct timespec timeout;
    int             producers, consumers, size, i, result;
    char            dummy;

    if (argc != 4 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, "       %s SIZE PRODUCERS CONSUMERS\n", argv[0]);
        fprintf(stderr, "\n");
        return 1;
    }

    if (sscanf(argv[1], " %d %c", &size, &dummy) != 1 || size < 2) {
        fprintf(stderr, "%s: Invalid circular buffer size.\n", argv[1]);
        return 1;
    }

    if (sscanf(argv[2], " %d %c", &producers, &dummy) != 1 || producers < 1) {
        fprintf(stderr, "%s: Invalid number of producer threads.\n", argv[2]);
        return 1;
    }

    if (sscanf(argv[3], " %d %c", &consumers, &dummy) != 1 || consumers < 1) {
        fprintf(stderr, "%s: Invalid number of consumer threads.\n", argv[3]);
        return 1;
    }

    cb = cbuffer_create(size);
    producer_id = malloc((size_t)producers * sizeof *producer_id);
    consumer_id = malloc((size_t)consumers * sizeof *consumer_id);
    if (!cb || !producer_id || !consumer_id) {
        fprintf(stderr, "%s.\n", strerror(ENOMEM));
        return 1;
    }

    sigemptyset(&blocked);
    sigaddset(&blocked, SIGINT);
    sigaddset(&blocked, SIGTERM);
    sigprocmask(SIG_BLOCK, &blocked, NULL);

    pthread_attr_init(&attrs);
    pthread_attr_setstacksize(&attrs, 32768);

    /* Start consumer threads. */
    for (i = 0; i < consumers; i++) {
        result = pthread_create(&consumer_id[i], &attrs, consumer_thread, (void *)(1L + (long)i));
        if (result) {
            fprintf(stderr, "Cannot start consumer threads: %s.\n", strerror(result));
            exit(1);
        }
    }

    /* Start producer threads. */
    for (i = 0; i < producers; i++) {
        result = pthread_create(&producer_id[i], &attrs, producer_thread, (void *)(1L + (long)i));
        if (result) {
            fprintf(stderr, "Cannot start producer threads: %s.\n", strerror(result));
            exit(1);
        }
    }

    pthread_attr_destroy(&attrs);

    printf("Press CTRL+C or send SIGTERM to process %ld to stop testing.\n", (long)getpid());
    fflush(stdout);

    while (1) {

        if (done)
            break;

        timeout.tv_sec = (time_t)0;
        timeout.tv_nsec = 10000000L; /* 0.010000000 seconds */
        result = sigtimedwait(&blocked, &info, &timeout);
        if (result != -1 || errno != EAGAIN) {
            done = 1;
            break;
        }
    }

    printf("Exiting...\n");
    fflush(stdout);

    for (i = 0; i < producers; i++)
        pthread_cancel(producer_id[i]);
    for (i = 0; i < consumers; i++)
        pthread_cancel(consumer_id[i]);

    for (i = 0; i < producers; i++)
        pthread_join(producer_id[i], NULL);
    for (i = 0; i < consumers; i++)
        pthread_join(consumer_id[i], NULL);

    cb = cbuffer_destroy(cb);
    free(producer_id);
    free(consumer_id);

    return 0;
}

虽然我可能错了，但我可以运行上面的任意数量的生成器（显然只有一个使用者），而不会遇到NULL指针。您可以轻松添加一些逻辑来验证指针。

我相信即使在无可争议的情况下你也会旋转很多。

我个人会考虑使用两个链接列表：一个用于未使用/空闲的插槽，另一个用于添加的条目。（如果您的指向条目以next指针字段开头，那么您只需要使用的列表。我更喜欢这个，我自己。）

生产者总是从空闲列表中获取第一个节点，并预先添加到使用的列表中。消费者抓住整个使用的清单。所有这些操作都使用简单的do { } while (!__sync_bool_compare_and_swap());循环，或do { } while (!__atomic_compare_exchange());用于GCC 4.7及更高版本，循环只执行一次。类似于以下内容 - 未经测试 - 代码：

 struct node {
     struct node *next;
     /* whatever data here */
 };

void add_one(volatile struct node **const list, struct node *item)
{
    do {
        item->next = (*list) ? (*list)->next : NULL;
    } while (!__sync_bool_compare_and_swap(list, item->next, item);
}

struct node *get_one(volatile struct node **const list)
{
    struct node *first, *next;
    do {
         first = *list;
         next = (first) ? first->next : NULL;
    } while (!__sync_bool_compare_and_swap(list, first, next);
    if (first)
        first->next = NULL;
    return first;
}

struct node *get_all(volatile struct node **const list)
{
    struct node *all, *root;
    do {
        all = *list;
    } while (!__sync_bool_compare_and_swap(list, all, NULL));
    root = NULL;
    while (all) {
        struct node *const curr = all;
        all = all->next;
        curr->next = root;
        root = curr;
    }
    return root;
}

请注意，上面get_all()会反转列表，以便最旧的条目在返回的列表中排在第一位。这使得消费者可以轻松地按照添加顺序处理所有条目，并且通常情况下的开销最小。

有问题吗？

排除多个生产者单个消费者循环缓冲

1 个答案: