io_getevents两次返回完成事件

时间:2015-04-21 18:12:47

标签: asynchronous io linux-kernel

我有一个较大的应用程序,它通过io_submit()和io_getevents()使用Linux的异步IO。偶尔,io_getevents()似乎会把同一个IO返回两次。其表现要么是返回的'events'数组中出现重复的条目,要么是某个条目与前一次调用io_getevents()所返回的条目重复。

int io_getevents(aio_context_t ctx_id, long min_nr, long nr,
                        struct io_event *events, struct timespec *timeout);
  • 每个iocb只会被提交给io_submit()一次。
  • 我使用io_getevents的返回值来限制对'events'的访问。即,

    struct io_event event[EVENT_COUNT];
    // .. skip a bit
    reaped = io_getevents(_m_aio_context, count, EVENT_COUNT, event, &timeout);
    if(reaped < 0) {
      if(errno == EINTR) {
        continue;
      }
    
      perror("reap error:");
      assert(0);
    }
    
    assert(reaped <= (int)EVENT_COUNT);
    for(i = 0; i < reaped; i++) {
      struct my_iocb *my_iocb = (struct my_iocb *)event[i].obj;
      ...
      // The symptom is that my_iocb is the same pointer as event[i-1].obj,
      // or that my_iocb was recently returned by a previous call to
      // io_getevents() and was not re-submitted since then.
      ...
    

很抱歉,我没有切实可行的办法贴出其余的相关源码。不过,我已经加入了相当多的检测代码,用来确认同一个IOCB从未被两次提交给io_submit(),并检查io_submit()是否返回错误。在我最近一次复现该bug时,自应用程序启动以来的五十亿次io_submit()调用中没有一次返回错误。

我发布这个问题是为了检查A)是否有人之前已经看过这个问题或B)这种行为是以某种方式预期的。

如果是预期的,那么解决它的典型方法是什么?我的应用程序遇到了麻烦,因为它在此时释放了iocb,显然第二个指向同一个iocb的指针会导致双重释放/损坏等。

谢谢!

亚历

1 个答案:

答案 0 :(得分:0)

在io_submit中,您应该将struct iocb的aio_data字段设置为上下文结构的值。

处理完成事件时,你应该使用event[i].data来查找上下文指针;不要使用event[i].obj!

此外,我还编写了一个测试,检查是否收到了两次通知:它通过在请求中设置一个魔术值来检查,并且通知检查并重置此魔术值;这个测试工作正常。

编译它:    gcc -std=c++0x -g aio.cpp -lstdc++ -lpthread -o aio

测试:

#include <errno.h>      /* errno, EINTR, EAGAIN */
#include <fcntl.h>      /* O_RDWR */
#include <inttypes.h>   /* uint64_t */
#include <sched.h>      /* sched_yield() */
#include <stdio.h>      /* for perror() */
#include <string.h>     /* memset() */
#include <unistd.h>     /* for syscall() */
#include <sys/syscall.h>    /* for __NR_* definitions */
#include <linux/aio_abi.h>  /* for AIO types and constants */

#include <atomic>
#include <pthread.h>

/* Capacity of the event array handed to one io_getevents() call. */
#define MAX_EVENTS 1000

/* Payload size of every write, and the span over which write offsets wrap. */
#define BLOCK_SIZE 4096
#define FILE_SIZE (BLOCK_SIZE * 10000)

/* Sentinel kept in the MAGIC_SIZE bytes that follow each BLOCK_SIZE payload. */
#define MAGIC_VALUE 0xff0001DDEEFFAABB
#define MAGIC_SIZE  8

/* Running request counter, incremented by every sender thread. */
std::atomic<uint64_t> request_cnt;

/* Descriptor of the test file; assigned in main(). */
int fd;
/* AIO context used by all threads; assigned in main(). */
aio_context_t ctx;


#if 1

// no header on my system that has these - says that libaio does it;

// Raw io_setup system call: forwards nr and ctxp untouched and hands back
// the syscall() result narrowed to int.
inline int io_setup(unsigned nr, aio_context_t *ctxp)
{
    const long rc = syscall(__NR_io_setup, nr, ctxp);

    return (int) rc;
}

// Raw io_destroy system call: forwards ctx untouched and hands back the
// syscall() result narrowed to int.
inline int io_destroy(aio_context_t ctx)
{
    const long rc = syscall(__NR_io_destroy, ctx);

    return (int) rc;
}

// Raw io_submit system call: forwards ctx, nr and iocbpp untouched and
// hands back the syscall() result narrowed to int.
inline int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
    const long rc = syscall(__NR_io_submit, ctx, nr, iocbpp);

    return (int) rc;
}

// Raw io_getevents system call: forwards every argument untouched and
// hands back the syscall() result narrowed to int.
inline int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
        struct io_event *events, struct timespec *timeout)
{
    const long rc = syscall(__NR_io_getevents, ctx, min_nr, max_nr,
                            events, timeout);

    return (int) rc;
}

#endif





/*
 * Completion-reaper thread body.
 *
 * Loops on io_getevents() for the global ctx. For every completed request the
 * buffer pointer is recovered from events[i].data (the value sender_th stored
 * in aio_data), the magic word behind the payload is verified and cleared,
 * and the buffer is released.
 *
 * The argument is unused. The function returns NULL, and only when
 * io_getevents() fails with an error other than EINTR.
 */
void *notification_th(void *)
{
    struct io_event events[MAX_EVENTS];

    while (true)
    {
        /* get the reply: wait for at least one completion, take up to MAX_EVENTS */
        int ret = io_getevents(ctx, 1, MAX_EVENTS, events, NULL);
        if (ret < 0)
        {
            if (errno == EINTR)
            {
                continue;   /* interrupted by a signal: just wait again */
            }

            /* Bail out; re-entering the loop on a persistent error would
             * spin at full speed without ever reaping anything. */
            perror("io_getevents error");
            return NULL;
        }

        for (int i = 0; i < ret; i++)
        {
            char *data = (char *) (uintptr_t) events[i].data;
            uint64_t magic;

            /* res carries the write result; anything but a full block is a failure */
            if (events[i].res != BLOCK_SIZE)
            {
                fprintf(stderr, "write failed: res=%lld\n",
                        (long long) events[i].res);
            }

            /* memcpy instead of a cast dereference: no aliasing/alignment assumptions */
            memcpy(&magic, data + BLOCK_SIZE, sizeof(magic));
            if (magic != MAGIC_VALUE)
            {
                fprintf(stderr, "notification received twice\n");
            }

            /* NOTE(review): the buffer is released right after the magic is
             * cleared, so a truly duplicated event would inspect memory that
             * has already been freed - the check is best effort only. */
            magic = 0;
            memcpy(data + BLOCK_SIZE, &magic, sizeof(magic));

            //fprintf(stderr,"notify %p\n",data);

            delete [] data;
        }
    }

    return NULL;
}

/*
 * Submitter thread body.
 *
 * Endlessly allocates a BLOCK_SIZE payload followed by a MAGIC_SIZE magic
 * word, and queues an asynchronous write of the payload to fd through the
 * global ctx. The buffer address is stored in aio_data so that the reaper
 * thread can find, verify and release the buffer from the completion event.
 *
 * The argument is unused. Returns NULL once io_submit() fails with an error
 * that is not worth retrying.
 */
void *sender_th(void *)
{
    while (true)
    {
        const uint64_t cnt = request_cnt.fetch_add(1);

        char *data = new char[BLOCK_SIZE + MAGIC_SIZE];
        memset(data, (char) cnt, BLOCK_SIZE);

        /* arm the magic word that the reaper checks and clears */
        const uint64_t magic = MAGIC_VALUE;
        memcpy(data + BLOCK_SIZE, &magic, sizeof(magic));

        /* setup I/O control block */
        struct iocb cb;
        memset(&cb, 0, sizeof(cb));
        cb.aio_fildes = fd;
        cb.aio_lio_opcode = IOCB_CMD_PWRITE;

        /* command-specific options */
        cb.aio_buf = (uint64_t) (uintptr_t) data;
        cb.aio_offset = (BLOCK_SIZE * cnt) % FILE_SIZE;
        cb.aio_nbytes = BLOCK_SIZE;
        /* cb itself is reused for every request of this thread, so the
         * per-request identity has to travel in aio_data */
        cb.aio_data = (uint64_t) (uintptr_t) data;

        struct iocb *cbs[1] = { &cb };

        //fprintf(stderr,"submit %lu %p\n",cnt,data);

        /* EAGAIN (queue currently full) and EINTR are transient: give the
         * reapers a chance to drain completions and try again instead of
         * terminating the sender. */
        int ret = io_submit(ctx, 1, cbs);
        while (ret < 0 && (errno == EAGAIN || errno == EINTR))
        {
            sched_yield();
            ret = io_submit(ctx, 1, cbs);
        }

        if (ret != 1)
        {
            if (ret < 0)
            {
                //fprintf(stderr,"error %lu %p\n",cnt,data);
                perror("io_submit error");
            }
            else
            {
                fprintf(stderr, "could not submit IOs\n");
            }

            /* the request was never queued, so nobody else will free it */
            delete [] data;
            return NULL;
        }
    }

    return NULL;
}

/*
 * Starts one thread running body and reports a failure on stderr.
 * pthread_create() returns the error number directly instead of setting errno.
 * Returns true when the thread was started.
 */
static bool start_thread(pthread_t *out, void *(*body)(void *), const char *what)
{
    const int err = pthread_create(out, NULL, body, NULL);

    if (err != 0)
    {
        fprintf(stderr, "pthread_create(%s) error: %s\n", what, strerror(err));
        return false;
    }
    return true;
}

/*
 * Test driver: opens /tmp/testfile, creates the AIO context, starts eleven
 * reaper threads and ten sender threads, then waits for the first reaper.
 *
 * Returns 0 on success and -1 when setup or teardown fails.
 */
int main()
{
    int ret;

    request_cnt = 0;

    /* O_CREAT requires the mode argument; omitting it makes open() read an
     * indeterminate value for the permission bits of the new file. */
    fd = open("/tmp/testfile", O_RDWR | O_CREAT, 0644);
    if (fd < 0) {
        perror("open error");
        return -1;
    }

    ctx = 0;

    ret = io_setup(10000, &ctx);
    if (ret < 0) {
        perror("io_setup error");
        close(fd);
        return -1;
    }

    /* this first reaper is the thread main() later joins on */
    pthread_t th;
    if (!start_thread(&th, notification_th, "notification_th")) {
        io_destroy(ctx);
        close(fd);
        return -1;
    }

    /* Additional workers are best effort: a failure is reported, and the
     * test carries on with the threads that did start. */
    for (int i = 0; i < 10; i++)
    {
        pthread_t worker;
        start_thread(&worker, notification_th, "notification_th");
    }

    for (int i = 0; i < 10; i++)
    {
        pthread_t worker;
        start_thread(&worker, sender_th, "sender_th");
    }

    pthread_join(th, NULL);

    ret = io_destroy(ctx);
    if (ret < 0) {
        perror("io_destroy error");
        close(fd);
        return -1;
    }

    close(fd);
    return 0;
}