在调用io_destroy期间,使用异步AIO请求的自定义设备驱动程序挂起

时间:2019-06-05 01:09:05

标签: c linux-kernel linux-device-driver aio

我正在将一个使用AIO的Linux设备驱动程序从Fedora 13移植到Ubuntu 19.04(内核5.0.0-16-generic)。目前,除请求取消外,其他所有操作都正常;如果在用户应用程序调用io_destroy(系统调用__NR_io_destroy)时仍有请求处于活动状态,则该进程将挂起。据我所知,唯一支持AIO取消的官方设备驱动程序是USB Gadget,而且AIO接口似乎经常更改,因此很难找到可用的示例。

下面是一个精简的MWE(最小工作示例),以及系统日志的结尾。目前,我认为相关的驱动程序函数是dummy_driver_aio_cancel和dummy_driver_read_iter,但是我无法确定我是在错误的上下文(context)中调用了某些内容,还是缺少某个函数调用。当我尝试使用KGDB时,测试系统会不断锁死,从而使逐步执行代码变得困难。如果让io_getevents调用超时,完整版的驱动程序和测试应用程序也会锁死。

设备驱动程序源

#include <linux/aio.h>
#include <linux/cdev.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/workqueue.h>

// Name passed to alloc_chrdev_region and printed in the load banner
#define DRIVER_NAME "dummy_driver"
// Version string used by MODULE_VERSION and the load banner
#define DRIVER_VERSION "0.0"
// Name of the device class created in dummy_driver_init_module
#define DRIVER_CLASS_NAME "dummy_class"

MODULE_AUTHOR("Some Author");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRIVER_VERSION);

// Major number taken from the region returned by alloc_chrdev_region
static int dummy_driver_major;
// Device class created at module load and destroyed at module unload
static struct class * dummy_driver_class = NULL;

// Number of minor numbers reserved in the char-device region
#define MAX_DEVICES 1

// Per-device state; allocated in probe_device and freed in remove_device.
struct dummy_driver_dev{
    struct cdev cdev;           // char device registered with cdev_add
    struct device * device;     // value returned by device_create
    int minor;                  // minor number, taken from probe_device's instance counter
};

// The single device instance; filled in by probe_device during module init
static struct dummy_driver_dev * dummy_driver_instance = NULL;

static int dummy_driver_aio_cancel(struct kiocb * iocb){
    printk("dummy_driver: dummy_driver_aio_cancel\n");

    //BUG_ON(iocb->ki_complete == NULL);
    //iocb->ki_complete(iocb, 0, 2);

    printk("dummy_driver_aio_cancel done\n");
    return(0);
}

static ssize_t dummy_driver_read_iter(struct kiocb *iocb, struct iov_iter * iter){
    struct file *filp = iocb->ki_filp;
    struct dummy_driver_dev * instance = filp->private_data;

    printk("dummy_driver_read_iter\n");

    iocb->private = instance;

    // Set the cancel function
    kiocb_set_cancel_fn(iocb, dummy_driver_aio_cancel);

    return(-EIOCBQUEUED);
}

// open handler: looks up the per-device state that embeds the inode's cdev
// and attaches it to the file for later calls. Always returns 0.
static int dummy_driver_open(struct inode *inode, struct file *filp){
    struct dummy_driver_dev *dev_state;

    pr_info("dummy_driver_open\n");

    dev_state = container_of(inode->i_cdev, struct dummy_driver_dev, cdev);
    filp->private_data = dev_state;

    return 0;
}

// release handler: detaches the per-device state from the file.
// Always returns 0.
static int dummy_driver_release(struct inode *inode, struct file *filp){
    filp->private_data = NULL;

    return 0;
}

// File operations of the char device. Hooks that are not listed
// (unlocked_ioctl, mmap, ...) are left NULL by static initialisation.
static struct file_operations dummy_driver_fops = {
    .owner     = THIS_MODULE,
    .llseek    = generic_file_llseek,
    .read_iter = dummy_driver_read_iter,
    .open      = dummy_driver_open,
    .release   = dummy_driver_release,
};

// Tears down one device instance and frees its memory.
//
// instance: state allocated by probe_device; may be NULL (no-op) or only
//           partially set up, since probe_device calls this on its error paths.
static void remove_device(struct dummy_driver_dev * instance){
    if(instance == NULL) return;

    // device_create reports failure as an ERR_PTR, which is non-NULL, so a
    // plain NULL test would treat a failed creation as a live device.
    printk("dummy_driver: Calling device_destroy\n");
    if(!IS_ERR_OR_NULL(instance->device)) device_destroy(dummy_driver_class, MKDEV(dummy_driver_major, instance->minor));

    // dev and count are both zero only if cdev_add was never called on the
    // zero-initialised instance.
    printk("dummy_driver: Calling cdev_del\n");
    if(instance->cdev.dev != 0 || instance->cdev.count != 0) cdev_del(&instance->cdev);

    printk("dummy_driver: Freeing instance memory\n");
    kfree(instance);
}

static int probe_device(struct dummy_driver_dev ** pp_instance){
    static int instance_counter = 0;
    int err;

    // Allocate the instance memory
    printk("dummy_driver: Allocating instance memory\n");
    struct dummy_driver_dev * instance = kmalloc(sizeof(struct dummy_driver_dev), GFP_KERNEL);
    if(instance == NULL){
        pr_err("Failed to allocate instance memory.\n");
        return -ENOMEM;
    }
    memset(instance, 0, sizeof(struct dummy_driver_dev));
    *pp_instance = instance;

    // Set the minor number (instance number)
    instance->minor = instance_counter++;

    printk("dummy_driver: Calling cdev_init\n");
    cdev_init(&instance->cdev, &dummy_driver_fops);
    instance->cdev.owner = THIS_MODULE;
    err = cdev_add(&instance->cdev, MKDEV(dummy_driver_major, instance->minor), 1);
    if(err){
        pr_err("Failed to cdev_add.\n");
        remove_device(instance);
        return(err);
    }

    printk("dummy_driver: Calling device_create\n");
    instance->device = device_create(dummy_driver_class, NULL, MKDEV(dummy_driver_major, instance->minor), NULL, "dummy_driver%d", instance->minor);
    if(IS_ERR(instance->device)){
        err = PTR_ERR(instance->device);
        pr_err("device_create failed\n");
        remove_device(instance);
        return(err);
    }

    printk("Done probe_device\n");
    return(0);
}

// Module entry point: creates the device class, reserves the char-device
// region and probes the single device instance.
//
// Returns 0 on success or the error of the step that failed; the steps that
// already succeeded are undone in reverse order.
static int __init dummy_driver_init_module(void){
    dev_t region;
    int rc;

    printk(KERN_INFO DRIVER_NAME " driver " DRIVER_VERSION);

    printk("dummy_driver: Calling class_create\n");
    dummy_driver_class = class_create(THIS_MODULE, DRIVER_CLASS_NAME);
    if(IS_ERR(dummy_driver_class)){
        pr_err("Error creating DUMMY_DRIVER driver class.\n");
        return PTR_ERR(dummy_driver_class);
    }

    printk("dummy_driver: Calling alloc_chrdev_region\n");
    rc = alloc_chrdev_region(&region, 0, MAX_DEVICES, DRIVER_NAME);
    if(rc){
        pr_err("Call to alloc_chrdev_region failed.\n");
        goto undo_class;
    }

    dummy_driver_major = MAJOR(region);

    printk("dummy_driver: Calling probe_device\n");
    rc = probe_device(&dummy_driver_instance);
    if(rc){
        pr_err("Failed to probe the device.\n");
        goto undo_region;
    }

    printk("dummy_driver: Done dummy_driver_init_module\n");
    return 0;

undo_region:
    unregister_chrdev_region(region, MAX_DEVICES);
undo_class:
    class_destroy(dummy_driver_class);
    return rc;
}

// Module exit point: undoes dummy_driver_init_module in reverse order
// (device instance, char-device region, device class).
static void __exit dummy_driver_exit_module(void){
    const dev_t first_dev = MKDEV(dummy_driver_major, 0);

    printk("dummy_driver: Calling remove_device\n");
    remove_device(dummy_driver_instance);

    printk("dummy_driver: unregistering chrdev region\n");
    unregister_chrdev_region(first_dev, MAX_DEVICES);

    printk("dummy_driver: Calling class destroy\n");
    class_destroy(dummy_driver_class);
}

// Register the module's load and unload entry points
module_init(dummy_driver_init_module);
module_exit(dummy_driver_exit_module);

用户空间测试应用程序

#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <linux/types.h>
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include "syserr.h"

// Number of AIO read requests submitted by perform_aio
#define TOTAL_REQUEST_COUNT 20

// Destination buffer of one read request (0x1000 bytes)
typedef struct data_block_t {
    uint8_t data[0x1000];
} data_block_t;

// One destination buffer per request
data_block_t blocks[TOTAL_REQUEST_COUNT];

// Submits TOTAL_REQUEST_COUNT asynchronous reads on fd, one per entry of
// blocks, and then destroys the AIO context while they are still outstanding.
//
// fd: open descriptor of the device to read from.
//
// The result of every syscall is passed to SYSERR (from "syserr.h").
void perform_aio(const int fd){
    struct iocb iocbs[TOTAL_REQUEST_COUNT];
    struct iocb * piocbs[TOTAL_REQUEST_COUNT];
    aio_context_t aio_context = 0;

    // Setup AIO
    printf("Setting up AIO\n");
    SYSERR(syscall(__NR_io_setup, TOTAL_REQUEST_COUNT, &aio_context));

    // Fill in the iocb structures
    printf("Filling out the iocb strctures\n");
    for(size_t event_iter = 0; event_iter < TOTAL_REQUEST_COUNT; event_iter++){
        // Members that are not named are zero-initialised, so no memset
        // (and no <string.h>) is needed.
        iocbs[event_iter] = (struct iocb){
            .aio_lio_opcode = IOCB_CMD_PREAD,
            .aio_fildes = fd,
            .aio_buf = (unsigned long)&blocks[event_iter],
            .aio_nbytes = sizeof(data_block_t),
        };
        piocbs[event_iter] = &iocbs[event_iter];
    }

    // Submit the AIO requests
    printf("Submitting the AIO request\n");
    SYSERR(syscall(__NR_io_submit, aio_context, TOTAL_REQUEST_COUNT, piocbs));

    // Disabled experiment: cancel every request explicitly before destroying
    // the context. Needs <errno.h> when enabled.
#if 0
    // Cancel the events
    for(size_t iter = 0; iter < TOTAL_REQUEST_COUNT; iter++){
        printf("Canceling request %zu\n", iter);
        const int result = syscall(__NR_io_cancel, aio_context, &iocbs[iter], NULL);
        if(result != -1 || errno != EINPROGRESS){
            printf("io_cancel failed: %i errno: %i\n", result, errno);
        }
    }
#endif

    // Destroy the AIO context
    printf("Destroying the AIO context\n");
    SYSERR(syscall(__NR_io_destroy, aio_context));

    printf("perform_aio done\n");
}

// Opens /dev/dummy_driver0, runs the AIO test on it and closes the device.
//
// Returns 0 on success and -1 if the device cannot be opened.
int main(void){
    // Open the device
    const char * device_name = "/dev/dummy_driver0";
    const int fd = open(device_name, O_RDWR);
    if(fd == -1){
        fprintf(stderr, "Failed to open %s\n", device_name);
        // Returning from main yields the same exit status as exit(-1) and
        // does not depend on <stdlib.h>, which this file never includes.
        return(-1);
    }

    perform_aio(fd);

    SYSERR(close(fd));
    return(0);
}

系统日志输出的结尾部分

[  141.836265] dummy_driver_aio_cancel done
[  141.938024] dummy_driver: dummy_driver_aio_cancel
[  142.066647] dummy_driver_aio_cancel done
[  142.179343] dummy_driver: dummy_driver_aio_cancel
[  142.297067] dummy_driver_aio_cancel done
[  363.561794] INFO: task TestApp:1234 blocked for more than 120 seconds.
[  363.728858]       Tainted: G           OE     5.0.0-16-generic #17-Ubuntu
[  363.897479] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  364.098982] TestApp         D    0  1234   1221 0x00000000
[  364.246430] Call Trace:
[  364.320295]  __schedule+0x2d0/0x840
[  364.420990]  ? vm_area_free+0x18/0x20
[  364.526674]  schedule+0x2c/0x70
[  364.613448]  schedule_timeout+0x258/0x360
[  364.726142]  ? call_rcu+0x10/0x20
[  364.821967]  ? __percpu_ref_switch_mode+0xdb/0x180
[  364.948546]  ? __vm_munmap+0x8e/0xd0
[  365.048274]  wait_for_completion+0xb7/0x140
[  365.156979]  ? wake_up_q+0x80/0x80
[  365.245813]  __x64_sys_io_destroy+0xb0/0x100
[  365.362434]  do_syscall_64+0x5a/0x110
[  365.462157]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  365.595809] RIP: 0033:0x7f835e1262e9
[  365.690561] Code: Bad RIP value.
[  365.775326] RSP: 002b:00007fff429174c8 EFLAGS: 00000203 ORIG_RAX: 00000000000000cf
[  365.975858] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f835e1262e9
[  366.164338] RDX: 00007f835e11c024 RSI: 00007f835e1f6580 RDI: 00007f835e006000
[  366.347785] RBP: 00007fff42917aa0 R08: 0000000000000000 R09: 0000000000000000
[  366.529341] R10: 00007f835e1fb500 R11: 0000000000000203 R12: 00005649ba4c60e0
[  366.707860] R13: 00007fff42917ba0 R14: 0000000000000000 R15: 0000000000000000

0 个答案:

没有答案