如何在卸载模块时,安全地退出一个定期执行某个函数的kthread?

时间:2016-06-27 13:07:21

标签: linux-kernel

我正在启动一个kthread,它将从可加载的内核模块定期执行一个函数。

/*
 * start_fwd_filter - create and start the periodic forward-filter kthread.
 * @bflt: bloom filter passed to timed_fwd_filter() as its thread argument.
 *
 * On success the new task is stored in fwd_bflt_thread.
 *
 * Return: 0 on success, -1 if the thread could not be created.
 */
int start_fwd_filter(struct bloom_filter *bflt)
{
    fwd_bflt_thread = kthread_run(timed_fwd_filter, bflt, "fwd_bflt");

    /*
     * kthread_run() reports failure with an ERR_PTR() value, never NULL,
     * so a NULL test cannot detect it.
     */
    if(IS_ERR(fwd_bflt_thread))
    {
            /*
             * Do not leave an error pointer behind: code that tests
             * fwd_bflt_thread != NULL would treat it as a live task.
             */
            fwd_bflt_thread = NULL;
            return -1;
    }

    return 0;
}

int timed_fwd_filter(void* data)
{
    struct bloom_filter *bflt = (struct bloom_filter *)data;

    allow_signal(SIGKILL|SIGSTOP);

    while(1)
    {
            __set_current_state(TASK_INTERRUPTIBLE);
            schedule_timeout(delay*HZ);

            if(kthread_should_stop())
            {
                   pr_info(" *** mtp | 1.timed_fwd_filter thread"
                           " stopped | timed_fwd_filter *** \n");

                   timed_fwd_filter_stopped = 1;
                   return 0;
            }

            if(signal_pending(current))
                    goto exit_timed_fwd_filter;


            if(tcp_client_fwd_filter(bflt) < 0)
            {
                    pr_info(" *** mtp | tcp_client_fwd_filter 2 attmepts "
                            "failed | timed_fwd_filter *** \n");
            }

            if(kthread_should_stop())
            {
                   pr_info(" *** mtp | 2.timed_fwd_filter thread"
                           " stopped | timed_fwd_filter *** \n");

                   timed_fwd_filter_stopped = 1;
                   return 0;
            }

            if(signal_pending(current))
            {
                   goto exit_timed_fwd_filter;
            }

    }

exit_timed_fwd_filter:

    timed_fwd_filter_stopped = 1;

    do_exit(0);
}

当模块被卸载时,我发出一个kthread_stop()调用来停止这个线程:

/*
 * network_server_exit - module exit handler.
 *
 * Stops the forward-filter kthread if one was started (fwd_bflt_thread is
 * non-NULL) and it has not already flagged itself as stopped through
 * timed_fwd_filter_stopped. The remainder of the function is elided ("...")
 * in this excerpt.
 */
static void __exit network_server_exit(void)
{
    int ret;
    int id; /* not used in the visible part; presumably used in the elided code */

    if(fwd_bflt_thread != NULL)
    {
            /*
             * NOTE(review): this flag test and the kthread_stop() call below
             * are not atomic with respect to the thread setting the flag and
             * exiting on its own - confirm the task cannot go away in between.
             */
            if(!timed_fwd_filter_stopped)
            {
                    /* Blocks until the thread function has finished. */
                    ret = kthread_stop(fwd_bflt_thread);

                    /* Log only when kthread_stop() returned 0. */
                    if(!ret)
                            pr_info(" *** mtp | timed forward filter thread"
                                    " stopped | network_server_exit *** \n");
            }
    }
    ...
}

但是我无法删除模块,因为rmmod挂起了:

[  599.823825] INFO: task rmmod:2359 blocked for more than 120 seconds.
[  599.823845]       Tainted: G           OE 3.16.0-45-generic #60~14.04.1-Ubuntu
[  599.823863] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  599.823882] rmmod           D ffff88022f3530c0     0  2359   2358 0x00000004
[  599.823884]  ffff880216eb3da0 0000000000000082 ffff8802209d3d20 ffff880216eb3fd8
[  599.823885]  00000000000130c0 00000000000130c0 ffff8802239a3d20 ffff8800bb91ff10
[  599.823887]  ffff8800bb91ff18 7fffffffffffffff ffff8802209d3d20 00007fe3a7de9200
[  599.823888] Call Trace:
[  599.823893]  [<ffffffff8176a2d9>] schedule+0x29/0x70
[  599.823894]  [<ffffffff817696b9>] schedule_timeout+0x229/0x2a0
[  599.823897]  [<ffffffff810c9198>] ? console_unlock+0x1f8/0x440
[  599.823899]  [<ffffffff8176add6>] wait_for_completion+0xa6/0x160
[  599.823901]  [<ffffffff810a1b30>] ? wake_up_state+0x20/0x20
[  599.823904]  [<ffffffff810917da>] kthread_stop+0x4a/0xe0
[  599.823907]  [<ffffffffc052b6b0>] network_server_exit+0x83/0x9d3 [local_tcp]
[  599.823910]  [<ffffffff810ebb92>] SyS_delete_module+0x162/0x200
[  599.823912]  [<ffffffff81013057>] ? do_notify_resume+0x97/0xb0
[  599.823915]  [<ffffffff8176e34d>] system_call_fastpath+0x1a/0x1f

我怀疑问题出在我显式调用schedule_timeout()来定期调用tcp_client_fwd_filter(bflt),因为kthread_stop()最终也会以MAX_SCHEDULE_TIMEOUT为参数调用schedule_timeout(),以等待kthread->exited->done完成。于是我删除了schedule_timeout()调用,改为:

int timed_fwd_filter(void* data)
{
    struct bloom_filter *bflt = (struct bloom_filter *)data;

    DECLARE_WAIT_QUEUE_HEAD(fwdwait);

    allow_signal(SIGKILL|SIGSTOP);

    while(1)
    {
            wait_event_timeout( fwdwait, (kthread_should_stop() == true), delay*HZ)
            if(kthread_should_stop())
            {
                   pr_info(" *** mtp | 1.timed_fwd_filter thread"
                           " stopped | timed_fwd_filter *** \n");

                   timed_fwd_filter_stopped = 1;
                   return 0;
            }
            ...

我仍然遇到同样的问题而且我意识到wait_event_timeout()也会调用schedule_timeout()。我想我在这里做错了什么,但无法确切地知道究竟是什么。有人可以帮助我吗?

  

编辑:   我清理了代码,并像许多示例中那样,把!kthread_should_stop()检查放进了while循环条件中,但问题依旧。

/*
 * timed_fwd_filter - kthread body that periodically runs tcp_client_fwd_filter().
 * @data: the struct bloom_filter * given to the thread as its argument.
 *
 * Sleeps for delay*HZ jiffies, then runs one filter pass, until a stop is
 * requested through kthread_stop() or a signal is pending. Sets
 * timed_fwd_filter_stopped to 1 before exiting.
 *
 * Return: always 0.
 */
int timed_fwd_filter(void* data)
{
    struct bloom_filter *bflt = data;

    /*
     * allow_signal() takes a single signal number per call. OR-ing two
     * signal numbers together yields a different, unrelated signal number.
     */
    allow_signal(SIGKILL);
    allow_signal(SIGSTOP);

    /*
     * The sleeping state is always set before kthread_should_stop() is
     * tested, so a wake-up from kthread_stop() arriving between the test and
     * schedule_timeout() is not lost.
     */
    set_current_state(TASK_INTERRUPTIBLE);

    while(!kthread_should_stop())
    {
            schedule_timeout(delay*HZ);

            __set_current_state(TASK_RUNNING);

            if(signal_pending(current))
                   break;

            if(tcp_client_fwd_filter(bflt) < 0)
            {
                    pr_info(" *** mtp | tcp_client_fwd_filter 2 attmepts "
                            "failed | timed_fwd_filter *** \n");
            }

            set_current_state(TASK_INTERRUPTIBLE);
    }

    /* Leave the loop runnable regardless of which exit path was taken. */
    __set_current_state(TASK_RUNNING);

    timed_fwd_filter_stopped = 1;

    /*
     * Returning from the thread function ends the kthread with this value as
     * its exit code, so an explicit do_exit(0) is not needed.
     */
    return 0;
}

0 个答案:

没有答案