我在一个可加载内核模块中启动了一个 kthread,用它来定期执行某个函数:
/**
 * start_fwd_filter - spawn the periodic bloom-filter forwarding thread
 * @bflt: bloom filter handed to timed_fwd_filter() as its thread argument
 *
 * On success the new task is stored in fwd_bflt_thread; on failure
 * fwd_bflt_thread is left as NULL so later NULL checks stay meaningful.
 *
 * Return: 0 on success, -1 if the thread could not be created.
 */
int start_fwd_filter(struct bloom_filter *bflt)
{
	struct task_struct *task;

	task = kthread_run(timed_fwd_filter, bflt, "fwd_bflt");

	/* kthread_run() reports failure with ERR_PTR(), never with NULL. */
	if (IS_ERR(task)) {
		pr_err(" *** mtp | kthread_run failed (%ld) | start_fwd_filter *** \n",
		       PTR_ERR(task));
		fwd_bflt_thread = NULL;
		return -1;
	}

	fwd_bflt_thread = task;
	return 0;
}
int timed_fwd_filter(void* data)
{
struct bloom_filter *bflt = (struct bloom_filter *)data;
allow_signal(SIGKILL|SIGSTOP);
while(1)
{
__set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(delay*HZ);
if(kthread_should_stop())
{
pr_info(" *** mtp | 1.timed_fwd_filter thread"
" stopped | timed_fwd_filter *** \n");
timed_fwd_filter_stopped = 1;
return 0;
}
if(signal_pending(current))
goto exit_timed_fwd_filter;
if(tcp_client_fwd_filter(bflt) < 0)
{
pr_info(" *** mtp | tcp_client_fwd_filter 2 attmepts "
"failed | timed_fwd_filter *** \n");
}
if(kthread_should_stop())
{
pr_info(" *** mtp | 2.timed_fwd_filter thread"
" stopped | timed_fwd_filter *** \n");
timed_fwd_filter_stopped = 1;
return 0;
}
if(signal_pending(current))
{
goto exit_timed_fwd_filter;
}
}
exit_timed_fwd_filter:
timed_fwd_filter_stopped = 1;
do_exit(0);
}
当模块被卸载时,我发出一个kthread_stop()调用来停止这个线程:
/*
 * network_server_exit - module exit hook.
 *
 * Stops the forwarding thread if it was started and has not already
 * reported (through timed_fwd_filter_stopped) that it finished by itself.
 * kthread_stop() blocks until the thread function has returned and yields
 * that function's return value.
 */
static void __exit network_server_exit(void)
{
	int ret;
	int id;

	if (fwd_bflt_thread != NULL && !timed_fwd_filter_stopped) {
		ret = kthread_stop(fwd_bflt_thread);
		if (!ret)
			pr_info(" *** mtp | timed forward filter thread"
				" stopped | network_server_exit *** \n");
		else
			pr_err(" *** mtp | timed forward filter thread"
			       " returned %d | network_server_exit *** \n", ret);
		/* The task may be freed after kthread_stop(); drop the pointer. */
		fwd_bflt_thread = NULL;
	}
...
}
但是我无法删除模块,因为rmmod挂起了:
[ 599.823825] INFO: task rmmod:2359 blocked for more than 120 seconds.
[ 599.823845] Tainted: G OE 3.16.0-45-generic #60~14.04.1-Ubuntu
[ 599.823863] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 599.823882] rmmod D ffff88022f3530c0 0 2359 2358 0x00000004
[ 599.823884] ffff880216eb3da0 0000000000000082 ffff8802209d3d20 ffff880216eb3fd8
[ 599.823885] 00000000000130c0 00000000000130c0 ffff8802239a3d20 ffff8800bb91ff10
[ 599.823887] ffff8800bb91ff18 7fffffffffffffff ffff8802209d3d20 00007fe3a7de9200
[ 599.823888] Call Trace:
[ 599.823893] [<ffffffff8176a2d9>] schedule+0x29/0x70
[ 599.823894] [<ffffffff817696b9>] schedule_timeout+0x229/0x2a0
[ 599.823897] [<ffffffff810c9198>] ? console_unlock+0x1f8/0x440
[ 599.823899] [<ffffffff8176add6>] wait_for_completion+0xa6/0x160
[ 599.823901] [<ffffffff810a1b30>] ? wake_up_state+0x20/0x20
[ 599.823904] [<ffffffff810917da>] kthread_stop+0x4a/0xe0
[ 599.823907] [<ffffffffc052b6b0>] network_server_exit+0x83/0x9d3 [local_tcp]
[ 599.823910] [<ffffffff810ebb92>] SyS_delete_module+0x162/0x200
[ 599.823912] [<ffffffff81013057>] ? do_notify_resume+0x97/0xb0
[ 599.823915] [<ffffffff8176e34d>] system_call_fastpath+0x1a/0x1f
我怀疑问题出在我为了定期调用 tcp_client_fwd_filter(bflt) 而显式调用的 schedule_timeout() 上,因为 kthread_stop() 最终也会调用 schedule_timeout(MAX_SCHEDULE_TIMEOUT) 来等待 kthread->exited->done 成立。于是我去掉了 schedule_timeout() 调用,改成了:
int timed_fwd_filter(void* data)
{
struct bloom_filter *bflt = (struct bloom_filter *)data;
DECLARE_WAIT_QUEUE_HEAD(fwdwait);
allow_signal(SIGKILL|SIGSTOP);
while(1)
{
wait_event_timeout( fwdwait, (kthread_should_stop() == true), delay*HZ)
if(kthread_should_stop())
{
pr_info(" *** mtp | 1.timed_fwd_filter thread"
" stopped | timed_fwd_filter *** \n");
timed_fwd_filter_stopped = 1;
return 0;
}
...
但我仍然遇到同样的问题,而且我意识到 wait_event_timeout() 内部同样会调用 schedule_timeout()。我想我一定是哪里做错了,却无法确定究竟错在哪里。有人可以帮帮我吗?
编辑:我清理了代码,并像许多示例中那样,把 !kthread_should_stop() 作为 while 循环的条件,但问题依旧。
int timed_fwd_filter(void* data)
{
unsigned long jleft = 0;
struct bloom_filter *bflt = (struct bloom_filter *)data;
allow_signal(SIGKILL|SIGSTOP);
set_current_state(TASK_INTERRUPTIBLE);
while(!kthread_should_stop())
{
jleft = schedule_timeout(delay*HZ);
__set_current_state(TASK_RUNNING);
if(signal_pending(current))
{
goto exit_timed_fwd_filter;
}
if(tcp_client_fwd_filter(bflt) < 0)
{
pr_info(" *** mtp | tcp_client_fwd_filter 2 attmepts "
"failed | timed_fwd_filter *** \n");
}
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
exit_timed_fwd_filter:
timed_fwd_filter_stopped = 1;
do_exit(0);
}