我正在运行自定义Fedora 17内核3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64,警告显示在dmesg上:
[ 858.634304]
[ 858.634324] ===============================
[ 858.634350] [ INFO: suspicious RCU usage. ]
[ 858.634375] 3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64 #1 Not tainted
[ 858.634409] -------------------------------
[ 858.634435] kernel/pid.c:425 find_task_by_pid_ns() needs rcu_read_lock() protection!
[ 858.634478]
[ 858.634479] other info that might help us debug this:
[ 858.634480]
[ 858.634528]
[ 858.634529] rcu_scheduler_active = 1, debug_locks = 0
[ 858.634567] no locks held by monitor/10550.
[ 858.634591]
[ 858.634592] stack backtrace:
[ 858.634620] Pid: 10550, comm: monitor Not tainted 3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64 #1
[ 858.634666] Call Trace:
[ 858.634688] [<ffffffff810c8c55>] lockdep_rcu_suspicious+0xe5/0x100
[ 858.634727] [<ffffffff81086921>] find_task_by_pid_ns+0x81/0xa0
[ 858.634762] [<ffffffff81086962>] find_task_by_vpid+0x22/0x30
[ 858.634798] [<ffffffff8131ccd5>] yfkm2_is_pid_running+0x15/0x40
[ 858.634835] [<ffffffff8131ce54>] sys_yfkm2_monitor+0x14/0x80
[ 858.634870] [<ffffffff816a6ba9>] system_call_fastpath+0x16/0x1b
monitor是调用sys_yfkm2_monitor系统调用传递pid的用户应用程序。自定义代码按预期工作,但我很好奇dmesg上显示的警告消息。我做错了什么?
用户应用程序monitor.c:
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#define SYS_yfkm2_monitor __NR_yfkm2_monitor
#define SYS_yfkm2_notifyme __NR_yfkm2_notifyme
int main (int argc, char *argv[])
{
if (argc < 2) {
printf("Error. Use %s <PID>\n", argv[0]);
return 1;
}
pid_t pid = atoi(argv[1]);
long ret;
ret = syscall(SYS_yfkm2_monitor, pid);
if (ret == 0){
printf("Sucess on adding %d!\n", pid);
return 0;
} else {
printf("Failure! Is %s a valid PID?\n", argv[1]);
return 1;
}
}
内核代码:
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#define YFKM2_KT_TIMEOUT (1*HZ) /* 1 second */
struct yfkm2 {
pid_t monitor; /* PID to monitor */
pid_t notifyme; /* PID to notify */
struct list_head list; /* Linked List struct */
};
/* How many Kernel Threads are running? */
atomic_t yfkm2_kthread_run_count = ATOMIC_INIT(0);
/* Define and initialize yfkm2_(linked)list */
LIST_HEAD(yfkm2_list);
/* Define and initialize yfkm2_(read&write)lock */
DEFINE_RWLOCK(yfkm2_lock);
/*
* yfkm2_is_pid_running(pid_t pid)
*
* Check if pid is running
*
* return 0 if pid is running
* return 1 if pid is not running
*/
int yfkm2_is_pid_running(pid_t pid)
{
struct task_struct *q;
q = find_task_by_vpid(pid);
if (q != NULL && q->pid == pid)
return 0;
return 1;
}
/*
* yfkm2_kill(pid_t pid)
*
* Kills pid
*
* return 0 if pid was running and send SIGKILL to pid
* return 1 if pid is not running
*/
int yfkm2_kill(pid_t pid)
{
struct task_struct *q;
q = find_task_by_vpid(pid);
if (q != NULL) {
force_sig(SIGKILL, q);
return 0;
}
return 1;
}
/*
* int yfkm2_kthread(void *data)
*
* The Kernel Thread
*
* Traverse the yfkm2_list looking for yfkm2->notifyme that are not 0.
* If any found, check if correspondent yfkm2->monitor is still running. If not
* kill yfkm2->notifyme. After traversing the list, check if the list is empty.
* If so return 0. If not sleep one second and start again.
*
* return 0 if yfkm2_list is empty
* should never return 1
*/
int yfkm2_kthread(void *data) /* data is NEVER used */
{
struct yfkm2 *yfkm2_tmp, *yfkm2_tmp2;
bool empty;
while (true) {
/* Needs write protection due possible item removal from list */
write_lock(&yfkm2_lock); /* Write lock */
list_for_each_entry_safe(yfkm2_tmp, yfkm2_tmp2,
&yfkm2_list, list) {
if (yfkm2_tmp->notifyme != 0) {
if (yfkm2_is_pid_running(yfkm2_tmp->monitor) != 0) {
yfkm2_kill(yfkm2_tmp->notifyme);
list_del(&yfkm2_tmp->list);
kfree(yfkm2_tmp);
}
}
}
write_unlock(&yfkm2_lock); /* Write unlock */
read_lock(&yfkm2_lock); /* Read lock */
empty = list_empty(&yfkm2_list);
read_unlock(&yfkm2_lock); /* Read unlock */
if (empty) {
/* The counter is increased at sys_yfkm2_notifyme()
* Before exit, decrease atomic run counter */
atomic_dec(&yfkm2_kthread_run_count);
return 0;
}
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(YFKM2_KT_TIMEOUT);
}
/* Before exit, decrease atomic run counter */
atomic_dec(&yfkm2_kthread_run_count);
return 1;
}
/*
* asmlinkage long sys_yfkm2_monitor(pid_t monitor)
*
* The system call that check if monitor correspond to a running pid and stores
* monitor at yfkm2_list->monitor
*
* return 0 if pid is running
* return 1 if pid is not running
*/
asmlinkage long sys_yfkm2_monitor(pid_t monitor)
{
struct yfkm2 *yfkm2_tmp;
if (yfkm2_is_pid_running(monitor) == 0) {
yfkm2_tmp = kmalloc(sizeof(*yfkm2_tmp), GFP_KERNEL);
yfkm2_tmp->monitor = monitor;
yfkm2_tmp->notifyme = 0;
write_lock(&yfkm2_lock);
list_add(&yfkm2_tmp->list, &yfkm2_list);
write_unlock(&yfkm2_lock);
return 0;
}
return 1;
}
/*
* asmlinkage long sys_yfkm2_notifyme(pid_t monitor, pid_t notifyme)
*
* The system call that looks for monitor at yfkm2_list->monitor. If found
* store notifyme at yfkm2_list->notifyme. It also starts the kernel thread
* if it is not running.
*
* return 0 if pid is running
* return 1 if pid is not running
*/
asmlinkage long sys_yfkm2_notifyme(pid_t monitor, pid_t notifyme)
{
struct yfkm2 *yfkm2_tmp;
bool found_monitored_pid = false;
write_lock(&yfkm2_lock); /* Write lock */
list_for_each_entry(yfkm2_tmp, &yfkm2_list, list) {
if (yfkm2_tmp->monitor == monitor) {
yfkm2_tmp->notifyme = notifyme;
found_monitored_pid = true;
break;
}
}
write_unlock(&yfkm2_lock); /* Write unlock */
if (found_monitored_pid) {
if (atomic_read(&yfkm2_kthread_run_count) < 1) {
/* The counter is decreased at yfkm2_kthread()
* Before start, increase atomic run counter */
atomic_inc(&yfkm2_kthread_run_count);
kthread_run(&yfkm2_kthread, NULL, "yfkm2_kthread");
}
return 0;
} else {
return 1;
}
}
答案 0 :(得分:3)
您没有在任务列表上执行正确的锁定。例如,您的yfkm2_kill()
函数应为:
int yfkm2_kill(pid_t pid)
{
struct task_struct *q;
rcu_read_lock();
q = find_task_by_vpid(pid);
if (q)
get_task_struct(q);
rcu_read_unlock();
if (q == NULL)
return 1;
force_sig(SIGKILL, q);
put_task_struct(q);
return 0;
}
...但你的整个设计似乎非常活泼。例如,->monitor
任务之一可以退出,并在内核线程通知之前用具有相同PID的新的不同任务替换。
答案 1 :(得分:1)
您似乎在没有所需锁定的情况下运行代码
这样的事情往往有效,除了它们偶尔崩溃(可能很长一段时间)。
我不太了解这些函数,但似乎应该在读取模式下在某些RCU锁(可能是保护进程列表的锁)下调用find_task_by_vpid
。