我编写了一个内核模块,需要将消息推送到用户空间。想法是内核模块缓冲消息并发信号通知用户空间程序,然后用户空间程序通过netlink套接字请求消息并获取消息。我的问题是,在缓冲90条消息后,机器锁定,我需要重新启动。我无法弄清楚我做错了什么,而且我在内核模块的其他地方成功使用了链表。
//
// A message from the kernel space to user space.
//
typedef struct CoreLinkMessage
{
unsigned int id;
char* data;
unsigned int length;
struct list_head list; // kernel's list structure
} CoreLinkMessage;
此函数初始化列表和信号量:
// Constructor
void
ctsRtNetlinkSystem_init( void )
{
sema_init(&cmd_sem_, 1);
INIT_LIST_HEAD(&cmd_list_.list);
}
这是必须导致问题的功能。它只是将项目推送到链表的尾部。如果我注释掉将项目添加到链接列表并仅调用信号,则程序无限期运行,因此我认为问题不在于信号。
//
// Allows the kernel module to buffer messages until requested by
// the user space
//
void
ctsRtNetlinkSystem_addMessage(char* data, unsigned int length)
{
CoreLinkMessage* msg;
int sem_ret;
BOOL doSignal = FALSE;
//
// LOCK the semaphore
//
sem_ret = down_interruptible(&cmd_sem_);
if ( !sem_ret )
{
msg = (CoreLinkMessage*)kmalloc(sizeof(CoreLinkMessage), GFP_KERNEL );
if ( msg == NULL )
{
PRINTF(CTSMSG_INFO
"ctsRtNetlinkSystem_addMessage failed to allocate memory! \n" );
goto unlock;
}
memset( msg, 0, sizeof(CoreLinkMessage) );
msg->data = (char*)kmalloc( length, GFP_KERNEL );
if ( msg->data == NULL )
{
kfree( msg );
PRINTF(CTSMSG_INFO
"ctsRtNetlinkSystem_addMessage failed to allocate data memory!\n" );
goto unlock;
}
memcpy( msg->data, data, length );
msg->length = length;
lastMessageId_ += 1;
msg->id = lastMessageId_;
list_add_tail(&(msg->list), &cmd_list_.list);
doSignal = TRUE;
unlock:
up( &cmd_sem_ );
if ( doSignal )
sendMessageSignal( msg->id );
}
else
{
PRINTF(CTSMSG_INFO
"CtsRtNetlinkSystem_addMessage -- failed to get semaphore\n" );
}
}
//
// Signal the user space that a message is waiting. Pass along the message
// id
//
static BOOL
sendMessageSignal( unsigned int id )
{
int ret;
struct siginfo info;
struct task_struct *t;
memset(&info, 0, sizeof(struct siginfo));
info.si_signo = SIGNAL_MESSAGE;
info.si_code = SI_QUEUE; // this is bit of a trickery:
// SI_QUEUE is normally used by sigqueue
// from user space,
// and kernel space should use SI_KERNEL.
// But if SI_KERNEL is used the real_time data
// is not delivered to the user space signal
// handler function.
// tell the user space application the index of the message
// real time signals may have 32 bits of data.
info.si_int = id;
rcu_read_lock();
//find the task_struct associated with this pid
t = // find_task_by_pid_type( PIDTYPE_PID, registeredPid_ );
// find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
pid_task(find_vpid(registeredPid_), PIDTYPE_PID);
if(t == NULL)
{
PRINTF(CTSMSG_INFO
"CtsRtNetlinkSystem::sendMessageSignal -- no such pid\n");
rcu_read_unlock();
registeredPid_ = 0;
return FALSE;
}
rcu_read_unlock();
//send the signal
ret = send_sig_info(SIGNAL_MESSAGE, &info, t);
if (ret < 0)
{
PRINTF(CTSMSG_INFO
"CtsRtNetlinkSystem::sendMessageSignal -- \n"
"\t error sending signal %d \n", ret );
return FALSE;
}
return TRUE;
}
我目前正在虚拟机上测试该程序,因此我创建了一个每7秒钟滴答一次的计时器并向缓冲区添加一条消息。
//
// Create a timer to call the process thread
// with nanosecond resolution.
//
static void
createTimer(void)
{
hrtimer_init(
&processTimer_, // instance of process timer
CLOCK_MONOTONIC, // Pick a specific clock. CLOCK_MONOTONIC is
// guaranteed to move forward, no matter what.
// It's akin to jiffies tick count
// CLOCK_REALTIME matches the current real-world time
HRTIMER_MODE_REL ); // Timer mode (HRTIMER_ABS or HRTIMER_REL)
processTimer_.function = &cyclic_task;
processTimerNs_ = ktime_set(1, FREQUENCY_NSEC);
//
// Start the timer. It will callback the .function
// when the timer expires.
//
hrtimer_start(
&processTimer_, // instance of process timer
processTimerNs_, // time, nanosecconds
HRTIMER_MODE_REL ); // HRTIMER_REL indicates that time should be
// interpreted relative
// HRTIMER_ABS indicates time is an
// absolute value
}
static enum hrtimer_restart
cyclic_task(struct hrtimer* timer)
{
char msg[255];
sprintf(msg, "%s", "Testing the buffer.");
ctsRtNetlink_send( &msg[0], strlen(msg) );
hrtimer_forward_now(
&processTimer_,
processTimerNs_ );
return HRTIMER_RESTART;
}
提前感谢您的帮助。
答案 0 :(得分:0)
分配的内存不足
确保为字符串长度+ 1分配足够的内存来存储它的终结符
在发送时,可能需要length + 1
。
// ctsRtNetlink_send( &msg[0], strlen(msg) );
ctsRtNetlink_send( &msg[0], strlen(msg) + 1); // +1 for \0
答案 1 :(得分:0)
虽然问题的代码流程不是很清楚,但我觉得列表添加可能不是问题。您必须在其他位置处理列表,您必须从列表中删除消息等。我怀疑在列表添加和删除之间的某处存在某种死锁情况。此外,请检查将消息复制到的位置。用户空间并从列表中删除并释放它。我想,你不是试图直接从用户空间引用你的mesg作为上面建议的评论员之一。
另外,
memset( msg, 0, sizeof(CoreLinkMessage) );
if ( msg == NULL )
{
这两行必须颠倒其他的顺序,如果alloc失败,你的系统就注定了。
答案 2 :(得分:0)
使用GFP_ATOMIC代替GFP_KERNEL for kmalloc解决了这个问题。到目前为止三天的运行时间,并没有崩溃。我怀疑一个人无法在由hrtimer触发的线程中睡觉。
msg = (CoreLinkMessage*)kmalloc(sizeof(CoreLinkMessage), GFP_ATOMIC );
感谢大家的见解!