我在 Ubuntu Linux 上运行一个内核 netfilter 模块,该模块通过 netlink 通道与用户态程序通信;运行大约 12 个小时后,系统生成了一份崩溃报告。netlink 被用作告警通道,内核 netfilter 模块会定期(大约每 5 分钟)通过它向用户态应用程序发送消息。
崩溃报告如下:
ProblemType: KernelOops
Annotation: Your system might become unstable now and might need to be restarted.
Date: Fri Nov 6 04:43:48 2015
Failure: oops
OopsText:
NMI watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [Xorg:2178]
Modules linked in: netfilter(OE) bnep rfcomm bluetooth bridge stp llc joydev ipmi_ssif
ipmi_devintf intel_rapl iosf_mbi x86_pkg_temp_thermal intel_powerclamp coretemp kvm
dm_multipath scsi_dh crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel
aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd ioatdma sb_edac edac_core lpc_ich
mei_me mei wmi shpchp 8250_fintek ipmi_si ipmi_msghandler acpi_power_meter acpi_pad
mac_hid parport_pc ppdev lp parport vxlan ip6_udp_tunnel udp_tunnel hid_generic usbhid
hid igb tg3 i2c_algo_bit ahci dca ptp libahci pps_core dm_mirror dm_region_hash dm_log
CPU: 0 PID: 2178 Comm: Xorg Tainted: G D W IOE 3.19.0-31-generic #36~14.04.1-Ubuntu
Hardware name: Intel Corporation S2600CW/S2600CW, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015
task: ffff881024af1d70 ti: ffff88101dd28000 task.ti: ffff88101dd28000
RIP: 0010:[<ffffffff817b69bf>] [<ffffffff817b69bf>] _raw_spin_lock+0x2f/0x60
RSP: 0018:ffff88101dd2bb20 EFLAGS: 00000202
RAX: 0000000000000c19 RBX: 0000000000000000 RCX: 0000000000004808
RDX: 0000000000004806 RSI: 0000000000004808 RDI: ffff880f111c29a0
RBP: ffff88101dd2bc28 R08: 000000000000000e R09: ffff88101dd2bfd8
R10: ffff880f150a1e00 R11: 000000000003fc18 R12: ffff880f10380700
R13: 0000000000008ec0 R14: 0000000000000618 R15: 0000000000000000
FS: 00007f8743a1b8c0(0000) GS:ffff88103e400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f40240d8000 CR3: 000000101dd5f000 CR4: 00000000001407f0
Stack:
ffffffff81749d22 dead000000200200 ffff88101dd7ed40 ffff881024af1d70
ffff88101dd2bbe8 ffff881024af1d70 ffff880f111c2958 0000000000000001
dead000000000000 dead000000000000 ffff880f150a1e00 ffff880f111c29a0
Call Trace:
[<ffffffff81749d22>] ? unix_stream_recvmsg+0x152/0x8d0
[<ffffffff813527f1>] ? aa_sock_msg_perm+0x81/0x150
[<ffffffff812008c0>] ? poll_select_copy_remaining+0x130/0x130
[<ffffffff8168f1c2>] sock_recvmsg+0xa2/0xd0
[<ffffffff812008c0>] ? poll_select_copy_remaining+0x130/0x130
[<ffffffff812008c0>] ? poll_select_copy_remaining+0x130/0x130
[<ffffffff8168ee5e>] ? copy_msghdr_from_user+0x15e/0x1f0
[<ffffffff812008c0>] ? poll_select_copy_remaining+0x130/0x130
[<ffffffff8168efe3>] ___sys_recvmsg+0xf3/0x230
[<ffffffff813aed80>] ? timerqueue_add+0x60/0xb0
[<ffffffff810dd189>] ? enqueue_hrtimer+0x29/0x90
[<ffffffff810dd768>] ? __hrtimer_start_range_ns+0x278/0x3a0
[<ffffffff81690402>] __sys_recvmsg+0x42/0x80
[<ffffffff81690452>] SyS_recvmsg+0x12/0x20
[<ffffffff817b6dcd>] system_call_fastpath+0x16/0x1b
Code: 00 b8 00 00 02 00 f0 0f c1 07 89 c2 c1 ea 10 66 39 c2 75 01 c3 89 d1 0f b7 f2 b8 00 80 00 00 eb 0a 0f 1f 00 f3 90 83 e8 01 74 20 <0f> b7 17 41 89 d0 41 31 c8 41 81 e0 fe ff 00 00 75 e7 55 0f b7
Package: linux-image-3.19.0-31-generic 3.19.0-31.36~14.04.1
SourcePackage: linux
Tags: kernel-oops
Uname: Linux 3.19.0-31-generic x86_64
能否帮忙解释一下这次崩溃的原因?
这是我的netfilter代码:
int init_module()
{
nfho.hook = (void *)&hook_func;
nfho.hooknum = 0;
nfho.pf = PF_INET;
nfho.priority = NF_IP_PRI_FIRST;
nf_register_hook(&nfho);
sub_init();
return 0;
}
/*
 * Create the kernel-side netlink socket (protocol NETLINK_USER) in
 * init_net and store it in nl_sk. Messages arriving from user space are
 * delivered to nl_recv_msg().
 *
 * Returns 0 on success, or -ENOMEM if the socket could not be created
 * (a bare -1 would be reported to the loader as -EPERM, which is
 * misleading).
 */
static int __init sub_init(void)
{
    struct netlink_kernel_cfg cfg = {
        .input = nl_recv_msg,
    };

    nl_sk = netlink_kernel_create(&init_net, NETLINK_USER, &cfg);
    if (!nl_sk) {
        printk(KERN_ALERT "Error creating socket.\n");
        return -ENOMEM;
    }

    return 0;
}
/*
 * Netfilter hook callback.
 *
 * For every IPv4 TCP packet, the fixed-size IP header (sizeof(struct iphdr)
 * bytes) is forwarded to user space through send_msg() and the packet is
 * dropped. All other packets are accepted.
 *
 * The header is passed to send_msg() directly: send_msg() copies the bytes
 * into its own netlink skb, so an intermediate stack buffer is not needed.
 *
 * NOTE(review): filter_on is toggled by nl_recv_msg() but is not consulted
 * here, so TCP packets are dropped regardless of its value - confirm that
 * this is intended.
 * NOTE(review): the first parameter is declared as 'unsigned int hooknum'
 * and the function is installed through a (void *) cast; verify that this
 * matches the nf_hookfn prototype of the kernel being built against.
 *
 * Returns NF_DROP for IPv4 TCP packets, NF_ACCEPT otherwise.
 */
static unsigned int hook_func(unsigned int hooknum, struct sk_buff *skb,
        const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
    struct iphdr *iph;

    if (skb == NULL)
        return NF_ACCEPT;

    iph = ip_hdr(skb);
    if (iph == NULL || iph->protocol != IPPROTO_TCP || iph->version != 4)
        return NF_ACCEPT;

    send_msg((unsigned char *)iph, sizeof(struct iphdr));
    return NF_DROP;
}
static void send_msg(unsigned char *msg, int msg_size)
{
int res;
struct nlmsghdr *send_nlh = NULL;
if (msg_size <= 0)
{
printk(KERN_ERR "Invalid msg size of: %d\n", msg_size);
return;
}
if (msg_size > 1024)
{
printk(KERN_ERR "Msg size of: %d larger than 1024!\n", msg_size);
msg_size = 1024;
}
if (msg_size > 0)
{
printk(KERN_INFO "Sending number of bytes: %d\n", msg_size);
skb_out = nlmsg_new(msg, 0);
if (!skb_out)
{
printk(KERN_ERR "Failed to allocate new skb\n");
return;
}
NETLINK_CB(skb_out).dst_group = 0; /* not in mcast group */
send_nlh = nlmsg_put(skb_out, 0, 0, NLMSG_DONE, msg_size, 0);
if (send_nlh == NULL)
{
printk(KERN_INFO "Error in creating nlmsg_put!\n");
return;
}
memcpy(nlmsg_data(send_nlh), msg, msg_size);
res = netlink_unicast(nl_sk, skb_out, pid, MSG_DONTWAIT);
if (res < 0)
{
printk(KERN_INFO "Error while sending back to user\n");
}
}
}
nl_recv_msg代码如下:
/*
 * Netlink input callback: handles a command message from user space.
 *
 * The sender's nlmsg_pid is remembered in pid (send_msg() unicasts alerts
 * to it), and the payload is interpreted as a text command:
 *   payload starting with "on"  -> filter_on = true
 *   payload starting with "off" -> filter_on = false
 *   anything else               -> logged as unknown
 *
 * The message comes from user space and is untrusted: the header is
 * validated against the skb length before any field is used, and the
 * payload is never assumed to be NUL-terminated - all string handling is
 * bounded by the payload length taken from the validated header.
 *
 * NOTE(review): nlmsg_pid is filled in by the sender; consider using
 * NETLINK_CB(skb).portid, which is set by the kernel, as the reply address.
 */
static void nl_recv_msg(struct sk_buff *skb)
{
    struct nlmsghdr *nlh;
    char *nlmsg_data_ptr;
    int text_len;

    if (skb == NULL) {
        printk(KERN_INFO "NULL skb found!\n");
        return;
    }

    nlh = (struct nlmsghdr *)skb->data;
    /* Reject truncated messages and headers whose length field lies. */
    if (!nlmsg_ok(nlh, (int)skb->len)) {
        printk(KERN_INFO "Malformed netlink message!\n");
        return;
    }

    pid = nlh->nlmsg_pid; /* pid of sending process */
    nlmsg_data_ptr = (char *)nlmsg_data(nlh);

    /* Length of the text, never scanning past the end of the payload. */
    text_len = (int)strnlen(nlmsg_data_ptr, nlmsg_len(nlh));

    printk(KERN_INFO "Received msg payload: \"%.*s\", length: \"%d\"\n",
           text_len, nlmsg_data_ptr, text_len);

    if (text_len >= (int)strlen("on") &&
        strncmp(nlmsg_data_ptr, "on", strlen("on")) == 0)
    {
        filter_on = true;
        printk(KERN_INFO "Turn on filter!\n");
    }
    else if (text_len >= (int)strlen("off") &&
             strncmp(nlmsg_data_ptr, "off", strlen("off")) == 0)
    {
        filter_on = false;
        printk(KERN_INFO "Turn off filter!\n");
    }
    else
    {
        printk(KERN_INFO "Unknown command!\n");
    }
}