使用Netlink的Netfilter模块在运行超过12小时后崩溃

时间:2015-11-06 01:58:55

标签: linux crash kernel netfilter netlink

我的内核netfilter模块通过netlink通道与用户空间程序通信,在运行大约12个小时之后,我收到了一份Ubuntu Linux的崩溃报告。netlink被用作警报通道,内核netfilter模块会定期(大约每5分钟一次)通过它向用户应用程序发送消息。

崩溃报告如下:

ProblemType: KernelOops
Annotation: Your system might become unstable now and might need to be restarted.
Date: Fri Nov  6 04:43:48 2015
Failure: oops
OopsText:
 NMI watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [Xorg:2178]
 Modules linked in: netfilter(OE) bnep rfcomm bluetooth bridge stp llc joydev ipmi_ssif
ipmi_devintf intel_rapl iosf_mbi x86_pkg_temp_thermal intel_powerclamp coretemp kvm
dm_multipath scsi_dh crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel
aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd ioatdma sb_edac edac_core lpc_ich
mei_me mei wmi shpchp 8250_fintek ipmi_si ipmi_msghandler acpi_power_meter acpi_pad
mac_hid parport_pc ppdev lp parport vxlan ip6_udp_tunnel udp_tunnel hid_generic usbhid
hid igb tg3 i2c_algo_bit ahci dca ptp libahci pps_core dm_mirror dm_region_hash dm_log

 CPU: 0 PID: 2178 Comm: Xorg Tainted: G      D W IOE  3.19.0-31-generic #36~14.04.1-Ubuntu
 Hardware name: Intel Corporation S2600CW/S2600CW, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015
 task: ffff881024af1d70 ti: ffff88101dd28000 task.ti: ffff88101dd28000
 RIP: 0010:[<ffffffff817b69bf>]  [<ffffffff817b69bf>] _raw_spin_lock+0x2f/0x60
 RSP: 0018:ffff88101dd2bb20  EFLAGS: 00000202
 RAX: 0000000000000c19 RBX: 0000000000000000 RCX: 0000000000004808
 RDX: 0000000000004806 RSI: 0000000000004808 RDI: ffff880f111c29a0
 RBP: ffff88101dd2bc28 R08: 000000000000000e R09: ffff88101dd2bfd8
 R10: ffff880f150a1e00 R11: 000000000003fc18 R12: ffff880f10380700
 R13: 0000000000008ec0 R14: 0000000000000618 R15: 0000000000000000
 FS:  00007f8743a1b8c0(0000) GS:ffff88103e400000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f40240d8000 CR3: 000000101dd5f000 CR4: 00000000001407f0
 Stack:
  ffffffff81749d22 dead000000200200 ffff88101dd7ed40 ffff881024af1d70
  ffff88101dd2bbe8 ffff881024af1d70 ffff880f111c2958 0000000000000001
  dead000000000000 dead000000000000 ffff880f150a1e00 ffff880f111c29a0
 Call Trace:
  [<ffffffff81749d22>] ? unix_stream_recvmsg+0x152/0x8d0
  [<ffffffff813527f1>] ? aa_sock_msg_perm+0x81/0x150
  [<ffffffff812008c0>] ? poll_select_copy_remaining+0x130/0x130
  [<ffffffff8168f1c2>] sock_recvmsg+0xa2/0xd0
  [<ffffffff812008c0>] ? poll_select_copy_remaining+0x130/0x130
  [<ffffffff812008c0>] ? poll_select_copy_remaining+0x130/0x130
  [<ffffffff8168ee5e>] ? copy_msghdr_from_user+0x15e/0x1f0
  [<ffffffff812008c0>] ? poll_select_copy_remaining+0x130/0x130
  [<ffffffff8168efe3>] ___sys_recvmsg+0xf3/0x230
  [<ffffffff813aed80>] ? timerqueue_add+0x60/0xb0
  [<ffffffff810dd189>] ? enqueue_hrtimer+0x29/0x90
  [<ffffffff810dd768>] ? __hrtimer_start_range_ns+0x278/0x3a0
  [<ffffffff81690402>] __sys_recvmsg+0x42/0x80
  [<ffffffff81690452>] SyS_recvmsg+0x12/0x20
  [<ffffffff817b6dcd>] system_call_fastpath+0x16/0x1b
 Code: 00 b8 00 00 02 00 f0 0f c1 07 89 c2 c1 ea 10 66 39 c2 75 01 c3 89 d1 0f b7 f2 b8 00 80 00 00 eb 0a 0f 1f 00 f3 90 83 e8 01 74 20 <0f> b7 17 41 89 d0 41 31 c8 41 81 e0 fe ff 00 00 75 e7 55 0f b7 

Package: linux-image-3.19.0-31-generic 3.19.0-31.36~14.04.1
SourcePackage: linux
Tags: kernel-oops
Uname: Linux 3.19.0-31-generic x86_64

您能帮忙解释一下崩溃的原因吗?

这是我的netfilter代码:

int init_module()
{       
    nfho.hook = (void *)&hook_func;

    nfho.hooknum = 0; 
    nfho.pf = PF_INET; 
    nfho.priority = NF_IP_PRI_FIRST; 

    nf_register_hook(&nfho);

    sub_init();

    return 0;
}

/*
 * Creates the kernel-side netlink socket for protocol NETLINK_USER in
 * init_net, with nl_recv_msg as the receive callback, and stores it in nl_sk.
 *
 * Returns 0 on success, or -ENOMEM if netlink_kernel_create() returned NULL
 * (a negative errno rather than a bare -1, so the value can be propagated
 * as a module-load error).
 *
 * NOTE(review): this function is marked __init but is called from
 * init_module(), which is not -- confirm the section annotation is intended.
 */
static int __init sub_init(void)
{
    struct netlink_kernel_cfg cfg =
        {
            .input = nl_recv_msg,
        };

    nl_sk = netlink_kernel_create(&init_net, NETLINK_USER, &cfg);
    if (!nl_sk)
    {
        printk(KERN_ALERT "Error creating socket.\n");
        return -ENOMEM;
    }

    return 0;
}

// Function to be called by hook.
//
// For every IPv4 TCP packet, forwards the fixed-size part of its IP header
// (sizeof(struct iphdr) bytes) to user space through send_msg() and drops the
// packet; every other packet is accepted.
//
// Returns NF_DROP for IPv4 TCP packets, NF_ACCEPT otherwise. hooknum, in, out
// and okfn are not used.
//
// NOTE(review): filter_on, which nl_recv_msg() toggles, is never checked
// here -- confirm whether dropping was meant to be conditional on it.
static unsigned int hook_func(unsigned int hooknum, struct sk_buff *skb, 
    const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
    struct iphdr *iph;

    if (skb == NULL)
    {
        return NF_ACCEPT;
    }

    iph = ip_hdr(skb);

    if ((iph != NULL) && (iph->protocol == IPPROTO_TCP) && (iph->version == 4))
    {
        /* send_msg() copies the bytes into its own skb, so the header can be
         * handed over directly without a temporary stack buffer. */
        send_msg((unsigned char *)iph, (int)sizeof(struct iphdr));

        return NF_DROP;
    }

    return NF_ACCEPT;
}

static void send_msg(unsigned char *msg, int msg_size)
{
    int res;
    struct nlmsghdr *send_nlh = NULL;   

    if (msg_size <= 0)
    {
        printk(KERN_ERR "Invalid msg size of: %d\n", msg_size);
        return;
    }

    if (msg_size > 1024)
    {
        printk(KERN_ERR "Msg size of: %d larger than 1024!\n", msg_size);
        msg_size = 1024;
    }

    if (msg_size > 0)
    {
        printk(KERN_INFO "Sending number of bytes: %d\n", msg_size);

        skb_out = nlmsg_new(msg, 0);
        if (!skb_out)
        {
            printk(KERN_ERR "Failed to allocate new skb\n");
            return;
        }
        NETLINK_CB(skb_out).dst_group = 0;  /* not in mcast group */

        send_nlh = nlmsg_put(skb_out, 0, 0, NLMSG_DONE, msg_size, 0);
        if (send_nlh == NULL)
        {
            printk(KERN_INFO "Error in creating nlmsg_put!\n");
            return;
        }

        memcpy(nlmsg_data(send_nlh), msg, msg_size);

        res = netlink_unicast(nl_sk, skb_out, pid, MSG_DONTWAIT);
        if (res < 0)
        {
            printk(KERN_INFO "Error while sending back to user\n");
        }                       
    }
}

nl_recv_msg代码如下:

static void nl_recv_msg(struct sk_buff *skb)
{
    char *nlmsg_data_ptr;
    char *data; 
    struct nlmsghdr *nlh;
    int sign_length = 0;
    char *reply_status_msg;

    if (skb == NULL) {
        printk(KERN_INFO "NULL skb found!\n");
        return;
    }

    nlh = (struct nlmsghdr *)skb->data;         
    pid = nlh->nlmsg_pid; /* pid of sending process */  
    nlmsg_data_ptr = (char *)nlmsg_data(nlh);

    if (nlmsg_data_ptr == NULL) {
        printk(KERN_INFO "NULL nlmsg_data_ptr found!\n");
        return;
    }

    printk(KERN_INFO "Received msg payload: \"%s\", length: \"%d\"\n", nlmsg_data_ptr, (int)strlen(nlmsg_data_ptr));

    if (strncmp(nlmsg_data_ptr, "on", strlen("on")) == 0)
    {
        filter_on = true;

        printk(KERN_INFO "Turn on filter!\n");
    }
    else if (strncmp(nlmsg_data_ptr, "off", strlen("off")) == 0)
    {
        filter_on = false;      
        printk(KERN_INFO "Turn off filter!\n");
    }
    else
    {
        printk(KERN_INFO "Unknown command!\n");
    }
}

0 个答案:

没有答案