通过 netlink 从内核向用户空间单播数据失败

时间:2015-12-03 12:56:42

标签: c module linux-kernel netlink

我是内核开发的新手,在通过 netlink 套接字将数据从内核空间传输到用户空间时遇到了问题。内核模块会调用 send_nat() 函数,把用户自定义的 struct nat_mntr 写入 netlink 套接字。但是 nlmsg_unicast() 一直失败,即使我在 cfg 中尝试了不同的配置也是如此。请帮我找出代码中的错误。

userdefined.c

    /* Request state shared between the netlink input callback rr(), which
     * writes both variables, and send_nat(), which reads them. Plain ints:
     * nothing here synchronises concurrent access. */

    /* 1 while no request has been received; rr() clears it to 0. send_nat()
     * returns without sending while it is non-zero. */
    int no_data_request = 1;
    EXPORT_SYMBOL(no_data_request);
    /* Taken from nlmsg_pid of the last request seen by rr(); used by send_nat()
     * as the unicast destination. */
    int request_pid = 0; // PID of requesting process
    EXPORT_SYMBOL(request_pid);

#define MYPROTO 31

/* Netlink input callback; its definition follows send_nat() in this file. */
void rr(struct sk_buff *skb);

/**
 * send_nat() - unicast one struct nat_mntr to the process that requested it.
 * @nl_sk:       kernel netlink socket to send on; may be NULL, in which case
 *               the socket created by an earlier call of this function is used.
 * @nat_data:    record to send; the call is a no-op when it is NULL.
 * @pid:         unused; the destination is the global request_pid set by rr().
 * @group:       unused; the message is sent unicast (dst_group = 0).
 * @flags:       allocation flags used for the outgoing skb.
 * @sock_closed: in/out state flag. 1 means "no socket yet": one is created
 *               here and the flag is set to 0. 0 means "socket exists". Any
 *               other value aborts the call.
 *
 * Nothing is sent until rr() has cleared no_data_request. Every failure is
 * logged with printk() and the function simply returns.
 *
 * NOTE(review): the socket created here is never released and the shared
 * globals are accessed without locking; both need attention by the caller.
 */
void send_nat(struct sock *nl_sk, struct nat_mntr *nat_data, int pid, int group, gfp_t flags, int *sock_closed){

        /* nl_sk is passed by value, so the caller never sees a socket created
         * below. Remember it here so later calls can still find it. */
        static struct sock *created_sk;
        struct nlmsghdr *nlh;
        struct sk_buff *skb_out;
        int msg_size;
        int res;

        printk(KERN_DEBUG "%s: Entered \n", __func__);
        if (nat_data == NULL) {
                printk(KERN_DEBUG "%s: nat_data is NULL: Leaving \n", __func__);
                return;
        }
        printk(KERN_DEBUG "%s: nat_data is filled \n", __func__);

        if (sock_closed == NULL) {
                printk(KERN_DEBUG "%s: sock_closed is NULL: Leaving \n", __func__);
                return;
        }

        if (nl_sk == NULL)
                printk(KERN_DEBUG "%s: nl_sk is NULL  \n", __func__);
        else
                printk(KERN_DEBUG "%s: nl_sock is not null \n", __func__);

        if (*sock_closed == 1) {
                struct netlink_kernel_cfg cfg = {
                        .groups         = 1,
                        .input          = rr,
                };

                printk(KERN_DEBUG "%s: sock_closed == 1, creating socket \n", __func__);
                nl_sk = netlink_kernel_create(&init_net, MYPROTO, &cfg);
                if (!nl_sk) {
                        printk(KERN_DEBUG "%s: Error creating socket: sock_closed = %d:  Leaving  \n", __func__, *sock_closed);
                        return;
                }
                created_sk = nl_sk;
                *sock_closed = 0;
                printk(KERN_DEBUG "%s: Socket created successfully: sock_closed = %d  \n", __func__, *sock_closed);
        } else if (*sock_closed == 0) {
                printk(KERN_DEBUG "%s:Already created socket.  sock_closed = 0 \n", __func__);
                if (!nl_sk)
                        nl_sk = created_sk;
        } else {
                printk(KERN_DEBUG "%s: sock_closed status is unknown: sock_closed = %d Leaving  \n", __func__, *sock_closed);
                return;
        }

        /* Never hand a NULL socket to nlmsg_unicast(). */
        if (!nl_sk) {
                printk(KERN_DEBUG "%s: No usable netlink socket: Leaving \n", __func__);
                return;
        }

        if (no_data_request) {
                printk(KERN_DEBUG "%s: No one has requested data: Leaving  \n", __func__);
                return;
        }
        printk(KERN_DEBUG "%s: Process %d requested the data\n", __func__, request_pid);

        /* Payload is the whole structure, not the size of the pointer to it. */
        msg_size = sizeof(*nat_data);
        skb_out = nlmsg_new(msg_size, flags);
        if (!skb_out) {
                printk(KERN_DEBUG "%s: Failed to skb_out = nlmsg_new(msg_size, flags): Leaving \n", __func__);
                return;
        }

        nlh = nlmsg_put(skb_out, 0, 0, NLMSG_DONE, msg_size, 0); /* NLMSG_DONE */
        if (!nlh) {
                printk(KERN_DEBUG "%s: Failed nlh = nlmsg_put(skb_out, 0, 0,  NLMSG_DONE, msg_size, 0): Leaving  \n", __func__);
                /* The skb is still ours on this path; free it to avoid a leak. */
                nlmsg_free(skb_out);
                return;
        }
        printk(KERN_DEBUG "%s: Successfull nlh = nlmsg_put(skb_out, 0, 0,  NLMSG_DONE, msg_size, 0)  \n", __func__);

        NETLINK_CB(skb_out).dst_group = 0; /* not in mcast group */

        /* memcpy() cannot fail, so there is nothing to check here. */
        memcpy(nlmsg_data(nlh), nat_data, msg_size);

        /* nlmsg_unicast() takes ownership of skb_out whether or not it
         * succeeds, so it must not be freed again on the error path. */
        res = nlmsg_unicast(nl_sk, skb_out, request_pid);
        if (res < 0) {
                printk(KERN_DEBUG "%s: Failed to  nlmsg_unicast(nl_sk, skb_out, request_pid), error %d: Leaving \n", __func__, res);
                return;
        }
        printk(KERN_DEBUG "%s: Data sent successfully : Leaving \n", __func__);
}


// Callback of kernel socket. 
void rr(struct sk_buff *skb){
        printk(KERN_DEBUG "%s: Entered \n", __FUNCTION__);
        struct nlmsghdr *nlh;
        nlh = (struct nlmsghdr *)skb->data;
        printk(KERN_DEBUG "Request received \n");
        request_pid = nlh->nlmsg_pid; /* pid of sending process */
        no_data_request = 0; // Someone is out there
        printk(KERN_DEBUG "%s: Leaving:\n", __FUNCTION__);
}

kernel_module.c

/* Passed as the `group` argument of send_nat(), which does not use it. */
#define NAT_GROUP 21
/* Socket handed to send_nat() as nl_sk. Starts out NULL, and because it is
 * passed by value send_nat() cannot store a socket it creates back here. */
struct sock *nl_sk_ud = NULL;
EXPORT_SYMBOL(nl_sk_ud);
/* 1 = netlink socket not created yet; send_nat() sets it to 0 once it has
 * created one. */
int sock_closed = 1;
EXPORT_SYMBOL(sock_closed);
/* Most recent record returned by get_info(); this is what send_nat() sends. */
struct nat_mntr *data = NULL;
EXPORT_SYMBOL(data);

/* Illustrative call site; the surrounding code is elided in the original post. */
any_kernel_function(){

....

/* Collect the record, then hand it to send_nat(). nl_sk_ud is passed by
 * value, so it is not updated by the call; sock_closed is passed by address
 * and is updated. */
data = get_info(skb, 0, l3proto, l4proto, &target, mtype); // Returns pointer to struct nat_mntr
send_nat(nl_sk_ud, data,  0, NAT_GROUP, 0, &sock_closed);

....

}

1 个答案:

答案 0 :(得分:1)

如果您的内核模块发回一个应答,那么用户空间的请求会收到两个响应:一个 ACK(由内核自动生成)和实际的应答。

我认为人们没有注意到这一点,因为通常内核模块会在ACK之前快速回答。因此,用户空间客户端接收答案并忽略接下来的任何内容(包括ACK)直到下一个请求。

在您的代码中,内核模块不会立即回答。它等待数据可用并稍后提取。这可能发生了:

  1. 用户空间客户端发送请求。
  2. 内核模块存储pid。
  3. Linux向客户端回复确认。
  4. 客户端收到答案,错误地将其解析为伪造的nat_mntr而不是ACK,然后关闭套接字。
  5. 内核模块在获得数据后发送答案。 nlmsg_unicast()返回错误代码-111,因为客户端不再侦听。
解决此问题的一种方法是让客户端预期收到两个数据包,并忽略第一个数据包(即 ACK)。

    BTW:这不是你代码的唯一问题。

    • 当你这样做时

      nl_sk = netlink_kernel_create(&init_net, MYPROTO, &cfg);

    您将套接字赋给了一个局部变量(函数参数 nl_sk)。如果再次调用该函数,即使 sock_closed 已被置为 0(表示套接字已创建),传入的 nl_sk 仍然是 NULL。

    • 您从未释放该套接字。由于在内核空间中无法在同一协议上打开两个套接字,后续的套接字创建将一直失败,直到您重新启动。(例如,如果您需要重新编译并重新加载模块,这会对您产生影响。)
    • 代码存在竞态条件。至少,no_data_request 和 request_pid 应该是原子整数。
    • 不要这样做:

      nlh = (struct nlmsghdr *)skb->data;

    这样更好:

    nlh = nlmsg_hdr(skb);
    

    这甚至更好(因为它为您做了一些验证和Netlink文书工作):

    netlink_rcv_skb(skb, &rr2);