我在Linux 3.8.8内核中遍历TCP套接字的sk_write_queue时,内核报出如下错误:
BUG: unable to handle kernel paging request at 00b85055
我在内核中的代码逻辑如下:
在步骤(5)中,有时会触发上面的Oops,问题代码为:
skb_is_nonlinear(skb);
这说明此时skb指针无效或为NULL。难道lock_sock不能保护整个遍历过程吗?
代码:
/*
 * sys_tcp_get - look for a tagged 4-byte record in a socket's unsent data.
 * @fd: file descriptor of the socket to inspect.
 * @id: user-space address that receives the fourth byte of the record.
 *
 * Walks the socket's write queue starting at the send head and scans each
 * skb's linear data in consecutive 4-byte groups (counted from the start of
 * that skb; a partial trailing group is discarded).  The first group whose
 * leading bytes are 1, 2, 3 wins and its last byte is stored to *@id.
 *
 * Returns 0 when a record was found and copied out, -EFAULT when @id is not
 * writable, and -1 in every other case: bad descriptor, socket not in
 * ESTABLISHED state, pending error or send shutdown, nothing queued to send,
 * a non-linear skb in the queue, or no matching record.
 *
 * NOTE(review): @id comes from user space and should carry the __user
 * annotation in both this definition and its prototype.
 */
asmlinkage long sys_tcp_get(int fd, int *id)
{
	int err;
	int found = 0;
	int value = 0;
	unsigned int i, j;
	struct socket *sock;
	struct sock *sk;
	struct sk_buff_head *queue;
	struct sk_buff *skb;
	unsigned char buf[4] = {0};

	sock = sockfd_lookup(fd, &err);
	if (sock == NULL)
		return -1;

	/*
	 * sockfd_lookup() only stores to err when it fails, so give err a
	 * defined value here.  Every path below that does not find a record
	 * reports -1, including the "queue scanned, nothing matched" case
	 * that previously returned an uninitialized value.
	 */
	err = -1;

	sk = sock->sk;
	if (sk == NULL)
		goto free_socket;

	lock_sock(sk);

	/* Only an ESTABLISHED socket that can still send is inspected. */
	if ((1 << sk->sk_state) & ~TCPF_ESTABLISHED)
		goto release_socket;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto release_socket;

	/* A NULL send head means there is no unsent skb to start from. */
	if (tcp_send_head(sk) == NULL)
		goto release_socket;

	/* The queue head doubles as the list terminator. */
	queue = &sk->sk_write_queue;
	for (skb = tcp_send_head(sk);
	     skb != (struct sk_buff *)queue;
	     skb = skb->next) {
		/* skb->data only covers the linear area; give up on frags. */
		if (skb_is_nonlinear(skb))
			goto release_socket;

		j = 0;
		for (i = 0; i < skb->len; i++) {
			buf[j++] = skb->data[i];
			if (j < 4)
				continue;

			if (buf[0] == 1 && buf[1] == 2 && buf[2] == 3) {
				value = buf[3];
				found = 1;
				goto release_socket;
			}
			j = 0;
		}
	}

release_socket:
	release_sock(sk);
	/*
	 * Copy the result out with put_user() instead of dereferencing the
	 * user pointer directly, and do it after dropping the socket lock so
	 * a user-space page fault is not taken while the socket is locked.
	 * put_user() yields 0 on success or -EFAULT.
	 */
	if (found)
		err = put_user(value, id);
free_socket:
	sockfd_put(sock);
	return err;
}