Note: these notes are based on kernel 3.13.1 and on debugging the specific code paths I ran into while studying.

1. tcp_v4_rcv()

/*
 * From net/ipv4/tcp_ipv4.c
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
    sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
    if (!sk)
        goto no_tcp_socket;

    if (!sock_owned_by_user(sk)) {
        {
            if (!tcp_prequeue(sk, skb))               /* -------- recv case (1) */
                ret = tcp_v4_do_rcv(sk, skb);         /* -------- recv case (2) */
        }
    } else if (unlikely(sk_add_backlog(sk, skb,
                        sk->sk_rcvbuf + sk->sk_sndbuf))) {  /* ---- recv case (3) */
        ...
    }

    sock_put(sk);
    return ret;
}

static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
                                             struct sk_buff *skb,
                                             const __be16 sport,
                                             const __be16 dport)
{
    struct sock *sk = skb_steal_sock(skb);
    ...
}

static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
    if (skb->sk) {
        struct sock *sk = skb->sk;

        skb->destructor = NULL;
        skb->sk = NULL;
        return sk;
    }
    return NULL;
}

---------------------------------------------------------- recv case (1)

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8)                               --ANK
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
    struct tcp_sock *tp = tcp_sk(sk);

    if (sysctl_tcp_low_latency || !tp->ucopy.task)
        return false;

    if (skb->len <= tcp_hdrlen(skb) &&
        skb_queue_len(&tp->ucopy.prequeue) == 0)
        return false;

    __skb_queue_tail(&tp->ucopy.prequeue, skb);
    tp->ucopy.memory += skb->truesize;
    if (tp->ucopy.memory > sk->sk_rcvbuf) {
        struct sk_buff *skb1;

        while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
            sk_backlog_rcv(sk, skb1);
        }

        tp->ucopy.memory = 0;
    } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
        wake_up_interruptible_sync_poll(sk_sleep(sk),
                                        POLLIN | POLLRDNORM | POLLRDBAND);
        if (!inet_csk_ack_scheduled(sk))
            inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
                                      (3 * tcp_rto_min(sk)) / 4,
                                      TCP_RTO_MAX);
    }
    return true;
}

Note:
(1) __skb_queue_tail(&tp->ucopy.prequeue, skb) appends the skb to the tp->ucopy.prequeue list.
(2) wake_up_interruptible_sync_poll() wakes up tcp_recvmsg() so it can read the data.

---------------------------------------------------------- recv case (2)

1. tcp_v4_do_rcv()

/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
    struct sock *rsk;

    if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
        struct dst_entry *dst = sk->sk_rx_dst;

        sock_rps_save_rxhash(sk, skb);
        if (dst) {
            if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
                dst->ops->check(dst, 0) == NULL) {
                dst_release(dst);
                sk->sk_rx_dst = NULL;
            }
        }
        tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
        return 0;
    }
    ...
}

2. tcp_queue_rcv()

static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
                                      int hdrlen, bool *fragstolen)
{
    int eaten;
    struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);

    __skb_pull(skb, hdrlen);
    eaten = (tail &&
             tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
    tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
    if (!eaten) {
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        skb_set_owner_r(skb, sk);
    }
    return eaten;
}

---------------------------------------------------------- recv case (3)

I haven't figured this one out at all yet =-=
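To make the three dispatch outcomes above concrete, here is a toy userspace model (plain C, not kernel code) of the decision tcp_v4_rcv() and tcp_prequeue() make together. The struct, field, and function names (toy_sock, dispatch, reader_waiting, ...) are all invented for illustration; only the decision order mirrors the kernel excerpts.

/* Toy model of the tcp_v4_rcv() dispatch; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

enum path { PATH_PREQUEUE, PATH_IMMEDIATE, PATH_BACKLOG };

struct toy_sock {
    bool owned_by_user;  /* a process holds the socket lock (sock_owned_by_user()) */
    bool reader_waiting; /* tp->ucopy.task != NULL in the real code */
    bool low_latency;    /* sysctl_tcp_low_latency */
};

/* Mirrors the decision order of tcp_v4_rcv()/tcp_prequeue(). */
static enum path dispatch(const struct toy_sock *sk)
{
    if (sk->owned_by_user)
        return PATH_BACKLOG;      /* recv case (3): sk_add_backlog() */
    if (!sk->low_latency && sk->reader_waiting)
        return PATH_PREQUEUE;     /* recv case (1): tcp_prequeue() */
    return PATH_IMMEDIATE;        /* recv case (2): tcp_v4_do_rcv() */
}

int main(void)
{
    static const char *name[] = { "prequeue", "immediate", "backlog" };
    struct toy_sock cases[] = {
        { .owned_by_user = false, .reader_waiting = true,  .low_latency = false },
        { .owned_by_user = false, .reader_waiting = false, .low_latency = false },
        { .owned_by_user = true,  .reader_waiting = true,  .low_latency = false },
    };
    for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
        printf("case %u -> %s\n", i + 1, name[dispatch(&cases[i])]);
    return 0;
}

Note in particular that the backlog path (case (3)) wins over everything else: while a reader holds the socket lock, the softirq must not touch either the prequeue or the receive queue directly.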
----------------------------------------------------------

1. tcp_recvmsg()

/*
 *  This routine copies from a sock struct into the user buffer.
 *
 *  Technical note: in 2.3 we work on _locked_ socket, so that
 *  tricks with *seq access order and skb->users are not required.
 *  Probably, code can be easily improved even more.
 */
int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                size_t len, int nonblock, int flags, int *addr_len)
{
    seq = &tp->copied_seq;  /* u32 copied_seq: head of yet unread data */

    if (flags & MSG_PEEK) {
        peek_seq = tp->copied_seq;
        seq = &peek_seq;
    }

    do {
        u32 offset;

        /* Next get a buffer. */
        skb_queue_walk(&sk->sk_receive_queue, skb) {
            /* Now that we have two receive queues this
             * shouldn't happen.
             */
            if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
                     "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
                     *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags))
                break;

            offset = *seq - TCP_SKB_CB(skb)->seq;
            if (tcp_hdr(skb)->syn)
                offset--;
            if (offset < skb->len)
                goto found_ok_skb;
        }

        if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
            /* Install new reader */
            tp->ucopy.len = len;

            /* Ugly... If prequeue is not empty, we have to
             * process it before releasing socket, otherwise
             * order will be broken at second iteration.
             * More elegant solution is required!!!
             *
             * Look: we have the following (pseudo)queues:
             *
             * 1. packets in flight
             * 2. backlog
             * 3. prequeue
             * 4. receive_queue
             *
             * Each queue can be processed only if the next ones
             * are empty. At this point we have empty receive_queue.
             * But prequeue _can_ be not empty after 2nd iteration,
             * when we jumped to start of loop because backlog
             * processing added something to receive_queue.
             * We cannot release_sock(), because backlog contains
             * packets arrived _after_ prequeued ones.
             *
             * Shortly, algorithm is clear --- to process all
             * the queues in order. We could make it more directly,
             * requeueing packets from backlog to prequeue, if
             * is not empty. It is more elegant, but eats cycles,
             * unfortunately.
             */
            if (!skb_queue_empty(&tp->ucopy.prequeue))
                goto do_prequeue;

            /* __ Set realtime policy in scheduler __ */
        }

        if (copied >= target) {
            /* Do not sleep, just process backlog. */
            release_sock(sk);
            lock_sock(sk);
        } else
            sk_wait_data(sk, &timeo);

        if (user_recv) {
            int chunk;

            /* __ Restore normal policy in scheduler __ */

            if ((chunk = len - tp->ucopy.len) != 0) {
                NET_ADD_STATS_USER(sock_net(sk),
                                   LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
                len -= chunk;
                copied += chunk;
            }

            if (tp->rcv_nxt == tp->copied_seq &&
                !skb_queue_empty(&tp->ucopy.prequeue)) {
do_prequeue:
                tcp_prequeue_process(sk);

                if ((chunk = len - tp->ucopy.len) != 0) {
                    len -= chunk;
                    copied += chunk;
                }
            }
        }
        continue;

    found_ok_skb:
        /* Ok so how much can we use? */
        used = skb->len - offset;  /* offset = *seq - TCP_SKB_CB(skb)->seq */
        if (len < used)
            used = len;

        if (!(flags & MSG_TRUNC)) {
            err = skb_copy_datagram_iovec(skb, offset,
                                          msg->msg_iov, used);
            if (err) {
                /* Exception. Bailout! */
                if (!copied)
                    copied = -EFAULT;
                break;
            }
        }

        *seq += used;
        copied += used;
        len -= used;

        tcp_rcv_space_adjust(sk);
    } while (len > 0);

    release_sock(sk);
    return copied;
}
EXPORT_SYMBOL(tcp_recvmsg);

2. tcp_prequeue_process()

static void tcp_prequeue_process(struct sock *sk)
{
    /* RX process wants to run with disabled BHs, though it is not
     * necessary */
    local_bh_disable();
    while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
        sk_backlog_rcv(sk, skb);
    local_bh_enable();

    /* Clear memory counter. */
    tp->ucopy.memory = 0;
}
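The found_ok_skb path above is plain sequence-number arithmetic: the unread offset inside the skb is *seq - TCP_SKB_CB(skb)->seq, and the copy size is capped by both the remaining skb payload and the user buffer. A minimal self-contained sketch of that arithmetic (toy_skb, toy_copy and copied_seq are invented names; userspace C, not kernel code):

/* Sketch of tcp_recvmsg()'s copied_seq/offset arithmetic; not kernel code. */
#include <stdio.h>
#include <string.h>

struct toy_skb {
    unsigned int seq;   /* TCP_SKB_CB(skb)->seq: sequence number of first byte */
    const char  *data;
    size_t       len;   /* skb->len */
};

/* Copy up to buflen unread bytes from skb into buf, advancing *copied_seq
 * (the head of yet-unread data) exactly as tcp_recvmsg() advances *seq. */
static size_t toy_copy(const struct toy_skb *skb, unsigned int *copied_seq,
                       char *buf, size_t buflen)
{
    unsigned int offset = *copied_seq - skb->seq; /* offset = *seq - TCP_SKB_CB(skb)->seq */
    size_t used = skb->len - offset;              /* used = skb->len - offset */

    if (used > buflen)
        used = buflen;                            /* if (len < used) used = len */
    memcpy(buf, skb->data + offset, used);        /* stands in for skb_copy_datagram_iovec() */
    *copied_seq += used;                          /* *seq += used */
    return used;
}

int main(void)
{
    struct toy_skb skb = { .seq = 1000, .data = "hello world", .len = 11 };
    unsigned int copied_seq = 1006;               /* "hello " already consumed */
    char buf[4];

    size_t n = toy_copy(&skb, &copied_seq, buf, sizeof(buf));
    printf("copied %zu bytes: %.*s, copied_seq now %u\n",
           n, (int)n, buf, copied_seq);           /* copied 4 bytes: worl, 1010 */
    return 0;
}

Because *seq is a pointer, the MSG_PEEK branch earlier gets the same copy loop for free: it simply points seq at a local peek_seq instead of tp->copied_seq, so the consumed position never advances.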
tcp_prequeue(sk, skb) -------- recv case (1) ====> deferred reception of data
(1) tcp_recvmsg() ----> sk_wait_data(); sleep until data arrives
(2) tcp_v4_rcv() ----> tcp_prequeue() ----> __skb_queue_tail() / wake_up_interruptible_sync_poll(); queue the data and wake up tcp_recvmsg()
(3) tcp_recvmsg() ----> if (!skb_queue_empty(&tp->ucopy.prequeue)) goto do_prequeue; ----> tcp_prequeue_process() ----> sk_backlog_rcv() (= tcp_v4_do_rcv()) ----> tcp_rcv_established() ----> tcp_queue_rcv() receives the data

tcp_v4_do_rcv(sk, skb) -------- recv case (2) ====> immediate reception of data
(1) tcp_recvmsg() ----> sk_wait_data(); sleep until data arrives
(2) tcp_v4_rcv() ----> tcp_v4_do_rcv() ----> tcp_rcv_established() ----> tcp_queue_rcv() receives the data
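For completeness, the userspace side that drives both flows is a plain blocking recv(): each call enters tcp_recvmsg(), parks in sk_wait_data() when nothing is queued, and is woken by whichever path above delivers data. A minimal sketch; the 127.0.0.1:8000 endpoint is a placeholder:

/* Blocking TCP consumer; the sleeper that sk_wait_data() parks. */
#include <arpa/inet.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    struct sockaddr_in addr = {
        .sin_family = AF_INET,
        .sin_port   = htons(8000),        /* placeholder port */
    };
    inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

    if (fd < 0) {
        perror("socket");
        return 1;
    }
    if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("connect");
        return 1;
    }

    char buf[4096];
    ssize_t n;
    /* Each recv() enters tcp_recvmsg(); with no data queued it sleeps in
     * sk_wait_data() until the softirq path enqueues an skb and wakes it. */
    while ((n = recv(fd, buf, sizeof(buf), 0)) > 0)
        printf("got %zd bytes\n", n);

    close(fd);
    return 0;
}

While this process is inside recv() holding the socket lock, arriving segments take recv case (3) and land on the backlog, to be processed by release_sock() on the way out.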