kernel version: 3.13.1

skb buffer allocation (device: Intel e1000 in a virtual machine)

1. e1000_clean_rx_irq

/**
 * e1000_clean_rx_irq - Send received data up the network stack; legacy
 * @adapter: board private structure
 * @rx_ring: ring to clean
 * @work_done: amount of napi work completed this call
 * @work_to_do: max amount of work allowed for this call to do
 */
static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
                               struct e1000_rx_ring *rx_ring,
                               int *work_done, int work_to_do)
{
    ...
    while (rx_desc->status & E1000_RXD_STAT_DD) {
        struct sk_buff *skb;
        ...
        skb = buffer_info->skb;
        buffer_info->skb = NULL;
        ...
        length = le16_to_cpu(rx_desc->length);
        ...
        skb_put(skb, length);
        ...
    }
    ...
    cleaned_count = E1000_DESC_UNUSED(rx_ring);
    if (cleaned_count)
        adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
    ...
}

2. e1000_alloc_rx_buffers

/**
 * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
 * @adapter: address of board private structure
 **/
static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
                                   struct e1000_rx_ring *rx_ring,
                                   int cleaned_count)
{
    ...
    buffer_info = &rx_ring->buffer_info[i];
    skb = buffer_info->skb;
    ...
    skb = netdev_alloc_skb_ip_align(netdev, bufsz);
    ...
}

Receive-path accounting call chain:

tcp_data_queue() ----> tcp_try_rmem_schedule() ----> sk_rmem_schedule() ----> __sk_mem_schedule()

1. tcp_prot

struct proto tcp_prot = {
    ...
    .enter_memory_pressure = tcp_enter_memory_pressure,
    .sockets_allocated     = &tcp_sockets_allocated,
    .memory_allocated      = &tcp_memory_allocated,
    .memory_pressure       = &tcp_memory_pressure,
    /*
    static void tcp_init_mem(void)
    {
        unsigned long limit = nr_free_buffer_pages() / 8;

        limit = max(limit, 128UL);
        sysctl_tcp_mem[0] = limit / 4 * 3;
        sysctl_tcp_mem[1] = limit;
        sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
    }
    */
    .sysctl_rmem           = sysctl_tcp_rmem,
    ...
};
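To make the tcp_init_mem() arithmetic concrete, a minimal standalone sketch follows. The page count is an assumed example value (about 16 GB worth of free 4 KB pages), not measured anywhere:

/* Worked example of the tcp_init_mem() formula above.
 * ASSUMPTION: nr_free_buffer_pages() returns 4194304
 * (~16 GB of 4 KB pages); real values vary per machine.
 */
#include <stdio.h>

int main(void)
{
    unsigned long limit = 4194304UL / 8;   /* 524288 pages */
    unsigned long tcp_mem[3];

    if (limit < 128UL)                     /* limit = max(limit, 128UL) */
        limit = 128UL;
    tcp_mem[0] = limit / 4 * 3;            /* 393216 pages, ~1.5 GB */
    tcp_mem[1] = limit;                    /* 524288 pages, ~2 GB   */
    tcp_mem[2] = tcp_mem[0] * 2;           /* 786432 pages, ~3 GB   */

    /* These are the low / pressure / hard thresholds, in pages,
     * that __sk_mem_schedule() reads via sk_prot_mem_limits(). */
    printf("tcp_mem = %lu %lu %lu\n", tcp_mem[0], tcp_mem[1], tcp_mem[2]);
    return 0;
}

Note that the thresholds are counted in pages, so __sk_mem_schedule() compares them against sk_mem_pages()-style amounts, not raw bytes.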
/**
 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @kind: allocation type
 *
 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 * rmem allocation. This function assumes that protocols which have
 * memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
    struct proto *prot = sk->sk_prot;
    int amt = sk_mem_pages(size);
    long allocated;
    int parent_status = UNDER_LIMIT;

    sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;

    /*
    static inline long
    sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
    {
        struct proto *prot = sk->sk_prot;
        ...
        return atomic_long_add_return(amt, prot->memory_allocated);
    }
    */
    allocated = sk_memory_allocated_add(sk, amt, &parent_status);

    /* Under limit. */
    if (parent_status == UNDER_LIMIT &&
        allocated <= sk_prot_mem_limits(sk, 0)) {
        sk_leave_memory_pressure(sk);
        return 1;
    }

    /*
    static inline long sk_prot_mem_limits(const struct sock *sk, int index)
    {
        long *prot = sk->sk_prot->sysctl_mem;
        ...
        return prot[index];
    }
    */
    /* Under pressure. (we or our parents) */
    if ((parent_status > SOFT_LIMIT) ||
        allocated > sk_prot_mem_limits(sk, 1))
        sk_enter_memory_pressure(sk);

    /* Over hard limit (we or our parents) */
    if ((parent_status == OVER_LIMIT) ||
        (allocated > sk_prot_mem_limits(sk, 2)))
        goto suppress_allocation;

    /* guarantee minimum buffer size under pressure */
    if (kind == SK_MEM_RECV) {
        if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
            return 1;
    }
    ...
    if (sk_has_memory_pressure(sk)) {
        int alloc;

        if (!sk_under_memory_pressure(sk))
            return 1;
        alloc = sk_sockets_allocated_read_positive(sk);
        if (sk_prot_mem_limits(sk, 2) > alloc *
            sk_mem_pages(sk->sk_wmem_queued +
                         atomic_read(&sk->sk_rmem_alloc) +
                         sk->sk_forward_alloc))
            return 1;
    }

suppress_allocation:
    ...
    /* Alas. Undo changes. */
    sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
    sk_memory_allocated_sub(sk, amt);
    return 0;
}

Notes:
1. The NIC driver allocates skb buffers unconditionally: whenever data arrives, an skb is allocated for it.
2. __sk_mem_schedule() bounds sk->sk_forward_alloc against the various preset limits (tcp_mem, tcp_rmem).
3. TCP computes against the same preset limits to cap the advertised receive window, which in turn throttles how much data the NIC has to receive; this is how the two sides fit together.

--------------------------------------------------------------------
https://segmentfault.com/a/1190000021488819

Two further fields on the sk are very important. sk->sk_rmem_alloc is the receive-buffer memory currently in use, and sk->sk_forward_alloc is memory pre-allocated from the kernel. How do they relate? By analogy, sk->sk_forward_alloc is like a prepaid card, while sk->sk_rmem_alloc records the actual spending. When memory must be spent, the kernel first checks whether sk->sk_forward_alloc has any balance left; if not, it requests memory from the system and deposits it into sk->sk_forward_alloc, so that subsequent spending can simply be deducted from there. When sk->sk_forward_alloc runs out, it gets topped up again. (A toy model of this bookkeeping appears at the end of this note.)

kernel version: 3.13.1

static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
    int eaten = -1;
    ...
    if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
        if (tcp_receive_window(tp) == 0)
            goto out_of_window;

        /* Ok. In sequence. In window. */
        if (tp->ucopy.task == current &&
            tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
            sock_owned_by_user(sk) && !tp->urg_data) {
            int chunk = min_t(unsigned int, skb->len,
                              tp->ucopy.len);
            ...
            if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
                tp->ucopy.len -= chunk;
                tp->copied_seq += chunk;
                eaten = (chunk == skb->len);
                tcp_rcv_space_adjust(sk);
            }
            ...
        }

        if (eaten <= 0) {
queue_and_out:
            /* Given the possible values of eaten and this condition,
             * there are paths on which tcp_try_rmem_schedule() has not
             * yet reserved the space in sk->sk_forward_alloc, yet
             * tcp_queue_rcv() already consumes it, so
             * sk->sk_forward_alloc can go negative.
             */
            if (eaten < 0 &&
                tcp_try_rmem_schedule(sk, skb, skb->truesize))
                goto drop;

            eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
        }
        ...
    }
    ...
}

kernel version: 4.14

static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
    int eaten;
    ...
    if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
        if (tcp_receive_window(tp) == 0)
            goto out_of_window;

queue_and_out:
        if (skb_queue_len(&sk->sk_receive_queue) == 0)
            sk_forced_mem_schedule(sk, skb->truesize);
        else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
            goto drop;

        eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
        ...
    }
    ...
}
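The 4.14 path above leans on sk_forced_mem_schedule(), which never fails: when the receive queue is empty, the first skb is always accepted and its memory is charged unconditionally, so the charge cannot be skipped the way the 3.13 eaten path skips it. A rough sketch of its logic, paraphrased from net/core/sock.c in the 4.x series (check your own tree for the exact code; memcg charging is omitted here):

/* Paraphrased sketch of sk_forced_mem_schedule() (net/core/sock.c, 4.x);
 * memcg handling omitted. Unlike __sk_mem_schedule(), it checks no
 * tcp_mem[] limit and cannot refuse the charge.
 */
void sk_forced_mem_schedule(struct sock *sk, int size)
{
    int amt;

    if (size <= sk->sk_forward_alloc)
        return;                          /* enough prepaid balance */
    amt = sk_mem_pages(size);            /* round up to whole quanta */
    sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
    sk_memory_allocated_add(sk, amt);    /* 4.x signature: no parent_status */
}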
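Going back to the prepaid-card analogy, here is a minimal userspace model of the sk_forward_alloc bookkeeping. Every name in it (toy_sock, charge, mem_pages) is invented for illustration; only the arithmetic mirrors the kernel's SK_MEM_QUANTUM accounting:

/* Toy userspace model of the sk_forward_alloc "prepaid card".
 * All names here are illustrative, not kernel API.
 */
#include <stdio.h>

#define SK_MEM_QUANTUM 4096  /* one page per top-up unit on most configs */

struct toy_sock {
    int rmem_alloc;     /* actual spending (sk->sk_rmem_alloc)   */
    int forward_alloc;  /* prepaid balance (sk->sk_forward_alloc) */
};

/* like sk_mem_pages(): round a byte size up to whole quanta */
static int mem_pages(int size)
{
    return (size + SK_MEM_QUANTUM - 1) / SK_MEM_QUANTUM;
}

/* Charge an incoming skb of 'truesize' bytes: top up the card if the
 * balance is short, then deduct the spending from it.
 */
static void charge(struct toy_sock *sk, int truesize)
{
    if (truesize > sk->forward_alloc) {
        /* the top-up: this is where __sk_mem_schedule() would check
         * tcp_mem[] and may refuse */
        sk->forward_alloc += mem_pages(truesize) * SK_MEM_QUANTUM;
    }
    sk->forward_alloc -= truesize;  /* spend */
    sk->rmem_alloc    += truesize;
}

int main(void)
{
    struct toy_sock sk = { 0, 0 };

    charge(&sk, 700);  /* top up 4096, spend 700: balance 3396 */
    charge(&sk, 700);  /* fits in the balance:    balance 2696 */
    printf("rmem_alloc=%d forward_alloc=%d\n",
           sk.rmem_alloc, sk.forward_alloc);
    return 0;
}

This prints rmem_alloc=1400 forward_alloc=2696: the first 700-byte charge topped the card up by one 4096-byte quantum, and the second was paid entirely out of the remaining balance. The negative sk_forward_alloc seen in the 3.13 snippet corresponds to performing the spend step without ever running the top-up step.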