Bridge转发逻辑

——lvyilong316

本文主要介绍Linux bridge的转发流程,以及bridge涉及的几个hook点。首先看一张完整的转发图。

Bridge转发逻辑-LMLPHP

下面逐一看每个函数的分析

netif_receive_skb:网卡接收函数

/net/core/dev.c


点击(此处)折叠或打开

  1. int netif_receive_skb(struct sk_buff *skb)
  2. {
  3.     //
  4. skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
  5.     //
  6. }


handle_bridge:网桥处理函数

/net/core/dev.c


点击(此处)折叠或打开

  1. static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
  2.                         struct packet_type **pt_prev, int *ret,
  3.                         struct net_device *orig_dev)
  4. {
  5.        struct net_bridge_port *port;
  6.        if (skb->pkt_type == PACKET_LOOPBACK ||
  7.            (port = rcu_dereference(skb->dev->br_port)) == NULL)// A注意这个判断
  8.            return skb;
  9.   
  10.        if (*pt_prev) { //一般来说pt_prev为NULL
  11.            *ret = deliver_skb(skb, *pt_prev, orig_dev);
  12.            *pt_prev = NULL;
  13.        }
  14.        //调用bridge挂载函数,改函数在bridge模块装载时初始化
  15.        return br_handle_frame_hook(port, skb);
  16. }
  17. br_handle_frame_hook函数在bridge模块装载时初始化
  18. /* net/bridge/br.c */
  19. static int __init br_init(void)
  20. {
  21.     //......
  22.     br_handle_frame_hook = br_handle_frame;
  23.     //......
  24.   }


br_handle_frame

/net/bridge/br_input.c


点击(此处)折叠或打开

  1. struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
  2. {
  3.     const unsigned char *dest = eth_hdr(skb)->h_dest;
  4.     int (*rhook)(struct sk_buff *skb);
  5.     //
  6.     if (unlikely(is_link_local(dest))) {//如果是本地多播地址(形如:01:80:c2:00:00:0X)
  7.     //
  8.     // 自身包进入PF_BEIDGE的INPUT点, 一般处理的包数不多
  9.         if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
  10.                 NULL, br_handle_local_finish))
  11.             return NULL; /* frame consumed by filter */
  12.         else
  13.             return skb; /* continue processing */
  14.     }
  15. //进入转发逻辑
  16. forward:
  17.     switch (p->state) {
  18.     case BR_STATE_FORWARDING:
  19.         rhook = rcu_dereference(br_should_route_hook);
  20.         if (rhook != NULL) {
  21.             if (rhook(skb))
  22.                 return skb;
  23.             dest = eth_hdr(skb)->h_dest;
  24.         }
  25.         /* fall through */
  26.     case BR_STATE_LEARNING://如果数据包的目的mac为bridge的mac
  27.         if (!compare_ether_addr(p->br->dev->dev_addr, dest))
  28.             skb->pkt_type = PACKET_HOST;
  29.         //bridge的PRE_ROUTING
  30.         NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
  31.             br_handle_frame_finish);
  32.         break;
  33.     default:
  34. drop:
  35.         kfree_skb(skb);
  36.     }
  37.     return NULL;
  38. }


br_handle_frame_finish:这个函数完成更新mac表、查找mac表确定出口dev

/net/bridge/br_input.c


点击(此处)折叠或打开

  1. int br_handle_frame_finish(struct sk_buff *skb)
  2. {
  3.     const unsigned char *dest = eth_hdr(skb)->h_dest;
  4.     struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
  5.     struct net_bridge *br;
  6.     struct net_bridge_fdb_entry *dst;
  7.     struct sk_buff *skb2;
  8.     /* insert into forwarding database after filtering to avoid spoofing */
  9.     br = p->br;
  10.     br_fdb_update(br, p, eth_hdr(skb)->h_source);
  11.     /* The packet skb2 goes to the local host (NULL to skip). */
  12.     skb2 = NULL;
  13.     if (br->dev->flags & IFF_PROMISC) //如果网桥设备被设置为混杂模式
  14.         skb2 = skb;
  15.     dst = NULL;
  16.     if (is_multicast_ether_addr(dest)) {//如果是多播(首字节为x01)
  17.         br->dev->stats.multicast++;
  18.         skb2 = skb;
  19.     } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { //如果目的mac为本机mac
  20.         skb2 = skb;
  21.         /* Do not forward the packet since it's local. */
  22.         skb = NULL; //skb2为要发往本机上层协议栈的,skb为要转发的
  23. }
  24.    
  25.     if (skb2 == skb)
  26.         skb2 = skb_clone(skb, GFP_ATOMIC);
  27.     if (skb2) //发往本机上层协议栈
  28.         br_pass_frame_up(br, skb2);
  29.     if (skb) { //转发
  30.         if (dst)
  31.             br_forward(dst->dst, skb);
  32.         else
  33.             br_flood_forward(br, skb);
  34.     }
  35. out:
  36.     return 0;
  37. drop:
  38.     kfree_skb(skb);
  39.     goto out;
  40. }


br_pass_frame_up发往本地

// net/bridge/br_input.c


点击(此处)折叠或打开

  1. static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
  2. {
  3.     struct net_device *indev, *brdev = br->dev;
  4.     brdev->stats.rx_packets++;
  5.     brdev->stats.rx_bytes += skb->len;
  6.     indev = skb->dev;
  7.     skb->dev = brdev;
  8.     //bridge的LOCAL_IN
  9.     NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
  10.         netif_receive_skb);
  11. }


    这段代码非常简单,对net_bridge的数据统计进行更新以后,再更新skb->dev,最后通过NF_HOOK在NF_BR_LOCAL_IN挂接点上调用回了netif_receive_skb。

    前面已经提到,在netif_receive_skb函数中,调用了handle_bridge函数,并且触发了网桥的处理流程,现在发往网桥虚拟设备的数据包又回到了netif_receive_skb,那么网桥的处理过程会不会又被调用到呢?

     在linux/net/bridge/br_if.c里面可以看到br_add_if函数,实际上的操作是将某一网口(dev)加入网桥组,这个函数调用了new_nbp(br, dev); 用以填充net_bridge以及dev结构的重要成员,里面将dev->br_port(这里dev是加入bridge的dev而不是bridge自身对应的dev)设定为一个新建的net_bridge_port结构。而上面的br_pass_frame_up函数将skb->dev赋成了br->dev,实际上skb->dev变成了网桥建立的虚拟设备(bridge自身对应的dev),这个设备是网桥本身而不是桥组的某一端口,系统没有为其调用br_add_if,所以这个net_device结构的br_port指针没有进行赋值。

     在handle_bridge中有这样的检查:

if (skb->pkt_type == PACKET_LOOPBACK ||

           (port = rcu_dereference(skb->dev->br_port)) == NULL)

              return skb;

    经过br_pass_frame_up 函数后,skb->dev->br_port为空,所以将直接返回skb而不进行网桥处理。

    另外,我们看到,系统在NF_BR_LOCAL_IN挂接点上调用了netif_receive_skb,但是netif_receive_skb还会调用ip_rcv函数,所以数据包在NF_IP_LOCAL_IN还可以被捕获到。

br_forward转发

// net/bridge/br_forward.c


点击(此处)折叠或打开

  1. void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
  2. {
  3.     //接口检查,确认端口处于BR_STATE_FORWARDING状态,网桥允许转发,并且转发的出口和入口的dev不相等
  4.     if (should_deliver(to, skb)) {
  5.         __br_forward(to, skb);
  6.         return;
  7.     }
  8.     kfree_skb(skb);
  9. }


__br_forward

// net/bridge/br_forward.c

点击(此处)折叠或打开

  1. static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
  2. {
  3.     struct net_device *indev;
  4.     indev = skb->dev;
  5.     skb->dev = to->dev; //修改skb->dev为目的出口对应的dev
  6.     skb_forward_csum(skb); //计算校验和
  7.     //bridge的FORWARD
  8.     NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
  9.             br_forward_finish);
  10. }


br_forward_finish

点击(此处)折叠或打开

  1. int br_forward_finish(struct sk_buff *skb)
  2. { //bridge的POST_ROUTING
  3.     return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
  4.                br_dev_queue_push_xmit);
  5. }

br_dev_queue_push_xmit

// net/bridge/br_forward.c

点击(此处)折叠或打开

  1. int br_dev_queue_push_xmit(struct sk_buff *skb)
  2. {
  3.     /* drop mtu oversized packets except gso */
  4.     if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
  5.         kfree_skb(skb);
  6.     else {
  7.         /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
  8.         if (nf_bridge_maybe_copy_header(skb))
  9.             kfree_skb(skb);
  10.         else {
  11.             skb_push(skb, ETH_HLEN);
  12.             dev_queue_xmit(skb);
  13.         }
  14.     }
  15.     return 0;
  16. }

    dev_queue_xmit()会判断skb中的dev字段,根据这个字段指示的设备调用该设备的发送函数hard_start_xmit来对skb进行转发。其实到这里bridge的转发逻辑基本就完成了,但是如果目的dev依然是bridge呢,那就调用bridge的hard_start_xmit,而bridge的hard_start_xmit在bridge初始化中由br_dev_setup设置。

/* net/bridge/br_device.c */ 

点击(此处)折叠或打开

  1. void br_dev_setup(struct net_device *dev)
  2. {
  3.      //......
  4.      dev->hard_start_xmit = br_dev_xmit;
  5.      //......
  6. }

br_dev_xmit

/* net/bridge/br_device.c */ 

/* net device transmit always called with no BH (preempt_disabled) */ 

点击(此处)折叠或打开

  1. int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
  2. {
  3.      struct net_bridge *br = netdev_priv(dev);
  4.      const unsigned char *dest = skb->data;
  5.      struct net_bridge_fdb_entry *dst;
  6.      br->statistics.tx_packets++;
  7.      br->statistics.tx_bytes += skb->len;
  8.      skb->mac.raw = skb->data;
  9.      skb_pull(skb, ETH_HLEN);
  10.      if (dest[0] & 1)
  11.      // 多播发送
  12.          br_flood_deliver(br, skb, 0);
  13.      else if ((dst = __br_fdb_get(br, dest)) != NULL) //查转发表
  14.      // 单播发送
  15.          br_deliver(dst->dst, skb);
  16.      else
  17.      // 广播发送
  18.          br_flood_deliver(br, skb, 0);
  19.      // 这些发送函数最终都会调用__br_deliver()函数
  20.      return 0;
  21. }

__br_deliver

/* net/bridge/br_forward.c */ 

点击(此处)折叠或打开

  1. static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
  2. {
  3.      skb->dev = to->dev; //设置为出口dev
  4.      // 此处是PF_BRIDGE的OUTPUT点
  5.      NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
  6.      br_forward_finish);
  7. }

    注意这里调用完成后又要调用 br_forward_finish,但这不是循环,因为在__br_deliver中skb->dev已经改变,下一轮调用的hard_start_xmit 也会不同。最后再看一遍全局的转发图,应该就比较清晰了。

Bridge转发逻辑-LMLPHP

09-30 14:18