原文链接:http://ry0117.com/2016/12/24/OVS内核KEY值提取及匹配流表代码分析/

当开启OVS后,创建datapath类型为system的网桥并为其添加相关接口。网桥内的接口(网卡)接收到数据包后,数据包会先进入OVS的内核模块openvswitch,内核模块从数据包中提取key值,并使用key值匹配内核模块中的流表;当匹配到相应的流表后,则执行流表上相应的动作;

当在OVS内核缓存中匹配不到流表,则将key值信息通过NetLink发送给用户态的ovs-vswitchd守护进程,由其来决定如何处理数据包。

下面就Linux-3.19版本内核中OpenvSwitch内核模块中的提取Key值、匹配流表及执行流表动作相关的代码做一下分析。

OVS 内核KEY值提取及匹配流表代码分析-LMLPHP

提取KEY值(datapath/flow.c)

Key值信息是匹配流表的前提,key值中包括很多的信息,包括源MAC地址、目的MAC地址、VLAN信息、协议类型、源IP地址,目的IP地址、端口号等信息,所有的key值都可以从skb数据包中提取到。

 /*
  * ovs_flow_key_extract - fill @key from packet metadata, then from the
  * packet headers.
  * @tun_info: tunnel metadata for the packet, or NULL when there is none
  * @skb: packet being processed
  * @key: flow key to populate
  *
  * Copies the tunnel key and any tunnel options into @key (or zeroes them
  * when @tun_info is NULL), records the skb priority, the ingress port
  * number and the skb mark, clears the flow hash and recirculation id, and
  * finally hands over to key_extract() for the header fields.
  *
  * Returns whatever key_extract() returns.
  */
int
ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
		     struct sk_buff *skb, struct sw_flow_key *key)
{
	if (!tun_info) {
		/* No tunnel metadata: clear the tunnel part of the key. */
		key->tun_opts_len = 0;
		memset(&key->tun_key, 0, sizeof(key->tun_key));
	} else {
		memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
		if (!tun_info->options) {
			key->tun_opts_len = 0;
		} else {
			/* Compile-time check relating the largest value that
			 * options_len can hold to the size of key->tun_opts.
			 */
			BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
					    8)) - 1
				     > sizeof(key->tun_opts));
			memcpy(GENEVE_OPTS(key, tun_info->options_len),
			       tun_info->options, tun_info->options_len);
			key->tun_opts_len = tun_info->options_len;
		}
	}

	/* Per-packet metadata taken straight from the skb. */
	key->phy.priority = skb->priority;
	key->phy.skb_mark = skb->mark;
	/* The ingress port is the port number of the input vport. */
	key->phy.in_port = OVS_CB(skb)->input_vport->port_no;

	key->recirc_id = 0;
	key->ovs_flow_hash = 0;

	/* MAC, VLAN, ethertype, IP and L4 fields come from the headers. */
	return key_extract(skb, key);
}
/**
* key_extract - extracts a flow key from an Ethernet frame.
* @skb: sk_buff that contains the frame, with skb->data pointing to the
* Ethernet header
* @key: output flow key
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
* Returns 0 if successful, otherwise a negative errno value.
*
* Initializes @skb header pointers as follows:
*
* - skb->mac_header: the Ethernet header.
*
* - skb->network_header: just past the Ethernet header, or just past the
* VLAN header, to the first byte of the Ethernet payload.
*
* - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
* on output, then just past the IP header, if one is present and
* of a correct length, otherwise the same as skb->network_header.
* For other key->eth.type values it is left untouched.
*/
static int
key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
int error;
struct ethhdr *eth;
/* Flags are always used as part of stats */
key->tp.flags = 0;
/* Make skb->mac_header point at the current skb->data (the L2 header). */
skb_reset_mac_header(skb);
/* Link layer. We are guaranteed to have at least the 14 byte Ethernet
* header in the linear data area.
*/
/* Fetch the Ethernet header and copy source/destination MAC into the key. */
eth = eth_hdr(skb);
ether_addr_copy(key->eth.src, eth->h_source);
ether_addr_copy(key->eth.dst, eth->h_dest);
/* Advance skb->data past the two MAC addresses. It now points at the
* VLAN header when the frame carries one in-line, otherwise at the
* ethertype field.
*/
__skb_pull(skb, 2 * ETH_ALEN);
/* We are going to push all headers that we pull, so no need to
* update skb->csum here.
*/
/* VLAN: prefer the tag stored in skb metadata; otherwise parse an
* in-line 802.1Q header. A parse_vlan() failure is reported as -ENOMEM.
*/
key->eth.tci = 0;
if (vlan_tx_tag_present(skb))
key->eth.tci = htons(skb->vlan_tci);
else if (eth->h_proto == htons(ETH_P_8021Q))
if (unlikely(parse_vlan(skb, key)))
return -ENOMEM;
/* Ethertype (e.g. ETH_P_IP, ETH_P_ARP, ETH_P_IPV6); a zero result from
* parse_ethertype() is treated as failure.
*/
key->eth.type = parse_ethertype(skb);
if (unlikely(key->eth.type == htons(0)))
return -ENOMEM;
/* skb->data is now just past the L2 header: record it as the network
* header, refresh mac_len, then push skb->data back to the MAC header
* (undoing the pulls above).
*/
skb_reset_network_header(skb);
skb_reset_mac_len(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
/* Network layer. */
/* IPv4 */
if (key->eth.type == htons(ETH_P_IP)) {
struct iphdr *nh;
__be16 offset;
/* Validate the IP header; per the function header, a valid header
* leaves skb->transport_header just past it.
*/
error = check_iphdr(skb);
if (unlikely(error)) {
/* Header did not validate: match on zeroed IP fields. */
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4));
if (error == -EINVAL) {
/* -EINVAL is not propagated: transport_header is made
* equal to network_header (the documented state when no
* IP header of correct length is present) and extraction
* succeeds with the zeroed fields. Any other error is
* returned to the caller.
* NOTE(review): presumably -EINVAL from check_iphdr()
* means a malformed or truncated IP header -- confirm.
*/
skb->transport_header = skb->network_header;
error = 0;
}
return error;
}
/* Copy source and destination IPv4 addresses into the key. */
nh = ip_hdr(skb);
key->ipv4.addr.src = nh->saddr;
key->ipv4.addr.dst = nh->daddr;
/* L4 protocol number, TOS and TTL. */
key->ip.proto = nh->protocol;
key->ip.tos = nh->tos;
key->ip.ttl = nh->ttl;
/*
* Fragment classification from frag_off: a non-zero fragment offset
* marks a later fragment and L4 parsing is skipped; otherwise the MF
* bit or a UDP GSO skb marks a first fragment; anything else is
* unfragmented.
*/
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
return 0;
}
if (nh->frag_off & htons(IP_MF) ||
skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
else
key->ip.frag = OVS_FRAG_TYPE_NONE;
/* Transport layer. */
/* TCP */
if (key->ip.proto == IPPROTO_TCP) {
if (tcphdr_ok(skb)) {
/* Source/destination ports and TCP flags. */
struct tcphdr *tcp = tcp_hdr(skb);
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
/* UDP */
} else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
/* Source/destination ports. */
struct udphdr *udp = udp_hdr(skb);
key->tp.src = udp->source;
key->tp.dst = udp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
/* SCTP */
} else if (key->ip.proto == IPPROTO_SCTP) {
if (sctphdr_ok(skb)) {
/* Source/destination ports. */
struct sctphdr *sctp = sctp_hdr(skb);
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
/* ICMP */
} else if (key->ip.proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
/* ICMP type and code are stored in the port fields. */
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
* transport port fields, so we need to store
* them in 16-bit network byte order. */
key->tp.src = htons(icmp->type);
key->tp.dst = htons(icmp->code);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
/* ARP or RARP */
} else if (key->eth.type == htons(ETH_P_ARP) ||
key->eth.type == htons(ETH_P_RARP)) {
struct arp_eth_header *arp;
bool arp_available = arphdr_ok(skb);
/* The ARP header starts at the network header; it is only
* dereferenced below when arphdr_ok() succeeded.
*/
arp = (struct arp_eth_header *)skb_network_header(skb);
if (arp_available &&
arp->ar_hrd == htons(ARPHRD_ETHER) &&
arp->ar_pro == htons(ETH_P_IP) &&
arp->ar_hln == ETH_ALEN &&
arp->ar_pln == 4) {
/* The ARP opcode is stored in key->ip.proto. */
/* We only match on the lower 8 bits of the opcode. */
if (ntohs(arp->ar_op) <= 0xff)
key->ip.proto = ntohs(arp->ar_op);
else
key->ip.proto = 0;
/* Sender/target IPv4 addresses and sender/target hardware addresses. */
memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
} else {
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
/* MPLS: record the top label stack entry and skip the label stack. */
} else if (eth_p_mpls(key->eth.type)) {
size_t stack_len = MPLS_HLEN;
/* In the presence of an MPLS label stack the end of the L2
* header and the beginning of the L3 header differ.
*
* Advance network_header to the beginning of the L3
* header. mac_len corresponds to the end of the L2 header.
*/
while (1) {
__be32 lse;
/* A check_header() failure stops the walk but is not
* reported: extraction still returns 0.
*/
error = check_header(skb, skb->mac_len + stack_len);
if (unlikely(error))
return 0;
memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
/* Only the first (top) entry is stored in the key. */
if (stack_len == MPLS_HLEN)
memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
skb_set_network_header(skb, skb->mac_len + stack_len);
/* The entry with MPLS_LS_S_MASK set ends the loop. */
if (lse & htonl(MPLS_LS_S_MASK))
break;
stack_len += MPLS_HLEN;
}
/* IPv6 */
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
nh_len = parse_ipv6hdr(skb, key);
if (unlikely(nh_len < 0)) {
/* Same policy as the IPv4 branch: zero the IP fields, swallow
* -EINVAL (with transport_header == network_header) and
* propagate any other error.
*/
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
if (nh_len == -EINVAL) {
skb->transport_header = skb->network_header;
error = 0;
} else {
error = nh_len;
}
return error;
}
/* Later fragments get no L4 parsing. */
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
return 0;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
} else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->tp.src = udp->source;
key->tp.dst = udp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
} else if (key->ip.proto == NEXTHDR_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
} else if (key->ip.proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {
/* Unlike the other L4 parsers, a parse_icmpv6() error is
* returned to the caller.
*/
error = parse_icmpv6(skb, key, nh_len);
if (error)
return error;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
}
return 0;
}

根据KEY值匹配流表(datapath/datapath.c)

通过ovs_flow_key_extract函数及key_extract函数从skb中提取所有需要的key值后,下面就是使用key值来匹配OVS内核模块openvswitch中缓存的流表信息,并在匹配到流表后执行流表中相应的动作处理数据包。若在内核中未匹配到流表,则通过Netlink消息将key值发送到用户态ovs-vswitchd进程,由用户态进程来决定如何处理数据包。

 /*
  * ovs_dp_process_packet - look up the flow for a packet and act on it.
  * @skb: packet to process
  * @key: flow key previously extracted from @skb
  *
  * On a flow-table hit the flow statistics are updated and the flow's
  * actions are executed. On a miss the packet is handed to
  * ovs_dp_upcall() as OVS_PACKET_CMD_MISS and then released here. Either
  * way the per-CPU datapath hit/miss and mask-hit counters are updated.
  *
  * Must be called with rcu_read_lock.
  */
void
ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	/* This CPU's slot of the per-CPU datapath statistics. */
	struct dp_stats_percpu *stats = this_cpu_ptr(dp->stats_percpu);
	struct sw_flow *flow;
	u64 *stats_counter;
	u32 n_mask_hit;

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
	if (unlikely(!flow)) {
		/* Miss: send the packet and its key up via ovs_dp_upcall(). */
		struct dp_upcall_info upcall;
		int error;

		upcall.cmd = OVS_PACKET_CMD_MISS;
		upcall.userdata = NULL;
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
		upcall.egress_tun_info = NULL;

		error = ovs_dp_upcall(dp, skb, key, &upcall);
		/* The skb is released here whether or not the upcall worked;
		 * only the release function differs.
		 */
		if (unlikely(error))
			kfree_skb(skb);
		else
			consume_skb(skb);

		stats_counter = &stats->n_missed;
	} else {
		struct sw_flow_actions *sf_acts;

		/* Hit: account the packet to the flow, then run its actions. */
		ovs_flow_stats_update(flow, key->tp.flags, skb);
		sf_acts = rcu_dereference(flow->sf_acts);
		ovs_execute_actions(dp, skb, sf_acts, key);

		stats_counter = &stats->n_hit;
	}

	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	u64_stats_update_end(&stats->syncp);
}

执行流表ACTION(datapath/actions.c)

匹配到对应的流表后,从流表中获取流表的动作,循环遍历所有的flow action,执行相应的action动作。

 /*
  * ovs_execute_actions - execute a flow's action list against 'skb'.
  * @dp: datapath the packet belongs to
  * @skb: packet to act on
  * @acts: actions of the matched flow
  * @key: flow key of the packet
  *
  * Tracks nesting depth in the per-CPU counter exec_actions_level. Only
  * the outermost invocation (the one that observed a depth of zero on
  * entry) calls process_deferred_actions().
  *
  * Returns the result of do_execute_actions().
  */
int
ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
		    const struct sw_flow_actions *acts,
		    struct sw_flow_key *key)
{
	/* Depth on entry, sampled before this call is counted. */
	int entry_level = this_cpu_read(exec_actions_level);
	int rc;

	this_cpu_inc(exec_actions_level);

	OVS_CB(skb)->egress_tun_info = NULL;
	rc = do_execute_actions(dp, skb, key,
				acts->actions, acts->actions_len);

	/* NOTE(review): presumably this drains work queued by actions such
	 * as recirculation -- confirm against process_deferred_actions().
	 */
	if (entry_level == 0)
		process_deferred_actions(dp);

	this_cpu_dec(exec_actions_level);
	return rc;
}
/*
 * do_execute_actions - walk a netlink-encoded action list for 'skb'.
 * @dp: datapath the packet belongs to
 * @skb: packet to act on; not handed back to the caller on any return path
 * @key: flow key of the packet, passed to the individual action handlers
 * @attr: first action attribute
 * @len: total length in bytes of the action attributes
 *
 * Returns 0 on success or the first non-zero error from an action.
 */
static int
do_execute_actions(struct datapath *dp, struct sk_buff *skb,
		   struct sw_flow_key *key,
		   const struct nlattr *attr, int len)
{
	/* Every output action needs a separate clone of 'skb', but the common
	 * case is just a single output action, so that doing a clone and
	 * then freeing the original skbuff is wasteful. An output is
	 * therefore only remembered in 'pending_port' and carried out when
	 * the next action is reached (on a clone) or after the list ends
	 * (on the original skb).
	 */
	int pending_port = -1;
	const struct nlattr *act;
	int left;

	for (act = attr, left = len; left > 0; act = nla_next(act, &left)) {
		int err = 0;

		if (unlikely(pending_port != -1)) {
			/* More actions follow a pending output: send a clone
			 * and keep working on the original.
			 */
			struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);

			if (copy)
				do_output(dp, copy, pending_port);

			pending_port = -1;
		}

		switch (nla_type(act)) {
		case OVS_ACTION_ATTR_OUTPUT:
			/* Remember the egress port; the send is deferred. */
			pending_port = nla_get_u32(act);
			break;

		case OVS_ACTION_ATTR_USERSPACE:
			/* Hand the packet to userspace. */
			output_userspace(dp, skb, key, act);
			break;

		case OVS_ACTION_ATTR_HASH:
			/* NOTE(review): presumably sets key->ovs_flow_hash --
			 * confirm against execute_hash().
			 */
			execute_hash(skb, key, act);
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS:
			err = push_mpls(skb, key, nla_data(act));
			break;

		case OVS_ACTION_ATTR_POP_MPLS:
			err = pop_mpls(skb, key, nla_get_be16(act));
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			/* Add a VLAN tag. */
			err = push_vlan(skb, key, nla_data(act));
			break;

		case OVS_ACTION_ATTR_POP_VLAN:
			/* Remove the VLAN tag. */
			err = pop_vlan(skb, key);
			break;

		case OVS_ACTION_ATTR_RECIRC:
			err = execute_recirc(dp, skb, key, act, left);
			if (nla_is_last(act, left)) {
				/* If this is the last action, the skb has
				 * been consumed or freed.
				 * Return immediately.
				 */
				return err;
			}
			break;

		case OVS_ACTION_ATTR_SET:
			/* Rewrite packet fields as requested. */
			err = execute_set_action(skb, key, nla_data(act));
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample(dp, skb, key, act);
			break;

		default:
			/* Other action types are skipped. */
			break;
		}

		if (unlikely(err)) {
			kfree_skb(skb);
			return err;
		}
	}

	/* The original skb goes out on the last pending port, if any;
	 * otherwise it is released.
	 */
	if (pending_port == -1)
		consume_skb(skb);
	else
		do_output(dp, skb, pending_port);

	return 0;
}

OUTPUT ACTION(datapath/actions.c)

流表的OUTPUT动作指定了数据包发送的出接口信息,调用do_output->ovs_vport_send->vport->ops->send发送函数将数据包从output action对应的接口发送出去。

 /*
  * do_output - transmit 'skb' on datapath port 'out_port'.
  *
  * The port number is resolved with ovs_vport_rcu(). If no vport is
  * found the packet is freed; otherwise it is passed to ovs_vport_send().
  */
static void
do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
	struct vport *egress = ovs_vport_rcu(dp, out_port);

	if (likely(egress)) {
		ovs_vport_send(egress, skb);
		return;
	}

	/* Unknown port: drop the packet. */
	kfree_skb(skb);
}
/**
 * ovs_vport_send - send a packet on a device
 *
 * @vport: vport on which to send the packet
 * @skb: skb to send
 *
 * Sends the given packet through the vport's send callback and returns
 * that callback's result (the length of data sent). A positive result is
 * added to this CPU's tx packet/byte counters; a negative result is
 * recorded as VPORT_E_TX_ERROR and zero as VPORT_E_TX_DROPPED. Either ovs
 * lock or rcu_read_lock must be held.
 */
int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
{
	int sent = vport->ops->send(vport, skb);

	if (likely(sent > 0)) {
		/* Success: account the packet in the per-CPU vport stats. */
		struct pcpu_sw_netstats *stats;

		stats = this_cpu_ptr(vport->percpu_stats);
		u64_stats_update_begin(&stats->syncp);
		stats->tx_packets++;
		stats->tx_bytes += sent;
		u64_stats_update_end(&stats->syncp);
		return sent;
	}

	/* Nothing was sent: negative means error, zero means dropped. */
	ovs_vport_record_error(vport, sent < 0 ? VPORT_E_TX_ERROR
					       : VPORT_E_TX_DROPPED);
	return sent;
}

当OVS接口类型为system时,vport->ops->send函数为netdev_send:

 /*
  * netdev_send - send callback used when an OUTPUT action targets a
  * netdev-backed vport.
  * @vport: egress vport
  * @skb: packet to transmit
  *
  * A packet that is longer than the device MTU and is not GSO is dropped
  * with a rate-limited warning. Otherwise the skb is bound to the vport's
  * net device and queued with dev_queue_xmit().
  *
  * Returns skb->len as sampled before queueing, or 0 when the packet was
  * dropped. The result of dev_queue_xmit() is not examined.
  */
static int
netdev_send(struct vport *vport, struct sk_buff *skb)
{
	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
	int mtu = netdev_vport->dev->mtu;
	int queued_len;

	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
				     netdev_vport->dev->name,
				     packet_length(skb), mtu);
		kfree_skb(skb);
		return 0;
	}

	/* Point the skb at the egress device. */
	skb->dev = netdev_vport->dev;
	/* Sample the length first: the skb is not touched after queueing. */
	queued_len = skb->len;
	dev_queue_xmit(skb);

	return queued_len;
}

SET ACTION(datapath/actions.c)

流表SET动作会修改数据包中指定的信息,如skb->priority、skb->mark等信息。

 /*
  * execute_set_action - apply one SET action to a packet.
  * @skb: packet to modify
  * @key: flow key of the packet, passed on to the per-field helpers
  * @nested_attr: attribute naming the field to set and carrying its value
  *
  * Priority, mark and tunnel info are stored directly on the skb (priority
  * and mark are mirrored into @key); header fields are delegated to the
  * set_*() helpers. Attribute types not listed here are ignored.
  *
  * Returns 0, or the error returned by the delegated helper.
  */
static int
execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
		   const struct nlattr *nested_attr)
{
	switch (nla_type(nested_attr)) {
	case OVS_KEY_ATTR_PRIORITY:
		skb->priority = nla_get_u32(nested_attr);
		key->phy.priority = skb->priority;
		return 0;

	case OVS_KEY_ATTR_SKB_MARK:
		skb->mark = nla_get_u32(nested_attr);
		key->phy.skb_mark = skb->mark;
		return 0;

	case OVS_KEY_ATTR_TUNNEL_INFO:
		/* Stores a pointer into the attribute payload, not a copy. */
		OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
		return 0;

	case OVS_KEY_ATTR_ETHERNET:
		return set_eth_addr(skb, key, nla_data(nested_attr));

	case OVS_KEY_ATTR_IPV4:
		return set_ipv4(skb, key, nla_data(nested_attr));

	case OVS_KEY_ATTR_IPV6:
		return set_ipv6(skb, key, nla_data(nested_attr));

	case OVS_KEY_ATTR_TCP:
		return set_tcp(skb, key, nla_data(nested_attr));

	case OVS_KEY_ATTR_UDP:
		return set_udp(skb, key, nla_data(nested_attr));

	case OVS_KEY_ATTR_SCTP:
		return set_sctp(skb, key, nla_data(nested_attr));

	case OVS_KEY_ATTR_MPLS:
		return set_mpls(skb, key, nla_data(nested_attr));

	default:
		return 0;
	}
}

PUSH_VLAN ACTION(datapath/actions.c)

流表PUSH_VLAN动作会在数据包中添加对应的VLAN tag信息。

 /*
  * push_vlan - PUSH_VLAN action: add a VLAN tag to the packet.
  * @skb: packet to tag
  * @key: flow key of the packet
  * @vlan: TPID and TCI of the tag to push
  *
  * If the skb carries no tag in its metadata yet, the key's TCI is set to
  * the new tag's TCI. If it already carries one, the key is invalidated
  * instead.
  *
  * Returns the result of skb_vlan_push().
  */
static int
push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
	  const struct ovs_action_push_vlan *vlan)
{
	if (!vlan_tx_tag_present(skb))
		key->eth.tci = vlan->vlan_tci;
	else
		invalidate_flow_key(key);

	/* The TCI is passed in host order with VLAN_TAG_PRESENT cleared. */
	return skb_vlan_push(skb, vlan->vlan_tpid,
			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}
/*
 * skb_vlan_push - push a VLAN tag onto a packet.
 * @skb: packet to tag
 * @vlan_proto: protocol (TPID) of the new tag
 * @vlan_tci: TCI of the new tag
 *
 * The new tag is always stored in the skb's tag metadata. If that
 * metadata already holds a tag, the old tag is first inserted into the
 * packet data.
 *
 * Returns 0 on success or the error from __vlan_insert_tag().
 */
int
skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
{
	if (vlan_tx_tag_present(skb)) {
		unsigned int data_off = skb->data - skb_mac_header(skb);
		int rc;

		/* __vlan_insert_tag expect skb->data pointing to mac header.
		 * So change skb->data before calling it and change back to
		 * original position later
		 */
		__skb_push(skb, data_off);
		rc = __vlan_insert_tag(skb, skb->vlan_proto,
				       vlan_tx_tag_get(skb));
		/* NOTE(review): skb->data is not restored on this path;
		 * presumably the skb is no longer usable after a failed
		 * insert -- confirm against __vlan_insert_tag().
		 */
		if (rc)
			return rc;

		skb->protocol = skb->vlan_proto;
		skb->mac_len += VLAN_HLEN;
		__skb_pull(skb, data_off);

		/* Fold the newly inserted VLAN header bytes into skb->csum. */
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->csum = csum_add(skb->csum,
					     csum_partial(skb->data
							  + (2 * ETH_ALEN),
							  VLAN_HLEN, 0));
	}

	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
	return 0;
}
/*
 * __vlan_hwaccel_put_tag - record a VLAN tag in the skb's tag fields.
 * @skb: packet to tag
 * @vlan_proto: tag protocol, stored in skb->vlan_proto
 * @vlan_tci: tag TCI, stored in skb->vlan_tci with VLAN_TAG_PRESENT set
 *
 * Only the two skb fields are written here.
 */
static inline void
__vlan_hwaccel_put_tag(struct sk_buff *skb,
		       __be16 vlan_proto, u16 vlan_tci)
{
	skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci;
	skb->vlan_proto = vlan_proto;
}

POP_VLAN ACTION(datapath/actions.c)

流表POP_VLAN动作移除数据包中的Vlan tag信息并更新数据包中的校验和

 /*
  * pop_vlan - POP_VLAN action: remove a VLAN tag from the packet.
  * @skb: packet to untag
  * @key: flow key of the packet
  *
  * After skb_vlan_pop(), the key's TCI is cleared if the skb metadata no
  * longer holds a tag. If a tag is still present there, the key is
  * invalidated instead. The key is updated even when the pop failed.
  *
  * Returns the result of skb_vlan_pop().
  */
static int
pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err = skb_vlan_pop(skb);

	if (!vlan_tx_tag_present(skb))
		key->eth.tci = 0;
	else
		invalidate_flow_key(key);

	return err;
}
int
skb_vlan_pop(struct sk_buff *skb)
{
u16 vlan_tci;
__be16 vlan_proto;
int err;
if (likely(vlan_tx_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
skb->protocol != htons(ETH_P_8021AD)) ||
skb->len < VLAN_ETH_HLEN))
return 0;
err = __skb_vlan_pop(skb, &vlan_tci);
if (err)
return err;
}
/* move next vlan tag to hw accel tag */
if (likely((skb->protocol != htons(ETH_P_8021Q) &&
skb->protocol != htons(ETH_P_8021AD)) ||
skb->len < VLAN_ETH_HLEN))
return 0;
vlan_proto = skb->protocol;
err = __skb_vlan_pop(skb, &vlan_tci);
if (unlikely(err))
return err;
__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
return 0;
}
/*
 * __skb_vlan_pop - remove VLAN header from packet and update csum
 * accordingly.
 * @skb: packet whose in-packet VLAN header is removed
 * @vlan_tci: on success receives the removed header's TCI in host order
 *
 * skb->data is temporarily pushed back to the MAC header for the
 * operation and pulled forward again by the same amount before returning,
 * on both the success and the error path.
 *
 * Returns 0 on success or the error from skb_ensure_writable(), in which
 * case the packet and *vlan_tci are left unmodified.
 */
static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
{
	struct vlan_hdr *vhdr;
	unsigned int offset = skb->data - skb_mac_header(skb);
	int err;

	__skb_push(skb, offset);
	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
	if (unlikely(err))
		goto pull;

	/* Take the VLAN header bytes out of the running checksum. */
	skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);

	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
	*vlan_tci = ntohs(vhdr->h_vlan_TCI);

	/* Slide the two MAC addresses forward over the tag, then drop the
	 * now-unused leading VLAN_HLEN bytes.
	 */
	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
	__skb_pull(skb, VLAN_HLEN);

	vlan_set_encap_proto(skb, vhdr);
	skb->mac_header += VLAN_HLEN;

	if (skb_network_offset(skb) < ETH_HLEN)
		skb_set_network_header(skb, ETH_HLEN);

	skb_reset_mac_len(skb);
pull:
	/* Undo the temporary push made on entry. */
	__skb_pull(skb, offset);

	return err;
}
05-11 13:24