diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 59ed3992c7609540cfebddae99e24eba1a86f6d0..375d812fea36f31dc6fa353be96bc30ba87ce20b 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -705,6 +705,15 @@ enum ovs_nat_attr { #define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1) +/* + * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument. + * @addresses: Source and destination MAC addresses. + * @eth_type: Ethernet type + */ +struct ovs_action_push_eth { + struct ovs_key_ethernet addresses; +}; + /** * enum ovs_action_attr - Action types. * @@ -738,6 +747,10 @@ enum ovs_nat_attr { * is no MPLS label stack, as determined by ethertype, no action is taken. * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related * entries in the flow key. + * @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the + * packet. + * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the + * packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -765,6 +778,8 @@ enum ovs_action_attr { * bits. */ OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */ OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */ + OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ + OVS_ACTION_ATTR_POP_ETH, /* No argument. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 1105c4e29c6275a002fac3bd9b26ab28bf1565b9..514f7bcf7c63ce0f3148e42726df74ff77c0c8af 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -66,6 +66,7 @@ struct ovs_frag_data { u16 vlan_tci; __be16 vlan_proto; unsigned int l2_len; + u8 mac_proto; u8 l2_data[MAX_L2_LEN]; }; @@ -137,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, static void invalidate_flow_key(struct sw_flow_key *key) { - key->eth.type = htons(0); + key->mac_proto |= SW_FLOW_KEY_INVALID; } static bool is_flow_key_valid(const struct sw_flow_key *key) { - return !!key->eth.type; + return !(key->mac_proto & SW_FLOW_KEY_INVALID); } static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, @@ -186,7 +187,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); - update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); + if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) + update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); skb->protocol = mpls->mpls_ethertype; invalidate_flow_key(key); @@ -196,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, const __be16 ethertype) { - struct ethhdr *hdr; int err; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); @@ -212,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - /* mpls_hdr() is used to locate the ethertype field correctly in the - * presence of VLAN tags. - */ - hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); - update_ethertype(skb, hdr, ethertype); + if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) { + struct ethhdr *hdr; + + /* mpls_hdr() is used to locate the ethertype field correctly in the + * presence of VLAN tags. + */ + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); + update_ethertype(skb, hdr, ethertype); + } if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; @@ -312,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } +/* pop_eth does not support VLAN packets as this action is never called + * for them. + */ +static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key) +{ + skb_pull_rcsum(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + +static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_action_push_eth *ethh) +{ + struct ethhdr *hdr; + + /* Add the new Ethernet header */ + if (skb_cow_head(skb, ETH_HLEN) < 0) + return -ENOMEM; + + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + hdr = eth_hdr(skb); + ether_addr_copy(hdr->h_source, ethh->addresses.eth_src); + ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst); + hdr->h_proto = skb->protocol; + + skb_postpush_rcsum(skb, hdr, ETH_HLEN); + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_ETHERNET; + invalidate_flow_key(key); + return 0; +} + static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, __be32 addr, __be32 new_addr) { @@ -673,7 +719,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk skb_reset_mac_len(skb); } - ovs_vport_send(vport, skb); + ovs_vport_send(vport, skb, data->mac_proto); return 0; } @@ -692,7 +738,7 @@ static struct dst_ops ovs_dst_ops = { * ovs_vport_output(), which is called once per fragmented packet. */ static void prepare_frag(struct vport *vport, struct sk_buff *skb, - u16 orig_network_offset) + u16 orig_network_offset, u8 mac_proto) { unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; @@ -705,6 +751,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, data->network_offset = orig_network_offset; data->vlan_tci = skb->vlan_tci; data->vlan_proto = skb->vlan_proto; + data->mac_proto = mac_proto; data->l2_len = hlen; memcpy(&data->l2_data, skb->data, hlen); @@ -713,7 +760,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, } static void ovs_fragment(struct net *net, struct vport *vport, - struct sk_buff *skb, u16 mru, __be16 ethertype) + struct sk_buff *skb, u16 mru, + struct sw_flow_key *key) { u16 orig_network_offset = 0; @@ -727,11 +775,12 @@ static void ovs_fragment(struct net *net, struct vport *vport, goto err; } - if (ethertype == htons(ETH_P_IP)) { + if (key->eth.type == htons(ETH_P_IP)) { struct dst_entry ovs_dst; unsigned long orig_dst; - prepare_frag(vport, skb, orig_network_offset); + prepare_frag(vport, skb, orig_network_offset, + ovs_key_mac_proto(key)); dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_dst.dev = vport->dev; @@ -742,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, ip_do_fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); - } else if (ethertype == htons(ETH_P_IPV6)) { + } else if (key->eth.type == htons(ETH_P_IPV6)) { const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); unsigned long orig_dst; struct rt6_info ovs_rt; @@ -751,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport, goto err; } - prepare_frag(vport, skb, orig_network_offset); + prepare_frag(vport, skb, orig_network_offset, + ovs_key_mac_proto(key)); memset(&ovs_rt, 0, sizeof(ovs_rt)); dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); @@ -765,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, refdst_drop(orig_dst); } else { WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", - ovs_vport_name(vport), ntohs(ethertype), mru, + ovs_vport_name(vport), ntohs(key->eth.type), mru, vport->dev->mtu); goto err; } @@ -785,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, u32 cutlen = OVS_CB(skb)->cutlen; if (unlikely(cutlen > 0)) { - if (skb->len - cutlen > ETH_HLEN) + if (skb->len - cutlen > ovs_mac_header_len(key)) pskb_trim(skb, skb->len - cutlen); else - pskb_trim(skb, ETH_HLEN); + pskb_trim(skb, ovs_mac_header_len(key)); } - if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { - ovs_vport_send(vport, skb); + if (likely(!mru || + (skb->len <= mru + vport->dev->hard_header_len))) { + ovs_vport_send(vport, skb, ovs_key_mac_proto(key)); } else if (mru <= vport->dev->mtu) { struct net *net = read_pnet(&dp->net); - __be16 ethertype = key->eth.type; - - if (!is_flow_key_valid(key)) { - if (eth_p_mpls(skb->protocol)) - ethertype = skb->inner_protocol; - else - ethertype = vlan_get_protocol(skb); - } - ovs_fragment(net, vport, skb, mru, ethertype); + ovs_fragment(net, vport, skb, mru, key); } else { kfree_skb(skb); } @@ -1198,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, if (err) return err == -EINPROGRESS ? 0 : err; break; + + case OVS_ACTION_ATTR_PUSH_ETH: + err = push_eth(skb, key, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_ETH: + err = pop_eth(skb, key); + break; } if (unlikely(err)) { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index fa8760176b7d59106ef699c397e1eae432cfc1c0..1402f1be642dfc350643bdce332d3f043d7f8d8a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -560,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct datapath *dp; - struct ethhdr *eth; struct vport *input_vport; u16 mru = 0; int len; @@ -581,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); - skb_reset_mac_header(packet); - eth = eth_hdr(packet); - - /* Normally, setting the skb 'protocol' field would be handled by a - * call to eth_type_trans(), but it assumes there's a sending - * device, which we may not have. */ - if (eth_proto_is_802_3(eth->h_proto)) - packet->protocol = eth->h_proto; - else - packet->protocol = htons(ETH_P_802_2); - /* Set packet's mru */ if (a[OVS_PACKET_ATTR_MRU]) { mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); @@ -618,6 +606,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; + packet->protocol = flow->key.eth.type; rcu_read_lock(); dp = get_dp_rcu(net, ovs_header->dp_ifindex); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 22087062bd1013e7c526dc95bce379bcaaebc4b9..08aa926cd5cfe5f6b3f47f3dc4df5c970a6ba2a7 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -334,14 +334,17 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) return 1; } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +static void clear_vlan(struct sw_flow_key *key) { - int res; - key->eth.vlan.tci = 0; key->eth.vlan.tpid = 0; key->eth.cvlan.tci = 0; key->eth.cvlan.tpid = 0; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ + int res; if (skb_vlan_tag_present(skb)) { key->eth.vlan.tci = htons(skb->vlan_tci); @@ -483,17 +486,20 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, * * Returns 0 if successful, otherwise a negative errno value. * - * Initializes @skb header pointers as follows: + * Initializes @skb header fields as follows: * - * - skb->mac_header: the Ethernet header. + * - skb->mac_header: the L2 header. * - * - skb->network_header: just past the Ethernet header, or just past the - * VLAN header, to the first byte of the Ethernet payload. + * - skb->network_header: just past the L2 header, or just past the + * VLAN header, to the first byte of the L2 payload. * * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 * on output, then just past the IP header, if one is present and * of a correct length, otherwise the same as skb->network_header. * For other key->eth.type values it is left untouched. + * + * - skb->protocol: the type of the data starting at skb->network_header. + * Equals to key->eth.type. */ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) { @@ -505,28 +511,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) skb_reset_mac_header(skb); - /* Link layer. We are guaranteed to have at least the 14 byte Ethernet - * header in the linear data area. - */ - eth = eth_hdr(skb); - ether_addr_copy(key->eth.src, eth->h_source); - ether_addr_copy(key->eth.dst, eth->h_dest); + /* Link layer. */ + clear_vlan(key); + if (key->mac_proto == MAC_PROTO_NONE) { + if (unlikely(eth_type_vlan(skb->protocol))) + return -EINVAL; - __skb_pull(skb, 2 * ETH_ALEN); - /* We are going to push all headers that we pull, so no need to - * update skb->csum here. - */ + skb_reset_network_header(skb); + } else { + eth = eth_hdr(skb); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ - key->eth.type = parse_ethertype(skb); - if (unlikely(key->eth.type == htons(0))) - return -ENOMEM; + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; + + skb->protocol = parse_ethertype(skb); + if (unlikely(skb->protocol == htons(0))) + return -ENOMEM; - skb_reset_network_header(skb); + skb_reset_network_header(skb); + __skb_push(skb, skb->data - skb_mac_header(skb)); + } skb_reset_mac_len(skb); - __skb_push(skb, skb->data - skb_mac_header(skb)); + key->eth.type = skb->protocol; /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { @@ -721,9 +734,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) return key_extract(skb, key); } +static int key_extract_mac_proto(struct sk_buff *skb) +{ + switch (skb->dev->type) { + case ARPHRD_ETHER: + return MAC_PROTO_ETHERNET; + case ARPHRD_NONE: + if (skb->protocol == htons(ETH_P_TEB)) + return MAC_PROTO_ETHERNET; + return MAC_PROTO_NONE; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { + int res; + /* Extract metadata from packet. */ if (tun_info) { key->tun_proto = ip_tunnel_info_af(tun_info); @@ -751,6 +780,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->phy.skb_mark = skb->mark; ovs_ct_fill_key(skb, key); key->ovs_flow_hash = 0; + res = key_extract_mac_proto(skb); + if (res < 0) + return res; + key->mac_proto = res; key->recirc_id = 0; return key_extract(skb, key); @@ -767,5 +800,29 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, if (err) return err; + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { + /* key_extract assumes that skb->protocol is set-up for + * layer 3 packets which is the case for other callers, + * in particular packets recieved from the network stack. + * Here the correct value can be set from the metadata + * extracted above. + */ + skb->protocol = key->eth.type; + } else { + struct ethhdr *eth; + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + /* Normally, setting the skb 'protocol' field would be + * handled by a call to eth_type_trans(), but it assumes + * there's a sending device, which we may not have. + */ + if (eth_proto_is_802_3(eth->h_proto)) + skb->protocol = eth->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + } + return key_extract(skb, key); } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index ae783f5c6695867a0941037700b36c2d8ef16a58..f61cae7f9030df68c3001e5e63dc2c903b3d1d3f 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -37,6 +37,12 @@ struct sk_buff; +enum sw_flow_mac_proto { + MAC_PROTO_NONE = 0, + MAC_PROTO_ETHERNET, +}; +#define SW_FLOW_KEY_INVALID 0x80 + /* Store options at the end of the array if they are less than the * maximum size. This allows us to get the benefits of variable length * matching for small options. @@ -68,6 +74,7 @@ struct sw_flow_key { u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } __packed phy; /* Safe when right after 'tun_key'. */ + u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */ u8 tun_proto; /* Protocol of encapsulating tunnel. */ u32 ovs_flow_hash; /* Datapath computed hash value. */ u32 recirc_id; /* Recirculation ID. */ @@ -206,6 +213,21 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key) +{ + return key->mac_proto & ~SW_FLOW_KEY_INVALID; +} + +static inline u16 __ovs_mac_header_len(u8 mac_proto) +{ + return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0; +} + +static inline u16 ovs_mac_header_len(const struct sw_flow_key *key) +{ + return __ovs_mac_header_len(ovs_key_mac_proto(key)); +} + static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) { return sfid->ufid_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ae25ded82b3baa779438d3460aa1aa8d5ef964f9..d19044f2b1f4d1216ec7a4f25eaabecbf99c6214 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -123,7 +123,7 @@ static void update_range(struct sw_flow_match *match, static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs, bool log) { - u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; + u64 key_expected = 0; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ /* The following mask attributes allowed only if they @@ -969,10 +969,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match, return 0; } +static int parse_eth_type_from_nlattrs(struct sw_flow_match *match, + u64 *attrs, const struct nlattr **a, + bool is_mask, bool log) +{ + __be16 eth_type; + + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + if (is_mask) { + /* Always exact match EtherType. */ + eth_type = htons(0xffff); + } else if (!eth_proto_is_802_3(eth_type)) { + OVS_NLERR(log, "EtherType %x is less than min %x", + ntohs(eth_type), ETH_P_802_3_MIN); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + return 0; +} + static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask, bool log) { + u8 mac_proto = MAC_PROTO_ETHERNET; + if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -1059,6 +1082,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, sizeof(*cl), is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } + + /* For layer 3 packets the Ethernet type is provided + * and treated as metadata but no MAC addresses are provided. + */ + if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && + (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) + mac_proto = MAC_PROTO_NONE; + + /* Always exact match mac_proto */ + SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask); + + if (mac_proto == MAC_PROTO_NONE) + return parse_eth_type_from_nlattrs(match, attrs, a, is_mask, + log); + return 0; } @@ -1081,33 +1119,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, SW_FLOW_KEY_MEMCPY(match, eth.dst, eth_key->eth_dst, ETH_ALEN, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); - } - if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { - /* VLAN attribute is always parsed before getting here since it - * may occur multiple times. - */ - OVS_NLERR(log, "VLAN attribute unexpected."); - return -EINVAL; - } - - if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { - __be16 eth_type; - - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (is_mask) { - /* Always exact match EtherType. */ - eth_type = htons(0xffff); - } else if (!eth_proto_is_802_3(eth_type)) { - OVS_NLERR(log, "EtherType %x is less than min %x", - ntohs(eth_type), ETH_P_802_3_MIN); + if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { + /* VLAN attribute is always parsed before getting here since it + * may occur multiple times. + */ + OVS_NLERR(log, "VLAN attribute unexpected."); return -EINVAL; } - SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { + err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask, + log); + if (err) + return err; + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + } + } else if (!match->key->eth.type) { + OVS_NLERR(log, "Either Ethernet header or EtherType is required."); + return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { @@ -1556,42 +1587,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (ovs_ct_put_key(output, skb)) goto nla_put_failure; - nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); - if (!nla) - goto nla_put_failure; - - eth_key = nla_data(nla); - ether_addr_copy(eth_key->eth_src, output->eth.src); - ether_addr_copy(eth_key->eth_dst, output->eth.dst); - - if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { - if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) + if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); + if (!nla) goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.vlan.tci) - goto unencap; - if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { - if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + eth_key = nla_data(nla); + ether_addr_copy(eth_key->eth_src, output->eth.src); + ether_addr_copy(eth_key->eth_dst, output->eth.dst); + + if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; - in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.cvlan.tci) + encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.vlan.tci) goto unencap; + + if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + goto nla_put_failure; + in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.cvlan.tci) + goto unencap; + } } - } - if (swkey->eth.type == htons(ETH_P_802_2)) { - /* - * Ethertype 802.2 is represented in the netlink with omitted - * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and - * 0xffff in the mask attribute. Ethertype can also - * be wildcarded. - */ - if (is_mask && output->eth.type) - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, - output->eth.type)) - goto nla_put_failure; - goto unencap; + if (swkey->eth.type == htons(ETH_P_802_2)) { + /* + * Ethertype 802.2 is represented in the netlink with omitted + * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and + * 0xffff in the mask attribute. Ethertype can also + * be wildcarded. + */ + if (is_mask && output->eth.type) + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, + output->eth.type)) + goto nla_put_failure; + goto unencap; + } } if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) @@ -2126,8 +2159,8 @@ static bool validate_masked(u8 *data, int len) static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, - struct sw_flow_actions **sfa, - bool *skip_copy, __be16 eth_type, bool masked, bool log) + struct sw_flow_actions **sfa, bool *skip_copy, + u8 mac_proto, __be16 eth_type, bool masked, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -2157,9 +2190,12 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: - case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_ETHERNET: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; + case OVS_KEY_ATTR_TUNNEL: if (masked) return -EINVAL; /* Masked tunnel set not supported. */ @@ -2324,6 +2360,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) { + u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; @@ -2346,6 +2383,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), [OVS_ACTION_ATTR_CT] = (u32)-1, [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), + [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), + [OVS_ACTION_ATTR_POP_ETH] = 0, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2394,10 +2433,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, } case OVS_ACTION_ATTR_POP_VLAN: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; vlan_tci = htons(0); break; case OVS_ACTION_ATTR_PUSH_VLAN: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; vlan = nla_data(a); if (!eth_type_vlan(vlan->vlan_tpid)) return -EINVAL; @@ -2447,14 +2490,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &skip_copy, eth_type, false, log); + &skip_copy, mac_proto, eth_type, + false, log); if (err) return err; break; case OVS_ACTION_ATTR_SET_MASKED: err = validate_set(a, key, sfa, - &skip_copy, eth_type, true, log); + &skip_copy, mac_proto, eth_type, + true, log); if (err) return err; break; @@ -2474,6 +2519,22 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_PUSH_ETH: + /* Disallow pushing an Ethernet header if one + * is already present */ + if (mac_proto != MAC_PROTO_NONE) + return -EINVAL; + mac_proto = MAC_PROTO_NONE; + break; + + case OVS_ACTION_ATTR_POP_ETH: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; + if (vlan_tci & htons(VLAN_TAG_PRESENT)) + return -EINVAL; + mac_proto = MAC_PROTO_ETHERNET; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index e825753de1e0065944a1e555872efe2198e4f3b0..0389398fa4ab81a57a4b27f47ffc879f8904f446 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -57,8 +57,10 @@ static void netdev_port_receive(struct sk_buff *skb) if (unlikely(!skb)) return; - skb_push(skb, ETH_HLEN); - skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + if (skb->dev->type == ARPHRD_ETHER) { + skb_push(skb, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + } ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: @@ -97,7 +99,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) } if (vport->dev->flags & IFF_LOOPBACK || - vport->dev->type != ARPHRD_ETHER || + (vport->dev->type != ARPHRD_ETHER && + vport->dev->type != ARPHRD_NONE) || ovs_is_internal_dev(vport->dev)) { err = -EINVAL; goto error_put; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 9bb85b35a1fbff2f6558417afbb9b2203dabb212..b6c8524032a0633c8f3262e9ea58e457b6a14598 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -464,9 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, return 0; } -static unsigned int packet_length(const struct sk_buff *skb) +static unsigned int packet_length(const struct sk_buff *skb, + struct net_device *dev) { - unsigned int length = skb->len - ETH_HLEN; + unsigned int length = skb->len - dev->hard_header_len; if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) @@ -480,14 +481,34 @@ static unsigned int packet_length(const struct sk_buff *skb) return length; } -void ovs_vport_send(struct vport *vport, struct sk_buff *skb) +void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) { int mtu = vport->dev->mtu; - if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { + switch (vport->dev->type) { + case ARPHRD_NONE: + if (mac_proto == MAC_PROTO_ETHERNET) { + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + skb->protocol = htons(ETH_P_TEB); + } else if (mac_proto != MAC_PROTO_NONE) { + WARN_ON_ONCE(1); + goto drop; + } + break; + case ARPHRD_ETHER: + if (mac_proto != MAC_PROTO_ETHERNET) + goto drop; + break; + default: + goto drop; + } + + if (unlikely(packet_length(skb, vport->dev) > mtu && + !skb_is_gso(skb))) { net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", vport->dev->name, - packet_length(skb), mtu); + packet_length(skb, vport->dev), mtu); vport->dev->stats.tx_errors++; goto drop; } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 46e5b69927c72f9cec5c5d32034120ad87439961..cda66c26ad0889a0d7ef99b5e14b2f83e4e2ff11 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -197,6 +197,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops); }) void ovs_vport_ops_unregister(struct vport_ops *ops); -void ovs_vport_send(struct vport *vport, struct sk_buff *skb); +void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto); #endif /* vport.h */