diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1a8132eb2a3ec150fb1563a3d24ab03892b8aeab..cfdcfd67d02a9a894bb5b151d69ed273569c557b 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -535,6 +535,122 @@ static int ipvlan_process_outbound(struct sk_buff *skb) return ret; } +static int ipvlan_process_v4_forward(struct sk_buff *skb) +{ + const struct iphdr *ip4h = ip_hdr(skb); + struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); + struct rtable *rt; + int err, ret = NET_XMIT_DROP; + struct flowi4 fl4 = { + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, + .daddr = ip4h->daddr, + .saddr = ip4h->saddr, + }; + + rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto err; + + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + ip_rt_put(rt); + goto err; + } + skb_dst_set(skb, &rt->dst); + err = ip_local_out(net, skb->sk, skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + goto out; +err: + dev->stats.tx_errors++; + kfree_skb(skb); +out: + return ret; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int ipvlan_process_v6_forward(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); + struct dst_entry *dst; + int err, ret = NET_XMIT_DROP; + struct flowi6 fl6 = { + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowi6_flags = FLOWI_FLAG_ANYSRC, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + ret = dst->error; + dst_release(dst); + goto err; + } + skb_dst_set(skb, dst); + err = ip6_local_out(net, skb->sk, skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + goto out; +err: + dev->stats.tx_errors++; + kfree_skb(skb); +out: + return ret; +} +#else +static int ipvlan_process_v6_forward(struct sk_buff *skb) +{ + return NET_XMIT_DROP; +} +#endif + +static int ipvlan_process_forward(struct sk_buff *skb) +{ + struct ethhdr *ethh = eth_hdr(skb); + int ret = NET_XMIT_DROP; + + /* In this mode we dont care about multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + + /* The ipvlan is a pseudo-L2 device, so the packets that we receive + * will have L2; which need to discarded and processed further + * in the net-ns of the main-device. + */ + if (skb_mac_header_was_set(skb)) { + skb_pull(skb, sizeof(*ethh)); + skb->mac_header = (typeof(skb->mac_header))~0U; + skb_reset_network_header(skb); + } + + if (skb->protocol == htons(ETH_P_IPV6)) { + ret = ipvlan_process_v6_forward(skb); + } else if (skb->protocol == htons(ETH_P_IP)) { + ret = ipvlan_process_v4_forward(skb); + } else { + pr_warn_ratelimited("Dropped outbound packet type=%x\n", + ntohs(skb->protocol)); + kfree_skb(skb); + } +out: + return ret; +} + static void ipvlan_multicast_enqueue(struct ipvl_port *port, struct sk_buff *skb, bool tx_pkt) { @@ -632,6 +748,46 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) return dev_queue_xmit(skb); } +static int ipvlan_xmit_mode_l2e(struct sk_buff *skb, struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ethhdr *eth = eth_hdr(skb); + struct ipvl_addr *addr; + void *lyr3h; + int addr_type; + + if (!ipvlan_is_vepa(ipvlan->port) && + ether_addr_equal(eth->h_dest, eth->h_source)) { + lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); + if (lyr3h) { + addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, + addr_type, true); + if (addr) { + if (ipvlan_is_private(ipvlan->port)) { + consume_skb(skb); + return NET_XMIT_DROP; + } + return ipvlan_rcv_frame(addr, &skb, true); + } + } + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + + /* maybe the packet need been forward */ + skb->dev = ipvlan->phy_dev; + ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev); + return ipvlan_process_forward(skb); + } else if (is_multicast_ether_addr(eth->h_dest)) { + ipvlan_skb_crossing_ns(skb, NULL); + ipvlan_multicast_enqueue(ipvlan->port, skb, true); + return NET_XMIT_SUCCESS; + } + + skb->dev = ipvlan->phy_dev; + return dev_queue_xmit(skb); +} + int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); @@ -649,6 +805,8 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) case IPVLAN_MODE_L3: case IPVLAN_MODE_L3S: return ipvlan_xmit_mode_l3(skb, dev); + case IPVLAN_MODE_L2E: + return ipvlan_xmit_mode_l2e(skb, dev); } /* Should not reach here */ @@ -730,6 +888,36 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, return ret; } +static rx_handler_result_t ipvlan_handle_mode_l2e(struct sk_buff **pskb, + struct ipvl_port *port) +{ + struct sk_buff *skb = *pskb; + struct ethhdr *eth = eth_hdr(skb); + rx_handler_result_t ret = RX_HANDLER_PASS; + + if (is_multicast_ether_addr(eth->h_dest)) { + if (ipvlan_external_frame(skb, port)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + /* External frames are queued for device local + * distribution, but a copy is given to master + * straight away to avoid sending duplicates later + * when work-queue processes this frame. This is + * achieved by returning RX_HANDLER_PASS. + */ + if (nskb) { + ipvlan_skb_crossing_ns(nskb, NULL); + ipvlan_multicast_enqueue(port, nskb, false); + } + } + } else { + /* Perform like l3 mode for non-multicast packet */ + ret = ipvlan_handle_mode_l3(pskb, port); + } + + return ret; +} + rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; @@ -743,6 +931,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); + case IPVLAN_MODE_L2E: + return ipvlan_handle_mode_l2e(pskb, port); case IPVLAN_MODE_L3S: return RX_HANDLER_PASS; } diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 5fb541897863c7ba6a0ff4bcc12a5a1981baeaac..8afae55538fbdea2f279f17a4c7670f1370464b2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,6 +9,10 @@ #include "ipvlan.h" +static int ipvlan_default_mode = IPVLAN_MODE_L3; +module_param(ipvlan_default_mode, int, 0400); +MODULE_PARM_DESC(ipvlan_default_mode, "set ipvlan default mode: 0 for l2, 1 for l3, 2 for l2e, 3 for l3s, others invalid now"); + static unsigned int ipvlan_netid __read_mostly; struct ipvlan_netns { @@ -583,7 +587,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, struct ipvl_port *port; struct net_device *phy_dev; int err; - u16 mode = IPVLAN_MODE_L3; + u16 mode = ipvlan_default_mode; if (!tb[IFLA_LINK]) return -EINVAL; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 43391e2d1153adb701433d6794702b73f2d60297..9d8eac55da5a3e585a37fca763fa9faa98e32f22 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -493,6 +493,7 @@ enum { enum ipvlan_mode { IPVLAN_MODE_L2 = 0, IPVLAN_MODE_L3, + IPVLAN_MODE_L2E, IPVLAN_MODE_L3S, IPVLAN_MODE_MAX };