diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index b9ebd0d18a522db0da8603f5cb401c40d5645386..f184fb5bd11046d814f0b03e122bc7419bc7cdf5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -298,8 +298,10 @@ config NLMON config NET_VRF tristate "Virtual Routing and Forwarding (Lite)" - depends on IP_MULTIPLE_TABLES && IPV6_MULTIPLE_TABLES + depends on IP_MULTIPLE_TABLES depends on NET_L3_MASTER_DEV + depends on IPV6 || IPV6=n + depends on IPV6_MULTIPLE_TABLES || IPV6=n ---help--- This option enables the support for mapping interfaces into VRF's. The support enables VRF devices. diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 191579aeab1695957e272d54adf630eb57f7ae14..92fa3e1ea65cca564907a43a78a859b6063f7f65 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,7 @@ struct slave_queue { struct net_vrf { struct slave_queue queue; struct rtable *rth; + struct rt6_info *rt6; u32 tb_id; }; @@ -104,12 +106,56 @@ static struct dst_ops vrf_dst_ops = { .default_advmss = vrf_default_advmss, }; +/* neighbor handling is done with actual device; do not want + * to flip skb->dev for those ndisc packets. This really fails + * for multiple next protocols (e.g., NEXTHDR_HOP). But it is + * a start. + */ +#if IS_ENABLED(CONFIG_IPV6) +static bool check_ipv6_frame(const struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data; + size_t hlen = sizeof(*ipv6h); + bool rc = true; + + if (skb->len < hlen) + goto out; + + if (ipv6h->nexthdr == NEXTHDR_ICMP) { + const struct icmp6hdr *icmph; + + if (skb->len < hlen + sizeof(*icmph)) + goto out; + + icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h)); + switch (icmph->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + rc = false; + break; + } + } + +out: + return rc; +} +#else +static bool check_ipv6_frame(const struct sk_buff *skb) +{ + return false; +} +#endif + static bool is_ip_rx_frame(struct sk_buff *skb) { switch (skb->protocol) { case htons(ETH_P_IP): - case htons(ETH_P_IPV6): return true; + case htons(ETH_P_IPV6): + return check_ipv6_frame(skb); } return false; } @@ -169,12 +215,53 @@ static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, return stats; } +#if IS_ENABLED(CONFIG_IPV6) +static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, + struct net_device *dev) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); + struct flowi6 fl6 = { + /* needed to match OIF rule */ + .flowi6_oif = dev->ifindex, + .flowi6_iif = LOOPBACK_IFINDEX, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), + .flowi6_mark = skb->mark, + .flowi6_proto = iph->nexthdr, + .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, + }; + int ret = NET_XMIT_DROP; + struct dst_entry *dst; + struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst == dst_null) + goto err; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + ret = ip6_local_out(net, skb->sk, skb); + if (unlikely(net_xmit_eval(ret))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + + return ret; +err: + vrf_tx_error(dev, skb); + return NET_XMIT_DROP; +} +#else static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { vrf_tx_error(dev, skb); return NET_XMIT_DROP; } +#endif static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4, struct net_device *vrf_dev) @@ -269,6 +356,157 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +#if IS_ENABLED(CONFIG_IPV6) +static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie) +{ + return dst; +} + +static struct dst_ops vrf_dst_ops6 = { + .family = AF_INET6, + .local_out = ip6_local_out, + .check = vrf_ip6_check, + .mtu = vrf_v4_mtu, + .destroy = vrf_dst_destroy, + .default_advmss = vrf_default_advmss, +}; + +static int init_dst_ops6_kmem_cachep(void) +{ + vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache", + sizeof(struct rt6_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + + if (!vrf_dst_ops6.kmem_cachep) + return -ENOMEM; + + return 0; +} + +static void free_dst_ops6_kmem_cachep(void) +{ + kmem_cache_destroy(vrf_dst_ops6.kmem_cachep); +} + +static int vrf_input6(struct sk_buff *skb) +{ + skb->dev->stats.rx_errors++; + kfree_skb(skb); + return 0; +} + +/* modelled after ip6_finish_output2 */ +static int vrf_finish_output6(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + struct neighbour *neigh; + struct in6_addr *nexthop; + int ret; + + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + + rcu_read_lock_bh(); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + if (!IS_ERR(neigh)) { + ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); + + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(skb); + return -EINVAL; +} + +/* modelled after ip6_output */ +static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + vrf_finish_output6, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); +} + +static void vrf_rt6_destroy(struct net_vrf *vrf) +{ + dst_destroy(&vrf->rt6->dst); + free_percpu(vrf->rt6->rt6i_pcpu); + vrf->rt6 = NULL; +} + +static int vrf_rt6_create(struct net_device *dev) +{ + struct net_vrf *vrf = netdev_priv(dev); + struct dst_entry *dst; + struct rt6_info *rt6; + int cpu; + int rc = -ENOMEM; + + rt6 = dst_alloc(&vrf_dst_ops6, dev, 0, + DST_OBSOLETE_NONE, + (DST_HOST | DST_NOPOLICY | DST_NOXFRM)); + if (!rt6) + goto out; + + dst = &rt6->dst; + + rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL); + if (!rt6->rt6i_pcpu) { + dst_destroy(dst); + goto out; + } + for_each_possible_cpu(cpu) { + struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu); + *p = NULL; + } + + memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst)); + + INIT_LIST_HEAD(&rt6->rt6i_siblings); + INIT_LIST_HEAD(&rt6->rt6i_uncached); + + rt6->dst.input = vrf_input6; + rt6->dst.output = vrf_output6; + + rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id); + + atomic_set(&rt6->dst.__refcnt, 2); + + vrf->rt6 = rt6; + rc = 0; +out: + return rc; +} +#else +static int init_dst_ops6_kmem_cachep(void) +{ + return 0; +} + +static void free_dst_ops6_kmem_cachep(void) +{ +} + +static void vrf_rt6_destroy(struct net_vrf *vrf) +{ +} + +static int vrf_rt6_create(struct net_device *dev) +{ + return 0; +} +#endif + /* modelled after ip_finish_output2 */ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -490,6 +728,7 @@ static void vrf_dev_uninit(struct net_device *dev) struct slave *slave, *next; vrf_rtable_destroy(vrf); + vrf_rt6_destroy(vrf); list_for_each_entry_safe(slave, next, head, list) vrf_del_slave(dev, slave->dev); @@ -513,10 +752,15 @@ static int vrf_dev_init(struct net_device *dev) if (!vrf->rth) goto out_stats; + if (vrf_rt6_create(dev) != 0) + goto out_rth; + dev->flags = IFF_MASTER | IFF_NOARP; return 0; +out_rth: + vrf_rtable_destroy(vrf); out_stats: free_percpu(dev->dstats); dev->dstats = NULL; @@ -586,10 +830,30 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) fl4->flowi4_scope = scope; } +#if IS_ENABLED(CONFIG_IPV6) +static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, + const struct flowi6 *fl6) +{ + struct rt6_info *rt = NULL; + + if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { + struct net_vrf *vrf = netdev_priv(dev); + + rt = vrf->rt6; + atomic_inc(&rt->dst.__refcnt); + } + + return (struct dst_entry *)rt; +} +#endif + static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_get_saddr = vrf_get_saddr, +#if IS_ENABLED(CONFIG_IPV6) + .l3mdev_get_rt6_dst = vrf_get_rt6_dst, +#endif }; static void vrf_get_drvinfo(struct net_device *dev, @@ -731,6 +995,10 @@ static int __init vrf_init_module(void) if (!vrf_dst_ops.kmem_cachep) return -ENOMEM; + rc = init_dst_ops6_kmem_cachep(); + if (rc != 0) + goto error2; + register_netdevice_notifier(&vrf_notifier_block); rc = rtnl_link_register(&vrf_link_ops); @@ -741,6 +1009,8 @@ static int __init vrf_init_module(void) error: unregister_netdevice_notifier(&vrf_notifier_block); + free_dst_ops6_kmem_cachep(); +error2: kmem_cache_destroy(vrf_dst_ops.kmem_cachep); return rc; } @@ -750,6 +1020,7 @@ static void __exit vrf_cleanup_module(void) rtnl_link_unregister(&vrf_link_ops); unregister_netdevice_notifier(&vrf_notifier_block); kmem_cache_destroy(vrf_dst_ops.kmem_cachep); + free_dst_ops6_kmem_cachep(); } module_init(vrf_init_module); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 44a19a1711048edf24e56c4f298f86a7cdd77789..774d85b2d5d97734b79eadea20f090ebf684b057 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -19,14 +19,22 @@ * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * * @l3mdev_get_saddr: Get source address for a flow + * + * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device */ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); + + /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, const struct flowi4 *fl4); void (*l3mdev_get_saddr)(struct net_device *dev, struct flowi4 *fl4); + + /* IPv6 ops */ + struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, + const struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -123,6 +131,31 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex, } } +static inline struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev, + const struct flowi6 *fl6) +{ + if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rt6_dst) + return dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + + return NULL; +} + +static inline +struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net, + const struct flowi6 *fl6) +{ + struct dst_entry *dst = NULL; + struct net_device *dev; + + dev = dev_get_by_index(net, fl6->flowi6_oif); + if (dev) { + dst = l3mdev_get_rt6_dst(dev, fl6); + dev_put(dev); + } + + return dst; +} + #else static inline int l3mdev_master_ifindex_rcu(struct net_device *dev) @@ -171,6 +204,19 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) { } + +static inline +struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev, + const struct flowi6 *fl6) +{ + return NULL; +} +static inline +struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net, + const struct flowi6 *fl6) +{ + return NULL; +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c8380f1876f193fa86c05375e055e31be165167a..f0326aae7a02dee3a37c3f49f99a67abe251f2d6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include @@ -2146,7 +2147,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -2179,8 +2180,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; - table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2211,7 +2213,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, static void addrconf_add_mroute(struct net_device *dev) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_LOCAL, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_dst_len = 8, @@ -3029,6 +3031,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { struct in6_addr addr; + /* no link local addresses on L3 master devices */ + if (netif_is_l3_master(idev->dev)) + return; + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6c2b2132c8d328e4d947c3b0b8216ea40f582f90..efb1c00f227028e536d3b6eca2b3a430899e4770 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -68,6 +68,7 @@ #include #include #include +#include #include @@ -496,6 +497,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); + dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; @@ -575,7 +579,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = skb->dev->ifindex; + fl6.flowi6_oif = l3mdev_fib_oif(skb->dev); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7d2e0023c72dbe2e466b35ffb1c6f0c0446af6da..09fddf70cca4ba885e1bfe942807753a9b073bc2 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -264,6 +264,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return NULL; } +EXPORT_SYMBOL_GPL(fib6_get_table); static void __net_init fib6_tables_init(struct net *net) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 32583b507c2ee7613cf9d27030d476d602d866bc..23f97c4783bbaf7a810eb77992bceab20ddb6925 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -55,6 +55,7 @@ #include #include #include +#include static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -885,7 +886,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { dst_release(dst); dst = NULL; } @@ -1037,7 +1039,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; if (!fl6->flowi6_oif) - fl6->flowi6_oif = dst->dev->ifindex; + fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b18012f9f9fcba0b7d1f59b2a156f0d35b28b63a..3e0f855e1bead049064a284494eda378f85ae47e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include @@ -147,6 +148,7 @@ struct neigh_table nd_tbl = { .gc_thresh2 = 512, .gc_thresh3 = 1024, }; +EXPORT_SYMBOL_GPL(nd_tbl); static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { @@ -441,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; + int oif = l3mdev_fib_oif(skb->dev); - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); + if (oif != skb->dev->ifindex) + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -766,7 +771,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { - +have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { /* @@ -792,6 +797,18 @@ static void ndisc_recv_ns(struct sk_buff *skb) } else { struct net *net = dev_net(dev); + /* perhaps an address on the master device */ + if (netif_is_l3_slave(dev)) { + struct net_device *mdev; + + mdev = netdev_master_upper_dev_get_rcu(dev); + if (mdev) { + ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1); + if (ifp) + goto have_ifp; + } + } + idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? */ @@ -1483,6 +1500,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct flowi6 fl6; int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; + int oif = l3mdev_fib_oif(dev); bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { @@ -1499,7 +1517,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); + &saddr_buf, &ipv6_hdr(skb)->saddr, oif); + + if (oif != skb->dev->ifindex) + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index db5b54ad59125f0fd2fddf1d7a06078633d294fb..5fc1149fe91d85bb26c18ac5a74d1728146d8b78 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -61,6 +61,7 @@ #include #include #include +#include #include @@ -1044,6 +1045,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + oif = 0; + redo_rt6_select: rt = rt6_select(fn, oif, strict); if (rt->rt6i_nsiblings) @@ -1141,7 +1145,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = skb->dev->ifindex, + .flowi6_iif = l3mdev_fib_oif(skb->dev), .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -1165,8 +1169,13 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { + struct dst_entry *dst; int flags = 0; + dst = l3mdev_rt6_dst_by_oif(net, fl6); + if (dst) + return dst; + fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || @@ -2263,7 +2272,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = ifindex, .fc_dst_len = prefixlen, @@ -2274,6 +2282,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; + cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO; cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2314,7 +2323,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -2361,7 +2370,8 @@ static void rtmsg_to_fib6_config(struct net *net, { memset(cfg, 0, sizeof(*cfg)); - cfg->fc_table = RT6_TABLE_MAIN; + cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? + : RT6_TABLE_MAIN; cfg->fc_ifindex = rtmsg->rtmsg_ifindex; cfg->fc_metric = rtmsg->rtmsg_metric; cfg->fc_expires = rtmsg->rtmsg_info; @@ -2470,6 +2480,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast) { + u32 tb_id; struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, DST_NOCOUNT); @@ -2492,7 +2503,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); + tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; + rt->rt6i_table = fib6_get_table(net, tb_id); rt->dst.flags |= DST_NOCACHE; atomic_set(&rt->dst.__refcnt, 1); @@ -3254,6 +3266,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) } else { fl6.flowi6_oif = oif; + if (netif_index_is_l3_master(net, oif)) { + fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | + FLOWI_FLAG_SKIP_NH_OIF; + } + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); }