diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 732f8c6ae975877ccd9c5873369f9be6490f06e9..9e25b1bc31da5b9ca98afc1f6f4e9eb5f3f7e42a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -38,6 +38,11 @@ struct ip_tunnel_prl_entry { struct rcu_head rcu_head; }; +struct ip_tunnel_dst { + struct dst_entry __rcu *dst; + spinlock_t lock; +}; + struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; @@ -54,6 +59,8 @@ struct ip_tunnel { int hlen; /* Precalculated header length */ int mlink; + struct ip_tunnel_dst __percpu *dst_cache; + struct ip_tunnel_parm parms; /* for SIT */ diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 90ff9570d7d4def935f3224514312fc217812d9c..e2c9cff26eb5d8024a6fbf7ffda3805f610a2e48 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -68,6 +68,63 @@ static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, IP_TNL_HASH_BITS); } +static inline void __tunnel_dst_set(struct ip_tunnel_dst *idst, + struct dst_entry *dst) +{ + struct dst_entry *old_dst; + + if (dst && (dst->flags & DST_NOCACHE)) + dst = NULL; + + spin_lock_bh(&idst->lock); + old_dst = rcu_dereference(idst->dst); + rcu_assign_pointer(idst->dst, dst); + dst_release(old_dst); + spin_unlock_bh(&idst->lock); +} + +static inline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst) +{ + __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst); +} + +static inline void tunnel_dst_reset(struct ip_tunnel *t) +{ + tunnel_dst_set(t, NULL); +} + +static void tunnel_dst_reset_all(struct ip_tunnel *t) +{ + int i; + + for_each_possible_cpu(i) + __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); +} + +static inline struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) +{ + struct dst_entry *dst; + + rcu_read_lock(); + dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); + if (dst) + dst_hold(dst); + rcu_read_unlock(); + return dst; +} + +struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie) +{ + struct dst_entry *dst = tunnel_dst_get(t); + + if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + tunnel_dst_reset(t); + return NULL; + } + + return dst; +} + /* Often modified stats are per cpu, other are shared (netdev->stats) */ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) @@ -318,11 +375,10 @@ static struct net_device *__ip_tunnel_create(struct net *net, return ERR_PTR(err); } -static inline struct rtable *ip_route_output_tunnel(struct net *net, - struct flowi4 *fl4, - int proto, - __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif) +static inline void init_tunnel_flow(struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; @@ -331,7 +387,6 @@ static inline struct rtable *ip_route_output_tunnel(struct net *net, fl4->flowi4_tos = tos; fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; - return ip_route_output_key(net, fl4); } static int ip_tunnel_bind_dev(struct net_device *dev) @@ -350,14 +405,14 @@ static int ip_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_tunnel(tunnel->net, &fl4, - tunnel->parms.iph.protocol, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); + init_tunnel_flow(&fl4, iph->protocol, iph->daddr, + iph->saddr, tunnel->parms.o_key, + RT_TOS(iph->tos), tunnel->parms.link); + rt = ip_route_output_key(tunnel->net, &fl4); + if (!IS_ERR(rt)) { tdev = rt->dst.dev; + tunnel_dst_set(tunnel, dst_clone(&rt->dst)); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@ -528,10 +583,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4; u8 tos, ttl; __be16 df; - struct rtable *rt; /* Route to the other host */ + struct rtable *rt = NULL; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; + bool connected = true; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -581,27 +637,39 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, #endif else goto tx_error; + + connected = false; } tos = tnl_params->tos; if (tos & 0x1) { tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) + if (skb->protocol == htons(ETH_P_IP)) { tos = inner_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + connected = false; + } else if (skb->protocol == htons(ETH_P_IPV6)) { tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + connected = false; + } } - rt = ip_route_output_tunnel(tunnel->net, &fl4, - protocol, - dst, tnl_params->saddr, - tunnel->parms.o_key, - RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); + + if (connected) + rt = (struct rtable *)tunnel_dst_check(tunnel, 0); + + if (!rt) { + rt = ip_route_output_key(tunnel->net, &fl4); + + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (connected) + tunnel_dst_set(tunnel, dst_clone(&rt->dst)); } + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; @@ -696,6 +764,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (set_mtu) dev->mtu = mtu; } + tunnel_dst_reset_all(t); netdev_state_change(dev); } @@ -811,6 +880,7 @@ static void ip_tunnel_dev_free(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); gro_cells_destroy(&tunnel->gro_cells); + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -989,8 +1059,21 @@ int ip_tunnel_init(struct net_device *dev) u64_stats_init(&ipt_stats->syncp); } + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + + for_each_possible_cpu(i) { + struct ip_tunnel_dst *idst = per_cpu_ptr(tunnel->dst_cache, i); + idst-> dst = NULL; + spin_lock_init(&idst->lock); + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); return err; } @@ -1015,6 +1098,8 @@ void ip_tunnel_uninit(struct net_device *dev) /* fb_tunnel_dev will be unregisted in net-exit call. */ if (itn->fb_tunnel_dev != dev) ip_tunnel_del(netdev_priv(dev)); + + tunnel_dst_reset_all(tunnel); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit);