提交 e9bf3b07 编写于 作者: D David S. Miller

Merge branch 'tunnel_dst_caching'

Tom Herbert says:

====================
ipv4: Cache dst in tunnels

Version 3 of caching routes in tunnels.

Addressed some comments from Eric in this series.

There are two patches (variants) in the series:
1) One dst cached for each tunnel.
2) Percpu dst cache per tunnel to avoid false sharing

Testing with GRE tunnels on a 32 CPU host with bnx2x (RSS support
for GRE) shows a modest improvement in CPU utilization with these
patches running 200 TCP_RR netperf clients.

Without patches
71.22% CPU utilization
138/180/244 90/95/99% latencies
1.30465e+06 CPU/tps
18318 CPU/tps

With patches
69.84%
142/186/249 90/95/99% latencies
1.30827e+06
18732 CPU/tps
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -38,6 +38,11 @@ struct ip_tunnel_prl_entry { ...@@ -38,6 +38,11 @@ struct ip_tunnel_prl_entry {
struct rcu_head rcu_head; struct rcu_head rcu_head;
}; };
/* One cached route slot; struct ip_tunnel keeps a per-CPU array of these. */
struct ip_tunnel_dst {
/* Cached route, read under RCU; NULL while nothing is cached. */
struct dst_entry __rcu *dst;
/* Held while the cached pointer is being replaced. */
spinlock_t lock;
};
struct ip_tunnel { struct ip_tunnel {
struct ip_tunnel __rcu *next; struct ip_tunnel __rcu *next;
struct hlist_node hash_node; struct hlist_node hash_node;
...@@ -54,6 +59,8 @@ struct ip_tunnel { ...@@ -54,6 +59,8 @@ struct ip_tunnel {
int hlen; /* Precalculated header length */ int hlen; /* Precalculated header length */
int mlink; int mlink;
struct ip_tunnel_dst __percpu *dst_cache;
struct ip_tunnel_parm parms; struct ip_tunnel_parm parms;
/* for SIT */ /* for SIT */
......
...@@ -68,6 +68,63 @@ static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, ...@@ -68,6 +68,63 @@ static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
IP_TNL_HASH_BITS); IP_TNL_HASH_BITS);
} }
/*
 * Install @dst as the cached route in @idst and drop whatever was cached
 * before.
 *
 * @idst: cache slot to update.
 * @dst:  route to cache, or NULL to empty the slot.  The caller hands over
 *        one reference on @dst; it is either kept by the cache or released
 *        here, so the caller must not release it again.
 */
static inline void __tunnel_dst_set(struct ip_tunnel_dst *idst,
				    struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst && (dst->flags & DST_NOCACHE)) {
		/*
		 * Route must not be cached: give back the reference the
		 * caller passed in instead of silently forgetting it.
		 */
		dst_release(dst);
		dst = NULL;
	}

	spin_lock_bh(&idst->lock);
	/* Update side: the slot is protected by idst->lock, not by RCU. */
	old_dst = rcu_dereference_protected(idst->dst,
					    lockdep_is_held(&idst->lock));
	rcu_assign_pointer(idst->dst, dst);
	spin_unlock_bh(&idst->lock);

	/* Drop the cache's reference on the previous route outside the lock. */
	dst_release(old_dst);
}
/* Cache @dst in the slot of tunnel @t that belongs to the current CPU. */
static inline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	struct ip_tunnel_dst *slot = this_cpu_ptr(t->dst_cache);

	__tunnel_dst_set(slot, dst);
}
/* Empty the current CPU's cache slot of tunnel @t. */
static inline void tunnel_dst_reset(struct ip_tunnel *t)
{
	struct dst_entry *none = NULL;

	tunnel_dst_set(t, none);
}
static void tunnel_dst_reset_all(struct ip_tunnel *t)
{
int i;
for_each_possible_cpu(i)
__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
static inline struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
{
struct dst_entry *dst;
rcu_read_lock();
dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
if (dst)
dst_hold(dst);
rcu_read_unlock();
return dst;
}
/*
 * Return a still-valid cached route for tunnel @t, or NULL.
 *
 * @t:      tunnel whose current-CPU cache slot is consulted.
 * @cookie: value passed through to the route's ->check() callback.
 *
 * On success the caller owns one reference on the returned route.  When the
 * cached route is marked obsolete and ->check() rejects it, the slot is
 * emptied and NULL is returned.
 */
struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst = tunnel_dst_get(t);

	if (!dst)
		return NULL;

	if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		tunnel_dst_reset(t);
		/*
		 * tunnel_dst_get() took a reference for us; the reset above
		 * only drops the cache's own reference, so release ours too
		 * or the stale route is leaked.
		 */
		dst_release(dst);
		return NULL;
	}

	return dst;
}
/* Often modified stats are per cpu, other are shared (netdev->stats) */ /* Often modified stats are per cpu, other are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot) struct rtnl_link_stats64 *tot)
...@@ -318,8 +375,7 @@ static struct net_device *__ip_tunnel_create(struct net *net, ...@@ -318,8 +375,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
return ERR_PTR(err); return ERR_PTR(err);
} }
static inline struct rtable *ip_route_output_tunnel(struct net *net, static inline void init_tunnel_flow(struct flowi4 *fl4,
struct flowi4 *fl4,
int proto, int proto,
__be32 daddr, __be32 saddr, __be32 daddr, __be32 saddr,
__be32 key, __u8 tos, int oif) __be32 key, __u8 tos, int oif)
...@@ -331,7 +387,6 @@ static inline struct rtable *ip_route_output_tunnel(struct net *net, ...@@ -331,7 +387,6 @@ static inline struct rtable *ip_route_output_tunnel(struct net *net,
fl4->flowi4_tos = tos; fl4->flowi4_tos = tos;
fl4->flowi4_proto = proto; fl4->flowi4_proto = proto;
fl4->fl4_gre_key = key; fl4->fl4_gre_key = key;
return ip_route_output_key(net, fl4);
} }
static int ip_tunnel_bind_dev(struct net_device *dev) static int ip_tunnel_bind_dev(struct net_device *dev)
...@@ -350,14 +405,14 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ...@@ -350,14 +405,14 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4; struct flowi4 fl4;
struct rtable *rt; struct rtable *rt;
rt = ip_route_output_tunnel(tunnel->net, &fl4, init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
tunnel->parms.iph.protocol, iph->saddr, tunnel->parms.o_key,
iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link);
tunnel->parms.o_key, rt = ip_route_output_key(tunnel->net, &fl4);
RT_TOS(iph->tos),
tunnel->parms.link);
if (!IS_ERR(rt)) { if (!IS_ERR(rt)) {
tdev = rt->dst.dev; tdev = rt->dst.dev;
tunnel_dst_set(tunnel, dst_clone(&rt->dst));
ip_rt_put(rt); ip_rt_put(rt);
} }
if (dev->type != ARPHRD_ETHER) if (dev->type != ARPHRD_ETHER)
...@@ -528,10 +583,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -528,10 +583,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4; struct flowi4 fl4;
u8 tos, ttl; u8 tos, ttl;
__be16 df; __be16 df;
struct rtable *rt; /* Route to the other host */ struct rtable *rt = NULL; /* Route to the other host */
unsigned int max_headroom; /* The extra header space needed */ unsigned int max_headroom; /* The extra header space needed */
__be32 dst; __be32 dst;
int err; int err;
bool connected = true;
inner_iph = (const struct iphdr *)skb_inner_network_header(skb); inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
...@@ -581,27 +637,39 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -581,27 +637,39 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
#endif #endif
else else
goto tx_error; goto tx_error;
connected = false;
} }
tos = tnl_params->tos; tos = tnl_params->tos;
if (tos & 0x1) { if (tos & 0x1) {
tos &= ~0x1; tos &= ~0x1;
if (skb->protocol == htons(ETH_P_IP)) if (skb->protocol == htons(ETH_P_IP)) {
tos = inner_iph->tos; tos = inner_iph->tos;
else if (skb->protocol == htons(ETH_P_IPV6)) connected = false;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
connected = false;
}
} }
rt = ip_route_output_tunnel(tunnel->net, &fl4, init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
protocol, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
dst, tnl_params->saddr,
tunnel->parms.o_key, if (connected)
RT_TOS(tos), rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
tunnel->parms.link);
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
if (IS_ERR(rt)) { if (IS_ERR(rt)) {
dev->stats.tx_carrier_errors++; dev->stats.tx_carrier_errors++;
goto tx_error; goto tx_error;
} }
if (connected)
tunnel_dst_set(tunnel, dst_clone(&rt->dst));
}
if (rt->dst.dev == dev) { if (rt->dst.dev == dev) {
ip_rt_put(rt); ip_rt_put(rt);
dev->stats.collisions++; dev->stats.collisions++;
...@@ -696,6 +764,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, ...@@ -696,6 +764,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (set_mtu) if (set_mtu)
dev->mtu = mtu; dev->mtu = mtu;
} }
tunnel_dst_reset_all(t);
netdev_state_change(dev); netdev_state_change(dev);
} }
...@@ -811,6 +880,7 @@ static void ip_tunnel_dev_free(struct net_device *dev) ...@@ -811,6 +880,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
gro_cells_destroy(&tunnel->gro_cells); gro_cells_destroy(&tunnel->gro_cells);
free_percpu(tunnel->dst_cache);
free_percpu(dev->tstats); free_percpu(dev->tstats);
free_netdev(dev); free_netdev(dev);
} }
...@@ -989,8 +1059,21 @@ int ip_tunnel_init(struct net_device *dev) ...@@ -989,8 +1059,21 @@ int ip_tunnel_init(struct net_device *dev)
u64_stats_init(&ipt_stats->syncp); u64_stats_init(&ipt_stats->syncp);
} }
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
free_percpu(dev->tstats);
return -ENOMEM;
}
for_each_possible_cpu(i) {
struct ip_tunnel_dst *idst = per_cpu_ptr(tunnel->dst_cache, i);
idst-> dst = NULL;
spin_lock_init(&idst->lock);
}
err = gro_cells_init(&tunnel->gro_cells, dev); err = gro_cells_init(&tunnel->gro_cells, dev);
if (err) { if (err) {
free_percpu(tunnel->dst_cache);
free_percpu(dev->tstats); free_percpu(dev->tstats);
return err; return err;
} }
...@@ -1015,6 +1098,8 @@ void ip_tunnel_uninit(struct net_device *dev) ...@@ -1015,6 +1098,8 @@ void ip_tunnel_uninit(struct net_device *dev)
/* fb_tunnel_dev will be unregisted in net-exit call. */ /* fb_tunnel_dev will be unregisted in net-exit call. */
if (itn->fb_tunnel_dev != dev) if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(netdev_priv(dev)); ip_tunnel_del(netdev_priv(dev));
tunnel_dst_reset_all(tunnel);
} }
EXPORT_SYMBOL_GPL(ip_tunnel_uninit); EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册