提交 7d75c0cb 编写于 作者: D David S. Miller

Merge branch 'net-introduce-and-use-route-hint'

Paolo Abeni says:

====================
net: introduce and use route hint

This series leverages the listification infrastructure to avoid
unnecessary route lookup on ingress packets. In absence of custom rules,
packets with equal daddr will usually land on the same dst.

When processing packet bursts (lists) we can easily reference the previous
dst entry. When we hit the 'same destination' condition we can avoid the
route lookup, coping the already available dst.

Detailed performance numbers are available in the individual commit
messages.

v3 -> v4:
 - move helpers to their own patches (Eric D.)
 - enable hints for SUBTREE builds (David A.)
 - re-enable hints for ipv4 forward (David A.)

v2 -> v3:
 - use fib*_has_custom_rules() helpers (David A.)
 - add ip*_extract_route_hint() helper (Edward C.)
 - use prev skb as hint instead of copying data (Willem )

v1 -> v2:
 - fix build issue with !CONFIG_IP*_MULTIPLE_TABLES
 - fix potential race in ip6_list_rcv_finish()
====================
Acked-by: NEdward Cree <ecree@solarflare.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -90,7 +90,32 @@ struct fib6_gc_args {
#ifndef CONFIG_IPV6_SUBTREES
#define FIB6_SUBTREE(fn) NULL
static inline bool fib6_routes_require_src(const struct net *net)
{
return false;
}
static inline void fib6_routes_require_src_inc(struct net *net) {}
static inline void fib6_routes_require_src_dec(struct net *net) {}
#else
static inline bool fib6_routes_require_src(const struct net *net)
{
return net->ipv6.fib6_routes_require_src > 0;
}
static inline void fib6_routes_require_src_inc(struct net *net)
{
net->ipv6.fib6_routes_require_src++;
}
static inline void fib6_routes_require_src_dec(struct net *net)
{
net->ipv6.fib6_routes_require_src--;
}
#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1))
#endif
......@@ -212,6 +237,11 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
return ((struct rt6_info *)dst)->rt6i_idev;
}
static inline bool fib6_requires_src(const struct fib6_info *rt)
{
return rt->fib6_src.plen > 0;
}
static inline void fib6_clean_expires(struct fib6_info *f6i)
{
f6i->fib6_flags &= ~RTF_EXPIRES;
......@@ -502,6 +532,11 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
}
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
static inline bool fib6_has_custom_rules(const struct net *net)
{
return net->ipv6.fib6_has_custom_rules;
}
int fib6_rules_init(void);
void fib6_rules_cleanup(void);
bool fib6_rule_default(const struct fib_rule *rule);
......@@ -527,6 +562,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net,
return true;
}
#else
static inline bool fib6_has_custom_rules(const struct net *net)
{
return false;
}
static inline int fib6_rules_init(void)
{
return 0;
......
......@@ -311,6 +311,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
return err;
}
static inline bool fib4_has_custom_rules(const struct net *net)
{
return false;
}
static inline bool fib4_rule_default(const struct fib_rule *rule)
{
return true;
......@@ -378,6 +383,11 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp,
return err;
}
static inline bool fib4_has_custom_rules(const struct net *net)
{
return net->ipv4.fib_has_custom_rules;
}
bool fib4_rule_default(const struct fib_rule *rule);
int fib4_rules_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
......
......@@ -83,6 +83,9 @@ struct netns_ipv6 {
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
unsigned int fib6_rules_require_fldissect;
bool fib6_has_custom_rules;
#ifdef CONFIG_IPV6_SUBTREES
unsigned int fib6_routes_require_src;
#endif
struct rt6_info *ip6_prohibit_entry;
struct rt6_info *ip6_blk_hole_entry;
struct fib6_table *fib6_local_tbl;
......
......@@ -185,6 +185,10 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin,
struct fib_result *res);
int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin,
const struct sk_buff *hint);
static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin)
{
......
......@@ -70,11 +70,6 @@ static int __net_init fib4_rules_init(struct net *net)
fib_free_table(main_table);
return -ENOMEM;
}
static bool fib4_has_custom_rules(struct net *net)
{
return false;
}
#else
struct fib_table *fib_new_table(struct net *net, u32 id)
......@@ -131,11 +126,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
}
return NULL;
}
static bool fib4_has_custom_rules(struct net *net)
{
return net->ipv4.fib_has_custom_rules;
}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
static void fib_replace_table(struct net *net, struct fib_table *old,
......
......@@ -302,16 +302,31 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
return true;
}
static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
const struct sk_buff *hint)
{
return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
ip_hdr(hint)->tos == iph->tos;
}
INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev)
struct sk_buff *skb, struct net_device *dev,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
struct rtable *rt;
int err;
if (ip_can_use_hint(skb, iph, hint)) {
err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
dev, hint);
if (unlikely(err))
goto drop_error;
}
if (net->ipv4.sysctl_ip_early_demux &&
!skb_dst(skb) &&
!skb->sk &&
......@@ -408,7 +423,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
if (!skb)
return NET_RX_SUCCESS;
ret = ip_rcv_finish_core(net, sk, skb, dev);
ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
if (ret != NET_RX_DROP)
ret = dst_input(skb);
return ret;
......@@ -535,11 +550,20 @@ static void ip_sublist_rcv_finish(struct list_head *head)
}
}
static struct sk_buff *ip_extract_route_hint(const struct net *net,
struct sk_buff *skb, int rt_type)
{
if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
return NULL;
return skb;
}
static void ip_list_rcv_finish(struct net *net, struct sock *sk,
struct list_head *head)
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
......@@ -554,11 +578,14 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
skb = l3mdev_ip_rcv(skb);
if (!skb)
continue;
if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
continue;
dst = skb_dst(skb);
if (curr_dst != dst) {
hint = ip_extract_route_hint(net, skb,
((struct rtable *)dst)->rt_type);
/* dispatch old sublist */
if (!list_empty(&sublist))
ip_sublist_rcv_finish(&sublist);
......
......@@ -2019,10 +2019,52 @@ static int ip_mkroute_input(struct sk_buff *skb,
return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
/* Implements all the saddr-related checks as ip_route_input_slow(),
* assuming daddr is valid and the destination is not a local broadcast one.
* Uses the provided hint instead of performing a route lookup.
*/
int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev,
const struct sk_buff *hint)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct rtable *rt = (struct rtable *)hint;
struct net *net = dev_net(dev);
int err = -EINVAL;
u32 tag = 0;
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
if (ipv4_is_zeronet(saddr))
goto martian_source;
if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
goto martian_source;
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
tos &= IPTOS_RT_MASK;
err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
if (err < 0)
goto martian_source;
skip_validate_source:
skb_dst_copy(skb, hint);
return 0;
martian_source:
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
return err;
}
/*
* NOTE. We drop all the packets that has local source
* addresses, because every properly looped back packet
* must have correct destination already attached by output routine.
* Changes in the enforced policies must be applied also to
* ip_route_use_hint().
*
* Such approach solves two big problems:
* 1. Not simplex devices are handled properly.
......
......@@ -1461,6 +1461,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
}
#endif
goto failure;
} else if (fib6_requires_src(rt)) {
fib6_routes_require_src_inc(info->nl_net);
}
return err;
......@@ -1933,6 +1935,8 @@ int fib6_del(struct fib6_info *rt, struct nl_info *info)
struct fib6_info *cur = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
if (rt == cur) {
if (fib6_requires_src(cur))
fib6_routes_require_src_dec(info->nl_net);
fib6_del_route(table, fn, rtp, info);
return 0;
}
......
......@@ -86,11 +86,27 @@ static void ip6_sublist_rcv_finish(struct list_head *head)
}
}
static bool ip6_can_use_hint(const struct sk_buff *skb,
const struct sk_buff *hint)
{
return hint && !skb_dst(skb) &&
ipv6_addr_equal(&ipv6_hdr(hint)->daddr, &ipv6_hdr(skb)->daddr);
}
static struct sk_buff *ip6_extract_route_hint(const struct net *net,
struct sk_buff *skb)
{
if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
return NULL;
return skb;
}
static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
struct list_head *head)
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
......@@ -104,9 +120,15 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
skb = l3mdev_ip6_rcv(skb);
if (!skb)
continue;
if (ip6_can_use_hint(skb, hint))
skb_dst_copy(skb, hint);
else
ip6_rcv_finish_core(net, sk, skb);
dst = skb_dst(skb);
if (curr_dst != dst) {
hint = ip6_extract_route_hint(net, skb);
/* dispatch old sublist */
if (!list_empty(&sublist))
ip6_sublist_rcv_finish(&sublist);
......
......@@ -6199,6 +6199,9 @@ static int __net_init ip6_route_net_init(struct net *net)
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
ip6_template_metrics, true);
INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
#ifdef CONFIG_IPV6_SUBTREES
net->ipv6.fib6_routes_require_src = 0;
#endif
#endif
net->ipv6.sysctl.flush_delay = 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册