提交 907eea48 编写于 作者: A Amit Cohen 提交者: Jakub Kicinski

net: ipv6: Emit notification when fib hardware flags are changed

After installing a route to the kernel, user space receives an
acknowledgment, which means the route was installed in the kernel,
but not necessarily in hardware.

The asynchronous nature of route installation in hardware can lead
to a routing daemon advertising a route before it was actually installed in
hardware. This can result in packet loss or mis-routed packets until the
route is installed in hardware.

It is also possible for a route already installed in hardware to change
its action and therefore its flags. For example, a host route that is
trapping packets can be "promoted" to perform decapsulation following
the installation of an IPinIP/VXLAN tunnel.

Emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/RTM_F_TRAP flags
are changed. The aim is to provide an indication to user-space
(e.g., routing daemons) about the state of the route in hardware.

Introduce a sysctl that controls this behavior.

Keep the default value at 0 (i.e., do not emit notifications) for several
reasons:
- Multiple RTM_NEWROUTE notifications per route might confuse existing
  routing daemons.
- Convergence reasons in routing daemons.
- The extra notifications will negatively impact the insertion rate.
- Not all users are interested in these notifications.

Move fib6_info_hw_flags_set() to C file because it is no longer a short
function.
Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
上级 efc42879
...@@ -1795,6 +1795,26 @@ nexthop_compat_mode - BOOLEAN ...@@ -1795,6 +1795,26 @@ nexthop_compat_mode - BOOLEAN
and extraneous notifications. and extraneous notifications.
Default: true (backward compat mode) Default: true (backward compat mode)
fib_notify_on_flag_change - INTEGER
Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
RTM_F_TRAP flags are changed.
After installing a route to the kernel, user space receives an
acknowledgment, which means the route was installed in the kernel,
but not necessarily in hardware.
It is also possible for a route already installed in hardware to change
its action and therefore its flags. For example, a host route that is
trapping packets can be "promoted" to perform decapsulation following
the installation of an IPinIP/VXLAN tunnel.
The notifications will indicate to user-space the state of the route.
Default: 0 (Do not emit notifications.)
Possible values:
- 0 - Do not emit notifications.
- 1 - Emit notifications.
IPv6 Fragmentation: IPv6 Fragmentation:
ip6frag_high_thresh - INTEGER ip6frag_high_thresh - INTEGER
......
...@@ -336,14 +336,6 @@ static inline void fib6_info_release(struct fib6_info *f6i) ...@@ -336,14 +336,6 @@ static inline void fib6_info_release(struct fib6_info *f6i)
call_rcu(&f6i->rcu, fib6_info_destroy_rcu); call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
} }
/* Store the hardware offload and trap state on an IPv6 route entry.
 * @net is accepted for interface compatibility but is not used here.
 */
static inline void fib6_info_hw_flags_set(struct net *net,
					  struct fib6_info *f6i,
					  bool offload, bool trap)
{
	f6i->trap = trap;
	f6i->offload = offload;
}
enum fib6_walk_state { enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES #ifdef CONFIG_IPV6_SUBTREES
FWS_S, FWS_S,
...@@ -546,6 +538,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) ...@@ -546,6 +538,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
{ {
return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
} }
/* Set the offload/trap hardware flags of @f6i; defined out of line. */
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
bool offload, bool trap);
#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
struct bpf_iter__ipv6_route { struct bpf_iter__ipv6_route {
......
...@@ -51,6 +51,7 @@ struct netns_sysctl_ipv6 { ...@@ -51,6 +51,7 @@ struct netns_sysctl_ipv6 {
int max_hbh_opts_len; int max_hbh_opts_len;
int seg6_flowlabel; int seg6_flowlabel;
bool skip_notify_on_dev_down; bool skip_notify_on_dev_down;
int fib_notify_on_flag_change;
}; };
struct netns_ipv6 { struct netns_ipv6 {
......
...@@ -954,6 +954,7 @@ static int __net_init inet6_net_init(struct net *net) ...@@ -954,6 +954,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT; net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN; net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN; net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
net->ipv6.sysctl.fib_notify_on_flag_change = 0;
atomic_set(&net->ipv6.fib6_sernum, 1); atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net); err = ipv6_init_mibs(net);
......
...@@ -6064,6 +6064,50 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, ...@@ -6064,6 +6064,50 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
} }
/**
 * fib6_info_hw_flags_set - update the hardware flags of an IPv6 route
 * @net: network namespace; its fib_notify_on_flag_change sysctl gates the
 *       notification and it is the target of the netlink message
 * @f6i: route entry whose flags are updated
 * @offload: new value for the route's offload flag
 * @trap: new value for the route's trap flag
 *
 * Does nothing when both flags already hold the requested values.
 * Otherwise the new values are stored unconditionally, and an RTM_NEWROUTE
 * message is sent to RTNLGRP_IPV6_ROUTE listeners, provided the route is
 * still linked to a fib6 node and the sysctl is non-zero. If the message
 * cannot be allocated or filled, the error is reported to the group through
 * rtnl_set_sk_err(). Allocations use GFP_KERNEL.
 */
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
			    bool offload, bool trap)
{
	struct sk_buff *msg;
	int rc;

	/* Nothing changed, so there is nothing to store or announce. */
	if (f6i->offload == offload && f6i->trap == trap)
		return;

	f6i->offload = offload;
	f6i->trap = trap;

	/* Stay silent when the route was removed from the tree, or when
	 * notifications on flag change are disabled for this namespace.
	 */
	if (!rcu_access_pointer(f6i->fib6_node) ||
	    !net->ipv6.sysctl.fib_notify_on_flag_change)
		return;

	msg = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
	if (msg) {
		rc = rt6_fill_node(net, msg, f6i, NULL, NULL, NULL, 0,
				   RTM_NEWROUTE, 0, 0, 0);
		if (rc >= 0) {
			rtnl_notify(msg, net, 0, RTNLGRP_IPV6_ROUTE, NULL,
				    GFP_KERNEL);
			return;
		}

		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(rc == -EMSGSIZE);
		kfree_skb(msg);
	} else {
		rc = -ENOBUFS;
	}

	/* Let listeners know that a notification was lost. */
	rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, rc);
}
EXPORT_SYMBOL(fib6_info_hw_flags_set);
static int ip6_route_dev_notify(struct notifier_block *this, static int ip6_route_dev_notify(struct notifier_block *this,
unsigned long event, void *ptr) unsigned long event, void *ptr)
{ {
......
...@@ -160,6 +160,15 @@ static struct ctl_table ipv6_table_template[] = { ...@@ -160,6 +160,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "fib_notify_on_flag_change",
.data = &init_net.ipv6.sysctl.fib_notify_on_flag_change,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ } { }
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册