提交 84b7187c 编写于 作者: D David S. Miller

Merge branch 'mlxsw-Support-for-IPv6-UC-router'

Jiri Pirko says:

====================
mlxsw: Support for IPv6 UC router

Ido says:

This set adds support for IPv6 unicast routes offload. The first four
patches make the FIB notification chain generic so that it could be used
by address families other than IPv4. This is done by having each address
family register its callbacks with the common code, so that its FIB tables
and rules could be dumped upon registration to the chain, while ensuring
the integrity of the dump. The exact mechanics are explained in detail in
the first patch.

The next six patches build upon this work and add the necessary callbacks
in IPv6 code. This allows listeners of the chain to receive notifications
about IPv6 routes addition, deletion and replacement as well as FIB rules
notifications.

Unlike user space notifications for IPv6 multipath routes, the FIB
notification chain notifies these on a per-nexthop basis. This allows
us to keep the common code lean and is also unnecessary, as notifications
are serialized by each table's lock whereas applications maintaining
netlink caches may suffer from concurrent dumps and deletions / additions
of routes.

The next five patches audit the different code paths reading the route's
reference count (rt6i_ref) and remove assumptions regarding its meaning.
This is needed since non-FIB users need to be able to hold a reference on
the route and a non-zero reference count no longer means the route is in
the FIB.

The last six patches enable the mlxsw driver to offload IPv6 unicast
routes to the Spectrum ASIC. Without resorting to ACLs, lookup is done
solely based on the destination IP, so the abort mechanism is invoked
upon the addition of source-specific routes.

Follow-up patch sets will increase the scale of gatewayed routes by
consolidating identical nexthop groups to one adjacency entry in the
device's adjacency table (as in IPv4), as well as add support for
NH_{ADD,DEL} events which enable support for the
'ignore_routes_with_linkdown' sysctl.

Changes in v2:
* Provide offload indication for individual nexthops (David Ahern).
* Use existing route reference count instead of adding another one.
  This resulted in several new patches to remove assumptions regarding
  current semantics of the existing reference count (David Ahern).
* Add helpers to allow non-FIB users to take a reference on route.
* Remove use of tb6_lock in mlxsw (David Ahern).
* Add IPv6 dependency to mlxsw.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -75,6 +75,7 @@ config MLXSW_SPECTRUM
depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
depends on PSAMPLE || PSAMPLE=n
depends on BRIDGE || BRIDGE=n
depends on IPV6 || IPV6=n
select PARMAN
select MLXFW
default m
......
......@@ -34,6 +34,7 @@
#include <net/netevent.h>
#include <net/arp.h>
#include <net/fib_rules.h>
#include <net/fib_notifier.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <generated/utsrelease.h>
......@@ -2191,6 +2192,10 @@ static int rocker_router_fib_event(struct notifier_block *nb,
{
struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
struct rocker_fib_event_work *fib_work;
struct fib_notifier_info *info = ptr;
if (info->family != AF_INET)
return NOTIFY_DONE;
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
if (WARN_ON(!fib_work))
......
#ifndef __NET_FIB_NOTIFIER_H
#define __NET_FIB_NOTIFIER_H
#include <linux/types.h>
#include <linux/notifier.h>
#include <net/net_namespace.h>
struct fib_notifier_info {
struct net *net;
int family;
};
enum fib_event_type {
FIB_EVENT_ENTRY_REPLACE,
FIB_EVENT_ENTRY_APPEND,
FIB_EVENT_ENTRY_ADD,
FIB_EVENT_ENTRY_DEL,
FIB_EVENT_RULE_ADD,
FIB_EVENT_RULE_DEL,
FIB_EVENT_NH_ADD,
FIB_EVENT_NH_DEL,
};
struct fib_notifier_ops {
int family;
struct list_head list;
unsigned int (*fib_seq_read)(struct net *net);
int (*fib_dump)(struct net *net, struct notifier_block *nb);
struct rcu_head rcu;
};
int call_fib_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info);
int register_fib_notifier(struct notifier_block *nb,
void (*cb)(struct notifier_block *nb));
int unregister_fib_notifier(struct notifier_block *nb);
struct fib_notifier_ops *
fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net);
void fib_notifier_ops_unregister(struct fib_notifier_ops *ops);
#endif
......@@ -8,6 +8,7 @@
#include <linux/refcount.h>
#include <net/flow.h>
#include <net/rtnetlink.h>
#include <net/fib_notifier.h>
struct fib_kuid_range {
kuid_t start;
......@@ -57,6 +58,7 @@ struct fib_rules_ops {
int addr_size;
int unresolved_rules;
int nr_goto_rules;
unsigned int fib_rules_seq;
int (*action)(struct fib_rule *,
struct flowi *, int,
......@@ -89,6 +91,11 @@ struct fib_rules_ops {
struct rcu_head rcu;
};
struct fib_rule_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib_rule *rule;
};
#define FRA_GENERIC_POLICY \
[FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
......@@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
u32 flags);
bool fib_rule_matchall(const struct fib_rule *rule);
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family);
unsigned int fib_rules_seq_read(struct net *net, int family);
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
......
......@@ -16,10 +16,12 @@
#include <linux/ipv6_route.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/netlink.h>
#include <net/inetpeer.h>
#include <net/fib_notifier.h>
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB6_TABLE_HASHSZ 256
......@@ -185,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
static inline void rt6_hold(struct rt6_info *rt)
{
atomic_inc(&rt->rt6i_ref);
}
static inline void rt6_release(struct rt6_info *rt)
{
if (atomic_dec_and_test(&rt->rt6i_ref)) {
rt6_free_pcpu(rt);
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
}
enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
......@@ -233,6 +251,7 @@ struct fib6_table {
struct fib6_node tb6_root;
struct inet_peer_base tb6_peers;
unsigned int flags;
unsigned int fib_seq;
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
};
......@@ -256,6 +275,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *,
struct flowi6 *, int);
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
struct rt6_info *rt;
};
/*
* exported functions
*/
......@@ -292,9 +316,24 @@ int fib6_init(void);
int ipv6_route_open(struct inode *inode, struct file *file);
int call_fib6_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info);
int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info);
int __net_init fib6_notifier_init(struct net *net);
void __net_exit fib6_notifier_exit(struct net *net);
unsigned int fib6_tables_seq_read(struct net *net);
int fib6_tables_dump(struct net *net, struct notifier_block *nb);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
void fib6_rules_cleanup(void);
bool fib6_rule_default(const struct fib_rule *rule);
int fib6_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib6_rules_seq_read(struct net *net);
#else
static inline int fib6_rules_init(void)
{
......@@ -304,5 +343,17 @@ static inline void fib6_rules_cleanup(void)
{
return ;
}
static inline bool fib6_rule_default(const struct fib_rule *rule)
{
return true;
}
static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
{
return 0;
}
static inline unsigned int fib6_rules_seq_read(struct net *net)
{
return 0;
}
#endif
#endif
......@@ -19,6 +19,7 @@
#include <net/flow.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <net/fib_notifier.h>
#include <net/fib_rules.h>
#include <net/inetpeer.h>
#include <linux/percpu.h>
......@@ -188,10 +189,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \
FIB_RES_SADDR(net, res))
struct fib_notifier_info {
struct net *net;
};
struct fib_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
u32 dst;
......@@ -202,44 +199,21 @@ struct fib_entry_notifier_info {
u32 tb_id;
};
struct fib_rule_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib_rule *rule;
};
struct fib_nh_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib_nh *fib_nh;
};
enum fib_event_type {
FIB_EVENT_ENTRY_REPLACE,
FIB_EVENT_ENTRY_APPEND,
FIB_EVENT_ENTRY_ADD,
FIB_EVENT_ENTRY_DEL,
FIB_EVENT_RULE_ADD,
FIB_EVENT_RULE_DEL,
FIB_EVENT_NH_ADD,
FIB_EVENT_NH_DEL,
};
int register_fib_notifier(struct notifier_block *nb,
void (*cb)(struct notifier_block *nb));
int unregister_fib_notifier(struct notifier_block *nb);
int call_fib_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
int call_fib4_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info);
int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info);
int __net_init fib4_notifier_init(struct net *net);
void __net_exit fib4_notifier_exit(struct net *net);
void fib_notify(struct net *net, struct notifier_block *nb);
#ifdef CONFIG_IP_MULTIPLE_TABLES
void fib_rules_notify(struct net *net, struct notifier_block *nb);
#else
static inline void fib_rules_notify(struct net *net, struct notifier_block *nb)
{
}
#endif
struct fib_table {
struct hlist_node tb_hlist;
......@@ -312,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule)
return true;
}
static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb)
{
return 0;
}
static inline unsigned int fib4_rules_seq_read(struct net *net)
{
return 0;
}
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
......@@ -357,6 +341,8 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp,
}
bool fib4_rule_default(const struct fib_rule *rule);
int fib4_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib4_rules_seq_read(struct net *net);
#endif /* CONFIG_IP_MULTIPLE_TABLES */
......
......@@ -88,6 +88,7 @@ struct net {
/* core fib_rules */
struct list_head rules_ops;
struct list_head fib_notifier_ops; /* protected by net_mutex */
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
......
......@@ -159,6 +159,7 @@ struct netns_ipv4 {
int sysctl_fib_multipath_hash_policy;
#endif
struct fib_notifier_ops *notifier_ops;
unsigned int fib_seq; /* protected by rtnl_mutex */
atomic_t rt_genid;
......
......@@ -86,6 +86,7 @@ struct netns_ipv6 {
atomic_t dev_addr_genid;
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
struct fib_notifier_ops *notifier_ops;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
......
......@@ -35,6 +35,7 @@
#define RTF_PREF(pref) ((pref) << 27)
#define RTF_PREF_MASK 0x18000000
#define RTF_OFFLOAD 0x20000000 /* offloaded route */
#define RTF_PCPU 0x40000000 /* read-only: can not be set by user */
#define RTF_LOCAL 0x80000000
......
......@@ -9,7 +9,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
fib_notifier.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
......
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <net/net_namespace.h>
#include <net/fib_notifier.h>
static ATOMIC_NOTIFIER_HEAD(fib_chain);
int call_fib_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->net = net;
return nb->notifier_call(nb, event_type, info);
}
EXPORT_SYMBOL(call_fib_notifier);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->net = net;
return atomic_notifier_call_chain(&fib_chain, event_type, info);
}
EXPORT_SYMBOL(call_fib_notifiers);
static unsigned int fib_seq_sum(void)
{
struct fib_notifier_ops *ops;
unsigned int fib_seq = 0;
struct net *net;
rtnl_lock();
for_each_net(net) {
list_for_each_entry(ops, &net->fib_notifier_ops, list)
fib_seq += ops->fib_seq_read(net);
}
rtnl_unlock();
return fib_seq;
}
static int fib_net_dump(struct net *net, struct notifier_block *nb)
{
struct fib_notifier_ops *ops;
list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
int err = ops->fib_dump(net, nb);
if (err)
return err;
}
return 0;
}
static bool fib_dump_is_consistent(struct notifier_block *nb,
void (*cb)(struct notifier_block *nb),
unsigned int fib_seq)
{
atomic_notifier_chain_register(&fib_chain, nb);
if (fib_seq == fib_seq_sum())
return true;
atomic_notifier_chain_unregister(&fib_chain, nb);
if (cb)
cb(nb);
return false;
}
#define FIB_DUMP_MAX_RETRIES 5
int register_fib_notifier(struct notifier_block *nb,
void (*cb)(struct notifier_block *nb))
{
int retries = 0;
int err;
do {
unsigned int fib_seq = fib_seq_sum();
struct net *net;
rcu_read_lock();
for_each_net_rcu(net) {
err = fib_net_dump(net, nb);
if (err)
goto err_fib_net_dump;
}
rcu_read_unlock();
if (fib_dump_is_consistent(nb, cb, fib_seq))
return 0;
} while (++retries < FIB_DUMP_MAX_RETRIES);
return -EBUSY;
err_fib_net_dump:
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(register_fib_notifier);
int unregister_fib_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&fib_chain, nb);
}
EXPORT_SYMBOL(unregister_fib_notifier);
static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
struct net *net)
{
struct fib_notifier_ops *o;
list_for_each_entry(o, &net->fib_notifier_ops, list)
if (ops->family == o->family)
return -EEXIST;
list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
return 0;
}
struct fib_notifier_ops *
fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
{
struct fib_notifier_ops *ops;
int err;
ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
if (!ops)
return ERR_PTR(-ENOMEM);
err = __fib_notifier_ops_register(ops, net);
if (err)
goto err_register;
return ops;
err_register:
kfree(ops);
return ERR_PTR(err);
}
EXPORT_SYMBOL(fib_notifier_ops_register);
void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
{
list_del_rcu(&ops->list);
kfree_rcu(ops, rcu);
}
EXPORT_SYMBOL(fib_notifier_ops_unregister);
static int __net_init fib_notifier_net_init(struct net *net)
{
INIT_LIST_HEAD(&net->fib_notifier_ops);
return 0;
}
static struct pernet_operations fib_notifier_net_ops = {
.init = fib_notifier_net_init,
};
static int __init fib_notifier_init(void)
{
return register_pernet_subsys(&fib_notifier_net_ops);
}
subsys_initcall(fib_notifier_init);
......@@ -299,6 +299,67 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);
static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_rule *rule, int family)
{
struct fib_rule_notifier_info info = {
.info.family = family,
.rule = rule,
};
return call_fib_notifier(nb, net, event_type, &info.info);
}
static int call_fib_rule_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib_rule *rule,
struct fib_rules_ops *ops)
{
struct fib_rule_notifier_info info = {
.info.family = ops->family,
.rule = rule,
};
ops->fib_rules_seq++;
return call_fib_notifiers(net, event_type, &info.info);
}
/* Called with rcu_read_lock() */
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
{
struct fib_rules_ops *ops;
struct fib_rule *rule;
ops = lookup_rules_ops(net, family);
if (!ops)
return -EAFNOSUPPORT;
list_for_each_entry_rcu(rule, &ops->rules_list, list)
call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
family);
rules_ops_put(ops);
return 0;
}
EXPORT_SYMBOL_GPL(fib_rules_dump);
unsigned int fib_rules_seq_read(struct net *net, int family)
{
unsigned int fib_rules_seq;
struct fib_rules_ops *ops;
ASSERT_RTNL();
ops = lookup_rules_ops(net, family);
if (!ops)
return 0;
fib_rules_seq = ops->fib_rules_seq;
rules_ops_put(ops);
return fib_rules_seq;
}
EXPORT_SYMBOL_GPL(fib_rules_seq_read);
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
struct fib_rules_ops *ops)
{
......@@ -548,6 +609,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
......@@ -687,6 +749,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops);
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).portid);
fib_rule_put(rule);
......
......@@ -1247,22 +1247,28 @@ static int __net_init ip_fib_net_init(struct net *net)
int err;
size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
net->ipv4.fib_seq = 0;
err = fib4_notifier_init(net);
if (err)
return err;
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
if (!net->ipv4.fib_table_hash)
return -ENOMEM;
if (!net->ipv4.fib_table_hash) {
err = -ENOMEM;
goto err_table_hash_alloc;
}
err = fib4_rules_init(net);
if (err < 0)
goto fail;
goto err_rules_init;
return 0;
fail:
err_rules_init:
kfree(net->ipv4.fib_table_hash);
err_table_hash_alloc:
fib4_notifier_exit(net);
return err;
}
......@@ -1292,6 +1298,7 @@ static void ip_fib_net_exit(struct net *net)
#endif
rtnl_unlock();
kfree(net->ipv4.fib_table_hash);
fib4_notifier_exit(net);
}
static int __net_init fib_net_init(struct net *net)
......
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <net/net_namespace.h>
#include <net/fib_notifier.h>
#include <net/netns/ipv4.h>
#include <net/ip_fib.h>
static ATOMIC_NOTIFIER_HEAD(fib_chain);
int call_fib_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info)
int call_fib4_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->net = net;
return nb->notifier_call(nb, event_type, info);
info->family = AF_INET;
return call_fib_notifier(nb, net, event_type, info);
}
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
ASSERT_RTNL();
info->family = AF_INET;
net->ipv4.fib_seq++;
info->net = net;
return atomic_notifier_call_chain(&fib_chain, event_type, info);
return call_fib_notifiers(net, event_type, info);
}
static unsigned int fib_seq_sum(void)
static unsigned int fib4_seq_read(struct net *net)
{
unsigned int fib_seq = 0;
struct net *net;
rtnl_lock();
for_each_net(net)
fib_seq += net->ipv4.fib_seq;
rtnl_unlock();
ASSERT_RTNL();
return fib_seq;
return net->ipv4.fib_seq + fib4_rules_seq_read(net);
}
static bool fib_dump_is_consistent(struct notifier_block *nb,
void (*cb)(struct notifier_block *nb),
unsigned int fib_seq)
static int fib4_dump(struct net *net, struct notifier_block *nb)
{
atomic_notifier_chain_register(&fib_chain, nb);
if (fib_seq == fib_seq_sum())
return true;
atomic_notifier_chain_unregister(&fib_chain, nb);
if (cb)
cb(nb);
return false;
int err;
err = fib4_rules_dump(net, nb);
if (err)
return err;
fib_notify(net, nb);
return 0;
}
#define FIB_DUMP_MAX_RETRIES 5
int register_fib_notifier(struct notifier_block *nb,
void (*cb)(struct notifier_block *nb))
{
int retries = 0;
static const struct fib_notifier_ops fib4_notifier_ops_template = {
.family = AF_INET,
.fib_seq_read = fib4_seq_read,
.fib_dump = fib4_dump,
};
do {
unsigned int fib_seq = fib_seq_sum();
struct net *net;
int __net_init fib4_notifier_init(struct net *net)
{
struct fib_notifier_ops *ops;
/* Mutex semantics guarantee that every change done to
* FIB tries before we read the change sequence counter
* is now visible to us.
*/
rcu_read_lock();
for_each_net_rcu(net) {
fib_rules_notify(net, nb);
fib_notify(net, nb);
}
rcu_read_unlock();
net->ipv4.fib_seq = 0;
if (fib_dump_is_consistent(nb, cb, fib_seq))
return 0;
} while (++retries < FIB_DUMP_MAX_RETRIES);
ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
if (IS_ERR(ops))
return PTR_ERR(ops);
net->ipv4.notifier_ops = ops;
return -EBUSY;
return 0;
}
EXPORT_SYMBOL(register_fib_notifier);
int unregister_fib_notifier(struct notifier_block *nb)
void __net_exit fib4_notifier_exit(struct net *net)
{
return atomic_notifier_chain_unregister(&fib_chain, nb);
fib_notifier_ops_unregister(net->ipv4.notifier_ops);
}
EXPORT_SYMBOL(unregister_fib_notifier);
......@@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib4_rule_default);
int fib4_rules_dump(struct net *net, struct notifier_block *nb)
{
return fib_rules_dump(net, nb, AF_INET);
}
unsigned int fib4_rules_seq_read(struct net *net)
{
return fib_rules_seq_read(net, AF_INET);
}
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
......@@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_rule *rule)
{
struct fib_rule_notifier_info info = {
.rule = rule,
};
return call_fib_notifier(nb, net, event_type, &info.info);
}
static int call_fib_rule_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib_rule *rule)
{
struct fib_rule_notifier_info info = {
.rule = rule,
};
return call_fib_notifiers(net, event_type, &info.info);
}
/* Called with rcu_read_lock() */
void fib_rules_notify(struct net *net, struct notifier_block *nb)
{
struct fib_rules_ops *ops = net->ipv4.rules_ops;
struct fib_rule *rule;
list_for_each_entry_rcu(rule, &ops->rules_list, list)
call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
}
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
FRA_GENERIC_POLICY,
[FRA_FLOW] = { .type = NLA_U32 },
......@@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
......@@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
......
......@@ -44,6 +44,7 @@
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/fib_notifier.h>
#include "fib_lookup.h"
......@@ -1451,14 +1452,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN)
break;
return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
&info.info);
return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
&info.info);
case FIB_EVENT_NH_DEL:
if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
(fib_nh->nh_flags & RTNH_F_DEAD))
return call_fib_notifiers(dev_net(fib_nh->nh_dev),
event_type, &info.info);
return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
event_type, &info.info);
default:
break;
}
......
......@@ -81,6 +81,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/fib_notifier.h>
#include <trace/events/fib.h>
#include "fib_lookup.h"
......@@ -97,7 +98,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
.type = type,
.tb_id = tb_id,
};
return call_fib_notifier(nb, net, event_type, &info.info);
return call_fib4_notifier(nb, net, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
......@@ -113,7 +114,7 @@ static int call_fib_entry_notifiers(struct net *net,
.type = type,
.tb_id = tb_id,
};
return call_fib_notifiers(net, event_type, &info.info);
return call_fib4_notifiers(net, event_type, &info.info);
}
#define MAX_STAT_DEPTH 32
......
......@@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
udp_offload.o seg6.o
udp_offload.o seg6.o fib6_notifier.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
......
......@@ -3066,7 +3066,7 @@ static void init_loopback(struct net_device *dev)
* lo device down, release this obsolete dst and
* reallocate a new router for ifa.
*/
if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
if (!sp_ifa->rt->rt6i_node) {
ip6_rt_put(sp_ifa->rt);
sp_ifa->rt = NULL;
} else {
......@@ -3321,11 +3321,11 @@ static void addrconf_gre_config(struct net_device *dev)
static int fixup_permanent_addr(struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
/* rt6i_ref == 0 means the host route was removed from the
/* !rt6i_node means the host route was removed from the
* FIB, for example, if 'lo' device is taken down. In that
* case regenerate the host route.
*/
if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
if (!ifp->rt || !ifp->rt->rt6i_node) {
struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
......
#include <linux/notifier.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <net/net_namespace.h>
#include <net/fib_notifier.h>
#include <net/netns/ipv6.h>
#include <net/ip6_fib.h>
int call_fib6_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->family = AF_INET6;
return call_fib_notifier(nb, net, event_type, info);
}
int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->family = AF_INET6;
return call_fib_notifiers(net, event_type, info);
}
static unsigned int fib6_seq_read(struct net *net)
{
return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
}
static int fib6_dump(struct net *net, struct notifier_block *nb)
{
int err;
err = fib6_rules_dump(net, nb);
if (err)
return err;
return fib6_tables_dump(net, nb);
}
static const struct fib_notifier_ops fib6_notifier_ops_template = {
.family = AF_INET6,
.fib_seq_read = fib6_seq_read,
.fib_dump = fib6_dump,
};
int __net_init fib6_notifier_init(struct net *net)
{
struct fib_notifier_ops *ops;
ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
if (IS_ERR(ops))
return PTR_ERR(ops);
net->ipv6.notifier_ops = ops;
return 0;
}
void __net_exit fib6_notifier_exit(struct net *net)
{
fib_notifier_ops_unregister(net->ipv6.notifier_ops);
}
......@@ -14,6 +14,7 @@
*/
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <net/fib_rules.h>
......@@ -29,6 +30,36 @@ struct fib6_rule {
u8 tclass;
};
static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
if (r->dst.plen || r->src.plen || r->tclass)
return false;
return fib_rule_matchall(rule);
}
bool fib6_rule_default(const struct fib_rule *rule)
{
if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
rule->l3mdev)
return false;
if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
return false;
return true;
}
EXPORT_SYMBOL_GPL(fib6_rule_default);
int fib6_rules_dump(struct net *net, struct notifier_block *nb)
{
return fib_rules_dump(net, nb, AF_INET6);
}
unsigned int fib6_rules_seq_read(struct net *net)
{
return fib_rules_seq_read(net, AF_INET6);
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
......
......@@ -33,6 +33,7 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/lwtunnel.h>
#include <net/fib_notifier.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
......@@ -153,7 +154,7 @@ static void node_free(struct fib6_node *fn)
kmem_cache_free(fib6_node_kmem, fn);
}
static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
{
int cpu;
......@@ -176,15 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
}
static void rt6_release(struct rt6_info *rt)
{
if (atomic_dec_and_test(&rt->rt6i_ref)) {
rt6_free_pcpu(rt);
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
}
EXPORT_SYMBOL_GPL(rt6_free_pcpu);
static void fib6_link_table(struct net *net, struct fib6_table *tb)
{
......@@ -302,6 +295,109 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
unsigned int fib6_tables_seq_read(struct net *net)
{
unsigned int h, fib_seq = 0;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv6.fib_table_hash[h];
struct fib6_table *tb;
hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
read_lock_bh(&tb->tb6_lock);
fib_seq += tb->fib_seq;
read_unlock_bh(&tb->tb6_lock);
}
}
rcu_read_unlock();
return fib_seq;
}
static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct rt6_info *rt)
{
struct fib6_entry_notifier_info info = {
.rt = rt,
};
return call_fib6_notifier(nb, net, event_type, &info.info);
}
static int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct rt6_info *rt)
{
struct fib6_entry_notifier_info info = {
.rt = rt,
};
rt->rt6i_table->fib_seq++;
return call_fib6_notifiers(net, event_type, &info.info);
}
struct fib6_dump_arg {
struct net *net;
struct notifier_block *nb;
};
static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
{
if (rt == arg->net->ipv6.ip6_null_entry)
return;
call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
}
static int fib6_node_dump(struct fib6_walker *w)
{
struct rt6_info *rt;
for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
fib6_rt_dump(rt, w->args);
w->leaf = NULL;
return 0;
}
static void fib6_table_dump(struct net *net, struct fib6_table *tb,
struct fib6_walker *w)
{
w->root = &tb->tb6_root;
read_lock_bh(&tb->tb6_lock);
fib6_walk(net, w);
read_unlock_bh(&tb->tb6_lock);
}
/* Called with rcu_read_lock() */
int fib6_tables_dump(struct net *net, struct notifier_block *nb)
{
struct fib6_dump_arg arg;
struct fib6_walker *w;
unsigned int h;
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
return -ENOMEM;
w->func = fib6_node_dump;
arg.net = net;
arg.nb = nb;
w->args = &arg;
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv6.fib_table_hash[h];
struct fib6_table *tb;
hlist_for_each_entry_rcu(tb, head, tb6_hlist)
fib6_table_dump(net, tb, w);
}
kfree(w);
return 0;
}
static int fib6_dump_node(struct fib6_walker *w)
{
int res;
......@@ -733,8 +829,6 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
}
fn = fn->parent;
}
/* No more references are possible at this point. */
BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
}
}
......@@ -879,6 +973,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
......@@ -906,6 +1002,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
......@@ -913,6 +1011,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
fn->fn_flags |= RTN_RTINFO;
}
nsiblings = iter->rt6i_nsiblings;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter);
......@@ -925,6 +1024,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter);
nsiblings--;
......@@ -1459,6 +1559,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
fib6_purge_rt(rt, fn, net);
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
......@@ -1839,6 +1940,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
static int __net_init fib6_net_init(struct net *net)
{
size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
int err;
err = fib6_notifier_init(net);
if (err)
return err;
spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
......@@ -1891,6 +1997,7 @@ static int __net_init fib6_net_init(struct net *net)
out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
fib6_notifier_exit(net);
return -ENOMEM;
}
......@@ -1907,6 +2014,7 @@ static void fib6_net_exit(struct net *net)
kfree(net->ipv6.fib6_main_tbl);
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
fib6_notifier_exit(net);
}
static struct pernet_operations fib6_net_ops = {
......
......@@ -1820,6 +1820,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
if (cfg->fc_flags & RTF_OFFLOAD) {
NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD");
goto out;
}
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
goto out;
......@@ -3327,6 +3332,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
if (rt->rt6i_flags & RTF_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
/* not needed for multipath encoding b/c it has a rtnexthop struct */
if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册