提交 7e3f4f3a 编写于 作者: D David S. Miller

Merge branch 'mlxsw-vrf'

Jiri Pirko says:

====================
mlxsw: Enable VRF offload

Ido says:

Packets received from netdevs enslaved to different VRF devices are
forwarded using different FIB tables. In the Spectrum ASIC this is
achieved by binding different router interfaces (RIFs) to different
virtual routers (VRs). Each RIF represents an enslaved netdev and each
VR has its own FIB table according to which packets are forwarded.

The first three patches add a helper to check if a FIB rule is a
default rule and extend the FIB notification chain to include the rule's
info as part of the RULE_{ADD,DEL} events. This allows offloading
drivers to sanitize the rules they don't support and flush their tables.

The fourth patch introduces a small change in the VRF driver to allow
capable drivers to more easily offload VRFs.

Finally, the last patches gradually add support for VRFs in the mlxsw
driver. First, on top of port netdevs, stacked LAG and VLAN devices and
then on top of bridges.

Some limitations I would like to point out:

1) The old model where 'oif' / 'iif' rules were programmed for each L3
master device isn't supported. Upon insertion of these rules the driver
will flush its tables and forwarding will be done by the kernel instead.
It's inferior in every way to the single 'l3mdev' rule, so this shouldn't
be an issue.

2) Inter-VRF routes pointing to a VRF device aren't offloaded. Packets
hitting these routes will be forwarded by the kernel. Inter-VRF routes
pointing to netdevs enslaved to a different VRF are offloaded.

3) There's a small discrepancy between the kernel's datapath and the
device's. By default, packets forwarded by the kernel first do a lookup
in the local table and then in the VRF's table (assuming no match). In
the device, lookup is done only in the VRF's table, which is probably
the intended behavior. Changes in v2 allow the user to properly re-order
the default rules without triggering the abort mechanism.

Changes in v3:
* Remove 'l3mdev' from the matchall list, as it's related to the action
  and not the selector (David Ahern).
* Use container_of() instead of typecasting (David Ahern).
* Add David's Acked-by to the second patch.
* Add a helper in IPv4 code to check if a rule is a default rule (David
  Ahern).

Changes in v2:
* Drop default rule indication and allow re-ordering of default rules
  (David Ahern).
* Remove ifdef around 'struct fib_rule_notifier_info' and drop redundant
  dependency on IP_MULTIPLE_TABLES from rocker and mlxsw.
* Add David's Acked-by to the fourth patch.
* Remove netif_is_vrf_master() and use netif_is_l3_master() instead
  (David Ahern).
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -3951,7 +3951,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
upper_dev = info->upper_dev;
if (!is_vlan_dev(upper_dev) &&
!netif_is_lag_master(upper_dev) &&
!netif_is_bridge_master(upper_dev))
!netif_is_bridge_master(upper_dev) &&
!netif_is_l3_master(upper_dev))
return -EINVAL;
if (!info->linking)
break;
......@@ -3991,6 +3992,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
else
mlxsw_sp_port_lag_leave(mlxsw_sp_port,
upper_dev);
} else if (netif_is_l3_master(upper_dev)) {
if (info->linking)
err = mlxsw_sp_port_vrf_join(mlxsw_sp_port);
else
mlxsw_sp_port_vrf_leave(mlxsw_sp_port);
} else {
err = -EINVAL;
WARN_ON(1);
......@@ -4105,7 +4111,7 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
switch (event) {
case NETDEV_PRECHANGEUPPER:
upper_dev = info->upper_dev;
if (!is_vlan_dev(upper_dev))
if (!is_vlan_dev(upper_dev) && !netif_is_l3_master(upper_dev))
return -EINVAL;
if (is_vlan_dev(upper_dev) &&
br_dev != mlxsw_sp->master_bridge.dev)
......@@ -4120,6 +4126,12 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
else
mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp,
upper_dev);
} else if (netif_is_l3_master(upper_dev)) {
if (info->linking)
err = mlxsw_sp_bridge_vrf_join(mlxsw_sp,
br_dev);
else
mlxsw_sp_bridge_vrf_leave(mlxsw_sp, br_dev);
} else {
err = -EINVAL;
WARN_ON(1);
......@@ -4353,14 +4365,16 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev,
switch (event) {
case NETDEV_PRECHANGEUPPER:
upper_dev = info->upper_dev;
if (!netif_is_bridge_master(upper_dev))
if (!netif_is_bridge_master(upper_dev) &&
!netif_is_l3_master(upper_dev))
return -EINVAL;
if (!info->linking)
break;
/* We can't have multiple VLAN interfaces configured on
* the same port and being members in the same bridge.
*/
if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port,
if (netif_is_bridge_master(upper_dev) &&
!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port,
upper_dev))
return -EINVAL;
break;
......@@ -4372,6 +4386,11 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev,
upper_dev);
else
mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport);
} else if (netif_is_l3_master(upper_dev)) {
if (info->linking)
err = mlxsw_sp_vport_vrf_join(mlxsw_sp_vport);
else
mlxsw_sp_vport_vrf_leave(mlxsw_sp_vport);
} else {
err = -EINVAL;
WARN_ON(1);
......@@ -4402,6 +4421,47 @@ static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev,
return 0;
}
/* Handle upper-device notifications for a VLAN device whose real device
 * is a bridge.
 *
 * @vlan_dev: the VLAN netdev the notification is about.
 * @event:    netdev notifier event; only NETDEV_PRECHANGEUPPER and
 *            NETDEV_CHANGEUPPER are acted upon.
 * @ptr:      notifier payload, interpreted as
 *            struct netdev_notifier_changeupper_info for those two events.
 *
 * Returns 0 when the VLAN device has no mlxsw_sp lower or the event is not
 * handled, -EINVAL when the requested upper is not an L3 master (or the
 * VLAN device does not sit on the master bridge), and otherwise the result
 * of mlxsw_sp_bridge_vrf_join() when linking.
 */
static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
						unsigned long event, void *ptr)
{
	struct netdev_notifier_changeupper_info *info = ptr;
	struct mlxsw_sp *mlxsw_sp;

	mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
	if (!mlxsw_sp)
		return 0;

	if (event == NETDEV_PRECHANGEUPPER) {
		/* VLAN devices are only allowed on top of the
		 * VLAN-aware bridge.
		 */
		if (WARN_ON(vlan_dev_real_dev(vlan_dev) !=
			    mlxsw_sp->master_bridge.dev))
			return -EINVAL;
		/* The only upper accepted for such a VLAN device is a VRF. */
		return netif_is_l3_master(info->upper_dev) ? 0 : -EINVAL;
	}

	if (event != NETDEV_CHANGEUPPER)
		return 0;

	/* PRECHANGEUPPER should already have vetoed any other upper. */
	if (!netif_is_l3_master(info->upper_dev)) {
		WARN_ON(1);
		return -EINVAL;
	}

	if (info->linking)
		return mlxsw_sp_bridge_vrf_join(mlxsw_sp, vlan_dev);

	mlxsw_sp_bridge_vrf_leave(mlxsw_sp, vlan_dev);
	return 0;
}
static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
unsigned long event, void *ptr)
{
......@@ -4414,6 +4474,9 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
else if (netif_is_lag_master(real_dev))
return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr,
vid);
else if (netif_is_bridge_master(real_dev))
return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, event,
ptr);
return 0;
}
......
......@@ -578,6 +578,14 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
unsigned long event, void *ptr);
void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *r);
int mlxsw_sp_vport_vrf_join(struct mlxsw_sp_port *mlxsw_sp_vport);
void mlxsw_sp_vport_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
int mlxsw_sp_port_vrf_join(struct mlxsw_sp_port *mlxsw_sp_port);
void mlxsw_sp_port_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_port);
int mlxsw_sp_bridge_vrf_join(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev);
void mlxsw_sp_bridge_vrf_leave(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev);
int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
......
......@@ -41,10 +41,13 @@
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/fib_rules.h>
#include <net/l3mdev.h>
#include "spectrum.h"
#include "core.h"
......@@ -2514,6 +2517,7 @@ struct mlxsw_sp_fib_event_work {
struct work_struct work;
union {
struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info;
struct fib_nh_notifier_info fnh_info;
};
struct mlxsw_sp *mlxsw_sp;
......@@ -2525,6 +2529,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work);
struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
struct fib_rule *rule;
bool replace, append;
int err;
......@@ -2548,7 +2553,10 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
break;
case FIB_EVENT_RULE_ADD: /* fall through */
case FIB_EVENT_RULE_DEL:
mlxsw_sp_router_fib4_abort(mlxsw_sp);
rule = fib_work->fr_info.rule;
if (!fib4_rule_default(rule) && !rule->l3mdev)
mlxsw_sp_router_fib4_abort(mlxsw_sp);
fib_rule_put(rule);
break;
case FIB_EVENT_NH_ADD: /* fall through */
case FIB_EVENT_NH_DEL:
......@@ -2591,6 +2599,11 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
*/
fib_info_hold(fib_work->fen_info.fi);
break;
case FIB_EVENT_RULE_ADD: /* fall through */
case FIB_EVENT_RULE_DEL:
memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
fib_rule_get(fib_work->fr_info.rule);
break;
case FIB_EVENT_NH_ADD: /* fall through */
case FIB_EVENT_NH_DEL:
memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
......@@ -2648,7 +2661,7 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r,
return true;
return false;
case NETDEV_DOWN:
if (r && !in_dev->ifa_list)
if (r && !in_dev->ifa_list && !netif_is_l3_slave(r->dev))
return true;
/* It is possible we already removed the RIF ourselves
* if it was assigned to a netdev that is now a bridge
......@@ -2751,6 +2764,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
struct net_device *l3_dev)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
u32 tb_id = l3mdev_fib_table(l3_dev);
struct mlxsw_sp_vr *vr;
struct mlxsw_sp_fid *f;
struct mlxsw_sp_rif *r;
......@@ -2761,7 +2775,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
if (rif == MLXSW_SP_INVALID_RIF)
return ERR_PTR(-ERANGE);
vr = mlxsw_sp_vr_get(mlxsw_sp, RT_TABLE_MAIN);
vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
if (IS_ERR(vr))
return ERR_CAST(vr);
......@@ -2999,6 +3013,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev,
struct mlxsw_sp_fid *f)
{
u32 tb_id = l3mdev_fib_table(l3_dev);
struct mlxsw_sp_vr *vr;
struct mlxsw_sp_rif *r;
u16 rif;
......@@ -3008,7 +3023,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
if (rif == MLXSW_SP_INVALID_RIF)
return -ERANGE;
vr = mlxsw_sp_vr_get(mlxsw_sp, RT_TABLE_MAIN);
vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
if (IS_ERR(vr))
return PTR_ERR(vr);
......@@ -3211,6 +3226,73 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
return err;
}
/* Re-create a vPort's RIF after the vPort's netdev was linked to an L3
 * master device.
 *
 * Returns the result of mlxsw_sp_vport_rif_sp_join().
 */
int mlxsw_sp_vport_vrf_join(struct mlxsw_sp_port *mlxsw_sp_vport)
{
	struct net_device *l3_dev = mlxsw_sp_vport->dev;
	struct mlxsw_sp_fid *fid;

	/* A RIF that is already present has to be dropped first; the join
	 * below creates a new one using the VRF's VR.
	 */
	fid = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
	if (fid && fid->r)
		mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);

	return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
}
/* Called when a vPort's netdev is unlinked from an L3 master device:
 * hands the vPort to mlxsw_sp_vport_rif_sp_leave(). Returns nothing.
 */
void mlxsw_sp_vport_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
{
mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
}
/* VRF join for a physical port: look up the port's vPort and delegate to
 * mlxsw_sp_vport_vrf_join().
 *
 * Returns -EINVAL (after a WARN) when the vPort cannot be found, otherwise
 * the result of mlxsw_sp_vport_vrf_join().
 */
int mlxsw_sp_port_vrf_join(struct mlxsw_sp_port *mlxsw_sp_port)
{
	struct mlxsw_sp_port *vport;

	/* NOTE(review): the literal 1 is presumably the VID of the port's
	 * default vPort - confirm against mlxsw_sp_port_vport_find().
	 */
	vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1);

	return WARN_ON(!vport) ? -EINVAL : mlxsw_sp_vport_vrf_join(vport);
}
/* VRF leave for a physical port: look up the port's vPort and delegate to
 * mlxsw_sp_vport_vrf_leave(). Warns and does nothing when the vPort
 * cannot be found.
 */
void mlxsw_sp_port_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_port)
{
	struct mlxsw_sp_port *vport;

	/* NOTE(review): the literal 1 is presumably the VID of the port's
	 * default vPort - confirm against mlxsw_sp_port_vport_find().
	 */
	vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1);

	if (!WARN_ON(!vport))
		mlxsw_sp_vport_vrf_leave(vport);
}
/* Re-create the bridge RIF for @l3_dev after it was linked to an L3
 * master device.
 *
 * @mlxsw_sp: driver instance.
 * @l3_dev:   the netdev carrying the L3 configuration (callers in this
 *            file pass either the bridge or a VLAN device on top of it).
 *
 * Returns -EINVAL (after a WARN) when no FID is found for @l3_dev,
 * otherwise the result of mlxsw_sp_rif_bridge_create().
 */
int mlxsw_sp_bridge_vrf_join(struct mlxsw_sp *mlxsw_sp,
			     struct net_device *l3_dev)
{
	struct mlxsw_sp_fid *fid = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);

	if (WARN_ON(!fid))
		return -EINVAL;

	/* Tear down a RIF that is already bound to the FID before creating
	 * the replacement.
	 * NOTE(review): if the create below fails, the previous RIF is
	 * already gone - confirm callers are fine with that.
	 */
	if (fid->r)
		mlxsw_sp_rif_bridge_destroy(mlxsw_sp, fid->r);

	return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, fid);
}
/* Destroy the bridge RIF of @l3_dev after it was unlinked from an L3
 * master device.
 *
 * @mlxsw_sp: driver instance.
 * @l3_dev:   the netdev carrying the L3 configuration (callers in this
 *            file pass either the bridge or a VLAN device on top of it).
 *
 * Warns and returns when no FID is found for @l3_dev. Returns silently
 * when the FID has no RIF attached.
 */
void mlxsw_sp_bridge_vrf_leave(struct mlxsw_sp *mlxsw_sp,
			       struct net_device *l3_dev)
{
	struct mlxsw_sp_fid *f;

	f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
	if (WARN_ON(!f))
		return;

	/* Mirror the f->r check done in mlxsw_sp_bridge_vrf_join(): never
	 * hand a NULL RIF to mlxsw_sp_rif_bridge_destroy().
	 */
	if (!f->r)
		return;

	mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
}
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
......
......@@ -33,6 +33,7 @@
#include <net/rtnetlink.h>
#include <net/netevent.h>
#include <net/arp.h>
#include <net/fib_rules.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <generated/utsrelease.h>
......@@ -2175,7 +2176,10 @@ static const struct switchdev_ops rocker_port_switchdev_ops = {
struct rocker_fib_event_work {
struct work_struct work;
struct fib_entry_notifier_info fen_info;
union {
struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info;
};
struct rocker *rocker;
unsigned long event;
};
......@@ -2185,6 +2189,7 @@ static void rocker_router_fib_event_work(struct work_struct *work)
struct rocker_fib_event_work *fib_work =
container_of(work, struct rocker_fib_event_work, work);
struct rocker *rocker = fib_work->rocker;
struct fib_rule *rule;
int err;
/* Protect internal structures from changes */
......@@ -2202,7 +2207,10 @@ static void rocker_router_fib_event_work(struct work_struct *work)
break;
case FIB_EVENT_RULE_ADD: /* fall through */
case FIB_EVENT_RULE_DEL:
rocker_world_fib4_abort(rocker);
rule = fib_work->fr_info.rule;
if (!fib4_rule_default(rule))
rocker_world_fib4_abort(rocker);
fib_rule_put(rule);
break;
}
rtnl_unlock();
......@@ -2233,6 +2241,11 @@ static int rocker_router_fib_event(struct notifier_block *nb,
*/
fib_info_hold(fib_work->fen_info.fi);
break;
case FIB_EVENT_RULE_ADD: /* fall through */
case FIB_EVENT_RULE_DEL:
memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
fib_rule_get(fib_work->fr_info.rule);
break;
}
queue_work(rocker->rocker_owq, &fib_work->work);
......
......@@ -747,14 +747,18 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
{
int ret;
port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL);
if (ret < 0)
return ret;
goto err;
port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
cycle_netdev(port_dev);
return 0;
err:
port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
return ret;
}
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
......
......@@ -141,6 +141,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
struct fib_lookup_arg *);
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
u32 flags);
bool fib_rule_matchall(const struct fib_rule *rule);
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh);
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh);
......
......@@ -213,6 +213,11 @@ struct fib_entry_notifier_info {
u32 tb_id;
};
/* Payload handed to FIB notifier listeners for FIB_EVENT_RULE_ADD and
 * FIB_EVENT_RULE_DEL.
 */
struct fib_rule_notifier_info {
struct fib_notifier_info info; /* must be first: senders pass &info.info and listeners read it back as this struct */
/* The rule being added or deleted. Listeners that defer processing
 * take their own reference with fib_rule_get() and release it with
 * fib_rule_put().
 */
struct fib_rule *rule;
};
struct fib_nh_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib_nh *fib_nh;
......@@ -311,6 +316,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
return err;
}
/* Inline variant of fib4_rule_default() that sits on the opposite side of
 * the CONFIG_IP_MULTIPLE_TABLES #else from the out-of-line declaration
 * (presumably the build without multiple-table support - confirm against
 * the enclosing #if). Every rule is reported as a default rule; @rule is
 * not inspected.
 */
static inline bool fib4_rule_default(const struct fib_rule *rule)
{
return true;
}
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
......@@ -355,6 +365,8 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp,
return err;
}
bool fib4_rule_default(const struct fib_rule *rule);
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
......
......@@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = {
KUIDT_INIT(~0),
};
bool fib_rule_matchall(const struct fib_rule *rule)
{
if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
rule->flags)
return false;
if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
return false;
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
{
......
......@@ -47,6 +47,27 @@ struct fib4_rule {
#endif
};
/* IPv4 flavour of fib_rule_matchall(): true only when the rule has no
 * destination/source prefix length and no TOS, and the generic part of the
 * rule passes fib_rule_matchall() as well.
 */
static bool fib4_rule_matchall(const struct fib_rule *rule)
{
	struct fib4_rule *rule4 = container_of(rule, struct fib4_rule, common);
	bool has_ipv4_selector = rule4->dst_len || rule4->src_len ||
				 rule4->tos;

	return !has_ipv4_selector && fib_rule_matchall(rule);
}
/* Report whether @rule is one of the default IPv4 rules.
 *
 * A rule qualifies when it has no selectors (fib4_rule_matchall()), its
 * action is FR_ACT_TO_TBL, it is not an l3mdev rule, and it points at the
 * local, main or default table. Returns true in that case, false otherwise.
 */
bool fib4_rule_default(const struct fib_rule *rule)
{
	if (!fib4_rule_matchall(rule))
		return false;
	if (rule->action != FR_ACT_TO_TBL || rule->l3mdev)
		return false;

	switch (rule->table) {
	case RT_TABLE_LOCAL:
	case RT_TABLE_MAIN:
	case RT_TABLE_DEFAULT:
		return true;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(fib4_rule_default);
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
......@@ -164,20 +185,36 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_rule *rule)
{
struct fib_rule_notifier_info info = {
.rule = rule,
};
return call_fib_notifier(nb, net, event_type, &info.info);
}
static int call_fib_rule_notifiers(struct net *net,
enum fib_event_type event_type)
enum fib_event_type event_type,
struct fib_rule *rule)
{
struct fib_notifier_info info;
struct fib_rule_notifier_info info = {
.rule = rule,
};
return call_fib_notifiers(net, event_type, &info);
return call_fib_notifiers(net, event_type, &info.info);
}
/* Called with rcu_read_lock() */
void fib_rules_notify(struct net *net, struct notifier_block *nb)
{
struct fib_notifier_info info;
struct fib_rules_ops *ops = net->ipv4.rules_ops;
struct fib_rule *rule;
if (net->ipv4.fib_has_custom_rules)
call_fib_notifier(nb, net, FIB_EVENT_RULE_ADD, &info);
list_for_each_entry_rcu(rule, &ops->rules_list, list)
call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
}
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
......@@ -236,7 +273,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD);
call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
......@@ -258,7 +295,7 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL);
call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册