提交 f6310b61 编写于 作者: D David S. Miller

Merge branch 'net-Add-route-offload-indication'

Ido Schimmel says:

====================
net: Add route offload indication

This patch set adds offload indication to IPv4 and IPv6 routes. So far
offload indication was only available for the nexthop via
'RTNH_F_OFFLOAD', which is problematic as a nexthop is usually shared
between multiple routes.

Based on feedback from Roopa and David on the RFC [1], the indication is
split to 'offload' and 'trap'. This is done because not all the routes
present in hardware actually offload traffic from the kernel. For
example, host routes merely trap packets to the kernel. The two flags
are dumped to user space via the 'rtm_flags' field in the ancillary
header of the rtnetlink message.

In addition, the patch set uses the new flags in order to test the FIB
offload API by adding a dummy FIB offload implementation to netdevsim.
The new tests are added to a shared library and can be therefore shared
between different drivers.

Patches #1-#3 add offload indication to IPv4 routes.
Patches #4 adds offload indication to IPv6 routes.
Patches #5-#6 add support for the offload indication in mlxsw.
Patch #7 adds dummy FIB offload implementation in netdevsim.
Patches #8-#10 add selftests.

v2 (feedback from David Ahern):
* Patch #2: Name last argument of fib_dump_info()
* Patch #2: Move 'struct fib_rt_info' to include/net/ip_fib.h so that it
  could later be passed to fib_alias_hw_flags_set()
* Patch #3: Make use of 'struct fib_rt_info' in fib_alias_hw_flags_set()
* Patch #6: Convert to new fib_alias_hw_flags_set() interface
* Patch #7: Convert to new fib_alias_hw_flags_set() interface

[1] https://patchwork.ozlabs.org/cover/1170530/
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -549,6 +549,7 @@ source "drivers/net/hyperv/Kconfig"
config NETDEVSIM
tristate "Simulated networking device"
depends on DEBUG_FS
depends on IPV6 || IPV6=n
select NET_DEVLINK
help
This driver is a developer testing tool and software model that can
......
......@@ -3235,20 +3235,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
return 0;
}
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op, int err);
static void
mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
{
enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
struct mlxsw_sp_fib_entry *fib_entry;
list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node)
mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
}
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
/* Valid sizes for an adjacency group are:
......@@ -3352,6 +3338,73 @@ mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
}
}
static struct mlxsw_sp_nexthop *
mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
static void
mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
int i;
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
if (nh->offloaded)
nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
else
nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
}
static void
__mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_fib6_entry *fib6_entry)
{
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
struct mlxsw_sp_nexthop *nh;
nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
if (nh && nh->offloaded)
fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
else
fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
}
static void
mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
/* Unfortunately, in IPv6 the route and the nexthop are described by
* the same struct, so we need to iterate over all the routes using the
* nexthop group and set / clear the offload indication for them.
*/
list_for_each_entry(fib6_entry, &nh_grp->fib_list,
common.nexthop_group_node)
__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
}
static void
mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
case AF_INET:
mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
break;
case AF_INET6:
mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
break;
}
}
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
......@@ -3425,6 +3478,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
goto set_trap;
}
mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
if (!old_adj_index_valid) {
/* The trap was set for fib entries, so we have to call
* fib entry update to unset it and use adjacency index.
......@@ -3446,9 +3501,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
goto set_trap;
}
/* Offload state within the group changed, so update the flags. */
mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
return;
set_trap:
......@@ -3461,6 +3513,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
if (err)
dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
if (old_adj_index_valid)
mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
nh_grp->ecmp_size, nh_grp->adj_index);
......@@ -4043,131 +4096,128 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
}
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
int i;
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
return;
}
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
int dst_len = fib_entry->fib_node->key.prefix_len;
struct mlxsw_sp_fib4_entry *fib4_entry;
struct fib_rt_info fri;
bool should_offload;
if (nh->offloaded)
nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
else
nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
common);
fri.fi = fi;
fri.tb_id = fib4_entry->tb_id;
fri.dst = cpu_to_be32(*p_dst);
fri.dst_len = dst_len;
fri.tos = fib4_entry->tos;
fri.type = fib4_entry->type;
fri.offload = should_offload;
fri.trap = !should_offload;
fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}
static void
mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
int i;
if (!list_is_singular(&nh_grp->fib_list))
return;
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
int dst_len = fib_entry->fib_node->key.prefix_len;
struct mlxsw_sp_fib4_entry *fib4_entry;
struct fib_rt_info fri;
nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
common);
fri.fi = fi;
fri.tb_id = fib4_entry->tb_id;
fri.dst = cpu_to_be32(*p_dst);
fri.dst_len = dst_len;
fri.tos = fib4_entry->tos;
fri.type = fib4_entry->type;
fri.offload = false;
fri.trap = false;
fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}
static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
bool should_offload;
should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
/* In IPv6 a multipath route is represented using multiple routes, so
* we need to set the flags on all of them.
*/
fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
common);
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
return;
}
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
struct mlxsw_sp_nexthop *nh;
nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
if (nh && nh->offloaded)
fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
else
fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
!should_offload);
}
static void
mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
common);
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct fib6_info *rt = mlxsw_sp_rt6->rt;
rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
}
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
static void
mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
switch (fib_entry->fib_node->fib->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
mlxsw_sp_fib4_entry_offload_set(fib_entry);
mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
break;
case MLXSW_SP_L3_PROTO_IPV6:
mlxsw_sp_fib6_entry_offload_set(fib_entry);
mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
break;
}
}
static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
switch (fib_entry->fib_node->fib->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
mlxsw_sp_fib4_entry_offload_unset(fib_entry);
mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
break;
case MLXSW_SP_L3_PROTO_IPV6:
mlxsw_sp_fib6_entry_offload_unset(fib_entry);
mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
break;
}
}
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op, int err)
mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
{
switch (op) {
case MLXSW_REG_RALUE_OP_WRITE_DELETE:
return mlxsw_sp_fib_entry_offload_unset(fib_entry);
case MLXSW_REG_RALUE_OP_WRITE_WRITE:
if (err)
return;
if (mlxsw_sp_fib_entry_should_offload(fib_entry))
mlxsw_sp_fib_entry_offload_set(fib_entry);
else
mlxsw_sp_fib_entry_offload_unset(fib_entry);
return;
mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
break;
case MLXSW_REG_RALUE_OP_WRITE_DELETE:
mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
break;
default:
return;
break;
}
}
......@@ -4394,7 +4444,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
{
int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
if (err)
return err;
mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
return err;
}
......@@ -4830,7 +4883,7 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
if (!replaced)
return 0;
mlxsw_sp_fib_entry_offload_unset(replaced);
mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
common);
mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
......@@ -5113,6 +5166,11 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
&nh_grp->fib_list);
fib6_entry->common.nh_group = nh_grp;
/* The route and the nexthop are described by the same struct, so we
* need to the update the nexthop offload indication for the new route.
*/
__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
return 0;
}
......@@ -5393,7 +5451,7 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
if (!replaced)
return 0;
mlxsw_sp_fib_entry_offload_unset(replaced);
mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
common);
mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
......
......@@ -14,6 +14,12 @@
* THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
*/
#include <linux/in6.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rhashtable.h>
#include <linux/spinlock_types.h>
#include <linux/types.h>
#include <net/fib_notifier.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
......@@ -36,6 +42,48 @@ struct nsim_fib_data {
struct notifier_block fib_nb;
struct nsim_per_fib_data ipv4;
struct nsim_per_fib_data ipv6;
struct rhashtable fib_rt_ht;
struct list_head fib_rt_list;
spinlock_t fib_lock; /* Protects hashtable, list and accounting */
struct devlink *devlink;
};
struct nsim_fib_rt_key {
unsigned char addr[sizeof(struct in6_addr)];
unsigned char prefix_len;
int family;
u32 tb_id;
};
struct nsim_fib_rt {
struct nsim_fib_rt_key key;
struct rhash_head ht_node;
struct list_head list; /* Member of fib_rt_list */
};
struct nsim_fib4_rt {
struct nsim_fib_rt common;
struct fib_info *fi;
u8 tos;
u8 type;
};
struct nsim_fib6_rt {
struct nsim_fib_rt common;
struct list_head nh_list;
unsigned int nhs;
};
struct nsim_fib6_rt_nh {
struct list_head list; /* Member of nh_list */
struct fib6_info *rt;
};
static const struct rhashtable_params nsim_fib_rt_ht_params = {
.key_offset = offsetof(struct nsim_fib_rt, key),
.head_offset = offsetof(struct nsim_fib_rt, ht_node),
.key_len = sizeof(struct nsim_fib_rt_key),
.automatic_shrinking = true,
};
u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
......@@ -144,18 +192,556 @@ static int nsim_fib_account(struct nsim_fib_entry *entry, bool add,
return err;
}
static void nsim_fib_rt_init(struct nsim_fib_data *data,
struct nsim_fib_rt *fib_rt, const void *addr,
size_t addr_len, unsigned int prefix_len,
int family, u32 tb_id)
{
memcpy(fib_rt->key.addr, addr, addr_len);
fib_rt->key.prefix_len = prefix_len;
fib_rt->key.family = family;
fib_rt->key.tb_id = tb_id;
list_add(&fib_rt->list, &data->fib_rt_list);
}
static void nsim_fib_rt_fini(struct nsim_fib_rt *fib_rt)
{
list_del(&fib_rt->list);
}
static struct nsim_fib_rt *nsim_fib_rt_lookup(struct rhashtable *fib_rt_ht,
const void *addr, size_t addr_len,
unsigned int prefix_len,
int family, u32 tb_id)
{
struct nsim_fib_rt_key key;
memset(&key, 0, sizeof(key));
memcpy(key.addr, addr, addr_len);
key.prefix_len = prefix_len;
key.family = family;
key.tb_id = tb_id;
return rhashtable_lookup_fast(fib_rt_ht, &key, nsim_fib_rt_ht_params);
}
static struct nsim_fib4_rt *
nsim_fib4_rt_create(struct nsim_fib_data *data,
struct fib_entry_notifier_info *fen_info)
{
struct nsim_fib4_rt *fib4_rt;
fib4_rt = kzalloc(sizeof(*fib4_rt), GFP_ATOMIC);
if (!fib4_rt)
return NULL;
nsim_fib_rt_init(data, &fib4_rt->common, &fen_info->dst, sizeof(u32),
fen_info->dst_len, AF_INET, fen_info->tb_id);
fib4_rt->fi = fen_info->fi;
fib_info_hold(fib4_rt->fi);
fib4_rt->tos = fen_info->tos;
fib4_rt->type = fen_info->type;
return fib4_rt;
}
static void nsim_fib4_rt_destroy(struct nsim_fib4_rt *fib4_rt)
{
fib_info_put(fib4_rt->fi);
nsim_fib_rt_fini(&fib4_rt->common);
kfree(fib4_rt);
}
static struct nsim_fib4_rt *
nsim_fib4_rt_lookup(struct rhashtable *fib_rt_ht,
const struct fib_entry_notifier_info *fen_info)
{
struct nsim_fib_rt *fib_rt;
fib_rt = nsim_fib_rt_lookup(fib_rt_ht, &fen_info->dst, sizeof(u32),
fen_info->dst_len, AF_INET,
fen_info->tb_id);
if (!fib_rt)
return NULL;
return container_of(fib_rt, struct nsim_fib4_rt, common);
}
static void nsim_fib4_rt_hw_flags_set(struct net *net,
const struct nsim_fib4_rt *fib4_rt,
bool trap)
{
u32 *p_dst = (u32 *) fib4_rt->common.key.addr;
int dst_len = fib4_rt->common.key.prefix_len;
struct fib_rt_info fri;
fri.fi = fib4_rt->fi;
fri.tb_id = fib4_rt->common.key.tb_id;
fri.dst = cpu_to_be32(*p_dst);
fri.dst_len = dst_len;
fri.tos = fib4_rt->tos;
fri.type = fib4_rt->type;
fri.offload = false;
fri.trap = trap;
fib_alias_hw_flags_set(net, &fri);
}
static int nsim_fib4_rt_add(struct nsim_fib_data *data,
struct nsim_fib4_rt *fib4_rt,
struct netlink_ext_ack *extack)
{
struct net *net = devlink_net(data->devlink);
int err;
err = nsim_fib_account(&data->ipv4.fib, true, extack);
if (err)
return err;
err = rhashtable_insert_fast(&data->fib_rt_ht,
&fib4_rt->common.ht_node,
nsim_fib_rt_ht_params);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to insert IPv4 route");
goto err_fib_dismiss;
}
nsim_fib4_rt_hw_flags_set(net, fib4_rt, true);
return 0;
err_fib_dismiss:
nsim_fib_account(&data->ipv4.fib, false, extack);
return err;
}
static int nsim_fib4_rt_replace(struct nsim_fib_data *data,
struct nsim_fib4_rt *fib4_rt,
struct nsim_fib4_rt *fib4_rt_old,
struct netlink_ext_ack *extack)
{
struct net *net = devlink_net(data->devlink);
int err;
/* We are replacing a route, so no need to change the accounting. */
err = rhashtable_replace_fast(&data->fib_rt_ht,
&fib4_rt_old->common.ht_node,
&fib4_rt->common.ht_node,
nsim_fib_rt_ht_params);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to replace IPv4 route");
return err;
}
nsim_fib4_rt_hw_flags_set(net, fib4_rt, true);
nsim_fib4_rt_hw_flags_set(net, fib4_rt_old, false);
nsim_fib4_rt_destroy(fib4_rt_old);
return 0;
}
static int nsim_fib4_rt_insert(struct nsim_fib_data *data,
struct fib_entry_notifier_info *fen_info)
{
struct netlink_ext_ack *extack = fen_info->info.extack;
struct nsim_fib4_rt *fib4_rt, *fib4_rt_old;
int err;
fib4_rt = nsim_fib4_rt_create(data, fen_info);
if (!fib4_rt)
return -ENOMEM;
fib4_rt_old = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info);
if (!fib4_rt_old)
err = nsim_fib4_rt_add(data, fib4_rt, extack);
else
err = nsim_fib4_rt_replace(data, fib4_rt, fib4_rt_old, extack);
if (err)
nsim_fib4_rt_destroy(fib4_rt);
return err;
}
static void nsim_fib4_rt_remove(struct nsim_fib_data *data,
const struct fib_entry_notifier_info *fen_info)
{
struct netlink_ext_ack *extack = fen_info->info.extack;
struct nsim_fib4_rt *fib4_rt;
fib4_rt = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info);
if (WARN_ON_ONCE(!fib4_rt))
return;
rhashtable_remove_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node,
nsim_fib_rt_ht_params);
nsim_fib_account(&data->ipv4.fib, false, extack);
nsim_fib4_rt_destroy(fib4_rt);
}
static int nsim_fib4_event(struct nsim_fib_data *data,
struct fib_notifier_info *info,
unsigned long event)
{
struct fib_entry_notifier_info *fen_info;
int err = 0;
fen_info = container_of(info, struct fib_entry_notifier_info, info);
if (fen_info->fi->nh) {
NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
return 0;
}
switch (event) {
case FIB_EVENT_ENTRY_REPLACE:
err = nsim_fib4_rt_insert(data, fen_info);
break;
case FIB_EVENT_ENTRY_DEL:
nsim_fib4_rt_remove(data, fen_info);
break;
default:
break;
}
return err;
}
static struct nsim_fib6_rt_nh *
nsim_fib6_rt_nh_find(const struct nsim_fib6_rt *fib6_rt,
const struct fib6_info *rt)
{
struct nsim_fib6_rt_nh *fib6_rt_nh;
list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list) {
if (fib6_rt_nh->rt == rt)
return fib6_rt_nh;
}
return NULL;
}
static int nsim_fib6_rt_nh_add(struct nsim_fib6_rt *fib6_rt,
struct fib6_info *rt)
{
struct nsim_fib6_rt_nh *fib6_rt_nh;
fib6_rt_nh = kzalloc(sizeof(*fib6_rt_nh), GFP_ATOMIC);
if (!fib6_rt_nh)
return -ENOMEM;
fib6_info_hold(rt);
fib6_rt_nh->rt = rt;
list_add_tail(&fib6_rt_nh->list, &fib6_rt->nh_list);
fib6_rt->nhs++;
return 0;
}
static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt,
const struct fib6_info *rt)
{
struct nsim_fib6_rt_nh *fib6_rt_nh;
fib6_rt_nh = nsim_fib6_rt_nh_find(fib6_rt, rt);
if (WARN_ON_ONCE(!fib6_rt_nh))
return;
fib6_rt->nhs--;
list_del(&fib6_rt_nh->list);
#if IS_ENABLED(CONFIG_IPV6)
fib6_info_release(fib6_rt_nh->rt);
#endif
kfree(fib6_rt_nh);
}
static struct nsim_fib6_rt *
nsim_fib6_rt_create(struct nsim_fib_data *data,
struct fib6_entry_notifier_info *fen6_info)
{
struct fib6_info *iter, *rt = fen6_info->rt;
struct nsim_fib6_rt *fib6_rt;
int i = 0;
int err;
fib6_rt = kzalloc(sizeof(*fib6_rt), GFP_ATOMIC);
if (!fib6_rt)
return NULL;
nsim_fib_rt_init(data, &fib6_rt->common, &rt->fib6_dst.addr,
sizeof(rt->fib6_dst.addr), rt->fib6_dst.plen, AF_INET6,
rt->fib6_table->tb6_id);
/* We consider a multipath IPv6 route as one entry, but it can be made
* up from several fib6_info structs (one for each nexthop), so we
* add them all to the same list under the entry.
*/
INIT_LIST_HEAD(&fib6_rt->nh_list);
err = nsim_fib6_rt_nh_add(fib6_rt, rt);
if (err)
goto err_fib_rt_fini;
if (!fen6_info->nsiblings)
return fib6_rt;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (i == fen6_info->nsiblings)
break;
err = nsim_fib6_rt_nh_add(fib6_rt, iter);
if (err)
goto err_fib6_rt_nh_del;
i++;
}
WARN_ON_ONCE(i != fen6_info->nsiblings);
return fib6_rt;
err_fib6_rt_nh_del:
list_for_each_entry_continue_reverse(iter, &rt->fib6_siblings,
fib6_siblings)
nsim_fib6_rt_nh_del(fib6_rt, iter);
nsim_fib6_rt_nh_del(fib6_rt, rt);
err_fib_rt_fini:
nsim_fib_rt_fini(&fib6_rt->common);
kfree(fib6_rt);
return ERR_PTR(err);
}
static void nsim_fib6_rt_destroy(struct nsim_fib6_rt *fib6_rt)
{
struct nsim_fib6_rt_nh *iter, *tmp;
list_for_each_entry_safe(iter, tmp, &fib6_rt->nh_list, list)
nsim_fib6_rt_nh_del(fib6_rt, iter->rt);
WARN_ON_ONCE(!list_empty(&fib6_rt->nh_list));
nsim_fib_rt_fini(&fib6_rt->common);
kfree(fib6_rt);
}
static struct nsim_fib6_rt *
nsim_fib6_rt_lookup(struct rhashtable *fib_rt_ht, const struct fib6_info *rt)
{
struct nsim_fib_rt *fib_rt;
fib_rt = nsim_fib_rt_lookup(fib_rt_ht, &rt->fib6_dst.addr,
sizeof(rt->fib6_dst.addr),
rt->fib6_dst.plen, AF_INET6,
rt->fib6_table->tb6_id);
if (!fib_rt)
return NULL;
return container_of(fib_rt, struct nsim_fib6_rt, common);
}
static int nsim_fib6_rt_append(struct nsim_fib_data *data,
struct fib6_entry_notifier_info *fen6_info)
{
struct fib6_info *iter, *rt = fen6_info->rt;
struct nsim_fib6_rt *fib6_rt;
int i = 0;
int err;
fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt);
if (WARN_ON_ONCE(!fib6_rt))
return -EINVAL;
err = nsim_fib6_rt_nh_add(fib6_rt, rt);
if (err)
return err;
rt->trap = true;
if (!fen6_info->nsiblings)
return 0;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (i == fen6_info->nsiblings)
break;
err = nsim_fib6_rt_nh_add(fib6_rt, iter);
if (err)
goto err_fib6_rt_nh_del;
iter->trap = true;
i++;
}
WARN_ON_ONCE(i != fen6_info->nsiblings);
return 0;
err_fib6_rt_nh_del:
list_for_each_entry_continue_reverse(iter, &rt->fib6_siblings,
fib6_siblings) {
iter->trap = false;
nsim_fib6_rt_nh_del(fib6_rt, iter);
}
rt->trap = false;
nsim_fib6_rt_nh_del(fib6_rt, rt);
return err;
}
static void nsim_fib6_rt_hw_flags_set(const struct nsim_fib6_rt *fib6_rt,
bool trap)
{
struct nsim_fib6_rt_nh *fib6_rt_nh;
list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list)
fib6_info_hw_flags_set(fib6_rt_nh->rt, false, trap);
}
static int nsim_fib6_rt_add(struct nsim_fib_data *data,
struct nsim_fib6_rt *fib6_rt,
struct netlink_ext_ack *extack)
{
int err;
err = nsim_fib_account(&data->ipv6.fib, true, extack);
if (err)
return err;
err = rhashtable_insert_fast(&data->fib_rt_ht,
&fib6_rt->common.ht_node,
nsim_fib_rt_ht_params);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to insert IPv6 route");
goto err_fib_dismiss;
}
nsim_fib6_rt_hw_flags_set(fib6_rt, true);
return 0;
err_fib_dismiss:
nsim_fib_account(&data->ipv6.fib, false, extack);
return err;
}
static int nsim_fib6_rt_replace(struct nsim_fib_data *data,
struct nsim_fib6_rt *fib6_rt,
struct nsim_fib6_rt *fib6_rt_old,
struct netlink_ext_ack *extack)
{
int err;
/* We are replacing a route, so no need to change the accounting. */
err = rhashtable_replace_fast(&data->fib_rt_ht,
&fib6_rt_old->common.ht_node,
&fib6_rt->common.ht_node,
nsim_fib_rt_ht_params);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to replace IPv6 route");
return err;
}
nsim_fib6_rt_hw_flags_set(fib6_rt, true);
nsim_fib6_rt_hw_flags_set(fib6_rt_old, false);
nsim_fib6_rt_destroy(fib6_rt_old);
return 0;
}
static int nsim_fib6_rt_insert(struct nsim_fib_data *data,
struct fib6_entry_notifier_info *fen6_info)
{
struct netlink_ext_ack *extack = fen6_info->info.extack;
struct nsim_fib6_rt *fib6_rt, *fib6_rt_old;
int err;
fib6_rt = nsim_fib6_rt_create(data, fen6_info);
if (!fib6_rt)
return -ENOMEM;
fib6_rt_old = nsim_fib6_rt_lookup(&data->fib_rt_ht, fen6_info->rt);
if (!fib6_rt_old)
err = nsim_fib6_rt_add(data, fib6_rt, extack);
else
err = nsim_fib6_rt_replace(data, fib6_rt, fib6_rt_old, extack);
if (err)
nsim_fib6_rt_destroy(fib6_rt);
return err;
}
static void
nsim_fib6_rt_remove(struct nsim_fib_data *data,
const struct fib6_entry_notifier_info *fen6_info)
{
struct netlink_ext_ack *extack = fen6_info->info.extack;
struct nsim_fib6_rt *fib6_rt;
/* Multipath routes are first added to the FIB trie and only then
* notified. If we vetoed the addition, we will get a delete
* notification for a route we do not have. Therefore, do not warn if
* route was not found.
*/
fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, fen6_info->rt);
if (!fib6_rt)
return;
/* If not all the nexthops are deleted, then only reduce the nexthop
* group.
*/
if (fen6_info->nsiblings + 1 != fib6_rt->nhs) {
nsim_fib6_rt_nh_del(fib6_rt, fen6_info->rt);
return;
}
rhashtable_remove_fast(&data->fib_rt_ht, &fib6_rt->common.ht_node,
nsim_fib_rt_ht_params);
nsim_fib_account(&data->ipv6.fib, false, extack);
nsim_fib6_rt_destroy(fib6_rt);
}
static int nsim_fib6_event(struct nsim_fib_data *data,
struct fib_notifier_info *info,
unsigned long event)
{
struct fib6_entry_notifier_info *fen6_info;
int err = 0;
fen6_info = container_of(info, struct fib6_entry_notifier_info, info);
if (fen6_info->rt->nh) {
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
return 0;
}
if (fen6_info->rt->fib6_src.plen) {
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 source-specific route is not supported");
return 0;
}
switch (event) {
case FIB_EVENT_ENTRY_REPLACE:
err = nsim_fib6_rt_insert(data, fen6_info);
break;
case FIB_EVENT_ENTRY_APPEND:
err = nsim_fib6_rt_append(data, fen6_info);
break;
case FIB_EVENT_ENTRY_DEL:
nsim_fib6_rt_remove(data, fen6_info);
break;
default:
break;
}
return err;
}
static int nsim_fib_event(struct nsim_fib_data *data,
struct fib_notifier_info *info, bool add)
struct fib_notifier_info *info, unsigned long event)
{
struct netlink_ext_ack *extack = info->extack;
int err = 0;
switch (info->family) {
case AF_INET:
err = nsim_fib_account(&data->ipv4.fib, add, extack);
err = nsim_fib4_event(data, info, event);
break;
case AF_INET6:
err = nsim_fib_account(&data->ipv6.fib, add, extack);
err = nsim_fib6_event(data, info, event);
break;
}
......@@ -170,6 +756,9 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
struct fib_notifier_info *info = ptr;
int err = 0;
/* IPv6 routes can be added via RAs from softIRQ. */
spin_lock_bh(&data->fib_lock);
switch (event) {
case FIB_EVENT_RULE_ADD: /* fall through */
case FIB_EVENT_RULE_DEL:
......@@ -178,23 +767,74 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
break;
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_DEL:
err = nsim_fib_event(data, info, event != FIB_EVENT_ENTRY_DEL);
err = nsim_fib_event(data, info, event);
break;
}
spin_unlock_bh(&data->fib_lock);
return notifier_from_errno(err);
}
static void nsim_fib4_rt_free(struct nsim_fib_rt *fib_rt,
struct nsim_fib_data *data)
{
struct devlink *devlink = data->devlink;
struct nsim_fib4_rt *fib4_rt;
fib4_rt = container_of(fib_rt, struct nsim_fib4_rt, common);
nsim_fib4_rt_hw_flags_set(devlink_net(devlink), fib4_rt, false);
nsim_fib_account(&data->ipv4.fib, false, NULL);
nsim_fib4_rt_destroy(fib4_rt);
}
static void nsim_fib6_rt_free(struct nsim_fib_rt *fib_rt,
struct nsim_fib_data *data)
{
struct nsim_fib6_rt *fib6_rt;
fib6_rt = container_of(fib_rt, struct nsim_fib6_rt, common);
nsim_fib6_rt_hw_flags_set(fib6_rt, false);
nsim_fib_account(&data->ipv6.fib, false, NULL);
nsim_fib6_rt_destroy(fib6_rt);
}
static void nsim_fib_rt_free(void *ptr, void *arg)
{
struct nsim_fib_rt *fib_rt = ptr;
struct nsim_fib_data *data = arg;
switch (fib_rt->key.family) {
case AF_INET:
nsim_fib4_rt_free(fib_rt, data);
break;
case AF_INET6:
nsim_fib6_rt_free(fib_rt, data);
break;
default:
WARN_ON_ONCE(1);
}
}
/* inconsistent dump, trying again */
static void nsim_fib_dump_inconsistent(struct notifier_block *nb)
{
struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data,
fib_nb);
struct nsim_fib_rt *fib_rt, *fib_rt_tmp;
/* The notifier block is still not registered, so we do not need to
* take any locks here.
*/
list_for_each_entry_safe(fib_rt, fib_rt_tmp, &data->fib_rt_list, list) {
rhashtable_remove_fast(&data->fib_rt_ht, &fib_rt->ht_node,
nsim_fib_rt_ht_params);
nsim_fib_rt_free(fib_rt, data);
}
data->ipv4.fib.num = 0ULL;
data->ipv4.rules.num = 0ULL;
data->ipv6.fib.num = 0ULL;
data->ipv6.rules.num = 0ULL;
}
......@@ -255,6 +895,13 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);
data->devlink = devlink;
spin_lock_init(&data->fib_lock);
INIT_LIST_HEAD(&data->fib_rt_list);
err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params);
if (err)
goto err_data_free;
nsim_fib_set_max_all(data, devlink);
......@@ -263,7 +910,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
nsim_fib_dump_inconsistent, extack);
if (err) {
pr_err("Failed to register fib notifier\n");
goto err_out;
goto err_rhashtable_destroy;
}
devlink_resource_occ_get_register(devlink,
......@@ -284,7 +931,10 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
data);
return data;
err_out:
err_rhashtable_destroy:
rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free,
data);
err_data_free:
kfree(data);
return ERR_PTR(err);
}
......@@ -300,5 +950,8 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
devlink_resource_occ_get_unregister(devlink,
NSIM_RESOURCE_IPV4_FIB);
unregister_fib_notifier(devlink_net(devlink), &data->fib_nb);
rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free,
data);
WARN_ON_ONCE(!list_empty(&data->fib_rt_list));
kfree(data);
}
......@@ -192,7 +192,9 @@ struct fib6_info {
dst_nopolicy:1,
dst_host:1,
fib6_destroying:1,
unused:3;
offload:1,
trap:1,
unused:1;
struct rcu_head rcu;
struct nexthop *nh;
......@@ -329,6 +331,13 @@ static inline void fib6_info_release(struct fib6_info *f6i)
call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
}
static inline void fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload,
bool trap)
{
f6i->offload = offload;
f6i->trap = trap;
}
enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
......
......@@ -204,6 +204,18 @@ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res);
#define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev)
#define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif)
struct fib_rt_info {
struct fib_info *fi;
u32 tb_id;
__be32 dst;
int dst_len;
u8 tos;
u8 type;
u8 offload:1,
trap:1,
unused:6;
};
struct fib_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
u32 dst;
......@@ -464,6 +476,7 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap,
void fib_nh_common_release(struct fib_nh_common *nhc);
/* Exported by fib_trie.c */
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri);
void fib_trie_init(void);
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias);
......
......@@ -309,6 +309,8 @@ enum rt_scope_t {
#define RTM_F_PREFIX 0x800 /* Prefix addresses */
#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
#define RTM_F_TRAP 0x8000 /* route is trapping packets */
/* Reserved table identifiers */
......
......@@ -16,6 +16,9 @@ struct fib_alias {
u8 fa_slen;
u32 tb_id;
s16 fa_default;
u8 offload:1,
trap:1,
unused:6;
struct rcu_head rcu;
};
......@@ -35,9 +38,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
struct netlink_ext_ack *extack);
bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
struct fib_rt_info *fri, unsigned int flags);
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len,
u32 tb_id, const struct nl_info *info, unsigned int nlm_flags);
......
......@@ -504,6 +504,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
int dst_len, u32 tb_id, const struct nl_info *info,
unsigned int nlm_flags)
{
struct fib_rt_info fri;
struct sk_buff *skb;
u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
int err = -ENOBUFS;
......@@ -512,9 +513,15 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
if (!skb)
goto errout;
err = fib_dump_info(skb, info->portid, seq, event, tb_id,
fa->fa_type, key, dst_len,
fa->fa_tos, fa->fa_info, nlm_flags);
fri.fi = fa->fa_info;
fri.tb_id = tb_id;
fri.dst = key;
fri.dst_len = dst_len;
fri.tos = fa->fa_tos;
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
......@@ -1725,10 +1732,11 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
#endif
int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
struct fib_info *fi, unsigned int flags)
struct fib_rt_info *fri, unsigned int flags)
{
unsigned int nhs = fib_info_num_path(fi);
unsigned int nhs = fib_info_num_path(fri->fi);
struct fib_info *fi = fri->fi;
u32 tb_id = fri->tb_id;
struct nlmsghdr *nlh;
struct rtmsg *rtm;
......@@ -1738,22 +1746,22 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtm = nlmsg_data(nlh);
rtm->rtm_family = AF_INET;
rtm->rtm_dst_len = dst_len;
rtm->rtm_dst_len = fri->dst_len;
rtm->rtm_src_len = 0;
rtm->rtm_tos = tos;
rtm->rtm_tos = fri->tos;
if (tb_id < 256)
rtm->rtm_table = tb_id;
else
rtm->rtm_table = RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, tb_id))
goto nla_put_failure;
rtm->rtm_type = type;
rtm->rtm_type = fri->type;
rtm->rtm_flags = fi->fib_flags;
rtm->rtm_scope = fi->fib_scope;
rtm->rtm_protocol = fi->fib_protocol;
if (rtm->rtm_dst_len &&
nla_put_in_addr(skb, RTA_DST, dst))
nla_put_in_addr(skb, RTA_DST, fri->dst))
goto nla_put_failure;
if (fi->fib_priority &&
nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
......@@ -1795,6 +1803,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
}
if (fri->offload)
rtm->rtm_flags |= RTM_F_OFFLOAD;
if (fri->trap)
rtm->rtm_flags |= RTM_F_TRAP;
nlmsg_end(skb, nlh);
return 0;
......
......@@ -1012,6 +1012,52 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
return NULL;
}
static struct fib_alias *
fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
{
u8 slen = KEYLENGTH - fri->dst_len;
struct key_vector *l, *tp;
struct fib_table *tb;
struct fib_alias *fa;
struct trie *t;
tb = fib_get_table(net, fri->tb_id);
if (!tb)
return NULL;
t = (struct trie *)tb->tb_data;
l = fib_find_node(t, &tp, be32_to_cpu(fri->dst));
if (!l)
return NULL;
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
fa->fa_tos == fri->tos && fa->fa_info == fri->fi &&
fa->fa_type == fri->type)
return fa;
}
return NULL;
}
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{
struct fib_alias *fa_match;
rcu_read_lock();
fa_match = fib_find_matching_alias(net, fri);
if (!fa_match)
goto out;
fa_match->offload = fri->offload;
fa_match->trap = fri->trap;
out:
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);
static void trie_rebalance(struct trie *t, struct key_vector *tn)
{
while (!IS_TRIE(tn))
......@@ -1220,24 +1266,29 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_slen = fa->fa_slen;
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
new_fa->offload = 0;
new_fa->trap = 0;
hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
tb->tb_id, true) == fa) {
tb->tb_id, true) == new_fa) {
enum fib_event_type fib_event;
fib_event = FIB_EVENT_ENTRY_REPLACE;
err = call_fib_entry_notifiers(net, fib_event,
key, plen,
new_fa, extack);
if (err)
if (err) {
hlist_replace_rcu(&new_fa->fa_list,
&fa->fa_list);
goto out_free_new_fa;
}
}
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
alias_free_mem_rcu(fa);
fib_release_info(fi_drop);
......@@ -1275,6 +1326,8 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_slen = slen;
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
new_fa->offload = 0;
new_fa->trap = 0;
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
......@@ -2191,14 +2244,20 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
if (filter->dump_routes) {
if (!s_fa) {
struct fib_rt_info fri;
fri.fi = fi;
fri.tb_id = tb->tb_id;
fri.dst = xkey;
fri.dst_len = KEYLENGTH - fa->fa_slen;
fri.tos = fa->fa_tos;
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWROUTE,
tb->tb_id, fa->fa_type,
xkey,
KEYLENGTH - fa->fa_slen,
fa->fa_tos, fi, flags);
RTM_NEWROUTE, &fri, flags);
if (err < 0)
goto stop;
}
......
......@@ -3223,16 +3223,41 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb_reset_mac_header(skb);
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
struct fib_rt_info fri;
if (!res.fi) {
err = fib_props[res.type].error;
if (!err)
err = -EHOSTUNREACH;
goto errout_rcu;
}
fri.fi = res.fi;
fri.tb_id = table_id;
fri.dst = res.prefix;
fri.dst_len = res.prefixlen;
fri.tos = fl4.flowi4_tos;
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
if (res.fa_head) {
struct fib_alias *fa;
hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) {
u8 slen = 32 - fri.dst_len;
if (fa->fa_slen == slen &&
fa->tb_id == fri.tb_id &&
fa->fa_tos == fri.tos &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
fri.offload = fa->offload;
fri.trap = fa->trap;
break;
}
}
}
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
rt->rt_type, res.prefix, res.prefixlen,
fl4.flowi4_tos, res.fi, 0);
nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0);
} else {
err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid,
......
......@@ -5576,6 +5576,13 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
expires -= jiffies;
}
if (!dst) {
if (rt->offload)
rtm->rtm_flags |= RTM_F_OFFLOAD;
if (rt->trap)
rtm->rtm_flags |= RTM_F_TRAP;
}
if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
goto nla_put_failure;
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test is for checking the FIB offload API on top of mlxsw.
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
ipv4_identical_routes
ipv4_tos
ipv4_metric
ipv4_replace
ipv4_delete
ipv4_plen
ipv4_replay
ipv4_flush
ipv6_add
ipv6_metric
ipv6_append_single
ipv6_replace_single
ipv6_metric_multipath
ipv6_append_multipath
ipv6_replace_multipath
ipv6_append_multipath_to_single
ipv6_delete_single
ipv6_delete_multipath
ipv6_replay_single
ipv6_replay_multipath
"
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
source $lib_dir/fib_offload_lib.sh
ipv4_identical_routes()
{
fib_ipv4_identical_routes_test "testns1"
}
ipv4_tos()
{
fib_ipv4_tos_test "testns1"
}
ipv4_metric()
{
fib_ipv4_metric_test "testns1"
}
ipv4_replace()
{
fib_ipv4_replace_test "testns1"
}
ipv4_delete()
{
fib_ipv4_delete_test "testns1"
}
ipv4_plen()
{
fib_ipv4_plen_test "testns1"
}
ipv4_replay_metric()
{
fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV"
}
ipv4_replay_tos()
{
fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV"
}
ipv4_replay_plen()
{
fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV"
}
ipv4_replay()
{
ipv4_replay_metric
ipv4_replay_tos
ipv4_replay_plen
}
ipv4_flush()
{
fib_ipv4_flush_test "testns1"
}
ipv6_add()
{
fib_ipv6_add_test "testns1"
}
ipv6_metric()
{
fib_ipv6_metric_test "testns1"
}
ipv6_append_single()
{
fib_ipv6_append_single_test "testns1"
}
ipv6_replace_single()
{
fib_ipv6_replace_single_test "testns1"
}
ipv6_metric_multipath()
{
fib_ipv6_metric_multipath_test "testns1"
}
ipv6_append_multipath()
{
fib_ipv6_append_multipath_test "testns1"
}
ipv6_replace_multipath()
{
fib_ipv6_replace_multipath_test "testns1"
}
ipv6_append_multipath_to_single()
{
fib_ipv6_append_multipath_to_single_test "testns1"
}
ipv6_delete_single()
{
fib_ipv6_delete_single_test "testns1"
}
ipv6_delete_multipath()
{
fib_ipv6_delete_multipath_test "testns1"
}
ipv6_replay_single()
{
fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV"
}
ipv6_replay_multipath()
{
fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV"
}
setup_prepare()
{
ip netns add testns1
if [ $? -ne 0 ]; then
echo "Failed to add netns \"testns1\""
exit 1
fi
devlink dev reload $DEVLINK_DEV netns testns1
if [ $? -ne 0 ]; then
echo "Failed to reload into netns \"testns1\""
exit 1
fi
}
cleanup()
{
pre_cleanup
devlink -N testns1 dev reload $DEVLINK_DEV netns $$
ip netns del testns1
}
trap cleanup EXIT
setup_prepare
tests_run
exit $EXIT_STATUS
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test is for checking the FIB offload API. It makes use of netdevsim
# which registers a listener to the FIB notification chain.
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
ipv4_identical_routes
ipv4_tos
ipv4_metric
ipv4_replace
ipv4_delete
ipv4_plen
ipv4_replay
ipv4_flush
ipv4_error_path
ipv6_add
ipv6_metric
ipv6_append_single
ipv6_replace_single
ipv6_metric_multipath
ipv6_append_multipath
ipv6_replace_multipath
ipv6_append_multipath_to_single
ipv6_delete_single
ipv6_delete_multipath
ipv6_replay_single
ipv6_replay_multipath
ipv6_error_path
"
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
source $lib_dir/fib_offload_lib.sh
ipv4_identical_routes()
{
fib_ipv4_identical_routes_test "testns1"
}
ipv4_tos()
{
fib_ipv4_tos_test "testns1"
}
ipv4_metric()
{
fib_ipv4_metric_test "testns1"
}
ipv4_replace()
{
fib_ipv4_replace_test "testns1"
}
ipv4_delete()
{
fib_ipv4_delete_test "testns1"
}
ipv4_plen()
{
fib_ipv4_plen_test "testns1"
}
ipv4_replay_metric()
{
fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV"
}
ipv4_replay_tos()
{
fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV"
}
ipv4_replay_plen()
{
fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV"
}
ipv4_replay()
{
ipv4_replay_metric
ipv4_replay_tos
ipv4_replay_plen
}
ipv4_flush()
{
fib_ipv4_flush_test "testns1"
}
ipv4_error_path_add()
{
local lsb
RET=0
ip -n testns1 link add name dummy1 type dummy
ip -n testns1 link set dev dummy1 up
devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10
devlink -N testns1 dev reload $DEVLINK_DEV
for lsb in $(seq 1 20); do
ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 \
&> /dev/null
done
log_test "IPv4 error path - add"
ip -n testns1 link del dev dummy1
}
ipv4_error_path_replay()
{
local lsb
RET=0
ip -n testns1 link add name dummy1 type dummy
ip -n testns1 link set dev dummy1 up
devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100
devlink -N testns1 dev reload $DEVLINK_DEV
for lsb in $(seq 1 20); do
ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1
done
devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10
devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null
log_test "IPv4 error path - replay"
ip -n testns1 link del dev dummy1
# Successfully reload after deleting all the routes.
devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100
devlink -N testns1 dev reload $DEVLINK_DEV
}
ipv4_error_path()
{
# Test the different error paths of the notifiers by limiting the size
# of the "IPv4/fib" resource.
ipv4_error_path_add
ipv4_error_path_replay
}
ipv6_add()
{
fib_ipv6_add_test "testns1"
}
ipv6_metric()
{
fib_ipv6_metric_test "testns1"
}
ipv6_append_single()
{
fib_ipv6_append_single_test "testns1"
}
ipv6_replace_single()
{
fib_ipv6_replace_single_test "testns1"
}
ipv6_metric_multipath()
{
fib_ipv6_metric_multipath_test "testns1"
}
ipv6_append_multipath()
{
fib_ipv6_append_multipath_test "testns1"
}
ipv6_replace_multipath()
{
fib_ipv6_replace_multipath_test "testns1"
}
ipv6_append_multipath_to_single()
{
fib_ipv6_append_multipath_to_single_test "testns1"
}
ipv6_delete_single()
{
fib_ipv6_delete_single_test "testns1"
}
ipv6_delete_multipath()
{
fib_ipv6_delete_multipath_test "testns1"
}
ipv6_replay_single()
{
fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV"
}
ipv6_replay_multipath()
{
fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV"
}
ipv6_error_path_add_single()
{
local lsb
RET=0
ip -n testns1 link add name dummy1 type dummy
ip -n testns1 link set dev dummy1 up
devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
devlink -N testns1 dev reload $DEVLINK_DEV
for lsb in $(seq 1 20); do
ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 \
&> /dev/null
done
log_test "IPv6 error path - add single"
ip -n testns1 link del dev dummy1
}
ipv6_error_path_add_multipath()
{
local lsb
RET=0
for i in $(seq 1 2); do
ip -n testns1 link add name dummy$i type dummy
ip -n testns1 link set dev dummy$i up
ip -n testns1 address add 2001:db8:$i::1/64 dev dummy$i
done
devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
devlink -N testns1 dev reload $DEVLINK_DEV
for lsb in $(seq 1 20); do
ip -n testns1 route add 2001:db8:10::${lsb}/128 \
nexthop via 2001:db8:1::2 dev dummy1 \
nexthop via 2001:db8:2::2 dev dummy2 &> /dev/null
done
log_test "IPv6 error path - add multipath"
for i in $(seq 1 2); do
ip -n testns1 link del dev dummy$i
done
}
ipv6_error_path_replay()
{
local lsb
RET=0
ip -n testns1 link add name dummy1 type dummy
ip -n testns1 link set dev dummy1 up
devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100
devlink -N testns1 dev reload $DEVLINK_DEV
for lsb in $(seq 1 20); do
ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1
done
devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null
log_test "IPv6 error path - replay"
ip -n testns1 link del dev dummy1
# Successfully reload after deleting all the routes.
devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100
devlink -N testns1 dev reload $DEVLINK_DEV
}
ipv6_error_path()
{
# Test the different error paths of the notifiers by limiting the size
# of the "IPv6/fib" resource.
ipv6_error_path_add_single
ipv6_error_path_add_multipath
ipv6_error_path_replay
}
setup_prepare()
{
local netdev
modprobe netdevsim &> /dev/null
echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
while [ ! -d $SYSFS_NET_DIR ] ; do :; done
ip netns add testns1
if [ $? -ne 0 ]; then
echo "Failed to add netns \"testns1\""
exit 1
fi
devlink dev reload $DEVLINK_DEV netns testns1
if [ $? -ne 0 ]; then
echo "Failed to reload into netns \"testns1\""
exit 1
fi
}
cleanup()
{
pre_cleanup
ip netns del testns1
echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
modprobe -r netdevsim &> /dev/null
}
trap cleanup EXIT
setup_prepare
tests_run
exit $EXIT_STATUS
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册