diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8809dd4de57e2f13952184c3bc0867edbcec1a3d..8cb2625472c3540c536feda2d5f9332910bb7ad4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -40,6 +40,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \ en_rep.o en/rep/bond.o en/mod_hdr.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ en/mapping.o lib/fs_chains.o en/tc_tun.o \ + esw/indir_table.o en/tc_tun_encap.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h index 1177860a2ee48f7e791e1c8303d10ba92a32453b..f15718db5d0ee163eb9c897ad7ebf436a460c9c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h @@ -15,7 +15,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update, TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha, bool neigh_connected), TP_ARGS(nhe, ha, neigh_connected), - TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name) + TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name) __array(u8, ha, ETH_ALEN) __array(u8, v4, 4) __array(u8, v6, 16) @@ -25,7 +25,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update, struct in6_addr *pin6; __be32 *p32; - __assign_str(devname, mn->dev->name); + __assign_str(devname, nhe->neigh_dev->name); __entry->neigh_connected = neigh_connected; memcpy(__entry->ha, ha, ETH_ALEN); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h index d4e6cfaaade3726547f9e5d0db2030cd69c30c26..ac52ef37f38a8e66883d4aef985c6e4976c706f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h @@ -77,7 +77,7 @@ TRACE_EVENT(mlx5e_stats_flower, TRACE_EVENT(mlx5e_tc_update_neigh_used_value, TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used), TP_ARGS(nhe, neigh_used), - TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name) + TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name) __array(u8, v4, 4) __array(u8, v6, 16) __field(bool, neigh_used) @@ -86,7 +86,7 @@ TRACE_EVENT(mlx5e_tc_update_neigh_used_value, struct in6_addr *pin6; __be32 *p32; - __assign_str(devname, mn->dev->name); + __assign_str(devname, nhe->neigh_dev->name); __entry->neigh_used = neigh_used; p32 = (__be32 *)__entry->v4; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index 616ee585a98557367046020ea9b7cec0dfe80a35..be0ee03de7217b08fff3fcdc6770a442ee8d9950 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -129,10 +129,10 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) work); struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; struct neighbour *n = update_work->n; + bool neigh_connected, same_dev; struct mlx5e_encap_entry *e; unsigned char ha[ETH_ALEN]; struct mlx5e_priv *priv; - bool neigh_connected; u8 nud_state, dead; rtnl_lock(); @@ -146,12 +146,16 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) memcpy(ha, n->ha, 
ETH_ALEN); nud_state = n->nud_state; dead = n->dead; + same_dev = READ_ONCE(nhe->neigh_dev) == n->dev; read_unlock_bh(&n->lock); neigh_connected = (nud_state & NUD_VALID) && !dead; trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + if (!same_dev) + goto out; + list_for_each_entry(e, &nhe->encap_list, encap_list) { if (!mlx5e_encap_take(e)) continue; @@ -160,6 +164,7 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) mlx5e_rep_update_flows(priv, e, neigh_connected, ha); mlx5e_encap_put(priv, e); } +out: rtnl_unlock(); mlx5e_release_neigh_update_work(update_work); } @@ -175,7 +180,6 @@ static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv if (WARN_ON(!update_work)) return NULL; - m_neigh.dev = n->dev; m_neigh.family = n->ops->family; memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); @@ -246,7 +250,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, rcu_read_lock(); list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, neigh_list) { - if (p->dev == nhe->m_neigh.dev) { + if (p->dev == READ_ONCE(nhe->neigh_dev)) { found = true; break; } @@ -369,7 +373,8 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, } int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev, struct mlx5e_neigh_hash_entry **nhe) { int err; @@ -379,10 +384,11 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, return -ENOMEM; (*nhe)->priv = priv; - memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh)); spin_lock_init(&(*nhe)->encap_list_lock); INIT_LIST_HEAD(&(*nhe)->encap_list); refcount_set(&(*nhe)->refcnt, 1); + WRITE_ONCE((*nhe)->neigh_dev, neigh_dev); err = mlx5e_rep_neigh_entry_insert(priv, *nhe); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h index 32b239189c95bbf17dea25084ba6b77972075b31..6fe0ab9709432009a00828367e92db05633947d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h @@ -16,7 +16,8 @@ struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh); int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev, struct mlx5e_neigh_hash_entry **nhe); void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 76177f7c5ec292b02fbfff1876205bf9b5bf1ee7..065126370acd2d1c47fe18898a968d50f4c64bc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -26,7 +26,9 @@ struct mlx5e_rep_indr_block_priv { }; int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e) + struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; @@ -39,9 +41,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, return err; mutex_lock(&rpriv->neigh_update.encap_lock); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh); if (!nhe) { - err = 
mlx5e_rep_neigh_entry_create(priv, e, &nhe); + err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe); if (err) { mutex_unlock(&rpriv->neigh_update.encap_lock); mlx5_tun_entropy_refcount_dec(tun_entropy, @@ -122,7 +124,7 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv, } unlock: mutex_unlock(&esw->offloads.encap_tbl_lock); - mlx5e_put_encap_flow_list(priv, &flow_list); + mlx5e_put_flow_list(priv, &flow_list); } static int @@ -651,7 +653,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, tc_skb_ext->chain = chain; - zone_restore_id = reg_c1 & ZONE_RESTORE_MAX; + zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &uplink_rpriv->uplink_priv; @@ -660,7 +662,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, return false; } - tunnel_id = reg_c1 >> REG_MAPPING_SHIFT(TUNNEL_TO_REG); + tunnel_id = reg_c1 >> ESW_TUN_OFFSET; return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); #endif /* CONFIG_NET_TC_SKB_EXT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index fdf9702c2d7dc034e13ed9a29a74f1dfbe757bde..d0661578467bea55c2c63924a6b10133bee248a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -27,7 +27,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv, unsigned char ha[ETH_ALEN]); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); + struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev); void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 40aaa105b2fcd63c10e3230d0dc745b7aa9295d6..0b503ebe59ecc9bfde5bb7c9ac6c4d0728791c4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -711,6 +711,8 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->outer_match_level = MLX5_MATCH_L4; attr->counter = entry->counter->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + attr->esw_attr->in_mdev = priv->mdev; mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); @@ -1761,7 +1763,6 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, goto err_set_registers; } - dealloc_mod_hdr_actions(mod_acts); pre_ct_attr->modify_hdr = mod_hdr; pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 6503b614337cae655e24070bc73bb72ef78acc64..69e618d1707138a03305d8c96afb15ca82c45f57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -73,7 +73,7 @@ struct mlx5_ct_attr { #define zone_restore_to_reg_ct {\ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ .moffset = 0,\ - .mlen = 1,\ + .mlen = (ESW_ZONE_ID_BITS / 8),\ .soffset = MLX5_BYTE_OFF(fte_match_param,\ misc_parameters_2.metadata_reg_c_1) + 3,\ } @@ -81,14 +81,12 @@ struct mlx5_ct_attr { #define nic_zone_restore_to_reg_ct {\ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\ .moffset = 2,\ - .mlen = 1,\ + .mlen = (ESW_ZONE_ID_BITS / 8),\ } 
#define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen) #define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset) #define REG_MAPPING_SHIFT(reg) (REG_MAPPING_MOFFSET(reg) * 8) -#define ZONE_RESTORE_BITS (REG_MAPPING_MLEN(ZONE_RESTORE_TO_REG) * 8) -#define ZONE_RESTORE_MAX GENMASK(ZONE_RESTORE_BITS - 1, 0) #if IS_ENABLED(CONFIG_MLX5_TC_CT) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h new file mode 100644 index 0000000000000000000000000000000000000000..c223591ffc220902c6b8ff06f411d79ac7b3e231 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_PRIV_H__ +#define __MLX5_EN_TC_PRIV_H__ + +#include "en_tc.h" + +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) + +#define MLX5E_TC_MAX_SPLITS 1 + +enum { + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, + MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, + MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9, + MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10, +}; + +struct mlx5e_tc_flow_parse_attr { + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct net_device *filter_dev; + struct mlx5_flow_spec spec; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; + int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; + struct ethhdr eth; +}; + +/* Helper struct for accessing a struct containing list_head array. + * Containing struct + * |- Helper array + * [0] Helper item 0 + * |- list_head item 0 + * |- index (0) + * [1] Helper item 1 + * |- list_head item 1 + * |- index (1) + * To access the containing struct from one of the list_head items: + * 1. Get the helper item from the list_head item using + * helper item = + * container_of(list_head item, helper struct type, list_head field) + * 2. 
Get the containing struct from the helper item and its index in the array: + * containing struct = + * container_of(helper item, containing struct type, helper field[index]) + */ +struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ + struct list_head list; + int index; +}; + +struct encap_route_flow_item { + struct mlx5e_route_entry *r; /* attached route instance */ + int index; +}; + +struct mlx5e_tc_flow { + struct rhash_head node; + struct mlx5e_priv *priv; + u64 cookie; + unsigned long flags; + struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + + /* flows sharing the same reformat object - currently mpls decap */ + struct list_head l3_to_l2_reformat; + struct mlx5e_decap_entry *decap_reformat; + + /* flows sharing same route entry */ + struct list_head decap_routes; + struct mlx5e_route_entry *decap_route; + struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS]; + + /* Flow can be associated with multiple encap IDs. + * The number of encaps is bounded by the number of supported + * destinations. + */ + struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_flow *peer_flow; + struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */ + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ + struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer; /* flows with peer flow */ + struct list_head unready; /* flows not ready to be offloaded (e.g + * due to missing route) + */ + struct net_device *orig_dev; /* netdev adding flow first */ + int tmp_entry_index; + struct list_head tmp_list; /* temporary flow list used by neigh update */ + refcount_t refcnt; + struct rcu_head rcu_head; + struct completion init_done; + int tunnel_id; /* the mapped tunnel id of this flow */ + struct mlx5_flow_attr *attr; +}; + +u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer); + +struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow); + +static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before setting bit. */ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); +} + +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + +static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) +{ + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); +} + +#define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before clearing bit. */ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); +} + +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags.
*/ + smp_mb__after_atomic(); + return ret; +} + +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow); +struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec); +void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow); +void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); + +struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow); + +#endif /* __MLX5_EN_TC_PRIV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 90930e54b6f2882f95336e647ca2d8b51af6d6c0..f8075a604605d15be8959bc6a148a3c994b003ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -6,10 +6,32 @@ #include #include #include "en/tc_tun.h" +#include "en/tc_priv.h" #include "en_tc.h" #include "rep/tc.h" #include "rep/neigh.h" +struct mlx5e_tc_tun_route_attr { + struct net_device *out_dev; + struct net_device *route_dev; + union { + struct flowi4 fl4; + struct flowi6 fl6; + } fl; + struct neighbour *n; + u8 ttl; +}; + +#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {} + +static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr) +{ + if (attr->n) + neigh_release(attr->n); + if (attr->route_dev) + dev_put(attr->route_dev); +} + struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) { if (netif_is_vxlan(tunnel_dev)) @@ -79,12 +101,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct net_device **out_dev, - struct net_device **route_dev, - struct flowi4 *fl4, - struct neighbour **out_n, - u8 *out_ttl) + struct mlx5e_tc_tun_route_attr *attr) { + struct net_device *route_dev; + struct net_device *out_dev; struct neighbour *n; struct rtable *rt; @@ -97,46 +117,50 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = mdev->priv.eswitch; uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - fl4->flowi4_oif = uplink_dev->ifindex; + attr->fl.fl4.flowi4_oif = uplink_dev->ifindex; } - rt = ip_route_output_key(dev_net(mirred_dev), fl4); + rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4); if (IS_ERR(rt)) return PTR_ERR(rt); if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { - ip_rt_put(rt); - return -ENETUNREACH; + ret = -ENETUNREACH; + goto err_rt_release; } #else return -EOPNOTSUPP; #endif - ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); - if (ret < 0) { - ip_rt_put(rt); - return ret; - } - dev_hold(*route_dev); + ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev); + if (ret < 0) + goto err_rt_release; + dev_hold(route_dev); - if (!(*out_ttl)) - *out_ttl = ip4_dst_hoplimit(&rt->dst); - n = dst_neigh_lookup(&rt->dst, &fl4->daddr); - ip_rt_put(rt); + if (!attr->ttl) + attr->ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr); if (!n) { - dev_put(*route_dev); - return -ENOMEM; + ret = -ENOMEM; + goto err_dev_release; } - *out_n = n; + ip_rt_put(rt); + attr->route_dev = route_dev; + 
attr->out_dev = out_dev; + attr->n = n; return 0; + +err_dev_release: + dev_put(route_dev); +err_rt_release: + ip_rt_put(rt); + return ret; } -static void mlx5e_route_lookup_ipv4_put(struct net_device *route_dev, - struct neighbour *n) +static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr) { - neigh_release(n); - dev_put(route_dev); + mlx5e_tc_tun_route_attr_cleanup(attr); } static const char *mlx5e_netdev_kind(struct net_device *dev) @@ -188,28 +212,26 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); const struct ip_tunnel_key *tun_key = &e->tun_info->key; - struct net_device *out_dev, *route_dev; - struct flowi4 fl4 = {}; - struct neighbour *n; + struct mlx5e_neigh m_neigh = {}; + TC_TUN_ROUTE_ATTR_INIT(attr); int ipv4_encap_size; char *encap_header; - u8 nud_state, ttl; struct iphdr *ip; + u8 nud_state; int err; /* add the IP fields */ - fl4.flowi4_tos = tun_key->tos; - fl4.daddr = tun_key->u.ipv4.dst; - fl4.saddr = tun_key->u.ipv4.src; - ttl = tun_key->ttl; + attr.fl.fl4.flowi4_tos = tun_key->tos; + attr.fl.fl4.daddr = tun_key->u.ipv4.dst; + attr.fl.fl4.saddr = tun_key->u.ipv4.src; + attr.ttl = tun_key->ttl; - err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &out_dev, &route_dev, - &fl4, &n, &ttl); + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr); if (err) return err; ipv4_encap_size = - (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct iphdr) + e->tunnel->calc_hlen(e); @@ -226,40 +248,36 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, goto release_neigh; } - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - e->route_dev_ifindex = route_dev->ifindex; + m_neigh.family = attr.n->ops->family; + memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); + e->out_dev = attr.out_dev; + e->route_dev_ifindex = attr.route_dev->ifindex; /* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the * neigh changes its validity state, we would find the relevant neigh * in the hash.
*/ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev); if (err) goto free_encap; - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + read_unlock_bh(&attr.n->lock); /* add ethernet header */ - ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, ETH_P_IP); /* add ip header */ ip->tos = tun_key->tos; ip->version = 0x4; ip->ihl = 0x5; - ip->ttl = ttl; - ip->daddr = fl4.daddr; - ip->saddr = fl4.saddr; + ip->ttl = attr.ttl; + ip->daddr = attr.fl.fl4.daddr; + ip->saddr = attr.fl.fl4.saddr; /* add tunneling protocol header */ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), @@ -271,7 +289,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, e->encap_header = encap_header; if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); + neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ @@ -287,8 +305,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, } e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - mlx5e_route_lookup_ipv4_put(route_dev, n); + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv4_put(&attr); return err; destroy_neigh_entry: @@ -296,55 +314,155 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, free_encap: kfree(encap_header); release_neigh: - mlx5e_route_lookup_ipv4_put(route_dev, n); + mlx5e_route_lookup_ipv4_put(&attr); + return err; +} + +int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + TC_TUN_ROUTE_ATTR_INIT(attr); + int ipv4_encap_size; + char *encap_header; + struct iphdr *ip; + u8 nud_state; + int err; + + /* add the IP fields */ + attr.fl.fl4.flowi4_tos = tun_key->tos; + attr.fl.fl4.daddr = tun_key->u.ipv4.dst; + attr.fl.fl4.saddr = tun_key->u.ipv4.src; + attr.ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(attr.route_dev) ? 
VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + e->route_dev_ifindex = attr.route_dev->ifindex; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = attr.ttl; + ip->daddr = attr.fl.fl4.daddr; + ip->saddr = attr.fl.fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto free_encap; + + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto release_neigh; + } + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto free_encap; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv4_put(&attr); + return err; + +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv4_put(&attr); return err; } #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct net_device **out_dev, - struct net_device **route_dev, - struct flowi6 *fl6, - struct neighbour **out_n, - u8 *out_ttl) + struct mlx5e_tc_tun_route_attr *attr) { + struct net_device *route_dev; + struct net_device *out_dev; struct dst_entry *dst; struct neighbour *n; int ret; - dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6, + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6, NULL); if (IS_ERR(dst)) return PTR_ERR(dst); - if (!(*out_ttl)) - *out_ttl = ip6_dst_hoplimit(dst); + if (!attr->ttl) + attr->ttl = ip6_dst_hoplimit(dst); - ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); - if (ret < 0) { - dst_release(dst); - return ret; - } + ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev); + if (ret < 0) + goto err_dst_release; - dev_hold(*route_dev); - n = dst_neigh_lookup(dst, &fl6->daddr); - dst_release(dst); + dev_hold(route_dev); + n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr); if (!n) { - dev_put(*route_dev); - return -ENOMEM; + ret = -ENOMEM; + goto err_dev_release; } - *out_n = n; + dst_release(dst); + attr->out_dev = out_dev; + attr->route_dev = route_dev; + attr->n = n; return 0; + +err_dev_release: + dev_put(route_dev); +err_dst_release: + dst_release(dst); + return ret; } -static void mlx5e_route_lookup_ipv6_put(struct net_device *route_dev, - struct neighbour *n) +static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr) { - neigh_release(n); - dev_put(route_dev); +
mlx5e_tc_tun_route_attr_cleanup(attr); } int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, @@ -353,28 +471,25 @@ { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); const struct ip_tunnel_key *tun_key = &e->tun_info->key; - struct net_device *out_dev, *route_dev; - struct flowi6 fl6 = {}; + struct mlx5e_neigh m_neigh = {}; + TC_TUN_ROUTE_ATTR_INIT(attr); struct ipv6hdr *ip6h; - struct neighbour *n = NULL; int ipv6_encap_size; char *encap_header; - u8 nud_state, ttl; + u8 nud_state; int err; - ttl = tun_key->ttl; + attr.ttl = tun_key->ttl; + attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + attr.fl.fl6.daddr = tun_key->u.ipv6.dst; + attr.fl.fl6.saddr = tun_key->u.ipv6.src; - fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); - fl6.daddr = tun_key->u.ipv6.dst; - fl6.saddr = tun_key->u.ipv6.src; - - err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &out_dev, &route_dev, - &fl6, &n, &ttl); + err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr); if (err) return err; ipv6_encap_size = - (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct ipv6hdr) + e->tunnel->calc_hlen(e); @@ -391,39 +506,35 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto release_neigh; } - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - e->route_dev_ifindex = route_dev->ifindex; + m_neigh.family = attr.n->ops->family; + memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); + e->out_dev = attr.out_dev; + e->route_dev_ifindex = attr.route_dev->ifindex; /* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the * neigh changes its validity state, we would find the relevant neigh * in the hash. */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev); if (err) goto free_encap; - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + read_unlock_bh(&attr.n->lock); /* add ethernet header */ - ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, ETH_P_IPV6); /* add ip header */ ip6_flow_hdr(ip6h, tun_key->tos, 0); /* the HW fills up ipv6 payload len */ - ip6h->hop_limit = ttl; - ip6h->daddr = fl6.daddr; - ip6h->saddr = fl6.saddr; + ip6h->hop_limit = attr.ttl; + ip6h->daddr = attr.fl.fl6.daddr; + ip6h->saddr = attr.fl.fl6.saddr; /* add tunneling protocol header */ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), @@ -435,7 +546,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, e->encap_header = encap_header; if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); + neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that.
*/ @@ -452,8 +563,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, } e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - mlx5e_route_lookup_ipv6_put(route_dev, n); + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv6_put(&attr); return err; destroy_neigh_entry: @@ -461,10 +572,160 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, free_encap: kfree(encap_header); release_neigh: - mlx5e_route_lookup_ipv6_put(route_dev, n); + mlx5e_route_lookup_ipv6_put(&attr); return err; } + +int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + TC_TUN_ROUTE_ATTR_INIT(attr); + struct ipv6hdr *ip6h; + int ipv6_encap_size; + char *encap_header; + u8 nud_state; + int err; + + attr.ttl = tun_key->ttl; + + attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + attr.fl.fl6.daddr = tun_key->u.ipv6.dst; + attr.fl.fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv6_encap_size = + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct ipv6hdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + e->route_dev_ifindex = attr.route_dev->ifindex; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IPV6); + + /* add ip header */ + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = attr.ttl; + ip6h->daddr = attr.fl.fl6.daddr; + ip6h->saddr = attr.fl.fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto free_encap; + + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. 
+ */ + goto release_neigh; + } + + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto free_encap; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv6_put(&attr); + return err; + +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv6_put(&attr); + return err; +} +#endif + +int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *flow_attr) +{ + struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + TC_TUN_ROUTE_ATTR_INIT(attr); + u16 vport_num; + int err = 0; + + if (flow_attr->ip_version == 4) { + /* Addresses are swapped for decap */ + attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4; + attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4; + err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr); + } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (flow_attr->ip_version == 6) { + /* Addresses are swapped for decap */ + attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6; + attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6; + err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr); + } #endif + else + return 0; + + if (err) + return err; + + if (attr.route_dev->netdev_ops != &mlx5e_netdev_ops || + !mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) + goto out; + + err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num); + if (err) + goto out; + + esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.vxlan_vni); + esw_attr->rx_tun_attr->decap_vport = vport_num; + +out: + if (flow_attr->ip_version == 4) + mlx5e_route_lookup_ipv4_put(&attr); +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (flow_attr->ip_version == 6) + mlx5e_route_lookup_ipv6_put(&attr); +#endif + return err; +} bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) @@ -625,14 +886,6 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, } } - /* Enforce DMAC when offloading incoming tunneled flows. - * Flow counters require a match on the DMAC. 
- */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), priv->netdev->dev_addr); - /* let software handle IP fragments */ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 704359df60951e3a174dfedc14fa356ec639426f..fa992e869044077afb08f5e562ad098114fa104b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -59,17 +59,30 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); +int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); +int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); #else static inline int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } +static inline int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ return -EOPNOTSUPP; } #endif +int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c new file mode 100644 index 0000000000000000000000000000000000000000..6a116335bb21c1b5aece90bbba1899a4a3e939e3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -0,0 +1,1653 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies.
*/ + +#include +#include "tc_tun_encap.h" +#include "en_tc.h" +#include "tc_tun.h" +#include "rep/tc.h" +#include "diag/en_tc_tracepoint.h" + +enum { + MLX5E_ROUTE_ENTRY_VALID = BIT(0), +}; + +struct mlx5e_route_key { + int ip_version; + union { + __be32 v4; + struct in6_addr v6; + } endpoint_ip; +}; + +struct mlx5e_route_entry { + struct mlx5e_route_key key; + struct list_head encap_entries; + struct list_head decap_flows; + u32 flags; + struct hlist_node hlist; + refcount_t refcnt; + int tunnel_dev_index; + struct rcu_head rcu; +}; + +struct mlx5e_tc_tun_encap { + struct mlx5e_priv *priv; + struct notifier_block fib_nb; + spinlock_t route_lock; /* protects route_tbl */ + unsigned long route_tbl_last_update; + DECLARE_HASHTABLE(route_tbl, 8); +}; + +static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r) +{ + return r->flags & MLX5E_ROUTE_ENTRY_VALID; +} + +int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec) +{ + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + struct mlx5_rx_tun_attr *tun_attr; + void *daddr, *saddr; + u8 ip_version; + + tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL); + if (!tun_attr) + return -ENOMEM; + + esw_attr->rx_tun_attr = tun_attr; + ip_version = mlx5e_tc_get_ip_version(spec, true); + + if (ip_version == 4) { + daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + tun_attr->dst_ip.v4 = *(__be32 *)daddr; + tun_attr->src_ip.v4 = *(__be32 *)saddr; + if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4) + return 0; + } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (ip_version == 6) { + int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); + struct in6_addr zerov6 = {}; + + daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6); + saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6); + memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size); + memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size); + if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) || + !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6))) + return 0; + } +#endif + /* Only set the flag if both src and dst ip addresses exist. They are + * required to establish routing. + */ + flow_flag_set(flow, TUN_RX); + return 0; +} + +static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr) +{ + bool all_flow_encaps_valid = true; + int i; + + /* Flow can be associated with multiple encap entries. + * Before offloading the flow verify that all of them have + * a valid neighbour. 
+ */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) + continue; + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { + all_flow_encaps_valid = false; + break; + } + } + + return all_flow_encaps_valid; +} + +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + struct mlx5e_tc_flow *flow; + int err; + + if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE) + return; + + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", + PTR_ERR(e->pkt_reformat)); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow)) + continue; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; + + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + + /* Do not offload flows with unresolved neighbors */ + if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) + continue; + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + + mlx5e_tc_unoffload_from_slow_path(esw, flow); + flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + struct mlx5e_tc_flow *flow; + int err; + + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow)) + continue; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; + + /* update from encap rule to slow path rule */ + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + /* mark the flow's encap dest as non-valid */ + esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + continue; + } + + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); + flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); + } + + /* we know that the encap is valid */ + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); +} + +static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, + struct list_head *flow_list, + int index) +{ + if (IS_ERR(mlx5e_flow_get(flow))) + return; + wait_for_completion(&flow->init_done); + + flow->tmp_entry_index = index; + list_add(&flow->tmp_list, flow_list); +} + +/* Takes reference to all flows attached to encap and adds the flows to + * flow_list using 'tmp_list' 
list_head in mlx5e_tc_flow. + */ +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list) +{ + struct encap_flow_item *efi; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + mlx5e_take_tmp_flow(flow, flow_list, efi->index); + } +} + +/* Takes reference to all flows attached to route and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. + */ +static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r, + struct list_head *flow_list) +{ + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, &r->decap_flows, decap_routes) + mlx5e_take_tmp_flow(flow, flow_list, 0); +} + +static struct mlx5e_encap_entry * +mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_encap_entry *next = NULL; + +retry: + rcu_read_lock(); + + /* find encap with non-zero reference counter value */ + for (next = e ? + list_next_or_null_rcu(&nhe->encap_list, + &e->encap_list, + struct mlx5e_encap_entry, + encap_list) : + list_first_or_null_rcu(&nhe->encap_list, + struct mlx5e_encap_entry, + encap_list); + next; + next = list_next_or_null_rcu(&nhe->encap_list, + &next->encap_list, + struct mlx5e_encap_entry, + encap_list)) + if (mlx5e_encap_take(next)) + break; + + rcu_read_unlock(); + + /* release starting encap */ + if (e) + mlx5e_encap_put(netdev_priv(e->out_dev), e); + if (!next) + return next; + + /* wait for encap to be fully initialized */ + wait_for_completion(&next->res_ready); + /* continue searching if encap entry is not in valid state after completion */ + if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { + e = next; + goto retry; + } + + return next; +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + struct mlx5e_encap_entry *e = NULL; + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + u64 lastuse; + + if (m_neigh->family == AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = ipv6_stub->nd_tbl; +#endif + else + return; + + /* mlx5e_get_next_valid_encap() releases previous encap before returning + * next one. 
+ */ + while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { + struct mlx5e_priv *priv = netdev_priv(e->out_dev); + struct encap_flow_item *efi, *tmp; + struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + list_for_each_entry_safe(efi, tmp, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, + encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + list_add(&flow->tmp_list, &flow_list); + + if (mlx5e_is_offloaded_flow(flow)) { + counter = mlx5e_tc_get_counter(flow); + lastuse = mlx5_fc_query_lastuse(counter); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_put_flow_list(priv, &flow_list); + if (neigh_used) { + /* release current encap before breaking the loop */ + mlx5e_encap_put(priv, e); + break; + } + } + + trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used); + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev)); + if (!n) + return; + + neigh_event_send(n, NULL); + neigh_release(n); + } +} + +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + WARN_ON(!list_empty(&e->flows)); + + if (e->compl_result > 0) { + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + } + + kfree(e->tun_info); + kfree(e->encap_header); + kfree_rcu(e, rcu); +} + +static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, + struct mlx5e_decap_entry *d) +{ + WARN_ON(!list_empty(&d->flows)); + + if (!d->compl_result) + mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); + + kfree_rcu(d, rcu); +} + +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) + return; + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + +static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index); + +void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, int out_index) +{ + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (flow->attr->esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + mlx5e_detach_encap_route(priv, flow, out_index); + + /* flow wasn't fully initialized */ + if (!e) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + 
mlx5e_encap_dealloc(priv, e); +} + +void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_entry *d = flow->decap_reformat; + + if (!d) + return; + + mutex_lock(&esw->offloads.decap_tbl_lock); + list_del(&flow->l3_to_l2_reformat); + flow->decap_reformat = NULL; + + if (!refcount_dec_and_test(&d->refcnt)) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + return; + } + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + +struct encap_key { + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; +}; + +static int cmp_encap_info(struct encap_key *a, + struct encap_key *b) +{ + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || + a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; +} + +static int cmp_decap_info(struct mlx5e_decap_key *a, + struct mlx5e_decap_key *b) +{ + return memcmp(&a->key, &b->key, sizeof(b->key)); +} + +static int hash_encap_info(struct encap_key *key) +{ + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tc_tunnel->tunnel_type); +} + +static int hash_decap_info(struct mlx5e_decap_key *key) +{ + return jhash(&key->key, sizeof(key->key), 0); +} + +bool mlx5e_encap_take(struct mlx5e_encap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static struct mlx5e_encap_entry * +mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e; + struct encap_key e_key; + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (!cmp_encap_info(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; +} + +static struct mlx5e_decap_entry * +mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_key r_key; + struct mlx5e_decap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, + hlist, hash_key) { + r_key = e->key; + if (!cmp_decap_info(&r_key, key) && + mlx5e_decap_take(e)) + return e; + } + return NULL; +} + +struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info) +{ + size_t tun_size = sizeof(*tun_info) + tun_info->options_len; + + return kmemdup(tun_info, tun_size, GFP_KERNEL); +} + +static bool is_duplicated_encap_entry(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < out_index; i++) { + if (flow->encaps[i].e != e) + continue; + NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action"); + netdev_err(priv->netdev, "can't duplicate encap action\n"); + return true; + } + + return false; +} + +static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + struct net_device *out_dev, + int route_dev_ifindex, + int out_index) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *route_dev; + u16 vport_num; + int err = 0; + u32 data; + + route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); + + if (!route_dev || route_dev->netdev_ops != 
+static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
+			       struct mlx5_flow_attr *attr,
+			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+			       struct net_device *out_dev,
+			       int route_dev_ifindex,
+			       int out_index)
+{
+	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+	struct net_device *route_dev;
+	u16 vport_num;
+	int err = 0;
+	u32 data;
+
+	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
+		goto out;
+
+	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+	if (err)
+		goto out;
+
+	attr->dest_chain = 0;
+	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
+						       vport_num);
+	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
+						   MLX5_FLOW_NAMESPACE_FDB,
+						   VPORT_TO_REG, data);
+	if (err >= 0) {
+		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
+		err = 0;
+	}
+
+out:
+	if (route_dev)
+		dev_put(route_dev);
+	return err;
+}
+
+static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
+				  struct mlx5_esw_flow_attr *attr,
+				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+				  struct net_device *out_dev,
+				  int route_dev_ifindex,
+				  int out_index)
+{
+	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
+	struct net_device *route_dev;
+	u16 vport_num;
+	int err = 0;
+	u32 data;
+
+	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+	if (err)
+		goto out;
+
+	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
+						       vport_num);
+	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
+
+out:
+	if (route_dev)
+		dev_put(route_dev);
+	return err;
+}
+
+static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct mlx5e_tc_tun_encap *encap;
+	unsigned int ret;
+
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &uplink_rpriv->uplink_priv;
+	encap = uplink_priv->encap;
+
+	spin_lock_bh(&encap->route_lock);
+	ret = encap->route_tbl_last_update;
+	spin_unlock_bh(&encap->route_lock);
+	return ret;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+				    struct mlx5e_tc_flow *flow,
+				    struct mlx5e_encap_entry *e,
+				    bool new_encap_entry,
+				    unsigned long tbl_time_before,
+				    int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+		       struct mlx5e_tc_flow *flow,
+		       struct net_device *mirred_dev,
+		       int out_index,
+		       struct netlink_ext_ack *extack,
+		       struct net_device **encap_dev,
+		       bool *encap_valid)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5_flow_attr *attr = flow->attr;
+	const struct ip_tunnel_info *tun_info;
+	unsigned long tbl_time_before = 0;
+	struct encap_key key;
+	struct mlx5e_encap_entry *e;
+	bool entry_created = false;
+	unsigned short family;
+	uintptr_t hash_key;
+	int err = 0;
+
+	parse_attr = attr->parse_attr;
+	tun_info = parse_attr->tun_info[out_index];
+	family = ip_tunnel_info_af(tun_info);
+	key.ip_tun_key = &tun_info->key;
+	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+	if (!key.tc_tunnel) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+		return -EOPNOTSUPP;
+	}
+
+	hash_key = hash_encap_info(&key);
+
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	e = mlx5e_encap_get(priv, &key, hash_key);
+
+	/* must verify if encap is valid or not */
+	if (e) {
+		/* Check that entry was not already attached to this flow */
+		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
+			err = -EOPNOTSUPP;
+			goto out_err;
+		}
+
+		mutex_unlock(&esw->offloads.encap_tbl_lock);
+		wait_for_completion(&e->res_ready);
+
+		/* Protect against concurrent neigh update. */
+		mutex_lock(&esw->offloads.encap_tbl_lock);
+		if (e->compl_result < 0) {
+			err = -EREMOTEIO;
+			goto out_err;
+		}
+		goto attach_flow;
+	}
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	if (!e) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	refcount_set(&e->refcnt, 1);
+	init_completion(&e->res_ready);
+	entry_created = true;
+	INIT_LIST_HEAD(&e->route_list);
+
+	tun_info = mlx5e_dup_tun_info(tun_info);
+	if (!tun_info) {
+		err = -ENOMEM;
+		goto out_err_init;
+	}
+	e->tun_info = tun_info;
+	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+	if (err)
+		goto out_err_init;
+
+	INIT_LIST_HEAD(&e->flows);
+	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	if (family == AF_INET)
+		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
+	else if (family == AF_INET6)
+		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
+
+	/* Protect against concurrent neigh update. */
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	complete_all(&e->res_ready);
+	if (err) {
+		e->compl_result = err;
+		goto out_err;
+	}
+	e->compl_result = 1;
+
+attach_flow:
+	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
+				       out_index);
+	if (err)
+		goto out_err;
+
+	flow->encaps[out_index].e = e;
+	list_add(&flow->encaps[out_index].list, &e->flows);
+	flow->encaps[out_index].index = out_index;
+	*encap_dev = e->out_dev;
+	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+		*encap_valid = true;
+	} else {
+		*encap_valid = false;
+	}
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	return err;
+
+out_err:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	if (e)
+		mlx5e_encap_put(priv, e);
+	return err;
+
+out_err_init:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	kfree(tun_info);
+	kfree(e);
+	return err;
+}
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+		       struct mlx5e_tc_flow *flow,
+		       struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5e_decap_entry *d;
+	struct mlx5e_decap_key key;
+	uintptr_t hash_key;
+	int err = 0;
+
+	parse_attr = flow->attr->parse_attr;
+	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "encap header larger than max supported");
+		return -EOPNOTSUPP;
+	}
+
+	key.key = parse_attr->eth;
+	hash_key = hash_decap_info(&key);
+	mutex_lock(&esw->offloads.decap_tbl_lock);
+	d = mlx5e_decap_get(priv, &key, hash_key);
+	if (d) {
+		mutex_unlock(&esw->offloads.decap_tbl_lock);
+		wait_for_completion(&d->res_ready);
+		mutex_lock(&esw->offloads.decap_tbl_lock);
+		if (d->compl_result) {
+			err = -EREMOTEIO;
+			goto out_free;
+		}
+		goto found;
+	}
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	d->key = key;
+	refcount_set(&d->refcnt, 1);
+	init_completion(&d->res_ready);
+	INIT_LIST_HEAD(&d->flows);
+	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+
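+	/* The reformat context is allocated outside of decap_tbl_lock;
+	 * concurrent users of the new entry block on res_ready until
+	 * compl_result is published below.
+	 */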
+	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
+						     sizeof(parse_attr->eth),
+						     &parse_attr->eth,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(d->pkt_reformat)) {
+		err = PTR_ERR(d->pkt_reformat);
+		d->compl_result = err;
+	}
+	mutex_lock(&esw->offloads.decap_tbl_lock);
+	complete_all(&d->res_ready);
+	if (err)
+		goto out_free;
+
+found:
+	flow->decap_reformat = d;
+	attr->decap_pkt_reformat = d->pkt_reformat;
+	list_add(&flow->l3_to_l2_reformat, &d->flows);
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+	return 0;
+
+out_free:
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+	mlx5e_decap_put(priv, d);
+	return err;
+
+out_err:
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+	return err;
+}
+
+static int cmp_route_info(struct mlx5e_route_key *a,
+			  struct mlx5e_route_key *b)
+{
+	if (a->ip_version == 4 && b->ip_version == 4)
+		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
+			      sizeof(a->endpoint_ip.v4));
+	else if (a->ip_version == 6 && b->ip_version == 6)
+		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
+			      sizeof(a->endpoint_ip.v6));
+	return 1;
+}
+
+static u32 hash_route_info(struct mlx5e_route_key *key)
+{
+	if (key->ip_version == 4)
+		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
+	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
+}
+
+static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
+				struct mlx5e_route_entry *r)
+{
+	WARN_ON(!list_empty(&r->decap_flows));
+	WARN_ON(!list_empty(&r->encap_entries));
+
+	kfree_rcu(r, rcu);
+}
+
+static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
+		return;
+
+	hash_del_rcu(&r->hlist);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
+
+	if (!refcount_dec_and_test(&r->refcnt))
+		return;
+	hash_del_rcu(&r->hlist);
+	mlx5e_route_dealloc(priv, r);
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
+		u32 hash_key)
+{
+	struct mlx5e_route_key r_key;
+	struct mlx5e_route_entry *r;
+
+	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
+		r_key = r->key;
+		if (!cmp_route_info(&r_key, key) &&
+		    refcount_inc_not_zero(&r->refcnt))
+			return r;
+	}
+	return NULL;
+}
+
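+/* Look up a route entry by tunnel endpoint IP, or create and hash a new one.
+ * The current route table timestamp is returned through
+ * route_tbl_change_time so the caller can detect FIB updates that raced
+ * with entry creation.
+ */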
+static struct mlx5e_route_entry *
+mlx5e_route_get_create(struct mlx5e_priv *priv,
+		       struct mlx5e_route_key *key,
+		       int tunnel_dev_index,
+		       unsigned long *route_tbl_change_time)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct mlx5e_tc_tun_encap *encap;
+	struct mlx5e_route_entry *r;
+	u32 hash_key;
+
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &uplink_rpriv->uplink_priv;
+	encap = uplink_priv->encap;
+
+	hash_key = hash_route_info(key);
+	spin_lock_bh(&encap->route_lock);
+	r = mlx5e_route_get(encap, key, hash_key);
+	spin_unlock_bh(&encap->route_lock);
+	if (r) {
+		if (!mlx5e_route_entry_valid(r)) {
+			mlx5e_route_put_locked(priv, r);
+			return ERR_PTR(-EINVAL);
+		}
+		return r;
+	}
+
+	r = kzalloc(sizeof(*r), GFP_KERNEL);
+	if (!r)
+		return ERR_PTR(-ENOMEM);
+
+	r->key = *key;
+	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+	r->tunnel_dev_index = tunnel_dev_index;
+	refcount_set(&r->refcnt, 1);
+	INIT_LIST_HEAD(&r->decap_flows);
+	INIT_LIST_HEAD(&r->encap_entries);
+
+	spin_lock_bh(&encap->route_lock);
+	*route_tbl_change_time = encap->route_tbl_last_update;
+	hash_add(encap->route_tbl, &r->hlist, hash_key);
+	spin_unlock_bh(&encap->route_lock);
+
+	return r;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
+{
+	u32 hash_key = hash_route_info(key);
+	struct mlx5e_route_entry *r;
+
+	spin_lock_bh(&encap->route_lock);
+	encap->route_tbl_last_update = jiffies;
+	r = mlx5e_route_get(encap, key, hash_key);
+	spin_unlock_bh(&encap->route_lock);
+
+	return r;
+}
+
+struct mlx5e_tc_fib_event_data {
+	struct work_struct work;
+	unsigned long event;
+	struct mlx5e_route_entry *r;
+	struct net_device *ul_dev;
+};
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work);
+static struct mlx5e_tc_fib_event_data *
+mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
+{
+	struct mlx5e_tc_fib_event_data *fib_work;
+
+	fib_work = kzalloc(sizeof(*fib_work), flags);
+	if (WARN_ON(!fib_work))
+		return NULL;
+
+	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
+	fib_work->event = event;
+	fib_work->ul_dev = ul_dev;
+
+	return fib_work;
+}
+
+static int
+mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
+			   struct mlx5e_route_entry *r,
+			   unsigned long event)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_tc_fib_event_data *fib_work;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct net_device *ul_dev;
+
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	ul_dev = uplink_rpriv->netdev;
+
+	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
+	if (!fib_work)
+		return -ENOMEM;
+
+	dev_hold(ul_dev);
+	refcount_inc(&r->refcnt);
+	fib_work->r = r;
+	queue_work(priv->wq, &fib_work->work);
+
+	return 0;
+}
+
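+/* Bind the flow to the route entry of its tunnel endpoint so that FIB
+ * updates on the decap route re-trigger offload of the flow.
+ */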
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+			     struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	unsigned long tbl_time_before, tbl_time_after;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5_flow_attr *attr = flow->attr;
+	struct mlx5_esw_flow_attr *esw_attr;
+	struct mlx5e_route_entry *r;
+	struct mlx5e_route_key key;
+	int err = 0;
+
+	esw_attr = attr->esw_attr;
+	parse_attr = attr->parse_attr;
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	if (!esw_attr->rx_tun_attr)
+		goto out;
+
+	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+	tbl_time_after = tbl_time_before;
+	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
+	if (err || !esw_attr->rx_tun_attr->decap_vport)
+		goto out;
+
+	key.ip_version = attr->ip_version;
+	if (key.ip_version == 4)
+		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
+	else
+		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
+
+	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
+				   &tbl_time_after);
+	if (IS_ERR(r)) {
+		err = PTR_ERR(r);
+		goto out;
+	}
+	/* Routing changed concurrently. The FIB event handler might have
+	 * missed the new entry; schedule an update.
+	 */
+	if (tbl_time_before != tbl_time_after) {
+		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+		if (err) {
+			mlx5e_route_put_locked(priv, r);
+			goto out;
+		}
+	}
+
+	flow->decap_route = r;
+	list_add(&flow->decap_routes, &r->decap_flows);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	return 0;
+
+out:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	return err;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+				    struct mlx5e_tc_flow *flow,
+				    struct mlx5e_encap_entry *e,
+				    bool new_encap_entry,
+				    unsigned long tbl_time_before,
+				    int out_index)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	unsigned long tbl_time_after = tbl_time_before;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5_flow_attr *attr = flow->attr;
+	const struct ip_tunnel_info *tun_info;
+	struct mlx5_esw_flow_attr *esw_attr;
+	struct mlx5e_route_entry *r;
+	struct mlx5e_route_key key;
+	unsigned short family;
+	int err = 0;
+
+	esw_attr = attr->esw_attr;
+	parse_attr = attr->parse_attr;
+	tun_info = parse_attr->tun_info[out_index];
+	family = ip_tunnel_info_af(tun_info);
+
+	if (family == AF_INET) {
+		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
+		key.ip_version = 4;
+	} else if (family == AF_INET6) {
+		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
+		key.ip_version = 6;
+	}
+
+	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
+				  e->route_dev_ifindex, out_index);
+	if (err || !(esw_attr->dests[out_index].flags &
+		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
+		return err;
+
+	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
+				   &tbl_time_after);
+	if (IS_ERR(r))
+		return PTR_ERR(r);
+	/* Routing changed concurrently. The FIB event handler might have
+	 * missed the new entry; schedule an update.
+	 */
+	if (tbl_time_before != tbl_time_after) {
+		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+		if (err) {
+			mlx5e_route_put_locked(priv, r);
+			return err;
+		}
+	}
+
+	flow->encap_routes[out_index].r = r;
+	if (new_encap_entry)
+		list_add(&e->route_list, &r->encap_entries);
+	flow->encap_routes[out_index].index = out_index;
+	return 0;
+}
+
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+			      struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_route_entry *r = flow->decap_route;
+
+	if (!r)
+		return;
+
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	list_del(&flow->decap_routes);
+	flow->decap_route = NULL;
+
+	if (!refcount_dec_and_test(&r->refcnt)) {
+		mutex_unlock(&esw->offloads.encap_tbl_lock);
+		return;
+	}
+	hash_del_rcu(&r->hlist);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+				     struct mlx5e_tc_flow *flow,
+				     int out_index)
+{
+	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_encap_entry *e, *tmp;
+
+	if (!r)
+		return;
+
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	flow->encap_routes[out_index].r = NULL;
+
+	if (!refcount_dec_and_test(&r->refcnt)) {
+		mutex_unlock(&esw->offloads.encap_tbl_lock);
+		return;
+	}
+	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
+		list_del_init(&e->route_list);
+	hash_del_rcu(&r->hlist);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
+				   struct mlx5e_encap_entry *e,
+				   struct list_head *encap_flows)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_tc_flow *flow;
+
+	list_for_each_entry(flow, encap_flows, tmp_list) {
+		struct mlx5_flow_attr *attr = flow->attr;
+		struct mlx5_esw_flow_attr *esw_attr;
+
+		if (!mlx5e_is_offloaded_flow(flow))
+			continue;
+		esw_attr = attr->esw_attr;
+
+		if (flow_flag_test(flow, SLOW))
+			mlx5e_tc_unoffload_from_slow_path(esw, flow);
+		else
+			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+		attr->modify_hdr = NULL;
+
+		esw_attr->dests[flow->tmp_entry_index].flags &=
+			~MLX5_ESW_DEST_ENCAP_VALID;
+		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+	}
+
+	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
+	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+		e->pkt_reformat = NULL;
+	}
+}
+
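+/* Rebuild the encap header for the new route and move the dependent flows
+ * back from the slow path to offloaded encap rules where possible.
+ */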
+static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
+				  struct net_device *tunnel_dev,
+				  struct mlx5e_encap_entry *e,
+				  struct list_head *encap_flows)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_tc_flow *flow;
+	int err;
+
+	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
+		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
+		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
+	if (err)
+		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d\n", err);
+	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
+
+	list_for_each_entry(flow, encap_flows, tmp_list) {
+		struct mlx5e_tc_flow_parse_attr *parse_attr;
+		struct mlx5_flow_attr *attr = flow->attr;
+		struct mlx5_esw_flow_attr *esw_attr;
+		struct mlx5_flow_handle *rule;
+		struct mlx5_flow_spec *spec;
+
+		if (flow_flag_test(flow, FAILED))
+			continue;
+
+		esw_attr = attr->esw_attr;
+		parse_attr = attr->parse_attr;
+		spec = &parse_attr->spec;
+
+		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
+					     e->out_dev, e->route_dev_ifindex,
+					     flow->tmp_entry_index);
+		if (err) {
+			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d\n", err);
+			continue;
+		}
+
+		err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+		if (err) {
+			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d\n",
+				       err);
+			continue;
+		}
+
+		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+				goto offload_to_slow_path;
+			/* update from slow path rule to encap rule */
+			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+			if (IS_ERR(rule)) {
+				err = PTR_ERR(rule);
+				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+					       err);
+			} else {
+				flow->rule[0] = rule;
+			}
+		} else {
+offload_to_slow_path:
+			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+			/* mark the flow's encap dest as non-valid */
+			esw_attr->dests[flow->tmp_entry_index].flags &=
+				~MLX5_ESW_DEST_ENCAP_VALID;
+
+			if (IS_ERR(rule)) {
+				err = PTR_ERR(rule);
+				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+					       err);
+			} else {
+				flow->rule[0] = rule;
+			}
+		}
+		flow_flag_set(flow, OFFLOADED);
+	}
+}
+
+static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
+				     struct mlx5e_route_entry *r,
+				     struct list_head *flow_list,
+				     bool replace)
+{
+	struct net_device *tunnel_dev;
+	struct mlx5e_encap_entry *e;
+
+	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+	if (!tunnel_dev)
+		return -ENODEV;
+
+	list_for_each_entry(e, &r->encap_entries, route_list) {
+		LIST_HEAD(encap_flows);
+
+		mlx5e_take_all_encap_flows(e, &encap_flows);
+		if (list_empty(&encap_flows))
+			continue;
+
+		if (mlx5e_route_entry_valid(r))
+			mlx5e_invalidate_encap(priv, e, &encap_flows);
+
+		if (!replace) {
+			list_splice(&encap_flows, flow_list);
+			continue;
+		}
+
+		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
+		list_splice(&encap_flows, flow_list);
+	}
+
+	return 0;
+}
+
+static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
+				      struct list_head *flow_list)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_tc_flow *flow;
+
+	list_for_each_entry(flow, flow_list, tmp_list)
+		if (mlx5e_is_offloaded_flow(flow))
+			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+}
+
+static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
+				  struct list_head *decap_flows)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_tc_flow *flow;
+
+	list_for_each_entry(flow, decap_flows, tmp_list) {
+		struct mlx5e_tc_flow_parse_attr *parse_attr;
+		struct mlx5_flow_attr *attr = flow->attr;
+		struct mlx5_flow_handle *rule;
+		struct mlx5_flow_spec *spec;
+		int err;
+
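+		/* Skip flows that previously failed to offload; re-resolve
+		 * the route and re-offload the rest against the updated FIB
+		 * entry.
+		 */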
+		if (flow_flag_test(flow, FAILED))
+			continue;
+
+		parse_attr = attr->parse_attr;
+		spec = &parse_attr->spec;
+		err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
+		if (err) {
+			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
+				       err);
+			continue;
+		}
+
+		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+		if (IS_ERR(rule)) {
+			err = PTR_ERR(rule);
+			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
+				       err);
+		} else {
+			flow->rule[0] = rule;
+			flow_flag_set(flow, OFFLOADED);
+		}
+	}
+}
+
+static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
+					  struct mlx5e_route_entry *r,
+					  struct list_head *flow_list,
+					  bool replace)
+{
+	struct net_device *tunnel_dev;
+	LIST_HEAD(decap_flows);
+
+	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+	if (!tunnel_dev)
+		return -ENODEV;
+
+	mlx5e_take_all_route_decap_flows(r, &decap_flows);
+	if (mlx5e_route_entry_valid(r))
+		mlx5e_unoffload_flow_list(priv, &decap_flows);
+	if (replace)
+		mlx5e_reoffload_decap(priv, &decap_flows);
+
+	list_splice(&decap_flows, flow_list);
+
+	return 0;
+}
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work)
+{
+	struct mlx5e_tc_fib_event_data *event_data =
+		container_of(work, struct mlx5e_tc_fib_event_data, work);
+	struct net_device *ul_dev = event_data->ul_dev;
+	struct mlx5e_priv *priv = netdev_priv(ul_dev);
+	struct mlx5e_route_entry *r = event_data->r;
+	struct mlx5_eswitch *esw;
+	LIST_HEAD(flow_list);
+	bool replace;
+	int err;
+
+	/* sync with concurrent neigh updates */
+	rtnl_lock();
+	esw = priv->mdev->priv.eswitch;
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
+
+	if (!mlx5e_route_entry_valid(r) && !replace)
+		goto out;
+
+	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
+	if (err)
+		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
+			       err);
+
+	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
+	if (err)
+		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
+			       err);
+
+	if (replace)
+		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+out:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	rtnl_unlock();
+
+	mlx5e_put_flow_list(priv, &flow_list);
+	mlx5e_route_put(priv, event_data->r);
+	dev_put(event_data->ul_dev);
+	kfree(event_data);
+}
+
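+/* FIB notifiers run in atomic context: take the needed references here and
+ * hand the actual flow update to the ordered workqueue, where sleeping is
+ * allowed.
+ */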
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
+			 struct net_device *ul_dev,
+			 struct mlx5e_tc_tun_encap *encap,
+			 unsigned long event,
+			 struct fib_notifier_info *info)
+{
+	struct fib_entry_notifier_info *fen_info;
+	struct mlx5e_tc_fib_event_data *fib_work;
+	struct mlx5e_route_entry *r;
+	struct mlx5e_route_key key;
+	struct net_device *fib_dev;
+
+	fen_info = container_of(info, struct fib_entry_notifier_info, info);
+	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+	    fen_info->dst_len != 32)
+		return NULL;
+
+	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+	if (!fib_work)
+		return ERR_PTR(-ENOMEM);
+
+	key.endpoint_ip.v4 = htonl(fen_info->dst);
+	key.ip_version = 4;
+
+	/* Can't fail after this point because releasing reference to r
+	 * requires obtaining sleeping mutex which we can't do in atomic
+	 * context.
+	 */
+	r = mlx5e_route_lookup_for_update(encap, &key);
+	if (!r)
+		goto out;
+	fib_work->r = r;
+	dev_hold(ul_dev);
+
+	return fib_work;
+
+out:
+	kfree(fib_work);
+	return NULL;
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
+			 struct net_device *ul_dev,
+			 struct mlx5e_tc_tun_encap *encap,
+			 unsigned long event,
+			 struct fib_notifier_info *info)
+{
+	struct fib6_entry_notifier_info *fen_info;
+	struct mlx5e_tc_fib_event_data *fib_work;
+	struct mlx5e_route_entry *r;
+	struct mlx5e_route_key key;
+	struct net_device *fib_dev;
+
+	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
+	fib_dev = fib6_info_nh_dev(fen_info->rt);
+	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+	    fen_info->rt->fib6_dst.plen != 128)
+		return NULL;
+
+	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+	if (!fib_work)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
+	       sizeof(fen_info->rt->fib6_dst.addr));
+	key.ip_version = 6;
+
+	/* Can't fail after this point because releasing reference to r
+	 * requires obtaining sleeping mutex which we can't do in atomic
+	 * context.
+	 */
+	r = mlx5e_route_lookup_for_update(encap, &key);
+	if (!r)
+		goto out;
+	fib_work->r = r;
+	dev_hold(ul_dev);
+
+	return fib_work;
+
+out:
+	kfree(fib_work);
+	return NULL;
+}
+
+static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct mlx5e_tc_fib_event_data *fib_work;
+	struct fib_notifier_info *info = ptr;
+	struct mlx5e_tc_tun_encap *encap;
+	struct net_device *ul_dev;
+	struct mlx5e_priv *priv;
+
+	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
+	priv = encap->priv;
+	ul_dev = priv->netdev;
+	priv = netdev_priv(ul_dev);
+
+	switch (event) {
+	case FIB_EVENT_ENTRY_REPLACE:
+	case FIB_EVENT_ENTRY_DEL:
+		if (info->family == AF_INET)
+			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
+		else if (info->family == AF_INET6)
+			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
+		else
+			return NOTIFY_DONE;
+
+		if (!IS_ERR_OR_NULL(fib_work)) {
+			queue_work(priv->wq, &fib_work->work);
+		} else if (IS_ERR(fib_work)) {
+			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
+			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
+				       PTR_ERR(fib_work));
+		}
+
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
+{
+	struct mlx5e_tc_tun_encap *encap;
+	int err;
+
+	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
+	if (!encap)
+		return ERR_PTR(-ENOMEM);
+
+	encap->priv = priv;
+	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
+	spin_lock_init(&encap->route_lock);
+	hash_init(encap->route_tbl);
+	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
+				    NULL, NULL);
+	if (err) {
+		kvfree(encap);
+		return ERR_PTR(err);
+	}
+
+	return encap;
+}
+
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
+{
+	if (!encap)
+		return;
+
+	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
+	flush_workqueue(encap->priv->wq); /* flush fib event works */
+	kvfree(encap);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
new file mode 100644
index 0000000000000000000000000000000000000000..3391504d9a08eef5d9e7a99c74e8c3708589f6a3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUN_ENCAP_H__
+#define __MLX5_EN_TC_TUN_ENCAP_H__
+
+#include "tc_priv.h"
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+			struct mlx5e_tc_flow *flow, int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+		       struct mlx5e_tc_flow *flow,
+		       struct net_device *mirred_dev,
+		       int out_index,
+		       struct netlink_ext_ack *extack,
+		       struct net_device **encap_dev,
+		       bool *encap_valid);
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+		       struct mlx5e_tc_flow *flow,
+		       struct netlink_ext_ack *extack);
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+			struct mlx5e_tc_flow *flow);
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+			     struct mlx5e_tc_flow *flow);
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+			      struct mlx5e_tc_flow *flow);
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+			     struct mlx5_flow_spec *spec);
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv);
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap);
+
+#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 988195ab1c54b72d584073cf129d17f7201b02ad..d1696404cca9f3a41a49261d0dca11d89a461b6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -59,6 +59,8 @@ struct mlx5e_neigh_update_table {
 struct mlx5_tc_ct_priv;
 struct mlx5e_rep_bond;
+struct mlx5e_tc_tun_encap;
+
 struct mlx5_rep_uplink_priv {
 	/* Filters DB - instantiated by the uplink representor and shared by
 	 * the uplink's VFs
@@ -90,6 +92,9 @@ struct mlx5_rep_uplink_priv {
 
 	/* support eswitch vports bonding */
 	struct mlx5e_rep_bond *bond;
+
+	/* tc tunneling encapsulation private data */
+	struct mlx5e_tc_tun_encap *encap;
 };
 
 struct mlx5e_rep_priv {
@@ -110,7 +115,6 @@ struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
 }
 
 struct mlx5e_neigh {
-	struct net_device *dev;
 	union {
 		__be32 v4;
 		struct in6_addr v6;
@@ -122,6 +126,7 @@ struct mlx5e_neigh_hash_entry {
 	struct rhash_head rhash_node;
 	struct mlx5e_neigh m_neigh;
 	struct mlx5e_priv *priv;
+	struct net_device *neigh_dev;
 
 	/* Save the neigh hash entry in a list on the representor in
 	 * addition to the hash table. In order to iterate easily over the
@@ -153,6 +158,7 @@ enum {
 	/* set when the encap entry is successfully offloaded into HW */
 	MLX5_ENCAP_ENTRY_VALID = BIT(0),
 	MLX5_REFORMAT_DECAP = BIT(1),
+	MLX5_ENCAP_ENTRY_NO_ROUTE = BIT(2),
 };
 
 struct mlx5e_decap_key {
@@ -175,12 +181,12 @@ struct mlx5e_encap_entry {
 	struct mlx5e_neigh_hash_entry *nhe;
 	/* neigh hash entry list of encaps sharing the same neigh */
 	struct list_head encap_list;
-	struct mlx5e_neigh m_neigh;
 	/* a node of the eswitch encap hash table which keeping all the encap
 	 * entries
 	 */
 	struct hlist_node encap_hlist;
 	struct list_head flows;
+	struct list_head route_list;
 	struct mlx5_pkt_reformat *pkt_reformat;
 	const struct ip_tunnel_info *tun_info;
 	unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 280ea1e1e039d843fb6bca9209ef51b44ac4bd89..db142ee96510c48042fe2cfd73edf8d4a0770f17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -63,6 +63,8 @@
 #include "en/mapping.h"
 #include "en/tc_ct.h"
 #include "en/mod_hdr.h"
+#include "en/tc_priv.h"
+#include "en/tc_tun_encap.h"
 #include "lib/devcom.h"
 #include "lib/geneve.h"
 #include "lib/fs_chains.h"
@@ -71,90 +73,6 @@
 #define nic_chains(priv) ((priv)->fs.tc.chains)
 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
 
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
-
-enum {
-	MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
-	MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
-	MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
-	MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
-	MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
-	MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
-	MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
-	MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
-	MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
-	MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
-	MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
-	MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
-	MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
-	MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
-};
-
-#define MLX5E_TC_MAX_SPLITS 1
-
-/* Helper struct for accessing a struct containing list_head array.
- * Containing struct
- * |- Helper array
- *      [0] Helper item 0
- *          |- list_head item 0
- *          |- index (0)
- *      [1] Helper item 1
- *          |- list_head item 1
- *          |- index (1)
- * To access the containing struct from one of the list_head items:
- * 1. Get the helper item from the list_head item using
- *    helper item =
- *            container_of(list_head item, helper struct type, list_head field)
- * 2. Get the contining struct from the helper item and its index in the array:
- *    containing struct =
- *            container_of(helper item, containing struct type, helper field[index])
- */
-struct encap_flow_item {
-	struct mlx5e_encap_entry *e; /* attached encap instance */
-	struct list_head list;
-	int index;
-};
-
-struct mlx5e_tc_flow {
-	struct rhash_head node;
-	struct mlx5e_priv *priv;
-	u64 cookie;
-	unsigned long flags;
-	struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
-
-	/* flows sharing the same reformat object - currently mpls decap */
-	struct list_head l3_to_l2_reformat;
-	struct mlx5e_decap_entry *decap_reformat;
-
-	/* Flow can be associated with multiple encap IDs.
-	 * The number of encaps is bounded by the number of supported
-	 * destinations.
-	 */
-	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
-	struct mlx5e_tc_flow *peer_flow;
-	struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
-	struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
-	struct list_head hairpin; /* flows sharing the same hairpin */
-	struct list_head peer; /* flows with peer flow */
-	struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */
-	struct net_device *orig_dev; /* netdev adding flow first */
-	int tmp_efi_index;
-	struct list_head tmp_list; /* temporary flow list used by neigh update */
-	refcount_t refcnt;
-	struct rcu_head rcu_head;
-	struct completion init_done;
-	int tunnel_id; /* the mapped tunnel id of this flow */
-	struct mlx5_flow_attr *attr;
-};
-
-struct mlx5e_tc_flow_parse_attr {
-	const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
-	struct net_device *filter_dev;
-	struct mlx5_flow_spec spec;
-	struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
-	int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
-	struct ethhdr eth;
-};
 
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
@@ -165,10 +83,15 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
 		.moffset = 0,
 		.mlen = 2,
 	},
+	[VPORT_TO_REG] = {
+		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+		.moffset = 2,
+		.mlen = 2,
+	},
 	[TUNNEL_TO_REG] = {
 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
 		.moffset = 1,
-		.mlen = 3,
+		.mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8),
 		.soffset = MLX5_BYTE_OFF(fte_match_param,
 					 misc_parameters_2.metadata_reg_c_1),
 	},
@@ -247,11 +170,11 @@ mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
 }
 
 int
-mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
-			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
-			  enum mlx5_flow_namespace_type ns,
-			  enum mlx5e_tc_attr_to_reg type,
-			  u32 data)
+mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+				     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+				     enum mlx5_flow_namespace_type ns,
+				     enum mlx5e_tc_attr_to_reg type,
+				     u32 data)
 {
 	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
 	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
@@ -275,9 +198,10 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
 	MLX5_SET(set_action_in, modact, offset, moffset * 8);
 	MLX5_SET(set_action_in, modact, length, mlen * 8);
 	MLX5_SET(set_action_in, modact, data, data);
+	err = mod_hdr_acts->num_actions;
 	mod_hdr_acts->num_actions++;
 
-	return 0;
+	return err;
 }
 
 static struct mlx5_tc_ct_priv *
@@ -326,6 +250,41 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
 		mlx5e_del_offloaded_nic_rule(priv, rule, attr);
 }
 
+int
+mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
+			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+			  enum mlx5_flow_namespace_type ns,
+			  enum mlx5e_tc_attr_to_reg type,
+			  u32 data)
+{
+	int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
+
+	return ret < 0 ? ret : 0;
+}
+
+void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
+					  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+					  enum mlx5e_tc_attr_to_reg type,
+					  int act_id, u32 data)
+{
+	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
+	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+	char *modact;
+
+	modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ);
+
+	/* Firmware has 5bit length field and 0 means 32bits */
+	if (mlen == 4)
+		mlen = 0;
+
+	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, modact, field, mfield);
+	MLX5_SET(set_action_in, modact, offset, moffset * 8);
+	MLX5_SET(set_action_in, modact, length, mlen * 8);
+	MLX5_SET(set_action_in, modact, data, data);
+}
+
 struct mlx5e_hairpin {
 	struct mlx5_hairpin *pair;
 
@@ -363,15 +322,14 @@ struct mlx5e_hairpin_entry {
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow);
 
-static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
 {
 	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
 		return ERR_PTR(-EINVAL);
 	return flow;
 }
 
-static void mlx5e_flow_put(struct mlx5e_priv *priv,
-			   struct mlx5e_tc_flow *flow)
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
 {
 	if (refcount_dec_and_test(&flow->refcnt)) {
 		mlx5e_tc_del_flow(priv, flow);
@@ -379,48 +337,6 @@ static void mlx5e_flow_put(struct mlx5e_priv *priv,
 	}
 }
 
-static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
-	/* Complete all memory stores before setting bit. */
-	smp_mb__before_atomic();
-	set_bit(flag, &flow->flags);
-}
-
-#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
-
-static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
-				     unsigned long flag)
-{
-	/* test_and_set_bit() provides all necessary barriers */
-	return test_and_set_bit(flag, &flow->flags);
-}
-
-#define flow_flag_test_and_set(flow, flag)			\
-	__flow_flag_test_and_set(flow,				\
-				 MLX5E_TC_FLOW_FLAG_##flag)
-
-static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
-	/* Complete all memory stores before clearing bit. */
-	smp_mb__before_atomic();
-	clear_bit(flag, &flow->flags);
-}
-
-#define flow_flag_clear(flow, flag) __flow_flag_clear(flow,	\
-						      MLX5E_TC_FLOW_FLAG_##flag)
-
-static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
-	bool ret = test_bit(flag, &flow->flags);
-
-	/* Read fields of flow structure only after checking flags. */
-	smp_mb__after_atomic();
-	return ret;
-}
-
-#define flow_flag_test(flow, flag) __flow_flag_test(flow,	\
-						    MLX5E_TC_FLOW_FLAG_##flag)
-
 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
 {
 	return flow_flag_test(flow, ESWITCH);
@@ -431,7 +347,7 @@ static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
 	return flow_flag_test(flow, FT);
 }
 
-static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
 {
 	return flow_flag_test(flow, OFFLOADED);
 }
@@ -1146,23 +1062,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
 	kfree(flow->attr);
 }
 
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-			       struct mlx5e_tc_flow *flow, int out_index);
-
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
-			      struct mlx5e_tc_flow *flow,
-			      struct net_device *mirred_dev,
-			      int out_index,
-			      struct netlink_ext_ack *extack,
-			      struct net_device **encap_dev,
-			      bool *encap_valid);
-static int mlx5e_attach_decap(struct mlx5e_priv *priv,
-			      struct mlx5e_tc_flow *flow,
-			      struct netlink_ext_ack *extack);
-static void mlx5e_detach_decap(struct mlx5e_priv *priv,
-			       struct mlx5e_tc_flow *flow);
-
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
 			   struct mlx5e_tc_flow *flow,
 			   struct mlx5_flow_spec *spec,
@@ -1197,10 +1097,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
 	return rule;
 }
 
-static void
-mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
-			     struct mlx5e_tc_flow *flow,
-			     struct mlx5_flow_attr *attr)
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+				  struct mlx5e_tc_flow *flow,
+				  struct mlx5_flow_attr *attr)
 {
 	flow_flag_clear(flow, OFFLOADED);
 
@@ -1219,7 +1118,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
 	mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
 }
 
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
 			      struct mlx5e_tc_flow *flow,
 			      struct mlx5_flow_spec *spec)
@@ -1245,9 +1144,8 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
 	return rule;
 }
 
-static void
-mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
-				  struct mlx5e_tc_flow *flow)
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+				       struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_flow_attr *slow_attr;
 
@@ -1315,6 +1213,63 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
 	mutex_unlock(&uplink_priv->unready_flows_lock);
 }
 
+static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
+
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
+{
+	struct mlx5_core_dev *out_mdev, *route_mdev;
+	struct mlx5e_priv *out_priv, *route_priv;
+
+	out_priv = netdev_priv(out_dev);
+	out_mdev = out_priv->mdev;
+	route_priv = netdev_priv(route_dev);
+	route_mdev = route_priv->mdev;
+
+	if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
+	    route_mdev->coredev_type != MLX5_COREDEV_VF)
+		return false;
+
+	return same_hw_devs(out_priv, route_priv);
+}
+
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
+{
+	struct mlx5e_priv *out_priv, *route_priv;
+	struct mlx5_core_dev *route_mdev;
+	struct mlx5_eswitch *esw;
+	u16 vhca_id;
+	int err;
+
+	out_priv = netdev_priv(out_dev);
+	esw = out_priv->mdev->priv.eswitch;
+	route_priv = netdev_priv(route_dev);
+	route_mdev = route_priv->mdev;
+
+	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+	err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+	return err;
+}
+
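+/* Allocate a dedicated modify-header object for the flow's accumulated
+ * mod_hdr actions and attach it to the flow attr, instead of going through
+ * the shared mod_hdr table, so that actions such as the VF tunnel source
+ * port rewrite can be updated per flow.
+ */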
+int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+			      struct mlx5e_tc_flow_parse_attr *parse_attr,
+			      struct mlx5e_tc_flow *flow)
+{
+	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
+	struct mlx5_modify_hdr *mod_hdr;
+
+	mod_hdr = mlx5_modify_header_alloc(priv->mdev,
+					   get_flow_name_space(flow),
+					   mod_hdr_acts->num_actions,
+					   mod_hdr_acts->actions);
+	if (IS_ERR(mod_hdr))
+		return PTR_ERR(mod_hdr);
+
+	WARN_ON(flow->attr->modify_hdr);
+	flow->attr->modify_hdr = mod_hdr;
+
+	return 0;
+}
+
 static int
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		      struct mlx5e_tc_flow *flow,
@@ -1324,11 +1279,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 	struct net_device *out_dev, *encap_dev = NULL;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
 	struct mlx5_flow_attr *attr = flow->attr;
+	bool vf_tun = false, encap_valid = true;
 	struct mlx5_esw_flow_attr *esw_attr;
 	struct mlx5_fc *counter = NULL;
 	struct mlx5e_rep_priv *rpriv;
 	struct mlx5e_priv *out_priv;
-	bool encap_valid = true;
 	u32 max_prio, max_chain;
 	int err = 0;
 	int out_index;
@@ -1342,20 +1297,28 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 	if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Requested chain is out of supported range");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto err_out;
 	}
 
 	max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
 	if (attr->prio > max_prio) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Requested priority is out of supported range");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+
+	if (flow_flag_test(flow, TUN_RX)) {
+		err = mlx5e_attach_decap_route(priv, flow);
+		if (err)
+			goto err_out;
 	}
 
 	if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
 		err = mlx5e_attach_decap(priv, flow, extack);
 		if (err)
-			return err;
+			goto err_out;
 	}
 
 	parse_attr = attr->parse_attr;
@@ -1373,8 +1336,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
 					 extack, &encap_dev, &encap_valid);
 		if (err)
-			return err;
+			goto err_out;
 
+		if (esw_attr->dests[out_index].flags &
+		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+			vf_tun = true;
 		out_priv = netdev_priv(encap_dev);
 		rpriv = out_priv->ppriv;
 		esw_attr->dests[out_index].rep = rpriv->rep;
@@ -1383,20 +1349,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 
 	err = mlx5_eswitch_add_vlan_action(esw, attr);
 	if (err)
-		return err;
+		goto err_out;
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
 	    !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
-		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
-		dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
-		if (err)
-			return err;
+		if (vf_tun) {
+			err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+			if (err)
+				goto err_out;
+		} else {
+			err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+			if (err)
+				goto err_out;
+		}
 	}
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		counter = mlx5_fc_create(esw_attr->counter_dev, true);
-		if (IS_ERR(counter))
-			return PTR_ERR(counter);
+		if (IS_ERR(counter)) {
+			err = PTR_ERR(counter);
+			goto err_out;
+		}
 
 		attr->counter = counter;
 	}
@@ -1410,12 +1383,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 	else
 		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
 
-	if (IS_ERR(flow->rule[0]))
-		return PTR_ERR(flow->rule[0]);
-	else
-		flow_flag_set(flow, OFFLOADED);
+	if (IS_ERR(flow->rule[0])) {
+		err = PTR_ERR(flow->rule[0]);
+		goto err_out;
+	}
+	flow_flag_set(flow, OFFLOADED);
 
 	return 0;
+
+err_out:
+	flow_flag_set(flow, FAILED);
+	return err;
 }
 
 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
@@ -1436,8 +1414,11 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_flow_attr *attr = flow->attr;
+	struct mlx5_esw_flow_attr *esw_attr;
+	bool vf_tun = false;
 	int out_index;
 
+	esw_attr = attr->esw_attr;
 	mlx5e_put_flow_tunnel_id(flow);
 
 	if (flow_flag_test(flow, NOT_READY))
@@ -1455,20 +1436,33 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 
 	mlx5_eswitch_del_vlan_action(esw, attr);
 
-	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
-		if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
+	if (flow->decap_route)
+		mlx5e_detach_decap_route(priv, flow);
+
+	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+		if (esw_attr->dests[out_index].flags &
+		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+			vf_tun = true;
+		if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
 			mlx5e_detach_encap(priv, flow, out_index);
 			kfree(attr->parse_attr->tun_info[out_index]);
 		}
-	kvfree(attr->parse_attr);
+	}
 
 	mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
 
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
-		mlx5e_detach_mod_hdr(priv, flow);
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+		dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts);
+		if (vf_tun && attr->modify_hdr)
+			mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+		else
+			mlx5e_detach_mod_hdr(priv, flow);
+	}
+	kvfree(attr->parse_attr);
+	kvfree(attr->esw_attr->rx_tun_attr);
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
-		mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
+		mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
 
 	if (flow_flag_test(flow, L3_TO_L2_DECAP))
 		mlx5e_detach_decap(priv, flow);
@@ -1476,141 +1470,13 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 	kfree(flow->attr);
 }
 
-void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
-			      struct mlx5e_encap_entry *e,
-			      struct list_head *flow_list)
-{
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mlx5_esw_flow_attr *esw_attr;
-	struct mlx5_flow_handle *rule;
-	struct mlx5_flow_attr *attr;
-	struct mlx5_flow_spec *spec;
-	struct mlx5e_tc_flow *flow;
-	int err;
-
-	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-						     e->reformat_type,
-						     e->encap_size, e->encap_header,
-						     MLX5_FLOW_NAMESPACE_FDB);
-	if (IS_ERR(e->pkt_reformat)) {
-		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
-			       PTR_ERR(e->pkt_reformat));
-		return;
-	}
-	e->flags |= MLX5_ENCAP_ENTRY_VALID;
-	mlx5e_rep_queue_neigh_stats_work(priv);
-
-	list_for_each_entry(flow, flow_list, tmp_list) {
-		bool all_flow_encaps_valid = true;
-		int i;
-
-		if (!mlx5e_is_offloaded_flow(flow))
-			continue;
-		attr = flow->attr;
-		esw_attr = attr->esw_attr;
-		spec = &attr->parse_attr->spec;
-
-		esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
-		esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
-		/* Flow can be associated with multiple encap entries.
-		 * Before offloading the flow verify that all of them have
-		 * a valid neighbour.
-		 */
-		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
-			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
-				continue;
-			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
-				all_flow_encaps_valid = false;
-				break;
-			}
-		}
-		/* Do not offload flows with unresolved neighbors */
-		if (!all_flow_encaps_valid)
-			continue;
-		/* update from slow path rule to encap rule */
-		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
-		if (IS_ERR(rule)) {
-			err = PTR_ERR(rule);
-			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
-				       err);
-			continue;
-		}
-
-		mlx5e_tc_unoffload_from_slow_path(esw, flow);
-		flow->rule[0] = rule;
-		/* was unset when slow path rule removed */
-		flow_flag_set(flow, OFFLOADED);
-	}
-}
-
-void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
-			      struct mlx5e_encap_entry *e,
-			      struct list_head *flow_list)
-{
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mlx5_esw_flow_attr *esw_attr;
-	struct mlx5_flow_handle *rule;
-	struct mlx5_flow_attr *attr;
-	struct mlx5_flow_spec *spec;
-	struct mlx5e_tc_flow *flow;
-	int err;
-
-	list_for_each_entry(flow, flow_list, tmp_list) {
-		if (!mlx5e_is_offloaded_flow(flow))
-			continue;
-		attr = flow->attr;
-		esw_attr = attr->esw_attr;
-		spec = &attr->parse_attr->spec;
-
-		/* update from encap rule to slow path rule */
-		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
-		/* mark the flow's encap dest as non-valid */
-		esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
-
-		if (IS_ERR(rule)) {
-			err = PTR_ERR(rule);
-			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
-				       err);
-			continue;
-		}
-
-		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
-		flow->rule[0] = rule;
-		/* was unset when fast path rule removed */
-		flow_flag_set(flow, OFFLOADED);
-	}
-
-	/* we know that the encap is valid */
-	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
-	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
-}
-
-static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
 {
 	return flow->attr->counter;
 }
 
-/* Takes reference to all flows attached to encap and adds the flows to
- * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
- */
-void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
-{
-	struct encap_flow_item *efi;
-	struct mlx5e_tc_flow *flow;
-
-	list_for_each_entry(efi, &e->flows, list) {
-		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
-		if (IS_ERR(mlx5e_flow_get(flow)))
-			continue;
-		wait_for_completion(&flow->init_done);
-
-		flow->tmp_efi_index = efi->index;
-		list_add(&flow->tmp_list, flow_list);
-	}
-}
-
 /* Iterate over tmp_list of flows attached to flow_list head. */
-void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
+void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
 {
 	struct mlx5e_tc_flow *flow, *tmp;
 
@@ -1618,222 +1484,6 @@ void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_l
 		mlx5e_flow_put(priv, flow);
 }
 
-static struct mlx5e_encap_entry *
-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
-			   struct mlx5e_encap_entry *e)
-{
-	struct mlx5e_encap_entry *next = NULL;
-
-retry:
-	rcu_read_lock();
-
-	/* find encap with non-zero reference counter value */
-	for (next = e ?
-		     list_next_or_null_rcu(&nhe->encap_list,
-					   &e->encap_list,
-					   struct mlx5e_encap_entry,
-					   encap_list) :
-		     list_first_or_null_rcu(&nhe->encap_list,
-					    struct mlx5e_encap_entry,
-					    encap_list);
-	     next;
-	     next = list_next_or_null_rcu(&nhe->encap_list,
-					  &next->encap_list,
-					  struct mlx5e_encap_entry,
-					  encap_list))
-		if (mlx5e_encap_take(next))
-			break;
-
-	rcu_read_unlock();
-
-	/* release starting encap */
-	if (e)
-		mlx5e_encap_put(netdev_priv(e->out_dev), e);
-	if (!next)
-		return next;
-
-	/* wait for encap to be fully initialized */
-	wait_for_completion(&next->res_ready);
-	/* continue searching if encap entry is not in valid state after completion */
-	if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
-		e = next;
-		goto retry;
-	}
-
-	return next;
-}
-
-void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
-{
-	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
-	struct mlx5e_encap_entry *e = NULL;
-	struct mlx5e_tc_flow *flow;
-	struct mlx5_fc *counter;
-	struct neigh_table *tbl;
-	bool neigh_used = false;
-	struct neighbour *n;
-	u64 lastuse;
-
-	if (m_neigh->family == AF_INET)
-		tbl = &arp_tbl;
-#if IS_ENABLED(CONFIG_IPV6)
-	else if (m_neigh->family == AF_INET6)
-		tbl = ipv6_stub->nd_tbl;
-#endif
-	else
-		return;
-
-	/* mlx5e_get_next_valid_encap() releases previous encap before returning
-	 * next one.
-	 */
-	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
-		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
-		struct encap_flow_item *efi, *tmp;
-		struct mlx5_eswitch *esw;
-		LIST_HEAD(flow_list);
-
-		esw = priv->mdev->priv.eswitch;
-		mutex_lock(&esw->offloads.encap_tbl_lock);
-		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
-			flow = container_of(efi, struct mlx5e_tc_flow,
-					    encaps[efi->index]);
-			if (IS_ERR(mlx5e_flow_get(flow)))
-				continue;
-			list_add(&flow->tmp_list, &flow_list);
-
-			if (mlx5e_is_offloaded_flow(flow)) {
-				counter = mlx5e_tc_get_counter(flow);
-				lastuse = mlx5_fc_query_lastuse(counter);
-				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
-					neigh_used = true;
-					break;
-				}
-			}
-		}
-		mutex_unlock(&esw->offloads.encap_tbl_lock);
-
-		mlx5e_put_encap_flow_list(priv, &flow_list);
-		if (neigh_used) {
-			/* release current encap before breaking the loop */
-			mlx5e_encap_put(priv, e);
-			break;
-		}
-	}
-
-	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
-
-	if (neigh_used) {
-		nhe->reported_lastuse = jiffies;
-
-		/* find the relevant neigh according to the cached device and
-		 * dst ip pair
-		 */
-		n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
-		if (!n)
-			return;
-
-		neigh_event_send(n, NULL);
-		neigh_release(n);
-	}
-}
-
-static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
-{
-	WARN_ON(!list_empty(&e->flows));
-
-	if (e->compl_result > 0) {
-		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-
-		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
-			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
-	}
-
-	kfree(e->tun_info);
-	kfree(e->encap_header);
-	kfree_rcu(e, rcu);
-}
-
-static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
-				struct mlx5e_decap_entry *d)
-{
-	WARN_ON(!list_empty(&d->flows));
-
-	if (!d->compl_result)
-		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
-
-	kfree_rcu(d, rcu);
-}
-
-void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
-{
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
-	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
-		return;
-	hash_del_rcu(&e->encap_hlist);
-	mutex_unlock(&esw->offloads.encap_tbl_lock);
-
-	mlx5e_encap_dealloc(priv, e);
-}
-
-static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
-{
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
-	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
-		return;
-	hash_del_rcu(&d->hlist);
-	mutex_unlock(&esw->offloads.decap_tbl_lock);
-
-	mlx5e_decap_dealloc(priv, d);
-}
-
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-			       struct mlx5e_tc_flow *flow, int out_index)
-{
-	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
-	/* flow wasn't fully initialized */
-	if (!e)
-		return;
-
-	mutex_lock(&esw->offloads.encap_tbl_lock);
-	list_del(&flow->encaps[out_index].list);
-	flow->encaps[out_index].e = NULL;
-	if (!refcount_dec_and_test(&e->refcnt)) {
-		mutex_unlock(&esw->offloads.encap_tbl_lock);
-		return;
-	}
-	hash_del_rcu(&e->encap_hlist);
-	mutex_unlock(&esw->offloads.encap_tbl_lock);
-
-	mlx5e_encap_dealloc(priv, e);
-}
-
-static void mlx5e_detach_decap(struct mlx5e_priv *priv,
-			       struct mlx5e_tc_flow *flow)
-{
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mlx5e_decap_entry *d = flow->decap_reformat;
-
-	if (!d)
-		return;
-
-	mutex_lock(&esw->offloads.decap_tbl_lock);
-	list_del(&flow->l3_to_l2_reformat);
-	flow->decap_reformat = NULL;
-
-	if (!refcount_dec_and_test(&d->refcnt)) {
-		mutex_unlock(&esw->offloads.decap_tbl_lock);
-		return;
-	}
-	hash_del_rcu(&d->hlist);
-	mutex_unlock(&esw->offloads.decap_tbl_lock);
-
-	mlx5e_decap_dealloc(priv, d);
-}
-
 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
@@ -2091,6 +1741,29 @@ void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
 	}
 }
 
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
+{
+	void *headers_v;
+	u16 ethertype;
+	u8 ip_version;
+
+	if (outer)
+		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+	else
+		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
+
+	ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
+	/* Return ip_version converted from ethertype anyway */
+	if (!ip_version) {
+		ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+		if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
+			ip_version = 4;
+		else if (ethertype == ETH_P_IPV6)
+			ip_version = 6;
+	}
+	return ip_version;
+}
+
 static int parse_tunnel_attr(struct mlx5e_priv *priv,
 			     struct mlx5e_tc_flow *flow,
 			     struct mlx5_flow_spec *spec,
@@ -2099,6 +1772,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 			     u8 *match_level,
 			     bool *match_inner)
 {
+	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct netlink_ext_ack *extack = f->common.extack;
 	bool needs_mapping, sets_mapping;
@@ -2136,6 +1810,31 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 		 */
 		if (!netif_is_bareudp(filter_dev))
 			flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+		err = mlx5e_tc_set_attr_rx_tun(flow, spec);
+		if (err)
+			return err;
+	} else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+		struct mlx5_flow_spec *tmp_spec;
+
+		tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
+		if (!tmp_spec) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
+			netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec\n");
+			return -ENOMEM;
+		}
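+		/* Parse the tunnel match into a scratch copy of the spec:
+		 * only the tunnel attributes extracted into rx_tun_attr are
+		 * kept, the flow's actual match is left untouched.
+		 */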
memcpy(tmp_spec, spec, sizeof(*tmp_spec)); + + err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); + if (err) { + kvfree(tmp_spec); + NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); + return err; + } + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + kvfree(tmp_spec); + if (err) + return err; } if (!needs_mapping && !sets_mapping) @@ -3584,35 +3283,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return 0; } -struct encap_key { - const struct ip_tunnel_key *ip_tun_key; - struct mlx5e_tc_tunnel *tc_tunnel; -}; - -static inline int cmp_encap_info(struct encap_key *a, - struct encap_key *b) -{ - return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; -} - -static inline int cmp_decap_info(struct mlx5e_decap_key *a, - struct mlx5e_decap_key *b) -{ - return memcmp(&a->key, &b->key, sizeof(b->key)); -} - -static inline int hash_encap_info(struct encap_key *key) -{ - return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), - key->tc_tunnel->tunnel_type); -} - -static inline int hash_decap_info(struct mlx5e_decap_key *key) -{ - return jhash(&key->key, sizeof(key->key), 0); -} - static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, struct net_device *peer_netdev) { @@ -3626,277 +3296,6 @@ static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, same_hw_devs(priv, peer_priv)); } -bool mlx5e_encap_take(struct mlx5e_encap_entry *e) -{ - return refcount_inc_not_zero(&e->refcnt); -} - -static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) -{ - return refcount_inc_not_zero(&e->refcnt); -} - -static struct mlx5e_encap_entry * -mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, - uintptr_t hash_key) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_encap_entry *e; - struct encap_key e_key; - - hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, - encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info->key; - e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, key) && - mlx5e_encap_take(e)) - return e; - } - - return NULL; -} - -static struct mlx5e_decap_entry * -mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, - uintptr_t hash_key) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_decap_key r_key; - struct mlx5e_decap_entry *e; - - hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, - hlist, hash_key) { - r_key = e->key; - if (!cmp_decap_info(&r_key, key) && - mlx5e_decap_take(e)) - return e; - } - return NULL; -} - -static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) -{ - size_t tun_size = sizeof(*tun_info) + tun_info->options_len; - - return kmemdup(tun_info, tun_size, GFP_KERNEL); -} - -static bool is_duplicated_encap_entry(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - int out_index, - struct mlx5e_encap_entry *e, - struct netlink_ext_ack *extack) -{ - int i; - - for (i = 0; i < out_index; i++) { - if (flow->encaps[i].e != e) - continue; - NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action"); - netdev_err(priv->netdev, "can't duplicate encap action\n"); - return true; - } - - return false; -} - -static int mlx5e_attach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct net_device *mirred_dev, - int out_index, - struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid) -{ - struct mlx5_eswitch *esw = 
priv->mdev->priv.eswitch; - struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; - const struct ip_tunnel_info *tun_info; - struct encap_key key; - struct mlx5e_encap_entry *e; - unsigned short family; - uintptr_t hash_key; - int err = 0; - - parse_attr = attr->parse_attr; - tun_info = parse_attr->tun_info[out_index]; - family = ip_tunnel_info_af(tun_info); - key.ip_tun_key = &tun_info->key; - key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); - if (!key.tc_tunnel) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel"); - return -EOPNOTSUPP; - } - - hash_key = hash_encap_info(&key); - - mutex_lock(&esw->offloads.encap_tbl_lock); - e = mlx5e_encap_get(priv, &key, hash_key); - - /* must verify if encap is valid or not */ - if (e) { - /* Check that entry was not already attached to this flow */ - if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) { - err = -EOPNOTSUPP; - goto out_err; - } - - mutex_unlock(&esw->offloads.encap_tbl_lock); - wait_for_completion(&e->res_ready); - - /* Protect against concurrent neigh update. */ - mutex_lock(&esw->offloads.encap_tbl_lock); - if (e->compl_result < 0) { - err = -EREMOTEIO; - goto out_err; - } - goto attach_flow; - } - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) { - err = -ENOMEM; - goto out_err; - } - - refcount_set(&e->refcnt, 1); - init_completion(&e->res_ready); - - tun_info = dup_tun_info(tun_info); - if (!tun_info) { - err = -ENOMEM; - goto out_err_init; - } - e->tun_info = tun_info; - err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); - if (err) - goto out_err_init; - - INIT_LIST_HEAD(&e->flows); - hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); - mutex_unlock(&esw->offloads.encap_tbl_lock); - - if (family == AF_INET) - err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); - else if (family == AF_INET6) - err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - - /* Protect against concurrent neigh update. 
*/ - mutex_lock(&esw->offloads.encap_tbl_lock); - complete_all(&e->res_ready); - if (err) { - e->compl_result = err; - goto out_err; - } - e->compl_result = 1; - -attach_flow: - flow->encaps[out_index].e = e; - list_add(&flow->encaps[out_index].list, &e->flows); - flow->encaps[out_index].index = out_index; - *encap_dev = e->out_dev; - if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; - attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; - *encap_valid = true; - } else { - *encap_valid = false; - } - mutex_unlock(&esw->offloads.encap_tbl_lock); - - return err; - -out_err: - mutex_unlock(&esw->offloads.encap_tbl_lock); - if (e) - mlx5e_encap_put(priv, e); - return err; - -out_err_init: - mutex_unlock(&esw->offloads.encap_tbl_lock); - kfree(tun_info); - kfree(e); - return err; -} - -static int mlx5e_attach_decap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; - struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5e_decap_entry *d; - struct mlx5e_decap_key key; - uintptr_t hash_key; - int err = 0; - - parse_attr = flow->attr->parse_attr; - if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { - NL_SET_ERR_MSG_MOD(extack, - "encap header larger than max supported"); - return -EOPNOTSUPP; - } - - key.key = parse_attr->eth; - hash_key = hash_decap_info(&key); - mutex_lock(&esw->offloads.decap_tbl_lock); - d = mlx5e_decap_get(priv, &key, hash_key); - if (d) { - mutex_unlock(&esw->offloads.decap_tbl_lock); - wait_for_completion(&d->res_ready); - mutex_lock(&esw->offloads.decap_tbl_lock); - if (d->compl_result) { - err = -EREMOTEIO; - goto out_free; - } - goto found; - } - - d = kzalloc(sizeof(*d), GFP_KERNEL); - if (!d) { - err = -ENOMEM; - goto out_err; - } - - d->key = key; - refcount_set(&d->refcnt, 1); - init_completion(&d->res_ready); - INIT_LIST_HEAD(&d->flows); - hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); - mutex_unlock(&esw->offloads.decap_tbl_lock); - - d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, - MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, - sizeof(parse_attr->eth), - &parse_attr->eth, - MLX5_FLOW_NAMESPACE_FDB); - if (IS_ERR(d->pkt_reformat)) { - err = PTR_ERR(d->pkt_reformat); - d->compl_result = err; - } - mutex_lock(&esw->offloads.decap_tbl_lock); - complete_all(&d->res_ready); - if (err) - goto out_free; - -found: - flow->decap_reformat = d; - attr->decap_pkt_reformat = d->pkt_reformat; - list_add(&flow->l3_to_l2_reformat, &d->flows); - mutex_unlock(&esw->offloads.decap_tbl_lock); - return 0; - -out_free: - mutex_unlock(&esw->offloads.decap_tbl_lock); - mlx5e_decap_put(priv, d); - return err; - -out_err: - mutex_unlock(&esw->offloads.decap_tbl_lock); - return err; -} - static int parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, @@ -4249,7 +3648,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (encap) { parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info); + parse_attr->tun_info[esw_attr->out_count] = + mlx5e_dup_tun_info(info); if (!parse_attr->tun_info[esw_attr->out_count]) return -ENOMEM; encap = false; @@ -4386,6 +3786,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } } + /* always set IP version for indirect table handling 
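(the eswitch keys its per-vport indirect tables and recirculation rules on it) 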
*/ + attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { /* For prio tag mode, replace vlan pop with rewrite vlan prio @@ -4666,7 +4069,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return flow; err_free: - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); mlx5e_flow_put(priv, flow); out: return ERR_PTR(err); @@ -4811,6 +4213,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, return 0; err_free: + flow_flag_set(flow, FAILED); dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); mlx5e_flow_put(priv, flow); out: @@ -5332,7 +4735,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) } uplink_priv->tunnel_mapping = mapping; - mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true); + /* 0xFFF is reserved for stack devices slow path table mark */ + mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_enc_opts_mapping; @@ -5345,8 +4749,16 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); + uplink_priv->encap = mlx5e_tc_tun_init(priv); + if (IS_ERR(uplink_priv->encap)) { + err = PTR_ERR(uplink_priv->encap); + goto err_register_fib_notifier; + } + return err; +err_register_fib_notifier: + rhashtable_destroy(tc_ht); err_ht_init: mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); err_enc_opts_mapping: @@ -5363,10 +4773,11 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) { struct mlx5_rep_uplink_priv *uplink_priv; - rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); - uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); + mlx5e_tc_tun_cleanup(uplink_priv->encap); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); mapping_destroy(uplink_priv->tunnel_mapping); @@ -5466,7 +4877,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, tc_skb_ext->chain = chain; zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) & - ZONE_RESTORE_MAX; + ESW_ZONE_ID_MASK; if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, zone_restore_id)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 4a2ce241522e37f003665ddf15200eafc59cb0e6..89003ae7775a811d8e700dbf267c0537c4185ea5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -37,6 +37,8 @@ #include "en.h" #include "eswitch.h" #include "en/tc_ct.h" +#include "en/tc_tun.h" +#include "en_rep.h" #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff @@ -76,6 +78,7 @@ struct mlx5_flow_attr { struct mlx5_flow_table *dest_ft; u8 inner_match_level; u8 outer_match_level; + u8 ip_version; u32 flags; union { struct mlx5_esw_flow_attr esw_attr[0]; @@ -83,6 +86,19 @@ }; }; +struct mlx5_rx_tun_attr { + u16 decap_vport; + union { + __be32 v4; + struct in6_addr v6; + } src_ip; /* Valid if decap_vport is not zero */ + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; /* Valid if decap_vport is not zero */ + u32 vni; +}; + #define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16 #define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0) @@ -158,7 +174,7 @@ bool mlx5e_encap_take(struct mlx5e_encap_entry *e); void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list); -void mlx5e_put_encap_flow_list(struct mlx5e_priv 
*priv, struct list_head *flow_list); +void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); struct mlx5e_neigh_hash_entry; void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); @@ -167,6 +183,7 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work); enum mlx5e_tc_attr_to_reg { CHAIN_TO_REG, + VPORT_TO_REG, TUNNEL_TO_REG, CTSTATE_TO_REG, ZONE_TO_REG, @@ -197,6 +214,11 @@ int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, enum mlx5e_tc_attr_to_reg type, u32 data); +void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + int act_id, u32 data); + void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, enum mlx5e_tc_attr_to_reg type, u32 data, @@ -207,6 +229,16 @@ void mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, u32 *data, u32 *mask); +int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow); + int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, int namespace, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); @@ -242,6 +274,10 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv, struct mlx5_flow_handle *rule, struct mlx5_flow_attr *attr); +bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev); +int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, + u16 *vport); + #else /* CONFIG_MLX5_CLS_ACT */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} @@ -283,7 +319,7 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) reg_b = be32_to_cpu(cqe->ft_metadata); - if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ZONE_RESTORE_BITS)) + if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ESW_ZONE_ID_BITS)) return false; chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c new file mode 100644 index 0000000000000000000000000000000000000000..b7d00c4c704680661a8208ef4540a528fd4b305b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#include <linux/etherdevice.h> +#include <linux/idr.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" +#include "fs_core.h" +#include "esw/indir_table.h" +#include "lib/fs_chains.h" + +#define MLX5_ESW_INDIR_TABLE_SIZE 128 +#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2) +#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1) + +struct mlx5_esw_indir_table_rule { + struct list_head list; + struct mlx5_flow_handle *handle; + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + u32 vni; + struct mlx5_modify_hdr *mh; + refcount_t refcnt; +}; + +struct mlx5_esw_indir_table_entry { + struct hlist_node hlist; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *recirc_grp; + struct mlx5_flow_group *fwd_grp; + struct mlx5_flow_handle *fwd_rule; + struct list_head recirc_rules; + int recirc_cnt; + int fwd_ref; + + u16 vport; + u8 ip_version; +}; + +struct mlx5_esw_indir_table { + struct mutex lock; /* protects table */ + DECLARE_HASHTABLE(table, 8); +}; + +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void) +{ + struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL); + + if (!indir) + return ERR_PTR(-ENOMEM); + + mutex_init(&indir->lock); + hash_init(indir->table); + return indir; +} + +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) +{ + mutex_destroy(&indir->lock); + kvfree(indir); +} + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + /* Use indirect table for all IP traffic from UL to VF with vport + * destination when source rewrite flag is set. + */ + return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK && + mlx5_eswitch_is_vf_vport(esw, vport_num) && + esw->dev == dest_mdev && + attr->ip_version && + attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE; +} + +u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + return esw_attr->rx_tun_attr ? 
esw_attr->rx_tun_attr->decap_vport : 0; +} + +static struct mlx5_esw_indir_table_rule * +mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_esw_indir_table_rule *rule; + + list_for_each_entry(rule, &e->recirc_rules, list) + if (rule->vni == attr->rx_tun_attr->vni && + !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip, + sizeof(attr->rx_tun_attr->dst_ip))) + goto found; + return NULL; + +found: + refcount_inc(&rule->refcnt); + return rule; +} + +static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + struct mlx5_esw_indir_table_entry *e) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_esw_indir_table_rule *rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + int err = 0; + u32 data; + + rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr); + if (rule) + return 0; + + if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX) + return -EINVAL; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return -ENOMEM; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) { + err = -ENOMEM; + goto out; + } + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS | + MLX5_MATCH_MISC_PARAMETERS_2; + if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) { + MLX5_SET(fte_match_param, rule_spec->match_criteria, + outer_headers.ip_version, 0xf); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, + attr->ip_version); + } else if (attr->ip_version) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype, + (attr->ip_version == 4 ? 
ETH_P_IP : ETH_P_IPV6)); + } else { + err = -EOPNOTSUPP; + goto err_ethertype; + } + + if (attr->ip_version == 4) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + MLX5_SET(fte_match_param, rule_spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(esw_attr->rx_tun_attr->dst_ip.v4)); + } else if (attr->ip_version == 6) { + int len = sizeof(struct in6_addr); + + memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, len); + memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &esw_attr->rx_tun_attr->dst_ip.v6, len); + } + + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + misc_parameters.vxlan_vni); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni, + MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni)); + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch, + MLX5_VPORT_UPLINK)); + + /* Modify flow source to recirculate packet */ + data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport); + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + VPORT_TO_REG, data); + if (err) + goto err_mod_hdr_regc0; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT); + if (err) + goto err_mod_hdr_regc1; + + flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, mod_acts.actions); + if (IS_ERR(flow_act.modify_hdr)) { + err = PTR_ERR(flow_act.modify_hdr); + goto err_mod_hdr_alloc; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(dest.ft)) { + err = PTR_ERR(dest.ft); + goto err_table; + } + handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto err_handle; + } + + dealloc_mod_hdr_actions(&mod_acts); + rule->handle = handle; + rule->vni = esw_attr->rx_tun_attr->vni; + rule->mh = flow_act.modify_hdr; + memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip, + sizeof(esw_attr->rx_tun_attr->dst_ip)); + refcount_set(&rule->refcnt, 1); + list_add(&rule->list, &e->recirc_rules); + e->recirc_cnt++; + goto out; + +err_handle: + mlx5_chains_put_table(chains, 0, 1, 0); +err_table: + mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr); +err_mod_hdr_alloc: +err_mod_hdr_regc1: + dealloc_mod_hdr_actions(&mod_acts); +err_mod_hdr_regc0: +err_ethertype: + kfree(rule); +out: + kvfree(rule_spec); + return err; +} + +static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_esw_indir_table_entry *e) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5_esw_indir_table_rule *rule; + + list_for_each_entry(rule, &e->recirc_rules, list) + if (rule->vni == esw_attr->rx_tun_attr->vni && + 
!memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip, + sizeof(esw_attr->rx_tun_attr->dst_ip))) + goto found; + + return; + +found: + if (!refcount_dec_and_test(&rule->refcnt)) + return; + + mlx5_del_flow_rules(rule->handle); + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_modify_header_dealloc(esw->dev, rule->mh); + list_del(&rule->list); + kfree(rule); + e->recirc_cnt--; +} + +static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + struct mlx5_esw_indir_table_entry *e) +{ + int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) + MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf); + else + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype); + + if (attr->ip_version == 4) { + MLX5_SET_TO_ONES(fte_match_param, match, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else if (attr->ip_version == 6) { + memset(MLX5_ADDR_OF(fte_match_param, match, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, sizeof(struct in6_addr)); + } else { + err = -EOPNOTSUPP; + goto out; + } + + MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni); + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(create_flow_group_in, in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX); + e->recirc_grp = mlx5_create_flow_group(e->ft, in); + if (IS_ERR(e->recirc_grp)) { + err = PTR_ERR(e->recirc_grp); + goto out; + } + + INIT_LIST_HEAD(&e->recirc_rules); + e->recirc_cnt = 0; + +out: + kvfree(in); + return err; +} + +static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, + struct mlx5_esw_indir_table_entry *e) +{ + int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + kvfree(in); + return -ENOMEM; + } + + /* Hold one entry */ + MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); + e->fwd_grp = mlx5_create_flow_group(e->ft, in); + if (IS_ERR(e->fwd_grp)) { + err = PTR_ERR(e->fwd_grp); + goto err_out; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = e->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1); + if (IS_ERR(e->fwd_rule)) { + mlx5_destroy_flow_group(e->fwd_grp); + err = PTR_ERR(e->fwd_rule); + } + +err_out: + kvfree(spec); + kvfree(in); + return err; +} + +static struct mlx5_esw_indir_table_entry * +mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, u16 vport, bool decap) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + struct 
mlx5_esw_indir_table_entry *e; + struct mlx5_flow_table *ft; + int err = 0; + + root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) + return ERR_PTR(-ENOENT); + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE; + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.level = 1; + + ft = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto tbl_err; + } + e->ft = ft; + e->vport = vport; + e->ip_version = attr->ip_version; + e->fwd_ref = !decap; + + err = mlx5_create_indir_recirc_group(esw, attr, spec, e); + if (err) + goto recirc_grp_err; + + if (decap) { + err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e); + if (err) + goto recirc_rule_err; + } + + err = mlx5_create_indir_fwd_group(esw, e); + if (err) + goto fwd_grp_err; + + hash_add(esw->fdb_table.offloads.indir->table, &e->hlist, + vport << 16 | attr->ip_version); + + return e; + +fwd_grp_err: + if (decap) + mlx5_esw_indir_table_rule_put(esw, attr, e); +recirc_rule_err: + mlx5_destroy_flow_group(e->recirc_grp); +recirc_grp_err: + mlx5_destroy_flow_table(e->ft); +tbl_err: + kfree(e); + return ERR_PTR(err); +} + +static struct mlx5_esw_indir_table_entry * +mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version) +{ + struct mlx5_esw_indir_table_entry *e; + u32 key = vport << 16 | ip_version; + + hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key) + if (e->vport == vport && e->ip_version == ip_version) + return e; + + return NULL; +} + +struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + u16 vport, bool decap) +{ + struct mlx5_esw_indir_table_entry *e; + int err; + + mutex_lock(&esw->fdb_table.offloads.indir->lock); + e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version); + if (e) { + if (!decap) { + e->fwd_ref++; + } else { + err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e); + if (err) + goto out_err; + } + } else { + e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap); + if (IS_ERR(e)) { + err = PTR_ERR(e); + esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err); + goto out_err; + } + } + mutex_unlock(&esw->fdb_table.offloads.indir->lock); + return e->ft; + +out_err: + mutex_unlock(&esw->fdb_table.offloads.indir->lock); + return ERR_PTR(err); +} + +void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ + struct mlx5_esw_indir_table_entry *e; + + mutex_lock(&esw->fdb_table.offloads.indir->lock); + e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version); + if (!e) + goto out; + + if (!decap) + e->fwd_ref--; + else + mlx5_esw_indir_table_rule_put(esw, attr, e); + + if (e->fwd_ref || e->recirc_cnt) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_group(e->recirc_grp); + mlx5_del_flow_rules(e->fwd_rule); + mlx5_destroy_flow_group(e->fwd_grp); + mlx5_destroy_flow_table(e->ft); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.indir->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h new file mode 100644 index 0000000000000000000000000000000000000000..cb9eafd1b4eed446ee61ea8173549d4bdf53dac6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h @@ -0,0 +1,76 @@ +/* 
SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_ESW_FT_H__ +#define __MLX5_ESW_FT_H__ + +#ifdef CONFIG_MLX5_CLS_ACT + +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void); +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir); + +struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + u16 vport, bool decap); +void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap); + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev); + +u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr); + +#else +/* indir API stubs */ +static inline struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void) +{ + return NULL; +} + +static inline void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) +{ +} + +static inline struct mlx5_flow_table * +mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + u16 vport, bool decap) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ +} + +static inline bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev) +{ + return false; +} + +static inline u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) +{ + return 0; +} +#endif + +#endif /* __MLX5_ESW_FT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 820305b1664e3616fc3df244a4dc287a7aa22241..aba17835465b4d83974591b1926f8cd307524dee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1300,6 +1300,13 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + ret = mlx5_esw_vport_vhca_id_set(esw, vport_num); + if (ret) + goto err_vhca_mapping; + } + esw_vport_change_handle_locked(vport); esw->enabled_vports++; @@ -1307,6 +1314,11 @@ done: mutex_unlock(&esw->state_lock); return ret; + +err_vhca_mapping: + esw_vport_cleanup(esw, vport); + mutex_unlock(&esw->state_lock); + return ret; } void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) @@ -1325,6 +1337,11 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + mlx5_esw_vport_vhca_id_clear(esw, vport_num); + /* We don't assume VFs will cleanup after themselves. * Calling vport change handler while vport is disabled will cleanup * the vport resources. 
@@ -1815,6 +1832,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr); atomic64_set(&esw->offloads.num_flows, 0); ida_init(&esw->offloads.vport_metadata_ida); + xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); mutex_init(&esw->state_lock); mutex_init(&esw->mode_lock); @@ -1854,6 +1872,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_offloads_cleanup_reps(esw); mutex_destroy(&esw->mode_lock); mutex_destroy(&esw->state_lock); + WARN_ON(!xa_empty(&esw->offloads.vhca_map)); + xa_destroy(&esw->offloads.vhca_map); ida_destroy(&esw->offloads.vport_metadata_ida); mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr); mutex_destroy(&esw->offloads.encap_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 479d2ac2cd8558756109e18527c99a54efc3c541..fdf5c8c05c1b4438d07a6365893e278345439da4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,6 +36,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> #include <linux/atomic.h> +#include <linux/xarray.h> #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> @@ -160,6 +161,8 @@ struct mlx5_vport { struct devlink_port *dl_port; }; +struct mlx5_esw_indir_table; + struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -176,9 +179,11 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_namespace *ns; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *send_to_vport_meta_grp; struct mlx5_flow_group *peer_miss_grp; struct mlx5_flow_handle **peer_miss_rules; struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle **send_to_vport_meta_rules; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; @@ -190,6 +195,8 @@ struct mlx5_eswitch_fdb { struct mutex lock; } vports; + struct mlx5_esw_indir_table *indir; + } offloads; }; u32 flags; @@ -212,6 +219,7 @@ struct mlx5_esw_offload { struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ + struct xarray vhca_map; const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; atomic64_t num_flows; @@ -387,12 +395,14 @@ enum mlx5_flow_match_level { enum { MLX5_ESW_DEST_ENCAP = BIT(0), MLX5_ESW_DEST_ENCAP_VALID = BIT(1), + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2), }; enum { MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3), }; struct mlx5_esw_flow_attr { @@ -413,7 +423,9 @@ struct mlx5_esw_flow_attr { struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_core_dev *mdev; struct mlx5_termtbl_handle *termtbl; + int src_port_rewrite_act_id; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5_rx_tun_attr *rx_tun_attr; struct mlx5_pkt_reformat *decap_pkt_reformat; }; @@ -734,6 +746,10 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, u16 vport_num, u32 sfnum); void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num); + /** * mlx5_esw_event_info - Indicates eswitch mode changed/changing. 
* diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7f09f2bbf7c1dd1b776e502a9a10fbc174e18eb0..94cb0217b4f39202da2f91ce910842548053b276 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,7 +38,8 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "esw/indir_table.h" #include "esw/acl/ofld.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@ -257,7 +259,9 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, static void mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr, + struct mlx5_eswitch *src_esw, + u16 vport) { void *misc2; void *misc; @@ -266,10 +270,12 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, * VHCA in dual-port RoCE mode, and matching on source vport may fail. */ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + if (mlx5_esw_indir_table_decap_vport(attr)) + vport = mlx5_esw_indir_table_decap_vport(attr); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch, - attr->in_rep->vport)); + mlx5_eswitch_get_vport_metadata_for_match(src_esw, + vport)); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, @@ -278,12 +284,12 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; } else { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + MLX5_CAP_GEN(src_esw->dev, vhca_id)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); @@ -295,6 +301,299 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, } } +static int +esw_setup_decap_indir(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec) +{ + struct mlx5_flow_table *ft; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + ft = mlx5_esw_indir_table_get(esw, attr, spec, + mlx5_esw_indir_table_decap_vport(attr), true); + return PTR_ERR_OR_ZERO(ft); +} + +static void +esw_cleanup_decap_indir(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + if (mlx5_esw_indir_table_decap_vport(attr)) + mlx5_esw_indir_table_put(esw, attr, + mlx5_esw_indir_table_decap_vport(attr), + true); +} + +static int +esw_setup_ft_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = attr->dest_ft; + + if (mlx5_esw_indir_table_decap_vport(attr)) + return esw_setup_decap_indir(esw, attr, spec); + return 0; +} + +static void +esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act 
*flow_act, + struct mlx5_fs_chains *chains, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = mlx5_chains_get_tc_end_ft(chains); +} + +static int +esw_setup_chain_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level, + int i) +{ + struct mlx5_flow_table *ft; + + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + ft = mlx5_chains_get_table(chains, chain, prio, level); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = ft; + return 0; +} + +static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, + int from, int to) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + int i; + + for (i = from; i < to; i++) + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + mlx5_chains_put_table(chains, 0, 1, 0); + else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + esw_attr->dests[i].mdev)) + mlx5_esw_indir_table_put(esw, attr, esw_attr->dests[i].rep->vport, + false); +} + +static bool +esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr) +{ + int i; + + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + return true; + return false; +} + +static int +esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_fs_chains *chains, + struct mlx5_flow_attr *attr, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int j, err; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); + if (err) + goto err_setup_chain; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat; + } + return 0; + +err_setup_chain: + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); + return err; +} + +static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count); +} + +static bool +esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int i; + + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + esw_attr->dests[i].mdev)) + return true; + return false; +} + +static int +esw_setup_indir_table(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + bool ignore_flow_lvl, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int j, err; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { + if (ignore_flow_lvl) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + 
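/* one indirect table per split destination, looked up (or created) by dest vport and the rule's IP version */ +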
dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, spec, + esw_attr->dests[j].rep->vport, false); + if (IS_ERR(dest[*i].ft)) { + err = PTR_ERR(dest[*i].ft); + goto err_indir_tbl_get; + } + } + + if (mlx5_esw_indir_table_decap_vport(attr)) { + err = esw_setup_decap_indir(esw, attr, spec); + if (err) + goto err_indir_tbl_get; + } + + return 0; + +err_indir_tbl_get: + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); + return err; +} + +static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count); + esw_cleanup_decap_indir(esw, attr); +} + +static void +esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level) +{ + mlx5_chains_put_table(chains, chain, prio, level); +} + +static void +esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) +{ + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { + if (pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } +} + +static int +esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int i) +{ + int j; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++) + esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true); + return i; +} + +static int +esw_setup_dests(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + int err = 0; + + if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && + MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) && + mlx5_eswitch_vport_match_metadata_enabled(esw)) + attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; + + if (attr->dest_ft) { + esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); + (*i)++; + } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + esw_setup_slow_path_dest(dest, flow_act, chains, *i); + (*i)++; + } else if (attr->dest_chain) { + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, + 1, 0, *i); + (*i)++; + } else if (esw_is_indir_table(esw, attr)) { + err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i); + } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) { + err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i); + } else { + *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i); + } + + return err; +} + +static void +esw_cleanup_dests(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + 
struct mlx5_fs_chains *chains = esw_chains(esw); + + if (attr->dest_ft) { + esw_cleanup_decap_indir(esw, attr); + } else if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) { + if (attr->dest_chain) + esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0); + else if (esw_is_indir_table(esw, attr)) + esw_cleanup_indir_table(esw, attr); + else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) + esw_cleanup_chain_src_port_rewrite(esw, attr); + } +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -308,7 +607,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr fwd_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; - int j, i = 0; + int i = 0; if (esw->mode != MLX5_ESWITCH_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); @@ -329,50 +628,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } } + mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr); + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - struct mlx5_flow_table *ft; - - if (attr->dest_ft) { - flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = attr->dest_ft; - i++; - } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { - flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = mlx5_chains_get_tc_end_ft(chains); - i++; - } else if (attr->dest_chain) { - flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - ft = mlx5_chains_get_table(chains, attr->dest_chain, - 1, 0); - if (IS_ERR(ft)) { - rule = ERR_CAST(ft); - goto err_create_goto_table; - } - - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = ft; - i++; - } else { - for (j = esw_attr->split_count; j < esw_attr->out_count; j++) { - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = esw_attr->dests[j].rep->vport; - dest[i].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[j].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - dest[i].vport.flags |= - MLX5_FLOW_DEST_VPORT_VHCA_ID; - if (esw_attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act.pkt_reformat = - esw_attr->dests[j].pkt_reformat; - dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.pkt_reformat = - esw_attr->dests[j].pkt_reformat; - } - i++; - } + int err; + + err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i); + if (err) { + rule = ERR_PTR(err); + goto err_create_goto_table; } } @@ -407,15 +671,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, fdb = attr->ft; if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) - mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr); + mlx5_eswitch_set_rule_source_port(esw, spec, attr, + esw_attr->in_mdev->priv.eswitch, + esw_attr->in_rep->vport); } if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; } - mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr); - if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr, &flow_act, dest, i); @@ -434,8 +698,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_esw_get: - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) - mlx5_chains_put_table(chains, attr->dest_chain, 1, 0); + esw_cleanup_dests(esw, attr); err_create_goto_table: 
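+ /* rule holds the ERR_PTR-encoded error at this point */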
return rule; } @@ -453,7 +716,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; - int i; + int i, err = 0; fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0); if (IS_ERR(fast_fdb)) { @@ -472,22 +735,26 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < esw_attr->split_count; i++) { - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = esw_attr->dests[i].rep->vport; - dest[i].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[i].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { - dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.pkt_reformat = esw_attr->dests[i].pkt_reformat; + if (esw_is_indir_table(esw, attr)) + err = esw_setup_indir_table(dest, &flow_act, esw, attr, spec, false, &i); + else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) + err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr, + &i); + else + esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false); + + if (err) { + rule = ERR_PTR(err); + goto err_chain_src_rewrite; } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = fwd_fdb; i++; - mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr); + mlx5_eswitch_set_rule_source_port(esw, spec, attr, + esw_attr->in_mdev->priv.eswitch, + esw_attr->in_rep->vport); if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -495,13 +762,16 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); - if (IS_ERR(rule)) - goto add_err; + if (IS_ERR(rule)) { + i = esw_attr->split_count; + goto err_chain_src_rewrite; + } atomic64_inc(&esw->offloads.num_flows); return rule; -add_err: +err_chain_src_rewrite: + esw_put_dest_tables_loop(esw, attr, 0, i); esw_vport_tbl_put(esw, &fwd_attr); err_get_fwd: mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); @@ -542,13 +812,13 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, if (fwd_rule) { esw_vport_tbl_put(esw, &fwd_attr); mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); + esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count); } else { if (split) esw_vport_tbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); - if (attr->dest_chain) - mlx5_chains_put_table(chains, attr->dest_chain, 1, 0); + esw_cleanup_dests(esw, attr); } } @@ -810,6 +1080,81 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules; + int i = 0, num_vfs = esw->esw_funcs.num_vfs, vport_num; + + if (!num_vfs || !flows) + return; + + mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) + mlx5_del_flow_rules(flows[i++]); + + kvfree(flows); +} + +static int +mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw) +{ + int num_vfs, vport_num, rule_idx = 0, err = 0; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_handle **flows; 
+ struct mlx5_flow_spec *spec; + + num_vfs = esw->esw_funcs.num_vfs; + flows = kvzalloc(num_vfs * sizeof(*flows), GFP_KERNEL); + if (!flows) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto alloc_err; + } + + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1, + ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) { + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + dest.vport.num = vport_num; + + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule idx %d, err %ld\n", + rule_idx, PTR_ERR(flow_rule)); + goto rule_err; + } + flows[rule_idx++] = flow_rule; + } + + esw->fdb_table.offloads.send_to_vport_meta_rules = flows; + kvfree(spec); + return 0; + +rule_err: + while (--rule_idx >= 0) + mlx5_del_flow_rules(flows[rule_idx]); + kvfree(spec); +alloc_err: + kvfree(flows); + return err; +} + static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw) { return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & @@ -1292,11 +1637,11 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; + int num_vfs, table_size, ix, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; u32 flags = 0, *flow_group_in; - int table_size, ix, err = 0; struct mlx5_flow_group *g; void *match_criteria; u8 *dmac; @@ -1322,7 +1667,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + - MLX5_ESW_MISS_FLOWS + esw->total_vports; + MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. 
@@ -1370,6 +1715,38 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.send_to_vport_grp = g; + /* meta send to vport */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + num_vfs = esw->esw_funcs.num_vfs; + if (num_vfs) { + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + num_vfs - 1); + ix += num_vfs; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", + err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); + if (err) + goto meta_rule_err; + } + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { /* create peer esw miss group */ memset(flow_group_in, 0, inlen); @@ -1437,6 +1814,11 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); peer_miss_err: + mlx5_eswitch_del_send_to_vport_meta_rules(esw); +meta_rule_err: + if (esw->fdb_table.offloads.send_to_vport_meta_grp) + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); +send_vport_meta_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: esw_chains_destroy(esw, esw_chains(esw)); @@ -1458,7 +1840,10 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + mlx5_eswitch_del_send_to_vport_meta_rules(esw); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + if (esw->fdb_table.offloads.send_to_vport_meta_grp) + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); @@ -2182,12 +2567,20 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) static int esw_offloads_steering_init(struct mlx5_eswitch *esw) { + struct mlx5_esw_indir_table *indir; int err; memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.vports.lock); hash_init(esw->fdb_table.offloads.vports.table); + indir = mlx5_esw_indir_table_init(); + if (IS_ERR(indir)) { + err = PTR_ERR(indir); + goto create_indir_err; + } + esw->fdb_table.offloads.indir = indir; + err = esw_create_uplink_offloads_acl_tables(esw); if (err) goto create_acl_err; @@ -2219,6 +2612,8 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) create_offloads_err: esw_destroy_uplink_offloads_acl_tables(esw); create_acl_err: + mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); +create_indir_err: mutex_destroy(&esw->fdb_table.offloads.vports.lock); return err; } @@ -2230,6 +2625,7 @@ static void 
esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_restore_table(esw); esw_destroy_offloads_table(esw); esw_destroy_uplink_offloads_acl_tables(esw); + mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); mutex_destroy(&esw->fdb_table.offloads.vports.lock); } @@ -2867,3 +3263,94 @@ void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_devlink_sf_port_unregister(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } + +static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + *vhca_id = 0; + if (mlx5_esw_is_manager_vport(esw, vport_num) || + !MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + return -EPERM; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); + +out_free: + kfree(query_ctx); + return err; +} + +int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 *old_entry, *vhca_map_entry, vhca_id; + int err; + + err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + if (err) { + esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", + vport_num, err); + return err; + } + + vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL); + if (!vhca_map_entry) + return -ENOMEM; + + *vhca_map_entry = vport_num; + old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL); + if (xa_is_err(old_entry)) { + kfree(vhca_map_entry); + return xa_err(old_entry); + } + kfree(old_entry); + return 0; +} + +void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 *vhca_map_entry, vhca_id; + int err; + + err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + if (err) + esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", + vport_num, err); + + vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id); + kfree(vhca_map_entry); +} + +int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num) +{ + u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id); + + if (!res) + return -ENOENT; + + *vport_num = *res; + return 0; +} + +u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, + u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) + return 0; + + return vport->metadata; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 3754ef98554f418ed27dcb7d6421437e7545afcf..efe403c7e354278c6010f3d5e6fb36a0d6e43055 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -270,5 +270,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); + void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); #endif /* __MLX5_CORE_H__ */ diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index ba78e0660523c1114e8e0219b01b74e2516e6a32..e05c5c0f3ae1d0b3777a2e83c82edac82a253c7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1164,3 +1164,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
 	return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev);
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
+{
+	u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+	MLX5_SET(query_hca_cap_in, in, function_id, function_id);
+	MLX5_SET(query_hca_cap_in, in, other_function, true);
+	return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+}
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 29fd832950e0deed34af5ae4f36861a03188a8dc..994c2c8cb4fd74600a103fd7a0dd3efd3d0bc668 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -96,6 +96,35 @@ static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
 u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
 					      u16 vport_num);
+u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
+					    u16 vport_num);
+
+/* Reg C1 usage:
+ * Reg C1 = < ESW_TUN_ID(12) | ESW_TUN_OPTS(12) | ESW_ZONE_ID(8) >
+ *
+ * Highest 12 bits of reg c1 are the encapsulation tunnel id, next 12 bits are
+ * encapsulation tunnel options, and the lowest 8 bits are used for zone id.
+ *
+ * Zone id is used to restore CT flow when packet misses on chain.
+ *
+ * Tunnel id and options are used together to restore the tunnel info metadata
+ * on miss and to support inner header rewrite by means of implicit chain 0
+ * flows.
+ */
+#define ESW_ZONE_ID_BITS 8
+#define ESW_TUN_OPTS_BITS 12
+#define ESW_TUN_ID_BITS 12
+#define ESW_TUN_OPTS_OFFSET ESW_ZONE_ID_BITS
+#define ESW_TUN_OFFSET ESW_TUN_OPTS_OFFSET
+#define ESW_ZONE_ID_MASK GENMASK(ESW_ZONE_ID_BITS - 1, 0)
+#define ESW_TUN_OPTS_MASK GENMASK(32 - ESW_TUN_ID_BITS - 1, ESW_TUN_OPTS_OFFSET)
+#define ESW_TUN_MASK GENMASK(31, ESW_TUN_OFFSET)
+#define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */
+#define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT 0xFFF /* 0xFFF is a reserved mapping */
+#define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \
+				       ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT)
+#define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK
+
 u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
 #else /* CONFIG_MLX5_ESWITCH */
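
The reg C1 constants added to eswitch.h above can be sanity-checked in isolation. The following is a minimal userspace C sketch, assuming a local GENMASK() stand-in that matches the kernel macro for 32-bit values; the #defines themselves are copied from the eswitch.h hunk. It verifies the field masks and that ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK equals ESW_TUN_SLOW_TABLE_GOTO_VPORT shifted to the tunnel offset, which is the value/mask pair mlx5_eswitch_add_send_to_vport_meta_rules() installs against metadata_reg_c_1 (tunnel id 0 with the reserved option value 0xFFF).

/* Standalone sketch: sanity-check the reg C1 bit layout. Not part of the
 * patch; GENMASK() here is a local stand-in for the kernel macro, valid
 * for 32-bit values only.
 */
#include <assert.h>
#include <stdio.h>

#define GENMASK(h, l) ((~0u << (l)) & (~0u >> (31 - (h))))

#define ESW_ZONE_ID_BITS 8
#define ESW_TUN_OPTS_BITS 12
#define ESW_TUN_ID_BITS 12
#define ESW_TUN_OPTS_OFFSET ESW_ZONE_ID_BITS
#define ESW_TUN_OFFSET ESW_TUN_OPTS_OFFSET
#define ESW_ZONE_ID_MASK GENMASK(ESW_ZONE_ID_BITS - 1, 0)
#define ESW_TUN_OPTS_MASK GENMASK(32 - ESW_TUN_ID_BITS - 1, ESW_TUN_OPTS_OFFSET)
#define ESW_TUN_MASK GENMASK(31, ESW_TUN_OFFSET)
#define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0
#define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT 0xFFF
#define ESW_TUN_SLOW_TABLE_GOTO_VPORT \
	((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \
	 ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT)
#define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK

int main(void)
{
	/* zone id occupies bits 7:0, tunnel opts 19:8, tunnel id 31:20 */
	printf("ESW_ZONE_ID_MASK  = 0x%08x\n", ESW_ZONE_ID_MASK);  /* 0x000000ff */
	printf("ESW_TUN_OPTS_MASK = 0x%08x\n", ESW_TUN_OPTS_MASK); /* 0x000fff00 */
	printf("ESW_TUN_MASK      = 0x%08x\n", ESW_TUN_MASK);      /* 0xffffff00 */

	/* The slow-table "goto vport" mark is tunnel id 0 with the reserved
	 * option value 0xFFF; shifted into its reg C1 position it covers
	 * exactly the opts field.
	 */
	assert((ESW_TUN_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OFFSET) ==
	       ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK);
	printf("slow table mark   = 0x%08x\n",
	       ESW_TUN_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OFFSET);
	return 0;
}

Because the meta rules match with criteria ESW_TUN_MASK (bits 31:8) rather than the full register, the zone id byte in the low 8 bits is left out of the match, so CT restore state carried there is unaffected by the slow-path recirculation.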