提交 6431ce6c 编写于 作者: D David S. Miller

Merge tag 'mlx5-updates-2022-05-17' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2022-05-17

MISC updates to mlx5 driver

1) Aya Levin allows relaxed ordering over VFs

2) Gal Pressman adds support for XDP SQs for uplink representors in switchdev mode

3) Add debugfs TC stats and command failure syndrome for debuggability

4) Tariq uses variants of vzalloc where it could help

5) Multiport eswitch support from Eli Cohen:

Eli Cohen Says:
===============

The multiport eswitch feature allows forwarding traffic from a
representor net device to the uplink port of an associated eswitch.

This feature requires creating a LAG object. Since LAG can be created
only once for a function, the feature is mutually exclusive with both
bonding and multipath.

Multiport eswitch mode is entered automatically when these conditions
are met:
1. No other LAG related mode is active.
2. A rule that explicitly forwards to an uplink port is inserted.

The implementation maintains a reference count on such rules. When the
reference count reaches zero, the LAG is released and other modes may be
used.

When a rule that explicitly forwards to an uplink port is
inserted while another LAG mode is active, that rule will not be
offloaded by the hardware since the hardware cannot guarantee that the
rule will actually be forwarded to that port.

Example rules that forward to an uplink port:

$ tc filter add dev rep0 root flower action mirred egress \
  redirect dev uplinkrep0

$ tc filter add dev rep0 root flower action mirred egress \
  redirect dev uplinkrep1

This feature is supported only if LAG_RESOURCE_ALLOCATION firmware
configuration parameter is set to true.

The series consists of three patches:
1. Lag state machine refactor
   This patch does not add new functionality but rather changes the way
   the state of the LAG is maintained.
2. Small fix to remove unused argument.
3. The actual implementation of the feature.
===============

====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -39,7 +39,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \
en_rep.o en/rep/bond.o en/mod_hdr.o \
en/mapping.o
en/mapping.o lag/mpesw.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
lib/fs_chains.o en/tc_tun.o \
esw/indir_table.o en/tc_tun_encap.o \
......
......@@ -213,12 +213,6 @@ int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node)
}
EXPORT_SYMBOL_GPL(mlx5_db_alloc_node);
int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
{
return mlx5_db_alloc_node(dev, db, dev->priv.numa_node);
}
EXPORT_SYMBOL_GPL(mlx5_db_alloc);
void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
{
u32 db_per_page = PAGE_SIZE / cache_line_size();
......
......@@ -1887,7 +1887,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
return err;
}
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int err)
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
u32 syndrome, int err)
{
struct mlx5_cmd_stats *stats;
......@@ -1902,6 +1903,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int
if (err == -EREMOTEIO) {
stats->failed_mbox_status++;
stats->last_failed_mbox_status = status;
stats->last_failed_syndrome = syndrome;
}
spin_unlock_irq(&stats->lock);
}
......@@ -1909,6 +1911,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int
/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
{
u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
u8 status = MLX5_GET(mbox_out, out, status);
if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
......@@ -1917,7 +1920,7 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *
if (!err && status != MLX5_CMD_STAT_OK)
err = -EREMOTEIO;
cmd_status_log(dev, opcode, status, err);
cmd_status_log(dev, opcode, status, syndrome, err);
return err;
}
......
......@@ -192,6 +192,8 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
&stats->last_failed_errno);
debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
&stats->last_failed_mbox_status);
debugfs_create_x32("last_failed_syndrome", 0400, stats->root,
&stats->last_failed_syndrome);
}
}
}
......
......@@ -1220,6 +1220,7 @@ mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev)
MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe);
}
int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev);
int mlx5e_priv_init(struct mlx5e_priv *priv,
const struct mlx5e_profile *profile,
struct net_device *netdev,
......
......@@ -565,8 +565,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
MLX5_CAP_GEN(mdev, relaxed_ordering_write);
bool ro = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
return ro && lro_en ?
MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
......
......@@ -10,6 +10,7 @@
#include "en/tc_tun_encap.h"
#include "en/tc_priv.h"
#include "en_rep.h"
#include "lag/lag.h"
static bool
same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev)
......@@ -215,6 +216,7 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
struct net_device *uplink_dev;
struct mlx5e_priv *out_priv;
struct mlx5_eswitch *esw;
bool is_uplink_rep;
int *ifindexes;
int if_count;
int err;
......@@ -229,6 +231,10 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
parse_state->ifindexes[if_count] = out_dev->ifindex;
parse_state->if_count++;
is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
err = mlx5_lag_do_mirred(priv->mdev, out_dev);
if (err)
return err;
out_dev = get_fdb_out_dev(uplink_dev, out_dev);
if (!out_dev)
......@@ -268,6 +274,14 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
rpriv = out_priv->ppriv;
esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
/* If output device is bond master then rules are not explicit
* so we don't attempt to count them.
*/
if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
attr->lag.count = true;
esw_attr->out_count++;
return 0;
......
......@@ -15,6 +15,7 @@
#include <linux/refcount.h>
#include <linux/xarray.h>
#include <linux/if_macvlan.h>
#include <linux/debugfs.h>
#include "lib/fs_chains.h"
#include "en/tc_ct.h"
......@@ -47,6 +48,15 @@
#define ct_dbg(fmt, args...)\
netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
/* debugfs state for CT offload: the counters exposed as read-only files and
 * the directory that holds them.
 */
struct mlx5_tc_ct_debugfs {
struct {
/* incremented once an entry's rules are added, decremented when they are deleted */
atomic_t offloaded;
/* incremented on every failure path of mlx5e_tc_ct_restore_flow() */
atomic_t rx_dropped;
} stats;
/* debugfs directory the stats files are created under */
struct dentry *root;
};
struct mlx5_tc_ct_priv {
struct mlx5_core_dev *dev;
const struct net_device *netdev;
......@@ -66,6 +76,8 @@ struct mlx5_tc_ct_priv {
struct mlx5_ct_fs *fs;
struct mlx5_ct_fs_ops *fs_ops;
spinlock_t ht_lock; /* protects ft entries */
struct mlx5_tc_ct_debugfs debugfs;
};
struct mlx5_ct_flow {
......@@ -520,6 +532,8 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
{
mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
atomic_dec(&ct_priv->debugfs.stats.offloaded);
}
static struct flow_action_entry *
......@@ -1040,6 +1054,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
if (err)
goto err_nat;
atomic_inc(&ct_priv->debugfs.stats.offloaded);
return 0;
err_nat:
......@@ -2064,6 +2079,29 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
return err;
}
/* Create the CT debugfs directory for this namespace ("ct_fdb" for the FDB
 * namespace, "ct_nic" otherwise) under the device's debugfs root, and expose
 * the "offloaded" and "rx_dropped" counters in it as read-only files.
 */
static void
mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
	bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
	struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
	/* The name is one of two fixed strings, so it is selected directly
	 * rather than formatted into a scratch buffer.
	 */
	const char *dirname = is_fdb ? "ct_fdb" : "ct_nic";

	ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
	debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
				&ct_dbgfs->stats.offloaded);
	debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
				&ct_dbgfs->stats.rx_dropped);
}
/* Remove the CT debugfs directory recorded in ct_priv->debugfs.root. */
static void
mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
	struct dentry *dbgfs_root = ct_priv->debugfs.root;

	debugfs_remove_recursive(dbgfs_root);
}
#define INIT_ERR_PREFIX "tc ct offload init failed"
struct mlx5_tc_ct_priv *
......@@ -2139,6 +2177,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
if (err)
goto err_init_fs;
mlx5_ct_tc_create_dbgfs(ct_priv);
return ct_priv;
err_init_fs:
......@@ -2171,6 +2210,7 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
if (!ct_priv)
return;
mlx5_ct_tc_remove_dbgfs(ct_priv);
chains = ct_priv->chains;
ct_priv->fs_ops->destroy(ct_priv->fs);
......@@ -2200,22 +2240,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
return true;
if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
return false;
goto out_inc_drop;
if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
return false;
goto out_inc_drop;
spin_lock(&ct_priv->ht_lock);
entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
if (!entry) {
spin_unlock(&ct_priv->ht_lock);
return false;
goto out_inc_drop;
}
if (IS_ERR(entry)) {
spin_unlock(&ct_priv->ht_lock);
return false;
goto out_inc_drop;
}
spin_unlock(&ct_priv->ht_lock);
......@@ -2223,4 +2263,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
__mlx5_tc_ct_entry_put(entry);
return true;
out_inc_drop:
atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
return false;
}
......@@ -38,12 +38,11 @@
void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
{
bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev);
bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read);
MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read);
MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write);
MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_read);
MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_write);
}
static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
......
......@@ -155,7 +155,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
list_size = max_list_size;
}
vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
vlans = kvcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
if (!vlans)
return -ENOMEM;
......@@ -171,7 +171,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n",
err);
kfree(vlans);
kvfree(vlans);
return err;
}
......
......@@ -4812,6 +4812,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->vlan_features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_RXCSUM;
netdev->vlan_features |= NETIF_F_RXHASH;
netdev->vlan_features |= NETIF_F_GSO_PARTIAL;
netdev->mpls_features |= NETIF_F_SG;
netdev->mpls_features |= NETIF_F_HW_CSUM;
......@@ -4877,7 +4878,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
NETIF_F_GSO_IPXIP6;
}
netdev->hw_features |= NETIF_F_GSO_PARTIAL;
netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4;
netdev->hw_features |= NETIF_F_GSO_UDP_L4;
netdev->features |= NETIF_F_GSO_UDP_L4;
......@@ -5221,6 +5221,15 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
return max_nch;
}
/* Return the number of TIRs accounted to the PF NIC profile: the indirect
 * TIRs plus one set of direct TIRs sized by the profile's max channel count.
 */
int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev)
{
	/* Indirect TIRs: 2 sets of TTCs (inner + outer steering) */
	const int indir_tirs = 2 * MLX5E_NUM_INDIR_TIRS;

	/* ... and 1 set of direct TIRs */
	return indir_tirs + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile);
}
/* mlx5e generic netdev management API (move to en_common.c) */
int mlx5e_priv_init(struct mlx5e_priv *priv,
const struct mlx5e_profile *profile,
......
......@@ -399,7 +399,9 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
{
int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
bool is_uplink_rep = mlx5e_is_uplink_rep(priv);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
int n, tc, nch, num_sqs = 0;
......@@ -411,9 +413,13 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
ptp_sq = !!(priv->channels.ptp &&
MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS));
nch = priv->channels.num + ptp_sq;
/* +2 for xdpsqs, they don't exist on the ptp channel but will not be
* counted for by num_sqs.
*/
if (is_uplink_rep)
sqs_per_channel += 2;
sqs = kcalloc(nch * mlx5e_get_dcb_num_tc(&priv->channels.params), sizeof(*sqs),
GFP_KERNEL);
sqs = kvcalloc(nch * sqs_per_channel, sizeof(*sqs), GFP_KERNEL);
if (!sqs)
goto out;
......@@ -421,6 +427,13 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
c = priv->channels.c[n];
for (tc = 0; tc < c->num_tc; tc++)
sqs[num_sqs++] = c->sq[tc].sqn;
if (is_uplink_rep) {
if (c->xdp)
sqs[num_sqs++] = c->rq_xdpsq.sqn;
sqs[num_sqs++] = c->xdpsq.sqn;
}
}
if (ptp_sq) {
struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
......@@ -430,7 +443,7 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
}
err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs);
kfree(sqs);
kvfree(sqs);
out:
if (err)
......@@ -604,10 +617,16 @@ bool mlx5e_eswitch_vf_rep(const struct net_device *netdev)
return netdev->netdev_ops == &mlx5e_netdev_ops_rep;
}
/* One indirect TIR set for outer. Inner not supported in reps. */
#define REP_NUM_INDIR_TIRS MLX5E_NUM_INDIR_TIRS
static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev)
{
return (1 << MLX5_CAP_GEN(mdev, log_max_tir)) /
mlx5_eswitch_get_total_vports(mdev);
int max_tir_num = 1 << MLX5_CAP_GEN(mdev, log_max_tir);
int num_vports = mlx5_eswitch_get_total_vports(mdev);
return (max_tir_num - mlx5e_get_pf_num_tirs(mdev)
- (num_vports * REP_NUM_INDIR_TIRS)) / num_vports;
}
static void mlx5e_build_rep_params(struct net_device *netdev)
......@@ -1269,7 +1288,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
struct mlx5e_rep_priv *rpriv;
int err;
rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
rpriv = kvzalloc(sizeof(*rpriv), GFP_KERNEL);
if (!rpriv)
return -ENOMEM;
......@@ -1284,7 +1303,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
err = mlx5e_vport_vf_rep_load(dev, rep);
if (err)
kfree(rpriv);
kvfree(rpriv);
return err;
}
......@@ -1312,7 +1331,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
priv->profile->cleanup(priv);
mlx5e_destroy_netdev(priv);
free_ppriv:
kfree(ppriv); /* mlx5e_rep_priv */
kvfree(ppriv); /* mlx5e_rep_priv */
}
static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
......
......@@ -1740,6 +1740,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
free_flow_post_acts(flow);
if (flow->attr->lag.count)
mlx5_lag_del_mpesw_rule(esw->dev);
kvfree(attr->esw_attr->rx_tun_attr);
kvfree(attr->parse_attr);
kfree(flow->attr);
......@@ -3788,12 +3791,25 @@ static bool is_lag_dev(struct mlx5e_priv *priv,
same_hw_reps(priv, peer_netdev));
}
/* Return true when out_dev is an uplink representor and the device reports
 * both the port_select_flow_table and create_lag_when_not_master_up
 * capabilities.
 */
static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
{
	struct mlx5_core_dev *mdev = priv->mdev;

	if (!mlx5e_eswitch_uplink_rep(out_dev))
		return false;

	if (!MLX5_CAP_PORT_SELECTION(mdev, port_select_flow_table))
		return false;

	return !!MLX5_CAP_GEN(mdev, create_lag_when_not_master_up);
}
bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
struct net_device *out_dev)
{
if (is_merged_eswitch_vfs(priv, out_dev))
return true;
if (is_multiport_eligible(priv, out_dev))
return true;
if (is_lag_dev(priv, out_dev))
return true;
......@@ -4050,6 +4066,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_core_dev *in_mdev)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
......@@ -4085,17 +4102,26 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
if (flow->attr->lag.count) {
err = mlx5_lag_add_mpesw_rule(esw->dev);
if (err)
goto err_free;
}
err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
complete_all(&flow->init_done);
if (err) {
if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
goto err_free;
goto err_lag;
add_unready_flow(flow);
}
return flow;
err_lag:
if (flow->attr->lag.count)
mlx5_lag_del_mpesw_rule(esw->dev);
err_free:
mlx5e_flow_put(priv, flow);
out:
......
......@@ -85,6 +85,13 @@ struct mlx5_flow_attr {
u32 flags;
struct list_head list;
struct mlx5e_post_act_handle *post_act_handle;
struct {
/* Indicate whether the parsed flow should be counted for lag mode decision
* making
*/
bool count;
} lag;
/* keep this union last */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
......
......@@ -49,6 +49,7 @@
#include "en_tc.h"
#include "en/mapping.h"
#include "devlink.h"
#include "lag/lag.h"
#define mlx5_esw_for_each_rep(esw, i, rep) \
xa_for_each(&((esw)->offloads.vport_reps), i, rep)
......@@ -418,6 +419,8 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
dest[dest_idx].vport.vhca_id =
MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
if (mlx5_lag_mpesw_is_activated(esw->dev))
dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
}
if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
if (pkt_reformat) {
......
......@@ -221,7 +221,6 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev,
return 0;
}
#ifdef CONFIG_MLX5_EN_RXNFC
static u32 mlx5i_flow_type_mask(u32 flow_type)
{
return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
......@@ -243,9 +242,18 @@ static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
/* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part
* of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc,
* to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
* is compiled out via CONFIG_MLX5_EN_RXNFC=n.
*/
if (info->cmd == ETHTOOL_GRXRINGS) {
info->data = priv->channels.params.num_channels;
return 0;
}
return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs);
}
#endif
const struct ethtool_ops mlx5i_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
......@@ -263,10 +271,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
.get_coalesce = mlx5i_get_coalesce,
.set_coalesce = mlx5i_set_coalesce,
.get_ts_info = mlx5i_get_ts_info,
#ifdef CONFIG_MLX5_EN_RXNFC
.get_rxnfc = mlx5i_get_rxnfc,
.set_rxnfc = mlx5i_set_rxnfc,
#endif
.get_link_ksettings = mlx5i_get_link_ksettings,
.get_link = ethtool_op_get_link,
};
......
......@@ -5,12 +5,13 @@
static char *get_str_mode_type(struct mlx5_lag *ldev)
{
if (ldev->flags & MLX5_LAG_FLAG_ROCE)
return "roce";
if (ldev->flags & MLX5_LAG_FLAG_SRIOV)
return "switchdev";
if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH)
return "multipath";
switch (ldev->mode) {
case MLX5_LAG_MODE_ROCE: return "roce";
case MLX5_LAG_MODE_SRIOV: return "switchdev";
case MLX5_LAG_MODE_MULTIPATH: return "multipath";
case MLX5_LAG_MODE_MPESW: return "multiport_eswitch";
default: return "invalid";
}
return NULL;
}
......@@ -43,11 +44,11 @@ static int port_sel_mode_show(struct seq_file *file, void *priv)
ldev = dev->priv.lag;
mutex_lock(&ldev->lock);
if (__mlx5_lag_is_active(ldev))
mode = get_str_port_sel_mode(ldev->flags);
mode = mlx5_get_str_port_sel_mode(ldev);
else
ret = -EINVAL;
mutex_unlock(&ldev->lock);
if (ret || !mode)
if (ret)
return ret;
seq_printf(file, "%s\n", mode);
......@@ -79,7 +80,7 @@ static int flags_show(struct seq_file *file, void *priv)
mutex_lock(&ldev->lock);
lag_active = __mlx5_lag_is_active(ldev);
if (lag_active)
shared_fdb = ldev->shared_fdb;
shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
mutex_unlock(&ldev->lock);
if (!lag_active)
......@@ -103,7 +104,7 @@ static int mapping_show(struct seq_file *file, void *priv)
mutex_lock(&ldev->lock);
lag_active = __mlx5_lag_is_active(ldev);
if (lag_active) {
if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) {
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
&num_ports);
hash = true;
......
......@@ -41,6 +41,7 @@
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
#include "mpesw.h"
enum {
MLX5_LAG_EGRESS_PORT_1 = 1,
......@@ -53,21 +54,33 @@ enum {
*/
static DEFINE_SPINLOCK(lag_lock);
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, bool shared_fdb, u8 flags)
static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
if (mode == MLX5_LAG_MODE_MPESW)
return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
unsigned long flags)
{
bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
int port_sel_mode = get_port_sel_mode(mode, flags);
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
void *lag_ctx;
lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) {
if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
} else {
MLX5_SET(lagc, lag_ctx, port_select_mode,
MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT);
}
MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
return mlx5_cmd_exec_in(dev, create_lag, in);
}
......@@ -139,7 +152,7 @@ void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
struct mlx5_lag *ldev,
struct lag_tracker *tracker,
u8 flags)
unsigned long flags)
{
char buf[MLX5_MAX_PORTS * 10 + 1] = {};
u8 enabled_ports[MLX5_MAX_PORTS] = {};
......@@ -150,7 +163,7 @@ static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
int i;
int j;
if (flags & MLX5_LAG_FLAG_HASH_BASED) {
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
&num_enabled);
for (i = 0; i < num_enabled; i++) {
......@@ -187,7 +200,8 @@ static void mlx5_ldev_free(struct kref *ref)
if (ldev->nb.notifier_call)
unregister_netdevice_notifier_net(&init_net, &ldev->nb);
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
mlx5_lag_mpesw_cleanup(ldev);
cancel_work_sync(&ldev->mpesw_work);
destroy_workqueue(ldev->wq);
mutex_destroy(&ldev->lock);
kfree(ldev);
......@@ -227,11 +241,14 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
ldev->nb.notifier_call = NULL;
mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
}
ldev->mode = MLX5_LAG_MODE_NONE;
err = mlx5_lag_mp_init(ldev);
if (err)
mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
err);
mlx5_lag_mpesw_init(ldev);
ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
ldev->buckets = 1;
......@@ -252,12 +269,12 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
return ldev->mode == MLX5_LAG_MODE_ROCE;
}
static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
return ldev->mode == MLX5_LAG_MODE_SRIOV;
}
/* Create a mapping between steering slots and active ports.
......@@ -372,7 +389,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED)
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
return mlx5_lag_port_sel_modify(ldev, ports);
return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}
......@@ -404,19 +421,19 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
memcpy(ldev->v2p_map, ports, sizeof(ports));
mlx5_lag_print_mapping(dev0, ldev, tracker,
ldev->flags);
ldev->mode_flags);
break;
}
}
if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
!(ldev->flags & MLX5_LAG_FLAG_ROCE))
!(ldev->mode == MLX5_LAG_MODE_ROCE))
mlx5_lag_drop_rule_setup(ldev, tracker);
}
#define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
struct lag_tracker *tracker, u8 *flags)
unsigned long *flags)
{
struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
......@@ -424,7 +441,7 @@ static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
/* Four ports are support only in hash mode */
if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
return -EINVAL;
*flags |= MLX5_LAG_FLAG_HASH_BASED;
set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
if (ldev->ports > 2)
ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
}
......@@ -432,49 +449,67 @@ static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
return 0;
}
static int mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
struct lag_tracker *tracker, u8 *flags)
static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
enum mlx5_lag_mode mode,
unsigned long *flags)
{
struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
if (mode == MLX5_LAG_MODE_MPESW)
return;
if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
*flags |= MLX5_LAG_FLAG_HASH_BASED;
return 0;
set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
}
static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
struct lag_tracker *tracker, u8 *flags)
static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
struct lag_tracker *tracker, bool shared_fdb,
unsigned long *flags)
{
bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
*flags = 0;
if (shared_fdb)
set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
if (roce_lag)
return mlx5_lag_set_port_sel_mode_roce(ldev, tracker, flags);
return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags);
return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
return 0;
}
char *get_str_port_sel_mode(u8 flags)
char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev)
{
if (flags & MLX5_LAG_FLAG_HASH_BASED)
return "hash";
return "queue_affinity";
int port_sel_mode = get_port_sel_mode(ldev->mode, ldev->mode_flags);
switch (port_sel_mode) {
case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
default: return "invalid";
}
}
static int mlx5_create_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
bool shared_fdb, u8 flags)
enum mlx5_lag_mode mode,
unsigned long flags)
{
bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
int err;
mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
if (tracker)
mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
shared_fdb, get_str_port_sel_mode(flags));
shared_fdb, mlx5_get_str_port_sel_mode(ldev));
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, shared_fdb, flags);
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
if (err) {
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
......@@ -503,33 +538,35 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
u8 flags,
enum mlx5_lag_mode mode,
bool shared_fdb)
{
bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
unsigned long flags = 0;
int err;
err = mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
if (err)
return err;
mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
if (flags & MLX5_LAG_FLAG_HASH_BASED) {
err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
ldev->v2p_map);
if (err) {
mlx5_core_err(dev0,
"Failed to create LAG port selection(%d)\n",
err);
return err;
if (mode != MLX5_LAG_MODE_MPESW) {
mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
ldev->v2p_map);
if (err) {
mlx5_core_err(dev0,
"Failed to create LAG port selection(%d)\n",
err);
return err;
}
}
}
err = mlx5_create_lag(ldev, tracker, shared_fdb, flags);
err = mlx5_create_lag(ldev, tracker, mode, flags);
if (err) {
if (flags & MLX5_LAG_FLAG_HASH_BASED)
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
mlx5_lag_port_sel_destroy(ldev);
if (roce_lag)
mlx5_core_err(dev0,
......@@ -541,12 +578,12 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
return err;
}
if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
!roce_lag)
mlx5_lag_drop_rule_setup(ldev, tracker);
ldev->flags |= flags;
ldev->shared_fdb = shared_fdb;
ldev->mode = mode;
ldev->mode_flags = flags;
return 0;
}
......@@ -556,16 +593,17 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
bool roce_lag = __mlx5_lag_is_roce(ldev);
u8 flags = ldev->flags;
unsigned long flags = ldev->mode_flags;
int err;
ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
ldev->mode = MLX5_LAG_MODE_NONE;
ldev->mode_flags = 0;
mlx5_lag_mp_reset(ldev);
if (ldev->shared_fdb) {
if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
dev1->priv.eswitch);
ldev->shared_fdb = false;
clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
}
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
......@@ -582,7 +620,7 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
return err;
}
if (flags & MLX5_LAG_FLAG_HASH_BASED)
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
mlx5_lag_port_sel_destroy(ldev);
if (mlx5_lag_has_drop_rule(ldev))
mlx5_lag_drop_rule_cleanup(ldev);
......@@ -656,11 +694,11 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
}
}
static void mlx5_disable_lag(struct mlx5_lag *ldev)
void mlx5_disable_lag(struct mlx5_lag *ldev)
{
bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
bool shared_fdb = ldev->shared_fdb;
bool roce_lag;
int err;
int i;
......@@ -693,7 +731,7 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
}
}
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
......@@ -729,6 +767,18 @@ static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
return roce_lag;
}
/* Return true when bonding is requested, a LAG is already active, and that
 * LAG is not in multiport eswitch mode.
 */
static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	if (!do_bond || !__mlx5_lag_is_active(ldev))
		return false;

	return ldev->mode != MLX5_LAG_MODE_MPESW;
}
/*
 * Tell mlx5_do_bond() whether the active LAG should be disabled.
 *
 * True only when bonding is no longer requested, a LAG is still active, and
 * the active mode is not MLX5_LAG_MODE_MPESW.
 */
static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	if (do_bond)
		return false;

	if (!__mlx5_lag_is_active(ldev))
		return false;

	/* A LAG in multiport eswitch mode is excluded from disabling here */
	return ldev->mode != MLX5_LAG_MODE_MPESW;
}
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
......@@ -759,8 +809,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
mlx5_lag_remove_devices(ldev);
err = mlx5_activate_lag(ldev, &tracker,
roce_lag ? MLX5_LAG_FLAG_ROCE :
MLX5_LAG_FLAG_SRIOV,
roce_lag ? MLX5_LAG_MODE_ROCE :
MLX5_LAG_MODE_SRIOV,
shared_fdb);
if (err) {
if (shared_fdb || roce_lag)
......@@ -791,9 +841,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
return;
}
}
} else if (do_bond && __mlx5_lag_is_active(ldev)) {
} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
mlx5_modify_lag(ldev, &tracker);
} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
mlx5_disable_lag(ldev);
}
}
......@@ -831,7 +881,6 @@ static void mlx5_do_bond_work(struct work_struct *work)
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
struct net_device *ndev,
struct netdev_notifier_changeupper_info *info)
{
struct net_device *upper = info->upper_dev, *ndev_tmp;
......@@ -968,6 +1017,7 @@ static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
return 1;
}
/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
......@@ -987,8 +1037,7 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
switch (event) {
case NETDEV_CHANGEUPPER:
changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
ptr);
changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
break;
case NETDEV_CHANGELOWERSTATE:
changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
......@@ -1156,7 +1205,7 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
mutex_lock(&ldev->lock);
mlx5_ldev_remove_netdev(ldev, netdev);
ldev->flags &= ~MLX5_LAG_FLAG_READY;
clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
lag_is_active = __mlx5_lag_is_active(ldev);
mutex_unlock(&ldev->lock);
......@@ -1183,7 +1232,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
break;
if (i >= ldev->ports)
ldev->flags |= MLX5_LAG_FLAG_READY;
set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
mutex_unlock(&ldev->lock);
mlx5_queue_bond_work(ldev, 0);
}
......@@ -1252,7 +1301,8 @@ bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
spin_lock(&lag_lock);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
res = ldev && __mlx5_lag_is_sriov(ldev) &&
test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
spin_unlock(&lag_lock);
return res;
......
......@@ -10,6 +10,7 @@
#include "mlx5_core.h"
#include "mp.h"
#include "port_sel.h"
#include "mpesw.h"
enum {
MLX5_LAG_P1,
......@@ -17,16 +18,21 @@ enum {
};
enum {
MLX5_LAG_FLAG_ROCE = 1 << 0,
MLX5_LAG_FLAG_SRIOV = 1 << 1,
MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
MLX5_LAG_FLAG_READY = 1 << 3,
MLX5_LAG_FLAG_HASH_BASED = 1 << 4,
MLX5_LAG_FLAG_NDEVS_READY,
};
#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
MLX5_LAG_FLAG_MULTIPATH | \
MLX5_LAG_FLAG_HASH_BASED)
/*
 * Bit numbers (not masks) within mlx5_lag.mode_flags; they are queried with
 * test_bit().
 */
enum {
/* When set, deactivating the LAG also calls mlx5_lag_port_sel_destroy() */
MLX5_LAG_MODE_FLAG_HASH_BASED,
/* When set, deactivating the LAG also destroys the single (shared) FDB */
MLX5_LAG_MODE_FLAG_SHARED_FDB,
};
/* The single mode a LAG object is in, stored in mlx5_lag.mode. */
enum mlx5_lag_mode {
/* No LAG is active; __mlx5_lag_is_active() compares against this value */
MLX5_LAG_MODE_NONE,
/* Chosen by mlx5_do_bond() when roce_lag is true */
MLX5_LAG_MODE_ROCE,
/* Chosen by mlx5_do_bond() when roce_lag is false */
MLX5_LAG_MODE_SRIOV,
/* Activated from the FIB route event handler */
MLX5_LAG_MODE_MULTIPATH,
/* Multiport eswitch; activated by mlx5_lag_add_mpesw_rule() */
MLX5_LAG_MODE_MPESW,
};
struct lag_func {
struct mlx5_core_dev *dev;
......@@ -47,22 +53,25 @@ struct lag_tracker {
* It serves both its phys functions.
*/
struct mlx5_lag {
u8 flags;
enum mlx5_lag_mode mode;
unsigned long mode_flags;
unsigned long state_flags;
u8 ports;
u8 buckets;
int mode_changes_in_progress;
bool shared_fdb;
u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
struct kref ref;
struct lag_func pf[MLX5_MAX_PORTS];
struct lag_tracker tracker;
struct workqueue_struct *wq;
struct delayed_work bond_work;
struct work_struct mpesw_work;
struct notifier_block nb;
struct lag_mp lag_mp;
struct mlx5_lag_port_sel port_sel;
/* Protect lag fields/state changes */
struct mutex lock;
struct lag_mpesw lag_mpesw;
};
static inline struct mlx5_lag *
......@@ -74,29 +83,33 @@ mlx5_lag_dev(struct mlx5_core_dev *dev)
static inline bool
__mlx5_lag_is_active(struct mlx5_lag *ldev)
{
return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
return ldev->mode != MLX5_LAG_MODE_NONE;
}
static inline bool
mlx5_lag_is_ready(struct mlx5_lag *ldev)
{
return ldev->flags & MLX5_LAG_FLAG_READY;
return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
}
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
u8 flags,
enum mlx5_lag_mode mode,
bool shared_fdb);
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);
bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev);
void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
char *get_str_port_sel_mode(u8 flags);
char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev);
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
u8 *ports, int *num_enabled);
void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
void mlx5_ldev_remove_debugfs(struct dentry *dbg);
void mlx5_disable_lag(struct mlx5_lag *ldev);
#endif /* __MLX5_LAG_H__ */
......@@ -11,7 +11,7 @@
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
......@@ -179,7 +179,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
struct lag_tracker tracker;
tracker = ldev->tracker;
mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
}
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "eswitch.h"
#include "lib/mlx5.h"
/*
 * Work handler for the mpesw_work item embedded in struct mlx5_lag.
 *
 * Recovers the owning LAG object from @work and disables the LAG while
 * holding the LAG lock.
 */
void mlx5_mpesw_work(struct work_struct *work)
{
	struct mlx5_lag *lag_dev;

	lag_dev = container_of(work, struct mlx5_lag, mpesw_work);

	mutex_lock(&lag_dev->lock);
	mlx5_disable_lag(lag_dev);
	mutex_unlock(&lag_dev->lock);
}
static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = dev->priv.lag;
if (!queue_work(ldev->wq, &ldev->mpesw_work))
mlx5_core_warn(dev, "failed to queue work\n");
}
/*
 * Drop one multiport eswitch rule reference.
 *
 * Does nothing when the device has no LAG object. Otherwise the rule counter
 * is decremented under the LAG lock; when it reaches zero while the LAG is in
 * MPESW mode, disabling of the LAG is queued.
 */
void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *lag_dev = dev->priv.lag;
	int rules_left;

	if (!lag_dev)
		return;

	mutex_lock(&lag_dev->lock);

	/* The counter is always decremented, whatever the current mode is */
	rules_left = atomic_dec_return(&lag_dev->lag_mpesw.mpesw_rule_count);
	if (rules_left == 0 && lag_dev->mode == MLX5_LAG_MODE_MPESW)
		mlx5_lag_disable_mpesw(dev);

	mutex_unlock(&lag_dev->lock);
}
/*
 * Take one multiport eswitch rule reference, activating the LAG in MPESW mode
 * for the first rule.
 *
 * Return: 0 when the device has no LAG object, when this is not the first
 * rule, or when activation succeeds; -EINVAL when the first rule arrives
 * while another LAG mode is active; otherwise the error returned by
 * mlx5_activate_lag().
 *
 * NOTE(review): the rule counter is not rolled back here when an error is
 * returned; presumably the caller balances it with
 * mlx5_lag_del_mpesw_rule() - confirm against the caller's error path.
 */
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *lag_dev = dev->priv.lag;
	bool use_shared_fdb;
	int ret = 0;

	if (!lag_dev)
		return 0;

	mutex_lock(&lag_dev->lock);

	/* Only the rule that takes the counter from 0 to 1 creates the LAG */
	if (atomic_add_return(1, &lag_dev->lag_mpesw.mpesw_rule_count) == 1) {
		if (lag_dev->mode != MLX5_LAG_MODE_NONE) {
			/* Another LAG mode already owns the LAG object */
			ret = -EINVAL;
		} else {
			use_shared_fdb = mlx5_shared_fdb_supported(lag_dev);
			ret = mlx5_activate_lag(lag_dev, NULL,
						MLX5_LAG_MODE_MPESW,
						use_shared_fdb);
			if (ret)
				mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", ret);
		}
	}

	mutex_unlock(&lag_dev->lock);
	return ret;
}
/*
 * Check whether a mirred action towards @out_dev is acceptable.
 *
 * Return: -EOPNOTSUPP when @out_dev is a bond master and the LAG is in MPESW
 * mode; 0 otherwise, including when @out_dev is not a bond master or the
 * device has no LAG object.
 */
int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
{
	struct mlx5_lag *lag_dev = mdev->priv.lag;
	int ret = 0;

	if (!netif_is_bond_master(out_dev) || !lag_dev)
		return 0;

	/* Sample the mode under the LAG lock */
	mutex_lock(&lag_dev->lock);
	if (lag_dev->mode == MLX5_LAG_MODE_MPESW)
		ret = -EOPNOTSUPP;
	mutex_unlock(&lag_dev->lock);

	return ret;
}
bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
{
bool ret;
ret = dev->priv.lag && dev->priv.lag->mode == MLX5_LAG_MODE_MPESW;
return ret;
}
/*
 * Initialise the multiport eswitch state of a LAG object: zero the rule
 * counter and bind mpesw_work to mlx5_mpesw_work().
 */
void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
{
	/* No MPESW rules are accounted yet */
	atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0);

	/* Work item queued by mlx5_lag_disable_mpesw() */
	INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work);
}
/*
 * Synchronously cancel the LAG's delayed bond_work.
 *
 * NOTE(review): this cancels ldev->bond_work, not the ldev->mpesw_work that
 * mlx5_lag_mpesw_init() sets up; confirm that mpesw_work is cancelled or
 * flushed elsewhere before the LAG object is freed.
 */
void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev)
{
cancel_delayed_work_sync(&ldev->bond_work);
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef __MLX5_LAG_MPESW_H__
#define __MLX5_LAG_MPESW_H__
#include "lag.h"
#include "mlx5_core.h"
/* Multiport eswitch (MPESW) state, embedded in struct mlx5_lag as lag_mpesw. */
struct lag_mpesw {
/*
 * NOTE(review): the mpesw.c code initialises and queues mlx5_lag.mpesw_work,
 * not this member - confirm whether this work item is actually used.
 */
struct work_struct mpesw_work;
/*
 * Count of MPESW rules; incremented by mlx5_lag_add_mpesw_rule() and
 * decremented by mlx5_lag_del_mpesw_rule().
 */
atomic_t mpesw_rule_count;
};
void mlx5_mpesw_work(struct work_struct *work);
int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev);
bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
void mlx5_lag_mpesw_init(struct mlx5_lag *ldev);
void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev);
#else
/*
 * No-op stubs used when the eswitch is compiled out.
 *
 * They must be static inline: a plain function definition in a header is
 * emitted by every translation unit that includes it, which results in
 * "multiple definition" link errors.
 */
static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {}
static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {}
#endif
#endif /* __MLX5_LAG_MPESW_H__ */
......@@ -1886,7 +1886,6 @@ static struct pci_driver mlx5_core_driver = {
* Return: Pointer to the associated mlx5_core_dev or NULL.
*/
struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
__acquires(&mdev->intf_state_mutex)
{
struct mlx5_core_dev *mdev;
......@@ -1912,7 +1911,6 @@ EXPORT_SYMBOL(mlx5_vf_get_core_dev);
* access the mdev any more.
*/
void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
__releases(&mdev->intf_state_mutex)
{
/* Drop the device's intf_state_mutex; nothing else is released here */
mutex_unlock(&mdev->intf_state_mutex);
}
......
......@@ -94,8 +94,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
if (msix_vec_count > max_msix)
return -EOVERFLOW;
query_cap = kzalloc(query_sz, GFP_KERNEL);
hca_cap = kzalloc(set_sz, GFP_KERNEL);
query_cap = kvzalloc(query_sz, GFP_KERNEL);
hca_cap = kvzalloc(set_sz, GFP_KERNEL);
if (!hca_cap || !query_cap) {
ret = -ENOMEM;
goto out;
......@@ -118,8 +118,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
kfree(hca_cap);
kfree(query_cap);
kvfree(hca_cap);
kvfree(query_cap);
return ret;
}
......
......@@ -280,7 +280,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) +
req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
out = kzalloc(out_sz, GFP_KERNEL);
out = kvzalloc(out_sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
......@@ -307,7 +307,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
ether_addr_copy(addr_list[i], mac_addr);
}
out:
kfree(out);
kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
......@@ -335,7 +335,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
in = kzalloc(in_sz, GFP_KERNEL);
in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
......@@ -360,7 +360,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
}
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
kfree(in);
kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
......@@ -386,7 +386,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
list_size * MLX5_ST_SZ_BYTES(vlan_layout);
memset(out, 0, sizeof(out));
in = kzalloc(in_sz, GFP_KERNEL);
in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
......@@ -411,7 +411,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
}
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
kfree(in);
kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
......@@ -542,8 +542,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
out_sz += nout * sizeof(*gid);
in = kzalloc(in_sz, GFP_KERNEL);
out = kzalloc(out_sz, GFP_KERNEL);
in = kvzalloc(in_sz, GFP_KERNEL);
out = kvzalloc(out_sz, GFP_KERNEL);
if (!in || !out) {
err = -ENOMEM;
goto out;
......@@ -573,8 +573,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
gid->global.interface_id = tmp->global.interface_id;
out:
kfree(in);
kfree(out);
kvfree(in);
kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid);
......@@ -607,8 +607,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
out_sz += nout * MLX5_ST_SZ_BYTES(pkey);
in = kzalloc(in_sz, GFP_KERNEL);
out = kzalloc(out_sz, GFP_KERNEL);
in = kvzalloc(in_sz, GFP_KERNEL);
out = kvzalloc(out_sz, GFP_KERNEL);
if (!in || !out) {
err = -ENOMEM;
goto out;
......@@ -638,8 +638,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
*pkey = MLX5_GET_PR(pkey, pkarr, pkey);
out:
kfree(in);
kfree(out);
kvfree(in);
kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey);
......@@ -658,7 +658,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
out = kzalloc(out_sz, GFP_KERNEL);
out = kvzalloc(out_sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
......@@ -717,7 +717,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
system_image_guid);
ex:
kfree(out);
kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context);
......@@ -728,7 +728,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *rep;
int err;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep)
return -ENOMEM;
......@@ -736,7 +736,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
if (!err)
*sys_image_guid = rep->sys_image_guid;
kfree(rep);
kvfree(rep);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid);
......@@ -747,7 +747,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *rep;
int err;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep)
return -ENOMEM;
......@@ -755,7 +755,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
if (!err)
*node_guid = rep->node_guid;
kfree(rep);
kvfree(rep);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
......@@ -770,7 +770,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
int err;
out = kzalloc(outlen, GFP_KERNEL);
out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
......@@ -786,7 +786,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
nic_vport_context.promisc_all);
out:
kfree(out);
kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
......@@ -874,7 +874,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
int value;
int err;
out = kzalloc(outlen, GFP_KERNEL);
out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
......@@ -891,7 +891,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
*status = !value;
out:
kfree(out);
kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb);
......@@ -1033,7 +1033,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
mlx5_core_dbg(dev, "vf %d\n", vf);
is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
in = kzalloc(in_sz, GFP_KERNEL);
in = kvzalloc(in_sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
......@@ -1065,7 +1065,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
req->cap_mask1_perm);
err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in);
ex:
kfree(in);
kvfree(in);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
......
......@@ -272,6 +272,8 @@ struct mlx5_cmd_stats {
u32 last_failed_errno;
/* last bad status returned by FW */
u8 last_failed_mbox_status;
/* last command failed syndrome returned by FW */
u32 last_failed_syndrome;
struct dentry *root;
/* protect command average calculations */
spinlock_t lock;
......@@ -1051,9 +1053,14 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
int size_in, void *data_out, int size_out,
u16 reg_num, int arg, int write);
int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
int node);
/*
 * Convenience wrapper around mlx5_db_alloc_node() that passes the device's
 * own dev->priv.numa_node as the node argument.
 */
static inline int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
{
	int node = dev->priv.numa_node;

	return mlx5_db_alloc_node(dev, db, node);
}
void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
const char *mlx5_command_str(int command);
......
......@@ -1359,7 +1359,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 vhca_resource_manager[0x1];
u8 hca_cap_2[0x1];
u8 reserved_at_21[0x1];
u8 create_lag_when_not_master_up[0x1];
u8 dtor[0x1];
u8 event_on_vhca_state_teardown_request[0x1];
u8 event_on_vhca_state_in_use[0x1];
......@@ -10816,7 +10816,8 @@ struct mlx5_ifc_dcbx_param_bits {
enum {
MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY = 0,
MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT,
MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT = 1,
MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW = 2,
};
struct mlx5_ifc_lagc_bits {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册