提交 5c65c564 编写于 作者: O Or Gerlitz 提交者: Saeed Mahameed

net/mlx5e: Support offloading TC NIC hairpin flows

We refer to TC NIC rule that involves forwarding as "hairpin".

All hairpin rules from the current NIC device (called "func" in
the code) to a given NIC device ("peer") are steered into the
same hairpin RQ/SQ pair.

The hairpin pair is set on demand and removed when there are no
TC rules that need it.

Here's a TC rule that matches on icmp, does header re-write of the
dst mac and hairpin from RX/enp1s2f1 to TX/enp1s2f2 (enp1s2f1/2 are
two mlx5 devices):

tc filter add dev enp1s2f1 protocol ip parent ffff: prio 2
    flower skip_sw ip_proto icmp
     action pedit ex munge eth dst set 10:22:33:44:55:66 pipe
     action mirred egress redirect dev enp1s2f2
Signed-off-by: NOr Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: NSaeed Mahameed <saeedm@mellanox.com>
上级 77ab67b7
......@@ -659,6 +659,7 @@ struct mlx5e_tc_table {
struct rhashtable ht;
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
DECLARE_HASHTABLE(hairpin_tbl, 8);
};
struct mlx5e_vlan_table {
......
......@@ -56,12 +56,14 @@ struct mlx5_nic_flow_attr {
u32 action;
u32 flow_tag;
u32 mod_hdr_id;
u32 hairpin_tirn;
};
enum {
MLX5E_TC_FLOW_ESWITCH = BIT(0),
MLX5E_TC_FLOW_NIC = BIT(1),
MLX5E_TC_FLOW_OFFLOADED = BIT(2),
MLX5E_TC_FLOW_HAIRPIN = BIT(3),
};
struct mlx5e_tc_flow {
......@@ -71,6 +73,7 @@ struct mlx5e_tc_flow {
struct mlx5_flow_handle *rule;
struct list_head encap; /* flows sharing the same encap ID */
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
struct list_head hairpin; /* flows sharing the same hairpin */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
......@@ -101,6 +104,17 @@ struct mlx5e_hairpin {
u32 tirn;
};
struct mlx5e_hairpin_entry {
/* a node of a hash table which keeps all the hairpin entries */
struct hlist_node hairpin_hlist;
/* flows sharing the same hairpin */
struct list_head flows;
int peer_ifindex;
struct mlx5e_hairpin *hp;
};
struct mod_hdr_key {
int num_actions;
void *actions;
......@@ -319,6 +333,98 @@ static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
kvfree(hp);
}
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
int peer_ifindex)
{
struct mlx5e_hairpin_entry *hpe;
hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
hairpin_hlist, peer_ifindex) {
if (hpe->peer_ifindex == peer_ifindex)
return hpe;
}
return NULL;
}
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5e_tc_flow_parse_attr *parse_attr)
{
int peer_ifindex = parse_attr->mirred_ifindex;
struct mlx5_hairpin_params params;
struct mlx5e_hairpin_entry *hpe;
struct mlx5e_hairpin *hp;
int err;
if (!MLX5_CAP_GEN(priv->mdev, hairpin)) {
netdev_warn(priv->netdev, "hairpin is not supported\n");
return -EOPNOTSUPP;
}
hpe = mlx5e_hairpin_get(priv, peer_ifindex);
if (hpe)
goto attach_flow;
hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
if (!hpe)
return -ENOMEM;
INIT_LIST_HEAD(&hpe->flows);
hpe->peer_ifindex = peer_ifindex;
params.log_data_size = 15;
params.log_data_size = min_t(u8, params.log_data_size,
MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
params.log_data_size = max_t(u8, params.log_data_size,
MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
params.q_counter = priv->q_counter;
hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
if (IS_ERR(hp)) {
err = PTR_ERR(hp);
goto create_hairpin_err;
}
netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x log data size %d\n",
hp->tirn, hp->pair->rqn, hp->pair->peer_mdev->priv.name,
hp->pair->sqn, params.log_data_size);
hpe->hp = hp;
hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, peer_ifindex);
attach_flow:
flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
list_add(&flow->hairpin, &hpe->flows);
return 0;
create_hairpin_err:
kfree(hpe);
return err;
}
static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
struct list_head *next = flow->hairpin.next;
list_del(&flow->hairpin);
/* no more hairpin flows for us, release the hairpin pair */
if (list_empty(next)) {
struct mlx5e_hairpin_entry *hpe;
hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
hpe->hp->pair->peer_mdev->priv.name);
mlx5e_hairpin_destroy(hpe->hp);
hash_del(&hpe->hairpin_hlist);
kfree(hpe);
}
}
static struct mlx5_flow_handle *
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
......@@ -326,7 +432,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
{
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
.flow_tag = attr->flow_tag,
......@@ -335,18 +441,33 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5_fc *counter = NULL;
struct mlx5_flow_handle *rule;
bool table_created = false;
int err;
int err, dest_ix = 0;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = priv->fs.vlan.ft.t;
} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(dev, true);
if (IS_ERR(counter))
return ERR_CAST(counter);
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
if (err) {
rule = ERR_PTR(err);
goto err_add_hairpin_flow;
}
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
dest[dest_ix].tir_num = attr->hairpin_tirn;
} else {
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[dest_ix].ft = priv->fs.vlan.ft.t;
}
dest_ix++;
}
dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest.counter = counter;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(dev, true);
if (IS_ERR(counter)) {
rule = ERR_CAST(counter);
goto err_fc_create;
}
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest[dest_ix].counter = counter;
dest_ix++;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
......@@ -389,7 +510,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
&flow_act, &dest, 1);
&flow_act, dest, dest_ix);
if (IS_ERR(rule))
goto err_add_rule;
......@@ -406,7 +527,10 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
mlx5e_detach_mod_hdr(priv, flow);
err_create_mod_hdr_id:
mlx5_fc_destroy(dev, counter);
err_fc_create:
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
mlx5e_hairpin_flow_del(priv, flow);
err_add_hairpin_flow:
return rule;
}
......@@ -427,6 +551,9 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
mlx5e_hairpin_flow_del(priv, flow);
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
......@@ -1519,6 +1646,20 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
return true;
}
static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
{
struct mlx5_core_dev *fmdev, *pmdev;
u16 func_id, peer_id;
fmdev = priv->mdev;
pmdev = peer_priv->mdev;
func_id = (u16)((fmdev->pdev->bus->number << 8) | PCI_SLOT(fmdev->pdev->devfn));
peer_id = (u16)((pmdev->pdev->bus->number << 8) | PCI_SLOT(pmdev->pdev->devfn));
return (func_id == peer_id);
}
static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow)
......@@ -1563,6 +1704,23 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return -EOPNOTSUPP;
}
if (is_tcf_mirred_egress_redirect(a)) {
struct net_device *peer_dev = tcf_mirred_dev(a);
if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
same_hw_devs(priv, netdev_priv(peer_dev))) {
parse_attr->mirred_ifindex = peer_dev->ifindex;
flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
} else {
netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
peer_dev->name);
return -EINVAL;
}
continue;
}
if (is_tcf_skbedit_mark(a)) {
u32 mark = tcf_skbedit_mark(a);
......@@ -2285,6 +2443,7 @@ int mlx5e_tc_init(struct mlx5e_priv *priv)
struct mlx5e_tc_table *tc = &priv->fs.tc;
hash_init(tc->mod_hdr_tbl);
hash_init(tc->hairpin_tbl);
tc->ht_params = mlx5e_tc_flow_ht_params;
return rhashtable_init(&tc->ht, &tc->ht_params);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册