提交 93698321 编写于 作者: D David S. Miller

Merge tag 'mlx5e-updates-2018-12-10' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed:

====================
mlx5e-updates-2018-12-10 (gre)

This patch set adds GRE offloading support to Mellanox ethernet driver.

Patches 1-5 replace the existing egdev mechanism with the new TC indirect
block binds mechanism that was introduced by Netronome:
7f76fa36 ("net: sched: register callbacks for indirect tc block binds")

Patches 6-9 add GRE offloading support along with some required
refactoring work.

Patch 10, Add netif_is_gretap()/netif_is_ip6gretap()
 - Changed the is_gretap_dev and is_ip6gretap_dev logic from structure
   comparison to string comparison of the rtnl_link_ops kind field.

Patch 11, add GRE offloading support to mlx5.

Patch 12 removes the egdev mechanism from TC as it is no longer used by
any of the drivers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -647,8 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
flags, local_page_list, NULL, NULL);
up_read(&owning_mm->mmap_sem);
if (npages < 0)
if (npages < 0) {
if (npages != -EAGAIN)
pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
else
pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
break;
}
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
mutex_lock(&umem_odp->umem_mutex);
......@@ -666,8 +671,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
ret = ib_umem_odp_map_dma_single_page(
umem_odp, k, local_page_list[j],
access_mask, current_seq);
if (ret < 0)
if (ret < 0) {
if (ret != -EAGAIN)
pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
else
pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
break;
}
p = page_to_phys(local_page_list[j]);
k++;
......
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
......
......@@ -35,6 +35,7 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "srq.h"
static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
......@@ -81,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
!((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
return cqe;
} else {
......@@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_core_srq *msrq = NULL;
if (qp->ibqp.xrcd) {
msrq = mlx5_core_get_srq(dev->mdev,
be32_to_cpu(cqe->srqn));
msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
srq = to_mibsrq(msrq);
} else {
srq = to_msrq(qp->ibqp.srq);
......@@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
}
wc->byte_len = be32_to_cpu(cqe->byte_cnt);
switch (cqe->op_own >> 4) {
switch (get_cqe_opcode(cqe)) {
case MLX5_CQE_RESP_WR_IMM:
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
wc->wc_flags = IB_WC_WITH_IMM;
......@@ -537,7 +537,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
*/
rmb();
opcode = cqe64->op_own >> 4;
opcode = get_cqe_opcode(cqe64);
if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
if (likely(cq->resize_buf)) {
free_cq_buf(dev, &cq->buf);
......@@ -1295,7 +1295,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
return -EINVAL;
}
while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
(i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
......
......@@ -4,6 +4,7 @@
*/
#include "ib_rep.h"
#include "srq.h"
static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
......@@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_SRQ,
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
......
......@@ -60,6 +60,7 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
#include "srq.h"
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
......@@ -82,10 +83,13 @@ static char mlx5_version[] =
struct mlx5_ib_event_work {
struct work_struct work;
struct mlx5_core_dev *dev;
void *context;
enum mlx5_dev_event event;
unsigned long param;
union {
struct mlx5_ib_dev *dev;
struct mlx5_ib_multiport_info *mpi;
};
bool is_slave;
unsigned int event;
void *param;
};
enum {
......@@ -2669,11 +2673,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
ntohs(ib_spec->gre.val.protocol));
memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
gre_key_h),
gre_key.nvgre.hi),
&ib_spec->gre.mask.key,
sizeof(ib_spec->gre.mask.key));
memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
gre_key_h),
gre_key.nvgre.hi),
&ib_spec->gre.val.key,
sizeof(ib_spec->gre.val.key));
break;
......@@ -4226,6 +4230,63 @@ static void delay_drop_handler(struct work_struct *work)
mutex_unlock(&delay_drop->lock);
}
/*
 * Dispatch a GENERAL event by its EQE sub-type.
 *
 * Only the delay-drop timeout sub-type is acted upon: its work item is
 * scheduled. Every other sub-type is ignored. @ibev is accepted for symmetry
 * with the other event helpers but is not touched here.
 */
static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
				 struct ib_event *ibev)
{
	if (eqe->sub_type != MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT)
		return; /* do nothing */

	schedule_work(&ibdev->delay_drop.delay_drop_work);
}
/*
 * Translate a PORT_CHANGE EQE into an IB core event.
 *
 * The port number is taken from the upper nibble of the EQE port byte and is
 * always stored in @ibev->element.port_num. @ibev->event is set according to
 * the EQE sub-type.
 *
 * Return: 0 when @ibev has been filled in and may be dispatched; -EINVAL when
 * the event must not be forwarded (port state change on an Ethernet
 * link-layer port, or an unrecognised sub-type).
 */
static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
			      struct ib_event *ibev)
{
	u8 port = (eqe->data.port.port >> 4) & 0xf;

	ibev->element.port_num = port;

	switch (eqe->sub_type) {
	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
	case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
		/* In RoCE, port up/down events are handled in
		 * mlx5_netdev_event().
		 */
		if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
			IB_LINK_LAYER_ETHERNET)
			return -EINVAL;

		ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
				IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
		break;

	case MLX5_PORT_CHANGE_SUBTYPE_LID:
		ibev->event = IB_EVENT_LID_CHANGE;
		break;

	case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
		ibev->event = IB_EVENT_PKEY_CHANGE;
		/*
		 * The port number comes straight from the EQE and has not
		 * been validated yet, so "port - 1" must not be used as an
		 * array index for port 0 or an out-of-range port. The caller
		 * rejects (and warns about) invalid ports after we return, so
		 * skipping the work here loses nothing.
		 */
		if (rdma_is_port_valid(&ibdev->ib_dev, port))
			schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
		break;

	case MLX5_PORT_CHANGE_SUBTYPE_GUID:
		ibev->event = IB_EVENT_GID_CHANGE;
		break;

	case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
		ibev->event = IB_EVENT_CLIENT_REREGISTER;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
static void mlx5_ib_handle_event(struct work_struct *_work)
{
struct mlx5_ib_event_work *work =
......@@ -4233,65 +4294,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
struct mlx5_ib_dev *ibdev;
struct ib_event ibev;
bool fatal = false;
u8 port = (u8)work->param;
if (mlx5_core_is_mp_slave(work->dev)) {
ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
if (work->is_slave) {
ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
if (!ibdev)
goto out;
} else {
ibdev = work->context;
ibdev = work->dev;
}
switch (work->event) {
case MLX5_DEV_EVENT_SYS_ERROR:
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
ibev.element.port_num = (u8)(unsigned long)work->param;
fatal = true;
break;
case MLX5_DEV_EVENT_PORT_UP:
case MLX5_DEV_EVENT_PORT_DOWN:
case MLX5_DEV_EVENT_PORT_INITIALIZED:
/* In RoCE, port up/down events are handled in
* mlx5_netdev_event().
*/
if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET)
case MLX5_EVENT_TYPE_PORT_CHANGE:
if (handle_port_change(ibdev, work->param, &ibev))
goto out;
ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
break;
case MLX5_DEV_EVENT_LID_CHANGE:
ibev.event = IB_EVENT_LID_CHANGE;
break;
case MLX5_DEV_EVENT_PKEY_CHANGE:
ibev.event = IB_EVENT_PKEY_CHANGE;
schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
case MLX5_DEV_EVENT_GUID_CHANGE:
ibev.event = IB_EVENT_GID_CHANGE;
break;
case MLX5_DEV_EVENT_CLIENT_REREG:
ibev.event = IB_EVENT_CLIENT_REREGISTER;
break;
case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
schedule_work(&ibdev->delay_drop.delay_drop_work);
goto out;
case MLX5_EVENT_TYPE_GENERAL_EVENT:
handle_general_event(ibdev, work->param, &ibev);
/* fall through */
default:
goto out;
}
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = port;
ibev.device = &ibdev->ib_dev;
if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
goto out;
}
......@@ -4304,22 +4337,43 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
kfree(work);
}
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
static int mlx5_ib_event(struct notifier_block *nb,
unsigned long event, void *param)
{
struct mlx5_ib_event_work *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
return NOTIFY_DONE;
INIT_WORK(&work->work, mlx5_ib_handle_event);
work->dev = dev;
work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
work->is_slave = false;
work->param = param;
work->context = context;
work->event = event;
queue_work(mlx5_ib_event_wq, &work->work);
return NOTIFY_OK;
}
static int mlx5_ib_event_slave_port(struct notifier_block *nb,
unsigned long event, void *param)
{
struct mlx5_ib_event_work *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return NOTIFY_DONE;
INIT_WORK(&work->work, mlx5_ib_handle_event);
work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
work->is_slave = true;
work->param = param;
work->event = event;
queue_work(mlx5_ib_event_wq, &work->work);
return NOTIFY_OK;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
......@@ -5330,7 +5384,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector);
}
/* The mlx5_ib_multiport_mutex should be held when calling this function */
......@@ -5350,6 +5404,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
spin_unlock(&port->mp.mpi_lock);
return;
}
if (mpi->mdev_events.notifier_call)
mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
mpi->mdev_events.notifier_call = NULL;
mpi->ibdev = NULL;
spin_unlock(&port->mp.mpi_lock);
......@@ -5405,6 +5464,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
ibdev->port[port_num].mp.mpi = mpi;
mpi->ibdev = ibdev;
mpi->mdev_events.notifier_call = NULL;
spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
......@@ -5422,6 +5482,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
goto unbind;
}
mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
if (err)
goto unbind;
......@@ -5694,8 +5757,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
dev->ib_dev.dev.parent = &mdev->pdev->dev;
mutex_init(&dev->cap_mask_mutex);
......@@ -6034,6 +6096,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
/*
 * Cleanup callback for the ODP stage: forwards to mlx5_ib_odp_cleanup_one()
 * for this device.
 */
void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_odp_cleanup_one(dev);
}
int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
......@@ -6152,6 +6219,34 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
mlx5_ib_unregister_vport_reps(dev);
}
/*
 * Init callback for the DEVICE_NOTIFIER stage: installs mlx5_ib_event() as
 * the callback of the device's embedded notifier block and registers that
 * block with the core device.
 *
 * Always returns 0.
 * NOTE(review): any result of mlx5_notifier_register() is not checked here.
 */
static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
dev->mdev_events.notifier_call = mlx5_ib_event;
mlx5_notifier_register(dev->mdev, &dev->mdev_events);
return 0;
}
/*
 * Cleanup callback for the DEVICE_NOTIFIER stage: unregisters the device's
 * notifier block from the core device.
 */
static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
}
/*
 * Init callback for the WHITELIST_UID stage.
 *
 * Calls mlx5_ib_devx_create() and, when it yields a positive value, records
 * it as the device's DEVX whitelist uid. A zero or negative result is
 * ignored, so this stage never fails.
 *
 * Return: always 0.
 */
static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
{
	int whitelist_uid = mlx5_ib_devx_create(dev);

	if (whitelist_uid <= 0)
		return 0;

	dev->devx_whitelist_uid = whitelist_uid;
	return 0;
}
/*
 * Cleanup callback for the WHITELIST_UID stage: destroys the DEVX whitelist
 * uid if one was recorded (a zero uid means none was created).
 */
static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
{
if (dev->devx_whitelist_uid)
mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
}
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
......@@ -6163,8 +6258,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
profile->stage[stage].cleanup(dev);
}
if (dev->devx_whitelist_uid)
mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
ib_dealloc_device((struct ib_device *)dev);
}
......@@ -6173,7 +6266,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
{
int err;
int i;
int uid;
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
......@@ -6183,10 +6275,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
}
}
uid = mlx5_ib_devx_create(dev);
if (uid > 0)
dev->devx_whitelist_uid = uid;
dev->profile = profile;
dev->ib_active = true;
......@@ -6214,12 +6302,18 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_SRQ,
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
mlx5_ib_stage_odp_init,
NULL),
mlx5_ib_stage_odp_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
......@@ -6238,6 +6332,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_SPECS,
mlx5_ib_stage_populate_specs,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
mlx5_ib_stage_devx_init,
mlx5_ib_stage_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
......@@ -6265,9 +6362,15 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_SRQ,
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
......@@ -6388,10 +6491,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
.event = mlx5_ib_event,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
.pfault = mlx5_ib_pfault,
#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
......
......@@ -41,7 +41,6 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
#include <linux/mlx5/fs.h>
#include <linux/types.h>
#include <linux/mlx5/transobj.h>
......@@ -50,6 +49,8 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include "srq.h"
#define mlx5_ib_dbg(_dev, format, arg...) \
dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
__LINE__, current->pid, ##arg)
......@@ -774,7 +775,9 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_CAPS,
MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
MLX5_IB_STAGE_SRQ,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_COUNTERS,
MLX5_IB_STAGE_CONG_DEBUGFS,
......@@ -782,6 +785,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_SPECS,
MLX5_IB_STAGE_WHITELIST_UID,
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
......@@ -806,6 +810,7 @@ struct mlx5_ib_multiport_info {
struct list_head list;
struct mlx5_ib_dev *ibdev;
struct mlx5_core_dev *mdev;
struct notifier_block mdev_events;
struct completion unref_comp;
u64 sys_image_guid;
u32 mdev_refcnt;
......@@ -880,10 +885,20 @@ struct mlx5_ib_lb_state {
bool enabled;
};
/* Per-device state of the page-fault event queue owned by the IB driver. */
struct mlx5_ib_pf_eq {
/* Owning IB device; page faults are handled on its behalf. */
struct mlx5_ib_dev *dev;
/* Core EQ object created for page-fault events. */
struct mlx5_eq *core;
/* Deferred EQ processing: refills the pool, then drains the EQ. */
struct work_struct work;
spinlock_t lock; /* Pagefaults spinlock */
/* Workqueue on which the individual page-fault work items run. */
struct workqueue_struct *wq;
/* Pool of struct mlx5_pagefault, allocated from with GFP_ATOMIC. */
mempool_t *pool;
};
struct mlx5_ib_dev {
struct ib_device ib_dev;
const struct uverbs_object_tree_def *driver_trees[7];
struct mlx5_core_dev *mdev;
struct notifier_block mdev_events;
struct mlx5_roce roce[MLX5_MAX_PORTS];
int num_ports;
/* serialize update of capability mask
......@@ -902,6 +917,8 @@ struct mlx5_ib_dev {
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
u64 odp_max_size;
struct mlx5_ib_pf_eq odp_pf_eq;
/*
* Sleepable RCU that prevents destruction of MRs while they are still
* being used by a page fault handler.
......@@ -927,6 +944,7 @@ struct mlx5_ib_dev {
u64 sys_image_guid;
struct mlx5_memic memic;
u16 devx_whitelist_uid;
struct mlx5_srq_table srq_table;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
......@@ -1158,9 +1176,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
struct mlx5_pagefault *pfault);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
......@@ -1175,6 +1192,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
......
......@@ -37,6 +37,46 @@
#include "mlx5_ib.h"
#include "cmd.h"
#include <linux/mlx5/eq.h>
/*
 * Contains the details of a pagefault, decoded from a page-fault EQE into
 * CPU byte order, plus what is needed to handle it from a workqueue.
 */
struct mlx5_pagefault {
/* Copied from the EQE's bytes_committed field. */
u32 bytes_committed;
/* Fault token; sent back to the device in the page-fault resume command. */
u32 token;
/* EQE sub-type (RDMA or WQE); selects the valid member of the union. */
u8 event_subtype;
/* Page fault type, extracted from the top byte of the EQE's pftype dword. */
u8 type;
union {
/* Initiator or send message responder pagefault details. */
struct {
/* Received packet size, only valid for responders. */
u32 packet_size;
/*
 * Number of resource holding WQE, depends on type.
 */
u32 wq_num;
/*
 * WQE index. Refers to either the send queue or
 * receive queue, according to event_subtype.
 */
u16 wqe_index;
} wqe;
/* RDMA responder pagefault details */
struct {
u32 r_key;
/*
 * Received packet size, minimal size page fault
 * resolution required for forward progress.
 */
u32 packet_size;
u32 rdma_op_len;
u64 rdma_va;
} rdma;
};
/* Page-fault EQ this fault arrived on; also owns the mempool it came from. */
struct mlx5_ib_pf_eq *eq;
/* Work item used to handle this fault outside the EQ processing path. */
struct work_struct work;
};
#define MAX_PREFETCH_LEN (4*1024*1024U)
/* Timeout in ms to wait for an active mmu notifier to complete when handling
......@@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
{
int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
pfault->wqe.wq_num : pfault->token;
int ret = mlx5_core_page_fault_resume(dev->mdev,
pfault->token,
wq_num,
pfault->type,
error);
if (ret)
mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
wq_num);
u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { };
u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { };
int err;
MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type);
MLX5_SET(page_fault_resume_in, in, token, pfault->token);
MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
MLX5_SET(page_fault_resume_in, in, error, !!error);
err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
if (err)
mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
wq_num, err);
}
static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
......@@ -607,8 +653,8 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
if (!wait_for_completion_timeout(
&odp->notifier_completion,
timeout)) {
mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
current_seq, odp->notifiers_seq);
mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
current_seq, odp->notifiers_seq, odp->notifiers_count);
}
} else {
/* The MR is being killed, kill the QP as well. */
......@@ -1026,16 +1072,31 @@ static int mlx5_ib_mr_responder_pfault_handler(
return 0;
}
static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
u32 wq_num)
static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
u32 wq_num, int pf_type)
{
struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
enum mlx5_res_type res_type;
if (!mqp) {
mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
switch (pf_type) {
case MLX5_WQE_PF_TYPE_RMP:
res_type = MLX5_RES_SRQ;
break;
case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
case MLX5_WQE_PF_TYPE_RESP:
case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
res_type = MLX5_RES_QP;
break;
default:
return NULL;
}
return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
}
/*
 * Convert a held common-resource pointer into the IB QP wrapping it. The
 * resource pointer is reinterpreted as a struct mlx5_core_qp pointer, then
 * mapped to its mlx5_ib_qp via to_mibqp().
 */
static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
{
	return to_mibqp((struct mlx5_core_qp *)res);
}
......@@ -1049,18 +1110,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
int resume_with_error = 1;
u16 wqe_index = pfault->wqe.wqe_index;
int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
struct mlx5_core_rsc_common *res;
struct mlx5_ib_qp *qp;
res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
if (!res) {
mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num);
return;
}
switch (res->res) {
case MLX5_RES_QP:
qp = res_to_qp(res);
break;
default:
mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
goto resolve_page_fault;
}
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
if (!qp)
goto resolve_page_fault;
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
......@@ -1100,6 +1173,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
pfault->wqe.wq_num, resume_with_error,
pfault->type);
mlx5_core_res_put(res);
free_page((unsigned long)buffer);
}
......@@ -1178,10 +1252,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
}
}
void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
struct mlx5_pagefault *pfault)
static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
{
struct mlx5_ib_dev *dev = context;
u8 event_subtype = pfault->event_subtype;
switch (event_subtype) {
......@@ -1198,6 +1270,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
}
}
/*
 * Work handler for a single decoded page fault: handles the fault for the
 * device that owns the EQ, then returns the descriptor to the EQ's mempool.
 */
static void mlx5_ib_eqe_pf_action(struct work_struct *work)
{
	struct mlx5_pagefault *fault =
		container_of(work, struct mlx5_pagefault, work);
	struct mlx5_ib_pf_eq *pf_eq = fault->eq;

	mlx5_ib_pfault(pf_eq->dev, fault);
	mempool_free(fault, pf_eq->pool);
}
static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
{
struct mlx5_eqe_page_fault *pf_eqe;
struct mlx5_pagefault *pfault;
struct mlx5_eqe *eqe;
int cc = 0;
while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) {
pfault = mempool_alloc(eq->pool, GFP_ATOMIC);
if (!pfault) {
schedule_work(&eq->work);
break;
}
pf_eqe = &eqe->data.page_fault;
pfault->event_subtype = eqe->sub_type;
pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
mlx5_ib_dbg(eq->dev,
"PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
eqe->sub_type, pfault->bytes_committed);
switch (eqe->sub_type) {
case MLX5_PFAULT_SUBTYPE_RDMA:
/* RDMA based event */
pfault->type =
be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
pfault->token =
be32_to_cpu(pf_eqe->rdma.pftype_token) &
MLX5_24BIT_MASK;
pfault->rdma.r_key =
be32_to_cpu(pf_eqe->rdma.r_key);
pfault->rdma.packet_size =
be16_to_cpu(pf_eqe->rdma.packet_length);
pfault->rdma.rdma_op_len =
be32_to_cpu(pf_eqe->rdma.rdma_op_len);
pfault->rdma.rdma_va =
be64_to_cpu(pf_eqe->rdma.rdma_va);
mlx5_ib_dbg(eq->dev,
"PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
pfault->type, pfault->token,
pfault->rdma.r_key);
mlx5_ib_dbg(eq->dev,
"PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
pfault->rdma.rdma_op_len,
pfault->rdma.rdma_va);
break;
case MLX5_PFAULT_SUBTYPE_WQE:
/* WQE based event */
pfault->type =
(be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
pfault->token =
be32_to_cpu(pf_eqe->wqe.token);
pfault->wqe.wq_num =
be32_to_cpu(pf_eqe->wqe.pftype_wq) &
MLX5_24BIT_MASK;
pfault->wqe.wqe_index =
be16_to_cpu(pf_eqe->wqe.wqe_index);
pfault->wqe.packet_size =
be16_to_cpu(pf_eqe->wqe.packet_length);
mlx5_ib_dbg(eq->dev,
"PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
pfault->type, pfault->token,
pfault->wqe.wq_num,
pfault->wqe.wqe_index);
break;
default:
mlx5_ib_warn(eq->dev,
"Unsupported page fault event sub-type: 0x%02hhx\n",
eqe->sub_type);
/* Unsupported page faults should still be
* resolved by the page fault handler
*/
}
pfault->eq = eq;
INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action);
queue_work(eq->wq, &pfault->work);
cc = mlx5_eq_update_cc(eq->core, ++cc);
}
mlx5_eq_update_ci(eq->core, cc, 1);
}
/*
 * Interrupt handler of the page-fault EQ.
 *
 * Processes the EQ inline if the EQ lock can be taken without waiting;
 * otherwise defers processing to the EQ work item. Always reports the
 * interrupt as handled.
 */
static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
{
	struct mlx5_ib_pf_eq *pf_eq = eq_ptr;
	unsigned long irq_flags;

	if (!spin_trylock_irqsave(&pf_eq->lock, irq_flags)) {
		/* Someone else is draining the EQ; let the worker catch up. */
		schedule_work(&pf_eq->work);
		return IRQ_HANDLED;
	}

	mlx5_ib_eq_pf_process(pf_eq);
	spin_unlock_irqrestore(&pf_eq->lock, irq_flags);

	return IRQ_HANDLED;
}
/* mempool_refill() was proposed but unfortunately wasn't accepted
 * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
 * Cheap workaround.
 *
 * While the pool holds fewer than its minimum number of elements, allocate
 * one element with GFP_KERNEL and hand it straight back to the pool.
 */
static void mempool_refill(mempool_t *pool)
{
	void *elem;

	while (pool->curr_nr < pool->min_nr) {
		elem = mempool_alloc(pool, GFP_KERNEL);
		mempool_free(elem, pool);
	}
}
/*
 * Work handler for deferred page-fault EQ processing: tops the descriptor
 * pool back up first, then drains the EQ under the EQ lock with interrupts
 * disabled.
 */
static void mlx5_ib_eq_pf_action(struct work_struct *work)
{
	struct mlx5_ib_pf_eq *pf_eq;

	pf_eq = container_of(work, struct mlx5_ib_pf_eq, work);

	mempool_refill(pf_eq->pool);

	spin_lock_irq(&pf_eq->lock);
	mlx5_ib_eq_pf_process(pf_eq);
	spin_unlock_irq(&pf_eq->lock);
}
/* Sizing constants for the page-fault EQ. */
enum {
/* Number of entries requested for the page-fault EQ. */
MLX5_IB_NUM_PF_EQE = 0x1000,
/* Minimum number of page-fault descriptors kept in the mempool. */
MLX5_IB_NUM_PF_DRAIN = 64,
};
/*
 * Set up the page-fault EQ for @dev.
 *
 * Initialises the EQ work item and lock, creates the descriptor mempool and
 * the page-fault workqueue, then creates the generic EQ for page-fault
 * events with mlx5_ib_eq_pf_int() as its handler.
 *
 * Return: 0 on success; -ENOMEM if the pool or workqueue cannot be created;
 * otherwise the error carried by mlx5_eq_create_generic(). Everything
 * allocated here is released again on failure.
 */
static int
mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
	struct mlx5_eq_param param = {
		.index = MLX5_EQ_PFAULT_IDX,
		.mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
		.nent = MLX5_IB_NUM_PF_EQE,
		.context = eq,
		.handler = mlx5_ib_eq_pf_int
	};
	int err;

	INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
	spin_lock_init(&eq->lock);
	eq->dev = dev;

	eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
					       sizeof(struct mlx5_pagefault));
	if (!eq->pool)
		return -ENOMEM;

	eq->wq = alloc_workqueue("mlx5_ib_page_fault",
				 WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
				 MLX5_NUM_CMD_EQE);
	if (!eq->wq) {
		err = -ENOMEM;
		goto err_mempool;
	}

	eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
	if (!IS_ERR(eq->core))
		return 0;

	err = PTR_ERR(eq->core);
	destroy_workqueue(eq->wq);
err_mempool:
	mempool_destroy(eq->pool);
	return err;
}
/*
 * Tear down the page-fault EQ of @dev.
 *
 * The EQ is destroyed first, then the deferred EQ work is cancelled and
 * waited for, and finally the workqueue and the descriptor pool are released.
 *
 * Return: the result of mlx5_eq_destroy_generic(); the remaining teardown
 * steps run regardless of it.
 */
static int
mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
	int ret = mlx5_eq_destroy_generic(dev->mdev, eq->core);

	cancel_work_sync(&eq->work);
	destroy_workqueue(eq->wq);
	mempool_destroy(eq->pool);

	return ret;
}
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
{
if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
......@@ -1226,7 +1495,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret;
int ret = 0;
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
......@@ -1236,7 +1505,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
}
}
return 0;
if (!MLX5_CAP_GEN(dev->mdev, pg))
return ret;
ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
return ret;
}
/*
 * Per-device ODP teardown: destroys the page-fault EQ, but only on devices
 * that report the "pg" general capability. On other devices this does
 * nothing.
 */
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, pg))
		mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
}
int mlx5_ib_odp_init(void)
......@@ -1246,4 +1528,3 @@ int mlx5_ib_odp_init(void)
return 0;
}
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
* Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
/* not supported currently */
static int srq_signature;
#include "srq.h"
static void *get_wqe(struct mlx5_ib_srq *srq, int n)
{
......@@ -202,7 +171,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
err = -ENOMEM;
goto err_in;
}
srq->wq_sig = !!srq_signature;
srq->wq_sig = 0;
in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
......@@ -327,7 +296,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
in.pd = to_mpd(pd)->pdn;
in.db_record = srq->db.dma;
err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
err = mlx5_cmd_create_srq(dev, &srq->msrq, &in);
kvfree(in.pas);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
......@@ -351,7 +320,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
return &srq->ibsrq;
err_core:
mlx5_core_destroy_srq(dev->mdev, &srq->msrq);
mlx5_cmd_destroy_srq(dev, &srq->msrq);
err_usr_kern_srq:
if (pd->uobject)
......@@ -381,7 +350,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return -EINVAL;
mutex_lock(&srq->mutex);
ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1);
ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1);
mutex_unlock(&srq->mutex);
if (ret)
......@@ -402,7 +371,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
if (!out)
return -ENOMEM;
ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out);
ret = mlx5_cmd_query_srq(dev, &srq->msrq, out);
if (ret)
goto out_box;
......@@ -420,7 +389,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
struct mlx5_ib_dev *dev = to_mdev(srq->device);
struct mlx5_ib_srq *msrq = to_msrq(srq);
mlx5_core_destroy_srq(dev->mdev, &msrq->msrq);
mlx5_cmd_destroy_srq(dev, &msrq->msrq);
if (srq->uobject) {
mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
......
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved.
*/
#ifndef MLX5_IB_SRQ_H
#define MLX5_IB_SRQ_H
/*
 * Single-bit SRQ flag values.
 * NOTE(review): presumably OR-ed into mlx5_srq_attr.flags - confirm against
 * the users, none of which are visible in this header.
 */
enum {
MLX5_SRQ_FLAG_ERR = (1 << 0),
MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
MLX5_SRQ_FLAG_RNDV = (1 << 2),
};
/*
 * Software description of an SRQ, passed to the mlx5_cmd_*_srq() helpers:
 * filled in by the caller for creation, and filled in by the helpers when
 * querying.
 */
struct mlx5_srq_attr {
/* SRQ type; the create path switches on it (IB_SRQT_XRC is one case) */
u32 type;
u32 flags;
u32 log_size;
u32 wqe_shift;
/* the kernel create path sets this to buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT */
u32 log_page_size;
u32 wqe_cnt;
u32 srqn;
u32 xrcd;
u32 page_offset;
/* written to the cqn field of the XRQ context on XRQ creation */
u32 cqn;
/* protection domain number (the IB layer stores to_mpd(pd)->pdn here) */
u32 pd;
u32 lwm;
u32 user_index;
/* doorbell record DMA address; read back from the dbr_addr context field */
u64 db_record;
/* big-endian page address array copied into the create command;
 * the IB create path releases it with kvfree() after the command
 */
__be64 *pas;
/* tm_* fields: presumably tag-matching (XRQ) parameters - TODO confirm */
u32 tm_log_list_size;
u32 tm_next_tag;
u32 tm_hw_phase_cnt;
u32 tm_sw_phase_cnt;
/* programmed into the uid field of the create command and cached in
 * mlx5_core_srq.uid on success
 */
u16 uid;
};
struct mlx5_ib_dev;
/*
 * Per-SRQ tracking object. It is stored in the mlx5_srq_table radix tree
 * under its srqn and handed to the event callback when an SRQ event arrives.
 */
struct mlx5_core_srq {
/* common.res selects which command flavour is used when dev->mdev->issi is set */
struct mlx5_core_rsc_common common; /* must be first */
/* object number returned by the create command; also the radix tree key */
u32 srqn;
int max;
size_t max_gs;
size_t max_avail_gather;
int wqe_shift;
/* callback invoked by the event path with this SRQ and the event type */
void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e);
/* incremented while an event is being delivered; when it drops to zero
 * the 'free' completion below is signalled
 */
atomic_t refcount;
struct completion free;
/* uid recorded at creation and placed in later destroy/arm commands */
u16 uid;
};
/* Per-device SRQ lookup table together with its event notifier hook. */
struct mlx5_srq_table {
/* notifier block registered with mlx5_notifier_register() at table init */
struct notifier_block nb;
/* protects the radix tree */
spinlock_t lock;
/* maps srqn -> struct mlx5_core_srq */
struct radix_tree_root tree;
};
/* Create the SRQ in firmware and insert it into dev->srq_table keyed by srqn. */
int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in);
/* Remove the SRQ from the table, then destroy it in firmware; returns -EINVAL
 * when the table entry for srq->srqn is missing or is a different object.
 */
int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
/* Query the SRQ from firmware into *out. */
int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out);
/* Arm the SRQ with the limit watermark 'lwm'. */
int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq);
/* Look up an SRQ by number in dev->srq_table.
 * NOTE(review): presumably returns it with a reference held - confirm in srq_cmd.c.
 */
struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn);
/* Initialise dev->srq_table and register its event notifier. */
int mlx5_init_srq_table(struct mlx5_ib_dev *dev);
/* Unregister the event notifier of dev->srq_table. */
void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev);
#endif /* MLX5_IB_SRQ_H */
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
* Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include <linux/mlx5/srq.h>
#include <rdma/ib_verbs.h>
#include "mlx5_core.h"
#include <linux/mlx5/transobj.h>
/*
 * Deliver an asynchronous event to the SRQ numbered @srqn.
 *
 * The SRQ is looked up in the device's SRQ table under the table lock and
 * pinned with a reference while its ->event() callback runs. An unknown
 * SRQ number is reported with a warning and otherwise ignored.
 */
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
{
	struct mlx5_srq_table *srq_tbl = &dev->priv.srq_table;
	struct mlx5_core_srq *target;

	spin_lock(&srq_tbl->lock);
	target = radix_tree_lookup(&srq_tbl->tree, srqn);
	if (target != NULL)
		atomic_inc(&target->refcount);
	spin_unlock(&srq_tbl->lock);

	if (target == NULL) {
		mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
		return;
	}

	target->event(target, event_type);

	/* Drop our reference; the last one out signals the waiter. */
	if (atomic_dec_and_test(&target->refcount))
		complete(&target->free);
}
#include "mlx5_ib.h"
#include "srq.h"
static int get_pas_size(struct mlx5_srq_attr *in)
{
......@@ -132,9 +78,9 @@ static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
}
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
{
struct mlx5_srq_table *table = &dev->priv.srq_table;
struct mlx5_srq_table *table = &dev->srq_table;
struct mlx5_core_srq *srq;
spin_lock(&table->lock);
......@@ -147,9 +93,8 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
return srq;
}
EXPORT_SYMBOL(mlx5_core_get_srq);
static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
......@@ -176,7 +121,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(create_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_SRQ);
err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
if (!err) {
......@@ -187,8 +132,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return err;
}
static int destroy_srq_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq)
static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
......@@ -198,11 +142,11 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
srq_out, sizeof(srq_out));
return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
sizeof(srq_out));
}
static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq)
{
u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
......@@ -214,11 +158,11 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
srq_out, sizeof(srq_out));
return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
sizeof(srq_out));
}
static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
......@@ -233,8 +177,8 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(query_srq_in, srq_in, opcode,
MLX5_CMD_OP_QUERY_SRQ);
MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
MLX5_ST_SZ_BYTES(query_srq_out));
if (err)
goto out;
......@@ -247,7 +191,7 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return err;
}
static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
......@@ -277,7 +221,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_CMD_OP_CREATE_XRC_SRQ);
memset(create_out, 0, sizeof(create_out));
err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
sizeof(create_out));
if (err)
goto out;
......@@ -289,7 +233,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
return err;
}
static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq)
{
u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
......@@ -300,12 +244,12 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, sizeof(xrcsrq_out));
}
static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq, u16 lwm)
static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm)
{
u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
......@@ -316,11 +260,11 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, sizeof(xrcsrq_out));
}
static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
......@@ -338,8 +282,8 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_CMD_OP_QUERY_XRC_SRQ);
MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
MLX5_ST_SZ_BYTES(query_xrc_srq_out));
err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out));
if (err)
goto out;
......@@ -354,21 +298,27 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
return err;
}
static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
void *create_in;
void *create_out = NULL;
void *create_in = NULL;
void *rmpc;
void *wq;
int pas_size;
int outlen;
int inlen;
int err;
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
create_in = kvzalloc(inlen, GFP_KERNEL);
if (!create_in)
return -ENOMEM;
create_out = kvzalloc(outlen, GFP_KERNEL);
if (!create_in || !create_out) {
err = -ENOMEM;
goto out;
}
rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
......@@ -378,16 +328,20 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
set_wq(wq, in);
memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
if (!err)
MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
if (!err) {
srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn);
srq->uid = in->uid;
}
out:
kvfree(create_in);
kvfree(create_out);
return err;
}
static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq)
static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
......@@ -395,22 +349,30 @@ static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
static int arm_rmp_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq,
static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm)
{
void *in;
void *out = NULL;
void *in = NULL;
void *rmpc;
void *wq;
void *bitmask;
int outlen;
int inlen;
int err;
in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
if (!in)
return -ENOMEM;
inlen = MLX5_ST_SZ_BYTES(modify_rmp_in);
outlen = MLX5_ST_SZ_BYTES(modify_rmp_out);
in = kvzalloc(inlen, GFP_KERNEL);
out = kvzalloc(outlen, GFP_KERNEL);
if (!in || !out) {
err = -ENOMEM;
goto out;
}
rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
......@@ -422,25 +384,39 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
MLX5_SET(wq, wq, lwm, lwm);
MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen);
out:
kvfree(in);
kvfree(out);
return err;
}
static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
u32 *rmp_out;
u32 *rmp_out = NULL;
u32 *rmp_in = NULL;
void *rmpc;
int outlen;
int inlen;
int err;
rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
if (!rmp_out)
return -ENOMEM;
outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
inlen = MLX5_ST_SZ_BYTES(query_rmp_in);
err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
rmp_out = kvzalloc(outlen, GFP_KERNEL);
rmp_in = kvzalloc(inlen, GFP_KERNEL);
if (!rmp_out || !rmp_in) {
err = -ENOMEM;
goto out;
}
MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP);
MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn);
err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen);
if (err)
goto out;
......@@ -451,10 +427,11 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
out:
kvfree(rmp_out);
kvfree(rmp_in);
return err;
}
static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
......@@ -489,7 +466,7 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(xrqc, xrqc, cqn, in->cqn);
MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
MLX5_SET(create_xrq_in, create_in, uid, in->uid);
err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
if (!err) {
......@@ -500,7 +477,7 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return err;
}
static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
......@@ -509,10 +486,10 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
static int arm_xrq_cmd(struct mlx5_core_dev *dev,
static int arm_xrq_cmd(struct mlx5_ib_dev *dev,
struct mlx5_core_srq *srq,
u16 lwm)
{
......@@ -525,10 +502,10 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(arm_rq_in, in, lwm, lwm);
MLX5_SET(arm_rq_in, in, uid, srq->uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
......@@ -544,7 +521,7 @@ static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen);
if (err)
goto out;
......@@ -567,11 +544,10 @@ static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return err;
}
static int create_srq_split(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq,
static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
if (!dev->issi)
if (!dev->mdev->issi)
return create_srq_cmd(dev, srq, in);
switch (srq->common.res) {
case MLX5_RES_XSRQ:
......@@ -583,10 +559,9 @@ static int create_srq_split(struct mlx5_core_dev *dev,
}
}
static int destroy_srq_split(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq)
static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
if (!dev->issi)
if (!dev->mdev->issi)
return destroy_srq_cmd(dev, srq);
switch (srq->common.res) {
case MLX5_RES_XSRQ:
......@@ -598,11 +573,11 @@ static int destroy_srq_split(struct mlx5_core_dev *dev,
}
}
int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
struct mlx5_srq_table *table = &dev->srq_table;
int err;
struct mlx5_srq_table *table = &dev->priv.srq_table;
switch (in->type) {
case IB_SRQT_XRC:
......@@ -625,10 +600,8 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
spin_lock_irq(&table->lock);
err = radix_tree_insert(&table->tree, srq->srqn, srq);
spin_unlock_irq(&table->lock);
if (err) {
mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
if (err)
goto err_destroy_srq_split;
}
return 0;
......@@ -637,25 +610,18 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return err;
}
EXPORT_SYMBOL(mlx5_core_create_srq);
int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
struct mlx5_srq_table *table = &dev->priv.srq_table;
struct mlx5_srq_table *table = &dev->srq_table;
struct mlx5_core_srq *tmp;
int err;
spin_lock_irq(&table->lock);
tmp = radix_tree_delete(&table->tree, srq->srqn);
spin_unlock_irq(&table->lock);
if (!tmp) {
mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
return -EINVAL;
}
if (tmp != srq) {
mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
if (!tmp || tmp != srq)
return -EINVAL;
}
err = destroy_srq_split(dev, srq);
if (err)
......@@ -667,12 +633,11 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
return 0;
}
EXPORT_SYMBOL(mlx5_core_destroy_srq);
int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out)
{
if (!dev->issi)
if (!dev->mdev->issi)
return query_srq_cmd(dev, srq, out);
switch (srq->common.res) {
case MLX5_RES_XSRQ:
......@@ -683,12 +648,11 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return query_rmp_cmd(dev, srq, out);
}
}
EXPORT_SYMBOL(mlx5_core_query_srq);
int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq)
int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq)
{
if (!dev->issi)
if (!dev->mdev->issi)
return arm_srq_cmd(dev, srq, lwm, is_srq);
switch (srq->common.res) {
case MLX5_RES_XSRQ:
......@@ -699,18 +663,60 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
return arm_rmp_cmd(dev, srq, lwm);
}
}
EXPORT_SYMBOL(mlx5_core_arm_srq);
void mlx5_init_srq_table(struct mlx5_core_dev *dev)
/*
 * Notifier callback that routes SRQ events to the owning SRQ.
 *
 * Only MLX5_EVENT_TYPE_SRQ_CATAS_ERROR and MLX5_EVENT_TYPE_SRQ_RQ_LIMIT are
 * handled; anything else returns NOTIFY_DONE. The SRQ number is the low
 * 24 bits of the EQE's qp_srq_n field. A reference is held on the SRQ while
 * its ->event() callback runs. An SRQ number that is not in the table is
 * silently ignored (NOTIFY_OK).
 */
static int srq_event_notifier(struct notifier_block *nb,
			      unsigned long type, void *data)
{
	struct mlx5_srq_table *srq_tbl;
	struct mlx5_core_srq *target;
	struct mlx5_eqe *ev = data;
	u32 num;

	if (!(type == MLX5_EVENT_TYPE_SRQ_CATAS_ERROR ||
	      type == MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
		return NOTIFY_DONE;

	srq_tbl = container_of(nb, struct mlx5_srq_table, nb);
	num = be32_to_cpu(ev->data.qp_srq.qp_srq_n) & 0xffffff;

	spin_lock(&srq_tbl->lock);
	target = radix_tree_lookup(&srq_tbl->tree, num);
	if (target)
		atomic_inc(&target->refcount);
	spin_unlock(&srq_tbl->lock);

	if (target) {
		target->event(target, ev->type);

		/* Last reference dropped: wake whoever waits on ->free. */
		if (atomic_dec_and_test(&target->refcount))
			complete(&target->free);
	}

	return NOTIFY_OK;
}
int mlx5_init_srq_table(struct mlx5_ib_dev *dev)
{
struct mlx5_srq_table *table = &dev->priv.srq_table;
struct mlx5_srq_table *table = &dev->srq_table;
memset(table, 0, sizeof(*table));
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
table->nb.notifier_call = srq_event_notifier;
mlx5_notifier_register(dev->mdev, &table->nb);
return 0;
}
void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev)
{
/* nothing */
struct mlx5_srq_table *table = &dev->srq_table;
mlx5_notifier_unregister(dev->mdev, &table->nb);
}
......@@ -12,9 +12,9 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
# mlx5 core basic
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
diag/fs_tracepoint.o diag/fw_tracer.o
#
......@@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o
#
# Core extra
......
......@@ -40,9 +40,11 @@
#include <linux/random.h>
#include <linux/io-mapping.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eq.h>
#include <linux/debugfs.h>
#include "mlx5_core.h"
#include "lib/eq.h"
enum {
CMD_IF_REV = 5,
......@@ -313,6 +315,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_FPGA_DESTROY_QP:
case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
case MLX5_CMD_OP_DEALLOC_MEMIC:
case MLX5_CMD_OP_PAGE_FAULT_RESUME:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
......@@ -326,7 +329,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_MKEY:
case MLX5_CMD_OP_QUERY_MKEY:
case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
case MLX5_CMD_OP_PAGE_FAULT_RESUME:
case MLX5_CMD_OP_CREATE_EQ:
case MLX5_CMD_OP_QUERY_EQ:
case MLX5_CMD_OP_GEN_EQE:
......@@ -371,6 +373,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
case MLX5_CMD_OP_SET_MONITOR_COUNTER:
case MLX5_CMD_OP_ARM_MONITOR_COUNTER:
case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
case MLX5_CMD_OP_QUERY_RATE_LIMIT:
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
......@@ -520,6 +524,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER);
MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER);
MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
......@@ -805,6 +811,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
return MLX5_GET(mbox_in, in->first.data, opcode);
}
static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
static void cb_timeout_handler(struct work_struct *work)
{
struct delayed_work *dwork = container_of(work, struct delayed_work,
......@@ -1412,14 +1420,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
up(&cmd->sem);
}
/*
 * EQ notifier for command completion events.
 *
 * Recovers the owning mlx5_core_dev from the embedded notifier block and
 * passes the completion vector carried in the EQE to the command completion
 * handler (non-forced). Always returns NOTIFY_OK.
 */
static int cmd_comp_notifier(struct notifier_block *nb,
			     unsigned long type, void *data)
{
	struct mlx5_cmd *owner = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
	struct mlx5_core_dev *mdev = container_of(owner, struct mlx5_core_dev, cmd);
	struct mlx5_eqe *entry = data;

	mlx5_cmd_comp_handler(mdev, be32_to_cpu(entry->data.cmd.vector), false);

	return NOTIFY_OK;
}
/*
 * Switch the command interface to event mode: initialise and register the
 * command-completion EQ notifier first, then change the mode to
 * CMD_MODE_EVENTS.
 */
void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
{
MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
mlx5_eq_notifier_register(dev, &dev->cmd.nb);
mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
}
/*
 * Switch the command interface back to polling mode. The mode is changed
 * to CMD_MODE_POLLING before the completion notifier is unregistered, the
 * reverse of the order used by mlx5_cmd_use_events().
 */
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
{
mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
......@@ -1435,7 +1461,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
}
}
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
{
struct mlx5_cmd *cmd = &dev->cmd;
struct mlx5_cmd_work_ent *ent;
......@@ -1533,7 +1559,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
}
}
}
EXPORT_SYMBOL(mlx5_cmd_comp_handler);
/*
 * Force completion handling for every command slot that is currently in use.
 *
 * After waiting for in-flight command IRQ handlers, the set of slots whose
 * bit is clear in cmd.bitmask (presumably the allocated ones - confirm
 * against the slot allocator) is sampled under alloc_lock. If that set is
 * non-empty, the completion handler is run in forced mode with
 * MLX5_TRIGGERED_CMD_COMP OR-ed into the vector.
 */
void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
{
	unsigned long flags;
	u64 vector;

	/* wait for pending handlers to complete */
	mlx5_eq_synchronize_cmd_irq(dev);

	spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
	/*
	 * Build the slot mask in 64 bits. With 32 command slots the shift
	 * count is 32, which is undefined for a 32-bit unsigned long; a
	 * 64-bit one yields the intended 0xffffffff on every architecture.
	 */
	vector = ~dev->cmd.bitmask & ((1ULL << (1 << dev->cmd.log_sz)) - 1);
	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);

	if (!vector)
		return;

	vector |= MLX5_TRIGGERED_CMD_COMP;
	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
	mlx5_cmd_comp_handler(dev, vector, true);
}
static int status_to_err(u8 status)
{
......
......@@ -38,6 +38,7 @@
#include <rdma/ib_verbs.h>
#include <linux/mlx5/cq.h>
#include "mlx5_core.h"
#include "lib/eq.h"
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
......@@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
struct mlx5_eq *eq;
struct mlx5_eq_comp *eq;
int err;
eq = mlx5_eqn2eq(dev, eqn);
eq = mlx5_eqn2comp_eq(dev, eqn);
if (IS_ERR(eq))
return PTR_ERR(eq);
......@@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
/* Add to comp EQ CQ tree to recv comp events */
err = mlx5_eq_add_cq(eq, cq);
err = mlx5_eq_add_cq(&eq->core, cq);
if (err)
goto err_cmd;
/* Add to async EQ CQ tree to recv async events */
err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
if (err)
goto err_cq_add;
......@@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
return 0;
err_cq_add:
mlx5_eq_del_cq(eq, cq);
mlx5_eq_del_cq(&eq->core, cq);
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
......@@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
int err;
err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
if (err)
return err;
err = mlx5_eq_del_cq(cq->eq, cq);
err = mlx5_eq_del_cq(&cq->eq->core, cq);
if (err)
return err;
......
......@@ -36,6 +36,7 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
#include "lib/eq.h"
enum {
QP_PID,
......@@ -349,6 +350,16 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
return param;
}
/*
 * Issue a QUERY_EQ command for @eq and place the raw response in @out.
 *
 * @out/@outlen: caller-provided response buffer and its size in bytes.
 * Returns the result of mlx5_cmd_exec().
 */
static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
			      u32 *out, int outlen)
{
	u32 query_in[MLX5_ST_SZ_DW(query_eq_in)] = {};

	MLX5_SET(query_eq_in, query_in, opcode, MLX5_CMD_OP_QUERY_EQ);
	MLX5_SET(query_eq_in, query_in, eq_number, eq->eqn);

	return mlx5_cmd_exec(dev, query_in, sizeof(query_in), out, outlen);
}
static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
int index)
{
......
......@@ -45,75 +45,11 @@ struct mlx5_device_context {
unsigned long state;
};
/*
 * One device event queued on priv->waiting_events_list while events are
 * being accumulated; it is replayed to the interface and then freed by
 * delayed_event_release().
 */
struct mlx5_delayed_event {
/* link in priv->waiting_events_list */
struct list_head list;
/* device the event was raised on */
struct mlx5_core_dev *dev;
/* event code and its parameter, passed unchanged to intf->event() */
enum mlx5_dev_event event;
unsigned long param;
};
/* Bit numbers in mlx5_device_context.state (used with set_bit()/test_bit()). */
enum {
/* set once intf->add() has returned a non-NULL context */
MLX5_INTERFACE_ADDED,
/* set once intf->attach() has succeeded */
MLX5_INTERFACE_ATTACHED,
};
static void add_delayed_event(struct mlx5_priv *priv,
struct mlx5_core_dev *dev,
enum mlx5_dev_event event,
unsigned long param)
{
struct mlx5_delayed_event *delayed_event;
delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
if (!delayed_event) {
mlx5_core_err(dev, "event %d is missed\n", event);
return;
}
mlx5_core_dbg(dev, "Accumulating event %d\n", event);
delayed_event->dev = dev;
delayed_event->event = event;
delayed_event->param = param;
list_add_tail(&delayed_event->list, &priv->waiting_events_list);
}
static void delayed_event_release(struct mlx5_device_context *dev_ctx,
struct mlx5_priv *priv)
{
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
struct mlx5_delayed_event *de;
struct mlx5_delayed_event *n;
struct list_head temp;
INIT_LIST_HEAD(&temp);
spin_lock_irq(&priv->ctx_lock);
priv->is_accum_events = false;
list_splice_init(&priv->waiting_events_list, &temp);
if (!dev_ctx->context)
goto out;
list_for_each_entry_safe(de, n, &temp, list)
dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
out:
spin_unlock_irq(&priv->ctx_lock);
list_for_each_entry_safe(de, n, &temp, list) {
list_del(&de->list);
kfree(de);
}
}
/* Start accumulating events that can arrive after mlx5_ib calls
 * ib_register_device but before that interface has been added to the
 * events list. Sets priv->is_accum_events under ctx_lock; while the flag
 * is set, mlx5_core_event() also queues each event for later replay.
 */
static void delayed_event_start(struct mlx5_priv *priv)
{
spin_lock_irq(&priv->ctx_lock);
priv->is_accum_events = true;
spin_unlock_irq(&priv->ctx_lock);
}
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
......@@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
dev_ctx->intf = intf;
delayed_event_start(priv);
dev_ctx->context = intf->add(dev);
if (dev_ctx->context) {
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
......@@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (dev_ctx->intf->pfault) {
if (priv->pfault) {
mlx5_core_err(dev, "multiple page fault handlers not supported");
} else {
priv->pfault_ctx = dev_ctx->context;
priv->pfault = dev_ctx->intf->pfault;
}
}
#endif
spin_unlock_irq(&priv->ctx_lock);
}
delayed_event_release(dev_ctx, priv);
if (!dev_ctx->context)
kfree(dev_ctx);
}
......@@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (!dev_ctx)
return;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
spin_lock_irq(&priv->ctx_lock);
if (priv->pfault == dev_ctx->intf->pfault)
priv->pfault = NULL;
spin_unlock_irq(&priv->ctx_lock);
synchronize_srcu(&priv->pfault_srcu);
#endif
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
......@@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
if (!dev_ctx)
return;
delayed_event_start(priv);
if (intf->attach) {
if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
goto out;
return;
if (intf->attach(dev, dev_ctx->context))
goto out;
return;
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
goto out;
return;
dev_ctx->context = intf->add(dev);
if (!dev_ctx->context)
goto out;
return;
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
out:
delayed_event_release(dev_ctx, priv);
}
void mlx5_attach_device(struct mlx5_core_dev *dev)
......@@ -350,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
mutex_unlock(&mlx5_intf_mutex);
}
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
struct mlx5_device_context *dev_ctx;
unsigned long flags;
void *result = NULL;
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
if ((dev_ctx->intf->protocol == protocol) &&
dev_ctx->intf->get_dev) {
result = dev_ctx->intf->get_dev(dev_ctx->context);
break;
}
spin_unlock_irqrestore(&priv->ctx_lock, flags);
return result;
}
EXPORT_SYMBOL(mlx5_get_protocol_dev);
/* Must be called with intf_mutex held */
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
......@@ -422,44 +306,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
return res;
}
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_device_context *dev_ctx;
unsigned long flags;
spin_lock_irqsave(&priv->ctx_lock, flags);
if (priv->is_accum_events)
add_delayed_event(priv, dev, event, param);
/* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
* still in priv->ctx_list. In this case, only notify the dev_ctx if its
* ADDED or ATTACHED bit are set.
*/
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event &&
(test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
dev_ctx->intf->event(dev, dev_ctx->context, event, param);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
 * Forward a page fault to the registered page-fault handler, if any.
 * The handler pointer is read and invoked inside an SRCU read-side
 * section on priv->pfault_srcu.
 */
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
			  struct mlx5_pagefault *pfault)
{
	struct mlx5_priv *priv = &dev->priv;
	int idx = srcu_read_lock(&priv->pfault_srcu);

	if (priv->pfault)
		priv->pfault(dev, priv->pfault_ctx, pfault);

	srcu_read_unlock(&priv->pfault_srcu, idx);
}
#endif
void mlx5_dev_list_lock(void)
{
......
......@@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p,
PRINT_MASKED_VAL(name, p, format); \
}
DECLARE_MASK_VAL(u64, gre_key) = {
.m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
MLX5_GET(fte_match_set_misc, mask, gre_key_l),
.v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
MLX5_GET(fte_match_set_misc, value, gre_key_l)};
.m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 |
MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo),
.v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 |
MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)};
PRINT_MASKED_VAL(gre_key, p, "%llu");
PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
......
......@@ -30,6 +30,7 @@
* SOFTWARE.
*/
#define CREATE_TRACE_POINTS
#include "lib/eq.h"
#include "fw_tracer.h"
#include "fw_tracer_tracepoint.h"
......@@ -846,9 +847,9 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
return ERR_PTR(err);
}
/* Create HW resources + start tracer
* must be called before Async EQ is created
*/
static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
/* Create HW resources + start tracer */
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev;
......@@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
goto err_dealloc_pd;
}
MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
mlx5_eq_notifier_register(dev, &tracer->nb);
mlx5_fw_tracer_start(tracer);
return 0;
......@@ -883,9 +887,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
return err;
}
/* Stop tracer + Cleanup HW resources
* must be called after Async EQ is destroyed
*/
/* Stop tracer + Cleanup HW resources */
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
{
if (IS_ERR_OR_NULL(tracer))
......@@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
tracer->owner);
mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
cancel_work_sync(&tracer->ownership_change_work);
cancel_work_sync(&tracer->handle_traces_work);
......@@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
kfree(tracer);
}
void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
{
struct mlx5_fw_tracer *tracer = dev->tracer;
if (!tracer)
return;
struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
struct mlx5_core_dev *dev = tracer->dev;
struct mlx5_eqe *eqe = data;
switch (eqe->sub_type) {
case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
......@@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
eqe->sub_type);
}
return NOTIFY_OK;
}
EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
......@@ -55,6 +55,7 @@
struct mlx5_fw_tracer {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
bool owner;
u8 trc_ver;
struct workqueue_struct *work_queue;
......@@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
......@@ -176,8 +176,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
return is_kdump_kernel() ?
MLX5E_MIN_NUM_CHANNELS :
min_t(int, mdev->priv.eq_table.num_comp_vectors,
MLX5E_MAX_NUM_CHANNELS);
min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
/* Use this function to get max num channels after netdev was created */
......@@ -629,7 +628,6 @@ struct mlx5e_channel_stats {
} ____cacheline_aligned_in_smp;
enum {
MLX5E_STATE_ASYNC_EVENTS_ENABLED,
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
};
......@@ -696,6 +694,8 @@ struct mlx5e_priv {
struct hwtstamp_config tstamp;
u16 q_counter;
u16 drop_rq_q_counter;
struct notifier_block events_nb;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
#endif
......
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */
#include <net/vxlan.h>
#include <net/gre.h>
#include "lib/vxlan.h"
#include "en/tc_tun.h"
/*
 * Resolve the IPv4 route for @fl4 in the network namespace of @mirred_dev.
 *
 * On success:
 *   *out_dev - the route's egress device when it shares a switchdev parent
 *              id with priv->netdev, otherwise the uplink representor netdev
 *   *out_ttl - filled from the route's hop limit only when it came in as 0
 *   *out_n   - neighbour for fl4->daddr on that route (the caller ends up
 *              holding it and is expected to release it)
 *
 * Returns 0, the route lookup error, -ENOMEM when no neighbour could be
 * obtained, or -EOPNOTSUPP when CONFIG_INET is disabled.
 */
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi4 *fl4,
				   struct neighbour **out_n,
				   u8 *out_ttl)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct neighbour *neigh = NULL;
	struct rtable *rt;

#if IS_ENABLED(CONFIG_INET)
	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);
#else
	return -EOPNOTSUPP;
#endif

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);

	/* an egress device on another HW e-switch is reached via the uplink */
	if (switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
		*out_dev = rt->dst.dev;
	else
		*out_dev = uplink_rpriv->netdev;

	if (*out_ttl == 0)
		*out_ttl = ip4_dst_hoplimit(&rt->dst);

	neigh = dst_neigh_lookup(&rt->dst, &fl4->daddr);
	ip_rt_put(rt);
	if (!neigh)
		return -ENOMEM;

	*out_n = neigh;
	return 0;
}
/*
 * Return the rtnl link kind string of @dev, or an empty string when the
 * device has no rtnl_link_ops.
 */
static const char *mlx5e_netdev_kind(struct net_device *dev)
{
	return dev->rtnl_link_ops ? dev->rtnl_link_ops->kind : "";
}
/*
 * IPv6 counterpart of the IPv4 route lookup: resolve the destination entry
 * for @fl6 in the network namespace of @mirred_dev.
 *
 * On success:
 *   *out_ttl - filled from the dst hop limit only when it came in as 0
 *   *out_dev - the dst device when it shares a switchdev parent id with
 *              priv->netdev, otherwise the uplink representor netdev
 *   *out_n   - neighbour for fl6->daddr on that dst
 *
 * Returns 0, a negative error from the dst lookup, -ENOMEM when no
 * neighbour could be obtained, or -EOPNOTSUPP when INET/IPV6 support is
 * not enabled.
 */
static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi6 *fl6,
				   struct neighbour **out_n,
				   u8 *out_ttl)
{
	struct neighbour *neigh = NULL;
	struct dst_entry *dst;

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_rep_priv *uplink_rpriv;
	int err;

	err = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
					 fl6);
	if (err < 0)
		return err;

	if (*out_ttl == 0)
		*out_ttl = ip6_dst_hoplimit(dst);

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);

	/* an egress device on another HW e-switch is reached via the uplink */
	if (switchdev_port_same_parent_id(priv->netdev, dst->dev))
		*out_dev = dst->dev;
	else
		*out_dev = uplink_rpriv->netdev;
#else
	return -EOPNOTSUPP;
#endif

	neigh = dst_neigh_lookup(dst, &fl6->daddr);
	dst_release(dst);
	if (!neigh)
		return -ENOMEM;

	*out_n = neigh;
	return 0;
}
/*
 * Fill the UDP + VXLAN part of an encapsulation header at @buf from
 * @tun_key: the UDP destination port, the VNI-valid flag and the VNI
 * derived from the tunnel id. Only those fields are written; the rest of
 * the buffer is left as the caller provided it. Always returns 0.
 */
static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key)
{
	struct udphdr *udph = (struct udphdr *)buf;
	struct vxlanhdr *vxh = (struct vxlanhdr *)(buf + sizeof(*udph));
	__be32 vni_key = tunnel_id_to_key32(tun_key->tun_id);

	udph->dest = tun_key->tp_dst;

	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vni_key);

	return 0;
}
/*
 * Fill a GRE base header (and optional key) at @buf from @tun_key.
 *
 * The protocol is set to transparent Ethernet bridging and the GRE flags
 * are derived from the tunnel flags. When TUNNEL_KEY is set, the 32-bit
 * key is written into the last four bytes of the computed GRE header.
 *
 * Returns 0, or -EOPNOTSUPP when checksum or sequence numbers are
 * requested.
 */
static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key)
{
	struct gre_base_hdr *greh = (struct gre_base_hdr *)buf;
	__be32 gre_key = tunnel_id_to_key32(tun_key->tun_id);
	int gre_hlen;

	/* the HW does not calculate GRE csum or sequences */
	if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
		return -EOPNOTSUPP;

	greh->protocol = htons(ETH_P_TEB);

	gre_hlen = gre_calc_hlen(tun_key->tun_flags);
	greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);

	/* GRE key */
	if (tun_key->tun_flags & TUNNEL_KEY) {
		__be32 *key_slot = (__be32 *)((u8 *)greh + gre_hlen - 4);

		*key_slot = gre_key;
	}

	return 0;
}
/*
 * Generate the tunnel-protocol header that follows the IP header for encap
 * entry @e, and report the matching IP protocol number through @ip_proto.
 *
 * Returns the result of the per-tunnel generator, or -EOPNOTSUPP (after
 * logging a warning, with *ip_proto untouched) for an unknown tunnel type.
 */
static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
				      struct mlx5e_encap_entry *e)
{
	struct ip_tunnel_key *tun_key = &e->tun_info.key;

	switch (e->tunnel_type) {
	case MLX5E_TC_TUNNEL_TYPE_VXLAN:
		*ip_proto = IPPROTO_UDP;
		return mlx5e_gen_vxlan_header(buf, tun_key);
	case MLX5E_TC_TUNNEL_TYPE_GRETAP:
		*ip_proto = IPPROTO_GRE;
		return mlx5e_gen_gre_header(buf, tun_key);
	default:
		pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n",
			e->tunnel_type);
		return -EOPNOTSUPP;
	}
}
/*
 * Build the Ethernet + IPv4 + tunnel encapsulation header for encap entry
 * @e and, when the next-hop neighbour is already valid, allocate the HW
 * packet-reformat context for it.
 *
 * Returns:
 *   0           - header built, reformat context allocated and the entry
 *                 marked MLX5_ENCAP_ENTRY_VALID
 *   -EAGAIN     - neighbour not in a NUD_VALID state; resolution was kicked
 *                 and the built header is kept in e->encap_header
 *   -EOPNOTSUPP - required header size exceeds max_encap_header_size
 *   -ENOMEM     - header buffer allocation failed
 *   other       - error from route lookup, neigh-entry attach, tunnel
 *                 header generation or reformat allocation
 *
 * The neighbour reference taken by the route lookup is released on every
 * path that obtained one.
 */
int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
int ipv4_encap_size = ETH_HLEN +
sizeof(struct iphdr) +
e->tunnel_hlen;
struct ip_tunnel_key *tun_key = &e->tun_info.key;
struct net_device *out_dev;
struct neighbour *n = NULL;
struct flowi4 fl4 = {};
char *encap_header;
struct ethhdr *eth;
u8 nud_state, ttl;
struct iphdr *ip;
int err;
if (max_encap_size < ipv4_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
ipv4_encap_size, max_encap_size);
return -EOPNOTSUPP;
}
/* zeroed, so header fields not set explicitly below stay 0 */
encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
if (!encap_header)
return -ENOMEM;
/* add the IP fields */
fl4.flowi4_tos = tun_key->tos;
fl4.daddr = tun_key->u.ipv4.dst;
fl4.saddr = tun_key->u.ipv4.src;
ttl = tun_key->ttl;
err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
&fl4, &n, &ttl);
if (err)
goto free_encap;
/* used by mlx5e_detach_encap to lookup a neigh hash table
 * entry in the neigh hash table when a user deletes a rule
 */
e->m_neigh.dev = n->dev;
e->m_neigh.family = n->ops->family;
memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
e->out_dev = out_dev;
/* It's important to add the neigh to the hash table before checking
 * the neigh validity state. So if we'll get a notification, in case the
 * neigh changes its validity state, we would find the relevant neigh
 * in the hash.
 */
err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
if (err)
goto free_encap;
/* snapshot the neighbour state and MAC address under its lock */
read_lock_bh(&n->lock);
nud_state = n->nud_state;
ether_addr_copy(e->h_dest, n->ha);
read_unlock_bh(&n->lock);
/* add ethernet header */
eth = (struct ethhdr *)encap_header;
ether_addr_copy(eth->h_dest, e->h_dest);
ether_addr_copy(eth->h_source, out_dev->dev_addr);
eth->h_proto = htons(ETH_P_IP);
/* add ip header */
ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
ip->tos = tun_key->tos;
ip->version = 0x4;
ip->ihl = 0x5;
ip->ttl = ttl;
ip->daddr = fl4.daddr;
ip->saddr = fl4.saddr;
/* add tunneling protocol header */
err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
&ip->protocol, e);
if (err)
goto destroy_neigh_entry;
e->encap_size = ipv4_encap_size;
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
/* trigger neighbour resolution; jumping to "out" skips the kfree,
 * so the header stays referenced by e->encap_header
 */
neigh_event_send(n, NULL);
err = -EAGAIN;
goto out;
}
err = mlx5_packet_reformat_alloc(priv->mdev,
e->reformat_type,
ipv4_encap_size, encap_header,
MLX5_FLOW_NAMESPACE_FDB,
&e->encap_id);
if (err)
goto destroy_neigh_entry;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
neigh_release(n);
return err;
destroy_neigh_entry:
mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
free_encap:
/* NOTE(review): when reached after e->encap_header was assigned
 * (reformat alloc failure), e->encap_header is left pointing at the
 * freed buffer - confirm callers discard @e on error.
 */
kfree(encap_header);
out:
if (n)
neigh_release(n);
return err;
}
/*
 * Build the Ethernet + IPv6 + tunnel encapsulation header for encap entry
 * @e and, when the next-hop neighbour is already valid, allocate the HW
 * packet-reformat context for it.
 *
 * Returns:
 *   0           - header built, reformat context allocated and the entry
 *                 marked MLX5_ENCAP_ENTRY_VALID
 *   -EAGAIN     - neighbour not in a NUD_VALID state; resolution was kicked
 *                 and the built header is kept in e->encap_header
 *   -EOPNOTSUPP - required header size exceeds max_encap_header_size
 *   -ENOMEM     - header buffer allocation failed
 *   other       - error from route lookup, neigh-entry attach, tunnel
 *                 header generation or reformat allocation
 *
 * The neighbour reference taken by the route lookup is released on every
 * path that obtained one.
 */
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
int ipv6_encap_size = ETH_HLEN +
sizeof(struct ipv6hdr) +
e->tunnel_hlen;
struct ip_tunnel_key *tun_key = &e->tun_info.key;
struct net_device *out_dev;
struct neighbour *n = NULL;
struct flowi6 fl6 = {};
struct ipv6hdr *ip6h;
char *encap_header;
struct ethhdr *eth;
u8 nud_state, ttl;
int err;
if (max_encap_size < ipv6_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
ipv6_encap_size, max_encap_size);
return -EOPNOTSUPP;
}
/* zeroed, so header fields not set explicitly below stay 0 */
encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
if (!encap_header)
return -ENOMEM;
ttl = tun_key->ttl;
fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
fl6.daddr = tun_key->u.ipv6.dst;
fl6.saddr = tun_key->u.ipv6.src;
err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
&fl6, &n, &ttl);
if (err)
goto free_encap;
/* used by mlx5e_detach_encap to lookup a neigh hash table
 * entry in the neigh hash table when a user deletes a rule
 */
e->m_neigh.dev = n->dev;
e->m_neigh.family = n->ops->family;
memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
e->out_dev = out_dev;
/* It's important to add the neigh to the hash table before checking
 * the neigh validity state. So if we'll get a notification, in case the
 * neigh changes its validity state, we would find the relevant neigh
 * in the hash.
 */
err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
if (err)
goto free_encap;
/* snapshot the neighbour state and MAC address under its lock */
read_lock_bh(&n->lock);
nud_state = n->nud_state;
ether_addr_copy(e->h_dest, n->ha);
read_unlock_bh(&n->lock);
/* add ethernet header */
eth = (struct ethhdr *)encap_header;
ether_addr_copy(eth->h_dest, e->h_dest);
ether_addr_copy(eth->h_source, out_dev->dev_addr);
eth->h_proto = htons(ETH_P_IPV6);
/* add ip header */
ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
ip6_flow_hdr(ip6h, tun_key->tos, 0);
/* the HW fills up ipv6 payload len */
ip6h->hop_limit = ttl;
ip6h->daddr = fl6.daddr;
ip6h->saddr = fl6.saddr;
/* add tunneling protocol header */
err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
&ip6h->nexthdr, e);
if (err)
goto destroy_neigh_entry;
e->encap_size = ipv6_encap_size;
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
/* trigger neighbour resolution; jumping to "out" skips the kfree,
 * so the header stays referenced by e->encap_header
 */
neigh_event_send(n, NULL);
err = -EAGAIN;
goto out;
}
err = mlx5_packet_reformat_alloc(priv->mdev,
e->reformat_type,
ipv6_encap_size, encap_header,
MLX5_FLOW_NAMESPACE_FDB,
&e->encap_id);
if (err)
goto destroy_neigh_entry;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
neigh_release(n);
return err;
destroy_neigh_entry:
mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
free_encap:
/* NOTE(review): when reached after e->encap_header was assigned
 * (reformat alloc failure), e->encap_header is left pointing at the
 * freed buffer - confirm callers discard @e on error.
 */
kfree(encap_header);
out:
if (n)
neigh_release(n);
return err;
}
/*
 * Classify @tunnel_dev as one of the MLX5E_TC_TUNNEL_TYPE_* values.
 * Both gretap and ip6gretap devices map to the GRETAP type; anything that
 * is neither VXLAN nor gretap is reported as UNKNOWN.
 */
int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev)
{
	if (netif_is_vxlan(tunnel_dev))
		return MLX5E_TC_TUNNEL_TYPE_VXLAN;

	if (netif_is_gretap(tunnel_dev) || netif_is_ip6gretap(tunnel_dev))
		return MLX5E_TC_TUNNEL_TYPE_GRETAP;

	return MLX5E_TC_TUNNEL_TYPE_UNKNOWN;
}
/*
 * Tell whether @netdev is a tunnel device whose encap/decap the e-switch of
 * @priv reports support for: VXLAN needs the vxlan_encap_decap capability,
 * gretap needs nvgre_encap_decap. Any other device yields false.
 */
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
				    struct net_device *netdev)
{
	switch (mlx5e_tc_tun_get_type(netdev)) {
	case MLX5E_TC_TUNNEL_TYPE_VXLAN:
		return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap);
	case MLX5E_TC_TUNNEL_TYPE_GRETAP:
		return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap);
	default:
		return false;
	}
}
/*
 * Initialise the tunnel-dependent attributes of encap entry @e
 * (tunnel_type, reformat_type, tunnel_hlen) according to @tunnel_dev.
 *
 * For VXLAN the UDP destination port in the tunnel key must be a port
 * found by mlx5_vxlan_lookup_port(); otherwise an extack message and a
 * netdev warning are emitted and -EOPNOTSUPP is returned.
 * For an unknown tunnel type reformat_type and tunnel_hlen are set to -1
 * and -EOPNOTSUPP is returned.
 *
 * Returns 0 on success.
 */
int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
				 struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct netlink_ext_ack *extack)
{
	e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev);

	switch (e->tunnel_type) {
	case MLX5E_TC_TUNNEL_TYPE_VXLAN: {
		int dst_port = be16_to_cpu(e->tun_info.key.tp_dst);

		if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "vxlan udp dport was not registered with the HW");
			netdev_warn(priv->netdev,
				    "%d isn't an offloaded vxlan udp dport\n",
				    dst_port);
			return -EOPNOTSUPP;
		}

		e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
		e->tunnel_hlen = VXLAN_HLEN;
		return 0;
	}
	case MLX5E_TC_TUNNEL_TYPE_GRETAP:
		e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
		e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags);
		return 0;
	default:
		e->reformat_type = -1;
		e->tunnel_hlen = -1;
		return -EOPNOTSUPP;
	}
}
/*
 * Translate the VXLAN tunnel part of flower rule @f into the match
 * criteria/value of @spec.
 *
 * The rule must match the complete encapsulation UDP destination port, and
 * that port must be found by mlx5_vxlan_lookup_port(); otherwise an extack
 * message and a netdev warning are emitted and -EOPNOTSUPP is returned.
 *
 * On success the outer headers (@headers_c / @headers_v) are set to match
 * IP protocol UDP plus the UDP source/destination ports of the rule, and,
 * when the rule carries an encapsulation key id, the VNI is matched in the
 * misc parameters.
 *
 * Returns 0 on success.
 */
static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
				    struct mlx5_flow_spec *spec,
				    struct tc_cls_flower_offload *f,
				    void *headers_c,
				    void *headers_v)
{
	struct netlink_ext_ack *extack = f->common.extack;
	struct flow_dissector_key_ports *key =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_ENC_PORTS,
					  f->key);
	struct flow_dissector_key_ports *mask =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_ENC_PORTS,
					  f->mask);
	void *misc_c = MLX5_ADDR_OF(fte_match_param,
				    spec->match_criteria,
				    misc_parameters);
	void *misc_v = MLX5_ADDR_OF(fte_match_param,
				    spec->match_value,
				    misc_parameters);

	/* Full udp dst port must be given */
	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) ||
	    memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "VXLAN decap filter must include enc_dst_port condition");
		netdev_warn(priv->netdev,
			    "VXLAN decap filter must include enc_dst_port condition\n");
		return -EOPNOTSUPP;
	}

	/* udp dst port must be known as a VXLAN port */
	if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Matched UDP port is not registered as a VXLAN port");
		netdev_warn(priv->netdev,
			    "UDP port %d is not registered as a VXLAN port\n",
			    be16_to_cpu(key->dst));
		return -EOPNOTSUPP;
	}

	/* dst UDP port is valid here */
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst));
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst));

	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src));
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src));

	/* match on VNI */
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		/* Distinct names: these must not shadow the port key/mask
		 * above, which have a different type.
		 */
		struct flow_dissector_key_keyid *vni_key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->key);
		struct flow_dissector_key_keyid *vni_mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->mask);

		MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
			 be32_to_cpu(vni_mask->keyid));
		MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
			 be32_to_cpu(vni_key->keyid));
	}

	return 0;
}
/*
 * Translate the gretap tunnel part of flower rule @f into the match
 * criteria/value of @spec.
 *
 * Requires the e-switch nvgre_encap_decap capability; without it an extack
 * message and a netdev warning are emitted and -EOPNOTSUPP is returned.
 *
 * On success the outer headers match IP protocol GRE, the misc parameters
 * match GRE protocol ETH_P_TEB, and - when the rule carries an
 * encapsulation key id - the GRE key.
 *
 * Returns 0 on success.
 */
static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
				     struct mlx5_flow_spec *spec,
				     struct tc_cls_flower_offload *f,
				     void *outer_headers_c,
				     void *outer_headers_v)
{
	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);
	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);
	struct flow_dissector_key_keyid *keyid_mask;
	struct flow_dissector_key_keyid *keyid_val;

	if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) {
		NL_SET_ERR_MSG_MOD(f->common.extack,
				   "GRE HW offloading is not supported");
		netdev_warn(priv->netdev, "GRE HW offloading is not supported\n");
		return -EOPNOTSUPP;
	}

	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
		 ip_protocol, IPPROTO_GRE);

	/* gre protocol */
	MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
	MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);

	/* gre key is optional */
	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID))
		return 0;

	keyid_mask = skb_flow_dissector_target(f->dissector,
					       FLOW_DISSECTOR_KEY_ENC_KEYID,
					       f->mask);
	MLX5_SET(fte_match_set_misc, misc_c,
		 gre_key.key, be32_to_cpu(keyid_mask->keyid));

	keyid_val = skb_flow_dissector_target(f->dissector,
					      FLOW_DISSECTOR_KEY_ENC_KEYID,
					      f->key);
	MLX5_SET(fte_match_set_misc, misc_v,
		 gre_key.key, be32_to_cpu(keyid_val->keyid));

	return 0;
}
/*
 * Parse the tunnel match of flower rule @f into @spec, dispatching on the
 * tunnel type of @filter_dev (VXLAN or gretap).
 *
 * Returns the result of the per-tunnel parser, or -EOPNOTSUPP (after a
 * netdev warning naming the device kind) for any other device.
 */
int mlx5e_tc_tun_parse(struct net_device *filter_dev,
		       struct mlx5e_priv *priv,
		       struct mlx5_flow_spec *spec,
		       struct tc_cls_flower_offload *f,
		       void *headers_c,
		       void *headers_v)
{
	int tunnel_type = mlx5e_tc_tun_get_type(filter_dev);

	switch (tunnel_type) {
	case MLX5E_TC_TUNNEL_TYPE_VXLAN:
		return mlx5e_tc_tun_parse_vxlan(priv, spec, f,
						headers_c, headers_v);
	case MLX5E_TC_TUNNEL_TYPE_GRETAP:
		return mlx5e_tc_tun_parse_gretap(priv, spec, f,
						 headers_c, headers_v);
	default:
		netdev_warn(priv->netdev,
			    "decapsulation offload is not supported for %s net device (%d)\n",
			    mlx5e_netdev_kind(filter_dev), tunnel_type);
		return -EOPNOTSUPP;
	}
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */
#ifndef __MLX5_EN_TC_TUNNEL_H__
#define __MLX5_EN_TC_TUNNEL_H__
#include <linux/netdevice.h>
#include <linux/mlx5/fs.h>
#include <net/pkt_cls.h>
#include <linux/netlink.h>
#include "en.h"
#include "en_rep.h"
/* Tunnel device classification, as returned by mlx5e_tc_tun_get_type(). */
enum {
/* device is neither VXLAN nor gretap/ip6gretap */
MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
/* netif_is_vxlan() device */
MLX5E_TC_TUNNEL_TYPE_VXLAN,
/* netif_is_gretap() or netif_is_ip6gretap() device */
MLX5E_TC_TUNNEL_TYPE_GRETAP
};
int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e,
struct netlink_ext_ack *extack);
int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev);
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f,
void *headers_c,
void *headers_v);
#endif //__MLX5_EN_TC_TUNNEL_H__
......@@ -49,6 +49,7 @@
#include "lib/clock.h"
#include "en/port.h"
#include "en/xdp.h"
#include "lib/eq.h"
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
......@@ -293,33 +294,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->update_stats_work);
}
static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
enum mlx5_dev_event event, unsigned long param)
static int async_event(struct notifier_block *nb, unsigned long event, void *data)
{
struct mlx5e_priv *priv = vpriv;
struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
struct mlx5_eqe *eqe = data;
if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
return;
if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
return NOTIFY_DONE;
switch (event) {
case MLX5_DEV_EVENT_PORT_UP:
case MLX5_DEV_EVENT_PORT_DOWN:
switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
queue_work(priv->wq, &priv->update_carrier_work);
break;
default:
break;
return NOTIFY_DONE;
}
return NOTIFY_OK;
}
static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
priv->events_nb.notifier_call = async_event;
mlx5_notifier_register(priv->mdev, &priv->events_nb);
}
static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
}
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
......@@ -1763,11 +1766,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}
static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
{
return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
}
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
......@@ -1918,9 +1916,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_channel_param *cparam,
struct mlx5e_channel **cp)
{
int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
struct net_dim_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
unsigned int irq;
int err;
......@@ -3388,11 +3386,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
{
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return mlx5e_configure_flower(priv, cls_flower, flags);
return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
flags);
case TC_CLSFLOWER_DESTROY:
return mlx5e_delete_flower(priv, cls_flower, flags);
return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
flags);
case TC_CLSFLOWER_STATS:
return mlx5e_stats_flower(priv, cls_flower, flags);
return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
flags);
default:
return -EOPNOTSUPP;
}
......@@ -4137,17 +4138,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
struct mlx5e_txqsq *sq)
{
struct mlx5_eq *eq = sq->cq.mcq.eq;
struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
u32 eqe_count;
netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
eq->eqn, eq->cons_index, eq->irqn);
eq->core.eqn, eq->core.cons_index, eq->core.irqn);
eqe_count = mlx5_eq_poll_irq_disabled(eq);
if (!eqe_count)
return false;
netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
sq->channel->stats->eq_rearm++;
return true;
}
......@@ -4988,7 +4989,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
netif_carrier_off(netdev);
#ifdef CONFIG_MLX5_EN_ARFS
netdev->rx_cpu_rmap = mdev->rmap;
netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev);
#endif
return 0;
......@@ -5200,21 +5201,12 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
kfree(ppriv);
}
static void *mlx5e_get_netdev(void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
return priv->netdev;
}
static struct mlx5_interface mlx5e_interface = {
.add = mlx5e_add,
.remove = mlx5e_remove,
.attach = mlx5e_attach,
.detach = mlx5e_detach,
.event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
.get_dev = mlx5e_get_netdev,
};
void mlx5e_init(void)
......
......@@ -42,6 +42,7 @@
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "en/tc_tun.h"
#include "fs_core.h"
#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
......@@ -49,6 +50,15 @@
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
/*
 * Per-binding state for an indirect TC block: one entry is allocated when a
 * block is bound for a device and handed to the block callback as its
 * private data.
 */
struct mlx5e_rep_indr_block_priv {
/* device the indirect block was bound for */
struct net_device *netdev;
/* representor private data owning the list this entry is linked on */
struct mlx5e_rep_priv *rpriv;
/* link in rpriv->uplink_priv.tc_indr_block_priv_list */
struct list_head list;
};
static void mlx5e_rep_indr_unregister_block(struct net_device *netdev);
static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
......@@ -518,6 +528,186 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
neigh_release(n);
}
static struct mlx5e_rep_indr_block_priv *
mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
struct net_device *netdev)
{
struct mlx5e_rep_indr_block_priv *cb_priv;
/* All callback list access should be protected by RTNL. */
ASSERT_RTNL();
list_for_each_entry(cb_priv,
&rpriv->uplink_priv.tc_indr_block_priv_list,
list)
if (cb_priv->netdev == netdev)
return cb_priv;
return NULL;
}
/*
 * Tear down every indirect-block entry still recorded on the uplink list of
 * @rpriv: unregister the indirect block callback for the entry's device and
 * free the entry.
 *
 * NOTE(review): entries are freed without list_del(), so the list head is
 * left stale afterwards; also confirm the unregister call cannot itself
 * free the entry (e.g. via an unbind callback) before the kfree() here.
 */
static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
{
	struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
	struct mlx5e_rep_indr_block_priv *entry;
	struct mlx5e_rep_indr_block_priv *next;

	list_for_each_entry_safe(entry, next, head, list) {
		mlx5e_rep_indr_unregister_block(entry->netdev);
		kfree(entry);
	}
}
/*
 * Apply flower command @flower, received through an indirect block bound
 * for @netdev, on the representor that owns @indr_priv. All commands are
 * issued with the MLX5E_TC_EGRESS flag.
 *
 * Returns the result of the flower operation, or -EOPNOTSUPP for an
 * unhandled command.
 */
static int
mlx5e_rep_indr_offload(struct net_device *netdev,
		       struct tc_cls_flower_offload *flower,
		       struct mlx5e_rep_indr_block_priv *indr_priv)
{
	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);

	switch (flower->command) {
	case TC_CLSFLOWER_REPLACE:
		return mlx5e_configure_flower(netdev, priv,
					      flower, MLX5E_TC_EGRESS);
	case TC_CLSFLOWER_DESTROY:
		return mlx5e_delete_flower(netdev, priv,
					   flower, MLX5E_TC_EGRESS);
	case TC_CLSFLOWER_STATS:
		return mlx5e_stats_flower(netdev, priv,
					  flower, MLX5E_TC_EGRESS);
	default:
		return -EOPNOTSUPP;
	}
}
/*
 * Block callback registered for indirect blocks. Only flower classifier
 * setup is handled; every other setup type returns -EOPNOTSUPP.
 * @indr_priv is the struct mlx5e_rep_indr_block_priv registered with the
 * callback.
 */
static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type,
					 void *type_data, void *indr_priv)
{
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	return mlx5e_rep_indr_offload(priv->netdev, type_data, priv);
}
/*
 * Handle bind/unbind of an indirect TC block for @netdev on behalf of
 * @rpriv. Only clsact ingress binders are accepted.
 *
 * BIND:   allocate a per-device entry, link it on the uplink list and
 *         register the block callback; the entry is unlinked and freed
 *         again when registration fails.
 * UNBIND: unregister the block callback, then unlink and free the entry if
 *         one is recorded for @netdev.
 *
 * Returns 0 on success, -EEXIST when @netdev already has an entry, -ENOMEM
 * on allocation failure, the registration error, or -EOPNOTSUPP for an
 * unsupported binder type or command.
 */
static int
mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
			      struct mlx5e_rep_priv *rpriv,
			      struct tc_block_offload *f)
{
	struct mlx5e_rep_indr_block_priv *entry;
	int err;

	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
		return -EOPNOTSUPP;

	switch (f->command) {
	case TC_BLOCK_BIND:
		if (mlx5e_rep_indr_block_priv_lookup(rpriv, netdev))
			return -EEXIST;

		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry)
			return -ENOMEM;

		entry->netdev = netdev;
		entry->rpriv = rpriv;
		list_add(&entry->list,
			 &rpriv->uplink_priv.tc_indr_block_priv_list);

		err = tcf_block_cb_register(f->block,
					    mlx5e_rep_indr_setup_block_cb,
					    netdev, entry, f->extack);
		if (!err)
			return 0;

		/* registration failed: undo the bookkeeping above */
		list_del(&entry->list);
		kfree(entry);
		return err;

	case TC_BLOCK_UNBIND:
		tcf_block_cb_unregister(f->block,
					mlx5e_rep_indr_setup_block_cb,
					netdev);

		entry = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
		if (entry) {
			list_del(&entry->list);
			kfree(entry);
		}
		return 0;

	default:
		return -EOPNOTSUPP;
	}
}
/*
 * Indirect block bind callback: forwards TC_SETUP_BLOCK requests for
 * @netdev to the block bind/unbind handler, with @cb_priv passed through
 * as the representor private data. Other setup types return -EOPNOTSUPP.
 */
static
int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
			       enum tc_setup_type type, void *type_data)
{
	if (type != TC_SETUP_BLOCK)
		return -EOPNOTSUPP;

	return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv, type_data);
}
/*
 * Register the indirect block callback for @netdev, with @rpriv as the
 * callback private data and @netdev as the callback identity. A failure is
 * logged against the representor's core device.
 *
 * Returns 0 or the registration error.
 */
static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
					 struct net_device *netdev)
{
	struct mlx5e_priv *priv;
	int err;

	err = __tc_indr_block_cb_register(netdev, rpriv,
					  mlx5e_rep_indr_setup_tc_cb,
					  netdev);
	if (!err)
		return 0;

	priv = netdev_priv(rpriv->netdev);
	mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
		      netdev_name(netdev), err);

	return err;
}
/*
 * Unregister the indirect block callback for @netdev, using the same
 * callback and identity that the register path passes.
 */
static void mlx5e_rep_indr_unregister_block(struct net_device *netdev)
{
	__tc_indr_block_cb_unregister(netdev,
				      mlx5e_rep_indr_setup_tc_cb,
				      netdev);
}
/*
 * Netdevice notifier of the uplink representor. For devices that qualify
 * as offloadable tunnel devices, the indirect block callback is registered
 * on NETDEV_REGISTER and unregistered on NETDEV_UNREGISTER. All other
 * devices and events are ignored. Always returns NOTIFY_OK.
 */
static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
					 unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    uplink_priv.netdevice_nb);
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	if (!mlx5e_tc_tun_device_to_offload(priv, netdev))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER)
		mlx5e_rep_indr_register_block(rpriv, netdev);
	else if (event == NETDEV_UNREGISTER)
		mlx5e_rep_indr_unregister_block(netdev);

	return NOTIFY_OK;
}
static struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh);
......@@ -838,24 +1028,14 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
{
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return mlx5e_configure_flower(priv, cls_flower, flags);
return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
flags);
case TC_CLSFLOWER_DESTROY:
return mlx5e_delete_flower(priv, cls_flower, flags);
return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
flags);
case TC_CLSFLOWER_STATS:
return mlx5e_stats_flower(priv, cls_flower, flags);
default:
return -EOPNOTSUPP;
}
}
static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
void *cb_priv)
{
struct mlx5e_priv *priv = cb_priv;
switch (type) {
case TC_SETUP_CLSFLOWER:
return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
flags);
default:
return -EOPNOTSUPP;
}
......@@ -1244,7 +1424,7 @@ mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
int err;
if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
......@@ -1258,12 +1438,23 @@ mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto err_remove_sqs;
/* init shared tc flow table */
err = mlx5e_tc_esw_init(&rpriv->tc_ht);
err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
if (err)
goto err_neigh_cleanup;
/* init indirect block notifications */
INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
err = register_netdevice_notifier(&uplink_priv->netdevice_nb);
if (err) {
mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
goto err_indirect_block_cleanup;
}
return 0;
err_indirect_block_cleanup:
mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
err_neigh_cleanup:
mlx5e_rep_neigh_cleanup(rpriv);
err_remove_sqs:
......@@ -1280,8 +1471,12 @@ mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
if (test_bit(MLX5E_STATE_OPENED, &priv->state))
mlx5e_remove_sqs_fwd_rules(priv);
/* clean indirect TC block notifications */
unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb);
mlx5e_rep_indr_clean_block_privs(rpriv);
/* clean uplink offloaded TC rules, delete shared tc flow table */
mlx5e_tc_esw_cleanup(&rpriv->tc_ht);
mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
mlx5e_rep_neigh_cleanup(rpriv);
}
......@@ -1329,24 +1524,16 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
upriv = netdev_priv(uplink_rpriv->netdev);
err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev,
upriv);
if (err)
goto err_neigh_cleanup;
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
goto err_egdev_cleanup;
goto err_neigh_cleanup;
}
return 0;
err_egdev_cleanup:
tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
upriv);
err_neigh_cleanup:
mlx5e_rep_neigh_cleanup(rpriv);
......@@ -1373,8 +1560,6 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
REP_ETH);
upriv = netdev_priv(uplink_rpriv->netdev);
tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
upriv);
mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_detach_netdev(priv);
mlx5e_destroy_netdev(priv);
......
......@@ -53,13 +53,33 @@ struct mlx5e_neigh_update_table {
unsigned long min_interval; /* jiffies */
};
/* State that is only meaningful for the uplink representor: the shared TC
 * filter table plus the bookkeeping for indirect TC block callbacks.
 */
struct mlx5_rep_uplink_priv {
/* Filters DB - instantiated by the uplink representor and shared by
 * the uplink's VFs
 */
struct rhashtable tc_ht;
/* indirect block callbacks are invoked on bind/unbind events
 * on registered higher level devices (e.g. tunnel devices)
 *
 * tc_indr_block_priv_list is used to lookup indirect callback
 * private data
 *
 * netdevice_nb is the netdev events notifier - used to register
 * tunnel devices for block events
 */
struct list_head tc_indr_block_priv_list;
struct notifier_block netdevice_nb;
};
struct mlx5e_rep_priv {
struct mlx5_eswitch_rep *rep;
struct mlx5e_neigh_update_table neigh_update;
struct net_device *netdev;
struct mlx5_flow_handle *vport_rx_rule;
struct list_head vport_sqs_list;
struct rhashtable tc_ht; /* valid for uplink rep */
struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
};
static inline
......@@ -129,6 +149,8 @@ struct mlx5e_encap_entry {
struct net_device *out_dev;
int tunnel_type;
int tunnel_hlen;
int reformat_type;
u8 flags;
char *encap_header;
int encap_size;
......
......@@ -554,9 +554,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
mlx5_cqwq_pop(&cq->wq);
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
netdev_WARN_ONCE(cq->channel->netdev,
"Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own);
"Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
return;
}
......@@ -898,7 +898,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
prefetchw(va); /* xdp_frame data area */
prefetch(data);
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
......@@ -930,7 +930,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
u16 byte_cnt = cqe_bcnt - headlen;
struct sk_buff *skb;
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
......@@ -1154,7 +1154,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
wi->consumed_strides += cstrides;
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
goto mpwrq_cqe_out;
}
......
......@@ -30,6 +30,7 @@
* SOFTWARE.
*/
#include "lib/mlx5.h"
#include "en.h"
#include "en_accel/ipsec.h"
#include "en_accel/tls.h"
......@@ -1088,13 +1089,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
}
static const struct counter_desc mlx5e_pme_status_desc[] = {
{ "module_unplug", 8 },
{ "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED },
};
static const struct counter_desc mlx5e_pme_error_desc[] = {
{ "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */
{ "module_high_temp", 48 }, /* high temperature */
{ "module_bad_shorted", 56 }, /* bad or shorted cable/module */
{ "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK },
{ "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE },
{ "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE },
};
#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc)
......@@ -1122,15 +1123,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
int idx)
{
struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
struct mlx5_pme_stats pme_stats;
int i;
mlx5_get_pme_stats(priv->mdev, &pme_stats);
for (i = 0; i < NUM_PME_STATUS_STATS; i++)
data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
mlx5e_pme_status_desc, i);
for (i = 0; i < NUM_PME_ERR_STATS; i++)
data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
mlx5e_pme_error_desc, i);
return idx;
......
......@@ -51,12 +51,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
int mlx5e_configure_flower(struct mlx5e_priv *priv,
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
int mlx5e_delete_flower(struct mlx5e_priv *priv,
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
int mlx5e_stats_flower(struct mlx5e_priv *priv,
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
struct mlx5e_encap_entry;
......@@ -70,6 +70,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
int mlx5e_tc_num_filters(struct mlx5e_priv *priv);
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
......
......@@ -507,7 +507,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
&sq->state)) {
mlx5e_dump_error_cqe(sq,
......
......@@ -36,8 +36,10 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "lib/eq.h"
#include "eswitch.h"
#include "fs_core.h"
#include "lib/eq.h"
#define UPLINK_VPORT 0xFFFF
......@@ -1567,7 +1569,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
/* Mark this vport as disabled to discard new events */
vport->enabled = false;
synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
/* Wait for current already scheduled events to complete */
flush_workqueue(esw->work_queue);
/* Disable events from this vport */
......@@ -1593,10 +1594,25 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
mutex_unlock(&esw->state_lock);
}
/* EQ notifier for vport change events.
 *
 * Reads the vport number from the EQE and, if that vport is currently
 * enabled, queues its vport_change_handler on the eswitch workqueue.
 * Events for disabled or out-of-range vports are dropped. Always returns
 * NOTIFY_OK; @type is not used.
 */
static int eswitch_vport_event(struct notifier_block *nb,
			       unsigned long type, void *data)
{
	struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
	struct mlx5_eqe *eqe = data;
	struct mlx5_vport *vport;
	u16 vport_num;

	vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);

	/* The index comes straight from the event payload; esw->total_vports
	 * is the bound used elsewhere in this file for valid vport numbers,
	 * so refuse anything beyond it rather than indexing past the array.
	 */
	if (vport_num >= esw->total_vports)
		return NOTIFY_OK;

	vport = &esw->vports[vport_num];
	if (vport->enabled)
		queue_work(esw->work_queue, &vport->vport_change_handler);

	return NOTIFY_OK;
}
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
{
int err;
......@@ -1640,6 +1656,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
for (i = 0; i <= nvfs; i++)
esw_enable_vport(esw, i, enabled_events);
if (mode == SRIOV_LEGACY) {
MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
mlx5_eq_notifier_register(esw->dev, &esw->nb);
}
esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
esw->enabled_vports);
return 0;
......@@ -1669,6 +1690,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
mc_promisc = &esw->mc_promisc;
nvports = esw->enabled_vports;
if (esw->mode == SRIOV_LEGACY)
mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
for (i = 0; i < esw->total_vports; i++)
esw_disable_vport(esw, i);
......@@ -1777,23 +1801,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
kfree(esw);
}
/* Handle a vport change EQE: warn and bail out when no eswitch exists,
 * otherwise queue the change handler of the addressed vport if that vport
 * is enabled.
 */
void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
{
	u16 vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
	struct mlx5_vport *vport;

	if (!esw) {
		pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
			vport_num);
		return;
	}

	vport = &esw->vports[vport_num];
	if (!vport->enabled)
		return;

	queue_work(esw->work_queue, &vport->vport_change_handler);
}
/* Vport Administration */
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
......
......@@ -181,6 +181,7 @@ struct esw_mc_addr { /* SRIOV only */
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
struct mlx5_eswitch_fdb fdb_table;
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
......@@ -211,7 +212,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
......@@ -352,7 +352,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
......
......@@ -125,8 +125,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
dest[i].vport.num = attr->out_rep[j]->vport;
dest[i].vport.vhca_id =
MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
dest[i].vport.vhca_id_valid =
!!MLX5_CAP_ESW(esw->dev, merged_eswitch);
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
dest[i].vport.flags |=
MLX5_FLOW_DEST_VPORT_VHCA_ID;
i++;
}
}
......@@ -220,7 +221,8 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
dest[i].vport.num = attr->out_rep[i]->vport;
dest[i].vport.vhca_id =
MLX5_CAP_GEN(attr->out_mdev[i], vhca_id);
dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[i].ft = fwd_fdb,
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2018 Mellanox Technologies
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/mlx5.h"
/* An EQ notifier block paired with a context pointer. The handlers in this
 * file recover this wrapper from the notifier_block via mlx5_nb_cof() and
 * read ctx back as the owning struct mlx5_events.
 */
struct mlx5_event_nb {
struct mlx5_nb nb;
void *ctx;
};
/* General event handlers for the low level mlx5_core driver
 *
 * Other major feature-specific events, such as clock/eswitch/fpga/FW trace
 * and many others, are handled elsewhere by separate notifier callbacks
 * that belong to the respective mlx5 components.
 */
static int any_notifier(struct notifier_block *, unsigned long, void *);
static int temp_warn(struct notifier_block *, unsigned long, void *);
static int port_module(struct notifier_block *, unsigned long, void *);
/* handler which forwards the event to events->nh, driver notifiers */
static int forward_event(struct notifier_block *, unsigned long, void *);
/* Template notifier blocks, one per handled event type. mlx5_events_start()
 * copies each entry into the per-device notifiers[] array before
 * registering it, and mlx5_events_stop() unregisters them in reverse order.
 */
static struct mlx5_nb events_nbs_ref[] = {
/* Events to be processed by mlx5_core */
{.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
{.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
{.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
/* QP/WQ resource events to forward */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
/* SRQ events */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
};
/* Per-device event state; allocated by mlx5_events_init() and stored in
 * dev->priv.events.
 */
struct mlx5_events {
struct mlx5_core_dev *dev;
/* one slot per events_nbs_ref[] entry, filled in by mlx5_events_start() */
struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
/* driver notifier chain */
struct atomic_notifier_head nh;
/* port module events stats */
struct mlx5_pme_stats pme_stats;
};
/* Map an EQE type to the name of its enumerator, for log messages.
 * Types that have no entry in the table yield "Unrecognized event".
 */
static const char *eqe_type_str(u8 type)
{
	/* Terminated by an entry whose name is NULL. */
	static const struct {
		int type;
		const char *name;
	} names[] = {
		{ MLX5_EVENT_TYPE_COMP, "MLX5_EVENT_TYPE_COMP" },
		{ MLX5_EVENT_TYPE_PATH_MIG, "MLX5_EVENT_TYPE_PATH_MIG" },
		{ MLX5_EVENT_TYPE_COMM_EST, "MLX5_EVENT_TYPE_COMM_EST" },
		{ MLX5_EVENT_TYPE_SQ_DRAINED, "MLX5_EVENT_TYPE_SQ_DRAINED" },
		{ MLX5_EVENT_TYPE_SRQ_LAST_WQE, "MLX5_EVENT_TYPE_SRQ_LAST_WQE" },
		{ MLX5_EVENT_TYPE_SRQ_RQ_LIMIT, "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT" },
		{ MLX5_EVENT_TYPE_CQ_ERROR, "MLX5_EVENT_TYPE_CQ_ERROR" },
		{ MLX5_EVENT_TYPE_WQ_CATAS_ERROR, "MLX5_EVENT_TYPE_WQ_CATAS_ERROR" },
		{ MLX5_EVENT_TYPE_PATH_MIG_FAILED, "MLX5_EVENT_TYPE_PATH_MIG_FAILED" },
		{ MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR, "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR" },
		{ MLX5_EVENT_TYPE_WQ_ACCESS_ERROR, "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR" },
		{ MLX5_EVENT_TYPE_SRQ_CATAS_ERROR, "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR" },
		{ MLX5_EVENT_TYPE_INTERNAL_ERROR, "MLX5_EVENT_TYPE_INTERNAL_ERROR" },
		{ MLX5_EVENT_TYPE_PORT_CHANGE, "MLX5_EVENT_TYPE_PORT_CHANGE" },
		{ MLX5_EVENT_TYPE_GPIO_EVENT, "MLX5_EVENT_TYPE_GPIO_EVENT" },
		{ MLX5_EVENT_TYPE_PORT_MODULE_EVENT, "MLX5_EVENT_TYPE_PORT_MODULE_EVENT" },
		{ MLX5_EVENT_TYPE_TEMP_WARN_EVENT, "MLX5_EVENT_TYPE_TEMP_WARN_EVENT" },
		{ MLX5_EVENT_TYPE_REMOTE_CONFIG, "MLX5_EVENT_TYPE_REMOTE_CONFIG" },
		{ MLX5_EVENT_TYPE_DB_BF_CONGESTION, "MLX5_EVENT_TYPE_DB_BF_CONGESTION" },
		{ MLX5_EVENT_TYPE_STALL_EVENT, "MLX5_EVENT_TYPE_STALL_EVENT" },
		{ MLX5_EVENT_TYPE_CMD, "MLX5_EVENT_TYPE_CMD" },
		{ MLX5_EVENT_TYPE_PAGE_REQUEST, "MLX5_EVENT_TYPE_PAGE_REQUEST" },
		{ MLX5_EVENT_TYPE_PAGE_FAULT, "MLX5_EVENT_TYPE_PAGE_FAULT" },
		{ MLX5_EVENT_TYPE_PPS_EVENT, "MLX5_EVENT_TYPE_PPS_EVENT" },
		{ MLX5_EVENT_TYPE_NIC_VPORT_CHANGE, "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE" },
		{ MLX5_EVENT_TYPE_FPGA_ERROR, "MLX5_EVENT_TYPE_FPGA_ERROR" },
		{ MLX5_EVENT_TYPE_FPGA_QP_ERROR, "MLX5_EVENT_TYPE_FPGA_QP_ERROR" },
		{ MLX5_EVENT_TYPE_GENERAL_EVENT, "MLX5_EVENT_TYPE_GENERAL_EVENT" },
		{ MLX5_EVENT_TYPE_MONITOR_COUNTER, "MLX5_EVENT_TYPE_MONITOR_COUNTER" },
		{ MLX5_EVENT_TYPE_DEVICE_TRACER, "MLX5_EVENT_TYPE_DEVICE_TRACER" },
		{ 0, NULL },
	};
	int i;

	for (i = 0; names[i].name; i++) {
		if (names[i].type == type)
			return names[i].name;
	}

	return "Unrecognized event";
}
/* Handles all FW events (type == eqe->type): emits one debug line with the
 * event's type name and sub type. @type itself is not consulted; the values
 * are read from the EQE passed in @data.
 */
static int any_notifier(struct notifier_block *nb,
			unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb;
	struct mlx5_events *events;
	struct mlx5_eqe *eqe = data;

	event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	events = event_nb->ctx;

	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
		      eqe_type_str(eqe->type), eqe->sub_type);

	return NOTIFY_OK;
}
/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT
 *
 * Logs a warning carrying the two 64-bit sensor warning words of the EQE
 * (converted from big endian), most significant word first. Always returns
 * NOTIFY_OK.
 */
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events *events = event_nb->ctx;
	struct mlx5_eqe *eqe = data;
	u64 value_lsb;
	u64 value_msb;

	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);

	/* Newline-terminated so the record is complete on its own and cannot
	 * be glued to whatever is logged next.
	 */
	mlx5_core_warn(events->dev,
		       "High temperature on sensors with bit set %llx %llx\n",
		       value_msb, value_lsb);

	return NOTIFY_OK;
}
/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
/* Text for a port module status as used in the port module log messages;
 * values without a dedicated message map to "Unknown status".
 */
static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
{
	const char *str = "Unknown status";

	switch (status) {
	case MLX5_MODULE_STATUS_PLUGGED:
		str = "Cable plugged";
		break;
	case MLX5_MODULE_STATUS_UNPLUGGED:
		str = "Cable unplugged";
		break;
	case MLX5_MODULE_STATUS_ERROR:
		str = "Cable error";
		break;
	case MLX5_MODULE_STATUS_DISABLED:
		str = "Cable disabled";
		break;
	default:
		break;
	}

	return str;
}
/* Text for a port module error type as used in the port module error log
 * message; values without a dedicated message map to "Unknown error".
 */
static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
{
	const char *str = "Unknown error";

	switch (error) {
	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
		str = "Power budget exceeded";
		break;
	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
		str = "Long Range for non MLNX cable";
		break;
	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
		str = "Bus stuck (I2C or data shorted)";
		break;
	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
		str = "No EEPROM/retry timeout";
		break;
	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
		str = "Enforce part number list";
		break;
	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
		str = "Unknown identifier";
		break;
	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
		str = "High Temperature";
		break;
	case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
		str = "Bad or shorted cable/module";
		break;
	case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
		str = "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
		break;
	default:
		break;
	}

	return str;
}
/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT
 *
 * Bumps the per-status counter (and, for error status, the per-error
 * counter) when the masked values are within the counter arrays, then logs
 * the event unless printk_ratelimit() says to stay quiet. Always returns
 * NOTIFY_OK.
 */
static int port_module(struct notifier_block *nb, unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events *events = event_nb->ctx;
	struct mlx5_eqe *eqe = data;
	struct mlx5_eqe_port_module *module_event_eqe = &eqe->data.port_module;
	enum port_module_event_status_type module_status;
	enum port_module_event_error_type error_type;
	u8 module_num = module_event_eqe->module;

	module_status = module_event_eqe->module_status &
			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
	error_type = module_event_eqe->error_type &
		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;

	/* Counters are updated even when the log line ends up rate limited. */
	if (module_status < MLX5_MODULE_STATUS_NUM)
		events->pme_stats.status_counters[module_status]++;
	if (module_status == MLX5_MODULE_STATUS_ERROR &&
	    error_type < MLX5_MODULE_EVENT_ERROR_NUM)
		events->pme_stats.error_counters[error_type]++;

	if (!printk_ratelimit())
		return NOTIFY_OK;

	if (module_status != MLX5_MODULE_STATUS_ERROR) {
		mlx5_core_info(events->dev,
			       "Port module event: module %u, %s\n",
			       module_num,
			       mlx5_pme_status_to_string(module_status));
		return NOTIFY_OK;
	}

	mlx5_core_err(events->dev,
		      "Port module event[error]: module %u, %s, %s\n",
		      module_num,
		      mlx5_pme_status_to_string(module_status),
		      mlx5_pme_error_to_string(error_type));

	return NOTIFY_OK;
}
void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
{
*stats = dev->priv.events->pme_stats;
}
/* Forward the event as is to registered interfaces (mlx5e/mlx5_ib): log a
 * debug line, then run the driver notifier chain with the same event and
 * data. The chain's return value is ignored; NOTIFY_OK is always returned.
 */
static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
{
	struct mlx5_event_nb *event_nb;
	struct mlx5_events *events;
	struct mlx5_eqe *eqe = data;

	event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	events = event_nb->ctx;

	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
		      eqe_type_str(eqe->type), eqe->sub_type);
	atomic_notifier_call_chain(&events->nh, event, data);

	return NOTIFY_OK;
}
/* Allocate the zeroed per-device event state, initialise its notifier chain
 * head and publish it in dev->priv.events.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails.
 */
int mlx5_events_init(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events;

	events = kzalloc(sizeof(*events), GFP_KERNEL);
	if (!events)
		return -ENOMEM;

	events->dev = dev;
	ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);

	dev->priv.events = events;
	return 0;
}
/* Release the event state set up by mlx5_events_init(). */
void mlx5_events_cleanup(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events = dev->priv.events;

	kvfree(events);
}
void mlx5_events_start(struct mlx5_core_dev *dev)
{
struct mlx5_events *events = dev->priv.events;
int i;
for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
events->notifiers[i].nb = events_nbs_ref[i];
events->notifiers[i].ctx = events;
mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
}
}
/* Unregister the notifiers set up by mlx5_events_start(), walking the array
 * from the last slot down to the first.
 */
void mlx5_events_stop(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events = dev->priv.events;
	int i = ARRAY_SIZE(events_nbs_ref);

	while (i-- > 0)
		mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
}
/* Add @nb to the device's driver notifier chain, i.e. the chain that
 * forward_event() invokes. Returns the chain registration result.
 */
int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&dev->priv.events->nh, nb);
}
EXPORT_SYMBOL(mlx5_notifier_register);
/* Remove @nb from the device's driver notifier chain. Returns the chain
 * unregistration result.
 */
int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&dev->priv.events->nh, nb);
}
EXPORT_SYMBOL(mlx5_notifier_unregister);
/* Invoke the driver notifier chain of @events with @event and @data and
 * hand back whatever the chain call returns.
 */
int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
{
	struct atomic_notifier_head *chain = &events->nh;

	return atomic_notifier_call_chain(chain, event, data);
}
......@@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
{
u8 opcode, status = 0;
opcode = cqe->op_own >> 4;
opcode = get_cqe_opcode(cqe);
switch (opcode) {
case MLX5_CQE_REQ_ERR:
......
......@@ -36,6 +36,7 @@
#include "mlx5_core.h"
#include "lib/mlx5.h"
#include "lib/eq.h"
#include "fpga/core.h"
#include "fpga/conn.h"
......@@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
return 0;
}
static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
/* EQ notifier embedded as fpga_err_nb: resolve the owning FPGA device and
 * pass the event on to mlx5_fpga_event().
 */
static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
{
	struct mlx5_fpga_device *fdev;

	fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);

	return mlx5_fpga_event(fdev, event, eqe);
}
/* EQ notifier embedded as fpga_qp_err_nb: resolve the owning FPGA device
 * and pass the event on to mlx5_fpga_event().
 */
static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
{
	struct mlx5_fpga_device *fdev;

	fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);

	return mlx5_fpga_event(fdev, event, eqe);
}
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
......@@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
if (err)
goto out;
MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
err = mlx5_fpga_conn_device_init(fdev);
if (err)
goto err_rsvd_gid;
......@@ -201,6 +223,8 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
mlx5_fpga_conn_device_cleanup(fdev);
err_rsvd_gid:
mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
mlx5_core_unreserve_gids(mdev, max_num_qps);
out:
spin_lock_irqsave(&fdev->state_lock, flags);
......@@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
}
mlx5_fpga_conn_device_cleanup(fdev);
mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
mlx5_core_unreserve_gids(mdev, max_num_qps);
}
......@@ -283,9 +310,10 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
return "Unknown";
}
void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
unsigned long event, void *eqe)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
void *data = ((struct mlx5_eqe *)eqe)->data.raw;
const char *event_name;
bool teardown = false;
unsigned long flags;
......@@ -303,9 +331,7 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
break;
default:
mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
event);
return;
return NOTIFY_DONE;
}
spin_lock_irqsave(&fdev->state_lock, flags);
......@@ -326,4 +352,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
*/
if (teardown)
mlx5_trigger_health_work(fdev->mdev);
return NOTIFY_OK;
}
......@@ -35,11 +35,16 @@
#ifdef CONFIG_MLX5_FPGA
#include <linux/mlx5/eq.h>
#include "lib/eq.h"
#include "fpga/cmd.h"
/* Represents an Innova device */
struct mlx5_fpga_device {
struct mlx5_core_dev *mdev;
struct mlx5_nb fpga_err_nb;
struct mlx5_nb fpga_qp_err_nb;
spinlock_t state_lock; /* Protects state transitions */
enum mlx5_fpga_status state;
enum mlx5_fpga_image last_admin_image;
......@@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
#else
......@@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
{
}
static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
void *data)
{
}
#endif
#endif /* __MLX5_FPGA_CORE_H__ */
......@@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
/* Decide whether @fte must be programmed with the extended destination
 * format.
 *
 * *@extended_dest is set to true when the entry has the FWD_DEST action,
 * more than one non-counter destination, and at least one vport destination
 * flagged with MLX5_FLOW_DEST_VPORT_REFORMAT_ID; otherwise it is false.
 *
 * Returns 0 on success, or -EOPNOTSUPP when the extended format is needed
 * but the log_max_fdb_encap_uplink capability is zero, or when the number
 * of encap destinations exceeds 1 << log_max_fdb_encap_uplink.
 */
static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
				  struct fs_fte *fte, bool *extended_dest)
{
	int fw_log_max_fdb_encap_uplink =
		MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
	int num_fwd_destinations = 0;
	struct mlx5_flow_rule *dst;
	int num_encap = 0;

	*extended_dest = false;
	if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
		return 0;

	list_for_each_entry(dst, &fte->node.children, node.list) {
		/* counters are not forwarding destinations */
		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
			continue;
		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
		    (dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID))
			num_encap++;
		num_fwd_destinations++;
	}

	if (num_fwd_destinations > 1 && num_encap > 0)
		*extended_dest = true;

	/* Both warnings are newline-terminated so each is a complete log
	 * record.
	 */
	if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
		mlx5_core_warn(dev, "FW does not support extended destination\n");
		return -EOPNOTSUPP;
	}
	if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
		mlx5_core_warn(dev, "FW does not support more than %d encaps\n",
			       1 << fw_log_max_fdb_encap_uplink);
		return -EOPNOTSUPP;
	}

	return 0;
}
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int opmod, int modify_mask,
struct mlx5_flow_table *ft,
unsigned group_id,
struct fs_fte *fte)
{
unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
bool extended_dest = false;
struct mlx5_flow_rule *dst;
void *in_flow_context, *vlan;
void *in_match_value;
unsigned int inlen;
int dst_cnt_size;
void *in_dests;
u32 *in;
int err;
if (mlx5_set_extended_dest(dev, fte, &extended_dest))
return -EOPNOTSUPP;
if (!extended_dest)
dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
else
dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
......@@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
fte->action.reformat_id);
MLX5_SET(flow_context, in_flow_context, extended_destination,
extended_dest);
if (extended_dest) {
u32 action;
action = fte->action.action &
~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
MLX5_SET(flow_context, in_flow_context, action, action);
} else {
MLX5_SET(flow_context, in_flow_context, action,
fte->action.action);
MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
fte->action.reformat_id);
}
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_id);
......@@ -387,10 +444,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
id = dst->dest_attr.vport.num;
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id_valid,
dst->dest_attr.vport.vhca_id_valid);
!!(dst->dest_attr.vport.flags &
MLX5_FLOW_DEST_VPORT_VHCA_ID));
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
dst->dest_attr.vport.vhca_id);
if (extended_dest) {
MLX5_SET(dest_format_struct, in_dests,
packet_reformat,
!!(dst->dest_attr.vport.flags &
MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
MLX5_SET(extended_dest_format, in_dests,
packet_reformat_id,
dst->dest_attr.vport.reformat_id);
}
break;
default:
id = dst->dest_attr.tir_num;
......@@ -399,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(dest_format_struct, in_dests, destination_type,
type);
MLX5_SET(dest_format_struct, in_dests, destination_id, id);
in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
in_dests += dst_cnt_size;
list_size++;
}
......@@ -420,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
dst->dest_attr.counter_id);
in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
in_dests += dst_cnt_size;
list_size++;
}
if (list_size > max_list_size) {
......
......@@ -145,29 +145,6 @@ struct mlx5_flow_table {
struct rhltable fgs_hash;
};
/* Packet/byte totals plus a last-use stamp; embedded in struct mlx5_fc as
 * its "cache" member.
 * NOTE(review): units and the writer of these fields are not visible in
 * this hunk - confirm against the flow counter code.
 */
struct mlx5_fc_cache {
u64 packets;
u64 bytes;
u64 lastuse;
};
/* Flow counter object.
 * NOTE(review): the list/addlist/dellist linkage and the meaning of id and
 * aging are used by code outside this hunk - confirm there before relying
 * on them.
 */
struct mlx5_fc {
struct list_head list;
struct llist_node addlist;
struct llist_node dellist;
/* last{packets,bytes} members are used when calculating the delta since
 * last reading
 */
u64 lastpackets;
u64 lastbytes;
u32 id;
bool aging;
/* cached values, placed on their own cache line on SMP builds */
struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
};
struct mlx5_ft_underlay_qp {
struct list_head list;
u32 qpn;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册