提交 e4952747 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This cycle we got a new RDMA driver "ERDMA" for the Alibaba cloud
  environment. Otherwise the changes are dominated by rxe fixes.

  There is another RDMA driver on the list that might get merged next
  cycle, 'MANA' for the Azure cloud environment.

  Summary:

   - Bug fixes and small features for irdma, hns, siw, qedr, hfi1, mlx5

   - General spelling/grammer fixes

   - rdma cm can follow changes in neighbours for control packets

   - Significant amounts of rxe fixes and spec compliance changes

   - Use the modern NAPI API

   - Use the bitmap API instead of open coding

   - Performance improvements for rtrs

   - Add the ERDMA driver for Alibaba cloud

   - Fix a use after free bug in SRP"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (99 commits)
  RDMA/ib_srpt: Unify checking rdma_cm_id condition in srpt_cm_req_recv()
  RDMA/rxe: Fix error unwind in rxe_create_qp()
  RDMA/mlx5: Add missing check for return value in get namespace flow
  RDMA/rxe: Split qp state for requester and completer
  RDMA/rxe: Generate error completion for error requester QP state
  RDMA/rxe: Update wqe_index for each wqe error completion
  RDMA/srpt: Fix a use-after-free
  RDMA/srpt: Introduce a reference count in struct srpt_device
  RDMA/srpt: Duplicate port name members
  IB/qib: Fix repeated "in" within comments
  RDMA/erdma: Add driver to kernel build environment
  RDMA/erdma: Add the ABI definitions
  RDMA/erdma: Add the erdma module
  RDMA/erdma: Add connection management (CM) support
  RDMA/erdma: Add verbs implementation
  RDMA/erdma: Add verbs header file
  RDMA/erdma: Add event queue implementation
  RDMA/erdma: Add cmdq implementation
  RDMA/erdma: Add main include file
  RDMA/erdma: Add the hardware related definitions
  ...
......@@ -736,6 +736,14 @@ S: Maintained
F: Documentation/i2c/busses/i2c-ali1563.rst
F: drivers/i2c/busses/i2c-ali1563.c
ALIBABA ELASTIC RDMA DRIVER
M: Cheng Xu <chengyou@linux.alibaba.com>
M: Kai Shen <kaishen@linux.alibaba.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/erdma
F: include/uapi/rdma/erdma-abi.h
ALIENWARE WMI DRIVER
L: Dell.Client.Kernel@dell.com
S: Maintained
......
......@@ -78,20 +78,21 @@ config INFINIBAND_VIRT_DMA
def_bool !HIGHMEM
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/erdma/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/irdma/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/sw/siw/Kconfig"
......
......@@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
......@@ -20,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
......@@ -168,6 +170,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct rb_root id_table = RB_ROOT;
/* Serialize operations of id_table tree */
static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
......@@ -202,6 +207,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}
struct id_table_entry {
struct list_head id_list;
struct rb_node rb_node;
};
struct cma_device {
struct list_head list;
struct ib_device *device;
......@@ -420,11 +430,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
return hdr->ip_version >> 4;
}
static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
}
static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
}
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
struct in_device *in_dev = NULL;
......@@ -445,6 +465,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
struct id_table_entry *entry_b)
{
struct rdma_id_private *id_priv = list_first_entry(
&entry_b->id_list, struct rdma_id_private, id_list_entry);
int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
struct sockaddr *sb = cma_dst_addr(id_priv);
if (ifindex_a != ifindex_b)
return (ifindex_a > ifindex_b) ? 1 : -1;
if (sa->sa_family != sb->sa_family)
return sa->sa_family - sb->sa_family;
if (sa->sa_family == AF_INET)
return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr,
(char *)&((struct sockaddr_in *)sb)->sin_addr,
sizeof(((struct sockaddr_in *)sa)->sin_addr));
return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
&((struct sockaddr_in6 *)sb)->sin6_addr);
}
static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
{
struct rb_node **new, *parent = NULL;
struct id_table_entry *this, *node;
unsigned long flags;
int result;
node = kzalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return -ENOMEM;
spin_lock_irqsave(&id_table_lock, flags);
new = &id_table.rb_node;
while (*new) {
this = container_of(*new, struct id_table_entry, rb_node);
result = compare_netdev_and_ip(
node_id_priv->id.route.addr.dev_addr.bound_dev_if,
cma_dst_addr(node_id_priv), this);
parent = *new;
if (result < 0)
new = &((*new)->rb_left);
else if (result > 0)
new = &((*new)->rb_right);
else {
list_add_tail(&node_id_priv->id_list_entry,
&this->id_list);
kfree(node);
goto unlock;
}
}
INIT_LIST_HEAD(&node->id_list);
list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
rb_link_node(&node->rb_node, parent, new);
rb_insert_color(&node->rb_node, &id_table);
unlock:
spin_unlock_irqrestore(&id_table_lock, flags);
return 0;
}
static struct id_table_entry *
node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
{
struct rb_node *node = root->rb_node;
struct id_table_entry *data;
int result;
while (node) {
data = container_of(node, struct id_table_entry, rb_node);
result = compare_netdev_and_ip(ifindex, sa, data);
if (result < 0)
node = node->rb_left;
else if (result > 0)
node = node->rb_right;
else
return data;
}
return NULL;
}
static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
{
struct id_table_entry *data;
unsigned long flags;
spin_lock_irqsave(&id_table_lock, flags);
if (list_empty(&id_priv->id_list_entry))
goto out;
data = node_from_ndev_ip(&id_table,
id_priv->id.route.addr.dev_addr.bound_dev_if,
cma_dst_addr(id_priv));
if (!data)
goto out;
list_del_init(&id_priv->id_list_entry);
if (list_empty(&data->id_list)) {
rb_erase(&data->rb_node, &id_table);
kfree(data);
}
out:
spin_unlock_irqrestore(&id_table_lock, flags);
}
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
......@@ -481,16 +612,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}
static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
......@@ -861,6 +982,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
INIT_LIST_HEAD(&id_priv->device_item);
INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
......@@ -1883,6 +2005,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
cma_cancel_operation(id_priv, state);
rdma_restrack_del(&id_priv->res);
cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
......@@ -3172,8 +3295,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
else if (rdma_protocol_roce(id->device, id->port_num))
else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
if (!ret)
cma_add_id_to_tree(id_priv);
}
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
......@@ -4922,10 +5048,87 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
return ret;
}
static void cma_netevent_work_handler(struct work_struct *_work)
{
struct rdma_id_private *id_priv =
container_of(_work, struct rdma_id_private, id.net_work);
struct rdma_cm_event event = {};
mutex_lock(&id_priv->handler_mutex);
if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
goto out_unlock;
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = -ETIMEDOUT;
if (cma_cm_event_handler(id_priv, &event)) {
__acquire(&id_priv->handler_mutex);
id_priv->cm_id.ib = NULL;
cma_id_put(id_priv);
destroy_id_handler_unlock(id_priv);
return;
}
out_unlock:
mutex_unlock(&id_priv->handler_mutex);
cma_id_put(id_priv);
}
static int cma_netevent_callback(struct notifier_block *self,
unsigned long event, void *ctx)
{
struct id_table_entry *ips_node = NULL;
struct rdma_id_private *current_id;
struct neighbour *neigh = ctx;
unsigned long flags;
if (event != NETEVENT_NEIGH_UPDATE)
return NOTIFY_DONE;
spin_lock_irqsave(&id_table_lock, flags);
if (neigh->tbl->family == AF_INET6) {
struct sockaddr_in6 neigh_sock_6;
neigh_sock_6.sin6_family = AF_INET6;
neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
(struct sockaddr *)&neigh_sock_6);
} else if (neigh->tbl->family == AF_INET) {
struct sockaddr_in neigh_sock_4;
neigh_sock_4.sin_family = AF_INET;
neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
(struct sockaddr *)&neigh_sock_4);
} else
goto out;
if (!ips_node)
goto out;
list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
neigh->ha, ETH_ALEN))
continue;
INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
cma_id_get(current_id);
queue_work(cma_wq, &current_id->id.net_work);
}
out:
spin_unlock_irqrestore(&id_table_lock, flags);
return NOTIFY_DONE;
}
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
static struct notifier_block cma_netevent_cb = {
.notifier_call = cma_netevent_callback
};
static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
......@@ -5148,6 +5351,7 @@ static int __init cma_init(void)
ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
......@@ -5162,6 +5366,7 @@ static int __init cma_init(void)
err_ib:
ib_unregister_client(&cma_client);
err:
unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
......@@ -5174,6 +5379,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
......
......@@ -64,6 +64,7 @@ struct rdma_id_private {
struct list_head listen_item;
struct list_head listen_list;
};
struct list_head id_list_entry;
struct cma_device *cma_dev;
struct list_head mc_list;
......
......@@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
* access locks, since only a single one of them is is allowed
* access locks, since only a single one of them is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
......
......@@ -250,7 +250,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u32 port,
/**
* is_upper_ndev_bond_master_filter - Check if a given netdevice
* is bond master device of netdevice of the the RDMA device of port.
* is bond master device of netdevice of the RDMA device of port.
* @ib_dev: IB device to check
* @port: Port to consider for adding default GID
* @rdma_ndev: Pointer to rdma netdevice
......
......@@ -13,3 +13,4 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_HNS) += hns/
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/
......@@ -173,7 +173,7 @@ struct bnxt_re_dev {
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
/* QP for for handling QP1 packets */
/* QP for handling QP1 packets */
struct bnxt_re_gsi_context gsi_ctx;
struct bnxt_re_stats stats;
atomic_t nq_alloc_cnt;
......
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_ERDMA
tristate "Alibaba Elastic RDMA Adapter (ERDMA) support"
depends on PCI_MSI && 64BIT
depends on INFINIBAND_ADDR_TRANS
depends on INFINIBAND_USER_ACCESS
help
This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA),
which supports RDMA features in Alibaba cloud environment.
To compile this driver as module, choose M here. The module will be
called erdma.
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_INFINIBAND_ERDMA) := erdma.o
erdma-y := erdma_cm.o erdma_main.o erdma_cmdq.o erdma_cq.o erdma_verbs.o erdma_qp.o erdma_eq.o
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */
#ifndef __ERDMA_H__
#define __ERDMA_H__
#include <linux/bitfield.h>
#include <linux/netdevice.h>
#include <linux/xarray.h>
#include <rdma/ib_verbs.h>
#include "erdma_hw.h"
#define DRV_MODULE_NAME "erdma"
#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack"
struct erdma_eq {
void *qbuf;
dma_addr_t qbuf_dma_addr;
spinlock_t lock;
u32 depth;
u16 ci;
u16 rsvd;
atomic64_t event_num;
atomic64_t notify_num;
u64 __iomem *db_addr;
u64 *db_record;
};
struct erdma_cmdq_sq {
void *qbuf;
dma_addr_t qbuf_dma_addr;
spinlock_t lock;
u32 depth;
u16 ci;
u16 pi;
u16 wqebb_cnt;
u64 *db_record;
};
struct erdma_cmdq_cq {
void *qbuf;
dma_addr_t qbuf_dma_addr;
spinlock_t lock;
u32 depth;
u32 ci;
u32 cmdsn;
u64 *db_record;
atomic64_t armed_num;
};
enum {
ERDMA_CMD_STATUS_INIT,
ERDMA_CMD_STATUS_ISSUED,
ERDMA_CMD_STATUS_FINISHED,
ERDMA_CMD_STATUS_TIMEOUT
};
struct erdma_comp_wait {
struct completion wait_event;
u32 cmd_status;
u32 ctx_id;
u16 sq_pi;
u8 comp_status;
u8 rsvd;
u32 comp_data[4];
};
enum {
ERDMA_CMDQ_STATE_OK_BIT = 0,
ERDMA_CMDQ_STATE_TIMEOUT_BIT = 1,
ERDMA_CMDQ_STATE_CTX_ERR_BIT = 2,
};
#define ERDMA_CMDQ_TIMEOUT_MS 15000
#define ERDMA_REG_ACCESS_WAIT_MS 20
#define ERDMA_WAIT_DEV_DONE_CNT 500
struct erdma_cmdq {
unsigned long *comp_wait_bitmap;
struct erdma_comp_wait *wait_pool;
spinlock_t lock;
bool use_event;
struct erdma_cmdq_sq sq;
struct erdma_cmdq_cq cq;
struct erdma_eq eq;
unsigned long state;
struct semaphore credits;
u16 max_outstandings;
};
#define COMPROMISE_CC ERDMA_CC_CUBIC
enum erdma_cc_alg {
ERDMA_CC_NEWRENO = 0,
ERDMA_CC_CUBIC,
ERDMA_CC_HPCC_RTT,
ERDMA_CC_HPCC_ECN,
ERDMA_CC_HPCC_INT,
ERDMA_CC_METHODS_NUM
};
struct erdma_devattr {
u32 fw_version;
unsigned char peer_addr[ETH_ALEN];
int numa_node;
enum erdma_cc_alg cc;
u32 grp_num;
u32 irq_num;
bool disable_dwqe;
u16 dwqe_pages;
u16 dwqe_entries;
u32 max_qp;
u32 max_send_wr;
u32 max_recv_wr;
u32 max_ord;
u32 max_ird;
u32 max_send_sge;
u32 max_recv_sge;
u32 max_sge_rd;
u32 max_cq;
u32 max_cqe;
u64 max_mr_size;
u32 max_mr;
u32 max_pd;
u32 max_mw;
u32 local_dma_key;
};
#define ERDMA_IRQNAME_SIZE 50
struct erdma_irq {
char name[ERDMA_IRQNAME_SIZE];
u32 msix_vector;
cpumask_t affinity_hint_mask;
};
struct erdma_eq_cb {
bool ready;
void *dev; /* All EQs use this fields to get erdma_dev struct */
struct erdma_irq irq;
struct erdma_eq eq;
struct tasklet_struct tasklet;
};
struct erdma_resource_cb {
unsigned long *bitmap;
spinlock_t lock;
u32 next_alloc_idx;
u32 max_cap;
};
enum {
ERDMA_RES_TYPE_PD = 0,
ERDMA_RES_TYPE_STAG_IDX = 1,
ERDMA_RES_CNT = 2,
};
#define ERDMA_EXTRA_BUFFER_SIZE ERDMA_DB_SIZE
#define WARPPED_BUFSIZE(size) ((size) + ERDMA_EXTRA_BUFFER_SIZE)
struct erdma_dev {
struct ib_device ibdev;
struct net_device *netdev;
struct pci_dev *pdev;
struct notifier_block netdev_nb;
resource_size_t func_bar_addr;
resource_size_t func_bar_len;
u8 __iomem *func_bar;
struct erdma_devattr attrs;
/* physical port state (only one port per device) */
enum ib_port_state state;
/* cmdq and aeq use the same msix vector */
struct erdma_irq comm_irq;
struct erdma_cmdq cmdq;
struct erdma_eq aeq;
struct erdma_eq_cb ceqs[ERDMA_NUM_MSIX_VEC - 1];
spinlock_t lock;
struct erdma_resource_cb res_cb[ERDMA_RES_CNT];
struct xarray qp_xa;
struct xarray cq_xa;
u32 next_alloc_qpn;
u32 next_alloc_cqn;
spinlock_t db_bitmap_lock;
/* We provide max 64 uContexts that each has one SQ doorbell Page. */
DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT);
/*
* We provide max 496 uContexts that each has one SQ normal Db,
* and one directWQE db。
*/
DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT);
atomic_t num_ctx;
struct list_head cep_list;
};
static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)
{
idx &= (depth - 1);
return qbuf + (idx << shift);
}
static inline struct erdma_dev *to_edev(struct ib_device *ibdev)
{
return container_of(ibdev, struct erdma_dev, ibdev);
}
static inline u32 erdma_reg_read32(struct erdma_dev *dev, u32 reg)
{
return readl(dev->func_bar + reg);
}
static inline u64 erdma_reg_read64(struct erdma_dev *dev, u32 reg)
{
return readq(dev->func_bar + reg);
}
static inline void erdma_reg_write32(struct erdma_dev *dev, u32 reg, u32 value)
{
writel(value, dev->func_bar + reg);
}
static inline void erdma_reg_write64(struct erdma_dev *dev, u32 reg, u64 value)
{
writeq(value, dev->func_bar + reg);
}
static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg,
u32 filed_mask)
{
u32 val = erdma_reg_read32(dev, reg);
return FIELD_GET(filed_mask, val);
}
int erdma_cmdq_init(struct erdma_dev *dev);
void erdma_finish_cmdq_init(struct erdma_dev *dev);
void erdma_cmdq_destroy(struct erdma_dev *dev);
void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op);
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size,
u64 *resp0, u64 *resp1);
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq);
int erdma_ceqs_init(struct erdma_dev *dev);
void erdma_ceqs_uninit(struct erdma_dev *dev);
void notify_eq(struct erdma_eq *eq);
void *get_next_valid_eqe(struct erdma_eq *eq);
int erdma_aeq_init(struct erdma_dev *dev);
void erdma_aeq_destroy(struct erdma_dev *dev);
void erdma_aeq_event_handler(struct erdma_dev *dev);
void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb);
#endif
此差异已折叠。
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */
/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Greg Joyce <greg@opengridcomputing.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
/* Copyright (c) 2017, Open Grid Computing, Inc. */
#ifndef __ERDMA_CM_H__
#define __ERDMA_CM_H__
#include <linux/tcp.h>
#include <net/sock.h>
#include <rdma/iw_cm.h>
/* iWarp MPA protocol defs */
#define MPA_REVISION_EXT_1 129
#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA
#define MPA_KEY_REQ "MPA ID Req Frame"
#define MPA_KEY_REP "MPA ID Rep Frame"
#define MPA_KEY_SIZE 16
#define MPA_DEFAULT_HDR_LEN 28
struct mpa_rr_params {
__be16 bits;
__be16 pd_len;
};
/*
* MPA request/response Hdr bits & fields
*/
enum {
MPA_RR_FLAG_MARKERS = __cpu_to_be16(0x8000),
MPA_RR_FLAG_CRC = __cpu_to_be16(0x4000),
MPA_RR_FLAG_REJECT = __cpu_to_be16(0x2000),
MPA_RR_RESERVED = __cpu_to_be16(0x1f00),
MPA_RR_MASK_REVISION = __cpu_to_be16(0x00ff)
};
/*
* MPA request/reply header
*/
struct mpa_rr {
u8 key[16];
struct mpa_rr_params params;
};
struct erdma_mpa_ext {
__be32 cookie;
__be32 bits;
};
enum {
MPA_EXT_FLAG_CC = cpu_to_be32(0x0000000f),
};
struct erdma_mpa_info {
struct mpa_rr hdr; /* peer mpa hdr in host byte order */
struct erdma_mpa_ext ext_data;
char *pdata;
int bytes_rcvd;
};
struct erdma_sk_upcalls {
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk, int bytes);
void (*sk_error_report)(struct sock *sk);
};
struct erdma_dev;
enum erdma_cep_state {
ERDMA_EPSTATE_IDLE = 1,
ERDMA_EPSTATE_LISTENING,
ERDMA_EPSTATE_CONNECTING,
ERDMA_EPSTATE_AWAIT_MPAREQ,
ERDMA_EPSTATE_RECVD_MPAREQ,
ERDMA_EPSTATE_AWAIT_MPAREP,
ERDMA_EPSTATE_RDMA_MODE,
ERDMA_EPSTATE_CLOSED
};
struct erdma_cep {
struct iw_cm_id *cm_id;
struct erdma_dev *dev;
struct list_head devq;
spinlock_t lock;
struct kref ref;
int in_use;
wait_queue_head_t waitq;
enum erdma_cep_state state;
struct list_head listenq;
struct erdma_cep *listen_cep;
struct erdma_qp *qp;
struct socket *sock;
struct erdma_cm_work *mpa_timer;
struct list_head work_freelist;
struct erdma_mpa_info mpa;
int ord;
int ird;
int pd_len;
/* hold user's private data. */
void *private_data;
/* Saved upcalls of socket llp.sock */
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk);
void (*sk_error_report)(struct sock *sk);
};
#define MPAREQ_TIMEOUT (HZ * 20)
#define MPAREP_TIMEOUT (HZ * 10)
#define CONNECT_TIMEOUT (HZ * 10)
enum erdma_work_type {
ERDMA_CM_WORK_ACCEPT = 1,
ERDMA_CM_WORK_READ_MPAHDR,
ERDMA_CM_WORK_CLOSE_LLP, /* close socket */
ERDMA_CM_WORK_PEER_CLOSE, /* socket indicated peer close */
ERDMA_CM_WORK_MPATIMEOUT,
ERDMA_CM_WORK_CONNECTED,
ERDMA_CM_WORK_CONNECTTIMEOUT
};
struct erdma_cm_work {
struct delayed_work work;
struct list_head list;
enum erdma_work_type type;
struct erdma_cep *cep;
};
#define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a)))
static inline int getname_peer(struct socket *s, struct sockaddr_storage *a)
{
return s->ops->getname(s, (struct sockaddr *)a, 1);
}
static inline int getname_local(struct socket *s, struct sockaddr_storage *a)
{
return s->ops->getname(s, (struct sockaddr *)a, 0);
}
int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *param);
int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *param);
int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen);
int erdma_create_listen(struct iw_cm_id *id, int backlog);
int erdma_destroy_listen(struct iw_cm_id *id);
void erdma_cep_get(struct erdma_cep *ceq);
void erdma_cep_put(struct erdma_cep *ceq);
int erdma_cm_queue_work(struct erdma_cep *ceq, enum erdma_work_type type);
int erdma_cm_init(void);
void erdma_cm_exit(void);
#define sk_to_cep(sk) ((struct erdma_cep *)((sk)->sk_user_data))
#endif
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/types.h>
#include "erdma.h"
#include "erdma_hw.h"
#include "erdma_verbs.h"
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);
*cmdq->cq.db_record = db_data;
writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);
atomic64_inc(&cmdq->cq.armed_num);
}
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);
*cmdq->sq.db_record = db_data;
writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
int comp_idx;
spin_lock(&cmdq->lock);
comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
cmdq->max_outstandings);
if (comp_idx == cmdq->max_outstandings) {
spin_unlock(&cmdq->lock);
return ERR_PTR(-ENOMEM);
}
__set_bit(comp_idx, cmdq->comp_wait_bitmap);
spin_unlock(&cmdq->lock);
return &cmdq->wait_pool[comp_idx];
}
static void put_comp_wait(struct erdma_cmdq *cmdq,
struct erdma_comp_wait *comp_wait)
{
int used;
cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
spin_lock(&cmdq->lock);
used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
spin_unlock(&cmdq->lock);
WARN_ON(!used);
}
static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
struct erdma_cmdq *cmdq)
{
int i;
cmdq->wait_pool =
devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
sizeof(struct erdma_comp_wait), GFP_KERNEL);
if (!cmdq->wait_pool)
return -ENOMEM;
spin_lock_init(&cmdq->lock);
cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
if (!cmdq->comp_wait_bitmap)
return -ENOMEM;
for (i = 0; i < cmdq->max_outstandings; i++) {
init_completion(&cmdq->wait_pool[i].wait_event);
cmdq->wait_pool[i].ctx_id = i;
}
return 0;
}
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
struct erdma_cmdq *cmdq = &dev->cmdq;
struct erdma_cmdq_sq *sq = &cmdq->sq;
u32 buf_size;
sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;
buf_size = sq->depth << SQEBB_SHIFT;
sq->qbuf =
dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
&sq->qbuf_dma_addr, GFP_KERNEL);
if (!sq->qbuf)
return -ENOMEM;
sq->db_record = (u64 *)(sq->qbuf + buf_size);
spin_lock_init(&sq->lock);
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
upper_32_bits(sq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
lower_32_bits(sq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
sq->qbuf_dma_addr + buf_size);
return 0;
}
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
struct erdma_cmdq *cmdq = &dev->cmdq;
struct erdma_cmdq_cq *cq = &cmdq->cq;
u32 buf_size;
cq->depth = cmdq->sq.depth;
buf_size = cq->depth << CQE_SHIFT;
cq->qbuf =
dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
&cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
if (!cq->qbuf)
return -ENOMEM;
spin_lock_init(&cq->lock);
cq->db_record = (u64 *)(cq->qbuf + buf_size);
atomic64_set(&cq->armed_num, 0);
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
upper_32_bits(cq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
lower_32_bits(cq->qbuf_dma_addr));
erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
cq->qbuf_dma_addr + buf_size);
return 0;
}
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
struct erdma_cmdq *cmdq = &dev->cmdq;
struct erdma_eq *eq = &cmdq->eq;
u32 buf_size;
eq->depth = cmdq->max_outstandings;
buf_size = eq->depth << EQE_SHIFT;
eq->qbuf =
dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
&eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
if (!eq->qbuf)
return -ENOMEM;
spin_lock_init(&eq->lock);
atomic64_set(&eq->event_num, 0);
eq->db_addr =
(u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
eq->db_record = (u64 *)(eq->qbuf + buf_size);
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
upper_32_bits(eq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
lower_32_bits(eq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
eq->qbuf_dma_addr + buf_size);
return 0;
}
int erdma_cmdq_init(struct erdma_dev *dev)
{
int err, i;
struct erdma_cmdq *cmdq = &dev->cmdq;
u32 sts, ctrl;
cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
cmdq->use_event = false;
sema_init(&cmdq->credits, cmdq->max_outstandings);
err = erdma_cmdq_wait_res_init(dev, cmdq);
if (err)
return err;
err = erdma_cmdq_sq_init(dev);
if (err)
return err;
err = erdma_cmdq_cq_init(dev);
if (err)
goto err_destroy_sq;
err = erdma_cmdq_eq_init(dev);
if (err)
goto err_destroy_cq;
ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
ERDMA_REG_DEV_ST_INIT_DONE_MASK);
if (sts)
break;
msleep(ERDMA_REG_ACCESS_WAIT_MS);
}
if (i == ERDMA_WAIT_DEV_DONE_CNT) {
dev_err(&dev->pdev->dev, "wait init done failed.\n");
err = -ETIMEDOUT;
goto err_destroy_eq;
}
set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
return 0;
err_destroy_eq:
dma_free_coherent(&dev->pdev->dev,
(cmdq->eq.depth << EQE_SHIFT) +
ERDMA_EXTRA_BUFFER_SIZE,
cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
err_destroy_cq:
dma_free_coherent(&dev->pdev->dev,
(cmdq->cq.depth << CQE_SHIFT) +
ERDMA_EXTRA_BUFFER_SIZE,
cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
err_destroy_sq:
dma_free_coherent(&dev->pdev->dev,
(cmdq->sq.depth << SQEBB_SHIFT) +
ERDMA_EXTRA_BUFFER_SIZE,
cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
return err;
}
void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
/* after device init successfully, change cmdq to event mode. */
dev->cmdq.use_event = true;
arm_cmdq_cq(&dev->cmdq);
}
void erdma_cmdq_destroy(struct erdma_dev *dev)
{
struct erdma_cmdq *cmdq = &dev->cmdq;
clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
dma_free_coherent(&dev->pdev->dev,
(cmdq->eq.depth << EQE_SHIFT) +
ERDMA_EXTRA_BUFFER_SIZE,
cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
dma_free_coherent(&dev->pdev->dev,
(cmdq->sq.depth << SQEBB_SHIFT) +
ERDMA_EXTRA_BUFFER_SIZE,
cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
dma_free_coherent(&dev->pdev->dev,
(cmdq->cq.depth << CQE_SHIFT) +
ERDMA_EXTRA_BUFFER_SIZE,
cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
cmdq->cq.depth, CQE_SHIFT);
u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
__be32_to_cpu(READ_ONCE(*cqe)));
return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
struct erdma_comp_wait *comp_wait)
{
__le64 *wqe;
u64 hdr = *req;
comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
reinit_completion(&comp_wait->wait_event);
comp_wait->sq_pi = cmdq->sq.pi;
wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
SQEBB_SHIFT);
memcpy(wqe, req, req_len);
cmdq->sq.pi += cmdq->sq.wqebb_cnt;
hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
comp_wait->ctx_id) |
FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
*wqe = cpu_to_le64(hdr);
kick_cmdq_db(cmdq);
}
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
struct erdma_comp_wait *comp_wait;
u32 hdr0, sqe_idx;
__be32 *cqe;
u16 ctx_id;
u64 *sqe;
int i;
cqe = get_next_valid_cmdq_cqe(cmdq);
if (!cqe)
return -EAGAIN;
cmdq->cq.ci++;
dma_rmb();
hdr0 = __be32_to_cpu(*cqe);
sqe_idx = __be32_to_cpu(*(cqe + 1));
sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
SQEBB_SHIFT);
ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
comp_wait = &cmdq->wait_pool[ctx_id];
if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
return -EIO;
comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
cmdq->sq.ci += cmdq->sq.wqebb_cnt;
for (i = 0; i < 4; i++)
comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));
if (cmdq->use_event)
complete(&comp_wait->wait_event);
return 0;
}
static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
unsigned long flags;
u16 comp_num;
spin_lock_irqsave(&cmdq->cq.lock, flags);
/* We must have less than # of max_outstandings
* completions at one time.
*/
for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
if (erdma_poll_single_cmd_completion(cmdq))
break;
if (comp_num && cmdq->use_event)
arm_cmdq_cq(cmdq);
spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
int got_event = 0;
if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
!cmdq->use_event)
return;
while (get_next_valid_eqe(&cmdq->eq)) {
cmdq->eq.ci++;
got_event++;
}
if (got_event) {
cmdq->cq.cmdsn++;
erdma_polling_cmd_completions(cmdq);
}
notify_eq(&cmdq->eq);
}
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
struct erdma_cmdq *cmdq, u32 timeout)
{
unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);
while (1) {
erdma_polling_cmd_completions(cmdq);
if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
break;
if (time_is_before_jiffies(comp_timeout))
return -ETIME;
msleep(20);
}
return 0;
}
static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
struct erdma_cmdq *cmdq, u32 timeout)
{
unsigned long flags = 0;
wait_for_completion_timeout(&comp_ctx->wait_event,
msecs_to_jiffies(timeout));
if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
spin_lock_irqsave(&cmdq->cq.lock, flags);
comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
spin_unlock_irqrestore(&cmdq->cq.lock, flags);
return -ETIME;
}
return 0;
}
void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size,
u64 *resp0, u64 *resp1)
{
struct erdma_comp_wait *comp_wait;
int ret;
if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
return -ENODEV;
down(&cmdq->credits);
comp_wait = get_comp_wait(cmdq);
if (IS_ERR(comp_wait)) {
clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
up(&cmdq->credits);
return PTR_ERR(comp_wait);
}
spin_lock(&cmdq->sq.lock);
push_cmdq_sqe(cmdq, req, req_size, comp_wait);
spin_unlock(&cmdq->sq.lock);
if (cmdq->use_event)
ret = erdma_wait_cmd_completion(comp_wait, cmdq,
ERDMA_CMDQ_TIMEOUT_MS);
else
ret = erdma_poll_cmd_completion(comp_wait, cmdq,
ERDMA_CMDQ_TIMEOUT_MS);
if (ret) {
set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
goto out;
}
if (comp_wait->comp_status)
ret = -EIO;
if (resp0 && resp1) {
*resp0 = *((u64 *)&comp_wait->comp_data[0]);
*resp1 = *((u64 *)&comp_wait->comp_data[2]);
}
put_comp_wait(cmdq, comp_wait);
out:
up(&cmdq->credits);
return ret;
}
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */
#include <rdma/ib_verbs.h>
#include "erdma_hw.h"
#include "erdma_verbs.h"
static void *get_next_valid_cqe(struct erdma_cq *cq)
{
__be32 *cqe = get_queue_entry(cq->kern_cq.qbuf, cq->kern_cq.ci,
cq->depth, CQE_SHIFT);
u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
__be32_to_cpu(READ_ONCE(*cqe)));
return owner ^ !!(cq->kern_cq.ci & cq->depth) ? cqe : NULL;
}
static void notify_cq(struct erdma_cq *cq, u8 solcitied)
{
u64 db_data =
FIELD_PREP(ERDMA_CQDB_IDX_MASK, (cq->kern_cq.notify_cnt)) |
FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->cqn) |
FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
FIELD_PREP(ERDMA_CQDB_SOL_MASK, solcitied) |
FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) |
FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci);
*cq->kern_cq.db_record = db_data;
writeq(db_data, cq->kern_cq.db);
}
int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct erdma_cq *cq = to_ecq(ibcq);
unsigned long irq_flags;
int ret = 0;
spin_lock_irqsave(&cq->kern_cq.lock, irq_flags);
notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq))
ret = 1;
cq->kern_cq.notify_cnt++;
spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags);
return ret;
}
static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
[ERDMA_OP_WRITE] = IB_WC_RDMA_WRITE,
[ERDMA_OP_READ] = IB_WC_RDMA_READ,
[ERDMA_OP_SEND] = IB_WC_SEND,
[ERDMA_OP_SEND_WITH_IMM] = IB_WC_SEND,
[ERDMA_OP_RECEIVE] = IB_WC_RECV,
[ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM,
[ERDMA_OP_RECV_INV] = IB_WC_RECV,
[ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[ERDMA_OP_INVALIDATE] = IB_WC_LOCAL_INV,
[ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV,
[ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND,
[ERDMA_OP_REG_MR] = IB_WC_REG_MR,
[ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
[ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
};
static const struct {
enum erdma_wc_status erdma;
enum ib_wc_status base;
enum erdma_vendor_err vendor;
} map_cqe_status[ERDMA_NUM_WC_STATUS] = {
{ ERDMA_WC_SUCCESS, IB_WC_SUCCESS, ERDMA_WC_VENDOR_NO_ERR },
{ ERDMA_WC_GENERAL_ERR, IB_WC_GENERAL_ERR, ERDMA_WC_VENDOR_NO_ERR },
{ ERDMA_WC_RECV_WQE_FORMAT_ERR, IB_WC_GENERAL_ERR,
ERDMA_WC_VENDOR_INVALID_RQE },
{ ERDMA_WC_RECV_STAG_INVALID_ERR, IB_WC_REM_ACCESS_ERR,
ERDMA_WC_VENDOR_RQE_INVALID_STAG },
{ ERDMA_WC_RECV_ADDR_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION },
{ ERDMA_WC_RECV_RIGHT_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR },
{ ERDMA_WC_RECV_PDID_ERR, IB_WC_REM_ACCESS_ERR,
ERDMA_WC_VENDOR_RQE_INVALID_PD },
{ ERDMA_WC_RECV_WARRPING_ERR, IB_WC_REM_ACCESS_ERR,
ERDMA_WC_VENDOR_RQE_WRAP_ERR },
{ ERDMA_WC_SEND_WQE_FORMAT_ERR, IB_WC_LOC_QP_OP_ERR,
ERDMA_WC_VENDOR_INVALID_SQE },
{ ERDMA_WC_SEND_WQE_ORD_EXCEED, IB_WC_GENERAL_ERR,
ERDMA_WC_VENDOR_ZERO_ORD },
{ ERDMA_WC_SEND_STAG_INVALID_ERR, IB_WC_LOC_ACCESS_ERR,
ERDMA_WC_VENDOR_SQE_INVALID_STAG },
{ ERDMA_WC_SEND_ADDR_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION },
{ ERDMA_WC_SEND_RIGHT_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
ERDMA_WC_VENDOR_SQE_ACCESS_ERR },
{ ERDMA_WC_SEND_PDID_ERR, IB_WC_LOC_ACCESS_ERR,
ERDMA_WC_VENDOR_SQE_INVALID_PD },
{ ERDMA_WC_SEND_WARRPING_ERR, IB_WC_LOC_ACCESS_ERR,
ERDMA_WC_VENDOR_SQE_WARP_ERR },
{ ERDMA_WC_FLUSH_ERR, IB_WC_WR_FLUSH_ERR, ERDMA_WC_VENDOR_NO_ERR },
{ ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR },
};
#define ERDMA_POLLCQ_NO_QP 1
static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
{
struct erdma_dev *dev = to_edev(cq->ibcq.device);
u8 opcode, syndrome, qtype;
struct erdma_kqp *kern_qp;
struct erdma_cqe *cqe;
struct erdma_qp *qp;
u16 wqe_idx, depth;
u32 qpn, cqe_hdr;
u64 *id_table;
u64 *wqe_hdr;
cqe = get_next_valid_cqe(cq);
if (!cqe)
return -EAGAIN;
cq->kern_cq.ci++;
/* cqbuf should be ready when we poll */
dma_rmb();
qpn = be32_to_cpu(cqe->qpn);
wqe_idx = be32_to_cpu(cqe->qe_idx);
cqe_hdr = be32_to_cpu(cqe->hdr);
qp = find_qp_by_qpn(dev, qpn);
if (!qp)
return ERDMA_POLLCQ_NO_QP;
kern_qp = &qp->kern_qp;
qtype = FIELD_GET(ERDMA_CQE_HDR_QTYPE_MASK, cqe_hdr);
syndrome = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, cqe_hdr);
opcode = FIELD_GET(ERDMA_CQE_HDR_OPCODE_MASK, cqe_hdr);
if (qtype == ERDMA_CQE_QTYPE_SQ) {
id_table = kern_qp->swr_tbl;
depth = qp->attrs.sq_size;
wqe_hdr = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
qp->attrs.sq_size, SQEBB_SHIFT);
kern_qp->sq_ci =
FIELD_GET(ERDMA_SQE_HDR_WQEBB_CNT_MASK, *wqe_hdr) +
wqe_idx + 1;
} else {
id_table = kern_qp->rwr_tbl;
depth = qp->attrs.rq_size;
}
wc->wr_id = id_table[wqe_idx & (depth - 1)];
wc->byte_len = be32_to_cpu(cqe->size);
wc->wc_flags = 0;
wc->opcode = wc_mapping_table[opcode];
if (opcode == ERDMA_OP_RECV_IMM || opcode == ERDMA_OP_RSP_SEND_IMM) {
wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->imm_data));
wc->wc_flags |= IB_WC_WITH_IMM;
} else if (opcode == ERDMA_OP_RECV_INV) {
wc->ex.invalidate_rkey = be32_to_cpu(cqe->inv_rkey);
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
}
if (syndrome >= ERDMA_NUM_WC_STATUS)
syndrome = ERDMA_WC_GENERAL_ERR;
wc->status = map_cqe_status[syndrome].base;
wc->vendor_err = map_cqe_status[syndrome].vendor;
wc->qp = &qp->ibqp;
return 0;
}
int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct erdma_cq *cq = to_ecq(ibcq);
unsigned long flags;
int npolled, ret;
spin_lock_irqsave(&cq->kern_cq.lock, flags);
for (npolled = 0; npolled < num_entries;) {
ret = erdma_poll_one_cqe(cq, wc + npolled);
if (ret == -EAGAIN) /* no received new CQEs. */
break;
else if (ret) /* ignore invalid CQEs. */
continue;
npolled++;
}
spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
return npolled;
}
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/types.h>
#include "erdma.h"
#include "erdma_hw.h"
#include "erdma_verbs.h"
#define MAX_POLL_CHUNK_SIZE 16
void notify_eq(struct erdma_eq *eq)
{
u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) |
FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1);
*eq->db_record = db_data;
writeq(db_data, eq->db_addr);
atomic64_inc(&eq->notify_num);
}
void *get_next_valid_eqe(struct erdma_eq *eq)
{
u64 *eqe = get_queue_entry(eq->qbuf, eq->ci, eq->depth, EQE_SHIFT);
u32 owner = FIELD_GET(ERDMA_CEQE_HDR_O_MASK, READ_ONCE(*eqe));
return owner ^ !!(eq->ci & eq->depth) ? eqe : NULL;
}
void erdma_aeq_event_handler(struct erdma_dev *dev)
{
struct erdma_aeqe *aeqe;
u32 cqn, qpn;
struct erdma_qp *qp;
struct erdma_cq *cq;
struct ib_event event;
u32 poll_cnt = 0;
memset(&event, 0, sizeof(event));
while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
aeqe = get_next_valid_eqe(&dev->aeq);
if (!aeqe)
break;
dma_rmb();
dev->aeq.ci++;
atomic64_inc(&dev->aeq.event_num);
poll_cnt++;
if (FIELD_GET(ERDMA_AEQE_HDR_TYPE_MASK,
le32_to_cpu(aeqe->hdr)) == ERDMA_AE_TYPE_CQ_ERR) {
cqn = le32_to_cpu(aeqe->event_data0);
cq = find_cq_by_cqn(dev, cqn);
if (!cq)
continue;
event.device = cq->ibcq.device;
event.element.cq = &cq->ibcq;
event.event = IB_EVENT_CQ_ERR;
if (cq->ibcq.event_handler)
cq->ibcq.event_handler(&event,
cq->ibcq.cq_context);
} else {
qpn = le32_to_cpu(aeqe->event_data0);
qp = find_qp_by_qpn(dev, qpn);
if (!qp)
continue;
event.device = qp->ibqp.device;
event.element.qp = &qp->ibqp;
event.event = IB_EVENT_QP_FATAL;
if (qp->ibqp.event_handler)
qp->ibqp.event_handler(&event,
qp->ibqp.qp_context);
}
}
notify_eq(&dev->aeq);
}
int erdma_aeq_init(struct erdma_dev *dev)
{
struct erdma_eq *eq = &dev->aeq;
u32 buf_size;
eq->depth = ERDMA_DEFAULT_EQ_DEPTH;
buf_size = eq->depth << EQE_SHIFT;
eq->qbuf =
dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
&eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
if (!eq->qbuf)
return -ENOMEM;
spin_lock_init(&eq->lock);
atomic64_set(&eq->event_num, 0);
atomic64_set(&eq->notify_num, 0);
eq->db_addr = (u64 __iomem *)(dev->func_bar + ERDMA_REGS_AEQ_DB_REG);
eq->db_record = (u64 *)(eq->qbuf + buf_size);
erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG,
upper_32_bits(eq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_L_REG,
lower_32_bits(eq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_AEQ_DEPTH_REG, eq->depth);
erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG,
eq->qbuf_dma_addr + buf_size);
return 0;
}
void erdma_aeq_destroy(struct erdma_dev *dev)
{
struct erdma_eq *eq = &dev->aeq;
dma_free_coherent(&dev->pdev->dev,
WARPPED_BUFSIZE(eq->depth << EQE_SHIFT), eq->qbuf,
eq->qbuf_dma_addr);
}
void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb)
{
struct erdma_dev *dev = ceq_cb->dev;
struct erdma_cq *cq;
u32 poll_cnt = 0;
u64 *ceqe;
int cqn;
if (!ceq_cb->ready)
return;
while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
ceqe = get_next_valid_eqe(&ceq_cb->eq);
if (!ceqe)
break;
dma_rmb();
ceq_cb->eq.ci++;
poll_cnt++;
cqn = FIELD_GET(ERDMA_CEQE_HDR_CQN_MASK, READ_ONCE(*ceqe));
cq = find_cq_by_cqn(dev, cqn);
if (!cq)
continue;
if (rdma_is_kernel_res(&cq->ibcq.res))
cq->kern_cq.cmdsn++;
if (cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
notify_eq(&ceq_cb->eq);
}
static irqreturn_t erdma_intr_ceq_handler(int irq, void *data)
{
struct erdma_eq_cb *ceq_cb = data;
tasklet_schedule(&ceq_cb->tasklet);
return IRQ_HANDLED;
}
static void erdma_intr_ceq_task(unsigned long data)
{
erdma_ceq_completion_handler((struct erdma_eq_cb *)data);
}
static int erdma_set_ceq_irq(struct erdma_dev *dev, u16 ceqn)
{
struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
int err;
snprintf(eqc->irq.name, ERDMA_IRQNAME_SIZE, "erdma-ceq%u@pci:%s", ceqn,
pci_name(dev->pdev));
eqc->irq.msix_vector = pci_irq_vector(dev->pdev, ceqn + 1);
tasklet_init(&dev->ceqs[ceqn].tasklet, erdma_intr_ceq_task,
(unsigned long)&dev->ceqs[ceqn]);
cpumask_set_cpu(cpumask_local_spread(ceqn + 1, dev->attrs.numa_node),
&eqc->irq.affinity_hint_mask);
err = request_irq(eqc->irq.msix_vector, erdma_intr_ceq_handler, 0,
eqc->irq.name, eqc);
if (err) {
dev_err(&dev->pdev->dev, "failed to request_irq(%d)\n", err);
return err;
}
irq_set_affinity_hint(eqc->irq.msix_vector,
&eqc->irq.affinity_hint_mask);
return 0;
}
static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn)
{
struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
irq_set_affinity_hint(eqc->irq.msix_vector, NULL);
free_irq(eqc->irq.msix_vector, eqc);
}
static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
{
struct erdma_cmdq_create_eq_req req;
dma_addr_t db_info_dma_addr;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
CMDQ_OPCODE_CREATE_EQ);
req.eqn = eqn;
req.depth = ilog2(eq->depth);
req.qbuf_addr = eq->qbuf_dma_addr;
req.qtype = ERDMA_EQ_TYPE_CEQ;
/* Vector index is the same as EQN. */
req.vector_idx = eqn;
db_info_dma_addr = eq->qbuf_dma_addr + (eq->depth << EQE_SHIFT);
req.db_dma_addr_l = lower_32_bits(db_info_dma_addr);
req.db_dma_addr_h = upper_32_bits(db_info_dma_addr);
return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req,
sizeof(struct erdma_cmdq_create_eq_req),
NULL, NULL);
}
static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
{
struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
int ret;
eq->qbuf =
dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
&eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
if (!eq->qbuf)
return -ENOMEM;
spin_lock_init(&eq->lock);
atomic64_set(&eq->event_num, 0);
atomic64_set(&eq->notify_num, 0);
eq->depth = ERDMA_DEFAULT_EQ_DEPTH;
eq->db_addr =
(u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG +
(ceqn + 1) * ERDMA_DB_SIZE);
eq->db_record = (u64 *)(eq->qbuf + buf_size);
eq->ci = 0;
dev->ceqs[ceqn].dev = dev;
/* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
ret = create_eq_cmd(dev, ceqn + 1, eq);
dev->ceqs[ceqn].ready = ret ? false : true;
return ret;
}
static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
{
struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
struct erdma_cmdq_destroy_eq_req req;
int err;
dev->ceqs[ceqn].ready = 0;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
CMDQ_OPCODE_DESTROY_EQ);
/* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
req.eqn = ceqn + 1;
req.qtype = ERDMA_EQ_TYPE_CEQ;
req.vector_idx = ceqn + 1;
err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
NULL);
if (err)
return;
dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), eq->qbuf,
eq->qbuf_dma_addr);
}
int erdma_ceqs_init(struct erdma_dev *dev)
{
u32 i, j;
int err;
for (i = 0; i < dev->attrs.irq_num - 1; i++) {
err = erdma_ceq_init_one(dev, i);
if (err)
goto out_err;
err = erdma_set_ceq_irq(dev, i);
if (err) {
erdma_ceq_uninit_one(dev, i);
goto out_err;
}
}
return 0;
out_err:
for (j = 0; j < i; j++) {
erdma_free_ceq_irq(dev, j);
erdma_ceq_uninit_one(dev, j);
}
return err;
}
void erdma_ceqs_uninit(struct erdma_dev *dev)
{
u32 i;
for (i = 0; i < dev->attrs.irq_num - 1; i++) {
erdma_free_ceq_irq(dev, i);
erdma_ceq_uninit_one(dev, i);
}
}
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */
#ifndef __ERDMA_HW_H__
#define __ERDMA_HW_H__
#include <linux/kernel.h>
#include <linux/types.h>
/* PCIe device related definition. */
#define PCI_VENDOR_ID_ALIBABA 0x1ded
#define ERDMA_PCI_WIDTH 64
#define ERDMA_FUNC_BAR 0
#define ERDMA_MISX_BAR 2
#define ERDMA_BAR_MASK (BIT(ERDMA_FUNC_BAR) | BIT(ERDMA_MISX_BAR))
/* MSI-X related. */
#define ERDMA_NUM_MSIX_VEC 32U
#define ERDMA_MSIX_VECTOR_CMDQ 0
/* PCIe Bar0 Registers. */
#define ERDMA_REGS_VERSION_REG 0x0
#define ERDMA_REGS_DEV_CTRL_REG 0x10
#define ERDMA_REGS_DEV_ST_REG 0x14
#define ERDMA_REGS_NETDEV_MAC_L_REG 0x18
#define ERDMA_REGS_NETDEV_MAC_H_REG 0x1C
#define ERDMA_REGS_CMDQ_SQ_ADDR_L_REG 0x20
#define ERDMA_REGS_CMDQ_SQ_ADDR_H_REG 0x24
#define ERDMA_REGS_CMDQ_CQ_ADDR_L_REG 0x28
#define ERDMA_REGS_CMDQ_CQ_ADDR_H_REG 0x2C
#define ERDMA_REGS_CMDQ_DEPTH_REG 0x30
#define ERDMA_REGS_CMDQ_EQ_DEPTH_REG 0x34
#define ERDMA_REGS_CMDQ_EQ_ADDR_L_REG 0x38
#define ERDMA_REGS_CMDQ_EQ_ADDR_H_REG 0x3C
#define ERDMA_REGS_AEQ_ADDR_L_REG 0x40
#define ERDMA_REGS_AEQ_ADDR_H_REG 0x44
#define ERDMA_REGS_AEQ_DEPTH_REG 0x48
#define ERDMA_REGS_GRP_NUM_REG 0x4c
#define ERDMA_REGS_AEQ_DB_REG 0x50
#define ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG 0x60
#define ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG 0x68
#define ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG 0x70
#define ERDMA_AEQ_DB_HOST_ADDR_REG 0x78
#define ERDMA_REGS_STATS_TSO_IN_PKTS_REG 0x80
#define ERDMA_REGS_STATS_TSO_OUT_PKTS_REG 0x88
#define ERDMA_REGS_STATS_TSO_OUT_BYTES_REG 0x90
#define ERDMA_REGS_STATS_TX_DROP_PKTS_REG 0x98
#define ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG 0xa0
#define ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG 0xa8
#define ERDMA_REGS_STATS_RX_PKTS_REG 0xc0
#define ERDMA_REGS_STATS_RX_BYTES_REG 0xc8
#define ERDMA_REGS_STATS_RX_DROP_PKTS_REG 0xd0
#define ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG 0xd8
#define ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG 0xe0
#define ERDMA_REGS_CEQ_DB_BASE_REG 0x100
#define ERDMA_CMDQ_SQDB_REG 0x200
#define ERDMA_CMDQ_CQDB_REG 0x300
/* DEV_CTRL_REG details. */
#define ERDMA_REG_DEV_CTRL_RESET_MASK 0x00000001
#define ERDMA_REG_DEV_CTRL_INIT_MASK 0x00000002
/* DEV_ST_REG details. */
#define ERDMA_REG_DEV_ST_RESET_DONE_MASK 0x00000001U
#define ERDMA_REG_DEV_ST_INIT_DONE_MASK 0x00000002U
/* eRDMA PCIe DBs definition. */
#define ERDMA_BAR_DB_SPACE_BASE 4096
#define ERDMA_BAR_SQDB_SPACE_OFFSET ERDMA_BAR_DB_SPACE_BASE
#define ERDMA_BAR_SQDB_SPACE_SIZE (384 * 1024)
#define ERDMA_BAR_RQDB_SPACE_OFFSET \
(ERDMA_BAR_SQDB_SPACE_OFFSET + ERDMA_BAR_SQDB_SPACE_SIZE)
#define ERDMA_BAR_RQDB_SPACE_SIZE (96 * 1024)
#define ERDMA_BAR_CQDB_SPACE_OFFSET \
(ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE)
/* Doorbell page resources related. */
/*
* Max # of parallelly issued directSQE is 3072 per device,
* hardware organizes this into 24 group, per group has 128 credits.
*/
#define ERDMA_DWQE_MAX_GRP_CNT 24
#define ERDMA_DWQE_NUM_PER_GRP 128
#define ERDMA_DWQE_TYPE0_CNT 64
#define ERDMA_DWQE_TYPE1_CNT 496
/* type1 DB contains 2 DBs, takes 256Byte. */
#define ERDMA_DWQE_TYPE1_CNT_PER_PAGE 16
#define ERDMA_SDB_SHARED_PAGE_INDEX 95
/* Doorbell related. */
#define ERDMA_DB_SIZE 8
#define ERDMA_CQDB_IDX_MASK GENMASK_ULL(63, 56)
#define ERDMA_CQDB_CQN_MASK GENMASK_ULL(55, 32)
#define ERDMA_CQDB_ARM_MASK BIT_ULL(31)
#define ERDMA_CQDB_SOL_MASK BIT_ULL(30)
#define ERDMA_CQDB_CMDSN_MASK GENMASK_ULL(29, 28)
#define ERDMA_CQDB_CI_MASK GENMASK_ULL(23, 0)
#define ERDMA_EQDB_ARM_MASK BIT(31)
#define ERDMA_EQDB_CI_MASK GENMASK_ULL(23, 0)
#define ERDMA_PAGE_SIZE_SUPPORT 0x7FFFF000
/* WQE related. */
#define EQE_SIZE 16
#define EQE_SHIFT 4
#define RQE_SIZE 32
#define RQE_SHIFT 5
#define CQE_SIZE 32
#define CQE_SHIFT 5
#define SQEBB_SIZE 32
#define SQEBB_SHIFT 5
#define SQEBB_MASK (~(SQEBB_SIZE - 1))
#define SQEBB_ALIGN(size) ((size + SQEBB_SIZE - 1) & SQEBB_MASK)
#define SQEBB_COUNT(size) (SQEBB_ALIGN(size) >> SQEBB_SHIFT)
#define ERDMA_MAX_SQE_SIZE 128
#define ERDMA_MAX_WQEBB_PER_SQE 4
/* CMDQ related. */
#define ERDMA_CMDQ_MAX_OUTSTANDING 128
#define ERDMA_CMDQ_SQE_SIZE 64
/* cmdq sub module definition. */
enum CMDQ_WQE_SUB_MOD {
CMDQ_SUBMOD_RDMA = 0,
CMDQ_SUBMOD_COMMON = 1
};
enum CMDQ_RDMA_OPCODE {
CMDQ_OPCODE_QUERY_DEVICE = 0,
CMDQ_OPCODE_CREATE_QP = 1,
CMDQ_OPCODE_DESTROY_QP = 2,
CMDQ_OPCODE_MODIFY_QP = 3,
CMDQ_OPCODE_CREATE_CQ = 4,
CMDQ_OPCODE_DESTROY_CQ = 5,
CMDQ_OPCODE_REG_MR = 8,
CMDQ_OPCODE_DEREG_MR = 9
};
enum CMDQ_COMMON_OPCODE {
CMDQ_OPCODE_CREATE_EQ = 0,
CMDQ_OPCODE_DESTROY_EQ = 1,
CMDQ_OPCODE_QUERY_FW_INFO = 2,
};
/* cmdq-SQE HDR */
#define ERDMA_CMD_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52)
#define ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK GENMASK_ULL(47, 32)
#define ERDMA_CMD_HDR_SUB_MOD_MASK GENMASK_ULL(25, 24)
#define ERDMA_CMD_HDR_OPCODE_MASK GENMASK_ULL(23, 16)
#define ERDMA_CMD_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
struct erdma_cmdq_destroy_cq_req {
u64 hdr;
u32 cqn;
};
#define ERDMA_EQ_TYPE_AEQ 0
#define ERDMA_EQ_TYPE_CEQ 1
struct erdma_cmdq_create_eq_req {
u64 hdr;
u64 qbuf_addr;
u8 vector_idx;
u8 eqn;
u8 depth;
u8 qtype;
u32 db_dma_addr_l;
u32 db_dma_addr_h;
};
struct erdma_cmdq_destroy_eq_req {
u64 hdr;
u64 rsvd0;
u8 vector_idx;
u8 eqn;
u8 rsvd1;
u8 qtype;
};
/* create_cq cfg0 */
#define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24)
#define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20)
#define ERDMA_CMD_CREATE_CQ_CQN_MASK GENMASK(19, 0)
/* create_cq cfg1 */
#define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16)
#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15)
#define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0)
struct erdma_cmdq_create_cq_req {
u64 hdr;
u32 cfg0;
u32 qbuf_addr_l;
u32 qbuf_addr_h;
u32 cfg1;
u64 cq_db_info_addr;
u32 first_page_offset;
};
/* regmr/deregmr cfg0 */
#define ERDMA_CMD_MR_VALID_MASK BIT(31)
#define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20)
#define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0)
/* regmr cfg1 */
#define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12)
#define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6)
#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2)
#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0)
/* regmr cfg2 */
#define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
#define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20)
#define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0)
struct erdma_cmdq_reg_mr_req {
u64 hdr;
u32 cfg0;
u32 cfg1;
u64 start_va;
u32 size;
u32 cfg2;
u64 phy_addr[4];
};
struct erdma_cmdq_dereg_mr_req {
u64 hdr;
u32 cfg;
};
/* modify qp cfg */
#define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24)
#define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20)
#define ERDMA_CMD_MODIFY_QP_QPN_MASK GENMASK(19, 0)
struct erdma_cmdq_modify_qp_req {
u64 hdr;
u32 cfg;
u32 cookie;
__be32 dip;
__be32 sip;
__be16 sport;
__be16 dport;
u32 send_nxt;
u32 recv_nxt;
};
/* create qp cfg0 */
#define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20)
#define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0)
/* create qp cfg1 */
#define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20)
#define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0)
/* create qp cqn_mtt_cfg */
#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28)
#define ERDMA_CMD_CREATE_QP_CQN_MASK GENMASK(23, 0)
/* create qp mtt_cfg */
#define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12)
#define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1)
#define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0)
#define ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK GENMASK_ULL(31, 0)
struct erdma_cmdq_create_qp_req {
u64 hdr;
u32 cfg0;
u32 cfg1;
u32 sq_cqn_mtt_cfg;
u32 rq_cqn_mtt_cfg;
u64 sq_buf_addr;
u64 rq_buf_addr;
u32 sq_mtt_cfg;
u32 rq_mtt_cfg;
u64 sq_db_info_dma_addr;
u64 rq_db_info_dma_addr;
};
struct erdma_cmdq_destroy_qp_req {
u64 hdr;
u32 qpn;
};
/* cap qword 0 definition */
#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
/* cap qword 1 definition */
#define ERDMA_CMD_DEV_CAP_DMA_LOCAL_KEY_MASK GENMASK_ULL(63, 32)
#define ERDMA_CMD_DEV_CAP_DEFAULT_CC_MASK GENMASK_ULL(31, 28)
#define ERDMA_CMD_DEV_CAP_QBLOCK_MASK GENMASK_ULL(27, 16)
#define ERDMA_CMD_DEV_CAP_MAX_MW_MASK GENMASK_ULL(7, 0)
#define ERDMA_NQP_PER_QBLOCK 1024
#define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0)
/* CQE hdr */
#define ERDMA_CQE_HDR_OWNER_MASK BIT(31)
#define ERDMA_CQE_HDR_OPCODE_MASK GENMASK(23, 16)
#define ERDMA_CQE_HDR_QTYPE_MASK GENMASK(15, 8)
#define ERDMA_CQE_HDR_SYNDROME_MASK GENMASK(7, 0)
#define ERDMA_CQE_QTYPE_SQ 0
#define ERDMA_CQE_QTYPE_RQ 1
#define ERDMA_CQE_QTYPE_CMDQ 2
struct erdma_cqe {
__be32 hdr;
__be32 qe_idx;
__be32 qpn;
union {
__le32 imm_data;
__be32 inv_rkey;
};
__be32 size;
__be32 rsvd[3];
};
struct erdma_sge {
__aligned_le64 laddr;
__le32 length;
__le32 lkey;
};
/* Receive Queue Element */
struct erdma_rqe {
__le16 qe_idx;
__le16 rsvd0;
__le32 qpn;
__le32 rsvd1;
__le32 rsvd2;
__le64 to;
__le32 length;
__le32 stag;
};
/* SQE */
#define ERDMA_SQE_HDR_SGL_LEN_MASK GENMASK_ULL(63, 56)
#define ERDMA_SQE_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52)
#define ERDMA_SQE_HDR_QPN_MASK GENMASK_ULL(51, 32)
#define ERDMA_SQE_HDR_OPCODE_MASK GENMASK_ULL(31, 27)
#define ERDMA_SQE_HDR_DWQE_MASK BIT_ULL(26)
#define ERDMA_SQE_HDR_INLINE_MASK BIT_ULL(25)
#define ERDMA_SQE_HDR_FENCE_MASK BIT_ULL(24)
#define ERDMA_SQE_HDR_SE_MASK BIT_ULL(23)
#define ERDMA_SQE_HDR_CE_MASK BIT_ULL(22)
#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
/* REG MR attrs */
#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0)
#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2)
#define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6)
#define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12)
struct erdma_write_sqe {
__le64 hdr;
__be32 imm_data;
__le32 length;
__le32 sink_stag;
__le32 sink_to_l;
__le32 sink_to_h;
__le32 rsvd;
struct erdma_sge sgl[0];
};
struct erdma_send_sqe {
__le64 hdr;
union {
__be32 imm_data;
__le32 invalid_stag;
};
__le32 length;
struct erdma_sge sgl[0];
};
struct erdma_readreq_sqe {
__le64 hdr;
__le32 invalid_stag;
__le32 length;
__le32 sink_stag;
__le32 sink_to_l;
__le32 sink_to_h;
__le32 rsvd;
};
struct erdma_reg_mr_sqe {
__le64 hdr;
__le64 addr;
__le32 length;
__le32 stag;
__le32 attrs;
__le32 rsvd;
};
/* EQ related. */
#define ERDMA_DEFAULT_EQ_DEPTH 256
/* ceqe */
#define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63)
#define ERDMA_CEQE_HDR_PI_MASK GENMASK_ULL(55, 32)
#define ERDMA_CEQE_HDR_O_MASK BIT_ULL(31)
#define ERDMA_CEQE_HDR_CQN_MASK GENMASK_ULL(19, 0)
/* aeqe */
#define ERDMA_AEQE_HDR_O_MASK BIT(31)
#define ERDMA_AEQE_HDR_TYPE_MASK GENMASK(23, 16)
#define ERDMA_AEQE_HDR_SUBTYPE_MASK GENMASK(7, 0)
#define ERDMA_AE_TYPE_QP_FATAL_EVENT 0
#define ERDMA_AE_TYPE_QP_ERQ_ERR_EVENT 1
#define ERDMA_AE_TYPE_ACC_ERR_EVENT 2
#define ERDMA_AE_TYPE_CQ_ERR 3
#define ERDMA_AE_TYPE_OTHER_ERROR 4
struct erdma_aeqe {
__le32 hdr;
__le32 event_data0;
__le32 event_data1;
__le32 rsvd;
};
enum erdma_opcode {
ERDMA_OP_WRITE = 0,
ERDMA_OP_READ = 1,
ERDMA_OP_SEND = 2,
ERDMA_OP_SEND_WITH_IMM = 3,
ERDMA_OP_RECEIVE = 4,
ERDMA_OP_RECV_IMM = 5,
ERDMA_OP_RECV_INV = 6,
ERDMA_OP_REQ_ERR = 7,
ERDMA_OP_READ_RESPONSE = 8,
ERDMA_OP_WRITE_WITH_IMM = 9,
ERDMA_OP_RECV_ERR = 10,
ERDMA_OP_INVALIDATE = 11,
ERDMA_OP_RSP_SEND_IMM = 12,
ERDMA_OP_SEND_WITH_INV = 13,
ERDMA_OP_REG_MR = 14,
ERDMA_OP_LOCAL_INV = 15,
ERDMA_OP_READ_WITH_INV = 16,
ERDMA_NUM_OPCODES = 17,
ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
};
enum erdma_wc_status {
ERDMA_WC_SUCCESS = 0,
ERDMA_WC_GENERAL_ERR = 1,
ERDMA_WC_RECV_WQE_FORMAT_ERR = 2,
ERDMA_WC_RECV_STAG_INVALID_ERR = 3,
ERDMA_WC_RECV_ADDR_VIOLATION_ERR = 4,
ERDMA_WC_RECV_RIGHT_VIOLATION_ERR = 5,
ERDMA_WC_RECV_PDID_ERR = 6,
ERDMA_WC_RECV_WARRPING_ERR = 7,
ERDMA_WC_SEND_WQE_FORMAT_ERR = 8,
ERDMA_WC_SEND_WQE_ORD_EXCEED = 9,
ERDMA_WC_SEND_STAG_INVALID_ERR = 10,
ERDMA_WC_SEND_ADDR_VIOLATION_ERR = 11,
ERDMA_WC_SEND_RIGHT_VIOLATION_ERR = 12,
ERDMA_WC_SEND_PDID_ERR = 13,
ERDMA_WC_SEND_WARRPING_ERR = 14,
ERDMA_WC_FLUSH_ERR = 15,
ERDMA_WC_RETRY_EXC_ERR = 16,
ERDMA_NUM_WC_STATUS
};
enum erdma_vendor_err {
ERDMA_WC_VENDOR_NO_ERR = 0,
ERDMA_WC_VENDOR_INVALID_RQE = 1,
ERDMA_WC_VENDOR_RQE_INVALID_STAG = 2,
ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION = 3,
ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR = 4,
ERDMA_WC_VENDOR_RQE_INVALID_PD = 5,
ERDMA_WC_VENDOR_RQE_WRAP_ERR = 6,
ERDMA_WC_VENDOR_INVALID_SQE = 0x20,
ERDMA_WC_VENDOR_ZERO_ORD = 0x21,
ERDMA_WC_VENDOR_SQE_INVALID_STAG = 0x30,
ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION = 0x31,
ERDMA_WC_VENDOR_SQE_ACCESS_ERR = 0x32,
ERDMA_WC_VENDOR_SQE_INVALID_PD = 0x33,
ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34
};
#endif
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <net/addrconf.h>
#include <rdma/erdma-abi.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_hw.h"
#include "erdma_verbs.h"
MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
MODULE_LICENSE("Dual BSD/GPL");
static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
void *arg)
{
struct net_device *netdev = netdev_notifier_info_to_dev(arg);
struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
if (dev->netdev == NULL || dev->netdev != netdev)
goto done;
switch (event) {
case NETDEV_UP:
dev->state = IB_PORT_ACTIVE;
erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
break;
case NETDEV_DOWN:
dev->state = IB_PORT_DOWN;
erdma_port_event(dev, IB_EVENT_PORT_ERR);
break;
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
case NETDEV_CHANGEADDR:
case NETDEV_CHANGEMTU:
case NETDEV_GOING_DOWN:
case NETDEV_CHANGE:
default:
break;
}
done:
return NOTIFY_OK;
}
static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
{
struct net_device *netdev;
int ret = -ENODEV;
/* Already binded to a net_device, so we skip. */
if (dev->netdev)
return 0;
rtnl_lock();
for_each_netdev(&init_net, netdev) {
/*
* In erdma, the paired netdev and ibdev should have the same
* MAC address. erdma can get the value from its PCIe bar
* registers. Since erdma can not get the paired netdev
* reference directly, we do a traverse here to get the paired
* netdev.
*/
if (ether_addr_equal_unaligned(netdev->perm_addr,
dev->attrs.peer_addr)) {
ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
if (ret) {
rtnl_unlock();
ibdev_warn(&dev->ibdev,
"failed (%d) to link netdev", ret);
return ret;
}
dev->netdev = netdev;
break;
}
}
rtnl_unlock();
return ret;
}
static int erdma_device_register(struct erdma_dev *dev)
{
struct ib_device *ibdev = &dev->ibdev;
int ret;
ret = erdma_enum_and_get_netdev(dev);
if (ret)
return ret;
addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
if (ret) {
dev_err(&dev->pdev->dev,
"ib_register_device failed: ret = %d\n", ret);
return ret;
}
dev->netdev_nb.notifier_call = erdma_netdev_event;
ret = register_netdevice_notifier(&dev->netdev_nb);
if (ret) {
ibdev_err(&dev->ibdev, "failed to register notifier.\n");
ib_unregister_device(ibdev);
}
return ret;
}
static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
{
struct erdma_dev *dev = data;
erdma_cmdq_completion_handler(&dev->cmdq);
erdma_aeq_event_handler(dev);
return IRQ_HANDLED;
}
static void erdma_dwqe_resource_init(struct erdma_dev *dev)
{
int total_pages, type0, type1;
dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);
if (dev->attrs.grp_num < 4)
dev->attrs.disable_dwqe = true;
else
dev->attrs.disable_dwqe = false;
/* One page contains 4 goups. */
total_pages = dev->attrs.grp_num * 4;
if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
type0 = ERDMA_DWQE_TYPE0_CNT;
type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
} else {
type1 = total_pages / 3;
type0 = total_pages - type1 - 1;
}
dev->attrs.dwqe_pages = type0;
dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
}
static int erdma_request_vectors(struct erdma_dev *dev)
{
int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
int ret;
ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
if (ret < 0) {
dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
ret);
return ret;
}
dev->attrs.irq_num = ret;
return 0;
}
static int erdma_comm_irq_init(struct erdma_dev *dev)
{
snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
pci_name(dev->pdev));
dev->comm_irq.msix_vector =
pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
&dev->comm_irq.affinity_hint_mask);
irq_set_affinity_hint(dev->comm_irq.msix_vector,
&dev->comm_irq.affinity_hint_mask);
return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
dev->comm_irq.name, dev);
}
static void erdma_comm_irq_uninit(struct erdma_dev *dev)
{
irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
free_irq(dev->comm_irq.msix_vector, dev);
}
static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
{
int ret;
erdma_dwqe_resource_init(dev);
ret = dma_set_mask_and_coherent(&pdev->dev,
DMA_BIT_MASK(ERDMA_PCI_WIDTH));
if (ret)
return ret;
dma_set_max_seg_size(&pdev->dev, UINT_MAX);
return 0;
}
static void erdma_device_uninit(struct erdma_dev *dev)
{
u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
}
static const struct pci_device_id erdma_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
{}
};
static int erdma_probe_dev(struct pci_dev *pdev)
{
struct erdma_dev *dev;
int bars, err;
u32 version;
err = pci_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
return err;
}
pci_set_master(pdev);
dev = ib_alloc_device(erdma_dev, ibdev);
if (!dev) {
dev_err(&pdev->dev, "ib_alloc_device failed\n");
err = -ENOMEM;
goto err_disable_device;
}
pci_set_drvdata(pdev, dev);
dev->pdev = pdev;
dev->attrs.numa_node = dev_to_node(&pdev->dev);
bars = pci_select_bars(pdev, IORESOURCE_MEM);
err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
if (bars != ERDMA_BAR_MASK || err) {
err = err ? err : -EINVAL;
goto err_ib_device_release;
}
dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
dev->func_bar =
devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
if (!dev->func_bar) {
dev_err(&pdev->dev, "devm_ioremap failed.\n");
err = -EFAULT;
goto err_release_bars;
}
version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
if (version == 0) {
/* we knows that it is a non-functional function. */
err = -ENODEV;
goto err_iounmap_func_bar;
}
err = erdma_device_init(dev, pdev);
if (err)
goto err_iounmap_func_bar;
err = erdma_request_vectors(dev);
if (err)
goto err_iounmap_func_bar;
err = erdma_comm_irq_init(dev);
if (err)
goto err_free_vectors;
err = erdma_aeq_init(dev);
if (err)
goto err_uninit_comm_irq;
err = erdma_cmdq_init(dev);
if (err)
goto err_uninit_aeq;
err = erdma_ceqs_init(dev);
if (err)
goto err_uninit_cmdq;
erdma_finish_cmdq_init(dev);
return 0;
err_uninit_cmdq:
erdma_device_uninit(dev);
erdma_cmdq_destroy(dev);
err_uninit_aeq:
erdma_aeq_destroy(dev);
err_uninit_comm_irq:
erdma_comm_irq_uninit(dev);
err_free_vectors:
pci_free_irq_vectors(dev->pdev);
err_iounmap_func_bar:
devm_iounmap(&pdev->dev, dev->func_bar);
err_release_bars:
pci_release_selected_regions(pdev, bars);
err_ib_device_release:
ib_dealloc_device(&dev->ibdev);
err_disable_device:
pci_disable_device(pdev);
return err;
}
static void erdma_remove_dev(struct pci_dev *pdev)
{
struct erdma_dev *dev = pci_get_drvdata(pdev);
erdma_ceqs_uninit(dev);
erdma_device_uninit(dev);
erdma_cmdq_destroy(dev);
erdma_aeq_destroy(dev);
erdma_comm_irq_uninit(dev);
pci_free_irq_vectors(dev->pdev);
devm_iounmap(&pdev->dev, dev->func_bar);
pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
ib_dealloc_device(&dev->ibdev);
pci_disable_device(pdev);
}
#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
static int erdma_dev_attrs_init(struct erdma_dev *dev)
{
int err;
u64 req_hdr, cap0, cap1;
erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_QUERY_DEVICE);
err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
&cap1);
if (err)
return err;
dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
dev->attrs.max_mr = dev->attrs.max_qp << 1;
dev->attrs.max_cq = dev->attrs.max_qp << 1;
dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
dev->attrs.max_ord = ERDMA_MAX_ORD;
dev->attrs.max_ird = ERDMA_MAX_IRD;
dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
dev->attrs.max_pd = ERDMA_MAX_PD;
dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
CMDQ_OPCODE_QUERY_FW_INFO);
err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
&cap1);
if (!err)
dev->attrs.fw_version =
FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
return err;
}
static int erdma_res_cb_init(struct erdma_dev *dev)
{
int i, j;
for (i = 0; i < ERDMA_RES_CNT; i++) {
dev->res_cb[i].next_alloc_idx = 1;
spin_lock_init(&dev->res_cb[i].lock);
dev->res_cb[i].bitmap =
bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
if (!dev->res_cb[i].bitmap)
goto err;
}
return 0;
err:
for (j = 0; j < i; j++)
bitmap_free(dev->res_cb[j].bitmap);
return -ENOMEM;
}
static void erdma_res_cb_free(struct erdma_dev *dev)
{
int i;
for (i = 0; i < ERDMA_RES_CNT; i++)
bitmap_free(dev->res_cb[i].bitmap);
}
static const struct ib_device_ops erdma_device_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_ERDMA,
.uverbs_abi_ver = ERDMA_ABI_VERSION,
.alloc_mr = erdma_ib_alloc_mr,
.alloc_pd = erdma_alloc_pd,
.alloc_ucontext = erdma_alloc_ucontext,
.create_cq = erdma_create_cq,
.create_qp = erdma_create_qp,
.dealloc_pd = erdma_dealloc_pd,
.dealloc_ucontext = erdma_dealloc_ucontext,
.dereg_mr = erdma_dereg_mr,
.destroy_cq = erdma_destroy_cq,
.destroy_qp = erdma_destroy_qp,
.get_dma_mr = erdma_get_dma_mr,
.get_port_immutable = erdma_get_port_immutable,
.iw_accept = erdma_accept,
.iw_add_ref = erdma_qp_get_ref,
.iw_connect = erdma_connect,
.iw_create_listen = erdma_create_listen,
.iw_destroy_listen = erdma_destroy_listen,
.iw_get_qp = erdma_get_ibqp,
.iw_reject = erdma_reject,
.iw_rem_ref = erdma_qp_put_ref,
.map_mr_sg = erdma_map_mr_sg,
.mmap = erdma_mmap,
.mmap_free = erdma_mmap_free,
.modify_qp = erdma_modify_qp,
.post_recv = erdma_post_recv,
.post_send = erdma_post_send,
.poll_cq = erdma_poll_cq,
.query_device = erdma_query_device,
.query_gid = erdma_query_gid,
.query_port = erdma_query_port,
.query_qp = erdma_query_qp,
.req_notify_cq = erdma_req_notify_cq,
.reg_user_mr = erdma_reg_user_mr,
INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
};
static int erdma_ib_device_add(struct pci_dev *pdev)
{
struct erdma_dev *dev = pci_get_drvdata(pdev);
struct ib_device *ibdev = &dev->ibdev;
u64 mac;
int ret;
ret = erdma_dev_attrs_init(dev);
if (ret)
return ret;
ibdev->node_type = RDMA_NODE_RNIC;
memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
/*
* Current model (one-to-one device association):
* One ERDMA device per net_device or, equivalently,
* per physical port.
*/
ibdev->phys_port_cnt = 1;
ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
ib_set_device_ops(ibdev, &erdma_device_ops);
INIT_LIST_HEAD(&dev->cep_list);
spin_lock_init(&dev->lock);
xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
dev->next_alloc_cqn = 1;
dev->next_alloc_qpn = 1;
ret = erdma_res_cb_init(dev);
if (ret)
return ret;
spin_lock_init(&dev->db_bitmap_lock);
bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);
atomic_set(&dev->num_ctx, 0);
mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
u64_to_ether_addr(mac, dev->attrs.peer_addr);
ret = erdma_device_register(dev);
if (ret)
goto err_out;
return 0;
err_out:
xa_destroy(&dev->qp_xa);
xa_destroy(&dev->cq_xa);
erdma_res_cb_free(dev);
return ret;
}
static void erdma_ib_device_remove(struct pci_dev *pdev)
{
struct erdma_dev *dev = pci_get_drvdata(pdev);
unregister_netdevice_notifier(&dev->netdev_nb);
ib_unregister_device(&dev->ibdev);
erdma_res_cb_free(dev);
xa_destroy(&dev->qp_xa);
xa_destroy(&dev->cq_xa);
}
static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret;
ret = erdma_probe_dev(pdev);
if (ret)
return ret;
ret = erdma_ib_device_add(pdev);
if (ret) {
erdma_remove_dev(pdev);
return ret;
}
return 0;
}
static void erdma_remove(struct pci_dev *pdev)
{
erdma_ib_device_remove(pdev);
erdma_remove_dev(pdev);
}
static struct pci_driver erdma_pci_driver = {
.name = DRV_MODULE_NAME,
.id_table = erdma_pci_tbl,
.probe = erdma_probe,
.remove = erdma_remove
};
MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
static __init int erdma_init_module(void)
{
int ret;
ret = erdma_cm_init();
if (ret)
return ret;
ret = pci_register_driver(&erdma_pci_driver);
if (ret)
erdma_cm_exit();
return ret;
}
static void __exit erdma_exit_module(void)
{
pci_unregister_driver(&erdma_pci_driver);
erdma_cm_exit();
}
module_init(erdma_init_module);
module_exit(erdma_exit_module);
此差异已折叠。
此差异已折叠。
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */
#ifndef __ERDMA_VERBS_H__
#define __ERDMA_VERBS_H__
#include <linux/errno.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_hw.h"
/* RDMA Capability. */
#define ERDMA_MAX_PD (128 * 1024)
#define ERDMA_MAX_SEND_WR 4096
#define ERDMA_MAX_ORD 128
#define ERDMA_MAX_IRD 128
#define ERDMA_MAX_SGE_RD 1
#define ERDMA_MAX_CONTEXT (128 * 1024)
#define ERDMA_MAX_SEND_SGE 6
#define ERDMA_MAX_RECV_SGE 1
#define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE))
#define ERDMA_MAX_FRMR_PA 512
enum {
ERDMA_MMAP_IO_NC = 0, /* no cache */
};
struct erdma_user_mmap_entry {
struct rdma_user_mmap_entry rdma_entry;
u64 address;
u8 mmap_flag;
};
struct erdma_ucontext {
struct ib_ucontext ibucontext;
u32 sdb_type;
u32 sdb_idx;
u32 sdb_page_idx;
u32 sdb_page_off;
u64 sdb;
u64 rdb;
u64 cdb;
struct rdma_user_mmap_entry *sq_db_mmap_entry;
struct rdma_user_mmap_entry *rq_db_mmap_entry;
struct rdma_user_mmap_entry *cq_db_mmap_entry;
/* doorbell records */
struct list_head dbrecords_page_list;
struct mutex dbrecords_page_mutex;
};
struct erdma_pd {
struct ib_pd ibpd;
u32 pdn;
};
/*
* MemoryRegion definition.
*/
#define ERDMA_MAX_INLINE_MTT_ENTRIES 4
#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt takes 8 Bytes. */
#define ERDMA_MR_MAX_MTT_CNT 524288
#define ERDMA_MTT_ENTRY_SIZE 8
#define ERDMA_MR_TYPE_NORMAL 0
#define ERDMA_MR_TYPE_FRMR 1
#define ERDMA_MR_TYPE_DMA 2
#define ERDMA_MR_INLINE_MTT 0
#define ERDMA_MR_INDIRECT_MTT 1
#define ERDMA_MR_ACC_LR BIT(0)
#define ERDMA_MR_ACC_LW BIT(1)
#define ERDMA_MR_ACC_RR BIT(2)
#define ERDMA_MR_ACC_RW BIT(3)
static inline u8 to_erdma_access_flags(int access)
{
return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) |
(access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) |
(access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0);
}
struct erdma_mem {
struct ib_umem *umem;
void *mtt_buf;
u32 mtt_type;
u32 page_size;
u32 page_offset;
u32 page_cnt;
u32 mtt_nents;
u64 va;
u64 len;
u64 mtt_entry[ERDMA_MAX_INLINE_MTT_ENTRIES];
};
struct erdma_mr {
struct ib_mr ibmr;
struct erdma_mem mem;
u8 type;
u8 access;
u8 valid;
};
struct erdma_user_dbrecords_page {
struct list_head list;
struct ib_umem *umem;
u64 va;
int refcnt;
};
struct erdma_uqp {
struct erdma_mem sq_mtt;
struct erdma_mem rq_mtt;
dma_addr_t sq_db_info_dma_addr;
dma_addr_t rq_db_info_dma_addr;
struct erdma_user_dbrecords_page *user_dbr_page;
u32 rq_offset;
};
struct erdma_kqp {
u16 sq_pi;
u16 sq_ci;
u16 rq_pi;
u16 rq_ci;
u64 *swr_tbl;
u64 *rwr_tbl;
void __iomem *hw_sq_db;
void __iomem *hw_rq_db;
void *sq_buf;
dma_addr_t sq_buf_dma_addr;
void *rq_buf;
dma_addr_t rq_buf_dma_addr;
void *sq_db_info;
void *rq_db_info;
u8 sig_all;
};
enum erdma_qp_state {
ERDMA_QP_STATE_IDLE = 0,
ERDMA_QP_STATE_RTR = 1,
ERDMA_QP_STATE_RTS = 2,
ERDMA_QP_STATE_CLOSING = 3,
ERDMA_QP_STATE_TERMINATE = 4,
ERDMA_QP_STATE_ERROR = 5,
ERDMA_QP_STATE_UNDEF = 7,
ERDMA_QP_STATE_COUNT = 8
};
enum erdma_qp_attr_mask {
ERDMA_QP_ATTR_STATE = (1 << 0),
ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2),
ERDMA_QP_ATTR_ORD = (1 << 3),
ERDMA_QP_ATTR_IRD = (1 << 4),
ERDMA_QP_ATTR_SQ_SIZE = (1 << 5),
ERDMA_QP_ATTR_RQ_SIZE = (1 << 6),
ERDMA_QP_ATTR_MPA = (1 << 7)
};
struct erdma_qp_attrs {
enum erdma_qp_state state;
enum erdma_cc_alg cc; /* Congestion control algorithm */
u32 sq_size;
u32 rq_size;
u32 orq_size;
u32 irq_size;
u32 max_send_sge;
u32 max_recv_sge;
u32 cookie;
#define ERDMA_QP_ACTIVE 0
#define ERDMA_QP_PASSIVE 1
u8 qp_type;
u8 pd_len;
};
struct erdma_qp {
struct ib_qp ibqp;
struct kref ref;
struct completion safe_free;
struct erdma_dev *dev;
struct erdma_cep *cep;
struct rw_semaphore state_lock;
union {
struct erdma_kqp kern_qp;
struct erdma_uqp user_qp;
};
struct erdma_cq *scq;
struct erdma_cq *rcq;
struct erdma_qp_attrs attrs;
spinlock_t lock;
};
struct erdma_kcq_info {
void *qbuf;
dma_addr_t qbuf_dma_addr;
u32 ci;
u32 cmdsn;
u32 notify_cnt;
spinlock_t lock;
u8 __iomem *db;
u64 *db_record;
};
struct erdma_ucq_info {
struct erdma_mem qbuf_mtt;
struct erdma_user_dbrecords_page *user_dbr_page;
dma_addr_t db_info_dma_addr;
};
struct erdma_cq {
struct ib_cq ibcq;
u32 cqn;
u32 depth;
u32 assoc_eqn;
union {
struct erdma_kcq_info kern_cq;
struct erdma_ucq_info user_cq;
};
};
#define QP_ID(qp) ((qp)->ibqp.qp_num)
static inline struct erdma_qp *find_qp_by_qpn(struct erdma_dev *dev, int id)
{
return (struct erdma_qp *)xa_load(&dev->qp_xa, id);
}
static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id)
{
return (struct erdma_cq *)xa_load(&dev->cq_xa, id);
}
void erdma_qp_get(struct erdma_qp *qp);
void erdma_qp_put(struct erdma_qp *qp);
int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
enum erdma_qp_attr_mask mask);
void erdma_qp_llp_close(struct erdma_qp *qp);
void erdma_qp_cm_drop(struct erdma_qp *qp);
static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx)
{
return container_of(ibctx, struct erdma_ucontext, ibucontext);
}
static inline struct erdma_pd *to_epd(struct ib_pd *pd)
{
return container_of(pd, struct erdma_pd, ibpd);
}
static inline struct erdma_mr *to_emr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct erdma_mr, ibmr);
}
static inline struct erdma_qp *to_eqp(struct ib_qp *qp)
{
return container_of(qp, struct erdma_qp, ibqp);
}
static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct erdma_cq, ibcq);
}
static inline struct erdma_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *ibmmap)
{
return container_of(ibmmap, struct erdma_user_mmap_entry, rdma_entry);
}
int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *data);
void erdma_dealloc_ucontext(struct ib_ucontext *ibctx);
int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr,
struct ib_udata *data);
int erdma_get_port_immutable(struct ib_device *dev, u32 port,
struct ib_port_immutable *ib_port_immutable);
int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *data);
int erdma_query_port(struct ib_device *dev, u32 port,
struct ib_port_attr *attr);
int erdma_query_gid(struct ib_device *dev, u32 port, int idx,
union ib_gid *gid);
int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *data);
int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
struct ib_udata *data);
int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
struct ib_qp_init_attr *init_attr);
int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
struct ib_udata *data);
int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
u64 virt, int access, struct ib_udata *udata);
struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int rights);
int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *data);
int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
void erdma_qp_get_ref(struct ib_qp *ibqp);
void erdma_qp_put_ref(struct ib_qp *ibqp);
struct ib_qp *erdma_get_ibqp(struct ib_device *dev, int id);
int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
const struct ib_send_wr **bad_send_wr);
int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg);
int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason);
#endif
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HFI1
tristate "Cornelis OPX Gen1 support"
depends on X86_64 && INFINIBAND_RDMAVT && I2C
depends on X86_64 && INFINIBAND_RDMAVT && I2C && !UML
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
......
......@@ -1179,8 +1179,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
goto done;
ret = init_user_ctxt(fd, uctxt);
if (ret)
if (ret) {
hfi1_free_ctxt_rcv_groups(uctxt);
goto done;
}
user_init(uctxt);
......
......@@ -742,9 +742,7 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
kzalloc_node(sizeof(*tx->sdma_hdr),
GFP_KERNEL, priv->dd->node);
netif_tx_napi_add(dev, &txq->napi,
hfi1_ipoib_poll_tx_ring,
NAPI_POLL_WEIGHT);
netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
}
return 0;
......
......@@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
* right now.
*/
set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
rc = msix_netdev_request_rcd_irq(rxq->rcd);
if (rc)
goto bail_context_irq_failure;
......
......@@ -172,7 +172,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
}
/*
* Read nbytes from "from" and and place them in the low bytes
* Read nbytes from "from" and place them in the low bytes
* of pbuf->carry. Other bytes are left as-is. Any previous
* value in pbuf->carry is lost.
*
......
......@@ -959,6 +959,7 @@ struct hns_roce_dev {
const struct hns_roce_hw *hw;
void *priv;
struct workqueue_struct *irq_workq;
struct work_struct ecc_work;
const struct hns_roce_dfx_hw *dfx;
u32 func_num;
u32 is_vf;
......
......@@ -250,6 +250,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
HNS_ROCE_OPC_EXT_CFG = 0x8512,
HNS_ROCE_QUERY_RAM_ECC = 0x8513,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
......@@ -1107,6 +1108,11 @@ enum {
#define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32)
#define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64)
/* Fields of HNS_ROCE_QUERY_RAM_ECC */
#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0)
#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32)
#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64)
struct hns_roce_cfg_sgid_tb {
__le32 table_idx_rsv;
__le32 vf_sgid_l;
......@@ -1343,6 +1349,12 @@ struct hns_roce_dip {
struct list_head node; /* all dips are on a list */
};
struct fmea_ram_ecc {
u32 is_ecc_err;
u32 res_type;
u32 index;
};
/* only for RNR timeout issue of HIP08 */
#define HNS_ROCE_CLOCK_ADJUST 1000
#define HNS_ROCE_MAX_CQ_PERIOD 65
......@@ -1382,7 +1394,6 @@ struct hns_roce_dip {
#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
#define HNS_ROCE_V2_VF_INT_ST_RAS_INT_S 1
#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
......
......@@ -1477,12 +1477,13 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
list_for_each_entry (listen_node, &cm_core->listen_list, list) {
memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
listen_port = listen_node->loc_port;
if (listen_port != dst_port ||
!(listener_state & listen_node->listener_state))
continue;
/* compare node pair, return node handle if a match */
if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
!memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
listen_port == dst_port &&
vlan_id == listen_node->vlan_id &&
(listener_state & listen_node->listener_state)) {
if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) ||
(!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) &&
vlan_id == listen_node->vlan_id)) {
refcount_inc(&listen_node->refcnt);
spin_unlock_irqrestore(&cm_core->listen_list_lock,
flags);
......
......@@ -4872,10 +4872,12 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
sd_diff = sd_needed - hmc_fpm_misc->max_sds;
if (sd_diff > 128) {
if (qpwanted > 128 && sd_diff > 144)
if (!(loop_count % 2) && qpwanted > 128) {
qpwanted /= 2;
mrwanted /= 2;
pblewanted /= 2;
} else {
mrwanted /= 2;
pblewanted /= 2;
}
continue;
}
if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF &&
......
......@@ -257,10 +257,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
iwqp->last_aeq = info->ae_id;
spin_unlock_irqrestore(&iwqp->lock, flags);
ctx_info = &iwqp->ctx_info;
if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1))
ctx_info->roce_info->err_rq_idx_valid = true;
else
ctx_info->iwarp_info->err_rq_idx_valid = true;
} else {
if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR)
continue;
......@@ -370,16 +366,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
case IRDMA_AE_LCE_CQ_CATASTROPHIC:
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
if (rdma_protocol_roce(&iwdev->ibdev, 1))
ctx_info->roce_info->err_rq_idx_valid = false;
else
ctx_info->iwarp_info->err_rq_idx_valid = false;
fallthrough;
default:
ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d\n",
info->ae_id, info->qp, info->qp_cq_id);
ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
if (!info->sq && ctx_info->roce_info->err_rq_idx_valid) {
ctx_info->roce_info->err_rq_idx_valid = info->rq;
if (info->rq) {
ctx_info->roce_info->err_rq_idx = info->wqe_idx;
irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
ctx_info);
......@@ -388,7 +380,8 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
irdma_cm_disconn(iwqp);
break;
}
if (!info->sq && ctx_info->iwarp_info->err_rq_idx_valid) {
ctx_info->iwarp_info->err_rq_idx_valid = info->rq;
if (info->rq) {
ctx_info->iwarp_info->err_rq_idx = info->wqe_idx;
ctx_info->tcp_info_valid = false;
ctx_info->iwarp_info_valid = true;
......@@ -1512,10 +1505,7 @@ static int irdma_hmc_setup(struct irdma_pci_f *rf)
int status;
u32 qpcnt;
if (rf->rdma_ver == IRDMA_GEN_1)
qpcnt = rsrc_limits_table[rf->limits_sel].qplimit * 2;
else
qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
rf->sd_type = IRDMA_SD_TYPE_DIRECT;
status = irdma_cfg_fpm_val(&rf->sc_dev, qpcnt);
......@@ -1543,7 +1533,7 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf)
rf->obj_mem.pa);
rf->obj_mem.va = NULL;
if (rf->rdma_ver != IRDMA_GEN_1) {
kfree(rf->allocated_ws_nodes);
bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
}
kfree(rf->ceqlist);
......@@ -1972,9 +1962,8 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
u32 ret;
if (rf->rdma_ver != IRDMA_GEN_1) {
rf->allocated_ws_nodes =
kcalloc(BITS_TO_LONGS(IRDMA_MAX_WS_NODES),
sizeof(unsigned long), GFP_KERNEL);
rf->allocated_ws_nodes = bitmap_zalloc(IRDMA_MAX_WS_NODES,
GFP_KERNEL);
if (!rf->allocated_ws_nodes)
return -ENOMEM;
......@@ -2023,7 +2012,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
return 0;
mem_rsrc_kzalloc_fail:
kfree(rf->allocated_ws_nodes);
bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
return ret;
......
......@@ -85,7 +85,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_NO_QSET 0xffff
#define IW_CFG_FPM_QP_COUNT 32768
#define IRDMA_MAX_PAGES_PER_FMR 512
#define IRDMA_MAX_PAGES_PER_FMR 262144
#define IRDMA_MIN_PAGES_PER_FMR 1
#define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2
#define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3
......
......@@ -652,6 +652,7 @@ static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = {
};
static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = {
{0xffff, 0x8002, "Invalid State"},
{0xffff, 0x8006, "Flush No Wqe Pending"},
{0xffff, 0x8007, "Modify QP Bad Close"},
{0xffff, 0x8009, "LLP Closed"},
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部