Commit 9603e221 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Doug Ledford:
 "A small collection of -rc fixes. Mostly. One API addition, but that's
  because we wanted to use it in a fix. There's also a bug fix that is
  going to render the 5.5 kernel's soft-RoCE driver incompatible with
  all soft-RoCE versions prior, but it's required to actually implement
  the protocol according to the RoCE spec and required in order for the
  soft-RoCE driver to be able to successfully work with actual RoCE
  hardware.

  Summary:

   - Update Steve Wise info

   - Fix for soft-RoCE crc calculations (will break back compatibility,
     but only with the soft-RoCE driver, which has had this bug since it
     was introduced and it is an on-the-wire bug, but will make
     soft-RoCE fully compatible with real RoCE hardware)

   - cma init fixup

   - counters oops fix

   - fix for mlx4 init/teardown sequence

   - fix for mlx5 steering rules

   - introduce a cleanup API, which isn't a fix, but we want to use it
     in the next fix

   - fix for mlx5 memory management that uses API in previous patch"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  IB/mlx5: Fix device memory flows
  IB/core: Introduce rdma_user_mmap_entry_insert_range() API
  IB/mlx5: Fix steering rule of drop and count
  IB/mlx4: Follow mirror sequence of device add during device removal
  RDMA/counter: Prevent auto-binding a QP which are not tracked with res
  rxe: correctly calculate iCRC for unaligned payloads
  Update mailmap info for Steve Wise
  RDMA/cma: add missed unregister_pernet_subsys in init failure
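
The rxe change summarized above ("correctly calculate iCRC for unaligned payloads") is the on-the-wire fix: when the payload length is not a multiple of four bytes, the BTH pad bytes must be zero-filled and included in the iCRC on both the send and receive paths, as the rxe hunks near the end of this diff show. The following standalone snippet only illustrates that rule; it uses zlib's crc32() as a stand-in for the RoCE iCRC, and the buffer layout and names are invented for the example.

#include <stdio.h>
#include <string.h>
#include <zlib.h>	/* link with -lz */

int main(void)
{
	const unsigned char payload[] = "abcde";	/* 5 data bytes */
	size_t len = sizeof(payload) - 1;
	size_t pad = (4 - (len & 3)) & 3;		/* 3 pad bytes to reach a 4-byte boundary */
	unsigned char buf[8];
	unsigned long crc;

	memcpy(buf, payload, len);
	memset(buf + len, 0, pad);			/* pad bytes must be zeroed */

	crc = crc32(0L, Z_NULL, 0);
	crc = crc32(crc, buf, len + pad);		/* checksum covers payload + pad */

	printf("crc over %zu payload + %zu pad bytes: %08lx\n", len, pad, crc);
	return 0;
}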
@@ -276,3 +276,5 @@ Gustavo Padovan <gustavo@las.ic.unicamp.br>
Gustavo Padovan <padovan@profusion.mobi>
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
@@ -4763,6 +4763,7 @@ static int __init cma_init(void)
err:
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
destroy_workqueue(cma_wq);
return ret;
......
@@ -286,6 +286,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
struct rdma_counter *counter;
int ret;
if (!qp->res.valid)
return 0;
if (!rdma_is_port_valid(dev, port))
return -EINVAL;
......
@@ -238,28 +238,32 @@ void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
/**
* rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa
* rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
* in a given range.
*
* @ucontext: associated user context.
* @entry: the entry to insert into the mmap_xa
* @length: length of the address that will be mmapped
* @min_pgoff: minimum pgoff to be returned
* @max_pgoff: maximum pgoff to be returned
*
* This function should be called by drivers that use the rdma_user_mmap
* interface for implementing their mmap syscall. A database of mmap offsets is
* handled in the core and helper functions are provided to insert entries
* into the database and extract entries when the user calls mmap with the
* given offset. The function allocates a unique page offset that should be
* provided to user, the user will use the offset to retrieve information such
* as address to be mapped and how.
* given offset. The function allocates a unique page offset in a given range
* that should be provided to user, the user will use the offset to retrieve
* information such as address to be mapped and how.
*
* Return: 0 on success and -ENOMEM on failure
*/
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
struct rdma_user_mmap_entry *entry,
size_t length)
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
struct rdma_user_mmap_entry *entry,
size_t length, u32 min_pgoff,
u32 max_pgoff)
{
struct ib_uverbs_file *ufile = ucontext->ufile;
XA_STATE(xas, &ucontext->mmap_xa, 0);
XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
u32 xa_first, xa_last, npages;
int err;
u32 i;
@@ -285,7 +289,7 @@ int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
entry->npages = npages;
while (true) {
/* First find an empty index */
xas_find_marked(&xas, U32_MAX, XA_FREE_MARK);
xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
if (xas.xa_node == XAS_RESTART)
goto err_unlock;
@@ -332,4 +336,30 @@ int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
mutex_unlock(&ufile->umap_lock);
return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
/**
* rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
*
* @ucontext: associated user context.
* @entry: the entry to insert into the mmap_xa
* @length: length of the address that will be mmapped
*
* This function should be called by drivers that use the rdma_user_mmap
* interface for handling user mmapped addresses. The database is handled in
* the core and helper functions are provided to insert entries into the
* database and extract entries when the user calls mmap with the given offset.
* The function allocates a unique page offset that should be provided to user,
* the user will use the offset to retrieve information such as address to
* be mapped and how.
*
* Return: 0 on success and -ENOMEM on failure
*/
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
struct rdma_user_mmap_entry *entry,
size_t length)
{
return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
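
A usage note on the new API: a driver that wants one class of mappings to occupy its own page-offset window passes that window to rdma_user_mmap_entry_insert_range(), as the mlx5 device-memory path does further down in this diff. A minimal sketch follows; the helper name and the "command in the upper 16 bits" window choice are illustrative, mirroring mlx5's add_dm_mmap_entry() rather than defining a general rule.

/* Hypothetical helper, modeled on mlx5's add_dm_mmap_entry() below. */
static int example_insert_in_window(struct ib_ucontext *ucontext,
				    struct rdma_user_mmap_entry *entry,
				    size_t length, u32 command)
{
	u32 first = command << 16;		/* window start: command in bits 31:16 */
	u32 last = first + (1UL << 16) - 1;	/* 64K page offsets per command */

	return rdma_user_mmap_entry_insert_range(ucontext, entry, length,
						 first, last);
}

The page offset returned in entry->start_pgoff then encodes both the command and a per-command index, which the driver's mmap handler can split apart again to look the entry up.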
@@ -3018,16 +3018,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
ibdev->ib_active = false;
flush_workqueue(wq);
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
mlx4_ib_diag_cleanup(ibdev);
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
kfree(ibdev->ib_uc_qpns_bitmap);
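/*
 * Annotation (not a hunk of the patch): with the reordering above,
 * mlx4_ib_remove() now unregisters the netdevice notifier first and only
 * then tears down SR-IOV, MAD, the IB device and the diag counters, so the
 * removal path mirrors the add sequence in reverse, as the patch title
 * "Follow mirror sequence of device add during device removal" states.
 */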
......
@@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
return -ENOMEM;
}
int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
{
struct mlx5_core_dev *dev = dm->dev;
u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
@@ -175,15 +175,13 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
MLX5_SET(dealloc_memic_in, in, memic_size, length);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
return;
if (!err) {
spin_lock(&dm->lock);
bitmap_clear(dm->memic_alloc_pages,
start_page_idx, num_pages);
spin_unlock(&dm->lock);
}
return err;
spin_lock(&dm->lock);
bitmap_clear(dm->memic_alloc_pages,
start_page_idx, num_pages);
spin_unlock(&dm->lock);
}
int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
......
@@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
......
@@ -2074,6 +2074,24 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
virt_to_page(dev->mdev->clock_info));
}
static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
{
struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
struct mlx5_ib_dm *mdm;
switch (mentry->mmap_flag) {
case MLX5_IB_MMAP_TYPE_MEMIC:
mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
mdm->size);
kfree(mdm);
break;
default:
WARN_ON(true);
}
}
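/*
 * Annotation (not a hunk of the patch): with this .mmap_free callback wired
 * up below, MEMIC device memory is no longer released synchronously in
 * mlx5_ib_dealloc_dm(); dealloc only removes the mmap entry, and
 * mlx5_cmd_dealloc_memic() runs here once the entry, and with it any user
 * mapping of the memory, has been released.
 */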
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
@@ -2186,26 +2204,55 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
return err;
}
static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
static int add_dm_mmap_entry(struct ib_ucontext *context,
struct mlx5_ib_dm *mdm,
u64 address)
{
mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
mdm->mentry.address = address;
return rdma_user_mmap_entry_insert_range(
context, &mdm->mentry.rdma_entry,
mdm->size,
MLX5_IB_MMAP_DEVICE_MEM << 16,
(MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
}
static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
{
unsigned long idx;
u8 command;
command = get_command(vma->vm_pgoff);
idx = get_extended_index(vma->vm_pgoff);
return (command << 16 | idx);
}
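/*
 * Annotation (not a hunk of the patch): the user-visible page offset is
 * composed as (command << 16) | index.  add_dm_mmap_entry() above therefore
 * confines MEMIC entries to the 64K-wide window starting at
 * MLX5_IB_MMAP_DEVICE_MEM << 16, and mlx5_vma_to_pgoff() rebuilds the same
 * value from get_command() and get_extended_index() so the entry can be
 * found again via rdma_user_mmap_entry_get_pgoff().  Purely for
 * illustration: if MLX5_IB_MMAP_DEVICE_MEM were 4, DM page offsets would
 * fall in [0x40000, 0x4ffff].
 */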
static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
struct vm_area_struct *vma,
struct ib_ucontext *ucontext)
{
struct mlx5_ib_ucontext *mctx = to_mucontext(context);
struct mlx5_ib_dev *dev = to_mdev(context->device);
u16 page_idx = get_extended_index(vma->vm_pgoff);
size_t map_size = vma->vm_end - vma->vm_start;
u32 npages = map_size >> PAGE_SHIFT;
struct mlx5_user_mmap_entry *mentry;
struct rdma_user_mmap_entry *entry;
unsigned long pgoff;
pgprot_t prot;
phys_addr_t pfn;
int ret;
if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
page_idx + npages)
pgoff = mlx5_vma_to_pgoff(vma);
entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
if (!entry)
return -EINVAL;
pfn = ((dev->mdev->bar_addr +
MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
PAGE_SHIFT) +
page_idx;
return rdma_user_mmap_io(context, vma, pfn, map_size,
pgprot_writecombine(vma->vm_page_prot),
NULL);
mentry = to_mmmap(entry);
pfn = (mentry->address >> PAGE_SHIFT);
prot = pgprot_writecombine(vma->vm_page_prot);
ret = rdma_user_mmap_io(ucontext, vma, pfn,
entry->npages * PAGE_SIZE,
prot,
entry);
rdma_user_mmap_entry_put(&mentry->rdma_entry);
return ret;
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2248,11 +2295,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
case MLX5_IB_MMAP_DEVICE_MEM:
return dm_mmap(ibcontext, vma);
default:
return -EINVAL;
return mlx5_ib_mmap_offset(dev, vma, ibcontext);
}
return 0;
@@ -2288,8 +2332,9 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
{
struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
u64 start_offset;
u32 page_idx;
u16 page_idx;
int err;
u64 address;
dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
@@ -2298,28 +2343,30 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
if (err)
return err;
page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) -
MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >>
PAGE_SHIFT;
address = dm->dev_addr & PAGE_MASK;
err = add_dm_mmap_entry(ctx, dm, address);
if (err)
goto err_dealloc;
page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
&page_idx, sizeof(page_idx));
&page_idx,
sizeof(page_idx));
if (err)
goto err_dealloc;
goto err_copy;
start_offset = dm->dev_addr & ~PAGE_MASK;
err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
&start_offset, sizeof(start_offset));
if (err)
goto err_dealloc;
bitmap_set(to_mucontext(ctx)->dm_pages, page_idx,
DIV_ROUND_UP(dm->size, PAGE_SIZE));
goto err_copy;
return 0;
err_copy:
rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
err_dealloc:
mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
@@ -2423,23 +2470,13 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
struct mlx5_ib_dm *dm = to_mdm(ibdm);
u32 page_idx;
int ret;
switch (dm->type) {
case MLX5_IB_UAPI_DM_TYPE_MEMIC:
ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
if (ret)
return ret;
page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) -
MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >>
PAGE_SHIFT;
bitmap_clear(ctx->dm_pages, page_idx,
DIV_ROUND_UP(dm->size, PAGE_SIZE));
break;
rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
return 0;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
dm->size, ctx->devx_uid, dm->dev_addr,
@@ -3544,10 +3581,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
INIT_LIST_HEAD(&handler->list);
if (dst) {
memcpy(&dest_arr[0], dst, sizeof(*dst));
dest_num++;
}
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec,
@@ -3560,6 +3593,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
memcpy(&dest_arr[0], dst, sizeof(*dst));
dest_num++;
}
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
@@ -3600,10 +3638,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
if (!dest_num)
rule_dst = NULL;
dest_num = 0;
}
} else {
if (is_egress)
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
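/*
 * Annotation (not a hunk of the patch): together with the earlier hunk that
 * adds the default forwarding destination only when the rule is not a DROP
 * rule, this leaves a plain DROP rule with no destination at all, while a
 * DROP + COUNT rule keeps the flow counter as its only destination.
 */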
@@ -6236,6 +6272,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.map_mr_sg = mlx5_ib_map_mr_sg,
.map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
.mmap = mlx5_ib_mmap,
.mmap_free = mlx5_ib_mmap_free,
.modify_cq = mlx5_ib_modify_cq,
.modify_device = mlx5_ib_modify_device,
.modify_port = mlx5_ib_modify_port,
......
@@ -118,6 +118,10 @@ enum {
MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
};
enum mlx5_ib_mmap_type {
MLX5_IB_MMAP_TYPE_MEMIC = 1,
};
#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \
(MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -135,7 +139,6 @@ struct mlx5_ib_ucontext {
u32 tdn;
u64 lib_caps;
DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
u16 devx_uid;
/* For RoCE LAG TX affinity */
atomic_t tx_port_affinity;
@@ -556,6 +559,12 @@ enum mlx5_ib_mtt_access_flags {
MLX5_IB_MTT_WRITE = (1 << 1),
};
struct mlx5_user_mmap_entry {
struct rdma_user_mmap_entry rdma_entry;
u8 mmap_flag;
u64 address;
};
struct mlx5_ib_dm {
struct ib_dm ibdm;
phys_addr_t dev_addr;
@@ -567,6 +576,7 @@ struct mlx5_ib_dm {
} icm_dm;
/* other dm types specific params should be added here */
};
struct mlx5_user_mmap_entry mentry;
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -1101,6 +1111,13 @@ to_mflow_act(struct ib_flow_action *ibact)
return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
}
static inline struct mlx5_user_mmap_entry *
to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
{
return container_of(rdma_entry,
struct mlx5_user_mmap_entry, rdma_entry);
}
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct mlx5_db *db);
......
@@ -389,7 +389,7 @@ void rxe_rcv(struct sk_buff *skb)
calc_icrc = rxe_icrc_hdr(pkt, skb);
calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt),
payload_size(pkt));
payload_size(pkt) + bth_pad(pkt));
calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
if (skb->protocol == htons(ETH_P_IPV6))
......
@@ -500,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (err)
return err;
}
if (bth_pad(pkt)) {
u8 *pad = payload_addr(pkt) + paylen;
memset(pad, 0, bth_pad(pkt));
crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt));
}
}
p = payload_addr(pkt) + paylen + bth_pad(pkt);
......
@@ -732,6 +732,13 @@ static enum resp_states read_reply(struct rxe_qp *qp,
if (err)
pr_err("Failed copying memory\n");
if (bth_pad(&ack_pkt)) {
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
u8 *pad = payload_addr(&ack_pkt) + payload;
memset(pad, 0, bth_pad(&ack_pkt));
icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt));
}
p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
*p = ~icrc;
......
@@ -2832,6 +2832,11 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
struct rdma_user_mmap_entry *entry,
size_t length);
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
struct rdma_user_mmap_entry *entry,
size_t length, u32 min_pgoff,
u32 max_pgoff);
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
unsigned long pgoff);
......