diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index de3c2fc6f361c1356443327754b46d3a2ae8c480..17ab369efc5eb3dea5fa00c6071ac07f543bb1f4 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2185,27 +2185,69 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, return 0; } +static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem, + unsigned long pgsz_bitmap) +{ + unsigned long page_size; + + /* Don't bother checking larger page sizes as offset must be zero and + * total DEVX umem length must be equal to total umem length. + */ + pgsz_bitmap &= GENMASK_ULL(max_t(u64, order_base_2(umem->length), + PAGE_SHIFT), + MLX5_ADAPTER_PAGE_SHIFT); + if (!pgsz_bitmap) + return 0; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, U64_MAX); + if (!page_size) + return 0; + + /* If the page_size is less than the CPU page size then we can use the + * offset and create a umem which is a subset of the page list. + * For larger page sizes we can't be sure the DMA list reflects the + * VA so we must ensure that the umem extent is exactly equal to the + * page list. Reduce the page size until one of these cases is true. + */ + while ((ib_umem_dma_offset(umem, page_size) != 0 || + (umem->length % page_size) != 0) && + page_size > PAGE_SIZE) + page_size /= 2; + + return page_size; +} + static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { + unsigned long pgsz_bitmap; unsigned int page_size; __be64 *mtt; void *umem; + int ret; /* - * We don't know what the user intends to use this umem for, but the HW - * restrictions must be met. MR, doorbell records, QP, WQ and CQ all - * have different requirements. Since we have no idea how to sort this - * out, only support PAGE_SIZE with the expectation that userspace will - * provide the necessary alignments inside the known PAGE_SIZE and that - * FW will check everything. + * If the user does not pass in pgsz_bitmap then the user promises not + * to use umem_offset!=0 in any commands that allocate on top of the + * umem. + * + * If the user wants to use a umem_offset then it must pass in + * pgsz_bitmap which guides the maximum page size and thus maximum + * object alignment inside the umem. See the PRM. + * + * Users are not allowed to use IOVA here, mkeys are not supported on + * umem. */ - page_size = ib_umem_find_best_pgoff( - obj->umem, PAGE_SIZE, - __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), - 0)); + ret = uverbs_get_const_default(&pgsz_bitmap, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, + GENMASK_ULL(63, + min(PAGE_SHIFT, MLX5_ADAPTER_PAGE_SHIFT))); + if (ret) + return ret; + + page_size = devx_umem_find_best_pgsize(obj->umem, pgsz_bitmap); if (!page_size) return -EINVAL; @@ -2791,6 +2833,8 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY), UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, enum ib_access_flags), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, + u64), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 3fd9b380a091202a1da4ca1f1a035714af7d1f02..3f0bc7597ba726dcdfc0d4921d57b66b34dcc017 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -154,6 +154,7 @@ enum mlx5_ib_devx_umem_reg_attrs { MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, + MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, }; enum mlx5_ib_devx_umem_dereg_attrs {