提交 14f974d7 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "One core bug fix and a few driver ones

   - FRWR memory registration for hfi1/qib didn't work with some
     iovas causing a NFSoRDMA failure regression due to a fix in the NFS
     side

   - A command flow error in mlx5 allowed user space to send a corrupt
     command (and also smash the kernel stack we've since learned)

   - Fix a regression and some bugs with device hot unplug that were
     discovered while reviewing Andrea's patches

   - hns has a failure if the user asks for certain QP configurations"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/hns: Bugfix for mapping user db
  RDMA/ucontext: Fix regression with disassociate
  RDMA/mlx5: Use rdma_user_map_io for mapping BAR pages
  RDMA/mlx5: Do not allow the user to write to the clock page
  IB/mlx5: Fix scatter to CQE in DCT QP creation
  IB/rdmavt: Fix frwr memory registration
...@@ -160,6 +160,7 @@ struct ib_uverbs_file { ...@@ -160,6 +160,7 @@ struct ib_uverbs_file {
struct mutex umap_lock; struct mutex umap_lock;
struct list_head umaps; struct list_head umaps;
struct page *disassociate_page;
struct idr idr; struct idr idr;
/* spinlock protects write access to idr */ /* spinlock protects write access to idr */
......
...@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref) ...@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref)
kref_put(&file->async_file->ref, kref_put(&file->async_file->ref,
ib_uverbs_release_async_event_file); ib_uverbs_release_async_event_file);
put_device(&file->device->dev); put_device(&file->device->dev);
if (file->disassociate_page)
__free_pages(file->disassociate_page, 0);
kfree(file); kfree(file);
} }
...@@ -877,9 +880,50 @@ static void rdma_umap_close(struct vm_area_struct *vma) ...@@ -877,9 +880,50 @@ static void rdma_umap_close(struct vm_area_struct *vma)
kfree(priv); kfree(priv);
} }
/*
 * Fault handler for the rdma user mmap VMAs.
 *
 * Once the zap_vma_ptes has been called touches to the VMA will come here and
 * we return a dummy zero page for all the pfns: the shared system zero page
 * for mappings that can never be written, otherwise a single zeroed page that
 * is allocated on first use and cached in ufile->disassociate_page.
 *
 * Returns 0 with vmf->page set and a reference taken on it, or
 * VM_FAULT_SIGBUS if the VMA carries no private data or the dummy page
 * cannot be allocated.
 */
static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
{
	struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
	struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
	vm_fault_t ret = 0;

	if (!priv)
		return VM_FAULT_SIGBUS;

	/* Read only pages can just use the system zero page. */
	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
		/*
		 * ZERO_PAGE() takes the faulting virtual address, which lives
		 * in vmf->address; struct vm_fault has no vm_start member, so
		 * the previous spelling only built on architectures whose
		 * ZERO_PAGE() discards its argument.
		 */
		vmf->page = ZERO_PAGE(vmf->address);
		get_page(vmf->page);
		return 0;
	}

	/* umap_lock serializes the lazy allocation of the shared dummy page. */
	mutex_lock(&ufile->umap_lock);
	if (!ufile->disassociate_page)
		ufile->disassociate_page =
			alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);

	if (ufile->disassociate_page) {
		/*
		 * This VMA is forced to always be shared so this doesn't have
		 * to worry about COW.
		 */
		vmf->page = ufile->disassociate_page;
		get_page(vmf->page);
	} else {
		ret = VM_FAULT_SIGBUS;
	}
	mutex_unlock(&ufile->umap_lock);

	return ret;
}
static const struct vm_operations_struct rdma_umap_ops = { static const struct vm_operations_struct rdma_umap_ops = {
.open = rdma_umap_open, .open = rdma_umap_open,
.close = rdma_umap_close, .close = rdma_umap_close,
.fault = rdma_umap_fault,
}; };
static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
...@@ -889,6 +933,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, ...@@ -889,6 +933,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
struct ib_uverbs_file *ufile = ucontext->ufile; struct ib_uverbs_file *ufile = ucontext->ufile;
struct rdma_umap_priv *priv; struct rdma_umap_priv *priv;
if (!(vma->vm_flags & VM_SHARED))
return ERR_PTR(-EINVAL);
if (vma->vm_end - vma->vm_start != size) if (vma->vm_end - vma->vm_start != size)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
...@@ -992,7 +1039,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) ...@@ -992,7 +1039,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
* at a time to get the lock ordering right. Typically there * at a time to get the lock ordering right. Typically there
* will only be one mm, so no big deal. * will only be one mm, so no big deal.
*/ */
down_write(&mm->mmap_sem); down_read(&mm->mmap_sem);
if (!mmget_still_valid(mm)) if (!mmget_still_valid(mm))
goto skip_mm; goto skip_mm;
mutex_lock(&ufile->umap_lock); mutex_lock(&ufile->umap_lock);
...@@ -1006,11 +1053,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) ...@@ -1006,11 +1053,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
zap_vma_ptes(vma, vma->vm_start, zap_vma_ptes(vma, vma->vm_start,
vma->vm_end - vma->vm_start); vma->vm_end - vma->vm_start);
vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
} }
mutex_unlock(&ufile->umap_lock); mutex_unlock(&ufile->umap_lock);
skip_mm: skip_mm:
up_write(&mm->mmap_sem); up_read(&mm->mmap_sem);
mmput(mm); mmput(mm);
} }
} }
......
...@@ -533,7 +533,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, ...@@ -533,7 +533,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
{ {
if (attr->qp_type == IB_QPT_XRC_TGT) if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
return 0; return 0;
return 1; return 1;
......
...@@ -1119,6 +1119,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, ...@@ -1119,6 +1119,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, qp_packet_based)) if (MLX5_CAP_GEN(mdev, qp_packet_based))
resp.flags |= resp.flags |=
MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
} }
if (field_avail(typeof(resp), sw_parsing_caps, if (field_avail(typeof(resp), sw_parsing_caps,
...@@ -2066,6 +2068,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, ...@@ -2066,6 +2068,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
if (vma->vm_flags & VM_WRITE) if (vma->vm_flags & VM_WRITE)
return -EPERM; return -EPERM;
vma->vm_flags &= ~VM_MAYWRITE;
if (!dev->mdev->clock_info_page) if (!dev->mdev->clock_info_page)
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -2231,19 +2234,18 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm ...@@ -2231,19 +2234,18 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
if (vma->vm_flags & VM_WRITE) if (vma->vm_flags & VM_WRITE)
return -EPERM; return -EPERM;
vma->vm_flags &= ~VM_MAYWRITE;
/* Don't expose to user-space information it shouldn't have */ /* Don't expose to user-space information it shouldn't have */
if (PAGE_SIZE > 4096) if (PAGE_SIZE > 4096)
return -EOPNOTSUPP; return -EOPNOTSUPP;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pfn = (dev->mdev->iseg_base + pfn = (dev->mdev->iseg_base +
offsetof(struct mlx5_init_seg, internal_timer_h)) >> offsetof(struct mlx5_init_seg, internal_timer_h)) >>
PAGE_SHIFT; PAGE_SHIFT;
if (io_remap_pfn_range(vma, vma->vm_start, pfn, return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
PAGE_SIZE, vma->vm_page_prot)) PAGE_SIZE,
return -EAGAIN; pgprot_noncached(vma->vm_page_prot));
break;
case MLX5_IB_MMAP_CLOCK_INFO: case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context); return mlx5_ib_mmap_clock_info_page(dev, vma, context);
......
...@@ -1818,13 +1818,16 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, ...@@ -1818,13 +1818,16 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr,
rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
if (rcqe_sz == 128) { if (init_attr->qp_type == MLX5_IB_QPT_DCT) {
MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); if (rcqe_sz == 128)
MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
return; return;
} }
if (init_attr->qp_type != MLX5_IB_QPT_DCT) MLX5_SET(qpc, qpc, cs_res,
MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
MLX5_RES_SCAT_DATA32_CQE);
} }
static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
......
...@@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) ...@@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
if (unlikely(mapped_segs == mr->mr.max_segs)) if (unlikely(mapped_segs == mr->mr.max_segs))
return -ENOMEM; return -ENOMEM;
if (mr->mr.length == 0) {
mr->mr.user_base = addr;
mr->mr.iova = addr;
}
m = mapped_segs / RVT_SEGSZ; m = mapped_segs / RVT_SEGSZ;
n = mapped_segs % RVT_SEGSZ; n = mapped_segs % RVT_SEGSZ;
mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].vaddr = (void *)addr;
...@@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) ...@@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
* @sg_nents: number of entries in sg * @sg_nents: number of entries in sg
* @sg_offset: offset in bytes into sg * @sg_offset: offset in bytes into sg
* *
* Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
*
* Return: number of sg elements mapped to the memory region * Return: number of sg elements mapped to the memory region
*/ */
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset) int sg_nents, unsigned int *sg_offset)
{ {
struct rvt_mr *mr = to_imr(ibmr); struct rvt_mr *mr = to_imr(ibmr);
int ret;
mr->mr.length = 0; mr->mr.length = 0;
mr->mr.page_shift = PAGE_SHIFT; mr->mr.page_shift = PAGE_SHIFT;
return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
rvt_set_page); mr->mr.user_base = ibmr->iova;
mr->mr.iova = ibmr->iova;
mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
mr->mr.length = (size_t)ibmr->length;
return ret;
} }
/** /**
...@@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, ...@@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
ibmr->rkey = key; ibmr->rkey = key;
mr->mr.lkey = key; mr->mr.lkey = key;
mr->mr.access_flags = access; mr->mr.access_flags = access;
mr->mr.iova = ibmr->iova;
atomic_set(&mr->mr.lkey_invalid, 0); atomic_set(&mr->mr.lkey_invalid, 0);
return 0; return 0;
......
...@@ -238,6 +238,7 @@ enum mlx5_ib_query_dev_resp_flags { ...@@ -238,6 +238,7 @@ enum mlx5_ib_query_dev_resp_flags {
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1,
MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2,
MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3,
}; };
enum mlx5_ib_tunnel_offloads { enum mlx5_ib_tunnel_offloads {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册