diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 533df3c68d2ff07f7213d6048f4ebc6e85482c59..c5edeaa6ee8e87396e1176989312568cac912a9c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -493,6 +493,7 @@ struct hns_roce_wq { int wqe_cnt; /* WQE num */ u32 max_post; int max_gs; + u32 rsv_sge; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -601,6 +602,7 @@ struct hns_roce_srq { unsigned long srqn; int max; int max_gs; + u32 rsv_sge; int wqe_shift; void __iomem *db_reg_l; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4e81d1bf2441e3cb1fdf82b447c6f398f148b838..4fd842f5fe6b84d92f96673bc06fd39dfe48c525 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -742,6 +742,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct device *dev = hr_dev->dev; unsigned long flags = 0; void *wqe = NULL; + u32 max_sge; int ret = 0; int nreq; int ind; @@ -766,6 +767,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EINVAL; } + max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&hr_qp->rq, nreq, hr_qp->ibqp.recv_cq)) { @@ -774,9 +776,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, goto out; } - if (unlikely(wr->num_sge >= hr_qp->rq.max_gs)) { - dev_err(dev, "RQ: sge num(%d) is larger or equal than max sge num(%d)\n", - wr->num_sge, hr_qp->rq.max_gs); + if (unlikely(wr->num_sge > max_sge)) { + dev_err(dev, "RQ: sge num(%d) is larger than max sge num(%d)\n", + wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; goto out; @@ -791,7 +793,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, dseg++; } - if (wr->num_sge < hr_qp->rq.max_gs) { + if (hr_qp->rq.rsv_sge) 
{ dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg->addr = 0; dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); @@ -1985,10 +1987,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline); caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); + caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); + caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; caps->num_other_vectors = resp_a->num_other_vectors; caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; @@ -5429,7 +5433,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, done: qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; - qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; @@ -7050,7 +7054,7 @@ int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) attr->srq_limit = limit_wl; attr->max_wr = srq->max - 1; - attr->max_sge = srq->max_gs - HNS_ROCE_RESERVED_SGE; + attr->max_sge = srq->max_gs - srq->rsv_sge; memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); @@ -7100,6 +7104,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, unsigned long flags; int ret = 0; int wqe_idx; + u32 max_sge; void *wqe; int nreq; int ind; @@ -7108,11 +7113,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, spin_lock_irqsave(&srq->lock, flags); ind = srq->head & (srq->max - 1); + max_sge = srq->max_gs - srq->rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge >= 
srq->max_gs)) { + if (unlikely(wr->num_sge > max_sge)) { dev_err(hr_dev->dev, "srq(0x%lx) wr sge num(%d) exceed the max num %d.\n", - srq->srqn, wr->num_sge, srq->max_gs); + srq->srqn, wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; break; @@ -7137,7 +7143,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); } - if (wr->num_sge < srq->max_gs) { + if (srq->rsv_sge) { dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg[i].addr = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index dad3ac0b4731cfa8dbdb066b7c88e38b6c323177..11318105826a5762bff013df9688ed8190ad6bf3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -58,16 +58,16 @@ #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_SRQ 0x100000 #define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 -#define HNS_ROCE_V2_MAX_SRQ_SGE 0xff +#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100 #define HNS_ROCE_V2_MAX_CQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100 #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 #define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 /* reserve one sge to circumvent a hardware issue */ -#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0xff +#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff -#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0xff +#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V2_UAR_NUM 256 diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 520213d6461edbfaae584e6fa45aa34192b695ee..e0f2413c2a0a367d2924f38cea1f1c95c75fd084 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -344,17 +344,42 @@ void hns_roce_release_range_qp(struct 
hns_roce_dev *hr_dev, int base_qpn, } EXPORT_SYMBOL_GPL(hns_roce_release_range_qp); +static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, + int user) +{ + u32 max_sge = dev->caps.max_rq_sg; + + if (dev->pci_dev->revision > PCI_REVISION_ID_HIP08_B) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. + */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_qp->rq.rsv_sge = 1; + + return max_sge; +} + + + static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, int is_user, int has_rq, struct hns_roce_qp *hr_qp) { + u32 max_sge = proc_rq_sge(hr_dev, hr_qp, is_user); struct device *dev = hr_dev->dev; u32 max_cnt; /* Check the validity of QP support capacity */ if (cap->max_recv_wr > hr_dev->caps.max_wqes || - cap->max_recv_sge > hr_dev->caps.max_rq_sg) { - dev_err(dev, "RQ(0x%lx) WR or sge error!max_recv_wr=%d max_recv_sge=%d\n", + cap->max_recv_sge > max_sge) { + dev_err(dev, "RQ(0x%lx) WR or sge error, depth = %u, sge = %u\n", hr_qp->qpn, cap->max_recv_wr, cap->max_recv_sge); return -EINVAL; } @@ -386,7 +411,7 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, max_cnt = max(1U, cap->max_recv_sge); hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt + - HNS_ROCE_RESERVED_SGE); + hr_qp->rq.rsv_sge); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_MAX_SGE_NUM) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -397,7 +422,7 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, } cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; - cap->max_recv_sge = hr_qp->rq.max_gs - HNS_ROCE_RESERVED_SGE; + cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; return 0; } diff --git 
a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 1e17484d3676aaa069e2e8cc4828f7a7a343152a..9d4aec18be0fcddc39f096436de35463ce792be6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -30,7 +30,7 @@ * SOFTWARE. */ #include "roce_k_compat.h" - +#include <linux/pci.h> #include <rdma/ib_umem.h> #include "hns_roce_device.h" @@ -431,6 +431,28 @@ static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); } +static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq, + bool user) +{ + u32 max_sge = dev->caps.max_srq_sges; + + if (dev->pci_dev->revision > PCI_REVISION_ID_HIP08_B) + return max_sge; + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. 
+ */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_srq->rsv_sge = 1; + + return max_sge; +} + + struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) @@ -439,23 +461,26 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, struct hns_roce_srq *srq; int srq_desc_size; int srq_buf_size; + u32 max_sge; int ret; u32 cqn; - /* Check the actual SRQ wqe and SRQ sge num */ - if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) - return ERR_PTR(-EINVAL); - srq = kzalloc(sizeof(*srq), GFP_KERNEL); if (!srq) return ERR_PTR(-ENOMEM); + max_sge = proc_srq_sge(hr_dev, srq, !!udata); + if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + srq_init_attr->attr.max_sge > max_sge) { + kfree(srq); + return ERR_PTR(-EINVAL); + } + mutex_init(&srq->mutex); spin_lock_init(&srq->lock); srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); - srq->max_gs = srq_init_attr->attr.max_sge + HNS_ROCE_RESERVED_SGE; + srq->max_gs = + roundup_pow_of_two(srq_init_attr->attr.max_sge + srq->rsv_sge); srq_desc_size = max(HNS_ROCE_SGE_SIZE, HNS_ROCE_SGE_SIZE * srq->max_gs); srq_desc_size = roundup_pow_of_two(srq_desc_size); @@ -499,6 +524,8 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, srq->event = hns_roce_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->srqn; + srq_init_attr->attr.max_wr = srq->max; + srq_init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge; if (pd->uobject) { if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {