提交 4e84608c 作者: Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull RDMA fixes from Jason Gunthorpe:
 "Bug fixes for old bugs in the hns and hfi1 drivers:

   - Calculate various values in hns properly to avoid over/underflows
     in some cases

   - Fix an oops, PCI negotiation on Gen4 systems, and bugs related to
     retries"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/hns: Correct the value of srq_desc_size
  RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN
  IB/hfi1: TID RDMA WRITE should not return IB_WC_RNR_RETRY_EXC_ERR
  IB/hfi1: Calculate flow weight based on QP MTU for TID RDMA
  IB/hfi1: Ensure r_tid_ack is valid before building TID RDMA ACK packet
  IB/hfi1: Ensure full Gen3 speed in a Gen4 system
...@@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void) ...@@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void)
goto bail_dev; goto bail_dev;
} }
hfi1_compute_tid_rdma_flow_wt();
/* /*
* These must be called before the driver is registered with * These must be called before the driver is registered with
* the PCI subsystem. * the PCI subsystem.
......
...@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd) ...@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
/* /*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/ */
if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { if (parent &&
(dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0; dd->link_gen3_capable = 0;
} }
......
...@@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_flags & RVT_S_WAIT_RNR) if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop; goto bail_stop;
rdi = ib_to_rvt(qp->ibqp.device); rdi = ib_to_rvt(qp->ibqp.device);
if (qp->s_rnr_retry == 0 && if (!(rdi->post_parms[wqe->wr.opcode].flags &
!((rdi->post_parms[wqe->wr.opcode].flags & RVT_OPERATION_IGN_RNR_CNT)) {
RVT_OPERATION_IGN_RNR_CNT) && if (qp->s_rnr_retry == 0) {
qp->s_rnr_retry_cnt == 0)) {
status = IB_WC_RNR_RETRY_EXC_ERR; status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b; goto class_b;
} }
if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
qp->s_rnr_retry--; qp->s_rnr_retry--;
}
/* /*
* The last valid PSN is the previous PSN. For TID RDMA WRITE * The last valid PSN is the previous PSN. For TID RDMA WRITE
......
...@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a) ...@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
* C - Capcode * C - Capcode
*/ */
static u32 tid_rdma_flow_wt;
static void tid_rdma_trigger_resume(struct work_struct *work); static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req); static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req, static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
...@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet, ...@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
struct tid_rdma_flow *flow, struct tid_rdma_flow *flow,
bool fecn); bool fecn);
static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
{
if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
priv->r_tid_ack = priv->r_tid_tail;
}
static void tid_rdma_schedule_ack(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
priv->s_flags |= RVT_S_ACK_PENDING;
hfi1_schedule_tid_send(qp);
}
static void tid_rdma_trigger_ack(struct rvt_qp *qp)
{
validate_r_tid_ack(qp->priv);
tid_rdma_schedule_ack(qp);
}
static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
{ {
return return
...@@ -3005,10 +3023,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, ...@@ -3005,10 +3023,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
qpriv->s_nak_state = IB_NAK_PSN_ERROR; qpriv->s_nak_state = IB_NAK_PSN_ERROR;
/* We are NAK'ing the next expected PSN */ /* We are NAK'ing the next expected PSN */
qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn); qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
qpriv->s_flags |= RVT_S_ACK_PENDING; tid_rdma_trigger_ack(qp);
if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
qpriv->r_tid_ack = qpriv->r_tid_tail;
hfi1_schedule_tid_send(qp);
} }
goto unlock; goto unlock;
} }
...@@ -3371,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, ...@@ -3371,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32); return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
} }
void hfi1_compute_tid_rdma_flow_wt(void) static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
{ {
/* /*
* Heuristic for computing the RNR timeout when waiting on the flow * Heuristic for computing the RNR timeout when waiting on the flow
* queue. Rather than a computationaly expensive exact estimate of when * queue. Rather than a computationaly expensive exact estimate of when
* a flow will be available, we assume that if a QP is at position N in * a flow will be available, we assume that if a QP is at position N in
* the flow queue it has to wait approximately (N + 1) * (number of * the flow queue it has to wait approximately (N + 1) * (number of
* segments between two sync points), assuming PMTU of 4K. The rationale * segments between two sync points). The rationale for this is that
* for this is that flows are released and recycled at each sync point. * flows are released and recycled at each sync point.
*/ */
tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) / return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
TID_RDMA_MAX_SEGMENT_SIZE;
} }
static u32 position_in_queue(struct hfi1_qp_priv *qpriv, static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
...@@ -3505,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) ...@@ -3505,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) { if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp); ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
if (ret) { if (ret) {
to_seg = tid_rdma_flow_wt * to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
position_in_queue(qpriv, position_in_queue(qpriv,
&rcd->flow_queue); &rcd->flow_queue);
break; break;
...@@ -3526,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) ...@@ -3526,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
/* /*
* If overtaking req->acked_tail, send an RNR NAK. Because the * If overtaking req->acked_tail, send an RNR NAK. Because the
* QP is not queued in this case, and the issue can only be * QP is not queued in this case, and the issue can only be
* caused due a delay in scheduling the second leg which we * caused by a delay in scheduling the second leg which we
* cannot estimate, we use a rather arbitrary RNR timeout of * cannot estimate, we use a rather arbitrary RNR timeout of
* (MAX_FLOWS / 2) segments * (MAX_FLOWS / 2) segments
*/ */
...@@ -3534,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) ...@@ -3534,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
MAX_FLOWS)) { MAX_FLOWS)) {
ret = -EAGAIN; ret = -EAGAIN;
to_seg = MAX_FLOWS >> 1; to_seg = MAX_FLOWS >> 1;
qpriv->s_flags |= RVT_S_ACK_PENDING; tid_rdma_trigger_ack(qp);
hfi1_schedule_tid_send(qp);
break; break;
} }
...@@ -4335,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) ...@@ -4335,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn, trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
req); req);
trace_hfi1_tid_write_rsp_rcv_data(qp); trace_hfi1_tid_write_rsp_rcv_data(qp);
if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) validate_r_tid_ack(priv);
priv->r_tid_ack = priv->r_tid_tail;
if (opcode == TID_OP(WRITE_DATA_LAST)) { if (opcode == TID_OP(WRITE_DATA_LAST)) {
release_rdma_sge_mr(e); release_rdma_sge_mr(e);
...@@ -4375,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) ...@@ -4375,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
} }
done: done:
priv->s_flags |= RVT_S_ACK_PENDING; tid_rdma_schedule_ack(qp);
hfi1_schedule_tid_send(qp);
exit: exit:
priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
if (fecn) if (fecn)
...@@ -4388,10 +4399,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) ...@@ -4388,10 +4399,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
if (!priv->s_nak_state) { if (!priv->s_nak_state) {
priv->s_nak_state = IB_NAK_PSN_ERROR; priv->s_nak_state = IB_NAK_PSN_ERROR;
priv->s_nak_psn = flow->flow_state.r_next_psn; priv->s_nak_psn = flow->flow_state.r_next_psn;
priv->s_flags |= RVT_S_ACK_PENDING; tid_rdma_trigger_ack(qp);
if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
priv->r_tid_ack = priv->r_tid_tail;
hfi1_schedule_tid_send(qp);
} }
goto done; goto done;
} }
...@@ -4939,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) ...@@ -4939,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
qpriv->resync = true; qpriv->resync = true;
/* RESYNC request always gets a TID RDMA ACK. */ /* RESYNC request always gets a TID RDMA ACK. */
qpriv->s_nak_state = 0; qpriv->s_nak_state = 0;
qpriv->s_flags |= RVT_S_ACK_PENDING; tid_rdma_trigger_ack(qp);
hfi1_schedule_tid_send(qp);
bail: bail:
if (fecn) if (fecn)
qp->s_flags |= RVT_S_ECN; qp->s_flags |= RVT_S_ECN;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT) #define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
#define TID_RDMA_SEGMENT_SHIFT 18
/* /*
* Bit definitions for priv->s_flags. * Bit definitions for priv->s_flags.
...@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, ...@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr, struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len); u32 *bth1, u32 *bth2, u32 *len);
void hfi1_compute_tid_rdma_flow_wt(void);
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet); void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
......
...@@ -59,7 +59,7 @@ enum { ...@@ -59,7 +59,7 @@ enum {
#define HNS_ROCE_HEM_CHUNK_LEN \ #define HNS_ROCE_HEM_CHUNK_LEN \
((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
(sizeof(struct scatterlist))) (sizeof(struct scatterlist) + sizeof(void *)))
#define check_whether_bt_num_3(type, hop_num) \ #define check_whether_bt_num_3(type, hop_num) \
(type < HEM_TYPE_MTT && hop_num == 2) (type < HEM_TYPE_MTT && hop_num == 2)
......
...@@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ...@@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
srq->max_gs = srq_init_attr->attr.max_sge; srq->max_gs = srq_init_attr->attr.max_sge;
srq_desc_size = max(16, 16 * srq->max_gs); srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
srq->wqe_shift = ilog2(srq_desc_size); srq->wqe_shift = ilog2(srq_desc_size);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册