diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ef58ebadb3aea0b9a41222055292e69cb427c72d..bbd6155d3e3454fa04ca6dcf129b866e44857b6d 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -73,7 +73,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-		int xprt_rdma_pad_optimize = 0;
+		int xprt_rdma_pad_optimize = 1;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -599,7 +599,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 
 	if (req->rl_niovs == 0)
 		rc = rpcrdma_marshal_req(rqst);
-	else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+	else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
 		rc = rpcrdma_marshal_chunks(rqst, 0);
 	if (rc < 0)
 		goto failed_marshal;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b92b04083e402e46d91288370c8d602563e75e54..c98e40643910326abf13ecf70d3f570289175006 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -62,6 +62,7 @@
 #endif
 
 static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
+static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
 /*
  * internal functions
  */
@@ -105,13 +106,51 @@ rpcrdma_run_tasklet(unsigned long data)
 
 static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
 
+static const char * const async_event[] = {
+	"CQ error",
+	"QP fatal error",
+	"QP request error",
+	"QP access error",
+	"communication established",
+	"send queue drained",
+	"path migration successful",
+	"path mig error",
+	"device fatal error",
+	"port active",
+	"port error",
+	"LID change",
+	"P_key change",
+	"SM change",
+	"SRQ error",
+	"SRQ limit reached",
+	"last WQE reached",
+	"client reregister",
+	"GID change",
+};
+
+#define ASYNC_MSG(status)					\
+	((status) < ARRAY_SIZE(async_event) ?			\
+		async_event[(status)] : "unknown async error")
+
+static void
+rpcrdma_schedule_tasklet(struct list_head *sched_list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+	tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
 static void
 rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC:       %s: QP error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC:       %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
@@ -124,8 +163,9 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC:       %s: CQ error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC:       %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
@@ -243,7 +283,6 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	struct list_head sched_list;
 	struct ib_wc *wcs;
 	int budget, count, rc;
-	unsigned long flags;
 
 	INIT_LIST_HEAD(&sched_list);
 	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
@@ -261,10 +300,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	rc = 0;
 
 out_schedule:
-	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
-	list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
-	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
-	tasklet_schedule(&rpcrdma_tasklet_g);
+	rpcrdma_schedule_tasklet(&sched_list);
 	return rc;
 }
 
@@ -309,8 +345,15 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
 static void
 rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
 {
-	rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
-	rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
+	struct ib_wc wc;
+	LIST_HEAD(sched_list);
+
+	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
+		rpcrdma_recvcq_process_wc(&wc, &sched_list);
+	if (!list_empty(&sched_list))
+		rpcrdma_schedule_tasklet(&sched_list);
+	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
+		rpcrdma_sendcq_process_wc(&wc);
 }
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -733,7 +776,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
 	/* set trigger for requesting send completion */
 	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
-	if (ep->rep_cqinit <= 2)
+	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
+		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
+	else if (ep->rep_cqinit <= 2)
 		ep->rep_cqinit = 0;
 	INIT_CQCOUNT(ep);
 	ep->rep_ia = ia;
@@ -866,8 +911,19 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 		rpcrdma_ep_disconnect(ep, ia);
 		rpcrdma_flush_cqs(ep);
 
-		if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
+		switch (ia->ri_memreg_strategy) {
+		case RPCRDMA_FRMR:
 			rpcrdma_reset_frmrs(ia);
+			break;
+		case RPCRDMA_MTHCAFMR:
+			rpcrdma_reset_fmrs(ia);
+			break;
+		case RPCRDMA_ALLPHYSICAL:
+			break;
+		default:
+			rc = -EIO;
+			goto out;
+		}
 
 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
 		id = rpcrdma_create_id(xprt, ia,
@@ -1287,6 +1343,34 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 	kfree(buf->rb_pool);
 }
 
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_xprt *r_xprt =
+				container_of(ia, struct rpcrdma_xprt, rx_ia);
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct list_head *pos;
+	struct rpcrdma_mw *r;
+	LIST_HEAD(l);
+	int rc;
+
+	list_for_each(pos, &buf->rb_all) {
+		r = list_entry(pos, struct rpcrdma_mw, mw_all);
+
+		INIT_LIST_HEAD(&l);
+		list_add(&r->r.fmr->list, &l);
+		rc = ib_unmap_fmr(&l);
+		if (rc)
+			dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
+				__func__, rc);
+	}
+}
+
 /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
  * an unusable state. Find FRMRs in this state and dereg / reg
  * each. FRMRs that are VALID and attached to an rpcrdma_req are
@@ -1918,10 +2002,10 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 		break;
 
 	default:
-		return -1;
+		return -EIO;
 	}
 	if (rc)
-		return -1;
+		return rc;
 	return nsegs;
 }
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7fc9a3134276bba8f67ad2cf9b84714ee3b3f4..b799041b75bf9efd01dc0c3a5bb77cc832bdf020 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -97,6 +97,12 @@ struct rpcrdma_ep {
 	struct ib_wc		rep_recv_wcs[RPCRDMA_POLLSIZE];
 };
 
+/*
+ * Force a signaled SEND Work Request every so often,
+ * in case the provider needs to do some housekeeping.
+ */
+#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)
+
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 
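
Two pieces of this patch are self-contained enough to illustrate outside the kernel. The sketch below is a minimal userspace model, not part of the patch: the async_event table is trimmed to three entries, the kernel's atomic_t counters become plain ints, and send_is_signaled() is a hypothetical stand-in for the DECR_CQCOUNT()/INIT_CQCOUNT() countdown applied when SENDs are posted. It shows how the bounds-checked ASYNC_MSG() lookup falls back to "unknown async error" for out-of-range event codes, and how the new RPCRDMA_MAX_UNSIGNALED_SENDS cap forces a signaled SEND at least every 32 Work Requests regardless of send queue depth.

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Trimmed copy of the table the patch adds to verbs.c. */
static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
};

/* Same bounds-checked lookup as the patch's ASYNC_MSG(). */
#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)

/* Userspace stand-in for rep_cqinit/rep_cqcount (atomic_t in the kernel). */
struct ep {
	int cqinit;
	int cqcount;
};

/* Models the rep_cqinit setup in rpcrdma_ep_create() after this patch. */
static void ep_init(struct ep *ep, unsigned int max_send_wr)
{
	ep->cqinit = max_send_wr / 2 - 1;
	if (ep->cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->cqinit <= 2)
		ep->cqinit = 0;		/* tiny queue: signal every SEND */
	ep->cqcount = ep->cqinit;	/* INIT_CQCOUNT() */
}

/* Hypothetical helper: returns 1 when the next SEND should carry
 * IB_SEND_SIGNALED, modeling the DECR_CQCOUNT() countdown used at
 * post time.
 */
static int send_is_signaled(struct ep *ep)
{
	if (--ep->cqcount > 0)		/* DECR_CQCOUNT() */
		return 0;
	ep->cqcount = ep->cqinit;	/* INIT_CQCOUNT() */
	return 1;
}

int main(void)
{
	struct ep ep;
	int i, signaled = 0;

	printf("event 1:  %s\n", ASYNC_MSG(1u));	/* "QP fatal error" */
	printf("event 99: %s\n", ASYNC_MSG(99u));	/* past end of table */

	ep_init(&ep, 1024);	/* uncapped, cqinit would have been 511 */
	for (i = 0; i < 1000; i++)
		signaled += send_is_signaled(&ep);
	printf("cqinit=%d: %d of 1000 SENDs signaled\n",
	       ep.cqinit, signaled);
	return 0;
}

With a 1024-entry send queue, the old max_send_wr/2 - 1 heuristic would have allowed 511 unsignaled SENDs between completions; the cap bounds that interval so the provider gets a completion, and a chance to do housekeeping, at least every 32 SENDs.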