提交 302d3deb 编写于 作者: C Chuck Lever 提交者: Anna Schumaker

xprtrdma: Prevent inline overflow

When deciding whether to send a Call inline, rpcrdma_marshal_req
doesn't take into account header bytes consumed by chunk lists.
This results in Call messages on the wire that are sometimes larger
than the inline threshold.

Likewise, when a Write list or Reply chunk is in play, the server's
reply has to emit an RDMA Send that includes a larger-than-minimal
RPC-over-RDMA header.

The actual size of a Call message cannot be estimated until after
the chunk lists have been registered. Thus the size of each
RPC-over-RDMA header can be estimated only after chunks are
registered; but the decision to register chunks is based on the size
of that header. Chicken, meet egg.

The best a client can do is estimate header size based on the
largest header that might occur, and then ensure that inline content
always fits in what remains of the inline threshold after that header.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
上级 94931746
...@@ -39,6 +39,9 @@ static int ...@@ -39,6 +39,9 @@ static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata) struct rpcrdma_create_data_internal *cdata)
{ {
rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
RPCRDMA_MAX_DATA_SEGS /
RPCRDMA_MAX_FMR_SGES));
return 0; return 0;
} }
......
...@@ -231,6 +231,9 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ...@@ -231,6 +231,9 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
depth; depth;
} }
rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frmr_depth));
return 0; return 0;
} }
......
...@@ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ...@@ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
__func__, PTR_ERR(mr)); __func__, PTR_ERR(mr));
return -ENOMEM; return -ENOMEM;
} }
ia->ri_dma_mr = mr; ia->ri_dma_mr = mr;
rpcrdma_set_max_header_sizes(ia, cdata, min_t(unsigned int,
RPCRDMA_MAX_DATA_SEGS,
RPCRDMA_MAX_HDR_SEGS));
return 0; return 0;
} }
......
...@@ -61,7 +61,6 @@ enum rpcrdma_chunktype { ...@@ -61,7 +61,6 @@ enum rpcrdma_chunktype {
rpcrdma_replych rpcrdma_replych
}; };
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char transfertypes[][12] = { static const char transfertypes[][12] = {
"pure inline", /* no chunks */ "pure inline", /* no chunks */
" read chunk", /* some argument via rdma read */ " read chunk", /* some argument via rdma read */
...@@ -69,18 +68,72 @@ static const char transfertypes[][12] = { ...@@ -69,18 +68,72 @@ static const char transfertypes[][12] = {
"write chunk", /* some result via rdma write */ "write chunk", /* some result via rdma write */
"reply chunk" /* entire reply via rdma write */ "reply chunk" /* entire reply via rdma write */
}; };
#endif
/* Returns size of largest RPC-over-RDMA header in a Call message
 *
 * The client marshals only one chunk list per Call message.
 * The largest list is the Read list.
 *
 * @maxsegs: caller's upper bound on the number of data segments in a
 *	chunk list; two more are added here for the head and tail
 *	buffers.
 *
 * The result is the fixed header length plus the largest possible
 * Read list, in bytes.
 */
static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Read list size. This must be accumulated onto the
	 * fixed portion: a plain assignment would throw away
	 * RPCRDMA_HDRLEN_MIN and under-estimate the header.
	 */
	maxsegs += 2;	/* segment for head and tail buffers */
	size += maxsegs * sizeof(struct rpcrdma_read_chunk);

	dprintk("RPC: %s: max call header size = %u\n",
		__func__, size);
	return size;
}
/* Returns size of largest RPC-over-RDMA header in a Reply message
 *
 * There is only one Write list or one Reply chunk per Reply
 * message. The larger list is the Write list.
 *
 * @maxsegs: caller's upper bound on the number of data segments in a
 *	chunk list; two more are added here for the head and tail
 *	buffers.
 *
 * The result is the fixed header length plus the largest possible
 * Write list, in bytes.
 */
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size. Every term below is added to the
	 * fixed portion; assigning the segment count instead would
	 * discard RPCRDMA_HDRLEN_MIN and under-estimate the header.
	 */
	maxsegs += 2;	/* segment for head and tail buffers */
	size += sizeof(__be32);		/* segment count */
	size += maxsegs * sizeof(struct rpcrdma_segment);
	size += sizeof(__be32);		/* list discriminator */

	dprintk("RPC: %s: max reply header size = %u\n",
		__func__, size);
	return size;
}
/* Record how much RPC payload may travel inline in each direction
 *
 * @ia: interface adapter whose ri_max_inline_write and
 *	ri_max_inline_read fields are set
 * @cdata: supplies inline_wsize and inline_rsize (presumably the
 *	transport's inline thresholds; confirm against the caller)
 * @maxsegs: passed through to the worst-case header size estimators
 *
 * Each limit is the inline size less the largest header that might
 * accompany a message in that direction.
 */
void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia,
				  struct rpcrdma_create_data_internal *cdata,
				  unsigned int maxsegs)
{
	unsigned int call_hdr = rpcrdma_max_call_header_size(maxsegs);
	unsigned int reply_hdr = rpcrdma_max_reply_header_size(maxsegs);

	/* The limits are unsigned (assumes the cdata sizes are too), so a
	 * header estimate larger than the inline size would wrap to a
	 * huge value and make every message look like it fits inline.
	 * Clamp to zero instead so nothing is sent inline in that case.
	 */
	if (cdata->inline_wsize > call_hdr)
		ia->ri_max_inline_write = cdata->inline_wsize - call_hdr;
	else
		ia->ri_max_inline_write = 0;

	if (cdata->inline_rsize > reply_hdr)
		ia->ri_max_inline_read = cdata->inline_rsize - reply_hdr;
	else
		ia->ri_max_inline_read = 0;
}
/* The client can send a request inline as long as the RPCRDMA header /* The client can send a request inline as long as the RPCRDMA header
* plus the RPC call fit under the transport's inline limit. If the * plus the RPC call fit under the transport's inline limit. If the
* combined call message size exceeds that limit, the client must use * combined call message size exceeds that limit, the client must use
* the read chunk list for this operation. * the read chunk list for this operation.
*/ */
static bool rpcrdma_args_inline(struct rpc_rqst *rqst) static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{ {
unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
} }
/* The client can't know how large the actual reply will be. Thus it /* The client can't know how large the actual reply will be. Thus it
...@@ -89,11 +142,12 @@ static bool rpcrdma_args_inline(struct rpc_rqst *rqst) ...@@ -89,11 +142,12 @@ static bool rpcrdma_args_inline(struct rpc_rqst *rqst)
* limit, the client must provide a write list or a reply chunk for * limit, the client must provide a write list or a reply chunk for
* this request. * this request.
*/ */
static bool rpcrdma_results_inline(struct rpc_rqst *rqst) static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{ {
unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst); return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
} }
static int static int
...@@ -492,7 +546,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) ...@@ -492,7 +546,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
*/ */
if (rqst->rq_rcv_buf.flags & XDRBUF_READ) if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
wtype = rpcrdma_writech; wtype = rpcrdma_writech;
else if (rpcrdma_results_inline(rqst)) else if (rpcrdma_results_inline(r_xprt, rqst))
wtype = rpcrdma_noch; wtype = rpcrdma_noch;
else else
wtype = rpcrdma_replych; wtype = rpcrdma_replych;
...@@ -511,7 +565,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) ...@@ -511,7 +565,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* that both has a data payload, and whose non-data arguments * that both has a data payload, and whose non-data arguments
* by themselves are larger than the inline threshold. * by themselves are larger than the inline threshold.
*/ */
if (rpcrdma_args_inline(rqst)) { if (rpcrdma_args_inline(r_xprt, rqst)) {
rtype = rpcrdma_noch; rtype = rpcrdma_noch;
} else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { } else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
rtype = rpcrdma_readch; rtype = rpcrdma_readch;
...@@ -561,6 +615,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) ...@@ -561,6 +615,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
if (hdrlen < 0) if (hdrlen < 0)
return hdrlen; return hdrlen;
if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
goto out_overflow;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd" dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n", " headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[wtype], hdrlen, rpclen, __func__, transfertypes[wtype], hdrlen, rpclen,
...@@ -587,6 +644,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) ...@@ -587,6 +644,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
req->rl_niovs = 2; req->rl_niovs = 2;
return 0; return 0;
out_overflow:
pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s\n",
hdrlen, rpclen, transfertypes[wtype]);
/* Terminate this RPC. Chunks registered above will be
* released by xprt_release -> xprt_rdma_free.
*/
return -EIO;
} }
/* /*
......
...@@ -73,6 +73,8 @@ struct rpcrdma_ia { ...@@ -73,6 +73,8 @@ struct rpcrdma_ia {
struct completion ri_done; struct completion ri_done;
int ri_async_rc; int ri_async_rc;
unsigned int ri_max_frmr_depth; unsigned int ri_max_frmr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
struct ib_qp_attr ri_qp_attr; struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr; struct ib_qp_init_attr ri_qp_init_attr;
}; };
...@@ -538,6 +540,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); ...@@ -538,6 +540,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/ */
int rpcrdma_marshal_req(struct rpc_rqst *); int rpcrdma_marshal_req(struct rpc_rqst *);
void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *,
unsigned int);
/* RPC/RDMA module init - xprtrdma/transport.c /* RPC/RDMA module init - xprtrdma/transport.c
*/ */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册