提交 4e99b321 编写于 作者: L Linus Torvalds

Merge tag 'nfs-for-5.8-2' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client bugfixes from Anna Schumaker:
 "Stable Fixes:
   - xprtrdma: Fix handling of RDMA_ERROR replies
   - sunrpc: Fix rollback in rpc_gssd_dummy_populate()
   - pNFS/flexfiles: Fix list corruption if the mirror count changes
   - NFSv4: Fix CLOSE not waiting for direct IO completion
   - SUNRPC: Properly set the @subbuf parameter of xdr_buf_subsegment()

  Other Fixes:
   - xprtrdma: Fix a use-after-free with r_xprt->rx_ep
   - Fix other xprtrdma races during disconnect
   - NFS: Fix memory leak of export_path"

* tag 'nfs-for-5.8-2' of git://git.linux-nfs.org/projects/anna/linux-nfs:
  SUNRPC: Properly set the @subbuf parameter of xdr_buf_subsegment()
  NFSv4 fix CLOSE not waiting for direct IO compeletion
  pNFS/flexfiles: Fix list corruption if the mirror count changes
  nfs: Fix memory leak of export_path
  sunrpc: fixed rollback in rpc_gssd_dummy_populate()
  xprtrdma: Fix handling of RDMA_ERROR replies
  xprtrdma: Clean up disconnect
  xprtrdma: Clean up synopsis of rpcrdma_flush_disconnect()
  xprtrdma: Use re_connect_status safely in rpcrdma_xprt_connect()
  xprtrdma: Prevent dereferencing r_xprt->rx_ep after it is freed
......@@ -267,8 +267,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
{
struct inode *inode = dreq->inode;
inode_dio_end(inode);
if (dreq->iocb) {
long res = (long) dreq->error;
if (dreq->count != 0) {
......@@ -280,7 +278,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
complete(&dreq->completion);
igrab(inode);
nfs_direct_req_release(dreq);
inode_dio_end(inode);
iput(inode);
}
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
......@@ -410,8 +411,10 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
inode_dio_end(inode);
igrab(inode);
nfs_direct_req_release(dreq);
inode_dio_end(inode);
iput(inode);
return result < 0 ? result : -EIO;
}
......@@ -864,8 +867,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
inode_dio_end(inode);
igrab(inode);
nfs_direct_req_release(dreq);
inode_dio_end(inode);
iput(inode);
return result < 0 ? result : -EIO;
}
......
......@@ -83,6 +83,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
inode_dio_wait(inode);
nfs_file_clear_open_context(filp);
return 0;
}
......
......@@ -907,9 +907,8 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
goto out_mds;
/* Use a direct mapping of ds_idx to pgio mirror_idx */
if (WARN_ON_ONCE(pgio->pg_mirror_count !=
FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
goto out_mds;
if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
goto out_eagain;
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
......@@ -931,7 +930,10 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
return;
out_eagain:
pnfs_generic_pg_cleanup(pgio);
pgio->pg_error = -EAGAIN;
return;
out_mds:
trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_RW,
......@@ -941,6 +943,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
pgio->pg_lseg = NULL;
pgio->pg_maxretrans = 0;
nfs_pageio_reset_write_mds(pgio);
pgio->pg_error = -EAGAIN;
}
static unsigned int
......
......@@ -308,6 +308,7 @@ static int try_location(struct fs_context *fc,
if (IS_ERR(export_path))
return PTR_ERR(export_path);
kfree(ctx->nfs_server.export_path);
ctx->nfs_server.export_path = export_path;
source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1,
......
......@@ -1317,6 +1317,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
q.len = strlen(gssd_dummy_clnt_dir[0].name);
clnt_dentry = d_hash_and_lookup(gssd_dentry, &q);
if (!clnt_dentry) {
__rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
pipe_dentry = ERR_PTR(-ENOENT);
goto out;
}
......
......@@ -1118,6 +1118,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->head[0].iov_len;
subbuf->head[0].iov_base = buf->head[0].iov_base;
subbuf->head[0].iov_len = 0;
}
......@@ -1130,6 +1131,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->page_len;
subbuf->pages = buf->pages;
subbuf->page_base = 0;
subbuf->page_len = 0;
}
......@@ -1141,6 +1144,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->tail[0].iov_len;
subbuf->tail[0].iov_base = buf->tail[0].iov_base;
subbuf->tail[0].iov_len = 0;
}
......
......@@ -367,7 +367,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
trace_xprtrdma_wc_fastreg(wc, frwr);
/* The MR will get recycled when the associated req is retransmitted */
rpcrdma_flush_disconnect(cq, wc);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
......@@ -452,7 +452,7 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
trace_xprtrdma_wc_li(wc, frwr);
__frwr_release_mr(wc, mr);
rpcrdma_flush_disconnect(cq, wc);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
......@@ -474,7 +474,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
__frwr_release_mr(wc, mr);
complete(&frwr->fr_linv_done);
rpcrdma_flush_disconnect(cq, wc);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
......@@ -582,7 +582,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
smp_rmb();
rpcrdma_complete_rqst(rep);
rpcrdma_flush_disconnect(cq, wc);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
......
......@@ -1349,8 +1349,7 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
}
r_xprt->rx_stats.bad_reply_count++;
return -EREMOTEIO;
return -EIO;
}
/* Perform XID lookup, reconstruction of the RPC reply, and
......@@ -1387,13 +1386,11 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
spin_unlock(&xprt->queue_lock);
return;
/* If the incoming reply terminated a pending RPC, the next
* RPC call will post a replacement receive buffer as it is
* being marshaled.
*/
out_badheader:
trace_xprtrdma_reply_hdr(rep);
r_xprt->rx_stats.bad_reply_count++;
rqst->rq_task->tk_status = status;
status = 0;
goto out;
}
......
......@@ -242,7 +242,7 @@ xprt_rdma_connect_worker(struct work_struct *work)
rc = rpcrdma_xprt_connect(r_xprt);
xprt_clear_connecting(xprt);
if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) {
if (!rc) {
xprt->connect_cookie++;
xprt->stat.connect_count++;
xprt->stat.connect_time += (long)jiffies -
......
......@@ -84,7 +84,8 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep);
static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
gfp_t flags);
......@@ -97,7 +98,8 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
*/
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
struct rdma_cm_id *id = r_xprt->rx_ep->re_id;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct rdma_cm_id *id = ep->re_id;
/* Flush Receives, then wait for deferred Reply work
* to complete.
......@@ -108,6 +110,8 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
* local invalidations.
*/
ib_drain_sq(id->qp);
rpcrdma_ep_put(ep);
}
/**
......@@ -126,23 +130,27 @@ static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
trace_xprtrdma_qp_event(ep, event);
}
/* Ensure xprt_force_disconnect() is invoked exactly once when a
* connection is closed or lost. (The important thing is it needs
* to be invoked "at least" once).
*/
static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
{
if (atomic_add_unless(&ep->re_force_disconnect, 1, 1))
xprt_force_disconnect(ep->re_xprt);
}
/**
* rpcrdma_flush_disconnect - Disconnect on flushed completion
* @cq: completion queue
* @r_xprt: transport to disconnect
* @wc: work completion entry
*
* Must be called in process context.
*/
void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc)
void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc)
{
struct rpcrdma_xprt *r_xprt = cq->cq_context;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
if (wc->status != IB_WC_SUCCESS &&
r_xprt->rx_ep->re_connect_status == 1) {
r_xprt->rx_ep->re_connect_status = -ECONNABORTED;
xprt_force_disconnect(xprt);
}
if (wc->status != IB_WC_SUCCESS)
rpcrdma_force_disconnect(r_xprt->rx_ep);
}
/**
......@@ -156,11 +164,12 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct rpcrdma_sendctx *sc =
container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc);
rpcrdma_flush_disconnect(cq, wc);
rpcrdma_sendctx_put_locked(r_xprt, sc);
rpcrdma_flush_disconnect(r_xprt, wc);
}
/**
......@@ -195,7 +204,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
return;
out_flushed:
rpcrdma_flush_disconnect(cq, wc);
rpcrdma_flush_disconnect(r_xprt, wc);
rpcrdma_rep_destroy(rep);
}
......@@ -239,7 +248,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
struct rpcrdma_ep *ep = id->context;
struct rpc_xprt *xprt = ep->re_xprt;
might_sleep();
......@@ -263,10 +271,9 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
/* fall through */
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
xprt_force_disconnect(xprt);
goto disconnected;
case RDMA_CM_EVENT_ESTABLISHED:
kref_get(&ep->re_kref);
rpcrdma_ep_get(ep);
ep->re_connect_status = 1;
rpcrdma_update_cm_private(ep, &event->param.conn);
trace_xprtrdma_inline_thresh(ep);
......@@ -288,8 +295,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
ep->re_connect_status = -ECONNABORTED;
disconnected:
xprt_force_disconnect(xprt);
return rpcrdma_ep_destroy(ep);
rpcrdma_force_disconnect(ep);
return rpcrdma_ep_put(ep);
default:
break;
}
......@@ -345,7 +352,7 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
return ERR_PTR(rc);
}
static void rpcrdma_ep_put(struct kref *kref)
static void rpcrdma_ep_destroy(struct kref *kref)
{
struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref);
......@@ -369,13 +376,18 @@ static void rpcrdma_ep_put(struct kref *kref)
module_put(THIS_MODULE);
}
static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep)
{
kref_get(&ep->re_kref);
}
/* Returns:
* %0 if @ep still has a positive kref count, or
* %1 if @ep was destroyed successfully.
*/
static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep)
static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep)
{
return kref_put(&ep->re_kref, rpcrdma_ep_put);
return kref_put(&ep->re_kref, rpcrdma_ep_destroy);
}
static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
......@@ -492,7 +504,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
return 0;
out_destroy:
rpcrdma_ep_destroy(ep);
rpcrdma_ep_put(ep);
rdma_destroy_id(id);
out_free:
kfree(ep);
......@@ -519,10 +531,13 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
return rc;
ep = r_xprt->rx_ep;
ep->re_connect_status = 0;
xprt_clear_connected(xprt);
rpcrdma_reset_cwnd(r_xprt);
/* Bump the ep's reference count while there are
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
rpcrdma_post_recvs(r_xprt, true);
rc = rpcrdma_sendctxs_create(r_xprt);
......@@ -552,8 +567,6 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
rpcrdma_mrs_create(r_xprt);
out:
if (rc)
ep->re_connect_status = rc;
trace_xprtrdma_connect(r_xprt, rc);
return rc;
}
......@@ -587,7 +600,7 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt)
rpcrdma_mrs_destroy(r_xprt);
rpcrdma_sendctxs_destroy(r_xprt);
if (rpcrdma_ep_destroy(ep))
if (rpcrdma_ep_put(ep))
rdma_destroy_id(id);
r_xprt->rx_ep = NULL;
......
......@@ -82,6 +82,7 @@ struct rpcrdma_ep {
unsigned int re_max_inline_recv;
int re_async_rc;
int re_connect_status;
atomic_t re_force_disconnect;
struct ib_qp_init_attr re_attr;
wait_queue_head_t re_connect_wait;
struct rpc_xprt *re_xprt;
......@@ -446,7 +447,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Endpoint calls - xprtrdma/verbs.c
*/
void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc);
void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册