Commit c3e9c04b authored by Linus Torvalds

Merge tag 'nfs-for-4.15-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
 "Stable bugfixes:
   - Revalidate "." and ".." correctly on open
   - Avoid RCU usage in tracepoints
   - Fix ugly referral attributes
   - Fix a typo in nomigration mount option
   - Revert "NFS: Move the flock open mode check into nfs_flock()"

  Features:
   - Implement a stronger send queue accounting system for NFS over RDMA
   - Switch some atomics to the new refcount_t type (a short sketch of the
     conversion pattern follows this message)

  Other bugfixes and cleanups:
   - Clean up access mode bits
   - Remove special-case revalidations in nfs_opendir()
   - Improve invalidating NFS over RDMA memory for async operations that
     time out
   - Handle NFS over RDMA replies with a workqueue
   - Handle NFS over RDMA sends with a workqueue
   - Fix up replaying interrupted requests
   - Remove dead NFS over RDMA definitions
   - Update NFS over RDMA copyright information
   - Be more consistent with bool initialization and comparisons
   - Mark expected switch fall throughs
   - Various sunrpc tracepoint cleanups
   - Fix various OPEN races
   - Fix a typo in nfs_rename()
   - Use common error handling code in nfs_lock_and_join_request()
   - Check that some structures are properly cleaned up during
     net_exit()
   - Remove net pointer from dprintk()s"
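The refcount_t switch called out under "Features" follows the usual kernel conversion pattern: reference counts that only guard object lifetime move from atomic_t to refcount_t, which saturates and warns on overflow/underflow instead of silently wrapping. Below is a minimal sketch of that pattern; the struct and function names are hypothetical and merely mirror the dreq->count and clp->cl_count conversions in the diff, they are not part of this series.

#include <linux/refcount.h>
#include <linux/slab.h>

/* Hypothetical object whose lifetime is controlled by a reference count. */
struct foo {
	refcount_t count;		/* was: atomic_t count; */
};

static struct foo *foo_alloc(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		refcount_set(&f->count, 1);	/* was: atomic_set(&f->count, 1); */
	return f;
}

static void foo_get(struct foo *f)
{
	refcount_inc(&f->count);		/* was: atomic_inc(&f->count); */
}

static void foo_put(struct foo *f)
{
	/* was: atomic_dec_and_test(); refcount_t additionally WARNs on underflow */
	if (refcount_dec_and_test(&f->count))
		kfree(f);
}

The read and locked-decrement sides map the same way: atomic_read() becomes refcount_read() and atomic_dec_and_lock() becomes refcount_dec_and_lock(), as the client and pNFS hunks below show.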

* tag 'nfs-for-4.15-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (62 commits)
  NFS: Revert "NFS: Move the flock open mode check into nfs_flock()"
  NFS: Fix typo in nomigration mount option
  nfs: Fix ugly referral attributes
  NFS: super: mark expected switch fall-throughs
  sunrpc: remove net pointer from messages
  nfs: remove net pointer from messages
  sunrpc: exit_net cleanup check added
  nfs client: exit_net cleanup check added
  nfs/write: Use common error handling code in nfs_lock_and_join_requests()
  NFSv4: Replace closed stateids with the "invalid special stateid"
  NFSv4: nfs_set_open_stateid must not trigger state recovery for closed state
  NFSv4: Check the open stateid when searching for expired state
  NFSv4: Clean up nfs4_delegreturn_done
  NFSv4: cleanup nfs4_close_done
  NFSv4: Retry NFS4ERR_OLD_STATEID errors in layoutreturn
  pNFS: Retry NFS4ERR_OLD_STATEID errors in layoutreturn-on-close
  NFSv4: Don't try to CLOSE if the stateid 'other' field has changed
  NFSv4: Retry CLOSE and DELEGRETURN on NFS4ERR_OLD_STATEID.
  NFS: Fix a typo in nfs_rename()
  NFSv4: Fix open create exclusive when the server reboots
  ...
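The "remove net pointer from messages" commits listed above replace %p printing of struct net in dprintk() with the network namespace inode number, which neither leaks nor hashes away a kernel address and stays meaningful across log lines. A rough sketch of the pattern follows; it assumes only the mainline struct net layout where the identifier is exposed as net->ns.inum, and the helper name is made up for illustration.

#include <linux/printk.h>
#include <net/net_namespace.h>

/* Hypothetical helper: tag a debug message with the owning network namespace. */
static void example_log_net(struct net *net)
{
	/* before: dprintk("... net=%p\n", net);  -- prints a kernel pointer */
	/* after:  print the namespace inode number instead */
	pr_debug("NFS: example event; net=%x\n", net->ns.inum);
}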
@@ -67,7 +67,7 @@ int nfs_cache_upcall(struct cache_detail *cd, char *entry_name)
  */
 void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq)
 {
-	if (atomic_dec_and_test(&dreq->count))
+	if (refcount_dec_and_test(&dreq->count))
 		kfree(dreq);
 }
@@ -87,7 +87,7 @@ static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req)
 	dreq = container_of(req, struct nfs_cache_defer_req, req);
 	dreq->deferred_req.revisit = nfs_dns_cache_revisit;
-	atomic_inc(&dreq->count);
+	refcount_inc(&dreq->count);
 	return &dreq->deferred_req;
 }
@@ -99,7 +99,7 @@ struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void)
 	dreq = kzalloc(sizeof(*dreq), GFP_KERNEL);
 	if (dreq) {
 		init_completion(&dreq->completion);
-		atomic_set(&dreq->count, 1);
+		refcount_set(&dreq->count, 1);
 		dreq->req.defer = nfs_dns_cache_defer;
 	}
 	return dreq;
...
@@ -16,7 +16,7 @@ struct nfs_cache_defer_req {
 	struct cache_req req;
 	struct cache_deferred_req deferred_req;
 	struct completion completion;
-	atomic_t count;
+	refcount_t count;
 };
 extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name);
...
@@ -49,15 +49,15 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
 	if (ret <= 0)
 		goto out_err;
 	nn->nfs_callback_tcpport = ret;
-	dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
-		nn->nfs_callback_tcpport, PF_INET, net);
+	dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
+		nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
 	ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
 				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
 	if (ret > 0) {
 		nn->nfs_callback_tcpport6 = ret;
-		dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
-			nn->nfs_callback_tcpport6, PF_INET6, net);
+		dprintk("NFS: Callback listener port = %u (af %u, net %x\n",
+			nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum);
 	} else if (ret != -EAFNOSUPPORT)
 		goto out_err;
 	return 0;
@@ -185,7 +185,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
 	if (--nn->cb_users[minorversion])
 		return;
-	dprintk("NFS: destroy per-net callback data; net=%p\n", net);
+	dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
 	svc_shutdown_net(serv, net);
 }
@@ -198,7 +198,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
 	if (nn->cb_users[minorversion]++)
 		return 0;
-	dprintk("NFS: create per-net callback data; net=%p\n", net);
+	dprintk("NFS: create per-net callback data; net=%x\n", net->ns.inum);
 	ret = svc_bind(serv, net);
 	if (ret < 0) {
@@ -223,7 +223,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
 err_bind:
 	nn->cb_users[minorversion]--;
 	dprintk("NFS: Couldn't create callback socket: err = %d; "
-			"net = %p\n", ret, net);
+			"net = %x\n", ret, net->ns.inum);
 	return ret;
 }
...
@@ -440,7 +440,7 @@ static bool referring_call_exists(struct nfs_client *clp,
 				  uint32_t nrclists,
 				  struct referring_call_list *rclists)
 {
-	bool status = 0;
+	bool status = false;
 	int i, j;
 	struct nfs4_session *session;
 	struct nfs4_slot_table *tbl;
...
@@ -163,7 +163,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 	clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
-	atomic_set(&clp->cl_count, 1);
+	refcount_set(&clp->cl_count, 1);
 	clp->cl_cons_state = NFS_CS_INITING;
 	memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen);
@@ -269,7 +269,7 @@ void nfs_put_client(struct nfs_client *clp)
 	nn = net_generic(clp->cl_net, nfs_net_id);
-	if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
+	if (refcount_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
 		list_del(&clp->cl_share_link);
 		nfs_cb_idr_remove_locked(clp);
 		spin_unlock(&nn->nfs_client_lock);
@@ -314,7 +314,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 			       sap))
 			continue;
-		atomic_inc(&clp->cl_count);
+		refcount_inc(&clp->cl_count);
 		return clp;
 	}
 	return NULL;
@@ -1006,7 +1006,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	/* Copy data from the source */
 	server->nfs_client = source->nfs_client;
 	server->destroy = source->destroy;
-	atomic_inc(&server->nfs_client->cl_count);
+	refcount_inc(&server->nfs_client->cl_count);
 	nfs_server_copy_userdata(server, source);
 	server->fsid = fattr->fsid;
@@ -1166,7 +1166,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
 		   clp->rpc_ops->version,
 		   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
 		   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
-		   atomic_read(&clp->cl_count),
+		   refcount_read(&clp->cl_count),
 		   clp->cl_hostname);
 	rcu_read_unlock();
...
@@ -1040,6 +1040,33 @@ int nfs_delegations_present(struct nfs_client *clp)
 	return ret;
 }
+/**
+ * nfs4_refresh_delegation_stateid - Update delegation stateid seqid
+ * @dst: stateid to refresh
+ * @inode: inode to check
+ *
+ * Returns "true" and updates "dst->seqid" * if inode had a delegation
+ * that matches our delegation stateid. Otherwise "false" is returned.
+ */
+bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+	struct nfs_delegation *delegation;
+	bool ret = false;
+	if (!inode)
+		goto out;
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL &&
+	    nfs4_stateid_match_other(dst, &delegation->stateid)) {
+		dst->seqid = delegation->stateid.seqid;
+		return ret;
+	}
+	rcu_read_unlock();
+out:
+	return ret;
+}
 /**
  * nfs4_copy_delegation_stateid - Copy inode's state ID information
  * @inode: inode to check
...
@@ -62,6 +62,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
 int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
 bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred);
+bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
 int nfs4_have_delegation(struct inode *inode, fmode_t flags);
...
@@ -118,13 +118,6 @@ nfs_opendir(struct inode *inode, struct file *filp)
 		goto out;
 	}
 	filp->private_data = ctx;
-	if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
-		/* This is a mountpoint, so d_revalidate will never
-		 * have been called, so we need to refresh the
-		 * inode (for close-open consistency) ourselves.
-		 */
-		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
-	}
 out:
 	put_rpccred(cred);
 	return res;
@@ -253,7 +246,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
 	desc->cache_entry_index = index;
 	return 0;
 out_eof:
-	desc->eof = 1;
+	desc->eof = true;
 	return -EBADCOOKIE;
 }
@@ -307,7 +300,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 	if (array->eof_index >= 0) {
 		status = -EBADCOOKIE;
 		if (*desc->dir_cookie == array->last_cookie)
-			desc->eof = 1;
+			desc->eof = true;
 	}
 out:
 	return status;
@@ -761,7 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 		ent = &array->array[i];
 		if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
 		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
-			desc->eof = 1;
+			desc->eof = true;
 			break;
 		}
 		desc->ctx->pos++;
@@ -773,7 +766,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 			ctx->duped = 1;
 	}
 	if (array->eof_index >= 0)
-		desc->eof = 1;
+		desc->eof = true;
 	kunmap(desc->page);
 	cache_page_release(desc);
@@ -873,7 +866,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	if (res == -EBADCOOKIE) {
 		res = 0;
 		/* This means either end of directory */
-		if (*desc->dir_cookie && desc->eof == 0) {
+		if (*desc->dir_cookie && !desc->eof) {
 			/* Or that the server has 'lost' a cookie */
 			res = uncached_readdir(desc);
 			if (res == 0)
@@ -1241,8 +1234,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
 		return 0;
 	}
-	if (nfs_mapping_need_revalidate_inode(inode))
-		error = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	error = nfs_lookup_verify_inode(inode, flags);
 	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
 			__func__, inode->i_ino, error ? "invalid" : "valid");
 	return !error;
@@ -1393,6 +1385,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
 const struct dentry_operations nfs4_dentry_operations = {
 	.d_revalidate = nfs4_lookup_revalidate,
+	.d_weak_revalidate = nfs_weak_revalidate,
 	.d_delete = nfs_dentry_delete,
 	.d_iput = nfs_dentry_iput,
 	.d_automount = nfs_d_automount,
@@ -2064,7 +2057,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * should mark the directories for revalidation.
 		 */
 		d_move(old_dentry, new_dentry);
-		nfs_set_verifier(new_dentry,
+		nfs_set_verifier(old_dentry,
 					nfs_save_change_attribute(new_dir));
 	} else if (error == -ENOENT)
 		nfs_dentry_handle_enoent(old_dentry);
@@ -2369,15 +2362,15 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 }
 EXPORT_SYMBOL_GPL(nfs_access_add_cache);
-#define NFS_MAY_READ (NFS4_ACCESS_READ)
-#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \
-		NFS4_ACCESS_EXTEND | \
-		NFS4_ACCESS_DELETE)
-#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \
-		NFS4_ACCESS_EXTEND)
+#define NFS_MAY_READ (NFS_ACCESS_READ)
+#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
+		NFS_ACCESS_EXTEND | \
+		NFS_ACCESS_DELETE)
+#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
+		NFS_ACCESS_EXTEND)
 #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
-#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP)
-#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE)
+#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
+#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
 static int
 nfs_access_calc_mask(u32 access_result, umode_t umode)
 {
@@ -2425,9 +2418,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 	if (!may_block)
 		goto out;
-	/* Be clever: ask server to check for all possible rights */
-	cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE
-		| NFS_MAY_WRITE | NFS_MAY_READ;
+	/*
+	 * Determine which access bits we want to ask for...
+	 */
+	cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
+	if (S_ISDIR(inode->i_mode))
+		cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
+	else
+		cache.mask |= NFS_ACCESS_EXECUTE;
 	cache.cred = cred;
 	status = NFS_PROTO(inode)->access(inode, &cache);
 	if (status != 0) {
...
@@ -829,23 +829,9 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
 		is_local = 1;
-	/*
-	 * VFS doesn't require the open mode to match a flock() lock's type.
-	 * NFS, however, may simulate flock() locking with posix locking which
-	 * requires the open mode to match the lock type.
-	 */
-	switch (fl->fl_type) {
-	case F_UNLCK:
+	/* We're simulating flock() locks using posix locks on the server */
+	if (fl->fl_type == F_UNLCK)
 		return do_unlk(filp, cmd, fl, is_local);
-	case F_RDLCK:
-		if (!(filp->f_mode & FMODE_READ))
-			return -EBADF;
-		break;
-	case F_WRLCK:
-		if (!(filp->f_mode & FMODE_WRITE))
-			return -EBADF;
-	}
 	return do_setlk(filp, cmd, fl, is_local);
 }
 EXPORT_SYMBOL_GPL(nfs_flock);
...
@@ -471,10 +471,10 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
 		return PNFS_NOT_ATTEMPTED;
 	dprintk("%s USE DS: %s cl_count %d\n", __func__,
-		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
+		ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
 	/* No multipath support. Use first DS */
-	atomic_inc(&ds->ds_clp->cl_count);
+	refcount_inc(&ds->ds_clp->cl_count);
 	hdr->ds_clp = ds->ds_clp;
 	hdr->ds_commit_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -515,10 +515,10 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
 		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
-		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
+		offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
 	hdr->pgio_done_cb = filelayout_write_done_cb;
-	atomic_inc(&ds->ds_clp->cl_count);
+	refcount_inc(&ds->ds_clp->cl_count);
 	hdr->ds_clp = ds->ds_clp;
 	hdr->ds_commit_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -1064,9 +1064,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
 		goto out_err;
 	dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
-		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
+		data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count));
 	data->commit_done_cb = filelayout_commit_done_cb;
-	atomic_inc(&ds->ds_clp->cl_count);
+	refcount_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
 	if (fh)
...
@@ -187,7 +187,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
 			continue;
 		if (!ff_mirror_match_fh(mirror, pos))
 			continue;
-		if (atomic_inc_not_zero(&pos->ref)) {
+		if (refcount_inc_not_zero(&pos->ref)) {
 			spin_unlock(&inode->i_lock);
 			return pos;
 		}
@@ -218,7 +218,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
 	mirror = kzalloc(sizeof(*mirror), gfp_flags);
 	if (mirror != NULL) {
 		spin_lock_init(&mirror->lock);
-		atomic_set(&mirror->ref, 1);
+		refcount_set(&mirror->ref, 1);
 		INIT_LIST_HEAD(&mirror->mirrors);
 	}
 	return mirror;
@@ -242,7 +242,7 @@ static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
 static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror)
 {
-	if (mirror != NULL && atomic_dec_and_test(&mirror->ref))
+	if (mirror != NULL && refcount_dec_and_test(&mirror->ref))
 		ff_layout_free_mirror(mirror);
 }
@@ -1726,10 +1726,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 	vers = nfs4_ff_layout_ds_version(lseg, idx);
 	dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
-		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers);
+		ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
 	hdr->pgio_done_cb = ff_layout_read_done_cb;
-	atomic_inc(&ds->ds_clp->cl_count);
+	refcount_inc(&ds->ds_clp->cl_count);
 	hdr->ds_clp = ds->ds_clp;
 	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
 	if (fh)
@@ -1785,11 +1785,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
 		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
-		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count),
+		offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count),
 		vers);
 	hdr->pgio_done_cb = ff_layout_write_done_cb;
-	atomic_inc(&ds->ds_clp->cl_count);
+	refcount_inc(&ds->ds_clp->cl_count);
 	hdr->ds_clp = ds->ds_clp;
 	hdr->ds_commit_idx = idx;
 	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
@@ -1863,11 +1863,11 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
 	vers = nfs4_ff_layout_ds_version(lseg, idx);
 	dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
-		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count),
+		data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
 		vers);
 	data->commit_done_cb = ff_layout_commit_done_cb;
 	data->cred = ds_cred;
-	atomic_inc(&ds->ds_clp->cl_count);
+	refcount_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
 	if (fh)
@@ -2286,7 +2286,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
 		if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))
 			continue;
 		/* mirror refcount put in cleanup_layoutstats */
-		if (!atomic_inc_not_zero(&mirror->ref))
+		if (!refcount_inc_not_zero(&mirror->ref))
 			continue;
 		dev = &mirror->mirror_ds->id_node;
 		memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
...
@@ -14,6 +14,7 @@
 #define FF_FLAGS_NO_IO_THRU_MDS 2
 #define FF_FLAGS_NO_READ_IO 4
+#include <linux/refcount.h>
 #include "../pnfs.h"
 /* XXX: Let's filter out insanely large mirror count for now to avoid oom
@@ -82,7 +83,7 @@ struct nfs4_ff_layout_mirror {
 	nfs4_stateid stateid;
 	struct rpc_cred __rcu *ro_cred;
 	struct rpc_cred __rcu *rw_cred;
-	atomic_t ref;
+	refcount_t ref;
 	spinlock_t lock;
 	unsigned long flags;
 	struct nfs4_ff_layoutstat read_stat;
...
@@ -783,7 +783,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr);
 static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
 {
-	atomic_set(&l_ctx->count, 1);
+	refcount_set(&l_ctx->count, 1);
 	l_ctx->lockowner = current->files;
 	INIT_LIST_HEAD(&l_ctx->list);
 	atomic_set(&l_ctx->io_count, 0);
@@ -797,7 +797,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
 	do {
 		if (pos->lockowner != current->files)
 			continue;
-		atomic_inc(&pos->count);
+		refcount_inc(&pos->count);
 		return pos;
 	} while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
 	return NULL;
@@ -836,7 +836,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
 	struct nfs_open_context *ctx = l_ctx->open_context;
 	struct inode *inode = d_inode(ctx->dentry);
-	if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
+	if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock))
 		return;
 	list_del(&l_ctx->list);
 	spin_unlock(&inode->i_lock);
@@ -913,7 +913,7 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 {
 	if (ctx != NULL)
-		atomic_inc(&ctx->lock_context.count);
+		refcount_inc(&ctx->lock_context.count);
 	return ctx;
 }
 EXPORT_SYMBOL_GPL(get_nfs_open_context);
@@ -924,11 +924,11 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 	struct super_block *sb = ctx->dentry->d_sb;
 	if (!list_empty(&ctx->list)) {
-		if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+		if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
 			return;
 		list_del(&ctx->list);
 		spin_unlock(&inode->i_lock);
-	} else if (!atomic_dec_and_test(&ctx->lock_context.count))
+	} else if (!refcount_dec_and_test(&ctx->lock_context.count))
 		return;
 	if (inode != NULL)
 		NFS_PROTO(inode)->close_context(ctx, is_sync);
@@ -2084,8 +2084,12 @@ static int nfs_net_init(struct net *net)
 static void nfs_net_exit(struct net *net)
 {
+	struct nfs_net *nn = net_generic(net, nfs_net_id);
 	nfs_fs_proc_net_exit(net);
 	nfs_cleanup_cb_ident_idr(net);
+	WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
+	WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
 }
 static struct pernet_operations nfs_net_ops = {
...
@@ -188,6 +188,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 {
 	struct nfs3_accessargs arg = {
 		.fh = NFS_FH(inode),
+		.access = entry->mask,
 	};
 	struct nfs3_accessres res;
 	struct rpc_message msg = {
@@ -196,25 +197,9 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 		.rpc_resp = &res,
 		.rpc_cred = entry->cred,
 	};
-	int mode = entry->mask;
 	int status = -ENOMEM;
 	dprintk("NFS call access\n");
-	if (mode & MAY_READ)
-		arg.access |= NFS3_ACCESS_READ;
-	if (S_ISDIR(inode->i_mode)) {
-		if (mode & MAY_WRITE)
-			arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE;
-		if (mode & MAY_EXEC)
-			arg.access |= NFS3_ACCESS_LOOKUP;
-	} else {
-		if (mode & MAY_WRITE)
-			arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
-		if (mode & MAY_EXEC)
-			arg.access |= NFS3_ACCESS_EXECUTE;
-	}
 	res.fattr = nfs_alloc_fattr();
 	if (res.fattr == NULL)
 		goto out;
...
@@ -145,7 +145,7 @@ struct nfs4_lock_state {
 	unsigned long ls_flags;
 	struct nfs_seqid_counter ls_seqid;
 	nfs4_stateid ls_stateid;
-	atomic_t ls_count;
+	refcount_t ls_count;
 	fl_owner_t ls_owner;
 };
@@ -162,6 +162,7 @@ enum {
 	NFS_STATE_POSIX_LOCKS,		/* Posix locks are supported */
 	NFS_STATE_RECOVERY_FAILED,	/* OPEN stateid state recovery failed */
 	NFS_STATE_MAY_NOTIFY_LOCK,	/* server may CB_NOTIFY_LOCK */
+	NFS_STATE_CHANGE_WAIT,		/* A state changing operation is outstanding */
 };
 struct nfs4_state {
@@ -185,6 +186,8 @@ struct nfs4_state {
 	unsigned int n_rdwr;		/* Number of read/write references */
 	fmode_t state;			/* State on the server (R,W, or RW) */
 	atomic_t count;
+	wait_queue_head_t waitq;
 };
@@ -458,6 +461,10 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t,
 		const struct nfs_lock_context *, nfs4_stateid *,
 		struct rpc_cred **);
+extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst,
+		struct nfs4_state *state);
+extern bool nfs4_copy_open_stateid(nfs4_stateid *dst,
+		struct nfs4_state *state);
 extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
 extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -465,7 +472,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
 extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
 extern void nfs_release_seqid(struct nfs_seqid *seqid);
 extern void nfs_free_seqid(struct nfs_seqid *seqid);
-extern int nfs4_setup_sequence(const struct nfs_client *client,
+extern int nfs4_setup_sequence(struct nfs_client *client,
 			struct nfs4_sequence_args *args,
 			struct nfs4_sequence_res *res,
 			struct rpc_task *task);
@@ -475,6 +482,7 @@ extern int nfs4_sequence_done(struct rpc_task *task,
 extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
 extern const nfs4_stateid zero_stateid;
+extern const nfs4_stateid invalid_stateid;
 /* nfs4super.c */
 struct nfs_mount_info;
...
@@ -483,7 +483,7 @@ static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new,
 	 * ID and serverowner fields. Wait for CREATE_SESSION
 	 * to finish. */
 	if (pos->cl_cons_state > NFS_CS_READY) {
-		atomic_inc(&pos->cl_count);
+		refcount_inc(&pos->cl_count);
 		spin_unlock(&nn->nfs_client_lock);
 		nfs_put_client(*prev);
@@ -559,7 +559,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		 * way that a SETCLIENTID_CONFIRM to pos can succeed is
 		 * if new and pos point to the same server:
 		 */
-		atomic_inc(&pos->cl_count);
+		refcount_inc(&pos->cl_count);
 		spin_unlock(&nn->nfs_client_lock);
 		nfs_put_client(prev);
@@ -715,7 +715,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
 			continue;
 found:
-		atomic_inc(&pos->cl_count);
+		refcount_inc(&pos->cl_count);
 		*result = pos;
 		status = 0;
 		break;
@@ -749,7 +749,7 @@ nfs4_find_client_ident(struct net *net, int cb_ident)
 	spin_lock(&nn->nfs_client_lock);
 	clp = idr_find(&nn->cb_ident_idr, cb_ident);
 	if (clp)
-		atomic_inc(&clp->cl_count);
+		refcount_inc(&clp->cl_count);
 	spin_unlock(&nn->nfs_client_lock);
 	return clp;
 }
@@ -793,7 +793,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
-		if (nfs4_cb_match_client(addr, clp, minorversion) == false)
+		if (!nfs4_cb_match_client(addr, clp, minorversion))
 			continue;
 		if (!nfs4_has_session(clp))
@@ -804,7 +804,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
 			   sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
 			continue;
-		atomic_inc(&clp->cl_count);
+		refcount_inc(&clp->cl_count);
 		spin_unlock(&nn->nfs_client_lock);
 		return clp;
 	}
...
This diff is collapsed.
@@ -69,6 +69,14 @@ const nfs4_stateid zero_stateid = {
 	{ .data = { 0 } },
 	.type = NFS4_SPECIAL_STATEID_TYPE,
 };
+const nfs4_stateid invalid_stateid = {
+	{
+		.seqid = cpu_to_be32(0xffffffffU),
+		.other = { 0 },
+	},
+	.type = NFS4_INVALID_STATEID_TYPE,
+};
 static DEFINE_MUTEX(nfs_clid_init_mutex);
 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
@@ -645,6 +653,7 @@ nfs4_alloc_open_state(void)
 	INIT_LIST_HEAD(&state->lock_states);
 	spin_lock_init(&state->state_lock);
 	seqlock_init(&state->seqlock);
+	init_waitqueue_head(&state->waitq);
 	return state;
 }
@@ -825,7 +834,7 @@ __nfs4_find_lock_state(struct nfs4_state *state,
 			ret = pos;
 	}
 	if (ret)
-		atomic_inc(&ret->ls_count);
+		refcount_inc(&ret->ls_count);
 	return ret;
 }
@@ -843,7 +852,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 	if (lsp == NULL)
 		return NULL;
 	nfs4_init_seqid_counter(&lsp->ls_seqid);
-	atomic_set(&lsp->ls_count, 1);
+	refcount_set(&lsp->ls_count, 1);
 	lsp->ls_state = state;
 	lsp->ls_owner = fl_owner;
 	lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
@@ -907,7 +916,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 	if (lsp == NULL)
 		return;
 	state = lsp->ls_state;
-	if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
+	if (!refcount_dec_and_lock(&lsp->ls_count, &state->state_lock))
 		return;
 	list_del(&lsp->ls_locks);
 	if (list_empty(&state->lock_states))
@@ -927,7 +936,7 @@ static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 	struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
 	dst->fl_u.nfs4_fl.owner = lsp;
-	atomic_inc(&lsp->ls_count);
+	refcount_inc(&lsp->ls_count);
 }
 static void nfs4_fl_release_lock(struct file_lock *fl)
@@ -985,18 +994,39 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
 	return ret;
 }
-static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+{
+	bool ret;
+	int seq;
+	do {
+		ret = false;
+		seq = read_seqbegin(&state->seqlock);
+		if (nfs4_state_match_open_stateid_other(state, dst)) {
+			dst->seqid = state->open_stateid.seqid;
+			ret = true;
+		}
+	} while (read_seqretry(&state->seqlock, seq));
+	return ret;
+}
+bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
 {
+	bool ret;
 	const nfs4_stateid *src;
 	int seq;
 	do {
+		ret = false;
 		src = &zero_stateid;
 		seq = read_seqbegin(&state->seqlock);
-		if (test_bit(NFS_OPEN_STATE, &state->flags))
+		if (test_bit(NFS_OPEN_STATE, &state->flags)) {
 			src = &state->open_stateid;
+			ret = true;
+		}
 		nfs4_stateid_copy(dst, src);
 	} while (read_seqretry(&state->seqlock, seq));
+	return ret;
 }
 /*
@@ -1177,7 +1207,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
 	if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 		return;
 	__module_get(THIS_MODULE);
-	atomic_inc(&clp->cl_count);
+	refcount_inc(&clp->cl_count);
 	/* The rcu_read_lock() is not strictly necessary, as the state
 	 * manager is the only thread that ever changes the rpc_xprt
@@ -1269,7 +1299,7 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
 	might_sleep();
-	atomic_inc(&clp->cl_count);
+	refcount_inc(&clp->cl_count);
 	res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
 			nfs_wait_bit_killable, TASK_KILLABLE);
 	if (res)
@@ -1409,6 +1439,11 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
 			found = true;
 			continue;
 		}
+		if (nfs4_stateid_match_other(&state->open_stateid, stateid) &&
+		    nfs4_state_mark_reclaim_nograce(clp, state)) {
+			found = true;
+			continue;
+		}
 		if (nfs_state_lock_state_matches_stateid(state, stateid) &&
 		    nfs4_state_mark_reclaim_nograce(clp, state))
 			found = true;
@@ -2510,7 +2545,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			break;
 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 			break;
-	} while (atomic_read(&clp->cl_count) > 1);
+	} while (refcount_read(&clp->cl_count) > 1);
 	return;
 out_error:
 	if (strlen(section))
...
@@ -202,17 +202,13 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
 		TP_ARGS(clp, error),
 		TP_STRUCT__entry(
-			__string(dstaddr,
-				rpc_peeraddr2str(clp->cl_rpcclient,
-					RPC_DISPLAY_ADDR))
+			__string(dstaddr, clp->cl_hostname)
 			__field(int, error)
 		),
 		TP_fast_assign(
 			__entry->error = error;
-			__assign_str(dstaddr,
-				rpc_peeraddr2str(clp->cl_rpcclient,
-					RPC_DISPLAY_ADDR));
+			__assign_str(dstaddr, clp->cl_hostname);
 		),
 		TP_printk(
@@ -1066,6 +1062,8 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
 DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr);
 DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait);
 DECLARE_EVENT_CLASS(nfs4_getattr_event,
 		TP_PROTO(
@@ -1133,9 +1131,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
-			__string(dstaddr, clp ?
-				rpc_peeraddr2str(clp->cl_rpcclient,
-					RPC_DISPLAY_ADDR) : "unknown")
+			__string(dstaddr, clp ? clp->cl_hostname : "unknown")
 		),
 		TP_fast_assign(
@@ -1148,9 +1144,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
 				__entry->fileid = 0;
 				__entry->dev = 0;
 			}
-			__assign_str(dstaddr, clp ?
-				rpc_peeraddr2str(clp->cl_rpcclient,
-					RPC_DISPLAY_ADDR) : "unknown")
+			__assign_str(dstaddr, clp ? clp->cl_hostname : "unknown")
 		),
 		TP_printk(
@@ -1192,9 +1186,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
-			__string(dstaddr, clp ?
-				rpc_peeraddr2str(clp->cl_rpcclient,
-					RPC_DISPLAY_ADDR) : "unknown")
+			__string(dstaddr, clp ? clp->cl_hostname : "unknown")
 			__field(int, stateid_seq)
 			__field(u32, stateid_hash)
 		),
@@ -1209,9 +1201,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
 				__entry->fileid = 0;
 				__entry->dev = 0;
 			}
-			__assign_str(dstaddr, clp ?
-				rpc_peeraddr2str(clp->cl_rpcclient,
-					RPC_DISPLAY_ADDR) : "unknown")
+			__assign_str(dstaddr, clp ? clp->cl_hostname : "unknown")
 			__entry->stateid_seq =
 				be32_to_cpu(stateid->seqid);
 			__entry->stateid_hash =
...
@@ -4385,6 +4385,14 @@ static int decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *state
 	return decode_stateid(xdr, stateid);
 }
+static int decode_invalid_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	nfs4_stateid dummy;
+	nfs4_stateid_copy(stateid, &invalid_stateid);
+	return decode_stateid(xdr, &dummy);
+}
 static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
 {
 	int status;
@@ -4393,7 +4401,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
 	if (status != -EIO)
 		nfs_increment_open_seqid(status, res->seqid);
 	if (!status)
-		status = decode_open_stateid(xdr, &res->stateid);
+		status = decode_invalid_stateid(xdr, &res->stateid);
 	return status;
 }
@@ -6108,6 +6116,8 @@ static int decode_layoutreturn(struct xdr_stream *xdr,
 	res->lrs_present = be32_to_cpup(p);
 	if (res->lrs_present)
 		status = decode_layout_stateid(xdr, &res->stateid);
+	else
+		nfs4_stateid_copy(&res->stateid, &invalid_stateid);
 	return status;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
...
...@@ -251,7 +251,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); ...@@ -251,7 +251,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
void void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{ {
atomic_inc(&lo->plh_refcount); refcount_inc(&lo->plh_refcount);
} }
static struct pnfs_layout_hdr * static struct pnfs_layout_hdr *
...@@ -296,7 +296,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) ...@@ -296,7 +296,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
pnfs_layoutreturn_before_put_layout_hdr(lo); pnfs_layoutreturn_before_put_layout_hdr(lo);
if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
if (!list_empty(&lo->plh_segs)) if (!list_empty(&lo->plh_segs))
WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
pnfs_detach_layout_hdr(lo); pnfs_detach_layout_hdr(lo);
...@@ -354,6 +354,24 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, ...@@ -354,6 +354,24 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
pnfs_lseg_dec_and_remove_zero(lseg, free_me); pnfs_lseg_dec_and_remove_zero(lseg, free_me);
} }
/*
* Update the seqid of a layout stateid
*/
bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode)
{
struct pnfs_layout_hdr *lo;
bool ret = false;
spin_lock(&inode->i_lock);
lo = NFS_I(inode)->layout;
if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
dst->seqid = lo->plh_stateid.seqid;
ret = true;
}
spin_unlock(&inode->i_lock);
return ret;
}
/* /*
* Mark a pnfs_layout_hdr and all associated layout segments as invalid * Mark a pnfs_layout_hdr and all associated layout segments as invalid
* *
...@@ -395,14 +413,14 @@ pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) ...@@ -395,14 +413,14 @@ pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{ {
lo->plh_retry_timestamp = jiffies; lo->plh_retry_timestamp = jiffies;
 	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
-		atomic_inc(&lo->plh_refcount);
+		refcount_inc(&lo->plh_refcount);
 }
 static void
 pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
 {
 	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
-		atomic_dec(&lo->plh_refcount);
+		refcount_dec(&lo->plh_refcount);
 }
 static void
@@ -450,7 +468,7 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
 {
 	INIT_LIST_HEAD(&lseg->pls_list);
 	INIT_LIST_HEAD(&lseg->pls_lc_list);
-	atomic_set(&lseg->pls_refcount, 1);
+	refcount_set(&lseg->pls_refcount, 1);
 	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
 	lseg->pls_layout = lo;
 	lseg->pls_range = *range;
@@ -472,7 +490,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
 	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 	list_del_init(&lseg->pls_list);
 	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
-	atomic_dec(&lo->plh_refcount);
+	refcount_dec(&lo->plh_refcount);
 	if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
 		return;
 	if (list_empty(&lo->plh_segs) &&
@@ -507,13 +525,13 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 		return;
 	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
-		atomic_read(&lseg->pls_refcount),
+		refcount_read(&lseg->pls_refcount),
 		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 	lo = lseg->pls_layout;
 	inode = lo->plh_inode;
-	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
+	if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
 			spin_unlock(&inode->i_lock);
 			return;
@@ -551,7 +569,7 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
 static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
 		struct list_head *tmp_list)
 {
-	if (!atomic_dec_and_test(&lseg->pls_refcount))
+	if (!refcount_dec_and_test(&lseg->pls_refcount))
 		return false;
 	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
 	list_add(&lseg->pls_list, tmp_list);
@@ -570,7 +588,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 		 * outstanding io is finished.
 		 */
 		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
-			atomic_read(&lseg->pls_refcount));
+			refcount_read(&lseg->pls_refcount));
 		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
 			rv = 1;
 	}
@@ -1451,7 +1469,7 @@ alloc_init_layout_hdr(struct inode *ino,
 	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
 	if (!lo)
 		return NULL;
-	atomic_set(&lo->plh_refcount, 1);
+	refcount_set(&lo->plh_refcount, 1);
 	INIT_LIST_HEAD(&lo->plh_layouts);
 	INIT_LIST_HEAD(&lo->plh_segs);
 	INIT_LIST_HEAD(&lo->plh_return_segs);
@@ -1513,7 +1531,7 @@ pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
 	if ((range->iomode == IOMODE_RW &&
 	    ls_range->iomode != IOMODE_RW) ||
 	    (range->iomode != ls_range->iomode &&
-	    strict_iomode == true) ||
+	    strict_iomode) ||
 	    !pnfs_lseg_range_intersecting(ls_range, range))
 		return 0;
@@ -1546,7 +1564,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
 	}
 	dprintk("%s:Return lseg %p ref %d\n",
-		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
+		__func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0);
 	return ret;
 }
......
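The pnfs.c hunks above are one mechanical conversion: counters that only track object references move from atomic_t to refcount_t (hence the atomic_*() to refcount_*() renames), because refcount_t refuses to wrap on overflow or on increment-from-zero and warns instead of silently corrupting the count. A small stand-alone C sketch of that behaviour, purely illustrative and far simpler than the real <linux/refcount.h> implementation:

#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for refcount_t: saturate on misuse instead of wrapping. */
struct refcount { unsigned int val; };

static void refcount_set_(struct refcount *r, unsigned int n) { r->val = n; }

static void refcount_inc_(struct refcount *r)
{
	if (r->val == 0 || r->val == UINT_MAX) {
		r->val = UINT_MAX;	/* saturate; the kernel would WARN here */
		return;
	}
	r->val++;
}

static bool refcount_dec_and_test_(struct refcount *r)
{
	if (r->val == UINT_MAX)		/* saturated objects are never freed */
		return false;
	assert(r->val > 0);		/* underflow would mean use-after-free */
	return --r->val == 0;
}

int main(void)
{
	struct refcount ref;

	refcount_set_(&ref, 1);				/* creator's reference */
	refcount_inc_(&ref);				/* second user */
	printf("%d\n", refcount_dec_and_test_(&ref));	/* 0: still referenced */
	printf("%d\n", refcount_dec_and_test_(&ref));	/* 1: last put, free it */
	return 0;
}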
@@ -30,6 +30,7 @@
 #ifndef FS_NFS_PNFS_H
 #define FS_NFS_PNFS_H
+#include <linux/refcount.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
 #include <linux/workqueue.h>
@@ -54,7 +55,7 @@ struct nfs4_pnfs_ds {
 	char *ds_remotestr;	/* comma sep list of addrs */
 	struct list_head ds_addrs;
 	struct nfs_client *ds_clp;
-	atomic_t ds_count;
+	refcount_t ds_count;
 	unsigned long ds_state;
 #define NFS4DS_CONNECTING	0	/* ds is establishing connection */
 };
@@ -63,7 +64,7 @@ struct pnfs_layout_segment {
 	struct list_head pls_list;
 	struct list_head pls_lc_list;
 	struct pnfs_layout_range pls_range;
-	atomic_t pls_refcount;
+	refcount_t pls_refcount;
 	u32 pls_seq;
 	unsigned long pls_flags;
 	struct pnfs_layout_hdr *pls_layout;
@@ -179,7 +180,7 @@ struct pnfs_layoutdriver_type {
 };
 struct pnfs_layout_hdr {
-	atomic_t plh_refcount;
+	refcount_t plh_refcount;
 	atomic_t plh_outstanding;	/* number of RPCs out */
 	struct list_head plh_layouts;	/* other client layouts */
 	struct list_head plh_bulk_destroy;
@@ -251,6 +252,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
 		bool is_recall);
 int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
 		bool is_recall);
+bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode);
 void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
 		const nfs4_stateid *new,
@@ -393,7 +395,7 @@ static inline struct pnfs_layout_segment *
 pnfs_get_lseg(struct pnfs_layout_segment *lseg)
 {
 	if (lseg) {
-		atomic_inc(&lseg->pls_refcount);
+		refcount_inc(&lseg->pls_refcount);
 		smp_mb__after_atomic();
 	}
 	return lseg;
@@ -764,6 +766,11 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
 {
 }
+static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst,
+					       struct inode *inode)
+{
+	return false;
+}
 #endif /* CONFIG_NFS_V4_1 */
 #if IS_ENABLED(CONFIG_NFS_V4_2)
......
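The new nfs4_refresh_layout_stateid() prototype lives in the CONFIG_NFS_V4_1 part of the header, and the second added block provides a static inline stub that simply returns false when pNFS is compiled out, so callers never need an #ifdef of their own. A generic, compilable sketch of that header idiom, with made-up names:

#include <stdbool.h>
#include <stdio.h>

/* Flip to 1 to mimic the CONFIG_NFS_V4_1=y case; all names here are
 * hypothetical and only illustrate the "prototype or inline stub" pattern. */
#define CONFIG_MY_FEATURE 0

#if CONFIG_MY_FEATURE
bool my_feature_refresh(int *dst);	/* would live in the feature's .c file */
#else
static inline bool my_feature_refresh(int *dst)
{
	(void)dst;
	return false;			/* feature compiled out: no-op stub */
}
#endif

int main(void)
{
	int v = 0;

	/* Callers look identical whether or not the feature is built in. */
	printf("refreshed=%d\n", my_feature_refresh(&v));
	return 0;
}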
@@ -338,7 +338,7 @@ print_ds(struct nfs4_pnfs_ds *ds)
 		" client %p\n"
 		" cl_exchange_flags %x\n",
 		ds->ds_remotestr,
-		atomic_read(&ds->ds_count), ds->ds_clp,
+		refcount_read(&ds->ds_count), ds->ds_clp,
 		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
 }
@@ -451,7 +451,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
 void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
 {
-	if (atomic_dec_and_lock(&ds->ds_count,
+	if (refcount_dec_and_lock(&ds->ds_count,
 				&nfs4_ds_cache_lock)) {
 		list_del_init(&ds->ds_node);
 		spin_unlock(&nfs4_ds_cache_lock);
@@ -537,7 +537,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 		INIT_LIST_HEAD(&ds->ds_addrs);
 		list_splice_init(dsaddrs, &ds->ds_addrs);
 		ds->ds_remotestr = remotestr;
-		atomic_set(&ds->ds_count, 1);
+		refcount_set(&ds->ds_count, 1);
 		INIT_LIST_HEAD(&ds->ds_node);
 		ds->ds_clp = NULL;
 		list_add(&ds->ds_node, &nfs4_data_server_cache);
@@ -546,10 +546,10 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 	} else {
 		kfree(remotestr);
 		kfree(ds);
-		atomic_inc(&tmp_ds->ds_count);
+		refcount_inc(&tmp_ds->ds_count);
 		dprintk("%s data server %s found, inc'ed ds_count to %d\n",
 			__func__, tmp_ds->ds_remotestr,
-			atomic_read(&tmp_ds->ds_count));
+			refcount_read(&tmp_ds->ds_count));
 		ds = tmp_ds;
 	}
 	spin_unlock(&nfs4_ds_cache_lock);
......
@@ -1332,7 +1332,7 @@ static int nfs_parse_mount_options(char *raw,
 			mnt->options |= NFS_OPTION_MIGRATION;
 			break;
 		case Opt_nomigration:
-			mnt->options &= NFS_OPTION_MIGRATION;
+			mnt->options &= ~NFS_OPTION_MIGRATION;
 			break;
 		/*
@@ -1456,18 +1456,21 @@ static int nfs_parse_mount_options(char *raw,
 			switch (token) {
 			case Opt_xprt_udp6:
 				protofamily = AF_INET6;
+				/* fall through */
 			case Opt_xprt_udp:
 				mnt->flags &= ~NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
 				break;
 			case Opt_xprt_tcp6:
 				protofamily = AF_INET6;
+				/* fall through */
 			case Opt_xprt_tcp:
 				mnt->flags |= NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
 				break;
 			case Opt_xprt_rdma6:
 				protofamily = AF_INET6;
+				/* fall through */
 			case Opt_xprt_rdma:
 				/* vector side protocols to TCP */
 				mnt->flags |= NFS_MOUNT_TCP;
@@ -1494,11 +1497,13 @@ static int nfs_parse_mount_options(char *raw,
 			switch (token) {
 			case Opt_xprt_udp6:
 				mountfamily = AF_INET6;
+				/* fall through */
 			case Opt_xprt_udp:
 				mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
 				break;
 			case Opt_xprt_tcp6:
 				mountfamily = AF_INET6;
+				/* fall through */
 			case Opt_xprt_tcp:
 				mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
 				break;
@@ -1988,9 +1993,9 @@ static int nfs23_validate_mount_data(void *options,
 	args->version = NFS_DEFAULT_VERSION;
 	switch (data->version) {
 	case 1:
-		data->namlen = 0;
+		data->namlen = 0; /* fall through */
 	case 2:
-		data->bsize = 0;
+		data->bsize = 0; /* fall through */
 	case 3:
 		if (data->flags & NFS_MOUNT_VER3)
 			goto out_no_v3;
@@ -1998,11 +2003,14 @@ static int nfs23_validate_mount_data(void *options,
 		memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
 		/* Turn off security negotiation */
 		extra_flags |= NFS_MOUNT_SECFLAVOUR;
+		/* fall through */
 	case 4:
 		if (data->flags & NFS_MOUNT_SECFLAVOUR)
 			goto out_no_sec;
+		/* fall through */
 	case 5:
 		memset(data->context, 0, sizeof(data->context));
+		/* fall through */
 	case 6:
 		if (data->flags & NFS_MOUNT_VER3) {
 			if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
......
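Two different fixes share the super.c hunks. The first is a real bug: "mnt->options &= NFS_OPTION_MIGRATION" kept only the migration bit and wiped every other mount option, while the intent of "nomigration" is the opposite, so the mask is now inverted to "&= ~NFS_OPTION_MIGRATION". The remaining hunks only annotate deliberate case fall-throughs so -Wimplicit-fallthrough stays quiet. A stand-alone sketch of the annotation, assuming GCC's comment-based matching:

#include <stdio.h>

/* Each older mount-data version also has to clear the newer fields, so the
 * missing breaks are intentional; the comment documents that for the
 * compiler (newer kernels use the fallthrough; macro instead). */
static void reset_for_version(int version, int *namlen, int *bsize)
{
	switch (version) {
	case 1:
		*namlen = 0;
		/* fall through */
	case 2:
		*bsize = 0;
		/* fall through */
	default:
		break;
	}
}

int main(void)
{
	int namlen = 255, bsize = 4096;

	reset_for_version(1, &namlen, &bsize);
	printf("namlen=%d bsize=%d\n", namlen, bsize);	/* both become 0 */
	return 0;
}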
@@ -487,10 +487,8 @@ nfs_lock_and_join_requests(struct page *page)
 	}
 	ret = nfs_page_group_lock(head);
-	if (ret < 0) {
-		nfs_unlock_and_release_request(head);
-		return ERR_PTR(ret);
-	}
+	if (ret < 0)
+		goto release_request;
 	/* lock each request in the page group */
 	total_bytes = head->wb_bytes;
@@ -515,8 +513,7 @@ nfs_lock_and_join_requests(struct page *page)
 		if (ret < 0) {
 			nfs_unroll_locks(inode, head, subreq);
 			nfs_release_request(subreq);
-			nfs_unlock_and_release_request(head);
-			return ERR_PTR(ret);
+			goto release_request;
 		}
 	}
 	/*
@@ -532,8 +529,8 @@ nfs_lock_and_join_requests(struct page *page)
 			nfs_page_group_unlock(head);
 			nfs_unroll_locks(inode, head, subreq);
 			nfs_unlock_and_release_request(subreq);
-			nfs_unlock_and_release_request(head);
-			return ERR_PTR(-EIO);
+			ret = -EIO;
+			goto release_request;
 		}
 	}
@@ -576,6 +573,10 @@ nfs_lock_and_join_requests(struct page *page)
 	/* still holds ref on head from nfs_page_find_head_request
 	 * and still has lock on head from lock loop */
 	return head;
+release_request:
+	nfs_unlock_and_release_request(head);
+	return ERR_PTR(ret);
 }
 static void nfs_write_error_remove_page(struct nfs_page *req)
......
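The nfs_lock_and_join_requests() change is purely a refactor: three error paths that each did nfs_unlock_and_release_request(head); return ERR_PTR(ret); now branch to a single release_request label. A generic, self-contained sketch of that goto-cleanup idiom (all names below are hypothetical):

#include <errno.h>
#include <stdio.h>

struct resource { int locked; };

static int lock_resource(struct resource *r) { r->locked = 1; return 0; }
static void unlock_resource(struct resource *r) { r->locked = 0; }
static int step_one(void) { return 0; }
static int step_two(void) { return -EIO; }	/* pretend this step fails */

static int do_work(struct resource *r)
{
	int ret;

	ret = lock_resource(r);
	if (ret < 0)
		return ret;
	ret = step_one();
	if (ret < 0)
		goto release;	/* every failure funnels through one exit */
	ret = step_two();
	if (ret < 0)
		goto release;
	unlock_resource(r);
	return 0;

release:
	unlock_resource(r);	/* the one place that undoes the lock */
	return ret;
}

int main(void)
{
	struct resource r = { 0 };

	printf("do_work()=%d locked=%d\n", do_work(&r), r.locked);
	return 0;
}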
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/rbtree.h>
+#include <linux/refcount.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
@@ -56,7 +57,7 @@ struct nfs_access_entry {
 };
 struct nfs_lock_context {
-	atomic_t count;
+	refcount_t count;
 	struct list_head list;
 	struct nfs_open_context *open_context;
 	fl_owner_t lockowner;
@@ -184,6 +185,16 @@ struct nfs_inode {
 	struct inode vfs_inode;
 };
+/*
+ * Access bit flags
+ */
+#define NFS_ACCESS_READ		0x0001
+#define NFS_ACCESS_LOOKUP	0x0002
+#define NFS_ACCESS_MODIFY	0x0004
+#define NFS_ACCESS_EXTEND	0x0008
+#define NFS_ACCESS_DELETE	0x0010
+#define NFS_ACCESS_EXECUTE	0x0020
 /*
  * Cache validity bit flags
  */
......
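The new NFS_ACCESS_* defines give named bits for the ACCESS mask the client caches per credential; the values match the ACCESS bits defined by NFSv3. How such a mask is typically tested, as a purely illustrative example:

#include <stdbool.h>
#include <stdio.h>

#define NFS_ACCESS_READ    0x0001
#define NFS_ACCESS_LOOKUP  0x0002
#define NFS_ACCESS_MODIFY  0x0004
#define NFS_ACCESS_EXTEND  0x0008
#define NFS_ACCESS_DELETE  0x0010
#define NFS_ACCESS_EXECUTE 0x0020

/* Illustrative check only: treat "may append" as MODIFY plus EXTEND. */
static bool may_append(unsigned int granted)
{
	unsigned int need = NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;

	return (granted & need) == need;
}

int main(void)
{
	printf("%d\n", may_append(NFS_ACCESS_READ | NFS_ACCESS_MODIFY));	/* 0 */
	printf("%d\n", may_append(NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND));	/* 1 */
	return 0;
}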
@@ -10,6 +10,7 @@
 #include <linux/sunrpc/xprt.h>
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 struct nfs4_session;
 struct nfs_iostats;
@@ -25,7 +26,7 @@ struct nfs41_impl_id;
  * The nfs_client identifies our client state to the server.
  */
 struct nfs_client {
-	atomic_t cl_count;
+	refcount_t cl_count;
 	atomic_t cl_mds_count;
 	int cl_cons_state;	/* current construction state (-ve: init error) */
 #define NFS_CS_READY 0		/* ready to be used */
......
/* /*
* Copyright (c) 2015-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
...@@ -50,65 +51,6 @@ enum { ...@@ -50,65 +51,6 @@ enum {
RPCRDMA_V1_DEF_INLINE_SIZE = 1024, RPCRDMA_V1_DEF_INLINE_SIZE = 1024,
}; };
struct rpcrdma_segment {
__be32 rs_handle; /* Registered memory handle */
__be32 rs_length; /* Length of the chunk in bytes */
__be64 rs_offset; /* Chunk virtual address or offset */
};
/*
* read chunk(s), encoded as a linked list.
*/
struct rpcrdma_read_chunk {
__be32 rc_discrim; /* 1 indicates presence */
__be32 rc_position; /* Position in XDR stream */
struct rpcrdma_segment rc_target;
};
/*
* write chunk, and reply chunk.
*/
struct rpcrdma_write_chunk {
struct rpcrdma_segment wc_target;
};
/*
* write chunk(s), encoded as a counted array.
*/
struct rpcrdma_write_array {
__be32 wc_discrim; /* 1 indicates presence */
__be32 wc_nchunks; /* Array count */
struct rpcrdma_write_chunk wc_array[0];
};
struct rpcrdma_msg {
__be32 rm_xid; /* Mirrors the RPC header xid */
__be32 rm_vers; /* Version of this protocol */
__be32 rm_credit; /* Buffers requested/granted */
__be32 rm_type; /* Type of message (enum rpcrdma_proc) */
union {
struct { /* no chunks */
__be32 rm_empty[3]; /* 3 empty chunk lists */
} rm_nochunks;
struct { /* no chunks and padded */
__be32 rm_align; /* Padding alignment */
__be32 rm_thresh; /* Padding threshold */
__be32 rm_pempty[3]; /* 3 empty chunk lists */
} rm_padded;
struct {
__be32 rm_err;
__be32 rm_vers_low;
__be32 rm_vers_high;
} rm_error;
__be32 rm_chunks[0]; /* read, write and reply chunks */
} rm_body;
};
/* /*
* XDR sizes, in quads * XDR sizes, in quads
*/ */
......
...@@ -71,6 +71,36 @@ TRACE_EVENT(rpc_connect_status, ...@@ -71,6 +71,36 @@ TRACE_EVENT(rpc_connect_status,
__entry->status) __entry->status)
); );
TRACE_EVENT(rpc_request,
TP_PROTO(const struct rpc_task *task),
TP_ARGS(task),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(int, version)
__field(bool, async)
__string(progname, task->tk_client->cl_program->name)
__string(procname, rpc_proc_name(task))
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->version = task->tk_client->cl_vers;
__entry->async = RPC_IS_ASYNC(task);
__assign_str(progname, task->tk_client->cl_program->name)
__assign_str(procname, rpc_proc_name(task))
),
TP_printk("task:%u@%u %sv%d %s (%ssync)",
__entry->task_id, __entry->client_id,
__get_str(progname), __entry->version,
__get_str(procname), __entry->async ? "a": ""
)
);
DECLARE_EVENT_CLASS(rpc_task_running, DECLARE_EVENT_CLASS(rpc_task_running,
TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
...@@ -342,21 +372,21 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, ...@@ -342,21 +372,21 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
TP_ARGS(xprt, xid, status), TP_ARGS(xprt, xid, status),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(__be32, xid) __field(u32, xid)
__field(int, status) __field(int, status)
__string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR])
__string(port, xprt->address_strings[RPC_DISPLAY_PORT]) __string(port, xprt->address_strings[RPC_DISPLAY_PORT])
), ),
TP_fast_assign( TP_fast_assign(
__entry->xid = xid; __entry->xid = be32_to_cpu(xid);
__entry->status = status; __entry->status = status;
__assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]);
__assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]);
), ),
TP_printk("peer=[%s]:%s xid=0x%x status=%d", __get_str(addr), TP_printk("peer=[%s]:%s xid=0x%08x status=%d", __get_str(addr),
__get_str(port), be32_to_cpu(__entry->xid), __get_str(port), __entry->xid,
__entry->status) __entry->status)
); );
...@@ -417,7 +447,7 @@ TRACE_EVENT(xs_tcp_data_recv, ...@@ -417,7 +447,7 @@ TRACE_EVENT(xs_tcp_data_recv,
TP_STRUCT__entry( TP_STRUCT__entry(
__string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]) __string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR])
__string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]) __string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT])
__field(__be32, xid) __field(u32, xid)
__field(unsigned long, flags) __field(unsigned long, flags)
__field(unsigned long, copied) __field(unsigned long, copied)
__field(unsigned int, reclen) __field(unsigned int, reclen)
...@@ -427,15 +457,15 @@ TRACE_EVENT(xs_tcp_data_recv, ...@@ -427,15 +457,15 @@ TRACE_EVENT(xs_tcp_data_recv,
TP_fast_assign( TP_fast_assign(
__assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]); __assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]);
__assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]); __assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]);
__entry->xid = xs->tcp_xid; __entry->xid = be32_to_cpu(xs->tcp_xid);
__entry->flags = xs->tcp_flags; __entry->flags = xs->tcp_flags;
__entry->copied = xs->tcp_copied; __entry->copied = xs->tcp_copied;
__entry->reclen = xs->tcp_reclen; __entry->reclen = xs->tcp_reclen;
__entry->offset = xs->tcp_offset; __entry->offset = xs->tcp_offset;
), ),
TP_printk("peer=[%s]:%s xid=0x%x flags=%s copied=%lu reclen=%u offset=%lu", TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu",
__get_str(addr), __get_str(port), be32_to_cpu(__entry->xid), __get_str(addr), __get_str(port), __entry->xid,
rpc_show_sock_xprt_flags(__entry->flags), rpc_show_sock_xprt_flags(__entry->flags),
__entry->copied, __entry->reclen, __entry->offset) __entry->copied, __entry->reclen, __entry->offset)
); );
...@@ -457,20 +487,20 @@ TRACE_EVENT(svc_recv, ...@@ -457,20 +487,20 @@ TRACE_EVENT(svc_recv,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct sockaddr *, addr) __field(struct sockaddr *, addr)
__field(__be32, xid) __field(u32, xid)
__field(int, status) __field(int, status)
__field(unsigned long, flags) __field(unsigned long, flags)
), ),
TP_fast_assign( TP_fast_assign(
__entry->addr = (struct sockaddr *)&rqst->rq_addr; __entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = status > 0 ? rqst->rq_xid : 0; __entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0;
__entry->status = status; __entry->status = status;
__entry->flags = rqst->rq_flags; __entry->flags = rqst->rq_flags;
), ),
TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", __entry->addr, TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", __entry->addr,
be32_to_cpu(__entry->xid), __entry->status, __entry->xid, __entry->status,
show_rqstp_flags(__entry->flags)) show_rqstp_flags(__entry->flags))
); );
...@@ -481,21 +511,21 @@ DECLARE_EVENT_CLASS(svc_rqst_event, ...@@ -481,21 +511,21 @@ DECLARE_EVENT_CLASS(svc_rqst_event,
TP_ARGS(rqst), TP_ARGS(rqst),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(__be32, xid) __field(u32, xid)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, rqst->rq_addrlen) __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xid = rqst->rq_xid; __entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->flags = rqst->rq_flags; __entry->flags = rqst->rq_flags;
memcpy(__get_dynamic_array(addr), memcpy(__get_dynamic_array(addr),
&rqst->rq_addr, rqst->rq_addrlen); &rqst->rq_addr, rqst->rq_addrlen);
), ),
TP_printk("addr=%pIScp rq_xid=0x%x flags=%s", TP_printk("addr=%pIScp rq_xid=0x%08x flags=%s",
(struct sockaddr *)__get_dynamic_array(addr), (struct sockaddr *)__get_dynamic_array(addr),
be32_to_cpu(__entry->xid), __entry->xid,
show_rqstp_flags(__entry->flags)) show_rqstp_flags(__entry->flags))
); );
...@@ -515,7 +545,7 @@ DECLARE_EVENT_CLASS(svc_rqst_status, ...@@ -515,7 +545,7 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct sockaddr *, addr) __field(struct sockaddr *, addr)
__field(__be32, xid) __field(u32, xid)
__field(int, dropme) __field(int, dropme)
__field(int, status) __field(int, status)
__field(unsigned long, flags) __field(unsigned long, flags)
...@@ -523,13 +553,13 @@ DECLARE_EVENT_CLASS(svc_rqst_status, ...@@ -523,13 +553,13 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
TP_fast_assign( TP_fast_assign(
__entry->addr = (struct sockaddr *)&rqst->rq_addr; __entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = rqst->rq_xid; __entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->status = status; __entry->status = status;
__entry->flags = rqst->rq_flags; __entry->flags = rqst->rq_flags;
), ),
TP_printk("addr=%pIScp rq_xid=0x%x status=%d flags=%s", TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s",
__entry->addr, be32_to_cpu(__entry->xid), __entry->addr, __entry->xid,
__entry->status, show_rqstp_flags(__entry->flags)) __entry->status, show_rqstp_flags(__entry->flags))
); );
...@@ -678,18 +708,19 @@ DECLARE_EVENT_CLASS(svc_deferred_event, ...@@ -678,18 +708,19 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
TP_ARGS(dr), TP_ARGS(dr),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(__be32, xid) __field(u32, xid)
__dynamic_array(unsigned char, addr, dr->addrlen) __dynamic_array(unsigned char, addr, dr->addrlen)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xid = *(__be32 *)(dr->args + (dr->xprt_hlen>>2)); __entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
(dr->xprt_hlen>>2)));
memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen); memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen);
), ),
TP_printk("addr=%pIScp xid=0x%x", TP_printk("addr=%pIScp xid=0x%08x",
(struct sockaddr *)__get_dynamic_array(addr), (struct sockaddr *)__get_dynamic_array(addr),
be32_to_cpu(__entry->xid)) __entry->xid)
); );
DEFINE_EVENT(svc_deferred_event, svc_drop_deferred, DEFINE_EVENT(svc_deferred_event, svc_drop_deferred,
......
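Every tracepoint hunk above makes the same change: the XID is byte-swapped once with be32_to_cpu() in TP_fast_assign and stored as a host-order u32, so TP_printk and user-space trace tools simply print the stored value with 0x%08x instead of each carrying its own swap. A user-space sketch of the idea (ntohl() standing in for be32_to_cpu()):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct trace_entry {
	uint32_t xid;			/* stored in CPU byte order */
};

int main(void)
{
	uint32_t wire_xid = htonl(0x1234abcd);	/* big-endian, as on the wire */
	struct trace_entry e;

	e.xid = ntohl(wire_xid);	/* convert once, at record time */
	printf("xid=0x%08x\n", e.xid);	/* prints xid=0x1234abcd on any host */
	return 0;
}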
...@@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task) ...@@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task)
} }
EXPORT_SYMBOL_GPL(rpc_restart_call); EXPORT_SYMBOL_GPL(rpc_restart_call);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
const char const char
*rpc_proc_name(const struct rpc_task *task) *rpc_proc_name(const struct rpc_task *task)
{ {
...@@ -1505,7 +1504,6 @@ const char ...@@ -1505,7 +1504,6 @@ const char
} else } else
return "no proc"; return "no proc";
} }
#endif
/* /*
* 0. Initial state * 0. Initial state
...@@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task) ...@@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client; struct rpc_clnt *clnt = task->tk_client;
int idx = task->tk_msg.rpc_proc->p_statidx; int idx = task->tk_msg.rpc_proc->p_statidx;
trace_rpc_request(task);
dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
clnt->cl_program->name, clnt->cl_vers, clnt->cl_program->name, clnt->cl_vers,
rpc_proc_name(task), rpc_proc_name(task),
...@@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task) ...@@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task)
switch (status) { switch (status) {
case -ENOMEM: case -ENOMEM:
rpc_delay(task, HZ >> 2); rpc_delay(task, HZ >> 2);
/* fall through */
case -EAGAIN: /* woken up; retry */ case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve; task->tk_action = call_retry_reserve;
return; return;
...@@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task) ...@@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task)
/* Use rate-limiting and a max number of retries if refresh /* Use rate-limiting and a max number of retries if refresh
* had status 0 but failed to update the cred. * had status 0 but failed to update the cred.
*/ */
/* fall through */
case -ETIMEDOUT: case -ETIMEDOUT:
rpc_delay(task, 3*HZ); rpc_delay(task, 3*HZ);
/* fall through */
case -EAGAIN: case -EAGAIN:
status = -EACCES; status = -EACCES;
/* fall through */
case -EKEYEXPIRED: case -EKEYEXPIRED:
if (!task->tk_cred_retry) if (!task->tk_cred_retry)
break; break;
...@@ -1911,6 +1914,7 @@ call_connect_status(struct rpc_task *task) ...@@ -1911,6 +1914,7 @@ call_connect_status(struct rpc_task *task)
task->tk_action = call_bind; task->tk_action = call_bind;
return; return;
} }
/* fall through */
case -ECONNRESET: case -ECONNRESET:
case -ECONNABORTED: case -ECONNABORTED:
case -ENETUNREACH: case -ENETUNREACH:
...@@ -1924,6 +1928,7 @@ call_connect_status(struct rpc_task *task) ...@@ -1924,6 +1928,7 @@ call_connect_status(struct rpc_task *task)
break; break;
/* retry with existing socket, after a delay */ /* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ); rpc_delay(task, 3*HZ);
/* fall through */
case -EAGAIN: case -EAGAIN:
/* Check for timeouts before looping back to call_bind */ /* Check for timeouts before looping back to call_bind */
case -ETIMEDOUT: case -ETIMEDOUT:
...@@ -2025,6 +2030,7 @@ call_transmit_status(struct rpc_task *task) ...@@ -2025,6 +2030,7 @@ call_transmit_status(struct rpc_task *task)
rpc_exit(task, task->tk_status); rpc_exit(task, task->tk_status);
break; break;
} }
/* fall through */
case -ECONNRESET: case -ECONNRESET:
case -ECONNABORTED: case -ECONNABORTED:
case -EADDRINUSE: case -EADDRINUSE:
...@@ -2145,6 +2151,7 @@ call_status(struct rpc_task *task) ...@@ -2145,6 +2151,7 @@ call_status(struct rpc_task *task)
* were a timeout. * were a timeout.
*/ */
rpc_delay(task, 3*HZ); rpc_delay(task, 3*HZ);
/* fall through */
case -ETIMEDOUT: case -ETIMEDOUT:
task->tk_action = call_timeout; task->tk_action = call_timeout;
break; break;
...@@ -2152,14 +2159,17 @@ call_status(struct rpc_task *task) ...@@ -2152,14 +2159,17 @@ call_status(struct rpc_task *task)
case -ECONNRESET: case -ECONNRESET:
case -ECONNABORTED: case -ECONNABORTED:
rpc_force_rebind(clnt); rpc_force_rebind(clnt);
/* fall through */
case -EADDRINUSE: case -EADDRINUSE:
rpc_delay(task, 3*HZ); rpc_delay(task, 3*HZ);
/* fall through */
case -EPIPE: case -EPIPE:
case -ENOTCONN: case -ENOTCONN:
task->tk_action = call_bind; task->tk_action = call_bind;
break; break;
case -ENOBUFS: case -ENOBUFS:
rpc_delay(task, HZ>>2); rpc_delay(task, HZ>>2);
/* fall through */
case -EAGAIN: case -EAGAIN:
task->tk_action = call_transmit; task->tk_action = call_transmit;
break; break;
......
@@ -1410,8 +1410,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
 		return PTR_ERR(gssd_dentry);
 	}
-	dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
-		net, NET_NAME(net));
+	dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
+		net->ns.inum, NET_NAME(net));
 	mutex_lock(&sn->pipefs_sb_lock);
 	sn->pipefs_sb = sb;
 	err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
@@ -1462,8 +1462,8 @@ static void rpc_kill_sb(struct super_block *sb)
 		goto out;
 	}
 	sn->pipefs_sb = NULL;
-	dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
-		net, NET_NAME(net));
+	dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n",
+		net->ns.inum, NET_NAME(net));
 	blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 			RPC_PIPEFS_UMOUNT,
 			sb);
......
@@ -216,9 +216,9 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
 	smp_wmb();
 	sn->rpcb_users = 1;
 	dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
-		"%p, rpcb_local_clnt4: %p) for net %p%s\n",
+		"%p, rpcb_local_clnt4: %p) for net %x%s\n",
 		sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
-		net, (net == &init_net) ? " (init_net)" : "");
+		net->ns.inum, (net == &init_net) ? " (init_net)" : "");
 }
 /*
......
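Both the pipefs and rpcbind messages stop printing the struct net pointer and log net->ns.inum instead; the namespace inode number is a stable identifier that can be correlated with /proc/<pid>/ns/net, whereas a raw kernel pointer either leaks an address or, once %p hashing is in effect, is not meaningful at all. A small user-space way to see the same identifier, purely illustrative:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n = readlink("/proc/self/ns/net", buf, sizeof(buf) - 1);

	if (n < 0) {
		perror("readlink");
		return 1;
	}
	buf[n] = '\0';
	/* Prints something like net:[4026531992]; the bracketed number is the
	 * ns.inum the dprintk()s above now log (they print it in hex). */
	printf("%s\n", buf);
	return 0;
}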
@@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
 static void rpc_set_active(struct rpc_task *task)
 {
-	trace_rpc_task_begin(task->tk_client, task, NULL);
 	rpc_task_set_debuginfo(task);
 	set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+	trace_rpc_task_begin(task->tk_client, task, NULL);
 }
 /*
......
@@ -65,10 +65,13 @@ static __net_init int sunrpc_init_net(struct net *net)
 static __net_exit void sunrpc_exit_net(struct net *net)
 {
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 	rpc_pipefs_exit_net(net);
 	unix_gid_cache_destroy(net);
 	ip_map_cache_destroy(net);
 	rpc_proc_exit(net);
+	WARN_ON_ONCE(!list_empty(&sn->all_clients));
 }
 static struct pernet_operations sunrpc_net_ops = {
......
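sunrpc_exit_net() now looks up its per-net data and warns if any RPC clients are still linked on sn->all_clients when the namespace is torn down; anything left there would reference per-net state that is about to be freed. A minimal sketch of this "assert the list is empty at teardown" check:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

struct pernet_data {
	struct list_head all_clients;	/* every client links itself in here */
};

static void exit_net(struct pernet_data *sn)
{
	/* Stand-in for WARN_ON_ONCE(!list_empty(&sn->all_clients)). */
	if (!list_empty(&sn->all_clients))
		fprintf(stderr, "warning: RPC clients leaked at net exit\n");
}

int main(void)
{
	struct pernet_data sn;

	INIT_LIST_HEAD(&sn.all_clients);
	exit_net(&sn);			/* clean teardown: stays silent */
	return 0;
}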
@@ -1139,6 +1139,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 	case -EAGAIN:
 		xprt_add_backlog(xprt, task);
 		dprintk("RPC: waiting for request slot\n");
+		/* fall through */
 	default:
 		task->tk_status = -EAGAIN;
 	}
......
...@@ -43,7 +43,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, ...@@ -43,7 +43,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt); req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
req->rl_backchannel = true; __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL); DMA_TO_DEVICE, GFP_KERNEL);
...@@ -223,8 +223,8 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) ...@@ -223,8 +223,8 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
*p++ = xdr_zero; *p++ = xdr_zero;
*p = xdr_zero; *p = xdr_zero;
if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN, if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
&rqst->rq_snd_buf, rpcrdma_noch)) &rqst->rq_snd_buf, rpcrdma_noch))
return -EIO; return -EIO;
return 0; return 0;
} }
......
...@@ -306,28 +306,9 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -306,28 +306,9 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
} }
} }
/* Use a slow, safe mechanism to invalidate all memory regions
* that were registered for "req".
*/
static void
fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
bool sync)
{
struct rpcrdma_mw *mw;
while (!list_empty(&req->rl_registered)) {
mw = rpcrdma_pop_mw(&req->rl_registered);
if (sync)
fmr_op_recover_mr(mw);
else
rpcrdma_defer_mr_recovery(mw);
}
}
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map, .ro_map = fmr_op_map,
.ro_unmap_sync = fmr_op_unmap_sync, .ro_unmap_sync = fmr_op_unmap_sync,
.ro_unmap_safe = fmr_op_unmap_safe,
.ro_recover_mr = fmr_op_recover_mr, .ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open, .ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages, .ro_maxpages = fmr_op_maxpages,
......
...@@ -420,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -420,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ; IB_ACCESS_REMOTE_READ;
rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr); rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc) if (rc)
goto out_senderr; goto out_senderr;
...@@ -508,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -508,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
f->fr_cqe.done = frwr_wc_localinv_wake; f->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&f->fr_linv_done); reinit_completion(&f->fr_linv_done);
/* Initialize CQ count, since there is always a signaled
* WR being posted here. The new cqcount depends on how
* many SQEs are about to be consumed.
*/
rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
/* Transport disconnect drains the receive CQ before it /* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us * replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer. * unless ri_id->qp is a valid pointer.
...@@ -546,7 +539,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -546,7 +539,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
/* Find and reset the MRs in the LOCAL_INV WRs that did not /* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted. * get posted.
*/ */
rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
while (bad_wr) { while (bad_wr) {
f = container_of(bad_wr, struct rpcrdma_frmr, f = container_of(bad_wr, struct rpcrdma_frmr,
fr_invwr); fr_invwr);
...@@ -559,28 +551,9 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -559,28 +551,9 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
goto unmap; goto unmap;
} }
/* Use a slow, safe mechanism to invalidate all memory regions
* that were registered for "req".
*/
static void
frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
bool sync)
{
struct rpcrdma_mw *mw;
while (!list_empty(&req->rl_registered)) {
mw = rpcrdma_pop_mw(&req->rl_registered);
if (sync)
frwr_op_recover_mr(mw);
else
rpcrdma_defer_mr_recovery(mw);
}
}
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map, .ro_map = frwr_op_map,
.ro_unmap_sync = frwr_op_unmap_sync, .ro_unmap_sync = frwr_op_unmap_sync,
.ro_unmap_safe = frwr_op_unmap_safe,
.ro_recover_mr = frwr_op_recover_mr, .ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open, .ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages, .ro_maxpages = frwr_op_maxpages,
......
/* /*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
...@@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) ...@@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
/* Maximum Read list size */ /* Maximum Read list size */
maxsegs += 2; /* segment for head and tail buffers */ maxsegs += 2; /* segment for head and tail buffers */
size = maxsegs * sizeof(struct rpcrdma_read_chunk); size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
/* Minimal Read chunk size */ /* Minimal Read chunk size */
size += sizeof(__be32); /* segment count */ size += sizeof(__be32); /* segment count */
size += sizeof(struct rpcrdma_segment); size += rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */ size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max call header size = %u\n", dprintk("RPC: %s: max call header size = %u\n",
...@@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) ...@@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
/* Maximum Write list size */ /* Maximum Write list size */
maxsegs += 2; /* segment for head and tail buffers */ maxsegs += 2; /* segment for head and tail buffers */
size = sizeof(__be32); /* segment count */ size = sizeof(__be32); /* segment count */
size += maxsegs * sizeof(struct rpcrdma_segment); size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */ size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max reply header size = %u\n", dprintk("RPC: %s: max reply header size = %u\n",
...@@ -511,27 +512,60 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -511,27 +512,60 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
return 0; return 0;
} }
/* Prepare the RPC-over-RDMA header SGE. /**
* rpcrdma_unmap_sendctx - DMA-unmap Send buffers
* @sc: sendctx containing SGEs to unmap
*
*/
void
rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
{
struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
struct ib_sge *sge;
unsigned int count;
dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
__func__, sc->sc_unmap_count, sc);
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
*/
sge = &sc->sc_sges[2];
for (count = sc->sc_unmap_count; count; ++sge, --count)
ib_dma_unmap_page(ia->ri_device,
sge->addr, sge->length, DMA_TO_DEVICE);
if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
smp_mb__after_atomic();
wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
}
}
/* Prepare an SGE for the RPC-over-RDMA transport header.
*/ */
static bool static bool
rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
u32 len) u32 len)
{ {
struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_rdmabuf; struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
struct ib_sge *sge = &req->rl_send_sge[0]; struct ib_sge *sge = sc->sc_sges;
if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) { if (!rpcrdma_dma_map_regbuf(ia, rb))
if (!__rpcrdma_dma_map_regbuf(ia, rb)) goto out_regbuf;
return false; sge->addr = rdmab_addr(rb);
sge->addr = rdmab_addr(rb);
sge->lkey = rdmab_lkey(rb);
}
sge->length = len; sge->length = len;
sge->lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
sge->length, DMA_TO_DEVICE); sge->length, DMA_TO_DEVICE);
req->rl_send_wr.num_sge++; sc->sc_wr.num_sge++;
return true; return true;
out_regbuf:
pr_err("rpcrdma: failed to DMA map a Send buffer\n");
return false;
} }
/* Prepare the Send SGEs. The head and tail iovec, and each entry /* Prepare the Send SGEs. The head and tail iovec, and each entry
...@@ -541,10 +575,11 @@ static bool ...@@ -541,10 +575,11 @@ static bool
rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{ {
struct rpcrdma_sendctx *sc = req->rl_sendctx;
unsigned int sge_no, page_base, len, remaining; unsigned int sge_no, page_base, len, remaining;
struct rpcrdma_regbuf *rb = req->rl_sendbuf; struct rpcrdma_regbuf *rb = req->rl_sendbuf;
struct ib_device *device = ia->ri_device; struct ib_device *device = ia->ri_device;
struct ib_sge *sge = req->rl_send_sge; struct ib_sge *sge = sc->sc_sges;
u32 lkey = ia->ri_pd->local_dma_lkey; u32 lkey = ia->ri_pd->local_dma_lkey;
struct page *page, **ppages; struct page *page, **ppages;
...@@ -552,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, ...@@ -552,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
* DMA-mapped. Sync the content that has changed. * DMA-mapped. Sync the content that has changed.
*/ */
if (!rpcrdma_dma_map_regbuf(ia, rb)) if (!rpcrdma_dma_map_regbuf(ia, rb))
return false; goto out_regbuf;
sge_no = 1; sge_no = 1;
sge[sge_no].addr = rdmab_addr(rb); sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len; sge[sge_no].length = xdr->head[0].iov_len;
...@@ -607,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, ...@@ -607,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
sge[sge_no].length = len; sge[sge_no].length = len;
sge[sge_no].lkey = lkey; sge[sge_no].lkey = lkey;
req->rl_mapped_sges++; sc->sc_unmap_count++;
ppages++; ppages++;
remaining -= len; remaining -= len;
page_base = 0; page_base = 0;
...@@ -633,56 +668,61 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, ...@@ -633,56 +668,61 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
goto out_mapping_err; goto out_mapping_err;
sge[sge_no].length = len; sge[sge_no].length = len;
sge[sge_no].lkey = lkey; sge[sge_no].lkey = lkey;
req->rl_mapped_sges++; sc->sc_unmap_count++;
} }
out: out:
req->rl_send_wr.num_sge = sge_no + 1; sc->sc_wr.num_sge += sge_no;
if (sc->sc_unmap_count)
__set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
return true; return true;
out_regbuf:
pr_err("rpcrdma: failed to DMA map a Send buffer\n");
return false;
out_mapping_overflow: out_mapping_overflow:
rpcrdma_unmap_sendctx(sc);
pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
return false; return false;
out_mapping_err: out_mapping_err:
rpcrdma_unmap_sendctx(sc);
pr_err("rpcrdma: Send mapping error\n"); pr_err("rpcrdma: Send mapping error\n");
return false; return false;
} }
bool /**
rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
u32 hdrlen, struct xdr_buf *xdr, * @r_xprt: controlling transport
enum rpcrdma_chunktype rtype) * @req: context of RPC Call being marshalled
* @hdrlen: size of transport header, in bytes
* @xdr: xdr_buf containing RPC Call
* @rtype: chunk type being encoded
*
* Returns 0 on success; otherwise a negative errno is returned.
*/
int
rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{ {
req->rl_send_wr.num_sge = 0; req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
req->rl_mapped_sges = 0; if (!req->rl_sendctx)
return -ENOBUFS;
if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen)) req->rl_sendctx->sc_wr.num_sge = 0;
goto out_map; req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req;
__clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
return -EIO;
if (rtype != rpcrdma_areadch) if (rtype != rpcrdma_areadch)
if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype)) if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
goto out_map; return -EIO;
return true;
out_map:
pr_err("rpcrdma: failed to DMA map a Send buffer\n");
return false;
}
void
rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
struct ib_device *device = ia->ri_device;
struct ib_sge *sge;
int count;
sge = &req->rl_send_sge[2]; return 0;
for (count = req->rl_mapped_sges; count--; sge++)
ib_dma_unmap_page(device, sge->addr, sge->length,
DMA_TO_DEVICE);
req->rl_mapped_sges = 0;
} }
/** /**
...@@ -833,12 +873,10 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) ...@@ -833,12 +873,10 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
transfertypes[rtype], transfertypes[wtype], transfertypes[rtype], transfertypes[wtype],
xdr_stream_pos(xdr)); xdr_stream_pos(xdr));
if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
xdr_stream_pos(xdr), &rqst->rq_snd_buf, rtype);
&rqst->rq_snd_buf, rtype)) { if (ret)
ret = -EIO;
goto out_err; goto out_err;
}
return 0; return 0;
out_err: out_err:
...@@ -970,14 +1008,13 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws, ...@@ -970,14 +1008,13 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws,
* straightforward to check the RPC header's direction field. * straightforward to check the RPC header's direction field.
*/ */
static bool static bool
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
__be32 xid, __be32 proc)
#if defined(CONFIG_SUNRPC_BACKCHANNEL) #if defined(CONFIG_SUNRPC_BACKCHANNEL)
{ {
struct xdr_stream *xdr = &rep->rr_stream; struct xdr_stream *xdr = &rep->rr_stream;
__be32 *p; __be32 *p;
if (proc != rdma_msg) if (rep->rr_proc != rdma_msg)
return false; return false;
/* Peek at stream contents without advancing. */ /* Peek at stream contents without advancing. */
...@@ -992,7 +1029,7 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, ...@@ -992,7 +1029,7 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
return false; return false;
/* RPC header */ /* RPC header */
if (*p++ != xid) if (*p++ != rep->rr_xid)
return false; return false;
if (*p != cpu_to_be32(RPC_CALL)) if (*p != cpu_to_be32(RPC_CALL))
return false; return false;
...@@ -1212,105 +1249,170 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, ...@@ -1212,105 +1249,170 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
return -EREMOTEIO; return -EREMOTEIO;
} }
/* Perform XID lookup, reconstruction of the RPC reply, and
* RPC completion while holding the transport lock to ensure
* the rep, rqst, and rq_task pointers remain stable.
*/
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpc_rqst *rqst = rep->rr_rqst;
unsigned long cwnd;
int status;
xprt->reestablish_timeout = 0;
switch (rep->rr_proc) {
case rdma_msg:
status = rpcrdma_decode_msg(r_xprt, rep, rqst);
break;
case rdma_nomsg:
status = rpcrdma_decode_nomsg(r_xprt, rep);
break;
case rdma_error:
status = rpcrdma_decode_error(r_xprt, rep, rqst);
break;
default:
status = -EIO;
}
if (status < 0)
goto out_badheader;
out:
spin_lock(&xprt->recv_lock);
cwnd = xprt->cwnd;
xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(rqst->rq_task);
xprt_complete_rqst(rqst->rq_task, status);
xprt_unpin_rqst(rqst);
spin_unlock(&xprt->recv_lock);
return;
/* If the incoming reply terminated a pending RPC, the next
* RPC call will post a replacement receive buffer as it is
* being marshaled.
*/
out_badheader:
dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
r_xprt->rx_stats.bad_reply_count++;
status = -EIO;
goto out;
}
void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
/* Invalidate and unmap the data payloads before waking
* the waiting application. This guarantees the memory
* regions are properly fenced from the server before the
* application accesses the data. It also ensures proper
* send flow control: waking the next RPC waits until this
* RPC has relinquished all its Send Queue entries.
*/
if (!list_empty(&req->rl_registered))
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
&req->rl_registered);
/* Ensure that any DMA mapped pages associated with
* the Send of the RPC Call have been unmapped before
* allowing the RPC to complete. This protects argument
* memory not controlled by the RPC client from being
* re-used before we're done with it.
*/
if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
r_xprt->rx_stats.reply_waits_for_send++;
out_of_line_wait_on_bit(&req->rl_flags,
RPCRDMA_REQ_F_TX_RESOURCES,
bit_wait,
TASK_UNINTERRUPTIBLE);
}
}
/* Reply handling runs in the poll worker thread. Anything that
* might wait is deferred to a separate workqueue.
*/
void rpcrdma_deferred_completion(struct work_struct *work)
{
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
rpcrdma_release_rqst(rep->rr_rxprt, req);
rpcrdma_complete_rqst(rep);
}
/* Process received RPC/RDMA messages. /* Process received RPC/RDMA messages.
* *
* Errors must result in the RPC task either being awakened, or * Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time. * allowed to timeout, to discover the errors at that time.
*/ */
void void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rpcrdma_reply_handler(struct work_struct *work)
{ {
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt; struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct xdr_stream *xdr = &rep->rr_stream; struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_req *req; struct rpcrdma_req *req;
struct rpc_rqst *rqst; struct rpc_rqst *rqst;
__be32 *p, xid, vers, proc; u32 credits;
unsigned long cwnd; __be32 *p;
int status;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep); dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
if (rep->rr_hdrbuf.head[0].iov_len == 0) if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus; goto out_badstatus;
xdr_init_decode(xdr, &rep->rr_hdrbuf, xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base); rep->rr_hdrbuf.head[0].iov_base);
/* Fixed transport header fields */ /* Fixed transport header fields */
p = xdr_inline_decode(xdr, 4 * sizeof(*p)); p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p)) if (unlikely(!p))
goto out_shortreply; goto out_shortreply;
xid = *p++; rep->rr_xid = *p++;
vers = *p++; rep->rr_vers = *p++;
p++; /* credits */ credits = be32_to_cpu(*p++);
proc = *p++; rep->rr_proc = *p++;
if (rep->rr_vers != rpcrdma_version)
goto out_badversion;
if (rpcrdma_is_bcall(r_xprt, rep, xid, proc)) if (rpcrdma_is_bcall(r_xprt, rep))
return; return;
/* Match incoming rpcrdma_rep to an rpcrdma_req to /* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks. * get context for handling any incoming chunks.
*/ */
spin_lock(&xprt->recv_lock); spin_lock(&xprt->recv_lock);
rqst = xprt_lookup_rqst(xprt, xid); rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
if (!rqst) if (!rqst)
goto out_norqst; goto out_norqst;
xprt_pin_rqst(rqst); xprt_pin_rqst(rqst);
if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > buf->rb_max_requests)
credits = buf->rb_max_requests;
buf->rb_credits = credits;
spin_unlock(&xprt->recv_lock); spin_unlock(&xprt->recv_lock);
req = rpcr_to_rdmar(rqst); req = rpcr_to_rdmar(rqst);
req->rl_reply = rep; req->rl_reply = rep;
rep->rr_rqst = rqst;
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
__func__, rep, req, be32_to_cpu(xid)); __func__, rep, req, be32_to_cpu(rep->rr_xid));
/* Invalidate and unmap the data payloads before waking the
* waiting application. This guarantees the memory regions
* are properly fenced from the server before the application
* accesses the data. It also ensures proper send flow control:
* waking the next RPC waits until this RPC has relinquished
* all its Send Queue entries.
*/
if (!list_empty(&req->rl_registered)) {
rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
&req->rl_registered);
}
xprt->reestablish_timeout = 0;
if (vers != rpcrdma_version)
goto out_badversion;
switch (proc) { if (list_empty(&req->rl_registered) &&
case rdma_msg: !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
status = rpcrdma_decode_msg(r_xprt, rep, rqst); rpcrdma_complete_rqst(rep);
break; else
case rdma_nomsg: queue_work(rpcrdma_receive_wq, &rep->rr_work);
status = rpcrdma_decode_nomsg(r_xprt, rep);
break;
case rdma_error:
status = rpcrdma_decode_error(r_xprt, rep, rqst);
break;
default:
status = -EIO;
}
if (status < 0)
goto out_badheader;
out:
spin_lock(&xprt->recv_lock);
cwnd = xprt->cwnd;
xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(rqst->rq_task);
xprt_complete_rqst(rqst->rq_task, status);
xprt_unpin_rqst(rqst);
spin_unlock(&xprt->recv_lock);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
return; return;
out_badstatus: out_badstatus:
...@@ -1321,37 +1423,22 @@ rpcrdma_reply_handler(struct work_struct *work) ...@@ -1321,37 +1423,22 @@ rpcrdma_reply_handler(struct work_struct *work)
} }
return; return;
/* If the incoming reply terminated a pending RPC, the next
* RPC call will post a replacement receive buffer as it is
* being marshaled.
*/
out_badversion: out_badversion:
dprintk("RPC: %s: invalid version %d\n", dprintk("RPC: %s: invalid version %d\n",
__func__, be32_to_cpu(vers)); __func__, be32_to_cpu(rep->rr_vers));
status = -EIO; goto repost;
r_xprt->rx_stats.bad_reply_count++;
goto out;
out_badheader:
dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
rqst->rq_task->tk_pid, __func__, be32_to_cpu(proc));
r_xprt->rx_stats.bad_reply_count++;
status = -EIO;
goto out;
/* The req was still available, but by the time the recv_lock /* The RPC transaction has already been terminated, or the header
* was acquired, the rqst and task had been released. Thus the RPC * is corrupt.
* has already been terminated.
*/ */
out_norqst: out_norqst:
spin_unlock(&xprt->recv_lock); spin_unlock(&xprt->recv_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x\n", dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
__func__, be32_to_cpu(xid)); __func__, be32_to_cpu(rep->rr_xid));
goto repost; goto repost;
out_shortreply: out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__); dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
/* If no pending RPC transaction was matched, post a replacement /* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning. * receive buffer before returning.
......
/* /*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
...@@ -678,16 +679,14 @@ xprt_rdma_free(struct rpc_task *task) ...@@ -678,16 +679,14 @@ xprt_rdma_free(struct rpc_task *task)
struct rpc_rqst *rqst = task->tk_rqstp; struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
if (req->rl_backchannel) if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
return; return;
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
if (!list_empty(&req->rl_registered)) if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); rpcrdma_release_rqst(r_xprt, req);
rpcrdma_unmap_sges(ia, req);
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
} }
...@@ -728,7 +727,8 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -728,7 +727,8 @@ xprt_rdma_send_request(struct rpc_task *task)
/* On retransmit, remove any previously registered chunks */ /* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered))) if (unlikely(!list_empty(&req->rl_registered)))
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
&req->rl_registered);
rc = rpcrdma_marshal_req(r_xprt, rqst); rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0) if (rc < 0)
...@@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection; goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie; req->rl_connect_cookie = xprt->connect_cookie;
set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection; goto drop_connection;
...@@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) ...@@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.failed_marshal_count, r_xprt->rx_stats.failed_marshal_count,
r_xprt->rx_stats.bad_reply_count, r_xprt->rx_stats.bad_reply_count,
r_xprt->rx_stats.nomsg_call_count); r_xprt->rx_stats.nomsg_call_count);
seq_printf(seq, "%lu %lu %lu %lu\n", seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
r_xprt->rx_stats.mrs_recovered, r_xprt->rx_stats.mrs_recovered,
r_xprt->rx_stats.mrs_orphaned, r_xprt->rx_stats.mrs_orphaned,
r_xprt->rx_stats.mrs_allocated, r_xprt->rx_stats.mrs_allocated,
r_xprt->rx_stats.local_inv_needed); r_xprt->rx_stats.local_inv_needed,
r_xprt->rx_stats.empty_sendctx_q,
r_xprt->rx_stats.reply_waits_for_send);
} }
static int static int
......
/* /*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
...@@ -49,9 +50,10 @@ ...@@ -49,9 +50,10 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h> #include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h> #include <linux/sunrpc/svc_rdma.h>
#include <asm-generic/barrier.h>
#include <asm/bitops.h> #include <asm/bitops.h>
#include <rdma/ib_cm.h> #include <rdma/ib_cm.h>
...@@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); ...@@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
static struct workqueue_struct *rpcrdma_receive_wq __read_mostly; struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
int int
rpcrdma_alloc_wq(void) rpcrdma_alloc_wq(void)
...@@ -126,30 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) ...@@ -126,30 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
static void static void
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{ {
struct ib_cqe *cqe = wc->wr_cqe;
struct rpcrdma_sendctx *sc =
container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n", pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status), ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err); wc->status, wc->vendor_err);
}
/* Perform basic sanity checking to avoid using garbage
* to update the credit grant value.
*/
static void
rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
{
struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
__be32 *p = rep->rr_rdmabuf->rg_base;
u32 credits;
credits = be32_to_cpup(p + 2); rpcrdma_sendctx_put_locked(sc);
if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > buffer->rb_max_requests)
credits = buffer->rb_max_requests;
atomic_set(&buffer->rb_credits, credits);
} }
/** /**
...@@ -181,11 +170,8 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) ...@@ -181,11 +170,8 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rdmab_addr(rep->rr_rdmabuf), rdmab_addr(rep->rr_rdmabuf),
wc->byte_len, DMA_FROM_DEVICE); wc->byte_len, DMA_FROM_DEVICE);
if (wc->byte_len >= RPCRDMA_HDRLEN_ERR)
rpcrdma_update_granted_credits(rep);
out_schedule: out_schedule:
queue_work(rpcrdma_receive_wq, &rep->rr_work); rpcrdma_reply_handler(rep);
return; return;
out_fail: out_fail:
...@@ -295,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -295,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED; connstate = -ECONNABORTED;
connected: connected:
atomic_set(&xprt->rx_buf.rb_credits, 1); xprt->rx_buf.rb_credits = 1;
ep->rep_connected = connstate; ep->rep_connected = connstate;
rpcrdma_conn_func(ep); rpcrdma_conn_func(ep);
wake_up_all(&ep->rep_connect_wait); wake_up_all(&ep->rep_connect_wait);
...@@ -564,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -564,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_sge); ep->rep_attr.cap.max_recv_sge);
/* set trigger for requesting send completion */ /* set trigger for requesting send completion */
ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
if (ep->rep_cqinit <= 2) cdata->max_requests >> 2);
ep->rep_cqinit = 0; /* always signal? */ ep->rep_send_count = ep->rep_send_batch;
rpcrdma_init_cqcount(ep, 0);
init_waitqueue_head(&ep->rep_connect_wait); init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
sendcq = ib_alloc_cq(ia->ri_device, NULL, sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1, ep->rep_attr.cap.max_send_wr + 1,
0, IB_POLL_SOFTIRQ); 1, IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) { if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq); rc = PTR_ERR(sendcq);
dprintk("RPC: %s: failed to create send CQ: %i\n", dprintk("RPC: %s: failed to create send CQ: %i\n",
...@@ -583,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -583,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
recvcq = ib_alloc_cq(ia->ri_device, NULL, recvcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_recv_wr + 1, ep->rep_attr.cap.max_recv_wr + 1,
0, IB_POLL_SOFTIRQ); 0, IB_POLL_WORKQUEUE);
if (IS_ERR(recvcq)) { if (IS_ERR(recvcq)) {
rc = PTR_ERR(recvcq); rc = PTR_ERR(recvcq);
dprintk("RPC: %s: failed to create recv CQ: %i\n", dprintk("RPC: %s: failed to create recv CQ: %i\n",
...@@ -846,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -846,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ib_drain_qp(ia->ri_id->qp); ib_drain_qp(ia->ri_id->qp);
} }
/* Fixed-size circular FIFO queue. This implementation is wait-free and
* lock-free.
*
* Consumer is the code path that posts Sends. This path dequeues a
* sendctx for use by a Send operation. Multiple consumer threads
* are serialized by the RPC transport lock, which allows only one
* ->send_request call at a time.
*
* Producer is the code path that handles Send completions. This path
* enqueues a sendctx that has been completed. Multiple producer
* threads are serialized by the ib_poll_cq() function.
*/
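The comment above describes the new send-context ring. As a rough illustration of that discipline, here is a minimal user-space sketch (not kernel code; the names, the slot count, and the use of C11 atomics in place of READ_ONCE()/smp_store_release() are all assumptions made for the example): the consumer advances the head to take a free slot, and the producer advances the tail to hand back every slot up to the one whose Send just completed.

#include <stdatomic.h>
#include <stdio.h>

#define NSLOTS 8				/* rb_sc_last would be NSLOTS - 1 */

struct ring {
	unsigned long head;			/* written only by the consumer */
	_Atomic unsigned long tail;		/* written only by the producer */
	int slots[NSLOTS];
};

static unsigned long ring_next(unsigned long item)
{
	return item < NSLOTS - 1 ? item + 1 : 0;
}

/* Consumer: take the next free slot, or fail if completions are lagging. */
static int *ring_get(struct ring *r)
{
	unsigned long next_head = ring_next(r->head);

	if (next_head == atomic_load_explicit(&r->tail, memory_order_acquire))
		return NULL;
	r->head = next_head;
	return &r->slots[next_head];
}

/* Producer: release every slot up to and including the completed one. */
static void ring_put(struct ring *r, int *slot)
{
	unsigned long next_tail = atomic_load_explicit(&r->tail, memory_order_relaxed);

	do {
		next_tail = ring_next(next_tail);
		/* the kernel unmaps slots[next_tail]'s SGEs here */
	} while (&r->slots[next_tail] != slot);

	atomic_store_explicit(&r->tail, next_tail, memory_order_release);
}

int main(void)
{
	struct ring r = { .head = 0, .slots = { 0 } };
	int *a, *b;

	atomic_init(&r.tail, 0);
	a = ring_get(&r);
	b = ring_get(&r);
	printf("took slots %td and %td\n", a - r.slots, b - r.slots);

	ring_put(&r, b);	/* hands back both a's and b's slots */
	printf("tail advanced to %lu\n",
	       atomic_load_explicit(&r.tail, memory_order_relaxed));
	return 0;
}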
/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
* queue activity, and ib_drain_qp has flushed all remaining Send
* requests.
*/
static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
{
unsigned long i;
for (i = 0; i <= buf->rb_sc_last; i++)
kfree(buf->rb_sc_ctxs[i]);
kfree(buf->rb_sc_ctxs);
}
static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
{
struct rpcrdma_sendctx *sc;
sc = kzalloc(sizeof(*sc) +
ia->ri_max_send_sges * sizeof(struct ib_sge),
GFP_KERNEL);
if (!sc)
return NULL;
sc->sc_wr.wr_cqe = &sc->sc_cqe;
sc->sc_wr.sg_list = sc->sc_sges;
sc->sc_wr.opcode = IB_WR_SEND;
sc->sc_cqe.done = rpcrdma_wc_send;
return sc;
}
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_sendctx *sc;
unsigned long i;
/* Maximum number of concurrent outstanding Send WRs. Capping
* the circular queue size stops Send Queue overflow by causing
* the ->send_request call to fail temporarily before too many
* Sends are posted.
*/
i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
if (!buf->rb_sc_ctxs)
return -ENOMEM;
buf->rb_sc_last = i - 1;
for (i = 0; i <= buf->rb_sc_last; i++) {
sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
if (!sc)
goto out_destroy;
sc->sc_xprt = r_xprt;
buf->rb_sc_ctxs[i] = sc;
}
return 0;
out_destroy:
rpcrdma_sendctxs_destroy(buf);
return -ENOMEM;
}
/* The sendctx queue is not guaranteed to have a size that is a
* power of two, thus the helpers in circ_buf.h cannot be used.
* The other option is to use modulus (%), which can be expensive.
*/
static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
unsigned long item)
{
return likely(item < buf->rb_sc_last) ? item + 1 : 0;
}
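A quick way to convince oneself that this branch-based advance matches the modulus form for a ring whose size is not a power of two is a few lines of throwaway user-space C (the size 6 here is arbitrary):

#include <assert.h>

int main(void)
{
	const unsigned long size = 6, last = size - 1;
	unsigned long i;

	for (i = 0; i <= last; i++)
		assert((i < last ? i + 1 : 0) == (i + 1) % size);
	return 0;
}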
/**
* rpcrdma_sendctx_get_locked - Acquire a send context
* @buf: transport buffers from which to acquire an unused context
*
* Returns pointer to a free send completion context; or NULL if
* the queue is empty.
*
* Usage: Called to acquire an SGE array before preparing a Send WR.
*
* The caller serializes calls to this function (per rpcrdma_buffer),
* and provides an effective memory barrier that flushes the new value
* of rb_sc_head.
*/
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
{
struct rpcrdma_xprt *r_xprt;
struct rpcrdma_sendctx *sc;
unsigned long next_head;
next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
if (next_head == READ_ONCE(buf->rb_sc_tail))
goto out_emptyq;
/* ORDER: item must be accessed _before_ head is updated */
sc = buf->rb_sc_ctxs[next_head];
/* Releasing the lock in the caller acts as a memory
* barrier that flushes rb_sc_head.
*/
buf->rb_sc_head = next_head;
return sc;
out_emptyq:
/* The queue is "empty" if there have not been enough Send
* completions recently. This is a sign the Send Queue is
* backing up. Cause the caller to pause and try again.
*/
dprintk("RPC: %s: empty sendctx queue\n", __func__);
r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
r_xprt->rx_stats.empty_sendctx_q++;
return NULL;
}
/**
* rpcrdma_sendctx_put_locked - Release a send context
* @sc: send context to release
*
* Usage: Called from Send completion to return a sendctx
* to the queue.
*
* The caller serializes calls to this function (per rpcrdma_buffer).
*/
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
{
struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
unsigned long next_tail;
/* Unmap SGEs of previously completed but unsignaled
* Sends by walking up the queue until @sc is found.
*/
next_tail = buf->rb_sc_tail;
do {
next_tail = rpcrdma_sendctx_next(buf, next_tail);
/* ORDER: item must be accessed _before_ tail is updated */
rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
} while (buf->rb_sc_ctxs[next_tail] != sc);
/* Paired with READ_ONCE */
smp_store_release(&buf->rb_sc_tail, next_tail);
}
static void static void
rpcrdma_mr_recovery_worker(struct work_struct *work) rpcrdma_mr_recovery_worker(struct work_struct *work)
{ {
...@@ -941,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) ...@@ -941,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
spin_lock(&buffer->rb_reqslock); spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs); list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock); spin_unlock(&buffer->rb_reqslock);
req->rl_cqe.done = rpcrdma_wc_send;
req->rl_buffer = &r_xprt->rx_buf; req->rl_buffer = &r_xprt->rx_buf;
INIT_LIST_HEAD(&req->rl_registered); INIT_LIST_HEAD(&req->rl_registered);
req->rl_send_wr.next = NULL;
req->rl_send_wr.wr_cqe = &req->rl_cqe;
req->rl_send_wr.sg_list = req->rl_send_sge;
req->rl_send_wr.opcode = IB_WR_SEND;
return req; return req;
} }
...@@ -974,7 +1116,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) ...@@ -974,7 +1116,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_cqe.done = rpcrdma_wc_receive; rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt; rep->rr_rxprt = r_xprt;
INIT_WORK(&rep->rr_work, rpcrdma_reply_handler); INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
rep->rr_recv_wr.next = NULL; rep->rr_recv_wr.next = NULL;
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
...@@ -995,7 +1137,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) ...@@ -995,7 +1137,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
buf->rb_max_requests = r_xprt->rx_data.max_requests; buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0; buf->rb_bc_srv_max_requests = 0;
atomic_set(&buf->rb_credits, 1);
spin_lock_init(&buf->rb_mwlock); spin_lock_init(&buf->rb_mwlock);
spin_lock_init(&buf->rb_lock); spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock); spin_lock_init(&buf->rb_recovery_lock);
...@@ -1022,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) ...@@ -1022,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
rc = PTR_ERR(req); rc = PTR_ERR(req);
goto out; goto out;
} }
req->rl_backchannel = false;
list_add(&req->rl_list, &buf->rb_send_bufs); list_add(&req->rl_list, &buf->rb_send_bufs);
} }
...@@ -1040,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) ...@@ -1040,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
list_add(&rep->rr_list, &buf->rb_recv_bufs); list_add(&rep->rr_list, &buf->rb_recv_bufs);
} }
rc = rpcrdma_sendctxs_create(r_xprt);
if (rc)
goto out;
return 0; return 0;
out: out:
rpcrdma_buffer_destroy(buf); rpcrdma_buffer_destroy(buf);
...@@ -1116,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) ...@@ -1116,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
cancel_delayed_work_sync(&buf->rb_recovery_worker); cancel_delayed_work_sync(&buf->rb_recovery_worker);
cancel_delayed_work_sync(&buf->rb_refresh_worker); cancel_delayed_work_sync(&buf->rb_refresh_worker);
rpcrdma_sendctxs_destroy(buf);
while (!list_empty(&buf->rb_recv_bufs)) { while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep; struct rpcrdma_rep *rep;
...@@ -1231,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) ...@@ -1231,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
struct rpcrdma_buffer *buffers = req->rl_buffer; struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_rep *rep = req->rl_reply; struct rpcrdma_rep *rep = req->rl_reply;
req->rl_send_wr.num_sge = 0;
req->rl_reply = NULL; req->rl_reply = NULL;
spin_lock(&buffers->rb_lock); spin_lock(&buffers->rb_lock);
...@@ -1363,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, ...@@ -1363,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_ep *ep, struct rpcrdma_ep *ep,
struct rpcrdma_req *req) struct rpcrdma_req *req)
{ {
struct ib_send_wr *send_wr = &req->rl_send_wr; struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
struct ib_send_wr *send_wr_fail; struct ib_send_wr *send_wr_fail;
int rc; int rc;
...@@ -1377,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, ...@@ -1377,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
dprintk("RPC: %s: posting %d s/g entries\n", dprintk("RPC: %s: posting %d s/g entries\n",
__func__, send_wr->num_sge); __func__, send_wr->num_sge);
rpcrdma_set_signaled(ep, send_wr); if (!ep->rep_send_count ||
test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
send_wr->send_flags |= IB_SEND_SIGNALED;
ep->rep_send_count = ep->rep_send_batch;
} else {
send_wr->send_flags &= ~IB_SEND_SIGNALED;
--ep->rep_send_count;
}
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
if (rc) if (rc)
goto out_postsend_err; goto out_postsend_err;
......
/* /*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
...@@ -93,8 +94,8 @@ enum { ...@@ -93,8 +94,8 @@ enum {
*/ */
struct rpcrdma_ep { struct rpcrdma_ep {
atomic_t rep_cqcount; unsigned int rep_send_count;
int rep_cqinit; unsigned int rep_send_batch;
int rep_connected; int rep_connected;
struct ib_qp_init_attr rep_attr; struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait; wait_queue_head_t rep_connect_wait;
...@@ -104,25 +105,6 @@ struct rpcrdma_ep { ...@@ -104,25 +105,6 @@ struct rpcrdma_ep {
struct delayed_work rep_connect_worker; struct delayed_work rep_connect_worker;
}; };
static inline void
rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
{
atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
}
/* To update send queue accounting, provider must take a
* send completion every now and then.
*/
static inline void
rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
{
send_wr->send_flags = 0;
if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
rpcrdma_init_cqcount(ep, 0);
send_wr->send_flags = IB_SEND_SIGNALED;
}
}
/* Pre-allocate extra Work Requests for handling backward receives /* Pre-allocate extra Work Requests for handling backward receives
* and sends. This is a fixed value because the Work Queues are * and sends. This is a fixed value because the Work Queues are
* allocated when the forward channel is set up. * allocated when the forward channel is set up.
...@@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb) ...@@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb)
return rb->rg_iov.lkey; return rb->rg_iov.lkey;
} }
static inline struct rpcrdma_msg *
rdmab_to_msg(struct rpcrdma_regbuf *rb)
{
return (struct rpcrdma_msg *)rb->rg_base;
}
static inline struct ib_device * static inline struct ib_device *
rdmab_device(struct rpcrdma_regbuf *rb) rdmab_device(struct rpcrdma_regbuf *rb)
{ {
...@@ -202,22 +178,24 @@ enum { ...@@ -202,22 +178,24 @@ enum {
}; };
/* /*
* struct rpcrdma_rep -- this structure encapsulates state required to recv * struct rpcrdma_rep -- this structure encapsulates state required
* and complete a reply, asynchronously. It needs several pieces of * to receive and complete an RPC Reply, asynchronously. It needs
* state: * several pieces of state:
* o recv buffer (posted to provider)
* o ib_sge (also donated to provider)
* o status of reply (length, success or not)
* o bookkeeping state to get run by reply handler (list, etc)
* *
* These are allocated during initialization, per-transport instance. * o receive buffer and ib_sge (donated to provider)
* o status of receive (success or not, length, inv rkey)
* o bookkeeping state to get run by reply handler (XDR stream)
* *
* N of these are associated with a transport instance, and stored in * These structures are allocated during transport initialization.
* struct rpcrdma_buffer. N is the max number of outstanding requests. * N of these are associated with a transport instance, managed by
* struct rpcrdma_buffer. N is the max number of outstanding RPCs.
*/ */
struct rpcrdma_rep { struct rpcrdma_rep {
struct ib_cqe rr_cqe; struct ib_cqe rr_cqe;
__be32 rr_xid;
__be32 rr_vers;
__be32 rr_proc;
int rr_wc_flags; int rr_wc_flags;
u32 rr_inv_rkey; u32 rr_inv_rkey;
struct rpcrdma_regbuf *rr_rdmabuf; struct rpcrdma_regbuf *rr_rdmabuf;
...@@ -225,10 +203,34 @@ struct rpcrdma_rep { ...@@ -225,10 +203,34 @@ struct rpcrdma_rep {
struct work_struct rr_work; struct work_struct rr_work;
struct xdr_buf rr_hdrbuf; struct xdr_buf rr_hdrbuf;
struct xdr_stream rr_stream; struct xdr_stream rr_stream;
struct rpc_rqst *rr_rqst;
struct list_head rr_list; struct list_head rr_list;
struct ib_recv_wr rr_recv_wr; struct ib_recv_wr rr_recv_wr;
}; };
/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
*/
struct rpcrdma_req;
struct rpcrdma_xprt;
struct rpcrdma_sendctx {
struct ib_send_wr sc_wr;
struct ib_cqe sc_cqe;
struct rpcrdma_xprt *sc_xprt;
struct rpcrdma_req *sc_req;
unsigned int sc_unmap_count;
struct ib_sge sc_sges[];
};
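For orientation, here is a simplified user-space sketch of how a completion path might walk such a structure; the types and names are stand-ins, the unmap call is a placeholder for ib_dma_unmap_page()/ib_dma_unmap_single(), and the kernel additionally skips SGEs that stay mapped for the life of the transport.

#include <stdio.h>

struct example_sge {
	unsigned long addr;
	unsigned int length;
};

struct example_sendctx {
	unsigned int unmap_count;		/* like sc_unmap_count */
	struct example_sge sges[4];		/* like sc_sges[] */
};

/* Placeholder for the real DMA unmap call. */
static void example_dma_unmap(const struct example_sge *sge)
{
	printf("unmap 0x%lx (%u bytes)\n", sge->addr, sge->length);
}

static void example_unmap_sendctx(struct example_sendctx *sc)
{
	unsigned int i;

	for (i = 0; i < sc->unmap_count; i++)
		example_dma_unmap(&sc->sges[i]);
	sc->unmap_count = 0;
}

int main(void)
{
	struct example_sendctx sc = {
		.unmap_count = 2,
		.sges = { { 0x1000, 256 }, { 0x2000, 1024 } },
	};

	example_unmap_sendctx(&sc);
	return 0;
}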
/* Limit the number of SGEs that can be unmapped during one
* Send completion. This caps the amount of work a single
* completion can do before returning to the provider.
*
* Setting this to zero disables Send completion batching.
*/
enum {
RPCRDMA_MAX_SEND_BATCH = 7,
};
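The batching described above works because only an occasional Send asks for a completion. A minimal sketch of that signaling decision, mirroring the logic added to rpcrdma_ep_post() earlier in this patch (the helper name and struct here are assumptions for illustration):

#include <stdbool.h>
#include <stdio.h>

enum { EXAMPLE_SEND_BATCH = 7 };	/* stands in for RPCRDMA_MAX_SEND_BATCH */

struct example_ep {
	unsigned int send_count;	/* Sends remaining before the next signal */
};

/* Returns true when this Send WR should carry IB_SEND_SIGNALED. */
static bool example_signal_this_send(struct example_ep *ep, bool must_signal)
{
	if (!ep->send_count || must_signal) {
		ep->send_count = EXAMPLE_SEND_BATCH;	/* start a new batch */
		return true;
	}
	--ep->send_count;
	return false;
}

int main(void)
{
	struct example_ep ep = { .send_count = 0 };
	int i, signals = 0;

	for (i = 0; i < 20; i++)
		signals += example_signal_this_send(&ep, false);
	printf("%d of 20 Sends were signaled\n", signals);	/* prints 3 */
	return 0;
}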
/* /*
* struct rpcrdma_mw - external memory region metadata * struct rpcrdma_mw - external memory region metadata
* *
...@@ -340,26 +342,30 @@ enum { ...@@ -340,26 +342,30 @@ enum {
struct rpcrdma_buffer; struct rpcrdma_buffer;
struct rpcrdma_req { struct rpcrdma_req {
struct list_head rl_list; struct list_head rl_list;
unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie; unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer; struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply; struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream; struct xdr_stream rl_stream;
struct xdr_buf rl_hdrbuf; struct xdr_buf rl_hdrbuf;
struct ib_send_wr rl_send_wr; struct rpcrdma_sendctx *rl_sendctx;
struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
struct ib_cqe rl_cqe;
struct list_head rl_all; struct list_head rl_all;
bool rl_backchannel; unsigned long rl_flags;
struct list_head rl_registered; /* registered segments */ struct list_head rl_registered; /* registered segments */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
}; };
/* rl_flags */
enum {
RPCRDMA_REQ_F_BACKCHANNEL = 0,
RPCRDMA_REQ_F_PENDING,
RPCRDMA_REQ_F_TX_RESOURCES,
};
static inline void static inline void
rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
{ {
...@@ -399,12 +405,17 @@ struct rpcrdma_buffer { ...@@ -399,12 +405,17 @@ struct rpcrdma_buffer {
struct list_head rb_mws; struct list_head rb_mws;
struct list_head rb_all; struct list_head rb_all;
unsigned long rb_sc_head;
unsigned long rb_sc_tail;
unsigned long rb_sc_last;
struct rpcrdma_sendctx **rb_sc_ctxs;
spinlock_t rb_lock; /* protect buf lists */ spinlock_t rb_lock; /* protect buf lists */
int rb_send_count, rb_recv_count; int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs; struct list_head rb_send_bufs;
struct list_head rb_recv_bufs; struct list_head rb_recv_bufs;
u32 rb_max_requests; u32 rb_max_requests;
atomic_t rb_credits; /* most recent credit grant */ u32 rb_credits; /* most recent credit grant */
u32 rb_bc_srv_max_requests; u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */ spinlock_t rb_reqslock; /* protect rb_allreqs */
...@@ -453,10 +464,12 @@ struct rpcrdma_stats { ...@@ -453,10 +464,12 @@ struct rpcrdma_stats {
unsigned long mrs_recovered; unsigned long mrs_recovered;
unsigned long mrs_orphaned; unsigned long mrs_orphaned;
unsigned long mrs_allocated; unsigned long mrs_allocated;
unsigned long empty_sendctx_q;
/* accessed when receiving a reply */ /* accessed when receiving a reply */
unsigned long long total_rdma_reply; unsigned long long total_rdma_reply;
unsigned long long fixup_copy_count; unsigned long long fixup_copy_count;
unsigned long reply_waits_for_send;
unsigned long local_inv_needed; unsigned long local_inv_needed;
unsigned long nomsg_call_count; unsigned long nomsg_call_count;
unsigned long bcall_count; unsigned long bcall_count;
...@@ -473,8 +486,6 @@ struct rpcrdma_memreg_ops { ...@@ -473,8 +486,6 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mw **); struct rpcrdma_mw **);
void (*ro_unmap_sync)(struct rpcrdma_xprt *, void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *); struct list_head *);
void (*ro_unmap_safe)(struct rpcrdma_xprt *,
struct rpcrdma_req *, bool);
void (*ro_recover_mr)(struct rpcrdma_mw *); void (*ro_recover_mr)(struct rpcrdma_mw *);
int (*ro_open)(struct rpcrdma_ia *, int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *, struct rpcrdma_ep *,
...@@ -532,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); ...@@ -532,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *);
bool fmr_is_supported(struct rpcrdma_ia *); bool fmr_is_supported(struct rpcrdma_ia *);
extern struct workqueue_struct *rpcrdma_receive_wq;
/* /*
* Endpoint calls - xprtrdma/verbs.c * Endpoint calls - xprtrdma/verbs.c
*/ */
...@@ -554,6 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); ...@@ -554,6 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *); void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *); int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
...@@ -610,12 +625,18 @@ enum rpcrdma_chunktype { ...@@ -610,12 +625,18 @@ enum rpcrdma_chunktype {
rpcrdma_replych rpcrdma_replych
}; };
bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
u32, struct xdr_buf *, enum rpcrdma_chunktype); struct rpcrdma_req *req, u32 hdrlen,
void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); struct xdr_buf *xdr,
enum rpcrdma_chunktype rtype);
void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_reply_handler(struct work_struct *work); void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req);
void rpcrdma_deferred_completion(struct work_struct *work);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{ {
......
...@@ -552,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task) ...@@ -552,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task)
default: default:
dprintk("RPC: sendmsg returned unrecognized error %d\n", dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status); -status);
/* fall through */
case -EPIPE: case -EPIPE:
xs_close(xprt); xs_close(xprt);
status = -ENOTCONN; status = -ENOTCONN;
...@@ -1611,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk) ...@@ -1611,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->connect_cookie++; xprt->connect_cookie++;
clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CONNECTED, &xprt->state);
xs_tcp_force_close(xprt); xs_tcp_force_close(xprt);
/* fall through */
case TCP_CLOSING: case TCP_CLOSING:
/* /*
* If the server closed down the connection, make sure that * If the server closed down the connection, make sure that
...@@ -2368,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) ...@@ -2368,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
switch (ret) { switch (ret) {
case 0: case 0:
xs_set_srcport(transport, sock); xs_set_srcport(transport, sock);
/* fall through */
case -EINPROGRESS: case -EINPROGRESS:
/* SYN_SENT! */ /* SYN_SENT! */
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
...@@ -2419,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) ...@@ -2419,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
default: default:
printk("%s: connect returned unhandled error %d\n", printk("%s: connect returned unhandled error %d\n",
__func__, status); __func__, status);
/* fall through */
case -EADDRNOTAVAIL: case -EADDRNOTAVAIL:
/* We're probably in TIME_WAIT. Get rid of existing socket, /* We're probably in TIME_WAIT. Get rid of existing socket,
* and retry * and retry
......