Commit efd52b5d, author: Linus Torvalds

Merge tag 'nfs-for-4.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable bugfixes:

   - Fix breakages in the nfsstat utility due to the inclusion of the
     NFSv4 LOOKUPP operation

   - Fix a NULL pointer dereference in nfs_idmap_prepare_pipe_upcall()
     due to nfs_idmap_legacy_upcall() being called without an 'aux'
     parameter

   - Fix a refcount leak in the standard O_DIRECT error path

   - Fix a refcount leak in the pNFS O_DIRECT fallback to MDS path

   - Fix CPU latency issues with nfs_commit_release_pages()

   - Fix the LAYOUTUNAVAILABLE error case in the file layout type

   - NFS: Fix a race between mmap() and O_DIRECT

  Features:

   - Support the statx() mask and query flags to enable optimisations
     when the user is requesting only attributes that are already up to
     date in the inode cache, or is specifying the AT_STATX_DONT_SYNC
     flag

   - Add a module alias for the SCSI pNFS layout type

  Bugfixes:

   - Automounting when resolving a NFSv4 referral should preserve the
     RDMA transport protocol settings

   - Various other RDMA bugfixes from Chuck

   - pNFS block layout fixes

   - Always set NFS_LOCK_LOST when a lock is lost"

* tag 'nfs-for-4.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (69 commits)
  NFS: Fix a race between mmap() and O_DIRECT
  NFS: Remove a redundant call to unmap_mapping_range()
  pnfs/blocklayout: Ensure disk address in block device map
  pnfs/blocklayout: pnfs_block_dev_map uses bytes, not sectors
  lockd: Fix server refcounting
  SUNRPC: Fix null rpc_clnt dereference in rpc_task_queued tracepoint
  SUNRPC: Micro-optimize __rpc_execute
  SUNRPC: task_run_action should display tk_callback
  sunrpc: Format RPC events consistently for display
  SUNRPC: Trace xprt_timer events
  xprtrdma: Correct some documenting comments
  xprtrdma: Fix "bytes registered" accounting
  xprtrdma: Instrument allocation/release of rpcrdma_req/rep objects
  xprtrdma: Add trace points to instrument QP and CQ access upcalls
  xprtrdma: Add trace points in the client-side backchannel code paths
  xprtrdma: Add trace points for connect events
  xprtrdma: Add trace points to instrument MR allocation and recovery
  xprtrdma: Add trace points to instrument memory invalidation
  xprtrdma: Add trace points in reply decoder path
  xprtrdma: Add trace points to instrument memory registration
  ..
...@@ -48,13 +48,13 @@ void nlmclnt_next_cookie(struct nlm_cookie *c) ...@@ -48,13 +48,13 @@ void nlmclnt_next_cookie(struct nlm_cookie *c)
static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
{ {
atomic_inc(&lockowner->count); refcount_inc(&lockowner->count);
return lockowner; return lockowner;
} }
static void nlm_put_lockowner(struct nlm_lockowner *lockowner) static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
{ {
if (!atomic_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
return; return;
list_del(&lockowner->list); list_del(&lockowner->list);
spin_unlock(&lockowner->host->h_lock); spin_unlock(&lockowner->host->h_lock);
...@@ -105,7 +105,7 @@ static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_ ...@@ -105,7 +105,7 @@ static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_
res = __nlm_find_lockowner(host, owner); res = __nlm_find_lockowner(host, owner);
if (res == NULL && new != NULL) { if (res == NULL && new != NULL) {
res = new; res = new;
atomic_set(&new->count, 1); refcount_set(&new->count, 1);
new->owner = owner; new->owner = owner;
new->pid = __nlm_alloc_pid(host); new->pid = __nlm_alloc_pid(host);
new->host = nlm_get_host(host); new->host = nlm_get_host(host);
...@@ -204,7 +204,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) ...@@ -204,7 +204,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
for(;;) { for(;;) {
call = kzalloc(sizeof(*call), GFP_KERNEL); call = kzalloc(sizeof(*call), GFP_KERNEL);
if (call != NULL) { if (call != NULL) {
atomic_set(&call->a_count, 1); refcount_set(&call->a_count, 1);
locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_args.lock.fl);
locks_init_lock(&call->a_res.lock.fl); locks_init_lock(&call->a_res.lock.fl);
call->a_host = nlm_get_host(host); call->a_host = nlm_get_host(host);
...@@ -222,7 +222,7 @@ void nlmclnt_release_call(struct nlm_rqst *call) ...@@ -222,7 +222,7 @@ void nlmclnt_release_call(struct nlm_rqst *call)
{ {
const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops; const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops;
if (!atomic_dec_and_test(&call->a_count)) if (!refcount_dec_and_test(&call->a_count))
return; return;
if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call) if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call)
nlmclnt_ops->nlmclnt_release_call(call->a_callback_data); nlmclnt_ops->nlmclnt_release_call(call->a_callback_data);
...@@ -678,7 +678,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) ...@@ -678,7 +678,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
goto out; goto out;
} }
atomic_inc(&req->a_count); refcount_inc(&req->a_count);
status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req, status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
NLMPROC_UNLOCK, &nlmclnt_unlock_ops); NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
if (status < 0) if (status < 0)
...@@ -769,7 +769,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl ...@@ -769,7 +769,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
nlmclnt_setlockargs(req, fl); nlmclnt_setlockargs(req, fl);
req->a_args.block = block; req->a_args.block = block;
atomic_inc(&req->a_count); refcount_inc(&req->a_count);
status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req, status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
NLMPROC_CANCEL, &nlmclnt_cancel_ops); NLMPROC_CANCEL, &nlmclnt_cancel_ops);
if (status == 0 && req->a_res.status == nlm_lck_denied) if (status == 0 && req->a_res.status == nlm_lck_denied)
......
...@@ -114,7 +114,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, ...@@ -114,7 +114,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
unsigned long now = jiffies; unsigned long now = jiffies;
if (nsm != NULL) if (nsm != NULL)
atomic_inc(&nsm->sm_count); refcount_inc(&nsm->sm_count);
else { else {
host = NULL; host = NULL;
nsm = nsm_get_handle(ni->net, ni->sap, ni->salen, nsm = nsm_get_handle(ni->net, ni->sap, ni->salen,
...@@ -151,7 +151,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, ...@@ -151,7 +151,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_state = 0; host->h_state = 0;
host->h_nsmstate = 0; host->h_nsmstate = 0;
host->h_pidcount = 0; host->h_pidcount = 0;
atomic_set(&host->h_count, 1); refcount_set(&host->h_count, 1);
mutex_init(&host->h_mutex); mutex_init(&host->h_mutex);
host->h_nextrebind = now + NLM_HOST_REBIND; host->h_nextrebind = now + NLM_HOST_REBIND;
host->h_expires = now + NLM_HOST_EXPIRE; host->h_expires = now + NLM_HOST_EXPIRE;
...@@ -290,7 +290,7 @@ void nlmclnt_release_host(struct nlm_host *host) ...@@ -290,7 +290,7 @@ void nlmclnt_release_host(struct nlm_host *host)
WARN_ON_ONCE(host->h_server); WARN_ON_ONCE(host->h_server);
if (atomic_dec_and_test(&host->h_count)) { if (refcount_dec_and_test(&host->h_count)) {
WARN_ON_ONCE(!list_empty(&host->h_lockowners)); WARN_ON_ONCE(!list_empty(&host->h_lockowners));
WARN_ON_ONCE(!list_empty(&host->h_granted)); WARN_ON_ONCE(!list_empty(&host->h_granted));
WARN_ON_ONCE(!list_empty(&host->h_reclaim)); WARN_ON_ONCE(!list_empty(&host->h_reclaim));
...@@ -388,6 +388,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, ...@@ -388,6 +388,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
ln->nrhosts++; ln->nrhosts++;
nrhosts++; nrhosts++;
refcount_inc(&host->h_count);
dprintk("lockd: %s created host %s (%s)\n", dprintk("lockd: %s created host %s (%s)\n",
__func__, host->h_name, host->h_addrbuf); __func__, host->h_name, host->h_addrbuf);
...@@ -410,7 +412,7 @@ void nlmsvc_release_host(struct nlm_host *host) ...@@ -410,7 +412,7 @@ void nlmsvc_release_host(struct nlm_host *host)
dprintk("lockd: release server host %s\n", host->h_name); dprintk("lockd: release server host %s\n", host->h_name);
WARN_ON_ONCE(!host->h_server); WARN_ON_ONCE(!host->h_server);
atomic_dec(&host->h_count); refcount_dec(&host->h_count);
} }
/* /*
...@@ -504,7 +506,7 @@ struct nlm_host * nlm_get_host(struct nlm_host *host) ...@@ -504,7 +506,7 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
{ {
if (host) { if (host) {
dprintk("lockd: get host %s\n", host->h_name); dprintk("lockd: get host %s\n", host->h_name);
atomic_inc(&host->h_count); refcount_inc(&host->h_count);
host->h_expires = jiffies + NLM_HOST_EXPIRE; host->h_expires = jiffies + NLM_HOST_EXPIRE;
} }
return host; return host;
...@@ -593,7 +595,7 @@ static void nlm_complain_hosts(struct net *net) ...@@ -593,7 +595,7 @@ static void nlm_complain_hosts(struct net *net)
if (net && host->net != net) if (net && host->net != net)
continue; continue;
dprintk(" %s (cnt %d use %d exp %ld net %x)\n", dprintk(" %s (cnt %d use %d exp %ld net %x)\n",
host->h_name, atomic_read(&host->h_count), host->h_name, refcount_read(&host->h_count),
host->h_inuse, host->h_expires, host->net->ns.inum); host->h_inuse, host->h_expires, host->net->ns.inum);
} }
} }
...@@ -662,16 +664,16 @@ nlm_gc_hosts(struct net *net) ...@@ -662,16 +664,16 @@ nlm_gc_hosts(struct net *net)
for_each_host_safe(host, next, chain, nlm_server_hosts) { for_each_host_safe(host, next, chain, nlm_server_hosts) {
if (net && host->net != net) if (net && host->net != net)
continue; continue;
if (atomic_read(&host->h_count) || host->h_inuse if (host->h_inuse || time_before(jiffies, host->h_expires)) {
|| time_before(jiffies, host->h_expires)) {
dprintk("nlm_gc_hosts skipping %s " dprintk("nlm_gc_hosts skipping %s "
"(cnt %d use %d exp %ld net %x)\n", "(cnt %d use %d exp %ld net %x)\n",
host->h_name, atomic_read(&host->h_count), host->h_name, refcount_read(&host->h_count),
host->h_inuse, host->h_expires, host->h_inuse, host->h_expires,
host->net->ns.inum); host->net->ns.inum);
continue; continue;
} }
nlm_destroy_host_locked(host); if (refcount_dec_if_one(&host->h_count))
nlm_destroy_host_locked(host);
} }
if (net) { if (net) {
......
...@@ -191,7 +191,7 @@ void nsm_unmonitor(const struct nlm_host *host) ...@@ -191,7 +191,7 @@ void nsm_unmonitor(const struct nlm_host *host)
struct nsm_res res; struct nsm_res res;
int status; int status;
if (atomic_read(&nsm->sm_count) == 1 if (refcount_read(&nsm->sm_count) == 1
&& nsm->sm_monitored && !nsm->sm_sticky) { && nsm->sm_monitored && !nsm->sm_sticky) {
dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
...@@ -279,7 +279,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, ...@@ -279,7 +279,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
if (unlikely(new == NULL)) if (unlikely(new == NULL))
return NULL; return NULL;
atomic_set(&new->sm_count, 1); refcount_set(&new->sm_count, 1);
new->sm_name = (char *)(new + 1); new->sm_name = (char *)(new + 1);
memcpy(nsm_addr(new), sap, salen); memcpy(nsm_addr(new), sap, salen);
new->sm_addrlen = salen; new->sm_addrlen = salen;
...@@ -337,13 +337,13 @@ struct nsm_handle *nsm_get_handle(const struct net *net, ...@@ -337,13 +337,13 @@ struct nsm_handle *nsm_get_handle(const struct net *net,
cached = nsm_lookup_addr(&ln->nsm_handles, sap); cached = nsm_lookup_addr(&ln->nsm_handles, sap);
if (cached != NULL) { if (cached != NULL) {
atomic_inc(&cached->sm_count); refcount_inc(&cached->sm_count);
spin_unlock(&nsm_lock); spin_unlock(&nsm_lock);
kfree(new); kfree(new);
dprintk("lockd: found nsm_handle for %s (%s), " dprintk("lockd: found nsm_handle for %s (%s), "
"cnt %d\n", cached->sm_name, "cnt %d\n", cached->sm_name,
cached->sm_addrbuf, cached->sm_addrbuf,
atomic_read(&cached->sm_count)); refcount_read(&cached->sm_count));
return cached; return cached;
} }
...@@ -388,12 +388,12 @@ struct nsm_handle *nsm_reboot_lookup(const struct net *net, ...@@ -388,12 +388,12 @@ struct nsm_handle *nsm_reboot_lookup(const struct net *net,
return cached; return cached;
} }
atomic_inc(&cached->sm_count); refcount_inc(&cached->sm_count);
spin_unlock(&nsm_lock); spin_unlock(&nsm_lock);
dprintk("lockd: host %s (%s) rebooted, cnt %d\n", dprintk("lockd: host %s (%s) rebooted, cnt %d\n",
cached->sm_name, cached->sm_addrbuf, cached->sm_name, cached->sm_addrbuf,
atomic_read(&cached->sm_count)); refcount_read(&cached->sm_count));
return cached; return cached;
} }
...@@ -404,7 +404,7 @@ struct nsm_handle *nsm_reboot_lookup(const struct net *net, ...@@ -404,7 +404,7 @@ struct nsm_handle *nsm_reboot_lookup(const struct net *net,
*/ */
void nsm_release(struct nsm_handle *nsm) void nsm_release(struct nsm_handle *nsm)
{ {
if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) { if (refcount_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
list_del(&nsm->sm_link); list_del(&nsm->sm_link);
spin_unlock(&nsm_lock); spin_unlock(&nsm_lock);
dprintk("lockd: destroyed nsm_handle for %s (%s)\n", dprintk("lockd: destroyed nsm_handle for %s (%s)\n",
......
...@@ -295,7 +295,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) ...@@ -295,7 +295,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
void nlmsvc_release_call(struct nlm_rqst *call) void nlmsvc_release_call(struct nlm_rqst *call)
{ {
if (!atomic_dec_and_test(&call->a_count)) if (!refcount_dec_and_test(&call->a_count))
return; return;
nlmsvc_release_host(call->a_host); nlmsvc_release_host(call->a_host);
kfree(call); kfree(call);
......
...@@ -137,6 +137,11 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, ...@@ -137,6 +137,11 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
return bio; return bio;
} }
/*
 * Report whether the byte offset @offset lies inside the half-open range
 * [map->start, map->start + map->len) described by @map.
 *
 * The upper bound is tested as a distance from map->start instead of by
 * forming map->start + map->len, so a range that reaches the top of the
 * u64 space cannot wrap around and cause valid offsets to be rejected.
 * For every range that does not wrap, the result is unchanged.
 */
static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map)
{
	if (offset < map->start)
		return false;

	/* offset >= map->start here, so the subtraction cannot underflow. */
	return offset - map->start < map->len;
}
static struct bio * static struct bio *
do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
struct page *page, struct pnfs_block_dev_map *map, struct page *page, struct pnfs_block_dev_map *map,
...@@ -156,8 +161,8 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, ...@@ -156,8 +161,8 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
/* translate to physical disk offset */ /* translate to physical disk offset */
disk_addr = (u64)isect << SECTOR_SHIFT; disk_addr = (u64)isect << SECTOR_SHIFT;
if (disk_addr < map->start || disk_addr >= map->start + map->len) { if (!offset_in_map(disk_addr, map)) {
if (!dev->map(dev, disk_addr, map)) if (!dev->map(dev, disk_addr, map) || !offset_in_map(disk_addr, map))
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
bio = bl_submit_bio(bio); bio = bl_submit_bio(bio);
} }
...@@ -184,6 +189,29 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, ...@@ -184,6 +189,29 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
return bio; return bio;
} }
/*
 * Walk the byte range of an I/O request and flag the device of every extent
 * found along the way as unavailable.
 *
 * @header: pgio header of the request; header->args.offset and
 *          header->args.count give the byte range, header->lseg the layout
 *          segment whose extent tree is searched.
 * @rw:     forwarded unchanged to ext_tree_lookup(); the read completion
 *          handler passes false and the write completion handler passes true.
 *
 * The walk ends when the whole range has been covered or as soon as
 * ext_tree_lookup() reports that no extent was found.
 */
static void bl_mark_devices_unavailable(struct nfs_pgio_header *header, bool rw)
{
	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
	sector_t isect = header->args.offset >> SECTOR_SHIFT;
	size_t bytes_left = header->args.count;
	struct pnfs_block_extent be;

	/* Account for the bytes between the sector boundary and the I/O start. */
	bytes_left += header->args.offset - (isect << SECTOR_SHIFT);

	while (bytes_left > 0) {
		sector_t extent_length;

		if (!ext_tree_lookup(bl, isect, &be, rw))
			break;

		/* Sectors remaining from isect up to the end of this extent. */
		extent_length = be.be_length - (isect - be.be_f_offset);
		nfs4_mark_deviceid_unavailable(be.be_device);
		isect += extent_length;

		/* Consume this extent's bytes, clamping at zero. */
		bytes_left = (bytes_left > extent_length << SECTOR_SHIFT) ?
			bytes_left - (extent_length << SECTOR_SHIFT) : 0;
	}
}
static void bl_end_io_read(struct bio *bio) static void bl_end_io_read(struct bio *bio)
{ {
struct parallel_io *par = bio->bi_private; struct parallel_io *par = bio->bi_private;
...@@ -194,6 +222,7 @@ static void bl_end_io_read(struct bio *bio) ...@@ -194,6 +222,7 @@ static void bl_end_io_read(struct bio *bio)
if (!header->pnfs_error) if (!header->pnfs_error)
header->pnfs_error = -EIO; header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg); pnfs_set_lo_fail(header->lseg);
bl_mark_devices_unavailable(header, false);
} }
bio_put(bio); bio_put(bio);
...@@ -323,6 +352,7 @@ static void bl_end_io_write(struct bio *bio) ...@@ -323,6 +352,7 @@ static void bl_end_io_write(struct bio *bio)
if (!header->pnfs_error) if (!header->pnfs_error)
header->pnfs_error = -EIO; header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg); pnfs_set_lo_fail(header->lseg);
bl_mark_devices_unavailable(header, true);
} }
bio_put(bio); bio_put(bio);
put_parallel(par); put_parallel(par);
...@@ -552,6 +582,31 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) ...@@ -552,6 +582,31 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
return 0; return 0;
} }
/*
 * Look up the device node for @id, refusing nodes that are currently flagged
 * as unavailable.
 *
 * @server, @id, @cred, @gfp_mask are passed straight through to
 * nfs4_find_get_deviceid().
 *
 * Returns the node when it does not have NFS_DEVICEID_UNAVAILABLE set.
 * Returns ERR_PTR(-ENODEV) when the lookup yields no node, or when the node
 * was marked unavailable inside the window
 * [jiffies - PNFS_DEVICE_RETRY_TIMEOUT, jiffies].  Never returns NULL.
 *
 * A node whose unavailable timestamp falls outside that window is deleted
 * and the lookup is repeated.
 */
static struct nfs4_deviceid_node *
bl_find_get_deviceid(struct nfs_server *server,
		const struct nfs4_deviceid *id, struct rpc_cred *cred,
		gfp_t gfp_mask)
{
	struct nfs4_deviceid_node *node;
	unsigned long start, end;

retry:
	node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
	if (!node)
		return ERR_PTR(-ENODEV);

	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
		return node;

	end = jiffies;
	start = end - PNFS_DEVICE_RETRY_TIMEOUT;
	if (!time_in_range(node->timestamp_unavailable, start, end)) {
		nfs4_delete_deviceid(node->ld, node->nfs_client, id);
		/*
		 * NOTE(review): the reference taken by the lookup above appears
		 * to still be held when we loop; confirm whether a put is also
		 * needed here before retrying.
		 */
		goto retry;
	}

	/*
	 * The node is not handed back to the caller, so release the reference
	 * obtained from nfs4_find_get_deviceid(); the caller only sees an
	 * error pointer and has nothing to put.
	 */
	nfs4_put_deviceid_node(node);
	return ERR_PTR(-ENODEV);
}
static int static int
bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo, bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
struct layout_verification *lv, struct list_head *extents, struct layout_verification *lv, struct list_head *extents,
...@@ -573,16 +628,18 @@ bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo, ...@@ -573,16 +628,18 @@ bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
memcpy(&id, p, NFS4_DEVICEID4_SIZE); memcpy(&id, p, NFS4_DEVICEID4_SIZE);
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
error = -EIO; be->be_device = bl_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
lo->plh_lc_cred, gfp_mask); lo->plh_lc_cred, gfp_mask);
if (!be->be_device) if (IS_ERR(be->be_device)) {
error = PTR_ERR(be->be_device);
goto out_free_be; goto out_free_be;
}
/* /*
* The next three values are read in as bytes, but stored in the * The next three values are read in as bytes, but stored in the
* extent structure in 512-byte granularity. * extent structure in 512-byte granularity.
*/ */
error = -EIO;
if (decode_sector_number(&p, &be->be_f_offset) < 0) if (decode_sector_number(&p, &be->be_f_offset) < 0)
goto out_put_deviceid; goto out_put_deviceid;
if (decode_sector_number(&p, &be->be_length) < 0) if (decode_sector_number(&p, &be->be_length) < 0)
...@@ -692,11 +749,16 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, ...@@ -692,11 +749,16 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
__free_page(scratch); __free_page(scratch);
out: out:
dprintk("%s returns %d\n", __func__, status); dprintk("%s returns %d\n", __func__, status);
if (status) { switch (status) {
case -ENODEV:
/* Our extent block devices are unavailable */
set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
case 0:
return lseg;
default:
kfree(lseg); kfree(lseg);
return ERR_PTR(status); return ERR_PTR(status);
} }
return lseg;
} }
static void static void
...@@ -798,6 +860,13 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) ...@@ -798,6 +860,13 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
} }
pnfs_generic_pg_init_read(pgio, req); pnfs_generic_pg_init_read(pgio, req);
if (pgio->pg_lseg &&
test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
pnfs_set_lo_fail(pgio->pg_lseg);
nfs_pageio_reset_read_mds(pgio);
}
} }
/* /*
...@@ -853,6 +922,14 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) ...@@ -853,6 +922,14 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
pnfs_generic_pg_init_write(pgio, req, wb_size); pnfs_generic_pg_init_write(pgio, req, wb_size);
if (pgio->pg_lseg &&
test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
pnfs_set_lo_fail(pgio->pg_lseg);
nfs_pageio_reset_write_mds(pgio);
}
} }
/* /*
...@@ -887,6 +964,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = { ...@@ -887,6 +964,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
.name = "LAYOUT_BLOCK_VOLUME", .name = "LAYOUT_BLOCK_VOLUME",
.owner = THIS_MODULE, .owner = THIS_MODULE,
.flags = PNFS_LAYOUTRET_ON_SETATTR | .flags = PNFS_LAYOUTRET_ON_SETATTR |
PNFS_LAYOUTRET_ON_ERROR |
PNFS_READ_WHOLE_PAGE, PNFS_READ_WHOLE_PAGE,
.read_pagelist = bl_read_pagelist, .read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist, .write_pagelist = bl_write_pagelist,
...@@ -910,6 +988,7 @@ static struct pnfs_layoutdriver_type scsilayout_type = { ...@@ -910,6 +988,7 @@ static struct pnfs_layoutdriver_type scsilayout_type = {
.name = "LAYOUT_SCSI", .name = "LAYOUT_SCSI",
.owner = THIS_MODULE, .owner = THIS_MODULE,
.flags = PNFS_LAYOUTRET_ON_SETATTR | .flags = PNFS_LAYOUTRET_ON_SETATTR |
PNFS_LAYOUTRET_ON_ERROR |
PNFS_READ_WHOLE_PAGE, PNFS_READ_WHOLE_PAGE,
.read_pagelist = bl_read_pagelist, .read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist, .write_pagelist = bl_write_pagelist,
...@@ -967,6 +1046,7 @@ static void __exit nfs4blocklayout_exit(void) ...@@ -967,6 +1046,7 @@ static void __exit nfs4blocklayout_exit(void)
} }
MODULE_ALIAS("nfs-layouttype4-3"); MODULE_ALIAS("nfs-layouttype4-3");
MODULE_ALIAS("nfs-layouttype4-5");
module_init(nfs4blocklayout_init); module_init(nfs4blocklayout_init);
module_exit(nfs4blocklayout_exit); module_exit(nfs4blocklayout_exit);
...@@ -92,10 +92,9 @@ struct pnfs_block_volume { ...@@ -92,10 +92,9 @@ struct pnfs_block_volume {
}; };
struct pnfs_block_dev_map { struct pnfs_block_dev_map {
sector_t start; u64 start;
sector_t len; u64 len;
u64 disk_offset;
sector_t disk_offset;
struct block_device *bdev; struct block_device *bdev;
}; };
......
...@@ -533,14 +533,11 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, ...@@ -533,14 +533,11 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_free_volumes; goto out_free_volumes;
ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask); ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
if (ret) {
bl_free_device(top);
kfree(top);
goto out_free_volumes;
}
node = &top->node; node = &top->node;
nfs4_init_deviceid_node(node, server, &pdev->dev_id); nfs4_init_deviceid_node(node, server, &pdev->dev_id);
if (ret)
nfs4_mark_deviceid_unavailable(node);
out_free_volumes: out_free_volumes:
kfree(volumes); kfree(volumes);
......
...@@ -775,10 +775,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) ...@@ -775,10 +775,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_lock(&dreq->lock); spin_lock(&dreq->lock);
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
dreq->flags = 0;
dreq->error = hdr->error; dreq->error = hdr->error;
}
if (dreq->error == 0) { if (dreq->error == 0) {
nfs_direct_good_bytes(dreq, hdr); nfs_direct_good_bytes(dreq, hdr);
if (nfs_write_need_commit(hdr)) { if (nfs_write_need_commit(hdr)) {
......
...@@ -48,10 +48,6 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent) ...@@ -48,10 +48,6 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
*max_len = len; *max_len = len;
return FILEID_INVALID; return FILEID_INVALID;
} }
if (IS_AUTOMOUNT(inode)) {
*max_len = FILEID_INVALID;
goto out;
}
p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32; p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
p[FILEID_LOW_OFF] = NFS_FILEID(inode); p[FILEID_LOW_OFF] = NFS_FILEID(inode);
...@@ -59,7 +55,6 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent) ...@@ -59,7 +55,6 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
p[len - 1] = 0; /* Padding */ p[len - 1] = 0; /* Padding */
nfs_copy_fh(clnt_fh, server_fh); nfs_copy_fh(clnt_fh, server_fh);
*max_len = len; *max_len = len;
out:
dprintk("%s: result fh fileid %llu mode %u size %d\n", dprintk("%s: result fh fileid %llu mode %u size %d\n",
__func__, NFS_FILEID(inode), inode->i_mode, *max_len); __func__, NFS_FILEID(inode), inode->i_mode, *max_len);
return *max_len; return *max_len;
......
...@@ -895,9 +895,7 @@ fl_pnfs_update_layout(struct inode *ino, ...@@ -895,9 +895,7 @@ fl_pnfs_update_layout(struct inode *ino,
lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
gfp_flags); gfp_flags);
if (!lseg) if (IS_ERR_OR_NULL(lseg))
lseg = ERR_PTR(-ENOMEM);
if (IS_ERR(lseg))
goto out; goto out;
lo = NFS_I(ino)->layout; lo = NFS_I(ino)->layout;
......
...@@ -735,12 +735,20 @@ int nfs_getattr(const struct path *path, struct kstat *stat, ...@@ -735,12 +735,20 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags) u32 request_mask, unsigned int query_flags)
{ {
struct inode *inode = d_inode(path->dentry); struct inode *inode = d_inode(path->dentry);
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; struct nfs_server *server = NFS_SERVER(inode);
unsigned long cache_validity;
int err = 0; int err = 0;
bool force_sync = query_flags & AT_STATX_FORCE_SYNC;
bool do_update = false;
trace_nfs_getattr_enter(inode); trace_nfs_getattr_enter(inode);
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync)
goto out_no_update;
/* Flush out writes to the server in order to update c/mtime. */ /* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) { if ((request_mask & (STATX_CTIME|STATX_MTIME)) &&
S_ISREG(inode->i_mode)) {
err = filemap_write_and_wait(inode->i_mapping); err = filemap_write_and_wait(inode->i_mapping);
if (err) if (err)
goto out; goto out;
...@@ -757,24 +765,42 @@ int nfs_getattr(const struct path *path, struct kstat *stat, ...@@ -757,24 +765,42 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
*/ */
if ((path->mnt->mnt_flags & MNT_NOATIME) || if ((path->mnt->mnt_flags & MNT_NOATIME) ||
((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) ((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
need_atime = 0; request_mask &= ~STATX_ATIME;
if (need_atime || nfs_need_revalidate_inode(inode)) { /* Is the user requesting attributes that might need revalidation? */
struct nfs_server *server = NFS_SERVER(inode); if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME|
STATX_MTIME|STATX_UID|STATX_GID|
STATX_SIZE|STATX_BLOCKS)))
goto out_no_revalidate;
/* Check whether the cached attributes are stale */
do_update |= force_sync || nfs_attribute_cache_expired(inode);
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
do_update |= cache_validity &
(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL);
if (request_mask & STATX_ATIME)
do_update |= cache_validity & NFS_INO_INVALID_ATIME;
if (request_mask & (STATX_CTIME|STATX_MTIME))
do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
if (do_update) {
/* Update the attribute cache */
if (!(server->flags & NFS_MOUNT_NOAC)) if (!(server->flags & NFS_MOUNT_NOAC))
nfs_readdirplus_parent_cache_miss(path->dentry); nfs_readdirplus_parent_cache_miss(path->dentry);
else else
nfs_readdirplus_parent_cache_hit(path->dentry); nfs_readdirplus_parent_cache_hit(path->dentry);
err = __nfs_revalidate_inode(server, inode); err = __nfs_revalidate_inode(server, inode);
if (err)
goto out;
} else } else
nfs_readdirplus_parent_cache_hit(path->dentry); nfs_readdirplus_parent_cache_hit(path->dentry);
if (!err) { out_no_revalidate:
generic_fillattr(inode, stat); /* Only return attributes that were revalidated. */
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); stat->result_mask &= request_mask;
if (S_ISDIR(inode->i_mode)) out_no_update:
stat->blksize = NFS_SERVER(inode)->dtsize; generic_fillattr(inode, stat);
} stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
if (S_ISDIR(inode->i_mode))
stat->blksize = NFS_SERVER(inode)->dtsize;
out: out:
trace_nfs_getattr_exit(inode, err); trace_nfs_getattr_exit(inode, err);
return err; return err;
...@@ -1144,7 +1170,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map ...@@ -1144,7 +1170,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
if (mapping->nrpages != 0) { if (mapping->nrpages != 0) {
if (S_ISREG(inode->i_mode)) { if (S_ISREG(inode->i_mode)) {
unmap_mapping_range(mapping, 0, 0, 0);
ret = nfs_sync_mapping(mapping); ret = nfs_sync_mapping(mapping);
if (ret < 0) if (ret < 0)
return ret; return ret;
......
...@@ -99,7 +99,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) ...@@ -99,7 +99,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
{ {
if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
set_bit(NFS_INO_ODIRECT, &nfsi->flags); set_bit(NFS_INO_ODIRECT, &nfsi->flags);
nfs_wb_all(inode); nfs_sync_mapping(inode->i_mapping);
} }
} }
......
...@@ -861,6 +861,7 @@ static int nfs4_set_client(struct nfs_server *server, ...@@ -861,6 +861,7 @@ static int nfs4_set_client(struct nfs_server *server,
set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status)) if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
server->port = rpc_get_port(addr);
/* Allocate or find a client reference we can use */ /* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init); clp = nfs_get_client(&cl_init);
...@@ -1123,19 +1124,36 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, ...@@ -1123,19 +1124,36 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
/* Initialise the client representation from the parent server */ /* Initialise the client representation from the parent server */
nfs_server_copy_userdata(server, parent_server); nfs_server_copy_userdata(server, parent_server);
/* Get a client representation. /* Get a client representation */
* Note: NFSv4 always uses TCP, */ #ifdef CONFIG_SUNRPC_XPRT_RDMA
rpc_set_port(data->addr, NFS_RDMA_PORT);
error = nfs4_set_client(server, data->hostname, error = nfs4_set_client(server, data->hostname,
data->addr, data->addr,
data->addrlen, data->addrlen,
parent_client->cl_ipaddr, parent_client->cl_ipaddr,
rpc_protocol(parent_server->client), XPRT_TRANSPORT_RDMA,
parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version,
parent_client->cl_net);
if (!error)
goto init_server;
#endif /* CONFIG_SUNRPC_XPRT_RDMA */
rpc_set_port(data->addr, NFS_PORT);
error = nfs4_set_client(server, data->hostname,
data->addr,
data->addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_TCP,
parent_server->client->cl_timeout, parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version, parent_client->cl_mvops->minor_version,
parent_client->cl_net); parent_client->cl_net);
if (error < 0) if (error < 0)
goto error; goto error;
#ifdef CONFIG_SUNRPC_XPRT_RDMA
init_server:
#endif
error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
if (error < 0) if (error < 0)
goto error; goto error;
......
...@@ -568,9 +568,13 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, ...@@ -568,9 +568,13 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
struct idmap_msg *im; struct idmap_msg *im;
struct idmap *idmap = (struct idmap *)aux; struct idmap *idmap = (struct idmap *)aux;
struct key *key = cons->key; struct key *key = cons->key;
int ret = -ENOMEM; int ret = -ENOKEY;
if (!aux)
goto out1;
/* msg and im are freed in idmap_pipe_destroy_msg */ /* msg and im are freed in idmap_pipe_destroy_msg */
ret = -ENOMEM;
data = kzalloc(sizeof(*data), GFP_KERNEL); data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data) if (!data)
goto out1; goto out1;
......
...@@ -270,8 +270,6 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, ...@@ -270,8 +270,6 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
if (mountdata->addrlen == 0) if (mountdata->addrlen == 0)
continue; continue;
rpc_set_port(mountdata->addr, NFS_PORT);
memcpy(page2, buf->data, buf->len); memcpy(page2, buf->data, buf->len);
page2[buf->len] = '\0'; page2[buf->len] = '\0';
mountdata->hostname = page2; mountdata->hostname = page2;
......
...@@ -2020,7 +2020,7 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ...@@ -2020,7 +2020,7 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
return ret; return ret;
} }
static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err) static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, struct file_lock *fl, int err)
{ {
switch (err) { switch (err) {
default: default:
...@@ -2067,7 +2067,11 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct ...@@ -2067,7 +2067,11 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
return -EAGAIN; return -EAGAIN;
case -ENOMEM: case -ENOMEM:
case -NFS4ERR_DENIED: case -NFS4ERR_DENIED:
/* kill_proc(fl->fl_pid, SIGLOST, 1); */ if (fl) {
struct nfs4_lock_state *lsp = fl->fl_u.nfs4_fl.owner;
if (lsp)
set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
}
return 0; return 0;
} }
return err; return err;
...@@ -2103,7 +2107,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, ...@@ -2103,7 +2107,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
err = nfs4_open_recover_helper(opendata, FMODE_READ); err = nfs4_open_recover_helper(opendata, FMODE_READ);
} }
nfs4_opendata_put(opendata); nfs4_opendata_put(opendata);
return nfs4_handle_delegation_recall_error(server, state, stateid, err); return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err);
} }
static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
...@@ -3150,6 +3154,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data) ...@@ -3150,6 +3154,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
struct nfs4_state *state = calldata->state; struct nfs4_state *state = calldata->state;
struct nfs_server *server = NFS_SERVER(calldata->inode); struct nfs_server *server = NFS_SERVER(calldata->inode);
nfs4_stateid *res_stateid = NULL; nfs4_stateid *res_stateid = NULL;
struct nfs4_exception exception = {
.state = state,
.inode = calldata->inode,
.stateid = &calldata->arg.stateid,
};
dprintk("%s: begin!\n", __func__); dprintk("%s: begin!\n", __func__);
if (!nfs4_sequence_done(task, &calldata->res.seq_res)) if (!nfs4_sequence_done(task, &calldata->res.seq_res))
...@@ -3215,7 +3224,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data) ...@@ -3215,7 +3224,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
case -NFS4ERR_BAD_STATEID: case -NFS4ERR_BAD_STATEID:
break; break;
default: default:
if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
if (exception.retry)
goto out_restart; goto out_restart;
} }
nfs_clear_open_stateid(state, &calldata->arg.stateid, nfs_clear_open_stateid(state, &calldata->arg.stateid,
...@@ -5759,6 +5770,10 @@ struct nfs4_delegreturndata { ...@@ -5759,6 +5770,10 @@ struct nfs4_delegreturndata {
static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
{ {
struct nfs4_delegreturndata *data = calldata; struct nfs4_delegreturndata *data = calldata;
struct nfs4_exception exception = {
.inode = data->inode,
.stateid = &data->stateid,
};
if (!nfs4_sequence_done(task, &data->res.seq_res)) if (!nfs4_sequence_done(task, &data->res.seq_res))
return; return;
...@@ -5820,10 +5835,11 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) ...@@ -5820,10 +5835,11 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
} }
/* Fallthrough */ /* Fallthrough */
default: default:
if (nfs4_async_handle_error(task, data->res.server, task->tk_status = nfs4_async_handle_exception(task,
NULL, NULL) == -EAGAIN) { data->res.server, task->tk_status,
&exception);
if (exception.retry)
goto out_restart; goto out_restart;
}
} }
data->rpc_status = task->tk_status; data->rpc_status = task->tk_status;
return; return;
...@@ -6061,6 +6077,10 @@ static void nfs4_locku_release_calldata(void *data) ...@@ -6061,6 +6077,10 @@ static void nfs4_locku_release_calldata(void *data)
static void nfs4_locku_done(struct rpc_task *task, void *data) static void nfs4_locku_done(struct rpc_task *task, void *data)
{ {
struct nfs4_unlockdata *calldata = data; struct nfs4_unlockdata *calldata = data;
struct nfs4_exception exception = {
.inode = calldata->lsp->ls_state->inode,
.stateid = &calldata->arg.stateid,
};
if (!nfs4_sequence_done(task, &calldata->res.seq_res)) if (!nfs4_sequence_done(task, &calldata->res.seq_res))
return; return;
...@@ -6084,8 +6104,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) ...@@ -6084,8 +6104,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
rpc_restart_call_prepare(task); rpc_restart_call_prepare(task);
break; break;
default: default:
if (nfs4_async_handle_error(task, calldata->server, task->tk_status = nfs4_async_handle_exception(task,
NULL, NULL) == -EAGAIN) calldata->server, task->tk_status,
&exception);
if (exception.retry)
rpc_restart_call_prepare(task); rpc_restart_call_prepare(task);
} }
nfs_release_seqid(calldata->arg.seqid); nfs_release_seqid(calldata->arg.seqid);
...@@ -6741,7 +6763,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, ...@@ -6741,7 +6763,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
if (err != 0) if (err != 0)
return err; return err;
err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
return nfs4_handle_delegation_recall_error(server, state, stateid, err); return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
} }
struct nfs_release_lockowner_data { struct nfs_release_lockowner_data {
......
...@@ -1482,6 +1482,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ ...@@ -1482,6 +1482,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
struct inode *inode = state->inode; struct inode *inode = state->inode;
struct nfs_inode *nfsi = NFS_I(inode); struct nfs_inode *nfsi = NFS_I(inode);
struct file_lock *fl; struct file_lock *fl;
struct nfs4_lock_state *lsp;
int status = 0; int status = 0;
struct file_lock_context *flctx = inode->i_flctx; struct file_lock_context *flctx = inode->i_flctx;
struct list_head *list; struct list_head *list;
...@@ -1522,7 +1523,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ ...@@ -1522,7 +1523,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
case -NFS4ERR_DENIED: case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_BAD:
case -NFS4ERR_RECLAIM_CONFLICT: case -NFS4ERR_RECLAIM_CONFLICT:
/* kill_proc(fl->fl_pid, SIGLOST, 1); */ lsp = fl->fl_u.nfs4_fl.owner;
if (lsp)
set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
status = 0; status = 0;
} }
spin_lock(&flctx->flc_lock); spin_lock(&flctx->flc_lock);
......
...@@ -32,7 +32,7 @@ static struct ctl_table nfs4_cb_sysctls[] = { ...@@ -32,7 +32,7 @@ static struct ctl_table nfs4_cb_sysctls[] = {
.data = &nfs_idmap_cache_timeout, .data = &nfs_idmap_cache_timeout,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_jiffies, .proc_handler = proc_dointvec,
}, },
{ } { }
}; };
......
...@@ -7678,6 +7678,22 @@ nfs4_stat_to_errno(int stat) ...@@ -7678,6 +7678,22 @@ nfs4_stat_to_errno(int stat)
.p_name = #proc, \ .p_name = #proc, \
} }
#if defined(CONFIG_NFS_V4_1)
#define PROC41(proc, argtype, restype) \
PROC(proc, argtype, restype)
#else
#define PROC41(proc, argtype, restype) \
STUB(proc)
#endif
#if defined(CONFIG_NFS_V4_2)
#define PROC42(proc, argtype, restype) \
PROC(proc, argtype, restype)
#else
#define PROC42(proc, argtype, restype) \
STUB(proc)
#endif
const struct rpc_procinfo nfs4_procedures[] = { const struct rpc_procinfo nfs4_procedures[] = {
PROC(READ, enc_read, dec_read), PROC(READ, enc_read, dec_read),
PROC(WRITE, enc_write, dec_write), PROC(WRITE, enc_write, dec_write),
...@@ -7698,7 +7714,6 @@ const struct rpc_procinfo nfs4_procedures[] = { ...@@ -7698,7 +7714,6 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC(ACCESS, enc_access, dec_access), PROC(ACCESS, enc_access, dec_access),
PROC(GETATTR, enc_getattr, dec_getattr), PROC(GETATTR, enc_getattr, dec_getattr),
PROC(LOOKUP, enc_lookup, dec_lookup), PROC(LOOKUP, enc_lookup, dec_lookup),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root), PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
PROC(REMOVE, enc_remove, dec_remove), PROC(REMOVE, enc_remove, dec_remove),
PROC(RENAME, enc_rename, dec_rename), PROC(RENAME, enc_rename, dec_rename),
...@@ -7717,33 +7732,30 @@ const struct rpc_procinfo nfs4_procedures[] = { ...@@ -7717,33 +7732,30 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
PROC(SECINFO, enc_secinfo, dec_secinfo), PROC(SECINFO, enc_secinfo, dec_secinfo),
PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present), PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present),
#if defined(CONFIG_NFS_V4_1) PROC41(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), PROC41(CREATE_SESSION, enc_create_session, dec_create_session),
PROC(CREATE_SESSION, enc_create_session, dec_create_session), PROC41(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), PROC41(SEQUENCE, enc_sequence, dec_sequence),
PROC(SEQUENCE, enc_sequence, dec_sequence), PROC41(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), PROC41(RECLAIM_COMPLETE,enc_reclaim_complete, dec_reclaim_complete),
PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), PROC41(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), PROC41(LAYOUTGET, enc_layoutget, dec_layoutget),
PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC41(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), PROC41(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), PROC41(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), PROC41(TEST_STATEID, enc_test_stateid, dec_test_stateid),
PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), PROC41(FREE_STATEID, enc_free_stateid, dec_free_stateid),
PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
STUB(GETDEVICELIST), STUB(GETDEVICELIST),
PROC(BIND_CONN_TO_SESSION, PROC41(BIND_CONN_TO_SESSION,
enc_bind_conn_to_session, dec_bind_conn_to_session), enc_bind_conn_to_session, dec_bind_conn_to_session),
PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), PROC41(DESTROY_CLIENTID,enc_destroy_clientid, dec_destroy_clientid),
#endif /* CONFIG_NFS_V4_1 */ PROC42(SEEK, enc_seek, dec_seek),
#ifdef CONFIG_NFS_V4_2 PROC42(ALLOCATE, enc_allocate, dec_allocate),
PROC(SEEK, enc_seek, dec_seek), PROC42(DEALLOCATE, enc_deallocate, dec_deallocate),
PROC(ALLOCATE, enc_allocate, dec_allocate), PROC42(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
PROC(DEALLOCATE, enc_deallocate, dec_deallocate), PROC42(CLONE, enc_clone, dec_clone),
PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), PROC42(COPY, enc_copy, dec_copy),
PROC(CLONE, enc_clone, dec_clone), PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC(COPY, enc_copy, dec_copy),
#endif /* CONFIG_NFS_V4_2 */
}; };
static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)]; static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
......
...@@ -797,15 +797,15 @@ TRACE_EVENT(nfs_readpage_done, ...@@ -797,15 +797,15 @@ TRACE_EVENT(nfs_readpage_done,
) )
); );
/* TRACE_DEFINE_ENUM(NFS_UNSTABLE);
* XXX: I tried using NFS_UNSTABLE and friends in this table, but they TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
* all evaluate to 0 for some reason, even if I include linux/nfs.h. TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
*/
#define nfs_show_stable(stable) \ #define nfs_show_stable(stable) \
__print_symbolic(stable, \ __print_symbolic(stable, \
{ 0, " (UNSTABLE)" }, \ { NFS_UNSTABLE, "UNSTABLE" }, \
{ 1, " (DATA_SYNC)" }, \ { NFS_DATA_SYNC, "DATA_SYNC" }, \
{ 2, " (FILE_SYNC)" }) { NFS_FILE_SYNC, "FILE_SYNC" })
TRACE_EVENT(nfs_initiate_write, TRACE_EVENT(nfs_initiate_write,
TP_PROTO( TP_PROTO(
...@@ -838,12 +838,12 @@ TRACE_EVENT(nfs_initiate_write, ...@@ -838,12 +838,12 @@ TRACE_EVENT(nfs_initiate_write,
TP_printk( TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x " "fileid=%02x:%02x:%llu fhandle=0x%08x "
"offset=%lld count=%lu stable=%d%s", "offset=%lld count=%lu stable=%s",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid, (unsigned long long)__entry->fileid,
__entry->fhandle, __entry->fhandle,
__entry->offset, __entry->count, __entry->offset, __entry->count,
__entry->stable, nfs_show_stable(__entry->stable) nfs_show_stable(__entry->stable)
) )
); );
...@@ -882,13 +882,13 @@ TRACE_EVENT(nfs_writeback_done, ...@@ -882,13 +882,13 @@ TRACE_EVENT(nfs_writeback_done,
TP_printk( TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x " "fileid=%02x:%02x:%llu fhandle=0x%08x "
"offset=%lld status=%d stable=%d%s " "offset=%lld status=%d stable=%s "
"verifier 0x%016llx", "verifier 0x%016llx",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid, (unsigned long long)__entry->fileid,
__entry->fhandle, __entry->fhandle,
__entry->offset, __entry->status, __entry->offset, __entry->status,
__entry->stable, nfs_show_stable(__entry->stable), nfs_show_stable(__entry->stable),
__entry->verifier __entry->verifier
) )
); );
......
...@@ -537,7 +537,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); ...@@ -537,7 +537,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
* @cinfo: Commit information for the call (writes only) * @cinfo: Commit information for the call (writes only)
*/ */
static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
unsigned int count, unsigned int offset, unsigned int count,
int how, struct nfs_commit_info *cinfo) int how, struct nfs_commit_info *cinfo)
{ {
struct nfs_page *req = hdr->req; struct nfs_page *req = hdr->req;
...@@ -546,10 +546,10 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, ...@@ -546,10 +546,10 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
* NB: take care not to mess about with hdr->commit et al. */ * NB: take care not to mess about with hdr->commit et al. */
hdr->args.fh = NFS_FH(hdr->inode); hdr->args.fh = NFS_FH(hdr->inode);
hdr->args.offset = req_offset(req) + offset; hdr->args.offset = req_offset(req);
/* pnfs_set_layoutcommit needs this */ /* pnfs_set_layoutcommit needs this */
hdr->mds_offset = hdr->args.offset; hdr->mds_offset = hdr->args.offset;
hdr->args.pgbase = req->wb_pgbase + offset; hdr->args.pgbase = req->wb_pgbase;
hdr->args.pages = hdr->page_array.pagevec; hdr->args.pages = hdr->page_array.pagevec;
hdr->args.count = count; hdr->args.count = count;
hdr->args.context = get_nfs_open_context(req->wb_context); hdr->args.context = get_nfs_open_context(req->wb_context);
...@@ -789,7 +789,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, ...@@ -789,7 +789,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags &= ~FLUSH_COND_STABLE; desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */ /* Set up the argument struct */
nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); nfs_pgio_rpcsetup(hdr, mirror->pg_count, desc->pg_ioflags, &cinfo);
desc->pg_rpc_callops = &nfs_pgio_common_ops; desc->pg_rpc_callops = &nfs_pgio_common_ops;
return 0; return 0;
} }
......
...@@ -655,7 +655,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, ...@@ -655,7 +655,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return 0; return 0;
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (pnfs_match_lseg_recall(lseg, recall_range, seq)) { if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
dprintk("%s: freeing lseg %p iomode %d seq %u" dprintk("%s: freeing lseg %p iomode %d seq %u "
"offset %llu length %llu\n", __func__, "offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_seq, lseg, lseg->pls_range.iomode, lseg->pls_seq,
lseg->pls_range.offset, lseg->pls_range.length); lseg->pls_range.offset, lseg->pls_range.length);
...@@ -2255,7 +2255,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, ...@@ -2255,7 +2255,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_write_mds(desc); nfs_pageio_reset_write_mds(desc);
mirror->pg_recoalesce = 1; mirror->pg_recoalesce = 1;
} }
hdr->release(hdr); hdr->completion_ops->completion(hdr);
} }
static enum pnfs_try_status static enum pnfs_try_status
...@@ -2378,7 +2378,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, ...@@ -2378,7 +2378,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_read_mds(desc); nfs_pageio_reset_read_mds(desc);
mirror->pg_recoalesce = 1; mirror->pg_recoalesce = 1;
} }
hdr->release(hdr); hdr->completion_ops->completion(hdr);
} }
/* /*
......
...@@ -40,6 +40,7 @@ enum { ...@@ -40,6 +40,7 @@ enum {
NFS_LSEG_ROC, /* roc bit received from server */ NFS_LSEG_ROC, /* roc bit received from server */
NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */
NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */ NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */
NFS_LSEG_UNAVAILABLE, /* unavailable bit set for temporary problem */
}; };
/* Individual ip address */ /* Individual ip address */
...@@ -86,6 +87,7 @@ enum pnfs_try_status { ...@@ -86,6 +87,7 @@ enum pnfs_try_status {
*/ */
#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ #define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */
#define NFS4_DEF_DS_RETRANS 5 #define NFS4_DEF_DS_RETRANS 5
#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
/* error codes for internal use */ /* error codes for internal use */
#define NFS4ERR_RESET_TO_MDS 12001 #define NFS4ERR_RESET_TO_MDS 12001
...@@ -524,8 +526,10 @@ static inline int pnfs_return_layout(struct inode *ino) ...@@ -524,8 +526,10 @@ static inline int pnfs_return_layout(struct inode *ino)
struct nfs_inode *nfsi = NFS_I(ino); struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_server *nfss = NFS_SERVER(ino); struct nfs_server *nfss = NFS_SERVER(ino);
if (pnfs_enabled_sb(nfss) && nfsi->layout) if (pnfs_enabled_sb(nfss) && nfsi->layout) {
set_bit(NFS_LAYOUT_RETURN_REQUESTED, &nfsi->layout->plh_flags);
return _pnfs_return_layout(ino); return _pnfs_return_layout(ino);
}
return 0; return 0;
} }
......
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
static DEFINE_SPINLOCK(nfs4_deviceid_lock); static DEFINE_SPINLOCK(nfs4_deviceid_lock);
......
...@@ -1835,6 +1835,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) ...@@ -1835,6 +1835,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
next: next:
nfs_unlock_and_release_request(req); nfs_unlock_and_release_request(req);
/* Latency breaker */
cond_resched();
} }
nfss = NFS_SERVER(data->inode); nfss = NFS_SERVER(data->inode);
if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <net/ipv6.h> #include <net/ipv6.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/kref.h> #include <linux/kref.h>
#include <linux/refcount.h>
#include <linux/utsname.h> #include <linux/utsname.h>
#include <linux/lockd/bind.h> #include <linux/lockd/bind.h>
#include <linux/lockd/xdr.h> #include <linux/lockd/xdr.h>
...@@ -58,7 +59,7 @@ struct nlm_host { ...@@ -58,7 +59,7 @@ struct nlm_host {
u32 h_state; /* pseudo-state counter */ u32 h_state; /* pseudo-state counter */
u32 h_nsmstate; /* true remote NSM state */ u32 h_nsmstate; /* true remote NSM state */
u32 h_pidcount; /* Pseudopids */ u32 h_pidcount; /* Pseudopids */
atomic_t h_count; /* reference count */ refcount_t h_count; /* reference count */
struct mutex h_mutex; /* mutex for pmap binding */ struct mutex h_mutex; /* mutex for pmap binding */
unsigned long h_nextrebind; /* next portmap call */ unsigned long h_nextrebind; /* next portmap call */
unsigned long h_expires; /* eligible for GC */ unsigned long h_expires; /* eligible for GC */
...@@ -83,7 +84,7 @@ struct nlm_host { ...@@ -83,7 +84,7 @@ struct nlm_host {
struct nsm_handle { struct nsm_handle {
struct list_head sm_link; struct list_head sm_link;
atomic_t sm_count; refcount_t sm_count;
char *sm_mon_name; char *sm_mon_name;
char *sm_name; char *sm_name;
struct sockaddr_storage sm_addr; struct sockaddr_storage sm_addr;
...@@ -122,7 +123,7 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) ...@@ -122,7 +123,7 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
*/ */
struct nlm_lockowner { struct nlm_lockowner {
struct list_head list; struct list_head list;
atomic_t count; refcount_t count;
struct nlm_host *host; struct nlm_host *host;
fl_owner_t owner; fl_owner_t owner;
...@@ -136,7 +137,7 @@ struct nlm_wait; ...@@ -136,7 +137,7 @@ struct nlm_wait;
*/ */
#define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u) #define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u)
struct nlm_rqst { struct nlm_rqst {
atomic_t a_count; refcount_t a_count;
unsigned int a_flags; /* initial RPC task flags */ unsigned int a_flags; /* initial RPC task flags */
struct nlm_host * a_host; /* host handle */ struct nlm_host * a_host; /* host handle */
struct nlm_args a_args; /* arguments */ struct nlm_args a_args; /* arguments */
......
...@@ -457,7 +457,12 @@ enum lock_type4 { ...@@ -457,7 +457,12 @@ enum lock_type4 {
#define NFS4_DEBUG 1 #define NFS4_DEBUG 1
/* Index of predefined Linux client operations */ /*
* Index of predefined Linux client operations
*
* To ensure that /proc/net/rpc/nfs remains correctly ordered, please
* append only to this enum when adding new client operations.
*/
enum { enum {
NFSPROC4_CLNT_NULL = 0, /* Unused */ NFSPROC4_CLNT_NULL = 0, /* Unused */
...@@ -480,7 +485,6 @@ enum { ...@@ -480,7 +485,6 @@ enum {
NFSPROC4_CLNT_ACCESS, NFSPROC4_CLNT_ACCESS,
NFSPROC4_CLNT_GETATTR, NFSPROC4_CLNT_GETATTR,
NFSPROC4_CLNT_LOOKUP, NFSPROC4_CLNT_LOOKUP,
NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LOOKUP_ROOT, NFSPROC4_CLNT_LOOKUP_ROOT,
NFSPROC4_CLNT_REMOVE, NFSPROC4_CLNT_REMOVE,
NFSPROC4_CLNT_RENAME, NFSPROC4_CLNT_RENAME,
...@@ -500,7 +504,6 @@ enum { ...@@ -500,7 +504,6 @@ enum {
NFSPROC4_CLNT_SECINFO, NFSPROC4_CLNT_SECINFO,
NFSPROC4_CLNT_FSID_PRESENT, NFSPROC4_CLNT_FSID_PRESENT,
/* nfs41 */
NFSPROC4_CLNT_EXCHANGE_ID, NFSPROC4_CLNT_EXCHANGE_ID,
NFSPROC4_CLNT_CREATE_SESSION, NFSPROC4_CLNT_CREATE_SESSION,
NFSPROC4_CLNT_DESTROY_SESSION, NFSPROC4_CLNT_DESTROY_SESSION,
...@@ -518,13 +521,14 @@ enum { ...@@ -518,13 +521,14 @@ enum {
NFSPROC4_CLNT_BIND_CONN_TO_SESSION, NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
NFSPROC4_CLNT_DESTROY_CLIENTID, NFSPROC4_CLNT_DESTROY_CLIENTID,
/* nfs42 */
NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_SEEK,
NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_ALLOCATE,
NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS, NFSPROC4_CLNT_LAYOUTSTATS,
NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_CLONE,
NFSPROC4_CLNT_COPY, NFSPROC4_CLNT_COPY,
NFSPROC4_CLNT_LOOKUPP,
}; };
/* nfs41 types */ /* nfs41 types */
......
...@@ -179,7 +179,6 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, ...@@ -179,7 +179,6 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
int rpc_restart_call_prepare(struct rpc_task *); int rpc_restart_call_prepare(struct rpc_task *);
int rpc_restart_call(struct rpc_task *); int rpc_restart_call(struct rpc_task *);
void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
int rpc_protocol(struct rpc_clnt *);
struct net * rpc_net_ns(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *);
size_t rpc_max_payload(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *);
size_t rpc_max_bc_payload(struct rpc_clnt *); size_t rpc_max_bc_payload(struct rpc_clnt *);
......
...@@ -64,7 +64,7 @@ enum rpcrdma_memreg { ...@@ -64,7 +64,7 @@ enum rpcrdma_memreg {
RPCRDMA_MEMWINDOWS, RPCRDMA_MEMWINDOWS,
RPCRDMA_MEMWINDOWS_ASYNC, RPCRDMA_MEMWINDOWS_ASYNC,
RPCRDMA_MTHCAFMR, RPCRDMA_MTHCAFMR,
RPCRDMA_FRMR, RPCRDMA_FRWR,
RPCRDMA_ALLPHYSICAL, RPCRDMA_ALLPHYSICAL,
RPCRDMA_LAST RPCRDMA_LAST
}; };
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2017 Oracle. All rights reserved.
*/
/*
* enum ib_event_type, from include/rdma/ib_verbs.h
*/
#define IB_EVENT_LIST \
ib_event(CQ_ERR) \
ib_event(QP_FATAL) \
ib_event(QP_REQ_ERR) \
ib_event(QP_ACCESS_ERR) \
ib_event(COMM_EST) \
ib_event(SQ_DRAINED) \
ib_event(PATH_MIG) \
ib_event(PATH_MIG_ERR) \
ib_event(DEVICE_FATAL) \
ib_event(PORT_ACTIVE) \
ib_event(PORT_ERR) \
ib_event(LID_CHANGE) \
ib_event(PKEY_CHANGE) \
ib_event(SM_CHANGE) \
ib_event(SRQ_ERR) \
ib_event(SRQ_LIMIT_REACHED) \
ib_event(QP_LAST_WQE_REACHED) \
ib_event(CLIENT_REREGISTER) \
ib_event(GID_CHANGE) \
ib_event_end(WQ_FATAL)
#undef ib_event
#undef ib_event_end
#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
IB_EVENT_LIST
#undef ib_event
#undef ib_event_end
#define ib_event(x) { IB_EVENT_##x, #x },
#define ib_event_end(x) { IB_EVENT_##x, #x }
#define rdma_show_ib_event(x) \
__print_symbolic(x, IB_EVENT_LIST)
/*
* enum ib_wc_status type, from include/rdma/ib_verbs.h
*/
#define IB_WC_STATUS_LIST \
ib_wc_status(SUCCESS) \
ib_wc_status(LOC_LEN_ERR) \
ib_wc_status(LOC_QP_OP_ERR) \
ib_wc_status(LOC_EEC_OP_ERR) \
ib_wc_status(LOC_PROT_ERR) \
ib_wc_status(WR_FLUSH_ERR) \
ib_wc_status(MW_BIND_ERR) \
ib_wc_status(BAD_RESP_ERR) \
ib_wc_status(LOC_ACCESS_ERR) \
ib_wc_status(REM_INV_REQ_ERR) \
ib_wc_status(REM_ACCESS_ERR) \
ib_wc_status(REM_OP_ERR) \
ib_wc_status(RETRY_EXC_ERR) \
ib_wc_status(RNR_RETRY_EXC_ERR) \
ib_wc_status(LOC_RDD_VIOL_ERR) \
ib_wc_status(REM_INV_RD_REQ_ERR) \
ib_wc_status(REM_ABORT_ERR) \
ib_wc_status(INV_EECN_ERR) \
ib_wc_status(INV_EEC_STATE_ERR) \
ib_wc_status(FATAL_ERR) \
ib_wc_status(RESP_TIMEOUT_ERR) \
ib_wc_status_end(GENERAL_ERR)
#undef ib_wc_status
#undef ib_wc_status_end
#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
IB_WC_STATUS_LIST
#undef ib_wc_status
#undef ib_wc_status_end
#define ib_wc_status(x) { IB_WC_##x, #x },
#define ib_wc_status_end(x) { IB_WC_##x, #x }
#define rdma_show_wc_status(x) \
__print_symbolic(x, IB_WC_STATUS_LIST)
/*
* enum rdma_cm_event_type, from include/rdma/rdma_cm.h
*/
#define RDMA_CM_EVENT_LIST \
rdma_cm_event(ADDR_RESOLVED) \
rdma_cm_event(ADDR_ERROR) \
rdma_cm_event(ROUTE_RESOLVED) \
rdma_cm_event(ROUTE_ERROR) \
rdma_cm_event(CONNECT_REQUEST) \
rdma_cm_event(CONNECT_RESPONSE) \
rdma_cm_event(CONNECT_ERROR) \
rdma_cm_event(UNREACHABLE) \
rdma_cm_event(REJECTED) \
rdma_cm_event(ESTABLISHED) \
rdma_cm_event(DISCONNECTED) \
rdma_cm_event(DEVICE_REMOVAL) \
rdma_cm_event(MULTICAST_JOIN) \
rdma_cm_event(MULTICAST_ERROR) \
rdma_cm_event(ADDR_CHANGE) \
rdma_cm_event_end(TIMEWAIT_EXIT)
#undef rdma_cm_event
#undef rdma_cm_event_end
#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
RDMA_CM_EVENT_LIST
#undef rdma_cm_event
#undef rdma_cm_event_end
#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
#define rdma_show_cm_event(x) \
__print_symbolic(x, RDMA_CM_EVENT_LIST)
此差异已折叠。
...@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(rpc_task_status, ...@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(rpc_task_status,
__entry->status = task->tk_status; __entry->status = task->tk_status;
), ),
TP_printk("task:%u@%u, status %d", TP_printk("task:%u@%u status=%d",
__entry->task_id, __entry->client_id, __entry->task_id, __entry->client_id,
__entry->status) __entry->status)
); );
...@@ -66,7 +66,7 @@ TRACE_EVENT(rpc_connect_status, ...@@ -66,7 +66,7 @@ TRACE_EVENT(rpc_connect_status,
__entry->status = status; __entry->status = status;
), ),
TP_printk("task:%u@%u, status %d", TP_printk("task:%u@%u status=%d",
__entry->task_id, __entry->client_id, __entry->task_id, __entry->client_id,
__entry->status) __entry->status)
); );
...@@ -175,7 +175,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued, ...@@ -175,7 +175,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
), ),
TP_fast_assign( TP_fast_assign(
__entry->client_id = clnt->cl_clid; __entry->client_id = clnt ? clnt->cl_clid : -1;
__entry->task_id = task->tk_pid; __entry->task_id = task->tk_pid;
__entry->timeout = task->tk_timeout; __entry->timeout = task->tk_timeout;
__entry->runstate = task->tk_runstate; __entry->runstate = task->tk_runstate;
...@@ -184,7 +184,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued, ...@@ -184,7 +184,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
__assign_str(q_name, rpc_qname(q)); __assign_str(q_name, rpc_qname(q));
), ),
TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
__entry->task_id, __entry->client_id, __entry->task_id, __entry->client_id,
__entry->flags, __entry->flags,
__entry->runstate, __entry->runstate,
...@@ -390,6 +390,10 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, ...@@ -390,6 +390,10 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
__entry->status) __entry->status)
); );
DEFINE_EVENT(rpc_xprt_event, xprt_timer,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status));
DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst, DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status)); TP_ARGS(xprt, xid, status));
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define NFS_PROGRAM 100003 #define NFS_PROGRAM 100003
#define NFS_PORT 2049 #define NFS_PORT 2049
#define NFS_RDMA_PORT 20049
#define NFS_MAXDATA 8192 #define NFS_MAXDATA 8192
#define NFS_MAXPATHLEN 1024 #define NFS_MAXPATHLEN 1024
#define NFS_MAXNAMLEN 255 #define NFS_MAXNAMLEN 255
......
...@@ -1375,22 +1375,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize ...@@ -1375,22 +1375,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
} }
EXPORT_SYMBOL_GPL(rpc_setbufsize); EXPORT_SYMBOL_GPL(rpc_setbufsize);
/**
* rpc_protocol - Get transport protocol number for an RPC client
* @clnt: RPC client to query
*
*/
int rpc_protocol(struct rpc_clnt *clnt)
{
int protocol;
rcu_read_lock();
protocol = rcu_dereference(clnt->cl_xprt)->prot;
rcu_read_unlock();
return protocol;
}
EXPORT_SYMBOL_GPL(rpc_protocol);
/** /**
* rpc_net_ns - Get the network namespace for this RPC client * rpc_net_ns - Get the network namespace for this RPC client
* @clnt: RPC client to query * @clnt: RPC client to query
......
...@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task) ...@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task)
void (*do_action)(struct rpc_task *); void (*do_action)(struct rpc_task *);
/* /*
* Execute any pending callback first. * Perform the next FSM step or a pending callback.
*
* tk_action may be NULL if the task has been killed.
* In particular, note that rpc_killall_tasks may
* do this at any time, so beware when dereferencing.
*/ */
do_action = task->tk_callback; do_action = task->tk_action;
task->tk_callback = NULL; if (task->tk_callback) {
if (do_action == NULL) { do_action = task->tk_callback;
/* task->tk_callback = NULL;
* Perform the next FSM step.
* tk_action may be NULL if the task has been killed.
* In particular, note that rpc_killall_tasks may
* do this at any time, so beware when dereferencing.
*/
do_action = task->tk_action;
if (do_action == NULL)
break;
} }
trace_rpc_task_run_action(task->tk_client, task, task->tk_action); if (!do_action)
break;
trace_rpc_task_run_action(task->tk_client, task, do_action);
do_action(task); do_action(task);
/* /*
......
...@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task) ...@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task)
if (task->tk_status != -ETIMEDOUT) if (task->tk_status != -ETIMEDOUT)
return; return;
dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
if (!req->rq_reply_bytes_recvd) { if (!req->rq_reply_bytes_recvd) {
if (xprt->ops->timer) if (xprt->ops->timer)
xprt->ops->timer(xprt, task); xprt->ops->timer(xprt, task);
......
...@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, ...@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt); req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
__set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL); DMA_TO_DEVICE, GFP_KERNEL);
...@@ -74,21 +73,13 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, ...@@ -74,21 +73,13 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count) unsigned int count)
{ {
struct rpcrdma_rep *rep;
int rc = 0; int rc = 0;
while (count--) { while (count--) {
rep = rpcrdma_create_rep(r_xprt); rc = rpcrdma_create_rep(r_xprt);
if (IS_ERR(rep)) { if (rc)
pr_err("RPC: %s: reply buffer alloc failed\n",
__func__);
rc = PTR_ERR(rep);
break; break;
}
rpcrdma_recv_buffer_put(rep);
} }
return rc; return rc;
} }
...@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) ...@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
rqst->rq_xprt = &r_xprt->rx_xprt; rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list); INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list); INIT_LIST_HEAD(&rqst->rq_bc_list);
__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
goto out_free; goto out_free;
...@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) ...@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
buffer->rb_bc_srv_max_requests = reqs; buffer->rb_bc_srv_max_requests = reqs;
request_module("svcrdma"); request_module("svcrdma");
trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0; return 0;
out_free: out_free:
...@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) ...@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
return maxmsg - RPCRDMA_HDRLEN_MIN; return maxmsg - RPCRDMA_HDRLEN_MIN;
} }
/** static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
* rpcrdma_bc_marshal_reply - Send backwards direction reply
* @rqst: buffer containing RPC reply data
*
* Returns zero on success.
*/
int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{ {
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
...@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) ...@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
&rqst->rq_snd_buf, rpcrdma_noch)) &rqst->rq_snd_buf, rpcrdma_noch))
return -EIO; return -EIO;
trace_xprtrdma_cb_reply(rqst);
return 0;
}
/**
* xprt_rdma_bc_send_reply - marshal and send a backchannel reply
* @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
*
* Caller holds the transport's write lock.
*
* Returns:
* %0 if the RPC message has been sent
* %-ENOTCONN if the caller should reconnect and call again
* %-EIO if a permanent error occurred and the request was not
* sent. Do not try to send this message again.
*/
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
int rc;
if (!xprt_connected(rqst->rq_xprt))
goto drop_connection;
rc = rpcrdma_bc_marshal_reply(rqst);
if (rc < 0)
goto failed_marshal;
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
return 0; return 0;
failed_marshal:
if (rc != -ENOTCONN)
return rc;
drop_connection:
xprt_disconnect_done(rqst->rq_xprt);
return -ENOTCONN;
} }
/** /**
...@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) ...@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
dprintk("RPC: %s: freeing rqst %p (req %p)\n", dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst)); __func__, rqst, rpcr_to_rdmar(rqst));
smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
smp_mb__after_atomic();
spin_lock_bh(&xprt->bc_pa_lock); spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock); spin_unlock_bh(&xprt->bc_pa_lock);
...@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) ...@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
/** /**
* rpcrdma_bc_receive_call - Handle a backward direction call * rpcrdma_bc_receive_call - Handle a backward direction call
* @xprt: transport receiving the call * @r_xprt: transport receiving the call
* @rep: receive buffer containing the call * @rep: receive buffer containing the call
* *
* Operational assumptions: * Operational assumptions:
...@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, ...@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst, rq_bc_pa_list); struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list); list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock); spin_unlock(&xprt->bc_pa_lock);
dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
/* Prepare rqst */ /* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0; rqst->rq_reply_bytes_recvd = 0;
...@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, ...@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
rqst->rq_xid = *p; rqst->rq_xid = *p;
rqst->rq_private_buf.len = size; rqst->rq_private_buf.len = size;
set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
buf = &rqst->rq_rcv_buf; buf = &rqst->rq_rcv_buf;
memset(buf, 0, sizeof(*buf)); memset(buf, 0, sizeof(*buf));
...@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, ...@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* the Upper Layer is done decoding it. * the Upper Layer is done decoding it.
*/ */
req = rpcr_to_rdmar(rqst); req = rpcr_to_rdmar(rqst);
dprintk("RPC: %s: attaching rep %p to req %p\n",
__func__, rep, req);
req->rl_reply = rep; req->rl_reply = rep;
trace_xprtrdma_cb_call(rqst);
/* Defeat the retransmit detection logic in send_request */
req->rl_connect_cookie = 0;
/* Queue rqst for ULP's callback service */ /* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv; bc_serv = xprt->bc_serv;
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (c) 2015 Oracle. All rights reserved. * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/ */
...@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia) ...@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
} }
static int static int
fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{ {
static struct ib_fmr_attr fmr_attr = { static struct ib_fmr_attr fmr_attr = {
.max_pages = RPCRDMA_MAX_FMR_SGES, .max_pages = RPCRDMA_MAX_FMR_SGES,
...@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) ...@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
.page_shift = PAGE_SHIFT .page_shift = PAGE_SHIFT
}; };
mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(u64), GFP_KERNEL); sizeof(u64), GFP_KERNEL);
if (!mw->fmr.fm_physaddrs) if (!mr->fmr.fm_physaddrs)
goto out_free; goto out_free;
mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(*mw->mw_sg), GFP_KERNEL); sizeof(*mr->mr_sg), GFP_KERNEL);
if (!mw->mw_sg) if (!mr->mr_sg)
goto out_free; goto out_free;
sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
&fmr_attr); &fmr_attr);
if (IS_ERR(mw->fmr.fm_mr)) if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err; goto out_fmr_err;
return 0; return 0;
out_fmr_err: out_fmr_err:
dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
PTR_ERR(mw->fmr.fm_mr)); PTR_ERR(mr->fmr.fm_mr));
out_free: out_free:
kfree(mw->mw_sg); kfree(mr->mr_sg);
kfree(mw->fmr.fm_physaddrs); kfree(mr->fmr.fm_physaddrs);
return -ENOMEM; return -ENOMEM;
} }
static int static int
__fmr_unmap(struct rpcrdma_mw *mw) __fmr_unmap(struct rpcrdma_mr *mr)
{ {
LIST_HEAD(l); LIST_HEAD(l);
int rc; int rc;
list_add(&mw->fmr.fm_mr->list, &l); list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l); rc = ib_unmap_fmr(&l);
list_del(&mw->fmr.fm_mr->list); list_del(&mr->fmr.fm_mr->list);
return rc; return rc;
} }
static void static void
fmr_op_release_mr(struct rpcrdma_mw *r) fmr_op_release_mr(struct rpcrdma_mr *mr)
{ {
LIST_HEAD(unmap_list); LIST_HEAD(unmap_list);
int rc; int rc;
/* Ensure MW is not on any rl_registered list */ /* Ensure MW is not on any rl_registered list */
if (!list_empty(&r->mw_list)) if (!list_empty(&mr->mr_list))
list_del(&r->mw_list); list_del(&mr->mr_list);
kfree(r->fmr.fm_physaddrs); kfree(mr->fmr.fm_physaddrs);
kfree(r->mw_sg); kfree(mr->mr_sg);
/* In case this one was left mapped, try to unmap it /* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY * to prevent dealloc_fmr from failing with EBUSY
*/ */
rc = __fmr_unmap(r); rc = __fmr_unmap(mr);
if (rc) if (rc)
pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
r, rc); mr, rc);
rc = ib_dealloc_fmr(r->fmr.fm_mr); rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc) if (rc)
pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
r, rc); mr, rc);
kfree(r); kfree(mr);
} }
/* Reset of a single FMR. /* Reset of a single FMR.
*/ */
static void static void
fmr_op_recover_mr(struct rpcrdma_mw *mw) fmr_op_recover_mr(struct rpcrdma_mr *mr)
{ {
struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
int rc; int rc;
/* ORDER: invalidate first */ /* ORDER: invalidate first */
rc = __fmr_unmap(mw); rc = __fmr_unmap(mr);
/* ORDER: then DMA unmap */
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
if (rc) if (rc)
goto out_release; goto out_release;
rpcrdma_put_mw(r_xprt, mw); /* ORDER: then DMA unmap */
rpcrdma_mr_unmap_and_put(mr);
r_xprt->rx_stats.mrs_recovered++; r_xprt->rx_stats.mrs_recovered++;
return; return;
out_release: out_release:
pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++; r_xprt->rx_stats.mrs_orphaned++;
spin_lock(&r_xprt->rx_buf.rb_mwlock); trace_xprtrdma_dma_unmap(mr);
list_del(&mw->mw_all); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
spin_unlock(&r_xprt->rx_buf.rb_mwlock); mr->mr_sg, mr->mr_nents, mr->mr_dir);
spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mr->mr_all);
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
fmr_op_release_mr(mw); fmr_op_release_mr(mr);
} }
static int static int
...@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) ...@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
*/ */
static struct rpcrdma_mr_seg * static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out) int nsegs, bool writing, struct rpcrdma_mr **out)
{ {
struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_mr_seg *seg1 = seg;
int len, pageoff, i, rc; int len, pageoff, i, rc;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
u64 *dma_pages; u64 *dma_pages;
mw = rpcrdma_get_mw(r_xprt); mr = rpcrdma_mr_get(r_xprt);
if (!mw) if (!mr)
return ERR_PTR(-ENOBUFS); return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset); pageoff = offset_in_page(seg1->mr_offset);
...@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES; nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) { for (i = 0; i < nsegs;) {
if (seg->mr_page) if (seg->mr_page)
sg_set_page(&mw->mw_sg[i], sg_set_page(&mr->mr_sg[i],
seg->mr_page, seg->mr_page,
seg->mr_len, seg->mr_len,
offset_in_page(seg->mr_offset)); offset_in_page(seg->mr_offset));
else else
sg_set_buf(&mw->mw_sg[i], seg->mr_offset, sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len); seg->mr_len);
len += seg->mr_len; len += seg->mr_len;
++seg; ++seg;
...@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break; break;
} }
mw->mw_dir = rpcrdma_data_dir(writing); mr->mr_dir = rpcrdma_data_dir(writing);
mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, i, mw->mw_dir); mr->mr_sg, i, mr->mr_dir);
if (!mw->mw_nents) if (!mr->mr_nents)
goto out_dmamap_err; goto out_dmamap_err;
for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
dma_pages[0]); dma_pages[0]);
if (rc) if (rc)
goto out_maperr; goto out_maperr;
mw->mw_handle = mw->fmr.fm_mr->rkey; mr->mr_handle = mr->fmr.fm_mr->rkey;
mw->mw_length = len; mr->mr_length = len;
mw->mw_offset = dma_pages[0] + pageoff; mr->mr_offset = dma_pages[0] + pageoff;
*out = mw; *out = mr;
return seg; return seg;
out_dmamap_err: out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i); mr->mr_sg, i);
rpcrdma_put_mw(r_xprt, mw); rpcrdma_mr_put(mr);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
out_maperr: out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0], len, (unsigned long long)dma_pages[0],
pageoff, mw->mw_nents, rc); pageoff, mr->mr_nents, rc);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, rpcrdma_mr_unmap_and_put(mr);
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
} }
...@@ -256,13 +256,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -256,13 +256,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
* Sleeps until it is safe for the host CPU to access the * Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions. * previously mapped memory regions.
* *
* Caller ensures that @mws is not empty before the call. This * Caller ensures that @mrs is not empty before the call. This
* function empties the list. * function empties the list.
*/ */
static void static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{ {
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
LIST_HEAD(unmap_list); LIST_HEAD(unmap_list);
int rc; int rc;
...@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* ib_unmap_fmr() is slow, so use a single call instead * ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR. * of one call per mapped FMR.
*/ */
list_for_each_entry(mw, mws, mw_list) { list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n", dprintk("RPC: %s: unmapping fmr %p\n",
__func__, &mw->fmr); __func__, &mr->fmr);
list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); trace_xprtrdma_localinv(mr);
list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
} }
r_xprt->rx_stats.local_inv_needed++; r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list); rc = ib_unmap_fmr(&unmap_list);
...@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
/* ORDER: Now DMA unmap all of the req's MRs, and return /* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list. * them to the free MW list.
*/ */
while (!list_empty(mws)) { while (!list_empty(mrs)) {
mw = rpcrdma_pop_mw(mws); mr = rpcrdma_mr_pop(mrs);
dprintk("RPC: %s: DMA unmapping fmr %p\n", list_del(&mr->fmr.fm_mr->list);
__func__, &mw->fmr); rpcrdma_mr_unmap_and_put(mr);
list_del(&mw->fmr.fm_mr->list);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
} }
return; return;
...@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
out_reset: out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
while (!list_empty(mws)) { while (!list_empty(mrs)) {
mw = rpcrdma_pop_mw(mws); mr = rpcrdma_mr_pop(mrs);
list_del(&mw->fmr.fm_mr->list); list_del(&mr->fmr.fm_mr->list);
fmr_op_recover_mr(mw); fmr_op_recover_mr(mr);
} }
} }
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (c) 2015 Oracle. All rights reserved. * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/ */
/* Lightweight memory registration using Fast Registration Work /* Lightweight memory registration using Fast Registration Work
* Requests (FRWR). Also referred to sometimes as FRMR mode. * Requests (FRWR).
* *
* FRWR features ordered asynchronous registration and deregistration * FRWR features ordered asynchronous registration and deregistration
* of arbitrarily sized memory regions. This is the fastest and safest * of arbitrarily sized memory regions. This is the fastest and safest
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
/* Normal operation /* Normal operation
* *
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
* Work Request (frmr_op_map). When the RDMA operation is finished, this * Work Request (frwr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request * Memory Region is invalidated using a LOCAL_INV Work Request
* (frmr_op_unmap). * (frwr_op_unmap_sync).
* *
* Typically these Work Requests are not signaled, and neither are RDMA * Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to * SEND Work Requests (with the exception of signaling occasionally to
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
* *
* As an optimization, frwr_op_unmap marks MRs INVALID before the * As an optimization, frwr_op_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
* rb_mws immediately so that no work (like managing a linked list * rb_mrs immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall. * under a spinlock) is needed in the completion upcall.
* *
* But this means that frwr_op_map() can occasionally encounter an MR * But this means that frwr_op_map() can occasionally encounter an MR
...@@ -60,7 +60,7 @@ ...@@ -60,7 +60,7 @@
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery * with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the * allocates fresh resources, it is deferred to a workqueue, and the
* recovered MRs are placed back on the rb_mws list when recovery is * recovered MRs are placed back on the rb_mrs list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while * complete. frwr_op_map allocates another MR for the current RPC while
* the broken MR is reset. * the broken MR is reset.
* *
...@@ -96,26 +96,26 @@ frwr_is_supported(struct rpcrdma_ia *ia) ...@@ -96,26 +96,26 @@ frwr_is_supported(struct rpcrdma_ia *ia)
} }
static int static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{ {
unsigned int depth = ia->ri_max_frmr_depth; unsigned int depth = ia->ri_max_frwr_depth;
struct rpcrdma_frmr *f = &r->frmr; struct rpcrdma_frwr *frwr = &mr->frwr;
int rc; int rc;
f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
if (IS_ERR(f->fr_mr)) if (IS_ERR(frwr->fr_mr))
goto out_mr_err; goto out_mr_err;
r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
if (!r->mw_sg) if (!mr->mr_sg)
goto out_list_err; goto out_list_err;
sg_init_table(r->mw_sg, depth); sg_init_table(mr->mr_sg, depth);
init_completion(&f->fr_linv_done); init_completion(&frwr->fr_linv_done);
return 0; return 0;
out_mr_err: out_mr_err:
rc = PTR_ERR(f->fr_mr); rc = PTR_ERR(frwr->fr_mr);
dprintk("RPC: %s: ib_alloc_mr status %i\n", dprintk("RPC: %s: ib_alloc_mr status %i\n",
__func__, rc); __func__, rc);
return rc; return rc;
...@@ -124,83 +124,85 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) ...@@ -124,83 +124,85 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
rc = -ENOMEM; rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n", dprintk("RPC: %s: sg allocation failure\n",
__func__); __func__);
ib_dereg_mr(f->fr_mr); ib_dereg_mr(frwr->fr_mr);
return rc; return rc;
} }
static void static void
frwr_op_release_mr(struct rpcrdma_mw *r) frwr_op_release_mr(struct rpcrdma_mr *mr)
{ {
int rc; int rc;
/* Ensure MW is not on any rl_registered list */ /* Ensure MR is not on any rl_registered list */
if (!list_empty(&r->mw_list)) if (!list_empty(&mr->mr_list))
list_del(&r->mw_list); list_del(&mr->mr_list);
rc = ib_dereg_mr(r->frmr.fr_mr); rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc) if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
r, rc); mr, rc);
kfree(r->mw_sg); kfree(mr->mr_sg);
kfree(r); kfree(mr);
} }
static int static int
__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) __frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{ {
struct rpcrdma_frmr *f = &r->frmr; struct rpcrdma_frwr *frwr = &mr->frwr;
int rc; int rc;
rc = ib_dereg_mr(f->fr_mr); rc = ib_dereg_mr(frwr->fr_mr);
if (rc) { if (rc) {
pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
rc, r); rc, mr);
return rc; return rc;
} }
f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
ia->ri_max_frmr_depth); ia->ri_max_frwr_depth);
if (IS_ERR(f->fr_mr)) { if (IS_ERR(frwr->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
PTR_ERR(f->fr_mr), r); PTR_ERR(frwr->fr_mr), mr);
return PTR_ERR(f->fr_mr); return PTR_ERR(frwr->fr_mr);
} }
dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
f->fr_state = FRMR_IS_INVALID; frwr->fr_state = FRWR_IS_INVALID;
return 0; return 0;
} }
/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. /* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
*/ */
static void static void
frwr_op_recover_mr(struct rpcrdma_mw *mw) frwr_op_recover_mr(struct rpcrdma_mr *mr)
{ {
enum rpcrdma_frmr_state state = mw->frmr.fr_state; enum rpcrdma_frwr_state state = mr->frwr.fr_state;
struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc; int rc;
rc = __frwr_reset_mr(ia, mw); rc = __frwr_mr_reset(ia, mr);
if (state != FRMR_FLUSHED_LI) if (state != FRWR_FLUSHED_LI) {
trace_xprtrdma_dma_unmap(mr);
ib_dma_unmap_sg(ia->ri_device, ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir); mr->mr_sg, mr->mr_nents, mr->mr_dir);
}
if (rc) if (rc)
goto out_release; goto out_release;
rpcrdma_put_mw(r_xprt, mw); rpcrdma_mr_put(mr);
r_xprt->rx_stats.mrs_recovered++; r_xprt->rx_stats.mrs_recovered++;
return; return;
out_release: out_release:
pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw); pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++; r_xprt->rx_stats.mrs_orphaned++;
spin_lock(&r_xprt->rx_buf.rb_mwlock); spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mw->mw_all); list_del(&mr->mr_all);
spin_unlock(&r_xprt->rx_buf.rb_mwlock); spin_unlock(&r_xprt->rx_buf.rb_mrlock);
frwr_op_release_mr(mw); frwr_op_release_mr(mr);
} }
static int static int
...@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ...@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
ia->ri_max_frmr_depth = ia->ri_max_frwr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
attrs->max_fast_reg_page_list_len); attrs->max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n", dprintk("RPC: %s: device's max FR page list len = %u\n",
__func__, ia->ri_max_frmr_depth); __func__, ia->ri_max_frwr_depth);
/* Add room for frmr register and invalidate WRs. /* Add room for frwr register and invalidate WRs.
* 1. FRMR reg WR for head * 1. FRWR reg WR for head
* 2. FRMR invalidate WR for head * 2. FRWR invalidate WR for head
* 3. N FRMR reg WRs for pagelist * 3. N FRWR reg WRs for pagelist
* 4. N FRMR invalidate WRs for pagelist * 4. N FRWR invalidate WRs for pagelist
* 5. FRMR reg WR for tail * 5. FRWR reg WR for tail
* 6. FRMR invalidate WR for tail * 6. FRWR invalidate WR for tail
* 7. The RDMA_SEND WR * 7. The RDMA_SEND WR
*/ */
depth = 7; depth = 7;
/* Calculate N if the device max FRMR depth is smaller than /* Calculate N if the device max FRWR depth is smaller than
* RPCRDMA_MAX_DATA_SEGS. * RPCRDMA_MAX_DATA_SEGS.
*/ */
if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
do { do {
depth += 2; /* FRMR reg + invalidate */ depth += 2; /* FRWR reg + invalidate */
delta -= ia->ri_max_frmr_depth; delta -= ia->ri_max_frwr_depth;
} while (delta > 0); } while (delta > 0);
} }
...@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ...@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
} }
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frmr_depth); ia->ri_max_frwr_depth);
return 0; return 0;
} }
...@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) ...@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth); RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
} }
static void static void
...@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) ...@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
static void static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{ {
struct rpcrdma_frmr *frmr; struct ib_cqe *cqe = wc->wr_cqe;
struct ib_cqe *cqe; struct rpcrdma_frwr *frwr =
container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe; frwr->fr_state = FRWR_FLUSHED_FR;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
frmr->fr_state = FRMR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg"); __frwr_sendcompletion_flush(wc, "fastreg");
} }
trace_xprtrdma_wc_fastreg(wc, frwr);
} }
/** /**
...@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) ...@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
static void static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{ {
struct rpcrdma_frmr *frmr; struct ib_cqe *cqe = wc->wr_cqe;
struct ib_cqe *cqe; struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe; frwr->fr_state = FRWR_FLUSHED_LI;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
frmr->fr_state = FRMR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv"); __frwr_sendcompletion_flush(wc, "localinv");
} }
trace_xprtrdma_wc_li(wc, frwr);
} }
/** /**
...@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) ...@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
static void static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{ {
struct rpcrdma_frmr *frmr; struct ib_cqe *cqe = wc->wr_cqe;
struct ib_cqe *cqe; struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
cqe = wc->wr_cqe;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
frmr->fr_state = FRMR_FLUSHED_LI; frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv"); __frwr_sendcompletion_flush(wc, "localinv");
} }
complete(&frmr->fr_linv_done); complete(&frwr->fr_linv_done);
trace_xprtrdma_wc_li_wake(wc, frwr);
} }
/* Post a REG_MR Work Request to register a memory region /* Post a REG_MR Work Request to register a memory region
...@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) ...@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
*/ */
static struct rpcrdma_mr_seg * static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out) int nsegs, bool writing, struct rpcrdma_mr **out)
{ {
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
struct rpcrdma_mw *mw; struct rpcrdma_frwr *frwr;
struct rpcrdma_frmr *frmr; struct rpcrdma_mr *mr;
struct ib_mr *mr; struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr; struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr; struct ib_send_wr *bad_wr;
int rc, i, n; int rc, i, n;
u8 key; u8 key;
mw = NULL; mr = NULL;
do { do {
if (mw) if (mr)
rpcrdma_defer_mr_recovery(mw); rpcrdma_mr_defer_recovery(mr);
mw = rpcrdma_get_mw(r_xprt); mr = rpcrdma_mr_get(r_xprt);
if (!mw) if (!mr)
return ERR_PTR(-ENOBUFS); return ERR_PTR(-ENOBUFS);
} while (mw->frmr.fr_state != FRMR_IS_INVALID); } while (mr->frwr.fr_state != FRWR_IS_INVALID);
frmr = &mw->frmr; frwr = &mr->frwr;
frmr->fr_state = FRMR_IS_VALID; frwr->fr_state = FRWR_IS_VALID;
mr = frmr->fr_mr;
reg_wr = &frmr->fr_regwr; if (nsegs > ia->ri_max_frwr_depth)
nsegs = ia->ri_max_frwr_depth;
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
for (i = 0; i < nsegs;) { for (i = 0; i < nsegs;) {
if (seg->mr_page) if (seg->mr_page)
sg_set_page(&mw->mw_sg[i], sg_set_page(&mr->mr_sg[i],
seg->mr_page, seg->mr_page,
seg->mr_len, seg->mr_len,
offset_in_page(seg->mr_offset)); offset_in_page(seg->mr_offset));
else else
sg_set_buf(&mw->mw_sg[i], seg->mr_offset, sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len); seg->mr_len);
++seg; ++seg;
...@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break; break;
} }
mw->mw_dir = rpcrdma_data_dir(writing); mr->mr_dir = rpcrdma_data_dir(writing);
mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir); mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
if (!mw->mw_nents) if (!mr->mr_nents)
goto out_dmamap_err; goto out_dmamap_err;
n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE); ibmr = frwr->fr_mr;
if (unlikely(n != mw->mw_nents)) n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
if (unlikely(n != mr->mr_nents))
goto out_mapmr_err; goto out_mapmr_err;
dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", key = (u8)(ibmr->rkey & 0x000000FF);
__func__, frmr, mw->mw_nents, mr->length); ib_update_fast_reg_key(ibmr, ++key);
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
reg_wr = &frwr->fr_regwr;
reg_wr->wr.next = NULL; reg_wr->wr.next = NULL;
reg_wr->wr.opcode = IB_WR_REG_MR; reg_wr->wr.opcode = IB_WR_REG_MR;
frmr->fr_cqe.done = frwr_wc_fastreg; frwr->fr_cqe.done = frwr_wc_fastreg;
reg_wr->wr.wr_cqe = &frmr->fr_cqe; reg_wr->wr.wr_cqe = &frwr->fr_cqe;
reg_wr->wr.num_sge = 0; reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0; reg_wr->wr.send_flags = 0;
reg_wr->mr = mr; reg_wr->mr = ibmr;
reg_wr->key = mr->rkey; reg_wr->key = ibmr->rkey;
reg_wr->access = writing ? reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ; IB_ACCESS_REMOTE_READ;
...@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (rc) if (rc)
goto out_senderr; goto out_senderr;
mw->mw_handle = mr->rkey; mr->mr_handle = ibmr->rkey;
mw->mw_length = mr->length; mr->mr_length = ibmr->length;
mw->mw_offset = mr->iova; mr->mr_offset = ibmr->iova;
*out = mw; *out = mr;
return seg; return seg;
out_dmamap_err: out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i); mr->mr_sg, i);
frmr->fr_state = FRMR_IS_INVALID; frwr->fr_state = FRWR_IS_INVALID;
rpcrdma_put_mw(r_xprt, mw); rpcrdma_mr_put(mr);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
out_mapmr_err: out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
frmr->fr_mr, n, mw->mw_nents); frwr->fr_mr, n, mr->mr_nents);
rpcrdma_defer_mr_recovery(mw); rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
out_senderr: out_senderr:
pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
rpcrdma_defer_mr_recovery(mw); rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-ENOTCONN); return ERR_PTR(-ENOTCONN);
} }
/* Handle a remotely invalidated mr on the @mrs list
*/
static void
frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
struct rpcrdma_mr *mr;
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del(&mr->mr_list);
trace_xprtrdma_remoteinv(mr);
mr->frwr.fr_state = FRWR_IS_INVALID;
rpcrdma_mr_unmap_and_put(mr);
break; /* only one invalidated MR per RPC */
}
}
/* Invalidate all memory regions that were registered for "req". /* Invalidate all memory regions that were registered for "req".
* *
* Sleeps until it is safe for the host CPU to access the * Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions. * previously mapped memory regions.
* *
* Caller ensures that @mws is not empty before the call. This * Caller ensures that @mrs is not empty before the call. This
* function empties the list. * function empties the list.
*/ */
static void static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{ {
struct ib_send_wr *first, **prev, *last, *bad_wr; struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_frmr *f; struct rpcrdma_frwr *frwr;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
int count, rc; int count, rc;
/* ORDER: Invalidate all of the MRs first /* ORDER: Invalidate all of the MRs first
...@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* Chain the LOCAL_INV Work Requests and post them with * Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call. * a single ib_post_send() call.
*/ */
f = NULL; frwr = NULL;
count = 0; count = 0;
prev = &first; prev = &first;
list_for_each_entry(mw, mws, mw_list) { list_for_each_entry(mr, mrs, mr_list) {
mw->frmr.fr_state = FRMR_IS_INVALID; mr->frwr.fr_state = FRWR_IS_INVALID;
if (mw->mw_flags & RPCRDMA_MW_F_RI) frwr = &mr->frwr;
continue; trace_xprtrdma_localinv(mr);
f = &mw->frmr; frwr->fr_cqe.done = frwr_wc_localinv;
dprintk("RPC: %s: invalidating frmr %p\n", last = &frwr->fr_invwr;
__func__, f);
f->fr_cqe.done = frwr_wc_localinv;
last = &f->fr_invwr;
memset(last, 0, sizeof(*last)); memset(last, 0, sizeof(*last));
last->wr_cqe = &f->fr_cqe; last->wr_cqe = &frwr->fr_cqe;
last->opcode = IB_WR_LOCAL_INV; last->opcode = IB_WR_LOCAL_INV;
last->ex.invalidate_rkey = mw->mw_handle; last->ex.invalidate_rkey = mr->mr_handle;
count++; count++;
*prev = last; *prev = last;
prev = &last->next; prev = &last->next;
} }
if (!f) if (!frwr)
goto unmap; goto unmap;
/* Strong send queue ordering guarantees that when the /* Strong send queue ordering guarantees that when the
...@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* are complete. * are complete.
*/ */
last->send_flags = IB_SEND_SIGNALED; last->send_flags = IB_SEND_SIGNALED;
f->fr_cqe.done = frwr_wc_localinv_wake; frwr->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&f->fr_linv_done); reinit_completion(&frwr->fr_linv_done);
/* Transport disconnect drains the receive CQ before it /* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us * replaces the QP. The RPC reply handler won't call us
...@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
bad_wr = NULL; bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (bad_wr != first) if (bad_wr != first)
wait_for_completion(&f->fr_linv_done); wait_for_completion(&frwr->fr_linv_done);
if (rc) if (rc)
goto reset_mrs; goto reset_mrs;
/* ORDER: Now DMA unmap all of the MRs, and return /* ORDER: Now DMA unmap all of the MRs, and return
* them to the free MW list. * them to the free MR list.
*/ */
unmap: unmap:
while (!list_empty(mws)) { while (!list_empty(mrs)) {
mw = rpcrdma_pop_mw(mws); mr = rpcrdma_mr_pop(mrs);
dprintk("RPC: %s: DMA unmapping frmr %p\n", rpcrdma_mr_unmap_and_put(mr);
__func__, &mw->frmr);
ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
} }
return; return;
reset_mrs: reset_mrs:
pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc); pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
/* Find and reset the MRs in the LOCAL_INV WRs that did not /* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted. * get posted.
*/ */
while (bad_wr) { while (bad_wr) {
f = container_of(bad_wr, struct rpcrdma_frmr, frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr); fr_invwr);
mw = container_of(f, struct rpcrdma_mw, frmr); mr = container_of(frwr, struct rpcrdma_mr, frwr);
__frwr_reset_mr(ia, mw); __frwr_mr_reset(ia, mr);
bad_wr = bad_wr->next; bad_wr = bad_wr->next;
} }
...@@ -553,6 +561,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -553,6 +561,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map, .ro_map = frwr_op_map,
.ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync, .ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr, .ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open, .ro_open = frwr_op_open,
......
/* /*
* Copyright (c) 2015 Oracle. All rights reserved. * Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/ */
/* rpcrdma.ko module initialization /* rpcrdma.ko module initialization
*/ */
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h> #include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #include <asm/swab.h>
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif #define CREATE_TRACE_POINTS
#include "xprt_rdma.h"
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport"); MODULE_DESCRIPTION("RPC/RDMA Transport");
......
...@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr) ...@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
} }
static void static void
xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{ {
*iptr++ = cpu_to_be32(mw->mw_handle); *iptr++ = cpu_to_be32(mr->mr_handle);
*iptr++ = cpu_to_be32(mw->mw_length); *iptr++ = cpu_to_be32(mr->mr_length);
xdr_encode_hyper(iptr, mw->mw_offset); xdr_encode_hyper(iptr, mr->mr_offset);
} }
static int static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{ {
__be32 *p; __be32 *p;
...@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) ...@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
if (unlikely(!p)) if (unlikely(!p))
return -EMSGSIZE; return -EMSGSIZE;
xdr_encode_rdma_segment(p, mw); xdr_encode_rdma_segment(p, mr);
return 0; return 0;
} }
static int static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
u32 position) u32 position)
{ {
__be32 *p; __be32 *p;
...@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, ...@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
*p++ = xdr_one; /* Item present */ *p++ = xdr_one; /* Item present */
*p++ = cpu_to_be32(position); *p++ = cpu_to_be32(position);
xdr_encode_rdma_segment(p, mw); xdr_encode_rdma_segment(p, mr);
return 0; return 0;
} }
...@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{ {
struct xdr_stream *xdr = &req->rl_stream; struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg; struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
unsigned int pos; unsigned int pos;
int nsegs; int nsegs;
...@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
do { do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
false, &mw); false, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
return PTR_ERR(seg); return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_read_segment(xdr, mw, pos) < 0) if (encode_read_segment(xdr, mr, pos) < 0)
return -EMSGSIZE; return -EMSGSIZE;
dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
rqst->rq_task->tk_pid, __func__, pos,
mw->mw_length, (unsigned long long)mw->mw_offset,
mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.read_chunk_count++; r_xprt->rx_stats.read_chunk_count++;
nsegs -= mw->mw_nents; nsegs -= mr->mr_nents;
} while (nsegs); } while (nsegs);
return 0; return 0;
...@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{ {
struct xdr_stream *xdr = &req->rl_stream; struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg; struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
int nsegs, nchunks; int nsegs, nchunks;
__be32 *segcount; __be32 *segcount;
...@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0; nchunks = 0;
do { do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mw); true, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
return PTR_ERR(seg); return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mw) < 0) if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE; return -EMSGSIZE;
dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
rqst->rq_task->tk_pid, __func__,
mw->mw_length, (unsigned long long)mw->mw_offset,
mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.write_chunk_count++; r_xprt->rx_stats.write_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len; r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++; nchunks++;
nsegs -= mw->mw_nents; nsegs -= mr->mr_nents;
} while (nsegs); } while (nsegs);
/* Update count of segments in this Write chunk */ /* Update count of segments in this Write chunk */
...@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{ {
struct xdr_stream *xdr = &req->rl_stream; struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg; struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
int nsegs, nchunks; int nsegs, nchunks;
__be32 *segcount; __be32 *segcount;
...@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0; nchunks = 0;
do { do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mw); true, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
return PTR_ERR(seg); return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mw) < 0) if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE; return -EMSGSIZE;
dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
rqst->rq_task->tk_pid, __func__,
mw->mw_length, (unsigned long long)mw->mw_offset,
mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.reply_chunk_count++; r_xprt->rx_stats.reply_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len; r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++; nchunks++;
nsegs -= mw->mw_nents; nsegs -= mr->mr_nents;
} while (nsegs); } while (nsegs);
/* Update count of segments in the Reply chunk */ /* Update count of segments in the Reply chunk */
...@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) ...@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
struct ib_sge *sge; struct ib_sge *sge;
unsigned int count; unsigned int count;
dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
__func__, sc->sc_unmap_count, sc);
/* The first two SGEs contain the transport header and /* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so * the inline buffer. These are always left mapped so
* they can be cheaply re-used. * they can be cheaply re-used.
...@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) ...@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
__be32 *p; __be32 *p;
int ret; int ret;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
return rpcrdma_bc_marshal_reply(rqst);
#endif
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf, xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base); req->rl_rdmabuf->rg_base);
...@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) ...@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rtype = rpcrdma_areadch; rtype = rpcrdma_areadch;
} }
/* If this is a retransmit, discard previously registered
* chunks. Very likely the connection has been replaced,
* so these registrations are invalid and unusable.
*/
while (unlikely(!list_empty(&req->rl_registered))) {
struct rpcrdma_mr *mr;
mr = rpcrdma_mr_pop(&req->rl_registered);
rpcrdma_mr_defer_recovery(mr);
}
/* This implementation supports the following combinations /* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message: * of chunk lists in one RPC-over-RDMA Call message:
* *
...@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) ...@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
if (ret) if (ret)
goto out_err; goto out_err;
dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
rqst->rq_task->tk_pid, __func__,
transfertypes[rtype], transfertypes[wtype],
xdr_stream_pos(xdr));
ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
&rqst->rq_snd_buf, rtype); &rqst->rq_snd_buf, rtype);
...@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) ...@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
curlen = rqst->rq_rcv_buf.head[0].iov_len; curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len) if (curlen > copy_len)
curlen = copy_len; curlen = copy_len;
dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", trace_xprtrdma_fixup(rqst, copy_len, curlen);
__func__, srcp, copy_len, curlen);
srcp += curlen; srcp += curlen;
copy_len -= curlen; copy_len -= curlen;
...@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) ...@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > pagelist_len) if (curlen > pagelist_len)
curlen = pagelist_len; curlen = pagelist_len;
dprintk("RPC: %s: page %d" trace_xprtrdma_fixup_pg(rqst, i, srcp,
" srcp 0x%p len %d curlen %d\n", copy_len, curlen);
__func__, i, srcp, copy_len, curlen);
destp = kmap_atomic(ppages[i]); destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen); memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]); flush_dcache_page(ppages[i]);
...@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) ...@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count; return fixup_copy_count;
} }
/* Caller must guarantee @rep remains stable during this call.
*/
static void
rpcrdma_mark_remote_invalidation(struct list_head *mws,
struct rpcrdma_rep *rep)
{
struct rpcrdma_mw *mw;
if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
return;
list_for_each_entry(mw, mws, mw_list)
if (mw->mw_handle == rep->rr_inv_rkey) {
mw->mw_flags = RPCRDMA_MW_F_RI;
break; /* only one invalidated MR per RPC */
}
}
/* By convention, backchannel calls arrive via rdma_msg type /* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes * messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is * the RPC/RDMA header small and fixed in size, so it is
...@@ -1058,26 +1026,19 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) ...@@ -1058,26 +1026,19 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{ {
u32 handle;
u64 offset;
__be32 *p; __be32 *p;
p = xdr_inline_decode(xdr, 4 * sizeof(*p)); p = xdr_inline_decode(xdr, 4 * sizeof(*p));
if (unlikely(!p)) if (unlikely(!p))
return -EIO; return -EIO;
ifdebug(FACILITY) { handle = be32_to_cpup(p++);
u64 offset; *length = be32_to_cpup(p++);
u32 handle; xdr_decode_hyper(p, &offset);
handle = be32_to_cpup(p++);
*length = be32_to_cpup(p++);
xdr_decode_hyper(p, &offset);
dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
__func__, *length, (unsigned long long)offset,
handle);
} else {
*length = be32_to_cpup(p + 1);
}
trace_xprtrdma_decode_seg(handle, *length, offset);
return 0; return 0;
} }
...@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length) ...@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
*length += seglength; *length += seglength;
} }
dprintk("RPC: %s: segcount=%u, %u bytes\n",
__func__, be32_to_cpup(p), *length);
return 0; return 0;
} }
...@@ -1296,8 +1255,7 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep) ...@@ -1296,8 +1255,7 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
* being marshaled. * being marshaled.
*/ */
out_badheader: out_badheader:
dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", trace_xprtrdma_reply_hdr(rep);
rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
r_xprt->rx_stats.bad_reply_count++; r_xprt->rx_stats.bad_reply_count++;
status = -EIO; status = -EIO;
goto out; goto out;
...@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work) ...@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
struct rpcrdma_rep *rep = struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work); container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); trace_xprtrdma_defer_cmp(rep);
rpcrdma_release_rqst(rep->rr_rxprt, req); if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
rpcrdma_release_rqst(r_xprt, req);
rpcrdma_complete_rqst(rep); rpcrdma_complete_rqst(rep);
} }
...@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits; u32 credits;
__be32 *p; __be32 *p;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
if (rep->rr_hdrbuf.head[0].iov_len == 0) if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus; goto out_badstatus;
...@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rep->rr_rqst = rqst; rep->rr_rqst = rqst;
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
__func__, rep, req, be32_to_cpu(rep->rr_xid));
queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
return; return;
...@@ -1420,8 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -1420,8 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
return; return;
out_badversion: out_badversion:
dprintk("RPC: %s: invalid version %d\n", trace_xprtrdma_reply_vers(rep);
__func__, be32_to_cpu(rep->rr_vers));
goto repost; goto repost;
/* The RPC transaction has already been terminated, or the header /* The RPC transaction has already been terminated, or the header
...@@ -1429,12 +1386,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -1429,12 +1386,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
*/ */
out_norqst: out_norqst:
spin_unlock(&xprt->recv_lock); spin_unlock(&xprt->recv_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x\n", trace_xprtrdma_reply_rqst(rep);
__func__, be32_to_cpu(rep->rr_xid));
goto repost; goto repost;
out_shortreply: out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__); trace_xprtrdma_reply_short(rep);
/* If no pending RPC transaction was matched, post a replacement /* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning. * receive buffer before returning.
......
...@@ -67,8 +67,7 @@ ...@@ -67,8 +67,7 @@
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize; int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
...@@ -81,6 +80,7 @@ static unsigned int zero; ...@@ -81,6 +80,7 @@ static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE; static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1; static unsigned int max_memreg = RPCRDMA_LAST - 1;
static unsigned int dummy;
static struct ctl_table_header *sunrpc_table_header; static struct ctl_table_header *sunrpc_table_header;
...@@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = { ...@@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = {
}, },
{ {
.procname = "rdma_inline_write_padding", .procname = "rdma_inline_write_padding",
.data = &xprt_rdma_inline_write_padding, .data = &dummy,
.maxlen = sizeof(unsigned int), .maxlen = sizeof(unsigned int),
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_minmax, .proc_handler = proc_dointvec_minmax,
...@@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work) ...@@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt_clear_connected(xprt); xprt_clear_connected(xprt);
dprintk("RPC: %s: %sconnect\n", __func__,
r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
if (rc) if (rc)
xprt_wake_pending_tasks(xprt, rc); xprt_wake_pending_tasks(xprt, rc);
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt); xprt_clear_connecting(xprt);
} }
...@@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) ...@@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
rx_xprt); rx_xprt);
pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); trace_xprtrdma_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id); rdma_disconnect(r_xprt->rx_ia.ri_id);
} }
...@@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) ...@@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
{ {
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: called\n", __func__); trace_xprtrdma_destroy(r_xprt);
cancel_delayed_work_sync(&r_xprt->rx_connect_worker); cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
...@@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) ...@@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
rpcrdma_ia_close(&r_xprt->rx_ia); rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt); xprt_rdma_free_addresses(xprt);
xprt_free(xprt); xprt_free(xprt);
dprintk("RPC: %s: returning\n", __func__);
module_put(THIS_MODULE); module_put(THIS_MODULE);
} }
...@@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args)
/* /*
* Set up RDMA-specific connect data. * Set up RDMA-specific connect data.
*/ */
sap = args->dstaddr;
sap = (struct sockaddr *)&cdata.addr;
memcpy(sap, args->dstaddr, args->addrlen);
/* Ensure xprt->addr holds valid server TCP (not RDMA) /* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */ * address, for any side protocols which peek at it */
...@@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args)
if (rpc_get_port(sap)) if (rpc_get_port(sap))
xprt_set_bound(xprt); xprt_set_bound(xprt);
xprt_rdma_format_addresses(xprt, sap);
cdata.max_requests = xprt->max_reqs; cdata.max_requests = xprt->max_reqs;
...@@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args)
if (cdata.inline_rsize > cdata.rsize) if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize; cdata.inline_rsize = cdata.rsize;
cdata.padding = xprt_rdma_inline_write_padding;
/* /*
* Create new transport instance, which includes initialized * Create new transport instance, which includes initialized
* o ia * o ia
...@@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args)
new_xprt = rpcx_to_rdmax(xprt); new_xprt = rpcx_to_rdmax(xprt);
rc = rpcrdma_ia_open(new_xprt, sap); rc = rpcrdma_ia_open(new_xprt);
if (rc) if (rc)
goto out1; goto out1;
...@@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args)
*/ */
new_xprt->rx_data = cdata; new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep; new_ep = &new_xprt->rx_ep;
new_ep->rep_remote_addr = cdata.addr;
rc = rpcrdma_ep_create(&new_xprt->rx_ep, rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data); &new_xprt->rx_ia, &new_xprt->rx_data);
if (rc) if (rc)
goto out2; goto out2;
/*
* Allocate pre-registered send and receive buffers for headers and
* any inline data. Also specify any padding which will be provided
* from a preregistered zero buffer.
*/
rc = rpcrdma_buffer_create(new_xprt); rc = rpcrdma_buffer_create(new_xprt);
if (rc) if (rc)
goto out3; goto out3;
/*
* Register a callback for connection events. This is necessary because
* connection loss notification is async. We also catch connection loss
* when reaping receives.
*/
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker); xprt_rdma_connect_worker);
xprt_rdma_format_addresses(xprt, sap);
xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
if (xprt->max_payload == 0) if (xprt->max_payload == 0)
goto out4; goto out4;
...@@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args)
dprintk("RPC: %s: %s:%s\n", __func__, dprintk("RPC: %s: %s:%s\n", __func__,
xprt->address_strings[RPC_DISPLAY_ADDR], xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PORT]); xprt->address_strings[RPC_DISPLAY_PORT]);
trace_xprtrdma_create(new_xprt);
return xprt; return xprt;
out4: out4:
xprt_rdma_free_addresses(xprt); rpcrdma_buffer_destroy(&new_xprt->rx_buf);
rc = -EINVAL; rc = -ENODEV;
out3: out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2: out2:
rpcrdma_ia_close(&new_xprt->rx_ia); rpcrdma_ia_close(&new_xprt->rx_ia);
out1: out1:
trace_xprtrdma_destroy(new_xprt);
xprt_rdma_free_addresses(xprt);
xprt_free(xprt); xprt_free(xprt);
return ERR_PTR(rc); return ERR_PTR(rc);
} }
...@@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt) ...@@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt)
rpcrdma_ep_disconnect(ep, ia); rpcrdma_ep_disconnect(ep, ia);
} }
/**
* xprt_rdma_set_port - update server port with rpcbind result
* @xprt: controlling RPC transport
* @port: new port value
*
* Transport connect status is unchanged.
*/
static void static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{ {
struct sockaddr_in *sap; struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
char buf[8];
sap = (struct sockaddr_in *)&xprt->addr; dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
sap->sin_port = htons(port); __func__, xprt,
sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; xprt->address_strings[RPC_DISPLAY_ADDR],
sap->sin_port = htons(port); xprt->address_strings[RPC_DISPLAY_PORT],
dprintk("RPC: %s: %u\n", __func__, port); port);
rpc_set_port(sap, port);
kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
snprintf(buf, sizeof(buf), "%u", port);
xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
snprintf(buf, sizeof(buf), "%4hx", port);
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
} }
/** /**
...@@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) ...@@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
static void static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{ {
dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
xprt_force_disconnect(xprt); xprt_force_disconnect(xprt);
} }
...@@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task) ...@@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task)
req = rpcrdma_buffer_get(&r_xprt->rx_buf); req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL) if (req == NULL)
return -ENOMEM; goto out_get;
flags = RPCRDMA_DEF_GFP; flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task)) if (RPC_IS_SWAPPER(task))
...@@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task) ...@@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task)
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail; goto out_fail;
dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
task->tk_pid, __func__, rqst->rq_callsize,
rqst->rq_rcvsize, req);
req->rl_cpu = smp_processor_id(); req->rl_cpu = smp_processor_id();
req->rl_connect_cookie = 0; /* our reserved value */ req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req); rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
trace_xprtrdma_allocate(task, req);
return 0; return 0;
out_fail: out_fail:
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
out_get:
trace_xprtrdma_allocate(task, NULL);
return -ENOMEM; return -ENOMEM;
} }
...@@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task) ...@@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
return;
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req); rpcrdma_release_rqst(r_xprt, req);
trace_xprtrdma_rpc_done(task, req);
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
} }
...@@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task) ...@@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task)
* *
* Caller holds the transport's write lock. * Caller holds the transport's write lock.
* *
* Return values: * Returns:
* 0: The request has been sent * %0 if the RPC message has been sent
* ENOTCONN: Caller needs to invoke connect logic then call again * %-ENOTCONN if the caller should reconnect and call again
* ENOBUFS: Call again later to send the request * %-ENOBUFS if the caller should call again later
* EIO: A permanent error occurred. The request was not sent, * %-EIO if a permanent error occurred and the request was not
* and don't try it again * sent. Do not try to send this message again.
*
* send_request invokes the meat of RPC RDMA. It must do the following:
*
* 1. Marshal the RPC request into an RPC RDMA request, which means
* putting a header in front of data, and creating IOVs for RDMA
* from those in the request.
* 2. In marshaling, detect opportunities for RDMA, and use them.
* 3. Post a recv message to set up asynch completion, then send
* the request (rpcrdma_ep_post).
* 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
*/ */
static int static int
xprt_rdma_send_request(struct rpc_task *task) xprt_rdma_send_request(struct rpc_task *task)
...@@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0; int rc = 0;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
if (unlikely(!rqst->rq_buffer))
return xprt_rdma_bc_send_reply(rqst);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
if (!xprt_connected(xprt)) if (!xprt_connected(xprt))
goto drop_connection; goto drop_connection;
/* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered)))
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
&req->rl_registered);
rc = rpcrdma_marshal_req(r_xprt, rqst); rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0) if (rc < 0)
goto failed_marshal; goto failed_marshal;
...@@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection; goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie; req->rl_connect_cookie = xprt->connect_cookie;
set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection; goto drop_connection;
...@@ -904,8 +887,7 @@ int xprt_rdma_init(void) ...@@ -904,8 +887,7 @@ int xprt_rdma_init(void)
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries, xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
dprintk("\tPadding %d\n\tMemreg %d\n", dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header) if (!sunrpc_table_header)
......
此差异已折叠。
...@@ -73,11 +73,10 @@ struct rpcrdma_ia { ...@@ -73,11 +73,10 @@ struct rpcrdma_ia {
struct completion ri_remove_done; struct completion ri_remove_done;
int ri_async_rc; int ri_async_rc;
unsigned int ri_max_segs; unsigned int ri_max_segs;
unsigned int ri_max_frmr_depth; unsigned int ri_max_frwr_depth;
unsigned int ri_max_inline_write; unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read; unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges; unsigned int ri_max_send_sges;
bool ri_reminv_expected;
bool ri_implicit_roundup; bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype; enum ib_mr_type ri_mrtype;
unsigned long ri_flags; unsigned long ri_flags;
...@@ -101,7 +100,6 @@ struct rpcrdma_ep { ...@@ -101,7 +100,6 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait; wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private; struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma; struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker; struct delayed_work rep_connect_worker;
}; };
...@@ -232,29 +230,29 @@ enum { ...@@ -232,29 +230,29 @@ enum {
}; };
/* /*
* struct rpcrdma_mw - external memory region metadata * struct rpcrdma_mr - external memory region metadata
* *
* An external memory region is any buffer or page that is registered * An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered). * on the fly (ie, not pre-registered).
* *
* Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending. * track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and * rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete. * release these resources when an RPC is complete.
*/ */
enum rpcrdma_frmr_state { enum rpcrdma_frwr_state {
FRMR_IS_INVALID, /* ready to be used */ FRWR_IS_INVALID, /* ready to be used */
FRMR_IS_VALID, /* in use */ FRWR_IS_VALID, /* in use */
FRMR_FLUSHED_FR, /* flushed FASTREG WR */ FRWR_FLUSHED_FR, /* flushed FASTREG WR */
FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
}; };
struct rpcrdma_frmr { struct rpcrdma_frwr {
struct ib_mr *fr_mr; struct ib_mr *fr_mr;
struct ib_cqe fr_cqe; struct ib_cqe fr_cqe;
enum rpcrdma_frmr_state fr_state; enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done; struct completion fr_linv_done;
union { union {
struct ib_reg_wr fr_regwr; struct ib_reg_wr fr_regwr;
...@@ -267,26 +265,20 @@ struct rpcrdma_fmr { ...@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
u64 *fm_physaddrs; u64 *fm_physaddrs;
}; };
struct rpcrdma_mw { struct rpcrdma_mr {
struct list_head mw_list; struct list_head mr_list;
struct scatterlist *mw_sg; struct scatterlist *mr_sg;
int mw_nents; int mr_nents;
enum dma_data_direction mw_dir; enum dma_data_direction mr_dir;
unsigned long mw_flags;
union { union {
struct rpcrdma_fmr fmr; struct rpcrdma_fmr fmr;
struct rpcrdma_frmr frmr; struct rpcrdma_frwr frwr;
}; };
struct rpcrdma_xprt *mw_xprt; struct rpcrdma_xprt *mr_xprt;
u32 mw_handle; u32 mr_handle;
u32 mw_length; u32 mr_length;
u64 mw_offset; u64 mr_offset;
struct list_head mw_all; struct list_head mr_all;
};
/* mw_flags */
enum {
RPCRDMA_MW_F_RI = 1,
}; };
/* /*
...@@ -362,8 +354,7 @@ struct rpcrdma_req { ...@@ -362,8 +354,7 @@ struct rpcrdma_req {
/* rl_flags */ /* rl_flags */
enum { enum {
RPCRDMA_REQ_F_BACKCHANNEL = 0, RPCRDMA_REQ_F_PENDING = 0,
RPCRDMA_REQ_F_PENDING,
RPCRDMA_REQ_F_TX_RESOURCES, RPCRDMA_REQ_F_TX_RESOURCES,
}; };
...@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) ...@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
} }
static inline struct rpcrdma_req * static inline struct rpcrdma_req *
rpcr_to_rdmar(struct rpc_rqst *rqst) rpcr_to_rdmar(const struct rpc_rqst *rqst)
{ {
return rqst->rq_xprtdata; return rqst->rq_xprtdata;
} }
static inline void static inline void
rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{ {
list_add_tail(&mw->mw_list, list); list_add_tail(&mr->mr_list, list);
} }
static inline struct rpcrdma_mw * static inline struct rpcrdma_mr *
rpcrdma_pop_mw(struct list_head *list) rpcrdma_mr_pop(struct list_head *list)
{ {
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
mw = list_first_entry(list, struct rpcrdma_mw, mw_list); mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
list_del(&mw->mw_list); list_del(&mr->mr_list);
return mw; return mr;
} }
/* /*
...@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list) ...@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
* One of these is associated with a transport instance * One of these is associated with a transport instance
*/ */
struct rpcrdma_buffer { struct rpcrdma_buffer {
spinlock_t rb_mwlock; /* protect rb_mws list */ spinlock_t rb_mrlock; /* protect rb_mrs list */
struct list_head rb_mws; struct list_head rb_mrs;
struct list_head rb_all; struct list_head rb_all;
unsigned long rb_sc_head; unsigned long rb_sc_head;
...@@ -438,13 +429,11 @@ struct rpcrdma_buffer { ...@@ -438,13 +429,11 @@ struct rpcrdma_buffer {
* This data should be set with mount options * This data should be set with mount options
*/ */
struct rpcrdma_create_data_internal { struct rpcrdma_create_data_internal {
struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */ unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */ unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */ unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */ unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */ unsigned int inline_wsize; /* max non-rdma write data payload */
unsigned int padding; /* non-rdma write header padding */
}; };
/* /*
...@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops { ...@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg * struct rpcrdma_mr_seg *
(*ro_map)(struct rpcrdma_xprt *, (*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool, struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mw **); struct rpcrdma_mr **);
void (*ro_reminv)(struct rpcrdma_rep *rep,
struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *, void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *); struct list_head *);
void (*ro_recover_mr)(struct rpcrdma_mw *); void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *, int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *, struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *); struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *); size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init_mr)(struct rpcrdma_ia *, int (*ro_init_mr)(struct rpcrdma_ia *,
struct rpcrdma_mw *); struct rpcrdma_mr *);
void (*ro_release_mr)(struct rpcrdma_mw *); void (*ro_release_mr)(struct rpcrdma_mr *mr);
const char *ro_displayname; const char *ro_displayname;
const int ro_send_w_inv_ok; const int ro_send_w_inv_ok;
}; };
...@@ -525,6 +516,18 @@ struct rpcrdma_xprt { ...@@ -525,6 +516,18 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
static inline const char *
rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
{
return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
}
static inline const char *
rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
{
return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
}
/* Setting this to 0 ensures interoperability with early servers. /* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance. * Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
...@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy; ...@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/* /*
* Interface Adapter calls - xprtrdma/verbs.c * Interface Adapter calls - xprtrdma/verbs.c
*/ */
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia); void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *); void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *);
...@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); ...@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
* Buffer calls - xprtrdma/verbs.c * Buffer calls - xprtrdma/verbs.c
*/ */
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *); void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *); int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
gfp_t); gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
...@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *); ...@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int rpcrdma_bc_marshal_reply(struct rpc_rqst *); int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *); void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */
...@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); ...@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
extern struct xprt_class xprt_rdma_bc; extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
#include <trace/events/rpcrdma.h>
...@@ -52,6 +52,8 @@ ...@@ -52,6 +52,8 @@
#include "sunrpc.h" #include "sunrpc.h"
#define RPC_TCP_READ_CHUNK_SZ (3*512*1024)
static void xs_close(struct rpc_xprt *xprt); static void xs_close(struct rpc_xprt *xprt);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock); struct socket *sock);
...@@ -1003,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport) ...@@ -1003,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport)
struct sock *sk; struct sock *sk;
int err; int err;
restart:
mutex_lock(&transport->recv_mutex); mutex_lock(&transport->recv_mutex);
sk = transport->inet; sk = transport->inet;
if (sk == NULL) if (sk == NULL)
...@@ -1016,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport) ...@@ -1016,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport)
} }
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
break; break;
if (need_resched()) {
mutex_unlock(&transport->recv_mutex);
cond_resched();
goto restart;
}
} }
out: out:
mutex_unlock(&transport->recv_mutex); mutex_unlock(&transport->recv_mutex);
...@@ -1094,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) ...@@ -1094,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
struct sock *sk; struct sock *sk;
int err; int err;
restart:
mutex_lock(&transport->recv_mutex); mutex_lock(&transport->recv_mutex);
sk = transport->inet; sk = transport->inet;
if (sk == NULL) if (sk == NULL)
...@@ -1107,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport) ...@@ -1107,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
} }
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
break; break;
if (need_resched()) {
mutex_unlock(&transport->recv_mutex);
cond_resched();
goto restart;
}
} }
out: out:
mutex_unlock(&transport->recv_mutex); mutex_unlock(&transport->recv_mutex);
...@@ -1479,6 +1493,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns ...@@ -1479,6 +1493,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
.offset = offset, .offset = offset,
.count = len, .count = len,
}; };
size_t ret;
dprintk("RPC: xs_tcp_data_recv started\n"); dprintk("RPC: xs_tcp_data_recv started\n");
do { do {
...@@ -1507,9 +1522,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns ...@@ -1507,9 +1522,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
/* Skip over any trailing bytes on short reads */ /* Skip over any trailing bytes on short reads */
xs_tcp_read_discard(transport, &desc); xs_tcp_read_discard(transport, &desc);
} while (desc.count); } while (desc.count);
ret = len - desc.count;
if (ret < rd_desc->count)
rd_desc->count -= ret;
else
rd_desc->count = 0;
trace_xs_tcp_data_recv(transport); trace_xs_tcp_data_recv(transport);
dprintk("RPC: xs_tcp_data_recv done\n"); dprintk("RPC: xs_tcp_data_recv done\n");
return len - desc.count; return ret;
} }
static void xs_tcp_data_receive(struct sock_xprt *transport) static void xs_tcp_data_receive(struct sock_xprt *transport)
...@@ -1517,30 +1537,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) ...@@ -1517,30 +1537,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
struct rpc_xprt *xprt = &transport->xprt; struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk; struct sock *sk;
read_descriptor_t rd_desc = { read_descriptor_t rd_desc = {
.count = 2*1024*1024,
.arg.data = xprt, .arg.data = xprt,
}; };
unsigned long total = 0; unsigned long total = 0;
int loop;
int read = 0; int read = 0;
restart:
mutex_lock(&transport->recv_mutex); mutex_lock(&transport->recv_mutex);
sk = transport->inet; sk = transport->inet;
if (sk == NULL) if (sk == NULL)
goto out; goto out;
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
for (loop = 0; loop < 64; loop++) { for (;;) {
rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
lock_sock(sk); lock_sock(sk);
read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
if (read <= 0) { if (rd_desc.count != 0 || read < 0) {
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
release_sock(sk); release_sock(sk);
break; break;
} }
release_sock(sk); release_sock(sk);
total += read; total += read;
rd_desc.count = 65536; if (need_resched()) {
mutex_unlock(&transport->recv_mutex);
cond_resched();
goto restart;
}
} }
if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
queue_work(xprtiod_workqueue, &transport->recv_worker); queue_work(xprtiod_workqueue, &transport->recv_worker);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册