diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7cbdf1b2e4abd7286b6033c66a9d999e735737c9..58277859a467d878cf5cd5d947d7361f8b3396cf 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -546,6 +546,102 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, return status; } +#ifdef CONFIG_NFSD_PNFS +/* + * CB_LAYOUTRECALL4args + * + * struct layoutrecall_file4 { + * nfs_fh4 lor_fh; + * offset4 lor_offset; + * length4 lor_length; + * stateid4 lor_stateid; + * }; + * + * union layoutrecall4 switch(layoutrecall_type4 lor_recalltype) { + * case LAYOUTRECALL4_FILE: + * layoutrecall_file4 lor_layout; + * case LAYOUTRECALL4_FSID: + * fsid4 lor_fsid; + * case LAYOUTRECALL4_ALL: + * void; + * }; + * + * struct CB_LAYOUTRECALL4args { + * layouttype4 clora_type; + * layoutiomode4 clora_iomode; + * bool clora_changed; + * layoutrecall4 clora_recall; + * }; + */ +static void encode_cb_layout4args(struct xdr_stream *xdr, + const struct nfs4_layout_stateid *ls, + struct nfs4_cb_compound_hdr *hdr) +{ + __be32 *p; + + BUG_ON(hdr->minorversion == 0); + + p = xdr_reserve_space(xdr, 5 * 4); + *p++ = cpu_to_be32(OP_CB_LAYOUTRECALL); + *p++ = cpu_to_be32(ls->ls_layout_type); + *p++ = cpu_to_be32(IOMODE_ANY); + *p++ = cpu_to_be32(1); + *p = cpu_to_be32(RETURN_FILE); + + encode_nfs_fh4(xdr, &ls->ls_stid.sc_file->fi_fhandle); + + p = xdr_reserve_space(xdr, 2 * 8); + p = xdr_encode_hyper(p, 0); + xdr_encode_hyper(p, NFS4_MAX_UINT64); + + encode_stateid4(xdr, &ls->ls_recall_sid); + + hdr->nops++; +} + +static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfsd4_callback *cb) +{ + const struct nfs4_layout_stateid *ls = + container_of(cb, struct nfs4_layout_stateid, ls_recall); + struct nfs4_cb_compound_hdr hdr = { + .ident = 0, + .minorversion = cb->cb_minorversion, + }; + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_layout4args(xdr, ls, &hdr); + encode_cb_nops(&hdr); +} + +static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfsd4_callback *cb) +{ + struct nfs4_cb_compound_hdr hdr; + enum nfsstat4 nfserr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + goto out; + if (cb) { + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status)) + goto out; + } + status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr); + if (unlikely(status)) + goto out; + if (unlikely(nfserr != NFS4_OK)) + status = nfs_cb_stat_to_errno(nfserr); +out: + return status; +} +#endif /* CONFIG_NFSD_PNFS */ + /* * RPC procedure tables */ @@ -563,6 +659,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, static struct rpc_procinfo nfs4_cb_procedures[] = { PROC(CB_NULL, NULL, cb_null, cb_null), PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), +#ifdef CONFIG_NFSD_PNFS + PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout), +#endif }; static struct rpc_version nfs_cb_version4 = { diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 8273270418b1b33cc78647aee4c02c651387b1d5..d926865df94f9f80f0855ace69d7b7499fc05b42 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -1,8 +1,11 @@ /* * Copyright (c) 2014 Christoph Hellwig. */ +#include +#include #include #include +#include #include "pnfs.h" #include "netns.h" @@ -18,6 +21,9 @@ struct nfs4_layout { static struct kmem_cache *nfs4_layout_cache; static struct kmem_cache *nfs4_layout_stateid_cache; +static struct nfsd4_callback_ops nfsd4_cb_layout_ops; +static const struct lock_manager_operations nfsd4_layouts_lm_ops; + const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { }; @@ -127,9 +133,42 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid) list_del_init(&ls->ls_perfile); spin_unlock(&fp->fi_lock); + vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls); + fput(ls->ls_file); + + if (ls->ls_recalled) + atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls); + kmem_cache_free(nfs4_layout_stateid_cache, ls); } +static int +nfsd4_layout_setlease(struct nfs4_layout_stateid *ls) +{ + struct file_lock *fl; + int status; + + fl = locks_alloc_lock(); + if (!fl) + return -ENOMEM; + locks_init_lock(fl); + fl->fl_lmops = &nfsd4_layouts_lm_ops; + fl->fl_flags = FL_LAYOUT; + fl->fl_type = F_RDLCK; + fl->fl_end = OFFSET_MAX; + fl->fl_owner = ls; + fl->fl_pid = current->tgid; + fl->fl_file = ls->ls_file; + + status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL); + if (status) { + locks_free_lock(fl); + return status; + } + BUG_ON(fl != NULL); + return 0; +} + static struct nfs4_layout_stateid * nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid *parent, u32 layout_type) @@ -152,6 +191,20 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, spin_lock_init(&ls->ls_lock); INIT_LIST_HEAD(&ls->ls_layouts); ls->ls_layout_type = layout_type; + nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, + NFSPROC4_CLNT_CB_LAYOUT); + + if (parent->sc_type == NFS4_DELEG_STID) + ls->ls_file = get_file(fp->fi_deleg_file); + else + ls->ls_file = find_any_file(fp); + BUG_ON(!ls->ls_file); + + if (nfsd4_layout_setlease(ls)) { + put_nfs4_file(fp); + kmem_cache_free(nfs4_layout_stateid_cache, ls); + return NULL; + } spin_lock(&clp->cl_lock); stp->sc_type = NFS4_LAYOUT_STID; @@ -215,6 +268,27 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, return status; } +static void +nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) +{ + spin_lock(&ls->ls_lock); + if (ls->ls_recalled) + goto out_unlock; + + ls->ls_recalled = true; + atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); + if (list_empty(&ls->ls_layouts)) + goto out_unlock; + + atomic_inc(&ls->ls_stid.sc_count); + update_stateid(&ls->ls_stid.sc_stateid); + memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); + nfsd4_run_cb(&ls->ls_recall); + +out_unlock: + spin_unlock(&ls->ls_lock); +} + static inline u64 layout_end(struct nfsd4_layout_seg *seg) { @@ -258,18 +332,44 @@ layouts_try_merge(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *new) return true; } +static __be32 +nfsd4_recall_conflict(struct nfs4_layout_stateid *ls) +{ + struct nfs4_file *fp = ls->ls_stid.sc_file; + struct nfs4_layout_stateid *l, *n; + __be32 nfserr = nfs_ok; + + assert_spin_locked(&fp->fi_lock); + + list_for_each_entry_safe(l, n, &fp->fi_lo_states, ls_perfile) { + if (l != ls) { + nfsd4_recall_file_layout(l); + nfserr = nfserr_recallconflict; + } + } + + return nfserr; +} + __be32 nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) { struct nfsd4_layout_seg *seg = &lgp->lg_seg; + struct nfs4_file *fp = ls->ls_stid.sc_file; struct nfs4_layout *lp, *new = NULL; + __be32 nfserr; + spin_lock(&fp->fi_lock); + nfserr = nfsd4_recall_conflict(ls); + if (nfserr) + goto out; spin_lock(&ls->ls_lock); list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) { if (layouts_try_merge(&lp->lo_seg, seg)) goto done; } spin_unlock(&ls->ls_lock); + spin_unlock(&fp->fi_lock); new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL); if (!new) @@ -277,6 +377,10 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg)); new->lo_state = ls; + spin_lock(&fp->fi_lock); + nfserr = nfsd4_recall_conflict(ls); + if (nfserr) + goto out; spin_lock(&ls->ls_lock); list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) { if (layouts_try_merge(&lp->lo_seg, seg)) @@ -290,9 +394,11 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) update_stateid(&ls->ls_stid.sc_stateid); memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); spin_unlock(&ls->ls_lock); +out: + spin_unlock(&fp->fi_lock); if (new) kmem_cache_free(nfs4_layout_cache, new); - return nfs_ok; + return nfserr; } static void @@ -448,6 +554,112 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp) nfsd4_free_layouts(&reaplist); } +static void +nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) +{ + struct nfs4_client *clp = ls->ls_stid.sc_client; + char addr_str[INET6_ADDRSTRLEN]; + static char *envp[] = { + "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL + }; + char *argv[8]; + int error; + + rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); + + printk(KERN_WARNING + "nfsd: client %s failed to respond to layout recall. " + " Fencing..\n", addr_str); + + argv[0] = "/sbin/nfsd-recall-failed"; + argv[1] = addr_str; + argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id; + argv[3] = NULL; + + error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + if (error) { + printk(KERN_ERR "nfsd: fence failed for client %s: %d!\n", + addr_str, error); + } +} + +static int +nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) +{ + struct nfs4_layout_stateid *ls = + container_of(cb, struct nfs4_layout_stateid, ls_recall); + LIST_HEAD(reaplist); + + switch (task->tk_status) { + case 0: + return 1; + case -NFS4ERR_NOMATCHING_LAYOUT: + task->tk_status = 0; + return 1; + case -NFS4ERR_DELAY: + /* Poll the client until it's done with the layout */ + /* FIXME: cap number of retries. + * The pnfs standard states that we need to only expire + * the client after at-least "lease time" .eg lease-time * 2 + * when failing to communicate a recall + */ + rpc_delay(task, HZ/100); /* 10 mili-seconds */ + return 0; + default: + /* + * Unknown error or non-responding client, we'll need to fence. + */ + nfsd4_cb_layout_fail(ls); + return -1; + } +} + +static void +nfsd4_cb_layout_release(struct nfsd4_callback *cb) +{ + struct nfs4_layout_stateid *ls = + container_of(cb, struct nfs4_layout_stateid, ls_recall); + LIST_HEAD(reaplist); + + nfsd4_return_all_layouts(ls, &reaplist); + nfsd4_free_layouts(&reaplist); + nfs4_put_stid(&ls->ls_stid); +} + +static struct nfsd4_callback_ops nfsd4_cb_layout_ops = { + .done = nfsd4_cb_layout_done, + .release = nfsd4_cb_layout_release, +}; + +static bool +nfsd4_layout_lm_break(struct file_lock *fl) +{ + /* + * We don't want the locks code to timeout the lease for us; + * we'll remove it ourself if a layout isn't returned + * in time: + */ + fl->fl_break_time = 0; + nfsd4_recall_file_layout(fl->fl_owner); + return false; +} + +static int +nfsd4_layout_lm_change(struct file_lock *onlist, int arg, + struct list_head *dispose) +{ + BUG_ON(!(arg & F_UNLCK)); + return lease_modify(onlist, arg, dispose); +} + +static const struct lock_manager_operations nfsd4_layouts_lm_ops = { + .lm_break = nfsd4_layout_lm_break, + .lm_change = nfsd4_layout_lm_change, +}; + int nfsd4_init_pnfs(void) { diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2b91443497cc7168901a3505d1c5dabe926e3115..fa14359eb956519dbdb528c0c53912d16d1b03c6 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1301,6 +1301,10 @@ nfsd4_layoutget(struct svc_rqst *rqstp, if (nfserr) goto out; + nfserr = nfserr_recallconflict; + if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls)) + goto out_put_stid; + nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode, current_fh, lgp); if (nfserr) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c89f79dc69e2afc35b95a1ac7617fd3a21988c7a..f6b2a09f793f453e5b6c40e80ceb383a12af7616 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3065,6 +3065,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, memset(fp->fi_access, 0, sizeof(fp->fi_access)); #ifdef CONFIG_NFSD_PNFS INIT_LIST_HEAD(&fp->fi_lo_states); + atomic_set(&fp->fi_lo_recalls, 0); #endif hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 5f66b7fd0297a8ca3604547e934d6380194bdcfd..4f3bfeb1176662ce5c7eb6f59f9440c95c8cf68e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -502,6 +502,7 @@ struct nfs4_file { bool fi_had_conflict; #ifdef CONFIG_NFSD_PNFS struct list_head fi_lo_states; + atomic_t fi_lo_recalls; #endif }; @@ -542,6 +543,10 @@ struct nfs4_layout_stateid { spinlock_t ls_lock; struct list_head ls_layouts; u32 ls_layout_type; + struct file *ls_file; + struct nfsd4_callback ls_recall; + stateid_t ls_recall_sid; + bool ls_recalled; }; static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) @@ -556,6 +561,7 @@ static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) enum nfsd4_cb_op { NFSPROC4_CLNT_CB_NULL = 0, NFSPROC4_CLNT_CB_RECALL, + NFSPROC4_CLNT_CB_LAYOUT, NFSPROC4_CLNT_CB_SEQUENCE, }; diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index c5c55dfb91a92d19b34cd62e5ad377e668b0ab0e..c47f6fdb111aafbd22112efd3e8bcaedd51aa2b1 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -21,3 +21,10 @@ #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) +#define NFS4_enc_cb_layout_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + 3 + \ + enc_nfs4_fh_sz + 4) +#define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz)