提交 43f1b3da 编写于 作者: F Fred Isaman 提交者: Trond Myklebust

pnfs: add CB_LAYOUTRECALL handling

This is the heart of the wave 2 submission.  Add the code to trigger
drain and forget of any affected layouts.  In addition, we set a
"barrier", below which any LAYOUTGET reply is ignored.  This is to
compensate for the fact that we do not wait for outstanding LAYOUTGETs
to complete as per section 12.5.5.2.1 of RFC 5661.
Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
上级 f2a62561
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "callback.h" #include "callback.h"
#include "delegation.h" #include "delegation.h"
#include "internal.h" #include "internal.h"
#include "pnfs.h"
#ifdef NFS_DEBUG #ifdef NFS_DEBUG
#define NFSDBG_FACILITY NFSDBG_CALLBACK #define NFSDBG_FACILITY NFSDBG_CALLBACK
...@@ -107,10 +108,126 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf ...@@ -107,10 +108,126 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
#if defined(CONFIG_NFS_V4_1) #if defined(CONFIG_NFS_V4_1)
/* Handle a CB_LAYOUTRECALL of type RETURN_FILE: locate the layout whose
 * inode matches the recalled filehandle, invalidate the layout segments
 * matching the recalled iomode, and advance the layout stateid/barrier.
 *
 * Returns NFS4ERR_NOMATCHING_LAYOUT when no layout (or no in-use matching
 * segment) exists, or NFS4ERR_DELAY when segments are still busy or a bulk
 * recall is already draining this layout, telling the server to retry.
 */
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
struct pnfs_layout_hdr *lo;
struct inode *ino;
bool found = false;
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
LIST_HEAD(free_me_list);
/* cl_lock protects the client's list of layouts while we search it. */
spin_lock(&clp->cl_lock);
list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
if (nfs_compare_fh(&args->cbl_fh,
&NFS_I(lo->plh_inode)->fh))
continue;
/* igrab() can fail if the inode is being freed; skip it then. */
ino = igrab(lo->plh_inode);
if (!ino)
continue;
found = true;
/* Without this, layout can be freed as soon
 * as we release cl_lock.
 */
get_layout_hdr(lo);
break;
}
spin_unlock(&clp->cl_lock);
if (!found)
return NFS4ERR_NOMATCHING_LAYOUT;
/* i_lock serializes against layout segment users on this inode. */
spin_lock(&ino->i_lock);
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
mark_matching_lsegs_invalid(lo, &free_me_list,
args->cbl_range.iomode))
rv = NFS4ERR_DELAY;
else
rv = NFS4ERR_NOMATCHING_LAYOUT;
/* Record the recall stateid and raise the barrier so that replies to
 * outstanding LAYOUTGETs with older seqids are ignored (RFC 5661
 * 12.5.5.2.1).
 */
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
spin_unlock(&ino->i_lock);
/* Free the invalidated segments outside the spinlock. */
pnfs_free_lseg_list(&free_me_list);
put_layout_hdr(lo);
iput(ino);
return rv;
}
/* Handle a CB_LAYOUTRECALL of type RETURN_FSID or RETURN_ALL: collect every
 * affected layout onto a private list under cl_lock, then walk that list
 * invalidating all segments (IOMODE_ANY) and marking each layout for bulk
 * recall.
 *
 * Returns NFS4ERR_NOMATCHING_LAYOUT when nothing was in use, or
 * NFS4ERR_DELAY when any layout still had busy segments and the server
 * should retry.
 */
static u32 initiate_bulk_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
struct pnfs_layout_hdr *lo;
struct inode *ino;
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
struct pnfs_layout_hdr *tmp;
LIST_HEAD(recall_list);
LIST_HEAD(free_me_list);
/* A bulk recall always covers the whole file range, any iomode. */
struct pnfs_layout_range range = {
.iomode = IOMODE_ANY,
.offset = 0,
.length = NFS4_MAX_UINT64,
};
/* Phase 1: under cl_lock, pin each matching layout (inode ref + layout
 * ref) and link it onto recall_list via plh_bulk_recall.
 */
spin_lock(&clp->cl_lock);
list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
/* For RETURN_FSID, skip layouts from other filesystems. */
if ((args->cbl_recall_type == RETURN_FSID) &&
memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
&args->cbl_fsid, sizeof(struct nfs_fsid)))
continue;
if (!igrab(lo->plh_inode))
continue;
get_layout_hdr(lo);
BUG_ON(!list_empty(&lo->plh_bulk_recall));
list_add(&lo->plh_bulk_recall, &recall_list);
}
spin_unlock(&clp->cl_lock);
/* Phase 2: without cl_lock, invalidate each pinned layout under its
 * inode's i_lock and drop the references taken above.
 */
list_for_each_entry_safe(lo, tmp,
&recall_list, plh_bulk_recall) {
ino = lo->plh_inode;
spin_lock(&ino->i_lock);
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
rv = NFS4ERR_DELAY;
list_del_init(&lo->plh_bulk_recall);
spin_unlock(&ino->i_lock);
put_layout_hdr(lo);
iput(ino);
}
/* Free all invalidated segments outside any spinlock. */
pnfs_free_lseg_list(&free_me_list);
return rv;
}
/* Dispatch a CB_LAYOUTRECALL to the file- or bulk-draining handler.
 * The NFS4CLNT_LAYOUTRECALL bit serializes recall processing per client:
 * if a recall is already being drained, answer NFS4ERR_DELAY so the
 * server retries later.
 */
static u32 do_callback_layoutrecall(struct nfs_client *clp,
				    struct cb_layoutrecallargs *args)
{
	u32 status = NFS4ERR_DELAY;

	dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
	if (!test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state)) {
		if (args->cbl_recall_type == RETURN_FILE)
			status = initiate_file_draining(clp, args);
		else
			status = initiate_bulk_draining(clp, args);
		clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
	}
	dprintk("%s returning %i\n", __func__, status);
	return status;
}
__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args, __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps) void *dummy, struct cb_process_state *cps)
{ {
return cpu_to_be32(NFS4ERR_NOTSUPP); /* STUB */ u32 res;
dprintk("%s: -->\n", __func__);
if (cps->clp)
res = do_callback_layoutrecall(cps->clp, args);
else
res = NFS4ERR_OP_NOT_IN_SESSION;
dprintk("%s: exit with status = %d\n", __func__, res);
return cpu_to_be32(res);
} }
int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
......
...@@ -44,6 +44,7 @@ enum nfs4_client_state { ...@@ -44,6 +44,7 @@ enum nfs4_client_state {
NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_REBOOT,
NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_RECLAIM_NOGRACE,
NFS4CLNT_DELEGRETURN, NFS4CLNT_DELEGRETURN,
NFS4CLNT_LAYOUTRECALL,
NFS4CLNT_SESSION_RESET, NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT, NFS4CLNT_RECALL_SLOT,
}; };
......
...@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); ...@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
*/ */
/* Need to hold i_lock if caller does not already hold reference */ /* Need to hold i_lock if caller does not already hold reference */
static void void
get_layout_hdr(struct pnfs_layout_hdr *lo) get_layout_hdr(struct pnfs_layout_hdr *lo)
{ {
atomic_inc(&lo->plh_refcount); atomic_inc(&lo->plh_refcount);
...@@ -254,6 +254,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, ...@@ -254,6 +254,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
/* List does not take a reference, so no need for put here */ /* List does not take a reference, so no need for put here */
list_del_init(&lseg->pls_layout->plh_layouts); list_del_init(&lseg->pls_layout->plh_layouts);
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
} }
list_add(&lseg->pls_list, tmp_list); list_add(&lseg->pls_list, tmp_list);
return 1; return 1;
...@@ -287,7 +288,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, ...@@ -287,7 +288,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
/* Returns count of number of matching invalid lsegs remaining in list /* Returns count of number of matching invalid lsegs remaining in list
* after call. * after call.
*/ */
static int int
mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list, struct list_head *tmp_list,
u32 iomode) u32 iomode)
...@@ -310,7 +311,7 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, ...@@ -310,7 +311,7 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return invalid - removed; return invalid - removed;
} }
static void void
pnfs_free_lseg_list(struct list_head *free_me) pnfs_free_lseg_list(struct list_head *free_me)
{ {
struct pnfs_layout_segment *lseg, *tmp; struct pnfs_layout_segment *lseg, *tmp;
...@@ -363,23 +364,45 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) ...@@ -363,23 +364,45 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
} }
/* update lo->plh_stateid with new if is more recent */ /* update lo->plh_stateid with new if is more recent */
static void void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
const nfs4_stateid *new) bool update_barrier)
{ {
u32 oldseq, newseq; u32 oldseq, newseq;
oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
newseq = be32_to_cpu(new->stateid.seqid); newseq = be32_to_cpu(new->stateid.seqid);
if ((int)(newseq - oldseq) > 0) if ((int)(newseq - oldseq) > 0) {
memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
if (update_barrier) {
u32 new_barrier = be32_to_cpu(new->stateid.seqid);
if ((int)(new_barrier - lo->plh_barrier) > 0)
lo->plh_barrier = new_barrier;
} else {
/* Because of wraparound, we want to keep the barrier
* "close" to the current seqids. It needs to be
* within 2**31 to count as "behind", so if it
* gets too near that limit, give us a little leeway
* and bring it to within 2**30.
* NOTE - and yes, this is all unsigned arithmetic.
*/
if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
lo->plh_barrier = newseq - (1 << 30);
}
}
} }
/* lget is set to 1 if called from inside send_layoutget call chain */ /* lget is set to 1 if called from inside send_layoutget call chain */
static bool static bool
pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget) pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
{ int lget)
return (list_empty(&lo->plh_segs) && {
if ((stateid) &&
(int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
return true;
return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
(list_empty(&lo->plh_segs) &&
(atomic_read(&lo->plh_outstanding) > lget)); (atomic_read(&lo->plh_outstanding) > lget));
} }
...@@ -391,7 +414,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, ...@@ -391,7 +414,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__); dprintk("--> %s\n", __func__);
spin_lock(&lo->plh_inode->i_lock); spin_lock(&lo->plh_inode->i_lock);
if (pnfs_layoutgets_blocked(lo, 1)) { if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
status = -EAGAIN; status = -EAGAIN;
} else if (list_empty(&lo->plh_segs)) { } else if (list_empty(&lo->plh_segs)) {
int seq; int seq;
...@@ -510,6 +533,7 @@ alloc_init_layout_hdr(struct inode *ino) ...@@ -510,6 +533,7 @@ alloc_init_layout_hdr(struct inode *ino)
atomic_set(&lo->plh_refcount, 1); atomic_set(&lo->plh_refcount, 1);
INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_layouts);
INIT_LIST_HEAD(&lo->plh_segs); INIT_LIST_HEAD(&lo->plh_segs);
INIT_LIST_HEAD(&lo->plh_bulk_recall);
lo->plh_inode = ino; lo->plh_inode = ino;
return lo; return lo;
} }
...@@ -561,7 +585,7 @@ is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) ...@@ -561,7 +585,7 @@ is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
* lookup range in layout * lookup range in layout
*/ */
static struct pnfs_layout_segment * static struct pnfs_layout_segment *
pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
{ {
struct pnfs_layout_segment *lseg, *ret = NULL; struct pnfs_layout_segment *lseg, *ret = NULL;
...@@ -606,19 +630,22 @@ pnfs_update_layout(struct inode *ino, ...@@ -606,19 +630,22 @@ pnfs_update_layout(struct inode *ino,
goto out_unlock; goto out_unlock;
} }
/* Check to see if the layout for the given range already exists */ /* Do we even need to bother with this? */
lseg = pnfs_has_layout(lo, iomode); if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
if (lseg) { test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
dprintk("%s: Using cached lseg %p for iomode %d)\n", dprintk("%s matches recall, use MDS\n", __func__);
__func__, lseg, iomode);
goto out_unlock; goto out_unlock;
} }
/* Check to see if the layout for the given range already exists */
lseg = pnfs_find_lseg(lo, iomode);
if (lseg)
goto out_unlock;
/* if LAYOUTGET already failed once we don't try again */ /* if LAYOUTGET already failed once we don't try again */
if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
goto out_unlock; goto out_unlock;
if (pnfs_layoutgets_blocked(lo, 0)) if (pnfs_layoutgets_blocked(lo, NULL, 0))
goto out_unlock; goto out_unlock;
atomic_inc(&lo->plh_outstanding); atomic_inc(&lo->plh_outstanding);
...@@ -641,6 +668,7 @@ pnfs_update_layout(struct inode *ino, ...@@ -641,6 +668,7 @@ pnfs_update_layout(struct inode *ino,
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
list_del_init(&lo->plh_layouts); list_del_init(&lo->plh_layouts);
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
} }
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
} }
...@@ -662,6 +690,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) ...@@ -662,6 +690,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct nfs4_layoutget_res *res = &lgp->res; struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg; struct pnfs_layout_segment *lseg;
struct inode *ino = lo->plh_inode; struct inode *ino = lo->plh_inode;
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
int status = 0; int status = 0;
/* Verify we got what we asked for. /* Verify we got what we asked for.
...@@ -688,16 +717,32 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) ...@@ -688,16 +717,32 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
} }
spin_lock(&ino->i_lock); spin_lock(&ino->i_lock);
if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
dprintk("%s forget reply due to recall\n", __func__);
goto out_forget_reply;
}
if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
dprintk("%s forget reply due to state\n", __func__);
goto out_forget_reply;
}
init_lseg(lo, lseg); init_lseg(lo, lseg);
lseg->pls_range = res->range; lseg->pls_range = res->range;
*lgp->lsegpp = lseg; *lgp->lsegpp = lseg;
pnfs_insert_layout(lo, lseg); pnfs_insert_layout(lo, lseg);
/* Done processing layoutget. Set the layout stateid */ /* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid); pnfs_set_layout_stateid(lo, &res->stateid, false);
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
out: out:
return status; return status;
out_forget_reply:
spin_unlock(&ino->i_lock);
lseg->pls_layout = lo;
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
goto out;
} }
/* /*
......
...@@ -49,6 +49,7 @@ struct pnfs_layout_segment { ...@@ -49,6 +49,7 @@ struct pnfs_layout_segment {
enum { enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
}; };
...@@ -67,9 +68,11 @@ struct pnfs_layoutdriver_type { ...@@ -67,9 +68,11 @@ struct pnfs_layoutdriver_type {
struct pnfs_layout_hdr { struct pnfs_layout_hdr {
atomic_t plh_refcount; atomic_t plh_refcount;
struct list_head plh_layouts; /* other client layouts */ struct list_head plh_layouts; /* other client layouts */
struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
struct list_head plh_segs; /* layout segments list */ struct list_head plh_segs; /* layout segments list */
nfs4_stateid plh_stateid; nfs4_stateid plh_stateid;
atomic_t plh_outstanding; /* number of RPCs out */ atomic_t plh_outstanding; /* number of RPCs out */
u32 plh_barrier; /* ignore lower seqids */
unsigned long plh_flags; unsigned long plh_flags;
struct inode *plh_inode; struct inode *plh_inode;
}; };
...@@ -139,18 +142,26 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, ...@@ -139,18 +142,26 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
/* pnfs.c */ /* pnfs.c */
void get_layout_hdr(struct pnfs_layout_hdr *lo);
struct pnfs_layout_segment * struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
enum pnfs_iomode access_type); enum pnfs_iomode access_type);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *); void unset_pnfs_layoutdriver(struct nfs_server *);
int pnfs_layout_process(struct nfs4_layoutget *lgp); int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *); void pnfs_destroy_all_layouts(struct nfs_client *);
void put_layout_hdr(struct pnfs_layout_hdr *lo); void put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
bool update_barrier);
int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
struct pnfs_layout_hdr *lo, struct pnfs_layout_hdr *lo,
struct nfs4_state *open_state); struct nfs4_state *open_state);
int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
u32 iomode);
static inline int lo_fail_bit(u32 iomode) static inline int lo_fail_bit(u32 iomode)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册