提交 668f455d 编写于 作者: T Trond Myklebust

Merge branch 'pnfs'

...@@ -65,8 +65,8 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) ...@@ -65,8 +65,8 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
if (!p) if (!p)
return -EIO; return -EIO;
b->simple.nr_sigs = be32_to_cpup(p++); b->simple.nr_sigs = be32_to_cpup(p++);
if (!b->simple.nr_sigs) { if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) {
dprintk("no signature\n"); dprintk("Bad signature count: %d\n", b->simple.nr_sigs);
return -EIO; return -EIO;
} }
...@@ -89,7 +89,8 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) ...@@ -89,7 +89,8 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
memcpy(&b->simple.sigs[i].sig, p, memcpy(&b->simple.sigs[i].sig, p,
b->simple.sigs[i].sig_len); b->simple.sigs[i].sig_len);
b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; b->simple.len += 8 + 4 + \
(XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);
} }
break; break;
case PNFS_BLOCK_VOLUME_SLICE: case PNFS_BLOCK_VOLUME_SLICE:
...@@ -104,7 +105,12 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) ...@@ -104,7 +105,12 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_inline_decode(xdr, 4); p = xdr_inline_decode(xdr, 4);
if (!p) if (!p)
return -EIO; return -EIO;
b->concat.volumes_count = be32_to_cpup(p++); b->concat.volumes_count = be32_to_cpup(p++);
if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
dprintk("Too many volumes: %d\n", b->concat.volumes_count);
return -EIO;
}
p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
if (!p) if (!p)
...@@ -116,8 +122,13 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) ...@@ -116,8 +122,13 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_inline_decode(xdr, 8 + 4); p = xdr_inline_decode(xdr, 8 + 4);
if (!p) if (!p)
return -EIO; return -EIO;
p = xdr_decode_hyper(p, &b->stripe.chunk_size); p = xdr_decode_hyper(p, &b->stripe.chunk_size);
b->stripe.volumes_count = be32_to_cpup(p++); b->stripe.volumes_count = be32_to_cpup(p++);
if (b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
dprintk("Too many volumes: %d\n", b->stripe.volumes_count);
return -EIO;
}
p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
if (!p) if (!p)
...@@ -224,18 +235,20 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, ...@@ -224,18 +235,20 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{ {
struct pnfs_block_volume *v = &volumes[idx]; struct pnfs_block_volume *v = &volumes[idx];
struct block_device *bdev;
dev_t dev; dev_t dev;
dev = bl_resolve_deviceid(server, v, gfp_mask); dev = bl_resolve_deviceid(server, v, gfp_mask);
if (!dev) if (!dev)
return -EIO; return -EIO;
d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL); bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
if (IS_ERR(d->bdev)) { if (IS_ERR(bdev)) {
printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
return PTR_ERR(d->bdev); return PTR_ERR(bdev);
} }
d->bdev = bdev;
d->len = i_size_read(d->bdev->bd_inode); d->len = i_size_read(d->bdev->bd_inode);
...@@ -287,44 +300,71 @@ bl_validate_designator(struct pnfs_block_volume *v) ...@@ -287,44 +300,71 @@ bl_validate_designator(struct pnfs_block_volume *v)
} }
} }
/*
 * Open the block device behind the udev "wwn-0x<designator>" by-id symlink.
 *
 * At least on Debian the udev by-id path will always point to the
 * dm-multipath device if one exists.
 *
 * The designator bytes are rendered as one unseparated hex string.  Returns
 * the opened block device, ERR_PTR(-ENOMEM) if the path could not be
 * allocated, or the ERR_PTR handed back by blkdev_get_by_path() (in which
 * case a warning naming the path is logged).
 */
static struct block_device *
bl_open_udev_path(struct pnfs_block_volume *v)
{
	struct block_device *result;
	const char *path;

	path = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN",
			 v->scsi.designator_len, v->scsi.designator);
	if (path == NULL)
		return ERR_PTR(-ENOMEM);

	result = blkdev_get_by_path(path, FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(result))
		pr_warn("pNFS: failed to open device %s (%ld)\n",
			path, PTR_ERR(result));

	/* The path string is only needed for the open and the warning. */
	kfree(path);
	return result;
}
/*
 * Open the block device behind the RH/Fedora specific
 * "dm-uuid-mpath-<type><designator>" udev by-id symlink.
 *
 * On those distributions the wwn- links only point to the first discovered
 * SCSI device, so the dm-mpath link is needed to reach the multipath device.
 *
 * Returns the opened block device, ERR_PTR(-ENOMEM) if the path could not be
 * allocated, or the ERR_PTR handed back by blkdev_get_by_path().  No message
 * is logged on failure.
 */
static struct block_device *
bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v)
{
	struct block_device *result;
	const char *path;

	path = kasprintf(GFP_KERNEL,
			 "/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
			 v->scsi.designator_type,
			 v->scsi.designator_len, v->scsi.designator);
	if (path == NULL)
		return ERR_PTR(-ENOMEM);

	result = blkdev_get_by_path(path, FMODE_READ | FMODE_WRITE, NULL);

	/* The path string is only needed for the open itself. */
	kfree(path);
	return result;
}
static int static int
bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{ {
struct pnfs_block_volume *v = &volumes[idx]; struct pnfs_block_volume *v = &volumes[idx];
struct block_device *bdev;
const struct pr_ops *ops; const struct pr_ops *ops;
const char *devname;
int error; int error;
if (!bl_validate_designator(v)) if (!bl_validate_designator(v))
return -EINVAL; return -EINVAL;
switch (v->scsi.designator_len) { bdev = bl_open_dm_mpath_udev_path(v);
case 8: if (IS_ERR(bdev))
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN", bdev = bl_open_udev_path(v);
v->scsi.designator); if (IS_ERR(bdev))
break; return PTR_ERR(bdev);
case 12: d->bdev = bdev;
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
v->scsi.designator);
break;
case 16:
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
v->scsi.designator);
break;
default:
return -EINVAL;
}
d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
if (IS_ERR(d->bdev)) {
pr_warn("pNFS: failed to open device %s (%ld)\n",
devname, PTR_ERR(d->bdev));
kfree(devname);
return PTR_ERR(d->bdev);
}
kfree(devname);
d->len = i_size_read(d->bdev->bd_inode); d->len = i_size_read(d->bdev->bd_inode);
d->map = bl_map_simple; d->map = bl_map_simple;
...@@ -352,7 +392,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, ...@@ -352,7 +392,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
return 0; return 0;
out_blkdev_put: out_blkdev_put:
blkdev_put(d->bdev, FMODE_READ); blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);
return error; return error;
} }
......
...@@ -119,27 +119,30 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, ...@@ -119,27 +119,30 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
* hashed by filehandle. * hashed by filehandle.
*/ */
static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
struct nfs_fh *fh, nfs4_stateid *stateid) struct nfs_fh *fh)
{ {
struct nfs_server *server; struct nfs_server *server;
struct nfs_inode *nfsi;
struct inode *ino; struct inode *ino;
struct pnfs_layout_hdr *lo; struct pnfs_layout_hdr *lo;
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) { list_for_each_entry(lo, &server->layouts, plh_layouts) {
if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid)) nfsi = NFS_I(lo->plh_inode);
if (nfs_compare_fh(fh, &nfsi->fh))
continue; continue;
if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) if (nfsi->layout != lo)
continue; continue;
ino = igrab(lo->plh_inode); ino = igrab(lo->plh_inode);
if (!ino) if (!ino)
break; break;
spin_lock(&ino->i_lock); spin_lock(&ino->i_lock);
/* Is this layout in the process of being freed? */ /* Is this layout in the process of being freed? */
if (NFS_I(ino)->layout != lo) { if (nfsi->layout != lo) {
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
iput(ino); iput(ino);
break; goto restart;
} }
pnfs_get_layout_hdr(lo); pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
...@@ -151,13 +154,13 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, ...@@ -151,13 +154,13 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
} }
static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
struct nfs_fh *fh, nfs4_stateid *stateid) struct nfs_fh *fh)
{ {
struct pnfs_layout_hdr *lo; struct pnfs_layout_hdr *lo;
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
rcu_read_lock(); rcu_read_lock();
lo = get_layout_by_fh_locked(clp, fh, stateid); lo = get_layout_by_fh_locked(clp, fh);
rcu_read_unlock(); rcu_read_unlock();
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
...@@ -167,17 +170,39 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, ...@@ -167,17 +170,39 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
/* /*
* Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing) * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
*/ */
static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo, static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new) const nfs4_stateid *new)
{ {
u32 oldseq, newseq; u32 oldseq, newseq;
oldseq = be32_to_cpu(lo->plh_stateid.seqid); /* Is the stateid still not initialised? */
if (!pnfs_layout_is_valid(lo))
return NFS4ERR_DELAY;
/* Mismatched stateid? */
if (!nfs4_stateid_match_other(&lo->plh_stateid, new))
return NFS4ERR_BAD_STATEID;
newseq = be32_to_cpu(new->seqid); newseq = be32_to_cpu(new->seqid);
/* Are we already in a layout recall situation? */
if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
lo->plh_return_seq != 0) {
if (newseq < lo->plh_return_seq)
return NFS4ERR_OLD_STATEID;
if (newseq > lo->plh_return_seq)
return NFS4ERR_DELAY;
goto out;
}
/* Check that the stateid matches what we think it should be. */
oldseq = be32_to_cpu(lo->plh_stateid.seqid);
if (newseq > oldseq + 1) if (newseq > oldseq + 1)
return false; return NFS4ERR_DELAY;
return true; /* Crazy server! */
if (newseq <= oldseq)
return NFS4ERR_OLD_STATEID;
out:
return NFS_OK;
} }
static u32 initiate_file_draining(struct nfs_client *clp, static u32 initiate_file_draining(struct nfs_client *clp,
...@@ -188,7 +213,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, ...@@ -188,7 +213,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
u32 rv = NFS4ERR_NOMATCHING_LAYOUT; u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
LIST_HEAD(free_me_list); LIST_HEAD(free_me_list);
lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); lo = get_layout_by_fh(clp, &args->cbl_fh);
if (!lo) { if (!lo) {
trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL, trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL,
&args->cbl_stateid, -rv); &args->cbl_stateid, -rv);
...@@ -196,18 +221,15 @@ static u32 initiate_file_draining(struct nfs_client *clp, ...@@ -196,18 +221,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
} }
ino = lo->plh_inode; ino = lo->plh_inode;
pnfs_layoutcommit_inode(ino, false);
spin_lock(&ino->i_lock); spin_lock(&ino->i_lock);
if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) { rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid);
rv = NFS4ERR_DELAY; if (rv != NFS_OK)
goto unlock; goto unlock;
}
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
spin_unlock(&ino->i_lock);
pnfs_layoutcommit_inode(ino, false);
spin_lock(&ino->i_lock);
/* /*
* Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return) * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)
*/ */
...@@ -223,11 +245,13 @@ static u32 initiate_file_draining(struct nfs_client *clp, ...@@ -223,11 +245,13 @@ static u32 initiate_file_draining(struct nfs_client *clp,
goto unlock; goto unlock;
} }
/* Embrace your forgetfulness! */
rv = NFS4ERR_NOMATCHING_LAYOUT;
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
&args->cbl_range); &args->cbl_range);
} }
pnfs_mark_layout_returned_if_empty(lo);
unlock: unlock:
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list); pnfs_free_lseg_list(&free_me_list);
......
...@@ -351,8 +351,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) ...@@ -351,8 +351,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
* Mark the bad layout state as invalid, then retry * Mark the bad layout state as invalid, then retry
* with the current stateid. * with the current stateid.
*/ */
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); pnfs_mark_layout_stateid_invalid(lo, &head);
pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head); pnfs_free_lseg_list(&head);
} else } else
......
...@@ -7944,8 +7944,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, ...@@ -7944,8 +7944,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
/* /*
* Mark the bad layout state as invalid, then retry * Mark the bad layout state as invalid, then retry
*/ */
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); pnfs_mark_layout_stateid_invalid(lo, &head);
pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head); pnfs_free_lseg_list(&head);
status = -EAGAIN; status = -EAGAIN;
...@@ -8144,8 +8143,7 @@ static void nfs4_layoutreturn_release(void *calldata) ...@@ -8144,8 +8143,7 @@ static void nfs4_layoutreturn_release(void *calldata)
spin_lock(&lo->plh_inode->i_lock); spin_lock(&lo->plh_inode->i_lock);
pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range, pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,
be32_to_cpu(lrp->args.stateid.seqid)); be32_to_cpu(lrp->args.stateid.seqid));
pnfs_mark_layout_returned_if_empty(lo); if (lrp->res.lrs_present && pnfs_layout_is_valid(lo))
if (lrp->res.lrs_present)
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
pnfs_clear_layoutreturn_waitbit(lo); pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&lo->plh_inode->i_lock); spin_unlock(&lo->plh_inode->i_lock);
......
...@@ -259,7 +259,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) ...@@ -259,7 +259,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
* is required. * is required.
* Note that caller must hold inode->i_lock. * Note that caller must hold inode->i_lock.
*/ */
static int int
pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
struct list_head *lseg_list) struct list_head *lseg_list)
{ {
...@@ -334,14 +334,17 @@ pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) ...@@ -334,14 +334,17 @@ pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
} }
static void static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
const struct pnfs_layout_range *range,
const nfs4_stateid *stateid)
{ {
INIT_LIST_HEAD(&lseg->pls_list); INIT_LIST_HEAD(&lseg->pls_list);
INIT_LIST_HEAD(&lseg->pls_lc_list); INIT_LIST_HEAD(&lseg->pls_lc_list);
atomic_set(&lseg->pls_refcount, 1); atomic_set(&lseg->pls_refcount, 1);
smp_mb();
set_bit(NFS_LSEG_VALID, &lseg->pls_flags); set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
lseg->pls_layout = lo; lseg->pls_layout = lo;
lseg->pls_range = *range;
lseg->pls_seq = be32_to_cpu(stateid->seqid);
} }
static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
...@@ -486,15 +489,6 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, ...@@ -486,15 +489,6 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
(end2 == NFS4_MAX_UINT64 || end2 > start1); (end2 == NFS4_MAX_UINT64 || end2 > start1);
} }
static bool
should_free_lseg(const struct pnfs_layout_range *lseg_range,
const struct pnfs_layout_range *recall_range)
{
return (recall_range->iomode == IOMODE_ANY ||
lseg_range->iomode == recall_range->iomode) &&
pnfs_lseg_range_intersecting(lseg_range, recall_range);
}
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list) struct list_head *tmp_list)
{ {
...@@ -533,6 +527,27 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) ...@@ -533,6 +527,27 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
return (s32)(s1 - s2) > 0; return (s32)(s1 - s2) > 0;
} }
static bool
pnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
const struct pnfs_layout_range *recall_range)
{
return (recall_range->iomode == IOMODE_ANY ||
lseg_range->iomode == recall_range->iomode) &&
pnfs_lseg_range_intersecting(lseg_range, recall_range);
}
/*
 * Decide whether a layout segment is selected by a recall.
 *
 * @lseg:         segment being examined
 * @recall_range: range being recalled, or NULL to match any range
 * @seq:          recall sequence id; 0 disables the sequence filter
 *
 * A segment whose pls_seq is newer than a non-zero @seq is never matched.
 * Otherwise the segment matches when @recall_range is NULL or when
 * pnfs_should_free_range() accepts its range.
 */
static bool
pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg,
		 const struct pnfs_layout_range *recall_range,
		 u32 seq)
{
	bool newer_than_recall;

	newer_than_recall = seq != 0 &&
			    pnfs_seqid_is_newer(lseg->pls_seq, seq);
	if (newer_than_recall)
		return false;

	return recall_range == NULL ||
	       pnfs_should_free_range(&lseg->pls_range, recall_range);
}
/** /**
* pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
* @lo: layout header containing the lsegs * @lo: layout header containing the lsegs
...@@ -562,10 +577,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, ...@@ -562,10 +577,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
if (list_empty(&lo->plh_segs)) if (list_empty(&lo->plh_segs))
return 0; return 0;
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (!recall_range || if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
should_free_lseg(&lseg->pls_range, recall_range)) {
if (seq && pnfs_seqid_is_newer(lseg->pls_seq, seq))
continue;
dprintk("%s: freeing lseg %p iomode %d seq %u" dprintk("%s: freeing lseg %p iomode %d seq %u"
"offset %llu length %llu\n", __func__, "offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_seq, lseg, lseg->pls_range.iomode, lseg->pls_seq,
...@@ -761,24 +773,25 @@ void ...@@ -761,24 +773,25 @@ void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
bool update_barrier) bool update_barrier)
{ {
u32 oldseq, newseq, new_barrier; u32 oldseq, newseq, new_barrier = 0;
int empty = list_empty(&lo->plh_segs); bool invalid = !pnfs_layout_is_valid(lo);
oldseq = be32_to_cpu(lo->plh_stateid.seqid); oldseq = be32_to_cpu(lo->plh_stateid.seqid);
newseq = be32_to_cpu(new->seqid); newseq = be32_to_cpu(new->seqid);
if (empty || pnfs_seqid_is_newer(newseq, oldseq)) { if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) {
nfs4_stateid_copy(&lo->plh_stateid, new); nfs4_stateid_copy(&lo->plh_stateid, new);
if (update_barrier) { /*
new_barrier = be32_to_cpu(new->seqid); * Because of wraparound, we want to keep the barrier
} else { * "close" to the current seqids.
/* Because of wraparound, we want to keep the barrier */
* "close" to the current seqids. new_barrier = newseq - atomic_read(&lo->plh_outstanding);
*/
new_barrier = newseq - atomic_read(&lo->plh_outstanding);
}
if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
lo->plh_barrier = new_barrier;
} }
if (update_barrier)
new_barrier = be32_to_cpu(new->seqid);
else if (new_barrier == 0)
return;
if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
lo->plh_barrier = new_barrier;
} }
static bool static bool
...@@ -873,15 +886,37 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) ...@@ -873,15 +886,37 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
} }
/*
 * Reset the pending-layoutreturn bookkeeping on a layout header: zero the
 * recorded return iomode and return sequence id, then clear the
 * NFS_LAYOUT_RETURN_REQUESTED flag.
 *
 * NOTE(review): the call sites visible in this diff run with the inode's
 * i_lock held - confirm that this is a requirement for all callers.
 */
static void
pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
{
lo->plh_return_iomode = 0;
lo->plh_return_seq = 0;
clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
}
static bool static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
nfs4_stateid *stateid,
enum pnfs_iomode *iomode)
{ {
if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return false; return false;
lo->plh_return_iomode = 0;
lo->plh_return_seq = 0;
pnfs_get_layout_hdr(lo); pnfs_get_layout_hdr(lo);
clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
if (stateid != NULL) {
nfs4_stateid_copy(stateid, &lo->plh_stateid);
if (lo->plh_return_seq != 0)
stateid->seqid = cpu_to_be32(lo->plh_return_seq);
}
if (iomode != NULL)
*iomode = lo->plh_return_iomode;
pnfs_clear_layoutreturn_info(lo);
return true;
}
if (stateid != NULL)
nfs4_stateid_copy(stateid, &lo->plh_stateid);
if (iomode != NULL)
*iomode = IOMODE_ANY;
return true; return true;
} }
...@@ -949,10 +984,7 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) ...@@ -949,10 +984,7 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
enum pnfs_iomode iomode; enum pnfs_iomode iomode;
bool send; bool send;
nfs4_stateid_copy(&stateid, &lo->plh_stateid); send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
stateid.seqid = cpu_to_be32(lo->plh_return_seq);
iomode = lo->plh_return_iomode;
send = pnfs_prepare_layoutreturn(lo);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
if (send) { if (send) {
/* Send an async layoutreturn so we dont deadlock */ /* Send an async layoutreturn so we dont deadlock */
...@@ -989,7 +1021,6 @@ _pnfs_return_layout(struct inode *ino) ...@@ -989,7 +1021,6 @@ _pnfs_return_layout(struct inode *ino)
dprintk("NFS: %s no layout to return\n", __func__); dprintk("NFS: %s no layout to return\n", __func__);
goto out; goto out;
} }
nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid);
/* Reference matched in nfs4_layoutreturn_release */ /* Reference matched in nfs4_layoutreturn_release */
pnfs_get_layout_hdr(lo); pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs); empty = list_empty(&lo->plh_segs);
...@@ -1012,8 +1043,7 @@ _pnfs_return_layout(struct inode *ino) ...@@ -1012,8 +1043,7 @@ _pnfs_return_layout(struct inode *ino)
goto out_put_layout_hdr; goto out_put_layout_hdr;
} }
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); send = pnfs_prepare_layoutreturn(lo, &stateid, NULL);
send = pnfs_prepare_layoutreturn(lo);
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list); pnfs_free_lseg_list(&tmp_list);
if (send) if (send)
...@@ -1080,11 +1110,10 @@ bool pnfs_roc(struct inode *ino) ...@@ -1080,11 +1110,10 @@ bool pnfs_roc(struct inode *ino)
goto out_noroc; goto out_noroc;
} }
nfs4_stateid_copy(&stateid, &lo->plh_stateid);
/* always send layoutreturn if being marked so */ /* always send layoutreturn if being marked so */
if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED, if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
&lo->plh_flags)) layoutreturn = pnfs_prepare_layoutreturn(lo,
layoutreturn = pnfs_prepare_layoutreturn(lo); &stateid, NULL);
list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
/* If we are sending layoutreturn, invalidate all valid lsegs */ /* If we are sending layoutreturn, invalidate all valid lsegs */
...@@ -1132,7 +1161,6 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) ...@@ -1132,7 +1161,6 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
spin_lock(&ino->i_lock); spin_lock(&ino->i_lock);
lo = NFS_I(ino)->layout; lo = NFS_I(ino)->layout;
pnfs_mark_layout_returned_if_empty(lo);
if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
lo->plh_barrier = barrier; lo->plh_barrier = barrier;
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
...@@ -1746,9 +1774,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) ...@@ -1746,9 +1774,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
return lseg; return lseg;
} }
init_lseg(lo, lseg); pnfs_init_lseg(lo, lseg, &res->range, &res->stateid);
lseg->pls_range = res->range;
lseg->pls_seq = be32_to_cpu(res->stateid.seqid);
spin_lock(&ino->i_lock); spin_lock(&ino->i_lock);
if (pnfs_layoutgets_blocked(lo)) { if (pnfs_layoutgets_blocked(lo)) {
...@@ -1769,16 +1795,19 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) ...@@ -1769,16 +1795,19 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
* inode invalid, and don't bother validating the stateid * inode invalid, and don't bother validating the stateid
* sequence number. * sequence number.
*/ */
pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL, 0); pnfs_mark_layout_stateid_invalid(lo, &free_me);
nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
lo->plh_barrier = be32_to_cpu(res->stateid.seqid); lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
} }
clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
pnfs_get_lseg(lseg); pnfs_get_lseg(lseg);
pnfs_layout_insert_lseg(lo, lseg, &free_me); pnfs_layout_insert_lseg(lo, lseg, &free_me);
if (!pnfs_layout_is_valid(lo)) {
pnfs_clear_layoutreturn_info(lo);
clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
}
if (res->return_on_close) if (res->return_on_close)
set_bit(NFS_LSEG_ROC, &lseg->pls_flags); set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
...@@ -1798,14 +1827,14 @@ static void ...@@ -1798,14 +1827,14 @@ static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
u32 seq) u32 seq)
{ {
if (lo->plh_return_iomode == iomode) if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
return;
if (lo->plh_return_iomode != 0)
iomode = IOMODE_ANY; iomode = IOMODE_ANY;
lo->plh_return_iomode = iomode; lo->plh_return_iomode = iomode;
set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq)) if (seq != 0) {
WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
lo->plh_return_seq = seq; lo->plh_return_seq = seq;
}
} }
/** /**
...@@ -1835,7 +1864,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, ...@@ -1835,7 +1864,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
assert_spin_locked(&lo->plh_inode->i_lock); assert_spin_locked(&lo->plh_inode->i_lock);
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (should_free_lseg(&lseg->pls_range, return_range)) { if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
dprintk("%s: marking lseg %p iomode %d " dprintk("%s: marking lseg %p iomode %d "
"offset %llu length %llu\n", __func__, "offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg, lseg->pls_range.iomode,
...@@ -1866,19 +1895,17 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, ...@@ -1866,19 +1895,17 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
bool return_now = false; bool return_now = false;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
pnfs_set_plh_return_info(lo, range.iomode, lseg->pls_seq); pnfs_set_plh_return_info(lo, range.iomode, 0);
/* /*
* mark all matching lsegs so that we are sure to have no live * mark all matching lsegs so that we are sure to have no live
* segments at hand when sending layoutreturn. See pnfs_put_lseg() * segments at hand when sending layoutreturn. See pnfs_put_lseg()
* for how it works. * for how it works.
*/ */
if (!pnfs_mark_matching_lsegs_return(lo, &free_me, if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0)) {
&range, lseg->pls_seq)) {
nfs4_stateid stateid; nfs4_stateid stateid;
enum pnfs_iomode iomode = lo->plh_return_iomode; enum pnfs_iomode iomode;
nfs4_stateid_copy(&stateid, &lo->plh_stateid); return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
return_now = pnfs_prepare_layoutreturn(lo);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
if (return_now) if (return_now)
pnfs_send_layoutreturn(lo, &stateid, iomode, false); pnfs_send_layoutreturn(lo, &stateid, iomode, false);
......
...@@ -268,6 +268,8 @@ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, ...@@ -268,6 +268,8 @@ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list, struct list_head *tmp_list,
const struct pnfs_layout_range *recall_range, const struct pnfs_layout_range *recall_range,
u32 seq); u32 seq);
int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
struct list_head *lseg_list);
bool pnfs_roc(struct inode *ino); bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino); void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
...@@ -375,6 +377,11 @@ static inline bool nfs_have_layout(struct inode *inode) ...@@ -375,6 +377,11 @@ static inline bool nfs_have_layout(struct inode *inode)
return NFS_I(inode)->layout != NULL; return NFS_I(inode)->layout != NULL;
} }
/*
 * A layout header counts as valid while its NFS_LAYOUT_INVALID_STID flag
 * is not set.
 */
static inline bool pnfs_layout_is_valid(const struct pnfs_layout_hdr *lo)
{
	return !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
}
static inline struct nfs4_deviceid_node * static inline struct nfs4_deviceid_node *
nfs4_get_deviceid(struct nfs4_deviceid_node *d) nfs4_get_deviceid(struct nfs4_deviceid_node *d)
{ {
...@@ -545,19 +552,6 @@ pnfs_calc_offset_length(u64 offset, u64 end) ...@@ -545,19 +552,6 @@ pnfs_calc_offset_length(u64 offset, u64 end)
return 1 + end - offset; return 1 + end - offset;
} }
/**
 * pnfs_mark_layout_returned_if_empty - flag an empty layout as returned
 * @lo: layout header
 *
 * Sets NFS_LAYOUT_INVALID_STID on @lo when its segment list is empty and
 * does nothing otherwise.
 *
 * Note: Caller must hold inode->i_lock
 */
static inline void
pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo)
{
	/* Segments are still attached: leave the stateid flag alone. */
	if (!list_empty(&lo->plh_segs))
		return;

	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
}
static inline void static inline void
pnfs_copy_range(struct pnfs_layout_range *dst, pnfs_copy_range(struct pnfs_layout_range *dst,
const struct pnfs_layout_range *src) const struct pnfs_layout_range *src)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册