提交 41963c10 编写于 作者: B Benjamin Coddington 提交者: Trond Myklebust

pnfs/blocklayout: update last_write_offset atomically with extents

Block/SCSI layout write completion may add committable extents to the
extent tree before updating the layout's last-written byte under the inode
lock.  If a sync happens before this value is updated, then
prepare_layoutcommit may find and encode these extents, which would produce
a LAYOUTCOMMIT request whose encoded extents are larger than the request's
loca_length.

Fix this by using a last-written byte value that is updated atomically with
the extent tree so that committable extents always match.
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
上级 b88fa69e
...@@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work) ...@@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work)
PAGE_SIZE - 1) & (loff_t)PAGE_MASK; PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
ext_tree_mark_written(bl, start >> SECTOR_SHIFT, ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
(end - start) >> SECTOR_SHIFT); (end - start) >> SECTOR_SHIFT, end);
} }
pnfs_ld_write_done(hdr); pnfs_ld_write_done(hdr);
......
...@@ -141,6 +141,7 @@ struct pnfs_block_layout { ...@@ -141,6 +141,7 @@ struct pnfs_block_layout {
struct rb_root bl_ext_ro; struct rb_root bl_ext_ro;
spinlock_t bl_ext_lock; /* Protects list manipulation */ spinlock_t bl_ext_lock; /* Protects list manipulation */
bool bl_scsi_layout; bool bl_scsi_layout;
u64 bl_lwb;
}; };
static inline struct pnfs_block_layout * static inline struct pnfs_block_layout *
...@@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl, ...@@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl,
int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start, int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start,
sector_t end); sector_t end);
int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
sector_t len); sector_t len, u64 lwb);
bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent *ret, bool rw); struct pnfs_block_extent *ret, bool rw);
int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
......
...@@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be, ...@@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
int int
ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
sector_t len) sector_t len, u64 lwb)
{ {
struct rb_root *root = &bl->bl_ext_rw; struct rb_root *root = &bl->bl_ext_rw;
sector_t end = start + len; sector_t end = start + len;
...@@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, ...@@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
} }
} }
out: out:
if (bl->bl_lwb < lwb)
bl->bl_lwb = lwb;
spin_unlock(&bl->bl_ext_lock); spin_unlock(&bl->bl_ext_lock);
__ext_put_deviceids(&tmp); __ext_put_deviceids(&tmp);
...@@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p) ...@@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
} }
static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
size_t buffer_size, size_t *count) size_t buffer_size, size_t *count, __u64 *lastbyte)
{ {
struct pnfs_block_extent *be; struct pnfs_block_extent *be;
int ret = 0; int ret = 0;
...@@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, ...@@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
p = encode_block_extent(be, p); p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING; be->be_tag = EXTENT_COMMITTING;
} }
*lastbyte = bl->bl_lwb - 1;
bl->bl_lwb = 0;
spin_unlock(&bl->bl_ext_lock); spin_unlock(&bl->bl_ext_lock);
return ret; return ret;
...@@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) ...@@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
arg->layoutupdate_pages = &arg->layoutupdate_page; arg->layoutupdate_pages = &arg->layoutupdate_page;
retry: retry:
ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count); ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
if (unlikely(ret)) { if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size); ext_tree_free_commitdata(arg, buffer_size);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册