提交 5dbb75ed 编写于 作者: F Filipe Manana 提交者: David Sterba

btrfs: fix RWF_NOWAIT writes blocking on extent locks and waiting for IO

A RWF_NOWAIT write is not supposed to wait on filesystem locks that can be
held for a long time or for ongoing IO to complete.

However when calling check_can_nocow(), if the inode has prealloc extents
or has the NOCOW flag set, we can block on extent (file range) locks
through the call to btrfs_lock_and_flush_ordered_range(). Such lock can
take a significant amount of time to be available. For example, a fiemap
task may be running, and iterating through the entire file range checking
all extents and doing backref walking to determine if they are shared,
or a readpage operation may be in progress.

Also at btrfs_lock_and_flush_ordered_range(), called by check_can_nocow(),
after locking the file range we wait for any existing ordered extent that
is in progress to complete. Another operation that can take a significant
amount of time and defeat the purpose of RWF_NOWAIT.

So fix this by trying to lock the file range and if it's currently locked
return -EAGAIN to user space. If we are able to lock the file range without
waiting and there is an ordered extent in the range, return -EAGAIN as
well, instead of waiting for it to complete. Finally, don't bother trying
to lock the snapshot lock of the root when attempting a RWF_NOWAIT write,
as that is only important for buffered writes.

Fixes: edf064e7 ("btrfs: nowait aio support")
Signed-off-by: NFilipe Manana <fdmanana@suse.com>
Signed-off-by: NDavid Sterba <dsterba@suse.com>
上级 260a6339
...@@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, ...@@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
} }
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes) size_t *write_bytes, bool nowait)
{ {
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
...@@ -1541,27 +1541,43 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, ...@@ -1541,27 +1541,43 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
u64 num_bytes; u64 num_bytes;
int ret; int ret;
if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
return -EAGAIN; return -EAGAIN;
lockstart = round_down(pos, fs_info->sectorsize); lockstart = round_down(pos, fs_info->sectorsize);
lockend = round_up(pos + *write_bytes, lockend = round_up(pos + *write_bytes,
fs_info->sectorsize) - 1; fs_info->sectorsize) - 1;
num_bytes = lockend - lockstart + 1;
if (nowait) {
struct btrfs_ordered_extent *ordered;
if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
return -EAGAIN;
ordered = btrfs_lookup_ordered_range(inode, lockstart,
num_bytes);
if (ordered) {
btrfs_put_ordered_extent(ordered);
ret = -EAGAIN;
goto out_unlock;
}
} else {
btrfs_lock_and_flush_ordered_range(inode, lockstart, btrfs_lock_and_flush_ordered_range(inode, lockstart,
lockend, NULL); lockend, NULL);
}
num_bytes = lockend - lockstart + 1;
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
NULL, NULL, NULL); NULL, NULL, NULL);
if (ret <= 0) { if (ret <= 0) {
ret = 0; ret = 0;
if (!nowait)
btrfs_drew_write_unlock(&root->snapshot_lock); btrfs_drew_write_unlock(&root->snapshot_lock);
} else { } else {
*write_bytes = min_t(size_t, *write_bytes , *write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart); num_bytes - pos + lockstart);
} }
out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend); unlock_extent(&inode->io_tree, lockstart, lockend);
return ret; return ret;
...@@ -1633,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, ...@@ -1633,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) && BTRFS_INODE_PREALLOC)) &&
check_can_nocow(BTRFS_I(inode), pos, check_can_nocow(BTRFS_I(inode), pos,
&write_bytes) > 0) { &write_bytes, false) > 0) {
/* /*
* For nodata cow case, no need to reserve * For nodata cow case, no need to reserve
* data space. * data space.
...@@ -1912,12 +1928,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, ...@@ -1912,12 +1928,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/ */
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) || BTRFS_INODE_PREALLOC)) ||
check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) { check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes,
true) <= 0) {
inode_unlock(inode); inode_unlock(inode);
return -EAGAIN; return -EAGAIN;
} }
/* check_can_nocow() locks the snapshot lock on success */
btrfs_drew_write_unlock(&root->snapshot_lock);
/* /*
* There are holes in the range or parts of the range that must * There are holes in the range or parts of the range that must
* be COWed (shared extents, RO block groups, etc), so just bail * be COWed (shared extents, RO block groups, etc), so just bail
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册