提交 d38c3fa6 编写于 作者: L Linus Torvalds

Merge tag 'for-5.3-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - tiny race window during 2 transactions aborting at the same time can
   accidentally lead to a commit

 - regression fix, possible deadlock during fiemap

 - fix for an old bug when incremental send can fail on a file that has
   been deduplicated in a special way

* tag 'for-5.3-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix deadlock between fiemap and transaction commits
  Btrfs: fix race leading to fs corruption after transaction abort
  Btrfs: fix incremental send failure after deduplication
......@@ -1483,7 +1483,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
ulist_init(roots);
ulist_init(tmp);
trans = btrfs_attach_transaction(root);
trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
ret = PTR_ERR(trans);
......
......@@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx,
{
int ret = 0;
if (sctx->cur_ino != sctx->cmp_key->objectid) {
if (result == BTRFS_COMPARE_TREE_CHANGED) {
struct extent_buffer *leaf_l;
struct extent_buffer *leaf_r;
struct btrfs_file_extent_item *ei_l;
struct btrfs_file_extent_item *ei_r;
leaf_l = sctx->left_path->nodes[0];
leaf_r = sctx->right_path->nodes[0];
ei_l = btrfs_item_ptr(leaf_l,
sctx->left_path->slots[0],
struct btrfs_file_extent_item);
ei_r = btrfs_item_ptr(leaf_r,
sctx->right_path->slots[0],
struct btrfs_file_extent_item);
/*
* We may have found an extent item that has changed
* only its disk_bytenr field and the corresponding
* inode item was not updated. This case happens due to
* very specific timings during relocation when a leaf
* that contains file extent items is COWed while
* relocation is ongoing and its in the stage where it
* updates data pointers. So when this happens we can
* safely ignore it since we know it's the same extent,
* but just at different logical and physical locations
* (when an extent is fully replaced with a new one, we
* know the generation number must have changed too,
* since snapshot creation implies committing the current
* transaction, and the inode item must have been updated
* as well).
* This replacement of the disk_bytenr happens at
* relocation.c:replace_file_extents() through
* relocation.c:btrfs_reloc_cow_block().
*/
if (btrfs_file_extent_generation(leaf_l, ei_l) ==
btrfs_file_extent_generation(leaf_r, ei_r) &&
btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
btrfs_file_extent_compression(leaf_l, ei_l) ==
btrfs_file_extent_compression(leaf_r, ei_r) &&
btrfs_file_extent_encryption(leaf_l, ei_l) ==
btrfs_file_extent_encryption(leaf_r, ei_r) &&
btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
btrfs_file_extent_type(leaf_l, ei_l) ==
btrfs_file_extent_type(leaf_r, ei_r) &&
btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
btrfs_file_extent_offset(leaf_l, ei_l) ==
btrfs_file_extent_offset(leaf_r, ei_r) &&
btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
btrfs_file_extent_num_bytes(leaf_r, ei_r))
return 0;
}
inconsistent_snapshot_error(sctx, result, "extent");
return -EIO;
}
/*
* We have found an extent item that changed without the inode item
* having changed. This can happen either after relocation (where the
* disk_bytenr of an extent item is replaced at
* relocation.c:replace_file_extents()) or after deduplication into a
* file in both the parent and send snapshots (where an extent item can
* get modified or replaced with a new one). Note that deduplication
* updates the inode item, but it only changes the iversion (sequence
* field in the inode item) of the inode, so if a file is deduplicated
* the same amount of times in both the parent and send snapshots, its
* iversion becames the same in both snapshots, whence the inode item is
* the same on both snapshots.
*/
if (sctx->cur_ino != sctx->cmp_key->objectid)
return 0;
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result != BTRFS_COMPARE_TREE_DELETED)
......
......@@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
__TRANS_ATTACH |
__TRANS_JOIN),
__TRANS_JOIN |
__TRANS_JOIN_NOSTART),
[TRANS_STATE_UNBLOCKED] = (__TRANS_START |
__TRANS_ATTACH |
__TRANS_JOIN |
__TRANS_JOIN_NOLOCK),
__TRANS_JOIN_NOLOCK |
__TRANS_JOIN_NOSTART),
[TRANS_STATE_COMPLETED] = (__TRANS_START |
__TRANS_ATTACH |
__TRANS_JOIN |
__TRANS_JOIN_NOLOCK),
__TRANS_JOIN_NOLOCK |
__TRANS_JOIN_NOSTART),
};
void btrfs_put_transaction(struct btrfs_transaction *transaction)
......@@ -543,7 +546,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
ret = join_transaction(fs_info, type);
if (ret == -EBUSY) {
wait_current_trans(fs_info);
if (unlikely(type == TRANS_ATTACH))
if (unlikely(type == TRANS_ATTACH ||
type == TRANS_JOIN_NOSTART))
ret = -ENOENT;
}
} while (ret == -EBUSY);
......@@ -659,6 +663,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root
BTRFS_RESERVE_NO_FLUSH, true);
}
/*
* Similar to regular join but it never starts a transaction when none is
* running or after waiting for the current one to finish.
*/
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_JOIN_NOSTART,
BTRFS_RESERVE_NO_FLUSH, true);
}
/*
* btrfs_attach_transaction() - catch the running transaction
*
......@@ -2037,6 +2051,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
}
} else {
spin_unlock(&fs_info->trans_lock);
/*
* The previous transaction was aborted and was already removed
* from the list of transactions at fs_info->trans_list. So we
* abort to prevent writing a new superblock that reflects a
* corrupt state (pointing to trees with unwritten nodes/leafs).
*/
if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
ret = -EROFS;
goto cleanup_transaction;
}
}
extwriter_counter_dec(cur_trans, trans->type);
......
......@@ -94,11 +94,13 @@ struct btrfs_transaction {
#define __TRANS_JOIN (1U << 11)
#define __TRANS_JOIN_NOLOCK (1U << 12)
#define __TRANS_DUMMY (1U << 13)
#define __TRANS_JOIN_NOSTART (1U << 14)
#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE)
#define TRANS_ATTACH (__TRANS_ATTACH)
#define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE)
#define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK)
#define TRANS_JOIN_NOSTART (__TRANS_JOIN_NOSTART)
#define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH)
......@@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
int min_factor);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
struct btrfs_root *root);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册