diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6dcdb2ec921185ae350aa0f2ff49cd65eaa0d500..d453d62ab0c6e322b3c0df0a05b20daa6751d5ad 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -355,7 +355,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, index = srcu_read_lock(&fs_info->subvol_srcu); - root = btrfs_read_fs_root_no_name(fs_info, &root_key); + root = btrfs_get_fs_root(fs_info, &root_key, false); if (IS_ERR(root)) { srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(root); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8c58191249cc14c33c376bb0c8c0a469cb95b894..35489e7129a7e8de9d0232279d41d3bbd19ae1df 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3416,6 +3416,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, u64 bytenr); +void btrfs_get_block_group(struct btrfs_block_group_cache *cache); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int get_block_group_index(struct btrfs_block_group_cache *cache); struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, @@ -3479,6 +3480,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); +struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( + struct btrfs_fs_info *fs_info, + const u64 chunk_offset); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start, struct extent_map *em); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index acf3ed11cfb60e95b685aeb009e4d72fbdba3c3b..4b89680a192338c7a70a4c909e3c4374abe41284 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) return (cache->flags & bits) == bits; } -static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) +void btrfs_get_block_group(struct btrfs_block_group_cache *cache) { atomic_inc(&cache->count); } @@ -5915,19 +5915,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, set_extent_dirty(info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); - /* - * No longer have used bytes in this block group, queue - * it for deletion. - */ - if (old_val == 0) { - spin_lock(&info->unused_bgs_lock); - if (list_empty(&cache->bg_list)) { - btrfs_get_block_group(cache); - list_add_tail(&cache->bg_list, - &info->unused_bgs); - } - spin_unlock(&info->unused_bgs_lock); - } } spin_lock(&trans->transaction->dirty_bgs_lock); @@ -5939,6 +5926,22 @@ static int update_block_group(struct btrfs_trans_handle *trans, } spin_unlock(&trans->transaction->dirty_bgs_lock); + /* + * No longer have used bytes in this block group, queue it for + * deletion. We do this after adding the block group to the + * dirty list to avoid races between cleaner kthread and space + * cache writeout. + */ + if (!alloc && old_val == 0) { + spin_lock(&info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &info->unused_bgs); + } + spin_unlock(&info->unused_bgs_lock); + } + btrfs_put_block_group(cache); total -= num_bytes; bytenr += num_bytes; @@ -8105,21 +8108,47 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, } /* - * TODO: Modify related function to add related node/leaf to dirty_extent_root, - * for later qgroup accounting. - * - * Current, this function does nothing. + * These may not be seen by the usual inc/dec ref code so we have to + * add them here. */ +static int record_one_subtree_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ + struct btrfs_qgroup_extent_record *qrecord; + struct btrfs_delayed_ref_root *delayed_refs; + + qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); + if (!qrecord) + return -ENOMEM; + + qrecord->bytenr = bytenr; + qrecord->num_bytes = num_bytes; + qrecord->old_roots = NULL; + + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord)) + kfree(qrecord); + spin_unlock(&delayed_refs->lock); + + return 0; +} + static int account_leaf_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { int nr = btrfs_header_nritems(eb); - int i, extent_type; + int i, extent_type, ret; struct btrfs_key key; struct btrfs_file_extent_item *fi; u64 bytenr, num_bytes; + /* We can be called directly from walk_up_proc() */ + if (!root->fs_info->quota_enabled) + return 0; + for (i = 0; i < nr; i++) { btrfs_item_key_to_cpu(eb, &key, i); @@ -8138,6 +8167,10 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, continue; num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + + ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); + if (ret) + return ret; } return 0; } @@ -8206,8 +8239,6 @@ static int adjust_slots_upwards(struct btrfs_root *root, /* * root_eb is the subtree root and is locked before this function is called. - * TODO: Modify this function to mark all (including complete shared node) - * to dirty_extent_root to allow it get accounted in qgroup. */ static int account_shared_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -8285,6 +8316,11 @@ static int account_shared_subtree(struct btrfs_trans_handle *trans, btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + + ret = record_one_subtree_extent(trans, root, child_bytenr, + root->nodesize); + if (ret) + goto out; } if (level == 0) { @@ -10256,6 +10292,47 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, return ret; } +struct btrfs_trans_handle * +btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, + const u64 chunk_offset) +{ + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct map_lookup *map; + unsigned int num_items; + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + read_unlock(&em_tree->lock); + ASSERT(em && em->start == chunk_offset); + + /* + * We need to reserve 3 + N units from the metadata space info in order + * to remove a block group (done at btrfs_remove_chunk() and at + * btrfs_remove_block_group()), which are used for: + * + * 1 unit for adding the free space inode's orphan (located in the tree + * of tree roots). + * 1 unit for deleting the block group item (located in the extent + * tree). + * 1 unit for deleting the free space item (located in tree of tree + * roots). + * N units for deleting N device extent items corresponding to each + * stripe (located in the device tree). + * + * In order to remove a block group we also need to reserve units in the + * system space info in order to update the chunk tree (update one or + * more device items and remove one chunk item), but this is done at + * btrfs_remove_chunk() through a call to check_system_chunk(). + */ + map = (struct map_lookup *)em->bdev; + num_items = 3 + map->num_stripes; + free_extent_map(em); + + return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, + num_items, 1); +} + /* * Process the unused_bgs list and remove any that don't have any allocated * space inside of them. @@ -10322,8 +10399,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * Want to do this before we do anything else so we can recover * properly if we fail to join the transaction. */ - /* 1 for btrfs_orphan_reserve_metadata() */ - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_trans_remove_block_group(fs_info, + block_group->key.objectid); if (IS_ERR(trans)) { btrfs_dec_block_group_ro(root, block_group); ret = PTR_ERR(trans); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 977e715f0bf21a4ea8908700cfe4207dee1fe95d..72e73461c0643bbf128504c93a9c47dd377784aa 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1882,8 +1882,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_log_ctx ctx; int ret = 0; bool full_sync = 0; - const u64 len = end - start + 1; + u64 len; + /* + * The range length can be represented by u64, we have to do the typecasts + * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() + */ + len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); /* @@ -2071,8 +2076,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } } if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, - end - start + 1); + ret = btrfs_wait_ordered_range(inode, start, len); if (ret) { btrfs_end_transaction(trans, root); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 994490d5fa6423dee4971e0ff013777d853fe6a6..a70c5790f8f5908f08f606d097fe33e6966af36b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4046,9 +4046,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, */ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; - int ret; /* * 1 for the possible orphan item @@ -4057,27 +4055,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) * 1 for the inode ref * 1 for the inode */ - trans = btrfs_start_transaction(root, 5); - if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) - return trans; - - if (PTR_ERR(trans) == -ENOSPC) { - u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); - - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return trans; - ret = btrfs_cond_migrate_bytes(root->fs_info, - &root->fs_info->trans_block_rsv, - num_bytes, 5); - if (ret) { - btrfs_end_transaction(trans, root); - return ERR_PTR(ret); - } - trans->block_rsv = &root->fs_info->trans_block_rsv; - trans->bytes_reserved = num_bytes; - } - return trans; + return btrfs_start_transaction_fallback_global_rsv(root, 5, 5); } static int btrfs_unlink(struct inode *dir, struct dentry *dentry) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 93e12c18ffd736ec77351ecb0443bbfec606a938..5279fdae7142fbe3177a556a020ed1af3a7aa8f1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -993,9 +993,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; - spin_lock(&fs_info->qgroup_lock); fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; + btrfs_qgroup_wait_for_completion(fs_info); + spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; @@ -1461,6 +1462,8 @@ struct btrfs_qgroup_extent_record struct btrfs_qgroup_extent_record *entry; u64 bytenr = record->bytenr; + assert_spin_locked(&delayed_refs->lock); + while (*p) { parent_node = *p; entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2907a77fb1f6f4670885a7985ab40ed3e79c4925..b091d94ceef68013e992161b0aaf31b47cc645dc 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3432,7 +3432,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, struct btrfs_device *scrub_dev, u64 chunk_offset, u64 length, - u64 dev_offset, int is_dev_replace) + u64 dev_offset, + struct btrfs_block_group_cache *cache, + int is_dev_replace) { struct btrfs_mapping_tree *map_tree = &sctx->dev_root->fs_info->mapping_tree; @@ -3445,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); read_unlock(&map_tree->map_tree.lock); - if (!em) - return -EINVAL; + if (!em) { + /* + * Might have been an unused block group deleted by the cleaner + * kthread or relocation. + */ + spin_lock(&cache->lock); + if (!cache->removed) + ret = -EINVAL; + spin_unlock(&cache->lock); + + return ret; + } map = (struct map_lookup *)em->bdev; if (em->start != chunk_offset) @@ -3483,6 +3495,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, u64 length; u64 chunk_offset; int ret = 0; + int ro_set; int slot; struct extent_buffer *l; struct btrfs_key key; @@ -3568,7 +3581,21 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_on(fs_info); ret = btrfs_inc_block_group_ro(root, cache); scrub_pause_off(fs_info); - if (ret) { + + if (ret == 0) { + ro_set = 1; + } else if (ret == -ENOSPC) { + /* + * btrfs_inc_block_group_ro return -ENOSPC when it + * failed in creating new chunk for metadata. + * It is not a problem for scrub/replace, because + * metadata are always cowed, and our scrub paused + * commit_transactions. + */ + ro_set = 0; + } else { + btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n", + ret); btrfs_put_block_group(cache); break; } @@ -3577,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, - found_key.offset, is_dev_replace); + found_key.offset, cache, is_dev_replace); /* * flush, submit all pending read and write bios, afterwards @@ -3611,7 +3638,30 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_off(fs_info); - btrfs_dec_block_group_ro(root, cache); + if (ro_set) + btrfs_dec_block_group_ro(root, cache); + + /* + * We might have prevented the cleaner kthread from deleting + * this block group if it was already unused because we raced + * and set it to RO mode first. So add it back to the unused + * list, otherwise it might not ever be deleted unless a manual + * balance is triggered or it becomes used and unused again. + */ + spin_lock(&cache->lock); + if (!cache->removed && !cache->ro && cache->reserved == 0 && + btrfs_block_group_used(&cache->item) == 0) { + spin_unlock(&cache->lock); + spin_lock(&fs_info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &fs_info->unused_bgs); + } + spin_unlock(&fs_info->unused_bgs_lock); + } else { + spin_unlock(&cache->lock); + } btrfs_put_block_group(cache); if (ret) diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index c8c3d70c31ffad4e02acd04e0f7dcaa54ad0fe2b..8b72b005bfb9a212518a711a2e476c2d70b47b24 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -898,8 +898,10 @@ int btrfs_test_free_space_cache(void) } root = btrfs_alloc_dummy_root(); - if (!root) + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto out; + } root->fs_info = btrfs_alloc_dummy_fs_info(); if (!root->fs_info) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 418c6a2ad7d88658f8624d99a1ba0e9e84c13d45..3367a3c6f214f5ca248a79fff36c6404a49fbf4f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -592,6 +592,38 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, return start_transaction(root, num_items, TRANS_START, BTRFS_RESERVE_FLUSH_ALL); } +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( + struct btrfs_root *root, + unsigned int num_items, + int min_factor) +{ + struct btrfs_trans_handle *trans; + u64 num_bytes; + int ret; + + trans = btrfs_start_transaction(root, num_items); + if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) + return trans; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return trans; + + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); + ret = btrfs_cond_migrate_bytes(root->fs_info, + &root->fs_info->trans_block_rsv, + num_bytes, + min_factor); + if (ret) { + btrfs_end_transaction(trans, root); + return ERR_PTR(ret); + } + + trans->block_rsv = &root->fs_info->trans_block_rsv; + trans->bytes_reserved = num_bytes; + + return trans; +} struct btrfs_trans_handle *btrfs_start_transaction_lflush( struct btrfs_root *root, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b05b2f64d9133313f1af231c4a4254ff16972f00..0da21ca9b3fb312a7ec77b4e8314a9e4b3410075 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -185,6 +185,10 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, unsigned int num_items); +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( + struct btrfs_root *root, + unsigned int num_items, + int min_factor); struct btrfs_trans_handle *btrfs_start_transaction_lflush( struct btrfs_root *root, unsigned int num_items); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6df8fdc1312ce78e97f9f90037ff5b236a58a92..45645220660996773b1f76cd1d526d4aeae33dac 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1973,8 +1973,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->writeable) { fs_devices->rw_devices--; /* zero out the old super if it is writable */ - btrfs_scratch_superblocks(srcdev->bdev, - rcu_str_deref(srcdev->name)); + btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); } if (srcdev->bdev) @@ -2024,8 +2023,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); if (tgtdev->bdev) { - btrfs_scratch_superblocks(tgtdev->bdev, - rcu_str_deref(tgtdev->name)); + btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); fs_info->fs_devices->open_devices--; } fs_info->fs_devices->num_devices--; @@ -2853,7 +2851,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset) if (ret) return ret; - trans = btrfs_start_transaction(root, 0); + trans = btrfs_start_trans_remove_block_group(root->fs_info, + chunk_offset); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_std_error(root->fs_info, ret, NULL); @@ -3123,7 +3122,7 @@ static int chunk_profiles_filter(u64 chunk_type, return 1; } -static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, +static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, struct btrfs_balance_args *bargs) { struct btrfs_block_group_cache *cache; @@ -3156,7 +3155,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, return ret; } -static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, +static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, struct btrfs_balance_args *bargs) { struct btrfs_block_group_cache *cache; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ec571237273208fcb87f7be1c473b0c6a1392b50..d5c84f6b13538a338d90560b6520be4fe56c3125 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -382,7 +382,7 @@ struct map_lookup { #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) #define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6) #define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7) -#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 8) +#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10) #define BTRFS_BALANCE_ARGS_MASK \ (BTRFS_BALANCE_ARGS_PROFILES | \