提交 80e0c505 编写于 作者: L Linus Torvalds

Merge branch 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "This has Mark Fasheh's patches to fix quota accounting during subvol
  deletion, which we've been working on for a while now.  The patch is
  pretty small but it's a key fix.

  Otherwise it's a random assortment"

* 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  btrfs: fix balance range usage filters in 4.4-rc
  btrfs: qgroup: account shared subtree during snapshot delete
  Btrfs: use btrfs_get_fs_root in resolve_indirect_ref
  btrfs: qgroup: fix quota disable during rescan
  Btrfs: fix race between cleaner kthread and space cache writeout
  Btrfs: fix scrub preventing unused block groups from being deleted
  Btrfs: fix race between scrub and block group deletion
  btrfs: fix rcu warning during device replace
  btrfs: Continue replace when set_block_ro failed
  btrfs: fix clashing number of the enhanced balance usage filter
  Btrfs: fix the number of transaction units needed to remove a block group
  Btrfs: use global reserve when deleting unused block group after ENOSPC
  Btrfs: tests: checking for NULL instead of IS_ERR()
  btrfs: fix signed overflows in btrfs_sync_file
......@@ -355,7 +355,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
index = srcu_read_lock(&fs_info->subvol_srcu);
root = btrfs_read_fs_root_no_name(fs_info, &root_key);
root = btrfs_get_fs_root(fs_info, &root_key, false);
if (IS_ERR(root)) {
srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = PTR_ERR(root);
......
......@@ -3416,6 +3416,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
u64 bytenr);
void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int get_block_group_index(struct btrfs_block_group_cache *cache);
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
......@@ -3479,6 +3480,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytes_used,
u64 type, u64 chunk_objectid, u64 chunk_offset,
u64 size);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start,
struct extent_map *em);
......
......@@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
return (cache->flags & bits) == bits;
}
static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
atomic_inc(&cache->count);
}
......@@ -5915,19 +5915,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
/*
* No longer have used bytes in this block group, queue
* it for deletion.
*/
if (old_val == 0) {
spin_lock(&info->unused_bgs_lock);
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
list_add_tail(&cache->bg_list,
&info->unused_bgs);
}
spin_unlock(&info->unused_bgs_lock);
}
}
spin_lock(&trans->transaction->dirty_bgs_lock);
......@@ -5939,6 +5926,22 @@ static int update_block_group(struct btrfs_trans_handle *trans,
}
spin_unlock(&trans->transaction->dirty_bgs_lock);
/*
* No longer have used bytes in this block group, queue it for
* deletion. We do this after adding the block group to the
* dirty list to avoid races between cleaner kthread and space
* cache writeout.
*/
if (!alloc && old_val == 0) {
spin_lock(&info->unused_bgs_lock);
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
list_add_tail(&cache->bg_list,
&info->unused_bgs);
}
spin_unlock(&info->unused_bgs_lock);
}
btrfs_put_block_group(cache);
total -= num_bytes;
bytenr += num_bytes;
......@@ -8105,21 +8108,47 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
}
/*
* TODO: Modify related function to add related node/leaf to dirty_extent_root,
* for later qgroup accounting.
*
* Current, this function does nothing.
* These may not be seen by the usual inc/dec ref code so we have to
* add them here.
*/
static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes)
{
struct btrfs_qgroup_extent_record *qrecord;
struct btrfs_delayed_ref_root *delayed_refs;
qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
if (!qrecord)
return -ENOMEM;
qrecord->bytenr = bytenr;
qrecord->num_bytes = num_bytes;
qrecord->old_roots = NULL;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord))
kfree(qrecord);
spin_unlock(&delayed_refs->lock);
return 0;
}
static int account_leaf_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *eb)
{
int nr = btrfs_header_nritems(eb);
int i, extent_type;
int i, extent_type, ret;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
u64 bytenr, num_bytes;
/* We can be called directly from walk_up_proc() */
if (!root->fs_info->quota_enabled)
return 0;
for (i = 0; i < nr; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
......@@ -8138,6 +8167,10 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
continue;
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
if (ret)
return ret;
}
return 0;
}
......@@ -8206,8 +8239,6 @@ static int adjust_slots_upwards(struct btrfs_root *root,
/*
* root_eb is the subtree root and is locked before this function is called.
* TODO: Modify this function to mark all (including complete shared node)
* to dirty_extent_root to allow it get accounted in qgroup.
*/
static int account_shared_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
......@@ -8285,6 +8316,11 @@ static int account_shared_subtree(struct btrfs_trans_handle *trans,
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
ret = record_one_subtree_extent(trans, root, child_bytenr,
root->nodesize);
if (ret)
goto out;
}
if (level == 0) {
......@@ -10256,6 +10292,47 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
return ret;
}
struct btrfs_trans_handle *
btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
const u64 chunk_offset)
{
struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
struct extent_map *em;
struct map_lookup *map;
unsigned int num_items;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, chunk_offset, 1);
read_unlock(&em_tree->lock);
ASSERT(em && em->start == chunk_offset);
/*
* We need to reserve 3 + N units from the metadata space info in order
* to remove a block group (done at btrfs_remove_chunk() and at
* btrfs_remove_block_group()), which are used for:
*
* 1 unit for adding the free space inode's orphan (located in the tree
* of tree roots).
* 1 unit for deleting the block group item (located in the extent
* tree).
* 1 unit for deleting the free space item (located in tree of tree
* roots).
* N units for deleting N device extent items corresponding to each
* stripe (located in the device tree).
*
* In order to remove a block group we also need to reserve units in the
* system space info in order to update the chunk tree (update one or
* more device items and remove one chunk item), but this is done at
* btrfs_remove_chunk() through a call to check_system_chunk().
*/
map = (struct map_lookup *)em->bdev;
num_items = 3 + map->num_stripes;
free_extent_map(em);
return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
num_items, 1);
}
/*
* Process the unused_bgs list and remove any that don't have any allocated
* space inside of them.
......@@ -10322,8 +10399,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* Want to do this before we do anything else so we can recover
* properly if we fail to join the transaction.
*/
/* 1 for btrfs_orphan_reserve_metadata() */
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_trans_remove_block_group(fs_info,
block_group->key.objectid);
if (IS_ERR(trans)) {
btrfs_dec_block_group_ro(root, block_group);
ret = PTR_ERR(trans);
......
......@@ -1882,8 +1882,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_log_ctx ctx;
int ret = 0;
bool full_sync = 0;
const u64 len = end - start + 1;
u64 len;
/*
* The range length can be represented by u64, we have to do the typecasts
* to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
*/
len = (u64)end - (u64)start + 1;
trace_btrfs_sync_file(file, datasync);
/*
......@@ -2071,8 +2076,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
}
if (!full_sync) {
ret = btrfs_wait_ordered_range(inode, start,
end - start + 1);
ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
btrfs_end_transaction(trans, root);
goto out;
......
......@@ -4046,9 +4046,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
*/
static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
int ret;
/*
* 1 for the possible orphan item
......@@ -4057,27 +4055,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
* 1 for the inode ref
* 1 for the inode
*/
trans = btrfs_start_transaction(root, 5);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
if (PTR_ERR(trans) == -ENOSPC) {
u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans))
return trans;
ret = btrfs_cond_migrate_bytes(root->fs_info,
&root->fs_info->trans_block_rsv,
num_bytes, 5);
if (ret) {
btrfs_end_transaction(trans, root);
return ERR_PTR(ret);
}
trans->block_rsv = &root->fs_info->trans_block_rsv;
trans->bytes_reserved = num_bytes;
}
return trans;
return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
}
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
......
......@@ -993,9 +993,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
goto out;
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
btrfs_qgroup_wait_for_completion(fs_info);
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
......@@ -1461,6 +1462,8 @@ struct btrfs_qgroup_extent_record
struct btrfs_qgroup_extent_record *entry;
u64 bytenr = record->bytenr;
assert_spin_locked(&delayed_refs->lock);
while (*p) {
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
......
......@@ -3432,7 +3432,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev,
u64 chunk_offset, u64 length,
u64 dev_offset, int is_dev_replace)
u64 dev_offset,
struct btrfs_block_group_cache *cache,
int is_dev_replace)
{
struct btrfs_mapping_tree *map_tree =
&sctx->dev_root->fs_info->mapping_tree;
......@@ -3445,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
read_unlock(&map_tree->map_tree.lock);
if (!em)
return -EINVAL;
if (!em) {
/*
* Might have been an unused block group deleted by the cleaner
* kthread or relocation.
*/
spin_lock(&cache->lock);
if (!cache->removed)
ret = -EINVAL;
spin_unlock(&cache->lock);
return ret;
}
map = (struct map_lookup *)em->bdev;
if (em->start != chunk_offset)
......@@ -3483,6 +3495,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
u64 length;
u64 chunk_offset;
int ret = 0;
int ro_set;
int slot;
struct extent_buffer *l;
struct btrfs_key key;
......@@ -3568,7 +3581,21 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_on(fs_info);
ret = btrfs_inc_block_group_ro(root, cache);
scrub_pause_off(fs_info);
if (ret) {
if (ret == 0) {
ro_set = 1;
} else if (ret == -ENOSPC) {
/*
* btrfs_inc_block_group_ro return -ENOSPC when it
* failed in creating new chunk for metadata.
* It is not a problem for scrub/replace, because
* metadata are always cowed, and our scrub paused
* commit_transactions.
*/
ro_set = 0;
} else {
btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n",
ret);
btrfs_put_block_group(cache);
break;
}
......@@ -3577,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
found_key.offset, is_dev_replace);
found_key.offset, cache, is_dev_replace);
/*
* flush, submit all pending read and write bios, afterwards
......@@ -3611,7 +3638,30 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_off(fs_info);
btrfs_dec_block_group_ro(root, cache);
if (ro_set)
btrfs_dec_block_group_ro(root, cache);
/*
* We might have prevented the cleaner kthread from deleting
* this block group if it was already unused because we raced
* and set it to RO mode first. So add it back to the unused
* list, otherwise it might not ever be deleted unless a manual
* balance is triggered or it becomes used and unused again.
*/
spin_lock(&cache->lock);
if (!cache->removed && !cache->ro && cache->reserved == 0 &&
btrfs_block_group_used(&cache->item) == 0) {
spin_unlock(&cache->lock);
spin_lock(&fs_info->unused_bgs_lock);
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
list_add_tail(&cache->bg_list,
&fs_info->unused_bgs);
}
spin_unlock(&fs_info->unused_bgs_lock);
} else {
spin_unlock(&cache->lock);
}
btrfs_put_block_group(cache);
if (ret)
......
......@@ -898,8 +898,10 @@ int btrfs_test_free_space_cache(void)
}
root = btrfs_alloc_dummy_root();
if (!root)
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
}
root->fs_info = btrfs_alloc_dummy_fs_info();
if (!root->fs_info)
......
......@@ -592,6 +592,38 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
return start_transaction(root, num_items, TRANS_START,
BTRFS_RESERVE_FLUSH_ALL);
}
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
unsigned int num_items,
int min_factor)
{
struct btrfs_trans_handle *trans;
u64 num_bytes;
int ret;
trans = btrfs_start_transaction(root, num_items);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans))
return trans;
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
ret = btrfs_cond_migrate_bytes(root->fs_info,
&root->fs_info->trans_block_rsv,
num_bytes,
min_factor);
if (ret) {
btrfs_end_transaction(trans, root);
return ERR_PTR(ret);
}
trans->block_rsv = &root->fs_info->trans_block_rsv;
trans->bytes_reserved = num_bytes;
return trans;
}
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_root *root,
......
......@@ -185,6 +185,10 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items);
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
unsigned int num_items,
int min_factor);
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_root *root,
unsigned int num_items);
......
......@@ -1973,8 +1973,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
if (srcdev->writeable) {
fs_devices->rw_devices--;
/* zero out the old super if it is writable */
btrfs_scratch_superblocks(srcdev->bdev,
rcu_str_deref(srcdev->name));
btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
}
if (srcdev->bdev)
......@@ -2024,8 +2023,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
if (tgtdev->bdev) {
btrfs_scratch_superblocks(tgtdev->bdev,
rcu_str_deref(tgtdev->name));
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
fs_info->fs_devices->open_devices--;
}
fs_info->fs_devices->num_devices--;
......@@ -2853,7 +2851,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
if (ret)
return ret;
trans = btrfs_start_transaction(root, 0);
trans = btrfs_start_trans_remove_block_group(root->fs_info,
chunk_offset);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_std_error(root->fs_info, ret, NULL);
......@@ -3123,7 +3122,7 @@ static int chunk_profiles_filter(u64 chunk_type,
return 1;
}
static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
struct btrfs_balance_args *bargs)
{
struct btrfs_block_group_cache *cache;
......@@ -3156,7 +3155,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
return ret;
}
static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info,
static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
u64 chunk_offset, struct btrfs_balance_args *bargs)
{
struct btrfs_block_group_cache *cache;
......
......@@ -382,7 +382,7 @@ struct map_lookup {
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6)
#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 8)
#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10)
#define BTRFS_BALANCE_ARGS_MASK \
(BTRFS_BALANCE_ARGS_PROFILES | \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册