diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1df0d9db5332cbdbdbaf96e58eb4339bd4179e55..83051fae946741037070e8b4620bb68663f31547 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1491,6 +1491,12 @@ struct btrfs_fs_info { struct mutex chunk_mutex; struct mutex volume_mutex; + /* + * this is taken to make sure we don't set block groups ro after + * the free space cache has been allocated on them + */ + struct mutex ro_block_group_mutex; + /* this is used during read/modify/write to make sure * no two ios are trying to mod the same stripe at the same * time @@ -3407,6 +3413,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset, int no_quota); +int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 568cc4e3d80e18c2aa96f86c259d47a76b3586dd..b5e3d5f6400af923137d43424b1cd27dcc81af99 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2572,6 +2572,7 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + mutex_init(&fs_info->ro_block_group_mutex); init_rwsem(&fs_info->commit_root_sem); init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 40c95135d03717911a684f3e545b43cd47bdda08..02c2b29a084017b53e2052f4bede1355590028ec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3298,7 +3298,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, if (ret) goto out_put; - ret = btrfs_truncate_free_space_cache(root, trans, inode); + ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode); if (ret) goto out_put; } @@ -3382,20 +3382,156 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, return 0; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, +/* + * transaction commit does final block group cache writeback during a + * critical section where nothing is allowed to change the FS. This is + * required in order for the cache to actually match the block group, + * but can introduce a lot of latency into the commit. + * + * So, btrfs_start_dirty_block_groups is here to kick off block group + * cache IO. There's a chance we'll have to redo some of it if the + * block group changes again during the commit, but it greatly reduces + * the commit latency by getting rid of the easy block groups while + * we're still allowing others to join the commit. + */ +int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_block_group_cache *cache; struct btrfs_transaction *cur_trans = trans->transaction; int ret = 0; int should_put; - struct btrfs_path *path; - LIST_HEAD(io); + struct btrfs_path *path = NULL; + LIST_HEAD(dirty); + struct list_head *io = &cur_trans->io_bgs; int num_started = 0; - int num_waited = 0; + int loops = 0; + + spin_lock(&cur_trans->dirty_bgs_lock); + if (!list_empty(&cur_trans->dirty_bgs)) { + list_splice_init(&cur_trans->dirty_bgs, &dirty); + } + spin_unlock(&cur_trans->dirty_bgs_lock); - if (list_empty(&cur_trans->dirty_bgs)) +again: + if (list_empty(&dirty)) { + btrfs_free_path(path); return 0; + } + + /* + * make sure all the block groups on our dirty list actually + * exist + */ + btrfs_create_pending_block_groups(trans, root); + + if (!path) { + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + } + + while (!list_empty(&dirty)) { + cache = list_first_entry(&dirty, + struct btrfs_block_group_cache, + dirty_list); + + /* + * cache_write_mutex is here only to save us from balance + * deleting this block group while we are writing out the + * cache + */ + mutex_lock(&trans->transaction->cache_write_mutex); + + /* + * this can happen if something re-dirties a block + * group that is already under IO. Just wait for it to + * finish and then do it all again + */ + if (!list_empty(&cache->io_list)) { + list_del_init(&cache->io_list); + btrfs_wait_cache_io(root, trans, cache, + &cache->io_ctl, path, + cache->key.objectid); + btrfs_put_block_group(cache); + } + + + /* + * btrfs_wait_cache_io uses the cache->dirty_list to decide + * if it should update the cache_state. Don't delete + * until after we wait. + * + * Since we're not running in the commit critical section + * we need the dirty_bgs_lock to protect from update_block_group + */ + spin_lock(&cur_trans->dirty_bgs_lock); + list_del_init(&cache->dirty_list); + spin_unlock(&cur_trans->dirty_bgs_lock); + + should_put = 1; + + cache_save_setup(cache, trans, path); + + if (cache->disk_cache_state == BTRFS_DC_SETUP) { + cache->io_ctl.inode = NULL; + ret = btrfs_write_out_cache(root, trans, cache, path); + if (ret == 0 && cache->io_ctl.inode) { + num_started++; + should_put = 0; + + /* + * the cache_write_mutex is protecting + * the io_list + */ + list_add_tail(&cache->io_list, io); + } else { + /* + * if we failed to write the cache, the + * generation will be bad and life goes on + */ + ret = 0; + } + } + if (!ret) + ret = write_one_cache_group(trans, root, path, cache); + mutex_unlock(&trans->transaction->cache_write_mutex); + + /* if its not on the io list, we need to put the block group */ + if (should_put) + btrfs_put_block_group(cache); + + if (ret) + break; + } + + /* + * go through delayed refs for all the stuff we've just kicked off + * and then loop back (just once) + */ + ret = btrfs_run_delayed_refs(trans, root, 0); + if (!ret && loops == 0) { + loops++; + spin_lock(&cur_trans->dirty_bgs_lock); + list_splice_init(&cur_trans->dirty_bgs, &dirty); + spin_unlock(&cur_trans->dirty_bgs_lock); + goto again; + } + + btrfs_free_path(path); + return ret; +} + +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache; + struct btrfs_transaction *cur_trans = trans->transaction; + int ret = 0; + int should_put; + struct btrfs_path *path; + struct list_head *io = &cur_trans->io_bgs; + int num_started = 0; path = btrfs_alloc_path(); if (!path) @@ -3423,14 +3559,16 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, &cache->io_ctl, path, cache->key.objectid); btrfs_put_block_group(cache); - num_waited++; } + /* + * don't remove from the dirty list until after we've waited + * on any pending IO + */ list_del_init(&cache->dirty_list); should_put = 1; - if (cache->disk_cache_state == BTRFS_DC_CLEAR) - cache_save_setup(cache, trans, path); + cache_save_setup(cache, trans, path); if (!ret) ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1); @@ -3441,7 +3579,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (ret == 0 && cache->io_ctl.inode) { num_started++; should_put = 0; - list_add_tail(&cache->io_list, &io); + list_add_tail(&cache->io_list, io); } else { /* * if we failed to write the cache, the @@ -3458,11 +3596,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, btrfs_put_block_group(cache); } - while (!list_empty(&io)) { - cache = list_first_entry(&io, struct btrfs_block_group_cache, + while (!list_empty(io)) { + cache = list_first_entry(io, struct btrfs_block_group_cache, io_list); list_del_init(&cache->io_list); - num_waited++; btrfs_wait_cache_io(root, trans, cache, &cache->io_ctl, path, cache->key.objectid); btrfs_put_block_group(cache); @@ -5459,15 +5596,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (!alloc && cache->cached == BTRFS_CACHE_NO) cache_block_group(cache, 1); - spin_lock(&trans->transaction->dirty_bgs_lock); - if (list_empty(&cache->dirty_list)) { - list_add_tail(&cache->dirty_list, - &trans->transaction->dirty_bgs); - trans->transaction->num_dirty_bgs++; - btrfs_get_block_group(cache); - } - spin_unlock(&trans->transaction->dirty_bgs_lock); - byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); @@ -5516,6 +5644,16 @@ static int update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&info->unused_bgs_lock); } } + + spin_lock(&trans->transaction->dirty_bgs_lock); + if (list_empty(&cache->dirty_list)) { + list_add_tail(&cache->dirty_list, + &trans->transaction->dirty_bgs); + trans->transaction->num_dirty_bgs++; + btrfs_get_block_group(cache); + } + spin_unlock(&trans->transaction->dirty_bgs_lock); + btrfs_put_block_group(cache); total -= num_bytes; bytenr += num_bytes; @@ -8602,10 +8740,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, BUG_ON(cache->ro); +again: trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); + /* + * we're not allowed to set block groups readonly after the dirty + * block groups cache has started writing. If it already started, + * back off and let this transaction commit + */ + mutex_lock(&root->fs_info->ro_block_group_mutex); + if (trans->transaction->dirty_bg_run) { + u64 transid = trans->transid; + + mutex_unlock(&root->fs_info->ro_block_group_mutex); + btrfs_end_transaction(trans, root); + + ret = btrfs_wait_for_commit(root, transid); + if (ret) + return ret; + goto again; + } + + ret = set_block_group_ro(cache, 0); if (!ret) goto out; @@ -8620,6 +8778,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, alloc_flags = update_block_group_flags(root, cache->flags); check_system_chunk(trans, root, alloc_flags); } + mutex_unlock(&root->fs_info->ro_block_group_mutex); btrfs_end_transaction(trans, root); return ret; @@ -9425,7 +9584,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; } + /* + * get the inode first so any iput calls done for the io_list + * aren't the final iput (no unlinks allowed now) + */ inode = lookup_free_space_inode(tree_root, block_group, path); + + mutex_lock(&trans->transaction->cache_write_mutex); + /* + * make sure our free spache cache IO is done before remove the + * free space inode + */ + spin_lock(&trans->transaction->dirty_bgs_lock); + if (!list_empty(&block_group->io_list)) { + list_del_init(&block_group->io_list); + + WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode); + + spin_unlock(&trans->transaction->dirty_bgs_lock); + btrfs_wait_cache_io(root, trans, block_group, + &block_group->io_ctl, path, + block_group->key.objectid); + btrfs_put_block_group(block_group); + spin_lock(&trans->transaction->dirty_bgs_lock); + } + + if (!list_empty(&block_group->dirty_list)) { + list_del_init(&block_group->dirty_list); + btrfs_put_block_group(block_group); + } + spin_unlock(&trans->transaction->dirty_bgs_lock); + mutex_unlock(&trans->transaction->cache_write_mutex); + if (!IS_ERR(inode)) { ret = btrfs_orphan_add(trans, inode); if (ret) { @@ -9518,11 +9708,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_lock(&trans->transaction->dirty_bgs_lock); if (!list_empty(&block_group->dirty_list)) { - list_del_init(&block_group->dirty_list); - btrfs_put_block_group(block_group); + WARN_ON(1); + } + if (!list_empty(&block_group->io_list)) { + WARN_ON(1); } spin_unlock(&trans->transaction->dirty_bgs_lock); - btrfs_remove_free_space_cache(block_group); spin_lock(&block_group->space_info->lock); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 83532a2459473b42ba2d7d763cd0ea382f269295..253cb74b0e27eaa19a7caf3175f66f9576340aff 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -226,9 +226,37 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, + struct btrfs_block_group_cache *block_group, struct inode *inode) { int ret = 0; + struct btrfs_path *path = btrfs_alloc_path(); + + if (!path) { + ret = -ENOMEM; + goto fail; + } + + if (block_group) { + mutex_lock(&trans->transaction->cache_write_mutex); + if (!list_empty(&block_group->io_list)) { + list_del_init(&block_group->io_list); + + btrfs_wait_cache_io(root, trans, block_group, + &block_group->io_ctl, path, + block_group->key.objectid); + btrfs_put_block_group(block_group); + } + + /* + * now that we've truncated the cache away, its no longer + * setup or written + */ + spin_lock(&block_group->lock); + block_group->disk_cache_state = BTRFS_DC_CLEAR; + spin_unlock(&block_group->lock); + } + btrfs_free_path(path); btrfs_i_size_write(inode, 0); truncate_pagecache(inode, 0); @@ -242,11 +270,17 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, ret = btrfs_truncate_inode_items(trans, root, inode, 0, BTRFS_EXTENT_DATA_KEY); if (ret) { + mutex_unlock(&trans->transaction->cache_write_mutex); btrfs_abort_transaction(trans, root, ret); return ret; } ret = btrfs_update_inode(trans, root, inode); + + if (block_group) + mutex_unlock(&trans->transaction->cache_write_mutex); + +fail: if (ret) btrfs_abort_transaction(trans, root, ret); @@ -876,6 +910,7 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, { int ret; struct btrfs_free_cluster *cluster = NULL; + struct btrfs_free_cluster *cluster_locked = NULL; struct rb_node *node = rb_first(&ctl->free_space_offset); struct btrfs_trim_range *trim_entry; @@ -887,6 +922,8 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, } if (!node && cluster) { + cluster_locked = cluster; + spin_lock(&cluster_locked->lock); node = rb_first(&cluster->root); cluster = NULL; } @@ -910,9 +947,15 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, node = rb_next(node); if (!node && cluster) { node = rb_first(&cluster->root); + cluster_locked = cluster; + spin_lock(&cluster_locked->lock); cluster = NULL; } } + if (cluster_locked) { + spin_unlock(&cluster_locked->lock); + cluster_locked = NULL; + } /* * Make sure we don't miss any range that was removed from our rbtree @@ -930,6 +973,8 @@ int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, return 0; fail: + if (cluster_locked) + spin_unlock(&cluster_locked->lock); return -ENOSPC; } @@ -1101,6 +1146,9 @@ int btrfs_wait_cache_io(struct btrfs_root *root, int ret; struct inode *inode = io_ctl->inode; + if (!inode) + return 0; + root = root->fs_info->tree_root; /* Flush the dirty pages in the cache file. */ @@ -1127,11 +1175,16 @@ int btrfs_wait_cache_io(struct btrfs_root *root, btrfs_update_inode(trans, root, inode); if (block_group) { + /* the dirty list is protected by the dirty_bgs_lock */ + spin_lock(&trans->transaction->dirty_bgs_lock); + + /* the disk_cache_state is protected by the block group lock */ spin_lock(&block_group->lock); /* * only mark this as written if we didn't get put back on - * the dirty list while waiting for IO. + * the dirty list while waiting for IO. Otherwise our + * cache state won't be right, and we won't get written again */ if (!ret && list_empty(&block_group->dirty_list)) block_group->disk_cache_state = BTRFS_DC_WRITTEN; @@ -1139,6 +1192,7 @@ int btrfs_wait_cache_io(struct btrfs_root *root, block_group->disk_cache_state = BTRFS_DC_ERROR; spin_unlock(&block_group->lock); + spin_unlock(&trans->transaction->dirty_bgs_lock); io_ctl->inode = NULL; iput(inode); } @@ -1207,9 +1261,11 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, mutex_lock(&ctl->cache_writeout_mutex); /* Write out the extent entries in the free space cache */ + spin_lock(&ctl->tree_lock); ret = write_cache_extent_entries(io_ctl, ctl, block_group, &entries, &bitmaps, &bitmap_list); + spin_unlock(&ctl->tree_lock); if (ret) { mutex_unlock(&ctl->cache_writeout_mutex); goto out_nospc; @@ -1219,6 +1275,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, * Some spaces that are freed in the current transaction are pinned, * they will be added into free space cache after the transaction is * committed, we shouldn't lose them. + * + * If this changes while we are working we'll get added back to + * the dirty list and redo it. No locking needed */ ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries); if (ret) { @@ -1231,7 +1290,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, * locked while doing it because a concurrent trim can be manipulating * or freeing the bitmap. */ + spin_lock(&ctl->tree_lock); ret = write_bitmap_entries(io_ctl, &bitmap_list); + spin_unlock(&ctl->tree_lock); mutex_unlock(&ctl->cache_writeout_mutex); if (ret) goto out_nospc; @@ -1307,12 +1368,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, spin_unlock(&block_group->lock); return 0; } - - if (block_group->delalloc_bytes) { - block_group->disk_cache_state = BTRFS_DC_WRITTEN; - spin_unlock(&block_group->lock); - return 0; - } spin_unlock(&block_group->lock); inode = lookup_free_space_inode(root, block_group, path); diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index c4339863af05d4b6b214d7bc7399fa4e691fff18..a16a029ad3b128dcd7723e6d657f43a66a26ed34 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -62,6 +62,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, struct btrfs_block_rsv *rsv); int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, + struct btrfs_block_group_cache *block_group, struct inode *inode); int load_free_space_cache(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 74faea3a516ee710f7b099e4bc0e06eb2c3d001d..f6a596d5a6374f1dbf66279e24ef17aed9ff399d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -456,7 +456,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root, } if (i_size_read(inode) > 0) { - ret = btrfs_truncate_free_space_cache(root, trans, inode); + ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode); if (ret) { if (ret != -ENOSPC) btrfs_abort_transaction(trans, root, ret); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d83085381bccfa745ee0258b8cab5558afbcd639..840a4eb0f3960087cdbd3492df4a130115d2e887 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3430,7 +3430,9 @@ static int block_use_full_backref(struct reloc_control *rc, } static int delete_block_group_cache(struct btrfs_fs_info *fs_info, - struct inode *inode, u64 ino) + struct btrfs_block_group_cache *block_group, + struct inode *inode, + u64 ino) { struct btrfs_key key; struct btrfs_root *root = fs_info->tree_root; @@ -3463,7 +3465,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, goto out; } - ret = btrfs_truncate_free_space_cache(root, trans, inode); + ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode); btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); @@ -3509,6 +3511,7 @@ static int find_data_references(struct reloc_control *rc, */ if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { ret = delete_block_group_cache(rc->extent_root->fs_info, + rc->block_group, NULL, ref_objectid); if (ret != -ENOENT) return ret; @@ -4223,7 +4226,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) btrfs_free_path(path); if (!IS_ERR(inode)) - ret = delete_block_group_cache(fs_info, inode, 0); + ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0); else ret = PTR_ERR(inode); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 234d6063bbf3af431373f07ac92a422d6ce5cf72..5628e25250c0d2cca318f03d41f4e6d64b4d40e0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -222,6 +222,7 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type) atomic_set(&cur_trans->use_count, 2); cur_trans->have_free_bgs = 0; cur_trans->start_time = get_seconds(); + cur_trans->dirty_bg_run = 0; cur_trans->delayed_refs.href_root = RB_ROOT; atomic_set(&cur_trans->delayed_refs.num_entries, 0); @@ -251,6 +252,8 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type) INIT_LIST_HEAD(&cur_trans->switch_commits); INIT_LIST_HEAD(&cur_trans->pending_ordered); INIT_LIST_HEAD(&cur_trans->dirty_bgs); + INIT_LIST_HEAD(&cur_trans->io_bgs); + mutex_init(&cur_trans->cache_write_mutex); cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); @@ -1059,6 +1062,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; + struct list_head *io_bgs = &trans->transaction->io_bgs; struct list_head *next; struct extent_buffer *eb; int ret; @@ -1112,7 +1116,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, return ret; } - while (!list_empty(dirty_bgs)) { + while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) { ret = btrfs_write_dirty_block_groups(trans, root); if (ret) return ret; @@ -1812,6 +1816,37 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } + if (!cur_trans->dirty_bg_run) { + int run_it = 0; + + /* this mutex is also taken before trying to set + * block groups readonly. We need to make sure + * that nobody has set a block group readonly + * after a extents from that block group have been + * allocated for cache files. btrfs_set_block_group_ro + * will wait for the transaction to commit if it + * finds dirty_bg_run = 1 + * + * The dirty_bg_run flag is also used to make sure only + * one process starts all the block group IO. It wouldn't + * hurt to have more than one go through, but there's no + * real advantage to it either. + */ + mutex_lock(&root->fs_info->ro_block_group_mutex); + if (!cur_trans->dirty_bg_run) { + run_it = 1; + cur_trans->dirty_bg_run = 1; + } + mutex_unlock(&root->fs_info->ro_block_group_mutex); + + if (run_it) + ret = btrfs_start_dirty_block_groups(trans, root); + } + if (ret) { + btrfs_end_transaction(trans, root); + return ret; + } + spin_lock(&root->fs_info->trans_lock); list_splice(&trans->ordered, &cur_trans->pending_ordered); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { @@ -2005,6 +2040,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, assert_qgroups_uptodate(trans); ASSERT(list_empty(&cur_trans->dirty_bgs)); + ASSERT(list_empty(&cur_trans->io_bgs)); update_super_roots(root); btrfs_set_super_log_root(root->fs_info->super_copy, 0); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4cb0ae264534dada0a0266d3ea05b5975206c0f0..0b24755596bab2fe4c5ba0c60511db33aff51254 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -64,10 +64,19 @@ struct btrfs_transaction { struct list_head pending_ordered; struct list_head switch_commits; struct list_head dirty_bgs; + struct list_head io_bgs; u64 num_dirty_bgs; + + /* + * we need to make sure block group deletion doesn't race with + * free space cache writeout. This mutex keeps them from stomping + * on each other + */ + struct mutex cache_write_mutex; spinlock_t dirty_bgs_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; + int dirty_bg_run; }; #define __TRANS_FREEZABLE (1U << 0)