diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5474ef14d6e6797c8bdf3699c9d209fb5760498c..2771cc56a622f50cdf834a291aa4f43a86798cb7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,25 @@ struct btrfs_block_rsv { unsigned short full; unsigned short type; unsigned short failfast; + + /* + * Qgroup equivalent for @size @reserved + * + * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care + * about things like csum size nor how many tree blocks it will need to + * reserve. + * + * Qgroup cares more about net change of the extent usage. + * + * So for one newly inserted file extent, in worst case it will cause + * leaf split and level increase, nodesize for each file extent is + * already too much. + * + * In short, qgroup_size/reserved is the upper limit of possible needed + * qgroup metadata reservation. + */ + u64 qgroup_rsv_size; + u64 qgroup_rsv_reserved; }; /* @@ -714,6 +733,12 @@ struct btrfs_delayed_root; */ #define BTRFS_FS_EXCL_OP 16 +/* + * To info transaction_kthread we need an immediate commit so it doesn't + * need to wait for commit_interval + */ +#define BTRFS_FS_NEED_ASYNC_COMMIT 17 + struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 06ec8ab6d9ba593cc63dd58b6edea72584116e24..a8d492dbd3e7c100715011a9ddad8a22d82932dd 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, dst_rsv = &fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); + + /* + * Here we migrate space rsv from transaction rsv, since have already + * reserved space when starting a transaction. So no need to reserve + * qgroup space here. + */ ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); if (!ret) { trace_btrfs_space_reservation(fs_info, "delayed_item", @@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, return; rsv = &fs_info->delayed_block_rsv; - btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved); + /* + * Check btrfs_delayed_item_reserve_metadata() to see why we don't need + * to release/reserve qgroup space. + */ trace_btrfs_space_reservation(fs_info, "delayed_item", item->key.objectid, item->bytes_reserved, 0); @@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata( num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); - if (ret < 0) - return ret; /* * btrfs_dirty_inode will update the inode under btrfs_join_transaction * which doesn't reserve space for speed. This is a problem since we @@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { + ret = btrfs_qgroup_reserve_meta_prealloc(root, + fs_info->nodesize, true); + if (ret < 0) + return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, BTRFS_RESERVE_NO_FLUSH); /* @@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata( "delayed_inode", btrfs_ino(inode), num_bytes, 1); + } else { + btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); } return ret; } diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 9e98295de7ce2f24fb009c99102081c49f2c4ba2..e1b0651686f7c4e988766d927c669306560c468d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_qgroup_extent_record *qrecord, u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, - int action, int is_data, int *qrecord_inserted_ret, + int action, int is_data, int is_system, + int *qrecord_inserted_ret, int *old_ref_mod, int *new_ref_mod) + { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_root *delayed_refs; @@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref->ref_mod = count_mod; head_ref->must_insert_reserved = must_insert_reserved; head_ref->is_data = is_data; + head_ref->is_system = is_system; head_ref->ref_tree = RB_ROOT; INIT_LIST_HEAD(&head_ref->ref_add_list); RB_CLEAR_NODE(&head_ref->href_node); @@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; int qrecord_inserted; + int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID); BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); @@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, bytenr, num_bytes, 0, 0, action, 0, - &qrecord_inserted, old_ref_mod, - new_ref_mod); + is_system, &qrecord_inserted, + old_ref_mod, new_ref_mod); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, level, action); @@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, bytenr, num_bytes, ref_root, reserved, - action, 1, &qrecord_inserted, + action, 1, 0, &qrecord_inserted, old_ref_mod, new_ref_mod); add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, @@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); + /* + * extent_ops just modify the flags of an extent and they don't result + * in ref count changes, hence it's safe to pass false/0 for is_system + * argument + */ add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr, num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, - extent_op->is_data, NULL, NULL, NULL); + extent_op->is_data, 0, NULL, NULL, NULL); spin_unlock(&delayed_refs->lock); return 0; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 741869dbc316b7593851355b55f532d217aa22aa..7f00db50bd242d0a29f35c430339950b90588697 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head { */ unsigned int must_insert_reserved:1; unsigned int is_data:1; + unsigned int is_system:1; unsigned int processing:1; }; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ac8b1d21bafd07e00a686d6c15228f6be6aef72..60caa68c3618d5a072f332be97e7db4ea7ba67b0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg) now = get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && + !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && (now < cur->start_time || now - cur->start_time < fs_info->commit_interval)) { spin_unlock(&fs_info->trans_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 75cfb80d25518692c2b159fee0df477adb71d39a..e2f16b68fcbf4d2a941a9e988b9505820edd280e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, trace_run_delayed_ref_head(fs_info, head, 0); if (head->total_ref_mod < 0) { - struct btrfs_block_group_cache *cache; + struct btrfs_space_info *space_info; + u64 flags; - cache = btrfs_lookup_block_group(fs_info, head->bytenr); - ASSERT(cache); - percpu_counter_add(&cache->space_info->total_bytes_pinned, + if (head->is_data) + flags = BTRFS_BLOCK_GROUP_DATA; + else if (head->is_system) + flags = BTRFS_BLOCK_GROUP_SYSTEM; + else + flags = BTRFS_BLOCK_GROUP_METADATA; + space_info = __find_space_info(fs_info, flags); + ASSERT(space_info); + percpu_counter_add(&space_info->total_bytes_pinned, -head->num_bytes); - btrfs_put_block_group(cache); if (head->is_data) { spin_lock(&delayed_refs->lock); @@ -5559,14 +5565,18 @@ static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, - struct btrfs_block_rsv *dest, u64 num_bytes) + struct btrfs_block_rsv *dest, u64 num_bytes, + u64 *qgroup_to_release_ret) { struct btrfs_space_info *space_info = block_rsv->space_info; + u64 qgroup_to_release = 0; u64 ret; spin_lock(&block_rsv->lock); - if (num_bytes == (u64)-1) + if (num_bytes == (u64)-1) { num_bytes = block_rsv->size; + qgroup_to_release = block_rsv->qgroup_rsv_size; + } block_rsv->size -= num_bytes; if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; @@ -5575,6 +5585,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, } else { num_bytes = 0; } + if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { + qgroup_to_release = block_rsv->qgroup_rsv_reserved - + block_rsv->qgroup_rsv_size; + block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; + } else { + qgroup_to_release = 0; + } spin_unlock(&block_rsv->lock); ret = num_bytes; @@ -5597,6 +5614,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, space_info_add_old_bytes(fs_info, space_info, num_bytes); } + if (qgroup_to_release_ret) + *qgroup_to_release_ret = qgroup_to_release; return ret; } @@ -5738,17 +5757,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 num_bytes = 0; + u64 qgroup_num_bytes = 0; int ret = -ENOSPC; spin_lock(&block_rsv->lock); if (block_rsv->reserved < block_rsv->size) num_bytes = block_rsv->size - block_rsv->reserved; + if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size) + qgroup_num_bytes = block_rsv->qgroup_rsv_size - + block_rsv->qgroup_rsv_reserved; spin_unlock(&block_rsv->lock); if (num_bytes == 0) return 0; - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true); if (ret) return ret; ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); @@ -5756,7 +5779,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, block_rsv_add_bytes(block_rsv, num_bytes, 0); trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), num_bytes, 1); - } + + /* Don't forget to increase qgroup_rsv_reserved */ + spin_lock(&block_rsv->lock); + block_rsv->qgroup_rsv_reserved += qgroup_num_bytes; + spin_unlock(&block_rsv->lock); + } else + btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); return ret; } @@ -5777,20 +5806,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 released = 0; + u64 qgroup_to_release = 0; /* * Since we statically set the block_rsv->size we just want to say we * are releasing 0 bytes, and then we'll just get the reservation over * the size free'd. */ - released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0); + released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0, + &qgroup_to_release); if (released > 0) trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), released, 0); if (qgroup_free) - btrfs_qgroup_free_meta_prealloc(inode->root, released); + btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); else - btrfs_qgroup_convert_reserved_meta(inode->root, released); + btrfs_qgroup_convert_reserved_meta(inode->root, + qgroup_to_release); } void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, @@ -5802,7 +5834,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, if (global_rsv == block_rsv || block_rsv->space_info != global_rsv->space_info) global_rsv = NULL; - block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes); + block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL); } static void update_global_block_rsv(struct btrfs_fs_info *fs_info) @@ -5882,7 +5914,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) static void release_global_block_rsv(struct btrfs_fs_info *fs_info) { block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, - (u64)-1); + (u64)-1, NULL); WARN_ON(fs_info->trans_block_rsv.size > 0); WARN_ON(fs_info->trans_block_rsv.reserved > 0); WARN_ON(fs_info->chunk_block_rsv.size > 0); @@ -5906,7 +5938,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) WARN_ON_ONCE(!list_empty(&trans->new_bgs)); block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, - trans->chunk_bytes_reserved); + trans->chunk_bytes_reserved, NULL); trans->chunk_bytes_reserved = 0; } @@ -6011,6 +6043,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, { struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; + u64 qgroup_rsv_size = 0; u64 csum_leaves; unsigned outstanding_extents; @@ -6023,9 +6056,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, inode->csum_bytes); reserve_size += btrfs_calc_trans_metadata_size(fs_info, csum_leaves); + /* + * For qgroup rsv, the calculation is very simple: + * account one nodesize for each outstanding extent + * + * This is overestimating in most cases. + */ + qgroup_rsv_size = outstanding_extents * fs_info->nodesize; spin_lock(&block_rsv->lock); block_rsv->size = reserve_size; + block_rsv->qgroup_rsv_size = qgroup_rsv_size; spin_unlock(&block_rsv->lock); } @@ -8403,7 +8444,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u32 blocksize) { block_rsv_add_bytes(block_rsv, blocksize, 0); - block_rsv_release_bytes(fs_info, block_rsv, NULL, 0); + block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL); } /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0167a9c97c9c2933ccaf8f2456b22a525915f18d..f660ba1e5e58ef30f5c72555d7408b686e507e0e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1748,7 +1748,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, - (ret != 0)); + true); if (ret) { btrfs_drop_pages(pages, num_pages); break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e064c49c9a9a01a49a9a862bbacd6d16de25594e..d241285a0d2a8dea0ff858f58a5590c18298f2b0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -5905,11 +5906,13 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx) struct dir_entry *entry = addr; char *name = (char *)(entry + 1); - ctx->pos = entry->offset; - if (!dir_emit(ctx, name, entry->name_len, entry->ino, - entry->type)) + ctx->pos = get_unaligned(&entry->offset); + if (!dir_emit(ctx, name, get_unaligned(&entry->name_len), + get_unaligned(&entry->ino), + get_unaligned(&entry->type))) return 1; - addr += sizeof(struct dir_entry) + entry->name_len; + addr += sizeof(struct dir_entry) + + get_unaligned(&entry->name_len); ctx->pos++; } return 0; @@ -5999,14 +6002,15 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) } entry = addr; - entry->name_len = name_len; + put_unaligned(name_len, &entry->name_len); name_ptr = (char *)(entry + 1); read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), name_len); - entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)], + &entry->type); btrfs_dir_item_key_to_cpu(leaf, di, &location); - entry->ino = location.objectid; - entry->offset = found_key.offset; + put_unaligned(location.objectid, &entry->ino); + put_unaligned(found_key.offset, &entry->offset); entries++; addr += sizeof(struct dir_entry) + name_len; total_len += sizeof(struct dir_entry) + name_len; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 124276bba8cf310074e9b1d1134c6463e3a98853..21a831d3d087418bc2e4db884f4fd6a25e9df4e5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -189,9 +189,10 @@ void btrfs_print_leaf(struct extent_buffer *l) fs_info = l->fs_info; nr = btrfs_header_nritems(l); - btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d", - btrfs_header_bytenr(l), nr, - btrfs_leaf_free_space(fs_info, l)); + btrfs_info(fs_info, + "leaf %llu gen %llu total ptrs %d free space %d owner %llu", + btrfs_header_bytenr(l), btrfs_header_generation(l), nr, + btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(i); btrfs_item_key_to_cpu(l, &key, i); @@ -325,7 +326,7 @@ void btrfs_print_leaf(struct extent_buffer *l) } } -void btrfs_print_tree(struct extent_buffer *c) +void btrfs_print_tree(struct extent_buffer *c, bool follow) { struct btrfs_fs_info *fs_info; int i; u32 nr; @@ -342,15 +343,19 @@ void btrfs_print_tree(struct extent_buffer *c) return; } btrfs_info(fs_info, - "node %llu level %d total ptrs %d free spc %u", - btrfs_header_bytenr(c), level, nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr); + "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu", + btrfs_header_bytenr(c), level, btrfs_header_generation(c), + nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr, + btrfs_header_owner(c)); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); - pr_info("\tkey %d (%llu %u %llu) block %llu\n", + pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n", i, key.objectid, key.type, key.offset, - btrfs_node_blockptr(c, i)); + btrfs_node_blockptr(c, i), + btrfs_node_ptr_generation(c, i)); } + if (!follow) + return; for (i = 0; i < nr; i++) { struct btrfs_key first_key; struct extent_buffer *next; @@ -372,7 +377,7 @@ void btrfs_print_tree(struct extent_buffer *c) if (btrfs_header_level(next) != level - 1) BUG(); - btrfs_print_tree(next); + btrfs_print_tree(next, follow); free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 4a98481688f4398e728886be775a0a2a128d8044..e6bb38fd75ad88b38681055c542c73c0c76ba132 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -7,6 +7,6 @@ #define BTRFS_PRINT_TREE_H void btrfs_print_leaf(struct extent_buffer *l); -void btrfs_print_tree(struct extent_buffer *c); +void btrfs_print_tree(struct extent_buffer *c, bool follow); #endif diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 09c7e4fd550f4d10b8febcc745e0ba4138d9f35b..9fb758d5077a8441e994df53652fe32f771c2a49 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ctree.h" #include "transaction.h" @@ -2375,8 +2376,21 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, return ret; } -static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) +/* + * Two limits to commit transaction in advance. + * + * For RATIO, it will be 1/RATIO of the remaining limit + * (excluding data and prealloc meta) as threshold. + * For SIZE, it will be in byte unit as threshold. + */ +#define QGROUP_PERTRANS_RATIO 32 +#define QGROUP_PERTRANS_SIZE SZ_32M +static bool qgroup_check_limits(struct btrfs_fs_info *fs_info, + const struct btrfs_qgroup *qg, u64 num_bytes) { + u64 limit; + u64 threshold; + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) return false; @@ -2385,6 +2399,31 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) return false; + /* + * Even if we passed the check, it's better to check if reservation + * for meta_pertrans is pushing us near limit. + * If there is too much pertrans reservation or it's near the limit, + * let's try commit transaction to free some, using transaction_kthread + */ + if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER | + BTRFS_QGROUP_LIMIT_MAX_EXCL))) { + if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) + limit = qg->max_excl; + else + limit = qg->max_rfer; + threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] - + qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) / + QGROUP_PERTRANS_RATIO; + threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE); + + /* + * Use transaction_kthread to commit transaction, so we no + * longer need to bother nested transaction nor lock context. + */ + if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold) + btrfs_commit_transaction_locksafe(fs_info); + } + return true; } @@ -2434,7 +2473,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, qg = unode_aux_to_qgroup(unode); - if (enforce && !qgroup_check_limits(qg, num_bytes)) { + if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) { ret = -EDQUOT; goto out; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 63fdcab64b016061c9d42cc86f47e8079f3b4634..c944b4769e3c7c5a966e421b5c9c9b4635cab2ac 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2267,6 +2267,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ cur_trans->state = TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); + clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); spin_lock(&fs_info->trans_lock); list_del_init(&cur_trans->list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c88fccd80bc5c940228a00cc2ce1f88e3d1c9806..d8c0826bc2c7e9aee6ff263884e4621d492f9c1f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -199,6 +199,20 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans); int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, int wait_for_unblock); + +/* + * Try to commit transaction asynchronously, so this is safe to call + * even holding a spinlock. + * + * It's done by informing transaction_kthread to commit transaction without + * waiting for commit interval. + */ +static inline void btrfs_commit_transaction_locksafe( + struct btrfs_fs_info *fs_info) +{ + set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); + wake_up_process(fs_info->transaction_kthread); +} int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans); int btrfs_should_end_transaction(struct btrfs_trans_handle *trans); void btrfs_throttle(struct btrfs_fs_info *fs_info);