diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a4e2df66bdbb74c2fbecdc1084b63240f63bcbe1..9321438c5eefec21a193ede271bd6a130d5fa774 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o + transaction.o bit-radix.o inode.o file.o tree-defrag.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7a08491e208e1b788a550f99f9caae0a7a59f372..c7e47e77723ff413c5350a7949fdeb02500b3a4b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf, struct buffer_head *parent, int parent_slot, struct buffer_head - **cow_ret) + **cow_ret, u64 search_start, u64 empty_size) { struct buffer_head *cow; struct btrfs_node *cow_node; - int ret; + int ret = 0; + int different_trans = 0; + WARN_ON(root->ref_cows && trans->transid != root->last_trans); WARN_ON(!buffer_uptodate(buf)); - if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->running_transaction->transid); - WARN_ON(1); - } - if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); - WARN_ON(1); - } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { - *cow_ret = buf; - return 0; - } - cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); + cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); - ret = btrfs_inc_ref(trans, root, buf); - if (ret) - return ret; + + WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > + trans->transid); + if (btrfs_header_generation(btrfs_buffer_header(buf)) != + trans->transid) { + different_trans = 1; + ret = btrfs_inc_ref(trans, root, buf); + if (ret) + return ret; + } else { + WARN_ON(!root->ref_cows); + clean_tree_block(trans, root, buf); + } + if (buf == root->node) { root->node = cow; get_bh(cow); @@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, bh_blocknr(cow)); btrfs_mark_buffer_dirty(parent); + WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != + trans->transid); btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); @@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret) +{ + u64 search_start; + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + if (btrfs_header_generation(btrfs_buffer_header(buf)) == + trans->transid) { + *cow_ret = buf; + return 0; + } + + search_start = bh_blocknr(buf) & ~((u64)65535); + return __btrfs_cow_block(trans, root, buf, parent, + parent_slot, cow_ret, search_start, 0); +} + +static int close_blocks(u64 blocknr, u64 other) +{ + if (blocknr < other && other - blocknr < 8) + return 1; + if (blocknr > other && blocknr - other < 8) + return 1; + return 0; +} + +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only) +{ + struct btrfs_node *parent_node; + struct buffer_head *cur_bh; + struct buffer_head *tmp_bh; + u64 blocknr; + u64 search_start = 0; + u64 other; + u32 parent_nritems; + int start_slot; + int end_slot; + int i; + int err = 0; + + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + parent_node = btrfs_buffer_node(parent); + parent_nritems = btrfs_header_nritems(&parent_node->header); + + start_slot = 0; + end_slot = parent_nritems; + + if (parent_nritems == 1) + return 0; + + for (i = start_slot; i < end_slot; i++) { + int close = 1; + blocknr = btrfs_node_blockptr(parent_node, i); + if (i > 0) { + other = btrfs_node_blockptr(parent_node, i - 1); + close = close_blocks(blocknr, other); + } + if (close && i < end_slot - 1) { + other = btrfs_node_blockptr(parent_node, i + 1); + close = close_blocks(blocknr, other); + } + if (close) + continue; + + cur_bh = btrfs_find_tree_block(root, blocknr); + if (!cur_bh || !buffer_uptodate(cur_bh) || + buffer_locked(cur_bh)) { + if (cache_only) { + brelse(cur_bh); + continue; + } + brelse(cur_bh); + cur_bh = read_tree_block(root, blocknr); + } + if (search_start == 0) { + search_start = bh_blocknr(cur_bh) & ~((u64)65535); + } + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, + &tmp_bh, search_start, + min(8, end_slot - i)); + if (err) + break; + search_start = bh_blocknr(tmp_bh); + brelse(tmp_bh); + } + return err; +} + /* * The leaf data grows from end-to-front in the node. * this returns the address of the start of the last item, @@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, parent_slot = path->slots[level + 1]; parent_key = &parent->ptrs[parent_slot].key; + BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != @@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, * readahead one full node of leaves */ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int slot) + int level, int slot) { struct btrfs_node *node; int i; @@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, unsigned long gang[8]; struct buffer_head *bh; - if (!path->nodes[1]) + if (level == 0) + return; + + if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[1]); + node = btrfs_buffer_node(path->nodes[level]); search = btrfs_node_blockptr(node, slot); bh = btrfs_find_tree_block(root, search); if (bh) { @@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; i < ret; i++) { blocknr = gang[i]; clear_radix_bit(&found, blocknr); - if (nread > 64) + if (nread > 32) continue; if (direction > 0 && cluster_start <= blocknr && cluster_start + 8 > blocknr) { @@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *b; struct buffer_head *cow_buf; struct btrfs_node *c; - struct btrfs_root_item *root_item = &root->root_item; u64 blocknr; int slot; int ret; @@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int should_reada = p->reada; u8 lowest_level = 0; - if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { - lowest_level = root_item->drop_level; - WARN_ON(ins_len || cow); - } - + lowest_level = p->lowest_level; + WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: @@ -798,8 +909,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (level == lowest_level) break; blocknr = btrfs_node_blockptr(c, slot); - if (level == 1 && should_reada) - reada_for_search(root, p, slot); + if (should_reada) + reada_for_search(root, p, level, slot); b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { @@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); if (IS_ERR(t)) return PTR_ERR(t); c = btrfs_buffer_node(t); @@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); if (IS_ERR(split_buffer)) return PTR_ERR(split_buffer); @@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, blocknr); break; } @@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = 0; if (!level) break; - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c5a18d5d7f7c244b73795e7c6567529f014ff654..42aa20301bc9b27e571413788cd6fc5d4ef5b469 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -178,6 +178,7 @@ struct btrfs_path { struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; + int lowest_level; }; /* @@ -338,6 +339,9 @@ struct btrfs_root { u64 highest_inode; u64 last_inode_alloc; int ref_cows; + struct btrfs_key defrag_progress; + int defrag_running; + int defrag_level; }; /* the lower bits in the key flags defines the item type */ @@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint); + struct btrfs_root *root, u64 hint, + u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 search_start, + u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); @@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); @@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 *hint_block); +/* tree-defrag.c */ +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int cache_only); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60db85bc4ba8d70a11267f22453a484bd9758bb4..c948416fea3aca8f9673ef98eca60e3c85a77eb1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { WARN_ON(atomic_read(&buf->b_count) == 0); + lock_buffer(buf); clear_buffer_dirty(buf); + unlock_buffer(buf); return 0; } @@ -294,6 +296,9 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_running = 0; + root->defrag_level = 0; root->root_key.objectid = objectid; return 0; } @@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh) { struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)bh->b_blocknr, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5d4d5d8db8efa3f7cc3a9e23bc98affee85aa215..26b8d340649128d232ab3ba41e7a2f76a0905a79 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,7 +23,8 @@ #include "transaction.h" static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data); @@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, + ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); @@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, + ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); /* FIXME, set bit to recalc cache groups on next mount */ if (ret) @@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, static int try_remove_page(struct address_space *mapping, unsigned long index) { int ret; + return 0; ret = invalidate_mapping_pages(mapping, index, index); return ret; } @@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); + ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, u64 hint_block, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root data, 1); } + total_needed += empty_size; path = btrfs_alloc_path(); check_failed: @@ -1157,9 +1160,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root goto error; } search_start = orig_search_start; - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; goto new_group; } @@ -1238,9 +1243,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = -ENOSPC; goto error; } - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; } block_group = btrfs_lookup_block_group(info, search_start); @@ -1264,7 +1271,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 hint_block, + u64 num_blocks, u64 empty_size, u64 hint_block, u64 search_end, struct btrfs_key *ins, int data) { int ret; @@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * in the correct block group. */ if (data) { - ret = find_free_extent(trans, root, 0, 0, + ret = find_free_extent(trans, root, 0, 0, 0, search_end, 0, &prealloc_key, 0, 0, 0); BUG_ON(ret); if (ret) @@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } /* do the real allocation */ - ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, + ret = find_free_extent(trans, root, num_blocks, empty_size, + search_start, search_end, hint_block, ins, exclude_start, exclude_nr, data); BUG_ON(ret); if (ret) @@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, exclude_start = ins->objectid; exclude_nr = ins->offset; hint_block = exclude_start + exclude_nr; - ret = find_free_extent(trans, root, 0, search_start, + ret = find_free_extent(trans, root, 0, 0, search_start, search_end, hint_block, &prealloc_key, exclude_start, exclude_nr, 0); @@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint) + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins, 0); + 1, empty_size, hint, + (unsigned long)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); @@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } + WARN_ON(buffer_dirty(buf)); set_buffer_uptodate(buf); set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); @@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_node *node; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + level = root_item->drop_level; + path->lowest_level = level; wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { + if (wret < 0) { ret = wret; goto out; } - level = root_item->drop_level; node = btrfs_buffer_node(path->nodes[level]); found_key = &node->ptrs[path->slots[level]].key; WARN_ON(memcmp(found_key, &root_item->drop_progress, @@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; num_walks++; if (num_walks > 10) { - struct btrfs_key key; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); ret = -EAGAIN; get_bh(root->node); break; @@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; } } out: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1fe38fe841502d3a1e40394b6f559f186759968f..00b118a2db6925ba0810a461bd484dfa7a855391 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root, if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, + num_blocks, 0, hint_block, (u64)-1, &ins, 1); if (err) goto failed_truncate; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3889032fc449762ec2cfa11307390b591a774919..12aa043b9f6f2e7fad6eaeb64dde8ae6d683d5f4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) &alloc_hint); if (ret) goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) goto out; @@ -1360,7 +1360,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, alloc_hint, (u64)-1, + 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { err = ret; @@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0); + subvol = btrfs_alloc_free_block(trans, root, 0, 0); if (IS_ERR(subvol)) return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); @@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; + struct btrfs_trans_handle *trans; int ret = 0; + int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int else ret = create_snapshot(root, vol_args.name, namelen); break; + + case BTRFS_IOC_DEFRAG: + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + memset(&root->defrag_progress, 0, + sizeof(root->defrag_progress)); + while (1) { + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 0); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + ret = 0; + break; default: return -ENOTTY; } diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 23bed48c51a7c28bcf5cbec03e7524bb4f6f4e71..8bc47dec2865eadfcfe36721c64b0ea59e99de68 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 498626470a049d57fa9cd269bb9e697ea852e8b4..338a7199363b12875868414a7336c1f25bd7449d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; static struct workqueue_struct *trans_wq; #define BTRFS_ROOT_TRANS_TAG 0 +#define BTRFS_ROOT_DEFRAG_TAG 1 static void put_transaction(struct btrfs_transaction *transaction) { @@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root) return 0; } +static int record_root_in_trans(struct btrfs_root *root) +{ + u64 running_trans_id = root->fs_info->running_transaction->transid; + if (root->ref_cows && root->last_trans < running_trans_id) { + WARN_ON(root == root->fs_info->extent_root); + if (root->root_item.refs != 0) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + root->commit_root = root->node; + get_bh(root->node); + } else { + WARN_ON(1); + } + root->last_trans = running_trans_id; + } + return 0; +} + struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; - u64 running_trans_id; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); - running_trans_id = root->fs_info->running_transaction->transid; - if (root != root->fs_info->tree_root && root->last_trans < - running_trans_id) { - WARN_ON(root == root->fs_info->extent_root); - WARN_ON(root->ref_cows != 1); - if (root->root_item.refs != 0) { - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - root->commit_root = root->node; - get_bh(root->node); - } else { - WARN_ON(1); - } - } - root->last_trans = running_trans_id; - h->transid = running_trans_id; + record_root_in_trans(root); + h->transid = root->fs_info->running_transaction->transid; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, gang[i]); if (!page) continue; + if (PageWriteback(page)) { + if (PageDirty(page)) + wait_on_page_writeback(page); + else { + unlock_page(page); + page_cache_release(page); + continue; + } + } err = write_one_page(page, 0); if (err) werr = err; @@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) +{ + struct btrfs_root *gang[1]; + struct btrfs_root *root; + struct btrfs_root *tree_root = info->tree_root; + struct btrfs_trans_handle *trans; + int i; + int ret; + int err = 0; + u64 last = 0; + + trans = btrfs_start_transaction(tree_root, 1); + while(1) { + ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, + (void **)gang, last, + ARRAY_SIZE(gang), + BTRFS_ROOT_DEFRAG_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + root = gang[i]; + last = root->root_key.objectid + 1; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + if (root->defrag_running) + continue; + + while (1) { + mutex_lock(&root->fs_info->trans_mutex); + record_root_in_trans(root); + mutex_unlock(&root->fs_info->trans_mutex); + + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 1); + btrfs_end_transaction(trans, tree_root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(tree_root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + } + } + btrfs_end_transaction(trans, tree_root); + return err; +} + static int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { @@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) goto out; } mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ebf44f3e1118c0477b8311b23342885540fa79b9..8b2714e65562985ee64f01fb11c11e5e4eb8ea51 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c new file mode 100644 index 0000000000000000000000000000000000000000..15d0a486fb593a706f3d4e4d41a85c8d5f34562b --- /dev/null +++ b/fs/btrfs/tree-defrag.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" +#include "transaction.h" + +static void reada_defrag(struct btrfs_root *root, + struct btrfs_node *node) +{ + int i; + u32 nritems; + u64 blocknr; + int ret; + + nritems = btrfs_header_nritems(&node->header); + for (i = 0; i < nritems; i++) { + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + +static int defrag_walk_down(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + int cache_only) +{ + struct buffer_head *next; + struct buffer_head *cur; + u64 blocknr; + int ret = 0; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + while(*level > 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (!cache_only && *level > 1 && path->slots[*level] == 0) + reada_defrag(root, btrfs_buffer_node(cur)); + + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(btrfs_buffer_header(cur))) + break; + + if (*level == 1) { + ret = btrfs_realloc_node(trans, root, + path->nodes[*level], + cache_only); + break; + } + blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), + path->slots[*level]); + + if (cache_only) { + next = btrfs_find_tree_block(root, blocknr); + if (!next || !buffer_uptodate(next) || + buffer_locked(next)) { + brelse(next); + path->slots[*level]++; + continue; + } + } else { + next = read_tree_block(root, blocknr); + } + ret = btrfs_cow_block(trans, root, next, path->nodes[*level], + path->slots[*level], &next); + BUG_ON(ret); + ret = btrfs_realloc_node(trans, root, next, cache_only); + BUG_ON(ret); + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + btrfs_block_release(root, path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(btrfs_buffer_header(next)); + path->slots[*level] = 0; + } + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + btrfs_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + WARN_ON(ret); + return 0; +} + +static int defrag_walk_up(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + int cache_only) +{ + int i; + int slot; + struct btrfs_node *node; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems( + btrfs_buffer_header(path->nodes[i])) - 1) { + path->slots[i]++; + *level = i; + node = btrfs_buffer_node(path->nodes[i]); + WARN_ON(i == 0); + btrfs_disk_key_to_cpu(&root->defrag_progress, + &node->ptrs[path->slots[i]].key); + root->defrag_level = i; + return 0; + } else { + btrfs_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int cache_only) +{ + struct btrfs_path *path = NULL; + struct buffer_head *tmp; + int ret = 0; + int wret; + int level; + int orig_level; + int i; + int num_runs = 0; + + if (root->ref_cows == 0) { + goto out; + } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + level = btrfs_header_level(btrfs_buffer_header(root->node)); + orig_level = level; + if (level == 0) { + goto out; + } + if (root->defrag_progress.objectid == 0) { + get_bh(root->node); + ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); + BUG_ON(ret); + ret = btrfs_realloc_node(trans, root, root->node, cache_only); + BUG_ON(ret); + path->nodes[level] = root->node; + path->slots[level] = 0; + } else { + level = root->defrag_level; + path->lowest_level = level; + wret = btrfs_search_slot(trans, root, &root->defrag_progress, + path, 0, 1); + + if (wret < 0) { + ret = wret; + goto out; + } + while(level > 0 && !path->nodes[level]) + level--; + if (!path->nodes[level]) { + ret = 0; + goto out; + } + } + + while(1) { + wret = defrag_walk_down(trans, root, path, &level, cache_only); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + + wret = defrag_walk_up(trans, root, path, &level, cache_only); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + if (num_runs++ > 8) { + ret = -EAGAIN; + break; + } + } + for (i = 0; i <= orig_level; i++) { + if (path->nodes[i]) { + btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; + } + } +out: + if (path) + btrfs_free_path(path); + if (ret != -EAGAIN) { + memset(&root->defrag_progress, 0, + sizeof(root->defrag_progress)); + } + return ret; +}