提交 e6dcd2dc 编写于 作者: C Chris Mason

Btrfs: New data=ordered implementation

The old data=ordered code would force commit to wait until
all the data extents from the transaction were fully on disk.  This
introduced large latencies into the commit and stalled new writers
in the transaction for a long time.

The new code changes the way data allocations and extents work:

* When delayed allocation is filled, data extents are reserved, and
  the extent bit EXTENT_ORDERED is set on the entire range of the extent.
  A struct btrfs_ordered_extent is allocated and inserted into a per-inode
  rbtree to track the pending extents.

* As each page is written EXTENT_ORDERED is cleared on the bytes corresponding
  to that page.

* When all of the bytes corresponding to a single struct btrfs_ordered_extent
  are written, the previously reserved extent is inserted into the FS
  btree and into the extent allocation trees.  The checksums for the file
  data are also updated.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
上级 77a41afb
......@@ -21,6 +21,7 @@
#include "extent_map.h"
#include "extent_io.h"
#include "ordered-data.h"
/* in memory btrfs inode */
struct btrfs_inode {
......@@ -32,9 +33,8 @@ struct btrfs_inode {
struct extent_io_tree io_failure_tree;
struct mutex csum_mutex;
struct inode vfs_inode;
atomic_t ordered_writeback;
struct btrfs_ordered_inode_tree ordered_tree;
u64 ordered_trans;
/*
* transid of the trans_handle that last modified this inode
*/
......
......@@ -25,6 +25,7 @@
#include <linux/fs.h>
#include <linux/completion.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <asm/kmap_types.h>
#include "bit-radix.h"
#include "extent_io.h"
......@@ -37,6 +38,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
struct btrfs_ordered_sum;
#define BTRFS_MAGIC "_B5RfS_M"
......@@ -510,6 +512,7 @@ struct btrfs_fs_info {
u64 max_inline;
u64 alloc_start;
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
struct btrfs_super_block super_copy;
struct btrfs_super_block super_for_commit;
struct block_device *__bdev;
......@@ -541,6 +544,7 @@ struct btrfs_fs_info {
*/
struct btrfs_workers workers;
struct btrfs_workers endio_workers;
struct btrfs_workers endio_write_workers;
struct btrfs_workers submit_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
......@@ -1384,6 +1388,17 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
u64 owner, u64 owner_offset,
u64 empty_size, u64 hint_byte,
u64 search_end, struct btrfs_key *ins, u64 data);
/*
 * Record an extent that was reserved earlier.  ins describes the extent;
 * its offset field is used as the extent length when the super block and
 * root "used" byte counters are updated.  root_objectid, ref_generation,
 * owner and owner_offset are hashed into the key of the extent reference.
 */
int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 ref_generation,
u64 owner, u64 owner_offset,
struct btrfs_key *ins);
/*
 * Reserve an extent without recording it yet.  On success the reserved
 * range is described by ins (objectid .. objectid + offset - 1) and that
 * range is cleared from the free space cache.
 */
int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
u64 search_end, struct btrfs_key *ins,
u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
......@@ -1556,9 +1571,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
u64 bytenr, int mod);
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct bio *bio, char *sums);
struct btrfs_ordered_sum *sums);
int btrfs_csum_one_bio(struct btrfs_root *root,
struct bio *bio, char **sums_ret);
struct bio *bio, struct btrfs_ordered_sum **sums_ret);
struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
......
......@@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio,
end_io_wq->error = err;
end_io_wq->work.func = end_workqueue_fn;
end_io_wq->work.flags = 0;
btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
if (bio->bi_rw & (1 << BIO_RW))
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
return 0;
......@@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
init_waitqueue_head(&fs_info->transaction_throttle);
#if 0
ret = add_hasher(fs_info, "crc32c");
......@@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->endio_write_workers,
fs_info->thread_pool_size);
btrfs_start_workers(&fs_info->workers, 1);
btrfs_start_workers(&fs_info->submit_workers, 1);
btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
btrfs_start_workers(&fs_info->endio_write_workers,
fs_info->thread_pool_size);
err = -EINVAL;
if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
......@@ -1447,6 +1456,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
fail_iput:
iput(fs_info->btree_inode);
......@@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
iput(fs_info->btree_inode);
......
......@@ -1895,36 +1895,17 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
return ret;
}
/*
* finds a free extent and does all the dirty work required for allocation
* returns the key for the extent through ins, and a tree buffer for
* the first block of the extent through buf.
*
* returns 0 if everything worked, non-zero otherwise.
*/
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 root_objectid, u64 ref_generation,
u64 owner, u64 owner_offset,
u64 empty_size, u64 hint_byte,
u64 search_end, struct btrfs_key *ins, u64 data)
static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
u64 search_end, struct btrfs_key *ins,
u64 data)
{
int ret;
int pending_ret;
u64 super_used;
u64 root_used;
u64 search_start = 0;
u64 alloc_profile;
u32 sizes[2];
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_root *extent_root = info->extent_root;
struct btrfs_extent_item *extent_item;
struct btrfs_extent_ref *ref;
struct btrfs_path *path;
struct btrfs_key keys[2];
maybe_lock_mutex(root);
if (data) {
alloc_profile = info->avail_data_alloc_bits &
......@@ -1974,11 +1955,48 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
}
if (ret) {
printk("allocation failed flags %Lu\n", data);
}
if (ret) {
BUG();
goto out;
}
clear_extent_dirty(&root->fs_info->free_space_cache,
ins->objectid, ins->objectid + ins->offset - 1,
GFP_NOFS);
return 0;
}
/*
 * Locked entry point for reserving an extent.
 *
 * Runs __btrfs_reserve_extent() between maybe_lock_mutex() and
 * maybe_unlock_mutex() on root and hands the helper's return value back
 * unchanged.  ins is passed straight through to the helper, which uses
 * its objectid/offset to describe the reserved range.
 */
int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 u64 search_end, struct btrfs_key *ins,
			 u64 data)
{
	int err;

	maybe_lock_mutex(root);
	err = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
				     empty_size, hint_byte, search_end,
				     ins, data);
	maybe_unlock_mutex(root);

	return err;
}
static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 ref_generation,
u64 owner, u64 owner_offset,
struct btrfs_key *ins)
{
int ret;
int pending_ret;
u64 super_used;
u64 root_used;
u64 num_bytes = ins->offset;
u32 sizes[2];
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_root *extent_root = info->extent_root;
struct btrfs_extent_item *extent_item;
struct btrfs_extent_ref *ref;
struct btrfs_path *path;
struct btrfs_key keys[2];
/* block accounting for super block */
spin_lock_irq(&info->delalloc_lock);
......@@ -1990,10 +2008,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
root_used = btrfs_root_used(&root->root_item);
btrfs_set_root_used(&root->root_item, root_used + num_bytes);
clear_extent_dirty(&root->fs_info->free_space_cache,
ins->objectid, ins->objectid + ins->offset - 1,
GFP_NOFS);
if (root == extent_root) {
set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
ins->objectid + ins->offset - 1,
......@@ -2001,10 +2015,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
goto update_block;
}
WARN_ON(trans->alloc_exclude_nr);
trans->alloc_exclude_start = ins->objectid;
trans->alloc_exclude_nr = ins->offset;
memcpy(&keys[0], ins, sizeof(*ins));
keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
owner, owner_offset);
......@@ -2054,6 +2064,51 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
BUG();
}
out:
return ret;
}
/*
 * Locked entry point for recording a previously reserved extent.
 *
 * Runs __btrfs_alloc_reserved_extent() between maybe_lock_mutex() and
 * maybe_unlock_mutex() on root and returns the helper's result unchanged.
 * ins identifies the extent being recorded.
 */
int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 root_objectid, u64 ref_generation,
				u64 owner, u64 owner_offset,
				struct btrfs_key *ins)
{
	int err;

	maybe_lock_mutex(root);
	err = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
					    ref_generation, owner,
					    owner_offset, ins);
	maybe_unlock_mutex(root);

	return err;
}
/*
 * Reserve an extent and record it in one call.
 *
 * Both steps run under a single maybe_lock_mutex()/maybe_unlock_mutex()
 * pair on root: first __btrfs_reserve_extent() picks the extent and
 * reports it through ins, then __btrfs_alloc_reserved_extent() records it
 * for (root_objectid, ref_generation, owner, owner_offset).
 *
 * Each step's result is checked with BUG_ON(); the value returned is the
 * result of the second step.
 */
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       u64 num_bytes, u64 min_alloc_size,
		       u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 owner_offset,
		       u64 empty_size, u64 hint_byte,
		       u64 search_end, struct btrfs_key *ins, u64 data)
{
	int err;

	maybe_lock_mutex(root);

	/* step one: find and reserve the space */
	err = __btrfs_reserve_extent(trans, root, num_bytes,
				     min_alloc_size, empty_size, hint_byte,
				     search_end, ins, data);
	BUG_ON(err);

	/* step two: record the extent that was just reserved */
	err = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
					    ref_generation, owner,
					    owner_offset, ins);
	BUG_ON(err);

	maybe_unlock_mutex(root);
	return err;
}
......@@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
mutex_lock(&root->fs_info->alloc_mutex);
/* we've dropped the lock, double check */
ret = drop_snap_lookup_refcount(root, bytenr,
blocksize, &refs);
ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
&refs);
BUG_ON(ret);
if (refs != 1) {
parent = path->nodes[*level];
......@@ -2584,7 +2639,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
kfree(ra);
trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
if (trans) {
btrfs_add_ordered_inode(inode);
btrfs_end_transaction(trans, BTRFS_I(inode)->root);
mark_inode_dirty(inode);
}
......
......@@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
}
EXPORT_SYMBOL(set_extent_dirty);
/*
 * Set the EXTENT_ORDERED bit on the range start..end of tree.
 * The result of set_extent_bit() is returned as-is.
 */
int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	int ret;

	ret = set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
	return ret;
}
EXPORT_SYMBOL(set_extent_ordered);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
......@@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
mask);
EXTENT_DELALLOC | EXTENT_DIRTY,
0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_delalloc);
......@@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
}
EXPORT_SYMBOL(clear_extent_dirty);
/*
 * Clear the EXTENT_ORDERED bit on the range start..end of tree.
 * clear_extent_bit() is called with wake set and delete unset; its
 * result is returned as-is.
 */
int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	int ret;

	ret = clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
	return ret;
}
EXPORT_SYMBOL(clear_extent_ordered);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
......@@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio,
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
if (tree->ops && tree->ops->writepage_end_io_hook) {
ret = tree->ops->writepage_end_io_hook(page, start,
end, state);
end, state, uptodate);
if (ret)
uptodate = 0;
}
......@@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
unlock_extent(tree, cur, end, GFP_NOFS);
break;
}
extent_offset = cur - em->start;
if (extent_map_end(em) <= cur) {
printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
}
BUG_ON(extent_map_end(em) <= cur);
if (end < cur) {
printk("2bad mapping end %Lu cur %Lu\n", end, cur);
}
BUG_ON(end < cur);
iosize = min(extent_map_end(em) - cur, end - cur + 1);
......@@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
u64 unlock_start;
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
......@@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 nr_delalloc;
u64 delalloc_end;
WARN_ON(!PageLocked(page));
page_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
......@@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
delalloc_start = delalloc_end + 1;
}
lock_extent(tree, start, page_end, GFP_NOFS);
unlock_start = start;
end = page_end;
if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
......@@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (last_byte <= start) {
clear_extent_dirty(tree, start, page_end, GFP_NOFS);
unlock_extent(tree, start, page_end, GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1);
unlock_start = page_end + 1;
goto done;
}
......@@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
while (cur <= end) {
if (cur >= last_byte) {
clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
page_end, NULL, 1);
unlock_start = page_end + 1;
break;
}
em = epd->get_extent(inode, page, page_offset, cur,
......@@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
block_start == EXTENT_MAP_INLINE) {
clear_extent_dirty(tree, cur,
cur + iosize - 1, GFP_NOFS);
unlock_extent(tree, unlock_start, cur + iosize -1,
GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
cur + iosize - 1,
NULL, 1);
cur = cur + iosize;
page_offset += iosize;
unlock_start = cur;
continue;
}
......@@ -2119,7 +2156,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
set_page_writeback(page);
end_page_writeback(page);
}
unlock_extent(tree, start, page_end, GFP_NOFS);
if (unlock_start <= page_end)
unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
return 0;
}
......
......@@ -13,6 +13,8 @@
#define EXTENT_DEFRAG (1 << 6)
#define EXTENT_DEFRAG_DONE (1 << 7)
#define EXTENT_BUFFER_FILLED (1 << 8)
#define EXTENT_ORDERED (1 << 9)
#define EXTENT_ORDERED_METADATA (1 << 10)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
/*
......@@ -42,7 +44,7 @@ struct extent_io_ops {
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state);
struct extent_state *state, int uptodate);
int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
unsigned long old, unsigned long bits);
int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end,
......@@ -131,6 +133,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int filled);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int wake, int delete, gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
......@@ -141,8 +145,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits);
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
......@@ -209,6 +219,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len);
int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
struct extent_buffer *eb);
int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
int clear_extent_buffer_dirty(struct extent_io_tree *tree,
struct extent_buffer *eb);
int set_extent_buffer_dirty(struct extent_io_tree *tree,
......
......@@ -206,10 +206,11 @@ int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *merge = NULL;
struct rb_node *rb;
BUG_ON(spin_trylock(&tree->lock));
rb = tree_insert(&tree->map, em->start, &em->rb_node);
if (rb) {
merge = rb_entry(rb, struct extent_map, rb_node);
ret = -EEXIST;
free_extent_map(merge);
goto out;
}
atomic_inc(&em->refs);
......@@ -268,6 +269,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
struct rb_node *next = NULL;
u64 end = range_end(start, len);
BUG_ON(spin_trylock(&tree->lock));
em = tree->last;
if (em && end > em->start && start < extent_map_end(em))
goto found;
......@@ -318,6 +320,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
int ret = 0;
BUG_ON(spin_trylock(&tree->lock));
rb_erase(&em->rb_node, &tree->map);
em->in_tree = 0;
if (tree->last == em)
......
......@@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
}
int btrfs_csum_one_bio(struct btrfs_root *root,
struct bio *bio, char **sums_ret)
struct bio *bio, struct btrfs_ordered_sum **sums_ret)
{
u32 *sums;
struct btrfs_ordered_sum *sums;
struct btrfs_sector_sum *sector_sum;
char *data;
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS);
WARN_ON(bio->bi_vcnt <= 0);
sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
if (!sums)
return -ENOMEM;
*sums_ret = (char *)sums;
*sums_ret = sums;
sector_sum = &sums->sums;
sums->file_offset = page_offset(bvec->bv_page);
sums->len = bio->bi_size;
INIT_LIST_HEAD(&sums->list);
while(bio_index < bio->bi_vcnt) {
data = kmap_atomic(bvec->bv_page, KM_USER0);
*sums = ~(u32)0;
*sums = btrfs_csum_data(root, data + bvec->bv_offset,
*sums, bvec->bv_len);
sector_sum->sum = ~(u32)0;
sector_sum->sum = btrfs_csum_data(root,
data + bvec->bv_offset,
sector_sum->sum,
bvec->bv_len);
kunmap_atomic(data, KM_USER0);
btrfs_csum_final(*sums, (char *)sums);
sums++;
btrfs_csum_final(sector_sum->sum,
(char *)&sector_sum->sum);
sector_sum->offset = page_offset(bvec->bv_page) +
bvec->bv_offset;
sector_sum++;
bio_index++;
bvec++;
}
......@@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct bio *bio, char *sums)
struct btrfs_ordered_sum *sums)
{
u64 objectid = inode->i_ino;
u64 offset;
......@@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_key file_key;
struct btrfs_key found_key;
u64 next_offset;
u64 total_bytes = 0;
int found_next;
struct btrfs_path *path;
struct btrfs_csum_item *item;
struct btrfs_csum_item *item_end;
struct extent_buffer *leaf = NULL;
u64 csum_offset;
u32 *sums32 = (u32 *)sums;
struct btrfs_sector_sum *sector_sum;
u32 nritems;
u32 ins_size;
int bio_index = 0;
struct bio_vec *bvec = bio->bi_io_vec;
char *eb_map;
char *eb_token;
unsigned long map_len;
......@@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
sector_sum = &sums->sums;
again:
next_offset = (u64)-1;
found_next = 0;
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
offset = sector_sum->offset;
file_key.objectid = objectid;
file_key.offset = offset;
btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
......@@ -303,7 +314,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
eb_token = NULL;
next_bvec:
next_sector:
if (!eb_token ||
(unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) {
......@@ -321,21 +332,20 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
}
if (eb_token) {
memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
sums32, BTRFS_CRC32_SIZE);
&sector_sum->sum, BTRFS_CRC32_SIZE);
} else {
write_extent_buffer(leaf, sums32, (unsigned long)item,
BTRFS_CRC32_SIZE);
write_extent_buffer(leaf, &sector_sum->sum,
(unsigned long)item, BTRFS_CRC32_SIZE);
}
bio_index++;
bvec++;
sums32++;
if (bio_index < bio->bi_vcnt) {
total_bytes += root->sectorsize;
sector_sum++;
if (total_bytes < sums->len) {
item = (struct btrfs_csum_item *)((char *)item +
BTRFS_CRC32_SIZE);
if (item < item_end && offset + PAGE_CACHE_SIZE ==
page_offset(bvec->bv_page)) {
offset = page_offset(bvec->bv_page);
goto next_bvec;
sector_sum->offset) {
offset = sector_sum->offset;
goto next_sector;
}
}
if (eb_token) {
......@@ -343,7 +353,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
eb_token = NULL;
}
btrfs_mark_buffer_dirty(path->nodes[0]);
if (bio_index < bio->bi_vcnt) {
if (total_bytes < sums->len) {
btrfs_release_path(root, path);
goto again;
}
......
......@@ -34,7 +34,6 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ordered-data.h"
#include "ioctl.h"
#include "print-tree.h"
#include "compat.h"
......@@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
u64 mask = root->sectorsize - 1;
last_pos_in_file = (isize + mask) & ~mask;
hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
if (last_pos_in_file < start_pos) {
if (hole_size > 0) {
btrfs_wait_ordered_range(inode, last_pos_in_file,
last_pos_in_file + hole_size);
err = btrfs_drop_extents(trans, root, inode,
last_pos_in_file,
last_pos_in_file + hole_size,
......@@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
inline_size > root->fs_info->max_inline ||
(inline_size & (root->sectorsize -1)) == 0 ||
inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
u64 last_end;
/* check for reserved extents on each page, we don't want
* to reset the delalloc bit on things that already have
* extents reserved.
*/
set_extent_delalloc(io_tree, start_pos,
end_of_last_block, GFP_NOFS);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
SetPageUptodate(p);
set_page_dirty(p);
}
last_end = (u64)(pages[num_pages -1]->index) <<
PAGE_CACHE_SHIFT;
last_end += PAGE_CACHE_SIZE - 1;
set_extent_delalloc(io_tree, start_pos, end_of_last_block,
GFP_NOFS);
btrfs_add_ordered_inode(inode);
} else {
u64 aligned_end;
/* step one, delete the existing extents in this range */
......@@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
struct extent_map *split = NULL;
struct extent_map *split2 = NULL;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *tmp;
u64 len = end - start + 1;
u64 next_start;
int ret;
int testend = 1;
WARN_ON(end < start);
if (end == (u64)-1) {
len = (u64)-1;
testend = 0;
......@@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
spin_unlock(&em_tree->lock);
break;
}
tmp = rb_entry(&em->rb_node, struct extent_map, rb_node);
next_start = tmp->start;
remove_extent_mapping(em_tree, em);
if (em->block_start < EXTENT_MAP_LAST_BYTE &&
......@@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file,
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
u64 start_pos;
u64 last_pos;
start_pos = pos & ~((u64)root->sectorsize - 1);
last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
memset(pages, 0, num_pages * sizeof(struct page *));
again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
err = -ENOMEM;
BUG_ON(1);
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
ClearPageDirty(pages[i]);
#else
cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
#endif
wait_on_page_writeback(pages[i]);
set_page_extent_mapped(pages[i]);
WARN_ON(!PageLocked(pages[i]));
}
if (start_pos < inode->i_size) {
u64 last_pos;
last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(inode)->io_tree,
start_pos, last_pos - 1, GFP_NOFS);
ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
if (ordered &&
ordered->file_offset + ordered->len > start_pos &&
ordered->file_offset < last_pos) {
btrfs_put_ordered_extent(ordered);
unlock_extent(&BTRFS_I(inode)->io_tree,
start_pos, last_pos - 1, GFP_NOFS);
for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
btrfs_wait_ordered_range(inode, start_pos,
last_pos - start_pos);
goto again;
}
if (ordered)
btrfs_put_ordered_extent(ordered);
clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
GFP_NOFS);
unlock_extent(&BTRFS_I(inode)->io_tree,
start_pos, last_pos - 1, GFP_NOFS);
}
for (i = 0; i < num_pages; i++) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
ClearPageDirty(pages[i]);
#else
cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
#endif
set_page_extent_mapped(pages[i]);
WARN_ON(!PageLocked(pages[i]));
}
return 0;
}
......@@ -969,13 +994,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
}
current->backing_dev_info = NULL;
btrfs_ordered_throttle(root, inode);
return num_written ? num_written : err;
}
int btrfs_release_file(struct inode * inode, struct file * filp)
{
btrfs_del_ordered_inode(inode, 0);
if (filp->private_data)
btrfs_ioctl_trans_end(filp);
return 0;
......
此差异已折叠。
......@@ -22,48 +22,30 @@
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "extent_io.h"
struct tree_entry {
u64 root_objectid;
u64 objectid;
struct inode *inode;
struct rb_node rb_node;
};
/*
* returns > 0 if entry passed (root, objectid) is > entry,
* < 0 if (root, objectid) < entry and zero if they are equal
*/
static int comp_entry(struct tree_entry *entry, u64 root_objectid,
u64 objectid)
static u64 entry_end(struct btrfs_ordered_extent *entry)
{
if (root_objectid < entry->root_objectid)
return -1;
if (root_objectid > entry->root_objectid)
return 1;
if (objectid < entry->objectid)
return -1;
if (objectid > entry->objectid)
return 1;
return 0;
if (entry->file_offset + entry->len < entry->file_offset)
return (u64)-1;
return entry->file_offset + entry->len;
}
static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
u64 objectid, struct rb_node *node)
static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
struct rb_node *node)
{
struct rb_node ** p = &root->rb_node;
struct rb_node * parent = NULL;
struct tree_entry *entry;
int comp;
struct btrfs_ordered_extent *entry;
while(*p) {
parent = *p;
entry = rb_entry(parent, struct tree_entry, rb_node);
entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
comp = comp_entry(entry, root_objectid, objectid);
if (comp < 0)
if (file_offset < entry->file_offset)
p = &(*p)->rb_left;
else if (comp > 0)
else if (file_offset >= entry_end(entry))
p = &(*p)->rb_right;
else
return parent;
......@@ -74,24 +56,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
return NULL;
}
static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
u64 objectid, struct rb_node **prev_ret)
static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
struct rb_node **prev_ret)
{
struct rb_node * n = root->rb_node;
struct rb_node *prev = NULL;
struct tree_entry *entry;
struct tree_entry *prev_entry = NULL;
int comp;
struct rb_node *test;
struct btrfs_ordered_extent *entry;
struct btrfs_ordered_extent *prev_entry = NULL;
while(n) {
entry = rb_entry(n, struct tree_entry, rb_node);
entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
prev = n;
prev_entry = entry;
comp = comp_entry(entry, root_objectid, objectid);
if (comp < 0)
if (file_offset < entry->file_offset)
n = n->rb_left;
else if (comp > 0)
else if (file_offset >= entry_end(entry))
n = n->rb_right;
else
return n;
......@@ -99,195 +80,329 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
if (!prev_ret)
return NULL;
while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) {
prev = rb_next(prev);
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
while(prev && file_offset >= entry_end(prev_entry)) {
test = rb_next(prev);
if (!test)
break;
prev_entry = rb_entry(test, struct btrfs_ordered_extent,
rb_node);
if (file_offset < entry_end(prev_entry))
break;
prev = test;
}
if (prev)
prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
rb_node);
while(prev && file_offset < entry_end(prev_entry)) {
test = rb_prev(prev);
if (!test)
break;
prev_entry = rb_entry(test, struct btrfs_ordered_extent,
rb_node);
prev = test;
}
*prev_ret = prev;
return NULL;
}
static inline struct rb_node *tree_search(struct rb_root *root,
u64 root_objectid, u64 objectid)
/*
 * Return 1 when file_offset lies inside the byte range covered by entry,
 * 0 otherwise.
 *
 * The exclusive upper bound comes from entry_end() rather than a raw
 * file_offset + len sum, so an entry whose end would wrap past the top
 * of the u64 space is clamped exactly the way tree_insert() and
 * __tree_search() treat it.
 */
static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
{
	if (file_offset < entry->file_offset)
		return 0;
	if (file_offset >= entry_end(entry))
		return 0;
	return 1;
}
static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
u64 file_offset)
{
struct rb_root *root = &tree->tree;
struct rb_node *prev;
struct rb_node *ret;
ret = __tree_search(root, root_objectid, objectid, &prev);
struct btrfs_ordered_extent *entry;
if (tree->last) {
entry = rb_entry(tree->last, struct btrfs_ordered_extent,
rb_node);
if (offset_in_entry(entry, file_offset))
return tree->last;
}
ret = __tree_search(root, file_offset, &prev);
if (!ret)
return prev;
ret = prev;
if (ret)
tree->last = ret;
return ret;
}
int btrfs_add_ordered_inode(struct inode *inode)
/*
 * Allocate a zeroed btrfs_ordered_extent describing 'len' bytes at
 * 'file_offset' (with 'start' stored in entry->start) and insert it into
 * the inode's ordered tree, keyed by file_offset.  The new entry starts
 * with one reference, held by the tree.  The byte range is marked through
 * set_extent_ordered() on the inode's io_tree and BTRFS_ORDERED_START is
 * set on the entry, all under tree->mutex.
 *
 * Returns 0 on success, -ENOMEM when the entry cannot be allocated.
 *
 * NOTE(review): this span appears to interleave lines from the removed
 * btrfs_add_ordered_inode() (root_objectid, transid, the rwlock calls,
 * igrab(), the three-argument-key tree_insert()) with the new function;
 * it is not compilable as shown -- confirm against the real source.
 *
 * NOTE(review): when tree_insert() returns an existing node, 'entry' is
 * repointed at that node and then handed to kfree(), after which set_bit()
 * touches it.  BUG_ON(node) fires right afterwards, so that path is fatal
 * either way, but the ordering deserves a closer look.
 */
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 root_objectid = root->root_key.objectid;
u64 transid = root->fs_info->running_transaction->transid;
struct tree_entry *entry;
struct rb_node *node;
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
if (transid <= BTRFS_I(inode)->ordered_trans)
return 0;
tree = &root->fs_info->running_transaction->ordered_inode_tree;
read_lock(&tree->lock);
node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL);
read_unlock(&tree->lock);
if (node) {
return 0;
}
entry = kmalloc(sizeof(*entry), GFP_NOFS);
tree = &BTRFS_I(inode)->ordered_tree;
entry = kzalloc(sizeof(*entry), GFP_NOFS);
if (!entry)
return -ENOMEM;
write_lock(&tree->lock);
entry->objectid = inode->i_ino;
entry->root_objectid = root_objectid;
mutex_lock(&tree->mutex);
entry->file_offset = file_offset;
entry->start = start;
entry->len = len;
entry->inode = inode;
/* one ref for the tree */
atomic_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
INIT_LIST_HEAD(&entry->list);
node = tree_insert(&tree->tree, root_objectid,
inode->i_ino, &entry->rb_node);
BTRFS_I(inode)->ordered_trans = transid;
if (!node)
igrab(inode);
write_unlock(&tree->lock);
node = tree_insert(&tree->tree, file_offset,
&entry->rb_node);
/* a non-NULL return means an entry already exists at this key */
if (node) {
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
atomic_inc(&entry->refs);
}
set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
entry_end(entry) - 1, GFP_NOFS);
if (node)
kfree(entry);
set_bit(BTRFS_ORDERED_START, &entry->flags);
mutex_unlock(&tree->mutex);
BUG_ON(node);
return 0;
}
int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
u64 *root_objectid, u64 *objectid,
struct inode **inode)
/*
 * Attach a checksum record to the ordered extent covering
 * sum->file_offset by appending it to that entry's list, under
 * tree->mutex.  Returns 0.
 *
 * If no entry covers the offset (either the search finds nothing or the
 * node found does not contain the offset), the search_fail path prints the
 * inode number and offset, dumps every entry in the tree and hits BUG().
 *
 * NOTE(review): this span appears to interleave lines from the removed
 * btrfs_find_first_ordered_inode() (write_lock/write_unlock, comp_entry(),
 * *root_objectid, *objectid, "return 1") with the new function; braces do
 * not balance as shown -- confirm against the real source.
 */
int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
{
struct tree_entry *entry;
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
write_lock(&tree->lock);
node = tree_search(&tree->tree, *root_objectid, *objectid);
tree = &BTRFS_I(inode)->ordered_tree;
mutex_lock(&tree->mutex);
node = tree_search(tree, sum->file_offset);
if (!node) {
write_unlock(&tree->lock);
return 0;
search_fail:
printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset);
/* dump the whole tree to help debugging before giving up */
node = rb_first(&tree->tree);
while(node) {
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start);
node = rb_next(node);
}
BUG();
}
entry = rb_entry(node, struct tree_entry, rb_node);
BUG_ON(!node);
while(comp_entry(entry, *root_objectid, *objectid) >= 0) {
node = rb_next(node);
if (!node)
break;
entry = rb_entry(node, struct tree_entry, rb_node);
}
if (!node) {
write_unlock(&tree->lock);
return 0;
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
/* tree_search() may return a neighbouring node; insist on real coverage */
if (!offset_in_entry(entry, sum->file_offset)) {
goto search_fail;
}
*root_objectid = entry->root_objectid;
*inode = entry->inode;
atomic_inc(&entry->inode->i_count);
*objectid = entry->objectid;
write_unlock(&tree->lock);
return 1;
list_add_tail(&sum->list, &entry->list);
mutex_unlock(&tree->mutex);
return 0;
}
int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
u64 *root_objectid, u64 *objectid,
struct inode **inode)
/*
 * Record that [file_offset, file_offset + io_size - 1] has been written:
 * the range is passed to clear_extent_ordered() on the inode's io_tree,
 * then the ordered extent covering file_offset is checked with
 * test_range_bit(..., EXTENT_ORDERED, 0) over its whole range.
 *
 * Returns 1 only when an entry covers file_offset, test_range_bit()
 * reports 0 for that entry's range, and BTRFS_ORDERED_IO_DONE was not
 * already set (test_and_set_bit() makes exactly one caller win).
 * Returns 0 in every other case, including when no entry is found.
 * Everything runs under tree->mutex.
 *
 * NOTE(review): this span appears to interleave lines from the removed
 * btrfs_find_del_first_ordered_inode() (write_lock/write_unlock,
 * comp_entry(), *root_objectid, *objectid) with the new function; braces
 * do not balance as shown -- confirm against the real source.
 */
int btrfs_dec_test_ordered_pending(struct inode *inode,
u64 file_offset, u64 io_size)
{
struct tree_entry *entry;
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
write_lock(&tree->lock);
node = tree_search(&tree->tree, *root_objectid, *objectid);
struct btrfs_ordered_extent *entry;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
int ret;
tree = &BTRFS_I(inode)->ordered_tree;
mutex_lock(&tree->mutex);
clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
GFP_NOFS);
node = tree_search(tree, file_offset);
if (!node) {
write_unlock(&tree->lock);
return 0;
ret = 1;
goto out;
}
entry = rb_entry(node, struct tree_entry, rb_node);
while(comp_entry(entry, *root_objectid, *objectid) >= 0) {
node = rb_next(node);
if (!node)
break;
entry = rb_entry(node, struct tree_entry, rb_node);
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (!offset_in_entry(entry, file_offset)) {
ret = 1;
goto out;
}
if (!node) {
write_unlock(&tree->lock);
return 0;
ret = test_range_bit(io_tree, entry->file_offset,
entry->file_offset + entry->len - 1,
EXTENT_ORDERED, 0);
/* diagnostic only: the entry was looked up before BTRFS_ORDERED_START was set */
if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) {
printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry));
}
/* ret becomes the previous IO_DONE value, so only the first finisher sees 0 */
if (ret == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
out:
mutex_unlock(&tree->mutex);
return ret == 0;
}
*root_objectid = entry->root_objectid;
*objectid = entry->objectid;
*inode = entry->inode;
atomic_inc(&entry->inode->i_count);
rb_erase(node, &tree->tree);
write_unlock(&tree->lock);
kfree(entry);
return 1;
/*
 * Drop one reference on an ordered extent; the entry is released with
 * kfree() once the count reaches zero.  Always returns 0.
 */
int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
{
	int was_last_ref = atomic_dec_and_test(&entry->refs);

	if (was_last_ref)
		kfree(entry);
	return 0;
}
static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree,
struct inode *inode,
u64 root_objectid, u64 objectid)
/*
 * Take 'entry' out of the inode's ordered tree: under tree->mutex the
 * node is erased from the rb-tree, the tree->last lookup cache is reset
 * and BTRFS_ORDERED_COMPLETE is set on the entry; after dropping the
 * mutex, sleepers on entry->wait are woken.  Returns 0.
 *
 * NOTE(review): this span appears to interleave lines from the removed
 * __btrfs_del_ordered_inode() (write_lock/write_unlock, the
 * __tree_search() call, ordered_trans, i_count, kfree(), the bare
 * "return;") with the new function -- confirm against the real source.
 */
int btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
{
struct tree_entry *entry;
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct rb_node *prev;
write_lock(&tree->lock);
node = __tree_search(&tree->tree, root_objectid, objectid, &prev);
if (!node) {
write_unlock(&tree->lock);
return;
}
tree = &BTRFS_I(inode)->ordered_tree;
mutex_lock(&tree->mutex);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
BTRFS_I(inode)->ordered_trans = 0;
write_unlock(&tree->lock);
atomic_dec(&inode->i_count);
entry = rb_entry(node, struct tree_entry, rb_node);
kfree(entry);
return;
/* the cached node may be the one just erased, so drop the cache */
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
mutex_unlock(&tree->mutex);
wake_up(&entry->wait);
return 0;
}
void btrfs_del_ordered_inode(struct inode *inode, int force)
/*
 * Kick off writeback for the byte range of 'entry' using
 * SYNC_FILE_RANGE_WRITE, then sleep on entry->wait until
 * BTRFS_ORDERED_COMPLETE is set in entry->flags.
 *
 * NOTE(review): the pre-2.6.22 branch passes 'file' to
 * do_sync_file_range(), but no such variable is declared in this
 * function -- that branch presumably fails to build on those kernels.
 *
 * NOTE(review): the 'root' and 'root_objectid' declarations are not used
 * by the rest of the body and look like leftovers from the removed
 * btrfs_del_ordered_inode() in an unmarked diff -- confirm.
 */
void btrfs_wait_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 root_objectid = root->root_key.objectid;
u64 start = entry->file_offset;
/* inclusive last byte of the extent */
u64 end = start + entry->len - 1;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
#else
do_sync_mapping_range(inode->i_mapping, start, end,
SYNC_FILE_RANGE_WRITE);
#endif
wait_event(entry->wait,
test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
}
if (!BTRFS_I(inode)->ordered_trans) {
return;
}
/*
 * Start writeback for the byte range of 'entry' using
 * SYNC_FILE_RANGE_WRITE.  When 'wait' is nonzero, additionally sleep on
 * entry->wait until BTRFS_ORDERED_COMPLETE is set in entry->flags.
 *
 * NOTE(review): the pre-2.6.22 branch passes 'file' to
 * do_sync_file_range(), but no such variable is declared here.
 *
 * NOTE(review): the "if (!force && mapping_tagged(...)) return;" statement
 * references 'force', which is not a parameter of this function; it looks
 * like a leftover from the removed btrfs_del_ordered_inode() in an
 * unmarked diff -- confirm.
 */
static void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait)
{
u64 start = entry->file_offset;
/* inclusive last byte of the extent */
u64 end = start + entry->len - 1;
if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
return;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
#else
do_sync_mapping_range(inode->i_mapping, start, end,
SYNC_FILE_RANGE_WRITE);
#endif
if (wait)
wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
&entry->flags));
}
spin_lock(&root->fs_info->new_trans_lock);
if (root->fs_info->running_transaction) {
struct btrfs_ordered_inode_tree *tree;
tree = &root->fs_info->running_transaction->ordered_inode_tree;
__btrfs_del_ordered_inode(tree, inode, root_objectid,
inode->i_ino);
/*
 * Walk the inode's ordered extents that intersect [start, start + len),
 * from the end of the range backwards, calling
 * btrfs_start_ordered_extent() on each.
 *
 * Each iteration looks up an extent via
 * btrfs_lookup_first_ordered_extent(inode, end), stops when none is
 * returned or when the extent lies outside the range, and otherwise
 * continues from the byte just below that extent's file_offset.  The
 * reference returned by the lookup is dropped with
 * btrfs_put_ordered_extent() on every path.
 *
 * NOTE(review): 'should_wait' is initialised to 0 and never set to a
 * nonzero value in this function, so btrfs_start_ordered_extent() is
 * always called with wait == 0 and the "goto again" branch is never
 * taken -- despite its name the function does not appear to wait.
 *
 * NOTE(review): wrap-around of start + len is handled when computing
 * 'end', but the "ordered->file_offset >= start + len" test uses the
 * unguarded sum.
 *
 * NOTE(review): the trailing spin_unlock() references 'root', which is
 * not declared here; it looks like a leftover from the removed
 * btrfs_del_ordered_inode() in an unmarked diff -- confirm.
 */
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
u64 end;
struct btrfs_ordered_extent *ordered;
int found;
int should_wait = 0;
again:
/* clamp to the largest offset if start + len wraps */
if (start + len < start)
end = (u64)-1;
else
end = start + len - 1;
found = 0;
while(1) {
ordered = btrfs_lookup_first_ordered_extent(inode, end);
if (!ordered) {
break;
}
if (ordered->file_offset >= start + len) {
btrfs_put_ordered_extent(ordered);
break;
}
if (ordered->file_offset + ordered->len < start) {
btrfs_put_ordered_extent(ordered);
break;
}
btrfs_start_ordered_extent(inode, ordered, should_wait);
found++;
/* next lookup targets the byte just before this extent */
end = ordered->file_offset;
btrfs_put_ordered_extent(ordered);
if (end == 0)
break;
end--;
}
if (should_wait && found) {
should_wait = 0;
goto again;
}
spin_unlock(&root->fs_info->new_trans_lock);
}
int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode)
/*
 * Currently a stub: emits WARN_ON(1) and returns 0.
 *
 * The body kept under "#if 0" shows the disabled implementation: under
 * tree->mutex it would return -EAGAIN if BTRFS_ORDERED_IO_DONE is already
 * set on 'ordered', and otherwise call set_extent_ordered() on
 * [start, start + len - 1] of the inode's io_tree and return 0.
 *
 * NOTE(review): the congestion_wait()/blk_congestion_wait() loop reads
 * 'root', which is not a parameter of this function; together with its
 * own #if/#else/#endif and closing brace it looks like the body of the
 * removed btrfs_ordered_throttle() in an unmarked diff -- confirm against
 * the real source.
 */
int btrfs_add_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent *ordered,
u64 start, u64 len)
{
struct btrfs_transaction *cur = root->fs_info->running_transaction;
while(cur == root->fs_info->running_transaction &&
atomic_read(&BTRFS_I(inode)->ordered_writeback)) {
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
congestion_wait(WRITE, HZ/20);
#else
blk_congestion_wait(WRITE, HZ/20);
#endif
}
WARN_ON(1);
return 0;
#if 0
int ret;
struct btrfs_ordered_inode_tree *tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
tree = &BTRFS_I(inode)->ordered_tree;
mutex_lock(&tree->mutex);
if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
ret = -EAGAIN;
goto out;
}
set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS);
ret = 0;
out:
mutex_unlock(&tree->mutex);
return ret;
#endif
}
/*
 * Find the ordered extent of 'inode' whose byte range contains
 * file_offset.
 *
 * Returns the entry with its reference count raised by one (drop it with
 * btrfs_put_ordered_extent()), or NULL when no entry covers the offset.
 * The lookup and the reference grab both happen under the tree mutex.
 */
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
							 u64 file_offset)
{
	struct btrfs_ordered_inode_tree *ordered_tree = &BTRFS_I(inode)->ordered_tree;
	struct btrfs_ordered_extent *found = NULL;
	struct rb_node *hit;

	mutex_lock(&ordered_tree->mutex);
	hit = tree_search(ordered_tree, file_offset);
	if (hit) {
		struct btrfs_ordered_extent *candidate;

		candidate = rb_entry(hit, struct btrfs_ordered_extent, rb_node);
		/* tree_search() can hand back a neighbour; require real coverage */
		if (offset_in_entry(candidate, file_offset)) {
			atomic_inc(&candidate->refs);
			found = candidate;
		}
	}
	mutex_unlock(&ordered_tree->mutex);
	return found;
}
/*
 * Return whatever ordered extent tree_search() yields for file_offset,
 * without requiring that the entry actually contains the offset.
 *
 * The entry is returned with its reference count raised by one (drop it
 * with btrfs_put_ordered_extent()); NULL is returned when the search
 * finds no node.  Runs under the tree mutex.
 */
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset)
{
	struct btrfs_ordered_inode_tree *ordered_tree = &BTRFS_I(inode)->ordered_tree;
	struct btrfs_ordered_extent *found = NULL;
	struct rb_node *hit;

	mutex_lock(&ordered_tree->mutex);
	hit = tree_search(ordered_tree, file_offset);
	if (hit) {
		found = rb_entry(hit, struct btrfs_ordered_extent, rb_node);
		atomic_inc(&found->refs);
	}
	mutex_unlock(&ordered_tree->mutex);
	return found;
}
......@@ -20,24 +20,73 @@
#define __BTRFS_ORDERED_DATA__
/*
 * Per-inode container for pending ordered extents.
 *
 * NOTE(review): 'rwlock_t lock' sits next to 'struct mutex mutex' here;
 * the functions that operate on btrfs_ordered_extent entries only take the
 * mutex, so the rwlock looks like the removed side of an unmarked diff --
 * confirm against the real header.
 */
struct btrfs_ordered_inode_tree {
rwlock_t lock;
/* taken around lookups, inserts and removals on 'tree' */
struct mutex mutex;
/* rb-tree of struct btrfs_ordered_extent, keyed by file_offset */
struct rb_root tree;
/* most recent node returned by a search; reset to NULL on removal */
struct rb_node *last;
};
/*
 * One checksum record.  btrfs_ordered_sum_size() budgets one of these per
 * sector of data.
 */
struct btrfs_sector_sum {
/* NOTE(review): presumably the position of the summed sector -- confirm whether this is a file or disk offset */
u64 offset;
/* NOTE(review): presumably the checksum of that one sector -- confirm */
u32 sum;
};
/*
 * A batch of checksums queued on an ordered extent via
 * btrfs_add_ordered_sum().
 */
struct btrfs_ordered_sum {
/* used to locate the owning ordered extent in the inode's tree */
u64 file_offset;
/* NOTE(review): presumably the number of bytes covered by this batch -- confirm */
u64 len;
/* links this record onto btrfs_ordered_extent.list */
struct list_head list;
/*
 * NOTE(review): btrfs_ordered_sum_size() allocates room for additional
 * btrfs_sector_sum records after the struct, so this looks like the
 * first element of a trailing array -- confirm how callers index it.
 */
struct btrfs_sector_sum sums;
};
/* bits for the flags field */
#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
#define BTRFS_ORDERED_START 2 /* set when tree setup */
/*
 * Tracks one pending data extent of an inode; lives in the inode's
 * btrfs_ordered_inode_tree and is reference counted.
 */
struct btrfs_ordered_extent {
/* first byte of the range within the file; also the rb-tree key */
u64 file_offset;
/* 'start' argument given to btrfs_add_ordered_extent(); presumably the reserved on-disk location -- confirm */
u64 start;
/* length of the range in bytes */
u64 len;
/* BTRFS_ORDERED_* bit numbers, manipulated with set_bit()/test_bit() */
unsigned long flags;
/* reference count; the entry is freed when it drops to zero */
atomic_t refs;
/* btrfs_ordered_sum records queued by btrfs_add_ordered_sum() */
struct list_head list;
/* inode that owns this extent */
struct inode *inode;
/* woken when the entry is removed from the tree (BTRFS_ORDERED_COMPLETE) */
wait_queue_head_t wait;
/* linkage into btrfs_ordered_inode_tree.tree */
struct rb_node rb_node;
};
/*
 * Number of bytes to allocate for a btrfs_ordered_sum that carries the
 * checksums of 'bytes' bytes of data: the struct itself plus one
 * btrfs_sector_sum for every sector, with 'bytes' rounded up to a whole
 * number of root->sectorsize sectors.
 */
static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes)
{
	unsigned long sector_count;
	size_t total;

	sector_count = (bytes + root->sectorsize - 1) / root->sectorsize;
	total = sizeof(struct btrfs_ordered_sum);
	total += sector_count * sizeof(struct btrfs_sector_sum);
	return total;
}
/*
 * Prepare an empty ordered tree: initialise its lock(s), clear the
 * rb-tree root and reset the cached 'last' node.
 *
 * NOTE(review): rwlock_init() appears alongside mutex_init(); the rwlock
 * line looks like the removed side of an unmarked diff -- confirm against
 * the real header.
 */
static inline void
btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
{
rwlock_init(&t->lock);
mutex_init(&t->mutex);
t->tree.rb_node = NULL;
t->last = NULL;
}
int btrfs_add_ordered_inode(struct inode *inode);
int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
u64 *root_objectid, u64 *objectid,
struct inode **inode);
int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
u64 *root_objectid, u64 *objectid,
struct inode **inode);
void btrfs_del_ordered_inode(struct inode *inode, int force);
int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode);
int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
int btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry);
int btrfs_dec_test_ordered_pending(struct inode *inode,
u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len);
int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset);
void btrfs_wait_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry);
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
int btrfs_add_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent *ordered,
u64 start, u64 len);
#endif
......@@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root)
cur_trans->start_time = get_seconds();
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
extent_io_tree_init(&cur_trans->dirty_pages,
root->fs_info->btree_inode->i_mapping,
GFP_NOFS);
......@@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
wake_up(&cur_trans->writer_wait);
if (cur_trans->in_commit && throttle) {
int ret;
DEFINE_WAIT(wait);
mutex_unlock(&root->fs_info->trans_mutex);
ret = wait_for_commit(root, cur_trans);
BUG_ON(ret);
prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
TASK_UNINTERRUPTIBLE);
schedule();
finish_wait(&root->fs_info->transaction_throttle, &wait);
mutex_lock(&root->fs_info->trans_mutex);
}
......@@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
return ret;
}
/*
 * Flush the data of every inode recorded in the transaction's
 * ordered_inode_tree.  Always returns 0.
 *
 * Two passes are made while fs_info->throttles is raised:
 *   1. iterate with btrfs_find_first_ordered_inode() and start writeback
 *      on each regular file via filemap_fdatawrite();
 *   2. repeatedly pull the first entry out of the tree with
 *      btrfs_find_del_first_ordered_inode() and, for regular files,
 *      write and wait via filemap_write_and_wait().
 *
 * trans_mutex is released around the page-cache calls and re-taken
 * afterwards, so the function appears to expect the caller to hold it on
 * entry -- confirm at the call site.
 *
 * NOTE(review): in the diff this chunk was taken from, this function
 * appears to be on the removed side (its prototype and call site are
 * dropped as well) -- confirm before treating it as live code.
 */
int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct inode *inode;
u64 root_objectid = 0;
u64 objectid = 0;
int ret;
atomic_inc(&root->fs_info->throttles);
/* pass 1: kick off writeback without waiting */
while(1) {
ret = btrfs_find_first_ordered_inode(
&cur_trans->ordered_inode_tree,
&root_objectid, &objectid, &inode);
if (!ret)
break;
mutex_unlock(&root->fs_info->trans_mutex);
if (S_ISREG(inode->i_mode)) {
/* ordered_writeback is nonzero only while this flush is in flight */
atomic_inc(&BTRFS_I(inode)->ordered_writeback);
filemap_fdatawrite(inode->i_mapping);
atomic_dec(&BTRFS_I(inode)->ordered_writeback);
}
iput(inode);
mutex_lock(&root->fs_info->trans_mutex);
}
/* pass 2: empty the tree, waiting for each inode's data */
while(1) {
/* restart from the lowest key each time since entries are deleted */
root_objectid = 0;
objectid = 0;
ret = btrfs_find_del_first_ordered_inode(
&cur_trans->ordered_inode_tree,
&root_objectid, &objectid, &inode);
if (!ret)
break;
mutex_unlock(&root->fs_info->trans_mutex);
if (S_ISREG(inode->i_mode)) {
atomic_inc(&BTRFS_I(inode)->ordered_writeback);
filemap_write_and_wait(inode->i_mapping);
atomic_dec(&BTRFS_I(inode)->ordered_writeback);
}
/*
 * Two references are dropped here: presumably one taken by the
 * lookup and one held on behalf of the tree entry -- confirm.
 */
atomic_dec(&inode->i_count);
iput(inode);
mutex_lock(&root->fs_info->trans_mutex);
}
atomic_dec(&root->fs_info->throttles);
return 0;
}
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_pending_snapshot *pending)
......@@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
extent_io_tree_init(pinned_copy,
root->fs_info->btree_inode->i_mapping, GFP_NOFS);
printk("commit trans %Lu\n", trans->transid);
trans->transaction->in_commit = 1;
cur_trans = trans->transaction;
if (cur_trans->list.prev != &root->fs_info->trans_list) {
......@@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait);
ret = btrfs_write_ordered_inodes(trans, root);
} while (cur_trans->num_writers > 1 ||
(cur_trans->num_joined != joined));
......@@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_copy_pinned(root, pinned_copy);
wake_up(&root->fs_info->transaction_throttle);
mutex_unlock(&root->fs_info->trans_mutex);
ret = btrfs_write_and_wait_transaction(trans, root);
BUG_ON(ret);
......@@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
mutex_unlock(&root->fs_info->trans_mutex);
printk("done commit trans %Lu\n", trans->transid);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
if (root->fs_info->closing) {
......
......@@ -19,7 +19,6 @@
#ifndef __BTRFS_TRANSACTION__
#define __BTRFS_TRANSACTION__
#include "btrfs_inode.h"
#include "ordered-data.h"
struct btrfs_transaction {
u64 transid;
......@@ -31,7 +30,6 @@ struct btrfs_transaction {
struct list_head list;
struct extent_io_tree dirty_pages;
unsigned long start_time;
struct btrfs_ordered_inode_tree ordered_inode_tree;
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
......@@ -88,8 +86,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册