提交 cfc442b6 编写于 作者: C Chris Mason

Merge branch 'for-chris' of...

Merge branch 'for-chris' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next into HEAD
......@@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
} else {
cache_no_acl(inode);
}
} else {
cache_no_acl(inode);
}
failed:
posix_acl_release(acl);
......
......@@ -24,6 +24,20 @@
#include "ordered-data.h"
#include "delayed-inode.h"
/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
* the btrfs file release call will add this inode to the
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*/
#define BTRFS_INODE_ORDERED_DATA_CLOSE 0
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
/* in memory btrfs inode */
struct btrfs_inode {
/* which subvolume this inode belongs to */
......@@ -57,9 +71,6 @@ struct btrfs_inode {
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
/* for keeping track of orphaned inodes */
struct list_head i_orphan;
/* list of all the delalloc inodes in the FS. There are times we need
* to write all the delalloc pages to disk, and this list is used
* to walk them all.
......@@ -78,14 +89,13 @@ struct btrfs_inode {
/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;
unsigned long runtime_flags;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
u64 generation;
/* sequence number for NFS changes */
u64 sequence;
/*
* transid of the trans_handle that last modified this inode
*/
......@@ -144,23 +154,10 @@ struct btrfs_inode {
unsigned outstanding_extents;
unsigned reserved_extents;
/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
* the btrfs file release call will add this inode to the
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*/
unsigned ordered_data_close:1;
unsigned orphan_meta_reserved:1;
unsigned dummy_inode:1;
unsigned in_defrag:1;
unsigned delalloc_meta_reserved:1;
/*
* always compress this one file
*/
unsigned force_compress:4;
unsigned force_compress;
struct btrfs_delayed_node *delayed_node;
......@@ -202,4 +199,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
return false;
}
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
mutex_lock(&root->log_mutex);
if (BTRFS_I(inode)->logged_trans == generation &&
BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
ret = 1;
mutex_unlock(&root->log_mutex);
return ret;
}
#endif
此差异已折叠。
......@@ -739,7 +739,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
if (!cur)
return -EIO;
} else if (!uptodate) {
btrfs_read_buffer(cur, gen);
err = btrfs_read_buffer(cur, gen);
if (err) {
free_extent_buffer(cur);
return err;
}
}
}
if (search_start == 0)
......@@ -854,20 +858,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot)
{
if (level == 0) {
if (level == 0)
return generic_bin_search(eb,
offsetof(struct btrfs_leaf, items),
sizeof(struct btrfs_item),
key, btrfs_header_nritems(eb),
slot);
} else {
else
return generic_bin_search(eb,
offsetof(struct btrfs_node, ptrs),
sizeof(struct btrfs_key_ptr),
key, btrfs_header_nritems(eb),
slot);
}
return -1;
}
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
......
......@@ -173,6 +173,9 @@ static int btrfs_csum_sizes[] = { 4, 0 };
#define BTRFS_FT_XATTR 8
#define BTRFS_FT_MAX 9
/* ioprio of readahead is set to idle */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
/*
* The key defines the order in the tree, and so it also defines (optimal)
* block layout.
......@@ -823,6 +826,14 @@ struct btrfs_csum_item {
u8 csum;
} __attribute__ ((__packed__));
struct btrfs_dev_stats_item {
/*
* grow this item struct at the end for future enhancements and keep
* the existing values unchanged
*/
__le64 values[BTRFS_DEV_STAT_VALUES_MAX];
} __attribute__ ((__packed__));
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
......@@ -1375,7 +1386,7 @@ struct btrfs_root {
struct list_head root_list;
spinlock_t orphan_lock;
struct list_head orphan_list;
atomic_t orphan_inodes;
struct btrfs_block_rsv *orphan_block_rsv;
int orphan_item_inserted;
int orphan_cleanup_state;
......@@ -1507,6 +1518,12 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_BALANCE_ITEM_KEY 248
/*
* Persistantly stores the io stats in the device tree.
* One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
*/
#define BTRFS_DEV_STATS_KEY 249
/*
* string items are for debugging. They just store a short string of
* data in the FS
......@@ -2415,6 +2432,30 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
return btrfs_item_size(eb, e) - offset;
}
/* btrfs_dev_stats_item */
static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
struct btrfs_dev_stats_item *ptr,
int index)
{
u64 val;
read_extent_buffer(eb, &val,
offsetof(struct btrfs_dev_stats_item, values) +
((unsigned long)ptr) + (index * sizeof(u64)),
sizeof(val));
return val;
}
static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
struct btrfs_dev_stats_item *ptr,
int index, u64 val)
{
write_extent_buffer(eb, &val,
offsetof(struct btrfs_dev_stats_item, values) +
((unsigned long)ptr) + (index * sizeof(u64)),
sizeof(val));
}
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
......
......@@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata(
return ret;
} else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
spin_lock(&BTRFS_I(inode)->lock);
if (BTRFS_I(inode)->delalloc_meta_reserved) {
BTRFS_I(inode)->delalloc_meta_reserved = 0;
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
spin_unlock(&BTRFS_I(inode)->lock);
release = true;
goto migrate;
......@@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
btrfs_set_stack_inode_generation(inode_item,
BTRFS_I(inode)->generation);
btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
......@@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
inode->i_version = btrfs_stack_inode_sequence(inode_item);
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
......
......@@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->orphan_block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
INIT_LIST_HEAD(&root->root_list);
spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
......@@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
atomic_set(&root->orphan_inodes, 0);
root->log_batch = 0;
root->log_transid = 0;
root->last_log_commit = 0;
......@@ -2001,7 +2001,8 @@ int open_ctree(struct super_block *sb,
BTRFS_I(fs_info->btree_inode)->root = tree_root;
memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
sizeof(struct btrfs_key));
BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
set_bit(BTRFS_INODE_DUMMY,
&BTRFS_I(fs_info->btree_inode)->runtime_flags);
insert_inode_hash(fs_info->btree_inode);
spin_lock_init(&fs_info->block_group_cache_lock);
......@@ -2353,6 +2354,13 @@ int open_ctree(struct super_block *sb,
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
ret = btrfs_init_dev_stats(fs_info);
if (ret) {
printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
ret);
goto fail_block_groups;
}
ret = btrfs_init_space_info(fs_info);
if (ret) {
printk(KERN_ERR "Failed to initial space info: %d\n", ret);
......@@ -2556,18 +2564,19 @@ int open_ctree(struct super_block *sb,
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
char b[BDEVNAME_SIZE];
if (uptodate) {
set_buffer_uptodate(bh);
} else {
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;
printk_ratelimited(KERN_WARNING "lost page write due to "
"I/O error on %s\n",
bdevname(bh->b_bdev, b));
"I/O error on %s\n", device->name);
/* note, we dont' set_buffer_write_io_error because we have
* our own ways of dealing with the IO errors
*/
clear_buffer_uptodate(bh);
btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
}
unlock_buffer(bh);
put_bh(bh);
......@@ -2682,6 +2691,7 @@ static int write_dev_supers(struct btrfs_device *device,
set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
bh->b_private = device;
}
/*
......@@ -2740,6 +2750,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
}
if (!bio_flagged(bio, BIO_UPTODATE)) {
ret = -EIO;
if (!bio_flagged(bio, BIO_EOPNOTSUPP))
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
/* drop the reference from the wait == 0 run */
......@@ -2902,19 +2915,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
return ret;
}
/* Kill all outstanding I/O */
void btrfs_abort_devices(struct btrfs_root *root)
{
struct list_head *head;
struct btrfs_device *dev;
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
head = &root->fs_info->fs_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) {
blk_abort_queue(dev->bdev->bd_disk->queue);
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
}
void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
spin_lock(&fs_info->fs_roots_radix_lock);
......@@ -3671,17 +3671,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
return 0;
}
static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
u64 start, u64 end,
struct extent_state *state)
{
struct super_block *sb = page->mapping->host->i_sb;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
btrfs_error(fs_info, -EIO,
"Error occured while writing out btree at %llu", start);
return -EIO;
}
static struct extent_io_ops btree_extent_io_ops = {
.write_cache_pages_lock_hook = btree_lock_page_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
......@@ -3689,5 +3678,4 @@ static struct extent_io_ops btree_extent_io_ops = {
.submit_bio_hook = btree_submit_bio_hook,
/* note we're sharing with inode.c for the merge bio hook */
.merge_bio_hook = btrfs_merge_bio_hook,
.writepage_io_failed_hook = btree_writepage_io_failed_hook,
};
......@@ -89,7 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
int btrfs_cleanup_transaction(struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
void btrfs_abort_devices(struct btrfs_root *root);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
......
......@@ -3578,7 +3578,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
mutex_unlock(&fs_info->chunk_mutex);
return ret;
}
......@@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode)
BTRFS_I(inode)->outstanding_extents--;
if (BTRFS_I(inode)->outstanding_extents == 0 &&
BTRFS_I(inode)->delalloc_meta_reserved) {
test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags))
drop_inode_space = 1;
BTRFS_I(inode)->delalloc_meta_reserved = 0;
}
/*
* If we have more or the same amount of outsanding extents than we have
......@@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
* Add an item to reserve for updating the inode when we complete the
* delalloc io.
*/
if (!BTRFS_I(inode)->delalloc_meta_reserved) {
if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
nr_extents++;
extra_reserve = 1;
}
......@@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
spin_lock(&BTRFS_I(inode)->lock);
if (extra_reserve) {
BTRFS_I(inode)->delalloc_meta_reserved = 1;
set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags);
nr_extents--;
}
BTRFS_I(inode)->reserved_extents += nr_extents;
......
......@@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
return parent;
}
entry = rb_entry(node, struct tree_entry, rb_node);
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
......@@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state)
/*
* utility function to clear some bits in an extent state struct.
* it will optionally wake up any one waiting on this state (wake == 1)
* it will optionally wake up any one waiting on this state (wake == 1).
*
* If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree
......@@ -570,10 +569,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (err)
goto out;
if (state->end <= end) {
clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
state = clear_state_bit(tree, state, &bits, wake);
goto next;
}
goto search_again;
}
......@@ -781,7 +778,6 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
struct rb_node *next_node;
if (state->state & exclusive_bits) {
*failed_start = state->start;
err = -EEXIST;
......@@ -789,20 +785,15 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
}
set_state_bits(tree, state, &bits);
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
next_node = rb_next(&state->rb_node);
if (next_node && start < end && prealloc && !need_resched()) {
state = rb_entry(next_node, struct extent_state,
rb_node);
if (state->start == start)
goto hit_next;
}
state = next_state(state);
if (start < end && state && state->start == start &&
!need_resched())
goto hit_next;
goto search_again;
}
......@@ -845,6 +836,10 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
state = next_state(state);
if (start < end && state && state->start == start &&
!need_resched())
goto hit_next;
}
goto search_again;
}
......@@ -994,21 +989,14 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
struct rb_node *next_node;
set_state_bits(tree, state, &bits);
clear_state_bit(tree, state, &clear_bits, 0);
state = clear_state_bit(tree, state, &clear_bits, 0);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
next_node = rb_next(&state->rb_node);
if (next_node && start < end && prealloc && !need_resched()) {
state = rb_entry(next_node, struct extent_state,
rb_node);
if (state->start == start)
goto hit_next;
}
if (start < end && state && state->start == start &&
!need_resched())
goto hit_next;
goto search_again;
}
......@@ -1042,10 +1030,13 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
goto out;
if (state->end <= end) {
set_state_bits(tree, state, &bits);
clear_state_bit(tree, state, &clear_bits, 0);
state = clear_state_bit(tree, state, &clear_bits, 0);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
if (start < end && state && state->start == start &&
!need_resched())
goto hit_next;
}
goto search_again;
}
......@@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
cached_state, mask);
}
static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state,
gfp_t mask)
int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask)
{
return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, mask);
......@@ -1293,7 +1283,7 @@ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
* returned if we find something, and *start_ret and *end_ret are
* set to reflect the state struct that was found.
*
* If nothing was found, 1 is returned, < 0 on error
* If nothing was found, 1 is returned. If found something, return 0.
*/
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits)
......@@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
/* try to remap that extent elsewhere? */
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
......@@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
uptodate = 0;
}
if (!uptodate && tree->ops &&
tree->ops->writepage_io_failed_hook) {
ret = tree->ops->writepage_io_failed_hook(NULL, page,
start, end, NULL);
/* Writeback already completed */
if (ret == 0)
return 1;
}
if (!uptodate) {
clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
ClearPageUptodate(page);
SetPageError(page);
}
......@@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
if (ret)
if (ret) {
/* no IO indicated but software detected errors
* in the block, either checksum errors or
* issues with the contents */
struct btrfs_root *root =
BTRFS_I(page->mapping->host)->root;
struct btrfs_device *device;
uptodate = 0;
else
device = btrfs_find_device_for_logical(
root, start, mirror);
if (device)
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
} else {
clean_io_failure(start, page);
}
}
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
......@@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
unsigned long i, num_pages;
int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
int ret;
int ret = 0;
clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
......@@ -3981,11 +3975,13 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
unsigned long start_idx)
{
unsigned long index;
unsigned long num_pages;
struct page *page;
BUG_ON(extent_buffer_under_io(eb));
index = num_extent_pages(eb->start, eb->len);
num_pages = num_extent_pages(eb->start, eb->len);
index = start_idx + num_pages;
if (start_idx >= index)
return;
......
......@@ -75,9 +75,6 @@ struct extent_io_ops {
unsigned long bio_flags);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
u64 start, u64 end,
struct extent_state *state);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
......@@ -225,6 +222,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
......
......@@ -65,6 +65,21 @@ struct inode_defrag {
int cycled;
};
static int __compare_inode_defrag(struct inode_defrag *defrag1,
struct inode_defrag *defrag2)
{
if (defrag1->root > defrag2->root)
return 1;
else if (defrag1->root < defrag2->root)
return -1;
else if (defrag1->ino > defrag2->ino)
return 1;
else if (defrag1->ino < defrag2->ino)
return -1;
else
return 0;
}
/* pop a record for an inode into the defrag tree. The lock
* must be held already
*
......@@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
struct inode_defrag *entry;
struct rb_node **p;
struct rb_node *parent = NULL;
int ret;
p = &root->fs_info->defrag_inodes.rb_node;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
if (defrag->ino < entry->ino)
ret = __compare_inode_defrag(defrag, entry);
if (ret < 0)
p = &parent->rb_left;
else if (defrag->ino > entry->ino)
else if (ret > 0)
p = &parent->rb_right;
else {
/* if we're reinserting an entry for
......@@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
goto exists;
}
}
BTRFS_I(inode)->in_defrag = 1;
set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
rb_link_node(&defrag->rb_node, parent, p);
rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
return;
......@@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
if (btrfs_fs_closing(root->fs_info))
return 0;
if (BTRFS_I(inode)->in_defrag)
if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
return 0;
if (trans)
......@@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
defrag->root = root->root_key.objectid;
spin_lock(&root->fs_info->defrag_inodes_lock);
if (!BTRFS_I(inode)->in_defrag)
if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
__btrfs_add_inode_defrag(inode, defrag);
else
kfree(defrag);
......@@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
/*
* must be called with the defrag_inodes lock held
*/
struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
u64 root, u64 ino,
struct rb_node **next)
{
struct inode_defrag *entry = NULL;
struct inode_defrag tmp;
struct rb_node *p;
struct rb_node *parent = NULL;
int ret;
tmp.ino = ino;
tmp.root = root;
p = info->defrag_inodes.rb_node;
while (p) {
parent = p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
if (ino < entry->ino)
ret = __compare_inode_defrag(&tmp, entry);
if (ret < 0)
p = parent->rb_left;
else if (ino > entry->ino)
else if (ret > 0)
p = parent->rb_right;
else
return entry;
}
if (next) {
while (parent && ino > entry->ino) {
while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
parent = rb_next(parent);
entry = rb_entry(parent, struct inode_defrag, rb_node);
}
......@@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
u64 first_ino = 0;
u64 root_objectid = 0;
int num_defrag;
int defrag_batch = 1024;
......@@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
n = NULL;
/* find an inode to defrag */
defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
first_ino, &n);
if (!defrag) {
if (n)
defrag = rb_entry(n, struct inode_defrag, rb_node);
else if (first_ino) {
if (n) {
defrag = rb_entry(n, struct inode_defrag,
rb_node);
} else if (root_objectid || first_ino) {
root_objectid = 0;
first_ino = 0;
continue;
} else {
......@@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
/* remove it from the rbtree */
first_ino = defrag->ino + 1;
root_objectid = defrag->root;
rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
if (btrfs_fs_closing(fs_info))
......@@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
goto next;
/* do a chunk of defrag */
BTRFS_I(inode)->in_defrag = 0;
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
range.start = defrag->last_offset;
num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
defrag_batch);
......@@ -1409,7 +1438,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
mutex_unlock(&inode->i_mutex);
goto out;
}
BTRFS_I(inode)->sequence++;
start_pos = round_down(pos, root->sectorsize);
if (start_pos > i_size_read(inode)) {
......@@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
* flush down new bytes that may have been written if the
* application were using truncate to replace a file in place.
*/
if (BTRFS_I(inode)->ordered_data_close) {
BTRFS_I(inode)->ordered_data_close = 0;
if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(inode->i_mapping);
......@@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_btrfs_sync_file(file, datasync);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
mutex_lock(&inode->i_mutex);
/* we wait first, since the writeback may change the inode */
/*
* we wait first, since the writeback may change the inode, also wait
* ordered range does a filemape_write_and_wait_range which is why we
* don't do it above like other file systems.
*/
root->log_batch++;
btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_wait_ordered_range(inode, start, end);
root->log_batch++;
/*
......@@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* syncing
*/
smp_mb();
if (BTRFS_I(inode)->last_trans <=
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
mutex_unlock(&inode->i_mutex);
......
......@@ -33,6 +33,8 @@
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_path *path,
......@@ -584,6 +586,44 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
return 0;
}
/*
* Since we attach pinned extents after the fact we can have contiguous sections
* of free space that are split up in entries. This poses a problem with the
* tree logging stuff since it could have allocated across what appears to be 2
* entries since we would have merged the entries when adding the pinned extents
* back to the free space cache. So run through the space cache that we just
* loaded and merge contiguous entries. This will make the log replay stuff not
* blow up and it will make for nicer allocator behavior.
*/
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *e, *prev = NULL;
struct rb_node *n;
again:
spin_lock(&ctl->tree_lock);
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
e = rb_entry(n, struct btrfs_free_space, offset_index);
if (!prev)
goto next;
if (e->bitmap || prev->bitmap)
goto next;
if (prev->offset + prev->bytes == e->offset) {
unlink_free_space(ctl, prev);
unlink_free_space(ctl, e);
prev->bytes += e->bytes;
kmem_cache_free(btrfs_free_space_cachep, e);
link_free_space(ctl, prev);
prev = NULL;
spin_unlock(&ctl->tree_lock);
goto again;
}
next:
prev = e;
}
spin_unlock(&ctl->tree_lock);
}
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_path *path, u64 offset)
......@@ -726,6 +766,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
io_ctl_drop_pages(&io_ctl);
merge_space_tree(ctl);
ret = 1;
out:
io_ctl_free(&io_ctl);
......@@ -972,9 +1013,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
goto out;
ret = filemap_write_and_wait(inode->i_mapping);
if (ret)
goto out;
btrfs_wait_ordered_range(inode, 0, (u64)-1);
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
......
此差异已折叠。
......@@ -261,6 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
}
btrfs_update_iflags(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, inode);
......@@ -2262,10 +2263,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
di_args->bytes_used = dev->bytes_used;
di_args->total_bytes = dev->total_bytes;
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
if (dev->name)
if (dev->name) {
strncpy(di_args->path, dev->name, sizeof(di_args->path));
else
di_args->path[sizeof(di_args->path) - 1] = 0;
} else {
di_args->path[0] = '\0';
}
out:
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
......@@ -2622,6 +2625,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
inode_inc_iversion(inode);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
/*
......@@ -2914,7 +2918,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
up_read(&info->groups_sem);
}
user_dest = (struct btrfs_ioctl_space_info *)
user_dest = (struct btrfs_ioctl_space_info __user *)
(arg + sizeof(struct btrfs_ioctl_space_args));
if (copy_to_user(user_dest, dest_orig, alloc_size))
......@@ -3042,6 +3046,28 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
return ret;
}
static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
void __user *arg, int reset_after_read)
{
struct btrfs_ioctl_get_dev_stats *sa;
int ret;
if (reset_after_read && !capable(CAP_SYS_ADMIN))
return -EPERM;
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
ret = btrfs_get_dev_stats(root, sa, reset_after_read);
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
kfree(sa);
return ret;
}
static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
{
int ret = 0;
......@@ -3212,8 +3238,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
}
}
static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
......@@ -3225,6 +3252,10 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
ret = mnt_want_write(file->f_path.mnt);
if (ret)
return ret;
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
......@@ -3291,6 +3322,7 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
out:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
mnt_drop_write(file->f_path.mnt);
return ret;
}
......@@ -3386,7 +3418,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(root, NULL);
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
......@@ -3419,11 +3451,15 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SCRUB_PROGRESS:
return btrfs_ioctl_scrub_progress(root, argp);
case BTRFS_IOC_BALANCE_V2:
return btrfs_ioctl_balance(root, argp);
return btrfs_ioctl_balance(file, argp);
case BTRFS_IOC_BALANCE_CTL:
return btrfs_ioctl_balance_ctl(root, arg);
case BTRFS_IOC_BALANCE_PROGRESS:
return btrfs_ioctl_balance_progress(root, argp);
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp, 0);
case BTRFS_IOC_GET_AND_RESET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp, 1);
}
return -ENOTTY;
......
......@@ -266,6 +266,35 @@ struct btrfs_ioctl_logical_ino_args {
__u64 inodes;
};
enum btrfs_dev_stat_values {
/* disk I/O failure stats */
BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
/* stats for indirect indications for I/O failures */
BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
* contents is illegal: this is an
* indication that the block was damaged
* during read or write, or written to
* wrong location or read from wrong
* location */
BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
* been written */
BTRFS_DEV_STAT_VALUES_MAX
};
struct btrfs_ioctl_get_dev_stats {
__u64 devid; /* in */
__u64 nr_items; /* in/out */
/* out values: */
__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
};
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
......@@ -330,5 +359,9 @@ struct btrfs_ioctl_logical_ino_args {
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
struct btrfs_ioctl_get_dev_stats)
#endif
......@@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->len = len;
entry->disk_len = disk_len;
entry->bytes_left = len;
entry->inode = inode;
entry->inode = igrab(inode);
entry->compress_type = compress_type;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
......@@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
trace_btrfs_ordered_extent_add(inode, entry);
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
node = tree_insert(&tree->tree, file_offset,
&entry->rb_node);
if (node)
ordered_data_tree_panic(inode, -EEXIST, file_offset);
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
list_add_tail(&entry->root_extent_list,
......@@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_inode_tree *tree;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
list_add_tail(&sum->list, &entry->list);
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
}
/*
......@@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode,
*/
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size)
u64 *file_offset, u64 io_size, int uptodate)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
int ret;
unsigned long flags;
u64 dec_end;
u64 dec_start;
u64 to_dec;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irqsave(&tree->lock, flags);
node = tree_search(tree, *file_offset);
if (!node) {
ret = 1;
......@@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
(unsigned long long)to_dec);
}
entry->bytes_left -= to_dec;
if (!uptodate)
set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
if (entry->bytes_left == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
else
......@@ -332,7 +336,7 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
*cached = entry;
atomic_inc(&entry->refs);
}
spin_unlock(&tree->lock);
spin_unlock_irqrestore(&tree->lock, flags);
return ret == 0;
}
......@@ -347,15 +351,21 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
*/
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size)
u64 file_offset, u64 io_size, int uptodate)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
unsigned long flags;
int ret;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irqsave(&tree->lock, flags);
if (cached && *cached) {
entry = *cached;
goto have_entry;
}
node = tree_search(tree, file_offset);
if (!node) {
ret = 1;
......@@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
}
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
have_entry:
if (!offset_in_entry(entry, file_offset)) {
ret = 1;
goto out;
......@@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
(unsigned long long)io_size);
}
entry->bytes_left -= io_size;
if (!uptodate)
set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
if (entry->bytes_left == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
else
......@@ -383,7 +397,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
*cached = entry;
atomic_inc(&entry->refs);
}
spin_unlock(&tree->lock);
spin_unlock_irqrestore(&tree->lock, flags);
return ret == 0;
}
......@@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
trace_btrfs_ordered_extent_put(entry->inode, entry);
if (atomic_dec_and_test(&entry->refs)) {
if (entry->inode)
btrfs_add_delayed_iput(entry->inode);
while (!list_empty(&entry->list)) {
cur = entry->list.next;
sum = list_entry(cur, struct btrfs_ordered_sum, list);
......@@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
/*
* remove an ordered extent from the tree. No references are dropped
* and you must wake_up entry->wait. You must hold the tree lock
* while you call this function.
* and waiters are woken up.
*/
static void __btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
{
struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct rb_node *node;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
spin_unlock_irq(&tree->lock);
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
......@@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode,
list_del_init(&BTRFS_I(inode)->ordered_operations);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
}
/*
* remove an ordered extent from the tree. No references are dropped
* but any waiters are woken.
*/
void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
{
struct btrfs_ordered_inode_tree *tree;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
__btrfs_remove_ordered_extent(inode, entry);
spin_unlock(&tree->lock);
wake_up(&entry->wait);
}
......@@ -621,19 +623,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
if (orig_end > INT_LIMIT(loff_t))
orig_end = INT_LIMIT(loff_t);
}
again:
/* start IO across the range first to instantiate any delalloc
* extents
*/
filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
/* The compression code will leave pages locked but return from
* writepage without setting the page writeback. Starting again
* with WB_SYNC_ALL will end up waiting for the IO to actually start.
*/
filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
filemap_fdatawait_range(inode->i_mapping, start, orig_end);
filemap_write_and_wait_range(inode->i_mapping, start, orig_end);
end = orig_end;
found = 0;
......@@ -657,11 +651,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
break;
end--;
}
if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
EXTENT_DELALLOC, 0, NULL)) {
schedule_timeout(1);
goto again;
}
}
/*
......@@ -676,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node)
goto out;
......@@ -687,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
if (entry)
atomic_inc(&entry->refs);
out:
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
return entry;
}
......@@ -703,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node) {
node = tree_search(tree, file_offset + len);
......@@ -728,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
out:
if (entry)
atomic_inc(&entry->refs);
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
return entry;
}
......@@ -744,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node)
goto out;
......@@ -752,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
atomic_inc(&entry->refs);
out:
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
return entry;
}
......@@ -764,7 +753,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered)
{
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 disk_i_size;
u64 new_i_size;
u64 i_size_test;
......@@ -779,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
else
offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
disk_i_size = BTRFS_I(inode)->disk_i_size;
/* truncate file */
......@@ -797,14 +785,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
goto out;
}
/*
* we can't update the disk_isize if there are delalloc bytes
* between disk_i_size and this ordered extent
*/
if (test_range_bit(io_tree, disk_i_size, offset - 1,
EXTENT_DELALLOC, 0, NULL)) {
goto out;
}
/*
* walk backward from this ordered extent to disk_i_size.
* if we find an ordered extent then we can't update disk i_size
......@@ -825,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
}
node = prev;
}
while (node) {
for (; node; node = rb_prev(node)) {
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
/* We treat this entry as if it doesnt exist */
if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
continue;
if (test->file_offset + test->len <= disk_i_size)
break;
if (test->file_offset >= i_size)
break;
if (test->file_offset >= disk_i_size)
goto out;
node = rb_prev(node);
}
new_i_size = min_t(u64, offset, i_size);
......@@ -851,43 +834,49 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
else
node = rb_first(&tree->tree);
}
i_size_test = 0;
if (node) {
/*
* do we have an area where IO might have finished
* between our ordered extent and the next one.
*/
/*
* We are looking for an area between our current extent and the next
* ordered extent to update the i_size to. There are 3 cases here
*
* 1) We don't actually have anything and we can update to i_size.
* 2) We have stuff but they already did their i_size update so again we
* can just update to i_size.
* 3) We have an outstanding ordered extent so the most we can update
* our disk_i_size to is the start of the next offset.
*/
i_size_test = i_size;
for (; node; node = rb_next(node)) {
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (test->file_offset > offset)
if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
continue;
if (test->file_offset > offset) {
i_size_test = test->file_offset;
} else {
i_size_test = i_size;
break;
}
}
/*
* i_size_test is the end of a region after this ordered
* extent where there are no ordered extents. As long as there
* are no delalloc bytes in this area, it is safe to update
* disk_i_size to the end of the region.
* extent where there are no ordered extents, we can safely set
* disk_i_size to this.
*/
if (i_size_test > offset &&
!test_range_bit(io_tree, offset, i_size_test - 1,
EXTENT_DELALLOC, 0, NULL)) {
if (i_size_test > offset)
new_i_size = min_t(u64, i_size_test, i_size);
}
BTRFS_I(inode)->disk_i_size = new_i_size;
ret = 0;
out:
/*
* we need to remove the ordered extent with the tree lock held
* so that other people calling this function don't find our fully
* processed ordered entry and skip updating the i_size
* We need to do this because we can't remove ordered extents until
* after the i_disk_size has been updated and then the inode has been
* updated to reflect the change, so we need to tell anybody who finds
* this ordered extent that we've already done all the real work, we
* just haven't completed all the other work.
*/
if (ordered)
__btrfs_remove_ordered_extent(inode, ordered);
spin_unlock(&tree->lock);
if (ordered)
wake_up(&ordered->wait);
set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
spin_unlock_irq(&tree->lock);
return ret;
}
......@@ -912,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
if (!ordered)
return 1;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
if (disk_bytenr >= ordered_sum->bytenr) {
num_sectors = ordered_sum->len / sectorsize;
......@@ -927,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
}
}
out:
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
btrfs_put_ordered_extent(ordered);
return ret;
}
......
......@@ -74,6 +74,12 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
#define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */
#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent
* has done its due diligence in updating
* the isize. */
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
......@@ -113,6 +119,8 @@ struct btrfs_ordered_extent {
/* a per root list of all the pending ordered extents */
struct list_head root_extent_list;
struct btrfs_work work;
};
......@@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry);
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
u64 file_offset, u64 io_size, int uptodate);
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size);
u64 *file_offset, u64 io_size,
int uptodate);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
......
......@@ -294,6 +294,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_dev_extent_chunk_offset(l, dev_extent),
(unsigned long long)
btrfs_dev_extent_length(l, dev_extent));
case BTRFS_DEV_STATS_KEY:
printk(KERN_INFO "\t\tdevice stats\n");
break;
};
}
}
......
......@@ -718,13 +718,18 @@ static void reada_start_machine_worker(struct btrfs_work *work)
{
struct reada_machine_work *rmw;
struct btrfs_fs_info *fs_info;
int old_ioprio;
rmw = container_of(work, struct reada_machine_work, work);
fs_info = rmw->fs_info;
kfree(rmw);
old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
task_nice_ioprio(current));
set_task_ioprio(current, BTRFS_IOPRIO_READA);
__reada_start_machine(fs_info);
set_task_ioprio(current, old_ioprio);
}
static void __reada_start_machine(struct btrfs_fs_info *fs_info)
......
......@@ -50,7 +50,7 @@ struct scrub_dev;
struct scrub_page {
struct scrub_block *sblock;
struct page *page;
struct block_device *bdev;
struct btrfs_device *dev;
u64 flags; /* extent flags */
u64 generation;
u64 logical;
......@@ -86,6 +86,7 @@ struct scrub_block {
unsigned int header_error:1;
unsigned int checksum_error:1;
unsigned int no_io_error_seen:1;
unsigned int generation_error:1; /* also sets header_error */
};
};
......@@ -675,6 +676,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sdev->stat.read_errors++;
sdev->stat.uncorrectable_errors++;
spin_unlock(&sdev->stat_lock);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_READ_ERRS);
goto out;
}
......@@ -686,6 +689,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sdev->stat.read_errors++;
sdev->stat.uncorrectable_errors++;
spin_unlock(&sdev->stat_lock);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_READ_ERRS);
goto out;
}
BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
......@@ -699,6 +704,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sdev->stat.read_errors++;
sdev->stat.uncorrectable_errors++;
spin_unlock(&sdev->stat_lock);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_READ_ERRS);
goto out;
}
......@@ -725,12 +732,16 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
spin_unlock(&sdev->stat_lock);
if (__ratelimit(&_rs))
scrub_print_warning("i/o error", sblock_to_check);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_READ_ERRS);
} else if (sblock_bad->checksum_error) {
spin_lock(&sdev->stat_lock);
sdev->stat.csum_errors++;
spin_unlock(&sdev->stat_lock);
if (__ratelimit(&_rs))
scrub_print_warning("checksum error", sblock_to_check);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
} else if (sblock_bad->header_error) {
spin_lock(&sdev->stat_lock);
sdev->stat.verify_errors++;
......@@ -738,6 +749,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
if (__ratelimit(&_rs))
scrub_print_warning("checksum/header error",
sblock_to_check);
if (sblock_bad->generation_error)
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_GENERATION_ERRS);
else
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
}
if (sdev->readonly)
......@@ -998,8 +1015,8 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev,
page = sblock->pagev + page_index;
page->logical = logical;
page->physical = bbio->stripes[mirror_index].physical;
/* for missing devices, bdev is NULL */
page->bdev = bbio->stripes[mirror_index].dev->bdev;
/* for missing devices, dev->bdev is NULL */
page->dev = bbio->stripes[mirror_index].dev;
page->mirror_num = mirror_index + 1;
page->page = alloc_page(GFP_NOFS);
if (!page->page) {
......@@ -1043,7 +1060,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
struct scrub_page *page = sblock->pagev + page_num;
DECLARE_COMPLETION_ONSTACK(complete);
if (page->bdev == NULL) {
if (page->dev->bdev == NULL) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
continue;
......@@ -1053,7 +1070,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
bio = bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_bdev = page->bdev;
bio->bi_bdev = page->dev->bdev;
bio->bi_sector = page->physical >> 9;
bio->bi_end_io = scrub_complete_bio_end_io;
bio->bi_private = &complete;
......@@ -1102,11 +1119,14 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
h = (struct btrfs_header *)mapped_buffer;
if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
generation != le64_to_cpu(h->generation) ||
memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
BTRFS_UUID_SIZE))
BTRFS_UUID_SIZE)) {
sblock->header_error = 1;
} else if (generation != le64_to_cpu(h->generation)) {
sblock->header_error = 1;
sblock->generation_error = 1;
}
csum = h->csum;
} else {
if (!have_csum)
......@@ -1182,7 +1202,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
bio = bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_bdev = page_bad->bdev;
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_sector = page_bad->physical >> 9;
bio->bi_end_io = scrub_complete_bio_end_io;
bio->bi_private = &complete;
......@@ -1196,6 +1216,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
/* this will also unplug the queue */
wait_for_completion(&complete);
if (!bio_flagged(bio, BIO_UPTODATE)) {
btrfs_dev_stat_inc_and_print(page_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
bio_put(bio);
return -EIO;
}
bio_put(bio);
}
......@@ -1352,7 +1378,8 @@ static int scrub_checksum_super(struct scrub_block *sblock)
u64 mapped_size;
void *p;
u32 crc = ~(u32)0;
int fail = 0;
int fail_gen = 0;
int fail_cor = 0;
u64 len;
int index;
......@@ -1363,13 +1390,13 @@ static int scrub_checksum_super(struct scrub_block *sblock)
memcpy(on_disk_csum, s->csum, sdev->csum_size);
if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
++fail;
++fail_cor;
if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
++fail;
++fail_gen;
if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
++fail;
++fail_cor;
len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
......@@ -1394,9 +1421,9 @@ static int scrub_checksum_super(struct scrub_block *sblock)
btrfs_csum_final(crc, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
++fail;
++fail_cor;
if (fail) {
if (fail_cor + fail_gen) {
/*
* if we find an error in a super block, we just report it.
* They will get written with the next transaction commit
......@@ -1405,9 +1432,15 @@ static int scrub_checksum_super(struct scrub_block *sblock)
spin_lock(&sdev->stat_lock);
++sdev->stat.super_errors;
spin_unlock(&sdev->stat_lock);
if (fail_cor)
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
else
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_GENERATION_ERRS);
}
return fail;
return fail_cor + fail_gen;
}
static void scrub_block_get(struct scrub_block *sblock)
......@@ -1551,7 +1584,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
return -ENOMEM;
}
spage->sblock = sblock;
spage->bdev = sdev->dev->bdev;
spage->dev = sdev->dev;
spage->flags = flags;
spage->generation = gen;
spage->logical = logical;
......
......@@ -188,7 +188,8 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
va_start(args, fmt);
if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
strncpy(lvl, fmt, 3);
memcpy(lvl, fmt, 3);
lvl[3] = '\0';
fmt += 3;
type = logtypes[fmt[1] - '0'];
} else
......@@ -435,11 +436,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_thread_pool:
intarg = 0;
match_int(&args[0], &intarg);
if (intarg) {
if (intarg)
info->thread_pool_size = intarg;
printk(KERN_INFO "btrfs: thread pool %d\n",
info->thread_pool_size);
}
break;
case Opt_max_inline:
num = match_strdup(&args[0]);
......@@ -769,7 +767,7 @@ static int btrfs_fill_super(struct super_block *sb,
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
sb->s_flags |= MS_POSIXACL;
#endif
sb->s_flags |= MS_I_VERSION;
err = open_ctree(sb, fs_devices, (char *)data);
if (err) {
printk("btrfs: open_ctree failed\n");
......@@ -925,63 +923,48 @@ static inline int is_subvolume_inode(struct inode *inode)
*/
static char *setup_root_args(char *args)
{
unsigned copied = 0;
unsigned len = strlen(args) + 2;
char *pos;
char *ret;
unsigned len = strlen(args) + 2 + 1;
char *src, *dst, *buf;
/*
* We need the same args as before, but minus
*
* subvol=a
*
* and add
*
* subvolid=0
* We need the same args as before, but with this substitution:
* s!subvol=[^,]+!subvolid=0!
*
* which is a difference of 2 characters, so we allocate strlen(args) +
* 2 characters.
* Since the replacement string is up to 2 bytes longer than the
* original, allocate strlen(args) + 2 + 1 bytes.
*/
ret = kzalloc(len * sizeof(char), GFP_NOFS);
if (!ret)
return NULL;
pos = strstr(args, "subvol=");
src = strstr(args, "subvol=");
/* This shouldn't happen, but just in case.. */
if (!pos) {
kfree(ret);
if (!src)
return NULL;
buf = dst = kmalloc(len, GFP_NOFS);
if (!buf)
return NULL;
}
/*
* The subvol=<> arg is not at the front of the string, copy everybody
* up to that into ret.
* If the subvol= arg is not at the start of the string,
* copy whatever precedes it into buf.
*/
if (pos != args) {
*pos = '\0';
strcpy(ret, args);
copied += strlen(args);
pos++;
if (src != args) {
*src++ = '\0';
strcpy(buf, args);
dst += strlen(args);
}
strncpy(ret + copied, "subvolid=0", len - copied);
/* Length of subvolid=0 */
copied += 10;
strcpy(dst, "subvolid=0");
dst += strlen("subvolid=0");
/*
* If there is no , after the subvol= option then we know there's no
* other options and we can just return.
* If there is a "," after the original subvol=... string,
* copy that suffix into our buffer. Otherwise, we're done.
*/
pos = strchr(pos, ',');
if (!pos)
return ret;
src = strchr(src, ',');
if (src)
strcpy(dst, src);
/* Copy the rest of the arguments into our buffer */
strncpy(ret + copied, pos, len - copied);
copied += strlen(pos);
return ret;
return buf;
}
static struct dentry *mount_subvol(const char *subvol_name, int flags,
......@@ -1118,6 +1101,40 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
return ERR_PTR(error);
}
static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit)
{
spin_lock_irq(&workers->lock);
workers->max_workers = new_limit;
spin_unlock_irq(&workers->lock);
}
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
int new_pool_size, int old_pool_size)
{
if (new_pool_size == old_pool_size)
return;
fs_info->thread_pool_size = new_pool_size;
printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n",
old_pool_size, new_pool_size);
btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size);
btrfs_set_max_workers(&fs_info->workers, new_pool_size);
btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size);
btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size);
btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size);
}
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
......@@ -1137,6 +1154,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
}
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
return 0;
......@@ -1180,7 +1200,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
fs_info->compress_type = old_compress_type;
fs_info->max_inline = old_max_inline;
fs_info->alloc_start = old_alloc_start;
fs_info->thread_pool_size = old_thread_pool_size;
btrfs_resize_thread_pool(fs_info,
old_thread_pool_size, fs_info->thread_pool_size);
fs_info->metadata_ratio = old_metadata_ratio;
return ret;
}
......
......@@ -28,6 +28,7 @@
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"
#define BTRFS_ROOT_TRANS_TAG 0
......@@ -758,6 +759,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
if (ret)
return ret;
ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret);
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
......
......@@ -1628,7 +1628,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
int i;
int ret;
btrfs_read_buffer(eb, gen);
ret = btrfs_read_buffer(eb, gen);
if (ret)
return ret;
level = btrfs_header_level(eb);
......@@ -1749,7 +1751,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
path->slots[*level]++;
if (wc->free) {
btrfs_read_buffer(next, ptr_gen);
ret = btrfs_read_buffer(next, ptr_gen);
if (ret) {
free_extent_buffer(next);
return ret;
}
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
......@@ -1766,7 +1772,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
free_extent_buffer(next);
continue;
}
btrfs_read_buffer(next, ptr_gen);
ret = btrfs_read_buffer(next, ptr_gen);
if (ret) {
free_extent_buffer(next);
return ret;
}
WARN_ON(*level <= 0);
if (path->nodes[*level-1])
......@@ -2657,6 +2667,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
btrfs_release_path(path);
if (ret > 0)
ret = 0;
return ret;
}
......@@ -3028,21 +3040,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
return ret;
}
static int inode_in_log(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
mutex_lock(&root->log_mutex);
if (BTRFS_I(inode)->logged_trans == trans->transid &&
BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
ret = 1;
mutex_unlock(&root->log_mutex);
return ret;
}
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
......@@ -3083,7 +3080,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
if (inode_in_log(trans, inode)) {
if (btrfs_inode_in_log(inode, trans->transid)) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
}
......
......@@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit);
*
* The allocated ulist will be returned in an initialized state.
*/
struct ulist *ulist_alloc(unsigned long gfp_mask)
struct ulist *ulist_alloc(gfp_t gfp_mask)
{
struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
......@@ -144,7 +144,7 @@ EXPORT_SYMBOL(ulist_free);
* unaltered.
*/
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long gfp_mask)
gfp_t gfp_mask)
{
int i;
......
......@@ -59,10 +59,9 @@ struct ulist {
void ulist_init(struct ulist *ulist);
void ulist_fini(struct ulist *ulist);
void ulist_reinit(struct ulist *ulist);
struct ulist *ulist_alloc(unsigned long gfp_mask);
struct ulist *ulist_alloc(gfp_t gfp_mask);
void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long gfp_mask);
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, gfp_t gfp_mask);
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev);
#endif
......@@ -23,6 +23,7 @@
#include <linux/random.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <asm/div64.h>
#include "compat.h"
......@@ -39,6 +40,8 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_device *device);
static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
......@@ -361,6 +364,7 @@ static noinline int device_list_add(const char *path,
return -ENOMEM;
}
device->devid = devid;
device->dev_stats_valid = 0;
device->work.func = pending_bios_fn;
memcpy(device->uuid, disk_super->dev_item.uuid,
BTRFS_UUID_SIZE);
......@@ -1633,7 +1637,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
int ret = 0;
if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
return -EINVAL;
return -EROFS;
bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
root->fs_info->bdev_holder);
......@@ -4001,13 +4005,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
return 0;
}
static void *merge_stripe_index_into_bio_private(void *bi_private,
unsigned int stripe_index)
{
/*
* with single, dup, RAID0, RAID1 and RAID10, stripe_index is
* at most 1.
* The alternative solution (instead of stealing bits from the
* pointer) would be to allocate an intermediate structure
* that contains the old private pointer plus the stripe_index.
*/
BUG_ON((((uintptr_t)bi_private) & 3) != 0);
BUG_ON(stripe_index > 3);
return (void *)(((uintptr_t)bi_private) | stripe_index);
}
static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
{
return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
}
static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
{
return (unsigned int)((uintptr_t)bi_private) & 3;
}
static void btrfs_end_bio(struct bio *bio, int err)
{
struct btrfs_bio *bbio = bio->bi_private;
struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
int is_orig_bio = 0;
if (err)
if (err) {
atomic_inc(&bbio->error);
if (err == -EIO || err == -EREMOTEIO) {
unsigned int stripe_index =
extract_stripe_index_from_bio_private(
bio->bi_private);
struct btrfs_device *dev;
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
if (bio->bi_rw & WRITE)
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_READ_ERRS);
if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
btrfs_dev_stat_print_on_error(dev);
}
}
if (bio == bbio->orig_bio)
is_orig_bio = 1;
......@@ -4149,6 +4198,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bio = first_bio;
}
bio->bi_private = bbio;
bio->bi_private = merge_stripe_index_into_bio_private(
bio->bi_private, (unsigned int)dev_nr);
bio->bi_end_io = btrfs_end_bio;
bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
dev = bbio->stripes[dev_nr].dev;
......@@ -4509,6 +4560,28 @@ int btrfs_read_sys_array(struct btrfs_root *root)
return ret;
}
struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
u64 logical, int mirror_num)
{
struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
int ret;
u64 map_length = 0;
struct btrfs_bio *bbio = NULL;
struct btrfs_device *device;
BUG_ON(mirror_num == 0);
ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
mirror_num);
if (ret) {
BUG_ON(bbio != NULL);
return NULL;
}
BUG_ON(mirror_num != bbio->mirror_num);
device = bbio->stripes[mirror_num - 1].dev;
kfree(bbio);
return device;
}
int btrfs_read_chunk_tree(struct btrfs_root *root)
{
struct btrfs_path *path;
......@@ -4583,3 +4656,230 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
btrfs_free_path(path);
return ret;
}
static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
{
int i;
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
btrfs_dev_stat_reset(dev, i);
}
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
{
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct extent_buffer *eb;
int slot;
int ret = 0;
struct btrfs_device *device;
struct btrfs_path *path = NULL;
int i;
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
int item_size;
struct btrfs_dev_stats_item *ptr;
key.objectid = 0;
key.type = BTRFS_DEV_STATS_KEY;
key.offset = device->devid;
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
device->name, (unsigned long long)device->devid);
__btrfs_reset_dev_stats(device);
device->dev_stats_valid = 1;
btrfs_release_path(path);
continue;
}
slot = path->slots[0];
eb = path->nodes[0];
btrfs_item_key_to_cpu(eb, &found_key, slot);
item_size = btrfs_item_size_nr(eb, slot);
ptr = btrfs_item_ptr(eb, slot,
struct btrfs_dev_stats_item);
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
if (item_size >= (1 + i) * sizeof(__le64))
btrfs_dev_stat_set(device, i,
btrfs_dev_stats_value(eb, ptr, i));
else
btrfs_dev_stat_reset(device, i);
}
device->dev_stats_valid = 1;
btrfs_dev_stat_print_on_load(device);
btrfs_release_path(path);
}
mutex_unlock(&fs_devices->device_list_mutex);
out:
btrfs_free_path(path);
return ret < 0 ? ret : 0;
}
static int update_dev_stat_item(struct btrfs_trans_handle *trans,
struct btrfs_root *dev_root,
struct btrfs_device *device)
{
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *eb;
struct btrfs_dev_stats_item *ptr;
int ret;
int i;
key.objectid = 0;
key.type = BTRFS_DEV_STATS_KEY;
key.offset = device->devid;
path = btrfs_alloc_path();
BUG_ON(!path);
ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
if (ret < 0) {
printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
ret, device->name);
goto out;
}
if (ret == 0 &&
btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
/* need to delete old one and insert a new one */
ret = btrfs_del_item(trans, dev_root, path);
if (ret != 0) {
printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
device->name, ret);
goto out;
}
ret = 1;
}
if (ret == 1) {
/* need to insert a new item */
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, dev_root, path,
&key, sizeof(*ptr));
if (ret < 0) {
printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
device->name, ret);
goto out;
}
}
eb = path->nodes[0];
ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
btrfs_set_dev_stats_value(eb, ptr, i,
btrfs_dev_stat_read(device, i));
btrfs_mark_buffer_dirty(eb);
out:
btrfs_free_path(path);
return ret;
}
/*
* called from commit_transaction. Writes all changed device stats to disk.
*/
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
int ret = 0;
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (!device->dev_stats_valid || !device->dev_stats_dirty)
continue;
ret = update_dev_stat_item(trans, dev_root, device);
if (!ret)
device->dev_stats_dirty = 0;
}
mutex_unlock(&fs_devices->device_list_mutex);
return ret;
}
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
btrfs_dev_stat_inc(dev, index);
btrfs_dev_stat_print_on_error(dev);
}
void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
{
if (!dev->dev_stats_valid)
return;
printk_ratelimited(KERN_ERR
"btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
dev->name,
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
btrfs_dev_stat_read(dev,
BTRFS_DEV_STAT_CORRUPTION_ERRS),
btrfs_dev_stat_read(dev,
BTRFS_DEV_STAT_GENERATION_ERRS));
}
static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
{
printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
dev->name,
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}
int btrfs_get_dev_stats(struct btrfs_root *root,
struct btrfs_ioctl_get_dev_stats *stats,
int reset_after_read)
{
struct btrfs_device *dev;
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
int i;
mutex_lock(&fs_devices->device_list_mutex);
dev = btrfs_find_device(root, stats->devid, NULL, NULL);
mutex_unlock(&fs_devices->device_list_mutex);
if (!dev) {
printk(KERN_WARNING
"btrfs: get dev_stats failed, device not found\n");
return -ENODEV;
} else if (!dev->dev_stats_valid) {
printk(KERN_WARNING
"btrfs: get dev_stats failed, not yet valid\n");
return -ENODEV;
} else if (reset_after_read) {
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
if (stats->nr_items > i)
stats->values[i] =
btrfs_dev_stat_read_and_reset(dev, i);
else
btrfs_dev_stat_reset(dev, i);
}
} else {
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
if (stats->nr_items > i)
stats->values[i] = btrfs_dev_stat_read(dev, i);
}
if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
return 0;
}
......@@ -22,6 +22,7 @@
#include <linux/bio.h>
#include <linux/sort.h>
#include "async-thread.h"
#include "ioctl.h"
#define BTRFS_STRIPE_LEN (64 * 1024)
......@@ -106,6 +107,11 @@ struct btrfs_device {
struct completion flush_wait;
int nobarriers;
/* disk I/O failure stats. For detailed description refer to
* enum btrfs_dev_stat_values in ioctl.h */
int dev_stats_valid;
int dev_stats_dirty; /* counters need to be written to disk */
atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
};
struct btrfs_fs_devices {
......@@ -281,4 +287,50 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
u64 logical, int mirror_num);
void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,
struct btrfs_ioctl_get_dev_stats *stats,
int reset_after_read);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index)
{
atomic_inc(dev->dev_stat_values + index);
dev->dev_stats_dirty = 1;
}
static inline int btrfs_dev_stat_read(struct btrfs_device *dev,
int index)
{
return atomic_read(dev->dev_stat_values + index);
}
static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
int index)
{
int ret;
ret = atomic_xchg(dev->dev_stat_values + index, 0);
dev->dev_stats_dirty = 1;
return ret;
}
static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
int index, unsigned long val)
{
atomic_set(dev->dev_stat_values + index, val);
dev->dev_stats_dirty = 1;
}
static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
int index)
{
btrfs_dev_stat_set(dev, index, 0);
}
#endif
......@@ -196,6 +196,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
if (ret)
goto out;
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册