提交 105a048a 编写于 作者: L Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (27 commits)
  Btrfs: add more error checking to btrfs_dirty_inode
  Btrfs: allow unaligned DIO
  Btrfs: drop verbose enospc printk
  Btrfs: Fix block generation verification race
  Btrfs: fix preallocation and nodatacow checks in O_DIRECT
  Btrfs: avoid ENOSPC errors in btrfs_dirty_inode
  Btrfs: move O_DIRECT space reservation to btrfs_direct_IO
  Btrfs: rework O_DIRECT enospc handling
  Btrfs: use async helpers for DIO write checksumming
  Btrfs: don't walk around with task->state != TASK_RUNNING
  Btrfs: do aio_write instead of write
  Btrfs: add basic DIO read/write support
  direct-io: do not merge logically non-contiguous requests
  direct-io: add a hook for the fs to provide its own submit_bio function
  fs: allow short direct-io reads to be completed via buffered IO
  Btrfs: Metadata ENOSPC handling for balance
  Btrfs: Pre-allocate space for data relocation
  Btrfs: Metadata ENOSPC handling for tree log
  Btrfs: Metadata reservation for orphan inodes
  Btrfs: Introduce global metadata reservation
  ...
......@@ -377,6 +377,7 @@ static int worker_loop(void *arg)
if (!list_empty(&worker->pending) ||
!list_empty(&worker->prio_pending)) {
spin_unlock_irq(&worker->lock);
set_current_state(TASK_RUNNING);
goto again;
}
......
......@@ -137,8 +137,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
spinlock_t accounting_lock;
atomic_t outstanding_extents;
int reserved_extents;
int outstanding_extents;
/*
* ordered_data_close is set by truncate when a file that used
......@@ -151,6 +151,7 @@ struct btrfs_inode {
* of these.
*/
unsigned ordered_data_close:1;
unsigned orphan_meta_reserved:1;
unsigned dummy_inode:1;
/*
......
......@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
struct extent_buffer *cow)
struct extent_buffer *cow,
int *last_ref)
{
u64 refs;
u64 owner;
......@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
*last_ref = 1;
}
return 0;
}
......@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
struct extent_buffer *cow;
int level;
int last_ref = 0;
int unlock_orig = 0;
u64 parent_start;
......@@ -442,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
update_ref_for_cow(trans, root, buf, cow);
update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (root->ref_cows)
btrfs_reloc_cow_block(trans, root, buf, cow);
if (buf == root->node) {
WARN_ON(parent && parent != buf);
......@@ -457,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
extent_buffer_get(cow);
spin_unlock(&root->node_lock);
btrfs_free_tree_block(trans, root, buf->start, buf->len,
parent_start, root->root_key.objectid, level);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
......@@ -473,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
btrfs_free_tree_block(trans, root, buf->start, buf->len,
parent_start, root->root_key.objectid, level);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
......@@ -949,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
return bin_search(eb, key, level, slot);
}
static void root_add_used(struct btrfs_root *root, u32 size)
{
spin_lock(&root->accounting_lock);
btrfs_set_root_used(&root->root_item,
btrfs_root_used(&root->root_item) + size);
spin_unlock(&root->accounting_lock);
}
static void root_sub_used(struct btrfs_root *root, u32 size)
{
spin_lock(&root->accounting_lock);
btrfs_set_root_used(&root->root_item,
btrfs_root_used(&root->root_item) - size);
spin_unlock(&root->accounting_lock);
}
/* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked).
* NULL is returned on error.
......@@ -1019,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
BUG_ON(ret);
if (ret) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
goto enospc;
}
spin_lock(&root->node_lock);
root->node = child;
......@@ -1034,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
0, root->root_key.objectid, level);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
/* once for the root ptr */
free_extent_buffer(mid);
return ret;
return 0;
}
if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
......@@ -1088,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
u64 bytenr = right->start;
u32 blocksize = right->len;
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
free_extent_buffer(right);
right = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot +
1);
if (wret)
ret = wret;
wret = btrfs_free_tree_block(trans, root,
bytenr, blocksize, 0,
root->root_key.objectid,
level);
if (wret)
ret = wret;
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1);
free_extent_buffer(right);
right = NULL;
} else {
struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0);
......@@ -1136,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
/* we've managed to empty the middle node, drop it */
u64 bytenr = mid->start;
u32 blocksize = mid->len;
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
free_extent_buffer(mid);
mid = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot);
if (wret)
ret = wret;
wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
0, root->root_key.objectid, level);
if (wret)
ret = wret;
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
free_extent_buffer(mid);
mid = NULL;
} else {
/* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key;
......@@ -1590,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
btrfs_release_path(NULL, p);
ret = -EAGAIN;
tmp = read_tree_block(root, blocknr, blocksize, gen);
tmp = read_tree_block(root, blocknr, blocksize, 0);
if (tmp) {
/*
* If the read above didn't mark this buffer up to date,
......@@ -1740,7 +1754,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
p->nodes[level + 1],
p->slots[level + 1], &b);
if (err) {
free_extent_buffer(b);
ret = err;
goto done;
}
......@@ -2076,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
if (IS_ERR(c))
return PTR_ERR(c);
root_add_used(root, root->nodesize);
memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_nritems(c, 1);
btrfs_set_header_level(c, level);
......@@ -2134,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
int nritems;
BUG_ON(!path->nodes[level]);
btrfs_assert_tree_locked(path->nodes[level]);
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
......@@ -2202,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
if (IS_ERR(split))
return PTR_ERR(split);
root_add_used(root, root->nodesize);
memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_level(split, btrfs_header_level(c));
btrfs_set_header_bytenr(split, split->start);
......@@ -2415,6 +2433,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
else
clean_tree_block(trans, root, left);
btrfs_mark_buffer_dirty(right);
btrfs_item_key(right, &disk_key, 0);
......@@ -2660,6 +2681,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(left);
if (right_nritems)
btrfs_mark_buffer_dirty(right);
else
clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
wret = fixup_low_keys(trans, root, path, &disk_key, 1);
......@@ -2669,8 +2692,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
clean_tree_block(trans, root, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = left;
......@@ -2932,10 +2953,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid,
&disk_key, 0, l->start, 0);
if (IS_ERR(right)) {
BUG_ON(1);
if (IS_ERR(right))
return PTR_ERR(right);
}
root_add_used(root, root->leafsize);
memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(right, right->start);
......@@ -3054,7 +3075,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
ret = split_leaf(trans, root, &key, path, ins_len, 1);
BUG_ON(ret);
if (ret)
goto err;
path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1);
......@@ -3796,9 +3818,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/
btrfs_unlock_up_safe(path, 0);
ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
0, root->root_key.objectid, 0);
return ret;
root_sub_used(root, leaf->len);
btrfs_free_tree_block(trans, root, leaf, 0, 1);
return 0;
}
/*
* delete the item at the leaf level in path. If that empties
......@@ -3865,6 +3888,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (leaf == root->node) {
btrfs_set_header_level(leaf, 0);
} else {
btrfs_set_path_blocking(path);
clean_tree_block(trans, root, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf);
BUG_ON(ret);
}
......
......@@ -34,6 +34,7 @@
struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
......@@ -663,6 +664,7 @@ struct btrfs_csum_item {
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
#define BTRFS_NR_RAID_TYPES 5
struct btrfs_block_group_item {
__le64 used;
......@@ -674,42 +676,46 @@ struct btrfs_space_info {
u64 flags;
u64 total_bytes; /* total bytes in the space */
u64 bytes_used; /* total bytes used on disk */
u64 bytes_used; /* total bytes used,
this does't take mirrors into account */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
transaction finishes */
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
u64 bytes_readonly; /* total bytes that are read only */
u64 bytes_super; /* total bytes reserved for the super blocks */
u64 bytes_root; /* the number of bytes needed to commit a
transaction */
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 bytes_delalloc; /* number of bytes currently reserved for
delayed allocation */
u64 disk_used; /* total bytes used on disk */
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
this space */
int force_delalloc; /* make people start doing filemap_flush until
we're under a threshold */
struct list_head list;
/* for controlling how we free up space for allocations */
wait_queue_head_t allocate_wait;
wait_queue_head_t flush_wait;
int allocating_chunk;
int flushing;
/* for block groups in our same type */
struct list_head block_groups;
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock;
struct rw_semaphore groups_sem;
atomic_t caching_threads;
};
struct btrfs_block_rsv {
u64 size;
u64 reserved;
u64 freed[2];
struct btrfs_space_info *space_info;
struct list_head list;
spinlock_t lock;
atomic_t usage;
unsigned int priority:8;
unsigned int durable:1;
unsigned int refill_used:1;
unsigned int full:1;
};
/*
* free clusters are used to claim free space in relatively large chunks,
* allowing us to do less seeky writes. They are used for all metadata
......@@ -760,6 +766,7 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
u64 reserved_pinned;
u64 bytes_super;
u64 flags;
u64 sectorsize;
......@@ -825,6 +832,22 @@ struct btrfs_fs_info {
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
/* block reservation for extent, checksum and root tree */
struct btrfs_block_rsv global_block_rsv;
/* block reservation for delay allocation */
struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
struct btrfs_block_rsv chunk_block_rsv;
struct btrfs_block_rsv empty_block_rsv;
/* list of block reservations that cross multiple transactions */
struct list_head durable_block_rsv_list;
struct mutex durable_block_rsv_mutex;
u64 generation;
u64 last_trans_committed;
......@@ -927,7 +950,6 @@ struct btrfs_fs_info {
struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers;
struct btrfs_workers submit_workers;
struct btrfs_workers enospc_workers;
/*
* fixup workers take dirty pages that didn't properly go through
* the cow mechanism and make them safe to write. It happens
......@@ -943,6 +965,7 @@ struct btrfs_fs_info {
int do_barriers;
int closing;
int log_root_recovering;
int enospc_unlink;
u64 total_pinned;
......@@ -1012,6 +1035,9 @@ struct btrfs_root {
struct completion kobj_unregister;
struct mutex objectid_mutex;
spinlock_t accounting_lock;
struct btrfs_block_rsv *block_rsv;
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
......@@ -1043,7 +1069,6 @@ struct btrfs_root {
int ref_cows;
int track_dirty;
int in_radix;
int clean_orphans;
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
......@@ -1057,8 +1082,11 @@ struct btrfs_root {
struct list_head root_list;
spinlock_t list_lock;
spinlock_t orphan_lock;
struct list_head orphan_list;
struct btrfs_block_rsv *orphan_block_rsv;
int orphan_item_inserted;
int orphan_cleanup_state;
spinlock_t inode_lock;
/* red-black tree that keeps track of in-memory inodes */
......@@ -1965,6 +1993,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num, int reserved);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
......@@ -1984,10 +2015,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
u64 parent, u64 root_objectid, int level);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
......@@ -2041,27 +2072,49 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 size);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
struct btrfs_block_group_cache *group);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items);
int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items);
int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes);
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items, int *retries);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode);
void btrfs_orphan_release_metadata(struct inode *inode);
int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int *retries);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved, int min_factor);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
......@@ -2152,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int update_ref);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
......@@ -2245,6 +2299,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 *index);
struct btrfs_inode_ref *
btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, int mod);
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid);
......@@ -2257,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst);
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 logical_offset, u32 *dst);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
......@@ -2311,6 +2373,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping,
......@@ -2349,10 +2412,20 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root);
void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_root *root);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
......@@ -2409,4 +2482,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow);
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
#endif
......@@ -318,107 +318,6 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
return ret;
}
/*
* helper function to lookup reference count and flags of extent.
*
* the head node for delayed ref is used to store the sum of all the
* reference count modifications queued up in the rbtree. the head
* node may also store the extent flags to set. This way you can check
* to see what the reference count and extent flags would be if all of
* the delayed refs are not processed.
*/
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags)
{
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
struct extent_buffer *leaf;
struct btrfs_key key;
u32 item_size;
u64 num_refs;
u64 extent_flags;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
delayed_refs = &trans->transaction->delayed_refs;
again:
ret = btrfs_search_slot(trans, root->fs_info->extent_root,
&key, path, 0, 0);
if (ret < 0)
goto out;
if (ret == 0) {
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
if (item_size >= sizeof(*ei)) {
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
struct btrfs_extent_item_v0 *ei0;
BUG_ON(item_size != sizeof(*ei0));
ei0 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item_v0);
num_refs = btrfs_extent_refs_v0(leaf, ei0);
/* FIXME: this isn't correct for data */
extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
BUG();
#endif
}
BUG_ON(num_refs == 0);
} else {
num_refs = 0;
extent_flags = 0;
ret = 0;
}
spin_lock(&delayed_refs->lock);
ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
if (ref) {
head = btrfs_delayed_node_to_head(ref);
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&ref->refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(root->fs_info->extent_root, path);
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(ref);
goto again;
}
if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
else
BUG_ON(num_refs == 0);
num_refs += ref->ref_mod;
mutex_unlock(&head->mutex);
}
WARN_ON(num_refs == 0);
if (refs)
*refs = num_refs;
if (flags)
*flags = extent_flags;
out:
spin_unlock(&delayed_refs->lock);
btrfs_free_path(path);
return ret;
}
/*
* helper function to update an extent delayed ref in the
* rbtree. existing and update must both have the same
......
......@@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags);
int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 orig_parent,
u64 parent, u64 orig_ref_root, u64 ref_root,
......
......@@ -74,6 +74,11 @@ struct async_submit_bio {
int rw;
int mirror_num;
unsigned long bio_flags;
/*
* bio_offset is optional, can be used if the pages in the bio
* can't tell us where in the file the bio should go
*/
u64 bio_offset;
struct btrfs_work work;
};
......@@ -534,7 +539,8 @@ static void run_one_async_start(struct btrfs_work *work)
async = container_of(work, struct async_submit_bio, work);
fs_info = BTRFS_I(async->inode)->root->fs_info;
async->submit_bio_start(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags);
async->mirror_num, async->bio_flags,
async->bio_offset);
}
static void run_one_async_done(struct btrfs_work *work)
......@@ -556,7 +562,8 @@ static void run_one_async_done(struct btrfs_work *work)
wake_up(&fs_info->async_submit_wait);
async->submit_bio_done(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags);
async->mirror_num, async->bio_flags,
async->bio_offset);
}
static void run_one_async_free(struct btrfs_work *work)
......@@ -570,6 +577,7 @@ static void run_one_async_free(struct btrfs_work *work)
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
int rw, struct bio *bio, int mirror_num,
unsigned long bio_flags,
u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done)
{
......@@ -592,6 +600,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->work.flags = 0;
async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
atomic_inc(&fs_info->nr_async_submits);
......@@ -627,7 +636,8 @@ static int btree_csum_one_bio(struct bio *bio)
static int __btree_submit_bio_start(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
unsigned long bio_flags)
unsigned long bio_flags,
u64 bio_offset)
{
/*
* when we're called for a write, we're already in the async
......@@ -638,7 +648,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw,
}
static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
/*
* when we're called for a write, we're already in the async
......@@ -648,7 +659,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
}
static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
int ret;
......@@ -671,6 +683,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
*/
return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
inode, rw, bio, mirror_num, 0,
bio_offset,
__btree_submit_bio_start,
__btree_submit_bio_done);
}
......@@ -894,7 +907,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->ref_cows = 0;
root->track_dirty = 0;
root->in_radix = 0;
root->clean_orphans = 0;
root->orphan_item_inserted = 0;
root->orphan_cleanup_state = 0;
root->fs_info = fs_info;
root->objectid = objectid;
......@@ -903,13 +917,16 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->name = NULL;
root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
root->block_rsv = NULL;
root->orphan_block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
INIT_LIST_HEAD(&root->root_list);
spin_lock_init(&root->node_lock);
spin_lock_init(&root->list_lock);
spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
spin_lock_init(&root->accounting_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
......@@ -968,42 +985,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
return 0;
}
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct extent_buffer *eb;
struct btrfs_root *log_root_tree = fs_info->log_root_tree;
u64 start = 0;
u64 end = 0;
int ret;
if (!log_root_tree)
return 0;
while (1) {
ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
if (ret)
break;
clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
eb = fs_info->log_root_tree->node;
WARN_ON(btrfs_header_level(eb) != 0);
WARN_ON(btrfs_header_nritems(eb) != 0);
ret = btrfs_free_reserved_extent(fs_info->tree_root,
eb->start, eb->len);
BUG_ON(ret);
free_extent_buffer(eb);
kfree(fs_info->log_root_tree);
fs_info->log_root_tree = NULL;
return 0;
}
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
......@@ -1191,19 +1172,23 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
if (root)
return root;
ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
if (ret == 0)
ret = -ENOENT;
if (ret < 0)
return ERR_PTR(ret);
root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
if (IS_ERR(root))
return root;
WARN_ON(btrfs_root_refs(&root->root_item) == 0);
set_anon_super(&root->anon_super, NULL);
if (btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
goto fail;
}
ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
if (ret < 0)
goto fail;
if (ret == 0)
root->orphan_item_inserted = 1;
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (ret)
goto fail;
......@@ -1212,10 +1197,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
ret = radix_tree_insert(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
root);
if (ret == 0) {
if (ret == 0)
root->in_radix = 1;
root->clean_orphans = 1;
}
spin_unlock(&fs_info->fs_roots_radix_lock);
radix_tree_preload_end();
if (ret) {
......@@ -1461,10 +1445,6 @@ static int cleaner_kthread(void *arg)
struct btrfs_root *root = arg;
do {
smp_mb();
if (root->fs_info->closing)
break;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
......@@ -1477,11 +1457,9 @@ static int cleaner_kthread(void *arg)
if (freezing(current)) {
refrigerator();
} else {
smp_mb();
if (root->fs_info->closing)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule();
if (!kthread_should_stop())
schedule();
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
......@@ -1493,36 +1471,40 @@ static int transaction_kthread(void *arg)
struct btrfs_root *root = arg;
struct btrfs_trans_handle *trans;
struct btrfs_transaction *cur;
u64 transid;
unsigned long now;
unsigned long delay;
int ret;
do {
smp_mb();
if (root->fs_info->closing)
break;
delay = HZ * 30;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
mutex_lock(&root->fs_info->trans_mutex);
spin_lock(&root->fs_info->new_trans_lock);
cur = root->fs_info->running_transaction;
if (!cur) {
mutex_unlock(&root->fs_info->trans_mutex);
spin_unlock(&root->fs_info->new_trans_lock);
goto sleep;
}
now = get_seconds();
if (now < cur->start_time || now - cur->start_time < 30) {
mutex_unlock(&root->fs_info->trans_mutex);
if (!cur->blocked &&
(now < cur->start_time || now - cur->start_time < 30)) {
spin_unlock(&root->fs_info->new_trans_lock);
delay = HZ * 5;
goto sleep;
}
mutex_unlock(&root->fs_info->trans_mutex);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
transid = cur->transid;
spin_unlock(&root->fs_info->new_trans_lock);
trans = btrfs_join_transaction(root, 1);
if (transid == trans->transid) {
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
} else {
btrfs_end_transaction(trans, root);
}
sleep:
wake_up_process(root->fs_info->cleaner_kthread);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
......@@ -1530,10 +1512,10 @@ static int transaction_kthread(void *arg)
if (freezing(current)) {
refrigerator();
} else {
if (root->fs_info->closing)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(delay);
if (!kthread_should_stop() &&
!btrfs_transaction_blocked(root->fs_info))
schedule_timeout(delay);
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
......@@ -1620,6 +1602,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv);
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
btrfs_init_block_rsv(&fs_info->trans_block_rsv);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
btrfs_init_block_rsv(&fs_info->empty_block_rsv);
INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
mutex_init(&fs_info->durable_block_rsv_mutex);
atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
......@@ -1759,9 +1748,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
min_t(u64, fs_devices->num_devices,
fs_info->thread_pool_size),
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->enospc_workers, "enospc",
fs_info->thread_pool_size,
&fs_info->generic_worker);
/* a higher idle thresh on the submit workers makes it much more
* likely that bios will be send down in a sane order to the
......@@ -1809,7 +1795,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->enospc_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
......@@ -1912,17 +1897,18 @@ struct btrfs_root *open_ctree(struct super_block *sb,
csum_root->track_dirty = 1;
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
ret = btrfs_read_block_groups(extent_root);
if (ret) {
printk(KERN_ERR "Failed to read block groups: %d\n", ret);
goto fail_block_groups;
}
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
......@@ -1977,6 +1963,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
BUG_ON(ret);
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_cleanup_fs_roots(fs_info);
BUG_ON(ret);
ret = btrfs_recover_relocation(tree_root);
if (ret < 0) {
printk(KERN_WARNING
......@@ -2040,7 +2029,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->enospc_workers);
fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
iput(fs_info->btree_inode);
......@@ -2405,11 +2393,11 @@ int btrfs_commit_super(struct btrfs_root *root)
down_write(&root->fs_info->cleanup_work_sem);
up_write(&root->fs_info->cleanup_work_sem);
trans = btrfs_start_transaction(root, 1);
trans = btrfs_join_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
/* run commit again to drop the original snapshot */
trans = btrfs_start_transaction(root, 1);
trans = btrfs_join_transaction(root, 1);
btrfs_commit_transaction(trans, root);
ret = btrfs_write_and_wait_transaction(NULL, root);
BUG_ON(ret);
......@@ -2426,15 +2414,15 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);
fs_info->closing = 2;
smp_mb();
......@@ -2473,7 +2461,6 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->enospc_workers);
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
......
......@@ -87,7 +87,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata);
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
int rw, struct bio *bio, int mirror_num,
unsigned long bio_flags,
unsigned long bio_flags, u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done);
......@@ -95,8 +95,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
int btrfs_write_tree_block(struct extent_buffer *buf);
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
......
此差异已折叠。
......@@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
return state;
}
static void free_extent_state(struct extent_state *state)
void free_extent_state(struct extent_state *state)
{
if (!state)
return;
......@@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree,
}
static int set_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->set_bit_hook) {
return tree->ops->set_bit_hook(tree->mapping->host,
state->start, state->end,
state->state, bits);
state, bits);
}
return 0;
}
static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
......@@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree,
*/
static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end,
int bits)
int *bits)
{
struct rb_node *node;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
int ret;
if (end < start) {
......@@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree,
if (ret)
return ret;
if (bits & EXTENT_DIRTY)
if (bits_to_set & EXTENT_DIRTY)
tree->dirty_bytes += end - start + 1;
state->state |= bits;
state->state |= bits_to_set;
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
......@@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
* struct is freed and removed from the tree
*/
static int clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state, int bits, int wake,
int delete)
struct extent_state *state,
int *bits, int wake)
{
int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
int bits_to_clear = *bits & ~EXTENT_CTLBITS;
int ret = state->state & bits_to_clear;
if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
WARN_ON(range > tree->dirty_bytes);
tree->dirty_bytes -= range;
......@@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree,
state->state &= ~bits_to_clear;
if (wake)
wake_up(&state->wq);
if (delete || state->state == 0) {
if (state->state == 0) {
if (state->tree) {
clear_state_cb(tree, state, state->state);
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
free_extent_state(state);
......@@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int set = 0;
int clear = 0;
if (delete)
bits |= ~EXTENT_CTLBITS;
bits |= EXTENT_FIRST_DELALLOC;
if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
clear = 1;
again:
......@@ -580,8 +581,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (err)
goto out;
if (state->end <= end) {
set |= clear_state_bit(tree, state, bits, wake,
delete);
set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
......@@ -602,7 +602,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (wake)
wake_up(&state->wq);
set |= clear_state_bit(tree, prealloc, bits, wake, delete);
set |= clear_state_bit(tree, prealloc, &bits, wake);
prealloc = NULL;
goto out;
......@@ -613,7 +613,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
else
next_node = NULL;
set |= clear_state_bit(tree, state, bits, wake, delete);
set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
......@@ -706,19 +706,19 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
static int set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
int bits)
int *bits)
{
int ret;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
ret = set_state_cb(tree, state, bits);
if (ret)
return ret;
if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
state->state |= bits;
state->state |= bits_to_set;
return 0;
}
......@@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state,
* [start, end] is inclusive This takes the tree lock.
*/
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int exclusive_bits, u64 *failed_start,
struct extent_state **cached_state,
gfp_t mask)
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int exclusive_bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
......@@ -757,6 +756,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
......@@ -778,7 +778,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
*/
node = tree_search(tree, start);
if (!node) {
err = insert_state(tree, prealloc, start, end, bits);
err = insert_state(tree, prealloc, start, end, &bits);
prealloc = NULL;
BUG_ON(err == -EEXIST);
goto out;
......@@ -802,7 +802,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
goto out;
}
err = set_state_bits(tree, state, bits);
err = set_state_bits(tree, state, &bits);
if (err)
goto out;
......@@ -852,7 +852,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (err)
goto out;
if (state->end <= end) {
err = set_state_bits(tree, state, bits);
err = set_state_bits(tree, state, &bits);
if (err)
goto out;
cache_state(state, cached_state);
......@@ -877,7 +877,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
else
this_end = last_start - 1;
err = insert_state(tree, prealloc, start, this_end,
bits);
&bits);
BUG_ON(err == -EEXIST);
if (err) {
prealloc = NULL;
......@@ -903,7 +903,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
err = set_state_bits(tree, prealloc, bits);
err = set_state_bits(tree, prealloc, &bits);
if (err) {
prealloc = NULL;
goto out;
......@@ -966,8 +966,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0,
NULL, mask);
EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
}
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
......@@ -1435,9 +1434,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
if (op & EXTENT_CLEAR_DELALLOC)
clear_bits |= EXTENT_DELALLOC;
if (op & EXTENT_CLEAR_ACCOUNTING)
clear_bits |= EXTENT_DO_ACCOUNTING;
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
......@@ -1916,7 +1912,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
if (tree->ops && tree->ops->submit_bio_hook)
tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
mirror_num, bio_flags);
mirror_num, bio_flags, start);
else
submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
......@@ -2020,6 +2016,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
struct btrfs_ordered_extent *ordered;
int ret;
int nr = 0;
size_t page_offset = 0;
......@@ -2031,7 +2028,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
set_page_extent_mapped(page);
end = page_end;
lock_extent(tree, start, end, GFP_NOFS);
while (1) {
lock_extent(tree, start, end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(inode, start);
if (!ordered)
break;
unlock_extent(tree, start, end, GFP_NOFS);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
char *userpage;
......
......@@ -16,7 +16,9 @@
#define EXTENT_BOUNDARY (1 << 9)
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/* flags for bio submission */
#define EXTENT_BIO_COMPRESSED 1
......@@ -47,7 +49,7 @@ struct extent_state;
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
unsigned long bio_flags);
unsigned long bio_flags, u64 bio_offset);
struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
......@@ -69,10 +71,10 @@ struct extent_io_ops {
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
unsigned long old, unsigned long bits);
int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
int *bits);
int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long bits);
int *bits);
int (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);
......@@ -176,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, unsigned long bits);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int filled, struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
......@@ -185,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int exclusive_bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
......
......@@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
}
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst)
static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
u32 sum;
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
u64 offset;
u64 offset = 0;
u64 item_start_offset = 0;
u64 item_last_offset = 0;
u64 disk_bytenr;
......@@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
WARN_ON(bio->bi_vcnt <= 0);
disk_bytenr = (u64)bio->bi_sector << 9;
if (dio)
offset = logical_offset;
while (bio_index < bio->bi_vcnt) {
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
if (!dio)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
if (ret == 0)
goto found;
......@@ -238,6 +242,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
else
set_state_private(io_tree, offset, sum);
disk_bytenr += bvec->bv_len;
offset += bvec->bv_len;
bio_index++;
bvec++;
}
......@@ -245,6 +250,18 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
return 0;
}
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst)
{
return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0);
}
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 offset, u32 *dst)
{
return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list)
{
......@@ -657,6 +674,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto found;
}
ret = PTR_ERR(item);
if (ret != -EFBIG && ret != -ENOENT)
goto fail_unlock;
if (ret == -EFBIG) {
u32 item_size;
/* we found one, but it isn't big enough yet */
......
......@@ -46,32 +46,42 @@
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
int write_bytes,
struct page **prepared_pages,
const char __user *buf)
struct iov_iter *i)
{
long page_fault = 0;
int i;
size_t copied;
int pg = 0;
int offset = pos & (PAGE_CACHE_SIZE - 1);
for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
while (write_bytes > 0) {
size_t count = min_t(size_t,
PAGE_CACHE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[i];
fault_in_pages_readable(buf, count);
struct page *page = prepared_pages[pg];
again:
if (unlikely(iov_iter_fault_in_readable(i, count)))
return -EFAULT;
/* Copy data from userspace to the current page */
kmap(page);
page_fault = __copy_from_user(page_address(page) + offset,
buf, count);
copied = iov_iter_copy_from_user(page, i, offset, count);
/* Flush processor's dcache for this page */
flush_dcache_page(page);
kunmap(page);
buf += count;
write_bytes -= count;
iov_iter_advance(i, copied);
write_bytes -= copied;
if (page_fault)
break;
if (unlikely(copied == 0)) {
count = min_t(size_t, PAGE_CACHE_SIZE - offset,
iov_iter_single_seg_count(i));
goto again;
}
if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
offset += copied;
} else {
pg++;
offset = 0;
}
}
return page_fault ? -EFAULT : 0;
return 0;
}
/*
......@@ -126,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
end_of_last_block = start_pos + num_bytes - 1;
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
NULL);
if (err)
return err;
BUG_ON(err);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
......@@ -142,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
* at this time.
*/
}
return err;
return 0;
}
/*
......@@ -823,45 +832,46 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
return 0;
}
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
loff_t pos;
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page *pinned[2];
struct page **pages = NULL;
struct iov_iter i;
loff_t *ppos = &iocb->ki_pos;
loff_t start_pos;
ssize_t num_written = 0;
ssize_t err = 0;
size_t count;
size_t ocount;
int ret = 0;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
int nrptrs;
struct page *pinned[2];
unsigned long first_index;
unsigned long last_index;
int will_write;
int buffered = 0;
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
PAGE_CACHE_SIZE / (sizeof(struct page *)));
pinned[0] = NULL;
pinned[1] = NULL;
pos = *ppos;
start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* do the reserve before the mutex lock in case we have to do some
* flushing. We wouldn't deadlock, but this is more polite.
*/
err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (err)
goto out_nolock;
mutex_lock(&inode->i_mutex);
err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
if (err)
goto out;
count = ocount;
current->backing_dev_info = inode->i_mapping->backing_dev_info;
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
if (err)
......@@ -875,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
goto out;
file_update_time(file);
BTRFS_I(inode)->sequence++;
if (unlikely(file->f_flags & O_DIRECT)) {
num_written = generic_file_direct_write(iocb, iov, &nr_segs,
pos, ppos, count,
ocount);
/*
* the generic O_DIRECT will update in-memory i_size after the
* DIOs are done. But our endio handlers that update the on
* disk i_size never update past the in memory i_size. So we
* need one more update here to catch any additions to the
* file
*/
if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
mark_inode_dirty(inode);
}
if (num_written < 0) {
ret = num_written;
num_written = 0;
goto out;
} else if (num_written == count) {
/* pick up pos changes done by the generic code */
pos = *ppos;
goto out;
}
/*
* We are going to do buffered for the rest of the range, so we
* need to make sure to invalidate the buffered pages when we're
* done.
*/
buffered = 1;
pos += num_written;
}
iov_iter_init(&i, iov, nr_segs, count, num_written);
nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
(sizeof(struct page *)));
pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
/* generic_write_checks can change our pos */
start_pos = pos;
BTRFS_I(inode)->sequence++;
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + count) >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
/*
* there are lots of better ways to do this, but this code
......@@ -900,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
unlock_page(pinned[0]);
}
}
if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
pinned[1] = grab_cache_page(inode->i_mapping, last_index);
if (!PageUptodate(pinned[1])) {
ret = btrfs_readpage(NULL, pinned[1]);
......@@ -911,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
}
}
while (count > 0) {
while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(count, nrptrs *
(size_t)PAGE_CACHE_SIZE -
size_t write_bytes = min(iov_iter_count(&i),
nrptrs * (size_t)PAGE_CACHE_SIZE -
offset);
size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
......@@ -922,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
ret = btrfs_check_data_free_space(root, inode, write_bytes);
ret = btrfs_delalloc_reserve_space(inode, write_bytes);
if (ret)
goto out;
......@@ -930,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
pos, first_index, last_index,
write_bytes);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
ret = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, buf);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_drop_pages(pages, num_pages);
goto out;
write_bytes, pages, &i);
if (ret == 0) {
dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
}
ret = dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
btrfs_drop_pages(pages, num_pages);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
......@@ -965,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
btrfs_throttle(root);
}
buf += write_bytes;
count -= write_bytes;
pos += write_bytes;
num_written += write_bytes;
......@@ -976,9 +1016,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
mutex_unlock(&inode->i_mutex);
if (ret)
err = ret;
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
out_nolock:
kfree(pages);
if (pinned[0])
page_cache_release(pinned[0]);
......@@ -1008,7 +1046,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
num_written = err;
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
ret = btrfs_log_dentry_safe(trans, root,
file->f_dentry);
if (ret == 0) {
......@@ -1023,7 +1061,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
btrfs_end_transaction(trans, root);
}
}
if (file->f_flags & O_DIRECT) {
if (file->f_flags & O_DIRECT && buffered) {
invalidate_mapping_pages(inode->i_mapping,
start_pos >> PAGE_CACHE_SHIFT,
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
......@@ -1104,9 +1142,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (file && file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 1);
if (!trans) {
ret = -ENOMEM;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
......@@ -1161,7 +1199,7 @@ const struct file_operations btrfs_file_operations = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.splice_read = generic_file_splice_read,
.write = btrfs_file_write,
.aio_write = btrfs_file_aio_write,
.mmap = btrfs_file_mmap,
.open = generic_file_open,
.release = btrfs_release_file,
......
......@@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
return 0;
}
struct btrfs_inode_ref *
btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, int mod)
{
struct btrfs_key key;
struct btrfs_inode_ref *ref;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
int ret;
key.objectid = inode_objectid;
key.type = BTRFS_INODE_REF_KEY;
key.offset = ref_objectid;
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
return NULL;
if (!find_name_in_backref(path, name, name_len, &ref))
return NULL;
return ref;
}
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
......
此差异已折叠。
此差异已折叠。
......@@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
return 1;
}
static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
u64 len)
{
if (file_offset + len <= entry->file_offset ||
entry->file_offset + entry->len <= file_offset)
return 0;
return 1;
}
/*
* look find the first ordered struct that has this offset, otherwise
* the first one less than this offset
......@@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
* The tree is given a single reference on the ordered extent that was
* inserted.
*/
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
int type, int dio)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
......@@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
if (dio)
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
/* one ref for the tree */
atomic_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
......@@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
return 0;
}
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
disk_len, type, 0);
}
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
disk_len, type, 1);
}
/*
* Add a struct btrfs_ordered_sum into the list of checksums to be inserted
* when an ordered extent is finished. If the list covers more than one
......@@ -311,13 +338,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
spin_lock(&BTRFS_I(inode)->accounting_lock);
WARN_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->accounting_lock);
btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
inode, 1);
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
......@@ -491,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
* start IO on any dirty ones so the wait doesn't stall waiting
* for pdflush to find them
*/
filemap_fdatawrite_range(inode->i_mapping, start, end);
if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
filemap_fdatawrite_range(inode->i_mapping, start, end);
if (wait) {
wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
&entry->flags));
......@@ -588,6 +609,47 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
return entry;
}
/* Since the DIO code tries to lock a wide area we need to look for any ordered
* extents that exist in the range, rather than just the start of the range.
*/
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
u64 file_offset,
u64 len)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
node = tree_search(tree, file_offset);
if (!node) {
node = tree_search(tree, file_offset + len);
if (!node)
goto out;
}
while (1) {
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (range_overlaps(entry, file_offset, len))
break;
if (entry->file_offset >= file_offset + len) {
entry = NULL;
break;
}
entry = NULL;
node = rb_next(node);
if (!node)
break;
}
out:
if (entry)
atomic_inc(&entry->refs);
spin_unlock(&tree->lock);
return entry;
}
/*
* lookup and return any extent before 'file_offset'. NULL is returned
* if none is found
......
......@@ -72,6 +72,8 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
......@@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int tyep);
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
......@@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode,
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
u64 file_offset,
u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
......
此差异已折叠。
......@@ -259,6 +259,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_key root_key;
struct btrfs_root *root;
int err = 0;
int ret;
......@@ -270,6 +272,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = 0;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
while (1) {
ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
if (ret < 0) {
......@@ -294,13 +299,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
key.type != BTRFS_ORPHAN_ITEM_KEY)
break;
ret = btrfs_find_dead_roots(tree_root, key.offset);
if (ret) {
root_key.objectid = key.offset;
key.offset++;
root = btrfs_read_fs_root_no_name(tree_root->fs_info,
&root_key);
if (!IS_ERR(root))
continue;
ret = PTR_ERR(root);
if (ret != -ENOENT) {
err = ret;
break;
}
key.offset++;
ret = btrfs_find_dead_roots(tree_root, root_key.objectid);
if (ret) {
err = ret;
break;
}
}
btrfs_free_path(path);
......
......@@ -498,7 +498,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
btrfs_start_delalloc_inodes(root, 0);
btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
ret = btrfs_commit_transaction(trans, root);
return ret;
}
......@@ -694,11 +694,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
return -EINVAL;
/* recover relocation */
ret = btrfs_recover_relocation(root);
ret = btrfs_cleanup_fs_roots(root->fs_info);
WARN_ON(ret);
ret = btrfs_cleanup_fs_roots(root->fs_info);
/* recover relocation */
ret = btrfs_recover_relocation(root);
WARN_ON(ret);
sb->s_flags &= ~MS_RDONLY;
......@@ -714,34 +714,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct list_head *head = &root->fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
u64 data_used = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)root->fs_info->fsid;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & (BTRFS_BLOCK_GROUP_DUP|
BTRFS_BLOCK_GROUP_RAID10|
BTRFS_BLOCK_GROUP_RAID1)) {
total_used += found->bytes_used;
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
data_used += found->bytes_used;
else
data_used += found->total_bytes;
}
total_used += found->bytes_used;
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
data_used += found->bytes_used;
else
data_used += found->total_bytes;
}
list_for_each_entry_rcu(found, head, list)
total_used += found->disk_used;
rcu_read_unlock();
buf->f_namelen = BTRFS_NAME_LEN;
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
buf->f_bfree = buf->f_blocks - (total_used >> bits);
buf->f_bavail = buf->f_blocks - (data_used >> bits);
buf->f_bavail = buf->f_bfree;
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;
......
此差异已折叠。
此差异已折叠。
......@@ -117,13 +117,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
path->nodes[1], 0,
cache_only, &last_ret,
&root->defrag_progress);
WARN_ON(ret && ret != -EAGAIN);
if (ret) {
WARN_ON(ret == -EAGAIN);
goto out;
}
if (next_key_ret == 0) {
memcpy(&root->defrag_progress, &key, sizeof(key));
ret = -EAGAIN;
}
btrfs_release_path(root, path);
out:
if (path)
btrfs_free_path(path);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册