提交 9f2e3a53 编写于 作者: L Linus Torvalds

Merge tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "This time the majority of changes are cleanups, though there's still a
  number of changes of user interest.

  User visible changes:

   - better read time and write checks to catch errors early and before
     writing data to disk (to catch potential memory corruption on data
     that get checksummed)

   - qgroups + metadata relocation: last speed up patch int the series
     to address the slowness, there should be no overhead comparing
     balance with and without qgroups

   - FIEMAP ioctl does not start a transaction unnecessarily, this can
     result in a speed up and less blocking due to IO

   - LOGICAL_INO (v1, v2) does not start transaction unnecessarily, this
     can speed up the mentioned ioctl and scrub as well

   - fsync on files with many (but not too many) hardlinks is faster,
     finer decision if the links should be fsynced individually or
     completely

   - send tries harder to find ranges to clone

   - trim/discard will skip unallocated chunks that haven't been touched
     since the last mount

  Fixes:

   - send flushes delayed allocation before start, otherwise it could
     miss some changes in case of a very recent rw->ro switch of a
     subvolume

   - fix fallocate with qgroups that could lead to space accounting
     underflow, reported as a warning

   - trim/discard ioctl honours the requested range

   - starting send and dedupe on a subvolume at the same time will let
     only one of them succeed, this is to prevent changes that send
     could miss due to dedupe; both operations are restartable

  Core changes:

   - more tree-checker validations, errors reported by fuzzing tools:
      - device item
      - inode item
      - block group profiles

   - tracepoints for extent buffer locking

   - async cow preallocates memory to avoid errors happening too deep in
     the call chain

   - metadata reservations for delalloc reworked to better adapt in
     many-writers/low-space scenarios

   - improved space flushing logic for intense DIO vs buffered workloads

   - lots of cleanups
      - removed unused struct members
      - redundant argument removal
      - properties and xattrs
      - extent buffer locking
      - selftests
      - use common file type conversions
      - many-argument functions reduction"

* tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (227 commits)
  btrfs: Use kvmalloc for allocating compressed path context
  btrfs: Factor out common extent locking code in submit_compressed_extents
  btrfs: Set io_tree only once in submit_compressed_extents
  btrfs: Replace clear_extent_bit with unlock_extent
  btrfs: Make compress_file_range take only struct async_chunk
  btrfs: Remove fs_info from struct async_chunk
  btrfs: Rename async_cow to async_chunk
  btrfs: Preallocate chunks in cow_file_range_async
  btrfs: reserve delalloc metadata differently
  btrfs: track DIO bytes in flight
  btrfs: merge calls of btrfs_setxattr and btrfs_setxattr_trans in btrfs_set_prop
  btrfs: delete unused function btrfs_set_prop_trans
  btrfs: start transaction in xattr_handler_set_prop
  btrfs: drop local copy of inode i_mode
  btrfs: drop old_fsflags in btrfs_ioctl_setflags
  btrfs: modify local copy of btrfs_inode flags
  btrfs: drop useless inode i_flags copy and restore
  btrfs: start transaction in btrfs_ioctl_setflags()
  btrfs: export btrfs_set_prop
  btrfs: refactor btrfs_set_props to validate externally
  ...
......@@ -93,7 +93,11 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
goto out;
}
ret = btrfs_setxattr(trans, inode, name, value, size, 0);
if (trans)
ret = btrfs_setxattr(trans, inode, name, value, size, 0);
else
ret = btrfs_setxattr_trans(inode, name, value, size, 0);
out:
kfree(value);
......
......@@ -791,7 +791,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
count = node->ref_mod * -1;
break;
default:
BUG_ON(1);
BUG();
}
*total_refs += count;
switch (node->type) {
......@@ -1460,8 +1460,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
* callers (such as fiemap) which want to know whether the extent is
* shared but do not need a ref count.
*
* This attempts to allocate a transaction in order to account for
* delayed refs, but continues on even when the alloc fails.
* This attempts to attach to the running transaction in order to account for
* delayed refs, but continues on even when no running transaction exists.
*
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
*/
......@@ -1484,13 +1484,16 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
tmp = ulist_alloc(GFP_NOFS);
roots = ulist_alloc(GFP_NOFS);
if (!tmp || !roots) {
ulist_free(tmp);
ulist_free(roots);
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
trans = btrfs_join_transaction(root);
trans = btrfs_attach_transaction(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
ret = PTR_ERR(trans);
goto out;
}
trans = NULL;
down_read(&fs_info->commit_root_sem);
} else {
......@@ -1523,6 +1526,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
} else {
up_read(&fs_info->commit_root_sem);
}
out:
ulist_free(tmp);
ulist_free(roots);
return ret;
......@@ -1747,7 +1751,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
else if (flags & BTRFS_EXTENT_FLAG_DATA)
*flags_ret = BTRFS_EXTENT_FLAG_DATA;
else
BUG_ON(1);
BUG();
return 0;
}
......@@ -1912,13 +1916,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
extent_item_objectid);
if (!search_commit_root) {
trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
trans = btrfs_attach_transaction(fs_info->extent_root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT &&
PTR_ERR(trans) != -EROFS)
return PTR_ERR(trans);
trans = NULL;
}
}
if (trans)
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
} else {
else
down_read(&fs_info->commit_root_sem);
}
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
tree_mod_seq_elem.seq, &refs,
......@@ -1951,7 +1961,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
free_leaf_list(refs);
out:
if (!search_commit_root) {
if (trans) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_end_transaction(trans);
} else {
......
......@@ -147,12 +147,6 @@ struct btrfs_inode {
*/
u64 last_unlink_trans;
/*
* Track the transaction id of the last transaction used to create a
* hard link for the inode. This is used by the log tree (fsync).
*/
u64 last_link_trans;
/*
* Number of bytes outstanding that are going to need csums. This is
* used in ENOSPC accounting.
......@@ -203,8 +197,6 @@ struct btrfs_inode {
struct inode vfs_inode;
};
extern unsigned char btrfs_filetype_table[];
static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
{
return container_of(inode, struct btrfs_inode, vfs_inode);
......
......@@ -251,7 +251,7 @@ static void end_compressed_bio_write(struct bio *bio)
cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
cb->start, cb->start + cb->len - 1,
bio->bi_status ? BLK_STS_OK : BLK_STS_NOTSUPP);
bio->bi_status == BLK_STS_OK);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
......
此差异已折叠。
......@@ -41,6 +41,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum;
struct btrfs_ref;
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
......@@ -1015,6 +1016,7 @@ struct btrfs_fs_info {
/* used to keep from writing metadata until there is a nice batch */
struct percpu_counter dirty_metadata_bytes;
struct percpu_counter delalloc_bytes;
struct percpu_counter dio_bytes;
s32 dirty_metadata_batch;
s32 delalloc_batch;
......@@ -1092,10 +1094,7 @@ struct btrfs_fs_info {
/* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree;
struct rb_root qgroup_op_tree;
spinlock_t qgroup_lock;
spinlock_t qgroup_op_lock;
atomic_t qgroup_op_seq;
/*
* used to avoid frequently calling ulist_alloc()/ulist_free()
......@@ -1152,12 +1151,6 @@ struct btrfs_fs_info {
struct mutex unused_bg_unpin_mutex;
struct mutex delete_unused_bgs_mutex;
/*
* Chunks that can't be freed yet (under a trim/discard operation)
* and will be latter freed. Protected by fs_info->chunk_mutex.
*/
struct list_head pinned_chunks;
/* Cached block sizes */
u32 nodesize;
u32 sectorsize;
......@@ -1348,6 +1341,12 @@ struct btrfs_root {
* manipulation with the read-only status via SUBVOL_SETFLAGS
*/
int send_in_progress;
/*
* Number of currently running deduplication operations that have a
* destination inode belonging to this root. Protected by the lock
* root_item_lock.
*/
int dedupe_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshotted;
atomic_t snapshot_force_cow;
......@@ -1540,6 +1539,21 @@ do { \
#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
#define BTRFS_INODE_FLAG_MASK \
(BTRFS_INODE_NODATASUM | \
BTRFS_INODE_NODATACOW | \
BTRFS_INODE_READONLY | \
BTRFS_INODE_NOCOMPRESS | \
BTRFS_INODE_PREALLOC | \
BTRFS_INODE_SYNC | \
BTRFS_INODE_IMMUTABLE | \
BTRFS_INODE_APPEND | \
BTRFS_INODE_NODUMP | \
BTRFS_INODE_NOATIME | \
BTRFS_INODE_DIRSYNC | \
BTRFS_INODE_COMPRESS | \
BTRFS_INODE_ROOT_ITEM_INIT)
struct btrfs_map_token {
const struct extent_buffer *eb;
char *kaddr;
......@@ -2163,18 +2177,16 @@ static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag)
return (btrfs_header_flags(eb) & flag) == flag;
}
static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
{
u64 flags = btrfs_header_flags(eb);
btrfs_set_header_flags(eb, flags | flag);
return (flags & flag) == flag;
}
static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
{
u64 flags = btrfs_header_flags(eb);
btrfs_set_header_flags(eb, flags & ~flag);
return (flags & flag) == flag;
}
static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
......@@ -2445,13 +2457,12 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
* this returns the address of the start of the last item,
* which is the stop of the leaf data stack
*/
static inline unsigned int leaf_data_end(const struct btrfs_fs_info *fs_info,
const struct extent_buffer *leaf)
static inline unsigned int leaf_data_end(const struct extent_buffer *leaf)
{
u32 nr = btrfs_header_nritems(leaf);
if (nr == 0)
return BTRFS_LEAF_DATA_SIZE(fs_info);
return BTRFS_LEAF_DATA_SIZE(leaf->fs_info);
return btrfs_item_offset_nr(leaf, nr - 1);
}
......@@ -2698,8 +2709,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
unsigned long count);
int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
unsigned long count, u64 transid, int wait);
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
......@@ -2711,8 +2720,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num, int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes);
int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
int btrfs_cross_ref_exist(struct btrfs_root *root,
u64 objectid, u64 offset, u64 bytenr);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
......@@ -2745,13 +2753,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 flags,
int level, int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len, int delalloc);
......@@ -2760,15 +2764,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
struct btrfs_ref *generic_ref);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
......@@ -2936,10 +2936,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf);
void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
u32 data_size);
void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u32 new_size, int from_end);
void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
......@@ -3015,8 +3013,7 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
return btrfs_next_old_item(root, p, 0);
}
int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf);
int btrfs_leaf_free_space(struct extent_buffer *leaf);
int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
int update_ref, int for_reloc);
......@@ -3756,8 +3753,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
struct btrfs_device *dev);
int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress);
static inline void btrfs_init_full_stripe_locks_tree(
......@@ -3806,6 +3802,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
return signal_pending(current);
}
#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
/* Sanity test specific functions */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_inode_set_ops(struct inode *inode);
......
......@@ -691,7 +691,6 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_item *item)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_delayed_item *curr, *next;
int free_space;
int total_data_size = 0, total_size = 0;
......@@ -708,7 +707,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
BUG_ON(!path->nodes[0]);
leaf = path->nodes[0];
free_space = btrfs_leaf_free_space(fs_info, leaf);
free_space = btrfs_leaf_free_space(leaf);
INIT_LIST_HEAD(&head);
next = item;
......@@ -1692,7 +1691,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
name = (char *)(di + 1);
name_len = btrfs_stack_dir_name_len(di);
d_type = btrfs_filetype_table[di->type];
d_type = fs_ftype_to_dtype(di->type);
btrfs_disk_key_to_cpu(&location, &di->location);
over = !dir_emit(ctx, name, name_len,
......
......@@ -735,8 +735,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
* transaction commits.
*/
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_ref *generic_ref,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod)
{
......@@ -746,10 +745,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
bool is_system;
int action = generic_ref->action;
int level = generic_ref->tree_ref.level;
int ret;
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
u8 ref_type;
is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);
ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
......@@ -762,7 +769,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
}
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
is_fstree(ref_root)) {
is_fstree(generic_ref->real_root) &&
is_fstree(generic_ref->tree_ref.root) &&
!generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
......@@ -777,13 +786,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
ref_type = BTRFS_TREE_BLOCK_REF_KEY;
init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
ref_root, action, ref_type);
ref->root = ref_root;
generic_ref->tree_ref.root, action, ref_type);
ref->root = generic_ref->tree_ref.root;
ref->parent = parent;
ref->level = level;
init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
ref_root, 0, action, false, is_system);
generic_ref->tree_ref.root, 0, action, false,
is_system);
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
......@@ -822,10 +832,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
int *old_ref_mod, int *new_ref_mod)
struct btrfs_ref *generic_ref,
u64 reserved, int *old_ref_mod,
int *new_ref_mod)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_data_ref *ref;
......@@ -833,9 +842,17 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
int action = generic_ref->action;
int ret;
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
u64 ref_root = generic_ref->data_ref.ref_root;
u64 owner = generic_ref->data_ref.ino;
u64 offset = generic_ref->data_ref.offset;
u8 ref_type;
ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
......@@ -859,7 +876,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
}
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
is_fstree(ref_root)) {
is_fstree(ref_root) &&
is_fstree(generic_ref->real_root) &&
!generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
......@@ -905,8 +924,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
return 0;
}
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op)
{
......
......@@ -176,6 +176,83 @@ struct btrfs_delayed_ref_root {
u64 qgroup_to_skip;
};
enum btrfs_ref_type {
BTRFS_REF_NOT_SET,
BTRFS_REF_DATA,
BTRFS_REF_METADATA,
BTRFS_REF_LAST,
};
struct btrfs_data_ref {
/* For EXTENT_DATA_REF */
/* Root which refers to this data extent */
u64 ref_root;
/* Inode which refers to this data extent */
u64 ino;
/*
* file_offset - extent_offset
*
* file_offset is the key.offset of the EXTENT_DATA key.
* extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
*/
u64 offset;
};
struct btrfs_tree_ref {
/*
* Level of this tree block
*
* Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
*/
int level;
/*
* Root which refers to this tree block.
*
* For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
*/
u64 root;
/* For non-skinny metadata, no special member needed */
};
struct btrfs_ref {
enum btrfs_ref_type type;
int action;
/*
* Whether this extent should go through qgroup record.
*
* Normally false, but for certain cases like delayed subtree scan,
* setting this flag can hugely reduce qgroup overhead.
*/
bool skip_qgroup;
/*
* Optional. For which root is this modification.
* Mostly used for qgroup optimization.
*
* When unset, data/tree ref init code will populate it.
* In certain cases, we're modifying reference for a different root.
* E.g. COW fs tree blocks for balance.
* In that case, tree_ref::root will be fs tree, but we're doing this
* for reloc tree, then we should set @real_root to reloc tree.
*/
u64 real_root;
u64 bytenr;
u64 len;
/* Bytenr of the parent tree block */
u64 parent;
union {
struct btrfs_data_ref data_ref;
struct btrfs_tree_ref tree_ref;
};
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
......@@ -184,6 +261,38 @@ extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int __init btrfs_delayed_ref_init(void);
void __cold btrfs_delayed_ref_exit(void);
static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
int action, u64 bytenr, u64 len, u64 parent)
{
generic_ref->action = action;
generic_ref->bytenr = bytenr;
generic_ref->len = len;
generic_ref->parent = parent;
}
static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
int level, u64 root)
{
/* If @real_root not set, use @root as fallback */
if (!generic_ref->real_root)
generic_ref->real_root = root;
generic_ref->tree_ref.level = level;
generic_ref->tree_ref.root = root;
generic_ref->type = BTRFS_REF_METADATA;
}
static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
u64 ref_root, u64 ino, u64 offset)
{
/* If @real_root not set, use @root as fallback */
if (!generic_ref->real_root)
generic_ref->real_root = ref_root;
generic_ref->data_ref.ref_root = ref_root;
generic_ref->data_ref.ino = ino;
generic_ref->data_ref.offset = offset;
generic_ref->type = BTRFS_REF_DATA;
}
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
{
......@@ -224,17 +333,14 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
}
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_ref *generic_ref,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod);
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
int *old_ref_mod, int *new_ref_mod);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_ref *generic_ref,
u64 reserved, int *old_ref_mod,
int *new_ref_mod);
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
......
......@@ -273,9 +273,9 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
* called from commit_transaction. Writes changed device replace state to
* disk.
*/
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_path *path;
......@@ -662,7 +662,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_device_set_disk_total_bytes(tgt_device,
src_device->disk_total_bytes);
btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
ASSERT(list_empty(&src_device->resized_list));
ASSERT(list_empty(&src_device->post_commit_list));
tgt_device->commit_total_bytes = src_device->commit_total_bytes;
tgt_device->commit_bytes_used = src_device->bytes_used;
......@@ -696,7 +696,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* replace the sysfs entry */
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
btrfs_rm_dev_replace_free_srcdev(src_device);
/* write back the superblocks */
trans = btrfs_start_transaction(root, 0);
......
......@@ -9,8 +9,7 @@
struct btrfs_ioctl_dev_replace_args;
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);
int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
......
......@@ -36,7 +36,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
if (di)
return ERR_PTR(-EEXIST);
btrfs_extend_item(fs_info, path, data_size);
btrfs_extend_item(path, data_size);
} else if (ret < 0)
return ERR_PTR(ret);
WARN_ON(ret > 0);
......@@ -429,8 +429,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_len - (ptr + sub_item_len - start));
btrfs_truncate_item(root->fs_info, path,
item_len - sub_item_len, 1);
btrfs_truncate_item(path, item_len - sub_item_len, 1);
}
return ret;
}
......@@ -260,15 +260,12 @@ void btrfs_csum_final(u32 crc, u8 *result)
}
/*
* compute the csum for a btree block, and either verify it or write it
* into the csum field of the block.
* Compute the csum of a btree block and store the result to provided buffer.
*
* Returns error if the extent buffer cannot be mapped.
*/
static int csum_tree_block(struct btrfs_fs_info *fs_info,
struct extent_buffer *buf,
int verify)
static int csum_tree_block(struct extent_buffer *buf, u8 *result)
{
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
char result[BTRFS_CSUM_SIZE];
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
......@@ -288,7 +285,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
*/
err = map_private_extent_buffer(buf, offset, 32,
&kaddr, &map_start, &map_len);
if (err)
if (WARN_ON(err))
return err;
cur_len = min(len, map_len - (offset - map_start));
crc = btrfs_csum_data(kaddr + offset - map_start,
......@@ -300,23 +297,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
btrfs_csum_final(crc, result);
if (verify) {
if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
u32 val;
u32 found = 0;
memcpy(&found, result, csum_size);
read_extent_buffer(buf, &val, 0, csum_size);
btrfs_warn_rl(fs_info,
"%s checksum verify failed on %llu wanted %X found %X level %d",
fs_info->sb->s_id, buf->start,
val, found, btrfs_header_level(buf));
return -EUCLEAN;
}
} else {
write_extent_buffer(buf, result, 0, csum_size);
}
return 0;
}
......@@ -414,22 +394,21 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
return ret;
}
static int verify_level_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid)
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
int found_level;
struct btrfs_key found_key;
int ret;
found_level = btrfs_header_level(eb);
if (found_level != level) {
#ifdef CONFIG_BTRFS_DEBUG
WARN_ON(1);
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: tree level check failed\n");
btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
eb->start, level, found_level);
#endif
return -EIO;
}
......@@ -450,9 +429,9 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
btrfs_item_key_to_cpu(eb, &found_key, 0);
ret = btrfs_comp_cpu_keys(first_key, &found_key);
#ifdef CONFIG_BTRFS_DEBUG
if (ret) {
WARN_ON(1);
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: tree first key check failed\n");
btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
eb->start, parent_transid, first_key->objectid,
......@@ -460,7 +439,6 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
found_key.objectid, found_key.type,
found_key.offset);
}
#endif
return ret;
}
......@@ -472,11 +450,11 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
* @level: expected level, mandatory check
* @first_key: expected key of first slot, skip check if NULL
*/
static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
static int btree_read_extent_buffer_pages(struct extent_buffer *eb,
u64 parent_transid, int level,
struct btrfs_key *first_key)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
struct extent_io_tree *io_tree;
int failed = 0;
int ret;
......@@ -487,14 +465,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
mirror_num);
ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num);
if (!ret) {
if (verify_parent_transid(io_tree, eb,
parent_transid, 0))
ret = -EIO;
else if (verify_level_key(fs_info, eb, level,
first_key, parent_transid))
else if (btrfs_verify_level_key(eb, level,
first_key, parent_transid))
ret = -EUCLEAN;
else
break;
......@@ -519,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
}
if (failed && !ret && failed_mirror)
repair_eb_io_failure(fs_info, eb, failed_mirror);
btrfs_repair_eb_io_failure(eb, failed_mirror);
return ret;
}
......@@ -533,7 +510,10 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
{
u64 start = page_offset(page);
u64 found_start;
u8 result[BTRFS_CSUM_SIZE];
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
struct extent_buffer *eb;
int ret;
eb = (struct extent_buffer *)page->private;
if (page != eb->pages[0])
......@@ -552,12 +532,28 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
return csum_tree_block(fs_info, eb, 0);
if (csum_tree_block(eb, result))
return -EINVAL;
if (btrfs_header_level(eb))
ret = btrfs_check_node(eb);
else
ret = btrfs_check_leaf_full(eb);
if (ret < 0) {
btrfs_err(fs_info,
"block=%llu write time tree block corruption detected",
eb->start);
return ret;
}
write_extent_buffer(eb, result, 0, csum_size);
return 0;
}
static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb)
static int check_tree_block_fsid(struct extent_buffer *eb)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u8 fsid[BTRFS_FSID_SIZE];
int ret = 1;
......@@ -595,7 +591,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int ret = 0;
u8 result[BTRFS_CSUM_SIZE];
int reads_done;
if (!page->private)
......@@ -625,7 +623,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(fs_info, eb)) {
if (check_tree_block_fsid(eb)) {
btrfs_err_rl(fs_info, "bad fsid on block %llu",
eb->start);
ret = -EIO;
......@@ -642,25 +640,44 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
ret = csum_tree_block(fs_info, eb, 1);
ret = csum_tree_block(eb, result);
if (ret)
goto err;
if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
u32 val;
u32 found = 0;
memcpy(&found, result, csum_size);
read_extent_buffer(eb, &val, 0, csum_size);
btrfs_warn_rl(fs_info,
"%s checksum verify failed on %llu wanted %x found %x level %d",
fs_info->sb->s_id, eb->start,
val, found, btrfs_header_level(eb));
ret = -EUCLEAN;
goto err;
}
/*
* If this is a leaf block and it is corrupt, set the corrupt bit so
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) {
if (found_level == 0 && btrfs_check_leaf_full(eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
if (found_level > 0 && btrfs_check_node(fs_info, eb))
if (found_level > 0 && btrfs_check_node(eb))
ret = -EIO;
if (!ret)
set_extent_buffer_uptodate(eb);
else
btrfs_err(fs_info,
"block=%llu read time tree block corruption detected",
eb->start);
err:
if (reads_done &&
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
......@@ -867,11 +884,10 @@ static int check_async_write(struct btrfs_inode *bi)
return 1;
}
static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num,
unsigned long bio_flags)
{
struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int async = check_async_write(BTRFS_I(inode));
blk_status_t ret;
......@@ -897,8 +913,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
* checksumming can happen in parallel across all CPUs
*/
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
bio_offset, private_data,
btree_submit_bio_start);
0, inode, btree_submit_bio_start);
}
if (ret)
......@@ -1017,22 +1032,23 @@ static const struct address_space_operations btree_aops = {
void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = fs_info->btree_inode;
int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
if (IS_ERR(buf))
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, WAIT_NONE, 0);
free_extent_buffer(buf);
ret = read_extent_buffer_pages(buf, WAIT_NONE, 0);
if (ret < 0)
free_extent_buffer_stale(buf);
else
free_extent_buffer(buf);
}
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
int mirror_num, struct extent_buffer **eb)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = fs_info->btree_inode;
struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
......@@ -1041,15 +1057,14 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
mirror_num);
ret = read_extent_buffer_pages(buf, WAIT_PAGE_LOCK, mirror_num);
if (ret) {
free_extent_buffer(buf);
free_extent_buffer_stale(buf);
return ret;
}
if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
free_extent_buffer(buf);
free_extent_buffer_stale(buf);
return -EIO;
} else if (extent_buffer_uptodate(buf)) {
*eb = buf;
......@@ -1068,19 +1083,6 @@ struct extent_buffer *btrfs_find_create_tree_block(
return alloc_extent_buffer(fs_info, bytenr);
}
int btrfs_write_tree_block(struct extent_buffer *buf)
{
return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
buf->start + buf->len - 1);
}
void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
{
filemap_fdatawait_range(buf->pages[0]->mapping,
buf->start, buf->start + buf->len - 1);
}
/*
* Read tree block at logical address @bytenr and do variant basic but critical
* verification.
......@@ -1100,19 +1102,19 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
if (IS_ERR(buf))
return buf;
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
ret = btree_read_extent_buffer_pages(buf, parent_transid,
level, first_key);
if (ret) {
free_extent_buffer(buf);
free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
return buf;
}
void clean_tree_block(struct btrfs_fs_info *fs_info,
struct extent_buffer *buf)
void btrfs_clean_tree_block(struct extent_buffer *buf)
{
struct btrfs_fs_info *fs_info = buf->fs_info;
if (btrfs_header_generation(buf) ==
fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);
......@@ -1208,7 +1210,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->log_transid_committed = -1;
root->last_log_commit = 0;
if (!dummy)
extent_io_tree_init(&root->dirty_log_pages, NULL);
extent_io_tree_init(fs_info, &root->dirty_log_pages,
IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
......@@ -1255,9 +1258,9 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info)
#endif
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct extent_buffer *leaf;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *root;
......@@ -2138,8 +2141,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
inode->i_mapping->a_ops = &btree_aops;
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode);
BTRFS_I(inode)->io_tree.track_uptodate = 0;
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
IO_TREE_INODE_IO, inode);
BTRFS_I(inode)->io_tree.track_uptodate = false;
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops;
......@@ -2162,7 +2166,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->qgroup_lock);
mutex_init(&fs_info->qgroup_ioctl_lock);
fs_info->qgroup_tree = RB_ROOT;
fs_info->qgroup_op_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->qgroup_ulist = NULL;
......@@ -2630,11 +2633,17 @@ int open_ctree(struct super_block *sb,
goto fail;
}
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_srcu;
}
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_dio_bytes;
}
fs_info->dirty_metadata_batch = PAGE_SIZE *
(1 + ilog2(nr_cpu_ids));
......@@ -2667,7 +2676,6 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
......@@ -2694,7 +2702,6 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic_set(&fs_info->reada_works_cnt, 0);
atomic_set(&fs_info->nr_delayed_iputs, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
......@@ -2748,8 +2755,10 @@ int open_ctree(struct super_block *sb,
fs_info->block_group_cache_tree = RB_ROOT;
fs_info->first_logical_byte = (u64)-1;
extent_io_tree_init(&fs_info->freed_extents[0], NULL);
extent_io_tree_init(&fs_info->freed_extents[1], NULL);
extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
fs_info->pinned_extents = &fs_info->freed_extents[0];
set_bit(BTRFS_FS_BARRIER, &fs_info->flags);
......@@ -2776,8 +2785,6 @@ int open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->delayed_iputs_wait);
INIT_LIST_HEAD(&fs_info->pinned_chunks);
/* Usable values until the real ones are cached from the superblock */
fs_info->nodesize = 4096;
fs_info->sectorsize = 4096;
......@@ -3335,6 +3342,8 @@ int open_ctree(struct super_block *sb,
percpu_counter_destroy(&fs_info->delalloc_bytes);
fail_dirty_metadata_bytes:
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
fail_dio_bytes:
percpu_counter_destroy(&fs_info->dio_bytes);
fail_srcu:
cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
......@@ -4016,6 +4025,10 @@ void close_ctree(struct btrfs_fs_info *fs_info)
percpu_counter_sum(&fs_info->delalloc_bytes));
}
if (percpu_counter_sum(&fs_info->dio_bytes))
btrfs_info(fs_info, "at unmount dio bytes count %lld",
percpu_counter_sum(&fs_info->dio_bytes));
btrfs_sysfs_remove_mounted(fs_info);
btrfs_sysfs_remove_fsid(fs_info->fs_devices);
......@@ -4042,25 +4055,17 @@ void close_ctree(struct btrfs_fs_info *fs_info)
btrfsic_unmount(fs_info->fs_devices);
#endif
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
btrfs_close_devices(fs_info->fs_devices);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
percpu_counter_destroy(&fs_info->dio_bytes);
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
cleanup_srcu_struct(&fs_info->subvol_srcu);
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
while (!list_empty(&fs_info->pinned_chunks)) {
struct extent_map *em;
em = list_first_entry(&fs_info->pinned_chunks,
struct extent_map, list);
list_del_init(&em->list);
free_extent_map(em);
}
}
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
......@@ -4114,7 +4119,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
* So here we should only check item pointers, not item data.
*/
if (btrfs_header_level(buf) == 0 &&
btrfs_check_leaf_relaxed(fs_info, buf)) {
btrfs_check_leaf_relaxed(buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}
......@@ -4157,10 +4162,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
struct btrfs_key *first_key)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
return btree_read_extent_buffer_pages(buf, parent_transid,
level, first_key);
}
......@@ -4484,10 +4486,17 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_device *dev, *tmp;
btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
ASSERT(list_empty(&cur_trans->dirty_bgs));
ASSERT(list_empty(&cur_trans->io_bgs));
list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list,
post_commit_list) {
list_del_init(&dev->post_commit_list);
}
btrfs_destroy_delayed_refs(cur_trans, fs_info);
cur_trans->state = TRANS_STATE_COMMIT_START;
......
......@@ -39,6 +39,8 @@ static inline u64 btrfs_sb_offset(int mirror)
struct btrfs_device;
struct btrfs_fs_devices;
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 parent_transid, int level,
struct btrfs_key *first_key);
......@@ -48,7 +50,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr);
void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
void btrfs_clean_tree_block(struct extent_buffer *buf);
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
......@@ -123,8 +125,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
extent_submit_bio_start_t *submit_bio_start);
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num);
int btrfs_write_tree_block(struct extent_buffer *buf);
void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
......@@ -134,7 +134,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans,
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
......
此差异已折叠。
此差异已折叠。
......@@ -9,26 +9,33 @@
/* bits for the extent state */
#define EXTENT_DIRTY (1U << 0)
#define EXTENT_WRITEBACK (1U << 1)
#define EXTENT_UPTODATE (1U << 2)
#define EXTENT_LOCKED (1U << 3)
#define EXTENT_NEW (1U << 4)
#define EXTENT_DELALLOC (1U << 5)
#define EXTENT_DEFRAG (1U << 6)
#define EXTENT_BOUNDARY (1U << 9)
#define EXTENT_NODATASUM (1U << 10)
#define EXTENT_CLEAR_META_RESV (1U << 11)
#define EXTENT_NEED_WAIT (1U << 12)
#define EXTENT_DAMAGED (1U << 13)
#define EXTENT_NORESERVE (1U << 14)
#define EXTENT_QGROUP_RESERVED (1U << 15)
#define EXTENT_CLEAR_DATA_RESV (1U << 16)
#define EXTENT_DELALLOC_NEW (1U << 17)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_UPTODATE (1U << 1)
#define EXTENT_LOCKED (1U << 2)
#define EXTENT_NEW (1U << 3)
#define EXTENT_DELALLOC (1U << 4)
#define EXTENT_DEFRAG (1U << 5)
#define EXTENT_BOUNDARY (1U << 6)
#define EXTENT_NODATASUM (1U << 7)
#define EXTENT_CLEAR_META_RESV (1U << 8)
#define EXTENT_NEED_WAIT (1U << 9)
#define EXTENT_DAMAGED (1U << 10)
#define EXTENT_NORESERVE (1U << 11)
#define EXTENT_QGROUP_RESERVED (1U << 12)
#define EXTENT_CLEAR_DATA_RESV (1U << 13)
#define EXTENT_DELALLOC_NEW (1U << 14)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
/*
* Redefined bits above which are used only in the device allocation tree,
* shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
* / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit
* manipulation functions
*/
#define CHUNK_ALLOCATED EXTENT_DIRTY
#define CHUNK_TRIMMED EXTENT_DEFRAG
/*
* flags for bio submission. The high bits indicate the compression
* type for this bio
......@@ -88,9 +95,6 @@ struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset);
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
......@@ -100,17 +104,34 @@ struct extent_io_ops {
* The following callbacks must be always defined, the function
* pointer will be called unconditionally.
*/
extent_submit_bio_hook_t *submit_bio_hook;
blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int mirror);
};
enum {
IO_TREE_FS_INFO_FREED_EXTENTS0,
IO_TREE_FS_INFO_FREED_EXTENTS1,
IO_TREE_INODE_IO,
IO_TREE_INODE_IO_FAILURE,
IO_TREE_RELOC_BLOCKS,
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
IO_TREE_SELFTEST,
};
struct extent_io_tree {
struct rb_root state;
struct btrfs_fs_info *fs_info;
void *private_data;
u64 dirty_bytes;
int track_uptodate;
bool track_uptodate;
/* Who owns this io tree, should be one of IO_TREE_* */
u8 owner;
spinlock_t lock;
const struct extent_io_ops *ops;
};
......@@ -146,14 +167,9 @@ struct extent_buffer {
struct rcu_head rcu_head;
pid_t lock_owner;
/* count of read lock holders on the extent buffer */
atomic_t write_locks;
atomic_t read_locks;
atomic_t blocking_writers;
atomic_t blocking_readers;
atomic_t spinning_readers;
atomic_t spinning_writers;
short lock_nested;
bool lock_nested;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
short log_index;
......@@ -171,6 +187,10 @@ struct extent_buffer {
wait_queue_head_t read_lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
atomic_t spinning_writers;
atomic_t spinning_readers;
atomic_t read_locks;
atomic_t write_locks;
struct list_head leak_list;
#endif
};
......@@ -239,7 +259,10 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
u64 start, u64 len,
int create);
void extent_io_tree_init(struct extent_io_tree *tree, void *private_data);
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
struct extent_io_tree *tree, unsigned int owner,
void *private_data);
void extent_io_tree_release(struct extent_io_tree *tree);
int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
......@@ -309,6 +332,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits);
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
......@@ -376,6 +401,8 @@ static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
......@@ -405,8 +432,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE 0
#define WAIT_COMPLETE 1
#define WAIT_PAGE_LOCK 2
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait,
int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
......@@ -487,8 +513,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
struct extent_io_tree *io_tree, u64 start,
struct page *page, u64 ino, unsigned int pg_offset);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int mirror_num);
int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
/*
* When IO fails, either with EIO or csum verification fails, we
......
......@@ -4,6 +4,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "ctree.h"
#include "volumes.h"
#include "extent_map.h"
#include "compression.h"
......@@ -337,6 +338,37 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
try_merge_map(tree, em);
}
static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
{
struct map_lookup *map = em->map_lookup;
u64 stripe_size = em->orig_block_len;
int i;
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_bio_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
set_extent_bits_nowait(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1, bits);
}
}
static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
{
struct map_lookup *map = em->map_lookup;
u64 stripe_size = em->orig_block_len;
int i;
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_bio_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
__clear_extent_bit(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1, bits,
0, 0, NULL, GFP_NOWAIT, NULL);
}
}
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
......@@ -357,6 +389,10 @@ int add_extent_mapping(struct extent_map_tree *tree,
goto out;
setup_extent_mapping(tree, em, modified);
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
extent_map_device_set_bits(em, CHUNK_ALLOCATED);
extent_map_device_clear_bits(em, CHUNK_TRIMMED);
}
out:
return ret;
}
......@@ -438,6 +474,8 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
rb_erase_cached(&em->rb_node, &tree->map);
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
list_del_init(&em->list);
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
RB_CLEAR_NODE(&em->rb_node);
}
......
......@@ -413,6 +413,16 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
return ret;
}
/*
* btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
* @inode: Owner of the data inside the bio
* @bio: Contains the data to be checksummed
* @file_start: offset in file this bio begins to describe
* @contig: Boolean. If true/1 means all bio vecs in this bio are
* contiguous and they begin at @file_start in the file. False/0
* means this bio can contains potentially discontigous bio vecs
* so the logical offset of each should be calculated separately.
*/
blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig)
{
......@@ -458,8 +468,6 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
BUG_ON(!ordered); /* Logic error */
}
data = kmap_atomic(bvec.bv_page);
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
bvec.bv_len + fs_info->sectorsize
- 1);
......@@ -469,10 +477,9 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
offset < ordered->file_offset) {
unsigned long bytes_left;
kunmap_atomic(data);
sums->len = this_sum_bytes;
this_sum_bytes = 0;
btrfs_add_ordered_sum(inode, ordered, sums);
btrfs_add_ordered_sum(ordered, sums);
btrfs_put_ordered_extent(ordered);
bytes_left = bio->bi_iter.bi_size - total_bytes;
......@@ -489,16 +496,16 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
+ total_bytes;
index = 0;
data = kmap_atomic(bvec.bv_page);
}
sums->sums[index] = ~(u32)0;
data = kmap_atomic(bvec.bv_page);
sums->sums[index]
= btrfs_csum_data(data + bvec.bv_offset
+ (i * fs_info->sectorsize),
sums->sums[index],
fs_info->sectorsize);
kunmap_atomic(data);
btrfs_csum_final(sums->sums[index],
(char *)(sums->sums + index));
index++;
......@@ -507,10 +514,9 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
total_bytes += fs_info->sectorsize;
}
kunmap_atomic(data);
}
this_sum_bytes = 0;
btrfs_add_ordered_sum(inode, ordered, sums);
btrfs_add_ordered_sum(ordered, sums);
btrfs_put_ordered_extent(ordered);
return 0;
}
......@@ -551,7 +557,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
*/
u32 new_size = (bytenr - key->offset) >> blocksize_bits;
new_size *= csum_size;
btrfs_truncate_item(fs_info, path, new_size, 1);
btrfs_truncate_item(path, new_size, 1);
} else if (key->offset >= bytenr && csum_end > end_byte &&
end_byte > key->offset) {
/*
......@@ -563,7 +569,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
u32 new_size = (csum_end - end_byte) >> blocksize_bits;
new_size *= csum_size;
btrfs_truncate_item(fs_info, path, new_size, 0);
btrfs_truncate_item(path, new_size, 0);
key->offset = end_byte;
btrfs_set_item_key_safe(fs_info, path, key);
......@@ -832,11 +838,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
u32 diff;
u32 free_space;
if (btrfs_leaf_free_space(fs_info, leaf) <
if (btrfs_leaf_free_space(leaf) <
sizeof(struct btrfs_item) + csum_size * 2)
goto insert;
free_space = btrfs_leaf_free_space(fs_info, leaf) -
free_space = btrfs_leaf_free_space(leaf) -
sizeof(struct btrfs_item) - csum_size;
tmp = sums->len - total_bytes;
tmp >>= fs_info->sb->s_blocksize_bits;
......@@ -852,7 +858,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
diff /= csum_size;
diff *= csum_size;
btrfs_extend_item(fs_info, path, diff);
btrfs_extend_item(path, diff);
ret = 0;
goto csum;
}
......
......@@ -754,6 +754,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
struct btrfs_ref ref = { 0 };
struct btrfs_key key;
struct btrfs_key new_key;
u64 ino = btrfs_ino(BTRFS_I(inode));
......@@ -909,11 +910,14 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0) {
ret = btrfs_inc_extent_ref(trans, root,
disk_bytenr, num_bytes, 0,
btrfs_init_generic_ref(&ref,
BTRFS_ADD_DELAYED_REF,
disk_bytenr, num_bytes, 0);
btrfs_init_data_ref(&ref,
root->root_key.objectid,
new_key.objectid,
start - extent_offset);
ret = btrfs_inc_extent_ref(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
}
key.offset = start;
......@@ -993,11 +997,14 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else if (update_refs && disk_bytenr > 0) {
ret = btrfs_free_extent(trans, root,
disk_bytenr, num_bytes, 0,
btrfs_init_generic_ref(&ref,
BTRFS_DROP_DELAYED_REF,
disk_bytenr, num_bytes, 0);
btrfs_init_data_ref(&ref,
root->root_key.objectid,
key.objectid, key.offset -
extent_offset);
key.objectid,
key.offset - extent_offset);
ret = btrfs_free_extent(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
inode_sub_bytes(inode,
extent_end - key.offset);
......@@ -1025,7 +1032,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
continue;
}
BUG_ON(1);
BUG();
}
if (!ret && del_nr > 0) {
......@@ -1050,7 +1057,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (!ret && replace_extent && leafs_visited == 1 &&
(path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
path->locks[0] == BTRFS_WRITE_LOCK) &&
btrfs_leaf_free_space(fs_info, leaf) >=
btrfs_leaf_free_space(leaf) >=
sizeof(struct btrfs_item) + extent_item_size) {
key.objectid = ino;
......@@ -1142,6 +1149,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_file_extent_item *fi;
struct btrfs_ref ref = { 0 };
struct btrfs_key key;
struct btrfs_key new_key;
u64 bytenr;
......@@ -1287,9 +1295,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
extent_end - split);
btrfs_mark_buffer_dirty(leaf);
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
num_bytes, 0);
btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
orig_offset);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
......@@ -1311,6 +1321,9 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
other_start = end;
other_end = 0;
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
num_bytes, 0);
btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
if (extent_mergeable(leaf, path->slots[0] + 1,
ino, bytenr, orig_offset,
&other_start, &other_end)) {
......@@ -1321,9 +1334,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
extent_end = other_end;
del_slot = path->slots[0] + 1;
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
......@@ -1341,9 +1352,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
key.offset = other_start;
del_slot = path->slots[0];
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
......@@ -2165,7 +2174,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
inode_unlock(inode);
goto out;
}
trans->sync = true;
ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx);
if (ret < 0) {
......@@ -3132,6 +3140,7 @@ static long btrfs_fallocate(struct file *file, int mode,
ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
cur_offset, last_byte - cur_offset);
if (ret < 0) {
cur_offset = last_byte;
free_extent_map(em);
break;
}
......@@ -3181,7 +3190,7 @@ static long btrfs_fallocate(struct file *file, int mode,
/* Let go of our reservation. */
if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
btrfs_free_reserved_data_space(inode, data_reserved,
alloc_start, alloc_end - cur_offset);
cur_offset, alloc_end - cur_offset);
extent_changeset_free(data_reserved);
return ret;
}
......
......@@ -88,10 +88,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
return inode;
}
struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path)
struct inode *lookup_free_space_inode(
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct inode *inode = NULL;
u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
......@@ -185,20 +186,19 @@ static int __create_free_space_inode(struct btrfs_root *root,
return 0;
}
int create_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
int create_free_space_inode(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
int ret;
u64 ino;
ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
if (ret < 0)
return ret;
return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
block_group->key.objectid);
return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
ino, block_group->key.objectid);
}
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
......@@ -812,9 +812,9 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
goto out;
}
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
int load_free_space_cache(struct btrfs_block_group_cache *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode;
struct btrfs_path *path;
......@@ -858,7 +858,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
* once created get their ->cached field set to BTRFS_CACHE_FINISHED so
* we will never try to read their inode item while the fs is mounted.
*/
inode = lookup_free_space_inode(fs_info, block_group, path);
inode = lookup_free_space_inode(block_group, path);
if (IS_ERR(inode)) {
btrfs_free_path(path);
return 0;
......@@ -1039,8 +1039,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
return -1;
}
static noinline_for_stack int
write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
static noinline_for_stack int write_pinned_extent_entries(
struct btrfs_block_group_cache *block_group,
struct btrfs_io_ctl *io_ctl,
int *entries)
......@@ -1059,7 +1058,7 @@ write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
* We shouldn't have switched the pinned extents yet so this is the
* right one
*/
unpin = fs_info->pinned_extents;
unpin = block_group->fs_info->pinned_extents;
start = block_group->key.objectid;
......@@ -1235,7 +1234,6 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_io_ctl *io_ctl,
struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_state *cached_state = NULL;
LIST_HEAD(bitmap_list);
int entries = 0;
......@@ -1293,8 +1291,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* If this changes while we are working we'll get added back to
* the dirty list and redo it. No locking needed
*/
ret = write_pinned_extent_entries(fs_info, block_group,
io_ctl, &entries);
ret = write_pinned_extent_entries(block_group, io_ctl, &entries);
if (ret)
goto out_nospc_locked;
......@@ -1370,11 +1367,11 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
goto out;
}
int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode;
int ret = 0;
......@@ -1386,7 +1383,7 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
}
spin_unlock(&block_group->lock);
inode = lookup_free_space_inode(fs_info, block_group, path);
inode = lookup_free_space_inode(block_group, path);
if (IS_ERR(inode))
return 0;
......@@ -3040,11 +3037,11 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
* returns zero and sets up cluster if things worked out, otherwise
* it returns -enospc
*/
int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry, *tmp;
LIST_HEAD(bitmaps);
......@@ -3366,10 +3363,6 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
em = lookup_extent_mapping(em_tree, block_group->key.objectid,
1);
BUG_ON(!em); /* logic error, can't happen */
/*
* remove_extent_mapping() will delete us from the pinned_chunks
* list, which is protected by the chunk mutex.
*/
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
mutex_unlock(&fs_info->chunk_mutex);
......
......@@ -38,11 +38,10 @@ struct btrfs_free_space_op {
struct btrfs_io_ctl;
struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path);
int create_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct inode *lookup_free_space_inode(
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int create_free_space_inode(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
......@@ -51,13 +50,11 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
int load_free_space_cache(struct btrfs_block_group_cache *block_group);
int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
......@@ -95,8 +92,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size);
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
......
......@@ -76,10 +76,11 @@ static int add_new_free_space_info(struct btrfs_trans_handle *trans,
EXPORT_FOR_TESTS
struct btrfs_free_space_info *search_free_space_info(
struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, int cow)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *root = fs_info->free_space_root;
struct btrfs_key key;
int ret;
......@@ -253,7 +254,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
info = search_free_space_info(trans, fs_info, block_group, path, 1);
info = search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
......@@ -398,7 +399,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
info = search_free_space_info(trans, fs_info, block_group, path, 1);
info = search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
......@@ -463,8 +464,7 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
if (new_extents == 0)
return 0;
info = search_free_space_info(trans, trans->fs_info, block_group, path,
1);
info = search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
......@@ -793,8 +793,7 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
return ret;
}
info = search_free_space_info(NULL, trans->fs_info, block_group, path,
0);
info = search_free_space_info(NULL, block_group, path, 0);
if (IS_ERR(info))
return PTR_ERR(info);
flags = btrfs_free_space_flags(path->nodes[0], info);
......@@ -977,7 +976,6 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_free_space_info *info;
u32 flags;
int ret;
......@@ -988,7 +986,7 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
return ret;
}
info = search_free_space_info(NULL, fs_info, block_group, path, 0);
info = search_free_space_info(NULL, block_group, path, 0);
if (IS_ERR(info))
return PTR_ERR(info);
flags = btrfs_free_space_flags(path->nodes[0], info);
......@@ -1150,7 +1148,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
return PTR_ERR(trans);
set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
free_space_root = btrfs_create_tree(trans, fs_info,
free_space_root = btrfs_create_tree(trans,
BTRFS_FREE_SPACE_TREE_OBJECTID);
if (IS_ERR(free_space_root)) {
ret = PTR_ERR(free_space_root);
......@@ -1248,7 +1246,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
list_del(&free_space_root->dirty_list);
btrfs_tree_lock(free_space_root->node);
clean_tree_block(fs_info, free_space_root->node);
btrfs_clean_tree_block(free_space_root->node);
btrfs_tree_unlock(free_space_root->node);
btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
0, 1);
......@@ -1534,14 +1532,12 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_fs_info *fs_info;
struct btrfs_free_space_info *info;
struct btrfs_path *path;
u32 extent_count, flags;
int ret;
block_group = caching_ctl->block_group;
fs_info = block_group->fs_info;
path = btrfs_alloc_path();
if (!path)
......@@ -1555,7 +1551,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
path->search_commit_root = 1;
path->reada = READA_FORWARD;
info = search_free_space_info(NULL, fs_info, block_group, path, 0);
info = search_free_space_info(NULL, block_group, path, 0);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
......
......@@ -30,7 +30,6 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_free_space_info *
search_free_space_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, int cow);
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
......
......@@ -170,7 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
memmove_extent_buffer(leaf, ptr, ptr + del_len,
item_size - (ptr + del_len - item_start));
btrfs_truncate_item(root->fs_info, path, item_size - del_len, 1);
btrfs_truncate_item(path, item_size - del_len, 1);
out:
btrfs_free_path(path);
......@@ -234,7 +234,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_size - (ptr + sub_item_len - item_start));
btrfs_truncate_item(root->fs_info, path, item_size - sub_item_len, 1);
btrfs_truncate_item(path, item_size - sub_item_len, 1);
out:
btrfs_free_path(path);
......@@ -288,7 +288,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
name, name_len, NULL))
goto out;
btrfs_extend_item(root->fs_info, path, ins_len);
btrfs_extend_item(path, ins_len);
ret = 0;
}
if (ret < 0)
......@@ -347,7 +347,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
goto out;
old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
btrfs_extend_item(fs_info, path, ins_len);
btrfs_extend_item(path, ins_len);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_ref);
ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
......
此差异已折叠。
此差异已折叠。
......@@ -12,10 +12,82 @@
#include "extent_io.h"
#include "locking.h"
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
#ifdef CONFIG_BTRFS_DEBUG
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
{
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
}
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
{
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
}
static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
{
WARN_ON(atomic_read(&eb->spinning_writers));
}
static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
{
atomic_inc(&eb->spinning_readers);
}
static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
{
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
}
static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
{
atomic_inc(&eb->read_locks);
}
static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
{
atomic_dec(&eb->read_locks);
}
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->read_locks));
}
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
{
atomic_inc(&eb->write_locks);
}
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
{
atomic_dec(&eb->write_locks);
}
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->write_locks));
}
#else
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
#endif
void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
{
trace_btrfs_set_lock_blocking_read(eb);
/*
* No lock is required. The lock owner may change if we have a read
* lock, but it won't change to or away from us. If we have the write
......@@ -25,13 +97,13 @@ void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
return;
btrfs_assert_tree_read_locked(eb);
atomic_inc(&eb->blocking_readers);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
btrfs_assert_spinning_readers_put(eb);
read_unlock(&eb->lock);
}
void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
{
trace_btrfs_set_lock_blocking_write(eb);
/*
* No lock is required. The lock owner may change if we have a read
* lock, but it won't change to or away from us. If we have the write
......@@ -40,8 +112,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
if (eb->lock_nested && current->pid == eb->lock_owner)
return;
if (atomic_read(&eb->blocking_writers) == 0) {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
btrfs_assert_spinning_writers_put(eb);
btrfs_assert_tree_locked(eb);
atomic_inc(&eb->blocking_writers);
write_unlock(&eb->lock);
......@@ -50,6 +121,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
{
trace_btrfs_clear_lock_blocking_read(eb);
/*
* No lock is required. The lock owner may change if we have a read
* lock, but it won't change to or away from us. If we have the write
......@@ -59,7 +131,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
return;
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
btrfs_assert_spinning_readers_get(eb);
/* atomic_dec_and_test implies a barrier */
if (atomic_dec_and_test(&eb->blocking_readers))
cond_wake_up_nomb(&eb->read_lock_wq);
......@@ -67,6 +139,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
{
trace_btrfs_clear_lock_blocking_write(eb);
/*
* no lock is required. The lock owner may change if
* we have a read lock, but it won't change to or away
......@@ -77,8 +150,7 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
return;
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
btrfs_assert_spinning_writers_get(eb);
/* atomic_dec_and_test implies a barrier */
if (atomic_dec_and_test(&eb->blocking_writers))
cond_wake_up_nomb(&eb->write_lock_wq);
......@@ -90,6 +162,10 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
*/
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
u64 start_ns = 0;
if (trace_btrfs_tree_read_lock_enabled())
start_ns = ktime_get_ns();
again:
BUG_ON(!atomic_read(&eb->blocking_writers) &&
current->pid == eb->lock_owner);
......@@ -104,8 +180,9 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
* called on a partly (write-)locked tree.
*/
BUG_ON(eb->lock_nested);
eb->lock_nested = 1;
eb->lock_nested = true;
read_unlock(&eb->lock);
trace_btrfs_tree_read_lock(eb, start_ns);
return;
}
if (atomic_read(&eb->blocking_writers)) {
......@@ -114,8 +191,9 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
btrfs_assert_tree_read_locks_get(eb);
btrfs_assert_spinning_readers_get(eb);
trace_btrfs_tree_read_lock(eb, start_ns);
}
/*
......@@ -133,8 +211,9 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
read_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
btrfs_assert_tree_read_locks_get(eb);
btrfs_assert_spinning_readers_get(eb);
trace_btrfs_tree_read_lock_atomic(eb);
return 1;
}
......@@ -154,8 +233,9 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
read_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
btrfs_assert_tree_read_locks_get(eb);
btrfs_assert_spinning_readers_get(eb);
trace_btrfs_try_tree_read_lock(eb);
return 1;
}
......@@ -175,9 +255,10 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
write_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->write_locks);
atomic_inc(&eb->spinning_writers);
btrfs_assert_tree_write_locks_get(eb);
btrfs_assert_spinning_writers_get(eb);
eb->lock_owner = current->pid;
trace_btrfs_try_tree_write_lock(eb);
return 1;
}
......@@ -186,6 +267,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
trace_btrfs_tree_read_unlock(eb);
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
......@@ -193,13 +275,12 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
* field only matters to the lock owner.
*/
if (eb->lock_nested && current->pid == eb->lock_owner) {
eb->lock_nested = 0;
eb->lock_nested = false;
return;
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
atomic_dec(&eb->read_locks);
btrfs_assert_spinning_readers_put(eb);
btrfs_assert_tree_read_locks_put(eb);
read_unlock(&eb->lock);
}
......@@ -208,6 +289,7 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
trace_btrfs_tree_read_unlock_blocking(eb);
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
......@@ -215,7 +297,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
* field only matters to the lock owner.
*/
if (eb->lock_nested && current->pid == eb->lock_owner) {
eb->lock_nested = 0;
eb->lock_nested = false;
return;
}
btrfs_assert_tree_read_locked(eb);
......@@ -223,7 +305,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
/* atomic_dec_and_test implies a barrier */
if (atomic_dec_and_test(&eb->blocking_readers))
cond_wake_up_nomb(&eb->read_lock_wq);
atomic_dec(&eb->read_locks);
btrfs_assert_tree_read_locks_put(eb);
}
/*
......@@ -232,6 +314,11 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
*/
void btrfs_tree_lock(struct extent_buffer *eb)
{
u64 start_ns = 0;
if (trace_btrfs_tree_lock_enabled())
start_ns = ktime_get_ns();
WARN_ON(eb->lock_owner == current->pid);
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
......@@ -242,10 +329,10 @@ void btrfs_tree_lock(struct extent_buffer *eb)
write_unlock(&eb->lock);
goto again;
}
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
atomic_inc(&eb->write_locks);
btrfs_assert_spinning_writers_get(eb);
btrfs_assert_tree_write_locks_get(eb);
eb->lock_owner = current->pid;
trace_btrfs_tree_lock(eb, start_ns);
}
/*
......@@ -258,28 +345,18 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
BUG_ON(blockers > 1);
btrfs_assert_tree_locked(eb);
trace_btrfs_tree_unlock(eb);
eb->lock_owner = 0;
atomic_dec(&eb->write_locks);
btrfs_assert_tree_write_locks_put(eb);
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
btrfs_assert_no_spinning_writers(eb);
atomic_dec(&eb->blocking_writers);
/* Use the lighter barrier after atomic */
smp_mb__after_atomic();
cond_wake_up_nomb(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
btrfs_assert_spinning_writers_put(eb);
write_unlock(&eb->lock);
}
}
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->write_locks));
}
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->read_locks));
}
此差异已折叠。
此差异已折叠。
......@@ -189,7 +189,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
btrfs_info(fs_info,
"leaf %llu gen %llu total ptrs %d free space %d owner %llu",
btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
btrfs_leaf_free_space(l), btrfs_header_owner(l));
print_eb_refs_lock(l);
for (i = 0 ; i < nr ; i++) {
item = btrfs_item_nr(i);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册