提交 9785dbdf 编写于 作者: C Chris Mason

Merge branch 'for-chris' of git://git.jan-o-sch.net/btrfs-unstable into integration

......@@ -8,6 +8,6 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o
reada.o backref.o ulist.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
此差异已折叠。
......@@ -20,6 +20,7 @@
#define __BTRFS_BACKREF__
#include "ioctl.h"
#include "ulist.h"
struct inode_fs_paths {
struct btrfs_path *btrfs_path;
......@@ -54,6 +55,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 seq, struct ulist **roots);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
struct btrfs_path *path);
......
......@@ -240,7 +240,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
new_root_objectid, &disk_key, level,
buf->start, 0);
buf->start, 0, 1);
if (IS_ERR(cow))
return PTR_ERR(cow);
......@@ -261,9 +261,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
ret = btrfs_inc_ref(trans, root, cow, 0, 1);
if (ret)
return ret;
......@@ -350,14 +350,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if ((owner == root->root_key.objectid ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
ret = btrfs_inc_ref(trans, root, buf, 1);
ret = btrfs_inc_ref(trans, root, buf, 1, 1);
BUG_ON(ret);
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_dec_ref(trans, root, buf, 0);
ret = btrfs_dec_ref(trans, root, buf, 0, 1);
BUG_ON(ret);
ret = btrfs_inc_ref(trans, root, cow, 1);
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
BUG_ON(ret);
}
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
......@@ -365,9 +365,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
ret = btrfs_inc_ref(trans, root, cow, 0, 1);
BUG_ON(ret);
}
if (new_flags != 0) {
......@@ -381,11 +381,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
ret = btrfs_inc_ref(trans, root, cow, 0, 1);
BUG_ON(ret);
ret = btrfs_dec_ref(trans, root, buf, 1);
ret = btrfs_dec_ref(trans, root, buf, 1, 1);
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
......@@ -446,7 +446,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
root->root_key.objectid, &disk_key,
level, search_start, empty_size);
level, search_start, empty_size, 1);
if (IS_ERR(cow))
return PTR_ERR(cow);
......@@ -484,7 +484,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
last_ref, 1);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
......@@ -500,7 +500,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid);
btrfs_mark_buffer_dirty(parent);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
last_ref, 1);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
......@@ -957,7 +957,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
free_extent_buffer(mid);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
/* once for the root ptr */
free_extent_buffer(mid);
return 0;
......@@ -1015,7 +1015,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1);
btrfs_free_tree_block(trans, root, right, 0, 1, 0);
free_extent_buffer(right);
right = NULL;
} else {
......@@ -1055,7 +1055,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
free_extent_buffer(mid);
mid = NULL;
} else {
......@@ -2089,7 +2089,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
root->root_key.objectid, &lower_key,
level, root->node->start, 0);
level, root->node->start, 0, 0);
if (IS_ERR(c))
return PTR_ERR(c);
......@@ -2216,7 +2216,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
root->root_key.objectid,
&disk_key, level, c->start, 0);
&disk_key, level, c->start, 0, 0);
if (IS_ERR(split))
return PTR_ERR(split);
......@@ -2970,7 +2970,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid,
&disk_key, 0, l->start, 0);
&disk_key, 0, l->start, 0, 0);
if (IS_ERR(right))
return PTR_ERR(right);
......@@ -3781,7 +3781,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
btrfs_free_tree_block(trans, root, leaf, 0, 1);
btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
return 0;
}
/*
......
......@@ -2439,11 +2439,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
u64 hint, u64 empty_size, int for_cow);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref);
u64 parent, int last_ref, int for_cow);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
......@@ -2463,17 +2463,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
u64 search_end, struct btrfs_key *ins,
u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int for_cow);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
......@@ -2485,7 +2485,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
u64 root_objectid, u64 owner, u64 offset, int for_cow);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
......@@ -2644,10 +2644,18 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
}
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
++p->slots[0];
if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
return btrfs_next_leaf(root, p);
return 0;
}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
void btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int update_ref);
struct btrfs_block_rsv *block_rsv, int update_ref,
int for_reloc);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
......
......@@ -101,6 +101,11 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
return -1;
if (ref1->type > ref2->type)
return 1;
/* merging of sequenced refs is not allowed */
if (ref1->seq < ref2->seq)
return -1;
if (ref1->seq > ref2->seq)
return 1;
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
......@@ -150,16 +155,22 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
/*
* find an head entry based on bytenr. This returns the delayed ref
* head if it was able to find one, or NULL if nothing was in that spot
* head if it was able to find one, or NULL if nothing was in that spot.
* If return_bigger is given, the next bigger entry is returned if no exact
* match is found.
*/
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
u64 bytenr,
struct btrfs_delayed_ref_node **last)
struct btrfs_delayed_ref_node **last,
int return_bigger)
{
struct rb_node *n = root->rb_node;
struct rb_node *n;
struct btrfs_delayed_ref_node *entry;
int cmp;
int cmp = 0;
again:
n = root->rb_node;
entry = NULL;
while (n) {
entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
WARN_ON(!entry->in_tree);
......@@ -182,6 +193,19 @@ static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
else
return entry;
}
if (entry && return_bigger) {
if (cmp > 0) {
n = rb_next(&entry->rb_node);
if (!n)
n = rb_first(root);
entry = rb_entry(n, struct btrfs_delayed_ref_node,
rb_node);
bytenr = entry->bytenr;
return_bigger = 0;
goto again;
}
return entry;
}
return NULL;
}
......@@ -209,6 +233,24 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
{
struct seq_list *elem;
assert_spin_locked(&delayed_refs->lock);
if (list_empty(&delayed_refs->seq_head))
return 0;
elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
seq, elem->seq, delayed_refs);
return 1;
}
return 0;
}
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 start)
{
......@@ -223,20 +265,8 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
node = rb_first(&delayed_refs->root);
} else {
ref = NULL;
find_ref_head(&delayed_refs->root, start, &ref);
find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
if (ref) {
struct btrfs_delayed_ref_node *tmp;
node = rb_prev(&ref->rb_node);
while (node) {
tmp = rb_entry(node,
struct btrfs_delayed_ref_node,
rb_node);
if (tmp->bytenr < start)
break;
ref = tmp;
node = rb_prev(&ref->rb_node);
}
node = &ref->rb_node;
} else
node = rb_first(&delayed_refs->root);
......@@ -390,7 +420,8 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
* this does all the dirty work in terms of maintaining the correct
* overall modification count.
*/
static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes,
int action, int is_data)
......@@ -437,6 +468,7 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
ref->action = 0;
ref->is_head = 1;
ref->in_tree = 1;
ref->seq = 0;
head_ref = btrfs_delayed_node_to_head(ref);
head_ref->must_insert_reserved = must_insert_reserved;
......@@ -468,14 +500,17 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
/*
* helper to insert a delayed tree ref into the rbtree.
*/
static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action)
u64 ref_root, int level, int action,
int for_cow)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
......@@ -491,14 +526,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
seq = inc_delayed_seq(delayed_refs);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
if (parent) {
full_ref->parent = parent;
full_ref->parent = parent;
full_ref->root = ref_root;
if (parent)
ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
} else {
full_ref->root = ref_root;
else
ref->type = BTRFS_TREE_BLOCK_REF_KEY;
}
full_ref->level = level;
trace_btrfs_delayed_tree_ref(ref, full_ref, action);
......@@ -522,15 +560,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
/*
* helper to insert a delayed data ref into the rbtree.
*/
static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, u64 owner, u64 offset,
int action)
int action, int for_cow)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
......@@ -546,14 +586,18 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
seq = inc_delayed_seq(delayed_refs);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
if (parent) {
full_ref->parent = parent;
full_ref->parent = parent;
full_ref->root = ref_root;
if (parent)
ref->type = BTRFS_SHARED_DATA_REF_KEY;
} else {
full_ref->root = ref_root;
else
ref->type = BTRFS_EXTENT_DATA_REF_KEY;
}
full_ref->objectid = owner;
full_ref->offset = offset;
......@@ -580,10 +624,12 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
* to make sure the delayed ref is eventually processed before this
* transaction commits.
*/
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op)
struct btrfs_delayed_extent_op *extent_op,
int for_cow)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
......@@ -610,13 +656,17 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
action, 0);
ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, action, 0);
BUG_ON(ret);
ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
parent, ref_root, level, action);
ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
BUG_ON(ret);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
......@@ -624,11 +674,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
/*
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op)
struct btrfs_delayed_extent_op *extent_op,
int for_cow)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
......@@ -655,18 +707,23 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
action, 1);
ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, action, 1);
BUG_ON(ret);
ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
parent, ref_root, owner, offset, action);
ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
BUG_ON(ret);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op)
{
......@@ -683,11 +740,13 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
BUG_ON(ret);
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
......@@ -704,7 +763,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
struct btrfs_delayed_ref_root *delayed_refs;
delayed_refs = &trans->transaction->delayed_refs;
ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
if (ref)
return btrfs_delayed_node_to_head(ref);
return NULL;
......
......@@ -33,6 +33,9 @@ struct btrfs_delayed_ref_node {
/* the size of the extent */
u64 num_bytes;
/* seq number to keep track of insertion order */
u64 seq;
/* ref count on this data structure */
atomic_t refs;
......@@ -98,19 +101,15 @@ struct btrfs_delayed_ref_head {
struct btrfs_delayed_tree_ref {
struct btrfs_delayed_ref_node node;
union {
u64 root;
u64 parent;
};
u64 root;
u64 parent;
int level;
};
struct btrfs_delayed_data_ref {
struct btrfs_delayed_ref_node node;
union {
u64 root;
u64 parent;
};
u64 root;
u64 parent;
u64 objectid;
u64 offset;
};
......@@ -140,6 +139,26 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
/*
* seq number of delayed refs. We need to know if a backref was being
* added before the currently processed ref or afterwards.
*/
u64 seq;
/*
* seq_list holds a list of all seq numbers that are currently being
* added to the list. While walking backrefs (btrfs_find_all_roots,
* qgroups), which might take some time, no newer ref must be processed,
* as it might influence the outcome of the walk.
*/
struct list_head seq_head;
/*
* when the only refs we have in the list must not be processed, we want
* to wait for more refs to show up or for the end of backref walking.
*/
wait_queue_head_t seq_wait;
};
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
......@@ -151,16 +170,21 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
}
}
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_extent_op *extent_op,
int for_cow);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
struct btrfs_delayed_extent_op *extent_op,
int for_cow);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
......@@ -170,6 +194,60 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
struct seq_list {
struct list_head list;
u64 seq;
};
static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
{
assert_spin_locked(&delayed_refs->lock);
++delayed_refs->seq;
return delayed_refs->seq;
}
static inline void
btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
assert_spin_locked(&delayed_refs->lock);
elem->seq = delayed_refs->seq;
list_add_tail(&elem->list, &delayed_refs->seq_head);
}
static inline void
btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
spin_lock(&delayed_refs->lock);
list_del(&elem->list);
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
if (for_cow)
return 0;
if (rootid == BTRFS_FS_TREE_OBJECTID)
return 1;
if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
......
......@@ -1243,7 +1243,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->ref_cows = 0;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
BTRFS_TREE_LOG_OBJECTID, NULL,
0, 0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
......
......@@ -1871,20 +1871,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset)
u64 root_objectid, u64 owner, u64 offset, int for_cow)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
root_objectid == BTRFS_TREE_LOG_OBJECTID);
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
BTRFS_ADD_DELAYED_REF, NULL);
BTRFS_ADD_DELAYED_REF, NULL, for_cow);
} else {
ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner, offset,
BTRFS_ADD_DELAYED_REF, NULL);
BTRFS_ADD_DELAYED_REF, NULL, for_cow);
}
return ret;
}
......@@ -2231,6 +2235,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
}
/*
* locked_ref is the head node, so we have to go one
* node back for any delayed ref updates
*/
ref = select_delayed_ref(locked_ref);
if (ref && ref->seq &&
btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
/*
* there are still refs with lower seq numbers in the
* process of being added. Don't run this ref yet.
*/
list_del_init(&locked_ref->cluster);
mutex_unlock(&locked_ref->mutex);
locked_ref = NULL;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
cond_resched();
spin_lock(&delayed_refs->lock);
continue;
}
/*
* record the must insert reserved flag before we
* drop the spin lock.
......@@ -2241,11 +2267,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
extent_op = locked_ref->extent_op;
locked_ref->extent_op = NULL;
/*
* locked_ref is the head node, so we have to go one
* node back for any delayed ref updates
*/
ref = select_delayed_ref(locked_ref);
if (!ref) {
/* All delayed refs have been processed, Go ahead
* and send the head node to run_one_delayed_ref,
......@@ -2276,7 +2297,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
/*
* we modified num_entries, but as we're currently running
* delayed refs, skip
* wake_up(&delayed_refs->seq_wait);
* here.
*/
spin_unlock(&delayed_refs->lock);
ret = run_one_delayed_ref(trans, root, ref, extent_op,
......@@ -2297,6 +2323,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
return count;
}
static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
unsigned long num_refs)
{
struct list_head *first_seq = delayed_refs->seq_head.next;
spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
wait_event(delayed_refs->seq_wait,
num_refs != delayed_refs->num_entries ||
delayed_refs->seq_head.next != first_seq);
pr_debug("done waiting for more refs (num %ld, first %p)\n",
delayed_refs->num_entries, delayed_refs->seq_head.next);
spin_lock(&delayed_refs->lock);
}
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
......@@ -2312,8 +2355,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
int ret;
u64 delayed_start;
int run_all = count == (unsigned long)-1;
int run_most = 0;
unsigned long num_refs = 0;
int consider_waiting;
if (root == root->fs_info->extent_root)
root = root->fs_info->tree_root;
......@@ -2325,6 +2371,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
consider_waiting = 0;
spin_lock(&delayed_refs->lock);
if (count == 0) {
count = delayed_refs->num_entries * 2;
......@@ -2341,11 +2388,35 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* of refs to process starting at the first one we are able to
* lock
*/
delayed_start = delayed_refs->run_delayed_start;
ret = btrfs_find_ref_cluster(trans, &cluster,
delayed_refs->run_delayed_start);
if (ret)
break;
if (delayed_start >= delayed_refs->run_delayed_start) {
if (consider_waiting == 0) {
/*
* btrfs_find_ref_cluster looped. let's do one
* more cycle. if we don't run any delayed ref
* during that cycle (because we can't because
* all of them are blocked) and if the number of
* refs doesn't change, we avoid busy waiting.
*/
consider_waiting = 1;
num_refs = delayed_refs->num_entries;
} else {
wait_for_more_refs(delayed_refs, num_refs);
/*
* after waiting, things have changed. we
* dropped the lock and someone else might have
* run some refs, built new clusters and so on.
* therefore, we restart staleness detection.
*/
consider_waiting = 0;
}
}
ret = run_clustered_refs(trans, root, &cluster);
BUG_ON(ret < 0);
......@@ -2353,6 +2424,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (count == 0)
break;
if (ret || delayed_refs->run_delayed_start == 0) {
/* refs were run, let's reset staleness detection */
consider_waiting = 0;
}
}
if (run_all) {
......@@ -2410,7 +2486,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->update_key = 0;
extent_op->is_data = is_data ? 1 : 0;
ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
num_bytes, extent_op);
if (ret)
kfree(extent_op);
return ret;
......@@ -2595,7 +2672,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
int full_backref, int inc)
int full_backref, int inc, int for_cow)
{
u64 bytenr;
u64 num_bytes;
......@@ -2608,7 +2685,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
int level;
int ret = 0;
int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
u64, u64, u64, u64, u64, u64);
u64, u64, u64, u64, u64, u64, int);
ref_root = btrfs_header_owner(buf);
nritems = btrfs_header_nritems(buf);
......@@ -2645,14 +2722,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
key.offset);
key.offset, for_cow);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = btrfs_level_size(root, level - 1);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, level - 1, 0);
parent, ref_root, level - 1, 0,
for_cow);
if (ret)
goto fail;
}
......@@ -2664,15 +2742,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref)
struct extent_buffer *buf, int full_backref, int for_cow)
{
return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
}
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref)
struct extent_buffer *buf, int full_backref, int for_cow)
{
return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
......@@ -4954,6 +5032,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
......@@ -4981,16 +5061,17 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref)
u64 parent, int last_ref, int for_cow)
{
struct btrfs_block_group_cache *cache = NULL;
int ret;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
BTRFS_DROP_DELAYED_REF, NULL);
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BUG_ON(ret);
}
......@@ -5025,12 +5106,12 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_put_block_group(cache);
}
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset)
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int for_cow)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
/*
* tree log blocks never actually go into the extent allocation
......@@ -5042,14 +5123,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_pin_extent(root, bytenr, num_bytes, 1);
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
BTRFS_DROP_DELAYED_REF, NULL);
BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BUG_ON(ret);
} else {
ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
parent, root_objectid, owner,
offset, BTRFS_DROP_DELAYED_REF, NULL);
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner,
offset, BTRFS_DROP_DELAYED_REF,
NULL, for_cow);
BUG_ON(ret);
}
return ret;
......@@ -5877,9 +5961,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
0, root_objectid, owner, offset,
BTRFS_ADD_DELAYED_EXTENT, NULL);
ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
ins->offset, 0,
root_objectid, owner, offset,
BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
return ret;
}
......@@ -6049,7 +6134,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size)
u64 hint, u64 empty_size, int for_cow)
{
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
......@@ -6093,10 +6178,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
extent_op->update_flags = 1;
extent_op->is_data = 0;
ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
ins.objectid,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op);
extent_op, for_cow);
BUG_ON(ret);
}
return buf;
......@@ -6113,6 +6199,7 @@ struct walk_control {
int keep_locks;
int reada_slot;
int reada_count;
int for_reloc;
};
#define DROP_REFERENCE 1
......@@ -6251,9 +6338,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
/* wc->stage == UPDATE_BACKREF */
if (!(wc->flags[level] & flag)) {
BUG_ON(!path->locks[level]);
ret = btrfs_inc_ref(trans, root, eb, 1);
ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
BUG_ON(ret);
ret = btrfs_dec_ref(trans, root, eb, 0);
ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
BUG_ON(ret);
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
eb->len, flag, 0);
......@@ -6397,7 +6484,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0);
root->root_key.objectid, level - 1, 0, 0);
BUG_ON(ret);
}
btrfs_tree_unlock(next);
......@@ -6471,9 +6558,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (wc->refs[level] == 1) {
if (level == 0) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
ret = btrfs_dec_ref(trans, root, eb, 1);
ret = btrfs_dec_ref(trans, root, eb, 1,
wc->for_reloc);
else
ret = btrfs_dec_ref(trans, root, eb, 0);
ret = btrfs_dec_ref(trans, root, eb, 0,
wc->for_reloc);
BUG_ON(ret);
}
/* make block locked assertion in clean_tree_block happy */
......@@ -6500,7 +6589,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_owner(path->nodes[level + 1]));
}
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
out:
wc->refs[level] = 0;
wc->flags[level] = 0;
......@@ -6584,7 +6673,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* blocks are properly updated.
*/
void btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int update_ref)
struct btrfs_block_rsv *block_rsv, int update_ref,
int for_reloc)
{
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
......@@ -6672,6 +6762,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
wc->for_reloc = for_reloc;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
......@@ -6756,6 +6847,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
* drop subtree rooted at tree block 'node'.
*
* NOTE: this function will unlock and release tree block 'node'
* only used by relocation code
*/
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
......@@ -6800,6 +6892,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
wc->for_reloc = 1;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
......
......@@ -3579,6 +3579,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
atomic_set(&eb->blocking_writers, 0);
atomic_set(&eb->spinning_readers, 0);
atomic_set(&eb->spinning_writers, 0);
eb->lock_nested = 0;
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);
......
......@@ -129,6 +129,7 @@ struct extent_buffer {
struct list_head leak_list;
struct rcu_head rcu_head;
atomic_t refs;
pid_t lock_owner;
/* count of read lock holders on the extent buffer */
atomic_t write_locks;
......@@ -137,6 +138,7 @@ struct extent_buffer {
atomic_t blocking_readers;
atomic_t spinning_readers;
atomic_t spinning_writers;
int lock_nested;
/* protects write locks */
rwlock_t lock;
......
......@@ -678,7 +678,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
start - extent_offset);
start - extent_offset, 0);
BUG_ON(ret);
*hint_byte = disk_bytenr;
}
......@@ -753,7 +753,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
key.objectid, key.offset -
extent_offset);
extent_offset, 0);
BUG_ON(ret);
inode_sub_bytes(inode,
extent_end - key.offset);
......@@ -962,7 +962,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
ino, orig_offset);
ino, orig_offset, 0);
BUG_ON(ret);
if (split == start) {
......@@ -989,7 +989,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
ino, orig_offset, 0);
BUG_ON(ret);
}
other_start = 0;
......@@ -1006,7 +1006,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
ino, orig_offset, 0);
BUG_ON(ret);
}
if (del_nr == 0) {
......
......@@ -3179,7 +3179,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
btrfs_header_owner(leaf),
ino, extent_offset);
ino, extent_offset, 0);
BUG_ON(ret);
}
......@@ -5121,7 +5121,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
}
flush_dcache_page(page);
} else if (create && PageUptodate(page)) {
WARN_ON(1);
BUG();
if (!trans) {
kunmap(page);
free_extent_map(em);
......
......@@ -368,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root,
return PTR_ERR(trans);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
0, objectid, NULL, 0, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
......@@ -2468,7 +2468,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
disko, diskl, 0,
root->root_key.objectid,
btrfs_ino(inode),
new_key.offset - datao);
new_key.offset - datao,
0);
BUG_ON(ret);
}
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
......@@ -3018,7 +3019,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
{
int ret = 0;
int size;
u64 extent_offset;
u64 extent_item_pos;
struct btrfs_ioctl_logical_ino_args *loi;
struct btrfs_data_container *inodes = NULL;
struct btrfs_path *path = NULL;
......@@ -3049,15 +3050,17 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
}
ret = extent_from_logical(root->fs_info, loi->logical, path, &key);
btrfs_release_path(path);
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
ret = -ENOENT;
if (ret < 0)
goto out;
extent_offset = loi->logical - key.objectid;
extent_item_pos = loi->logical - key.objectid;
ret = iterate_extent_inodes(root->fs_info, path, key.objectid,
extent_offset, build_ino_list, inodes);
extent_item_pos, build_ino_list,
inodes);
if (ret < 0)
goto out;
......
......@@ -33,6 +33,14 @@ void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
*/
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
if (eb->lock_nested && current->pid == eb->lock_owner) {
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
}
if (rw == BTRFS_WRITE_LOCK) {
if (atomic_read(&eb->blocking_writers) == 0) {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
......@@ -57,6 +65,14 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
*/
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
if (&eb->lock_nested && current->pid == eb->lock_owner) {
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
}
if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
write_lock(&eb->lock);
......@@ -81,12 +97,25 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
again:
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers) &&
current->pid == eb->lock_owner) {
/*
* This extent is already write-locked by our thread. We allow
* an additional read lock to be added because it's for the same
* thread. btrfs_find_all_roots() depends on this as it may be
* called on a partly (write-)locked tree.
*/
BUG_ON(eb->lock_nested);
eb->lock_nested = 1;
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
atomic_inc(&eb->read_locks);
......@@ -129,6 +158,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
}
atomic_inc(&eb->write_locks);
atomic_inc(&eb->spinning_writers);
eb->lock_owner = current->pid;
return 1;
}
......@@ -137,6 +167,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
if (eb->lock_nested && current->pid == eb->lock_owner) {
eb->lock_nested = 0;
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
......@@ -149,6 +188,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
if (eb->lock_nested && current->pid == eb->lock_owner) {
eb->lock_nested = 0;
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
if (atomic_dec_and_test(&eb->blocking_readers))
......@@ -181,6 +229,7 @@ int btrfs_tree_lock(struct extent_buffer *eb)
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
atomic_inc(&eb->write_locks);
eb->lock_owner = current->pid;
return 0;
}
......
......@@ -1604,12 +1604,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
num_bytes, parent,
btrfs_header_owner(leaf),
key.objectid, key.offset);
key.objectid, key.offset, 1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
key.objectid, key.offset);
key.objectid, key.offset, 1);
BUG_ON(ret);
}
if (dirty)
......@@ -1778,21 +1778,23 @@ int replace_path(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
path->nodes[level]->start,
src->root_key.objectid, level - 1, 0);
src->root_key.objectid, level - 1, 0,
1);
BUG_ON(ret);
ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
0);
0, 1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
path->nodes[level]->start,
src->root_key.objectid, level - 1, 0);
src->root_key.objectid, level - 1, 0,
1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
0);
0, 1);
BUG_ON(ret);
btrfs_unlock_up_safe(path, 0);
......@@ -2244,7 +2246,7 @@ int merge_reloc_roots(struct reloc_control *rc)
} else {
list_del_init(&reloc_root->root_list);
}
btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0);
btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
}
if (found) {
......@@ -2558,7 +2560,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
node->eb->start, blocksize,
upper->eb->start,
btrfs_header_owner(upper->eb),
node->level, 0);
node->level, 0, 1);
BUG_ON(ret);
ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
......
......@@ -309,7 +309,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
u8 ref_level;
unsigned long ptr = 0;
const int bufsize = 4096;
u64 extent_offset;
u64 extent_item_pos;
path = btrfs_alloc_path();
......@@ -329,12 +329,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
if (ret < 0)
goto out;
extent_offset = swarn.logical - found_key.objectid;
extent_item_pos = swarn.logical - found_key.objectid;
swarn.extent_item_size = found_key.offset;
eb = path->nodes[0];
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
item_size = btrfs_item_size_nr(eb, path->slots[0]);
btrfs_release_path(path);
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
do {
......@@ -351,7 +352,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
} else {
swarn.path = path;
iterate_extent_inodes(fs_info, path, found_key.objectid,
extent_offset,
extent_item_pos,
scrub_print_warning_inode, &swarn);
}
......
......@@ -36,6 +36,8 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
WARN_ON(atomic_read(&transaction->use_count) == 0);
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(transaction->delayed_refs.root.rb_node);
WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
......@@ -108,8 +110,11 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
cur_trans->delayed_refs.seq = 1;
init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
......@@ -1386,9 +1391,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
if (btrfs_header_backref_rev(root->node) <
BTRFS_MIXED_BACKREF_REV)
btrfs_drop_snapshot(root, NULL, 0);
btrfs_drop_snapshot(root, NULL, 0, 0);
else
btrfs_drop_snapshot(root, NULL, 1);
btrfs_drop_snapshot(root, NULL, 1, 0);
}
return 0;
}
......@@ -589,7 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root,
ins.objectid, ins.offset,
0, root->root_key.objectid,
key->objectid, offset);
key->objectid, offset, 0);
BUG_ON(ret);
} else {
/*
......
/*
* Copyright (C) 2011 STRATO AG
* written by Arne Jansen <sensille@gmx.net>
* Distributed under the GNU GPL license version 2.
*/
#include <linux/slab.h>
#include <linux/module.h>
#include "ulist.h"
/*
* ulist is a generic data structure to hold a collection of unique u64
* values. The only operations it supports is adding to the list and
* enumerating it.
* It is possible to store an auxiliary value along with the key.
*
* The implementation is preliminary and can probably be sped up
* significantly. A first step would be to store the values in an rbtree
* as soon as ULIST_SIZE is exceeded.
*
* A sample usage for ulists is the enumeration of directed graphs without
* visiting a node twice. The pseudo-code could look like this:
*
* ulist = ulist_alloc();
* ulist_add(ulist, root);
* elem = NULL;
*
* while ((elem = ulist_next(ulist, elem)) {
* for (all child nodes n in elem)
* ulist_add(ulist, n);
* do something useful with the node;
* }
* ulist_free(ulist);
*
* This assumes the graph nodes are adressable by u64. This stems from the
* usage for tree enumeration in btrfs, where the logical addresses are
* 64 bit.
*
* It is also useful for tree enumeration which could be done elegantly
* recursively, but is not possible due to kernel stack limitations. The
* loop would be similar to the above.
*/
/**
* ulist_init - freshly initialize a ulist
* @ulist: the ulist to initialize
*
* Note: don't use this function to init an already used ulist, use
* ulist_reinit instead.
*/
void ulist_init(struct ulist *ulist)
{
ulist->nnodes = 0;
ulist->nodes = ulist->int_nodes;
ulist->nodes_alloced = ULIST_SIZE;
}
EXPORT_SYMBOL(ulist_init);
/**
* ulist_fini - free up additionally allocated memory for the ulist
* @ulist: the ulist from which to free the additional memory
*
* This is useful in cases where the base 'struct ulist' has been statically
* allocated.
*/
void ulist_fini(struct ulist *ulist)
{
/*
* The first ULIST_SIZE elements are stored inline in struct ulist.
* Only if more elements are alocated they need to be freed.
*/
if (ulist->nodes_alloced > ULIST_SIZE)
kfree(ulist->nodes);
ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
}
EXPORT_SYMBOL(ulist_fini);
/**
* ulist_reinit - prepare a ulist for reuse
* @ulist: ulist to be reused
*
* Free up all additional memory allocated for the list elements and reinit
* the ulist.
*/
void ulist_reinit(struct ulist *ulist)
{
ulist_fini(ulist);
ulist_init(ulist);
}
EXPORT_SYMBOL(ulist_reinit);
/**
* ulist_alloc - dynamically allocate a ulist
* @gfp_mask: allocation flags to for base allocation
*
* The allocated ulist will be returned in an initialized state.
*/
struct ulist *ulist_alloc(unsigned long gfp_mask)
{
struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
if (!ulist)
return NULL;
ulist_init(ulist);
return ulist;
}
EXPORT_SYMBOL(ulist_alloc);
/**
* ulist_free - free dynamically allocated ulist
* @ulist: ulist to free
*
* It is not necessary to call ulist_fini before.
*/
void ulist_free(struct ulist *ulist)
{
if (!ulist)
return;
ulist_fini(ulist);
kfree(ulist);
}
EXPORT_SYMBOL(ulist_free);
/**
* ulist_add - add an element to the ulist
* @ulist: ulist to add the element to
* @val: value to add to ulist
* @aux: auxiliary value to store along with val
* @gfp_mask: flags to use for allocation
*
* Note: locking must be provided by the caller. In case of rwlocks write
* locking is needed
*
* Add an element to a ulist. The @val will only be added if it doesn't
* already exist. If it is added, the auxiliary value @aux is stored along with
* it. In case @val already exists in the ulist, @aux is ignored, even if
* it differs from the already stored value.
*
* ulist_add returns 0 if @val already exists in ulist and 1 if @val has been
* inserted.
* In case of allocation failure -ENOMEM is returned and the ulist stays
* unaltered.
*/
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long gfp_mask)
{
int i;
for (i = 0; i < ulist->nnodes; ++i) {
if (ulist->nodes[i].val == val)
return 0;
}
if (ulist->nnodes >= ulist->nodes_alloced) {
u64 new_alloced = ulist->nodes_alloced + 128;
struct ulist_node *new_nodes;
void *old = NULL;
/*
* if nodes_alloced == ULIST_SIZE no memory has been allocated
* yet, so pass NULL to krealloc
*/
if (ulist->nodes_alloced > ULIST_SIZE)
old = ulist->nodes;
new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced,
gfp_mask);
if (!new_nodes)
return -ENOMEM;
if (!old)
memcpy(new_nodes, ulist->int_nodes,
sizeof(ulist->int_nodes));
ulist->nodes = new_nodes;
ulist->nodes_alloced = new_alloced;
}
ulist->nodes[ulist->nnodes].val = val;
ulist->nodes[ulist->nnodes].aux = aux;
++ulist->nnodes;
return 1;
}
EXPORT_SYMBOL(ulist_add);
/**
* ulist_next - iterate ulist
* @ulist: ulist to iterate
* @prev: previously returned element or %NULL to start iteration
*
* Note: locking must be provided by the caller. In case of rwlocks only read
* locking is needed
*
* This function is used to iterate an ulist. The iteration is started with
* @prev = %NULL. It returns the next element from the ulist or %NULL when the
* end is reached. No guarantee is made with respect to the order in which
* the elements are returned. They might neither be returned in order of
* addition nor in ascending order.
* It is allowed to call ulist_add during an enumeration. Newly added items
* are guaranteed to show up in the running enumeration.
*/
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev)
{
int next;
if (ulist->nnodes == 0)
return NULL;
if (!prev)
return &ulist->nodes[0];
next = (prev - ulist->nodes) + 1;
if (next < 0 || next >= ulist->nnodes)
return NULL;
return &ulist->nodes[next];
}
EXPORT_SYMBOL(ulist_next);
/*
* Copyright (C) 2011 STRATO AG
* written by Arne Jansen <sensille@gmx.net>
* Distributed under the GNU GPL license version 2.
*
*/
#ifndef __ULIST__
#define __ULIST__
/*
* ulist is a generic data structure to hold a collection of unique u64
* values. The only operations it supports is adding to the list and
* enumerating it.
* It is possible to store an auxiliary value along with the key.
*
* The implementation is preliminary and can probably be sped up
* significantly. A first step would be to store the values in an rbtree
* as soon as ULIST_SIZE is exceeded.
*/
/*
* number of elements statically allocated inside struct ulist
*/
#define ULIST_SIZE 16
/*
* element of the list
*/
struct ulist_node {
u64 val; /* value to store */
unsigned long aux; /* auxiliary value saved along with the val */
};
struct ulist {
/*
* number of elements stored in list
*/
unsigned long nnodes;
/*
* number of nodes we already have room for
*/
unsigned long nodes_alloced;
/*
* pointer to the array storing the elements. The first ULIST_SIZE
* elements are stored inline. In this case the it points to int_nodes.
* After exceeding ULIST_SIZE, dynamic memory is allocated.
*/
struct ulist_node *nodes;
/*
* inline storage space for the first ULIST_SIZE entries
*/
struct ulist_node int_nodes[ULIST_SIZE];
};
void ulist_init(struct ulist *ulist);
void ulist_fini(struct ulist *ulist);
void ulist_reinit(struct ulist *ulist);
struct ulist *ulist_alloc(unsigned long gfp_mask);
void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long gfp_mask);
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev);
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册