diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2bc34408872d70a86c8bdfe065d17286969392c9..a17d9991c333db4f5fa8602cb740d8f4338b97df 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -360,6 +360,44 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
 	write_unlock(&fs_info->tree_mod_log_lock);
 }
 
+/*
+ * Increment the upper half of tree_mod_seq, set lower half zero.
+ *
+ * Must be called with fs_info->tree_mod_seq_lock held.
+ */
+static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
+{
+	u64 seq = atomic64_read(&fs_info->tree_mod_seq);
+	seq &= 0xffffffff00000000ull;
+	seq += 1ull << 32;
+	atomic64_set(&fs_info->tree_mod_seq, seq);
+	return seq;
+}
+
+/*
+ * Increment the lower half of tree_mod_seq.
+ *
+ * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
+ * are generated should not technically require the spin lock here. (Rationale:
+ * a minor increment that races between the major increment's atomic64_read
+ * and atomic64_set calls does not duplicate sequence numbers; it just returns
+ * a unique sequence number as usual.) We have decided to keep the locking
+ * requirement here and rethink it once we notice it really imposes a problem
+ * on some workload.
+ */
+static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+{
+	return atomic64_inc_return(&fs_info->tree_mod_seq);
+}
+
+/*
+ * return the last minor in the previous major tree_mod_seq number
+ */
+u64 btrfs_tree_mod_seq_prev(u64 seq)
+{
+	return (seq & 0xffffffff00000000ull) - 1ull;
+}
+
 /*
  * This adds a new blocker to the tree mod log's blocker list if the @elem
  * passed does not already have a sequence number set. So when a caller expects
@@ -376,10 +414,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	tree_mod_log_write_lock(fs_info);
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	if (!elem->seq) {
-		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
+		elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
 		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
 	}
-	seq = btrfs_inc_tree_mod_seq(fs_info);
+	seq = btrfs_inc_tree_mod_seq_minor(fs_info);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
 	tree_mod_log_write_unlock(fs_info);
 
@@ -524,7 +562,10 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
 	if (!tm)
 		return -ENOMEM;
 
-	tm->seq = btrfs_inc_tree_mod_seq(fs_info);
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+
 	return tm->seq;
 }
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 37c4da3403d0655ba9dcc2c38bb7dbea8e2b6ab1..2c48f52aba405e15a82473008dfa2188938bca40 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1422,7 +1422,7 @@ struct btrfs_fs_info {
 
 	/* this protects tree_mod_seq_list */
 	spinlock_t tree_mod_seq_lock;
-	atomic_t tree_mod_seq;
+	atomic64_t tree_mod_seq;
 	struct list_head tree_mod_seq_list;
 	struct seq_list tree_mod_seq_elem;
 
@@ -3332,10 +3332,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			   struct seq_list *elem);
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			    struct seq_list *elem);
-static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
-{
-	return atomic_inc_return(&fs_info->tree_mod_seq);
-}
+u64 btrfs_tree_mod_seq_prev(u64 seq);
 int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
 
 /* root-item.c */
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 116abec7a29c20e801367403c0328ba773ef7915..c219463fb1fde9ede3cbe9d53622c8659b1545b0 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -361,8 +361,10 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 		elem = list_first_entry(&fs_info->tree_mod_seq_list,
 					struct seq_list, list);
 		if (seq >= elem->seq) {
-			pr_debug("holding back delayed_ref %llu, lowest is "
-				 "%llu (%p)\n", seq, elem->seq, delayed_refs);
+			pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
+				 (u32)(seq >> 32), (u32)seq,
+				 (u32)(elem->seq >> 32), (u32)elem->seq,
+				 delayed_refs);
 			ret = 1;
 		}
 	}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e4488b57a7aef1e435a57cc2bbb4ca28e2665f82..92c44ed78de11012b19b462b6e339f34a9f5c915 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2157,7 +2157,7 @@ int open_ctree(struct super_block *sb,
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
 	atomic_set(&fs_info->defrag_running, 0);
-	atomic_set(&fs_info->tree_mod_seq, 0);
+	atomic64_set(&fs_info->tree_mod_seq, 0);
 	fs_info->sb = sb;
 	fs_info->max_inline = 8192 * 1024;
 	fs_info->metadata_ratio = 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f8a5652b0c4372b52dda46ed3906721bf1220c9b..1cae6631b3d856a68ee1ca3a51d886ab5c425c47 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2541,9 +2541,10 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 	    !trans->delayed_ref_elem.seq) {
 		/* list without seq or seq without list */
 		btrfs_err(fs_info,
-			"qgroup accounting update error, list is%s empty, seq is %llu",
+			"qgroup accounting update error, list is%s empty, seq is %#x.%x",
 			list_empty(&trans->qgroup_ref_list) ? "" : " not",
-			trans->delayed_ref_elem.seq);
+			(u32)(trans->delayed_ref_elem.seq >> 32),
+			(u32)trans->delayed_ref_elem.seq);
 		BUG();
 	}
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index f175471da8829ffd06800bcf0fbfe51af5cccf4b..e5c56238b6c67f5aa466b9e87601c4fb82bfc817 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1242,9 +1242,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 	case BTRFS_ADD_DELAYED_REF:
 	case BTRFS_ADD_DELAYED_EXTENT:
 		sgn = 1;
+		seq = btrfs_tree_mod_seq_prev(node->seq);
 		break;
 	case BTRFS_DROP_DELAYED_REF:
 		sgn = -1;
+		seq = node->seq;
 		break;
 	case BTRFS_UPDATE_DELAYED_HEAD:
 		return 0;
@@ -1254,14 +1256,14 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 
 	/*
 	 * the delayed ref sequence number we pass depends on the direction of
-	 * the operation. for add operations, we pass (node->seq - 1) to skip
+	 * the operation. for add operations, we pass
+	 * btrfs_tree_mod_seq_prev(node->seq) to skip
 	 * the delayed ref's current sequence number, because we need the state
 	 * of the tree before the add operation. for delete operations, we pass
 	 * (node->seq) to include the delayed ref's current sequence number,
 	 * because we need the state of the tree after the delete operation.
 	 */
-	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
-				   sgn > 0 ? node->seq - 1 : node->seq, &roots);
+	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
 	if (ret < 0)
 		return ret;
 
@@ -1772,8 +1774,9 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 {
 	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
 		return;
-	printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
+	pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
 		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
-		trans->delayed_ref_elem.seq);
+		(u32)(trans->delayed_ref_elem.seq >> 32),
+		(u32)trans->delayed_ref_elem.seq);
 	BUG();
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 258fcebc7ccf8f8ec3a7587ee32188fd53c50864..18d6fb7be26571a794c2c49e74d31faf97b9aa92 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -162,7 +162,7 @@ static noinline int join_transaction(struct btrfs_root *root, int type)
 	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
 		WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
 			"creating a fresh transaction\n");
-	atomic_set(&fs_info->tree_mod_seq, 0);
+	atomic64_set(&fs_info->tree_mod_seq, 0);
 
 	spin_lock_init(&cur_trans->commit_lock);
 	spin_lock_init(&cur_trans->delayed_refs.lock);
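
For reference, a minimal userspace sketch of the sequence layout this patch introduces: the upper 32 bits of tree_mod_seq act as a "major" number (bumped when a blocker registers via btrfs_get_tree_mod_seq), the lower 32 bits as a "minor" number (bumped per tree modification), and btrfs_tree_mod_seq_prev() maps any sequence number onto the last minor of the previous major. This is not kernel code; the helper names (inc_seq_major, inc_seq_minor, seq_prev, show_seq) are illustrative stand-ins, and a plain uint64_t replaces atomic64_t and the spin lock.

/*
 * Standalone illustration of the split tree_mod_seq layout. Assumption:
 * this mirrors the helpers added in ctree.c above, minus atomicity and
 * locking.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t tree_mod_seq;

/* like btrfs_inc_tree_mod_seq_major(): next major, minor reset to zero */
static uint64_t inc_seq_major(void)
{
	tree_mod_seq = (tree_mod_seq & 0xffffffff00000000ull) + (1ull << 32);
	return tree_mod_seq;
}

/* like btrfs_inc_tree_mod_seq_minor(): next minor within the current major */
static uint64_t inc_seq_minor(void)
{
	return ++tree_mod_seq;
}

/* like btrfs_tree_mod_seq_prev(): last minor of the previous major */
static uint64_t seq_prev(uint64_t seq)
{
	return (seq & 0xffffffff00000000ull) - 1ull;
}

/* print as major.minor, matching the %#x.%x format used by the patch */
static void show_seq(const char *what, uint64_t seq)
{
	printf("%-20s %#x.%x\n", what, (unsigned)(seq >> 32), (unsigned)seq);
}

int main(void)
{
	show_seq("first blocker", inc_seq_major());	/* 0x1.0 */
	show_seq("modification", inc_seq_minor());	/* 0x1.1 */
	show_seq("modification", inc_seq_minor());	/* 0x1.2 */
	show_seq("second blocker", inc_seq_major());	/* 0x2.0 */
	show_seq("modification", inc_seq_minor());	/* 0x2.1 */
	show_seq("seq_prev(current)", seq_prev(tree_mod_seq)); /* 0x1.ffffffff */
	return 0;
}

This is also why the qgroup hunk passes btrfs_tree_mod_seq_prev(node->seq) to btrfs_find_all_roots() for add operations (the tree state before the ref's own sequence number) but node->seq unchanged for drops (the state including it), as the updated comment in btrfs_qgroup_account_ref() explains.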