提交 bb721703 编写于 作者: C Chris Mason

Btrfs: reduce CPU contention while waiting for delayed extent operations

We batch up operations to the extent allocation tree, which allows
us to deal with the recursive nature of using the extent allocation
tree to allocate extents to the extent allocation tree.

It also provides a mechanism to sort and collect extent
operations, which makes it much more efficient to record extents
that are close together.

The delayed extent operations must all be finished before the
running transaction commits, so we have code that makes sure to run a few
of the batched operations when closing our transaction handles.

This creates a great deal of contention for the locks in the
delayed extent operation tree, and also contention for the lock on the
extent allocation tree itself.  All the extra contention just slows
down the operations and doesn't get things done any faster.

This commit changes things to use a wait queue instead.  As procs
want to run the delayed operations, one of them races in and gets
permission to hit the tree, and the others step back and wait for
progress to be made.
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
上级 242e18c7
......@@ -131,6 +131,15 @@ struct btrfs_delayed_ref_root {
/* total number of head nodes ready for processing */
unsigned long num_heads_ready;
/*
* bumped when someone is making progress on the delayed
* refs, so that other procs know they are just adding to
* contention instead of helping
*/
atomic_t procs_running_refs;
atomic_t ref_seq;
wait_queue_head_t wait;
/*
* set when the tree is flushing before a transaction commit,
* used by the throttling code to decide if new updates need
......
......@@ -2438,6 +2438,16 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
return ret;
}
/*
 * Check whether the delayed ref sequence counter has left the window
 * [seq, seq + count).
 *
 * Reads delayed_refs->ref_seq once and returns 1 when that value is
 * below seq or at/after seq + count, and 0 while it is still inside
 * the window.
 */
static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
		      int count)
{
	int cur = atomic_read(&delayed_refs->ref_seq);
	/* the upper bound is only evaluated once the lower bound holds */
	int in_window = (cur >= seq) && (cur < seq + count);

	return in_window ? 0 : 1;
}
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
......@@ -2472,6 +2482,44 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
if (count == 0) {
count = delayed_refs->num_entries * 2;
run_most = 1;
}
if (!run_all && !run_most) {
int old;
int seq = atomic_read(&delayed_refs->ref_seq);
progress:
old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
if (old) {
DEFINE_WAIT(__wait);
if (delayed_refs->num_entries < 16348)
return 0;
prepare_to_wait(&delayed_refs->wait, &__wait,
TASK_UNINTERRUPTIBLE);
old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
if (old) {
schedule();
finish_wait(&delayed_refs->wait, &__wait);
if (!refs_newer(delayed_refs, seq, 256))
goto progress;
else
return 0;
} else {
finish_wait(&delayed_refs->wait, &__wait);
goto again;
}
}
} else {
atomic_inc(&delayed_refs->procs_running_refs);
}
again:
loops = 0;
spin_lock(&delayed_refs->lock);
......@@ -2480,10 +2528,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
if (count == 0) {
count = delayed_refs->num_entries * 2;
run_most = 1;
}
while (1) {
if (!(run_all || run_most) &&
delayed_refs->num_heads_ready < 64)
......@@ -2505,9 +2549,12 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (ret < 0) {
spin_unlock(&delayed_refs->lock);
btrfs_abort_transaction(trans, root, ret);
atomic_dec(&delayed_refs->procs_running_refs);
return ret;
}
atomic_add(ret, &delayed_refs->ref_seq);
count -= min_t(unsigned long, ret, count);
if (count == 0)
......@@ -2576,6 +2623,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
goto again;
}
out:
atomic_dec(&delayed_refs->procs_running_refs);
smp_mb();
if (waitqueue_active(&delayed_refs->wait))
wake_up(&delayed_refs->wait);
spin_unlock(&delayed_refs->lock);
assert_qgroups_uptodate(trans);
return 0;
......
......@@ -156,6 +156,9 @@ static noinline int join_transaction(struct btrfs_root *root, int type)
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
init_waitqueue_head(&cur_trans->delayed_refs.wait);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
......@@ -577,7 +580,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, root);
while (count < 2) {
while (count < 1) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (cur &&
......@@ -589,6 +592,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
}
count++;
}
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册