提交 b4a620ff 编写于 作者: C Coly Li 提交者: Yang Yingliang

make bch_btree_check() multithreaded

mainline inclusion
from mainline-v5.7-rc1
commit 8e710227
category: performance
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=327
CVE: NA

When registering a cache device, bch_btree_check() is called to check
all btree nodes, to make sure the btree is consistent and not
corrupted.

bch_btree_check() is executed recursively in a single thread. When a lot
of data is cached and the btree is huge, it may take a very long time to
check all the btree nodes. In my testing, I observed that it took around
50 minutes to finish bch_btree_check().

When the bcache btree nodes are checked, the cache set is not running
yet and the whole tree is in a read-only state, so it is safe to create
multiple threads to check the btree in parallel.

This patch creates multiple threads, and each thread checks, one by one,
the sub-trees indexed by the keys it fetches from the btree root node.
The number of parallel threads depends on how many keys are in the btree
root node. At most BCH_BTR_CHKTHREAD_MAX (64) threads can be created, but
in practice it should be min(cpu-number/2, root-node-keys-number).
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: qinghaixiang <xuweiqhx@163.com>
Signed-off-by: Xu Wei <xuwei56@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Reviewed-by: Li Ruilin <liruilin4@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 b278db77
...@@ -101,65 +101,6 @@ ...@@ -101,65 +101,6 @@
#define insert_lock(s, b) ((b)->level <= (s)->lock) #define insert_lock(s, b) ((b)->level <= (s)->lock)
/*
* These macros are for recursing down the btree - they handle the details of
* locking and looking up nodes in the cache for you. They're best treated as
* mere syntax when reading code that uses them.
*
* op->lock determines whether we take a read or a write lock at a given depth.
* If you've got a read lock and find that you need a write lock (i.e. you're
* going to have to split), set op->lock and return -EINTR; btree_root() will
* call you again and you'll have the correct lock.
*/
/**
 * btree - recurse down the btree on a specified key
 * @fn: name suffix of the function to call; the macro expands to
 *      bch_btree_<fn>(), which is passed the child node, @op and any
 *      extra arguments
 * @key: key to recurse on
 * @b: parent btree node
 * @op: pointer to struct btree_op
 *
 * Obtains the child node for @key, one level below @b, through
 * bch_btree_node_get(). The lock flag handed to bch_btree_node_get() and
 * later to rw_unlock() is true when the child's level is <= @op->lock.
 * On success bch_btree_<fn>() is called on the child and the child is
 * unlocked again.
 *
 * Evaluates to the return value of bch_btree_<fn>(), or to PTR_ERR() of
 * the lookup result when bch_btree_node_get() returns an error pointer.
 */
#define btree(fn, key, b, op, ...) \
({ \
int _r, l = (b)->level - 1; \
bool _w = l <= (op)->lock; \
struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
_w, b); \
if (!IS_ERR(_child)) { \
_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
rw_unlock(_w, _child); \
} else \
_r = PTR_ERR(_child); \
_r; \
})
/**
 * btree_root - call a function on the root of the btree
 * @fn: name suffix of the function to call; the macro expands to
 *      bch_btree_<fn>(), which is passed the root node, @op and any
 *      extra arguments
 * @c: cache set
 * @op: pointer to struct btree_op
 *
 * Each pass samples @c->root, locks it with the lock type chosen by
 * insert_lock(), and then re-checks that the node is still @c->root and
 * that insert_lock() still gives the same answer before calling
 * bch_btree_<fn>(). The node is then unlocked and
 * bch_cannibalize_unlock() is called. If the re-check failed or the
 * function returned -EINTR, the macro calls schedule() and starts over.
 *
 * After the loop, finish_wait() is called on @c->btree_cache_wait with
 * @op->wait.
 *
 * Evaluates to the last return value of bch_btree_<fn>(), which is never
 * -EINTR.
 */
#define btree_root(fn, c, op, ...) \
({ \
int _r = -EINTR; \
do { \
struct btree *_b = (c)->root; \
bool _w = insert_lock(op, _b); \
rw_lock(_w, _b, _b->level); \
if (_b == (c)->root && \
_w == insert_lock(op, _b)) { \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
} \
rw_unlock(_w, _b); \
bch_cannibalize_unlock(c); \
if (_r == -EINTR) \
schedule(); \
} while (_r == -EINTR); \
\
finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
_r; \
})
static inline struct bset *write_block(struct btree *b) static inline struct bset *write_block(struct btree *b)
{ {
return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c); return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c);
...@@ -1949,13 +1890,170 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) ...@@ -1949,13 +1890,170 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
return ret; return ret;
} }
/*
 * bch_btree_check_thread - thread function that checks sub-trees of the root
 * @arg: the struct btree_check_info slot that belongs to this thread
 *
 * Each pass of the outer loop takes the next root-key index from the shared
 * check_state (under idx_lock), advances this thread's own iterator over the
 * root node's keys up to that index, and then runs bch_btree_check_recurse()
 * through the btree() macro on the child node the key points to.
 *
 * The thread stops when the root node has no more keys or when a sub-tree
 * check returns non-zero. Before returning it stores its result in
 * info->result, decrements check_state->started, and wakes
 * check_state->wait if it was the last thread still counted.
 *
 * Return: 0, or the first non-zero value returned by the sub-tree check.
 */
static int bch_btree_check_thread(void *arg)
{
int ret;
struct btree_check_info *info = arg;
struct btree_check_state *check_state = info->state;
struct cache_set *c = check_state->c;
struct btree_iter iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
k = p = NULL;
cur_idx = prev_idx = 0;
ret = 0;
/* root node keys are checked before thread created */
bch_btree_iter_init(&c->root->keys, &iter, NULL);
/* position the private iterator on the first root key (index 0) */
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
WARN_ON(!k);
p = k;
while (k) {
/*
 * Fetch a root node key index, skip the keys which
 * should be fetched by other threads, then check the
 * sub-tree indexed by the fetched key.
 */
spin_lock(&check_state->idx_lock);
cur_idx = check_state->key_idx;
check_state->key_idx++;
spin_unlock(&check_state->idx_lock);
/* distance from the key this iterator currently sits on */
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
k = bch_btree_iter_next_filter(&iter,
&c->root->keys,
bch_ptr_bad);
if (k)
p = k;
else {
/*
 * No more keys to check in root node,
 * current checking threads are enough,
 * stop creating more.
 */
atomic_set(&check_state->enough, 1);
/* Update check_state->enough earlier */
smp_mb();
goto out;
}
skip_nr--;
cond_resched();
}
/* p is the root key at index cur_idx; check the sub-tree below it */
if (p) {
struct btree_op op;
btree_node_prefetch(c->root, p);
/*
 * NOTE(review): every check thread increments this counter and
 * no lock is taken around it in this function -- confirm
 * whether the count has to be exact.
 */
c->gc_stats.nodes++;
bch_btree_op_init(&op, 0);
ret = btree(check_recurse, p, c->root, &op);
if (ret)
goto out;
}
/* p is consumed; the next pass must find a new key before checking */
p = NULL;
prev_idx = cur_idx;
cond_resched();
}
out:
/* publish the result before this thread is counted as finished */
info->result = ret;
/* update check_state->started among all CPUs */
smp_mb();
/* the last thread to finish wakes whoever sleeps on check_state->wait */
if (atomic_dec_and_test(&check_state->started))
wake_up(&check_state->wait);
return ret;
}
/*
 * bch_btree_chkthread_nr - upper bound on the number of btree check threads
 *
 * Return: half the number of online CPUs, but never less than 1 and never
 * more than BCH_BTR_CHKTHREAD_MAX.
 */
static int bch_btree_chkthread_nr(void)
{
	int half_cpus = num_online_cpus() / 2;

	/* a single online CPU still gets one check thread */
	if (half_cpus == 0)
		return 1;

	if (half_cpus > BCH_BTR_CHKTHREAD_MAX)
		return BCH_BTR_CHKTHREAD_MAX;

	return half_cpus;
}
int bch_btree_check(struct cache_set *c) int bch_btree_check(struct cache_set *c)
{ {
struct btree_op op; int ret = 0;
int i;
struct bkey *k = NULL;
struct btree_iter iter;
struct btree_check_state *check_state;
char name[32];
bch_btree_op_init(&op, SHRT_MAX); /* check and mark root node keys */
for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(c, c->root->level, k);
bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
if (c->root->level == 0)
return 0;
check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
if (!check_state)
return -ENOMEM;
return btree_root(check_recurse, c, &op); check_state->c = c;
check_state->total_threads = bch_btree_chkthread_nr();
check_state->key_idx = 0;
spin_lock_init(&check_state->idx_lock);
atomic_set(&check_state->started, 0);
atomic_set(&check_state->enough, 0);
init_waitqueue_head(&check_state->wait);
/*
* Run multiple threads to check btree nodes in parallel,
* if check_state->enough is non-zero, it means current
* running check threads are enough, unncessary to create
* more.
*/
for (i = 0; i < check_state->total_threads; i++) {
/* fetch latest check_state->enough earlier */
smp_mb();
if (atomic_read(&check_state->enough))
break;
check_state->infos[i].result = 0;
check_state->infos[i].state = check_state;
snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
atomic_inc(&check_state->started);
check_state->infos[i].thread =
kthread_run(bch_btree_check_thread,
&check_state->infos[i],
name);
if (IS_ERR(check_state->infos[i].thread)) {
pr_err("fails to run thread bch_btrchk[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(check_state->infos[i].thread);
ret = -ENOMEM;
goto out;
}
}
wait_event_interruptible(check_state->wait,
atomic_read(&check_state->started) == 0);
for (i = 0; i < check_state->total_threads; i++) {
if (check_state->infos[i].result) {
ret = check_state->infos[i].result;
goto out;
}
}
out:
kfree(check_state);
return ret;
} }
void bch_initial_gc_finish(struct cache_set *c) void bch_initial_gc_finish(struct cache_set *c)
...@@ -2416,7 +2514,7 @@ int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c, ...@@ -2416,7 +2514,7 @@ int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
return btree_root(map_nodes_recurse, c, op, from, fn, flags); return btree_root(map_nodes_recurse, c, op, from, fn, flags);
} }
static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
struct bkey *from, btree_map_keys_fn *fn, struct bkey *from, btree_map_keys_fn *fn,
int flags) int flags)
{ {
......
...@@ -222,6 +222,25 @@ struct btree_op { ...@@ -222,6 +222,25 @@ struct btree_op {
unsigned int insert_collision:1; unsigned int insert_collision:1;
}; };
struct btree_check_state;
/*
 * Per-thread slot for the multithreaded btree check. bch_btree_check()
 * fills in one slot for each thread it starts and passes a pointer to it
 * as the thread argument.
 */
struct btree_check_info {
/* shared state common to all check threads */
struct btree_check_state *state;
/* value returned by kthread_run() for this slot */
struct task_struct *thread;
/* written by the thread just before it returns; read by bch_btree_check() */
int result;
};
/* Size of the infos[] array, and the cap applied to the thread count */
#define BCH_BTR_CHKTHREAD_MAX 64
/*
 * State shared between bch_btree_check() and the check threads it starts.
 */
struct btree_check_state {
/* cache set whose btree is being checked */
struct cache_set *c;
/* maximum number of threads to start, from bch_btree_chkthread_nr() */
int total_threads;
/* next root-key index to hand out to a thread; protected by idx_lock */
int key_idx;
spinlock_t idx_lock;
/* incremented before each thread is started, decremented as each one finishes */
atomic_t started;
/* set to 1 by a thread that runs out of root keys; stops further thread creation */
atomic_t enough;
/* woken by the thread that brings 'started' down to zero */
wait_queue_head_t wait;
/* one slot per check thread */
struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX];
};
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level) static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{ {
memset(op, 0, sizeof(struct btree_op)); memset(op, 0, sizeof(struct btree_op));
...@@ -266,12 +285,78 @@ void bch_initial_gc_finish(struct cache_set *c); ...@@ -266,12 +285,78 @@ void bch_initial_gc_finish(struct cache_set *c);
void bch_moving_gc(struct cache_set *c, bool only_move_dirty); void bch_moving_gc(struct cache_set *c, bool only_move_dirty);
int bch_btree_check(struct cache_set *c); int bch_btree_check(struct cache_set *c);
void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k); void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k);
typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b,
struct bkey *k);
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_keys_fn *fn, int flags);
int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
struct bkey *from, btree_map_keys_fn *fn,
int flags);
static inline void wake_up_gc(struct cache_set *c) static inline void wake_up_gc(struct cache_set *c)
{ {
wake_up(&c->gc_wait); wake_up(&c->gc_wait);
} }
/*
* These macros are for recursing down the btree - they handle the details of
* locking and looking up nodes in the cache for you. They're best treated as
* mere syntax when reading code that uses them.
*
* op->lock determines whether we take a read or a write lock at a given depth.
* If you've got a read lock and find that you need a write lock (i.e. you're
* going to have to split), set op->lock and return -EINTR; btree_root() will
* call you again and you'll have the correct lock.
*/
/**
 * btree - recurse down the btree on a specified key
 * @fn: name suffix of the function to call; the macro expands to
 *      bch_btree_<fn>(), which is passed the child node, @op and any
 *      extra arguments
 * @key: key to recurse on
 * @b: parent btree node
 * @op: pointer to struct btree_op
 *
 * Obtains the child node for @key, one level below @b, through
 * bch_btree_node_get(). The lock flag handed to bch_btree_node_get() and
 * later to rw_unlock() is true when the child's level is <= @op->lock.
 * On success bch_btree_<fn>() is called on the child and the child is
 * unlocked again.
 *
 * Evaluates to the return value of bch_btree_<fn>(), or to PTR_ERR() of
 * the lookup result when bch_btree_node_get() returns an error pointer.
 */
#define btree(fn, key, b, op, ...) \
({ \
int _r, l = (b)->level - 1; \
bool _w = l <= (op)->lock; \
struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
_w, b); \
if (!IS_ERR(_child)) { \
_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
rw_unlock(_w, _child); \
} else \
_r = PTR_ERR(_child); \
_r; \
})
/**
 * btree_root - call a function on the root of the btree
 * @fn: name suffix of the function to call; the macro expands to
 *      bch_btree_<fn>(), which is passed the root node, @op and any
 *      extra arguments
 * @c: cache set
 * @op: pointer to struct btree_op
 *
 * Each pass samples @c->root, locks it with the lock type chosen by
 * insert_lock(), and then re-checks that the node is still @c->root and
 * that insert_lock() still gives the same answer before calling
 * bch_btree_<fn>(). The node is then unlocked and
 * bch_cannibalize_unlock() is called. If the re-check failed or the
 * function returned -EINTR, the macro calls schedule() and starts over.
 *
 * After the loop, finish_wait() is called on @c->btree_cache_wait with
 * @op->wait.
 *
 * Evaluates to the last return value of bch_btree_<fn>(), which is never
 * -EINTR.
 */
#define btree_root(fn, c, op, ...) \
({ \
int _r = -EINTR; \
do { \
struct btree *_b = (c)->root; \
bool _w = insert_lock(op, _b); \
rw_lock(_w, _b, _b->level); \
if (_b == (c)->root && \
_w == insert_lock(op, _b)) { \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
} \
rw_unlock(_w, _b); \
bch_cannibalize_unlock(c); \
if (_r == -EINTR) \
schedule(); \
} while (_r == -EINTR); \
\
finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
_r; \
})
#define MAP_DONE 0 #define MAP_DONE 0
#define MAP_CONTINUE 1 #define MAP_CONTINUE 1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册