Commit e8e1d468, authored by Kent Overstreet

bcache: Convert try_wait to wait_queue_head_t

We never waited on c->try_wait asynchronously, so just use the standard
primitives.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Parent 0b93207a
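The conversion is from bcache's closure waitlist to the kernel's ordinary wait-queue API: the task that is cannibalizing btree nodes records itself in c->try_harder, and anyone who loses that race simply sleeps on c->try_wait until woken. The sketch below shows that pattern in isolation; it is a minimal illustration, not the bcache code itself — the struct and function names (cannibalize_lock, cannibalize_trylock, ...) are made up for the example, and the real code additionally serializes try_harder under c->bucket_lock.

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/wait.h>

struct cannibalize_lock {
	struct task_struct	*try_harder;	/* task currently cannibalizing, or NULL */
	wait_queue_head_t	try_wait;	/* everyone else sleeps here */
};

static void cannibalize_lock_init(struct cannibalize_lock *l)
{
	l->try_harder = NULL;
	init_waitqueue_head(&l->try_wait);	/* replaces the closure_waitlist */
}

/* Become the cannibalizing task, or report that another task already is. */
static int cannibalize_trylock(struct cannibalize_lock *l)
{
	if (!l->try_harder)
		l->try_harder = current;
	else if (l->try_harder != current)
		return -ENOSPC;
	return 0;
}

/* Loser of the race sleeps synchronously until the holder releases. */
static void cannibalize_wait(struct cannibalize_lock *l)
{
	wait_event(l->try_wait, !l->try_harder);
}

/* Holder releases and wakes any sleepers. */
static void cannibalize_unlock(struct cannibalize_lock *l)
{
	if (l->try_harder == current) {
		l->try_harder = NULL;
		wake_up(&l->try_wait);
	}
}

This is the shape of the new mca_cannibalize()/bch_cannibalize_unlock() pair in the diff below, with the wait_event()-and-retry step moved into the btree_root() macro in btree.h.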
drivers/md/bcache/bcache.h
@@ -735,8 +735,8 @@ struct cache_set {
 	 * basically a lock for this that we can wait on asynchronously. The
 	 * btree_root() macro releases the lock when it returns.
 	 */
-	struct closure		*try_harder;
-	struct closure_waitlist	try_wait;
+	struct task_struct	*try_harder;
+	wait_queue_head_t	try_wait;
 	uint64_t		try_harder_start;
 
 	/*
drivers/md/bcache/btree.c
@@ -437,7 +437,7 @@ static void bch_btree_leaf_dirty(struct btree *b, struct btree_op *op)
 	set_btree_node_dirty(b);
 
-	if (op && op->journal) {
+	if (op->journal) {
 		if (w->journal &&
 		    journal_pin_cmp(b->c, w, op)) {
 			atomic_dec_bug(w->journal);
@@ -574,34 +574,35 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
 	return b;
 }
 
-static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
+static int mca_reap(struct btree *b, unsigned min_order, bool flush)
 {
+	struct closure cl;
+
+	closure_init_stack(&cl);
 	lockdep_assert_held(&b->c->bucket_lock);
 
 	if (!down_write_trylock(&b->lock))
 		return -ENOMEM;
 
-	if (b->page_order < min_order) {
+	BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
+
+	if (b->page_order < min_order ||
+	    (!flush &&
+	     (btree_node_dirty(b) ||
+	      atomic_read(&b->io.cl.remaining) != -1))) {
 		rw_unlock(true, b);
 		return -ENOMEM;
 	}
 
-	BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
-
-	if (cl && btree_node_dirty(b))
-		bch_btree_node_write(b, NULL);
-
-	if (cl)
-		closure_wait_event_async(&b->io.wait, cl,
-			atomic_read(&b->io.cl.remaining) == -1);
-
-	if (btree_node_dirty(b) ||
-	    !closure_is_unlocked(&b->io.cl) ||
-	    work_pending(&b->work.work)) {
-		rw_unlock(true, b);
-		return -EAGAIN;
+	if (btree_node_dirty(b)) {
+		bch_btree_node_write(b, &cl);
+		closure_sync(&cl);
 	}
 
+	/* wait for any in flight btree write */
+	closure_wait_event_sync(&b->io.wait, &cl,
+				atomic_read(&b->io.cl.remaining) == -1);
+
 	return 0;
 }
@@ -641,7 +642,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
 			break;
 
 		if (++i > 3 &&
-		    !mca_reap(b, NULL, 0)) {
+		    !mca_reap(b, 0, false)) {
 			mca_data_free(b);
 			rw_unlock(true, b);
 			freed++;
@@ -660,7 +661,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
 		list_rotate_left(&c->btree_cache);
 
 		if (!b->accessed &&
-		    !mca_reap(b, NULL, 0)) {
+		    !mca_reap(b, 0, false)) {
 			mca_bucket_free(b);
 			mca_data_free(b);
 			rw_unlock(true, b);
@@ -783,52 +784,27 @@ static struct btree *mca_find(struct cache_set *c, struct bkey *k)
 	return b;
 }
 
-static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
-				     int level, struct closure *cl)
+static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k)
 {
-	int ret = -ENOMEM;
-	struct btree *i;
+	struct btree *b;
 
 	trace_bcache_btree_cache_cannibalize(c);
 
-	if (!cl)
-		return ERR_PTR(-ENOMEM);
-
-	/*
-	 * Trying to free up some memory - i.e. reuse some btree nodes - may
-	 * require initiating IO to flush the dirty part of the node. If we're
-	 * running under generic_make_request(), that IO will never finish and
-	 * we would deadlock. Returning -EAGAIN causes the cache lookup code to
-	 * punt to workqueue and retry.
-	 */
-	if (current->bio_list)
-		return ERR_PTR(-EAGAIN);
-
-	if (c->try_harder && c->try_harder != cl) {
-		closure_wait_event_async(&c->try_wait, cl, !c->try_harder);
-		return ERR_PTR(-EAGAIN);
-	}
+	if (!c->try_harder) {
+		c->try_harder = current;
+		c->try_harder_start = local_clock();
+	} else if (c->try_harder != current)
+		return ERR_PTR(-ENOSPC);
 
-	c->try_harder = cl;
-	c->try_harder_start = local_clock();
-retry:
-	list_for_each_entry_reverse(i, &c->btree_cache, list) {
-		int r = mca_reap(i, cl, btree_order(k));
-		if (!r)
-			return i;
-		if (r != -ENOMEM)
-			ret = r;
-	}
+	list_for_each_entry_reverse(b, &c->btree_cache, list)
+		if (!mca_reap(b, btree_order(k), false))
+			return b;
 
-	if (ret == -EAGAIN &&
-	    closure_blocking(cl)) {
-		mutex_unlock(&c->bucket_lock);
-		closure_sync(cl);
-		mutex_lock(&c->bucket_lock);
-		goto retry;
-	}
+	list_for_each_entry_reverse(b, &c->btree_cache, list)
+		if (!mca_reap(b, btree_order(k), true))
+			return b;
 
-	return ERR_PTR(ret);
+	return ERR_PTR(-ENOMEM);
 }
 
 /*
@@ -839,18 +815,19 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
  */
 void bch_cannibalize_unlock(struct cache_set *c, struct closure *cl)
 {
-	if (c->try_harder == cl) {
+	if (c->try_harder == current) {
 		bch_time_stats_update(&c->try_harder_time, c->try_harder_start);
 		c->try_harder = NULL;
-		__closure_wake_up(&c->try_wait);
+		wake_up(&c->try_wait);
 	}
 }
 
-static struct btree *mca_alloc(struct cache_set *c, struct bkey *k,
-			       int level, struct closure *cl)
+static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level)
 {
 	struct btree *b;
 
+	BUG_ON(current->bio_list);
+
 	lockdep_assert_held(&c->bucket_lock);
 
 	if (mca_find(c, k))
@@ -860,14 +837,14 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k,
 	 * the list. Check if there's any freed nodes there:
 	 */
 	list_for_each_entry(b, &c->btree_cache_freeable, list)
-		if (!mca_reap(b, NULL, btree_order(k)))
+		if (!mca_reap(b, btree_order(k), false))
 			goto out;
 
 	/* We never free struct btree itself, just the memory that holds the on
 	 * disk node. Check the freed list before allocating a new one:
 	 */
 	list_for_each_entry(b, &c->btree_cache_freed, list)
-		if (!mca_reap(b, NULL, 0)) {
+		if (!mca_reap(b, 0, false)) {
 			mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO);
 			if (!b->sets[0].data)
 				goto err;
@@ -901,7 +878,7 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k,
 	if (b)
 		rw_unlock(true, b);
 
-	b = mca_cannibalize(c, k, level, cl);
+	b = mca_cannibalize(c, k);
 	if (!IS_ERR(b))
 		goto out;
@@ -919,10 +896,9 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k,
  * level and op->lock.
  */
 struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
-				 int level, struct btree_op *op)
+				 int level, bool write)
 {
 	int i = 0;
-	bool write = level <= op->lock;
 	struct btree *b;
 
 	BUG_ON(level < 0);
@@ -934,7 +910,7 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
 		return ERR_PTR(-EAGAIN);
 
 	mutex_lock(&c->bucket_lock);
-	b = mca_alloc(c, k, level, &op->cl);
+	b = mca_alloc(c, k, level);
 	mutex_unlock(&c->bucket_lock);
 
 	if (!b)
@@ -980,7 +956,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
 	struct btree *b;
 
 	mutex_lock(&c->bucket_lock);
-	b = mca_alloc(c, k, level, NULL);
+	b = mca_alloc(c, k, level);
 	mutex_unlock(&c->bucket_lock);
 
 	if (!IS_ERR_OR_NULL(b)) {
@@ -991,17 +967,12 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
 
 /* Btree alloc */
 
-static void btree_node_free(struct btree *b, struct btree_op *op)
+static void btree_node_free(struct btree *b)
 {
 	unsigned i;
 
 	trace_bcache_btree_node_free(b);
 
-	/*
-	 * The BUG_ON() in btree_node_get() implies that we must have a write
-	 * lock on parent to free or even invalidate a node
-	 */
-	BUG_ON(op->lock <= b->level);
 	BUG_ON(b == b->c->root);
 
 	if (btree_node_dirty(b))
@@ -1037,7 +1008,7 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
 	SET_KEY_SIZE(&k.key, c->btree_pages * PAGE_SECTORS);
 
-	b = mca_alloc(c, &k.key, level, cl);
+	b = mca_alloc(c, &k.key, level);
 	if (IS_ERR(b))
 		goto err_free;
@@ -1173,8 +1144,7 @@ static int btree_gc_mark_node(struct btree *b, unsigned *keys,
 	return stale;
 }
 
-static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
-				    struct btree_op *op)
+static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k)
 {
 	/*
 	 * We block priorities from being written for the duration of garbage
@@ -1191,7 +1161,7 @@ static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
 		memcpy(k->ptr, b->key.ptr,
 		       sizeof(uint64_t) * KEY_PTRS(&b->key));
 
-		btree_node_free(n, op);
+		btree_node_free(n);
 		up_write(&n->lock);
 	}
@@ -1211,8 +1181,8 @@ struct gc_merge_info {
 	unsigned	keys;
 };
 
-static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
-			      struct gc_stat *gc, struct gc_merge_info *r)
+static void btree_gc_coalesce(struct btree *b, struct gc_stat *gc,
+			      struct gc_merge_info *r)
 {
 	unsigned nodes = 0, keys = 0, blocks;
 	int i;
@@ -1228,7 +1198,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	for (i = nodes - 1; i >= 0; --i) {
 		if (r[i].b->written)
-			r[i].b = btree_gc_alloc(r[i].b, r[i].k, op);
+			r[i].b = btree_gc_alloc(r[i].b, r[i].k);
 
 		if (r[i].b->written)
 			return;
@@ -1292,7 +1262,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
 		r[i - 1].keys = n2->keys;
 	}
 
-	btree_node_free(r->b, op);
+	btree_node_free(r->b);
 	up_write(&r->b->lock);
 
 	trace_bcache_btree_gc_coalesce(nodes);
@@ -1324,7 +1294,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 	memset(r, 0, sizeof(r));
 
 	while ((r->k = bch_next_recurse_key(b, &b->c->gc_done))) {
-		r->b = bch_btree_node_get(b->c, r->k, b->level - 1, op);
+		r->b = bch_btree_node_get(b->c, r->k, b->level - 1, true);
 
 		if (IS_ERR(r->b)) {
 			ret = PTR_ERR(r->b);
@@ -1337,7 +1307,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 		if (!b->written &&
 		    (r->b->level || stale > 10 ||
 		     b->c->gc_always_rewrite))
-			r->b = btree_gc_alloc(r->b, r->k, op);
+			r->b = btree_gc_alloc(r->b, r->k);
 
 		if (r->b->level)
 			ret = btree_gc_recurse(r->b, op, writes, gc);
@@ -1350,7 +1320,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 		bkey_copy_key(&b->c->gc_done, r->k);
 
 		if (!b->written)
-			btree_gc_coalesce(b, op, gc, r);
+			btree_gc_coalesce(b, gc, r);
 
 		if (r[GC_MERGE_NODES - 1].b)
 			write(r[GC_MERGE_NODES - 1].b);
@@ -1404,7 +1374,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
 	if (!IS_ERR_OR_NULL(n)) {
 		closure_sync(&op->cl);
 		bch_btree_set_root(b);
-		btree_node_free(n, op);
+		btree_node_free(n);
 		rw_unlock(true, b);
 	}
@@ -2004,18 +1974,18 @@ static int btree_split(struct btree *b, struct btree_op *op,
 	}
 	rw_unlock(true, n1);
 
-	btree_node_free(b, op);
+	btree_node_free(b);
 
 	bch_time_stats_update(&b->c->btree_split_time, start_time);
 
 	return 0;
 err_free2:
 	__bkey_put(n2->c, &n2->key);
-	btree_node_free(n2, op);
+	btree_node_free(n2);
 	rw_unlock(true, n2);
 err_free1:
 	__bkey_put(n1->c, &n1->key);
-	btree_node_free(n1, op);
+	btree_node_free(n1);
 	rw_unlock(true, n1);
 err:
 	if (n3 == ERR_PTR(-EAGAIN) ||
drivers/md/bcache/btree.h
@@ -326,7 +326,7 @@ static inline void rw_unlock(bool w, struct btree *b)
 ({									\
 	int _r, l = (b)->level - 1;					\
 	bool _w = l <= (op)->lock;					\
-	struct btree *_child = bch_btree_node_get((b)->c, key, l, op);	\
+	struct btree *_child = bch_btree_node_get((b)->c, key, l, _w);	\
 	if (!IS_ERR(_child)) {						\
 		_child->parent = (b);					\
 		_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__);	\
@@ -356,6 +356,11 @@ static inline void rw_unlock(bool w, struct btree *b)
 		}							\
 		rw_unlock(_w, _b);					\
 		bch_cannibalize_unlock(c, &(op)->cl);			\
+		if (_r == -ENOSPC) {					\
+			wait_event((c)->try_wait,			\
+				   !(c)->try_harder);			\
+			_r = -EINTR;					\
+		}							\
 	} while (_r == -EINTR);						\
 									\
 	_r;								\
@@ -375,8 +380,7 @@ void bch_btree_node_write(struct btree *, struct closure *);
 void bch_cannibalize_unlock(struct cache_set *, struct closure *);
 void bch_btree_set_root(struct btree *);
 struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *);
-struct btree *bch_btree_node_get(struct cache_set *, struct bkey *,
-				 int, struct btree_op *);
+struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool);
 
 int bch_btree_insert_check_key(struct btree *, struct btree_op *,
 			       struct bkey *);
drivers/md/bcache/super.c
@@ -1436,12 +1436,14 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 
 	c->sort_crit_factor = int_sqrt(c->btree_pages);
 
-	mutex_init(&c->bucket_lock);
-	mutex_init(&c->sort_lock);
-	spin_lock_init(&c->sort_time_lock);
 	closure_init_unlocked(&c->sb_write);
+	mutex_init(&c->bucket_lock);
+	init_waitqueue_head(&c->try_wait);
 	closure_init_unlocked(&c->uuid_write);
+	spin_lock_init(&c->sort_time_lock);
+	mutex_init(&c->sort_lock);
 	spin_lock_init(&c->btree_read_time_lock);
 	bch_moving_init_cache_set(c);
 
 	INIT_LIST_HEAD(&c->list);
@@ -1529,7 +1531,7 @@ static void run_cache_set(struct cache_set *c)
 			goto err;
 
 		err = "error reading btree root";
-		c->root = bch_btree_node_get(c, k, j->btree_level, &op);
+		c->root = bch_btree_node_get(c, k, j->btree_level, true);
 		if (IS_ERR_OR_NULL(c->root))
 			goto err;