diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e0468fd41b6ead472aa07440fc5c668049814378..45f684689c357c96783e68a7b0b5568263ce6a74 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -35,7 +35,7 @@ #include #include #include - +#include #include /* @@ -649,12 +649,25 @@ static int mca_reap(struct btree *b, unsigned int min_order, bool flush) up(&b->io_mutex); } +retry: /* * BTREE_NODE_dirty might be cleared in btree_flush_btree() by * __bch_btree_node_write(). To avoid an extra flush, acquire * b->write_lock before checking BTREE_NODE_dirty bit. */ mutex_lock(&b->write_lock); + /* + * If this btree node is selected in btree_flush_write() by journal + * code, delay and retry until the node is flushed by journal code + * and BTREE_NODE_journal_flush bit cleared by btree_flush_write(). + */ + if (btree_node_journal_flush(b)) { + pr_debug("bnode %p is flushing by journal, retry", b); + mutex_unlock(&b->write_lock); + udelay(1); + goto retry; + } + if (btree_node_dirty(b)) __bch_btree_node_write(b, &cl); mutex_unlock(&b->write_lock); @@ -1071,7 +1084,20 @@ static void btree_node_free(struct btree *b) BUG_ON(b == b->c->root); +retry: mutex_lock(&b->write_lock); + /* + * If the btree node is selected and flushing in btree_flush_write(), + * delay and retry until the BTREE_NODE_journal_flush bit cleared, + * then it is safe to free the btree node here. Otherwise this btree + * node will be in race condition. + */ + if (btree_node_journal_flush(b)) { + mutex_unlock(&b->write_lock); + pr_debug("bnode %p journal_flush set, retry", b); + udelay(1); + goto retry; + } if (btree_node_dirty(b)) { btree_complete_write(b, btree_current_write(b)); diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index a68d6c55783bd97eaf49d5744c9422f8af24be07..4d0cca145f6992a4efe7be1ff930102b4ce6c620 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -158,11 +158,13 @@ enum btree_flags { BTREE_NODE_io_error, BTREE_NODE_dirty, BTREE_NODE_write_idx, + BTREE_NODE_journal_flush, }; BTREE_FLAG(io_error); BTREE_FLAG(dirty); BTREE_FLAG(write_idx); +BTREE_FLAG(journal_flush); static inline struct btree_write *btree_current_write(struct btree *b) { diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index ec1e35a62934d1e0d9be5374fb27b9695cf2d75c..7bb15cddca5ecb6dbd25d3f21b2ca201e9d106f8 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -404,6 +404,7 @@ static void btree_flush_write(struct cache_set *c) retry: best = NULL; + mutex_lock(&c->bucket_lock); for_each_cached_btree(b, c, i) if (btree_current_write(b)->journal) { if (!best) @@ -416,9 +417,14 @@ static void btree_flush_write(struct cache_set *c) } b = best; + if (b) + set_btree_node_journal_flush(b); + mutex_unlock(&c->bucket_lock); + if (b) { mutex_lock(&b->write_lock); if (!btree_current_write(b)->journal) { + clear_bit(BTREE_NODE_journal_flush, &b->flags); mutex_unlock(&b->write_lock); /* We raced */ atomic_long_inc(&c->retry_flush_write); @@ -426,6 +432,7 @@ static void btree_flush_write(struct cache_set *c) } __bch_btree_node_write(b, NULL); + clear_bit(BTREE_NODE_journal_flush, &b->flags); mutex_unlock(&b->write_lock); } }