提交 8e8320c9 编写于 作者: J Jens Axboe

blk-mq: fix performance regression with shared tags

If we have shared tags enabled, then every IO completion will trigger
a full loop of every queue belonging to a tag set, and every hardware
queue for each of those queues, even if nothing needs to be done.
This causes a massive performance regression if you have a lot of
shared devices.

Instead of doing this huge full scan on every IO, add an atomic
counter to the main queue that tracks how many hardware queues have
been marked as needing a restart. With that, we can avoid looking for
restartable queues, if we don't have to.

Max reports that this restores performance. Before this patch, 4K
IOPS was limited to 22-23K IOPS. With the patch, we are running at
950-970K IOPS.

Fixes: 6d8c6c0f ("blk-mq: Restart a single queue if tag sets are shared")
Reported-by: NMax Gurtovoy <maxg@mellanox.com>
Tested-by: NMax Gurtovoy <maxg@mellanox.com>
Reviewed-by: NBart Van Assche <bart.vanassche@sandisk.com>
Tested-by: NBart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: NJens Axboe <axboe@kernel.dk>
上级 ec2f0fad
...@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q, ...@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
__blk_mq_sched_assign_ioc(q, rq, bio, ioc); __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
} }
/*
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
*/
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;
if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_inc(&q->shared_hctx_restart);
} else
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return false;
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;
if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_dec(&q->shared_hctx_restart);
} else
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}
return false;
}
struct request *blk_mq_sched_get_request(struct request_queue *q, struct request *blk_mq_sched_get_request(struct request_queue *q,
struct bio *bio, struct bio *bio,
unsigned int op, unsigned int op,
...@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, ...@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
return true; return true;
} }
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}
}
return false;
}
/** /**
* list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
* @pos: loop cursor. * @pos: loop cursor.
...@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) ...@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
unsigned int i, j; unsigned int i, j;
if (set->flags & BLK_MQ_F_TAG_SHARED) { if (set->flags & BLK_MQ_F_TAG_SHARED) {
/*
* If this is 0, then we know that no hardware queues
* have RESTART marked. We're done.
*/
if (!atomic_read(&queue->shared_hctx_restart))
return;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu_rr(q, queue, &set->tag_list, list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
tag_set_list) { tag_set_list) {
......
...@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) ...@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
return false; return false;
} }
/*
* Mark a hardware queue as needing a restart.
*/
static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{ {
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
......
...@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q, ...@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
} }
} }
/*
* Caller needs to ensure that we're either frozen/quiesced, or that
* the queue isn't live yet.
*/
static void queue_set_hctx_shared(struct request_queue *q, bool shared) static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{ {
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
int i; int i;
queue_for_each_hw_ctx(q, hctx, i) { queue_for_each_hw_ctx(q, hctx, i) {
if (shared) if (shared) {
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_inc(&q->shared_hctx_restart);
hctx->flags |= BLK_MQ_F_TAG_SHARED; hctx->flags |= BLK_MQ_F_TAG_SHARED;
else } else {
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_dec(&q->shared_hctx_restart);
hctx->flags &= ~BLK_MQ_F_TAG_SHARED; hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
} }
}
} }
static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
bool shared)
{ {
struct request_queue *q; struct request_queue *q;
......
...@@ -391,6 +391,8 @@ struct request_queue { ...@@ -391,6 +391,8 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
atomic_t shared_hctx_restart;
struct blk_queue_stats *stats; struct blk_queue_stats *stats;
struct rq_wb *rq_wb; struct rq_wb *rq_wb;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册