提交 6e768717 编写于 作者: M Ming Lei 提交者: Jens Axboe

blk-mq: dequeue request one by one from sw queue if hctx is busy

It won't be efficient to dequeue request one by one from sw queue,
but we have to do that when queue is busy for better merge performance.

This patch takes the Exponential Weighted Moving Average(EWMA) to figure
out if queue is busy, then only dequeue request one by one from sw queue
when queue is busy.

Fixes: b347689f ("blk-mq-sched: improve dispatching from sw queue")
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Reported-by: NKashyap Desai <kashyap.desai@broadcom.com>
Tested-by: NKashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: NMing Lei <ming.lei@redhat.com>
Signed-off-by: NJens Axboe <axboe@kernel.dk>
上级 d893ff86
...@@ -622,6 +622,14 @@ static int hctx_active_show(void *data, struct seq_file *m) ...@@ -622,6 +622,14 @@ static int hctx_active_show(void *data, struct seq_file *m)
return 0; return 0;
} }
static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
{
struct blk_mq_hw_ctx *hctx = data;
seq_printf(m, "%u\n", hctx->dispatch_busy);
return 0;
}
static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos) static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
__acquires(&ctx->lock) __acquires(&ctx->lock)
{ {
...@@ -783,6 +791,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { ...@@ -783,6 +791,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
{"queued", 0600, hctx_queued_show, hctx_queued_write}, {"queued", 0600, hctx_queued_show, hctx_queued_write},
{"run", 0600, hctx_run_show, hctx_run_write}, {"run", 0600, hctx_run_show, hctx_run_write},
{"active", 0400, hctx_active_show}, {"active", 0400, hctx_active_show},
{"dispatch_busy", 0400, hctx_dispatch_busy_show},
{}, {},
}; };
......
...@@ -206,15 +206,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) ...@@ -206,15 +206,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
} }
} else if (has_sched_dispatch) { } else if (has_sched_dispatch) {
blk_mq_do_dispatch_sched(hctx); blk_mq_do_dispatch_sched(hctx);
} else if (q->mq_ops->get_budget) { } else if (hctx->dispatch_busy) {
/* /* dequeue request one by one from sw queue if queue is busy */
* If we need to get budget before queuing request, we
* dequeue request one by one from sw queue for avoiding
* to mess up I/O merge when dispatch runs out of resource.
*
* TODO: get more budgets, and dequeue more requests in
* one time.
*/
blk_mq_do_dispatch_ctx(hctx); blk_mq_do_dispatch_ctx(hctx);
} else { } else {
blk_mq_flush_busy_ctxs(hctx, &rq_list); blk_mq_flush_busy_ctxs(hctx, &rq_list);
......
...@@ -1074,6 +1074,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, ...@@ -1074,6 +1074,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
return true; return true;
} }
#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT 8
#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR 4
/*
* Update dispatch busy with the Exponential Weighted Moving Average(EWMA):
* - EWMA is one simple way to compute running average value
* - weight(7/8 and 1/8) is applied so that it can decrease exponentially
* - take 4 as factor for avoiding to get too small(0) result, and this
* factor doesn't matter because EWMA decreases exponentially
*/
static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
{
unsigned int ewma;
if (hctx->queue->elevator)
return;
ewma = hctx->dispatch_busy;
if (!ewma && !busy)
return;
ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
if (busy)
ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
hctx->dispatch_busy = ewma;
}
#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */ #define BLK_MQ_RESOURCE_DELAY 3 /* ms units */
/* /*
...@@ -1210,8 +1239,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, ...@@ -1210,8 +1239,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
else if (needs_restart && (ret == BLK_STS_RESOURCE)) else if (needs_restart && (ret == BLK_STS_RESOURCE))
blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
blk_mq_update_dispatch_busy(hctx, true);
return false; return false;
} } else
blk_mq_update_dispatch_busy(hctx, false);
/* /*
* If the host/device is unable to accept more work, inform the * If the host/device is unable to accept more work, inform the
......
...@@ -35,9 +35,10 @@ struct blk_mq_hw_ctx { ...@@ -35,9 +35,10 @@ struct blk_mq_hw_ctx {
struct sbitmap ctx_map; struct sbitmap ctx_map;
struct blk_mq_ctx *dispatch_from; struct blk_mq_ctx *dispatch_from;
unsigned int dispatch_busy;
struct blk_mq_ctx **ctxs;
unsigned int nr_ctx; unsigned int nr_ctx;
struct blk_mq_ctx **ctxs;
spinlock_t dispatch_wait_lock; spinlock_t dispatch_wait_lock;
wait_queue_entry_t dispatch_wait; wait_queue_entry_t dispatch_wait;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册