Commit 506e931f, authored by Jens Axboe

blk-mq: add basic round-robin of what CPU to queue workqueue work on

Right now we just pick the first CPU in the mask, but that can
easily overload that one. Add some basic batching and round-robin
all the entries in the mask instead.
Signed-off-by: Jens Axboe <axboe@fb.com>
Parent 5cf8c227
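For readers of the diff below: the new blk_mq_hctx_next_cpu() hands out the cached next_cpu and only advances to the next entry in the cpumask once every BLK_MQ_CPU_WORK_BATCH (8) dispatches, wrapping back to the first CPU at the end of the mask. A minimal user-space sketch of that batching behavior follows; the fake_hctx struct, the next_cpu() helper, and the plain int array standing in for the cpumask are illustrative stand-ins, not kernel API.

/*
 * Illustrative sketch only: models the batched round-robin outside the
 * kernel. "mask" is a plain array standing in for hctx->cpumask and
 * WORK_BATCH mirrors BLK_MQ_CPU_WORK_BATCH = 8.
 */
#include <stdio.h>

#define WORK_BATCH 8

struct fake_hctx {
	const int *mask;	/* CPUs this hw queue may run on */
	int nr;			/* number of entries in mask */
	int idx;		/* index of the CPU currently handed out */
	int batch;		/* items left before advancing */
};

static int next_cpu(struct fake_hctx *h)
{
	int cpu = h->mask[h->idx];

	/* Hand out the same CPU for WORK_BATCH items, then advance. */
	if (--h->batch <= 0) {
		h->idx = (h->idx + 1) % h->nr;	/* wrap around the mask */
		h->batch = WORK_BATCH;
	}
	return cpu;
}

int main(void)
{
	static const int mask[] = { 0, 2, 4 };
	struct fake_hctx h = { mask, 3, 0, WORK_BATCH };

	/* 24 queued items -> eight on CPU 0, eight on CPU 2, eight on CPU 4. */
	for (int i = 0; i < 24; i++)
		printf("%d ", next_cpu(&h));
	printf("\n");
	return 0;
}

Run as-is, this prints eight 0s, then eight 2s, then eight 4s: each hardware context sticks to one CPU for a batch of work before rotating, which is the behavior the commit introduces.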
block/blk-mq.c
@@ -670,6 +670,30 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+/*
+ * It'd be great if the workqueue API had a way to pass
+ * in a mask and had some smarts for more clever placement.
+ * For now we just round-robin here, switching for every
+ * BLK_MQ_CPU_WORK_BATCH queued items.
+ */
+static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
+{
+	int cpu = hctx->next_cpu;
+
+	if (--hctx->next_cpu_batch <= 0) {
+		int next_cpu;
+
+		next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
+		if (next_cpu >= nr_cpu_ids)
+			next_cpu = cpumask_first(hctx->cpumask);
+
+		hctx->next_cpu = next_cpu;
+		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+	}
+
+	return cpu;
+}
+
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
@@ -682,13 +706,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	else {
 		unsigned int cpu;
 
-		/*
-		 * It'd be great if the workqueue API had a way to pass
-		 * in a mask and had some smarts for more clever placement
-		 * than the first CPU. Or we could round-robin here. For now,
-		 * just queue on the first CPU.
-		 */
-		cpu = cpumask_first(hctx->cpumask);
+		cpu = blk_mq_hctx_next_cpu(hctx);
 		kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
 	}
 }
@@ -795,13 +813,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 	else {
 		unsigned int cpu;
 
-		/*
-		 * It'd be great if the workqueue API had a way to pass
-		 * in a mask and had some smarts for more clever placement
-		 * than the first CPU. Or we could round-robin here. For now,
-		 * just queue on the first CPU.
-		 */
-		cpu = cpumask_first(hctx->cpumask);
+		cpu = blk_mq_hctx_next_cpu(hctx);
 		kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
 	}
 }
@@ -1378,6 +1390,11 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		ctx->index_hw = hctx->nr_ctx;
 		hctx->ctxs[hctx->nr_ctx++] = ctx;
 	}
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx->next_cpu = cpumask_first(hctx->cpumask);
+		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+	}
 }
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
...
include/linux/blk-mq.h
@@ -21,6 +21,8 @@ struct blk_mq_hw_ctx {
 	struct delayed_work	run_work;
 	struct delayed_work	delay_work;
 	cpumask_var_t		cpumask;
+	int			next_cpu;
+	int			next_cpu_batch;
 
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
 
@@ -126,6 +128,8 @@ enum {
 	BLK_MQ_S_STOPPED	= 0,
 
 	BLK_MQ_MAX_DEPTH	= 2048,
+
+	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
...