diff --git a/block/blk-mq.c b/block/blk-mq.c index 0d379830a278130adf4d316e8cae141599dd39b4..2410e0cb7aef76d0ba5a4f18bb5334061cde43ce 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -670,6 +670,30 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) } } +/* + * It'd be great if the workqueue API had a way to pass + * in a mask and had some smarts for more clever placement. + * For now we just round-robin here, switching for every + * BLK_MQ_CPU_WORK_BATCH queued items. + */ +static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) +{ + int cpu = hctx->next_cpu; + + if (--hctx->next_cpu_batch <= 0) { + int next_cpu; + + next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(hctx->cpumask); + + hctx->next_cpu = next_cpu; + hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; + } + + return cpu; +} + void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) @@ -682,13 +706,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) else { unsigned int cpu; - /* - * It'd be great if the workqueue API had a way to pass - * in a mask and had some smarts for more clever placement - * than the first CPU. Or we could round-robin here. For now, - * just queue on the first CPU. - */ - cpu = cpumask_first(hctx->cpumask); + cpu = blk_mq_hctx_next_cpu(hctx); kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0); } } @@ -795,13 +813,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) else { unsigned int cpu; - /* - * It'd be great if the workqueue API had a way to pass - * in a mask and had some smarts for more clever placement - * than the first CPU. Or we could round-robin here. For now, - * just queue on the first CPU. - */ - cpu = cpumask_first(hctx->cpumask); + cpu = blk_mq_hctx_next_cpu(hctx); kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo); } } @@ -1378,6 +1390,11 @@ static void blk_mq_map_swqueue(struct request_queue *q) ctx->index_hw = hctx->nr_ctx; hctx->ctxs[hctx->nr_ctx++] = ctx; } + + queue_for_each_hw_ctx(q, hctx, i) { + hctx->next_cpu = cpumask_first(hctx->cpumask); + hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; + } } struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 3b561d651a0229326b41b275ab79e46f377e312a..5bd677e2dcb726b4cba82f08374256d35a3f2676 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -21,6 +21,8 @@ struct blk_mq_hw_ctx { struct delayed_work run_work; struct delayed_work delay_work; cpumask_var_t cpumask; + int next_cpu; + int next_cpu_batch; unsigned long flags; /* BLK_MQ_F_* flags */ @@ -126,6 +128,8 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_MAX_DEPTH = 2048, + + BLK_MQ_CPU_WORK_BATCH = 8, }; struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);