diff --git a/block/blk-mq.c b/block/blk-mq.c index 7106c94ea58fe35c6d3118cccb4cffc7ce58a11c..55c81dcafbdc28aa6672e14ef4d151bd446fa7d0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -600,6 +600,20 @@ void blk_mq_force_complete_rq(struct request *rq) if (rq->internal_tag != -1) blk_mq_sched_completed_request(rq); + /* + * Most of single queue controllers, there is only one irq vector + * for handling IO completion, and the only irq's affinity is set + * as all possible CPUs. On most of ARCHs, this affinity means the + * irq is handled on one specific CPU. + * + * So complete IO reqeust in softirq context in case of single queue + * for not degrading IO performance by irqsoff latency. + */ + if (rq->q->nr_hw_queues == 1) { + __blk_complete_request(rq); + return; + } + if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { rq->q->softirq_done_fn(rq); return; diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 15c1f5e12eb89460bc42eb7f5807eaa03254e51d..e47a2f751884ddb7821fededef643693db752393 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -97,8 +97,8 @@ static int blk_softirq_cpu_dead(unsigned int cpu) void __blk_complete_request(struct request *req) { - int ccpu, cpu; struct request_queue *q = req->q; + int cpu, ccpu = q->mq_ops ? req->mq_ctx->cpu : req->cpu; unsigned long flags; bool shared = false; @@ -110,8 +110,7 @@ void __blk_complete_request(struct request *req) /* * Select completion CPU */ - if (req->cpu != -1) { - ccpu = req->cpu; + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) { if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) shared = cpus_share_cache(cpu, ccpu); } else