// SPDX-License-Identifier: GPL-2.0
/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/list_sort.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

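/*
 * Look up (or create) the io_cq linking the current task's io_context to
 * this request queue and attach it to rq->elv.icq, taking a reference on
 * the io_context so the elevator can use it for the lifetime of the request.
 */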
void blk_mq_sched_assign_ioc(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct io_context *ioc;
	struct io_cq *icq;

	/*
	 * May not have an IO context if it's a passthrough request
	 */
	ioc = current->io_context;
	if (!ioc)
		return;

	spin_lock_irq(&q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(&q->queue_lock);

	if (!icq) {
		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
		if (!icq)
			return;
	}
	get_io_context(icq->ioc);
	rq->elv.icq = icq;
}

/*
 * Mark a hardware queue as needing a restart. For shared queues, maintain
 * a count of how many hardware queues are marked for restart.
 */
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);

void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	/*
	 * Order clearing SCHED_RESTART against the list_empty_careful()
	 * check of hctx->dispatch in blk_mq_run_hw_queue(). The pairing
	 * barrier is in blk_mq_dispatch_rq_list(). Without this, the
	 * dispatch code might not see SCHED_RESTART while a new request
	 * added to hctx->dispatch is missed by the check in
	 * blk_mq_run_hw_queue().
	 */
	smp_mb();

	blk_mq_run_hw_queue(hctx, true);
}

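/*
 * list_sort() comparator: order requests by their hw queue pointer so that
 * requests targeting the same hctx end up adjacent in the list and can be
 * dispatched as one batch by blk_mq_dispatch_hctx_list(). Returning 0/1 is
 * sufficient for list_sort() to group equal hctx values.
 */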
static int sched_rq_cmp(void *priv, const struct list_head *a,
			const struct list_head *b)
{
	struct request *rqa = container_of(a, struct request, queuelist);
	struct request *rqb = container_of(b, struct request, queuelist);

	return rqa->mq_hctx > rqb->mq_hctx;
}

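/*
 * Cut the leading run of requests that share the first entry's hw queue out
 * of @rq_list and dispatch them as one batch. Returns true if the batch made
 * dispatch progress (per blk_mq_dispatch_rq_list()).
 */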
static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
{
	struct blk_mq_hw_ctx *hctx =
		list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
	struct request *rq;
	LIST_HEAD(hctx_list);
	unsigned int count = 0;

	list_for_each_entry(rq, rq_list, queuelist) {
		if (rq->mq_hctx != hctx) {
			list_cut_before(&hctx_list, rq_list, &rq->queuelist);
			goto dispatch;
		}
		count++;
	}
	list_splice_tail_init(rq_list, &hctx_list);

dispatch:
	return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
}

#define BLK_MQ_BUDGET_DELAY	3		/* ms units */

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again.  This is necessary to avoid starving flushes.
 */
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	bool multi_hctxs = false, run_queue = false;
	bool dispatched = false, busy = false;
	unsigned int max_dispatch;
	LIST_HEAD(rq_list);
	int count = 0;

	if (hctx->dispatch_busy)
		max_dispatch = 1;
	else
		max_dispatch = hctx->queue->nr_requests;

	do {
		struct request *rq;
		int budget_token;

		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
			break;

		if (!list_empty_careful(&hctx->dispatch)) {
			busy = true;
			break;
		}

		budget_token = blk_mq_get_dispatch_budget(q);
		if (budget_token < 0)
			break;

		rq = e->type->ops.dispatch_request(hctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(q, budget_token);
			/*
			 * We're releasing without dispatching. Holding the
			 * budget could have blocked any "hctx"s with the
			 * same queue and if we didn't dispatch then there's
			 * no guarantee anyone will kick the queue.  Kick it
			 * ourselves.
			 */
			run_queue = true;
			break;
		}

		blk_mq_set_rq_budget_token(rq, budget_token);

		/*
		 * Now this rq owns the budget which has to be released
		 * if this rq won't be queued to driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add_tail(&rq->queuelist, &rq_list);
		count++;
		if (rq->mq_hctx != hctx)
			multi_hctxs = true;

		/*
		 * If we cannot get a driver tag for the request, stop
		 * dequeueing requests from the IO scheduler. We are unlikely
		 * to be able to submit them anyway and it creates a false
		 * impression for scheduling heuristics that the device can
		 * take more IO.
		 */
		if (!blk_mq_get_driver_tag(rq))
			break;
	} while (count < max_dispatch);

	if (!count) {
		if (run_queue)
			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
	} else if (multi_hctxs) {
		/*
		 * Requests from different hctx may be dequeued from some
		 * schedulers, such as bfq and deadline.
		 *
		 * Sort the requests in the list according to their hctx and
		 * dispatch batches of requests from the same hctx at a time.
		 */
		list_sort(NULL, &rq_list, sched_rq_cmp);
		do {
			dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
		} while (!list_empty(&rq_list));
	} else {
		dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
	}

	if (busy)
		return -EAGAIN;
	return !!dispatched;
}

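/*
 * Keep pulling batches from the elevator until __blk_mq_do_dispatch_sched()
 * stops making progress (returns 0) or reports -EAGAIN because
 * hctx->dispatch turned out to be non-empty.
 */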
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
	int ret;

	do {
		ret = __blk_mq_do_dispatch_sched(hctx);
	} while (ret == 1);

	return ret;
}

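/*
 * Return the software queue that follows @ctx on @hctx, wrapping around at
 * the end; used for round-robin dispatch across the hctx's sw queues.
 */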
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
					  struct blk_mq_ctx *ctx)
{
	unsigned short idx = ctx->index_hw[hctx->type];

	if (++idx == hctx->nr_ctx)
		idx = 0;

	return hctx->ctxs[idx];
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again.  This is necessary to avoid starving flushes.
 */
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	LIST_HEAD(rq_list);
	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
	int ret = 0;
	struct request *rq;

	do {
		int budget_token;

		if (!list_empty_careful(&hctx->dispatch)) {
			ret = -EAGAIN;
			break;
		}

		if (!sbitmap_any_bit_set(&hctx->ctx_map))
			break;

		budget_token = blk_mq_get_dispatch_budget(q);
		if (budget_token < 0)
			break;

		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(q, budget_token);
			/*
			 * We're releasing without dispatching. Holding the
			 * budget could have blocked any "hctx"s with the
			 * same queue and if we didn't dispatch then there's
			 * no guarantee anyone will kick the queue.  Kick it
			 * ourselves.
			 */
			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
			break;
		}

		blk_mq_set_rq_budget_token(rq, budget_token);

		/*
		 * Now this rq owns the budget which has to be released
		 * if this rq won't be queued to driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);

		/* round robin for fair dispatch */
		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);

	} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));

	WRITE_ONCE(hctx->dispatch_from, ctx);
	return ret;
}

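/*
 * Dispatch leftover requests from hctx->dispatch first, then pull new work
 * from the elevator or the software queues. Returns -EAGAIN if
 * hctx->dispatch was found non-empty and the queue has to be run again.
 */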
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	const bool has_sched = q->elevator;
	int ret = 0;
	LIST_HEAD(rq_list);

	/*
	 * If we have previous entries on our dispatch list, grab them first for
	 * more fair dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests if we didn't have residual
	 * requests from the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
	 *
	 * We want to dispatch from the scheduler if there was nothing
	 * on the dispatch list or we were able to dispatch from the
	 * dispatch list.
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart_hctx(hctx);
		if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
			if (has_sched)
				ret = blk_mq_do_dispatch_sched(hctx);
			else
				ret = blk_mq_do_dispatch_ctx(hctx);
		}
	} else if (has_sched) {
		ret = blk_mq_do_dispatch_sched(hctx);
	} else if (hctx->dispatch_busy) {
		/* dequeue requests one by one from the sw queue if the queue is busy */
		ret = blk_mq_do_dispatch_ctx(hctx);
	} else {
		blk_mq_flush_busy_ctxs(hctx, &rq_list);
		blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
	}

	return ret;
}

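/*
 * Dispatch entry point invoked when a hardware queue is run. Bails out early
 * if the hctx is stopped or the queue is quiesced.
 */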
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;

	/* RCU or SRCU read lock is needed before checking quiesced flag */
	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
		return;

	hctx->run++;

	/*
	 * A return of -EAGAIN is an indication that hctx->dispatch is not
	 * empty and we must run again in order to avoid starving flushes.
	 */
	if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
		if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
			blk_mq_run_hw_queue(hctx, true);
	}
}

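/*
 * Try to merge @bio into an already queued request, either through the
 * elevator's ->bio_merge() hook or, without one, by scanning the per-cpu
 * software queue. Returns true if the bio was merged.
 */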
bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
	bool ret = false;
	enum hctx_type type;

	if (e && e->type->ops.bio_merge) {
		ret = e->type->ops.bio_merge(q, bio, nr_segs);
		goto out_put;
	}

	ctx = blk_mq_get_ctx(q);
	hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
	type = hctx->type;
	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
	    list_empty_careful(&ctx->rq_lists[type]))
		goto out_put;

	/* default per sw-queue merge */
	spin_lock(&ctx->lock);
	/*
	 * Reverse check our software queue for entries that we could
	 * potentially merge with. Currently includes a hand-wavy stop
	 * count of 8, to not spend too much time checking for merges.
	 */
	if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs))
		ret = true;

	spin_unlock(&ctx->lock);
out_put:
	return ret;
}

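/*
 * Attempt to merge @rq into an existing request at insertion time; any
 * requests made redundant by the merge are collected on @free.
 */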
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
				   struct list_head *free)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	/*
	 * Dispatch flush and passthrough requests directly.
	 *
	 * A passthrough request has to be added to hctx->dispatch directly:
	 * the device may be in a state where it can't handle FS requests,
	 * so BLK_STS_RESOURCE is always returned and the FS request ends up
	 * on hctx->dispatch anyway. However, a passthrough request may be
	 * required at that very time to fix the problem. If it were added to
	 * the scheduler queue, there would be no chance to dispatch it, given
	 * that we prioritize requests in hctx->dispatch.
	 */
	if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
		return true;

	return false;
}

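/*
 * Insert a single request: flush and passthrough requests bypass the
 * scheduler and go straight to hctx->dispatch; everything else goes to the
 * elevator (if any) or the software queue, optionally running the hw queue
 * afterwards.
 */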
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

	WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));

	if (blk_mq_sched_bypass_insert(hctx, rq)) {
		/*
		 * Firstly, normal IO requests are inserted to the scheduler
		 * queue or sw queue, while flush requests are added to the
		 * dispatch queue (hctx->dispatch) directly. Since there is at
		 * most one in-flight flush request per hw queue, it doesn't
		 * matter for correctness whether a flush request is added to
		 * the tail or the front of the dispatch queue.
		 *
		 * Secondly, in case of NCQ, a flush request is a non-NCQ
		 * command, and queueing it will fail while any normal IO
		 * request (NCQ command) is in flight. Adding the flush rq to
		 * the front of hctx->dispatch tends to add extra latency to
		 * the flush rq because of S_SCHED_RESTART, compared with
		 * adding it to the tail of the dispatch queue. That extra
		 * time increases the chance of flush merging, so fewer flush
		 * requests are issued to the controller. It is observed that
		 * ~10% of time is saved in blktests block/004 on a disk
		 * attached via AHCI/NCQ when adding the flush rq to the front
		 * of hctx->dispatch.
		 *
		 * So simply queue the flush rq to the front of hctx->dispatch,
		 * which lets intensive flush workloads benefit on NCQ HW.
		 */
		at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
		blk_mq_request_bypass_insert(rq, at_head, false);
		goto run;
	}

	if (e) {
		LIST_HEAD(list);

		list_add(&rq->queuelist, &list);
		e->type->ops.insert_requests(hctx, &list, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

run:
	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}

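/*
 * Insert a plug list of requests for one software queue, either through the
 * elevator or, in the 'none' case, directly or via the sw queue, then run
 * the hw queue.
 */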
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct elevator_queue *e;
	struct request_queue *q = hctx->queue;

	/*
	 * blk_mq_sched_insert_requests() is called from flush plug
	 * context only, and holds one usage counter to prevent the queue
	 * from being released.
	 */
	percpu_ref_get(&q->q_usage_counter);

	e = hctx->queue->elevator;
	if (e) {
		e->type->ops.insert_requests(hctx, list, false);
	} else {
		/*
		 * Try to issue requests directly if the hw queue isn't busy
		 * in case of the 'none' scheduler, which may save us one
		 * extra enqueue & dequeue to the sw queue.
		 */
		if (!hctx->dispatch_busy && !run_queue_async) {
			blk_mq_try_issue_list_directly(hctx, list);
			if (list_empty(list))
				goto out;
		}
		blk_mq_insert_requests(hctx, ctx, list);
	}

	blk_mq_run_hw_queue(hctx, run_queue_async);
 out:
	percpu_ref_put(&q->q_usage_counter);
}

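/*
 * Set up scheduler tags for one hw queue: reuse the queue-wide shared tags
 * when the tag set is shared, otherwise allocate a per-hctx tag map sized to
 * q->nr_requests.
 */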
static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
					  struct blk_mq_hw_ctx *hctx,
					  unsigned int hctx_idx)
{
	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
		hctx->sched_tags = q->sched_shared_tags;
		return 0;
	}

	hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
						    q->nr_requests);

	if (!hctx->sched_tags)
		return -ENOMEM;
	return 0;
}

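/* Release the queue-wide shared scheduler tag map. */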
static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
{
	blk_mq_free_rq_map(queue->sched_shared_tags);
	queue->sched_shared_tags = NULL;
}

/* called in queue's release handler, tagset has gone away */
static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (hctx->sched_tags) {
			if (!blk_mq_is_shared_tags(flags))
				blk_mq_free_rq_map(hctx->sched_tags);
			hctx->sched_tags = NULL;
		}
	}

	if (blk_mq_is_shared_tags(flags))
		blk_mq_exit_sched_shared_tags(q);
}

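/*
 * Allocate the queue-wide shared scheduler tag map and resize it to the
 * current scheduler depth.
 */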
static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
{
	struct blk_mq_tag_set *set = queue->tag_set;

	/*
	 * Set initial depth at max so that we don't need to reallocate for
	 * updating nr_requests.
	 */
	queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set,
						BLK_MQ_NO_HCTX_IDX,
						MAX_SCHED_RQ);
	if (!queue->sched_shared_tags)
		return -ENOMEM;

	blk_mq_tag_update_sched_shared_tags(queue);

	return 0;
}

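/*
 * Attach an I/O scheduler to the queue: pick a default nr_requests, allocate
 * scheduler tags (shared or per-hctx), then call the elevator's init_sched()
 * and per-hctx init_hctx() hooks, unwinding on failure.
 */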
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	unsigned int i, flags = q->tag_set->flags;
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	int ret;

	if (!e) {
		q->elevator = NULL;
		q->nr_requests = q->tag_set->queue_depth;
		return 0;
	}

	/*
	 * Default to twice the smaller of the hardware queue depth and 128,
	 * since we don't split into sync/async like the old code did.
	 * Additionally, this is a per-hw-queue depth.
	 */
	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_DEFAULT_RQ);

	if (blk_mq_is_shared_tags(flags)) {
		ret = blk_mq_init_sched_shared_tags(q);
		if (ret)
			return ret;
	}

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
		if (ret)
			goto err_free_map_and_rqs;
	}

	ret = e->ops.init_sched(q, e);
	if (ret)
		goto err_free_map_and_rqs;

	blk_mq_debugfs_register_sched(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.init_hctx) {
			ret = e->ops.init_hctx(hctx, i);
			if (ret) {
				eq = q->elevator;
				blk_mq_sched_free_rqs(q);
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
		blk_mq_debugfs_register_sched_hctx(q, hctx);
	}

	return 0;

err_free_map_and_rqs:
	blk_mq_sched_free_rqs(q);
	blk_mq_sched_tags_teardown(q, flags);

	q->elevator = NULL;
	return ret;
}

/*
 * called in either blk_queue_cleanup or elevator_switch, tagset
 * is required for freeing requests
 */
void blk_mq_sched_free_rqs(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
		blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
				BLK_MQ_NO_HCTX_IDX);
	} else {
		queue_for_each_hw_ctx(q, hctx, i) {
			if (hctx->sched_tags)
				blk_mq_free_rqs(q->tag_set,
						hctx->sched_tags, i);
		}
	}
}

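/*
 * Detach the I/O scheduler from the queue: tear down per-hctx scheduler data
 * and debugfs entries, call the elevator's exit_sched() hook and free the
 * scheduler tags.
 */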
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;
	unsigned int flags = 0;

	queue_for_each_hw_ctx(q, hctx, i) {
		blk_mq_debugfs_unregister_sched_hctx(hctx);
		if (e->type->ops.exit_hctx && hctx->sched_data) {
			e->type->ops.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
		flags = hctx->flags;
	}
	blk_mq_debugfs_unregister_sched(q);
	if (e->type->ops.exit_sched)
		e->type->ops.exit_sched(e);
	blk_mq_sched_tags_teardown(q, flags);
	q->elevator = NULL;
}