blk-mq-tag.c 17.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * Fast and scalable bitmap tagging variant. Uses sparser bitmaps spread
 * over multiple cachelines to avoid ping-pong between multiple submitters
 * or submitter and completer. Uses rolling wakeups to avoid falling off
 * the scaling cliff when we run out of tags and have to start putting
 * submitters to sleep.
 *
 * Uses active queue tracking to support fairer distribution of tags
 * between multiple submitters when a shared tag map is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
13 14
#include <linux/kernel.h>
#include <linux/module.h>
15
#include <linux/random.h>
16 17 18 19 20 21

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"

22 23 24 25 26
static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
{
	int i;

	for (i = 0; i < bt->map_nr; i++) {
27
		struct blk_align_bitmap *bm = &bt->map[i];
28 29 30 31 32 33 34 35
		int ret;

		ret = find_first_zero_bit(&bm->word, bm->depth);
		if (ret < bm->depth)
			return true;
	}

	return false;
36 37 38 39
}

bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
40 41 42 43 44 45
	if (!tags)
		return true;

	return bt_has_free_tags(&tags->bitmap_tags);
}

46
static inline int bt_index_inc(int index)
47
{
48 49 50 51 52 53 54 55
	return (index + 1) & (BT_WAIT_QUEUES - 1);
}

/*
 * Try to move *index to the next wait queue slot. The result of
 * atomic_cmpxchg() is not checked, so if the value changed between the
 * read and the exchange this update is simply dropped.
 */
static inline void bt_index_atomic_inc(atomic_t *index)
{
	int old = atomic_read(index);
	int new = bt_index_inc(old);

	atomic_cmpxchg(index, old, new);
}

/*
 * Mark @hctx as an active tag user. The shared active_queues counter is
 * bumped only by the caller that actually flips BLK_MQ_S_TAG_ACTIVE from
 * clear to set. Always returns true.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	/* Plain read first: nothing to do if the bit is already set. */
	if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return true;

	/* Somebody else set the bit between the two checks. */
	if (test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return true;

	atomic_inc(&hctx->tags->active_queues);
	return true;
}

/*
71
 * Wakeup all potentially sleeping on tags
72
 */
73
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
74 75 76 77
{
	struct blk_mq_bitmap_tags *bt;
	int i, wake_index;

78 79 80 81
	/*
	 * Make sure all changes prior to this are visible from other CPUs.
	 */
	smp_mb();
82
	bt = &tags->bitmap_tags;
83
	wake_index = atomic_read(&bt->wake_index);
84 85 86 87 88 89
	for (i = 0; i < BT_WAIT_QUEUES; i++) {
		struct bt_wait_state *bs = &bt->bs[wake_index];

		if (waitqueue_active(&bs->wait))
			wake_up(&bs->wait);

90
		wake_index = bt_index_inc(wake_index);
91
	}
92 93 94 95 96 97

	if (include_reserve) {
		bt = &tags->breserved_tags;
		if (waitqueue_active(&bt->bs[0].wait))
			wake_up(&bt->bs[0].wait);
	}
98 99
}

100 101 102 103 104 105 106 107 108 109 110 111 112
/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_tags *tags = hctx->tags;

	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return;

	atomic_dec(&tags->active_queues);

113
	blk_mq_tag_wakeup_all(tags, false);
114 115
}

116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
				  struct blk_mq_bitmap_tags *bt)
{
	unsigned int depth, users;

	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return true;
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return true;

	/*
	 * Don't try dividing an ant
	 */
	if (bt->depth == 1)
		return true;

	users = atomic_read(&hctx->tags->active_queues);
	if (!users)
		return true;

	/*
	 * Allow at least some tags
	 */
	depth = max((bt->depth + users - 1) / users, 4U);
	return atomic_read(&hctx->nr_active) < depth;
}

S
Shaohua Li 已提交
147 148
/*
 * Try to claim a free bit in a single bitmap word.
 *
 * @bm:       word to search; only the first bm->depth bits are considered
 * @last_tag: bit offset at which the search starts
 * @nowrap:   when true, do not restart from bit 0 after running off the
 *            end of the word
 *
 * Returns the bit index that was claimed with test_and_set_bit(), or -1 if
 * no free bit could be claimed.
 */
static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
			 bool nowrap)
{
	int tag, org_last_tag = last_tag;

	while (1) {
		tag = find_next_zero_bit(&bm->word, bm->depth, last_tag);
		if (unlikely(tag >= bm->depth)) {
			/*
			 * We started with an offset, and we didn't reset the
			 * offset to 0 in a failure case, so start from 0 to
			 * exhaust the map.
			 */
			if (org_last_tag && last_tag && !nowrap) {
				last_tag = org_last_tag = 0;
				continue;
			}
			return -1;
		}

		/* Bit was still clear when we set it: the tag is ours. */
		if (!test_and_set_bit(tag, &bm->word))
			break;

		/*
		 * Someone else grabbed this bit first; continue just past it.
		 * NOTE(review): the wrap triggers at depth - 1 rather than
		 * depth, so the last bit is only reached via a search from 0;
		 * confirm this is intended.
		 */
		last_tag = tag + 1;
		if (last_tag >= bm->depth - 1)
			last_tag = 0;
	}

	return tag;
}

S
Shaohua Li 已提交
178 179
/* True when the tag set uses the round-robin allocation policy. */
#define BT_ALLOC_RR(tags) ((tags)->alloc_policy == BLK_TAG_ALLOC_RR)

180 181 182 183 184 185 186 187 188 189 190
/*
 * Straight forward bitmap tag implementation, where each bit is a tag
 * (cleared == free, and set == busy). The small twist is using per-cpu
 * last_tag caches, which blk-mq stores in the blk_mq_ctx software queue
 * contexts. This enables us to drastically limit the space searched,
 * without dirtying an extra shared cacheline like we would if we stored
 * the cache value inside the shared blk_mq_bitmap_tags structure. On top
 * of that, each word of tags is in a separate cacheline. This means that
 * multiple users will tend to stick to different cachelines, at least
 * until the map is exhausted.
 */
191
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
S
Shaohua Li 已提交
192
		    unsigned int *tag_cache, struct blk_mq_tags *tags)
193 194 195 196
{
	unsigned int last_tag, org_last_tag;
	int index, i, tag;

197 198 199
	if (!hctx_may_queue(hctx, bt))
		return -1;

200
	last_tag = org_last_tag = *tag_cache;
201
	index = TAG_TO_INDEX(bt, last_tag);
202 203

	for (i = 0; i < bt->map_nr; i++) {
S
Shaohua Li 已提交
204 205
		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
				    BT_ALLOC_RR(tags));
206
		if (tag != -1) {
207
			tag += (index << bt->bits_per_word);
208 209 210
			goto done;
		}

211 212 213 214 215 216 217 218
		/*
		 * Jump to next index, and reset the last tag to be the
		 * first tag of that index
		 */
		index++;
		last_tag = (index << bt->bits_per_word);

		if (index >= bt->map_nr) {
219
			index = 0;
220 221
			last_tag = 0;
		}
222 223 224 225 226 227 228 229 230 231
	}

	*tag_cache = 0;
	return -1;

	/*
	 * Only update the cache from the allocation path, if we ended
	 * up using the specific cached tag.
	 */
done:
S
Shaohua Li 已提交
232
	if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
233 234 235 236 237 238 239 240 241 242 243 244 245 246
		last_tag = tag + 1;
		if (last_tag >= bt->depth - 1)
			last_tag = 0;

		*tag_cache = last_tag;
	}

	return tag;
}

/*
 * Pick the wait queue a task should sleep on.
 *
 * Without a hardware context the first wait queue of @bt is used.
 * Otherwise the queue selected by hctx->wait_index is returned and the
 * index is advanced so that subsequent waiters are spread across queues.
 */
static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
					 struct blk_mq_hw_ctx *hctx)
{
	struct bt_wait_state *bs;
	int wait_index;

	if (!hctx)
		return &bt->bs[0];

	wait_index = atomic_read(&hctx->wait_index);
	bs = &bt->bs[wait_index];
	bt_index_atomic_inc(&hctx->wait_index);
	return bs;
}

258 259 260
static int bt_get(struct blk_mq_alloc_data *data,
		struct blk_mq_bitmap_tags *bt,
		struct blk_mq_hw_ctx *hctx,
S
Shaohua Li 已提交
261
		unsigned int *last_tag, struct blk_mq_tags *tags)
262
{
263 264
	struct bt_wait_state *bs;
	DEFINE_WAIT(wait);
265 266
	int tag;

S
Shaohua Li 已提交
267
	tag = __bt_get(hctx, bt, last_tag, tags);
268 269 270
	if (tag != -1)
		return tag;

271
	if (data->flags & BLK_MQ_REQ_NOWAIT)
272 273
		return -1;

274
	bs = bt_wait_ptr(bt, hctx);
275 276 277
	do {
		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);

S
Shaohua Li 已提交
278
		tag = __bt_get(hctx, bt, last_tag, tags);
279 280 281
		if (tag != -1)
			break;

B
Bart Van Assche 已提交
282 283 284
		/*
		 * We're out of tags on this hardware queue, kick any
		 * pending IO submits before going to sleep waiting for
285 286
		 * some to complete. Note that hctx can be NULL here for
		 * reserved tag allocation.
B
Bart Van Assche 已提交
287
		 */
288 289
		if (hctx)
			blk_mq_run_hw_queue(hctx, false);
B
Bart Van Assche 已提交
290

291 292 293 294
		/*
		 * Retry tag allocation after running the hardware queue,
		 * as running the queue may also have found completions.
		 */
S
Shaohua Li 已提交
295
		tag = __bt_get(hctx, bt, last_tag, tags);
296 297 298
		if (tag != -1)
			break;

299 300
		blk_mq_put_ctx(data->ctx);

301
		io_schedule();
302 303 304 305

		data->ctx = blk_mq_get_ctx(data->q);
		data->hctx = data->q->mq_ops->map_queue(data->q,
				data->ctx->cpu);
306
		if (data->flags & BLK_MQ_REQ_RESERVED) {
307 308 309 310 311 312
			bt = &data->hctx->tags->breserved_tags;
		} else {
			last_tag = &data->ctx->last_tag;
			hctx = data->hctx;
			bt = &hctx->tags->bitmap_tags;
		}
313 314
		finish_wait(&bs->wait, &wait);
		bs = bt_wait_ptr(bt, hctx);
315 316 317 318 319 320
	} while (1);

	finish_wait(&bs->wait, &wait);
	return tag;
}

321
static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
322 323 324
{
	int tag;

325
	tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
S
Shaohua Li 已提交
326
			&data->ctx->last_tag, data->hctx->tags);
327
	if (tag >= 0)
328
		return tag + data->hctx->tags->nr_reserved_tags;
329 330

	return BLK_MQ_TAG_FAIL;
331 332
}

333
static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
334
{
335
	int tag, zero = 0;
336

337
	if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
338 339 340 341
		WARN_ON_ONCE(1);
		return BLK_MQ_TAG_FAIL;
	}

S
Shaohua Li 已提交
342 343
	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
		data->hctx->tags);
344 345
	if (tag < 0)
		return BLK_MQ_TAG_FAIL;
346

347 348 349
	return tag;
}

350
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
351
{
352 353 354
	if (data->flags & BLK_MQ_REQ_RESERVED)
		return __blk_mq_get_reserved_tag(data);
	return __blk_mq_get_tag(data);
355 356
}

357 358 359 360
static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
{
	int i, wake_index;

361
	wake_index = atomic_read(&bt->wake_index);
362 363 364 365
	for (i = 0; i < BT_WAIT_QUEUES; i++) {
		struct bt_wait_state *bs = &bt->bs[wake_index];

		if (waitqueue_active(&bs->wait)) {
366 367 368
			int o = atomic_read(&bt->wake_index);
			if (wake_index != o)
				atomic_cmpxchg(&bt->wake_index, o, wake_index);
369 370 371 372

			return bs;
		}

373
		wake_index = bt_index_inc(wake_index);
374 375 376 377 378 379 380
	}

	return NULL;
}

static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
{
381
	const int index = TAG_TO_INDEX(bt, tag);
382
	struct bt_wait_state *bs;
383
	int wait_cnt;
384

385 386 387 388
	clear_bit(TAG_TO_BIT(bt, tag), &bt->map[index].word);

	/* Ensure that the wait list checks occur after clear_bit(). */
	smp_mb();
389 390

	bs = bt_wake_ptr(bt);
391 392 393 394
	if (!bs)
		return;

	wait_cnt = atomic_dec_return(&bs->wait_cnt);
395 396
	if (unlikely(wait_cnt < 0))
		wait_cnt = atomic_inc_return(&bs->wait_cnt);
397 398
	if (wait_cnt == 0) {
		atomic_add(bt->wake_cnt, &bs->wait_cnt);
399
		bt_index_atomic_inc(&bt->wake_index);
400 401 402 403
		wake_up(&bs->wait);
	}
}

404
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
405
		    unsigned int *last_tag)
406
{
407 408
	struct blk_mq_tags *tags = hctx->tags;

409 410 411
	if (tag >= tags->nr_reserved_tags) {
		const int real_tag = tag - tags->nr_reserved_tags;

J
Jens Axboe 已提交
412 413
		BUG_ON(real_tag >= tags->nr_tags);
		bt_clear_tag(&tags->bitmap_tags, real_tag);
S
Shaohua Li 已提交
414 415
		if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
			*last_tag = real_tag;
J
Jens Axboe 已提交
416 417 418 419
	} else {
		BUG_ON(tag >= tags->nr_reserved_tags);
		bt_clear_tag(&tags->breserved_tags, tag);
	}
420 421
}

422 423 424
/*
 * Call @fn for every busy tag in @bt whose request belongs to the queue
 * of @hctx.
 *
 * @off:      index in hctx->tags->rqs[] that corresponds to bit 0 of the
 *            first word of @bt
 * @data:     opaque pointer passed through to @fn
 * @reserved: passed through to @fn
 */
static void bt_for_each(struct blk_mq_hw_ctx *hctx,
		struct blk_mq_bitmap_tags *bt, unsigned int off,
		busy_iter_fn *fn, void *data, bool reserved)
{
	struct request *rq;
	int bit, i;

	for (i = 0; i < bt->map_nr; i++) {
		struct blk_align_bitmap *bm = &bt->map[i];

		for (bit = find_first_bit(&bm->word, bm->depth);
		     bit < bm->depth;
		     bit = find_next_bit(&bm->word, bm->depth, bit + 1)) {
			rq = hctx->tags->rqs[off + bit];
			/* Skip requests that belong to a different queue. */
			if (rq->q == hctx->queue)
				fn(hctx, rq, data, reserved);
		}

		/* Each word accounts for 2^bits_per_word request slots. */
		off += (1 << bt->bits_per_word);
	}
}

K
Keith Busch 已提交
444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
/*
 * Call @fn for every busy tag in @bt, regardless of which queue the
 * request belongs to. Does nothing if @tags has no request array.
 *
 * @off:      index in tags->rqs[] that corresponds to bit 0 of the first
 *            word of @bt
 * @data:     opaque pointer passed through to @fn
 * @reserved: passed through to @fn
 */
static void bt_tags_for_each(struct blk_mq_tags *tags,
		struct blk_mq_bitmap_tags *bt, unsigned int off,
		busy_tag_iter_fn *fn, void *data, bool reserved)
{
	struct request *rq;
	int bit, i;

	if (!tags->rqs)
		return;
	for (i = 0; i < bt->map_nr; i++) {
		struct blk_align_bitmap *bm = &bt->map[i];

		for (bit = find_first_bit(&bm->word, bm->depth);
		     bit < bm->depth;
		     bit = find_next_bit(&bm->word, bm->depth, bit + 1)) {
			rq = tags->rqs[off + bit];
			fn(rq, data, reserved);
		}

		/* Each word accounts for 2^bits_per_word request slots. */
		off += (1 << bt->bits_per_word);
	}
}

467 468
/*
 * Call @fn for every busy tag of @tags: first the reserved tags (if the
 * set has any), then the normal tags, which start at offset
 * nr_reserved_tags in the request array.
 */
static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
		busy_tag_iter_fn *fn, void *priv)
{
	if (tags->nr_reserved_tags)
		bt_tags_for_each(tags, &tags->breserved_tags, 0, fn, priv, true);
	bt_tags_for_each(tags, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv,
			false);
}

476 477 478 479 480 481 482 483 484 485 486 487
/*
 * Run @fn over every busy tag of every hardware queue in @tagset,
 * skipping hardware queues that have no tags allocated. @priv is handed
 * through to @fn untouched.
 */
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv)
{
	int hwq;

	for (hwq = 0; hwq < tagset->nr_hw_queues; hwq++) {
		/* No tag array at all. */
		if (!tagset->tags)
			continue;

		/* No tags for this particular hardware queue. */
		if (!tagset->tags[hwq])
			continue;

		blk_mq_all_tag_busy_iter(tagset->tags[hwq], fn, priv);
	}
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);

488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513
/*
 * Call the driver's ->reinit_request() on every request of every hardware
 * queue in @set.
 *
 * Hardware queues without tags or without a request array are skipped, as
 * are empty request slots. Returns 0 on success (including when the driver
 * has no ->reinit_request), or the first non-zero value returned by
 * ->reinit_request(), at which point iteration stops.
 */
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
{
	int i, j, ret = 0;

	if (!set->ops->reinit_request)
		goto out;

	for (i = 0; i < set->nr_hw_queues; i++) {
		struct blk_mq_tags *tags = set->tags[i];

		/* Same guards the busy-tag iterators apply. */
		if (!tags || !tags->rqs)
			continue;

		for (j = 0; j < tags->nr_tags; j++) {
			if (!tags->rqs[j])
				continue;

			ret = set->ops->reinit_request(set->driver_data,
						tags->rqs[j]);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);

514
/*
 * Call @fn for every busy tag that belongs to request queue @q, walking
 * each mapped hardware context: reserved tags first (if any), then the
 * normal tags. @priv is passed through to @fn.
 */
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
		void *priv)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		struct blk_mq_tags *tags = hctx->tags;

		/*
		 * If no software queues are currently mapped to this
		 * hardware queue, there's nothing to check
		 */
		if (!blk_mq_hw_queue_mapped(hctx))
			continue;

		if (tags->nr_reserved_tags)
			bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true);
		bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv,
		      false);
	}
}

539 540 541 542 543
static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
{
	unsigned int i, used;

	for (i = 0, used = 0; i < bt->map_nr; i++) {
544
		struct blk_align_bitmap *bm = &bt->map[i];
545 546 547 548 549 550 551

		used += bitmap_weight(&bm->word, bm->depth);
	}

	return bt->depth - used;
}

552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567
static void bt_update_count(struct blk_mq_bitmap_tags *bt,
			    unsigned int depth)
{
	unsigned int tags_per_word = 1U << bt->bits_per_word;
	unsigned int map_depth = depth;

	if (depth) {
		int i;

		for (i = 0; i < bt->map_nr; i++) {
			bt->map[i].depth = min(map_depth, tags_per_word);
			map_depth -= bt->map[i].depth;
		}
	}

	bt->wake_cnt = BT_WAIT_BATCH;
568 569
	if (bt->wake_cnt > depth / BT_WAIT_QUEUES)
		bt->wake_cnt = max(1U, depth / BT_WAIT_QUEUES);
570 571 572 573

	bt->depth = depth;
}

574 575 576 577 578
/*
 * Allocate and initialise the bitmap words and wait queues of @bt.
 *
 * @depth:    number of tags to track; zero is allowed and allocates no
 *            bitmap words
 * @node:     node passed to the bitmap word allocation
 * @reserved: not used by this function
 *
 * Returns 0 on success or -ENOMEM. On failure nothing allocated here is
 * left behind: bt->map is freed and set to NULL if the wait queue
 * allocation fails.
 */
static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
			int node, bool reserved)
{
	int i;

	bt->bits_per_word = ilog2(BITS_PER_LONG);

	/*
	 * Depth can be zero for reserved tags, that's not a failure
	 * condition.
	 */
	if (depth) {
		unsigned int nr, tags_per_word;

		tags_per_word = (1 << bt->bits_per_word);

		/*
		 * If the tag space is small, shrink the number of tags
		 * per word so we spread over a few cachelines, at least.
		 * If less than 4 tags, just forget about it, it's not
		 * going to work optimally anyway.
		 */
		if (depth >= 4) {
			while (tags_per_word * 4 > depth) {
				bt->bits_per_word--;
				tags_per_word = (1 << bt->bits_per_word);
			}
		}

		/* Number of words needed, rounding up. */
		nr = ALIGN(depth, tags_per_word) / tags_per_word;
		bt->map = kzalloc_node(nr * sizeof(struct blk_align_bitmap),
						GFP_KERNEL, node);
		if (!bt->map)
			return -ENOMEM;

		bt->map_nr = nr;
	}

	bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
	if (!bt->bs) {
		kfree(bt->map);
		bt->map = NULL;
		return -ENOMEM;
	}

	bt_update_count(bt, depth);

	for (i = 0; i < BT_WAIT_QUEUES; i++) {
		init_waitqueue_head(&bt->bs[i].wait);
		atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt);
	}

	return 0;
}

/*
 * Release the memory owned by @bt: the wait queue array and the bitmap
 * words. The structure itself is not freed.
 */
static void bt_free(struct blk_mq_bitmap_tags *bt)
{
	kfree(bt->bs);
	kfree(bt->map);
}

static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
S
Shaohua Li 已提交
636
						   int node, int alloc_policy)
637 638 639
{
	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;

S
Shaohua Li 已提交
640 641
	tags->alloc_policy = alloc_policy;

642 643 644 645 646 647 648 649 650 651 652 653
	if (bt_alloc(&tags->bitmap_tags, depth, node, false))
		goto enomem;
	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
		goto enomem;

	return tags;
enomem:
	bt_free(&tags->bitmap_tags);
	kfree(tags);
	return NULL;
}

654
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
S
Shaohua Li 已提交
655 656
				     unsigned int reserved_tags,
				     int node, int alloc_policy)
657 658 659 660 661 662 663 664 665 666 667 668
{
	struct blk_mq_tags *tags;

	if (total_tags > BLK_MQ_TAG_MAX) {
		pr_err("blk-mq: tag depth too large\n");
		return NULL;
	}

	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
	if (!tags)
		return NULL;

K
Keith Busch 已提交
669 670 671 672 673
	if (!zalloc_cpumask_var(&tags->cpumask, GFP_KERNEL)) {
		kfree(tags);
		return NULL;
	}

674 675 676
	tags->nr_tags = total_tags;
	tags->nr_reserved_tags = reserved_tags;

S
Shaohua Li 已提交
677
	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
678 679 680 681
}

/*
 * Free a tag set: both bitmaps, the cpumask and the structure itself.
 */
void blk_mq_free_tags(struct blk_mq_tags *tags)
{
	bt_free(&tags->bitmap_tags);
	bt_free(&tags->breserved_tags);
	free_cpumask_var(tags->cpumask);
	kfree(tags);
}

688 689 690 691
/*
 * Seed *tag with a pseudo-random starting point inside the normal
 * (non-reserved) tag range of @tags. If that range is empty, *tag is set
 * to 0.
 */
void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
{
	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;

	/* Avoid a modulo by zero when every tag is reserved. */
	*tag = depth ? prandom_u32() % depth : 0;
}

695 696 697 698 699 700 701 702 703 704 705
int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
{
	tdepth -= tags->nr_reserved_tags;
	if (tdepth > tags->nr_tags)
		return -EINVAL;

	/*
	 * Don't need (or can't) update reserved tags here, they remain
	 * static and should never need resizing.
	 */
	bt_update_count(&tags->bitmap_tags, tdepth);
706
	blk_mq_tag_wakeup_all(tags, false);
707 708 709
	return 0;
}

B
Bart Van Assche 已提交
710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
/**
 * blk_mq_unique_tag() - build a tag that is unique across hardware queues
 * @rq: request to compute the unique tag for
 *
 * A request's tag is only unique within its own hardware queue. This
 * helper combines the hardware context index (upper bits, shifted by
 * BLK_MQ_UNIQUE_TAG_BITS) with the per-queue tag (lower bits, masked with
 * BLK_MQ_UNIQUE_TAG_MASK) into a single value.
 *
 * Note: if the request's queue has no mq_ops, i.e. it is not a multiqueue
 * request queue, the hardware context index is zero.
 */
u32 blk_mq_unique_tag(struct request *rq)
{
	struct request_queue *queue = rq->q;
	int hw_index = 0;

	if (queue->mq_ops) {
		struct blk_mq_hw_ctx *hctx;

		hctx = queue->mq_ops->map_queue(queue, rq->mq_ctx->cpu);
		hw_index = hctx->queue_num;
	}

	return (hw_index << BLK_MQ_UNIQUE_TAG_BITS) |
		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);

738 739 740
/*
 * Format a summary of @tags into @page: total and reserved tag counts,
 * bits per word of the normal bitmap, free normal and reserved tags, and
 * the number of active queues.
 *
 * Returns the number of bytes written, or 0 if @tags is NULL.
 */
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
	char *orig_page = page;
	unsigned int free, res;

	if (!tags)
		return 0;

	page += sprintf(page, "nr_tags=%u, reserved_tags=%u, "
			"bits_per_word=%u\n",
			tags->nr_tags, tags->nr_reserved_tags,
			tags->bitmap_tags.bits_per_word);

	free = bt_unused_tags(&tags->bitmap_tags);
	res = bt_unused_tags(&tags->breserved_tags);

	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
	page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));

	return page - orig_page;
}