sbitmap.c 19.1 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
2 3 4 5 6
/*
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 */

7
#include <linux/sched.h>
8
#include <linux/random.h>
9
#include <linux/sbitmap.h>
10
#include <linux/seq_file.h>
11

12
/*
 * Allocate the per-CPU allocation hints of @sb with @flags.
 *
 * Unless the map is empty or round-robin allocation is in use, the hint of
 * every possible CPU is seeded with a pseudo-random bit number below
 * sb->depth.
 *
 * Returns 0 on success, -ENOMEM if the per-CPU allocation fails.
 */
static int init_alloc_hint(struct sbitmap *sb, gfp_t flags)
{
	unsigned int nr_bits = sb->depth;
	int cpu;

	sb->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
	if (!sb->alloc_hint)
		return -ENOMEM;

	/* Nothing to seed for an empty map or for round-robin allocation. */
	if (!nr_bits || sb->round_robin)
		return 0;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(sb->alloc_hint, cpu) = prandom_u32() % nr_bits;

	return 0;
}

29
static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb,
30 31 32 33
						    unsigned int depth)
{
	unsigned hint;

34
	hint = this_cpu_read(*sb->alloc_hint);
35 36
	if (unlikely(hint >= depth)) {
		hint = depth ? prandom_u32() % depth : 0;
37
		this_cpu_write(*sb->alloc_hint, hint);
38 39 40 41 42
	}

	return hint;
}

43
static inline void update_alloc_hint_after_get(struct sbitmap *sb,
44 45 46 47 48 49
					       unsigned int depth,
					       unsigned int hint,
					       unsigned int nr)
{
	if (nr == -1) {
		/* If the map is full, a hint won't do us much good. */
50 51
		this_cpu_write(*sb->alloc_hint, 0);
	} else if (nr == hint || unlikely(sb->round_robin)) {
52 53 54 55
		/* Only update the hint if we used it. */
		hint = nr + 1;
		if (hint >= depth - 1)
			hint = 0;
56
		this_cpu_write(*sb->alloc_hint, hint);
57 58 59
	}
}

60 61 62
/*
 * See if we have deferred clears that we can batch move
 */
63
static inline bool sbitmap_deferred_clear(struct sbitmap_word *map)
64
{
65
	unsigned long mask;
66

P
Pavel Begunkov 已提交
67 68
	if (!READ_ONCE(map->cleared))
		return false;
69 70 71 72

	/*
	 * First get a stable cleared mask, setting the old mask to 0.
	 */
73
	mask = xchg(&map->cleared, 0);
74 75 76 77

	/*
	 * Now clear the masked bits in our free word
	 */
78 79
	atomic_long_andnot(mask, (atomic_long_t *)&map->word);
	BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word));
P
Pavel Begunkov 已提交
80
	return true;
81 82
}

83
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
84 85
		      gfp_t flags, int node, bool round_robin,
		      bool alloc_hint)
86 87 88
{
	unsigned int bits_per_word;

89 90 91
	if (shift < 0)
		shift = sbitmap_calculate_shift(depth);

92 93 94 95 96 97 98
	bits_per_word = 1U << shift;
	if (bits_per_word > BITS_PER_LONG)
		return -EINVAL;

	sb->shift = shift;
	sb->depth = depth;
	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
99
	sb->round_robin = round_robin;
100 101 102 103 104 105

	if (depth == 0) {
		sb->map = NULL;
		return 0;
	}

106 107 108 109 110 111 112
	if (alloc_hint) {
		if (init_alloc_hint(sb, flags))
			return -ENOMEM;
	} else {
		sb->alloc_hint = NULL;
	}

113
	sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
114 115
	if (!sb->map) {
		free_percpu(sb->alloc_hint);
116
		return -ENOMEM;
117
	}
118 119 120 121 122 123 124 125 126 127

	return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);

/*
 * Change the depth of @sb to @depth.
 *
 * Deferred clears are applied to every currently used word first; then
 * sb->depth and sb->map_nr are recomputed. The word array itself is not
 * reallocated here.
 */
void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
{
	unsigned int word_bits = 1U << sb->shift;
	unsigned int idx = 0;

	while (idx < sb->map_nr) {
		sbitmap_deferred_clear(&sb->map[idx]);
		idx++;
	}

	sb->depth = depth;
	sb->map_nr = DIV_ROUND_UP(sb->depth, word_bits);
}
EXPORT_SYMBOL_GPL(sbitmap_resize);

136 137
/*
 * Find and atomically set a zero bit in @word, looking at the first @depth
 * bits and starting the search at bit @hint.
 *
 * @wrap: when true and @hint is non-zero, restart the search from bit 0 once
 *        the end of the word is reached without finding a free bit
 *
 * Returns the bit number that was set, or -1 if no free bit was found.
 */
static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
			      unsigned int hint, bool wrap)
{
	int bit;

	/* Don't wrap if the search already starts from 0. */
	if (!hint)
		wrap = false;

	for (;;) {
		bit = find_next_zero_bit(word, depth, hint);
		if (unlikely(bit >= depth)) {
			if (!(hint && wrap))
				return -1;
			/*
			 * The search started at an offset that has not been
			 * reset yet: go back to 0 to exhaust the word.
			 */
			hint = 0;
			continue;
		}

		if (!test_and_set_bit_lock(bit, word))
			return bit;

		/* Lost the race for this bit; continue after it. */
		hint = bit + 1;
		if (hint >= depth - 1)
			hint = 0;
	}
}

170
static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
171
				     unsigned int alloc_hint)
172
{
173
	struct sbitmap_word *map = &sb->map[index];
174 175 176
	int nr;

	do {
177 178
		nr = __sbitmap_get_word(&map->word, __map_depth(sb, index),
					alloc_hint, !sb->round_robin);
179 180
		if (nr != -1)
			break;
181
		if (!sbitmap_deferred_clear(map))
182 183 184 185 186 187
			break;
	} while (1);

	return nr;
}

188
static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint)
189 190 191 192 193 194
{
	unsigned int i, index;
	int nr = -1;

	index = SB_NR_TO_INDEX(sb, alloc_hint);

195 196 197 198 199
	/*
	 * Unless we're doing round robin tag allocation, just use the
	 * alloc_hint to find the right word index. No point in looping
	 * twice in find_next_zero_bit() for that case.
	 */
200
	if (sb->round_robin)
201 202 203 204
		alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
	else
		alloc_hint = 0;

205
	for (i = 0; i < sb->map_nr; i++) {
206
		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint);
207 208 209 210 211 212
		if (nr != -1) {
			nr += index << sb->shift;
			break;
		}

		/* Jump to next index. */
213 214
		alloc_hint = 0;
		if (++index >= sb->map_nr)
215 216 217 218 219
			index = 0;
	}

	return nr;
}
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235

/*
 * Allocate a free bit from @sb using the per-CPU allocation hint.
 *
 * Returns the allocated bit number, or -1 if none is free or if @sb has no
 * allocation hints (in which case a one-time warning is emitted).
 */
int sbitmap_get(struct sbitmap *sb)
{
	unsigned int depth, hint;
	int bit;

	if (WARN_ON_ONCE(unlikely(!sb->alloc_hint)))
		return -1;

	depth = READ_ONCE(sb->depth);
	hint = update_alloc_hint_before_get(sb, depth);

	bit = __sbitmap_get(sb, hint);
	update_alloc_hint_after_get(sb, depth, hint, bit);

	return bit;
}
236 237
EXPORT_SYMBOL_GPL(sbitmap_get);

238 239 240
static int __sbitmap_get_shallow(struct sbitmap *sb,
				 unsigned int alloc_hint,
				 unsigned long shallow_depth)
241 242 243 244 245 246 247
{
	unsigned int i, index;
	int nr = -1;

	index = SB_NR_TO_INDEX(sb, alloc_hint);

	for (i = 0; i < sb->map_nr; i++) {
248
again:
249
		nr = __sbitmap_get_word(&sb->map[index].word,
250 251 252
					min_t(unsigned int,
					      __map_depth(sb, index),
					      shallow_depth),
253 254 255 256 257 258
					SB_NR_TO_BIT(sb, alloc_hint), true);
		if (nr != -1) {
			nr += index << sb->shift;
			break;
		}

259
		if (sbitmap_deferred_clear(&sb->map[index]))
260 261
			goto again;

262 263 264 265 266 267 268 269 270 271 272 273
		/* Jump to next index. */
		index++;
		alloc_hint = index << sb->shift;

		if (index >= sb->map_nr) {
			index = 0;
			alloc_hint = 0;
		}
	}

	return nr;
}
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289

/*
 * Allocate a free bit from @sb, using at most @shallow_depth bits of each
 * word.
 *
 * Returns the allocated bit number, or -1 if none is free or if @sb has no
 * allocation hints (in which case a one-time warning is emitted).
 */
int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth)
{
	unsigned int depth, hint;
	int bit;

	if (WARN_ON_ONCE(unlikely(!sb->alloc_hint)))
		return -1;

	depth = READ_ONCE(sb->depth);
	hint = update_alloc_hint_before_get(sb, depth);

	bit = __sbitmap_get_shallow(sb, hint, shallow_depth);
	update_alloc_hint_after_get(sb, depth, hint, bit);

	return bit;
}
290 291
EXPORT_SYMBOL_GPL(sbitmap_get_shallow);

292 293 294 295 296
bool sbitmap_any_bit_set(const struct sbitmap *sb)
{
	unsigned int i;

	for (i = 0; i < sb->map_nr; i++) {
297
		if (sb->map[i].word & ~sb->map[i].cleared)
298 299 300 301 302 303
			return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);

304
/*
 * Sum the number of set bits over all words of @sb, counting either the
 * ->word bitmaps (@set true) or the ->cleared bitmaps (@set false). Only the
 * first __map_depth() bits of each word are counted.
 */
static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
{
	unsigned int total = 0;
	unsigned int idx;

	for (idx = 0; idx < sb->map_nr; idx++) {
		const struct sbitmap_word *w = &sb->map[idx];
		const unsigned long *bits = set ? &w->word : &w->cleared;

		total += bitmap_weight(bits, __map_depth(sb, idx));
	}
	return total;
}
319

M
Ming Lei 已提交
320
static unsigned int sbitmap_cleared(const struct sbitmap *sb)
321
{
M
Ming Lei 已提交
322
	return __sbitmap_weight(sb, false);
323 324
}

M
Ming Lei 已提交
325
unsigned int sbitmap_weight(const struct sbitmap *sb)
326
{
M
Ming Lei 已提交
327
	return __sbitmap_weight(sb, true) - sbitmap_cleared(sb);
328
}
M
Ming Lei 已提交
329
EXPORT_SYMBOL_GPL(sbitmap_weight);
330

331 332 333
/*
 * Print a summary of @sb (depth, busy and cleared bit counts, bits per word,
 * number of words) to the seq_file @m, one "key=value" line each.
 */
void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
{
	unsigned int busy, cleared;

	seq_printf(m, "depth=%u\n", sb->depth);

	busy = sbitmap_weight(sb);
	seq_printf(m, "busy=%u\n", busy);

	cleared = sbitmap_cleared(sb);
	seq_printf(m, "cleared=%u\n", cleared);

	seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
	seq_printf(m, "map_nr=%u\n", sb->map_nr);
}
EXPORT_SYMBOL_GPL(sbitmap_show);

/*
 * Print @byte as two hex digits in a hex-dump layout: a new line prefixed
 * with the 8-digit hex offset every 16 bytes, and a space before every even
 * offset.
 */
static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
{
	bool line_start = !(offset & 0xf);
	bool pair_start = !(offset & 0x1);

	if (line_start) {
		/* Terminate the previous line, except before the very first. */
		if (offset)
			seq_putc(m, '\n');
		seq_printf(m, "%08x:", offset);
	}
	if (pair_start)
		seq_putc(m, ' ');
	seq_printf(m, "%02x", byte);
}

/*
 * Dump the bits of @sb to the seq_file @m as a hex dump.
 *
 * Bits with a pending deferred clear are shown as clear. The used bits of
 * consecutive words are packed back to back into bytes, so a word whose
 * depth is not a multiple of 8 does not introduce padding.
 */
void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
{
	unsigned int offset = 0;
	unsigned int filled = 0;
	u8 acc = 0;
	int idx;

	for (idx = 0; idx < sb->map_nr; idx++) {
		unsigned long live = READ_ONCE(sb->map[idx].word);
		unsigned long cleared = READ_ONCE(sb->map[idx].cleared);
		unsigned int remaining = __map_depth(sb, idx);

		live &= ~cleared;

		while (remaining > 0) {
			/* Take as many bits as still fit in the current byte. */
			unsigned int take = min(8 - filled, remaining);

			acc |= (live & (BIT(take) - 1)) << filled;
			filled += take;
			live >>= take;
			remaining -= take;

			if (filled != 8)
				continue;

			emit_byte(m, offset++, acc);
			acc = 0;
			filled = 0;
		}
	}

	/* Flush a trailing partial byte. */
	if (filled)
		emit_byte(m, offset++, acc);
	if (offset)
		seq_putc(m, '\n');
}
EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);

391 392
static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
					unsigned int depth)
393 394
{
	unsigned int wake_batch;
395
	unsigned int shallow_depth;
396 397 398

	/*
	 * For each batch, we wake up one queue. We need to make sure that our
399 400 401 402 403 404 405 406 407 408 409 410 411
	 * batch size is small enough that the full depth of the bitmap,
	 * potentially limited by a shallow depth, is enough to wake up all of
	 * the queues.
	 *
	 * Each full word of the bitmap has bits_per_word bits, and there might
	 * be a partial word. There are depth / bits_per_word full words and
	 * depth % bits_per_word bits left over. In bitwise arithmetic:
	 *
	 * bits_per_word = 1 << shift
	 * depth / bits_per_word = depth >> shift
	 * depth % bits_per_word = depth & ((1 << shift) - 1)
	 *
	 * Each word can be limited to sbq->min_shallow_depth bits.
412
	 */
413 414 415 416 417
	shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
	depth = ((depth >> sbq->sb.shift) * shallow_depth +
		 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
	wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
			     SBQ_WAKE_BATCH);
418 419 420 421 422

	return wake_batch;
}

/*
 * Initialise @sbq: the embedded bitmap (always with allocation hints), the
 * wake batch bookkeeping and the SBQ_WAIT_QUEUES wait queues.
 *
 * Returns 0 on success, the error from sbitmap_init_node(), or -ENOMEM if
 * the wait queue array cannot be allocated (the bitmap is freed again in
 * that case).
 */
int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
			    int shift, bool round_robin, gfp_t flags, int node)
{
	struct sbq_wait_state *ws;
	int err, q;

	err = sbitmap_init_node(&sbq->sb, depth, shift, flags, node,
				round_robin, true);
	if (err)
		return err;

	sbq->min_shallow_depth = UINT_MAX;
	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
	atomic_set(&sbq->wake_index, 0);
	atomic_set(&sbq->ws_active, 0);

	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
	if (!sbq->ws) {
		sbitmap_free(&sbq->sb);
		return -ENOMEM;
	}

	for (q = 0, ws = sbq->ws; q < SBQ_WAIT_QUEUES; q++, ws++) {
		init_waitqueue_head(&ws->wait);
		atomic_set(&ws->wait_cnt, sbq->wake_batch);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);

453 454
static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int wake_batch)
455
{
456 457 458 459 460
	int i;

	if (sbq->wake_batch != wake_batch) {
		WRITE_ONCE(sbq->wake_batch, wake_batch);
		/*
461 462 463
		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
		 * to ensure that the batch size is updated before the wait
		 * counts.
464
		 */
465
		smp_mb();
466 467 468
		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
			atomic_set(&sbq->ws[i].wait_cnt, 1);
	}
469 470
}

471 472 473 474 475 476 477 478 479 480 481 482 483
/* Recompute the wake batch size of @sbq for @depth bits and install it. */
static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int depth)
{
	__sbitmap_queue_update_wake_batch(sbq, sbq_calc_wake_batch(sbq, depth));
}

/*
 * Recompute the wake batch size of @sbq for @users sharing the bitmap.
 *
 * The depth is divided by @users (rounding up) and then by SBQ_WAIT_QUEUES;
 * the result is clamped between a minimum of 4 (1 when the bitmap holds
 * fewer than 4 * SBQ_WAIT_QUEUES bits) and SBQ_WAKE_BATCH.
 *
 * NOTE(review): @users is used as a divisor, so callers presumably never
 * pass 0 -- confirm.
 */
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int users)
{
	unsigned int per_user = (sbq->sb.depth + users - 1) / users;
	unsigned int lowest;
	unsigned int batch;

	if (sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES))
		lowest = 4;
	else
		lowest = 1;

	batch = clamp_val(per_user / SBQ_WAIT_QUEUES, lowest, SBQ_WAKE_BATCH);
	__sbitmap_queue_update_wake_batch(sbq, batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);

495 496 497
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
	sbitmap_queue_update_wake_batch(sbq, depth);
498 499 500 501
	sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);

502
int __sbitmap_queue_get(struct sbitmap_queue *sbq)
503
{
504
	return sbitmap_get(&sbq->sb);
505 506 507
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get);

508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
					unsigned int *offset)
{
	struct sbitmap *sb = &sbq->sb;
	unsigned int hint, depth;
	unsigned long index, nr;
	int i;

	if (unlikely(sb->round_robin))
		return 0;

	depth = READ_ONCE(sb->depth);
	hint = update_alloc_hint_before_get(sb, depth);

	index = SB_NR_TO_INDEX(sb, hint);

	for (i = 0; i < sb->map_nr; i++) {
		struct sbitmap_word *map = &sb->map[index];
		unsigned long get_mask;
527
		unsigned int map_depth = __map_depth(sb, index);
528 529

		sbitmap_deferred_clear(map);
530
		if (map->word == (1UL << (map_depth - 1)) - 1)
531 532
			continue;

533 534
		nr = find_first_zero_bit(&map->word, map_depth);
		if (nr + nr_tags <= map_depth) {
535
			atomic_long_t *ptr = (atomic_long_t *) &map->word;
536
			int map_tags = min_t(int, nr_tags, map_depth);
537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
			unsigned long val, ret;

			get_mask = ((1UL << map_tags) - 1) << nr;
			do {
				val = READ_ONCE(map->word);
				ret = atomic_long_cmpxchg(ptr, val, get_mask | val);
			} while (ret != val);
			get_mask = (get_mask & ~ret) >> nr;
			if (get_mask) {
				*offset = nr + (index << sb->shift);
				update_alloc_hint_after_get(sb, depth, hint,
							*offset + map_tags - 1);
				return get_mask;
			}
		}
		/* Jump to next index. */
		if (++index >= sb->map_nr)
			index = 0;
	}

	return 0;
}

560 561
int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
			      unsigned int shallow_depth)
562
{
563 564
	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);

565
	return sbitmap_get_shallow(&sbq->sb, shallow_depth);
566
}
567
EXPORT_SYMBOL_GPL(sbitmap_queue_get_shallow);
568

569 570 571 572 573 574 575 576
/*
 * Record @min_shallow_depth as the minimum shallow depth of @sbq and
 * recompute the wake batch size for the current bitmap depth.
 */
void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
				     unsigned int min_shallow_depth)
{
	unsigned int batch;

	sbq->min_shallow_depth = min_shallow_depth;

	batch = sbq_calc_wake_batch(sbq, sbq->sb.depth);
	__sbitmap_queue_update_wake_batch(sbq, batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);

577 578 579 580
/*
 * Find the next wait queue of @sbq that has waiters, starting at
 * sbq->wake_index and trying each of the SBQ_WAIT_QUEUES queues once.
 * sbq->wake_index is updated to the queue that was found.
 *
 * Returns the wait state, or NULL if no waiter is registered or no queue is
 * active.
 */
static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
	int idx, tries;

	if (!atomic_read(&sbq->ws_active))
		return NULL;

	idx = atomic_read(&sbq->wake_index);
	for (tries = 0; tries < SBQ_WAIT_QUEUES; tries++) {
		struct sbq_wait_state *ws = &sbq->ws[idx];

		if (!waitqueue_active(&ws->wait)) {
			idx = sbq_index_inc(idx);
			continue;
		}

		/* Only write the shared index when it actually moved. */
		if (idx != atomic_read(&sbq->wake_index))
			atomic_set(&sbq->wake_index, idx);
		return ws;
	}

	return NULL;
}

600
static bool __sbq_wake_up(struct sbitmap_queue *sbq)
601 602
{
	struct sbq_wait_state *ws;
603
	unsigned int wake_batch;
604 605 606 607
	int wait_cnt;

	ws = sbq_wake_ptr(sbq);
	if (!ws)
608
		return false;
609 610

	wait_cnt = atomic_dec_return(&ws->wait_cnt);
611
	if (wait_cnt <= 0) {
612 613
		int ret;

614
		wake_batch = READ_ONCE(sbq->wake_batch);
615

616 617 618 619 620 621
		/*
		 * Pairs with the memory barrier in sbitmap_queue_resize() to
		 * ensure that we see the batch size update before the wait
		 * count is reset.
		 */
		smp_mb__before_atomic();
622

623
		/*
624 625 626
		 * For concurrent callers of this, the one that failed the
		 * atomic_cmpxhcg() race should call this function again
		 * to wakeup a new batch on a different 'ws'.
627
		 */
628 629 630 631 632 633 634 635
		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
		if (ret == wait_cnt) {
			sbq_index_atomic_inc(&sbq->wake_index);
			wake_up_nr(&ws->wait, wake_batch);
			return false;
		}

		return true;
636
	}
637 638 639 640

	return false;
}

641
void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
642 643 644
{
	while (__sbq_wake_up(sbq))
		;
645
}
646
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
647

648 649 650
/*
 * Store @tag as the allocation hint of @cpu, unless @sb is in round-robin
 * mode or @tag is not below the current depth. The store is annotated with
 * data_race().
 */
static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu, int tag)
{
	if (unlikely(sb->round_robin || tag >= sb->depth))
		return;

	data_race(*per_cpu_ptr(sb->alloc_hint, cpu) = tag);
}

/*
 * Clear a batch of bits in @sbq directly in the bitmap words, then run the
 * wake-up accounting once and update the current CPU's allocation hint.
 *
 * @offset:  value subtracted from every entry of @tags to get a bit number
 * @tags:    array of @nr_tags tag values
 * @nr_tags: number of entries in @tags
 *
 * NOTE(review): tags[nr_tags - 1] is read unconditionally at the end, so
 * callers presumably always pass nr_tags >= 1 -- confirm.
 */
void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
				int *tags, int nr_tags)
{
	struct sbitmap *sb = &sbq->sb;
	unsigned long *addr = NULL;	/* word the pending mask belongs to */
	unsigned long mask = 0;		/* bits collected for that word */
	int i;

	smp_mb__before_atomic();
	for (i = 0; i < nr_tags; i++) {
		const int tag = tags[i] - offset;
		unsigned long *this_addr;

		/* since we're clearing a batch, skip the deferred map */
		this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word;
		if (!addr) {
			addr = this_addr;
		} else if (addr != this_addr) {
			/* Moving to another word: flush the collected bits. */
			atomic_long_andnot(mask, (atomic_long_t *) addr);
			mask = 0;
			addr = this_addr;
		}
		mask |= (1UL << SB_NR_TO_BIT(sb, tag));
	}

	/* Flush the bits collected for the last word. */
	if (mask)
		atomic_long_andnot(mask, (atomic_long_t *) addr);

	smp_mb__after_atomic();
	sbitmap_queue_wake_up(sbq);
	/* The hint of the CPU running this code is set to the last tag. */
	sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(),
					tags[nr_tags - 1] - offset);
}

688
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
689
			 unsigned int cpu)
690
{
691 692 693
	/*
	 * Once the clear bit is set, the bit may be allocated out.
	 *
Z
Zhen Lei 已提交
694
	 * Orders READ/WRITE on the associated instance(such as request
695 696 697 698 699 700 701
	 * of blk_mq) by this bit for avoiding race with re-allocation,
	 * and its pair is the memory barrier implied in __sbitmap_get_word.
	 *
	 * One invariant is that the clear bit has to be zero when the bit
	 * is in use.
	 */
	smp_mb__before_atomic();
702 703
	sbitmap_deferred_clear_bit(&sbq->sb, nr);

704 705 706 707 708 709 710 711
	/*
	 * Pairs with the memory barrier in set_current_state() to ensure the
	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
	 * waiter. See the comment on waitqueue_active().
	 */
	smp_mb__after_atomic();
	sbitmap_queue_wake_up(sbq);
712
	sbitmap_update_cpu_hint(&sbq->sb, cpu, nr);
713 714 715 716 717 718 719 720
}
EXPORT_SYMBOL_GPL(sbitmap_queue_clear);

/*
 * Wake up the waiters on every active wait queue of @sbq, visiting the
 * queues in order starting from sbq->wake_index.
 */
void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
{
	int idx, tries;

	/*
	 * Pairs with the memory barrier in set_current_state() like in
	 * sbitmap_queue_wake_up().
	 */
	smp_mb();
	idx = atomic_read(&sbq->wake_index);
	for (tries = SBQ_WAIT_QUEUES; tries > 0; tries--) {
		struct sbq_wait_state *ws = &sbq->ws[idx];

		if (waitqueue_active(&ws->wait))
			wake_up(&ws->wait);

		idx = sbq_index_inc(idx);
	}
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
736 737 738 739 740 741 742 743 744 745 746 747 748 749

/*
 * Print the state of @sbq to the seq_file @m: the bitmap summary, the
 * per-CPU allocation hints, the wake-up bookkeeping, every wait queue and
 * the round_robin / min_shallow_depth settings.
 */
void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
{
	unsigned int printed = 0;
	int i;

	sbitmap_show(&sbq->sb, m);

	/* Comma-separated list of the hints of all possible CPUs. */
	seq_puts(m, "alloc_hint={");
	for_each_possible_cpu(i) {
		if (printed++)
			seq_puts(m, ", ");
		seq_printf(m, "%u", *per_cpu_ptr(sbq->sb.alloc_hint, i));
	}
	seq_puts(m, "}\n");

	seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
	seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
	seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));

	seq_puts(m, "ws={\n");
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[i];
		int cnt = atomic_read(&ws->wait_cnt);
		const char *state;

		state = waitqueue_active(&ws->wait) ? "active" : "inactive";
		seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", cnt, state);
	}
	seq_puts(m, "}\n");

	seq_printf(m, "round_robin=%d\n", sbq->sb.round_robin);
	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);
J
Jens Axboe 已提交
772

773 774 775 776 777 778 779
void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
			    struct sbq_wait_state *ws,
			    struct sbq_wait *sbq_wait)
{
	if (!sbq_wait->sbq) {
		sbq_wait->sbq = sbq;
		atomic_inc(&sbq->ws_active);
780
		add_wait_queue(&ws->wait, &sbq_wait->wait);
781 782 783 784 785 786 787 788 789 790 791 792 793 794
	}
}
EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);

/*
 * Unlink @sbq_wait from its wait queue and, if it is still associated with
 * a sbitmap_queue, drop that queue's active waiter count and clear the
 * association.
 */
void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait)
{
	list_del_init(&sbq_wait->wait.entry);

	if (!sbq_wait->sbq)
		return;

	atomic_dec(&sbq_wait->sbq->ws_active);
	sbq_wait->sbq = NULL;
}
EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue);

J
Jens Axboe 已提交
795 796 797 798
void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
			     struct sbq_wait_state *ws,
			     struct sbq_wait *sbq_wait, int state)
{
799
	if (!sbq_wait->sbq) {
J
Jens Axboe 已提交
800
		atomic_inc(&sbq->ws_active);
801
		sbq_wait->sbq = sbq;
J
Jens Axboe 已提交
802 803 804 805 806 807 808 809 810
	}
	prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
}
EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);

void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
			 struct sbq_wait *sbq_wait)
{
	finish_wait(&ws->wait, &sbq_wait->wait);
811
	if (sbq_wait->sbq) {
J
Jens Axboe 已提交
812
		atomic_dec(&sbq->ws_active);
813
		sbq_wait->sbq = NULL;
J
Jens Axboe 已提交
814 815 816
	}
}
EXPORT_SYMBOL_GPL(sbitmap_finish_wait);