radix-tree.c 26.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3
/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
4
 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
5
 * Copyright (C) 2006 Nick Piggin
L
Linus Torvalds 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/bitops.h>
34
#include <linux/rcupdate.h>
L
Linus Torvalds 已提交
35 36 37


#ifdef __KERNEL__
N
Nick Piggin 已提交
38
#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
L
Linus Torvalds 已提交
39 40 41 42 43 44 45 46 47 48 49
#else
#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
#endif

#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

#define RADIX_TREE_TAG_LONGS	\
	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct radix_tree_node {
50
	unsigned int	height;		/* Height from the bottom */
L
Linus Torvalds 已提交
51
	unsigned int	count;
52
	struct rcu_head	rcu_head;
L
Linus Torvalds 已提交
53
	void		*slots[RADIX_TREE_MAP_SIZE];
54
	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
L
Linus Torvalds 已提交
55 56 57
};

struct radix_tree_path {
58
	struct radix_tree_node *node;
L
Linus Torvalds 已提交
59 60 61 62 63 64
	int offset;
};

#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)

65
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
L
Linus Torvalds 已提交
66 67 68 69

/*
 * Radix tree node cache.
 */
70
static struct kmem_cache *radix_tree_node_cachep;
L
Linus Torvalds 已提交
71 72 73 74 75 76 77 78 79 80

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	int nr;
	struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
};
DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

N
Nick Piggin 已提交
81 82 83 84 85
static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

L
Linus Torvalds 已提交
86 87 88 89 90 91 92 93
/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
	struct radix_tree_node *ret;
N
Nick Piggin 已提交
94
	gfp_t gfp_mask = root_gfp_mask(root);
L
Linus Torvalds 已提交
95

N
Nick Piggin 已提交
96 97
	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
	if (ret == NULL && !(gfp_mask & __GFP_WAIT)) {
L
Linus Torvalds 已提交
98 99 100 101 102 103 104 105 106
		struct radix_tree_preload *rtp;

		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes[rtp->nr - 1];
			rtp->nodes[rtp->nr - 1] = NULL;
			rtp->nr--;
		}
	}
107
	BUG_ON(radix_tree_is_direct_ptr(ret));
L
Linus Torvalds 已提交
108 109 110
	return ret;
}

111 112 113 114 115 116 117
static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);
	kmem_cache_free(radix_tree_node_cachep, node);
}

L
Linus Torvalds 已提交
118 119 120
static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
121
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
L
Linus Torvalds 已提交
122 123 124 125 126 127 128 129
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 */
A
Al Viro 已提交
130
int radix_tree_preload(gfp_t gfp_mask)
L
Linus Torvalds 已提交
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	preempt_disable();
	rtp = &__get_cpu_var(radix_tree_preloads);
	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
			rtp->nodes[rtp->nr++] = node;
		else
			kmem_cache_free(radix_tree_node_cachep, node);
	}
	ret = 0;
out:
	return ret;
}
154
EXPORT_SYMBOL(radix_tree_preload);
L
Linus Torvalds 已提交
155

156 157
static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
L
Linus Torvalds 已提交
158
{
159
	__set_bit(offset, node->tags[tag]);
L
Linus Torvalds 已提交
160 161
}

162 163
static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
L
Linus Torvalds 已提交
164
{
165
	__clear_bit(offset, node->tags[tag]);
L
Linus Torvalds 已提交
166 167
}

168 169
static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
		int offset)
L
Linus Torvalds 已提交
170
{
171
	return test_bit(offset, node->tags[tag]);
L
Linus Torvalds 已提交
172 173
}

N
Nick Piggin 已提交
174 175
static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
A
Al Viro 已提交
176
	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
N
Nick Piggin 已提交
177 178 179 180 181
}


static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
A
Al Viro 已提交
182
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
N
Nick Piggin 已提交
183 184 185 186 187 188 189 190 191
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= __GFP_BITS_MASK;
}

static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
A
Al Viro 已提交
192
	return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
N
Nick Piggin 已提交
193 194
}

195 196 197 198
/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
199
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
200 201 202 203 204 205 206 207 208
{
	int idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

L
Linus Torvalds 已提交
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
/*
 *	Return the maximum key which can be store into a
 *	radix tree with height HEIGHT.
 */
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
	return height_to_maxindex[height];
}

/*
 *	Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node;
	unsigned int height;
	int tag;

	/* Figure out what the height should be.  */
	height = root->height + 1;
	while (index > radix_tree_maxindex(height))
		height++;

	if (root->rnode == NULL) {
		root->height = height;
		goto out;
	}

	do {
238
		unsigned int newheight;
L
Linus Torvalds 已提交
239 240 241 242
		if (!(node = radix_tree_node_alloc(root)))
			return -ENOMEM;

		/* Increase the height.  */
243
		node->slots[0] = radix_tree_direct_to_ptr(root->rnode);
L
Linus Torvalds 已提交
244 245

		/* Propagate the aggregated tag info into the new root */
246
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
N
Nick Piggin 已提交
247
			if (root_tag_get(root, tag))
L
Linus Torvalds 已提交
248 249 250
				tag_set(node, tag, 0);
		}

251 252
		newheight = root->height+1;
		node->height = newheight;
L
Linus Torvalds 已提交
253
		node->count = 1;
254 255
		rcu_assign_pointer(root->rnode, node);
		root->height = newheight;
L
Linus Torvalds 已提交
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
	} while (height > root->height);
out:
	return 0;
}

/**
 *	radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root,
			unsigned long index, void *item)
{
272
	struct radix_tree_node *node = NULL, *slot;
L
Linus Torvalds 已提交
273 274 275 276
	unsigned int height, shift;
	int offset;
	int error;

277 278
	BUG_ON(radix_tree_is_direct_ptr(item));

L
Linus Torvalds 已提交
279
	/* Make sure the tree is high enough.  */
N
Nick Piggin 已提交
280
	if (index > radix_tree_maxindex(root->height)) {
L
Linus Torvalds 已提交
281 282 283 284 285
		error = radix_tree_extend(root, index);
		if (error)
			return error;
	}

286
	slot = root->rnode;
L
Linus Torvalds 已提交
287 288 289 290
	height = root->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	offset = 0;			/* uninitialised var warning */
N
Nick Piggin 已提交
291
	while (height > 0) {
292
		if (slot == NULL) {
L
Linus Torvalds 已提交
293
			/* Have to add a child node.  */
294
			if (!(slot = radix_tree_node_alloc(root)))
L
Linus Torvalds 已提交
295
				return -ENOMEM;
296
			slot->height = height;
297
			if (node) {
298
				rcu_assign_pointer(node->slots[offset], slot);
L
Linus Torvalds 已提交
299
				node->count++;
300
			} else
301
				rcu_assign_pointer(root->rnode, slot);
L
Linus Torvalds 已提交
302 303 304 305
		}

		/* Go a level down */
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
306 307
		node = slot;
		slot = node->slots[offset];
L
Linus Torvalds 已提交
308 309
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
N
Nick Piggin 已提交
310
	}
L
Linus Torvalds 已提交
311

312
	if (slot != NULL)
L
Linus Torvalds 已提交
313
		return -EEXIST;
314

N
Nick Piggin 已提交
315 316
	if (node) {
		node->count++;
317
		rcu_assign_pointer(node->slots[offset], item);
N
Nick Piggin 已提交
318 319 320
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
	} else {
321
		rcu_assign_pointer(root->rnode, radix_tree_ptr_to_direct(item));
N
Nick Piggin 已提交
322 323 324
		BUG_ON(root_tag_get(root, 0));
		BUG_ON(root_tag_get(root, 1));
	}
L
Linus Torvalds 已提交
325 326 327 328 329

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function cannot be called under rcu_read_lock, it must be
 *	excluded from writers, as must the returned slot for subsequent
 *	use by radix_tree_deref_slot() and radix_tree_replace slot.
 *	Caller must hold tree write locked across slot lookup and
 *	replace.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
L
Linus Torvalds 已提交
345 346
{
	unsigned int height, shift;
347
	struct radix_tree_node *node, **slot;
N
Nick Piggin 已提交
348

349 350
	node = root->rnode;
	if (node == NULL)
L
Linus Torvalds 已提交
351 352
		return NULL;

353 354 355
	if (radix_tree_is_direct_ptr(node)) {
		if (index > 0)
			return NULL;
N
Nick Piggin 已提交
356
		return (void **)&root->rnode;
357 358 359 360 361
	}

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;
N
Nick Piggin 已提交
362

L
Linus Torvalds 已提交
363 364
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

365 366 367 368 369
	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = *slot;
		if (node == NULL)
L
Linus Torvalds 已提交
370 371 372 373
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
374
	} while (height > 0);
L
Linus Torvalds 已提交
375

376 377 378 379 380 381 382 383 384 385
	return (void **)slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
386 387 388 389 390
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
391 392 393
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
	unsigned int height, shift;
	struct radix_tree_node *node, **slot;

	node = rcu_dereference(root->rnode);
	if (node == NULL)
		return NULL;

	if (radix_tree_is_direct_ptr(node)) {
		if (index > 0)
			return NULL;
		return radix_tree_direct_to_ptr(node);
	}

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;

	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
412

413 414 415 416 417 418 419 420 421 422 423 424
	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = rcu_dereference(*slot);
		if (node == NULL)
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	return node;
L
Linus Torvalds 已提交
425 426 427 428 429 430 431 432 433
}
EXPORT_SYMBOL(radix_tree_lookup);

/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag: 		tag index
 *
434 435
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  From
L
Linus Torvalds 已提交
436 437 438 439 440 441
 *	the root all the way down to the leaf node.
 *
 *	Returns the address of the tagged item.   Setting a tag on a not-present
 *	item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
442
			unsigned long index, unsigned int tag)
L
Linus Torvalds 已提交
443 444
{
	unsigned int height, shift;
445
	struct radix_tree_node *slot;
L
Linus Torvalds 已提交
446 447

	height = root->height;
448
	BUG_ON(index > radix_tree_maxindex(height));
L
Linus Torvalds 已提交
449

450
	slot = root->rnode;
N
Nick Piggin 已提交
451
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
L
Linus Torvalds 已提交
452 453 454 455 456

	while (height > 0) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
457 458
		if (!tag_get(slot, tag, offset))
			tag_set(slot, tag, offset);
459 460
		slot = slot->slots[offset];
		BUG_ON(slot == NULL);
L
Linus Torvalds 已提交
461 462 463 464
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

N
Nick Piggin 已提交
465 466 467 468
	/* set the root's tag bit */
	if (slot && !root_tag_get(root, tag))
		root_tag_set(root, tag);

469
	return slot;
L
Linus Torvalds 已提交
470 471 472 473 474 475 476 477 478
}
EXPORT_SYMBOL(radix_tree_tag_set);

/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag: 		tag index
 *
479 480
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If
L
Linus Torvalds 已提交
481 482 483 484 485 486 487
 *	this causes the leaf node to have no tags set then clear the tag in the
 *	next-to-leaf node, etc.
 *
 *	Returns the address of the tagged item on success, else NULL.  ie:
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
488
			unsigned long index, unsigned int tag)
L
Linus Torvalds 已提交
489 490
{
	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
N
Nick Piggin 已提交
491
	struct radix_tree_node *slot = NULL;
L
Linus Torvalds 已提交
492 493 494 495 496 497 498 499
	unsigned int height, shift;

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	pathp->node = NULL;
500
	slot = root->rnode;
L
Linus Torvalds 已提交
501 502 503 504

	while (height > 0) {
		int offset;

505
		if (slot == NULL)
L
Linus Torvalds 已提交
506 507 508 509
			goto out;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		pathp[1].offset = offset;
510 511
		pathp[1].node = slot;
		slot = slot->slots[offset];
L
Linus Torvalds 已提交
512 513 514 515 516
		pathp++;
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

N
Nick Piggin 已提交
517
	if (slot == NULL)
L
Linus Torvalds 已提交
518 519
		goto out;

N
Nick Piggin 已提交
520
	while (pathp->node) {
521 522
		if (!tag_get(pathp->node, tag, pathp->offset))
			goto out;
523
		tag_clear(pathp->node, tag, pathp->offset);
524 525
		if (any_tag_set(pathp->node, tag))
			goto out;
L
Linus Torvalds 已提交
526
		pathp--;
N
Nick Piggin 已提交
527 528 529 530 531 532
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);

L
Linus Torvalds 已提交
533
out:
N
Nick Piggin 已提交
534
	return slot;
L
Linus Torvalds 已提交
535 536 537 538 539
}
EXPORT_SYMBOL(radix_tree_tag_clear);

#ifndef __KERNEL__	/* Only the test harness uses this at present */
/**
540 541 542
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
543
 * @tag: 		tag index (< RADIX_TREE_MAX_TAGS)
L
Linus Torvalds 已提交
544
 *
545
 * Return values:
L
Linus Torvalds 已提交
546
 *
N
Nick Piggin 已提交
547 548
 *  0: tag not present or not set
 *  1: tag set
L
Linus Torvalds 已提交
549 550
 */
int radix_tree_tag_get(struct radix_tree_root *root,
551
			unsigned long index, unsigned int tag)
L
Linus Torvalds 已提交
552 553
{
	unsigned int height, shift;
554
	struct radix_tree_node *node;
L
Linus Torvalds 已提交
555 556
	int saw_unset_tag = 0;

N
Nick Piggin 已提交
557 558 559 560
	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

561 562 563 564 565 566 567 568 569 570
	node = rcu_dereference(root->rnode);
	if (node == NULL)
		return 0;

	if (radix_tree_is_direct_ptr(node))
		return (index == 0);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return 0;
N
Nick Piggin 已提交
571

L
Linus Torvalds 已提交
572 573 574 575 576
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; ; ) {
		int offset;

577
		if (node == NULL)
L
Linus Torvalds 已提交
578 579 580 581 582 583 584 585
			return 0;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;

		/*
		 * This is just a debug check.  Later, we can bale as soon as
		 * we see an unset tag.
		 */
586
		if (!tag_get(node, tag, offset))
L
Linus Torvalds 已提交
587 588
			saw_unset_tag = 1;
		if (height == 1) {
589
			int ret = tag_get(node, tag, offset);
L
Linus Torvalds 已提交
590 591

			BUG_ON(ret && saw_unset_tag);
592
			return !!ret;
L
Linus Torvalds 已提交
593
		}
594
		node = rcu_dereference(node->slots[offset]);
L
Linus Torvalds 已提交
595 596 597 598 599 600 601
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
}
EXPORT_SYMBOL(radix_tree_tag_get);
#endif

602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
/**
 *	radix_tree_next_hole    -    find the next hole (not-present entry)
 *	@root:		tree root
 *	@index:		index key
 *	@max_scan:	maximum range to search
 *
 *	Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
 *	indexed hole.
 *
 *	Returns: the index of the hole if found, otherwise returns an index
 *	outside of the set specified (in which case 'return - index >= max_scan'
 *	will be true).
 *
 *	radix_tree_next_hole may be called under rcu_read_lock. However, like
 *	radix_tree_gang_lookup, this will not atomically search a snapshot of the
 *	tree at a single point in time. For example, if a hole is created at index
 *	5, then subsequently a hole is created at index 10, radix_tree_next_hole
 *	covering both indexes may return 10 if called under rcu_read_lock.
 */
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
				unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index++;
		if (index == 0)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_next_hole);

L
Linus Torvalds 已提交
638
static unsigned int
639
__lookup(struct radix_tree_node *slot, void **results, unsigned long index,
L
Linus Torvalds 已提交
640 641 642
	unsigned int max_items, unsigned long *next_index)
{
	unsigned int nr_found = 0;
643 644 645
	unsigned int shift, height;
	unsigned long i;

646 647
	height = slot->height;
	if (height == 0)
648
		goto out;
L
Linus Torvalds 已提交
649 650
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

651
	for ( ; height > 1; height--) {
652 653
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
L
Linus Torvalds 已提交
654 655 656 657 658 659
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
660 661 662
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
L
Linus Torvalds 已提交
663 664 665
		}

		shift -= RADIX_TREE_MAP_SHIFT;
666 667 668
		slot = rcu_dereference(slot->slots[i]);
		if (slot == NULL)
			goto out;
L
Linus Torvalds 已提交
669
	}
670 671 672

	/* Bottom level: grab some items */
	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
673
		struct radix_tree_node *node;
674
		index++;
675 676 677
		node = slot->slots[i];
		if (node) {
			results[nr_found++] = rcu_dereference(node);
678 679 680 681
			if (nr_found == max_items)
				goto out;
		}
	}
L
Linus Torvalds 已提交
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
out:
	*next_index = index;
	return nr_found;
}

/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	them at *@results and returns the number of items which were placed at
 *	*@results.
 *
 *	The implementation is naive.
699 700 701 702 703 704
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock. In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the semantics
 *	of an RCU protected gang lookup are as though multiple radix_tree_lookups
 *	have been issued in individual locks, and results stored in 'results'.
L
Linus Torvalds 已提交
705 706 707 708 709
 */
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
710 711
	unsigned long max_index;
	struct radix_tree_node *node;
L
Linus Torvalds 已提交
712
	unsigned long cur_index = first_index;
713 714 715 716 717
	unsigned int ret;

	node = rcu_dereference(root->rnode);
	if (!node)
		return 0;
L
Linus Torvalds 已提交
718

719 720 721 722 723 724 725 726 727 728 729
	if (radix_tree_is_direct_ptr(node)) {
		if (first_index > 0)
			return 0;
		node = radix_tree_direct_to_ptr(node);
		results[0] = rcu_dereference(node);
		return 1;
	}

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
L
Linus Torvalds 已提交
730 731 732 733 734 735
	while (ret < max_items) {
		unsigned int nr_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
736
		nr_found = __lookup(node, results + ret, cur_index,
L
Linus Torvalds 已提交
737 738 739 740 741 742
					max_items - ret, &next_index);
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}
743

L
Linus Torvalds 已提交
744 745 746 747 748 749 750 751 752
	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

/*
 * FIXME: the two tag_get()s here should use find_next_bit() instead of
 * open-coding the search.
 */
static unsigned int
753
__lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index,
754
	unsigned int max_items, unsigned long *next_index, unsigned int tag)
L
Linus Torvalds 已提交
755 756
{
	unsigned int nr_found = 0;
757
	unsigned int shift, height;
L
Linus Torvalds 已提交
758

759 760
	height = slot->height;
	if (height == 0)
N
Nick Piggin 已提交
761
		goto out;
762
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
L
Linus Torvalds 已提交
763

764 765
	while (height > 0) {
		unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ;
L
Linus Torvalds 已提交
766

767 768
		for (;;) {
			if (tag_get(slot, tag, i))
L
Linus Torvalds 已提交
769 770 771 772 773
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
774 775 776
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
L
Linus Torvalds 已提交
777 778 779 780 781 782
		}
		height--;
		if (height == 0) {	/* Bottom level: grab some items */
			unsigned long j = index & RADIX_TREE_MAP_MASK;

			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
783
				struct radix_tree_node *node;
L
Linus Torvalds 已提交
784
				index++;
785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800
				if (!tag_get(slot, tag, j))
					continue;
				node = slot->slots[j];
				/*
				 * Even though the tag was found set, we need to
				 * recheck that we have a non-NULL node, because
				 * if this lookup is lockless, it may have been
				 * subsequently deleted.
				 *
				 * Similar care must be taken in any place that
				 * lookup ->slots[x] without a lock (ie. can't
				 * rely on its value remaining the same).
				 */
				if (node) {
					node = rcu_dereference(node);
					results[nr_found++] = node;
L
Linus Torvalds 已提交
801 802 803 804 805 806
					if (nr_found == max_items)
						goto out;
				}
			}
		}
		shift -= RADIX_TREE_MAP_SHIFT;
807 808 809 810
		slot = rcu_dereference(slot->slots[i]);
		if (slot == NULL)
			break;
	}
L
Linus Torvalds 已提交
811 812 813 814 815 816 817 818 819 820 821 822
out:
	*next_index = index;
	return nr_found;
}

/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *	                             based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
823
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
L
Linus Torvalds 已提交
824 825 826 827 828 829 830
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the items at *@results and
 *	returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
831 832
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
L
Linus Torvalds 已提交
833
{
834 835
	struct radix_tree_node *node;
	unsigned long max_index;
L
Linus Torvalds 已提交
836
	unsigned long cur_index = first_index;
837
	unsigned int ret;
L
Linus Torvalds 已提交
838

N
Nick Piggin 已提交
839 840 841 842
	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

843 844 845 846 847 848 849 850 851 852 853 854 855 856 857
	node = rcu_dereference(root->rnode);
	if (!node)
		return 0;

	if (radix_tree_is_direct_ptr(node)) {
		if (first_index > 0)
			return 0;
		node = radix_tree_direct_to_ptr(node);
		results[0] = rcu_dereference(node);
		return 1;
	}

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
L
Linus Torvalds 已提交
858 859 860 861 862 863
	while (ret < max_items) {
		unsigned int nr_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
864
		nr_found = __lookup_tag(node, results + ret, cur_index,
L
Linus Torvalds 已提交
865 866 867 868 869 870
					max_items - ret, &next_index, tag);
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}
871

L
Linus Torvalds 已提交
872 873 874 875
	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

876 877 878 879 880 881 882
/**
 *	radix_tree_shrink    -    shrink height of a radix tree to minimal
 *	@root		radix tree root
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
N
Nick Piggin 已提交
883
	while (root->height > 0 &&
884 885 886
			root->rnode->count == 1 &&
			root->rnode->slots[0]) {
		struct radix_tree_node *to_free = root->rnode;
887
		void *newptr;
888

889 890 891 892 893 894 895 896 897 898 899
		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another. If
		 * it was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode).
		 */
		newptr = to_free->slots[0];
		if (root->height == 1)
			newptr = radix_tree_ptr_to_direct(newptr);
		root->rnode = newptr;
900 901 902 903 904 905 906 907 908 909
		root->height--;
		/* must only free zeroed nodes into the slab */
		tag_clear(to_free, 0, 0);
		tag_clear(to_free, 1, 0);
		to_free->slots[0] = NULL;
		to_free->count = 0;
		radix_tree_node_free(to_free);
	}
}

L
Linus Torvalds 已提交
910 911 912 913 914 915 916 917 918 919 920 921
/**
 *	radix_tree_delete    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Remove the item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
N
Nick Piggin 已提交
922
	struct radix_tree_node *slot = NULL;
923
	struct radix_tree_node *to_free;
L
Linus Torvalds 已提交
924
	unsigned int height, shift;
925 926
	int tag;
	int offset;
L
Linus Torvalds 已提交
927 928 929 930 931

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

N
Nick Piggin 已提交
932 933
	slot = root->rnode;
	if (height == 0 && root->rnode) {
934
		slot = radix_tree_direct_to_ptr(slot);
N
Nick Piggin 已提交
935 936 937 938 939
		root_tag_clear_all(root);
		root->rnode = NULL;
		goto out;
	}

L
Linus Torvalds 已提交
940 941 942
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	pathp->node = NULL;

N
Nick Piggin 已提交
943
	do {
944
		if (slot == NULL)
L
Linus Torvalds 已提交
945 946
			goto out;

947
		pathp++;
L
Linus Torvalds 已提交
948
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
949 950
		pathp->offset = offset;
		pathp->node = slot;
951
		slot = slot->slots[offset];
L
Linus Torvalds 已提交
952
		shift -= RADIX_TREE_MAP_SHIFT;
N
Nick Piggin 已提交
953 954
		height--;
	} while (height > 0);
L
Linus Torvalds 已提交
955

N
Nick Piggin 已提交
956
	if (slot == NULL)
L
Linus Torvalds 已提交
957 958 959 960 961
		goto out;

	/*
	 * Clear all tags associated with the just-deleted item
	 */
962
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
N
Nick Piggin 已提交
963 964
		if (tag_get(pathp->node, tag, pathp->offset))
			radix_tree_tag_clear(root, index, tag);
965
	}
L
Linus Torvalds 已提交
966

967
	to_free = NULL;
968
	/* Now free the nodes we do not need anymore */
N
Nick Piggin 已提交
969
	while (pathp->node) {
970
		pathp->node->slots[pathp->offset] = NULL;
971
		pathp->node->count--;
972 973 974 975 976 977
		/*
		 * Queue the node for deferred freeing after the
		 * last reference to it disappears (set NULL, above).
		 */
		if (to_free)
			radix_tree_node_free(to_free);
978 979 980 981

		if (pathp->node->count) {
			if (pathp->node == root->rnode)
				radix_tree_shrink(root);
982
			goto out;
983
		}
984 985

		/* Node with zero slots in use so free it */
986
		to_free = pathp->node;
N
Nick Piggin 已提交
987
		pathp--;
988

L
Linus Torvalds 已提交
989
	}
N
Nick Piggin 已提交
990
	root_tag_clear_all(root);
991
	root->height = 0;
N
Nick Piggin 已提交
992
	root->rnode = NULL;
993 994
	if (to_free)
		radix_tree_node_free(to_free);
N
Nick Piggin 已提交
995

L
Linus Torvalds 已提交
996
out:
N
Nick Piggin 已提交
997
	return slot;
L
Linus Torvalds 已提交
998 999 1000 1001 1002 1003 1004 1005
}
EXPORT_SYMBOL(radix_tree_delete);

/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 */
1006
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
L
Linus Torvalds 已提交
1007
{
N
Nick Piggin 已提交
1008
	return root_tag_get(root, tag);
L
Linus Torvalds 已提交
1009 1010 1011 1012
}
EXPORT_SYMBOL(radix_tree_tagged);

static void
1013
radix_tree_node_ctor(void *node, struct kmem_cache *cachep, unsigned long flags)
L
Linus Torvalds 已提交
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043
{
	memset(node, 0, sizeof(struct radix_tree_node));
}

static __init unsigned long __maxindex(unsigned int height)
{
	unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
	unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;

	if (tmp >= RADIX_TREE_INDEX_BITS)
		index = ~0UL;
	return index;
}

static __init void radix_tree_init_maxindex(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
		height_to_maxindex[i] = __maxindex(i);
}

static int radix_tree_callback(struct notifier_block *nfb,
                            unsigned long action,
                            void *hcpu)
{
       int cpu = (long)hcpu;
       struct radix_tree_preload *rtp;

       /* Free per-cpu pool of perloaded nodes */
1044
       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
L
Linus Torvalds 已提交
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059
               rtp = &per_cpu(radix_tree_preloads, cpu);
               while (rtp->nr) {
                       kmem_cache_free(radix_tree_node_cachep,
                                       rtp->nodes[rtp->nr-1]);
                       rtp->nodes[rtp->nr-1] = NULL;
                       rtp->nr--;
               }
       }
       return NOTIFY_OK;
}

void __init radix_tree_init(void)
{
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
1060
			SLAB_PANIC, radix_tree_node_ctor);
L
Linus Torvalds 已提交
1061 1062 1063
	radix_tree_init_maxindex();
	hotcpu_notifier(radix_tree_callback, 0);
}