radix-tree.c 27.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3
/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
4
 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
5
 * Copyright (C) 2006 Nick Piggin
L
Linus Torvalds 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/bitops.h>
34
#include <linux/rcupdate.h>
L
Linus Torvalds 已提交
35 36 37


#ifdef __KERNEL__
N
Nick Piggin 已提交
38
#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
L
Linus Torvalds 已提交
39 40 41 42 43 44 45 46 47 48 49
#else
#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
#endif

#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

#define RADIX_TREE_TAG_LONGS	\
	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct radix_tree_node {
50
	unsigned int	height;		/* Height from the bottom */
L
Linus Torvalds 已提交
51
	unsigned int	count;
52
	struct rcu_head	rcu_head;
L
Linus Torvalds 已提交
53
	void		*slots[RADIX_TREE_MAP_SIZE];
54
	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
L
Linus Torvalds 已提交
55 56 57
};

struct radix_tree_path {
58
	struct radix_tree_node *node;
L
Linus Torvalds 已提交
59 60 61 62
	int offset;
};

#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
63 64
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
					  RADIX_TREE_MAP_SHIFT))
L
Linus Torvalds 已提交
65

66 67 68 69 70
/*
 * The height_to_maxindex array needs to be one deeper than the maximum
 * path as height 0 holds only 1 entry.
 */
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
L
Linus Torvalds 已提交
71 72 73 74

/*
 * Radix tree node cache.
 */
75
static struct kmem_cache *radix_tree_node_cachep;
L
Linus Torvalds 已提交
76 77 78 79 80 81 82 83 84 85

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	int nr;
	struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
};
DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

N
Nick Piggin 已提交
86 87 88 89 90
static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

L
Linus Torvalds 已提交
91 92 93 94 95 96 97 98
/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
	struct radix_tree_node *ret;
N
Nick Piggin 已提交
99
	gfp_t gfp_mask = root_gfp_mask(root);
L
Linus Torvalds 已提交
100

101 102
	ret = kmem_cache_alloc(radix_tree_node_cachep,
				set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
N
Nick Piggin 已提交
103
	if (ret == NULL && !(gfp_mask & __GFP_WAIT)) {
L
Linus Torvalds 已提交
104 105 106 107 108 109 110 111 112
		struct radix_tree_preload *rtp;

		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes[rtp->nr - 1];
			rtp->nodes[rtp->nr - 1] = NULL;
			rtp->nr--;
		}
	}
N
Nick Piggin 已提交
113
	BUG_ON(radix_tree_is_indirect_ptr(ret));
L
Linus Torvalds 已提交
114 115 116
	return ret;
}

117 118 119 120 121 122 123
static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);
	kmem_cache_free(radix_tree_node_cachep, node);
}

L
Linus Torvalds 已提交
124 125 126
static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
127
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
L
Linus Torvalds 已提交
128 129 130 131 132 133 134 135
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 */
A
Al Viro 已提交
136
int radix_tree_preload(gfp_t gfp_mask)
L
Linus Torvalds 已提交
137 138 139 140 141 142 143 144 145
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	preempt_disable();
	rtp = &__get_cpu_var(radix_tree_preloads);
	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
		preempt_enable();
146 147
		node = kmem_cache_alloc(radix_tree_node_cachep,
				set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
L
Linus Torvalds 已提交
148 149 150 151 152 153 154 155 156 157 158 159 160
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
			rtp->nodes[rtp->nr++] = node;
		else
			kmem_cache_free(radix_tree_node_cachep, node);
	}
	ret = 0;
out:
	return ret;
}
161
EXPORT_SYMBOL(radix_tree_preload);
L
Linus Torvalds 已提交
162

163 164
static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
L
Linus Torvalds 已提交
165
{
166
	__set_bit(offset, node->tags[tag]);
L
Linus Torvalds 已提交
167 168
}

169 170
static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
L
Linus Torvalds 已提交
171
{
172
	__clear_bit(offset, node->tags[tag]);
L
Linus Torvalds 已提交
173 174
}

175 176
static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
		int offset)
L
Linus Torvalds 已提交
177
{
178
	return test_bit(offset, node->tags[tag]);
L
Linus Torvalds 已提交
179 180
}

N
Nick Piggin 已提交
181 182
static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
A
Al Viro 已提交
183
	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
N
Nick Piggin 已提交
184 185 186 187 188
}


static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
A
Al Viro 已提交
189
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
N
Nick Piggin 已提交
190 191 192 193 194 195 196 197 198
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= __GFP_BITS_MASK;
}

static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
A
Al Viro 已提交
199
	return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
N
Nick Piggin 已提交
200 201
}

202 203 204 205
/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
206
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
207 208 209 210 211 212 213 214 215
{
	int idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

L
Linus Torvalds 已提交
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
/*
 *	Return the maximum key which can be store into a
 *	radix tree with height HEIGHT.
 */
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
	return height_to_maxindex[height];
}

/*
 *	Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node;
	unsigned int height;
	int tag;

	/* Figure out what the height should be.  */
	height = root->height + 1;
	while (index > radix_tree_maxindex(height))
		height++;

	if (root->rnode == NULL) {
		root->height = height;
		goto out;
	}

	do {
245
		unsigned int newheight;
L
Linus Torvalds 已提交
246 247 248 249
		if (!(node = radix_tree_node_alloc(root)))
			return -ENOMEM;

		/* Increase the height.  */
N
Nick Piggin 已提交
250
		node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);
L
Linus Torvalds 已提交
251 252

		/* Propagate the aggregated tag info into the new root */
253
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
N
Nick Piggin 已提交
254
			if (root_tag_get(root, tag))
L
Linus Torvalds 已提交
255 256 257
				tag_set(node, tag, 0);
		}

258 259
		newheight = root->height+1;
		node->height = newheight;
L
Linus Torvalds 已提交
260
		node->count = 1;
N
Nick Piggin 已提交
261
		node = radix_tree_ptr_to_indirect(node);
262 263
		rcu_assign_pointer(root->rnode, node);
		root->height = newheight;
L
Linus Torvalds 已提交
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
	} while (height > root->height);
out:
	return 0;
}

/**
 *	radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root,
			unsigned long index, void *item)
{
280
	struct radix_tree_node *node = NULL, *slot;
L
Linus Torvalds 已提交
281 282 283 284
	unsigned int height, shift;
	int offset;
	int error;

N
Nick Piggin 已提交
285
	BUG_ON(radix_tree_is_indirect_ptr(item));
286

L
Linus Torvalds 已提交
287
	/* Make sure the tree is high enough.  */
N
Nick Piggin 已提交
288
	if (index > radix_tree_maxindex(root->height)) {
L
Linus Torvalds 已提交
289 290 291 292 293
		error = radix_tree_extend(root, index);
		if (error)
			return error;
	}

N
Nick Piggin 已提交
294 295
	slot = radix_tree_indirect_to_ptr(root->rnode);

L
Linus Torvalds 已提交
296 297 298 299
	height = root->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	offset = 0;			/* uninitialised var warning */
N
Nick Piggin 已提交
300
	while (height > 0) {
301
		if (slot == NULL) {
L
Linus Torvalds 已提交
302
			/* Have to add a child node.  */
303
			if (!(slot = radix_tree_node_alloc(root)))
L
Linus Torvalds 已提交
304
				return -ENOMEM;
305
			slot->height = height;
306
			if (node) {
307
				rcu_assign_pointer(node->slots[offset], slot);
L
Linus Torvalds 已提交
308
				node->count++;
309
			} else
N
Nick Piggin 已提交
310 311
				rcu_assign_pointer(root->rnode,
					radix_tree_ptr_to_indirect(slot));
L
Linus Torvalds 已提交
312 313 314 315
		}

		/* Go a level down */
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
316 317
		node = slot;
		slot = node->slots[offset];
L
Linus Torvalds 已提交
318 319
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
N
Nick Piggin 已提交
320
	}
L
Linus Torvalds 已提交
321

322
	if (slot != NULL)
L
Linus Torvalds 已提交
323
		return -EEXIST;
324

N
Nick Piggin 已提交
325 326
	if (node) {
		node->count++;
327
		rcu_assign_pointer(node->slots[offset], item);
N
Nick Piggin 已提交
328 329 330
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
	} else {
N
Nick Piggin 已提交
331
		rcu_assign_pointer(root->rnode, item);
N
Nick Piggin 已提交
332 333 334
		BUG_ON(root_tag_get(root, 0));
		BUG_ON(root_tag_get(root, 1));
	}
L
Linus Torvalds 已提交
335 336 337 338 339

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function cannot be called under rcu_read_lock, it must be
 *	excluded from writers, as must the returned slot for subsequent
 *	use by radix_tree_deref_slot() and radix_tree_replace slot.
 *	Caller must hold tree write locked across slot lookup and
 *	replace.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
L
Linus Torvalds 已提交
355 356
{
	unsigned int height, shift;
357
	struct radix_tree_node *node, **slot;
N
Nick Piggin 已提交
358

359 360
	node = root->rnode;
	if (node == NULL)
L
Linus Torvalds 已提交
361 362
		return NULL;

N
Nick Piggin 已提交
363
	if (!radix_tree_is_indirect_ptr(node)) {
364 365
		if (index > 0)
			return NULL;
N
Nick Piggin 已提交
366
		return (void **)&root->rnode;
367
	}
N
Nick Piggin 已提交
368
	node = radix_tree_indirect_to_ptr(node);
369 370 371 372

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;
N
Nick Piggin 已提交
373

L
Linus Torvalds 已提交
374 375
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

376 377 378 379 380
	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = *slot;
		if (node == NULL)
L
Linus Torvalds 已提交
381 382 383 384
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
385
	} while (height > 0);
L
Linus Torvalds 已提交
386

387 388 389 390 391 392 393 394 395 396
	return (void **)slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
397 398 399 400 401
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
402 403 404
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
405 406 407 408 409 410 411
	unsigned int height, shift;
	struct radix_tree_node *node, **slot;

	node = rcu_dereference(root->rnode);
	if (node == NULL)
		return NULL;

N
Nick Piggin 已提交
412
	if (!radix_tree_is_indirect_ptr(node)) {
413 414
		if (index > 0)
			return NULL;
N
Nick Piggin 已提交
415
		return node;
416
	}
N
Nick Piggin 已提交
417
	node = radix_tree_indirect_to_ptr(node);
418 419 420 421 422 423

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;

	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
424

425 426 427 428 429 430 431 432 433 434 435 436
	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = rcu_dereference(*slot);
		if (node == NULL)
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	return node;
L
Linus Torvalds 已提交
437 438 439 440 441 442 443 444 445
}
EXPORT_SYMBOL(radix_tree_lookup);

/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag: 		tag index
 *
446 447
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  From
L
Linus Torvalds 已提交
448 449 450 451 452 453
 *	the root all the way down to the leaf node.
 *
 *	Returns the address of the tagged item.   Setting a tag on a not-present
 *	item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
454
			unsigned long index, unsigned int tag)
L
Linus Torvalds 已提交
455 456
{
	unsigned int height, shift;
457
	struct radix_tree_node *slot;
L
Linus Torvalds 已提交
458 459

	height = root->height;
460
	BUG_ON(index > radix_tree_maxindex(height));
L
Linus Torvalds 已提交
461

N
Nick Piggin 已提交
462
	slot = radix_tree_indirect_to_ptr(root->rnode);
N
Nick Piggin 已提交
463
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
L
Linus Torvalds 已提交
464 465 466 467 468

	while (height > 0) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
469 470
		if (!tag_get(slot, tag, offset))
			tag_set(slot, tag, offset);
471 472
		slot = slot->slots[offset];
		BUG_ON(slot == NULL);
L
Linus Torvalds 已提交
473 474 475 476
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

N
Nick Piggin 已提交
477 478 479 480
	/* set the root's tag bit */
	if (slot && !root_tag_get(root, tag))
		root_tag_set(root, tag);

481
	return slot;
L
Linus Torvalds 已提交
482 483 484 485 486 487 488 489 490
}
EXPORT_SYMBOL(radix_tree_tag_set);

/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag: 		tag index
 *
491 492
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If
L
Linus Torvalds 已提交
493 494 495 496 497 498 499
 *	this causes the leaf node to have no tags set then clear the tag in the
 *	next-to-leaf node, etc.
 *
 *	Returns the address of the tagged item on success, else NULL.  ie:
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
500
			unsigned long index, unsigned int tag)
L
Linus Torvalds 已提交
501
{
502 503 504 505 506
	/*
	 * The radix tree path needs to be one longer than the maximum path
	 * since the "list" is null terminated.
	 */
	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
N
Nick Piggin 已提交
507
	struct radix_tree_node *slot = NULL;
L
Linus Torvalds 已提交
508 509 510 511 512 513 514 515
	unsigned int height, shift;

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	pathp->node = NULL;
N
Nick Piggin 已提交
516
	slot = radix_tree_indirect_to_ptr(root->rnode);
L
Linus Torvalds 已提交
517 518 519 520

	while (height > 0) {
		int offset;

521
		if (slot == NULL)
L
Linus Torvalds 已提交
522 523 524 525
			goto out;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		pathp[1].offset = offset;
526 527
		pathp[1].node = slot;
		slot = slot->slots[offset];
L
Linus Torvalds 已提交
528 529 530 531 532
		pathp++;
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

N
Nick Piggin 已提交
533
	if (slot == NULL)
L
Linus Torvalds 已提交
534 535
		goto out;

N
Nick Piggin 已提交
536
	while (pathp->node) {
537 538
		if (!tag_get(pathp->node, tag, pathp->offset))
			goto out;
539
		tag_clear(pathp->node, tag, pathp->offset);
540 541
		if (any_tag_set(pathp->node, tag))
			goto out;
L
Linus Torvalds 已提交
542
		pathp--;
N
Nick Piggin 已提交
543 544 545 546 547 548
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);

L
Linus Torvalds 已提交
549
out:
N
Nick Piggin 已提交
550
	return slot;
L
Linus Torvalds 已提交
551 552 553 554 555
}
EXPORT_SYMBOL(radix_tree_tag_clear);

#ifndef __KERNEL__	/* Only the test harness uses this at present */
/**
556 557 558
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
559
 * @tag: 		tag index (< RADIX_TREE_MAX_TAGS)
L
Linus Torvalds 已提交
560
 *
561
 * Return values:
L
Linus Torvalds 已提交
562
 *
N
Nick Piggin 已提交
563 564
 *  0: tag not present or not set
 *  1: tag set
L
Linus Torvalds 已提交
565 566
 */
int radix_tree_tag_get(struct radix_tree_root *root,
567
			unsigned long index, unsigned int tag)
L
Linus Torvalds 已提交
568 569
{
	unsigned int height, shift;
570
	struct radix_tree_node *node;
L
Linus Torvalds 已提交
571 572
	int saw_unset_tag = 0;

N
Nick Piggin 已提交
573 574 575 576
	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

577 578 579 580
	node = rcu_dereference(root->rnode);
	if (node == NULL)
		return 0;

N
Nick Piggin 已提交
581
	if (!radix_tree_is_indirect_ptr(node))
582
		return (index == 0);
N
Nick Piggin 已提交
583
	node = radix_tree_indirect_to_ptr(node);
584 585 586 587

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return 0;
N
Nick Piggin 已提交
588

L
Linus Torvalds 已提交
589 590 591 592 593
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; ; ) {
		int offset;

594
		if (node == NULL)
L
Linus Torvalds 已提交
595 596 597 598 599 600 601 602
			return 0;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;

		/*
		 * This is just a debug check.  Later, we can bale as soon as
		 * we see an unset tag.
		 */
603
		if (!tag_get(node, tag, offset))
L
Linus Torvalds 已提交
604 605
			saw_unset_tag = 1;
		if (height == 1) {
606
			int ret = tag_get(node, tag, offset);
L
Linus Torvalds 已提交
607 608

			BUG_ON(ret && saw_unset_tag);
609
			return !!ret;
L
Linus Torvalds 已提交
610
		}
611
		node = rcu_dereference(node->slots[offset]);
L
Linus Torvalds 已提交
612 613 614 615 616 617 618
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
}
EXPORT_SYMBOL(radix_tree_tag_get);
#endif

619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
/**
 *	radix_tree_next_hole    -    find the next hole (not-present entry)
 *	@root:		tree root
 *	@index:		index key
 *	@max_scan:	maximum range to search
 *
 *	Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
 *	indexed hole.
 *
 *	Returns: the index of the hole if found, otherwise returns an index
 *	outside of the set specified (in which case 'return - index >= max_scan'
 *	will be true).
 *
 *	radix_tree_next_hole may be called under rcu_read_lock. However, like
 *	radix_tree_gang_lookup, this will not atomically search a snapshot of the
 *	tree at a single point in time. For example, if a hole is created at index
 *	5, then subsequently a hole is created at index 10, radix_tree_next_hole
 *	covering both indexes may return 10 if called under rcu_read_lock.
 */
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
				unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index++;
		if (index == 0)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_next_hole);

L
Linus Torvalds 已提交
655
static unsigned int
656
__lookup(struct radix_tree_node *slot, void **results, unsigned long index,
L
Linus Torvalds 已提交
657 658 659
	unsigned int max_items, unsigned long *next_index)
{
	unsigned int nr_found = 0;
660 661 662
	unsigned int shift, height;
	unsigned long i;

663 664
	height = slot->height;
	if (height == 0)
665
		goto out;
L
Linus Torvalds 已提交
666 667
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

668
	for ( ; height > 1; height--) {
669 670
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
L
Linus Torvalds 已提交
671 672 673 674 675 676
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
677 678 679
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
L
Linus Torvalds 已提交
680 681 682
		}

		shift -= RADIX_TREE_MAP_SHIFT;
683 684 685
		slot = rcu_dereference(slot->slots[i]);
		if (slot == NULL)
			goto out;
L
Linus Torvalds 已提交
686
	}
687 688 689

	/* Bottom level: grab some items */
	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
690
		struct radix_tree_node *node;
691
		index++;
692 693 694
		node = slot->slots[i];
		if (node) {
			results[nr_found++] = rcu_dereference(node);
695 696 697 698
			if (nr_found == max_items)
				goto out;
		}
	}
L
Linus Torvalds 已提交
699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
out:
	*next_index = index;
	return nr_found;
}

/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	them at *@results and returns the number of items which were placed at
 *	*@results.
 *
 *	The implementation is naive.
716 717 718 719 720 721
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock. In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the semantics
 *	of an RCU protected gang lookup are as though multiple radix_tree_lookups
 *	have been issued in individual locks, and results stored in 'results'.
L
Linus Torvalds 已提交
722 723 724 725 726
 */
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
727 728
	unsigned long max_index;
	struct radix_tree_node *node;
L
Linus Torvalds 已提交
729
	unsigned long cur_index = first_index;
730 731 732 733 734
	unsigned int ret;

	node = rcu_dereference(root->rnode);
	if (!node)
		return 0;
L
Linus Torvalds 已提交
735

N
Nick Piggin 已提交
736
	if (!radix_tree_is_indirect_ptr(node)) {
737 738
		if (first_index > 0)
			return 0;
N
Nick Piggin 已提交
739
		results[0] = node;
740 741
		return 1;
	}
N
Nick Piggin 已提交
742
	node = radix_tree_indirect_to_ptr(node);
743 744 745 746

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
L
Linus Torvalds 已提交
747 748 749 750 751 752
	while (ret < max_items) {
		unsigned int nr_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
753
		nr_found = __lookup(node, results + ret, cur_index,
L
Linus Torvalds 已提交
754 755 756 757 758 759
					max_items - ret, &next_index);
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}
760

L
Linus Torvalds 已提交
761 762 763 764 765 766 767 768 769
	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

/*
 * FIXME: the two tag_get()s here should use find_next_bit() instead of
 * open-coding the search.
 */
static unsigned int
770
__lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index,
771
	unsigned int max_items, unsigned long *next_index, unsigned int tag)
L
Linus Torvalds 已提交
772 773
{
	unsigned int nr_found = 0;
774
	unsigned int shift, height;
L
Linus Torvalds 已提交
775

776 777
	height = slot->height;
	if (height == 0)
N
Nick Piggin 已提交
778
		goto out;
779
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
L
Linus Torvalds 已提交
780

781 782
	while (height > 0) {
		unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ;
L
Linus Torvalds 已提交
783

784 785
		for (;;) {
			if (tag_get(slot, tag, i))
L
Linus Torvalds 已提交
786 787 788 789 790
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
791 792 793
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
L
Linus Torvalds 已提交
794 795 796 797 798 799
		}
		height--;
		if (height == 0) {	/* Bottom level: grab some items */
			unsigned long j = index & RADIX_TREE_MAP_MASK;

			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
800
				struct radix_tree_node *node;
L
Linus Torvalds 已提交
801
				index++;
802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
				if (!tag_get(slot, tag, j))
					continue;
				node = slot->slots[j];
				/*
				 * Even though the tag was found set, we need to
				 * recheck that we have a non-NULL node, because
				 * if this lookup is lockless, it may have been
				 * subsequently deleted.
				 *
				 * Similar care must be taken in any place that
				 * lookup ->slots[x] without a lock (ie. can't
				 * rely on its value remaining the same).
				 */
				if (node) {
					node = rcu_dereference(node);
					results[nr_found++] = node;
L
Linus Torvalds 已提交
818 819 820 821 822 823
					if (nr_found == max_items)
						goto out;
				}
			}
		}
		shift -= RADIX_TREE_MAP_SHIFT;
824 825 826 827
		slot = rcu_dereference(slot->slots[i]);
		if (slot == NULL)
			break;
	}
L
Linus Torvalds 已提交
828 829 830 831 832 833 834 835 836 837 838 839
out:
	*next_index = index;
	return nr_found;
}

/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *	                             based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
840
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
L
Linus Torvalds 已提交
841 842 843 844 845 846 847
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the items at *@results and
 *	returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
848 849
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
L
Linus Torvalds 已提交
850
{
851 852
	struct radix_tree_node *node;
	unsigned long max_index;
L
Linus Torvalds 已提交
853
	unsigned long cur_index = first_index;
854
	unsigned int ret;
L
Linus Torvalds 已提交
855

N
Nick Piggin 已提交
856 857 858 859
	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

860 861 862 863
	node = rcu_dereference(root->rnode);
	if (!node)
		return 0;

N
Nick Piggin 已提交
864
	if (!radix_tree_is_indirect_ptr(node)) {
865 866
		if (first_index > 0)
			return 0;
N
Nick Piggin 已提交
867
		results[0] = node;
868 869
		return 1;
	}
N
Nick Piggin 已提交
870
	node = radix_tree_indirect_to_ptr(node);
871 872 873 874

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
L
Linus Torvalds 已提交
875 876 877 878 879 880
	while (ret < max_items) {
		unsigned int nr_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
881
		nr_found = __lookup_tag(node, results + ret, cur_index,
L
Linus Torvalds 已提交
882 883 884 885 886 887
					max_items - ret, &next_index, tag);
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}
888

L
Linus Torvalds 已提交
889 890 891 892
	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

893 894 895 896 897 898 899
/**
 *	radix_tree_shrink    -    shrink height of a radix tree to minimal
 *	@root		radix tree root
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
N
Nick Piggin 已提交
900
	while (root->height > 0) {
901
		struct radix_tree_node *to_free = root->rnode;
902
		void *newptr;
903

N
Nick Piggin 已提交
904 905 906 907 908 909 910 911 912 913 914 915
		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
		to_free = radix_tree_indirect_to_ptr(to_free);

		/*
		 * The candidate node has more than one child, or its child
		 * is not at the leftmost slot, we cannot shrink.
		 */
		if (to_free->count != 1)
			break;
		if (!to_free->slots[0])
			break;

916 917 918 919 920 921 922 923
		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another. If
		 * it was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode).
		 */
		newptr = to_free->slots[0];
N
Nick Piggin 已提交
924 925
		if (root->height > 1)
			newptr = radix_tree_ptr_to_indirect(newptr);
926
		root->rnode = newptr;
927 928 929 930 931 932 933 934 935 936
		root->height--;
		/* must only free zeroed nodes into the slab */
		tag_clear(to_free, 0, 0);
		tag_clear(to_free, 1, 0);
		to_free->slots[0] = NULL;
		to_free->count = 0;
		radix_tree_node_free(to_free);
	}
}

L
Linus Torvalds 已提交
937 938 939 940 941 942 943 944 945 946 947
/**
 *	radix_tree_delete    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Remove the item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
948 949 950 951 952
	/*
	 * The radix tree path needs to be one longer than the maximum path
	 * since the "list" is null terminated.
	 */
	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
N
Nick Piggin 已提交
953
	struct radix_tree_node *slot = NULL;
954
	struct radix_tree_node *to_free;
L
Linus Torvalds 已提交
955
	unsigned int height, shift;
956 957
	int tag;
	int offset;
L
Linus Torvalds 已提交
958 959 960 961 962

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

N
Nick Piggin 已提交
963
	slot = root->rnode;
N
Nick Piggin 已提交
964
	if (height == 0) {
N
Nick Piggin 已提交
965 966 967 968
		root_tag_clear_all(root);
		root->rnode = NULL;
		goto out;
	}
N
Nick Piggin 已提交
969
	slot = radix_tree_indirect_to_ptr(slot);
N
Nick Piggin 已提交
970

L
Linus Torvalds 已提交
971 972 973
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	pathp->node = NULL;

N
Nick Piggin 已提交
974
	do {
975
		if (slot == NULL)
L
Linus Torvalds 已提交
976 977
			goto out;

978
		pathp++;
L
Linus Torvalds 已提交
979
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
980 981
		pathp->offset = offset;
		pathp->node = slot;
982
		slot = slot->slots[offset];
L
Linus Torvalds 已提交
983
		shift -= RADIX_TREE_MAP_SHIFT;
N
Nick Piggin 已提交
984 985
		height--;
	} while (height > 0);
L
Linus Torvalds 已提交
986

N
Nick Piggin 已提交
987
	if (slot == NULL)
L
Linus Torvalds 已提交
988 989 990 991 992
		goto out;

	/*
	 * Clear all tags associated with the just-deleted item
	 */
993
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
N
Nick Piggin 已提交
994 995
		if (tag_get(pathp->node, tag, pathp->offset))
			radix_tree_tag_clear(root, index, tag);
996
	}
L
Linus Torvalds 已提交
997

998
	to_free = NULL;
999
	/* Now free the nodes we do not need anymore */
N
Nick Piggin 已提交
1000
	while (pathp->node) {
1001
		pathp->node->slots[pathp->offset] = NULL;
1002
		pathp->node->count--;
1003 1004 1005 1006 1007 1008
		/*
		 * Queue the node for deferred freeing after the
		 * last reference to it disappears (set NULL, above).
		 */
		if (to_free)
			radix_tree_node_free(to_free);
1009 1010

		if (pathp->node->count) {
N
Nick Piggin 已提交
1011 1012
			if (pathp->node ==
					radix_tree_indirect_to_ptr(root->rnode))
1013
				radix_tree_shrink(root);
1014
			goto out;
1015
		}
1016 1017

		/* Node with zero slots in use so free it */
1018
		to_free = pathp->node;
N
Nick Piggin 已提交
1019
		pathp--;
1020

L
Linus Torvalds 已提交
1021
	}
N
Nick Piggin 已提交
1022
	root_tag_clear_all(root);
1023
	root->height = 0;
N
Nick Piggin 已提交
1024
	root->rnode = NULL;
1025 1026
	if (to_free)
		radix_tree_node_free(to_free);
N
Nick Piggin 已提交
1027

L
Linus Torvalds 已提交
1028
out:
N
Nick Piggin 已提交
1029
	return slot;
L
Linus Torvalds 已提交
1030 1031 1032 1033 1034 1035 1036 1037
}
EXPORT_SYMBOL(radix_tree_delete);

/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 */
1038
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
L
Linus Torvalds 已提交
1039
{
N
Nick Piggin 已提交
1040
	return root_tag_get(root, tag);
L
Linus Torvalds 已提交
1041 1042 1043 1044
}
EXPORT_SYMBOL(radix_tree_tagged);

static void
1045
radix_tree_node_ctor(void *node, struct kmem_cache *cachep, unsigned long flags)
L
Linus Torvalds 已提交
1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075
{
	memset(node, 0, sizeof(struct radix_tree_node));
}

static __init unsigned long __maxindex(unsigned int height)
{
	unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
	unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;

	if (tmp >= RADIX_TREE_INDEX_BITS)
		index = ~0UL;
	return index;
}

static __init void radix_tree_init_maxindex(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
		height_to_maxindex[i] = __maxindex(i);
}

static int radix_tree_callback(struct notifier_block *nfb,
                            unsigned long action,
                            void *hcpu)
{
       int cpu = (long)hcpu;
       struct radix_tree_preload *rtp;

       /* Free per-cpu pool of perloaded nodes */
1076
       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
L
Linus Torvalds 已提交
1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
               rtp = &per_cpu(radix_tree_preloads, cpu);
               while (rtp->nr) {
                       kmem_cache_free(radix_tree_node_cachep,
                                       rtp->nodes[rtp->nr-1]);
                       rtp->nodes[rtp->nr-1] = NULL;
                       rtp->nr--;
               }
       }
       return NOTIFY_OK;
}

void __init radix_tree_init(void)
{
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
1092
			SLAB_PANIC, radix_tree_node_ctor);
L
Linus Torvalds 已提交
1093 1094 1095
	radix_tree_init_maxindex();
	hotcpu_notifier(radix_tree_callback, 0);
}