diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 7a8d2516c73a8d7c570e2b14494bc5287c22339d..935293a24f7d4f3b1a17a8a0f2fc0ee626afc38c 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -335,6 +335,9 @@ static inline void radix_tree_preload_end(void) preempt_enable(); } +int radix_tree_join(struct radix_tree_root *, unsigned long index, + unsigned new_order, void *); + #define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ #define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ #define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d1f4ca5b798961cf5e7bdc2b4199092a2c17a34c..962cfb3a76713572d8b16ccda80bbb4f681094bd 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -339,17 +339,14 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); - int i; /* - * must only free zeroed nodes into the slab. radix_tree_shrink - * can leave us with a non-NULL entry in the first slot, so clear - * that here to make sure. + * Must only free zeroed nodes into the slab. We can be left with + * non-NULL entries by radix_tree_free_nodes, so clear the entries + * and tags here. */ - for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) - tag_clear(node, i, 0); - - node->slots[0] = NULL; + memset(node->slots, 0, sizeof(node->slots)); + memset(node->tags, 0, sizeof(node->tags)); INIT_LIST_HEAD(&node->private_list); kmem_cache_free(radix_tree_node_cachep, node); @@ -678,14 +675,14 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, shift = radix_tree_load_root(root, &child, &maxindex); /* Make sure the tree is high enough. */ + if (order > 0 && max == ((1UL << order) - 1)) + max++; if (max > maxindex) { int error = radix_tree_extend(root, max, shift); if (error < 0) return error; shift = error; child = root->rnode; - if (order == shift) - shift += RADIX_TREE_MAP_SHIFT; } while (shift > order) { @@ -697,6 +694,8 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, return -ENOMEM; child->shift = shift; child->offset = offset; + child->count = 0; + child->exceptional = 0; child->parent = node; rcu_assign_pointer(*slot, node_to_entry(child)); if (node) @@ -710,31 +709,121 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = &node->slots[offset]; } + if (nodep) + *nodep = node; + if (slotp) + *slotp = slot; + return 0; +} + #ifdef CONFIG_RADIX_TREE_MULTIORDER - /* Insert pointers to the canonical entry */ - if (order > shift) { - unsigned i, n = 1 << (order - shift); +/* + * Free any nodes below this node. The tree is presumed to not need + * shrinking, and any user data in the tree is presumed to not need a + * destructor called on it. If we need to add a destructor, we can + * add that functionality later. Note that we may not clear tags or + * slots from the tree as an RCU walker may still have a pointer into + * this subtree. We could replace the entries with RADIX_TREE_RETRY, + * but we'll still have to clear those in rcu_free. + */ +static void radix_tree_free_nodes(struct radix_tree_node *node) +{ + unsigned offset = 0; + struct radix_tree_node *child = entry_to_node(node); + + for (;;) { + void *entry = child->slots[offset]; + if (radix_tree_is_internal_node(entry) && + !is_sibling_entry(child, entry)) { + child = entry_to_node(entry); + offset = 0; + continue; + } + offset++; + while (offset == RADIX_TREE_MAP_SIZE) { + struct radix_tree_node *old = child; + offset = child->offset + 1; + child = child->parent; + radix_tree_node_free(old); + if (old == entry_to_node(node)) + return; + } + } +} + +static inline int insert_entries(struct radix_tree_node *node, void **slot, + void *item, unsigned order, bool replace) +{ + struct radix_tree_node *child; + unsigned i, n, tag, offset, tags = 0; + + if (node) { + n = 1 << (order - node->shift); + offset = get_slot_offset(node, slot); + } else { + n = 1; + offset = 0; + } + + if (n > 1) { offset = offset & ~(n - 1); slot = &node->slots[offset]; - child = node_to_entry(slot); - for (i = 0; i < n; i++) { - if (slot[i]) + } + child = node_to_entry(slot); + + for (i = 0; i < n; i++) { + if (slot[i]) { + if (replace) { + node->count--; + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tag_get(node, tag, offset + i)) + tags |= 1 << tag; + } else return -EEXIST; } + } - for (i = 1; i < n; i++) { + for (i = 0; i < n; i++) { + struct radix_tree_node *old = slot[i]; + if (i) { rcu_assign_pointer(slot[i], child); - node->count++; + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tags & (1 << tag)) + tag_clear(node, tag, offset + i); + } else { + rcu_assign_pointer(slot[i], item); + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tags & (1 << tag)) + tag_set(node, tag, offset); } + if (radix_tree_is_internal_node(old) && + !is_sibling_entry(node, old)) + radix_tree_free_nodes(old); + if (radix_tree_exceptional_entry(old)) + node->exceptional--; } -#endif - - if (nodep) - *nodep = node; - if (slotp) - *slotp = slot; - return 0; + if (node) { + node->count += n; + if (radix_tree_exceptional_entry(item)) + node->exceptional += n; + } + return n; } +#else +static inline int insert_entries(struct radix_tree_node *node, void **slot, + void *item, unsigned order, bool replace) +{ + if (*slot) + return -EEXIST; + rcu_assign_pointer(*slot, item); + if (node) { + node->count++; + if (radix_tree_exceptional_entry(item)) + node->exceptional++; + } + return 1; +} +#endif /** * __radix_tree_insert - insert into a radix tree @@ -757,15 +846,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, error = __radix_tree_create(root, index, order, &node, &slot); if (error) return error; - if (*slot != NULL) - return -EEXIST; - rcu_assign_pointer(*slot, item); + + error = insert_entries(node, slot, item, order, false); + if (error < 0) + return error; if (node) { unsigned offset = get_slot_offset(node, slot); - node->count++; - if (radix_tree_exceptional_entry(item)) - node->exceptional++; BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, offset)); BUG_ON(tag_get(node, 2, offset)); @@ -942,6 +1029,40 @@ void radix_tree_replace_slot(struct radix_tree_root *root, replace_slot(root, NULL, slot, item, true); } +#ifdef CONFIG_RADIX_TREE_MULTIORDER +/** + * radix_tree_join - replace multiple entries with one multiorder entry + * @root: radix tree root + * @index: an index inside the new entry + * @order: order of the new entry + * @item: new entry + * + * Call this function to replace several entries with one larger entry. + * The existing entries are presumed to not need freeing as a result of + * this call. + * + * The replacement entry will have all the tags set on it that were set + * on any of the entries it is replacing. + */ +int radix_tree_join(struct radix_tree_root *root, unsigned long index, + unsigned order, void *item) +{ + struct radix_tree_node *node; + void **slot; + int error; + + BUG_ON(radix_tree_is_internal_node(item)); + + error = __radix_tree_create(root, index, order, &node, &slot); + if (!error) + error = insert_entries(node, slot, item, order, true); + if (error > 0) + error = 0; + + return error; +} +#endif + /** * radix_tree_tag_set - set a tag on a radix tree node * @root: radix tree root diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 86daf23b3509620de06a8b0d2daee0ca58c69188..c9f656cf5f5291a43d408ac2fd0cbe3372cb8fac 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -332,6 +332,63 @@ void multiorder_tagged_iteration(void) item_kill_tree(&tree); } +static void __multiorder_join(unsigned long index, + unsigned order1, unsigned order2) +{ + unsigned long loc; + void *item, *item2 = item_create(index + 1, order1); + RADIX_TREE(tree, GFP_KERNEL); + + item_insert_order(&tree, index, order2); + item = radix_tree_lookup(&tree, index); + radix_tree_join(&tree, index + 1, order1, item2); + loc = find_item(&tree, item); + if (loc == -1) + free(item); + item = radix_tree_lookup(&tree, index + 1); + assert(item == item2); + item_kill_tree(&tree); +} + +static void __multiorder_join2(unsigned order1, unsigned order2) +{ + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_node *node; + void *item1 = item_create(0, order1); + void *item2; + + item_insert_order(&tree, 0, order2); + radix_tree_insert(&tree, 1 << order2, (void *)0x12UL); + item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); + assert(item2 == (void *)0x12UL); + assert(node->exceptional == 1); + + radix_tree_join(&tree, 0, order1, item1); + item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); + assert(item2 == item1); + assert(node->exceptional == 0); + item_kill_tree(&tree); +} + +static void multiorder_join(void) +{ + int i, j, idx; + + for (idx = 0; idx < 1024; idx = idx * 2 + 3) { + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + __multiorder_join(idx, i, j); + } + } + } + + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + __multiorder_join2(i, j); + } + } +} + void multiorder_checks(void) { int i; @@ -349,4 +406,5 @@ void multiorder_checks(void) multiorder_tag_tests(); multiorder_iteration(); multiorder_tagged_iteration(); + multiorder_join(); }