ctree.c 27.9 KB
Newer Older
1 2 3
#include <stdio.h>
#include <stdlib.h>
#include "kerncompat.h"
4 5 6
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
C
Chris Mason 已提交
7
#include "print-tree.h"
8

C
Chris Mason 已提交
9 10
/*
 * forward declarations for the balancing helpers that are called
 * before their definitions further down in this file
 */
int split_node(struct ctree_root *root, struct ctree_path *path, int level);
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
int push_node_left(struct ctree_root *root, struct ctree_path *path, int level);
int push_node_right(struct ctree_root *root,
		    struct ctree_path *path, int level);
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
15

C
Chris Mason 已提交
16
inline void init_path(struct ctree_path *p)
17 18 19 20
{
	memset(p, 0, sizeof(*p));
}

C
Chris Mason 已提交
21
void release_path(struct ctree_root *root, struct ctree_path *p)
22 23 24 25 26 27 28 29 30
{
	int i;
	for (i = 0; i < MAX_LEVEL; i++) {
		if (!p->nodes[i])
			break;
		tree_block_release(root, p->nodes[i]);
	}
}

C
Chris Mason 已提交
31 32 33 34 35
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
36 37 38 39
static inline unsigned int leaf_data_end(struct leaf *leaf)
{
	unsigned int nr = leaf->header.nritems;
	if (nr == 0)
40
		return sizeof(leaf->data);
41 42 43
	return leaf->items[nr-1].offset;
}

C
Chris Mason 已提交
44 45 46 47 48
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
C
Chris Mason 已提交
49
int leaf_free_space(struct leaf *leaf)
50 51 52 53 54 55 56
{
	int data_end = leaf_data_end(leaf);
	int nritems = leaf->header.nritems;
	char *items_end = (char *)(leaf->items + nritems + 1);
	return (char *)(leaf->data + data_end) - (char *)items_end;
}

C
Chris Mason 已提交
57 58 59
/*
 * compare two keys in a memcmp fashion
 */
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
int comp_keys(struct key *k1, struct key *k2)
{
	/*
	 * fields are checked in order of significance: objectid first,
	 * then flags, then offset.  The first field that differs decides
	 * the result (1 when k1 is bigger, -1 when it is smaller).
	 */
	if (k1->objectid != k2->objectid)
		return k1->objectid > k2->objectid ? 1 : -1;
	if (k1->flags != k2->flags)
		return k1->flags > k2->flags ? 1 : -1;
	if (k1->offset != k2->offset)
		return k1->offset > k2->offset ? 1 : -1;

	/* every field matched */
	return 0;
}
C
Chris Mason 已提交
76 77 78 79 80 81 82 83 84 85

/*
 * search for key in the array p.  items p are item_size apart
 * and there are 'max' items in p
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
int generic_bin_search(char *p, int item_size, struct key *key,
		       int max, int *slot)
{
	int lo = 0;
	int hi = max;

	/*
	 * classic half-open binary search over [lo, hi): everything
	 * before lo compares smaller than key, everything from hi on
	 * compares bigger
	 */
	while (lo < hi) {
		int probe = (lo + hi) / 2;
		struct key *cur = (struct key *)(p + probe * item_size);
		int cmp = comp_keys(cur, key);

		if (cmp == 0) {
			/* exact match, report where it lives */
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}

	/* not found: lo is the insertion point (may be equal to max) */
	*slot = lo;
	return 1;
}

int bin_search(struct node *c, struct key *key, int *slot)
{
	char *base;
	int stride;

	/*
	 * leaves keep their keys inside the item headers, interior nodes
	 * keep a plain array of keys.  Pick the right array and element
	 * size, then let the generic search do the work.
	 */
	if (is_leaf(c->header.flags)) {
		struct leaf *l = (struct leaf *)c;

		base = (void *)l->items;
		stride = sizeof(struct item);
	} else {
		base = (void *)c->keys;
		stride = sizeof(struct key);
	}
	return generic_bin_search(base, stride, key, c->header.nritems, slot);
}

C
Chris Mason 已提交
126 127 128 129 130 131 132 133
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted.
 */
C
Chris Mason 已提交
134 135
int search_slot(struct ctree_root *root, struct key *key,
		struct ctree_path *p, int ins_len)
136
{
137 138
	struct tree_buffer *b = root->node;
	struct node *c;
139 140 141
	int slot;
	int ret;
	int level;
C
Chris Mason 已提交
142

143 144 145
	b->count++;
	while (b) {
		c = &b->node;
146
		level = node_level(c->header.flags);
147
		p->nodes[level] = b;
148 149 150 151 152
		ret = bin_search(c, key, &slot);
		if (!is_leaf(c->header.flags)) {
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
C
Chris Mason 已提交
153 154
			if (ins_len > 0 &&
			    c->header.nritems == NODEPTRS_PER_BLOCK) {
C
Chris Mason 已提交
155 156 157 158 159 160 161
				int sret = split_node(root, p, level);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
				b = p->nodes[level];
				c = &b->node;
				slot = p->slots[level];
C
Chris Mason 已提交
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
			} else if (ins_len < 0 &&
				   c->header.nritems <= NODEPTRS_PER_BLOCK/4) {
				u64 blocknr = b->blocknr;
				slot = p->slots[level +1];
				b->count++;
				if (push_node_left(root, p, level))
					push_node_right(root, p, level);
				if (c->header.nritems == 0 &&
				    level < MAX_LEVEL - 1 &&
				    p->nodes[level + 1]) {
					int tslot = p->slots[level + 1];

					p->slots[level + 1] = slot;
					del_ptr(root, p, level + 1);
					p->slots[level + 1] = tslot;
					tree_block_release(root, b);
					free_extent(root, blocknr, 1);
				} else {
					tree_block_release(root, b);
				}
				b = p->nodes[level];
				c = &b->node;
				slot = p->slots[level];
C
Chris Mason 已提交
185
			}
186
			b = read_tree_block(root, c->blockptrs[slot]);
187 188
			continue;
		} else {
C
Chris Mason 已提交
189
			struct leaf *l = (struct leaf *)c;
190
			p->slots[level] = slot;
C
Chris Mason 已提交
191 192
			if (ins_len > 0 && leaf_free_space(l) <
			    sizeof(struct item) + ins_len) {
C
Chris Mason 已提交
193 194 195 196 197
				int sret = split_leaf(root, p, ins_len);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
			}
198 199 200 201 202 203
			return ret;
		}
	}
	return -1;
}

C
Chris Mason 已提交
204 205 206 207 208 209 210
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 */
211 212 213
static void fixup_low_keys(struct ctree_root *root,
			   struct ctree_path *path, struct key *key,
			   int level)
214 215 216
{
	int i;
	for (i = level; i < MAX_LEVEL; i++) {
217
		struct node *t;
218
		int tslot = path->slots[i];
219
		if (!path->nodes[i])
220
			break;
221
		t = &path->nodes[i]->node;
222
		memcpy(t->keys + tslot, key, sizeof(*key));
223
		write_tree_block(root, path->nodes[i]);
224 225 226 227 228
		if (tslot != 0)
			break;
	}
}

C
Chris Mason 已提交
229 230 231 232 233 234 235 236 237 238
/*
 * try to push data from one node into the next node left in the
 * tree.  The src node is found at specified level in the path.
 * If some bytes were pushed, return 0, otherwise return 1.
 *
 * Lower nodes/leaves in the path are not touched, higher nodes may
 * be modified to reflect the push.
 *
 * The path is altered to reflect the push.
 */
239 240 241 242 243 244 245 246
int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
	struct node *left;
	struct node *right;
	int push_items = 0;
	int left_nritems;
	int right_nritems;
247 248
	struct tree_buffer *t;
	struct tree_buffer *right_buf;
249 250 251 252 253 254 255

	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
		return 1;
	slot = path->slots[level + 1];
	if (slot == 0)
		return 1;

256 257 258 259 260
	t = read_tree_block(root,
		            path->nodes[level + 1]->node.blockptrs[slot - 1]);
	left = &t->node;
	right_buf = path->nodes[level];
	right = &right_buf->node;
261 262 263
	left_nritems = left->header.nritems;
	right_nritems = right->header.nritems;
	push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
264 265
	if (push_items <= 0) {
		tree_block_release(root, t);
266
		return 1;
267
	}
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282

	if (right_nritems < push_items)
		push_items = right_nritems;
	memcpy(left->keys + left_nritems, right->keys,
		push_items * sizeof(struct key));
	memcpy(left->blockptrs + left_nritems, right->blockptrs,
		push_items * sizeof(u64));
	memmove(right->keys, right->keys + push_items,
		(right_nritems - push_items) * sizeof(struct key));
	memmove(right->blockptrs, right->blockptrs + push_items,
		(right_nritems - push_items) * sizeof(u64));
	right->header.nritems -= push_items;
	left->header.nritems += push_items;

	/* adjust the pointers going up the tree */
283 284 285 286
	fixup_low_keys(root, path, right->keys, level + 1);

	write_tree_block(root, t);
	write_tree_block(root, right_buf);
287 288 289 290

	/* then fixup the leaf pointer in the path */
	if (path->slots[level] < push_items) {
		path->slots[level] += left_nritems;
291 292
		tree_block_release(root, path->nodes[level]);
		path->nodes[level] = t;
293 294 295
		path->slots[level + 1] -= 1;
	} else {
		path->slots[level] -= push_items;
296
		tree_block_release(root, t);
297 298 299 300
	}
	return 0;
}

C
Chris Mason 已提交
301 302 303 304 305 306 307 308 309 310
/*
 * try to push data from one node into the next node right in the
 * tree.  The src node is found at specified level in the path.
 * If some bytes were pushed, return 0, otherwise return 1.
 *
 * Lower nodes/leaves in the path are not touched, higher nodes may
 * be modified to reflect the push.
 *
 * The path is altered to reflect the push.
 */
311 312 313
int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
314 315
	struct tree_buffer *t;
	struct tree_buffer *src_buffer;
316 317 318 319 320 321
	struct node *dst;
	struct node *src;
	int push_items = 0;
	int dst_nritems;
	int src_nritems;

C
Chris Mason 已提交
322
	/* can't push from the root */
323 324
	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
		return 1;
C
Chris Mason 已提交
325 326

	/* only try to push inside the node higher up */
327 328 329 330
	slot = path->slots[level + 1];
	if (slot == NODEPTRS_PER_BLOCK - 1)
		return 1;

331
	if (slot >= path->nodes[level + 1]->node.header.nritems -1)
332 333
		return 1;

334 335 336 337 338
	t = read_tree_block(root,
			    path->nodes[level + 1]->node.blockptrs[slot + 1]);
	dst = &t->node;
	src_buffer = path->nodes[level];
	src = &src_buffer->node;
339 340 341
	dst_nritems = dst->header.nritems;
	src_nritems = src->header.nritems;
	push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
342 343
	if (push_items <= 0) {
		tree_block_release(root, t);
344
		return 1;
345
	}
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362

	if (src_nritems < push_items)
		push_items = src_nritems;
	memmove(dst->keys + push_items, dst->keys,
		dst_nritems * sizeof(struct key));
	memcpy(dst->keys, src->keys + src_nritems - push_items,
		push_items * sizeof(struct key));

	memmove(dst->blockptrs + push_items, dst->blockptrs,
		dst_nritems * sizeof(u64));
	memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
		push_items * sizeof(u64));

	src->header.nritems -= push_items;
	dst->header.nritems += push_items;

	/* adjust the pointers going up the tree */
363
	memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
364
		dst->keys, sizeof(struct key));
365 366 367 368 369

	write_tree_block(root, path->nodes[level + 1]);
	write_tree_block(root, t);
	write_tree_block(root, src_buffer);

C
Chris Mason 已提交
370
	/* then fixup the pointers in the path */
371 372
	if (path->slots[level] >= src->header.nritems) {
		path->slots[level] -= src->header.nritems;
373 374
		tree_block_release(root, path->nodes[level]);
		path->nodes[level] = t;
375
		path->slots[level + 1] += 1;
376 377
	} else {
		tree_block_release(root, t);
378 379 380 381
	}
	return 0;
}

C
Chris Mason 已提交
382 383
static int insert_new_root(struct ctree_root *root,
			   struct ctree_path *path, int level)
C
Chris Mason 已提交
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
{
	struct tree_buffer *t;
	struct node *lower;
	struct node *c;
	struct key *lower_key;

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	t = alloc_free_block(root);
	c = &t->node;
	memset(c, 0, sizeof(c));
	c->header.nritems = 1;
	c->header.flags = node_level(level);
	c->header.blocknr = t->blocknr;
	c->header.parentid = root->node->node.header.parentid;
	lower = &path->nodes[level-1]->node;
	if (is_leaf(lower->header.flags))
		lower_key = &((struct leaf *)lower)->items[0].key;
	else
		lower_key = lower->keys;
	memcpy(c->keys, lower_key, sizeof(struct key));
	c->blockptrs[0] = path->nodes[level-1]->blocknr;
	/* the super has an extra ref to root->node */
	tree_block_release(root, root->node);
	root->node = t;
	t->count++;
	write_tree_block(root, t);
	path->nodes[level] = t;
	path->slots[level] = 0;
	return 0;
}

C
Chris Mason 已提交
417 418 419 420 421 422
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
 */
C
Chris Mason 已提交
423
int insert_ptr(struct ctree_root *root,
C
Chris Mason 已提交
424 425 426 427 428
		struct ctree_path *path, struct key *key,
		u64 blocknr, int slot, int level)
{
	struct node *lower;
	int nritems;
C
Chris Mason 已提交
429 430

	BUG_ON(!path->nodes[level]);
C
Chris Mason 已提交
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
	lower = &path->nodes[level]->node;
	nritems = lower->header.nritems;
	if (slot > nritems)
		BUG();
	if (nritems == NODEPTRS_PER_BLOCK)
		BUG();
	if (slot != nritems) {
		memmove(lower->keys + slot + 1, lower->keys + slot,
			(nritems - slot) * sizeof(struct key));
		memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
			(nritems - slot) * sizeof(u64));
	}
	memcpy(lower->keys + slot, key, sizeof(struct key));
	lower->blockptrs[slot] = blocknr;
	lower->header.nritems++;
	if (lower->keys[1].objectid == 0)
			BUG();
	write_tree_block(root, path->nodes[level]);
	return 0;
}

C
Chris Mason 已提交
452
int split_node(struct ctree_root *root, struct ctree_path *path, int level)
453
{
C
Chris Mason 已提交
454 455 456 457
	struct tree_buffer *t;
	struct node *c;
	struct tree_buffer *split_buffer;
	struct node *split;
458
	int mid;
C
Chris Mason 已提交
459
	int ret;
460

C
Chris Mason 已提交
461 462 463 464 465 466 467 468 469 470 471 472 473
	ret = push_node_left(root, path, level);
	if (!ret)
		return 0;
	ret = push_node_right(root, path, level);
	if (!ret)
		return 0;
	t = path->nodes[level];
	c = &t->node;
	if (t == root->node) {
		/* trying to split the root, lets make a new one */
		ret = insert_new_root(root, path, level + 1);
		if (ret)
			return ret;
474
	}
C
Chris Mason 已提交
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
	split_buffer = alloc_free_block(root);
	split = &split_buffer->node;
	split->header.flags = c->header.flags;
	split->header.blocknr = split_buffer->blocknr;
	split->header.parentid = root->node->node.header.parentid;
	mid = (c->header.nritems + 1) / 2;
	memcpy(split->keys, c->keys + mid,
		(c->header.nritems - mid) * sizeof(struct key));
	memcpy(split->blockptrs, c->blockptrs + mid,
		(c->header.nritems - mid) * sizeof(u64));
	split->header.nritems = c->header.nritems - mid;
	c->header.nritems = mid;
	write_tree_block(root, t);
	write_tree_block(root, split_buffer);
	insert_ptr(root, path, split->keys, split_buffer->blocknr,
		     path->slots[level + 1] + 1, level + 1);
C
Chris Mason 已提交
491
	if (path->slots[level] >= mid) {
C
Chris Mason 已提交
492 493 494 495 496 497
		path->slots[level] -= mid;
		tree_block_release(root, t);
		path->nodes[level] = split_buffer;
		path->slots[level + 1] += 1;
	} else {
		tree_block_release(root, split_buffer);
498
	}
C
Chris Mason 已提交
499
	return 0;
500 501
}

C
Chris Mason 已提交
502 503 504 505 506
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
507 508 509 510 511 512 513 514 515 516 517 518 519
int leaf_space_used(struct leaf *l, int start, int nr)
{
	int first = start;
	int last = start + nr - 1;
	int bytes;

	/* an empty range takes no room at all */
	if (nr == 0)
		return 0;

	/*
	 * data is stacked back to front, so the range's data runs from
	 * the offset of the last item up to the end of the first item
	 */
	bytes = l->items[first].offset + l->items[first].size;
	bytes = bytes - l->items[last].offset;

	/* plus one item header for each item in the range */
	bytes += sizeof(struct item) * nr;
	return bytes;
}

C
Chris Mason 已提交
520 521 522 523
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
524 525 526
int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
		   int data_size)
{
527 528 529
	struct tree_buffer *right_buf = path->nodes[0];
	struct leaf *right = &right_buf->leaf;
	struct tree_buffer *t;
530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545
	struct leaf *left;
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct item *item;
	int old_left_nritems;

	slot = path->slots[1];
	if (slot == 0) {
		return 1;
	}
	if (!path->nodes[1]) {
		return 1;
	}
546 547
	t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
	left = &t->leaf;
548 549
	free_space = leaf_free_space(left);
	if (free_space < data_size + sizeof(struct item)) {
550
		tree_block_release(root, t);
551 552 553 554 555 556 557 558 559 560 561 562
		return 1;
	}
	for (i = 0; i < right->header.nritems; i++) {
		item = right->items + i;
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
		if (item->size + sizeof(*item) + push_space > free_space)
			break;
		push_items++;
		push_space += item->size + sizeof(*item);
	}
	if (push_items == 0) {
563
		tree_block_release(root, t);
564 565 566 567 568 569 570 571 572 573
		return 1;
	}
	/* push data from right to left */
	memcpy(left->items + left->header.nritems,
		right->items, push_items * sizeof(struct item));
	push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
	memcpy(left->data + leaf_data_end(left) - push_space,
		right->data + right->items[push_items - 1].offset,
		push_space);
	old_left_nritems = left->header.nritems;
574 575
	BUG_ON(old_left_nritems < 0);

576 577 578 579 580 581 582 583 584 585 586 587 588 589
	for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		left->items[i].offset -= LEAF_DATA_SIZE -
			left->items[old_left_nritems -1].offset;
	}
	left->header.nritems += push_items;

	/* fixup right node */
	push_space = right->items[push_items-1].offset - leaf_data_end(right);
	memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
		leaf_data_end(right), push_space);
	memmove(right->items, right->items + push_items,
		(right->header.nritems - push_items) * sizeof(struct item));
	right->header.nritems -= push_items;
	push_space = LEAF_DATA_SIZE;
590

591 592 593 594
	for (i = 0; i < right->header.nritems; i++) {
		right->items[i].offset = push_space - right->items[i].size;
		push_space = right->items[i].offset;
	}
595 596 597 598 599

	write_tree_block(root, t);
	write_tree_block(root, right_buf);

	fixup_low_keys(root, path, &right->items[0].key, 1);
600 601 602 603

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
604 605
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = t;
606 607
		path->slots[1] -= 1;
	} else {
608
		tree_block_release(root, t);
609 610
		path->slots[0] -= push_items;
	}
611
	BUG_ON(path->slots[0] < 0);
612 613 614
	return 0;
}

C
Chris Mason 已提交
615 616 617 618
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 */
619 620
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
{
621 622 623 624 625
	struct tree_buffer *l_buf = path->nodes[0];
	struct leaf *l = &l_buf->leaf;
	int nritems;
	int mid;
	int slot;
626
	struct leaf *right;
627
	struct tree_buffer *right_buffer;
628 629 630 631 632 633 634
	int space_needed = data_size + sizeof(struct item);
	int data_copy_size;
	int rt_data_off;
	int i;
	int ret;

	if (push_leaf_left(root, path, data_size) == 0) {
635 636 637 638
		l_buf = path->nodes[0];
		l = &l_buf->leaf;
		if (leaf_free_space(l) >= sizeof(struct item) + data_size)
			return 0;
639
	}
C
Chris Mason 已提交
640 641 642 643 644
	if (!path->nodes[1]) {
		ret = insert_new_root(root, path, 1);
		if (ret)
			return ret;
	}
645 646 647 648 649 650 651 652
	slot = path->slots[0];
	nritems = l->header.nritems;
	mid = (nritems + 1)/ 2;

	right_buffer = alloc_free_block(root);
	BUG_ON(!right_buffer);
	BUG_ON(mid == nritems);
	right = &right_buffer->leaf;
653 654 655 656 657 658 659 660 661 662 663
	memset(right, 0, sizeof(*right));
	if (mid <= slot) {
		if (leaf_space_used(l, mid, nritems - mid) + space_needed >
			LEAF_DATA_SIZE)
			BUG();
	} else {
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
			LEAF_DATA_SIZE)
			BUG();
	}
	right->header.nritems = nritems - mid;
664 665
	right->header.blocknr = right_buffer->blocknr;
	right->header.flags = node_level(0);
C
Chris Mason 已提交
666
	right->header.parentid = root->node->node.header.parentid;
667 668 669 670 671 672 673 674
	data_copy_size = l->items[mid].offset + l->items[mid].size -
			 leaf_data_end(l);
	memcpy(right->items, l->items + mid,
	       (nritems - mid) * sizeof(struct item));
	memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
	       l->data + leaf_data_end(l), data_copy_size);
	rt_data_off = LEAF_DATA_SIZE -
		     (l->items[mid].offset + l->items[mid].size);
C
Chris Mason 已提交
675 676

	for (i = 0; i < right->header.nritems; i++)
677
		right->items[i].offset += rt_data_off;
C
Chris Mason 已提交
678

679 680
	l->header.nritems = mid;
	ret = insert_ptr(root, path, &right->items[0].key,
C
Chris Mason 已提交
681
			  right_buffer->blocknr, path->slots[1] + 1, 1);
682 683 684 685
	write_tree_block(root, right_buffer);
	write_tree_block(root, l_buf);

	BUG_ON(path->slots[0] != slot);
686
	if (mid <= slot) {
687 688
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = right_buffer;
689 690
		path->slots[0] -= mid;
		path->slots[1] += 1;
691 692 693
	} else
		tree_block_release(root, right_buffer);
	BUG_ON(path->slots[0] < 0);
694 695 696
	return ret;
}

C
Chris Mason 已提交
697 698 699 700
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
701 702 703 704 705
int insert_item(struct ctree_root *root, struct key *key,
			  void *data, int data_size)
{
	int ret;
	int slot;
706
	int slot_orig;
707
	struct leaf *leaf;
708
	struct tree_buffer *leaf_buf;
709 710 711 712
	unsigned int nritems;
	unsigned int data_end;
	struct ctree_path path;

C
Chris Mason 已提交
713
	/* create a root if there isn't one */
C
Chris Mason 已提交
714
	if (!root->node)
C
Chris Mason 已提交
715
		BUG();
716
	init_path(&path);
C
Chris Mason 已提交
717
	ret = search_slot(root, key, &path, data_size);
718 719
	if (ret == 0) {
		release_path(root, &path);
720
		return -EEXIST;
721
	}
722

723 724 725
	slot_orig = path.slots[0];
	leaf_buf = path.nodes[0];
	leaf = &leaf_buf->leaf;
C
Chris Mason 已提交
726

727 728
	nritems = leaf->header.nritems;
	data_end = leaf_data_end(leaf);
729

730 731 732 733
	if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
		BUG();

	slot = path.slots[0];
734
	BUG_ON(slot < 0);
735
	if (slot == 0)
736
		fixup_low_keys(root, &path, key, 1);
737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757
	if (slot != nritems) {
		int i;
		unsigned int old_data = leaf->items[slot].offset +
					leaf->items[slot].size;

		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		for (i = slot; i < nritems; i++)
			leaf->items[i].offset -= data_size;

		/* shift the items */
		memmove(leaf->items + slot + 1, leaf->items + slot,
		        (nritems - slot) * sizeof(struct item));

		/* shift the data */
		memmove(leaf->data + data_end - data_size, leaf->data +
		        data_end, old_data - data_end);
		data_end = old_data;
	}
C
Chris Mason 已提交
758
	/* copy the new data in */
759 760 761 762 763
	memcpy(&leaf->items[slot].key, key, sizeof(struct key));
	leaf->items[slot].offset = data_end - data_size;
	leaf->items[slot].size = data_size;
	memcpy(leaf->data + data_end - data_size, data, data_size);
	leaf->header.nritems += 1;
764
	write_tree_block(root, leaf_buf);
765 766
	if (leaf_free_space(leaf) < 0)
		BUG();
767
	release_path(root, &path);
768 769 770
	return 0;
}

C
Chris Mason 已提交
771
/*
C
Chris Mason 已提交
772
 * delete the pointer from a given node.
C
Chris Mason 已提交
773 774 775 776 777
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way the root if required.  The root is converted into
 * a leaf if all the nodes are emptied.
 */
778 779 780
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
781
	struct tree_buffer *t;
782 783
	struct node *node;
	int nritems;
784
	u64 blocknr;
785 786

	while(1) {
787 788
		t = path->nodes[level];
		if (!t)
789
			break;
790
		node = &t->node;
791 792 793 794 795 796 797 798 799 800 801
		slot = path->slots[level];
		nritems = node->header.nritems;

		if (slot != nritems -1) {
			memmove(node->keys + slot, node->keys + slot + 1,
				sizeof(struct key) * (nritems - slot - 1));
			memmove(node->blockptrs + slot,
				node->blockptrs + slot + 1,
				sizeof(u64) * (nritems - slot - 1));
		}
		node->header.nritems--;
802
		write_tree_block(root, t);
803
		blocknr = t->blocknr;
804 805
		if (node->header.nritems != 0) {
			if (slot == 0)
806 807
				fixup_low_keys(root, path, node->keys,
					       level + 1);
C
Chris Mason 已提交
808
			break;
809
		}
810 811 812 813
		if (t == root->node) {
			/* just turn the root into a leaf and break */
			root->node->node.header.flags = node_level(0);
			write_tree_block(root, t);
814 815 816
			break;
		}
		level++;
817
		free_extent(root, blocknr, 1);
818 819 820 821 822 823
		if (!path->nodes[level])
			BUG();
	}
	return 0;
}

C
Chris Mason 已提交
824 825 826 827
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
828
int del_item(struct ctree_root *root, struct ctree_path *path)
829 830 831
{
	int slot;
	struct leaf *leaf;
832
	struct tree_buffer *leaf_buf;
833 834 835
	int doff;
	int dsize;

836 837
	leaf_buf = path->nodes[0];
	leaf = &leaf_buf->leaf;
838
	slot = path->slots[0];
839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
	doff = leaf->items[slot].offset;
	dsize = leaf->items[slot].size;

	if (slot != leaf->header.nritems - 1) {
		int i;
		int data_end = leaf_data_end(leaf);
		memmove(leaf->data + data_end + dsize,
			leaf->data + data_end,
			doff - data_end);
		for (i = slot + 1; i < leaf->header.nritems; i++)
			leaf->items[i].offset += dsize;
		memmove(leaf->items + slot, leaf->items + slot + 1,
			sizeof(struct item) *
			(leaf->header.nritems - slot - 1));
	}
	leaf->header.nritems -= 1;
C
Chris Mason 已提交
855
	/* delete the leaf if we've emptied it */
856
	if (leaf->header.nritems == 0) {
857 858 859
		if (leaf_buf == root->node) {
			leaf->header.flags = node_level(0);
			write_tree_block(root, leaf_buf);
860
		} else {
861
			del_ptr(root, path, 1);
862 863
			free_extent(root, leaf_buf->blocknr, 1);
		}
864
	} else {
C
Chris Mason 已提交
865
		int used = leaf_space_used(leaf, 0, leaf->header.nritems);
866
		if (slot == 0)
867 868
			fixup_low_keys(root, path, &leaf->items[0].key, 1);
		write_tree_block(root, leaf_buf);
C
Chris Mason 已提交
869
		/* delete the leaf if it is mostly empty */
C
Chris Mason 已提交
870
		if (used < LEAF_DATA_SIZE / 3) {
871 872 873 874
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
875
			slot = path->slots[1];
876
			leaf_buf->count++;
877
			push_leaf_left(root, path, 1);
878
			if (leaf->header.nritems == 0) {
C
Chris Mason 已提交
879
				u64 blocknr = leaf_buf->blocknr;
880 881
				path->slots[1] = slot;
				del_ptr(root, path, 1);
C
Chris Mason 已提交
882 883 884 885
				tree_block_release(root, leaf_buf);
				free_extent(root, blocknr, 1);
			} else {
				tree_block_release(root, leaf_buf);
886 887 888 889 890 891
			}
		}
	}
	return 0;
}

892 893 894 895 896 897
int next_leaf(struct ctree_root *root, struct ctree_path *path)
{
	int slot;
	int level = 1;
	u64 blocknr;
	struct tree_buffer *c;
C
Chris Mason 已提交
898
	struct tree_buffer *next = NULL;
899 900 901 902 903 904 905 906 907 908 909

	while(level < MAX_LEVEL) {
		if (!path->nodes[level])
			return -1;
		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= c->node.header.nritems) {
			level++;
			continue;
		}
		blocknr = c->node.blockptrs[slot];
C
Chris Mason 已提交
910 911
		if (next)
			tree_block_release(root, next);
912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928
		next = read_tree_block(root, blocknr);
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
		tree_block_release(root, c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!level)
			break;
		next = read_tree_block(root, next->node.blockptrs[0]);
	}
	return 0;
}

929 930
/*
 * for testing only
 *
 * hands out the next key for the test loops in main(): a pseudo random
 * value in [0, max_key) taken from rand().  i is the loop counter; it is
 * only needed by the sequential variant that is left commented out.
 */
int next_key(int i, int max_key) {
	return rand() % max_key;
	// return i;
}

int main() {
936
	struct ctree_root *root;
937
	struct key ins;
938
	struct key last = { (u64)-1, 0, 0};
939 940 941 942
	char *buf;
	int i;
	int num;
	int ret;
C
Chris Mason 已提交
943 944
	int run_size = 20000000;
	int max_key =  100000000;
945 946
	int tree_size = 0;
	struct ctree_path path;
C
Chris Mason 已提交
947
	struct ctree_super_block super;
948

949 950 951
	radix_tree_init();


C
Chris Mason 已提交
952
	root = open_ctree("dbfile", &super);
953 954 955 956 957 958 959

	srand(55);
	for (i = 0; i < run_size; i++) {
		buf = malloc(64);
		num = next_key(i, max_key);
		// num = i;
		sprintf(buf, "string-%d", num);
C
Chris Mason 已提交
960 961
		if (i % 10000 == 0)
			printf("insert %d:%d\n", num, i);
962 963 964
		ins.objectid = num;
		ins.offset = 0;
		ins.flags = 0;
965
		ret = insert_item(root, &ins, buf, strlen(buf));
966 967
		if (!ret)
			tree_size++;
C
Chris Mason 已提交
968
		free(buf);
969
	}
C
Chris Mason 已提交
970
	write_ctree_super(root, &super);
971
	close_ctree(root);
C
Chris Mason 已提交
972 973

	root = open_ctree("dbfile", &super);
974
	printf("starting search\n");
975 976 977 978 979
	srand(55);
	for (i = 0; i < run_size; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
980 981
		if (i % 10000 == 0)
			printf("search %d:%d\n", num, i);
C
Chris Mason 已提交
982
		ret = search_slot(root, &ins, &path, 0);
983
		if (ret) {
984
			print_tree(root, root->node);
985 986 987
			printf("unable to find %d\n", num);
			exit(1);
		}
988 989
		release_path(root, &path);
	}
C
Chris Mason 已提交
990
	write_ctree_super(root, &super);
991
	close_ctree(root);
C
Chris Mason 已提交
992
	root = open_ctree("dbfile", &super);
993 994 995 996 997
	printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
	        node_level(root->node->node.header.flags),
		root->node->node.header.nritems,
		NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
	printf("all searches good, deleting some items\n");
998 999
	i = 0;
	srand(55);
1000 1001 1002 1003
	for (i = 0 ; i < run_size/4; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1004 1005 1006 1007 1008 1009 1010 1011 1012
		ret = search_slot(root, &ins, &path, -1);
		if (!ret) {
			if (i % 10000 == 0)
				printf("del %d:%d\n", num, i);
			ret = del_item(root, &path);
			if (ret != 0)
				BUG();
			tree_size--;
		}
1013
		release_path(root, &path);
1014
	}
C
Chris Mason 已提交
1015 1016 1017
	write_ctree_super(root, &super);
	close_ctree(root);
	root = open_ctree("dbfile", &super);
1018
	srand(128);
1019
	for (i = 0; i < run_size; i++) {
1020
		buf = malloc(64);
1021
		num = next_key(i, max_key);
1022
		sprintf(buf, "string-%d", num);
1023
		ins.objectid = num;
C
Chris Mason 已提交
1024 1025
		if (i % 10000 == 0)
			printf("insert %d:%d\n", num, i);
1026
		ret = insert_item(root, &ins, buf, strlen(buf));
1027 1028
		if (!ret)
			tree_size++;
C
Chris Mason 已提交
1029
		free(buf);
1030
	}
C
Chris Mason 已提交
1031
	write_ctree_super(root, &super);
1032
	close_ctree(root);
C
Chris Mason 已提交
1033
	root = open_ctree("dbfile", &super);
1034
	srand(128);
1035
	printf("starting search2\n");
1036 1037 1038 1039
	for (i = 0; i < run_size; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1040 1041
		if (i % 10000 == 0)
			printf("search %d:%d\n", num, i);
C
Chris Mason 已提交
1042
		ret = search_slot(root, &ins, &path, 0);
1043 1044 1045 1046 1047 1048 1049 1050 1051
		if (ret) {
			print_tree(root, root->node);
			printf("unable to find %d\n", num);
			exit(1);
		}
		release_path(root, &path);
	}
	printf("starting big long delete run\n");
	while(root->node && root->node->node.header.nritems > 0) {
1052 1053 1054 1055
		struct leaf *leaf;
		int slot;
		ins.objectid = (u64)-1;
		init_path(&path);
C
Chris Mason 已提交
1056
		ret = search_slot(root, &ins, &path, -1);
1057 1058 1059
		if (ret == 0)
			BUG();

1060
		leaf = &path.nodes[0]->leaf;
1061 1062 1063 1064 1065 1066
		slot = path.slots[0];
		if (slot != leaf->header.nritems)
			BUG();
		while(path.slots[0] > 0) {
			path.slots[0] -= 1;
			slot = path.slots[0];
1067
			leaf = &path.nodes[0]->leaf;
1068 1069 1070 1071

			if (comp_keys(&last, &leaf->items[slot].key) <= 0)
				BUG();
			memcpy(&last, &leaf->items[slot].key, sizeof(last));
C
Chris Mason 已提交
1072 1073
			if (tree_size % 10000 == 0)
				printf("big del %d:%d\n", tree_size, i);
1074 1075 1076
			ret = del_item(root, &path);
			if (ret != 0) {
				printf("del_item returned %d\n", ret);
1077
				BUG();
1078
			}
1079 1080
			tree_size--;
		}
1081
		release_path(root, &path);
1082
	}
1083
	printf("tree size is now %d\n", tree_size);
1084
	printf("map tree\n");
C
Chris Mason 已提交
1085 1086
	write_ctree_super(root, &super);
	close_ctree(root);
1087 1088
	return 0;
}