ctree.c 31.8 KB
Newer Older
1 2 3
#include <stdio.h>
#include <stdlib.h>
#include "kerncompat.h"
4 5 6
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
C
Chris Mason 已提交
7
#include "print-tree.h"
8

C
Chris Mason 已提交
9 10
/*
 * forward declarations for helpers that are called before they are
 * defined further down in this file
 */
int split_node(struct ctree_root *root, struct ctree_path *path, int level);
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
int push_node_left(struct ctree_root *root, struct ctree_path *path, int level);
int push_node_right(struct ctree_root *root,
		    struct ctree_path *path, int level);
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
15

C
Chris Mason 已提交
16
inline void init_path(struct ctree_path *p)
17 18 19 20
{
	memset(p, 0, sizeof(*p));
}

C
Chris Mason 已提交
21
void release_path(struct ctree_root *root, struct ctree_path *p)
22 23 24 25 26 27 28 29 30
{
	int i;
	for (i = 0; i < MAX_LEVEL; i++) {
		if (!p->nodes[i])
			break;
		tree_block_release(root, p->nodes[i]);
	}
}

C
Chris Mason 已提交
31 32 33 34 35
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
36 37 38 39
static inline unsigned int leaf_data_end(struct leaf *leaf)
{
	unsigned int nr = leaf->header.nritems;
	if (nr == 0)
40
		return sizeof(leaf->data);
41 42 43
	return leaf->items[nr-1].offset;
}

C
Chris Mason 已提交
44 45 46 47 48
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
C
Chris Mason 已提交
49
int leaf_free_space(struct leaf *leaf)
50 51 52 53 54 55 56
{
	int data_end = leaf_data_end(leaf);
	int nritems = leaf->header.nritems;
	char *items_end = (char *)(leaf->items + nritems + 1);
	return (char *)(leaf->data + data_end) - (char *)items_end;
}

C
Chris Mason 已提交
57 58 59
/*
 * compare two keys in a memcmp fashion
 */
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
int comp_keys(struct key *k1, struct key *k2)
{
	if (k1->objectid > k2->objectid)
		return 1;
	if (k1->objectid < k2->objectid)
		return -1;
	if (k1->flags > k2->flags)
		return 1;
	if (k1->flags < k2->flags)
		return -1;
	if (k1->offset > k2->offset)
		return 1;
	if (k1->offset < k2->offset)
		return -1;
	return 0;
}
C
Chris Mason 已提交
76 77 78 79 80 81 82 83 84 85

/*
 * binary search over an array of records that each start with a key.
 * p is the first record, records are item_size bytes apart and there
 * are 'max' of them.
 *
 * Returns 0 with *slot set to the matching index when the key is found.
 * Otherwise returns 1 with *slot set to the index where the key would
 * have to be inserted; that index equals max when the key is bigger
 * than every key in the array.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
		       int max, int *slot)
{
	int lo = 0;
	int hi = max;

	while (lo < hi) {
		int probe = (lo + hi) / 2;
		struct key *cur = (struct key *)(p + probe * item_size);
		int cmp = comp_keys(cur, key);

		if (cmp == 0) {
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}
	*slot = lo;
	return 1;
}

C
Chris Mason 已提交
113 114 115 116
/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
117 118 119 120 121 122 123 124 125 126 127 128 129
int bin_search(struct node *c, struct key *key, int *slot)
{
	if (is_leaf(c->header.flags)) {
		struct leaf *l = (struct leaf *)c;
		return generic_bin_search((void *)l->items, sizeof(struct item),
					  key, c->header.nritems, slot);
	} else {
		return generic_bin_search((void *)c->keys, sizeof(struct key),
					  key, c->header.nritems, slot);
	}
	return -1;
}

C
Chris Mason 已提交
130 131 132 133 134 135 136
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted.
C
Chris Mason 已提交
137 138 139 140
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
C
Chris Mason 已提交
141
 */
C
Chris Mason 已提交
142 143
int search_slot(struct ctree_root *root, struct key *key,
		struct ctree_path *p, int ins_len)
144
{
145 146
	struct tree_buffer *b = root->node;
	struct node *c;
147 148 149
	int slot;
	int ret;
	int level;
C
Chris Mason 已提交
150

151 152 153
	b->count++;
	while (b) {
		c = &b->node;
154
		level = node_level(c->header.flags);
155
		p->nodes[level] = b;
156 157 158 159 160
		ret = bin_search(c, key, &slot);
		if (!is_leaf(c->header.flags)) {
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
C
Chris Mason 已提交
161 162
			if (ins_len > 0 &&
			    c->header.nritems == NODEPTRS_PER_BLOCK) {
C
Chris Mason 已提交
163 164 165 166 167 168 169
				int sret = split_node(root, p, level);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
				b = p->nodes[level];
				c = &b->node;
				slot = p->slots[level];
C
Chris Mason 已提交
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
			} else if (ins_len < 0 &&
				   c->header.nritems <= NODEPTRS_PER_BLOCK/4) {
				u64 blocknr = b->blocknr;
				slot = p->slots[level +1];
				b->count++;
				if (push_node_left(root, p, level))
					push_node_right(root, p, level);
				if (c->header.nritems == 0 &&
				    level < MAX_LEVEL - 1 &&
				    p->nodes[level + 1]) {
					int tslot = p->slots[level + 1];

					p->slots[level + 1] = slot;
					del_ptr(root, p, level + 1);
					p->slots[level + 1] = tslot;
					tree_block_release(root, b);
					free_extent(root, blocknr, 1);
				} else {
					tree_block_release(root, b);
				}
				b = p->nodes[level];
				c = &b->node;
				slot = p->slots[level];
C
Chris Mason 已提交
193
			}
194
			b = read_tree_block(root, c->blockptrs[slot]);
195 196
			continue;
		} else {
C
Chris Mason 已提交
197
			struct leaf *l = (struct leaf *)c;
198
			p->slots[level] = slot;
C
Chris Mason 已提交
199 200
			if (ins_len > 0 && leaf_free_space(l) <
			    sizeof(struct item) + ins_len) {
C
Chris Mason 已提交
201 202 203 204 205
				int sret = split_leaf(root, p, ins_len);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
			}
206 207 208 209 210 211
			return ret;
		}
	}
	return -1;
}

C
Chris Mason 已提交
212 213 214 215 216 217 218
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 */
219 220 221
static void fixup_low_keys(struct ctree_root *root,
			   struct ctree_path *path, struct key *key,
			   int level)
222 223 224
{
	int i;
	for (i = level; i < MAX_LEVEL; i++) {
225
		struct node *t;
226
		int tslot = path->slots[i];
227
		if (!path->nodes[i])
228
			break;
229
		t = &path->nodes[i]->node;
230
		memcpy(t->keys + tslot, key, sizeof(*key));
231
		write_tree_block(root, path->nodes[i]);
232 233 234 235 236
		if (tslot != 0)
			break;
	}
}

C
Chris Mason 已提交
237 238 239 240 241 242 243 244 245 246
/*
 * try to push data from one node into the next node left in the
 * tree.  The src node is found at specified level in the path.
 * If some bytes were pushed, return 0, otherwise return 1.
 *
 * Lower nodes/leaves in the path are not touched, higher nodes may
 * be modified to reflect the push.
 *
 * The path is altered to reflect the push.
 */
247 248 249 250 251 252 253 254
int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
	struct node *left;
	struct node *right;
	int push_items = 0;
	int left_nritems;
	int right_nritems;
255 256
	struct tree_buffer *t;
	struct tree_buffer *right_buf;
257 258 259 260 261 262 263

	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
		return 1;
	slot = path->slots[level + 1];
	if (slot == 0)
		return 1;

264 265 266 267 268
	t = read_tree_block(root,
		            path->nodes[level + 1]->node.blockptrs[slot - 1]);
	left = &t->node;
	right_buf = path->nodes[level];
	right = &right_buf->node;
269 270 271
	left_nritems = left->header.nritems;
	right_nritems = right->header.nritems;
	push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
272 273
	if (push_items <= 0) {
		tree_block_release(root, t);
274
		return 1;
275
	}
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290

	if (right_nritems < push_items)
		push_items = right_nritems;
	memcpy(left->keys + left_nritems, right->keys,
		push_items * sizeof(struct key));
	memcpy(left->blockptrs + left_nritems, right->blockptrs,
		push_items * sizeof(u64));
	memmove(right->keys, right->keys + push_items,
		(right_nritems - push_items) * sizeof(struct key));
	memmove(right->blockptrs, right->blockptrs + push_items,
		(right_nritems - push_items) * sizeof(u64));
	right->header.nritems -= push_items;
	left->header.nritems += push_items;

	/* adjust the pointers going up the tree */
291 292 293 294
	fixup_low_keys(root, path, right->keys, level + 1);

	write_tree_block(root, t);
	write_tree_block(root, right_buf);
295 296 297 298

	/* then fixup the leaf pointer in the path */
	if (path->slots[level] < push_items) {
		path->slots[level] += left_nritems;
299 300
		tree_block_release(root, path->nodes[level]);
		path->nodes[level] = t;
301 302 303
		path->slots[level + 1] -= 1;
	} else {
		path->slots[level] -= push_items;
304
		tree_block_release(root, t);
305 306 307 308
	}
	return 0;
}

C
Chris Mason 已提交
309 310 311 312 313 314 315 316 317 318
/*
 * try to push data from one node into the next node right in the
 * tree.  The src node is found at specified level in the path.
 * If some bytes were pushed, return 0, otherwise return 1.
 *
 * Lower nodes/leaves in the path are not touched, higher nodes may
 * be modified to reflect the push.
 *
 * The path is altered to reflect the push.
 */
319 320 321
int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
322 323
	struct tree_buffer *t;
	struct tree_buffer *src_buffer;
324 325 326 327 328 329
	struct node *dst;
	struct node *src;
	int push_items = 0;
	int dst_nritems;
	int src_nritems;

C
Chris Mason 已提交
330
	/* can't push from the root */
331 332
	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
		return 1;
C
Chris Mason 已提交
333 334

	/* only try to push inside the node higher up */
335 336 337 338
	slot = path->slots[level + 1];
	if (slot == NODEPTRS_PER_BLOCK - 1)
		return 1;

339
	if (slot >= path->nodes[level + 1]->node.header.nritems -1)
340 341
		return 1;

342 343 344 345 346
	t = read_tree_block(root,
			    path->nodes[level + 1]->node.blockptrs[slot + 1]);
	dst = &t->node;
	src_buffer = path->nodes[level];
	src = &src_buffer->node;
347 348 349
	dst_nritems = dst->header.nritems;
	src_nritems = src->header.nritems;
	push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
350 351
	if (push_items <= 0) {
		tree_block_release(root, t);
352
		return 1;
353
	}
354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370

	if (src_nritems < push_items)
		push_items = src_nritems;
	memmove(dst->keys + push_items, dst->keys,
		dst_nritems * sizeof(struct key));
	memcpy(dst->keys, src->keys + src_nritems - push_items,
		push_items * sizeof(struct key));

	memmove(dst->blockptrs + push_items, dst->blockptrs,
		dst_nritems * sizeof(u64));
	memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
		push_items * sizeof(u64));

	src->header.nritems -= push_items;
	dst->header.nritems += push_items;

	/* adjust the pointers going up the tree */
371
	memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
372
		dst->keys, sizeof(struct key));
373 374 375 376 377

	write_tree_block(root, path->nodes[level + 1]);
	write_tree_block(root, t);
	write_tree_block(root, src_buffer);

C
Chris Mason 已提交
378
	/* then fixup the pointers in the path */
379 380
	if (path->slots[level] >= src->header.nritems) {
		path->slots[level] -= src->header.nritems;
381 382
		tree_block_release(root, path->nodes[level]);
		path->nodes[level] = t;
383
		path->slots[level + 1] += 1;
384 385
	} else {
		tree_block_release(root, t);
386 387 388 389
	}
	return 0;
}

C
Chris Mason 已提交
390 391 392 393 394
/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
 */
C
Chris Mason 已提交
395 396
static int insert_new_root(struct ctree_root *root,
			   struct ctree_path *path, int level)
C
Chris Mason 已提交
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
{
	struct tree_buffer *t;
	struct node *lower;
	struct node *c;
	struct key *lower_key;

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	t = alloc_free_block(root);
	c = &t->node;
	memset(c, 0, sizeof(c));
	c->header.nritems = 1;
	c->header.flags = node_level(level);
	c->header.blocknr = t->blocknr;
	c->header.parentid = root->node->node.header.parentid;
	lower = &path->nodes[level-1]->node;
	if (is_leaf(lower->header.flags))
		lower_key = &((struct leaf *)lower)->items[0].key;
	else
		lower_key = lower->keys;
	memcpy(c->keys, lower_key, sizeof(struct key));
	c->blockptrs[0] = path->nodes[level-1]->blocknr;
	/* the super has an extra ref to root->node */
	tree_block_release(root, root->node);
	root->node = t;
	t->count++;
	write_tree_block(root, t);
	path->nodes[level] = t;
	path->slots[level] = 0;
	return 0;
}

C
Chris Mason 已提交
430 431 432
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
C
Chris Mason 已提交
433
 *
C
Chris Mason 已提交
434 435 436
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
 */
C
Chris Mason 已提交
437
int insert_ptr(struct ctree_root *root,
C
Chris Mason 已提交
438 439 440 441 442
		struct ctree_path *path, struct key *key,
		u64 blocknr, int slot, int level)
{
	struct node *lower;
	int nritems;
C
Chris Mason 已提交
443 444

	BUG_ON(!path->nodes[level]);
C
Chris Mason 已提交
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465
	lower = &path->nodes[level]->node;
	nritems = lower->header.nritems;
	if (slot > nritems)
		BUG();
	if (nritems == NODEPTRS_PER_BLOCK)
		BUG();
	if (slot != nritems) {
		memmove(lower->keys + slot + 1, lower->keys + slot,
			(nritems - slot) * sizeof(struct key));
		memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
			(nritems - slot) * sizeof(u64));
	}
	memcpy(lower->keys + slot, key, sizeof(struct key));
	lower->blockptrs[slot] = blocknr;
	lower->header.nritems++;
	if (lower->keys[1].objectid == 0)
			BUG();
	write_tree_block(root, path->nodes[level]);
	return 0;
}

C
Chris Mason 已提交
466 467 468 469 470 471 472
/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
 */
C
Chris Mason 已提交
473
int split_node(struct ctree_root *root, struct ctree_path *path, int level)
474
{
C
Chris Mason 已提交
475 476 477 478
	struct tree_buffer *t;
	struct node *c;
	struct tree_buffer *split_buffer;
	struct node *split;
479
	int mid;
C
Chris Mason 已提交
480
	int ret;
481

C
Chris Mason 已提交
482 483 484 485 486 487 488 489 490 491 492 493 494
	ret = push_node_left(root, path, level);
	if (!ret)
		return 0;
	ret = push_node_right(root, path, level);
	if (!ret)
		return 0;
	t = path->nodes[level];
	c = &t->node;
	if (t == root->node) {
		/* trying to split the root, lets make a new one */
		ret = insert_new_root(root, path, level + 1);
		if (ret)
			return ret;
495
	}
C
Chris Mason 已提交
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
	split_buffer = alloc_free_block(root);
	split = &split_buffer->node;
	split->header.flags = c->header.flags;
	split->header.blocknr = split_buffer->blocknr;
	split->header.parentid = root->node->node.header.parentid;
	mid = (c->header.nritems + 1) / 2;
	memcpy(split->keys, c->keys + mid,
		(c->header.nritems - mid) * sizeof(struct key));
	memcpy(split->blockptrs, c->blockptrs + mid,
		(c->header.nritems - mid) * sizeof(u64));
	split->header.nritems = c->header.nritems - mid;
	c->header.nritems = mid;
	write_tree_block(root, t);
	write_tree_block(root, split_buffer);
	insert_ptr(root, path, split->keys, split_buffer->blocknr,
		     path->slots[level + 1] + 1, level + 1);
C
Chris Mason 已提交
512
	if (path->slots[level] >= mid) {
C
Chris Mason 已提交
513 514 515 516 517 518
		path->slots[level] -= mid;
		tree_block_release(root, t);
		path->nodes[level] = split_buffer;
		path->slots[level + 1] += 1;
	} else {
		tree_block_release(root, split_buffer);
519
	}
C
Chris Mason 已提交
520
	return 0;
521 522
}

C
Chris Mason 已提交
523 524 525 526 527
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
528 529 530 531 532 533 534 535 536 537 538 539 540
int leaf_space_used(struct leaf *l, int start, int nr)
{
	int data_len;
	int end = start + nr - 1;

	if (!nr)
		return 0;
	data_len = l->items[start].offset + l->items[start].size;
	data_len = data_len - l->items[end].offset;
	data_len += sizeof(struct item) * nr;
	return data_len;
}

C
Chris Mason 已提交
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631
/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
		   int data_size)
{
	struct tree_buffer *left_buf = path->nodes[0];
	struct leaf *left = &left_buf->leaf;
	struct leaf *right;
	struct tree_buffer *right_buf;
	struct tree_buffer *upper;
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct item *item;

	slot = path->slots[1];
	if (!path->nodes[1]) {
		return 1;
	}
	upper = path->nodes[1];
	if (slot >= upper->node.header.nritems - 1) {
		return 1;
	}
	right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
	right = &right_buf->leaf;
	free_space = leaf_free_space(right);
	if (free_space < data_size + sizeof(struct item)) {
		tree_block_release(root, right_buf);
		return 1;
	}
	for (i = left->header.nritems - 1; i >= 0; i--) {
		item = left->items + i;
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
		if (item->size + sizeof(*item) + push_space > free_space)
			break;
		push_items++;
		push_space += item->size + sizeof(*item);
	}
	if (push_items == 0) {
		tree_block_release(root, right_buf);
		return 1;
	}
	/* push left to right */
	push_space = left->items[left->header.nritems - push_items].offset +
		     left->items[left->header.nritems - push_items].size;
	push_space -= leaf_data_end(left);
	/* make room in the right data area */
	memmove(right->data + leaf_data_end(right) - push_space,
		right->data + leaf_data_end(right),
		LEAF_DATA_SIZE - leaf_data_end(right));
	/* copy from the left data area */
	memcpy(right->data + LEAF_DATA_SIZE - push_space,
		left->data + leaf_data_end(left),
		push_space);
	memmove(right->items + push_items, right->items,
		right->header.nritems * sizeof(struct item));
	/* copy the items from left to right */
	memcpy(right->items, left->items + left->header.nritems - push_items,
		push_items * sizeof(struct item));

	/* update the item pointers */
	right->header.nritems += push_items;
	push_space = LEAF_DATA_SIZE;
	for (i = 0; i < right->header.nritems; i++) {
		right->items[i].offset = push_space - right->items[i].size;
		push_space = right->items[i].offset;
	}
	left->header.nritems -= push_items;

	write_tree_block(root, left_buf);
	write_tree_block(root, right_buf);
	memcpy(upper->node.keys + slot + 1,
		&right->items[0].key, sizeof(struct key));
	write_tree_block(root, upper);
	/* then fixup the leaf pointer in the path */
	// FIXME use nritems in here somehow
	if (path->slots[0] >= left->header.nritems) {
		path->slots[0] -= left->header.nritems;
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = right_buf;
		path->slots[1] += 1;
	} else {
		tree_block_release(root, right_buf);
	}
	return 0;
}
C
Chris Mason 已提交
632 633 634 635
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
636 637 638
int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
		   int data_size)
{
639 640 641
	struct tree_buffer *right_buf = path->nodes[0];
	struct leaf *right = &right_buf->leaf;
	struct tree_buffer *t;
642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657
	struct leaf *left;
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct item *item;
	int old_left_nritems;

	slot = path->slots[1];
	if (slot == 0) {
		return 1;
	}
	if (!path->nodes[1]) {
		return 1;
	}
658 659
	t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
	left = &t->leaf;
660 661
	free_space = leaf_free_space(left);
	if (free_space < data_size + sizeof(struct item)) {
662
		tree_block_release(root, t);
663 664 665 666 667 668 669 670 671 672 673 674
		return 1;
	}
	for (i = 0; i < right->header.nritems; i++) {
		item = right->items + i;
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
		if (item->size + sizeof(*item) + push_space > free_space)
			break;
		push_items++;
		push_space += item->size + sizeof(*item);
	}
	if (push_items == 0) {
675
		tree_block_release(root, t);
676 677 678 679 680 681 682 683 684 685
		return 1;
	}
	/* push data from right to left */
	memcpy(left->items + left->header.nritems,
		right->items, push_items * sizeof(struct item));
	push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
	memcpy(left->data + leaf_data_end(left) - push_space,
		right->data + right->items[push_items - 1].offset,
		push_space);
	old_left_nritems = left->header.nritems;
686 687
	BUG_ON(old_left_nritems < 0);

688 689 690 691 692 693 694 695 696 697 698 699 700 701
	for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		left->items[i].offset -= LEAF_DATA_SIZE -
			left->items[old_left_nritems -1].offset;
	}
	left->header.nritems += push_items;

	/* fixup right node */
	push_space = right->items[push_items-1].offset - leaf_data_end(right);
	memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
		leaf_data_end(right), push_space);
	memmove(right->items, right->items + push_items,
		(right->header.nritems - push_items) * sizeof(struct item));
	right->header.nritems -= push_items;
	push_space = LEAF_DATA_SIZE;
702

703 704 705 706
	for (i = 0; i < right->header.nritems; i++) {
		right->items[i].offset = push_space - right->items[i].size;
		push_space = right->items[i].offset;
	}
707 708 709 710 711

	write_tree_block(root, t);
	write_tree_block(root, right_buf);

	fixup_low_keys(root, path, &right->items[0].key, 1);
712 713 714 715

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
716 717
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = t;
718 719
		path->slots[1] -= 1;
	} else {
720
		tree_block_release(root, t);
721 722
		path->slots[0] -= push_items;
	}
723
	BUG_ON(path->slots[0] < 0);
724 725 726
	return 0;
}

C
Chris Mason 已提交
727 728 729 730
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 */
731 732
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
{
733 734 735 736 737
	struct tree_buffer *l_buf = path->nodes[0];
	struct leaf *l = &l_buf->leaf;
	int nritems;
	int mid;
	int slot;
738
	struct leaf *right;
739
	struct tree_buffer *right_buffer;
740 741 742 743 744 745
	int space_needed = data_size + sizeof(struct item);
	int data_copy_size;
	int rt_data_off;
	int i;
	int ret;

C
Chris Mason 已提交
746 747
	if (push_leaf_left(root, path, data_size) == 0 ||
	    push_leaf_right(root, path, data_size) == 0) {
748 749 750 751
		l_buf = path->nodes[0];
		l = &l_buf->leaf;
		if (leaf_free_space(l) >= sizeof(struct item) + data_size)
			return 0;
752
	}
C
Chris Mason 已提交
753 754 755 756 757
	if (!path->nodes[1]) {
		ret = insert_new_root(root, path, 1);
		if (ret)
			return ret;
	}
758 759 760 761 762 763 764 765
	slot = path->slots[0];
	nritems = l->header.nritems;
	mid = (nritems + 1)/ 2;

	right_buffer = alloc_free_block(root);
	BUG_ON(!right_buffer);
	BUG_ON(mid == nritems);
	right = &right_buffer->leaf;
766 767
	memset(right, 0, sizeof(*right));
	if (mid <= slot) {
C
Chris Mason 已提交
768
		/* FIXME, just alloc a new leaf here */
769 770 771 772
		if (leaf_space_used(l, mid, nritems - mid) + space_needed >
			LEAF_DATA_SIZE)
			BUG();
	} else {
C
Chris Mason 已提交
773
		/* FIXME, just alloc a new leaf here */
774 775 776 777 778
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
			LEAF_DATA_SIZE)
			BUG();
	}
	right->header.nritems = nritems - mid;
779 780
	right->header.blocknr = right_buffer->blocknr;
	right->header.flags = node_level(0);
C
Chris Mason 已提交
781
	right->header.parentid = root->node->node.header.parentid;
782 783 784 785 786 787 788 789
	data_copy_size = l->items[mid].offset + l->items[mid].size -
			 leaf_data_end(l);
	memcpy(right->items, l->items + mid,
	       (nritems - mid) * sizeof(struct item));
	memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
	       l->data + leaf_data_end(l), data_copy_size);
	rt_data_off = LEAF_DATA_SIZE -
		     (l->items[mid].offset + l->items[mid].size);
C
Chris Mason 已提交
790 791

	for (i = 0; i < right->header.nritems; i++)
792
		right->items[i].offset += rt_data_off;
C
Chris Mason 已提交
793

794 795
	l->header.nritems = mid;
	ret = insert_ptr(root, path, &right->items[0].key,
C
Chris Mason 已提交
796
			  right_buffer->blocknr, path->slots[1] + 1, 1);
797 798 799 800
	write_tree_block(root, right_buffer);
	write_tree_block(root, l_buf);

	BUG_ON(path->slots[0] != slot);
801
	if (mid <= slot) {
802 803
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = right_buffer;
804 805
		path->slots[0] -= mid;
		path->slots[1] += 1;
806 807 808
	} else
		tree_block_release(root, right_buffer);
	BUG_ON(path->slots[0] < 0);
809 810 811
	return ret;
}

C
Chris Mason 已提交
812 813 814 815
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
816 817 818 819 820
int insert_item(struct ctree_root *root, struct key *key,
			  void *data, int data_size)
{
	int ret;
	int slot;
821
	int slot_orig;
822
	struct leaf *leaf;
823
	struct tree_buffer *leaf_buf;
824 825 826 827
	unsigned int nritems;
	unsigned int data_end;
	struct ctree_path path;

C
Chris Mason 已提交
828
	/* create a root if there isn't one */
C
Chris Mason 已提交
829
	if (!root->node)
C
Chris Mason 已提交
830
		BUG();
831
	init_path(&path);
C
Chris Mason 已提交
832
	ret = search_slot(root, key, &path, data_size);
833 834
	if (ret == 0) {
		release_path(root, &path);
835
		return -EEXIST;
836
	}
837

838 839 840
	slot_orig = path.slots[0];
	leaf_buf = path.nodes[0];
	leaf = &leaf_buf->leaf;
C
Chris Mason 已提交
841

842 843
	nritems = leaf->header.nritems;
	data_end = leaf_data_end(leaf);
844

845 846 847 848
	if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
		BUG();

	slot = path.slots[0];
849
	BUG_ON(slot < 0);
850
	if (slot == 0)
851
		fixup_low_keys(root, &path, key, 1);
852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872
	if (slot != nritems) {
		int i;
		unsigned int old_data = leaf->items[slot].offset +
					leaf->items[slot].size;

		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		for (i = slot; i < nritems; i++)
			leaf->items[i].offset -= data_size;

		/* shift the items */
		memmove(leaf->items + slot + 1, leaf->items + slot,
		        (nritems - slot) * sizeof(struct item));

		/* shift the data */
		memmove(leaf->data + data_end - data_size, leaf->data +
		        data_end, old_data - data_end);
		data_end = old_data;
	}
C
Chris Mason 已提交
873
	/* copy the new data in */
874 875 876 877 878
	memcpy(&leaf->items[slot].key, key, sizeof(struct key));
	leaf->items[slot].offset = data_end - data_size;
	leaf->items[slot].size = data_size;
	memcpy(leaf->data + data_end - data_size, data, data_size);
	leaf->header.nritems += 1;
879
	write_tree_block(root, leaf_buf);
880 881
	if (leaf_free_space(leaf) < 0)
		BUG();
882
	release_path(root, &path);
883 884 885
	return 0;
}

C
Chris Mason 已提交
886
/*
C
Chris Mason 已提交
887
 * delete the pointer from a given node.
C
Chris Mason 已提交
888 889 890 891 892
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way the root if required.  The root is converted into
 * a leaf if all the nodes are emptied.
 */
893 894 895
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
896
	struct tree_buffer *t;
897 898
	struct node *node;
	int nritems;
899
	u64 blocknr;
900 901

	while(1) {
902 903
		t = path->nodes[level];
		if (!t)
904
			break;
905
		node = &t->node;
906 907 908 909 910 911 912 913 914 915 916
		slot = path->slots[level];
		nritems = node->header.nritems;

		if (slot != nritems -1) {
			memmove(node->keys + slot, node->keys + slot + 1,
				sizeof(struct key) * (nritems - slot - 1));
			memmove(node->blockptrs + slot,
				node->blockptrs + slot + 1,
				sizeof(u64) * (nritems - slot - 1));
		}
		node->header.nritems--;
917
		write_tree_block(root, t);
918
		blocknr = t->blocknr;
919 920
		if (node->header.nritems != 0) {
			if (slot == 0)
921 922
				fixup_low_keys(root, path, node->keys,
					       level + 1);
C
Chris Mason 已提交
923
			break;
924
		}
925 926 927 928
		if (t == root->node) {
			/* just turn the root into a leaf and break */
			root->node->node.header.flags = node_level(0);
			write_tree_block(root, t);
929 930 931
			break;
		}
		level++;
932
		free_extent(root, blocknr, 1);
933 934 935 936 937 938
		if (!path->nodes[level])
			BUG();
	}
	return 0;
}

C
Chris Mason 已提交
939 940 941 942
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
943
int del_item(struct ctree_root *root, struct ctree_path *path)
944 945 946
{
	int slot;
	struct leaf *leaf;
947
	struct tree_buffer *leaf_buf;
948 949 950
	int doff;
	int dsize;

951 952
	leaf_buf = path->nodes[0];
	leaf = &leaf_buf->leaf;
953
	slot = path->slots[0];
954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969
	doff = leaf->items[slot].offset;
	dsize = leaf->items[slot].size;

	if (slot != leaf->header.nritems - 1) {
		int i;
		int data_end = leaf_data_end(leaf);
		memmove(leaf->data + data_end + dsize,
			leaf->data + data_end,
			doff - data_end);
		for (i = slot + 1; i < leaf->header.nritems; i++)
			leaf->items[i].offset += dsize;
		memmove(leaf->items + slot, leaf->items + slot + 1,
			sizeof(struct item) *
			(leaf->header.nritems - slot - 1));
	}
	leaf->header.nritems -= 1;
C
Chris Mason 已提交
970
	/* delete the leaf if we've emptied it */
971
	if (leaf->header.nritems == 0) {
972 973 974
		if (leaf_buf == root->node) {
			leaf->header.flags = node_level(0);
			write_tree_block(root, leaf_buf);
975
		} else {
976
			del_ptr(root, path, 1);
977 978
			free_extent(root, leaf_buf->blocknr, 1);
		}
979
	} else {
C
Chris Mason 已提交
980
		int used = leaf_space_used(leaf, 0, leaf->header.nritems);
981
		if (slot == 0)
982 983
			fixup_low_keys(root, path, &leaf->items[0].key, 1);
		write_tree_block(root, leaf_buf);
C
Chris Mason 已提交
984
		/* delete the leaf if it is mostly empty */
C
Chris Mason 已提交
985
		if (used < LEAF_DATA_SIZE / 3) {
986 987 988 989
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
990
			slot = path->slots[1];
991
			leaf_buf->count++;
992
			push_leaf_left(root, path, 1);
C
Chris Mason 已提交
993 994
			if (leaf->header.nritems)
				push_leaf_right(root, path, 1);
995
			if (leaf->header.nritems == 0) {
C
Chris Mason 已提交
996
				u64 blocknr = leaf_buf->blocknr;
997 998
				path->slots[1] = slot;
				del_ptr(root, path, 1);
C
Chris Mason 已提交
999 1000 1001 1002
				tree_block_release(root, leaf_buf);
				free_extent(root, blocknr, 1);
			} else {
				tree_block_release(root, leaf_buf);
1003 1004 1005 1006 1007 1008
			}
		}
	}
	return 0;
}

C
Chris Mason 已提交
1009 1010 1011 1012
/*
 * walk up the tree as far as required to find the next leaf.
 * returns 0 if it found something or -1 if there are no greater leaves.
 */
1013 1014 1015 1016 1017 1018
int next_leaf(struct ctree_root *root, struct ctree_path *path)
{
	int slot;
	int level = 1;
	u64 blocknr;
	struct tree_buffer *c;
C
Chris Mason 已提交
1019
	struct tree_buffer *next = NULL;
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030

	while(level < MAX_LEVEL) {
		if (!path->nodes[level])
			return -1;
		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= c->node.header.nritems) {
			level++;
			continue;
		}
		blocknr = c->node.blockptrs[slot];
C
Chris Mason 已提交
1031 1032
		if (next)
			tree_block_release(root, next);
1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
		next = read_tree_block(root, blocknr);
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
		tree_block_release(root, c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!level)
			break;
		next = read_tree_block(root, next->node.blockptrs[0]);
	}
	return 0;
}

1050 1051
/*
 * for testing only: produce the next key for the test driver.
 *
 * Returns rand() % max_key, so callers replay the same key sequence by
 * re-seeding with srand().  The iteration counter i is currently unused;
 * return it instead of the random value to get sequential keys.
 * A max_key of 0 yields 0 rather than dividing by zero.
 */
int next_key(int i, int max_key)
{
	(void)i;
	if (max_key == 0)
		return 0;
	return rand() % max_key;
}

int main() {
1057
	struct ctree_root *root;
1058
	struct key ins;
1059
	struct key last = { (u64)-1, 0, 0};
1060 1061 1062 1063
	char *buf;
	int i;
	int num;
	int ret;
C
Chris Mason 已提交
1064 1065
	int run_size = 20000000;
	int max_key =  100000000;
1066 1067
	int tree_size = 0;
	struct ctree_path path;
C
Chris Mason 已提交
1068
	struct ctree_super_block super;
1069

1070 1071 1072
	radix_tree_init();


C
Chris Mason 已提交
1073
	root = open_ctree("dbfile", &super);
1074 1075 1076 1077 1078 1079
	srand(55);
	for (i = 0; i < run_size; i++) {
		buf = malloc(64);
		num = next_key(i, max_key);
		// num = i;
		sprintf(buf, "string-%d", num);
C
Chris Mason 已提交
1080
		if (i % 10000 == 0)
C
Chris Mason 已提交
1081
			fprintf(stderr, "insert %d:%d\n", num, i);
1082 1083 1084
		ins.objectid = num;
		ins.offset = 0;
		ins.flags = 0;
1085
		ret = insert_item(root, &ins, buf, strlen(buf));
1086 1087
		if (!ret)
			tree_size++;
C
Chris Mason 已提交
1088
		free(buf);
1089
	}
C
Chris Mason 已提交
1090
	write_ctree_super(root, &super);
1091
	close_ctree(root);
C
Chris Mason 已提交
1092 1093

	root = open_ctree("dbfile", &super);
1094
	printf("starting search\n");
1095 1096 1097 1098 1099
	srand(55);
	for (i = 0; i < run_size; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1100
		if (i % 10000 == 0)
C
Chris Mason 已提交
1101
			fprintf(stderr, "search %d:%d\n", num, i);
C
Chris Mason 已提交
1102
		ret = search_slot(root, &ins, &path, 0);
1103
		if (ret) {
1104
			print_tree(root, root->node);
1105 1106 1107
			printf("unable to find %d\n", num);
			exit(1);
		}
1108 1109
		release_path(root, &path);
	}
C
Chris Mason 已提交
1110
	write_ctree_super(root, &super);
1111
	close_ctree(root);
C
Chris Mason 已提交
1112
	root = open_ctree("dbfile", &super);
1113 1114 1115 1116 1117
	printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
	        node_level(root->node->node.header.flags),
		root->node->node.header.nritems,
		NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
	printf("all searches good, deleting some items\n");
1118 1119
	i = 0;
	srand(55);
1120 1121 1122 1123
	for (i = 0 ; i < run_size/4; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1124 1125 1126
		ret = search_slot(root, &ins, &path, -1);
		if (!ret) {
			if (i % 10000 == 0)
C
Chris Mason 已提交
1127
				fprintf(stderr, "del %d:%d\n", num, i);
C
Chris Mason 已提交
1128 1129 1130 1131 1132
			ret = del_item(root, &path);
			if (ret != 0)
				BUG();
			tree_size--;
		}
1133
		release_path(root, &path);
1134
	}
C
Chris Mason 已提交
1135 1136 1137
	write_ctree_super(root, &super);
	close_ctree(root);
	root = open_ctree("dbfile", &super);
1138
	srand(128);
1139
	for (i = 0; i < run_size; i++) {
1140
		buf = malloc(64);
1141
		num = next_key(i, max_key);
1142
		sprintf(buf, "string-%d", num);
1143
		ins.objectid = num;
C
Chris Mason 已提交
1144
		if (i % 10000 == 0)
C
Chris Mason 已提交
1145
			fprintf(stderr, "insert %d:%d\n", num, i);
1146
		ret = insert_item(root, &ins, buf, strlen(buf));
1147 1148
		if (!ret)
			tree_size++;
C
Chris Mason 已提交
1149
		free(buf);
1150
	}
C
Chris Mason 已提交
1151
	write_ctree_super(root, &super);
1152
	close_ctree(root);
C
Chris Mason 已提交
1153
	root = open_ctree("dbfile", &super);
1154
	srand(128);
1155
	printf("starting search2\n");
1156 1157 1158 1159
	for (i = 0; i < run_size; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1160
		if (i % 10000 == 0)
C
Chris Mason 已提交
1161
			fprintf(stderr, "search %d:%d\n", num, i);
C
Chris Mason 已提交
1162
		ret = search_slot(root, &ins, &path, 0);
1163 1164 1165 1166 1167 1168 1169 1170 1171
		if (ret) {
			print_tree(root, root->node);
			printf("unable to find %d\n", num);
			exit(1);
		}
		release_path(root, &path);
	}
	printf("starting big long delete run\n");
	while(root->node && root->node->node.header.nritems > 0) {
1172 1173 1174 1175
		struct leaf *leaf;
		int slot;
		ins.objectid = (u64)-1;
		init_path(&path);
C
Chris Mason 已提交
1176
		ret = search_slot(root, &ins, &path, -1);
1177 1178 1179
		if (ret == 0)
			BUG();

1180
		leaf = &path.nodes[0]->leaf;
1181 1182 1183 1184 1185 1186
		slot = path.slots[0];
		if (slot != leaf->header.nritems)
			BUG();
		while(path.slots[0] > 0) {
			path.slots[0] -= 1;
			slot = path.slots[0];
1187
			leaf = &path.nodes[0]->leaf;
1188 1189 1190 1191

			if (comp_keys(&last, &leaf->items[slot].key) <= 0)
				BUG();
			memcpy(&last, &leaf->items[slot].key, sizeof(last));
C
Chris Mason 已提交
1192 1193
			if (tree_size % 10000 == 0)
				printf("big del %d:%d\n", tree_size, i);
1194 1195 1196
			ret = del_item(root, &path);
			if (ret != 0) {
				printf("del_item returned %d\n", ret);
1197
				BUG();
1198
			}
1199 1200
			tree_size--;
		}
1201
		release_path(root, &path);
1202
	}
1203
	printf("tree size is now %d\n", tree_size);
1204
	printf("map tree\n");
C
Chris Mason 已提交
1205
	print_tree(root->extent_root, root->extent_root->node);
C
Chris Mason 已提交
1206 1207
	write_ctree_super(root, &super);
	close_ctree(root);
1208 1209
	return 0;
}