ctree.c 30.9 KB
Newer Older
1 2 3
#include <stdio.h>
#include <stdlib.h>
#include "kerncompat.h"
4 5 6
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
C
Chris Mason 已提交
7
#include "print-tree.h"
8

C
Chris Mason 已提交
9 10
int split_node(struct ctree_root *root, struct ctree_path *path, int level);
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
C
Chris Mason 已提交
11 12 13 14
int push_node_left(struct ctree_root *root, struct ctree_path *path, int level);
int push_node_right(struct ctree_root *root,
		    struct ctree_path *path, int level);
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
15

C
Chris Mason 已提交
16
inline void init_path(struct ctree_path *p)
17 18 19 20
{
	memset(p, 0, sizeof(*p));
}

C
Chris Mason 已提交
21
void release_path(struct ctree_root *root, struct ctree_path *p)
22 23 24 25 26 27 28 29 30
{
	int i;
	for (i = 0; i < MAX_LEVEL; i++) {
		if (!p->nodes[i])
			break;
		tree_block_release(root, p->nodes[i]);
	}
}

C
Chris Mason 已提交
31 32 33 34 35
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
36 37 38 39
static inline unsigned int leaf_data_end(struct leaf *leaf)
{
	unsigned int nr = leaf->header.nritems;
	if (nr == 0)
40
		return sizeof(leaf->data);
41 42 43
	return leaf->items[nr-1].offset;
}

C
Chris Mason 已提交
44 45 46 47 48
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
C
Chris Mason 已提交
49
int leaf_free_space(struct leaf *leaf)
50 51 52 53 54 55 56
{
	int data_end = leaf_data_end(leaf);
	int nritems = leaf->header.nritems;
	char *items_end = (char *)(leaf->items + nritems + 1);
	return (char *)(leaf->data + data_end) - (char *)items_end;
}

C
Chris Mason 已提交
57 58 59
/*
 * compare two keys in a memcmp fashion
 */
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
int comp_keys(struct key *k1, struct key *k2)
{
	if (k1->objectid > k2->objectid)
		return 1;
	if (k1->objectid < k2->objectid)
		return -1;
	if (k1->flags > k2->flags)
		return 1;
	if (k1->flags < k2->flags)
		return -1;
	if (k1->offset > k2->offset)
		return 1;
	if (k1->offset < k2->offset)
		return -1;
	return 0;
}
C
Chris Mason 已提交
76 77 78 79 80 81 82 83 84 85

/*
 * binary search for key in the array p.  Each element starts with a
 * struct key, elements are item_size bytes apart and there are 'max'
 * of them.
 *
 * returns 0 with *slot set to the matching index when the key is found.
 * Otherwise returns 1 with *slot set to the index where the key would
 * have to be inserted; that index is 'max' when the key is bigger than
 * every key in the array.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
		       int max, int *slot)
{
	int lo = 0;
	int hi = max;

	while (lo < hi) {
		int probe = (lo + hi) / 2;
		struct key *cur = (struct key *)(p + probe * item_size);
		int cmp = comp_keys(cur, key);

		if (cmp == 0) {
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}
	*slot = lo;
	return 1;
}

/*
 * binary search a tree block for key.  Leaves are searched through
 * their item array, nodes through their key array; the result and
 * *slot are whatever generic_bin_search() reports.
 */
int bin_search(struct node *c, struct key *key, int *slot)
{
	char *base;
	int stride;

	if (is_leaf(c->header.flags)) {
		struct leaf *l = (struct leaf *)c;

		base = (void *)l->items;
		stride = sizeof(struct item);
	} else {
		base = (void *)c->keys;
		stride = sizeof(struct key);
	}
	return generic_bin_search(base, stride, key, c->header.nritems, slot);
}

C
Chris Mason 已提交
126 127 128 129 130 131 132 133
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted.
 */
C
Chris Mason 已提交
134 135
int search_slot(struct ctree_root *root, struct key *key,
		struct ctree_path *p, int ins_len)
136
{
137 138
	struct tree_buffer *b = root->node;
	struct node *c;
139 140 141
	int slot;
	int ret;
	int level;
C
Chris Mason 已提交
142

143 144 145
	b->count++;
	while (b) {
		c = &b->node;
146
		level = node_level(c->header.flags);
147
		p->nodes[level] = b;
148 149 150 151 152
		ret = bin_search(c, key, &slot);
		if (!is_leaf(c->header.flags)) {
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
C
Chris Mason 已提交
153 154
			if (ins_len > 0 &&
			    c->header.nritems == NODEPTRS_PER_BLOCK) {
C
Chris Mason 已提交
155 156 157 158 159 160 161
				int sret = split_node(root, p, level);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
				b = p->nodes[level];
				c = &b->node;
				slot = p->slots[level];
C
Chris Mason 已提交
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
			} else if (ins_len < 0 &&
				   c->header.nritems <= NODEPTRS_PER_BLOCK/4) {
				u64 blocknr = b->blocknr;
				slot = p->slots[level +1];
				b->count++;
				if (push_node_left(root, p, level))
					push_node_right(root, p, level);
				if (c->header.nritems == 0 &&
				    level < MAX_LEVEL - 1 &&
				    p->nodes[level + 1]) {
					int tslot = p->slots[level + 1];

					p->slots[level + 1] = slot;
					del_ptr(root, p, level + 1);
					p->slots[level + 1] = tslot;
					tree_block_release(root, b);
					free_extent(root, blocknr, 1);
				} else {
					tree_block_release(root, b);
				}
				b = p->nodes[level];
				c = &b->node;
				slot = p->slots[level];
C
Chris Mason 已提交
185
			}
186
			b = read_tree_block(root, c->blockptrs[slot]);
187 188
			continue;
		} else {
C
Chris Mason 已提交
189
			struct leaf *l = (struct leaf *)c;
190
			p->slots[level] = slot;
C
Chris Mason 已提交
191 192
			if (ins_len > 0 && leaf_free_space(l) <
			    sizeof(struct item) + ins_len) {
C
Chris Mason 已提交
193 194 195 196 197
				int sret = split_leaf(root, p, ins_len);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
			}
198 199 200 201 202 203
			return ret;
		}
	}
	return -1;
}

C
Chris Mason 已提交
204 205 206 207 208 209 210
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 */
211 212 213
static void fixup_low_keys(struct ctree_root *root,
			   struct ctree_path *path, struct key *key,
			   int level)
214 215 216
{
	int i;
	for (i = level; i < MAX_LEVEL; i++) {
217
		struct node *t;
218
		int tslot = path->slots[i];
219
		if (!path->nodes[i])
220
			break;
221
		t = &path->nodes[i]->node;
222
		memcpy(t->keys + tslot, key, sizeof(*key));
223
		write_tree_block(root, path->nodes[i]);
224 225 226 227 228
		if (tslot != 0)
			break;
	}
}

C
Chris Mason 已提交
229 230 231 232 233 234 235 236 237 238
/*
 * try to push data from one node into the next node left in the
 * tree.  The src node is found at specified level in the path.
 * If some bytes were pushed, return 0, otherwise return 1.
 *
 * Lower nodes/leaves in the path are not touched, higher nodes may
 * be modified to reflect the push.
 *
 * The path is altered to reflect the push.
 */
239 240 241 242 243 244 245 246
int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
	struct node *left;
	struct node *right;
	int push_items = 0;
	int left_nritems;
	int right_nritems;
247 248
	struct tree_buffer *t;
	struct tree_buffer *right_buf;
249 250 251 252 253 254 255

	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
		return 1;
	slot = path->slots[level + 1];
	if (slot == 0)
		return 1;

256 257 258 259 260
	t = read_tree_block(root,
		            path->nodes[level + 1]->node.blockptrs[slot - 1]);
	left = &t->node;
	right_buf = path->nodes[level];
	right = &right_buf->node;
261 262 263
	left_nritems = left->header.nritems;
	right_nritems = right->header.nritems;
	push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
264 265
	if (push_items <= 0) {
		tree_block_release(root, t);
266
		return 1;
267
	}
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282

	if (right_nritems < push_items)
		push_items = right_nritems;
	memcpy(left->keys + left_nritems, right->keys,
		push_items * sizeof(struct key));
	memcpy(left->blockptrs + left_nritems, right->blockptrs,
		push_items * sizeof(u64));
	memmove(right->keys, right->keys + push_items,
		(right_nritems - push_items) * sizeof(struct key));
	memmove(right->blockptrs, right->blockptrs + push_items,
		(right_nritems - push_items) * sizeof(u64));
	right->header.nritems -= push_items;
	left->header.nritems += push_items;

	/* adjust the pointers going up the tree */
283 284 285 286
	fixup_low_keys(root, path, right->keys, level + 1);

	write_tree_block(root, t);
	write_tree_block(root, right_buf);
287 288 289 290

	/* then fixup the leaf pointer in the path */
	if (path->slots[level] < push_items) {
		path->slots[level] += left_nritems;
291 292
		tree_block_release(root, path->nodes[level]);
		path->nodes[level] = t;
293 294 295
		path->slots[level + 1] -= 1;
	} else {
		path->slots[level] -= push_items;
296
		tree_block_release(root, t);
297 298 299 300
	}
	return 0;
}

C
Chris Mason 已提交
301 302 303 304 305 306 307 308 309 310
/*
 * try to push data from one node into the next node right in the
 * tree.  The src node is found at specified level in the path.
 * If some bytes were pushed, return 0, otherwise return 1.
 *
 * Lower nodes/leaves in the path are not touched, higher nodes may
 * be modified to reflect the push.
 *
 * The path is altered to reflect the push.
 */
311 312 313
int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
314 315
	struct tree_buffer *t;
	struct tree_buffer *src_buffer;
316 317 318 319 320 321
	struct node *dst;
	struct node *src;
	int push_items = 0;
	int dst_nritems;
	int src_nritems;

C
Chris Mason 已提交
322
	/* can't push from the root */
323 324
	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
		return 1;
C
Chris Mason 已提交
325 326

	/* only try to push inside the node higher up */
327 328 329 330
	slot = path->slots[level + 1];
	if (slot == NODEPTRS_PER_BLOCK - 1)
		return 1;

331
	if (slot >= path->nodes[level + 1]->node.header.nritems -1)
332 333
		return 1;

334 335 336 337 338
	t = read_tree_block(root,
			    path->nodes[level + 1]->node.blockptrs[slot + 1]);
	dst = &t->node;
	src_buffer = path->nodes[level];
	src = &src_buffer->node;
339 340 341
	dst_nritems = dst->header.nritems;
	src_nritems = src->header.nritems;
	push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
342 343
	if (push_items <= 0) {
		tree_block_release(root, t);
344
		return 1;
345
	}
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362

	if (src_nritems < push_items)
		push_items = src_nritems;
	memmove(dst->keys + push_items, dst->keys,
		dst_nritems * sizeof(struct key));
	memcpy(dst->keys, src->keys + src_nritems - push_items,
		push_items * sizeof(struct key));

	memmove(dst->blockptrs + push_items, dst->blockptrs,
		dst_nritems * sizeof(u64));
	memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
		push_items * sizeof(u64));

	src->header.nritems -= push_items;
	dst->header.nritems += push_items;

	/* adjust the pointers going up the tree */
363
	memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
364
		dst->keys, sizeof(struct key));
365 366 367 368 369

	write_tree_block(root, path->nodes[level + 1]);
	write_tree_block(root, t);
	write_tree_block(root, src_buffer);

C
Chris Mason 已提交
370
	/* then fixup the pointers in the path */
371 372
	if (path->slots[level] >= src->header.nritems) {
		path->slots[level] -= src->header.nritems;
373 374
		tree_block_release(root, path->nodes[level]);
		path->nodes[level] = t;
375
		path->slots[level + 1] += 1;
376 377
	} else {
		tree_block_release(root, t);
378 379 380 381
	}
	return 0;
}

C
Chris Mason 已提交
382 383
static int insert_new_root(struct ctree_root *root,
			   struct ctree_path *path, int level)
C
Chris Mason 已提交
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
{
	struct tree_buffer *t;
	struct node *lower;
	struct node *c;
	struct key *lower_key;

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	t = alloc_free_block(root);
	c = &t->node;
	memset(c, 0, sizeof(c));
	c->header.nritems = 1;
	c->header.flags = node_level(level);
	c->header.blocknr = t->blocknr;
	c->header.parentid = root->node->node.header.parentid;
	lower = &path->nodes[level-1]->node;
	if (is_leaf(lower->header.flags))
		lower_key = &((struct leaf *)lower)->items[0].key;
	else
		lower_key = lower->keys;
	memcpy(c->keys, lower_key, sizeof(struct key));
	c->blockptrs[0] = path->nodes[level-1]->blocknr;
	/* the super has an extra ref to root->node */
	tree_block_release(root, root->node);
	root->node = t;
	t->count++;
	write_tree_block(root, t);
	path->nodes[level] = t;
	path->slots[level] = 0;
	return 0;
}

C
Chris Mason 已提交
417 418 419 420 421 422
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
 */
C
Chris Mason 已提交
423
int insert_ptr(struct ctree_root *root,
C
Chris Mason 已提交
424 425 426 427 428
		struct ctree_path *path, struct key *key,
		u64 blocknr, int slot, int level)
{
	struct node *lower;
	int nritems;
C
Chris Mason 已提交
429 430

	BUG_ON(!path->nodes[level]);
C
Chris Mason 已提交
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
	lower = &path->nodes[level]->node;
	nritems = lower->header.nritems;
	if (slot > nritems)
		BUG();
	if (nritems == NODEPTRS_PER_BLOCK)
		BUG();
	if (slot != nritems) {
		memmove(lower->keys + slot + 1, lower->keys + slot,
			(nritems - slot) * sizeof(struct key));
		memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
			(nritems - slot) * sizeof(u64));
	}
	memcpy(lower->keys + slot, key, sizeof(struct key));
	lower->blockptrs[slot] = blocknr;
	lower->header.nritems++;
	if (lower->keys[1].objectid == 0)
			BUG();
	write_tree_block(root, path->nodes[level]);
	return 0;
}

C
Chris Mason 已提交
452
int split_node(struct ctree_root *root, struct ctree_path *path, int level)
453
{
C
Chris Mason 已提交
454 455 456 457
	struct tree_buffer *t;
	struct node *c;
	struct tree_buffer *split_buffer;
	struct node *split;
458
	int mid;
C
Chris Mason 已提交
459
	int ret;
460

C
Chris Mason 已提交
461 462 463 464 465 466 467 468 469 470 471 472 473
	ret = push_node_left(root, path, level);
	if (!ret)
		return 0;
	ret = push_node_right(root, path, level);
	if (!ret)
		return 0;
	t = path->nodes[level];
	c = &t->node;
	if (t == root->node) {
		/* trying to split the root, lets make a new one */
		ret = insert_new_root(root, path, level + 1);
		if (ret)
			return ret;
474
	}
C
Chris Mason 已提交
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
	split_buffer = alloc_free_block(root);
	split = &split_buffer->node;
	split->header.flags = c->header.flags;
	split->header.blocknr = split_buffer->blocknr;
	split->header.parentid = root->node->node.header.parentid;
	mid = (c->header.nritems + 1) / 2;
	memcpy(split->keys, c->keys + mid,
		(c->header.nritems - mid) * sizeof(struct key));
	memcpy(split->blockptrs, c->blockptrs + mid,
		(c->header.nritems - mid) * sizeof(u64));
	split->header.nritems = c->header.nritems - mid;
	c->header.nritems = mid;
	write_tree_block(root, t);
	write_tree_block(root, split_buffer);
	insert_ptr(root, path, split->keys, split_buffer->blocknr,
		     path->slots[level + 1] + 1, level + 1);
C
Chris Mason 已提交
491
	if (path->slots[level] >= mid) {
C
Chris Mason 已提交
492 493 494 495 496 497
		path->slots[level] -= mid;
		tree_block_release(root, t);
		path->nodes[level] = split_buffer;
		path->slots[level + 1] += 1;
	} else {
		tree_block_release(root, split_buffer);
498
	}
C
Chris Mason 已提交
499
	return 0;
500 501
}

C
Chris Mason 已提交
502 503 504 505 506
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
507 508 509 510 511 512 513 514 515 516 517 518 519
int leaf_space_used(struct leaf *l, int start, int nr)
{
	int data_len;
	int end = start + nr - 1;

	if (!nr)
		return 0;
	data_len = l->items[start].offset + l->items[start].size;
	data_len = data_len - l->items[end].offset;
	data_len += sizeof(struct item) * nr;
	return data_len;
}

C
Chris Mason 已提交
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610
/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
		   int data_size)
{
	struct tree_buffer *left_buf = path->nodes[0];
	struct leaf *left = &left_buf->leaf;
	struct leaf *right;
	struct tree_buffer *right_buf;
	struct tree_buffer *upper;
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct item *item;

	slot = path->slots[1];
	if (!path->nodes[1]) {
		return 1;
	}
	upper = path->nodes[1];
	if (slot >= upper->node.header.nritems - 1) {
		return 1;
	}
	right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
	right = &right_buf->leaf;
	free_space = leaf_free_space(right);
	if (free_space < data_size + sizeof(struct item)) {
		tree_block_release(root, right_buf);
		return 1;
	}
	for (i = left->header.nritems - 1; i >= 0; i--) {
		item = left->items + i;
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
		if (item->size + sizeof(*item) + push_space > free_space)
			break;
		push_items++;
		push_space += item->size + sizeof(*item);
	}
	if (push_items == 0) {
		tree_block_release(root, right_buf);
		return 1;
	}
	/* push left to right */
	push_space = left->items[left->header.nritems - push_items].offset +
		     left->items[left->header.nritems - push_items].size;
	push_space -= leaf_data_end(left);
	/* make room in the right data area */
	memmove(right->data + leaf_data_end(right) - push_space,
		right->data + leaf_data_end(right),
		LEAF_DATA_SIZE - leaf_data_end(right));
	/* copy from the left data area */
	memcpy(right->data + LEAF_DATA_SIZE - push_space,
		left->data + leaf_data_end(left),
		push_space);
	memmove(right->items + push_items, right->items,
		right->header.nritems * sizeof(struct item));
	/* copy the items from left to right */
	memcpy(right->items, left->items + left->header.nritems - push_items,
		push_items * sizeof(struct item));

	/* update the item pointers */
	right->header.nritems += push_items;
	push_space = LEAF_DATA_SIZE;
	for (i = 0; i < right->header.nritems; i++) {
		right->items[i].offset = push_space - right->items[i].size;
		push_space = right->items[i].offset;
	}
	left->header.nritems -= push_items;

	write_tree_block(root, left_buf);
	write_tree_block(root, right_buf);
	memcpy(upper->node.keys + slot + 1,
		&right->items[0].key, sizeof(struct key));
	write_tree_block(root, upper);
	/* then fixup the leaf pointer in the path */
	// FIXME use nritems in here somehow
	if (path->slots[0] >= left->header.nritems) {
		path->slots[0] -= left->header.nritems;
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = right_buf;
		path->slots[1] += 1;
	} else {
		tree_block_release(root, right_buf);
	}
	return 0;
}
C
Chris Mason 已提交
611 612 613 614
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
615 616 617
int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
		   int data_size)
{
618 619 620
	struct tree_buffer *right_buf = path->nodes[0];
	struct leaf *right = &right_buf->leaf;
	struct tree_buffer *t;
621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
	struct leaf *left;
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct item *item;
	int old_left_nritems;

	slot = path->slots[1];
	if (slot == 0) {
		return 1;
	}
	if (!path->nodes[1]) {
		return 1;
	}
637 638
	t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
	left = &t->leaf;
639 640
	free_space = leaf_free_space(left);
	if (free_space < data_size + sizeof(struct item)) {
641
		tree_block_release(root, t);
642 643 644 645 646 647 648 649 650 651 652 653
		return 1;
	}
	for (i = 0; i < right->header.nritems; i++) {
		item = right->items + i;
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
		if (item->size + sizeof(*item) + push_space > free_space)
			break;
		push_items++;
		push_space += item->size + sizeof(*item);
	}
	if (push_items == 0) {
654
		tree_block_release(root, t);
655 656 657 658 659 660 661 662 663 664
		return 1;
	}
	/* push data from right to left */
	memcpy(left->items + left->header.nritems,
		right->items, push_items * sizeof(struct item));
	push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
	memcpy(left->data + leaf_data_end(left) - push_space,
		right->data + right->items[push_items - 1].offset,
		push_space);
	old_left_nritems = left->header.nritems;
665 666
	BUG_ON(old_left_nritems < 0);

667 668 669 670 671 672 673 674 675 676 677 678 679 680
	for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		left->items[i].offset -= LEAF_DATA_SIZE -
			left->items[old_left_nritems -1].offset;
	}
	left->header.nritems += push_items;

	/* fixup right node */
	push_space = right->items[push_items-1].offset - leaf_data_end(right);
	memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
		leaf_data_end(right), push_space);
	memmove(right->items, right->items + push_items,
		(right->header.nritems - push_items) * sizeof(struct item));
	right->header.nritems -= push_items;
	push_space = LEAF_DATA_SIZE;
681

682 683 684 685
	for (i = 0; i < right->header.nritems; i++) {
		right->items[i].offset = push_space - right->items[i].size;
		push_space = right->items[i].offset;
	}
686 687 688 689 690

	write_tree_block(root, t);
	write_tree_block(root, right_buf);

	fixup_low_keys(root, path, &right->items[0].key, 1);
691 692 693 694

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
695 696
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = t;
697 698
		path->slots[1] -= 1;
	} else {
699
		tree_block_release(root, t);
700 701
		path->slots[0] -= push_items;
	}
702
	BUG_ON(path->slots[0] < 0);
703 704 705
	return 0;
}

C
Chris Mason 已提交
706 707 708 709
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 */
710 711
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
{
712 713 714 715 716
	struct tree_buffer *l_buf = path->nodes[0];
	struct leaf *l = &l_buf->leaf;
	int nritems;
	int mid;
	int slot;
717
	struct leaf *right;
718
	struct tree_buffer *right_buffer;
719 720 721 722 723 724
	int space_needed = data_size + sizeof(struct item);
	int data_copy_size;
	int rt_data_off;
	int i;
	int ret;

C
Chris Mason 已提交
725 726
	if (push_leaf_left(root, path, data_size) == 0 ||
	    push_leaf_right(root, path, data_size) == 0) {
727 728 729 730
		l_buf = path->nodes[0];
		l = &l_buf->leaf;
		if (leaf_free_space(l) >= sizeof(struct item) + data_size)
			return 0;
731
	}
C
Chris Mason 已提交
732 733 734 735 736
	if (!path->nodes[1]) {
		ret = insert_new_root(root, path, 1);
		if (ret)
			return ret;
	}
737 738 739 740 741 742 743 744
	slot = path->slots[0];
	nritems = l->header.nritems;
	mid = (nritems + 1)/ 2;

	right_buffer = alloc_free_block(root);
	BUG_ON(!right_buffer);
	BUG_ON(mid == nritems);
	right = &right_buffer->leaf;
745 746 747 748 749 750 751 752 753 754 755
	memset(right, 0, sizeof(*right));
	if (mid <= slot) {
		if (leaf_space_used(l, mid, nritems - mid) + space_needed >
			LEAF_DATA_SIZE)
			BUG();
	} else {
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
			LEAF_DATA_SIZE)
			BUG();
	}
	right->header.nritems = nritems - mid;
756 757
	right->header.blocknr = right_buffer->blocknr;
	right->header.flags = node_level(0);
C
Chris Mason 已提交
758
	right->header.parentid = root->node->node.header.parentid;
759 760 761 762 763 764 765 766
	data_copy_size = l->items[mid].offset + l->items[mid].size -
			 leaf_data_end(l);
	memcpy(right->items, l->items + mid,
	       (nritems - mid) * sizeof(struct item));
	memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
	       l->data + leaf_data_end(l), data_copy_size);
	rt_data_off = LEAF_DATA_SIZE -
		     (l->items[mid].offset + l->items[mid].size);
C
Chris Mason 已提交
767 768

	for (i = 0; i < right->header.nritems; i++)
769
		right->items[i].offset += rt_data_off;
C
Chris Mason 已提交
770

771 772
	l->header.nritems = mid;
	ret = insert_ptr(root, path, &right->items[0].key,
C
Chris Mason 已提交
773
			  right_buffer->blocknr, path->slots[1] + 1, 1);
774 775 776 777
	write_tree_block(root, right_buffer);
	write_tree_block(root, l_buf);

	BUG_ON(path->slots[0] != slot);
778
	if (mid <= slot) {
779 780
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = right_buffer;
781 782
		path->slots[0] -= mid;
		path->slots[1] += 1;
783 784 785
	} else
		tree_block_release(root, right_buffer);
	BUG_ON(path->slots[0] < 0);
786 787 788
	return ret;
}

C
Chris Mason 已提交
789 790 791 792
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
793 794 795 796 797
int insert_item(struct ctree_root *root, struct key *key,
			  void *data, int data_size)
{
	int ret;
	int slot;
798
	int slot_orig;
799
	struct leaf *leaf;
800
	struct tree_buffer *leaf_buf;
801 802 803 804
	unsigned int nritems;
	unsigned int data_end;
	struct ctree_path path;

C
Chris Mason 已提交
805
	/* create a root if there isn't one */
C
Chris Mason 已提交
806
	if (!root->node)
C
Chris Mason 已提交
807
		BUG();
808
	init_path(&path);
C
Chris Mason 已提交
809
	ret = search_slot(root, key, &path, data_size);
810 811
	if (ret == 0) {
		release_path(root, &path);
812
		return -EEXIST;
813
	}
814

815 816 817
	slot_orig = path.slots[0];
	leaf_buf = path.nodes[0];
	leaf = &leaf_buf->leaf;
C
Chris Mason 已提交
818

819 820
	nritems = leaf->header.nritems;
	data_end = leaf_data_end(leaf);
821

822 823 824 825
	if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
		BUG();

	slot = path.slots[0];
826
	BUG_ON(slot < 0);
827
	if (slot == 0)
828
		fixup_low_keys(root, &path, key, 1);
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849
	if (slot != nritems) {
		int i;
		unsigned int old_data = leaf->items[slot].offset +
					leaf->items[slot].size;

		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		for (i = slot; i < nritems; i++)
			leaf->items[i].offset -= data_size;

		/* shift the items */
		memmove(leaf->items + slot + 1, leaf->items + slot,
		        (nritems - slot) * sizeof(struct item));

		/* shift the data */
		memmove(leaf->data + data_end - data_size, leaf->data +
		        data_end, old_data - data_end);
		data_end = old_data;
	}
C
Chris Mason 已提交
850
	/* copy the new data in */
851 852 853 854 855
	memcpy(&leaf->items[slot].key, key, sizeof(struct key));
	leaf->items[slot].offset = data_end - data_size;
	leaf->items[slot].size = data_size;
	memcpy(leaf->data + data_end - data_size, data, data_size);
	leaf->header.nritems += 1;
856
	write_tree_block(root, leaf_buf);
857 858
	if (leaf_free_space(leaf) < 0)
		BUG();
859
	release_path(root, &path);
860 861 862
	return 0;
}

C
Chris Mason 已提交
863
/*
C
Chris Mason 已提交
864
 * delete the pointer from a given node.
C
Chris Mason 已提交
865 866 867 868 869
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way the root if required.  The root is converted into
 * a leaf if all the nodes are emptied.
 */
870 871 872
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
873
	struct tree_buffer *t;
874 875
	struct node *node;
	int nritems;
876
	u64 blocknr;
877 878

	while(1) {
879 880
		t = path->nodes[level];
		if (!t)
881
			break;
882
		node = &t->node;
883 884 885 886 887 888 889 890 891 892 893
		slot = path->slots[level];
		nritems = node->header.nritems;

		if (slot != nritems -1) {
			memmove(node->keys + slot, node->keys + slot + 1,
				sizeof(struct key) * (nritems - slot - 1));
			memmove(node->blockptrs + slot,
				node->blockptrs + slot + 1,
				sizeof(u64) * (nritems - slot - 1));
		}
		node->header.nritems--;
894
		write_tree_block(root, t);
895
		blocknr = t->blocknr;
896 897
		if (node->header.nritems != 0) {
			if (slot == 0)
898 899
				fixup_low_keys(root, path, node->keys,
					       level + 1);
C
Chris Mason 已提交
900
			break;
901
		}
902 903 904 905
		if (t == root->node) {
			/* just turn the root into a leaf and break */
			root->node->node.header.flags = node_level(0);
			write_tree_block(root, t);
906 907 908
			break;
		}
		level++;
909
		free_extent(root, blocknr, 1);
910 911 912 913 914 915
		if (!path->nodes[level])
			BUG();
	}
	return 0;
}

C
Chris Mason 已提交
916 917 918 919
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
920
int del_item(struct ctree_root *root, struct ctree_path *path)
921 922 923
{
	int slot;
	struct leaf *leaf;
924
	struct tree_buffer *leaf_buf;
925 926 927
	int doff;
	int dsize;

928 929
	leaf_buf = path->nodes[0];
	leaf = &leaf_buf->leaf;
930
	slot = path->slots[0];
931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
	doff = leaf->items[slot].offset;
	dsize = leaf->items[slot].size;

	if (slot != leaf->header.nritems - 1) {
		int i;
		int data_end = leaf_data_end(leaf);
		memmove(leaf->data + data_end + dsize,
			leaf->data + data_end,
			doff - data_end);
		for (i = slot + 1; i < leaf->header.nritems; i++)
			leaf->items[i].offset += dsize;
		memmove(leaf->items + slot, leaf->items + slot + 1,
			sizeof(struct item) *
			(leaf->header.nritems - slot - 1));
	}
	leaf->header.nritems -= 1;
C
Chris Mason 已提交
947
	/* delete the leaf if we've emptied it */
948
	if (leaf->header.nritems == 0) {
949 950 951
		if (leaf_buf == root->node) {
			leaf->header.flags = node_level(0);
			write_tree_block(root, leaf_buf);
952
		} else {
953
			del_ptr(root, path, 1);
954 955
			free_extent(root, leaf_buf->blocknr, 1);
		}
956
	} else {
C
Chris Mason 已提交
957
		int used = leaf_space_used(leaf, 0, leaf->header.nritems);
958
		if (slot == 0)
959 960
			fixup_low_keys(root, path, &leaf->items[0].key, 1);
		write_tree_block(root, leaf_buf);
C
Chris Mason 已提交
961
		/* delete the leaf if it is mostly empty */
C
Chris Mason 已提交
962
		if (used < LEAF_DATA_SIZE / 3) {
963 964 965 966
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
967
			slot = path->slots[1];
968
			leaf_buf->count++;
969
			push_leaf_left(root, path, 1);
C
Chris Mason 已提交
970 971
			if (leaf->header.nritems)
				push_leaf_right(root, path, 1);
972
			if (leaf->header.nritems == 0) {
C
Chris Mason 已提交
973
				u64 blocknr = leaf_buf->blocknr;
974 975
				path->slots[1] = slot;
				del_ptr(root, path, 1);
C
Chris Mason 已提交
976 977 978 979
				tree_block_release(root, leaf_buf);
				free_extent(root, blocknr, 1);
			} else {
				tree_block_release(root, leaf_buf);
980 981 982 983 984 985
			}
		}
	}
	return 0;
}

986 987 988 989 990 991
int next_leaf(struct ctree_root *root, struct ctree_path *path)
{
	int slot;
	int level = 1;
	u64 blocknr;
	struct tree_buffer *c;
C
Chris Mason 已提交
992
	struct tree_buffer *next = NULL;
993 994 995 996 997 998 999 1000 1001 1002 1003

	while(level < MAX_LEVEL) {
		if (!path->nodes[level])
			return -1;
		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= c->node.header.nritems) {
			level++;
			continue;
		}
		blocknr = c->node.blockptrs[slot];
C
Chris Mason 已提交
1004 1005
		if (next)
			tree_block_release(root, next);
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022
		next = read_tree_block(root, blocknr);
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
		tree_block_release(root, c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!level)
			break;
		next = read_tree_block(root, next->node.blockptrs[0]);
	}
	return 0;
}

1023 1024
/*
 * for testing only
 *
 * Returns the next pseudo-random key for the test runs in main(), taken
 * from rand() and reduced modulo max_key.  The iteration counter i is
 * accepted so callers can switch to sequential keys, but it is currently
 * unused.  A max_key of zero yields 0 instead of dividing by zero.
 */
int next_key(int i, int max_key)
{
	(void)i;
	if (max_key == 0)
		return 0;
	return rand() % max_key;
}

int main() {
1030
	struct ctree_root *root;
1031
	struct key ins;
1032
	struct key last = { (u64)-1, 0, 0};
1033 1034 1035 1036
	char *buf;
	int i;
	int num;
	int ret;
C
Chris Mason 已提交
1037 1038
	int run_size = 20000000;
	int max_key =  100000000;
1039 1040
	int tree_size = 0;
	struct ctree_path path;
C
Chris Mason 已提交
1041
	struct ctree_super_block super;
1042

1043 1044 1045
	radix_tree_init();


C
Chris Mason 已提交
1046
	root = open_ctree("dbfile", &super);
1047 1048 1049 1050 1051 1052 1053

	srand(55);
	for (i = 0; i < run_size; i++) {
		buf = malloc(64);
		num = next_key(i, max_key);
		// num = i;
		sprintf(buf, "string-%d", num);
C
Chris Mason 已提交
1054
		if (i % 10000 == 0)
C
Chris Mason 已提交
1055
			fprintf(stderr, "insert %d:%d\n", num, i);
1056 1057 1058
		ins.objectid = num;
		ins.offset = 0;
		ins.flags = 0;
1059
		ret = insert_item(root, &ins, buf, strlen(buf));
1060 1061
		if (!ret)
			tree_size++;
C
Chris Mason 已提交
1062
		free(buf);
1063
	}
C
Chris Mason 已提交
1064
	write_ctree_super(root, &super);
1065
	close_ctree(root);
C
Chris Mason 已提交
1066 1067

	root = open_ctree("dbfile", &super);
1068
	printf("starting search\n");
1069 1070 1071 1072 1073
	srand(55);
	for (i = 0; i < run_size; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1074
		if (i % 10000 == 0)
C
Chris Mason 已提交
1075
			fprintf(stderr, "search %d:%d\n", num, i);
C
Chris Mason 已提交
1076
		ret = search_slot(root, &ins, &path, 0);
1077
		if (ret) {
1078
			print_tree(root, root->node);
1079 1080 1081
			printf("unable to find %d\n", num);
			exit(1);
		}
1082 1083
		release_path(root, &path);
	}
C
Chris Mason 已提交
1084
	write_ctree_super(root, &super);
1085
	close_ctree(root);
C
Chris Mason 已提交
1086
	root = open_ctree("dbfile", &super);
1087 1088 1089 1090 1091
	printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
	        node_level(root->node->node.header.flags),
		root->node->node.header.nritems,
		NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
	printf("all searches good, deleting some items\n");
1092 1093
	i = 0;
	srand(55);
1094 1095 1096 1097
	for (i = 0 ; i < run_size/4; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1098 1099 1100
		ret = search_slot(root, &ins, &path, -1);
		if (!ret) {
			if (i % 10000 == 0)
C
Chris Mason 已提交
1101
				fprintf(stderr, "del %d:%d\n", num, i);
C
Chris Mason 已提交
1102 1103 1104 1105 1106
			ret = del_item(root, &path);
			if (ret != 0)
				BUG();
			tree_size--;
		}
1107
		release_path(root, &path);
1108
	}
C
Chris Mason 已提交
1109 1110 1111
	write_ctree_super(root, &super);
	close_ctree(root);
	root = open_ctree("dbfile", &super);
1112
	srand(128);
1113
	for (i = 0; i < run_size; i++) {
1114
		buf = malloc(64);
1115
		num = next_key(i, max_key);
1116
		sprintf(buf, "string-%d", num);
1117
		ins.objectid = num;
C
Chris Mason 已提交
1118
		if (i % 10000 == 0)
C
Chris Mason 已提交
1119
			fprintf(stderr, "insert %d:%d\n", num, i);
1120
		ret = insert_item(root, &ins, buf, strlen(buf));
1121 1122
		if (!ret)
			tree_size++;
C
Chris Mason 已提交
1123
		free(buf);
1124
	}
C
Chris Mason 已提交
1125
	write_ctree_super(root, &super);
1126
	close_ctree(root);
C
Chris Mason 已提交
1127
	root = open_ctree("dbfile", &super);
1128
	srand(128);
1129
	printf("starting search2\n");
1130 1131 1132 1133
	for (i = 0; i < run_size; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1134
		if (i % 10000 == 0)
C
Chris Mason 已提交
1135
			fprintf(stderr, "search %d:%d\n", num, i);
C
Chris Mason 已提交
1136
		ret = search_slot(root, &ins, &path, 0);
1137 1138 1139 1140 1141 1142 1143 1144 1145
		if (ret) {
			print_tree(root, root->node);
			printf("unable to find %d\n", num);
			exit(1);
		}
		release_path(root, &path);
	}
	printf("starting big long delete run\n");
	while(root->node && root->node->node.header.nritems > 0) {
1146 1147 1148 1149
		struct leaf *leaf;
		int slot;
		ins.objectid = (u64)-1;
		init_path(&path);
C
Chris Mason 已提交
1150
		ret = search_slot(root, &ins, &path, -1);
1151 1152 1153
		if (ret == 0)
			BUG();

1154
		leaf = &path.nodes[0]->leaf;
1155 1156 1157 1158 1159 1160
		slot = path.slots[0];
		if (slot != leaf->header.nritems)
			BUG();
		while(path.slots[0] > 0) {
			path.slots[0] -= 1;
			slot = path.slots[0];
1161
			leaf = &path.nodes[0]->leaf;
1162 1163 1164 1165

			if (comp_keys(&last, &leaf->items[slot].key) <= 0)
				BUG();
			memcpy(&last, &leaf->items[slot].key, sizeof(last));
C
Chris Mason 已提交
1166 1167
			if (tree_size % 10000 == 0)
				printf("big del %d:%d\n", tree_size, i);
1168 1169 1170
			ret = del_item(root, &path);
			if (ret != 0) {
				printf("del_item returned %d\n", ret);
1171
				BUG();
1172
			}
1173 1174
			tree_size--;
		}
1175
		release_path(root, &path);
1176
	}
1177
	printf("tree size is now %d\n", tree_size);
1178
	printf("map tree\n");
C
Chris Mason 已提交
1179
	print_tree(root->extent_root, root->extent_root->node);
C
Chris Mason 已提交
1180 1181
	write_ctree_super(root, &super);
	close_ctree(root);
1182 1183
	return 0;
}