ctree.c 31.5 KB
Newer Older
1 2 3
#include <stdio.h>
#include <stdlib.h>
#include "kerncompat.h"
4 5 6
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
C
Chris Mason 已提交
7
#include "print-tree.h"
8

C
Chris Mason 已提交
9 10
/* forward declarations: both functions are defined further down in this file */
int split_node(struct ctree_root *root, struct ctree_path *path, int level);
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
C
Chris Mason 已提交
11 12 13 14
/*
 * forward declarations for the node balancing and pointer removal
 * helpers defined further down in this file
 */
int push_node_left(struct ctree_root *root, struct ctree_path *path, int level);
int push_node_right(struct ctree_root *root,
		    struct ctree_path *path, int level);
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
15

C
Chris Mason 已提交
16
inline void init_path(struct ctree_path *p)
17 18 19 20
{
	memset(p, 0, sizeof(*p));
}

C
Chris Mason 已提交
21
void release_path(struct ctree_root *root, struct ctree_path *p)
22 23 24 25 26 27 28 29 30
{
	int i;
	for (i = 0; i < MAX_LEVEL; i++) {
		if (!p->nodes[i])
			break;
		tree_block_release(root, p->nodes[i]);
	}
}

C
Chris Mason 已提交
31 32 33 34 35
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
36 37 38 39
static inline unsigned int leaf_data_end(struct leaf *leaf)
{
	unsigned int nr = leaf->header.nritems;
	if (nr == 0)
40
		return sizeof(leaf->data);
41 42 43
	return leaf->items[nr-1].offset;
}

C
Chris Mason 已提交
44 45 46 47 48
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
C
Chris Mason 已提交
49
int leaf_free_space(struct leaf *leaf)
50 51 52 53 54 55 56
{
	int data_end = leaf_data_end(leaf);
	int nritems = leaf->header.nritems;
	char *items_end = (char *)(leaf->items + nritems + 1);
	return (char *)(leaf->data + data_end) - (char *)items_end;
}

C
Chris Mason 已提交
57 58 59
/*
 * compare two keys in a memcmp fashion
 */
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
int comp_keys(struct key *k1, struct key *k2)
{
	if (k1->objectid > k2->objectid)
		return 1;
	if (k1->objectid < k2->objectid)
		return -1;
	if (k1->flags > k2->flags)
		return 1;
	if (k1->flags < k2->flags)
		return -1;
	if (k1->offset > k2->offset)
		return 1;
	if (k1->offset < k2->offset)
		return -1;
	return 0;
}
C
Chris Mason 已提交
76 77 78 79 80 81 82 83 84 85

/*
 * binary search for key in the array p.  The array holds 'max' entries
 * that are item_size bytes apart, each starting with a struct key.
 *
 * Returns 0 with *slot set to the matching entry when the key is found.
 * Otherwise returns 1 and *slot is the position where the key would be
 * inserted, which equals max when the key is bigger than every entry.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
		       int max, int *slot)
{
	int lo = 0;
	int hi = max;

	while (lo < hi) {
		int probe = (lo + hi) / 2;
		struct key *cur = (struct key *)(p + probe * item_size);
		int cmp = comp_keys(cur, key);

		if (cmp == 0) {
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}
	*slot = lo;
	return 1;
}

C
Chris Mason 已提交
113 114 115 116
/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
117 118 119 120 121 122 123 124 125 126 127 128 129
int bin_search(struct node *c, struct key *key, int *slot)
{
	if (is_leaf(c->header.flags)) {
		struct leaf *l = (struct leaf *)c;
		return generic_bin_search((void *)l->items, sizeof(struct item),
					  key, c->header.nritems, slot);
	} else {
		return generic_bin_search((void *)c->keys, sizeof(struct key),
					  key, c->header.nritems, slot);
	}
	return -1;
}

C
Chris Mason 已提交
130 131 132 133 134 135 136
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted.
C
Chris Mason 已提交
137 138 139 140
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
C
Chris Mason 已提交
141
 */
C
Chris Mason 已提交
142 143
int search_slot(struct ctree_root *root, struct key *key,
		struct ctree_path *p, int ins_len)
144
{
145 146
	struct tree_buffer *b = root->node;
	struct node *c;
147 148 149
	int slot;
	int ret;
	int level;
C
Chris Mason 已提交
150

151 152 153
	b->count++;
	while (b) {
		c = &b->node;
154
		level = node_level(c->header.flags);
155
		p->nodes[level] = b;
156 157 158 159 160
		ret = bin_search(c, key, &slot);
		if (!is_leaf(c->header.flags)) {
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
C
Chris Mason 已提交
161 162
			if (ins_len > 0 &&
			    c->header.nritems == NODEPTRS_PER_BLOCK) {
C
Chris Mason 已提交
163 164 165 166 167 168 169 170
				int sret = split_node(root, p, level);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
				b = p->nodes[level];
				c = &b->node;
				slot = p->slots[level];
			}
171
			b = read_tree_block(root, c->blockptrs[slot]);
172 173
			continue;
		} else {
C
Chris Mason 已提交
174
			struct leaf *l = (struct leaf *)c;
175
			p->slots[level] = slot;
C
Chris Mason 已提交
176 177
			if (ins_len > 0 && leaf_free_space(l) <
			    sizeof(struct item) + ins_len) {
C
Chris Mason 已提交
178 179 180 181 182
				int sret = split_leaf(root, p, ins_len);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
			}
183 184 185 186 187 188
			return ret;
		}
	}
	return -1;
}

C
Chris Mason 已提交
189 190 191 192 193 194 195
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 */
196 197 198
static void fixup_low_keys(struct ctree_root *root,
			   struct ctree_path *path, struct key *key,
			   int level)
199 200 201
{
	int i;
	for (i = level; i < MAX_LEVEL; i++) {
202
		struct node *t;
203
		int tslot = path->slots[i];
204
		if (!path->nodes[i])
205
			break;
206
		t = &path->nodes[i]->node;
207
		memcpy(t->keys + tslot, key, sizeof(*key));
208
		write_tree_block(root, path->nodes[i]);
209 210 211 212 213
		if (tslot != 0)
			break;
	}
}

C
Chris Mason 已提交
214 215 216 217 218 219 220 221 222 223
/*
 * try to push data from one node into the next node left in the
 * tree.  The src node is found at specified level in the path.
 * If some bytes were pushed, return 0, otherwise return 1.
 *
 * Lower nodes/leaves in the path are not touched, higher nodes may
 * be modified to reflect the push.
 *
 * The path is altered to reflect the push.
 */
224 225 226 227 228 229 230 231
int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
	struct node *left;
	struct node *right;
	int push_items = 0;
	int left_nritems;
	int right_nritems;
232 233
	struct tree_buffer *t;
	struct tree_buffer *right_buf;
234 235 236 237 238 239 240

	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
		return 1;
	slot = path->slots[level + 1];
	if (slot == 0)
		return 1;

241 242 243 244 245
	t = read_tree_block(root,
		            path->nodes[level + 1]->node.blockptrs[slot - 1]);
	left = &t->node;
	right_buf = path->nodes[level];
	right = &right_buf->node;
246 247 248
	left_nritems = left->header.nritems;
	right_nritems = right->header.nritems;
	push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
249 250
	if (push_items <= 0) {
		tree_block_release(root, t);
251
		return 1;
252
	}
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267

	if (right_nritems < push_items)
		push_items = right_nritems;
	memcpy(left->keys + left_nritems, right->keys,
		push_items * sizeof(struct key));
	memcpy(left->blockptrs + left_nritems, right->blockptrs,
		push_items * sizeof(u64));
	memmove(right->keys, right->keys + push_items,
		(right_nritems - push_items) * sizeof(struct key));
	memmove(right->blockptrs, right->blockptrs + push_items,
		(right_nritems - push_items) * sizeof(u64));
	right->header.nritems -= push_items;
	left->header.nritems += push_items;

	/* adjust the pointers going up the tree */
268 269 270 271
	fixup_low_keys(root, path, right->keys, level + 1);

	write_tree_block(root, t);
	write_tree_block(root, right_buf);
272 273 274 275

	/* then fixup the leaf pointer in the path */
	if (path->slots[level] < push_items) {
		path->slots[level] += left_nritems;
276 277
		tree_block_release(root, path->nodes[level]);
		path->nodes[level] = t;
278 279 280
		path->slots[level + 1] -= 1;
	} else {
		path->slots[level] -= push_items;
281
		tree_block_release(root, t);
282 283 284 285
	}
	return 0;
}

C
Chris Mason 已提交
286 287 288 289 290 291 292 293 294 295
/*
 * try to push data from one node into the next node right in the
 * tree.  The src node is found at specified level in the path.
 * If some bytes were pushed, return 0, otherwise return 1.
 *
 * Lower nodes/leaves in the path are not touched, higher nodes may
 * be modified to reflect the push.
 *
 * The path is altered to reflect the push.
 */
296 297 298
int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
299 300
	struct tree_buffer *t;
	struct tree_buffer *src_buffer;
301 302 303 304 305 306
	struct node *dst;
	struct node *src;
	int push_items = 0;
	int dst_nritems;
	int src_nritems;

C
Chris Mason 已提交
307
	/* can't push from the root */
308 309
	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
		return 1;
C
Chris Mason 已提交
310 311

	/* only try to push inside the node higher up */
312 313 314 315
	slot = path->slots[level + 1];
	if (slot == NODEPTRS_PER_BLOCK - 1)
		return 1;

316
	if (slot >= path->nodes[level + 1]->node.header.nritems -1)
317 318
		return 1;

319 320 321 322 323
	t = read_tree_block(root,
			    path->nodes[level + 1]->node.blockptrs[slot + 1]);
	dst = &t->node;
	src_buffer = path->nodes[level];
	src = &src_buffer->node;
324 325 326
	dst_nritems = dst->header.nritems;
	src_nritems = src->header.nritems;
	push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
327 328
	if (push_items <= 0) {
		tree_block_release(root, t);
329
		return 1;
330
	}
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347

	if (src_nritems < push_items)
		push_items = src_nritems;
	memmove(dst->keys + push_items, dst->keys,
		dst_nritems * sizeof(struct key));
	memcpy(dst->keys, src->keys + src_nritems - push_items,
		push_items * sizeof(struct key));

	memmove(dst->blockptrs + push_items, dst->blockptrs,
		dst_nritems * sizeof(u64));
	memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
		push_items * sizeof(u64));

	src->header.nritems -= push_items;
	dst->header.nritems += push_items;

	/* adjust the pointers going up the tree */
348
	memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
349
		dst->keys, sizeof(struct key));
350 351 352 353 354

	write_tree_block(root, path->nodes[level + 1]);
	write_tree_block(root, t);
	write_tree_block(root, src_buffer);

C
Chris Mason 已提交
355
	/* then fixup the pointers in the path */
356 357
	if (path->slots[level] >= src->header.nritems) {
		path->slots[level] -= src->header.nritems;
358 359
		tree_block_release(root, path->nodes[level]);
		path->nodes[level] = t;
360
		path->slots[level + 1] += 1;
361 362
	} else {
		tree_block_release(root, t);
363 364 365 366
	}
	return 0;
}

C
Chris Mason 已提交
367 368 369 370 371
/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
 */
C
Chris Mason 已提交
372 373
static int insert_new_root(struct ctree_root *root,
			   struct ctree_path *path, int level)
C
Chris Mason 已提交
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406
{
	struct tree_buffer *t;
	struct node *lower;
	struct node *c;
	struct key *lower_key;

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	t = alloc_free_block(root);
	c = &t->node;
	memset(c, 0, sizeof(c));
	c->header.nritems = 1;
	c->header.flags = node_level(level);
	c->header.blocknr = t->blocknr;
	c->header.parentid = root->node->node.header.parentid;
	lower = &path->nodes[level-1]->node;
	if (is_leaf(lower->header.flags))
		lower_key = &((struct leaf *)lower)->items[0].key;
	else
		lower_key = lower->keys;
	memcpy(c->keys, lower_key, sizeof(struct key));
	c->blockptrs[0] = path->nodes[level-1]->blocknr;
	/* the super has an extra ref to root->node */
	tree_block_release(root, root->node);
	root->node = t;
	t->count++;
	write_tree_block(root, t);
	path->nodes[level] = t;
	path->slots[level] = 0;
	return 0;
}

C
Chris Mason 已提交
407 408 409
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
C
Chris Mason 已提交
410
 *
C
Chris Mason 已提交
411 412 413
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
 */
C
Chris Mason 已提交
414
int insert_ptr(struct ctree_root *root,
C
Chris Mason 已提交
415 416 417 418 419
		struct ctree_path *path, struct key *key,
		u64 blocknr, int slot, int level)
{
	struct node *lower;
	int nritems;
C
Chris Mason 已提交
420 421

	BUG_ON(!path->nodes[level]);
C
Chris Mason 已提交
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
	lower = &path->nodes[level]->node;
	nritems = lower->header.nritems;
	if (slot > nritems)
		BUG();
	if (nritems == NODEPTRS_PER_BLOCK)
		BUG();
	if (slot != nritems) {
		memmove(lower->keys + slot + 1, lower->keys + slot,
			(nritems - slot) * sizeof(struct key));
		memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
			(nritems - slot) * sizeof(u64));
	}
	memcpy(lower->keys + slot, key, sizeof(struct key));
	lower->blockptrs[slot] = blocknr;
	lower->header.nritems++;
	if (lower->keys[1].objectid == 0)
			BUG();
	write_tree_block(root, path->nodes[level]);
	return 0;
}

C
Chris Mason 已提交
443 444 445 446 447 448 449
/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
 */
C
Chris Mason 已提交
450
int split_node(struct ctree_root *root, struct ctree_path *path, int level)
451
{
C
Chris Mason 已提交
452 453 454 455
	struct tree_buffer *t;
	struct node *c;
	struct tree_buffer *split_buffer;
	struct node *split;
456
	int mid;
C
Chris Mason 已提交
457
	int ret;
458

C
Chris Mason 已提交
459 460 461 462 463 464 465 466 467 468 469 470 471
	ret = push_node_left(root, path, level);
	if (!ret)
		return 0;
	ret = push_node_right(root, path, level);
	if (!ret)
		return 0;
	t = path->nodes[level];
	c = &t->node;
	if (t == root->node) {
		/* trying to split the root, lets make a new one */
		ret = insert_new_root(root, path, level + 1);
		if (ret)
			return ret;
472
	}
C
Chris Mason 已提交
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
	split_buffer = alloc_free_block(root);
	split = &split_buffer->node;
	split->header.flags = c->header.flags;
	split->header.blocknr = split_buffer->blocknr;
	split->header.parentid = root->node->node.header.parentid;
	mid = (c->header.nritems + 1) / 2;
	memcpy(split->keys, c->keys + mid,
		(c->header.nritems - mid) * sizeof(struct key));
	memcpy(split->blockptrs, c->blockptrs + mid,
		(c->header.nritems - mid) * sizeof(u64));
	split->header.nritems = c->header.nritems - mid;
	c->header.nritems = mid;
	write_tree_block(root, t);
	write_tree_block(root, split_buffer);
	insert_ptr(root, path, split->keys, split_buffer->blocknr,
		     path->slots[level + 1] + 1, level + 1);
C
Chris Mason 已提交
489
	if (path->slots[level] >= mid) {
C
Chris Mason 已提交
490 491 492 493 494 495
		path->slots[level] -= mid;
		tree_block_release(root, t);
		path->nodes[level] = split_buffer;
		path->slots[level + 1] += 1;
	} else {
		tree_block_release(root, split_buffer);
496
	}
C
Chris Mason 已提交
497
	return 0;
498 499
}

C
Chris Mason 已提交
500 501 502 503 504
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
505 506 507 508 509 510 511 512 513 514 515 516 517
int leaf_space_used(struct leaf *l, int start, int nr)
{
	int data_len;
	int end = start + nr - 1;

	if (!nr)
		return 0;
	data_len = l->items[start].offset + l->items[start].size;
	data_len = data_len - l->items[end].offset;
	data_len += sizeof(struct item) * nr;
	return data_len;
}

C
Chris Mason 已提交
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 *
 * Nothing is pushed (return 1) when the path has no parent level, when
 * the leaf is the last pointer of its parent, when the right leaf lacks
 * room for data_size plus one item header, or when not even the last
 * item fits.  After a push the parent key of the right leaf is updated
 * and the path follows its slot if that slot moved to the right leaf.
 */
int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
		   int data_size)
{
	struct tree_buffer *left_buf = path->nodes[0];
	struct leaf *left = &left_buf->leaf;
	struct leaf *right;
	struct tree_buffer *right_buf;
	struct tree_buffer *upper;
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct item *item;

	slot = path->slots[1];
	if (!path->nodes[1]) {
		return 1;
	}
	upper = path->nodes[1];
	/* no right sibling below the same parent */
	if (slot >= upper->node.header.nritems - 1) {
		return 1;
	}
	right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
	right = &right_buf->leaf;
	free_space = leaf_free_space(right);
	if (free_space < data_size + sizeof(struct item)) {
		tree_block_release(root, right_buf);
		return 1;
	}
	/*
	 * walk the left leaf from its last item towards the front and count
	 * how many items fit in the right leaf; when the path's own slot is
	 * reached, room for the pending insert is reserved as well
	 */
	for (i = left->header.nritems - 1; i >= 0; i--) {
		item = left->items + i;
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
		if (item->size + sizeof(*item) + push_space > free_space)
			break;
		push_items++;
		push_space += item->size + sizeof(*item);
	}
	if (push_items == 0) {
		tree_block_release(root, right_buf);
		return 1;
	}
	/* push left to right */
	/* push_space becomes the number of data bytes owned by the moved items */
	push_space = left->items[left->header.nritems - push_items].offset +
		     left->items[left->header.nritems - push_items].size;
	push_space -= leaf_data_end(left);
	/* make room in the right data area */
	memmove(right->data + leaf_data_end(right) - push_space,
		right->data + leaf_data_end(right),
		LEAF_DATA_SIZE - leaf_data_end(right));
	/* copy from the left data area */
	memcpy(right->data + LEAF_DATA_SIZE - push_space,
		left->data + leaf_data_end(left),
		push_space);
	/* open a gap at the front of the right item array */
	memmove(right->items + push_items, right->items,
		right->header.nritems * sizeof(struct item));
	/* copy the items from left to right */
	memcpy(right->items, left->items + left->header.nritems - push_items,
		push_items * sizeof(struct item));

	/* update the item pointers */
	right->header.nritems += push_items;
	/* recompute every offset so the data is packed down from LEAF_DATA_SIZE */
	push_space = LEAF_DATA_SIZE;
	for (i = 0; i < right->header.nritems; i++) {
		right->items[i].offset = push_space - right->items[i].size;
		push_space = right->items[i].offset;
	}
	left->header.nritems -= push_items;

	write_tree_block(root, left_buf);
	write_tree_block(root, right_buf);
	/* the right leaf has a new first key, record it in the parent */
	memcpy(upper->node.keys + slot + 1,
		&right->items[0].key, sizeof(struct key));
	write_tree_block(root, upper);
	/* then fixup the leaf pointer in the path */
	if (path->slots[0] >= left->header.nritems) {
		path->slots[0] -= left->header.nritems;
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = right_buf;
		path->slots[1] += 1;
	} else {
		tree_block_release(root, right_buf);
	}
	return 0;
}
C
Chris Mason 已提交
608 609 610 611
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
612 613 614
int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
		   int data_size)
{
615 616 617
	struct tree_buffer *right_buf = path->nodes[0];
	struct leaf *right = &right_buf->leaf;
	struct tree_buffer *t;
618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633
	struct leaf *left;
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct item *item;
	int old_left_nritems;

	slot = path->slots[1];
	if (slot == 0) {
		return 1;
	}
	if (!path->nodes[1]) {
		return 1;
	}
634 635
	t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
	left = &t->leaf;
636 637
	free_space = leaf_free_space(left);
	if (free_space < data_size + sizeof(struct item)) {
638
		tree_block_release(root, t);
639 640 641 642 643 644 645 646 647 648 649 650
		return 1;
	}
	for (i = 0; i < right->header.nritems; i++) {
		item = right->items + i;
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
		if (item->size + sizeof(*item) + push_space > free_space)
			break;
		push_items++;
		push_space += item->size + sizeof(*item);
	}
	if (push_items == 0) {
651
		tree_block_release(root, t);
652 653 654 655 656 657 658 659 660 661
		return 1;
	}
	/* push data from right to left */
	memcpy(left->items + left->header.nritems,
		right->items, push_items * sizeof(struct item));
	push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
	memcpy(left->data + leaf_data_end(left) - push_space,
		right->data + right->items[push_items - 1].offset,
		push_space);
	old_left_nritems = left->header.nritems;
662 663
	BUG_ON(old_left_nritems < 0);

664 665 666 667 668 669 670 671 672 673 674 675 676 677
	for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		left->items[i].offset -= LEAF_DATA_SIZE -
			left->items[old_left_nritems -1].offset;
	}
	left->header.nritems += push_items;

	/* fixup right node */
	push_space = right->items[push_items-1].offset - leaf_data_end(right);
	memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
		leaf_data_end(right), push_space);
	memmove(right->items, right->items + push_items,
		(right->header.nritems - push_items) * sizeof(struct item));
	right->header.nritems -= push_items;
	push_space = LEAF_DATA_SIZE;
678

679 680 681 682
	for (i = 0; i < right->header.nritems; i++) {
		right->items[i].offset = push_space - right->items[i].size;
		push_space = right->items[i].offset;
	}
683 684 685 686 687

	write_tree_block(root, t);
	write_tree_block(root, right_buf);

	fixup_low_keys(root, path, &right->items[0].key, 1);
688 689 690 691

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
692 693
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = t;
694 695
		path->slots[1] -= 1;
	} else {
696
		tree_block_release(root, t);
697 698
		path->slots[0] -= push_items;
	}
699
	BUG_ON(path->slots[0] < 0);
700 701 702
	return 0;
}

C
Chris Mason 已提交
703 704 705 706
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 */
707 708
int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
{
709 710 711 712 713
	struct tree_buffer *l_buf = path->nodes[0];
	struct leaf *l = &l_buf->leaf;
	int nritems;
	int mid;
	int slot;
714
	struct leaf *right;
715
	struct tree_buffer *right_buffer;
716 717 718 719 720 721
	int space_needed = data_size + sizeof(struct item);
	int data_copy_size;
	int rt_data_off;
	int i;
	int ret;

C
Chris Mason 已提交
722 723
	if (push_leaf_left(root, path, data_size) == 0 ||
	    push_leaf_right(root, path, data_size) == 0) {
724 725 726 727
		l_buf = path->nodes[0];
		l = &l_buf->leaf;
		if (leaf_free_space(l) >= sizeof(struct item) + data_size)
			return 0;
728
	}
C
Chris Mason 已提交
729 730 731 732 733
	if (!path->nodes[1]) {
		ret = insert_new_root(root, path, 1);
		if (ret)
			return ret;
	}
734 735 736 737 738 739 740 741
	slot = path->slots[0];
	nritems = l->header.nritems;
	mid = (nritems + 1)/ 2;

	right_buffer = alloc_free_block(root);
	BUG_ON(!right_buffer);
	BUG_ON(mid == nritems);
	right = &right_buffer->leaf;
742 743
	memset(right, 0, sizeof(*right));
	if (mid <= slot) {
C
Chris Mason 已提交
744
		/* FIXME, just alloc a new leaf here */
745 746 747 748
		if (leaf_space_used(l, mid, nritems - mid) + space_needed >
			LEAF_DATA_SIZE)
			BUG();
	} else {
C
Chris Mason 已提交
749
		/* FIXME, just alloc a new leaf here */
750 751 752 753 754
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
			LEAF_DATA_SIZE)
			BUG();
	}
	right->header.nritems = nritems - mid;
755 756
	right->header.blocknr = right_buffer->blocknr;
	right->header.flags = node_level(0);
C
Chris Mason 已提交
757
	right->header.parentid = root->node->node.header.parentid;
758 759 760 761 762 763 764 765
	data_copy_size = l->items[mid].offset + l->items[mid].size -
			 leaf_data_end(l);
	memcpy(right->items, l->items + mid,
	       (nritems - mid) * sizeof(struct item));
	memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
	       l->data + leaf_data_end(l), data_copy_size);
	rt_data_off = LEAF_DATA_SIZE -
		     (l->items[mid].offset + l->items[mid].size);
C
Chris Mason 已提交
766 767

	for (i = 0; i < right->header.nritems; i++)
768
		right->items[i].offset += rt_data_off;
C
Chris Mason 已提交
769

770 771
	l->header.nritems = mid;
	ret = insert_ptr(root, path, &right->items[0].key,
C
Chris Mason 已提交
772
			  right_buffer->blocknr, path->slots[1] + 1, 1);
773 774 775 776
	write_tree_block(root, right_buffer);
	write_tree_block(root, l_buf);

	BUG_ON(path->slots[0] != slot);
777
	if (mid <= slot) {
778 779
		tree_block_release(root, path->nodes[0]);
		path->nodes[0] = right_buffer;
780 781
		path->slots[0] -= mid;
		path->slots[1] += 1;
782 783 784
	} else
		tree_block_release(root, right_buffer);
	BUG_ON(path->slots[0] < 0);
785 786 787
	return ret;
}

C
Chris Mason 已提交
788 789 790 791
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
792 793 794 795 796
int insert_item(struct ctree_root *root, struct key *key,
			  void *data, int data_size)
{
	int ret;
	int slot;
797
	int slot_orig;
798
	struct leaf *leaf;
799
	struct tree_buffer *leaf_buf;
800 801 802 803
	unsigned int nritems;
	unsigned int data_end;
	struct ctree_path path;

C
Chris Mason 已提交
804
	/* create a root if there isn't one */
C
Chris Mason 已提交
805
	if (!root->node)
C
Chris Mason 已提交
806
		BUG();
807
	init_path(&path);
C
Chris Mason 已提交
808
	ret = search_slot(root, key, &path, data_size);
809 810
	if (ret == 0) {
		release_path(root, &path);
811
		return -EEXIST;
812
	}
813

814 815 816
	slot_orig = path.slots[0];
	leaf_buf = path.nodes[0];
	leaf = &leaf_buf->leaf;
C
Chris Mason 已提交
817

818 819
	nritems = leaf->header.nritems;
	data_end = leaf_data_end(leaf);
820

821 822 823 824
	if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
		BUG();

	slot = path.slots[0];
825
	BUG_ON(slot < 0);
826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846
	if (slot != nritems) {
		int i;
		unsigned int old_data = leaf->items[slot].offset +
					leaf->items[slot].size;

		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		for (i = slot; i < nritems; i++)
			leaf->items[i].offset -= data_size;

		/* shift the items */
		memmove(leaf->items + slot + 1, leaf->items + slot,
		        (nritems - slot) * sizeof(struct item));

		/* shift the data */
		memmove(leaf->data + data_end - data_size, leaf->data +
		        data_end, old_data - data_end);
		data_end = old_data;
	}
C
Chris Mason 已提交
847
	/* copy the new data in */
848 849 850 851 852
	memcpy(&leaf->items[slot].key, key, sizeof(struct key));
	leaf->items[slot].offset = data_end - data_size;
	leaf->items[slot].size = data_size;
	memcpy(leaf->data + data_end - data_size, data, data_size);
	leaf->header.nritems += 1;
853
	write_tree_block(root, leaf_buf);
854 855
	if (slot == 0)
		fixup_low_keys(root, &path, key, 1);
856 857
	if (leaf_free_space(leaf) < 0)
		BUG();
858
	release_path(root, &path);
859 860 861
	return 0;
}

C
Chris Mason 已提交
862
/*
C
Chris Mason 已提交
863
 * delete the pointer from a given node.
C
Chris Mason 已提交
864 865 866 867 868
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way the root if required.  The root is converted into
 * a leaf if all the nodes are emptied.
 */
869 870 871
int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
{
	int slot;
872
	struct tree_buffer *t;
873 874
	struct node *node;
	int nritems;
875
	u64 blocknr;
876 877

	while(1) {
878 879
		t = path->nodes[level];
		if (!t)
880
			break;
881
		node = &t->node;
882 883 884 885 886 887 888 889 890 891 892
		slot = path->slots[level];
		nritems = node->header.nritems;

		if (slot != nritems -1) {
			memmove(node->keys + slot, node->keys + slot + 1,
				sizeof(struct key) * (nritems - slot - 1));
			memmove(node->blockptrs + slot,
				node->blockptrs + slot + 1,
				sizeof(u64) * (nritems - slot - 1));
		}
		node->header.nritems--;
893
		blocknr = t->blocknr;
894
		write_tree_block(root, t);
895
		if (node->header.nritems != 0) {
896
			int tslot;
897
			if (slot == 0)
898 899
				fixup_low_keys(root, path, node->keys,
					       level + 1);
900 901 902 903 904 905 906 907 908 909
			tslot = path->slots[level + 1];
			t->count++;
			if (push_node_left(root, path, level))
				push_node_right(root, path, level);
			path->slots[level + 1] = tslot;
			if (node->header.nritems != 0) {
				tree_block_release(root, t);
				break;
			}
			tree_block_release(root, t);
910
		}
911 912 913 914
		if (t == root->node) {
			/* just turn the root into a leaf and break */
			root->node->node.header.flags = node_level(0);
			write_tree_block(root, t);
915 916 917
			break;
		}
		level++;
918
		free_extent(root, blocknr, 1);
919 920 921 922 923 924
		if (!path->nodes[level])
			BUG();
	}
	return 0;
}

C
Chris Mason 已提交
925 926 927 928
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
929
int del_item(struct ctree_root *root, struct ctree_path *path)
930 931 932
{
	int slot;
	struct leaf *leaf;
933
	struct tree_buffer *leaf_buf;
934 935 936
	int doff;
	int dsize;

937 938
	leaf_buf = path->nodes[0];
	leaf = &leaf_buf->leaf;
939
	slot = path->slots[0];
940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955
	doff = leaf->items[slot].offset;
	dsize = leaf->items[slot].size;

	if (slot != leaf->header.nritems - 1) {
		int i;
		int data_end = leaf_data_end(leaf);
		memmove(leaf->data + data_end + dsize,
			leaf->data + data_end,
			doff - data_end);
		for (i = slot + 1; i < leaf->header.nritems; i++)
			leaf->items[i].offset += dsize;
		memmove(leaf->items + slot, leaf->items + slot + 1,
			sizeof(struct item) *
			(leaf->header.nritems - slot - 1));
	}
	leaf->header.nritems -= 1;
C
Chris Mason 已提交
956
	/* delete the leaf if we've emptied it */
957
	if (leaf->header.nritems == 0) {
958 959 960
		if (leaf_buf == root->node) {
			leaf->header.flags = node_level(0);
			write_tree_block(root, leaf_buf);
961
		} else {
962
			del_ptr(root, path, 1);
963 964
			free_extent(root, leaf_buf->blocknr, 1);
		}
965
	} else {
C
Chris Mason 已提交
966
		int used = leaf_space_used(leaf, 0, leaf->header.nritems);
967
		if (slot == 0)
968 969
			fixup_low_keys(root, path, &leaf->items[0].key, 1);
		write_tree_block(root, leaf_buf);
C
Chris Mason 已提交
970
		/* delete the leaf if it is mostly empty */
C
Chris Mason 已提交
971
		if (used < LEAF_DATA_SIZE / 3) {
972 973 974 975
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
976
			slot = path->slots[1];
977
			leaf_buf->count++;
978
			push_leaf_left(root, path, 1);
C
Chris Mason 已提交
979 980
			if (leaf->header.nritems)
				push_leaf_right(root, path, 1);
981
			if (leaf->header.nritems == 0) {
C
Chris Mason 已提交
982
				u64 blocknr = leaf_buf->blocknr;
983 984
				path->slots[1] = slot;
				del_ptr(root, path, 1);
C
Chris Mason 已提交
985 986 987 988
				tree_block_release(root, leaf_buf);
				free_extent(root, blocknr, 1);
			} else {
				tree_block_release(root, leaf_buf);
989 990 991 992 993 994
			}
		}
	}
	return 0;
}

C
Chris Mason 已提交
995 996 997 998
/*
 * walk up the tree as far as required to find the next leaf.
 * returns 0 if it found something or -1 if there are no greater leaves.
 */
999 1000 1001 1002 1003 1004
int next_leaf(struct ctree_root *root, struct ctree_path *path)
{
	int slot;
	int level = 1;
	u64 blocknr;
	struct tree_buffer *c;
C
Chris Mason 已提交
1005
	struct tree_buffer *next = NULL;
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016

	while(level < MAX_LEVEL) {
		if (!path->nodes[level])
			return -1;
		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= c->node.header.nritems) {
			level++;
			continue;
		}
		blocknr = c->node.blockptrs[slot];
C
Chris Mason 已提交
1017 1018
		if (next)
			tree_block_release(root, next);
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
		next = read_tree_block(root, blocknr);
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
		tree_block_release(root, c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!level)
			break;
		next = read_tree_block(root, next->node.blockptrs[0]);
	}
	return 0;
}

/* some sample code to insert, search & delete items */
#if 0
1038 1039
/*
 * for testing only
 *
 * Returns the key to use for iteration i of a test pass: a pseudo-random
 * value taken from rand() and reduced modulo max_key.  Callers reseed
 * with srand() before each pass so that the same key sequence can be
 * replayed.
 *
 * i is currently unused; return it instead of the rand() value to get
 * sequential keys.
 *
 * A max_key of 0 yields 0 rather than dividing by zero.
 */
int next_key(int i, int max_key)
{
	(void)i;
	if (max_key == 0)
		return 0;
	return rand() % max_key;
}
int main() {
	struct key ins;
1045
	struct key last = { (u64)-1, 0, 0};
1046 1047 1048 1049
	char *buf;
	int i;
	int num;
	int ret;
C
Chris Mason 已提交
1050 1051
	int run_size = 20000000;
	int max_key =  100000000;
1052 1053
	int tree_size = 0;
	struct ctree_path path;
C
Chris Mason 已提交
1054
	struct ctree_super_block super;
1055
	struct ctree_root *root;
1056

1057 1058 1059
	radix_tree_init();


C
Chris Mason 已提交
1060
	root = open_ctree("dbfile", &super);
1061 1062 1063 1064 1065 1066
	srand(55);
	for (i = 0; i < run_size; i++) {
		buf = malloc(64);
		num = next_key(i, max_key);
		// num = i;
		sprintf(buf, "string-%d", num);
C
Chris Mason 已提交
1067
		if (i % 10000 == 0)
C
Chris Mason 已提交
1068
			fprintf(stderr, "insert %d:%d\n", num, i);
1069 1070 1071
		ins.objectid = num;
		ins.offset = 0;
		ins.flags = 0;
1072
		ret = insert_item(root, &ins, buf, strlen(buf));
1073 1074
		if (!ret)
			tree_size++;
C
Chris Mason 已提交
1075
		free(buf);
1076
	}
C
Chris Mason 已提交
1077
	write_ctree_super(root, &super);
1078
	close_ctree(root);
C
Chris Mason 已提交
1079 1080

	root = open_ctree("dbfile", &super);
1081
	printf("starting search\n");
1082 1083 1084 1085 1086
	srand(55);
	for (i = 0; i < run_size; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1087
		if (i % 10000 == 0)
C
Chris Mason 已提交
1088
			fprintf(stderr, "search %d:%d\n", num, i);
C
Chris Mason 已提交
1089
		ret = search_slot(root, &ins, &path, 0);
1090
		if (ret) {
1091
			print_tree(root, root->node);
1092 1093 1094
			printf("unable to find %d\n", num);
			exit(1);
		}
1095 1096
		release_path(root, &path);
	}
C
Chris Mason 已提交
1097
	write_ctree_super(root, &super);
1098
	close_ctree(root);
C
Chris Mason 已提交
1099
	root = open_ctree("dbfile", &super);
1100 1101 1102 1103 1104
	printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
	        node_level(root->node->node.header.flags),
		root->node->node.header.nritems,
		NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
	printf("all searches good, deleting some items\n");
1105 1106
	i = 0;
	srand(55);
1107 1108 1109 1110
	for (i = 0 ; i < run_size/4; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1111 1112 1113
		ret = search_slot(root, &ins, &path, -1);
		if (!ret) {
			if (i % 10000 == 0)
C
Chris Mason 已提交
1114
				fprintf(stderr, "del %d:%d\n", num, i);
C
Chris Mason 已提交
1115 1116 1117 1118 1119
			ret = del_item(root, &path);
			if (ret != 0)
				BUG();
			tree_size--;
		}
1120
		release_path(root, &path);
1121
	}
C
Chris Mason 已提交
1122 1123 1124
	write_ctree_super(root, &super);
	close_ctree(root);
	root = open_ctree("dbfile", &super);
1125
	srand(128);
1126
	for (i = 0; i < run_size; i++) {
1127
		buf = malloc(64);
1128
		num = next_key(i, max_key);
1129
		sprintf(buf, "string-%d", num);
1130
		ins.objectid = num;
C
Chris Mason 已提交
1131
		if (i % 10000 == 0)
C
Chris Mason 已提交
1132
			fprintf(stderr, "insert %d:%d\n", num, i);
1133
		ret = insert_item(root, &ins, buf, strlen(buf));
1134 1135
		if (!ret)
			tree_size++;
C
Chris Mason 已提交
1136
		free(buf);
1137
	}
C
Chris Mason 已提交
1138
	write_ctree_super(root, &super);
1139
	close_ctree(root);
C
Chris Mason 已提交
1140
	root = open_ctree("dbfile", &super);
1141
	srand(128);
1142
	printf("starting search2\n");
1143 1144 1145 1146
	for (i = 0; i < run_size; i++) {
		num = next_key(i, max_key);
		ins.objectid = num;
		init_path(&path);
C
Chris Mason 已提交
1147
		if (i % 10000 == 0)
C
Chris Mason 已提交
1148
			fprintf(stderr, "search %d:%d\n", num, i);
C
Chris Mason 已提交
1149
		ret = search_slot(root, &ins, &path, 0);
1150 1151 1152 1153 1154 1155 1156 1157 1158
		if (ret) {
			print_tree(root, root->node);
			printf("unable to find %d\n", num);
			exit(1);
		}
		release_path(root, &path);
	}
	printf("starting big long delete run\n");
	while(root->node && root->node->node.header.nritems > 0) {
1159 1160 1161 1162
		struct leaf *leaf;
		int slot;
		ins.objectid = (u64)-1;
		init_path(&path);
C
Chris Mason 已提交
1163
		ret = search_slot(root, &ins, &path, -1);
1164 1165 1166
		if (ret == 0)
			BUG();

1167
		leaf = &path.nodes[0]->leaf;
1168 1169 1170 1171 1172 1173
		slot = path.slots[0];
		if (slot != leaf->header.nritems)
			BUG();
		while(path.slots[0] > 0) {
			path.slots[0] -= 1;
			slot = path.slots[0];
1174
			leaf = &path.nodes[0]->leaf;
1175 1176 1177 1178

			if (comp_keys(&last, &leaf->items[slot].key) <= 0)
				BUG();
			memcpy(&last, &leaf->items[slot].key, sizeof(last));
C
Chris Mason 已提交
1179 1180
			if (tree_size % 10000 == 0)
				printf("big del %d:%d\n", tree_size, i);
1181 1182 1183
			ret = del_item(root, &path);
			if (ret != 0) {
				printf("del_item returned %d\n", ret);
1184
				BUG();
1185
			}
1186 1187
			tree_size--;
		}
1188
		release_path(root, &path);
1189
	}
1190
	printf("tree size is now %d\n", tree_size);
1191
	printf("map tree\n");
C
Chris Mason 已提交
1192
	print_tree(root->extent_root, root->extent_root->node);
C
Chris Mason 已提交
1193 1194
	write_ctree_super(root, &super);
	close_ctree(root);
1195 1196
	return 0;
}
1197
#endif