ctree.c 84.9 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19
#include <linux/sched.h>
20 21
#include "ctree.h"
#include "disk-io.h"
22
#include "transaction.h"
23
#include "print-tree.h"
24
#include "locking.h"
25

26 27 28
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
29
		      *root, struct btrfs_key *ins_key,
30
		      struct btrfs_path *path, int data_size, int extend);
31 32
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
33
			  struct extent_buffer *src, int empty);
34 35 36 37
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
38 39
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);
40

C
Chris Mason 已提交
41
inline void btrfs_init_path(struct btrfs_path *p)
C
Chris Mason 已提交
42
{
C
Chris Mason 已提交
43
	memset(p, 0, sizeof(*p));
C
Chris Mason 已提交
44 45
}

C
Chris Mason 已提交
46
struct btrfs_path *btrfs_alloc_path(void)
C
Chris Mason 已提交
47
{
C
Chris Mason 已提交
48 49
	struct btrfs_path *path;
	path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
50
	if (path) {
C
Chris Mason 已提交
51
		btrfs_init_path(path);
52 53
		path->reada = 1;
	}
C
Chris Mason 已提交
54
	return path;
C
Chris Mason 已提交
55 56
}

C
Chris Mason 已提交
57
void btrfs_free_path(struct btrfs_path *p)
58
{
C
Chris Mason 已提交
59 60
	btrfs_release_path(NULL, p);
	kmem_cache_free(btrfs_path_cachep, p);
61 62
}

C
Chris Mason 已提交
63
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
64 65
{
	int i;
66

C
Chris Mason 已提交
67
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
68
		p->slots[i] = 0;
69
		if (!p->nodes[i])
70 71 72 73 74
			continue;
		if (p->locks[i]) {
			btrfs_tree_unlock(p->nodes[i]);
			p->locks[i] = 0;
		}
75
		free_extent_buffer(p->nodes[i]);
76
		p->nodes[i] = NULL;
77 78 79
	}
}

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;
	spin_lock(&root->node_lock);
	eb = root->node;
	extent_buffer_get(eb);
	spin_unlock(&root->node_lock);
	return eb;
}

struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while(1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);

		spin_lock(&root->node_lock);
		if (eb == root->node) {
			spin_unlock(&root->node_lock);
			break;
		}
		spin_unlock(&root->node_lock);

		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

111 112 113 114 115 116 117 118
static void add_root_to_dirty_list(struct btrfs_root *root)
{
	if (root->track_dirty && list_empty(&root->dirty_list)) {
		list_add(&root->dirty_list,
			 &root->fs_info->dirty_cowonly_roots);
	}
}

119 120 121 122 123 124 125 126 127 128
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct extent_buffer *cow;
	u32 nritems;
	int ret = 0;
	int level;
	struct btrfs_key first_key;
129
	struct btrfs_root *new_root;
130

131 132 133 134 135 136
	new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
	if (!new_root)
		return -ENOMEM;

	memcpy(new_root, root, sizeof(*new_root));
	new_root->root_key.objectid = new_root_objectid;
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (nritems) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		else
			btrfs_node_key_to_cpu(buf, &first_key, 0);
	} else {
		first_key.objectid = 0;
	}
152
	cow = btrfs_alloc_free_block(trans, new_root, buf->len,
153 154 155
				       new_root_objectid,
				       trans->transid, first_key.objectid,
				       level, buf->start, 0);
156 157
	if (IS_ERR(cow)) {
		kfree(new_root);
158
		return PTR_ERR(cow);
159
	}
160 161 162 163 164

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, new_root_objectid);
165
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
166 167

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
Y
Yan Zheng 已提交
168
	ret = btrfs_inc_ref(trans, new_root, buf, 0);
169 170
	kfree(new_root);

171 172 173 174 175 176 177 178 179
	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

int __btrfs_cow_block(struct btrfs_trans_handle *trans,
180 181 182 183
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
184 185
			     u64 search_start, u64 empty_size,
			     u64 prealloc_dest)
C
Chris Mason 已提交
186
{
187
	u64 root_gen;
188
	struct extent_buffer *cow;
189
	u32 nritems;
190 191
	int ret = 0;
	int different_trans = 0;
192
	int level;
193
	int unlock_orig = 0;
194 195
	struct btrfs_key first_key;

196 197 198 199 200
	if (*cow_ret == buf)
		unlock_orig = 1;

	WARN_ON(!btrfs_tree_locked(buf));

201 202 203 204 205 206 207
	if (root->ref_cows) {
		root_gen = trans->transid;
	} else {
		root_gen = 0;
	}
	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
208
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
209

210 211 212 213 214 215 216 217 218 219
	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (nritems) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		else
			btrfs_node_key_to_cpu(buf, &first_key, 0);
	} else {
		first_key.objectid = 0;
	}
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
	if (prealloc_dest) {
		struct btrfs_key ins;

		ins.objectid = prealloc_dest;
		ins.offset = buf->len;
		ins.type = BTRFS_EXTENT_ITEM_KEY;

		ret = btrfs_alloc_reserved_extent(trans, root,
						  root->root_key.objectid,
						  root_gen, level,
						  first_key.objectid,
						  &ins);
		BUG_ON(ret);
		cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
					    buf->len);
	} else {
		cow = btrfs_alloc_free_block(trans, root, buf->len,
					     root->root_key.objectid,
					     root_gen, first_key.objectid,
					     level, search_start, empty_size);
	}
241 242
	if (IS_ERR(cow))
		return PTR_ERR(cow);
243

244
	copy_extent_buffer(cow, buf, 0, 0, cow->len);
245
	btrfs_set_header_bytenr(cow, cow->start);
246 247
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, root->root_key.objectid);
248
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
249

250 251
	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (btrfs_header_generation(buf) != trans->transid) {
252
		different_trans = 1;
Y
Yan Zheng 已提交
253
		ret = btrfs_inc_ref(trans, root, buf, 1);
254 255 256 257 258 259
		if (ret)
			return ret;
	} else {
		clean_tree_block(trans, root, buf);
	}

C
Chris Mason 已提交
260
	if (buf == root->node) {
261
		WARN_ON(parent && parent != buf);
262
		root_gen = btrfs_header_generation(buf);
263 264

		spin_lock(&root->node_lock);
C
Chris Mason 已提交
265
		root->node = cow;
266
		extent_buffer_get(cow);
267 268
		spin_unlock(&root->node_lock);

C
Chris Mason 已提交
269
		if (buf != root->commit_root) {
270
			btrfs_free_extent(trans, root, buf->start,
271 272
					  buf->len, root->root_key.objectid,
					  root_gen, 0, 0, 1);
C
Chris Mason 已提交
273
		}
274
		free_extent_buffer(buf);
275
		add_root_to_dirty_list(root);
C
Chris Mason 已提交
276
	} else {
277
		root_gen = btrfs_header_generation(parent);
278
		btrfs_set_node_blockptr(parent, parent_slot,
279
					cow->start);
280 281 282
		WARN_ON(trans->transid == 0);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
C
Chris Mason 已提交
283
		btrfs_mark_buffer_dirty(parent);
284
		WARN_ON(btrfs_header_generation(parent) != trans->transid);
285 286 287
		btrfs_free_extent(trans, root, buf->start, buf->len,
				  btrfs_header_owner(parent), root_gen,
				  0, 0, 1);
C
Chris Mason 已提交
288
	}
289 290
	if (unlock_orig)
		btrfs_tree_unlock(buf);
291
	free_extent_buffer(buf);
C
Chris Mason 已提交
292
	btrfs_mark_buffer_dirty(cow);
C
Chris Mason 已提交
293
	*cow_ret = cow;
C
Chris Mason 已提交
294 295 296
	return 0;
}

297 298 299
int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
300
		    struct extent_buffer **cow_ret, u64 prealloc_dest)
301 302
{
	u64 search_start;
C
Chris Mason 已提交
303
	u64 header_trans;
304
	int ret;
C
Chris Mason 已提交
305

306 307 308 309 310 311 312 313 314 315
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}
C
Chris Mason 已提交
316 317

	header_trans = btrfs_header_generation(buf);
318 319 320
	spin_lock(&root->fs_info->hash_lock);
	if (header_trans == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
321
		*cow_ret = buf;
322
		spin_unlock(&root->fs_info->hash_lock);
323
		WARN_ON(prealloc_dest);
324 325
		return 0;
	}
326
	spin_unlock(&root->fs_info->hash_lock);
327
	search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
328
	ret = __btrfs_cow_block(trans, root, buf, parent,
329 330
				 parent_slot, cow_ret, search_start, 0,
				 prealloc_dest);
331
	return ret;
332 333
}

334
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
335
{
336
	if (blocknr < other && other - (blocknr + blocksize) < 32768)
337
		return 1;
338
	if (blocknr > other && blocknr - (other + blocksize) < 32768)
339 340 341 342
		return 1;
	return 0;
}

343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	if (k1.objectid > k2->objectid)
		return 1;
	if (k1.objectid < k2->objectid)
		return -1;
	if (k1.type > k2->type)
		return 1;
	if (k1.type < k2->type)
		return -1;
	if (k1.offset > k2->offset)
		return 1;
	if (k1.offset < k2->offset)
		return -1;
	return 0;
}


368
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
369
		       struct btrfs_root *root, struct extent_buffer *parent,
370 371
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
372
{
373
	struct extent_buffer *cur;
374
	u64 blocknr;
375
	u64 gen;
376 377
	u64 search_start = *last_ret;
	u64 last_block = 0;
378 379 380 381 382
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
383
	int parent_level;
384 385
	int uptodate;
	u32 blocksize;
386 387
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;
388

389 390 391 392
	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

393 394 395 396 397 398 399 400 401 402
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}
403

404 405
	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
406 407 408 409 410 411 412
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;
413

414 415 416 417 418 419 420 421
		if (!parent->map_token) {
			map_extent_buffer(parent,
					btrfs_node_key_ptr_offset(i),
					sizeof(struct btrfs_key_ptr),
					&parent->map_token, &parent->kaddr,
					&parent->map_start, &parent->map_len,
					KM_USER1);
		}
422 423 424 425 426
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
427
		blocknr = btrfs_node_blockptr(parent, i);
428
		gen = btrfs_node_ptr_generation(parent, i);
429 430
		if (last_block == 0)
			last_block = blocknr;
431

432
		if (i > 0) {
433 434
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
435
		}
C
Chris Mason 已提交
436
		if (!close && i < end_slot - 2) {
437 438
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
439
		}
440 441
		if (close) {
			last_block = blocknr;
442
			continue;
443
		}
444 445 446 447 448
		if (parent->map_token) {
			unmap_extent_buffer(parent, parent->map_token,
					    KM_USER1);
			parent->map_token = NULL;
		}
449

450 451
		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
452
			uptodate = btrfs_buffer_uptodate(cur, gen);
453 454
		else
			uptodate = 0;
455
		if (!cur || !uptodate) {
456
			if (cache_only) {
457
				free_extent_buffer(cur);
458 459
				continue;
			}
460 461
			if (!cur) {
				cur = read_tree_block(root, blocknr,
462
							 blocksize, gen);
463
			} else if (!uptodate) {
464
				btrfs_read_buffer(cur, gen);
465
			}
466
		}
467
		if (search_start == 0)
468
			search_start = last_block;
469

470
		btrfs_tree_lock(cur);
471
		err = __btrfs_cow_block(trans, root, cur, parent, i,
472
					&cur, search_start,
473
					min(16 * blocksize,
474
					    (end_slot - i) * blocksize), 0);
Y
Yan 已提交
475
		if (err) {
476
			btrfs_tree_unlock(cur);
477
			free_extent_buffer(cur);
478
			break;
Y
Yan 已提交
479
		}
480 481
		search_start = cur->start;
		last_block = cur->start;
482
		*last_ret = search_start;
483 484
		btrfs_tree_unlock(cur);
		free_extent_buffer(cur);
485
	}
486 487 488 489 490
	if (parent->map_token) {
		unmap_extent_buffer(parent, parent->map_token,
				    KM_USER1);
		parent->map_token = NULL;
	}
491 492 493
	return err;
}

C
Chris Mason 已提交
494 495 496 497 498
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
C
Chris Mason 已提交
499
static inline unsigned int leaf_data_end(struct btrfs_root *root,
500
					 struct extent_buffer *leaf)
501
{
502
	u32 nr = btrfs_header_nritems(leaf);
503
	if (nr == 0)
C
Chris Mason 已提交
504
		return BTRFS_LEAF_DATA_SIZE(root);
505
	return btrfs_item_offset_nr(leaf, nr - 1);
506 507
}

C
Chris Mason 已提交
508 509
static int check_node(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
C
Chris Mason 已提交
510
{
511 512 513 514
	struct extent_buffer *parent = NULL;
	struct extent_buffer *node = path->nodes[level];
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key node_key;
C
Chris Mason 已提交
515
	int parent_slot;
516 517
	int slot;
	struct btrfs_key cpukey;
518
	u32 nritems = btrfs_header_nritems(node);
C
Chris Mason 已提交
519 520

	if (path->nodes[level + 1])
521
		parent = path->nodes[level + 1];
A
Aneesh 已提交
522

523
	slot = path->slots[level];
524 525
	BUG_ON(nritems == 0);
	if (parent) {
A
Aneesh 已提交
526
		parent_slot = path->slots[level + 1];
527 528 529
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_node_key(node, &node_key, 0);
		BUG_ON(memcmp(&parent_key, &node_key,
C
Chris Mason 已提交
530
			      sizeof(struct btrfs_disk_key)));
531
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
532
		       btrfs_header_bytenr(node));
C
Chris Mason 已提交
533
	}
C
Chris Mason 已提交
534
	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
535
	if (slot != 0) {
536 537 538
		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
539 540
	}
	if (slot < nritems - 1) {
541 542 543
		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
C
Chris Mason 已提交
544 545 546 547
	}
	return 0;
}

C
Chris Mason 已提交
548 549
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
C
Chris Mason 已提交
550
{
551 552
	struct extent_buffer *leaf = path->nodes[level];
	struct extent_buffer *parent = NULL;
C
Chris Mason 已提交
553
	int parent_slot;
554
	struct btrfs_key cpukey;
555 556 557
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key leaf_key;
	int slot = path->slots[0];
558

559
	u32 nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
560 561

	if (path->nodes[level + 1])
562
		parent = path->nodes[level + 1];
563 564 565 566 567

	if (nritems == 0)
		return 0;

	if (parent) {
A
Aneesh 已提交
568
		parent_slot = path->slots[level + 1];
569 570
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_item_key(leaf, &leaf_key, 0);
571

572
		BUG_ON(memcmp(&parent_key, &leaf_key,
C
Chris Mason 已提交
573
		       sizeof(struct btrfs_disk_key)));
574
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
575
		       btrfs_header_bytenr(leaf));
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600
	}
#if 0
	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
		btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
		btrfs_item_key(leaf, &leaf_key, i);
		if (comp_keys(&leaf_key, &cpukey) >= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", i);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, i) !=
			btrfs_item_end_nr(leaf, i + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", i);
			BUG_ON(1);
		}
		if (i == 0) {
			if (btrfs_item_offset_nr(leaf, i) +
			       btrfs_item_size_nr(leaf, i) !=
			       BTRFS_LEAF_DATA_SIZE(root)) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d first offset bad\n", i);
				BUG_ON(1);
			}
		}
C
Chris Mason 已提交
601
	}
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
	if (nritems > 0) {
		if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d bad size \n", nritems - 1);
				BUG_ON(1);
		}
	}
#endif
	if (slot != 0 && slot < nritems - 1) {
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
		if (comp_keys(&leaf_key, &cpukey) <= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", slot);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, slot - 1) !=
		       btrfs_item_end_nr(leaf, slot)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
624 625
	}
	if (slot < nritems - 1) {
626 627 628 629 630 631 632 633 634
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
		if (btrfs_item_offset_nr(leaf, slot) !=
			btrfs_item_end_nr(leaf, slot + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
C
Chris Mason 已提交
635
	}
636 637
	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
C
Chris Mason 已提交
638 639 640
	return 0;
}

641 642
static int noinline check_block(struct btrfs_root *root,
				struct btrfs_path *path, int level)
C
Chris Mason 已提交
643
{
644
	u64 found_start;
645
	return 0;
646 647 648 649 650 651 652 653 654
	if (btrfs_header_level(path->nodes[level]) != level)
	    printk("warning: bad level %Lu wanted %d found %d\n",
		   path->nodes[level]->start, level,
		   btrfs_header_level(path->nodes[level]));
	found_start = btrfs_header_bytenr(path->nodes[level]);
	if (found_start != path->nodes[level]->start) {
	    printk("warning: bad bytentr %Lu found %Lu\n",
		   path->nodes[level]->start, found_start);
	}
655
#if 0
656 657
	struct extent_buffer *buf = path->nodes[level];

658 659 660
	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
				 (unsigned long)btrfs_header_fsid(buf),
				 BTRFS_FSID_SIZE)) {
661
		printk("warning bad block %Lu\n", buf->start);
662
		return 1;
663
	}
664
#endif
C
Chris Mason 已提交
665
	if (level == 0)
C
Chris Mason 已提交
666 667
		return check_leaf(root, path, level);
	return check_node(root, path, level);
C
Chris Mason 已提交
668 669
}

C
Chris Mason 已提交
670
/*
671 672 673
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
C
Chris Mason 已提交
674 675 676 677 678 679
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
680 681 682
static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
			      int item_size, struct btrfs_key *key,
			      int max, int *slot)
683 684 685 686 687
{
	int low = 0;
	int high = max;
	int mid;
	int ret;
688
	struct btrfs_disk_key *tmp = NULL;
689 690 691 692 693 694
	struct btrfs_disk_key unaligned;
	unsigned long offset;
	char *map_token = NULL;
	char *kaddr = NULL;
	unsigned long map_start = 0;
	unsigned long map_len = 0;
695
	int err;
696 697 698

	while(low < high) {
		mid = (low + high) / 2;
699 700 701 702 703
		offset = p + mid * item_size;

		if (!map_token || offset < map_start ||
		    (offset + sizeof(struct btrfs_disk_key)) >
		    map_start + map_len) {
704
			if (map_token) {
705
				unmap_extent_buffer(eb, map_token, KM_USER0);
706 707 708 709 710 711 712 713 714 715 716 717 718 719 720
				map_token = NULL;
			}
			err = map_extent_buffer(eb, offset,
						sizeof(struct btrfs_disk_key),
						&map_token, &kaddr,
						&map_start, &map_len, KM_USER0);

			if (!err) {
				tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
			} else {
				read_extent_buffer(eb, &unaligned,
						   offset, sizeof(unaligned));
				tmp = &unaligned;
			}
721 722 723 724 725

		} else {
			tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
		}
726 727 728 729 730 731 732 733
		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
734 735
			if (map_token)
				unmap_extent_buffer(eb, map_token, KM_USER0);
736 737 738 739
			return 0;
		}
	}
	*slot = low;
740 741
	if (map_token)
		unmap_extent_buffer(eb, map_token, KM_USER0);
742 743 744
	return 1;
}

C
Chris Mason 已提交
745 746 747 748
/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
749 750
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		      int level, int *slot)
751
{
752 753 754
	if (level == 0) {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_leaf, items),
C
Chris Mason 已提交
755
					  sizeof(struct btrfs_item),
756
					  key, btrfs_header_nritems(eb),
757
					  slot);
758
	} else {
759 760
		return generic_bin_search(eb,
					  offsetof(struct btrfs_node, ptrs),
C
Chris Mason 已提交
761
					  sizeof(struct btrfs_key_ptr),
762
					  key, btrfs_header_nritems(eb),
763
					  slot);
764 765 766 767
	}
	return -1;
}

768 769
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
770
{
771
	int level = btrfs_header_level(parent);
772 773
	if (slot < 0)
		return NULL;
774
	if (slot >= btrfs_header_nritems(parent))
775
		return NULL;
776 777 778

	BUG_ON(level == 0);

779
	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
780 781
		       btrfs_level_size(root, level - 1),
		       btrfs_node_ptr_generation(parent, slot));
782 783
}

784 785 786
static int balance_level(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path, int level)
787
{
788 789 790 791
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
792 793 794 795
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
796
	int err_on_enospc = 0;
797
	u64 orig_ptr;
798 799 800 801

	if (level == 0)
		return 0;

802
	mid = path->nodes[level];
803
	WARN_ON(!path->locks[level]);
804 805
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

806
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
807

C
Chris Mason 已提交
808
	if (level < BTRFS_MAX_LEVEL - 1)
809
		parent = path->nodes[level + 1];
810 811
	pslot = path->slots[level + 1];

C
Chris Mason 已提交
812 813 814 815
	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
816 817
	if (!parent) {
		struct extent_buffer *child;
818

819
		if (btrfs_header_nritems(mid) != 1)
820 821 822
			return 0;

		/* promote the child to a root */
823
		child = read_node_slot(root, mid, 0);
824
		btrfs_tree_lock(child);
825
		BUG_ON(!child);
826
		ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
827 828
		BUG_ON(ret);

829
		spin_lock(&root->node_lock);
830
		root->node = child;
831 832
		spin_unlock(&root->node_lock);

833
		add_root_to_dirty_list(root);
834 835
		btrfs_tree_unlock(child);
		path->locks[level] = 0;
836
		path->nodes[level] = NULL;
837
		clean_tree_block(trans, root, mid);
838
		btrfs_tree_unlock(mid);
839
		/* once for the path */
840
		free_extent_buffer(mid);
841 842 843
		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
					root->root_key.objectid,
					btrfs_header_generation(mid), 0, 0, 1);
844
		/* once for the root ptr */
845
		free_extent_buffer(mid);
846
		return ret;
847
	}
848
	if (btrfs_header_nritems(mid) >
C
Chris Mason 已提交
849
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
850 851
		return 0;

852
	if (btrfs_header_nritems(mid) < 2)
853 854
		err_on_enospc = 1;

855 856
	left = read_node_slot(root, parent, pslot - 1);
	if (left) {
857
		btrfs_tree_lock(left);
858
		wret = btrfs_cow_block(trans, root, left,
859
				       parent, pslot - 1, &left, 0);
860 861 862 863
		if (wret) {
			ret = wret;
			goto enospc;
		}
864
	}
865 866
	right = read_node_slot(root, parent, pslot + 1);
	if (right) {
867
		btrfs_tree_lock(right);
868
		wret = btrfs_cow_block(trans, root, right,
869
				       parent, pslot + 1, &right, 0);
870 871 872 873 874 875 876
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
877 878
	if (left) {
		orig_slot += btrfs_header_nritems(left);
879
		wret = push_node_left(trans, root, left, mid, 1);
880 881
		if (wret < 0)
			ret = wret;
882
		if (btrfs_header_nritems(mid) < 2)
883
			err_on_enospc = 1;
884
	}
885 886 887 888

	/*
	 * then try to empty the right most buffer into the middle
	 */
889
	if (right) {
890
		wret = push_node_left(trans, root, mid, right, 1);
891
		if (wret < 0 && wret != -ENOSPC)
892
			ret = wret;
893
		if (btrfs_header_nritems(right) == 0) {
894
			u64 bytenr = right->start;
895
			u64 generation = btrfs_header_generation(parent);
896 897
			u32 blocksize = right->len;

898
			clean_tree_block(trans, root, right);
899
			btrfs_tree_unlock(right);
900
			free_extent_buffer(right);
901
			right = NULL;
902 903
			wret = del_ptr(trans, root, path, level + 1, pslot +
				       1);
904 905
			if (wret)
				ret = wret;
906
			wret = btrfs_free_extent(trans, root, bytenr,
907 908 909
						 blocksize,
						 btrfs_header_owner(parent),
						 generation, 0, 0, 1);
910 911 912
			if (wret)
				ret = wret;
		} else {
913 914 915 916
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
917 918
		}
	}
919
	if (btrfs_header_nritems(mid) == 1) {
920 921 922 923 924 925 926 927 928
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
929 930
		BUG_ON(!left);
		wret = balance_node_right(trans, root, mid, left);
931
		if (wret < 0) {
932
			ret = wret;
933 934
			goto enospc;
		}
935 936 937 938 939
		if (wret == 1) {
			wret = push_node_left(trans, root, left, mid, 1);
			if (wret < 0)
				ret = wret;
		}
940 941
		BUG_ON(wret == 1);
	}
942
	if (btrfs_header_nritems(mid) == 0) {
943
		/* we've managed to empty the middle node, drop it */
944
		u64 root_gen = btrfs_header_generation(parent);
945 946
		u64 bytenr = mid->start;
		u32 blocksize = mid->len;
947

948
		clean_tree_block(trans, root, mid);
949
		btrfs_tree_unlock(mid);
950
		free_extent_buffer(mid);
951
		mid = NULL;
952
		wret = del_ptr(trans, root, path, level + 1, pslot);
953 954
		if (wret)
			ret = wret;
955 956 957
		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
					 btrfs_header_owner(parent),
					 root_gen, 0, 0, 1);
958 959
		if (wret)
			ret = wret;
960 961
	} else {
		/* update the parent key to reflect our changes */
962 963 964 965
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
966
	}
967

968
	/* update the path */
969 970 971
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			extent_buffer_get(left);
972
			/* left was locked after cow */
973
			path->nodes[level] = left;
974 975
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
976 977
			if (mid) {
				btrfs_tree_unlock(mid);
978
				free_extent_buffer(mid);
979
			}
980
		} else {
981
			orig_slot -= btrfs_header_nritems(left);
982 983 984
			path->slots[level] = orig_slot;
		}
	}
985
	/* double check we haven't messed things up */
C
Chris Mason 已提交
986
	check_block(root, path, level);
C
Chris Mason 已提交
987
	if (orig_ptr !=
988
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
989
		BUG();
990
enospc:
991 992
	if (right) {
		btrfs_tree_unlock(right);
993
		free_extent_buffer(right);
994 995 996 997
	}
	if (left) {
		if (path->nodes[level] != left)
			btrfs_tree_unlock(left);
998
		free_extent_buffer(left);
999
	}
1000 1001 1002
	return ret;
}

1003
/* returns zero if the push worked, non-zero otherwise */
1004 1005 1006
static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, int level)
1007
{
1008 1009 1010 1011
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
1012 1013 1014 1015 1016 1017 1018 1019 1020
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	if (level == 0)
		return 1;

1021
	mid = path->nodes[level];
1022
	WARN_ON(btrfs_header_generation(mid) != trans->transid);
1023 1024 1025
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
1026
		parent = path->nodes[level + 1];
1027 1028
	pslot = path->slots[level + 1];

1029
	if (!parent)
1030 1031
		return 1;

1032
	left = read_node_slot(root, parent, pslot - 1);
1033 1034

	/* first, try to make some room in the middle buffer */
1035
	if (left) {
1036
		u32 left_nr;
1037 1038

		btrfs_tree_lock(left);
1039
		left_nr = btrfs_header_nritems(left);
C
Chris Mason 已提交
1040 1041 1042
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
1043
			ret = btrfs_cow_block(trans, root, left, parent,
1044
					      pslot - 1, &left, 0);
1045 1046 1047 1048
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
1049
						      left, mid, 0);
1050
			}
C
Chris Mason 已提交
1051
		}
1052 1053 1054
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
1055
			struct btrfs_disk_key disk_key;
1056
			orig_slot += left_nr;
1057 1058 1059 1060 1061
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				path->nodes[level] = left;
1062 1063
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
1064
				btrfs_tree_unlock(mid);
1065
				free_extent_buffer(mid);
1066 1067
			} else {
				orig_slot -=
1068
					btrfs_header_nritems(left);
1069
				path->slots[level] = orig_slot;
1070
				btrfs_tree_unlock(left);
1071
				free_extent_buffer(left);
1072 1073 1074
			}
			return 0;
		}
1075
		btrfs_tree_unlock(left);
1076
		free_extent_buffer(left);
1077
	}
1078
	right = read_node_slot(root, parent, pslot + 1);
1079 1080 1081 1082

	/*
	 * then try to empty the right most buffer into the middle
	 */
1083
	if (right) {
C
Chris Mason 已提交
1084
		u32 right_nr;
1085
		btrfs_tree_lock(right);
1086
		right_nr = btrfs_header_nritems(right);
C
Chris Mason 已提交
1087 1088 1089
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
1090 1091
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
1092
					      &right, 0);
1093 1094 1095 1096
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
1097
							  right, mid);
1098
			}
C
Chris Mason 已提交
1099
		}
1100 1101 1102
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
1103 1104 1105 1106 1107 1108 1109 1110
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				path->nodes[level] = right;
1111 1112
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
1113
					btrfs_header_nritems(mid);
1114
				btrfs_tree_unlock(mid);
1115
				free_extent_buffer(mid);
1116
			} else {
1117
				btrfs_tree_unlock(right);
1118
				free_extent_buffer(right);
1119 1120 1121
			}
			return 0;
		}
1122
		btrfs_tree_unlock(right);
1123
		free_extent_buffer(right);
1124 1125 1126 1127
	}
	return 1;
}

1128 1129 1130 1131
/*
 * readahead one full node of leaves
 */
static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1132
			     int level, int slot, u64 objectid)
1133
{
1134
	struct extent_buffer *node;
1135
	struct btrfs_disk_key disk_key;
1136 1137
	u32 nritems;
	u64 search;
1138 1139 1140
	u64 lowest_read;
	u64 highest_read;
	u64 nread = 0;
1141
	int direction = path->reada;
1142
	struct extent_buffer *eb;
1143 1144 1145
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;
1146

1147
	if (level != 1)
1148 1149 1150
		return;

	if (!path->nodes[level])
1151 1152
		return;

1153
	node = path->nodes[level];
1154

1155
	search = btrfs_node_blockptr(node, slot);
1156 1157
	blocksize = btrfs_level_size(root, level - 1);
	eb = btrfs_find_tree_block(root, search, blocksize);
1158 1159
	if (eb) {
		free_extent_buffer(eb);
1160 1161 1162
		return;
	}

1163 1164 1165
	highest_read = search;
	lowest_read = search;

1166
	nritems = btrfs_header_nritems(node);
1167
	nr = slot;
1168
	while(1) {
1169 1170 1171 1172 1173 1174 1175 1176
		if (direction < 0) {
			if (nr == 0)
				break;
			nr--;
		} else if (direction > 0) {
			nr++;
			if (nr >= nritems)
				break;
1177
		}
1178 1179 1180 1181 1182
		if (path->reada < 0 && objectid) {
			btrfs_node_key(node, &disk_key, nr);
			if (btrfs_disk_key_objectid(&disk_key) != objectid)
				break;
		}
1183 1184 1185 1186
		search = btrfs_node_blockptr(node, nr);
		if ((search >= lowest_read && search <= highest_read) ||
		    (search < lowest_read && lowest_read - search <= 32768) ||
		    (search > highest_read && search - highest_read <= 32768)) {
1187 1188
			readahead_tree_block(root, search, blocksize,
				     btrfs_node_ptr_generation(node, nr));
1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200
			nread += blocksize;
		}
		nscan++;
		if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
			break;
		if(nread > (1024 * 1024) || nscan > 128)
			break;

		if (search < lowest_read)
			lowest_read = search;
		if (search > highest_read)
			highest_read = search;
1201 1202
	}
}
1203 1204 1205 1206 1207

static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock)
{
	int i;
	int skip_level = level;
1208
	int no_skips = 0;
1209 1210 1211 1212 1213 1214 1215
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			break;
		if (!path->locks[i])
			break;
1216
		if (!no_skips && path->slots[i] == 0) {
1217 1218 1219
			skip_level = i + 1;
			continue;
		}
1220
		if (!no_skips && path->keep_locks) {
1221 1222 1223
			u32 nritems;
			t = path->nodes[i];
			nritems = btrfs_header_nritems(t);
1224
			if (nritems < 1 || path->slots[i] >= nritems - 1) {
1225 1226 1227 1228
				skip_level = i + 1;
				continue;
			}
		}
1229 1230 1231
		if (skip_level < i && i >= lowest_unlock)
			no_skips = 1;

1232 1233 1234 1235 1236 1237 1238 1239
		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
			btrfs_tree_unlock(t);
			path->locks[i] = 0;
		}
	}
}

C
Chris Mason 已提交
1240 1241 1242 1243 1244 1245
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
C
Chris Mason 已提交
1246 1247
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
C
Chris Mason 已提交
1248 1249 1250 1251
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
C
Chris Mason 已提交
1252
 */
1253 1254 1255
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
1256
{
1257
	struct extent_buffer *b;
1258
	struct extent_buffer *tmp;
1259 1260 1261
	int slot;
	int ret;
	int level;
1262
	int should_reada = p->reada;
1263
	int lowest_unlock = 1;
1264
	int blocksize;
1265
	u8 lowest_level = 0;
1266 1267
	u64 blocknr;
	u64 gen;
1268
	struct btrfs_key prealloc_block;
1269

1270 1271
	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len);
C
Chris Mason 已提交
1272
	WARN_ON(p->nodes[0] != NULL);
1273
	WARN_ON(cow && root == root->fs_info->extent_root &&
1274 1275 1276
		!mutex_is_locked(&root->fs_info->alloc_mutex));
	if (ins_len < 0)
		lowest_unlock = 2;
1277 1278 1279

	prealloc_block.objectid = 0;

1280
again:
1281 1282 1283 1284
	if (p->skip_locking)
		b = btrfs_root_node(root);
	else
		b = btrfs_lock_root_node(root);
1285

1286
	while (b) {
1287
		level = btrfs_header_level(b);
1288 1289 1290 1291 1292 1293 1294 1295 1296

		/*
		 * setup the path here so we can release it under lock
		 * contention with the cow code
		 */
		p->nodes[level] = b;
		if (!p->skip_locking)
			p->locks[level] = 1;

C
Chris Mason 已提交
1297 1298
		if (cow) {
			int wret;
1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337

			/* is a cow on this block not required */
			spin_lock(&root->fs_info->hash_lock);
			if (btrfs_header_generation(b) == trans->transid &&
			    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
				spin_unlock(&root->fs_info->hash_lock);
				goto cow_done;
			}
			spin_unlock(&root->fs_info->hash_lock);

			/* ok, we have to cow, is our old prealloc the right
			 * size?
			 */
			if (prealloc_block.objectid &&
			    prealloc_block.offset != b->len) {
				btrfs_free_reserved_extent(root,
					   prealloc_block.objectid,
					   prealloc_block.offset);
				prealloc_block.objectid = 0;
			}

			/*
			 * for higher level blocks, try not to allocate blocks
			 * with the block and the parent locks held.
			 */
			if (level > 1 && !prealloc_block.objectid &&
			    btrfs_path_lock_waiting(p, level)) {
				u32 size = b->len;
				u64 hint = b->start;

				btrfs_release_path(root, p);
				ret = btrfs_reserve_extent(trans, root,
							   size, size, 0,
							   hint, (u64)-1,
							   &prealloc_block, 0);
				BUG_ON(ret);
				goto again;
			}

C
Chris Mason 已提交
1338 1339 1340
			wret = btrfs_cow_block(trans, root, b,
					       p->nodes[level + 1],
					       p->slots[level + 1],
1341 1342
					       &b, prealloc_block.objectid);
			prealloc_block.objectid = 0;
1343
			if (wret) {
1344
				free_extent_buffer(b);
1345 1346
				ret = wret;
				goto done;
1347
			}
C
Chris Mason 已提交
1348
		}
1349
cow_done:
C
Chris Mason 已提交
1350
		BUG_ON(!cow && ins_len);
1351
		if (level != btrfs_header_level(b))
C
Chris Mason 已提交
1352
			WARN_ON(1);
1353
		level = btrfs_header_level(b);
1354

1355
		p->nodes[level] = b;
1356 1357
		if (!p->skip_locking)
			p->locks[level] = 1;
1358

C
Chris Mason 已提交
1359
		ret = check_block(root, p, level);
1360 1361 1362 1363
		if (ret) {
			ret = -1;
			goto done;
		}
1364

1365 1366
		ret = bin_search(b, key, level, &slot);
		if (level != 0) {
1367 1368 1369
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
1370
			if (ins_len > 0 && btrfs_header_nritems(b) >=
1371
			    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1372
				int sret = split_node(trans, root, p, level);
C
Chris Mason 已提交
1373
				BUG_ON(sret > 0);
1374 1375 1376 1377
				if (sret) {
					ret = sret;
					goto done;
				}
C
Chris Mason 已提交
1378 1379
				b = p->nodes[level];
				slot = p->slots[level];
1380
			} else if (ins_len < 0) {
1381 1382
				int sret = balance_level(trans, root, p,
							 level);
1383 1384 1385 1386
				if (sret) {
					ret = sret;
					goto done;
				}
1387
				b = p->nodes[level];
1388 1389
				if (!b) {
					btrfs_release_path(NULL, p);
1390
					goto again;
1391
				}
1392
				slot = p->slots[level];
1393
				BUG_ON(btrfs_header_nritems(b) == 1);
C
Chris Mason 已提交
1394
			}
1395 1396
			unlock_up(p, level, lowest_unlock);

1397
			/* this is only true while dropping a snapshot */
1398
			if (level == lowest_level) {
1399
				break;
1400
			}
1401

1402 1403 1404 1405 1406 1407
			blocknr = btrfs_node_blockptr(b, slot);
			gen = btrfs_node_ptr_generation(b, slot);
			blocksize = btrfs_level_size(root, level - 1);

			tmp = btrfs_find_tree_block(root, blocknr, blocksize);
			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1408 1409 1410 1411 1412 1413 1414 1415 1416
				b = tmp;
			} else {
				/*
				 * reduce lock contention at high levels
				 * of the btree by dropping locks before
				 * we read.
				 */
				if (level > 1) {
					btrfs_release_path(NULL, p);
1417 1418
					if (tmp)
						free_extent_buffer(tmp);
1419 1420 1421 1422 1423
					if (should_reada)
						reada_for_search(root, p,
								 level, slot,
								 key->objectid);

1424 1425
					tmp = read_tree_block(root, blocknr,
							 blocksize, gen);
1426 1427 1428 1429
					if (tmp)
						free_extent_buffer(tmp);
					goto again;
				} else {
1430 1431
					if (tmp)
						free_extent_buffer(tmp);
1432 1433 1434 1435
					if (should_reada)
						reada_for_search(root, p,
								 level, slot,
								 key->objectid);
1436 1437 1438
					b = read_node_slot(root, b, slot);
				}
			}
1439 1440
			if (!p->skip_locking)
				btrfs_tree_lock(b);
1441 1442
		} else {
			p->slots[level] = slot;
1443
			if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
C
Chris Mason 已提交
1444
			    sizeof(struct btrfs_item) + ins_len) {
1445
				int sret = split_leaf(trans, root, key,
1446
						      p, ins_len, ret == 0);
C
Chris Mason 已提交
1447
				BUG_ON(sret > 0);
1448 1449 1450 1451
				if (sret) {
					ret = sret;
					goto done;
				}
C
Chris Mason 已提交
1452
			}
1453
			unlock_up(p, level, lowest_unlock);
1454
			goto done;
1455 1456
		}
	}
1457 1458 1459 1460 1461 1462 1463 1464 1465
	ret = 1;
done:
	if (prealloc_block.objectid) {
		btrfs_free_reserved_extent(root,
			   prealloc_block.objectid,
			   prealloc_block.offset);
	}

	return ret;
1466 1467
}

C
Chris Mason 已提交
1468 1469 1470 1471 1472 1473
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
C
Chris Mason 已提交
1474 1475 1476
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
C
Chris Mason 已提交
1477
 */
1478 1479 1480
static int fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
1481 1482
{
	int i;
C
Chris Mason 已提交
1483
	int ret = 0;
1484 1485
	struct extent_buffer *t;

C
Chris Mason 已提交
1486
	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1487
		int tslot = path->slots[i];
1488
		if (!path->nodes[i])
1489
			break;
1490 1491
		t = path->nodes[i];
		btrfs_set_node_key(t, key, tslot);
C
Chris Mason 已提交
1492
		btrfs_mark_buffer_dirty(path->nodes[i]);
1493 1494 1495
		if (tslot != 0)
			break;
	}
C
Chris Mason 已提交
1496
	return ret;
1497 1498
}

C
Chris Mason 已提交
1499 1500
/*
 * try to push data from one node into the next node left in the
1501
 * tree.
C
Chris Mason 已提交
1502 1503 1504
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
C
Chris Mason 已提交
1505
 */
1506 1507
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
1508
			  struct extent_buffer *src, int empty)
1509 1510
{
	int push_items = 0;
1511 1512
	int src_nritems;
	int dst_nritems;
C
Chris Mason 已提交
1513
	int ret = 0;
1514

1515 1516
	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
C
Chris Mason 已提交
1517
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1518 1519
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);
1520

1521
	if (!empty && src_nritems <= 8)
1522 1523
		return 1;

1524
	if (push_items <= 0) {
1525
		return 1;
1526
	}
1527

1528
	if (empty) {
1529
		push_items = min(src_nritems, push_items);
1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541
		if (push_items < src_nritems) {
			/* leave at least 8 pointers in the node if
			 * we aren't going to empty it
			 */
			if (src_nritems - push_items < 8) {
				if (push_items <= 8)
					return 1;
				push_items -= 8;
			}
		}
	} else
		push_items = min(src_nritems - 8, push_items);
1542

1543 1544 1545 1546 1547
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
		           push_items * sizeof(struct btrfs_key_ptr));

1548
	if (push_items < src_nritems) {
1549 1550 1551 1552 1553 1554 1555 1556 1557
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569
	return ret;
}

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will  only push up to 1/2 the contents of the left node over
 */
1570 1571 1572 1573
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst,
			      struct extent_buffer *src)
1574 1575 1576 1577 1578 1579 1580
{
	int push_items = 0;
	int max_push;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

1581 1582 1583
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

1584 1585
	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
C
Chris Mason 已提交
1586
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1587
	if (push_items <= 0) {
1588
		return 1;
1589 1590 1591 1592 1593
	}

	if (src_nritems < 4) {
		return 1;
	}
1594 1595 1596

	max_push = src_nritems / 2 + 1;
	/* don't try to empty the node */
1597
	if (max_push >= src_nritems) {
1598
		return 1;
1599
	}
Y
Yan 已提交
1600

1601 1602 1603
	if (max_push < push_items)
		push_items = max_push;

1604 1605 1606 1607
	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
				      btrfs_node_key_ptr_offset(0),
				      (dst_nritems) *
				      sizeof(struct btrfs_key_ptr));
C
Chris Mason 已提交
1608

1609 1610 1611 1612
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(src_nritems - push_items),
		           push_items * sizeof(struct btrfs_key_ptr));
1613

1614 1615
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
1616

1617 1618
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
C
Chris Mason 已提交
1619
	return ret;
1620 1621
}

C
Chris Mason 已提交
1622 1623 1624 1625
/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
C
Chris Mason 已提交
1626 1627
 *
 * returns zero on success or < 0 on failure.
C
Chris Mason 已提交
1628
 */
1629
static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1630 1631
			   struct btrfs_root *root,
			   struct btrfs_path *path, int level)
C
Chris Mason 已提交
1632
{
1633 1634
	u64 root_gen;
	u64 lower_gen;
1635 1636
	struct extent_buffer *lower;
	struct extent_buffer *c;
1637
	struct extent_buffer *old;
1638
	struct btrfs_disk_key lower_key;
C
Chris Mason 已提交
1639 1640 1641 1642

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	lower = path->nodes[level-1];
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
	else
		btrfs_node_key(lower, &lower_key, 0);

1654
	c = btrfs_alloc_free_block(trans, root, root->nodesize,
1655 1656
				   root->root_key.objectid,
				   root_gen, lower_key.objectid, level,
1657
				   root->node->start, 0);
1658 1659
	if (IS_ERR(c))
		return PTR_ERR(c);
1660

1661 1662 1663
	memset_extent_buffer(c, 0, 0, root->nodesize);
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
1664
	btrfs_set_header_bytenr(c, c->start);
1665 1666 1667 1668 1669 1670
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(c),
			    BTRFS_FSID_SIZE);
1671 1672 1673 1674 1675

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(c),
			    BTRFS_UUID_SIZE);

1676
	btrfs_set_node_key(c, &lower_key, 0);
1677
	btrfs_set_node_blockptr(c, 0, lower->start);
1678 1679 1680 1681
	lower_gen = btrfs_header_generation(lower);
	WARN_ON(lower_gen == 0);

	btrfs_set_node_ptr_generation(c, 0, lower_gen);
1682

1683
	btrfs_mark_buffer_dirty(c);
1684

1685 1686
	spin_lock(&root->node_lock);
	old = root->node;
1687
	root->node = c;
1688 1689 1690 1691 1692
	spin_unlock(&root->node_lock);

	/* the super has an extra ref to root->node */
	free_extent_buffer(old);

1693
	add_root_to_dirty_list(root);
1694 1695
	extent_buffer_get(c);
	path->nodes[level] = c;
1696
	path->locks[level] = 1;
C
Chris Mason 已提交
1697
	path->slots[level] = 0;
1698 1699 1700 1701

	if (root->ref_cows && lower_gen != trans->transid) {
		struct btrfs_path *back_path = btrfs_alloc_path();
		int ret;
1702
		mutex_lock(&root->fs_info->alloc_mutex);
1703 1704 1705 1706 1707 1708
		ret = btrfs_insert_extent_backref(trans,
						  root->fs_info->extent_root,
						  path, lower->start,
						  root->root_key.objectid,
						  trans->transid, 0, 0);
		BUG_ON(ret);
1709
		mutex_unlock(&root->fs_info->alloc_mutex);
1710 1711
		btrfs_free_path(back_path);
	}
C
Chris Mason 已提交
1712 1713 1714
	return 0;
}

C
Chris Mason 已提交
1715 1716 1717
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
C
Chris Mason 已提交
1718
 *
C
Chris Mason 已提交
1719 1720
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
C
Chris Mason 已提交
1721 1722
 *
 * returns zero on success and < 0 on any error
C
Chris Mason 已提交
1723
 */
1724 1725
static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, struct btrfs_disk_key
1726
		      *key, u64 bytenr, int slot, int level)
C
Chris Mason 已提交
1727
{
1728
	struct extent_buffer *lower;
C
Chris Mason 已提交
1729
	int nritems;
C
Chris Mason 已提交
1730 1731

	BUG_ON(!path->nodes[level]);
1732 1733
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
C
Chris Mason 已提交
1734 1735
	if (slot > nritems)
		BUG();
C
Chris Mason 已提交
1736
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
C
Chris Mason 已提交
1737 1738
		BUG();
	if (slot != nritems) {
1739 1740 1741
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
			      btrfs_node_key_ptr_offset(slot),
C
Chris Mason 已提交
1742
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
C
Chris Mason 已提交
1743
	}
1744
	btrfs_set_node_key(lower, key, slot);
1745
	btrfs_set_node_blockptr(lower, slot, bytenr);
1746 1747
	WARN_ON(trans->transid == 0);
	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1748 1749
	btrfs_set_header_nritems(lower, nritems + 1);
	btrfs_mark_buffer_dirty(lower);
C
Chris Mason 已提交
1750 1751 1752
	return 0;
}

C
Chris Mason 已提交
1753 1754 1755 1756 1757 1758
/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
C
Chris Mason 已提交
1759 1760
 *
 * returns 0 on success and < 0 on failure
C
Chris Mason 已提交
1761
 */
1762 1763
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level)
1764
{
1765
	u64 root_gen;
1766 1767 1768
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
1769
	int mid;
C
Chris Mason 已提交
1770
	int ret;
C
Chris Mason 已提交
1771
	int wret;
1772
	u32 c_nritems;
1773

1774
	c = path->nodes[level];
1775
	WARN_ON(btrfs_header_generation(c) != trans->transid);
1776
	if (c == root->node) {
C
Chris Mason 已提交
1777
		/* trying to split the root, lets make a new one */
1778
		ret = insert_new_root(trans, root, path, level + 1);
C
Chris Mason 已提交
1779 1780
		if (ret)
			return ret;
1781 1782
	} else {
		ret = push_nodes_for_insert(trans, root, path, level);
1783 1784
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
1785
		    BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
1786
			return 0;
1787 1788
		if (ret < 0)
			return ret;
1789
	}
1790

1791
	c_nritems = btrfs_header_nritems(c);
1792 1793 1794 1795 1796 1797
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	btrfs_node_key(c, &disk_key, 0);
1798
	split = btrfs_alloc_free_block(trans, root, root->nodesize,
1799 1800 1801 1802
					 root->root_key.objectid,
					 root_gen,
					 btrfs_disk_key_objectid(&disk_key),
					 level, c->start, 0);
1803 1804 1805 1806 1807
	if (IS_ERR(split))
		return PTR_ERR(split);

	btrfs_set_header_flags(split, btrfs_header_flags(c));
	btrfs_set_header_level(split, btrfs_header_level(c));
1808
	btrfs_set_header_bytenr(split, split->start);
1809 1810
	btrfs_set_header_generation(split, trans->transid);
	btrfs_set_header_owner(split, root->root_key.objectid);
1811
	btrfs_set_header_flags(split, 0);
1812 1813 1814
	write_extent_buffer(split, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(split),
			    BTRFS_FSID_SIZE);
1815 1816 1817
	write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
			    BTRFS_UUID_SIZE);
1818

1819
	mid = (c_nritems + 1) / 2;
1820 1821 1822 1823 1824 1825 1826

	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
C
Chris Mason 已提交
1827 1828
	ret = 0;

1829 1830 1831 1832
	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	btrfs_node_key(split, &disk_key, 0);
1833
	wret = insert_ptr(trans, root, path, &disk_key, split->start,
1834
			  path->slots[level + 1] + 1,
C
Chris Mason 已提交
1835
			  level + 1);
C
Chris Mason 已提交
1836 1837 1838
	if (wret)
		ret = wret;

C
Chris Mason 已提交
1839
	if (path->slots[level] >= mid) {
C
Chris Mason 已提交
1840
		path->slots[level] -= mid;
1841
		btrfs_tree_unlock(c);
1842 1843
		free_extent_buffer(c);
		path->nodes[level] = split;
C
Chris Mason 已提交
1844 1845
		path->slots[level + 1] += 1;
	} else {
1846
		btrfs_tree_unlock(split);
1847
		free_extent_buffer(split);
1848
	}
C
Chris Mason 已提交
1849
	return ret;
1850 1851
}

C
Chris Mason 已提交
1852 1853 1854 1855 1856
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
1857
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1858 1859
{
	int data_len;
1860
	int nritems = btrfs_header_nritems(l);
1861
	int end = min(nritems, start + nr) - 1;
1862 1863 1864

	if (!nr)
		return 0;
1865 1866
	data_len = btrfs_item_end_nr(l, start);
	data_len = data_len - btrfs_item_offset_nr(l, end);
C
Chris Mason 已提交
1867
	data_len += sizeof(struct btrfs_item) * nr;
1868
	WARN_ON(data_len < 0);
1869 1870 1871
	return data_len;
}

1872 1873 1874 1875 1876
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
1877
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1878
{
1879 1880 1881 1882 1883
	int nritems = btrfs_header_nritems(leaf);
	int ret;
	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
	if (ret < 0) {
		printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
J
Jens Axboe 已提交
1884
		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1885 1886 1887
		       leaf_space_used(leaf, 0, nritems), nritems);
	}
	return ret;
1888 1889
}

C
Chris Mason 已提交
1890 1891 1892
/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
C
Chris Mason 已提交
1893 1894 1895
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
C
Chris Mason 已提交
1896
 */
1897
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1898 1899
			   *root, struct btrfs_path *path, int data_size,
			   int empty)
C
Chris Mason 已提交
1900
{
1901 1902 1903 1904
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	struct btrfs_disk_key disk_key;
C
Chris Mason 已提交
1905
	int slot;
1906
	u32 i;
C
Chris Mason 已提交
1907 1908 1909
	int free_space;
	int push_space = 0;
	int push_items = 0;
C
Chris Mason 已提交
1910
	struct btrfs_item *item;
1911
	u32 left_nritems;
1912
	u32 nr;
1913
	u32 right_nritems;
1914
	u32 data_end;
1915
	u32 this_item_size;
1916
	int ret;
C
Chris Mason 已提交
1917 1918 1919 1920 1921 1922

	slot = path->slots[1];
	if (!path->nodes[1]) {
		return 1;
	}
	upper = path->nodes[1];
1923
	if (slot >= btrfs_header_nritems(upper) - 1)
C
Chris Mason 已提交
1924
		return 1;
1925

1926 1927
	WARN_ON(!btrfs_tree_locked(path->nodes[1]));

1928
	right = read_node_slot(root, upper, slot + 1);
1929
	btrfs_tree_lock(right);
C
Chris Mason 已提交
1930
	free_space = btrfs_leaf_free_space(root, right);
1931 1932
	if (free_space < data_size + sizeof(struct btrfs_item))
		goto out_unlock;
1933

C
Chris Mason 已提交
1934
	/* cow and double check */
1935
	ret = btrfs_cow_block(trans, root, right, upper,
1936
			      slot + 1, &right, 0);
1937 1938 1939
	if (ret)
		goto out_unlock;

C
Chris Mason 已提交
1940
	free_space = btrfs_leaf_free_space(root, right);
1941 1942
	if (free_space < data_size + sizeof(struct btrfs_item))
		goto out_unlock;
C
Chris Mason 已提交
1943

1944
	left_nritems = btrfs_header_nritems(left);
1945 1946
	if (left_nritems == 0)
		goto out_unlock;
1947

1948 1949 1950 1951 1952 1953 1954
	if (empty)
		nr = 0;
	else
		nr = 1;

	i = left_nritems - 1;
	while (i >= nr) {
1955
		item = btrfs_item_nr(left, i);
1956

C
Chris Mason 已提交
1957 1958
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969

		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
C
Chris Mason 已提交
1970 1971
			break;
		push_items++;
1972
		push_space += this_item_size + sizeof(*item);
1973 1974 1975
		if (i == 0)
			break;
		i--;
1976 1977 1978 1979
	}
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
C
Chris Mason 已提交
1980
	}
1981

1982 1983
	if (push_items == 0)
		goto out_unlock;
1984

1985
	if (!empty && push_items == left_nritems)
1986
		WARN_ON(1);
1987

C
Chris Mason 已提交
1988
	/* push left to right */
1989
	right_nritems = btrfs_header_nritems(right);
1990

1991
	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
C
Chris Mason 已提交
1992
	push_space -= leaf_data_end(root, left);
1993

C
Chris Mason 已提交
1994
	/* make room in the right data area */
1995 1996 1997 1998 1999 2000
	data_end = leaf_data_end(root, right);
	memmove_extent_buffer(right,
			      btrfs_leaf_data(right) + data_end - push_space,
			      btrfs_leaf_data(right) + data_end,
			      BTRFS_LEAF_DATA_SIZE(root) - data_end);

C
Chris Mason 已提交
2001
	/* copy from the left data area */
2002
	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
C
Chris Mason 已提交
2003 2004 2005
		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
		     btrfs_leaf_data(left) + leaf_data_end(root, left),
		     push_space);
2006 2007 2008 2009 2010

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

C
Chris Mason 已提交
2011
	/* copy the items from left to right */
2012 2013 2014
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
		   btrfs_item_nr_offset(left_nritems - push_items),
		   push_items * sizeof(struct btrfs_item));
C
Chris Mason 已提交
2015 2016

	/* update the item pointers */
2017
	right_nritems += push_items;
2018
	btrfs_set_header_nritems(right, right_nritems);
C
Chris Mason 已提交
2019
	push_space = BTRFS_LEAF_DATA_SIZE(root);
2020
	for (i = 0; i < right_nritems; i++) {
2021
		item = btrfs_item_nr(right, i);
2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}
		push_space -= btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
C
Chris Mason 已提交
2036
	}
2037
	left_nritems -= push_items;
2038
	btrfs_set_header_nritems(left, left_nritems);
C
Chris Mason 已提交
2039

2040 2041
	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
2042
	btrfs_mark_buffer_dirty(right);
2043

2044 2045
	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
C
Chris Mason 已提交
2046
	btrfs_mark_buffer_dirty(upper);
C
Chris Mason 已提交
2047

C
Chris Mason 已提交
2048
	/* then fixup the leaf pointer in the path */
2049 2050
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
2051 2052 2053
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			clean_tree_block(trans, root, path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
2054 2055
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
C
Chris Mason 已提交
2056 2057
		path->slots[1] += 1;
	} else {
2058
		btrfs_tree_unlock(right);
2059
		free_extent_buffer(right);
C
Chris Mason 已提交
2060 2061
	}
	return 0;
2062 2063 2064 2065 2066

out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
C
Chris Mason 已提交
2067
}
2068

C
Chris Mason 已提交
2069 2070 2071 2072
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
2073
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2074 2075
			  *root, struct btrfs_path *path, int data_size,
			  int empty)
2076
{
2077 2078 2079
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
2080 2081 2082 2083 2084
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
C
Chris Mason 已提交
2085
	struct btrfs_item *item;
2086
	u32 old_left_nritems;
2087
	u32 right_nritems;
2088
	u32 nr;
C
Chris Mason 已提交
2089 2090
	int ret = 0;
	int wret;
2091 2092
	u32 this_item_size;
	u32 old_left_item_size;
2093 2094

	slot = path->slots[1];
2095
	if (slot == 0)
2096
		return 1;
2097
	if (!path->nodes[1])
2098
		return 1;
2099

2100 2101 2102 2103 2104
	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0) {
		return 1;
	}

2105 2106
	WARN_ON(!btrfs_tree_locked(path->nodes[1]));

2107
	left = read_node_slot(root, path->nodes[1], slot - 1);
2108
	btrfs_tree_lock(left);
C
Chris Mason 已提交
2109
	free_space = btrfs_leaf_free_space(root, left);
C
Chris Mason 已提交
2110
	if (free_space < data_size + sizeof(struct btrfs_item)) {
2111 2112
		ret = 1;
		goto out;
2113
	}
C
Chris Mason 已提交
2114 2115

	/* cow and double check */
2116
	ret = btrfs_cow_block(trans, root, left,
2117
			      path->nodes[1], slot - 1, &left, 0);
2118 2119
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
2120 2121
		ret = 1;
		goto out;
2122
	}
2123

C
Chris Mason 已提交
2124
	free_space = btrfs_leaf_free_space(root, left);
C
Chris Mason 已提交
2125
	if (free_space < data_size + sizeof(struct btrfs_item)) {
2126 2127
		ret = 1;
		goto out;
C
Chris Mason 已提交
2128 2129
	}

2130 2131 2132 2133 2134 2135
	if (empty)
		nr = right_nritems;
	else
		nr = right_nritems - 1;

	for (i = 0; i < nr; i++) {
2136
		item = btrfs_item_nr(right, i);
2137 2138 2139 2140 2141 2142 2143 2144
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

2145 2146
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
2147 2148 2149

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
2150
			break;
2151

2152
		push_items++;
2153 2154 2155 2156 2157 2158
		push_space += this_item_size + sizeof(*item);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
2159
	}
2160

2161
	if (push_items == 0) {
2162 2163
		ret = 1;
		goto out;
2164
	}
2165
	if (!empty && push_items == btrfs_header_nritems(right))
2166
		WARN_ON(1);
2167

2168
	/* push data from right to left */
2169 2170 2171 2172 2173
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

C
Chris Mason 已提交
2174
	push_space = BTRFS_LEAF_DATA_SIZE(root) -
2175 2176 2177
		     btrfs_item_offset_nr(right, push_items -1);

	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
C
Chris Mason 已提交
2178 2179
		     leaf_data_end(root, left) - push_space,
		     btrfs_leaf_data(right) +
2180
		     btrfs_item_offset_nr(right, push_items - 1),
C
Chris Mason 已提交
2181
		     push_space);
2182
	old_left_nritems = btrfs_header_nritems(left);
2183 2184
	BUG_ON(old_left_nritems < 0);

2185
	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
C
Chris Mason 已提交
2186
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
2187
		u32 ioff;
2188

2189
		item = btrfs_item_nr(left, i);
2190 2191 2192 2193 2194 2195 2196 2197
		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

2198 2199
		ioff = btrfs_item_offset(left, item);
		btrfs_set_item_offset(left, item,
2200
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
2201
	}
2202
	btrfs_set_header_nritems(left, old_left_nritems + push_items);
2203 2204 2205 2206
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
	}
2207 2208

	/* fixup right node */
2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222
	if (push_items > right_nritems) {
		printk("push items %d nr %u\n", push_items, right_nritems);
		WARN_ON(1);
	}

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
						  leaf_data_end(root, right);
		memmove_extent_buffer(right, btrfs_leaf_data(right) +
				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
				      btrfs_leaf_data(right) +
				      leaf_data_end(root, right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
2223 2224 2225
			      btrfs_item_nr_offset(push_items),
			     (btrfs_header_nritems(right) - push_items) *
			     sizeof(struct btrfs_item));
2226
	}
2227 2228
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
C
Chris Mason 已提交
2229
	push_space = BTRFS_LEAF_DATA_SIZE(root);
2230 2231
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		push_space = push_space - btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
2247
	}
2248

2249
	btrfs_mark_buffer_dirty(left);
2250 2251
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
2252

2253 2254
	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
C
Chris Mason 已提交
2255 2256
	if (wret)
		ret = wret;
2257 2258 2259 2260

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
2261 2262 2263
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			clean_tree_block(trans, root, path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
2264 2265
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
2266 2267
		path->slots[1] -= 1;
	} else {
2268
		btrfs_tree_unlock(left);
2269
		free_extent_buffer(left);
2270 2271
		path->slots[0] -= push_items;
	}
2272
	BUG_ON(path->slots[0] < 0);
C
Chris Mason 已提交
2273
	return ret;
2274 2275 2276 2277
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
2278 2279
}

C
Chris Mason 已提交
2280 2281 2282
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
C
Chris Mason 已提交
2283 2284
 *
 * returns 0 if all went well and < 0 on failure.
C
Chris Mason 已提交
2285
 */
2286
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
2287
		      *root, struct btrfs_key *ins_key,
2288
		      struct btrfs_path *path, int data_size, int extend)
2289
{
2290
	u64 root_gen;
2291
	struct extent_buffer *l;
2292
	u32 nritems;
2293 2294
	int mid;
	int slot;
2295
	struct extent_buffer *right;
C
Chris Mason 已提交
2296
	int space_needed = data_size + sizeof(struct btrfs_item);
2297 2298 2299
	int data_copy_size;
	int rt_data_off;
	int i;
2300
	int ret = 0;
C
Chris Mason 已提交
2301
	int wret;
2302 2303
	int double_split;
	int num_doubles = 0;
2304
	struct btrfs_disk_key disk_key;
C
Chris Mason 已提交
2305

2306 2307 2308
	if (extend)
		space_needed = data_size;

2309 2310 2311 2312 2313
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

C
Chris Mason 已提交
2314
	/* first try to make some room by pushing left and right */
2315
	if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
2316
		wret = push_leaf_right(trans, root, path, data_size, 0);
2317
		if (wret < 0) {
C
Chris Mason 已提交
2318
			return wret;
2319 2320
		}
		if (wret) {
2321
			wret = push_leaf_left(trans, root, path, data_size, 0);
2322 2323 2324 2325
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];
C
Chris Mason 已提交
2326

2327
		/* did the pushes work? */
2328
		if (btrfs_leaf_free_space(root, l) >= space_needed)
2329
			return 0;
2330
	}
C
Chris Mason 已提交
2331

C
Chris Mason 已提交
2332
	if (!path->nodes[1]) {
2333
		ret = insert_new_root(trans, root, path, 1);
C
Chris Mason 已提交
2334 2335 2336
		if (ret)
			return ret;
	}
2337 2338 2339
again:
	double_split = 0;
	l = path->nodes[0];
2340
	slot = path->slots[0];
2341
	nritems = btrfs_header_nritems(l);
2342
	mid = (nritems + 1)/ 2;
2343

2344 2345
	btrfs_item_key(l, &disk_key, 0);

2346
	right = btrfs_alloc_free_block(trans, root, root->leafsize,
2347 2348 2349
					 root->root_key.objectid,
					 root_gen, disk_key.objectid, 0,
					 l->start, 0);
2350 2351
	if (IS_ERR(right)) {
		BUG_ON(1);
2352
		return PTR_ERR(right);
2353
	}
2354 2355

	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2356
	btrfs_set_header_bytenr(right, right->start);
2357 2358 2359 2360 2361 2362
	btrfs_set_header_generation(right, trans->transid);
	btrfs_set_header_owner(right, root->root_key.objectid);
	btrfs_set_header_level(right, 0);
	write_extent_buffer(right, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(right),
			    BTRFS_FSID_SIZE);
2363 2364 2365 2366

	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
			    BTRFS_UUID_SIZE);
2367 2368 2369 2370 2371 2372
	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + space_needed >
			BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
2373
				btrfs_set_header_nritems(right, 0);
2374
				wret = insert_ptr(trans, root, path,
2375
						  &disk_key, right->start,
2376 2377 2378
						  path->slots[1] + 1, 1);
				if (wret)
					ret = wret;
2379 2380

				btrfs_tree_unlock(path->nodes[0]);
2381 2382
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
2383 2384
				path->slots[0] = 0;
				path->slots[1] += 1;
2385
				btrfs_mark_buffer_dirty(right);
2386 2387 2388
				return ret;
			}
			mid = slot;
2389 2390 2391 2392 2393
			if (mid != nritems &&
			    leaf_space_used(l, mid, nritems - mid) +
			    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
				double_split = 1;
			}
2394 2395 2396 2397
		}
	} else {
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
			BTRFS_LEAF_DATA_SIZE(root)) {
2398
			if (!extend && slot == 0) {
2399
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
2400
				btrfs_set_header_nritems(right, 0);
2401 2402
				wret = insert_ptr(trans, root, path,
						  &disk_key,
2403
						  right->start,
2404
						  path->slots[1], 1);
2405 2406
				if (wret)
					ret = wret;
2407
				btrfs_tree_unlock(path->nodes[0]);
2408 2409
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
2410
				path->slots[0] = 0;
2411 2412 2413 2414 2415 2416
				if (path->slots[1] == 0) {
					wret = fixup_low_keys(trans, root,
					           path, &disk_key, 1);
					if (wret)
						ret = wret;
				}
2417
				btrfs_mark_buffer_dirty(right);
2418
				return ret;
2419 2420 2421 2422 2423 2424 2425 2426 2427
			} else if (extend && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
					double_split = 1;
				}
2428
			}
2429 2430
		}
	}
2431 2432 2433 2434 2435 2436 2437 2438 2439
	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);

	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
C
Chris Mason 已提交
2440 2441 2442
		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
		     data_copy_size, btrfs_leaf_data(l) +
		     leaf_data_end(root, l), data_copy_size);
2443

C
Chris Mason 已提交
2444
	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2445
		      btrfs_item_end_nr(l, mid);
C
Chris Mason 已提交
2446

2447 2448
	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(right, i);
2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459
		u32 ioff;

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(right, item);
2460
		btrfs_set_item_offset(right, item, ioff + rt_data_off);
C
Chris Mason 已提交
2461
	}
C
Chris Mason 已提交
2462

2463 2464 2465 2466 2467
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

2468
	btrfs_set_header_nritems(l, mid);
C
Chris Mason 已提交
2469
	ret = 0;
2470
	btrfs_item_key(right, &disk_key, 0);
2471 2472
	wret = insert_ptr(trans, root, path, &disk_key, right->start,
			  path->slots[1] + 1, 1);
C
Chris Mason 已提交
2473 2474
	if (wret)
		ret = wret;
2475 2476 2477

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
2478
	BUG_ON(path->slots[0] != slot);
2479

2480
	if (mid <= slot) {
2481
		btrfs_tree_unlock(path->nodes[0]);
2482 2483
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
2484 2485
		path->slots[0] -= mid;
		path->slots[1] += 1;
2486 2487
	} else {
		btrfs_tree_unlock(right);
2488
		free_extent_buffer(right);
2489
	}
2490

2491
	BUG_ON(path->slots[0] < 0);
2492

2493 2494 2495 2496
	if (double_split) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
2497
	}
2498 2499 2500
	return ret;
}

C
Chris Mason 已提交
2501 2502 2503
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
2504
			u32 new_size, int from_end)
C
Chris Mason 已提交
2505 2506 2507 2508
{
	int ret = 0;
	int slot;
	int slot_orig;
2509 2510
	struct extent_buffer *leaf;
	struct btrfs_item *item;
C
Chris Mason 已提交
2511 2512 2513 2514 2515 2516 2517 2518
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;

	slot_orig = path->slots[0];
2519
	leaf = path->nodes[0];
2520 2521 2522 2523 2524
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return 0;
C
Chris Mason 已提交
2525

2526
	nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
2527 2528
	data_end = leaf_data_end(root, leaf);

2529
	old_data_start = btrfs_item_offset_nr(leaf, slot);
2530

C
Chris Mason 已提交
2531 2532 2533 2534 2535 2536 2537 2538 2539 2540
	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
2541 2542
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
2543 2544 2545 2546 2547 2548 2549 2550 2551

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}

2552 2553
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff + size_diff);
C
Chris Mason 已提交
2554
	}
2555 2556 2557 2558 2559 2560

	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

C
Chris Mason 已提交
2561
	/* shift the data */
2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600
	if (from_end) {
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				        (unsigned long)fi,
				        offsetof(struct btrfs_file_extent_item,
						 disk_bytenr));
			}
		}

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start - data_end);

		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(trans, root, path, &disk_key, 1);
	}
2601 2602 2603 2604

	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
2605 2606

	ret = 0;
2607 2608
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
C
Chris Mason 已提交
2609
		BUG();
2610
	}
C
Chris Mason 已提交
2611 2612 2613
	return ret;
}

2614 2615 2616
int btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
2617 2618 2619 2620
{
	int ret = 0;
	int slot;
	int slot_orig;
2621 2622
	struct extent_buffer *leaf;
	struct btrfs_item *item;
2623 2624 2625 2626 2627 2628 2629
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;

	slot_orig = path->slots[0];
2630
	leaf = path->nodes[0];
2631

2632
	nritems = btrfs_header_nritems(leaf);
2633 2634
	data_end = leaf_data_end(root, leaf);

2635 2636
	if (btrfs_leaf_free_space(root, leaf) < data_size) {
		btrfs_print_leaf(root, leaf);
2637
		BUG();
2638
	}
2639
	slot = path->slots[0];
2640
	old_data = btrfs_item_end_nr(leaf, slot);
2641 2642

	BUG_ON(slot < 0);
2643 2644 2645 2646 2647
	if (slot >= nritems) {
		btrfs_print_leaf(root, leaf);
		printk("slot %d too large, nritems %d\n", slot, nritems);
		BUG_ON(1);
	}
2648 2649 2650 2651 2652 2653

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
2654 2655
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
2656 2657 2658 2659 2660 2661 2662 2663

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}
2664 2665
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff - data_size);
2666
	}
2667

2668 2669 2670 2671 2672
	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

2673
	/* shift the data */
2674
	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2675 2676
		      data_end - data_size, btrfs_leaf_data(leaf) +
		      data_end, old_data - data_end);
2677

2678
	data_end = old_data;
2679 2680 2681 2682
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);
2683 2684

	ret = 0;
2685 2686
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
2687
		BUG();
2688
	}
2689 2690 2691
	return ret;
}

C
Chris Mason 已提交
2692 2693 2694 2695
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
2696
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2697 2698
			    struct btrfs_root *root,
			    struct btrfs_path *path,
2699 2700
			    struct btrfs_key *cpu_key, u32 *data_size,
			    int nr)
2701
{
2702 2703
	struct extent_buffer *leaf;
	struct btrfs_item *item;
C
Chris Mason 已提交
2704
	int ret = 0;
2705
	int slot;
2706
	int slot_orig;
2707
	int i;
2708
	u32 nritems;
2709 2710
	u32 total_size = 0;
	u32 total_data = 0;
2711
	unsigned int data_end;
C
Chris Mason 已提交
2712 2713
	struct btrfs_disk_key disk_key;

2714 2715 2716
	for (i = 0; i < nr; i++) {
		total_data += data_size[i];
	}
2717

2718
	total_size = total_data + (nr * sizeof(struct btrfs_item));
2719
	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2720
	if (ret == 0) {
2721
		return -EEXIST;
C
Chris Mason 已提交
2722
	}
2723 2724
	if (ret < 0)
		goto out;
2725

2726
	slot_orig = path->slots[0];
2727
	leaf = path->nodes[0];
C
Chris Mason 已提交
2728

2729
	nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
2730
	data_end = leaf_data_end(root, leaf);
2731

C
Chris Mason 已提交
2732
	if (btrfs_leaf_free_space(root, leaf) <
2733
	    sizeof(struct btrfs_item) + total_size) {
2734 2735
		btrfs_print_leaf(root, leaf);
		printk("not enough freespace need %u have %d\n",
2736
		       total_size, btrfs_leaf_free_space(root, leaf));
2737
		BUG();
2738
	}
2739

2740
	slot = path->slots[0];
2741
	BUG_ON(slot < 0);
2742

2743 2744
	if (slot != nritems) {
		int i;
2745
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2746

2747 2748 2749 2750 2751 2752
		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
2753 2754 2755 2756
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
2757
		WARN_ON(leaf->map_token);
C
Chris Mason 已提交
2758
		for (i = slot; i < nritems; i++) {
2759
			u32 ioff;
2760

2761
			item = btrfs_item_nr(leaf, i);
2762 2763 2764 2765 2766 2767 2768 2769
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}

2770
			ioff = btrfs_item_offset(leaf, item);
2771
			btrfs_set_item_offset(leaf, item, ioff - total_data);
C
Chris Mason 已提交
2772
		}
2773 2774 2775 2776
		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}
2777 2778

		/* shift the items */
2779
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2780
			      btrfs_item_nr_offset(slot),
C
Chris Mason 已提交
2781
			      (nritems - slot) * sizeof(struct btrfs_item));
2782 2783

		/* shift the data */
2784
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2785
			      data_end - total_data, btrfs_leaf_data(leaf) +
C
Chris Mason 已提交
2786
			      data_end, old_data - data_end);
2787 2788
		data_end = old_data;
	}
2789

2790
	/* setup the item for the new data */
2791 2792 2793 2794 2795 2796 2797 2798 2799
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(leaf, slot + i);
		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
		data_end -= data_size[i];
		btrfs_set_item_size(leaf, item, data_size[i]);
	}
	btrfs_set_header_nritems(leaf, nritems + nr);
2800
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
2801 2802

	ret = 0;
2803 2804
	if (slot == 0) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2805
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2806
	}
C
Chris Mason 已提交
2807

2808 2809
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
2810
		BUG();
2811
	}
2812
out:
2813 2814 2815 2816 2817 2818 2819
	return ret;
}

/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
2820 2821 2822
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *cpu_key, void *data, u32
		      data_size)
2823 2824
{
	int ret = 0;
C
Chris Mason 已提交
2825
	struct btrfs_path *path;
2826 2827
	struct extent_buffer *leaf;
	unsigned long ptr;
2828

C
Chris Mason 已提交
2829 2830 2831
	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2832
	if (!ret) {
2833 2834 2835 2836
		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, data, ptr, data_size);
		btrfs_mark_buffer_dirty(leaf);
2837
	}
C
Chris Mason 已提交
2838
	btrfs_free_path(path);
C
Chris Mason 已提交
2839
	return ret;
2840 2841
}

C
Chris Mason 已提交
2842
/*
C
Chris Mason 已提交
2843
 * delete the pointer from a given node.
C
Chris Mason 已提交
2844 2845 2846 2847 2848
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way the root if required.  The root is converted into
 * a leaf if all the nodes are emptied.
 */
2849 2850
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot)
2851
{
2852
	struct extent_buffer *parent = path->nodes[level];
2853
	u32 nritems;
C
Chris Mason 已提交
2854
	int ret = 0;
2855
	int wret;
2856

2857
	nritems = btrfs_header_nritems(parent);
2858
	if (slot != nritems -1) {
2859 2860 2861
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
C
Chris Mason 已提交
2862 2863
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
2864
	}
2865
	nritems--;
2866
	btrfs_set_header_nritems(parent, nritems);
2867
	if (nritems == 0 && parent == root->node) {
2868
		BUG_ON(btrfs_header_level(root->node) != 1);
2869
		/* just turn the root into a leaf and break */
2870
		btrfs_set_header_level(root->node, 0);
2871
	} else if (slot == 0) {
2872 2873 2874 2875
		struct btrfs_disk_key disk_key;

		btrfs_node_key(parent, &disk_key, 0);
		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
C
Chris Mason 已提交
2876 2877
		if (wret)
			ret = wret;
2878
	}
C
Chris Mason 已提交
2879
	btrfs_mark_buffer_dirty(parent);
C
Chris Mason 已提交
2880
	return ret;
2881 2882
}

C
Chris Mason 已提交
2883 2884 2885 2886
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
2887 2888
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct btrfs_path *path, int slot, int nr)
2889
{
2890 2891
	struct extent_buffer *leaf;
	struct btrfs_item *item;
2892 2893
	int last_off;
	int dsize = 0;
C
Chris Mason 已提交
2894 2895
	int ret = 0;
	int wret;
2896
	int i;
2897
	u32 nritems;
2898

2899
	leaf = path->nodes[0];
2900 2901 2902 2903 2904
	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);

	for (i = 0; i < nr; i++)
		dsize += btrfs_item_size_nr(leaf, slot + i);

2905
	nritems = btrfs_header_nritems(leaf);
2906

2907
	if (slot + nr != nritems) {
2908
		int i;
C
Chris Mason 已提交
2909
		int data_end = leaf_data_end(root, leaf);
2910 2911

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
C
Chris Mason 已提交
2912 2913
			      data_end + dsize,
			      btrfs_leaf_data(leaf) + data_end,
2914
			      last_off - data_end);
2915

2916
		for (i = slot + nr; i < nritems; i++) {
2917
			u32 ioff;
2918

2919
			item = btrfs_item_nr(leaf, i);
2920 2921 2922 2923 2924 2925 2926
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}
2927 2928
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff + dsize);
C
Chris Mason 已提交
2929
		}
2930 2931 2932 2933 2934 2935

		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

2936
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2937
			      btrfs_item_nr_offset(slot + nr),
C
Chris Mason 已提交
2938
			      sizeof(struct btrfs_item) *
2939
			      (nritems - slot - nr));
2940
	}
2941 2942
	btrfs_set_header_nritems(leaf, nritems - nr);
	nritems -= nr;
2943

C
Chris Mason 已提交
2944
	/* delete the leaf if we've emptied it */
2945
	if (nritems == 0) {
2946 2947
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
2948
		} else {
2949
			u64 root_gen = btrfs_header_generation(path->nodes[1]);
2950
			wret = del_ptr(trans, root, path, 1, path->slots[1]);
C
Chris Mason 已提交
2951 2952
			if (wret)
				ret = wret;
2953
			wret = btrfs_free_extent(trans, root,
2954 2955 2956
					 leaf->start, leaf->len,
					 btrfs_header_owner(path->nodes[1]),
					 root_gen, 0, 0, 1);
C
Chris Mason 已提交
2957 2958
			if (wret)
				ret = wret;
2959
		}
2960
	} else {
2961
		int used = leaf_space_used(leaf, 0, nritems);
C
Chris Mason 已提交
2962
		if (slot == 0) {
2963 2964 2965
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
2966
			wret = fixup_low_keys(trans, root, path,
2967
					      &disk_key, 1);
C
Chris Mason 已提交
2968 2969 2970 2971
			if (wret)
				ret = wret;
		}

C
Chris Mason 已提交
2972
		/* delete the leaf if it is mostly empty */
2973
		if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2974 2975 2976 2977
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
2978
			slot = path->slots[1];
2979 2980
			extent_buffer_get(leaf);

2981
			wret = push_leaf_left(trans, root, path, 1, 1);
2982
			if (wret < 0 && wret != -ENOSPC)
C
Chris Mason 已提交
2983
				ret = wret;
2984 2985 2986

			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
2987
				wret = push_leaf_right(trans, root, path, 1, 1);
2988
				if (wret < 0 && wret != -ENOSPC)
C
Chris Mason 已提交
2989 2990
					ret = wret;
			}
2991 2992

			if (btrfs_header_nritems(leaf) == 0) {
2993
				u64 root_gen;
2994 2995
				u64 bytenr = leaf->start;
				u32 blocksize = leaf->len;
2996

2997 2998 2999
				root_gen = btrfs_header_generation(
							   path->nodes[1]);

3000
				wret = del_ptr(trans, root, path, 1, slot);
C
Chris Mason 已提交
3001 3002
				if (wret)
					ret = wret;
3003 3004

				free_extent_buffer(leaf);
3005
				wret = btrfs_free_extent(trans, root, bytenr,
3006 3007 3008
					     blocksize,
					     btrfs_header_owner(path->nodes[1]),
					     root_gen, 0, 0, 1);
C
Chris Mason 已提交
3009 3010
				if (wret)
					ret = wret;
C
Chris Mason 已提交
3011
			} else {
3012 3013 3014 3015 3016 3017 3018
				/* if we're still in the path, make sure
				 * we're dirty.  Otherwise, one of the
				 * push_leaf functions must have already
				 * dirtied this buffer
				 */
				if (path->nodes[0] == leaf)
					btrfs_mark_buffer_dirty(leaf);
3019
				free_extent_buffer(leaf);
3020
			}
3021
		} else {
3022
			btrfs_mark_buffer_dirty(leaf);
3023 3024
		}
	}
C
Chris Mason 已提交
3025
	return ret;
3026 3027
}

3028
/*
3029
 * search the tree again to find a leaf with lesser keys
3030 3031 3032 3033 3034
 * returns 0 if it found something or 1 if there are no lesser leaves.
 * returns < 0 on io errors.
 */
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
3035 3036 3037
	struct btrfs_key key;
	struct btrfs_disk_key found_key;
	int ret;
3038

3039
	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
3040

3041 3042 3043 3044 3045 3046 3047 3048
	if (key.offset > 0)
		key.offset--;
	else if (key.type > 0)
		key.type--;
	else if (key.objectid > 0)
		key.objectid--;
	else
		return 1;
3049

3050 3051 3052 3053 3054 3055 3056 3057 3058
	btrfs_release_path(root, path);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	btrfs_item_key(path->nodes[0], &found_key, 0);
	ret = comp_keys(&found_key, &key);
	if (ret < 0)
		return 0;
	return 1;
3059 3060
}

3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087
/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that are either in cache or have a minimum
 * transaction id.  This is used by the btree defrag code, but could
 * also be used to search for blocks that have changed since a given
 * transaction id.
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This does lock as it descends, and path->keep_locks should be set
 * to 1 by the caller.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_path *path, int cache_only,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
3088
	int sret;
3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105
	u32 nritems;
	int level;
	int ret = 1;

again:
	cur = btrfs_lock_root_node(root);
	level = btrfs_header_level(cur);
	path->nodes[level] = cur;
	path->locks[level] = 1;

	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while(1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
3106
		sret = bin_search(cur, min_key, level, &slot);
3107 3108 3109 3110 3111 3112 3113 3114

		/* at level = 0, we're done, setup the path and exit */
		if (level == 0) {
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
3115 3116
		if (sret && slot > 0)
			slot--;
3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192
		/*
		 * check this node pointer against the cache_only and
		 * min_trans parameters.  If it isn't in cache or is too
		 * old, skip to the next one.
		 */
		while(slot < nritems) {
			u64 blockptr;
			u64 gen;
			struct extent_buffer *tmp;
			blockptr = btrfs_node_blockptr(cur, slot);
			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			if (!cache_only)
				break;

			tmp = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));

			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				free_extent_buffer(tmp);
				break;
			}
			if (tmp)
				free_extent_buffer(tmp);
			slot++;
		}
		/*
		 * we didn't find a candidate key in this node, walk forward
		 * and find another one
		 */
		if (slot >= nritems) {
			ret = btrfs_find_next_key(root, path, min_key, level,
						  cache_only, min_trans);
			if (ret == 0) {
				btrfs_release_path(root, path);
				goto again;
			} else {
				goto out;
			}
		}
		/* save our key for returning back */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			unlock_up(path, level, 1);
			goto out;
		}
		cur = read_node_slot(root, cur, slot);

		btrfs_tree_lock(cur);
		path->locks[level - 1] = 1;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1);
	}
out:
	if (ret == 0)
		memcpy(min_key, &found_key, sizeof(found_key));
	return ret;
}

/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path.  It looks for and returns the next key in the
 * tree based on the current path and the cache_only and min_trans
 * parameters.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
3193
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
3194 3195
			struct btrfs_key *key, int lowest_level,
			int cache_only, u64 min_trans)
3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206
{
	int level = lowest_level;
	int slot;
	struct extent_buffer *c;

	while(level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		slot = path->slots[level] + 1;
		c = path->nodes[level];
3207
next:
3208 3209 3210 3211 3212 3213 3214 3215 3216
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL) {
				return 1;
			}
			continue;
		}
		if (level == 0)
			btrfs_item_key_to_cpu(c, key, slot);
3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236
		else {
			u64 blockptr = btrfs_node_blockptr(c, slot);
			u64 gen = btrfs_node_ptr_generation(c, slot);

			if (cache_only) {
				struct extent_buffer *cur;
				cur = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));
				if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
					slot++;
					if (cur)
						free_extent_buffer(cur);
					goto next;
				}
				free_extent_buffer(cur);
			}
			if (gen < min_trans) {
				slot++;
				goto next;
			}
3237
			btrfs_node_key_to_cpu(c, key, slot);
3238
		}
3239 3240 3241 3242 3243
		return 0;
	}
	return 1;
}

C
Chris Mason 已提交
3244
/*
3245
 * search the tree again to find a leaf with greater keys
C
Chris Mason 已提交
3246 3247
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on io errors.
C
Chris Mason 已提交
3248
 */
C
Chris Mason 已提交
3249
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
3250 3251 3252
{
	int slot;
	int level = 1;
3253 3254
	struct extent_buffer *c;
	struct extent_buffer *next = NULL;
3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266
	struct btrfs_key key;
	u32 nritems;
	int ret;

	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems == 0) {
		return 1;
	}

	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);

	btrfs_release_path(root, path);
3267
	path->keep_locks = 1;
3268 3269 3270 3271 3272 3273
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	if (ret < 0)
		return ret;

3274
	nritems = btrfs_header_nritems(path->nodes[0]);
3275 3276 3277 3278 3279 3280
	/*
	 * by releasing the path above we dropped all our locks.  A balance
	 * could have added more items next to the key that used to be
	 * at the very end of the block.  So, check again here and
	 * advance the path if there are now more items available.
	 */
3281
	if (nritems > 0 && path->slots[0] < nritems - 1) {
3282
		path->slots[0]++;
3283 3284
		goto done;
	}
3285

C
Chris Mason 已提交
3286
	while(level < BTRFS_MAX_LEVEL) {
3287
		if (!path->nodes[level])
C
Chris Mason 已提交
3288
			return 1;
3289

3290 3291
		slot = path->slots[level] + 1;
		c = path->nodes[level];
3292
		if (slot >= btrfs_header_nritems(c)) {
3293
			level++;
3294
			if (level == BTRFS_MAX_LEVEL) {
3295
				return 1;
3296
			}
3297 3298
			continue;
		}
3299

3300 3301
		if (next) {
			btrfs_tree_unlock(next);
3302
			free_extent_buffer(next);
3303
		}
3304

3305 3306
		if (level == 1 && (path->locks[1] || path->skip_locking) &&
		    path->reada)
3307
			reada_for_search(root, path, level, slot, 0);
3308

3309
		next = read_node_slot(root, c, slot);
3310 3311 3312 3313
		if (!path->skip_locking) {
			WARN_ON(!btrfs_tree_locked(c));
			btrfs_tree_lock(next);
		}
3314 3315 3316 3317 3318 3319
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
3320 3321
		if (path->locks[level])
			btrfs_tree_unlock(c);
3322
		free_extent_buffer(c);
3323 3324
		path->nodes[level] = next;
		path->slots[level] = 0;
3325 3326
		if (!path->skip_locking)
			path->locks[level] = 1;
3327 3328
		if (!level)
			break;
3329 3330
		if (level == 1 && path->locks[1] && path->reada)
			reada_for_search(root, path, level, slot, 0);
3331
		next = read_node_slot(root, next, 0);
3332 3333 3334 3335
		if (!path->skip_locking) {
			WARN_ON(!btrfs_tree_locked(path->nodes[level]));
			btrfs_tree_lock(next);
		}
3336
	}
3337 3338
done:
	unlock_up(path, 0, 1);
3339 3340
	return 0;
}
3341

3342 3343 3344 3345 3346 3347
/*
 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
 * searching until it gets past min_objectid or finds an item of 'type'
 *
 * returns 0 if something is found, 1 if nothing was found and < 0 on error
 */
3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370
int btrfs_previous_item(struct btrfs_root *root,
			struct btrfs_path *path, u64 min_objectid,
			int type)
{
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret;

	while(1) {
		if (path->slots[0] == 0) {
			ret = btrfs_prev_leaf(root, path);
			if (ret != 0)
				return ret;
		} else {
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.type == type)
			return 0;
	}
	return 1;
}