ctree.c 70.5 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19
#include <linux/sched.h>
20 21
#include "ctree.h"
#include "disk-io.h"
22
#include "transaction.h"
23
#include "print-tree.h"
24

25 26 27
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28
		      *root, struct btrfs_key *ins_key,
29
		      struct btrfs_path *path, int data_size, int extend);
30 31 32 33 34 35 36
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
37 38
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);
39

C
Chris Mason 已提交
40
inline void btrfs_init_path(struct btrfs_path *p)
C
Chris Mason 已提交
41
{
C
Chris Mason 已提交
42
	memset(p, 0, sizeof(*p));
C
Chris Mason 已提交
43 44
}

C
Chris Mason 已提交
45
struct btrfs_path *btrfs_alloc_path(void)
C
Chris Mason 已提交
46
{
C
Chris Mason 已提交
47 48
	struct btrfs_path *path;
	path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49
	if (path) {
C
Chris Mason 已提交
50
		btrfs_init_path(path);
51 52
		path->reada = 1;
	}
C
Chris Mason 已提交
53
	return path;
C
Chris Mason 已提交
54 55
}

C
Chris Mason 已提交
56
void btrfs_free_path(struct btrfs_path *p)
57
{
C
Chris Mason 已提交
58 59
	btrfs_release_path(NULL, p);
	kmem_cache_free(btrfs_path_cachep, p);
60 61
}

C
Chris Mason 已提交
62
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 64
{
	int i;
C
Chris Mason 已提交
65
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66 67
		if (!p->nodes[i])
			break;
68
		free_extent_buffer(p->nodes[i]);
69
	}
C
Chris Mason 已提交
70
	memset(p, 0, sizeof(*p));
71 72
}

73 74 75 76 77 78 79 80 81 82
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct extent_buffer *cow;
	u32 nritems;
	int ret = 0;
	int level;
	struct btrfs_key first_key;
83
	struct btrfs_root *new_root;
84

85 86 87 88 89 90
	new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
	if (!new_root)
		return -ENOMEM;

	memcpy(new_root, root, sizeof(*new_root));
	new_root->root_key.objectid = new_root_objectid;
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (nritems) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		else
			btrfs_node_key_to_cpu(buf, &first_key, 0);
	} else {
		first_key.objectid = 0;
	}
106
	cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
107 108 109
				       new_root_objectid,
				       trans->transid, first_key.objectid,
				       level, buf->start, 0);
110 111
	if (IS_ERR(cow)) {
		kfree(new_root);
112
		return PTR_ERR(cow);
113
	}
114 115 116 117 118 119 120

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, new_root_objectid);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
121 122 123
	ret = btrfs_inc_ref(trans, new_root, buf);
	kfree(new_root);

124 125 126 127 128 129 130 131 132
	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

int __btrfs_cow_block(struct btrfs_trans_handle *trans,
133 134 135 136 137
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size)
C
Chris Mason 已提交
138
{
139
	u64 root_gen;
140
	struct extent_buffer *cow;
141
	u32 nritems;
142 143
	int ret = 0;
	int different_trans = 0;
144 145 146 147 148 149 150 151
	int level;
	struct btrfs_key first_key;

	if (root->ref_cows) {
		root_gen = trans->transid;
	} else {
		root_gen = 0;
	}
C
Chris Mason 已提交
152

153 154
	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
155
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
156

157 158 159 160 161 162 163 164 165 166 167 168 169
	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (nritems) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		else
			btrfs_node_key_to_cpu(buf, &first_key, 0);
	} else {
		first_key.objectid = 0;
	}
	cow = __btrfs_alloc_free_block(trans, root, buf->len,
				     root->root_key.objectid,
				     root_gen, first_key.objectid, level,
170
				     search_start, empty_size);
171 172
	if (IS_ERR(cow))
		return PTR_ERR(cow);
173

174
	copy_extent_buffer(cow, buf, 0, 0, cow->len);
175
	btrfs_set_header_bytenr(cow, cow->start);
176 177
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, root->root_key.objectid);
178

179 180
	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (btrfs_header_generation(buf) != trans->transid) {
181 182 183 184 185 186 187 188
		different_trans = 1;
		ret = btrfs_inc_ref(trans, root, buf);
		if (ret)
			return ret;
	} else {
		clean_tree_block(trans, root, buf);
	}

C
Chris Mason 已提交
189
	if (buf == root->node) {
190
		root_gen = btrfs_header_generation(buf);
C
Chris Mason 已提交
191
		root->node = cow;
192
		extent_buffer_get(cow);
C
Chris Mason 已提交
193
		if (buf != root->commit_root) {
194
			btrfs_free_extent(trans, root, buf->start,
195 196
					  buf->len, root->root_key.objectid,
					  root_gen, 0, 0, 1);
C
Chris Mason 已提交
197
		}
198
		free_extent_buffer(buf);
C
Chris Mason 已提交
199
	} else {
200
		root_gen = btrfs_header_generation(parent);
201
		btrfs_set_node_blockptr(parent, parent_slot,
202
					cow->start);
203 204 205
		WARN_ON(trans->transid == 0);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
C
Chris Mason 已提交
206
		btrfs_mark_buffer_dirty(parent);
207
		WARN_ON(btrfs_header_generation(parent) != trans->transid);
208 209 210
		btrfs_free_extent(trans, root, buf->start, buf->len,
				  btrfs_header_owner(parent), root_gen,
				  0, 0, 1);
C
Chris Mason 已提交
211
	}
212
	free_extent_buffer(buf);
C
Chris Mason 已提交
213
	btrfs_mark_buffer_dirty(cow);
C
Chris Mason 已提交
214
	*cow_ret = cow;
C
Chris Mason 已提交
215 216 217
	return 0;
}

218 219 220 221
int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret)
222 223
{
	u64 search_start;
224
	int ret;
225 226 227 228 229 230 231 232 233 234
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}
235
	if (btrfs_header_generation(buf) == trans->transid) {
236 237 238 239
		*cow_ret = buf;
		return 0;
	}

240
	search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
241
	ret = __btrfs_cow_block(trans, root, buf, parent,
242
				 parent_slot, cow_ret, search_start, 0);
243
	return ret;
244 245
}

246
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
247
{
248
	if (blocknr < other && other - (blocknr + blocksize) < 32768)
249
		return 1;
250
	if (blocknr > other && blocknr - (other + blocksize) < 32768)
251 252 253 254
		return 1;
	return 0;
}

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	if (k1.objectid > k2->objectid)
		return 1;
	if (k1.objectid < k2->objectid)
		return -1;
	if (k1.type > k2->type)
		return 1;
	if (k1.type < k2->type)
		return -1;
	if (k1.offset > k2->offset)
		return 1;
	if (k1.offset < k2->offset)
		return -1;
	return 0;
}


280
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
281
		       struct btrfs_root *root, struct extent_buffer *parent,
282 283
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
284
{
285 286
	struct extent_buffer *cur;
	struct extent_buffer *tmp;
287
	u64 blocknr;
288 289
	u64 search_start = *last_ret;
	u64 last_block = 0;
290 291 292 293 294
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
295
	int parent_level;
296 297
	int uptodate;
	u32 blocksize;
298 299
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;
300

301 302 303 304
	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

305 306 307 308 309 310 311 312 313 314
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}
315

316 317
	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
318 319 320 321 322 323 324
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;
325

326 327 328 329 330 331 332 333
		if (!parent->map_token) {
			map_extent_buffer(parent,
					btrfs_node_key_ptr_offset(i),
					sizeof(struct btrfs_key_ptr),
					&parent->map_token, &parent->kaddr,
					&parent->map_start, &parent->map_len,
					KM_USER1);
		}
334 335 336 337 338
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
339
		blocknr = btrfs_node_blockptr(parent, i);
340 341
		if (last_block == 0)
			last_block = blocknr;
342

343
		if (i > 0) {
344 345
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
346
		}
347
		if (close && i < end_slot - 2) {
348 349
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
350
		}
351 352
		if (close) {
			last_block = blocknr;
353
			continue;
354
		}
355 356 357 358 359
		if (parent->map_token) {
			unmap_extent_buffer(parent, parent->map_token,
					    KM_USER1);
			parent->map_token = NULL;
		}
360

361 362 363 364 365
		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur);
		else
			uptodate = 0;
366
		if (!cur || !uptodate) {
367
			if (cache_only) {
368
				free_extent_buffer(cur);
369 370
				continue;
			}
371 372 373 374 375
			if (!cur) {
				cur = read_tree_block(root, blocknr,
							 blocksize);
			} else if (!uptodate) {
				btrfs_read_buffer(cur);
376
			}
377
		}
378
		if (search_start == 0)
379
			search_start = last_block;
380

381 382 383 384
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&tmp, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize));
Y
Yan 已提交
385
		if (err) {
386
			free_extent_buffer(cur);
387
			break;
Y
Yan 已提交
388
		}
389
		search_start = tmp->start;
390
		last_block = tmp->start;
391 392
		*last_ret = search_start;
		if (parent_level == 1)
393 394
			btrfs_clear_buffer_defrag(tmp);
		free_extent_buffer(tmp);
395
	}
396 397 398 399 400
	if (parent->map_token) {
		unmap_extent_buffer(parent, parent->map_token,
				    KM_USER1);
		parent->map_token = NULL;
	}
401 402 403
	return err;
}

C
Chris Mason 已提交
404 405 406 407 408
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
C
Chris Mason 已提交
409
static inline unsigned int leaf_data_end(struct btrfs_root *root,
410
					 struct extent_buffer *leaf)
411
{
412
	u32 nr = btrfs_header_nritems(leaf);
413
	if (nr == 0)
C
Chris Mason 已提交
414
		return BTRFS_LEAF_DATA_SIZE(root);
415
	return btrfs_item_offset_nr(leaf, nr - 1);
416 417
}

C
Chris Mason 已提交
418 419
static int check_node(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
C
Chris Mason 已提交
420
{
421 422 423 424
	struct extent_buffer *parent = NULL;
	struct extent_buffer *node = path->nodes[level];
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key node_key;
C
Chris Mason 已提交
425
	int parent_slot;
426 427
	int slot;
	struct btrfs_key cpukey;
428
	u32 nritems = btrfs_header_nritems(node);
C
Chris Mason 已提交
429 430

	if (path->nodes[level + 1])
431
		parent = path->nodes[level + 1];
A
Aneesh 已提交
432

433
	slot = path->slots[level];
434 435
	BUG_ON(nritems == 0);
	if (parent) {
A
Aneesh 已提交
436
		parent_slot = path->slots[level + 1];
437 438 439
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_node_key(node, &node_key, 0);
		BUG_ON(memcmp(&parent_key, &node_key,
C
Chris Mason 已提交
440
			      sizeof(struct btrfs_disk_key)));
441
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
442
		       btrfs_header_bytenr(node));
C
Chris Mason 已提交
443
	}
C
Chris Mason 已提交
444
	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
445
	if (slot != 0) {
446 447 448
		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
449 450
	}
	if (slot < nritems - 1) {
451 452 453
		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
C
Chris Mason 已提交
454 455 456 457
	}
	return 0;
}

C
Chris Mason 已提交
458 459
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
C
Chris Mason 已提交
460
{
461 462
	struct extent_buffer *leaf = path->nodes[level];
	struct extent_buffer *parent = NULL;
C
Chris Mason 已提交
463
	int parent_slot;
464
	struct btrfs_key cpukey;
465 466 467
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key leaf_key;
	int slot = path->slots[0];
468

469
	u32 nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
470 471

	if (path->nodes[level + 1])
472
		parent = path->nodes[level + 1];
473 474 475 476 477

	if (nritems == 0)
		return 0;

	if (parent) {
A
Aneesh 已提交
478
		parent_slot = path->slots[level + 1];
479 480
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_item_key(leaf, &leaf_key, 0);
481

482
		BUG_ON(memcmp(&parent_key, &leaf_key,
C
Chris Mason 已提交
483
		       sizeof(struct btrfs_disk_key)));
484
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
485
		       btrfs_header_bytenr(leaf));
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
	}
#if 0
	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
		btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
		btrfs_item_key(leaf, &leaf_key, i);
		if (comp_keys(&leaf_key, &cpukey) >= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", i);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, i) !=
			btrfs_item_end_nr(leaf, i + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", i);
			BUG_ON(1);
		}
		if (i == 0) {
			if (btrfs_item_offset_nr(leaf, i) +
			       btrfs_item_size_nr(leaf, i) !=
			       BTRFS_LEAF_DATA_SIZE(root)) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d first offset bad\n", i);
				BUG_ON(1);
			}
		}
C
Chris Mason 已提交
511
	}
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533
	if (nritems > 0) {
		if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d bad size \n", nritems - 1);
				BUG_ON(1);
		}
	}
#endif
	if (slot != 0 && slot < nritems - 1) {
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
		if (comp_keys(&leaf_key, &cpukey) <= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", slot);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, slot - 1) !=
		       btrfs_item_end_nr(leaf, slot)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
534 535
	}
	if (slot < nritems - 1) {
536 537 538 539 540 541 542 543 544
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
		if (btrfs_item_offset_nr(leaf, slot) !=
			btrfs_item_end_nr(leaf, slot + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
C
Chris Mason 已提交
545
	}
546 547
	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
C
Chris Mason 已提交
548 549 550
	return 0;
}

C
Chris Mason 已提交
551 552
static int check_block(struct btrfs_root *root, struct btrfs_path *path,
			int level)
C
Chris Mason 已提交
553
{
554
	return 0;
555
#if 0
556 557
	struct extent_buffer *buf = path->nodes[level];

558 559 560
	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
				 (unsigned long)btrfs_header_fsid(buf),
				 BTRFS_FSID_SIZE)) {
561
		printk("warning bad block %Lu\n", buf->start);
562
		return 1;
563
	}
564
#endif
C
Chris Mason 已提交
565
	if (level == 0)
C
Chris Mason 已提交
566 567
		return check_leaf(root, path, level);
	return check_node(root, path, level);
C
Chris Mason 已提交
568 569
}

C
Chris Mason 已提交
570
/*
571 572 573
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
C
Chris Mason 已提交
574 575 576 577 578 579
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
580 581 582
static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
			      int item_size, struct btrfs_key *key,
			      int max, int *slot)
583 584 585 586 587
{
	int low = 0;
	int high = max;
	int mid;
	int ret;
588
	struct btrfs_disk_key *tmp = NULL;
589 590 591 592 593 594
	struct btrfs_disk_key unaligned;
	unsigned long offset;
	char *map_token = NULL;
	char *kaddr = NULL;
	unsigned long map_start = 0;
	unsigned long map_len = 0;
595
	int err;
596 597 598

	while(low < high) {
		mid = (low + high) / 2;
599 600 601 602 603
		offset = p + mid * item_size;

		if (!map_token || offset < map_start ||
		    (offset + sizeof(struct btrfs_disk_key)) >
		    map_start + map_len) {
604
			if (map_token) {
605
				unmap_extent_buffer(eb, map_token, KM_USER0);
606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
				map_token = NULL;
			}
			err = map_extent_buffer(eb, offset,
						sizeof(struct btrfs_disk_key),
						&map_token, &kaddr,
						&map_start, &map_len, KM_USER0);

			if (!err) {
				tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
			} else {
				read_extent_buffer(eb, &unaligned,
						   offset, sizeof(unaligned));
				tmp = &unaligned;
			}
621 622 623 624 625

		} else {
			tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
		}
626 627 628 629 630 631 632 633
		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
634 635
			if (map_token)
				unmap_extent_buffer(eb, map_token, KM_USER0);
636 637 638 639
			return 0;
		}
	}
	*slot = low;
640 641
	if (map_token)
		unmap_extent_buffer(eb, map_token, KM_USER0);
642 643 644
	return 1;
}

C
Chris Mason 已提交
645 646 647 648
/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
649 650
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		      int level, int *slot)
651
{
652 653 654
	if (level == 0) {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_leaf, items),
C
Chris Mason 已提交
655
					  sizeof(struct btrfs_item),
656
					  key, btrfs_header_nritems(eb),
657
					  slot);
658
	} else {
659 660
		return generic_bin_search(eb,
					  offsetof(struct btrfs_node, ptrs),
C
Chris Mason 已提交
661
					  sizeof(struct btrfs_key_ptr),
662
					  key, btrfs_header_nritems(eb),
663
					  slot);
664 665 666 667
	}
	return -1;
}

668 669
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
670 671 672
{
	if (slot < 0)
		return NULL;
673
	if (slot >= btrfs_header_nritems(parent))
674
		return NULL;
675 676
	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
		       btrfs_level_size(root, btrfs_header_level(parent) - 1));
677 678
}

679 680
static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
			 *root, struct btrfs_path *path, int level)
681
{
682 683 684 685
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
686 687 688 689
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
690
	int err_on_enospc = 0;
691
	u64 orig_ptr;
692 693 694 695

	if (level == 0)
		return 0;

696
	mid = path->nodes[level];
697 698
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

699
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
700

C
Chris Mason 已提交
701
	if (level < BTRFS_MAX_LEVEL - 1)
702
		parent = path->nodes[level + 1];
703 704
	pslot = path->slots[level + 1];

C
Chris Mason 已提交
705 706 707 708
	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
709 710
	if (!parent) {
		struct extent_buffer *child;
711

712
		if (btrfs_header_nritems(mid) != 1)
713 714 715
			return 0;

		/* promote the child to a root */
716
		child = read_node_slot(root, mid, 0);
717 718 719
		BUG_ON(!child);
		root->node = child;
		path->nodes[level] = NULL;
720 721
		clean_tree_block(trans, root, mid);
		wait_on_tree_block_writeback(root, mid);
722
		/* once for the path */
723
		free_extent_buffer(mid);
724 725 726
		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
					root->root_key.objectid,
					btrfs_header_generation(mid), 0, 0, 1);
727
		/* once for the root ptr */
728
		free_extent_buffer(mid);
729
		return ret;
730
	}
731
	if (btrfs_header_nritems(mid) >
C
Chris Mason 已提交
732
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
733 734
		return 0;

735
	if (btrfs_header_nritems(mid) < 2)
736 737
		err_on_enospc = 1;

738 739 740 741
	left = read_node_slot(root, parent, pslot - 1);
	if (left) {
		wret = btrfs_cow_block(trans, root, left,
				       parent, pslot - 1, &left);
742 743 744 745
		if (wret) {
			ret = wret;
			goto enospc;
		}
746
	}
747 748 749 750
	right = read_node_slot(root, parent, pslot + 1);
	if (right) {
		wret = btrfs_cow_block(trans, root, right,
				       parent, pslot + 1, &right);
751 752 753 754 755 756 757
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
758 759 760
	if (left) {
		orig_slot += btrfs_header_nritems(left);
		wret = push_node_left(trans, root, left, mid);
761 762
		if (wret < 0)
			ret = wret;
763
		if (btrfs_header_nritems(mid) < 2)
764
			err_on_enospc = 1;
765
	}
766 767 768 769

	/*
	 * then try to empty the right most buffer into the middle
	 */
770 771
	if (right) {
		wret = push_node_left(trans, root, mid, right);
772
		if (wret < 0 && wret != -ENOSPC)
773
			ret = wret;
774
		if (btrfs_header_nritems(right) == 0) {
775
			u64 bytenr = right->start;
776
			u64 generation = btrfs_header_generation(parent);
777 778
			u32 blocksize = right->len;

779 780 781
			clean_tree_block(trans, root, right);
			wait_on_tree_block_writeback(root, right);
			free_extent_buffer(right);
782
			right = NULL;
783 784
			wret = del_ptr(trans, root, path, level + 1, pslot +
				       1);
785 786
			if (wret)
				ret = wret;
787
			wret = btrfs_free_extent(trans, root, bytenr,
788 789 790
						 blocksize,
						 btrfs_header_owner(parent),
						 generation, 0, 0, 1);
791 792 793
			if (wret)
				ret = wret;
		} else {
794 795 796 797
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
798 799
		}
	}
800
	if (btrfs_header_nritems(mid) == 1) {
801 802 803 804 805 806 807 808 809
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
810 811
		BUG_ON(!left);
		wret = balance_node_right(trans, root, mid, left);
812
		if (wret < 0) {
813
			ret = wret;
814 815
			goto enospc;
		}
816 817
		BUG_ON(wret == 1);
	}
818
	if (btrfs_header_nritems(mid) == 0) {
819
		/* we've managed to empty the middle node, drop it */
820
		u64 root_gen = btrfs_header_generation(parent);
821 822
		u64 bytenr = mid->start;
		u32 blocksize = mid->len;
823 824 825
		clean_tree_block(trans, root, mid);
		wait_on_tree_block_writeback(root, mid);
		free_extent_buffer(mid);
826
		mid = NULL;
827
		wret = del_ptr(trans, root, path, level + 1, pslot);
828 829
		if (wret)
			ret = wret;
830 831 832
		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
					 btrfs_header_owner(parent),
					 root_gen, 0, 0, 1);
833 834
		if (wret)
			ret = wret;
835 836
	} else {
		/* update the parent key to reflect our changes */
837 838 839 840
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
841
	}
842

843
	/* update the path */
844 845 846 847
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			extent_buffer_get(left);
			path->nodes[level] = left;
848 849
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
850 851
			if (mid)
				free_extent_buffer(mid);
852
		} else {
853
			orig_slot -= btrfs_header_nritems(left);
854 855 856
			path->slots[level] = orig_slot;
		}
	}
857
	/* double check we haven't messed things up */
C
Chris Mason 已提交
858
	check_block(root, path, level);
C
Chris Mason 已提交
859
	if (orig_ptr !=
860
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
861
		BUG();
862
enospc:
863 864 865 866
	if (right)
		free_extent_buffer(right);
	if (left)
		free_extent_buffer(left);
867 868 869
	return ret;
}

870 871 872 873 874
/* returns zero if the push worked, non-zero otherwise */
static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path, int level)
{
875 876 877 878
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
879 880 881 882 883 884 885 886 887
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	if (level == 0)
		return 1;

888
	mid = path->nodes[level];
889
	WARN_ON(btrfs_header_generation(mid) != trans->transid);
890 891 892
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
893
		parent = path->nodes[level + 1];
894 895
	pslot = path->slots[level + 1];

896
	if (!parent)
897 898
		return 1;

899
	left = read_node_slot(root, parent, pslot - 1);
900 901

	/* first, try to make some room in the middle buffer */
902
	if (left) {
903
		u32 left_nr;
904
		left_nr = btrfs_header_nritems(left);
C
Chris Mason 已提交
905 906 907
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
908 909
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left);
910 911 912 913
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
914
						      left, mid);
915
			}
C
Chris Mason 已提交
916
		}
917 918 919
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
920
			struct btrfs_disk_key disk_key;
921
			orig_slot += left_nr;
922 923 924 925 926
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				path->nodes[level] = left;
927 928
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
929
				free_extent_buffer(mid);
930 931
			} else {
				orig_slot -=
932
					btrfs_header_nritems(left);
933
				path->slots[level] = orig_slot;
934
				free_extent_buffer(left);
935 936 937
			}
			return 0;
		}
938
		free_extent_buffer(left);
939
	}
940
	right= read_node_slot(root, parent, pslot + 1);
941 942 943 944

	/*
	 * then try to empty the right most buffer into the middle
	 */
945
	if (right) {
C
Chris Mason 已提交
946
		u32 right_nr;
947
		right_nr = btrfs_header_nritems(right);
C
Chris Mason 已提交
948 949 950
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
951 952 953
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right);
954 955 956 957
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
958
							  right, mid);
959
			}
C
Chris Mason 已提交
960
		}
961 962 963
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
964 965 966 967 968 969 970 971
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				path->nodes[level] = right;
972 973
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
974 975
					btrfs_header_nritems(mid);
				free_extent_buffer(mid);
976
			} else {
977
				free_extent_buffer(right);
978 979 980
			}
			return 0;
		}
981
		free_extent_buffer(right);
982 983 984 985
	}
	return 1;
}

986 987 988 989
/*
 * readahead one full node of leaves
 */
static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
990
			     int level, int slot, u64 objectid)
991
{
992
	struct extent_buffer *node;
993
	struct btrfs_disk_key disk_key;
994 995
	u32 nritems;
	u64 search;
996 997 998
	u64 lowest_read;
	u64 highest_read;
	u64 nread = 0;
999
	int direction = path->reada;
1000
	struct extent_buffer *eb;
1001 1002 1003
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;
1004

1005
	if (level != 1)
1006 1007 1008
		return;

	if (!path->nodes[level])
1009 1010
		return;

1011
	node = path->nodes[level];
1012
	search = btrfs_node_blockptr(node, slot);
1013 1014
	blocksize = btrfs_level_size(root, level - 1);
	eb = btrfs_find_tree_block(root, search, blocksize);
1015 1016
	if (eb) {
		free_extent_buffer(eb);
1017 1018 1019
		return;
	}

1020 1021 1022
	highest_read = search;
	lowest_read = search;

1023
	nritems = btrfs_header_nritems(node);
1024
	nr = slot;
1025
	while(1) {
1026 1027 1028 1029 1030 1031 1032 1033
		if (direction < 0) {
			if (nr == 0)
				break;
			nr--;
		} else if (direction > 0) {
			nr++;
			if (nr >= nritems)
				break;
1034
		}
1035 1036 1037 1038 1039
		if (path->reada < 0 && objectid) {
			btrfs_node_key(node, &disk_key, nr);
			if (btrfs_disk_key_objectid(&disk_key) != objectid)
				break;
		}
1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056
		search = btrfs_node_blockptr(node, nr);
		if ((search >= lowest_read && search <= highest_read) ||
		    (search < lowest_read && lowest_read - search <= 32768) ||
		    (search > highest_read && search - highest_read <= 32768)) {
			readahead_tree_block(root, search, blocksize);
			nread += blocksize;
		}
		nscan++;
		if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
			break;
		if(nread > (1024 * 1024) || nscan > 128)
			break;

		if (search < lowest_read)
			lowest_read = search;
		if (search > highest_read)
			highest_read = search;
1057 1058
	}
}
C
Chris Mason 已提交
1059 1060 1061 1062 1063 1064
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
C
Chris Mason 已提交
1065 1066
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
C
Chris Mason 已提交
1067 1068 1069 1070
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
C
Chris Mason 已提交
1071
 */
1072 1073 1074
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
1075
{
1076
	struct extent_buffer *b;
1077
	u64 bytenr;
1078
	u64 ptr_gen;
1079 1080 1081
	int slot;
	int ret;
	int level;
1082
	int should_reada = p->reada;
1083 1084
	u8 lowest_level = 0;

1085 1086
	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len);
C
Chris Mason 已提交
1087 1088
	WARN_ON(p->nodes[0] != NULL);
	WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1089 1090
again:
	b = root->node;
1091
	extent_buffer_get(b);
1092
	while (b) {
1093
		level = btrfs_header_level(b);
C
Chris Mason 已提交
1094 1095
		if (cow) {
			int wret;
C
Chris Mason 已提交
1096 1097 1098
			wret = btrfs_cow_block(trans, root, b,
					       p->nodes[level + 1],
					       p->slots[level + 1],
Y
Yan 已提交
1099
					       &b);
1100
			if (wret) {
1101
				free_extent_buffer(b);
1102 1103
				return wret;
			}
C
Chris Mason 已提交
1104 1105
		}
		BUG_ON(!cow && ins_len);
1106
		if (level != btrfs_header_level(b))
C
Chris Mason 已提交
1107
			WARN_ON(1);
1108
		level = btrfs_header_level(b);
1109
		p->nodes[level] = b;
C
Chris Mason 已提交
1110
		ret = check_block(root, p, level);
C
Chris Mason 已提交
1111 1112
		if (ret)
			return -1;
1113 1114
		ret = bin_search(b, key, level, &slot);
		if (level != 0) {
1115 1116 1117
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
1118
			if (ins_len > 0 && btrfs_header_nritems(b) >=
1119
			    BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1120
				int sret = split_node(trans, root, p, level);
C
Chris Mason 已提交
1121 1122 1123 1124 1125
				BUG_ON(sret > 0);
				if (sret)
					return sret;
				b = p->nodes[level];
				slot = p->slots[level];
1126
			} else if (ins_len < 0) {
1127 1128
				int sret = balance_level(trans, root, p,
							 level);
1129 1130 1131
				if (sret)
					return sret;
				b = p->nodes[level];
1132 1133
				if (!b) {
					btrfs_release_path(NULL, p);
1134
					goto again;
1135
				}
1136
				slot = p->slots[level];
1137
				BUG_ON(btrfs_header_nritems(b) == 1);
C
Chris Mason 已提交
1138
			}
1139 1140 1141
			/* this is only true while dropping a snapshot */
			if (level == lowest_level)
				break;
1142
			bytenr = btrfs_node_blockptr(b, slot);
1143
			ptr_gen = btrfs_node_ptr_generation(b, slot);
1144
			if (should_reada)
1145 1146
				reada_for_search(root, p, level, slot,
						 key->objectid);
1147 1148
			b = read_tree_block(root, bytenr,
					    btrfs_level_size(root, level - 1));
1149 1150 1151 1152 1153 1154 1155
			if (ptr_gen != btrfs_header_generation(b)) {
				printk("block %llu bad gen wanted %llu "
				       "found %llu\n",
			        (unsigned long long)b->start,
				(unsigned long long)ptr_gen,
			        (unsigned long long)btrfs_header_generation(b));
			}
1156 1157
		} else {
			p->slots[level] = slot;
1158
			if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
C
Chris Mason 已提交
1159
			    sizeof(struct btrfs_item) + ins_len) {
1160
				int sret = split_leaf(trans, root, key,
1161
						      p, ins_len, ret == 0);
C
Chris Mason 已提交
1162 1163 1164 1165
				BUG_ON(sret > 0);
				if (sret)
					return sret;
			}
1166 1167 1168
			return ret;
		}
	}
C
Chris Mason 已提交
1169
	return 1;
1170 1171
}

C
Chris Mason 已提交
1172 1173 1174 1175 1176 1177
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
C
Chris Mason 已提交
1178 1179 1180
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
C
Chris Mason 已提交
1181
 */
1182 1183 1184
static int fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
1185 1186
{
	int i;
C
Chris Mason 已提交
1187
	int ret = 0;
1188 1189
	struct extent_buffer *t;

C
Chris Mason 已提交
1190
	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1191
		int tslot = path->slots[i];
1192
		if (!path->nodes[i])
1193
			break;
1194 1195
		t = path->nodes[i];
		btrfs_set_node_key(t, key, tslot);
C
Chris Mason 已提交
1196
		btrfs_mark_buffer_dirty(path->nodes[i]);
1197 1198 1199
		if (tslot != 0)
			break;
	}
C
Chris Mason 已提交
1200
	return ret;
1201 1202
}

C
Chris Mason 已提交
1203 1204
/*
 * try to push data from one node into the next node left in the
1205
 * tree.
C
Chris Mason 已提交
1206 1207 1208
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
C
Chris Mason 已提交
1209
 */
1210
static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1211 1212
			  *root, struct extent_buffer *dst,
			  struct extent_buffer *src)
1213 1214
{
	int push_items = 0;
1215 1216
	int src_nritems;
	int dst_nritems;
C
Chris Mason 已提交
1217
	int ret = 0;
1218

1219 1220
	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
C
Chris Mason 已提交
1221
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1222 1223
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);
1224

1225
	if (push_items <= 0) {
1226
		return 1;
1227
	}
1228

1229
	if (src_nritems < push_items)
1230 1231
		push_items = src_nritems;

1232 1233 1234 1235 1236
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
		           push_items * sizeof(struct btrfs_key_ptr));

1237
	if (push_items < src_nritems) {
1238 1239 1240 1241 1242 1243 1244 1245 1246
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258
	return ret;
}

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will  only push up to 1/2 the contents of the left node over
 */
1259 1260 1261 1262
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst,
			      struct extent_buffer *src)
1263 1264 1265 1266 1267 1268 1269
{
	int push_items = 0;
	int max_push;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

1270 1271 1272
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

1273 1274
	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
C
Chris Mason 已提交
1275
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1276
	if (push_items <= 0)
1277 1278 1279 1280
		return 1;

	max_push = src_nritems / 2 + 1;
	/* don't try to empty the node */
Y
Yan 已提交
1281
	if (max_push >= src_nritems)
1282
		return 1;
Y
Yan 已提交
1283

1284 1285 1286
	if (max_push < push_items)
		push_items = max_push;

1287 1288 1289 1290
	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
				      btrfs_node_key_ptr_offset(0),
				      (dst_nritems) *
				      sizeof(struct btrfs_key_ptr));
C
Chris Mason 已提交
1291

1292 1293 1294 1295
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(src_nritems - push_items),
		           push_items * sizeof(struct btrfs_key_ptr));
1296

1297 1298
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
1299

1300 1301
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
C
Chris Mason 已提交
1302
	return ret;
1303 1304
}

C
Chris Mason 已提交
1305 1306 1307 1308
/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
C
Chris Mason 已提交
1309 1310
 *
 * returns zero on success or < 0 on failure.
C
Chris Mason 已提交
1311
 */
1312 1313 1314
static int insert_new_root(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path, int level)
C
Chris Mason 已提交
1315
{
1316 1317
	u64 root_gen;
	u64 lower_gen;
1318 1319 1320
	struct extent_buffer *lower;
	struct extent_buffer *c;
	struct btrfs_disk_key lower_key;
C
Chris Mason 已提交
1321 1322 1323 1324

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	lower = path->nodes[level-1];
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
	else
		btrfs_node_key(lower, &lower_key, 0);

	c = __btrfs_alloc_free_block(trans, root, root->nodesize,
				   root->root_key.objectid,
				   root_gen, lower_key.objectid, level,
1339
				   root->node->start, 0);
1340 1341 1342 1343 1344
	if (IS_ERR(c))
		return PTR_ERR(c);
	memset_extent_buffer(c, 0, 0, root->nodesize);
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
1345
	btrfs_set_header_bytenr(c, c->start);
1346 1347 1348 1349 1350 1351 1352
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(c),
			    BTRFS_FSID_SIZE);
	btrfs_set_node_key(c, &lower_key, 0);
1353
	btrfs_set_node_blockptr(c, 0, lower->start);
1354 1355 1356 1357
	lower_gen = btrfs_header_generation(lower);
	WARN_ON(lower_gen == 0);

	btrfs_set_node_ptr_generation(c, 0, lower_gen);
1358

1359
	btrfs_mark_buffer_dirty(c);
1360

C
Chris Mason 已提交
1361
	/* the super has an extra ref to root->node */
1362 1363 1364 1365
	free_extent_buffer(root->node);
	root->node = c;
	extent_buffer_get(c);
	path->nodes[level] = c;
C
Chris Mason 已提交
1366
	path->slots[level] = 0;
1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378

	if (root->ref_cows && lower_gen != trans->transid) {
		struct btrfs_path *back_path = btrfs_alloc_path();
		int ret;
		ret = btrfs_insert_extent_backref(trans,
						  root->fs_info->extent_root,
						  path, lower->start,
						  root->root_key.objectid,
						  trans->transid, 0, 0);
		BUG_ON(ret);
		btrfs_free_path(back_path);
	}
C
Chris Mason 已提交
1379 1380 1381
	return 0;
}

C
Chris Mason 已提交
1382 1383 1384
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
C
Chris Mason 已提交
1385
 *
C
Chris Mason 已提交
1386 1387
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
C
Chris Mason 已提交
1388 1389
 *
 * returns zero on success and < 0 on any error
C
Chris Mason 已提交
1390
 */
1391 1392
static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, struct btrfs_disk_key
1393
		      *key, u64 bytenr, int slot, int level)
C
Chris Mason 已提交
1394
{
1395
	struct extent_buffer *lower;
C
Chris Mason 已提交
1396
	int nritems;
C
Chris Mason 已提交
1397 1398

	BUG_ON(!path->nodes[level]);
1399 1400
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
C
Chris Mason 已提交
1401 1402
	if (slot > nritems)
		BUG();
C
Chris Mason 已提交
1403
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
C
Chris Mason 已提交
1404 1405
		BUG();
	if (slot != nritems) {
1406 1407 1408
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
			      btrfs_node_key_ptr_offset(slot),
C
Chris Mason 已提交
1409
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
C
Chris Mason 已提交
1410
	}
1411
	btrfs_set_node_key(lower, key, slot);
1412
	btrfs_set_node_blockptr(lower, slot, bytenr);
1413 1414
	WARN_ON(trans->transid == 0);
	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1415 1416
	btrfs_set_header_nritems(lower, nritems + 1);
	btrfs_mark_buffer_dirty(lower);
C
Chris Mason 已提交
1417 1418 1419
	return 0;
}

C
Chris Mason 已提交
1420 1421 1422 1423 1424 1425
/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
C
Chris Mason 已提交
1426 1427
 *
 * returns 0 on success and < 0 on failure
C
Chris Mason 已提交
1428
 */
1429 1430
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level)
1431
{
1432
	u64 root_gen;
1433 1434 1435
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
1436
	int mid;
C
Chris Mason 已提交
1437
	int ret;
C
Chris Mason 已提交
1438
	int wret;
1439
	u32 c_nritems;
1440

1441
	c = path->nodes[level];
1442
	WARN_ON(btrfs_header_generation(c) != trans->transid);
1443
	if (c == root->node) {
C
Chris Mason 已提交
1444
		/* trying to split the root, lets make a new one */
1445
		ret = insert_new_root(trans, root, path, level + 1);
C
Chris Mason 已提交
1446 1447
		if (ret)
			return ret;
1448 1449
	} else {
		ret = push_nodes_for_insert(trans, root, path, level);
1450 1451
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
1452 1453
		    BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
			return 0;
1454 1455
		if (ret < 0)
			return ret;
1456
	}
1457

1458
	c_nritems = btrfs_header_nritems(c);
1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	btrfs_node_key(c, &disk_key, 0);
	split = __btrfs_alloc_free_block(trans, root, root->nodesize,
					 root->root_key.objectid,
					 root_gen,
					 btrfs_disk_key_objectid(&disk_key),
					 level, c->start, 0);
1470 1471 1472 1473 1474
	if (IS_ERR(split))
		return PTR_ERR(split);

	btrfs_set_header_flags(split, btrfs_header_flags(c));
	btrfs_set_header_level(split, btrfs_header_level(c));
1475
	btrfs_set_header_bytenr(split, split->start);
1476 1477 1478 1479 1480
	btrfs_set_header_generation(split, trans->transid);
	btrfs_set_header_owner(split, root->root_key.objectid);
	write_extent_buffer(split, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(split),
			    BTRFS_FSID_SIZE);
1481

1482
	mid = (c_nritems + 1) / 2;
1483 1484 1485 1486 1487 1488 1489

	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
C
Chris Mason 已提交
1490 1491
	ret = 0;

1492 1493 1494 1495
	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	btrfs_node_key(split, &disk_key, 0);
1496
	wret = insert_ptr(trans, root, path, &disk_key, split->start,
1497
			  path->slots[level + 1] + 1,
C
Chris Mason 已提交
1498
			  level + 1);
C
Chris Mason 已提交
1499 1500 1501
	if (wret)
		ret = wret;

C
Chris Mason 已提交
1502
	if (path->slots[level] >= mid) {
C
Chris Mason 已提交
1503
		path->slots[level] -= mid;
1504 1505
		free_extent_buffer(c);
		path->nodes[level] = split;
C
Chris Mason 已提交
1506 1507
		path->slots[level + 1] += 1;
	} else {
1508
		free_extent_buffer(split);
1509
	}
C
Chris Mason 已提交
1510
	return ret;
1511 1512
}

C
Chris Mason 已提交
1513 1514 1515 1516 1517
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
1518
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1519 1520
{
	int data_len;
1521
	int nritems = btrfs_header_nritems(l);
1522
	int end = min(nritems, start + nr) - 1;
1523 1524 1525

	if (!nr)
		return 0;
1526 1527
	data_len = btrfs_item_end_nr(l, start);
	data_len = data_len - btrfs_item_offset_nr(l, end);
C
Chris Mason 已提交
1528
	data_len += sizeof(struct btrfs_item) * nr;
1529
	WARN_ON(data_len < 0);
1530 1531 1532
	return data_len;
}

1533 1534 1535 1536 1537
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
1538
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1539
{
1540 1541 1542 1543 1544
	int nritems = btrfs_header_nritems(leaf);
	int ret;
	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
	if (ret < 0) {
		printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
J
Jens Axboe 已提交
1545
		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1546 1547 1548
		       leaf_space_used(leaf, 0, nritems), nritems);
	}
	return ret;
1549 1550
}

C
Chris Mason 已提交
1551 1552 1553
/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
C
Chris Mason 已提交
1554 1555 1556
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
C
Chris Mason 已提交
1557
 */
1558
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1559 1560
			   *root, struct btrfs_path *path, int data_size,
			   int empty)
C
Chris Mason 已提交
1561
{
1562 1563 1564 1565
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	struct btrfs_disk_key disk_key;
C
Chris Mason 已提交
1566
	int slot;
1567
	u32 i;
C
Chris Mason 已提交
1568 1569 1570
	int free_space;
	int push_space = 0;
	int push_items = 0;
C
Chris Mason 已提交
1571
	struct btrfs_item *item;
1572
	u32 left_nritems;
1573
	u32 nr;
1574
	u32 right_nritems;
1575
	u32 data_end;
1576
	u32 this_item_size;
1577
	int ret;
C
Chris Mason 已提交
1578 1579 1580 1581 1582 1583

	slot = path->slots[1];
	if (!path->nodes[1]) {
		return 1;
	}
	upper = path->nodes[1];
1584
	if (slot >= btrfs_header_nritems(upper) - 1)
C
Chris Mason 已提交
1585
		return 1;
1586

1587 1588
	right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
				root->leafsize);
C
Chris Mason 已提交
1589
	free_space = btrfs_leaf_free_space(root, right);
C
Chris Mason 已提交
1590
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1591
		free_extent_buffer(right);
C
Chris Mason 已提交
1592 1593
		return 1;
	}
1594

C
Chris Mason 已提交
1595
	/* cow and double check */
1596 1597
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right);
1598
	if (ret) {
1599
		free_extent_buffer(right);
1600 1601
		return 1;
	}
C
Chris Mason 已提交
1602
	free_space = btrfs_leaf_free_space(root, right);
C
Chris Mason 已提交
1603
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1604
		free_extent_buffer(right);
C
Chris Mason 已提交
1605 1606 1607
		return 1;
	}

1608
	left_nritems = btrfs_header_nritems(left);
1609
	if (left_nritems == 0) {
1610
		free_extent_buffer(right);
1611 1612
		return 1;
	}
1613

1614 1615 1616 1617 1618 1619 1620
	if (empty)
		nr = 0;
	else
		nr = 1;

	i = left_nritems - 1;
	while (i >= nr) {
1621
		item = btrfs_item_nr(left, i);
1622

C
Chris Mason 已提交
1623 1624
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635

		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
C
Chris Mason 已提交
1636 1637
			break;
		push_items++;
1638
		push_space += this_item_size + sizeof(*item);
1639 1640 1641
		if (i == 0)
			break;
		i--;
1642 1643 1644 1645
	}
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
C
Chris Mason 已提交
1646
	}
1647

C
Chris Mason 已提交
1648
	if (push_items == 0) {
1649
		free_extent_buffer(right);
C
Chris Mason 已提交
1650 1651
		return 1;
	}
1652

1653
	if (!empty && push_items == left_nritems)
1654
		WARN_ON(1);
1655

C
Chris Mason 已提交
1656
	/* push left to right */
1657
	right_nritems = btrfs_header_nritems(right);
1658

1659
	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
C
Chris Mason 已提交
1660
	push_space -= leaf_data_end(root, left);
1661

C
Chris Mason 已提交
1662
	/* make room in the right data area */
1663 1664 1665 1666 1667 1668
	data_end = leaf_data_end(root, right);
	memmove_extent_buffer(right,
			      btrfs_leaf_data(right) + data_end - push_space,
			      btrfs_leaf_data(right) + data_end,
			      BTRFS_LEAF_DATA_SIZE(root) - data_end);

C
Chris Mason 已提交
1669
	/* copy from the left data area */
1670
	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
C
Chris Mason 已提交
1671 1672 1673
		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
		     btrfs_leaf_data(left) + leaf_data_end(root, left),
		     push_space);
1674 1675 1676 1677 1678

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

C
Chris Mason 已提交
1679
	/* copy the items from left to right */
1680 1681 1682
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
		   btrfs_item_nr_offset(left_nritems - push_items),
		   push_items * sizeof(struct btrfs_item));
C
Chris Mason 已提交
1683 1684

	/* update the item pointers */
1685
	right_nritems += push_items;
1686
	btrfs_set_header_nritems(right, right_nritems);
C
Chris Mason 已提交
1687
	push_space = BTRFS_LEAF_DATA_SIZE(root);
1688
	for (i = 0; i < right_nritems; i++) {
1689
		item = btrfs_item_nr(right, i);
1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}
		push_space -= btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
C
Chris Mason 已提交
1704
	}
1705
	left_nritems -= push_items;
1706
	btrfs_set_header_nritems(left, left_nritems);
C
Chris Mason 已提交
1707

1708 1709
	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
1710
	btrfs_mark_buffer_dirty(right);
1711

1712 1713
	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
C
Chris Mason 已提交
1714
	btrfs_mark_buffer_dirty(upper);
C
Chris Mason 已提交
1715

C
Chris Mason 已提交
1716
	/* then fixup the leaf pointer in the path */
1717 1718
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
1719 1720
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
C
Chris Mason 已提交
1721 1722
		path->slots[1] += 1;
	} else {
1723
		free_extent_buffer(right);
C
Chris Mason 已提交
1724 1725 1726
	}
	return 0;
}
C
Chris Mason 已提交
1727 1728 1729 1730
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
1731
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1732 1733
			  *root, struct btrfs_path *path, int data_size,
			  int empty)
1734
{
1735 1736 1737
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
1738 1739 1740 1741 1742
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
C
Chris Mason 已提交
1743
	struct btrfs_item *item;
1744
	u32 old_left_nritems;
1745
	u32 right_nritems;
1746
	u32 nr;
C
Chris Mason 已提交
1747 1748
	int ret = 0;
	int wret;
1749 1750
	u32 this_item_size;
	u32 old_left_item_size;
1751 1752

	slot = path->slots[1];
1753
	if (slot == 0)
1754
		return 1;
1755
	if (!path->nodes[1])
1756
		return 1;
1757

1758 1759 1760 1761 1762
	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0) {
		return 1;
	}

1763
	left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1764
			       slot - 1), root->leafsize);
C
Chris Mason 已提交
1765
	free_space = btrfs_leaf_free_space(root, left);
C
Chris Mason 已提交
1766
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1767
		free_extent_buffer(left);
1768 1769
		return 1;
	}
C
Chris Mason 已提交
1770 1771

	/* cow and double check */
1772 1773
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left);
1774 1775
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
1776
		free_extent_buffer(left);
1777 1778
		return 1;
	}
1779

C
Chris Mason 已提交
1780
	free_space = btrfs_leaf_free_space(root, left);
C
Chris Mason 已提交
1781
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1782
		free_extent_buffer(left);
C
Chris Mason 已提交
1783 1784 1785
		return 1;
	}

1786 1787 1788 1789 1790 1791
	if (empty)
		nr = right_nritems;
	else
		nr = right_nritems - 1;

	for (i = 0; i < nr; i++) {
1792
		item = btrfs_item_nr(right, i);
1793 1794 1795 1796 1797 1798 1799 1800
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

1801 1802
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
1803 1804 1805

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
1806
			break;
1807

1808
		push_items++;
1809 1810 1811 1812 1813 1814
		push_space += this_item_size + sizeof(*item);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
1815
	}
1816

1817
	if (push_items == 0) {
1818
		free_extent_buffer(left);
1819 1820
		return 1;
	}
1821
	if (!empty && push_items == btrfs_header_nritems(right))
1822
		WARN_ON(1);
1823

1824
	/* push data from right to left */
1825 1826 1827 1828 1829
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

C
Chris Mason 已提交
1830
	push_space = BTRFS_LEAF_DATA_SIZE(root) -
1831 1832 1833
		     btrfs_item_offset_nr(right, push_items -1);

	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
C
Chris Mason 已提交
1834 1835
		     leaf_data_end(root, left) - push_space,
		     btrfs_leaf_data(right) +
1836
		     btrfs_item_offset_nr(right, push_items - 1),
C
Chris Mason 已提交
1837
		     push_space);
1838
	old_left_nritems = btrfs_header_nritems(left);
1839 1840
	BUG_ON(old_left_nritems < 0);

1841
	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
C
Chris Mason 已提交
1842
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1843
		u32 ioff;
1844

1845
		item = btrfs_item_nr(left, i);
1846 1847 1848 1849 1850 1851 1852 1853
		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

1854 1855
		ioff = btrfs_item_offset(left, item);
		btrfs_set_item_offset(left, item,
1856
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1857
	}
1858
	btrfs_set_header_nritems(left, old_left_nritems + push_items);
1859 1860 1861 1862
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
	}
1863 1864

	/* fixup right node */
1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878
	if (push_items > right_nritems) {
		printk("push items %d nr %u\n", push_items, right_nritems);
		WARN_ON(1);
	}

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
						  leaf_data_end(root, right);
		memmove_extent_buffer(right, btrfs_leaf_data(right) +
				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
				      btrfs_leaf_data(right) +
				      leaf_data_end(root, right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1879 1880 1881
			      btrfs_item_nr_offset(push_items),
			     (btrfs_header_nritems(right) - push_items) *
			     sizeof(struct btrfs_item));
1882
	}
1883 1884
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
C
Chris Mason 已提交
1885
	push_space = BTRFS_LEAF_DATA_SIZE(root);
1886 1887
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		push_space = push_space - btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
1903
	}
1904

1905
	btrfs_mark_buffer_dirty(left);
1906 1907
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
1908

1909 1910
	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
C
Chris Mason 已提交
1911 1912
	if (wret)
		ret = wret;
1913 1914 1915 1916

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
1917 1918
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
1919 1920
		path->slots[1] -= 1;
	} else {
1921
		free_extent_buffer(left);
1922 1923
		path->slots[0] -= push_items;
	}
1924
	BUG_ON(path->slots[0] < 0);
C
Chris Mason 已提交
1925
	return ret;
1926 1927
}

C
Chris Mason 已提交
1928 1929 1930
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
C
Chris Mason 已提交
1931 1932
 *
 * returns 0 if all went well and < 0 on failure.
C
Chris Mason 已提交
1933
 */
1934
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1935
		      *root, struct btrfs_key *ins_key,
1936
		      struct btrfs_path *path, int data_size, int extend)
1937
{
1938
	u64 root_gen;
1939
	struct extent_buffer *l;
1940
	u32 nritems;
1941 1942
	int mid;
	int slot;
1943
	struct extent_buffer *right;
C
Chris Mason 已提交
1944
	int space_needed = data_size + sizeof(struct btrfs_item);
1945 1946 1947
	int data_copy_size;
	int rt_data_off;
	int i;
1948
	int ret = 0;
C
Chris Mason 已提交
1949
	int wret;
1950 1951
	int double_split;
	int num_doubles = 0;
1952
	struct btrfs_disk_key disk_key;
C
Chris Mason 已提交
1953

1954 1955 1956
	if (extend)
		space_needed = data_size;

1957 1958 1959 1960 1961
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

C
Chris Mason 已提交
1962
	/* first try to make some room by pushing left and right */
1963
	if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1964
		wret = push_leaf_right(trans, root, path, data_size, 0);
1965
		if (wret < 0) {
C
Chris Mason 已提交
1966
			return wret;
1967 1968
		}
		if (wret) {
1969
			wret = push_leaf_left(trans, root, path, data_size, 0);
1970 1971 1972 1973
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];
C
Chris Mason 已提交
1974

1975
		/* did the pushes work? */
1976
		if (btrfs_leaf_free_space(root, l) >= space_needed)
1977
			return 0;
1978
	}
C
Chris Mason 已提交
1979

C
Chris Mason 已提交
1980
	if (!path->nodes[1]) {
1981
		ret = insert_new_root(trans, root, path, 1);
C
Chris Mason 已提交
1982 1983 1984
		if (ret)
			return ret;
	}
1985 1986 1987
again:
	double_split = 0;
	l = path->nodes[0];
1988
	slot = path->slots[0];
1989
	nritems = btrfs_header_nritems(l);
1990
	mid = (nritems + 1)/ 2;
1991

1992 1993 1994 1995 1996 1997
	btrfs_item_key(l, &disk_key, 0);

	right = __btrfs_alloc_free_block(trans, root, root->leafsize,
					 root->root_key.objectid,
					 root_gen, disk_key.objectid, 0,
					 l->start, 0);
1998 1999 2000 2001
	if (IS_ERR(right))
		return PTR_ERR(right);

	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2002
	btrfs_set_header_bytenr(right, right->start);
2003 2004 2005 2006 2007 2008
	btrfs_set_header_generation(right, trans->transid);
	btrfs_set_header_owner(right, root->root_key.objectid);
	btrfs_set_header_level(right, 0);
	write_extent_buffer(right, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(right),
			    BTRFS_FSID_SIZE);
2009 2010 2011 2012 2013 2014
	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + space_needed >
			BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
2015
				btrfs_set_header_nritems(right, 0);
2016
				wret = insert_ptr(trans, root, path,
2017
						  &disk_key, right->start,
2018 2019 2020
						  path->slots[1] + 1, 1);
				if (wret)
					ret = wret;
2021 2022
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
2023 2024 2025 2026 2027
				path->slots[0] = 0;
				path->slots[1] += 1;
				return ret;
			}
			mid = slot;
2028 2029 2030 2031 2032
			if (mid != nritems &&
			    leaf_space_used(l, mid, nritems - mid) +
			    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
				double_split = 1;
			}
2033 2034 2035 2036
		}
	} else {
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
			BTRFS_LEAF_DATA_SIZE(root)) {
2037
			if (!extend && slot == 0) {
2038
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
2039
				btrfs_set_header_nritems(right, 0);
2040 2041
				wret = insert_ptr(trans, root, path,
						  &disk_key,
2042
						  right->start,
2043
						  path->slots[1], 1);
2044 2045
				if (wret)
					ret = wret;
2046 2047
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
2048
				path->slots[0] = 0;
2049 2050 2051 2052 2053 2054
				if (path->slots[1] == 0) {
					wret = fixup_low_keys(trans, root,
					           path, &disk_key, 1);
					if (wret)
						ret = wret;
				}
2055
				return ret;
2056 2057 2058 2059 2060 2061 2062 2063 2064
			} else if (extend && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
					double_split = 1;
				}
2065
			}
2066 2067
		}
	}
2068 2069 2070 2071 2072 2073 2074 2075 2076
	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);

	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
C
Chris Mason 已提交
2077 2078 2079
		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
		     data_copy_size, btrfs_leaf_data(l) +
		     leaf_data_end(root, l), data_copy_size);
2080

C
Chris Mason 已提交
2081
	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2082
		      btrfs_item_end_nr(l, mid);
C
Chris Mason 已提交
2083

2084 2085
	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(right, i);
2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096
		u32 ioff;

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(right, item);
2097
		btrfs_set_item_offset(right, item, ioff + rt_data_off);
C
Chris Mason 已提交
2098
	}
C
Chris Mason 已提交
2099

2100 2101 2102 2103 2104
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

2105
	btrfs_set_header_nritems(l, mid);
C
Chris Mason 已提交
2106
	ret = 0;
2107
	btrfs_item_key(right, &disk_key, 0);
2108 2109
	wret = insert_ptr(trans, root, path, &disk_key, right->start,
			  path->slots[1] + 1, 1);
C
Chris Mason 已提交
2110 2111
	if (wret)
		ret = wret;
2112 2113 2114

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
2115
	BUG_ON(path->slots[0] != slot);
2116

2117
	if (mid <= slot) {
2118 2119
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
2120 2121
		path->slots[0] -= mid;
		path->slots[1] += 1;
2122
	} else
2123 2124
		free_extent_buffer(right);

2125
	BUG_ON(path->slots[0] < 0);
2126

2127 2128 2129 2130
	if (double_split) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
2131
	}
2132 2133 2134
	return ret;
}

C
Chris Mason 已提交
2135 2136 2137
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
2138
			u32 new_size, int from_end)
C
Chris Mason 已提交
2139 2140 2141 2142
{
	int ret = 0;
	int slot;
	int slot_orig;
2143 2144
	struct extent_buffer *leaf;
	struct btrfs_item *item;
C
Chris Mason 已提交
2145 2146 2147 2148 2149 2150 2151 2152
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;

	slot_orig = path->slots[0];
2153
	leaf = path->nodes[0];
2154 2155 2156 2157 2158
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return 0;
C
Chris Mason 已提交
2159

2160
	nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
2161 2162
	data_end = leaf_data_end(root, leaf);

2163
	old_data_start = btrfs_item_offset_nr(leaf, slot);
2164

C
Chris Mason 已提交
2165 2166 2167 2168 2169 2170 2171 2172 2173 2174
	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
2175 2176
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
2177 2178 2179 2180 2181 2182 2183 2184 2185

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}

2186 2187
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff + size_diff);
C
Chris Mason 已提交
2188
	}
2189 2190 2191 2192 2193 2194

	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

C
Chris Mason 已提交
2195
	/* shift the data */
2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234
	if (from_end) {
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				        (unsigned long)fi,
				        offsetof(struct btrfs_file_extent_item,
						 disk_bytenr));
			}
		}

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start - data_end);

		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(trans, root, path, &disk_key, 1);
	}
2235 2236 2237 2238

	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
2239 2240

	ret = 0;
2241 2242
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
C
Chris Mason 已提交
2243
		BUG();
2244
	}
C
Chris Mason 已提交
2245 2246 2247
	return ret;
}

2248 2249 2250
int btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
2251 2252 2253 2254
{
	int ret = 0;
	int slot;
	int slot_orig;
2255 2256
	struct extent_buffer *leaf;
	struct btrfs_item *item;
2257 2258 2259 2260 2261 2262 2263
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;

	slot_orig = path->slots[0];
2264
	leaf = path->nodes[0];
2265

2266
	nritems = btrfs_header_nritems(leaf);
2267 2268
	data_end = leaf_data_end(root, leaf);

2269 2270
	if (btrfs_leaf_free_space(root, leaf) < data_size) {
		btrfs_print_leaf(root, leaf);
2271
		BUG();
2272
	}
2273
	slot = path->slots[0];
2274
	old_data = btrfs_item_end_nr(leaf, slot);
2275 2276

	BUG_ON(slot < 0);
2277 2278 2279 2280 2281
	if (slot >= nritems) {
		btrfs_print_leaf(root, leaf);
		printk("slot %d too large, nritems %d\n", slot, nritems);
		BUG_ON(1);
	}
2282 2283 2284 2285 2286 2287

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
2288 2289
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
2290 2291 2292 2293 2294 2295 2296 2297

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}
2298 2299
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff - data_size);
2300
	}
2301

2302 2303 2304 2305 2306
	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

2307
	/* shift the data */
2308
	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2309 2310
		      data_end - data_size, btrfs_leaf_data(leaf) +
		      data_end, old_data - data_end);
2311

2312
	data_end = old_data;
2313 2314 2315 2316
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);
2317 2318

	ret = 0;
2319 2320
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
2321
		BUG();
2322
	}
2323 2324 2325
	return ret;
}

C
Chris Mason 已提交
2326 2327 2328 2329
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
2330 2331 2332 2333
int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    struct btrfs_path *path,
			    struct btrfs_key *cpu_key, u32 data_size)
2334
{
2335 2336
	struct extent_buffer *leaf;
	struct btrfs_item *item;
C
Chris Mason 已提交
2337
	int ret = 0;
2338
	int slot;
2339
	int slot_orig;
2340
	u32 nritems;
2341
	unsigned int data_end;
C
Chris Mason 已提交
2342 2343 2344
	struct btrfs_disk_key disk_key;

	btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2345

C
Chris Mason 已提交
2346
	/* create a root if there isn't one */
C
Chris Mason 已提交
2347
	if (!root->node)
C
Chris Mason 已提交
2348
		BUG();
2349

2350
	ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2351
	if (ret == 0) {
2352
		return -EEXIST;
C
Chris Mason 已提交
2353
	}
2354 2355
	if (ret < 0)
		goto out;
2356

2357
	slot_orig = path->slots[0];
2358
	leaf = path->nodes[0];
C
Chris Mason 已提交
2359

2360
	nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
2361
	data_end = leaf_data_end(root, leaf);
2362

C
Chris Mason 已提交
2363
	if (btrfs_leaf_free_space(root, leaf) <
2364
	    sizeof(struct btrfs_item) + data_size) {
2365 2366 2367
		btrfs_print_leaf(root, leaf);
		printk("not enough freespace need %u have %d\n",
		       data_size, btrfs_leaf_free_space(root, leaf));
2368
		BUG();
2369
	}
2370

2371
	slot = path->slots[0];
2372
	BUG_ON(slot < 0);
2373

2374 2375
	if (slot != nritems) {
		int i;
2376
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2377

2378 2379 2380 2381 2382 2383
		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
2384 2385 2386 2387
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
2388
		WARN_ON(leaf->map_token);
C
Chris Mason 已提交
2389
		for (i = slot; i < nritems; i++) {
2390
			u32 ioff;
2391

2392
			item = btrfs_item_nr(leaf, i);
2393 2394 2395 2396 2397 2398 2399 2400
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}

2401 2402
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - data_size);
C
Chris Mason 已提交
2403
		}
2404 2405 2406 2407
		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}
2408 2409

		/* shift the items */
2410 2411
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
			      btrfs_item_nr_offset(slot),
C
Chris Mason 已提交
2412
			      (nritems - slot) * sizeof(struct btrfs_item));
2413 2414

		/* shift the data */
2415
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
C
Chris Mason 已提交
2416 2417
			      data_end - data_size, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
2418 2419
		data_end = old_data;
	}
2420

2421
	/* setup the item for the new data */
2422 2423 2424 2425 2426 2427
	btrfs_set_item_key(leaf, &disk_key, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_offset(leaf, item, data_end - data_size);
	btrfs_set_item_size(leaf, item, data_size);
	btrfs_set_header_nritems(leaf, nritems + 1);
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
2428 2429

	ret = 0;
2430
	if (slot == 0)
2431
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
C
Chris Mason 已提交
2432

2433 2434
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
2435
		BUG();
2436
	}
2437
out:
2438 2439 2440 2441 2442 2443 2444
	return ret;
}

/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
2445 2446 2447
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *cpu_key, void *data, u32
		      data_size)
2448 2449
{
	int ret = 0;
C
Chris Mason 已提交
2450
	struct btrfs_path *path;
2451 2452
	struct extent_buffer *leaf;
	unsigned long ptr;
2453

C
Chris Mason 已提交
2454 2455 2456
	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2457
	if (!ret) {
2458 2459 2460 2461
		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, data, ptr, data_size);
		btrfs_mark_buffer_dirty(leaf);
2462
	}
C
Chris Mason 已提交
2463
	btrfs_free_path(path);
C
Chris Mason 已提交
2464
	return ret;
2465 2466
}

C
Chris Mason 已提交
2467
/*
C
Chris Mason 已提交
2468
 * delete the pointer from a given node.
C
Chris Mason 已提交
2469 2470 2471 2472 2473
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way the root if required.  The root is converted into
 * a leaf if all the nodes are emptied.
 */
2474 2475
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot)
2476
{
2477
	struct extent_buffer *parent = path->nodes[level];
2478
	u32 nritems;
C
Chris Mason 已提交
2479
	int ret = 0;
2480
	int wret;
2481

2482
	nritems = btrfs_header_nritems(parent);
2483
	if (slot != nritems -1) {
2484 2485 2486
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
C
Chris Mason 已提交
2487 2488
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
2489
	}
2490
	nritems--;
2491
	btrfs_set_header_nritems(parent, nritems);
2492
	if (nritems == 0 && parent == root->node) {
2493
		BUG_ON(btrfs_header_level(root->node) != 1);
2494
		/* just turn the root into a leaf and break */
2495
		btrfs_set_header_level(root->node, 0);
2496
	} else if (slot == 0) {
2497 2498 2499 2500
		struct btrfs_disk_key disk_key;

		btrfs_node_key(parent, &disk_key, 0);
		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
C
Chris Mason 已提交
2501 2502
		if (wret)
			ret = wret;
2503
	}
C
Chris Mason 已提交
2504
	btrfs_mark_buffer_dirty(parent);
C
Chris Mason 已提交
2505
	return ret;
2506 2507
}

C
Chris Mason 已提交
2508 2509 2510 2511
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
2512 2513
int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path)
2514 2515
{
	int slot;
2516 2517
	struct extent_buffer *leaf;
	struct btrfs_item *item;
2518 2519
	int doff;
	int dsize;
C
Chris Mason 已提交
2520 2521
	int ret = 0;
	int wret;
2522
	u32 nritems;
2523

2524
	leaf = path->nodes[0];
2525
	slot = path->slots[0];
2526 2527 2528
	doff = btrfs_item_offset_nr(leaf, slot);
	dsize = btrfs_item_size_nr(leaf, slot);
	nritems = btrfs_header_nritems(leaf);
2529

2530
	if (slot != nritems - 1) {
2531
		int i;
C
Chris Mason 已提交
2532
		int data_end = leaf_data_end(root, leaf);
2533 2534

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
C
Chris Mason 已提交
2535 2536 2537
			      data_end + dsize,
			      btrfs_leaf_data(leaf) + data_end,
			      doff - data_end);
2538

C
Chris Mason 已提交
2539
		for (i = slot + 1; i < nritems; i++) {
2540
			u32 ioff;
2541

2542
			item = btrfs_item_nr(leaf, i);
2543 2544 2545 2546 2547 2548 2549
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}
2550 2551
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff + dsize);
C
Chris Mason 已提交
2552
		}
2553 2554 2555 2556 2557 2558

		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

2559 2560
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
C
Chris Mason 已提交
2561 2562
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
2563
	}
2564
	btrfs_set_header_nritems(leaf, nritems - 1);
2565
	nritems--;
2566

C
Chris Mason 已提交
2567
	/* delete the leaf if we've emptied it */
2568
	if (nritems == 0) {
2569 2570
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
2571
		} else {
2572
			u64 root_gen = btrfs_header_generation(path->nodes[1]);
2573 2574
			clean_tree_block(trans, root, leaf);
			wait_on_tree_block_writeback(root, leaf);
2575
			wret = del_ptr(trans, root, path, 1, path->slots[1]);
C
Chris Mason 已提交
2576 2577
			if (wret)
				ret = wret;
2578
			wret = btrfs_free_extent(trans, root,
2579 2580 2581
					 leaf->start, leaf->len,
					 btrfs_header_owner(path->nodes[1]),
					 root_gen, 0, 0, 1);
C
Chris Mason 已提交
2582 2583
			if (wret)
				ret = wret;
2584
		}
2585
	} else {
2586
		int used = leaf_space_used(leaf, 0, nritems);
C
Chris Mason 已提交
2587
		if (slot == 0) {
2588 2589 2590
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
2591
			wret = fixup_low_keys(trans, root, path,
2592
					      &disk_key, 1);
C
Chris Mason 已提交
2593 2594 2595 2596
			if (wret)
				ret = wret;
		}

C
Chris Mason 已提交
2597
		/* delete the leaf if it is mostly empty */
2598
		if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2599 2600 2601 2602
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
2603
			slot = path->slots[1];
2604 2605
			extent_buffer_get(leaf);

2606
			wret = push_leaf_right(trans, root, path, 1, 1);
2607
			if (wret < 0 && wret != -ENOSPC)
C
Chris Mason 已提交
2608
				ret = wret;
2609 2610 2611

			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
2612
				wret = push_leaf_left(trans, root, path, 1, 1);
2613
				if (wret < 0 && wret != -ENOSPC)
C
Chris Mason 已提交
2614 2615
					ret = wret;
			}
2616 2617

			if (btrfs_header_nritems(leaf) == 0) {
2618
				u64 root_gen;
2619 2620
				u64 bytenr = leaf->start;
				u32 blocksize = leaf->len;
2621

2622 2623 2624
				root_gen = btrfs_header_generation(
							   path->nodes[1]);

2625 2626 2627
				clean_tree_block(trans, root, leaf);
				wait_on_tree_block_writeback(root, leaf);

2628
				wret = del_ptr(trans, root, path, 1, slot);
C
Chris Mason 已提交
2629 2630
				if (wret)
					ret = wret;
2631 2632

				free_extent_buffer(leaf);
2633
				wret = btrfs_free_extent(trans, root, bytenr,
2634 2635 2636
					     blocksize,
					     btrfs_header_owner(path->nodes[1]),
					     root_gen, 0, 0, 1);
C
Chris Mason 已提交
2637 2638
				if (wret)
					ret = wret;
C
Chris Mason 已提交
2639
			} else {
2640 2641
				btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
2642
			}
2643
		} else {
2644
			btrfs_mark_buffer_dirty(leaf);
2645 2646
		}
	}
C
Chris Mason 已提交
2647
	return ret;
2648 2649
}

2650 2651 2652 2653 2654 2655 2656
/*
 * walk up the tree as far as required to find the previous leaf.
 * returns 0 if it found something or 1 if there are no lesser leaves.
 * returns < 0 on io errors.
 */
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
2657
	u64 bytenr;
2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689
	int slot;
	int level = 1;
	struct extent_buffer *c;
	struct extent_buffer *next = NULL;

	while(level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		slot = path->slots[level];
		c = path->nodes[level];
		if (slot == 0) {
			level++;
			if (level == BTRFS_MAX_LEVEL)
				return 1;
			continue;
		}
		slot--;

		bytenr = btrfs_node_blockptr(c, slot);
		if (next)
			free_extent_buffer(next);

		next = read_tree_block(root, bytenr,
				       btrfs_level_size(root, level - 1));
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
		free_extent_buffer(c);
2690 2691 2692
		slot = btrfs_header_nritems(next);
		if (slot != 0)
			slot--;
2693
		path->nodes[level] = next;
2694
		path->slots[level] = slot;
2695 2696
		if (!level)
			break;
2697
		next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2698 2699 2700 2701 2702
				       btrfs_level_size(root, level - 1));
	}
	return 0;
}

C
Chris Mason 已提交
2703 2704
/*
 * walk up the tree as far as required to find the next leaf.
C
Chris Mason 已提交
2705 2706
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on io errors.
C
Chris Mason 已提交
2707
 */
C
Chris Mason 已提交
2708
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2709 2710 2711
{
	int slot;
	int level = 1;
2712
	u64 bytenr;
2713 2714
	struct extent_buffer *c;
	struct extent_buffer *next = NULL;
2715

C
Chris Mason 已提交
2716
	while(level < BTRFS_MAX_LEVEL) {
2717
		if (!path->nodes[level])
C
Chris Mason 已提交
2718
			return 1;
2719

2720 2721
		slot = path->slots[level] + 1;
		c = path->nodes[level];
2722
		if (slot >= btrfs_header_nritems(c)) {
2723
			level++;
2724 2725
			if (level == BTRFS_MAX_LEVEL)
				return 1;
2726 2727
			continue;
		}
2728

2729
		bytenr = btrfs_node_blockptr(c, slot);
C
Chris Mason 已提交
2730
		if (next)
2731 2732
			free_extent_buffer(next);

2733
		if (path->reada)
2734
			reada_for_search(root, path, level, slot, 0);
2735

2736 2737
		next = read_tree_block(root, bytenr,
				       btrfs_level_size(root, level -1));
2738 2739 2740 2741 2742 2743
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
2744
		free_extent_buffer(c);
2745 2746 2747 2748
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!level)
			break;
2749
		if (path->reada)
2750
			reada_for_search(root, path, level, 0, 0);
2751 2752
		next = read_tree_block(root, btrfs_node_blockptr(next, 0),
				       btrfs_level_size(root, level - 1));
2753 2754 2755
	}
	return 0;
}