ctree.c 70.5 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19
#include <linux/sched.h>
20 21
#include "ctree.h"
#include "disk-io.h"
22
#include "transaction.h"
23
#include "print-tree.h"
24

25 26 27
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28
		      *root, struct btrfs_key *ins_key,
29
		      struct btrfs_path *path, int data_size, int extend);
30 31 32 33 34 35 36
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
37 38
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);
39

C
Chris Mason 已提交
40
inline void btrfs_init_path(struct btrfs_path *p)
C
Chris Mason 已提交
41
{
C
Chris Mason 已提交
42
	memset(p, 0, sizeof(*p));
C
Chris Mason 已提交
43 44
}

C
Chris Mason 已提交
45
struct btrfs_path *btrfs_alloc_path(void)
C
Chris Mason 已提交
46
{
C
Chris Mason 已提交
47 48
	struct btrfs_path *path;
	path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49
	if (path) {
C
Chris Mason 已提交
50
		btrfs_init_path(path);
51 52
		path->reada = 1;
	}
C
Chris Mason 已提交
53
	return path;
C
Chris Mason 已提交
54 55
}

C
Chris Mason 已提交
56
void btrfs_free_path(struct btrfs_path *p)
57
{
C
Chris Mason 已提交
58 59
	btrfs_release_path(NULL, p);
	kmem_cache_free(btrfs_path_cachep, p);
60 61
}

C
Chris Mason 已提交
62
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 64
{
	int i;
C
Chris Mason 已提交
65
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66 67
		if (!p->nodes[i])
			break;
68
		free_extent_buffer(p->nodes[i]);
69
	}
C
Chris Mason 已提交
70
	memset(p, 0, sizeof(*p));
71 72
}

73 74 75 76 77 78 79 80 81 82
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct extent_buffer *cow;
	u32 nritems;
	int ret = 0;
	int level;
	struct btrfs_key first_key;
83
	struct btrfs_root *new_root;
84

85 86 87 88 89 90
	new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
	if (!new_root)
		return -ENOMEM;

	memcpy(new_root, root, sizeof(*new_root));
	new_root->root_key.objectid = new_root_objectid;
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (nritems) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		else
			btrfs_node_key_to_cpu(buf, &first_key, 0);
	} else {
		first_key.objectid = 0;
	}
106
	cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
107 108 109
				       new_root_objectid,
				       trans->transid, first_key.objectid,
				       level, buf->start, 0);
110 111
	if (IS_ERR(cow)) {
		kfree(new_root);
112
		return PTR_ERR(cow);
113
	}
114 115 116 117 118 119 120

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, new_root_objectid);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
121 122 123
	ret = btrfs_inc_ref(trans, new_root, buf);
	kfree(new_root);

124 125 126 127 128 129 130 131 132
	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

int __btrfs_cow_block(struct btrfs_trans_handle *trans,
133 134 135 136 137
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size)
C
Chris Mason 已提交
138
{
139
	u64 root_gen;
140
	struct extent_buffer *cow;
141
	u32 nritems;
142 143
	int ret = 0;
	int different_trans = 0;
144 145 146 147 148 149 150 151
	int level;
	struct btrfs_key first_key;

	if (root->ref_cows) {
		root_gen = trans->transid;
	} else {
		root_gen = 0;
	}
C
Chris Mason 已提交
152

153 154
	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
155
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
156

157 158 159 160 161 162 163 164 165 166 167 168 169
	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (nritems) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		else
			btrfs_node_key_to_cpu(buf, &first_key, 0);
	} else {
		first_key.objectid = 0;
	}
	cow = __btrfs_alloc_free_block(trans, root, buf->len,
				     root->root_key.objectid,
				     root_gen, first_key.objectid, level,
170
				     search_start, empty_size);
171 172
	if (IS_ERR(cow))
		return PTR_ERR(cow);
173

174
	copy_extent_buffer(cow, buf, 0, 0, cow->len);
175
	btrfs_set_header_bytenr(cow, cow->start);
176 177
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, root->root_key.objectid);
178

179 180
	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (btrfs_header_generation(buf) != trans->transid) {
181 182 183 184 185 186 187 188
		different_trans = 1;
		ret = btrfs_inc_ref(trans, root, buf);
		if (ret)
			return ret;
	} else {
		clean_tree_block(trans, root, buf);
	}

C
Chris Mason 已提交
189
	if (buf == root->node) {
190
		root_gen = btrfs_header_generation(buf);
C
Chris Mason 已提交
191
		root->node = cow;
192
		extent_buffer_get(cow);
C
Chris Mason 已提交
193
		if (buf != root->commit_root) {
194
			btrfs_free_extent(trans, root, buf->start,
195 196
					  buf->len, root->root_key.objectid,
					  root_gen, 0, 0, 1);
C
Chris Mason 已提交
197
		}
198
		free_extent_buffer(buf);
C
Chris Mason 已提交
199
	} else {
200
		root_gen = btrfs_header_generation(parent);
201
		btrfs_set_node_blockptr(parent, parent_slot,
202
					cow->start);
203 204 205
		WARN_ON(trans->transid == 0);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
C
Chris Mason 已提交
206
		btrfs_mark_buffer_dirty(parent);
207
		WARN_ON(btrfs_header_generation(parent) != trans->transid);
208 209 210
		btrfs_free_extent(trans, root, buf->start, buf->len,
				  btrfs_header_owner(parent), root_gen,
				  0, 0, 1);
C
Chris Mason 已提交
211
	}
212
	free_extent_buffer(buf);
C
Chris Mason 已提交
213
	btrfs_mark_buffer_dirty(cow);
C
Chris Mason 已提交
214
	*cow_ret = cow;
C
Chris Mason 已提交
215 216 217
	return 0;
}

218 219 220 221
int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret)
222 223
{
	u64 search_start;
224
	int ret;
225 226 227 228 229 230 231 232 233 234
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}
235
	if (btrfs_header_generation(buf) == trans->transid) {
236 237 238 239
		*cow_ret = buf;
		return 0;
	}

240
	search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
241
	ret = __btrfs_cow_block(trans, root, buf, parent,
242
				 parent_slot, cow_ret, search_start, 0);
243
	return ret;
244 245
}

246
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
247
{
248
	if (blocknr < other && other - (blocknr + blocksize) < 32768)
249
		return 1;
250
	if (blocknr > other && blocknr - (other + blocksize) < 32768)
251 252 253 254
		return 1;
	return 0;
}

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	if (k1.objectid > k2->objectid)
		return 1;
	if (k1.objectid < k2->objectid)
		return -1;
	if (k1.type > k2->type)
		return 1;
	if (k1.type < k2->type)
		return -1;
	if (k1.offset > k2->offset)
		return 1;
	if (k1.offset < k2->offset)
		return -1;
	return 0;
}


280
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
281
		       struct btrfs_root *root, struct extent_buffer *parent,
282 283
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
284
{
285 286
	struct extent_buffer *cur;
	struct extent_buffer *tmp;
287
	u64 blocknr;
288 289
	u64 search_start = *last_ret;
	u64 last_block = 0;
290 291 292 293 294
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
295
	int parent_level;
296 297
	int uptodate;
	u32 blocksize;
298 299
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;
300

301 302 303 304
	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

305 306 307 308 309 310 311 312 313 314
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}
315

316 317
	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
318 319 320 321 322 323 324
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;
325

326 327 328 329 330 331 332 333
		if (!parent->map_token) {
			map_extent_buffer(parent,
					btrfs_node_key_ptr_offset(i),
					sizeof(struct btrfs_key_ptr),
					&parent->map_token, &parent->kaddr,
					&parent->map_start, &parent->map_len,
					KM_USER1);
		}
334 335 336 337 338
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
339
		blocknr = btrfs_node_blockptr(parent, i);
340 341
		if (last_block == 0)
			last_block = blocknr;
342

343
		if (i > 0) {
344 345
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
346
		}
347
		if (close && i < end_slot - 2) {
348 349
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
350
		}
351 352
		if (close) {
			last_block = blocknr;
353
			continue;
354
		}
355 356 357 358 359
		if (parent->map_token) {
			unmap_extent_buffer(parent, parent->map_token,
					    KM_USER1);
			parent->map_token = NULL;
		}
360

361 362 363 364 365
		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur);
		else
			uptodate = 0;
366
		if (!cur || !uptodate) {
367
			if (cache_only) {
368
				free_extent_buffer(cur);
369 370
				continue;
			}
371 372 373 374 375
			if (!cur) {
				cur = read_tree_block(root, blocknr,
							 blocksize);
			} else if (!uptodate) {
				btrfs_read_buffer(cur);
376
			}
377
		}
378
		if (search_start == 0)
379
			search_start = last_block;
380

381 382 383 384
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&tmp, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize));
Y
Yan 已提交
385
		if (err) {
386
			free_extent_buffer(cur);
387
			break;
Y
Yan 已提交
388
		}
389
		search_start = tmp->start;
390
		last_block = tmp->start;
391 392
		*last_ret = search_start;
		if (parent_level == 1)
393 394
			btrfs_clear_buffer_defrag(tmp);
		free_extent_buffer(tmp);
395
	}
396 397 398 399 400
	if (parent->map_token) {
		unmap_extent_buffer(parent, parent->map_token,
				    KM_USER1);
		parent->map_token = NULL;
	}
401 402 403
	return err;
}

C
Chris Mason 已提交
404 405 406 407 408
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
C
Chris Mason 已提交
409
static inline unsigned int leaf_data_end(struct btrfs_root *root,
410
					 struct extent_buffer *leaf)
411
{
412
	u32 nr = btrfs_header_nritems(leaf);
413
	if (nr == 0)
C
Chris Mason 已提交
414
		return BTRFS_LEAF_DATA_SIZE(root);
415
	return btrfs_item_offset_nr(leaf, nr - 1);
416 417
}

C
Chris Mason 已提交
418 419
static int check_node(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
C
Chris Mason 已提交
420
{
421 422 423 424
	struct extent_buffer *parent = NULL;
	struct extent_buffer *node = path->nodes[level];
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key node_key;
C
Chris Mason 已提交
425
	int parent_slot;
426 427
	int slot;
	struct btrfs_key cpukey;
428
	u32 nritems = btrfs_header_nritems(node);
C
Chris Mason 已提交
429 430

	if (path->nodes[level + 1])
431
		parent = path->nodes[level + 1];
A
Aneesh 已提交
432

433
	slot = path->slots[level];
434 435
	BUG_ON(nritems == 0);
	if (parent) {
A
Aneesh 已提交
436
		parent_slot = path->slots[level + 1];
437 438 439
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_node_key(node, &node_key, 0);
		BUG_ON(memcmp(&parent_key, &node_key,
C
Chris Mason 已提交
440
			      sizeof(struct btrfs_disk_key)));
441
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
442
		       btrfs_header_bytenr(node));
C
Chris Mason 已提交
443
	}
C
Chris Mason 已提交
444
	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
445
	if (slot != 0) {
446 447 448
		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
449 450
	}
	if (slot < nritems - 1) {
451 452 453
		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
C
Chris Mason 已提交
454 455 456 457
	}
	return 0;
}

C
Chris Mason 已提交
458 459
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
C
Chris Mason 已提交
460
{
461 462
	struct extent_buffer *leaf = path->nodes[level];
	struct extent_buffer *parent = NULL;
C
Chris Mason 已提交
463
	int parent_slot;
464
	struct btrfs_key cpukey;
465 466 467
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key leaf_key;
	int slot = path->slots[0];
468

469
	u32 nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
470 471

	if (path->nodes[level + 1])
472
		parent = path->nodes[level + 1];
473 474 475 476 477

	if (nritems == 0)
		return 0;

	if (parent) {
A
Aneesh 已提交
478
		parent_slot = path->slots[level + 1];
479 480
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_item_key(leaf, &leaf_key, 0);
481

482
		BUG_ON(memcmp(&parent_key, &leaf_key,
C
Chris Mason 已提交
483
		       sizeof(struct btrfs_disk_key)));
484
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
485
		       btrfs_header_bytenr(leaf));
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
	}
#if 0
	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
		btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
		btrfs_item_key(leaf, &leaf_key, i);
		if (comp_keys(&leaf_key, &cpukey) >= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", i);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, i) !=
			btrfs_item_end_nr(leaf, i + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", i);
			BUG_ON(1);
		}
		if (i == 0) {
			if (btrfs_item_offset_nr(leaf, i) +
			       btrfs_item_size_nr(leaf, i) !=
			       BTRFS_LEAF_DATA_SIZE(root)) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d first offset bad\n", i);
				BUG_ON(1);
			}
		}
C
Chris Mason 已提交
511
	}
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533
	if (nritems > 0) {
		if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d bad size \n", nritems - 1);
				BUG_ON(1);
		}
	}
#endif
	if (slot != 0 && slot < nritems - 1) {
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
		if (comp_keys(&leaf_key, &cpukey) <= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", slot);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, slot - 1) !=
		       btrfs_item_end_nr(leaf, slot)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
534 535
	}
	if (slot < nritems - 1) {
536 537 538 539 540 541 542 543 544
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
		if (btrfs_item_offset_nr(leaf, slot) !=
			btrfs_item_end_nr(leaf, slot + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
C
Chris Mason 已提交
545
	}
546 547
	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
C
Chris Mason 已提交
548 549 550
	return 0;
}

551 552
static int noinline check_block(struct btrfs_root *root,
				struct btrfs_path *path, int level)
C
Chris Mason 已提交
553
{
554
	return 0;
555
#if 0
556 557
	struct extent_buffer *buf = path->nodes[level];

558 559 560
	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
				 (unsigned long)btrfs_header_fsid(buf),
				 BTRFS_FSID_SIZE)) {
561
		printk("warning bad block %Lu\n", buf->start);
562
		return 1;
563
	}
564
#endif
C
Chris Mason 已提交
565
	if (level == 0)
C
Chris Mason 已提交
566 567
		return check_leaf(root, path, level);
	return check_node(root, path, level);
C
Chris Mason 已提交
568 569
}

C
Chris Mason 已提交
570
/*
571 572 573
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
C
Chris Mason 已提交
574 575 576 577 578 579
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
580 581 582
static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
			      int item_size, struct btrfs_key *key,
			      int max, int *slot)
583 584 585 586 587
{
	int low = 0;
	int high = max;
	int mid;
	int ret;
588
	struct btrfs_disk_key *tmp = NULL;
589 590 591 592 593 594
	struct btrfs_disk_key unaligned;
	unsigned long offset;
	char *map_token = NULL;
	char *kaddr = NULL;
	unsigned long map_start = 0;
	unsigned long map_len = 0;
595
	int err;
596 597 598

	while(low < high) {
		mid = (low + high) / 2;
599 600 601 602 603
		offset = p + mid * item_size;

		if (!map_token || offset < map_start ||
		    (offset + sizeof(struct btrfs_disk_key)) >
		    map_start + map_len) {
604
			if (map_token) {
605
				unmap_extent_buffer(eb, map_token, KM_USER0);
606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
				map_token = NULL;
			}
			err = map_extent_buffer(eb, offset,
						sizeof(struct btrfs_disk_key),
						&map_token, &kaddr,
						&map_start, &map_len, KM_USER0);

			if (!err) {
				tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
			} else {
				read_extent_buffer(eb, &unaligned,
						   offset, sizeof(unaligned));
				tmp = &unaligned;
			}
621 622 623 624 625

		} else {
			tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
		}
626 627 628 629 630 631 632 633
		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
634 635
			if (map_token)
				unmap_extent_buffer(eb, map_token, KM_USER0);
636 637 638 639
			return 0;
		}
	}
	*slot = low;
640 641
	if (map_token)
		unmap_extent_buffer(eb, map_token, KM_USER0);
642 643 644
	return 1;
}

C
Chris Mason 已提交
645 646 647 648
/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
649 650
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		      int level, int *slot)
651
{
652 653 654
	if (level == 0) {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_leaf, items),
C
Chris Mason 已提交
655
					  sizeof(struct btrfs_item),
656
					  key, btrfs_header_nritems(eb),
657
					  slot);
658
	} else {
659 660
		return generic_bin_search(eb,
					  offsetof(struct btrfs_node, ptrs),
C
Chris Mason 已提交
661
					  sizeof(struct btrfs_key_ptr),
662
					  key, btrfs_header_nritems(eb),
663
					  slot);
664 665 666 667
	}
	return -1;
}

668 669
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
670 671 672
{
	if (slot < 0)
		return NULL;
673
	if (slot >= btrfs_header_nritems(parent))
674
		return NULL;
675 676
	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
		       btrfs_level_size(root, btrfs_header_level(parent) - 1));
677 678
}

679 680 681
static int balance_level(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path, int level)
682
{
683 684 685 686
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
687 688 689 690
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
691
	int err_on_enospc = 0;
692
	u64 orig_ptr;
693 694 695 696

	if (level == 0)
		return 0;

697
	mid = path->nodes[level];
698 699
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

700
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
701

C
Chris Mason 已提交
702
	if (level < BTRFS_MAX_LEVEL - 1)
703
		parent = path->nodes[level + 1];
704 705
	pslot = path->slots[level + 1];

C
Chris Mason 已提交
706 707 708 709
	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
710 711
	if (!parent) {
		struct extent_buffer *child;
712

713
		if (btrfs_header_nritems(mid) != 1)
714 715 716
			return 0;

		/* promote the child to a root */
717
		child = read_node_slot(root, mid, 0);
718 719 720
		BUG_ON(!child);
		root->node = child;
		path->nodes[level] = NULL;
721 722
		clean_tree_block(trans, root, mid);
		wait_on_tree_block_writeback(root, mid);
723
		/* once for the path */
724
		free_extent_buffer(mid);
725 726 727
		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
					root->root_key.objectid,
					btrfs_header_generation(mid), 0, 0, 1);
728
		/* once for the root ptr */
729
		free_extent_buffer(mid);
730
		return ret;
731
	}
732
	if (btrfs_header_nritems(mid) >
C
Chris Mason 已提交
733
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
734 735
		return 0;

736
	if (btrfs_header_nritems(mid) < 2)
737 738
		err_on_enospc = 1;

739 740 741 742
	left = read_node_slot(root, parent, pslot - 1);
	if (left) {
		wret = btrfs_cow_block(trans, root, left,
				       parent, pslot - 1, &left);
743 744 745 746
		if (wret) {
			ret = wret;
			goto enospc;
		}
747
	}
748 749 750 751
	right = read_node_slot(root, parent, pslot + 1);
	if (right) {
		wret = btrfs_cow_block(trans, root, right,
				       parent, pslot + 1, &right);
752 753 754 755 756 757 758
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
759 760 761
	if (left) {
		orig_slot += btrfs_header_nritems(left);
		wret = push_node_left(trans, root, left, mid);
762 763
		if (wret < 0)
			ret = wret;
764
		if (btrfs_header_nritems(mid) < 2)
765
			err_on_enospc = 1;
766
	}
767 768 769 770

	/*
	 * then try to empty the right most buffer into the middle
	 */
771 772
	if (right) {
		wret = push_node_left(trans, root, mid, right);
773
		if (wret < 0 && wret != -ENOSPC)
774
			ret = wret;
775
		if (btrfs_header_nritems(right) == 0) {
776
			u64 bytenr = right->start;
777
			u64 generation = btrfs_header_generation(parent);
778 779
			u32 blocksize = right->len;

780 781 782
			clean_tree_block(trans, root, right);
			wait_on_tree_block_writeback(root, right);
			free_extent_buffer(right);
783
			right = NULL;
784 785
			wret = del_ptr(trans, root, path, level + 1, pslot +
				       1);
786 787
			if (wret)
				ret = wret;
788
			wret = btrfs_free_extent(trans, root, bytenr,
789 790 791
						 blocksize,
						 btrfs_header_owner(parent),
						 generation, 0, 0, 1);
792 793 794
			if (wret)
				ret = wret;
		} else {
795 796 797 798
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
799 800
		}
	}
801
	if (btrfs_header_nritems(mid) == 1) {
802 803 804 805 806 807 808 809 810
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
811 812
		BUG_ON(!left);
		wret = balance_node_right(trans, root, mid, left);
813
		if (wret < 0) {
814
			ret = wret;
815 816
			goto enospc;
		}
817 818
		BUG_ON(wret == 1);
	}
819
	if (btrfs_header_nritems(mid) == 0) {
820
		/* we've managed to empty the middle node, drop it */
821
		u64 root_gen = btrfs_header_generation(parent);
822 823
		u64 bytenr = mid->start;
		u32 blocksize = mid->len;
824 825 826
		clean_tree_block(trans, root, mid);
		wait_on_tree_block_writeback(root, mid);
		free_extent_buffer(mid);
827
		mid = NULL;
828
		wret = del_ptr(trans, root, path, level + 1, pslot);
829 830
		if (wret)
			ret = wret;
831 832 833
		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
					 btrfs_header_owner(parent),
					 root_gen, 0, 0, 1);
834 835
		if (wret)
			ret = wret;
836 837
	} else {
		/* update the parent key to reflect our changes */
838 839 840 841
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
842
	}
843

844
	/* update the path */
845 846 847 848
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			extent_buffer_get(left);
			path->nodes[level] = left;
849 850
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
851 852
			if (mid)
				free_extent_buffer(mid);
853
		} else {
854
			orig_slot -= btrfs_header_nritems(left);
855 856 857
			path->slots[level] = orig_slot;
		}
	}
858
	/* double check we haven't messed things up */
C
Chris Mason 已提交
859
	check_block(root, path, level);
C
Chris Mason 已提交
860
	if (orig_ptr !=
861
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
862
		BUG();
863
enospc:
864 865 866 867
	if (right)
		free_extent_buffer(right);
	if (left)
		free_extent_buffer(left);
868 869 870
	return ret;
}

871
/* returns zero if the push worked, non-zero otherwise */
872 873 874
static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, int level)
875
{
876 877 878 879
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
880 881 882 883 884 885 886 887 888
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	if (level == 0)
		return 1;

889
	mid = path->nodes[level];
890
	WARN_ON(btrfs_header_generation(mid) != trans->transid);
891 892 893
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
894
		parent = path->nodes[level + 1];
895 896
	pslot = path->slots[level + 1];

897
	if (!parent)
898 899
		return 1;

900
	left = read_node_slot(root, parent, pslot - 1);
901 902

	/* first, try to make some room in the middle buffer */
903
	if (left) {
904
		u32 left_nr;
905
		left_nr = btrfs_header_nritems(left);
C
Chris Mason 已提交
906 907 908
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
909 910
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left);
911 912 913 914
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
915
						      left, mid);
916
			}
C
Chris Mason 已提交
917
		}
918 919 920
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
921
			struct btrfs_disk_key disk_key;
922
			orig_slot += left_nr;
923 924 925 926 927
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				path->nodes[level] = left;
928 929
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
930
				free_extent_buffer(mid);
931 932
			} else {
				orig_slot -=
933
					btrfs_header_nritems(left);
934
				path->slots[level] = orig_slot;
935
				free_extent_buffer(left);
936 937 938
			}
			return 0;
		}
939
		free_extent_buffer(left);
940
	}
941
	right= read_node_slot(root, parent, pslot + 1);
942 943 944 945

	/*
	 * then try to empty the right most buffer into the middle
	 */
946
	if (right) {
C
Chris Mason 已提交
947
		u32 right_nr;
948
		right_nr = btrfs_header_nritems(right);
C
Chris Mason 已提交
949 950 951
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
952 953 954
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right);
955 956 957 958
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
959
							  right, mid);
960
			}
C
Chris Mason 已提交
961
		}
962 963 964
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
965 966 967 968 969 970 971 972
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				path->nodes[level] = right;
973 974
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
975 976
					btrfs_header_nritems(mid);
				free_extent_buffer(mid);
977
			} else {
978
				free_extent_buffer(right);
979 980 981
			}
			return 0;
		}
982
		free_extent_buffer(right);
983 984 985 986
	}
	return 1;
}

987 988 989 990
/*
 * readahead one full node of leaves
 */
static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
991
			     int level, int slot, u64 objectid)
992
{
993
	struct extent_buffer *node;
994
	struct btrfs_disk_key disk_key;
995 996
	u32 nritems;
	u64 search;
997 998 999
	u64 lowest_read;
	u64 highest_read;
	u64 nread = 0;
1000
	int direction = path->reada;
1001
	struct extent_buffer *eb;
1002 1003 1004
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;
1005

1006
	if (level != 1)
1007 1008 1009
		return;

	if (!path->nodes[level])
1010 1011
		return;

1012
	node = path->nodes[level];
1013
	search = btrfs_node_blockptr(node, slot);
1014 1015
	blocksize = btrfs_level_size(root, level - 1);
	eb = btrfs_find_tree_block(root, search, blocksize);
1016 1017
	if (eb) {
		free_extent_buffer(eb);
1018 1019 1020
		return;
	}

1021 1022 1023
	highest_read = search;
	lowest_read = search;

1024
	nritems = btrfs_header_nritems(node);
1025
	nr = slot;
1026
	while(1) {
1027 1028 1029 1030 1031 1032 1033 1034
		if (direction < 0) {
			if (nr == 0)
				break;
			nr--;
		} else if (direction > 0) {
			nr++;
			if (nr >= nritems)
				break;
1035
		}
1036 1037 1038 1039 1040
		if (path->reada < 0 && objectid) {
			btrfs_node_key(node, &disk_key, nr);
			if (btrfs_disk_key_objectid(&disk_key) != objectid)
				break;
		}
1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057
		search = btrfs_node_blockptr(node, nr);
		if ((search >= lowest_read && search <= highest_read) ||
		    (search < lowest_read && lowest_read - search <= 32768) ||
		    (search > highest_read && search - highest_read <= 32768)) {
			readahead_tree_block(root, search, blocksize);
			nread += blocksize;
		}
		nscan++;
		if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
			break;
		if(nread > (1024 * 1024) || nscan > 128)
			break;

		if (search < lowest_read)
			lowest_read = search;
		if (search > highest_read)
			highest_read = search;
1058 1059
	}
}
C
Chris Mason 已提交
1060 1061 1062 1063 1064 1065
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
C
Chris Mason 已提交
1066 1067
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
C
Chris Mason 已提交
1068 1069 1070 1071
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
C
Chris Mason 已提交
1072
 */
1073 1074 1075
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
1076
{
1077
	struct extent_buffer *b;
1078
	u64 bytenr;
1079
	u64 ptr_gen;
1080 1081 1082
	int slot;
	int ret;
	int level;
1083
	int should_reada = p->reada;
1084 1085
	u8 lowest_level = 0;

1086 1087
	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len);
C
Chris Mason 已提交
1088 1089
	WARN_ON(p->nodes[0] != NULL);
	WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1090 1091
again:
	b = root->node;
1092
	extent_buffer_get(b);
1093
	while (b) {
1094
		level = btrfs_header_level(b);
C
Chris Mason 已提交
1095 1096
		if (cow) {
			int wret;
C
Chris Mason 已提交
1097 1098 1099
			wret = btrfs_cow_block(trans, root, b,
					       p->nodes[level + 1],
					       p->slots[level + 1],
Y
Yan 已提交
1100
					       &b);
1101
			if (wret) {
1102
				free_extent_buffer(b);
1103 1104
				return wret;
			}
C
Chris Mason 已提交
1105 1106
		}
		BUG_ON(!cow && ins_len);
1107
		if (level != btrfs_header_level(b))
C
Chris Mason 已提交
1108
			WARN_ON(1);
1109
		level = btrfs_header_level(b);
1110
		p->nodes[level] = b;
C
Chris Mason 已提交
1111
		ret = check_block(root, p, level);
C
Chris Mason 已提交
1112 1113
		if (ret)
			return -1;
1114 1115
		ret = bin_search(b, key, level, &slot);
		if (level != 0) {
1116 1117 1118
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
1119
			if (ins_len > 0 && btrfs_header_nritems(b) >=
1120
			    BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1121
				int sret = split_node(trans, root, p, level);
C
Chris Mason 已提交
1122 1123 1124 1125 1126
				BUG_ON(sret > 0);
				if (sret)
					return sret;
				b = p->nodes[level];
				slot = p->slots[level];
1127
			} else if (ins_len < 0) {
1128 1129
				int sret = balance_level(trans, root, p,
							 level);
1130 1131 1132
				if (sret)
					return sret;
				b = p->nodes[level];
1133 1134
				if (!b) {
					btrfs_release_path(NULL, p);
1135
					goto again;
1136
				}
1137
				slot = p->slots[level];
1138
				BUG_ON(btrfs_header_nritems(b) == 1);
C
Chris Mason 已提交
1139
			}
1140 1141 1142
			/* this is only true while dropping a snapshot */
			if (level == lowest_level)
				break;
1143
			bytenr = btrfs_node_blockptr(b, slot);
1144
			ptr_gen = btrfs_node_ptr_generation(b, slot);
1145
			if (should_reada)
1146 1147
				reada_for_search(root, p, level, slot,
						 key->objectid);
1148 1149
			b = read_tree_block(root, bytenr,
					    btrfs_level_size(root, level - 1));
1150 1151 1152 1153 1154 1155 1156
			if (ptr_gen != btrfs_header_generation(b)) {
				printk("block %llu bad gen wanted %llu "
				       "found %llu\n",
			        (unsigned long long)b->start,
				(unsigned long long)ptr_gen,
			        (unsigned long long)btrfs_header_generation(b));
			}
1157 1158
		} else {
			p->slots[level] = slot;
1159
			if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
C
Chris Mason 已提交
1160
			    sizeof(struct btrfs_item) + ins_len) {
1161
				int sret = split_leaf(trans, root, key,
1162
						      p, ins_len, ret == 0);
C
Chris Mason 已提交
1163 1164 1165 1166
				BUG_ON(sret > 0);
				if (sret)
					return sret;
			}
1167 1168 1169
			return ret;
		}
	}
C
Chris Mason 已提交
1170
	return 1;
1171 1172
}

C
Chris Mason 已提交
1173 1174 1175 1176 1177 1178
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
C
Chris Mason 已提交
1179 1180 1181
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
C
Chris Mason 已提交
1182
 */
1183 1184 1185
static int fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
1186 1187
{
	int i;
C
Chris Mason 已提交
1188
	int ret = 0;
1189 1190
	struct extent_buffer *t;

C
Chris Mason 已提交
1191
	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1192
		int tslot = path->slots[i];
1193
		if (!path->nodes[i])
1194
			break;
1195 1196
		t = path->nodes[i];
		btrfs_set_node_key(t, key, tslot);
C
Chris Mason 已提交
1197
		btrfs_mark_buffer_dirty(path->nodes[i]);
1198 1199 1200
		if (tslot != 0)
			break;
	}
C
Chris Mason 已提交
1201
	return ret;
1202 1203
}

C
Chris Mason 已提交
1204 1205
/*
 * try to push data from one node into the next node left in the
1206
 * tree.
C
Chris Mason 已提交
1207 1208 1209
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
C
Chris Mason 已提交
1210
 */
1211 1212
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
1213
			  struct extent_buffer *src)
1214 1215
{
	int push_items = 0;
1216 1217
	int src_nritems;
	int dst_nritems;
C
Chris Mason 已提交
1218
	int ret = 0;
1219

1220 1221
	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
C
Chris Mason 已提交
1222
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1223 1224
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);
1225

1226
	if (push_items <= 0) {
1227
		return 1;
1228
	}
1229

1230
	if (src_nritems < push_items)
1231 1232
		push_items = src_nritems;

1233 1234 1235 1236 1237
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
		           push_items * sizeof(struct btrfs_key_ptr));

1238
	if (push_items < src_nritems) {
1239 1240 1241 1242 1243 1244 1245 1246 1247
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259
	return ret;
}

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will  only push up to 1/2 the contents of the left node over
 */
1260 1261 1262 1263
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst,
			      struct extent_buffer *src)
1264 1265 1266 1267 1268 1269 1270
{
	int push_items = 0;
	int max_push;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

1271 1272 1273
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

1274 1275
	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
C
Chris Mason 已提交
1276
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1277
	if (push_items <= 0)
1278 1279 1280 1281
		return 1;

	max_push = src_nritems / 2 + 1;
	/* don't try to empty the node */
Y
Yan 已提交
1282
	if (max_push >= src_nritems)
1283
		return 1;
Y
Yan 已提交
1284

1285 1286 1287
	if (max_push < push_items)
		push_items = max_push;

1288 1289 1290 1291
	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
				      btrfs_node_key_ptr_offset(0),
				      (dst_nritems) *
				      sizeof(struct btrfs_key_ptr));
C
Chris Mason 已提交
1292

1293 1294 1295 1296
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(src_nritems - push_items),
		           push_items * sizeof(struct btrfs_key_ptr));
1297

1298 1299
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
1300

1301 1302
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
C
Chris Mason 已提交
1303
	return ret;
1304 1305
}

C
Chris Mason 已提交
1306 1307 1308 1309
/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
C
Chris Mason 已提交
1310 1311
 *
 * returns zero on success or < 0 on failure.
C
Chris Mason 已提交
1312
 */
1313
static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1314 1315
			   struct btrfs_root *root,
			   struct btrfs_path *path, int level)
C
Chris Mason 已提交
1316
{
1317 1318
	u64 root_gen;
	u64 lower_gen;
1319 1320 1321
	struct extent_buffer *lower;
	struct extent_buffer *c;
	struct btrfs_disk_key lower_key;
C
Chris Mason 已提交
1322 1323 1324 1325

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	lower = path->nodes[level-1];
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
	else
		btrfs_node_key(lower, &lower_key, 0);

	c = __btrfs_alloc_free_block(trans, root, root->nodesize,
				   root->root_key.objectid,
				   root_gen, lower_key.objectid, level,
1340
				   root->node->start, 0);
1341 1342 1343 1344 1345
	if (IS_ERR(c))
		return PTR_ERR(c);
	memset_extent_buffer(c, 0, 0, root->nodesize);
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
1346
	btrfs_set_header_bytenr(c, c->start);
1347 1348 1349 1350 1351 1352 1353
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(c),
			    BTRFS_FSID_SIZE);
	btrfs_set_node_key(c, &lower_key, 0);
1354
	btrfs_set_node_blockptr(c, 0, lower->start);
1355 1356 1357 1358
	lower_gen = btrfs_header_generation(lower);
	WARN_ON(lower_gen == 0);

	btrfs_set_node_ptr_generation(c, 0, lower_gen);
1359

1360
	btrfs_mark_buffer_dirty(c);
1361

C
Chris Mason 已提交
1362
	/* the super has an extra ref to root->node */
1363 1364 1365 1366
	free_extent_buffer(root->node);
	root->node = c;
	extent_buffer_get(c);
	path->nodes[level] = c;
C
Chris Mason 已提交
1367
	path->slots[level] = 0;
1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379

	if (root->ref_cows && lower_gen != trans->transid) {
		struct btrfs_path *back_path = btrfs_alloc_path();
		int ret;
		ret = btrfs_insert_extent_backref(trans,
						  root->fs_info->extent_root,
						  path, lower->start,
						  root->root_key.objectid,
						  trans->transid, 0, 0);
		BUG_ON(ret);
		btrfs_free_path(back_path);
	}
C
Chris Mason 已提交
1380 1381 1382
	return 0;
}

C
Chris Mason 已提交
1383 1384 1385
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
C
Chris Mason 已提交
1386
 *
C
Chris Mason 已提交
1387 1388
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
C
Chris Mason 已提交
1389 1390
 *
 * returns zero on success and < 0 on any error
C
Chris Mason 已提交
1391
 */
1392 1393
static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, struct btrfs_disk_key
1394
		      *key, u64 bytenr, int slot, int level)
C
Chris Mason 已提交
1395
{
1396
	struct extent_buffer *lower;
C
Chris Mason 已提交
1397
	int nritems;
C
Chris Mason 已提交
1398 1399

	BUG_ON(!path->nodes[level]);
1400 1401
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
C
Chris Mason 已提交
1402 1403
	if (slot > nritems)
		BUG();
C
Chris Mason 已提交
1404
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
C
Chris Mason 已提交
1405 1406
		BUG();
	if (slot != nritems) {
1407 1408 1409
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
			      btrfs_node_key_ptr_offset(slot),
C
Chris Mason 已提交
1410
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
C
Chris Mason 已提交
1411
	}
1412
	btrfs_set_node_key(lower, key, slot);
1413
	btrfs_set_node_blockptr(lower, slot, bytenr);
1414 1415
	WARN_ON(trans->transid == 0);
	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1416 1417
	btrfs_set_header_nritems(lower, nritems + 1);
	btrfs_mark_buffer_dirty(lower);
C
Chris Mason 已提交
1418 1419 1420
	return 0;
}

C
Chris Mason 已提交
1421 1422 1423 1424 1425 1426
/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
C
Chris Mason 已提交
1427 1428
 *
 * returns 0 on success and < 0 on failure
C
Chris Mason 已提交
1429
 */
1430 1431
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level)
1432
{
1433
	u64 root_gen;
1434 1435 1436
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
1437
	int mid;
C
Chris Mason 已提交
1438
	int ret;
C
Chris Mason 已提交
1439
	int wret;
1440
	u32 c_nritems;
1441

1442
	c = path->nodes[level];
1443
	WARN_ON(btrfs_header_generation(c) != trans->transid);
1444
	if (c == root->node) {
C
Chris Mason 已提交
1445
		/* trying to split the root, lets make a new one */
1446
		ret = insert_new_root(trans, root, path, level + 1);
C
Chris Mason 已提交
1447 1448
		if (ret)
			return ret;
1449 1450
	} else {
		ret = push_nodes_for_insert(trans, root, path, level);
1451 1452
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
1453 1454
		    BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
			return 0;
1455 1456
		if (ret < 0)
			return ret;
1457
	}
1458

1459
	c_nritems = btrfs_header_nritems(c);
1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	btrfs_node_key(c, &disk_key, 0);
	split = __btrfs_alloc_free_block(trans, root, root->nodesize,
					 root->root_key.objectid,
					 root_gen,
					 btrfs_disk_key_objectid(&disk_key),
					 level, c->start, 0);
1471 1472 1473 1474 1475
	if (IS_ERR(split))
		return PTR_ERR(split);

	btrfs_set_header_flags(split, btrfs_header_flags(c));
	btrfs_set_header_level(split, btrfs_header_level(c));
1476
	btrfs_set_header_bytenr(split, split->start);
1477 1478 1479 1480 1481
	btrfs_set_header_generation(split, trans->transid);
	btrfs_set_header_owner(split, root->root_key.objectid);
	write_extent_buffer(split, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(split),
			    BTRFS_FSID_SIZE);
1482

1483
	mid = (c_nritems + 1) / 2;
1484 1485 1486 1487 1488 1489 1490

	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
C
Chris Mason 已提交
1491 1492
	ret = 0;

1493 1494 1495 1496
	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	btrfs_node_key(split, &disk_key, 0);
1497
	wret = insert_ptr(trans, root, path, &disk_key, split->start,
1498
			  path->slots[level + 1] + 1,
C
Chris Mason 已提交
1499
			  level + 1);
C
Chris Mason 已提交
1500 1501 1502
	if (wret)
		ret = wret;

C
Chris Mason 已提交
1503
	if (path->slots[level] >= mid) {
C
Chris Mason 已提交
1504
		path->slots[level] -= mid;
1505 1506
		free_extent_buffer(c);
		path->nodes[level] = split;
C
Chris Mason 已提交
1507 1508
		path->slots[level + 1] += 1;
	} else {
1509
		free_extent_buffer(split);
1510
	}
C
Chris Mason 已提交
1511
	return ret;
1512 1513
}

C
Chris Mason 已提交
1514 1515 1516 1517 1518
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
1519
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1520 1521
{
	int data_len;
1522
	int nritems = btrfs_header_nritems(l);
1523
	int end = min(nritems, start + nr) - 1;
1524 1525 1526

	if (!nr)
		return 0;
1527 1528
	data_len = btrfs_item_end_nr(l, start);
	data_len = data_len - btrfs_item_offset_nr(l, end);
C
Chris Mason 已提交
1529
	data_len += sizeof(struct btrfs_item) * nr;
1530
	WARN_ON(data_len < 0);
1531 1532 1533
	return data_len;
}

1534 1535 1536 1537 1538
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
1539
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1540
{
1541 1542 1543 1544 1545
	int nritems = btrfs_header_nritems(leaf);
	int ret;
	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
	if (ret < 0) {
		printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
J
Jens Axboe 已提交
1546
		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1547 1548 1549
		       leaf_space_used(leaf, 0, nritems), nritems);
	}
	return ret;
1550 1551
}

C
Chris Mason 已提交
1552 1553 1554
/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
C
Chris Mason 已提交
1555 1556 1557
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
C
Chris Mason 已提交
1558
 */
1559
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1560 1561
			   *root, struct btrfs_path *path, int data_size,
			   int empty)
C
Chris Mason 已提交
1562
{
1563 1564 1565 1566
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	struct btrfs_disk_key disk_key;
C
Chris Mason 已提交
1567
	int slot;
1568
	u32 i;
C
Chris Mason 已提交
1569 1570 1571
	int free_space;
	int push_space = 0;
	int push_items = 0;
C
Chris Mason 已提交
1572
	struct btrfs_item *item;
1573
	u32 left_nritems;
1574
	u32 nr;
1575
	u32 right_nritems;
1576
	u32 data_end;
1577
	u32 this_item_size;
1578
	int ret;
C
Chris Mason 已提交
1579 1580 1581 1582 1583 1584

	slot = path->slots[1];
	if (!path->nodes[1]) {
		return 1;
	}
	upper = path->nodes[1];
1585
	if (slot >= btrfs_header_nritems(upper) - 1)
C
Chris Mason 已提交
1586
		return 1;
1587

1588 1589
	right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
				root->leafsize);
C
Chris Mason 已提交
1590
	free_space = btrfs_leaf_free_space(root, right);
C
Chris Mason 已提交
1591
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1592
		free_extent_buffer(right);
C
Chris Mason 已提交
1593 1594
		return 1;
	}
1595

C
Chris Mason 已提交
1596
	/* cow and double check */
1597 1598
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right);
1599
	if (ret) {
1600
		free_extent_buffer(right);
1601 1602
		return 1;
	}
C
Chris Mason 已提交
1603
	free_space = btrfs_leaf_free_space(root, right);
C
Chris Mason 已提交
1604
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1605
		free_extent_buffer(right);
C
Chris Mason 已提交
1606 1607 1608
		return 1;
	}

1609
	left_nritems = btrfs_header_nritems(left);
1610
	if (left_nritems == 0) {
1611
		free_extent_buffer(right);
1612 1613
		return 1;
	}
1614

1615 1616 1617 1618 1619 1620 1621
	if (empty)
		nr = 0;
	else
		nr = 1;

	i = left_nritems - 1;
	while (i >= nr) {
1622
		item = btrfs_item_nr(left, i);
1623

C
Chris Mason 已提交
1624 1625
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636

		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
C
Chris Mason 已提交
1637 1638
			break;
		push_items++;
1639
		push_space += this_item_size + sizeof(*item);
1640 1641 1642
		if (i == 0)
			break;
		i--;
1643 1644 1645 1646
	}
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
C
Chris Mason 已提交
1647
	}
1648

C
Chris Mason 已提交
1649
	if (push_items == 0) {
1650
		free_extent_buffer(right);
C
Chris Mason 已提交
1651 1652
		return 1;
	}
1653

1654
	if (!empty && push_items == left_nritems)
1655
		WARN_ON(1);
1656

C
Chris Mason 已提交
1657
	/* push left to right */
1658
	right_nritems = btrfs_header_nritems(right);
1659

1660
	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
C
Chris Mason 已提交
1661
	push_space -= leaf_data_end(root, left);
1662

C
Chris Mason 已提交
1663
	/* make room in the right data area */
1664 1665 1666 1667 1668 1669
	data_end = leaf_data_end(root, right);
	memmove_extent_buffer(right,
			      btrfs_leaf_data(right) + data_end - push_space,
			      btrfs_leaf_data(right) + data_end,
			      BTRFS_LEAF_DATA_SIZE(root) - data_end);

C
Chris Mason 已提交
1670
	/* copy from the left data area */
1671
	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
C
Chris Mason 已提交
1672 1673 1674
		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
		     btrfs_leaf_data(left) + leaf_data_end(root, left),
		     push_space);
1675 1676 1677 1678 1679

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

C
Chris Mason 已提交
1680
	/* copy the items from left to right */
1681 1682 1683
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
		   btrfs_item_nr_offset(left_nritems - push_items),
		   push_items * sizeof(struct btrfs_item));
C
Chris Mason 已提交
1684 1685

	/* update the item pointers */
1686
	right_nritems += push_items;
1687
	btrfs_set_header_nritems(right, right_nritems);
C
Chris Mason 已提交
1688
	push_space = BTRFS_LEAF_DATA_SIZE(root);
1689
	for (i = 0; i < right_nritems; i++) {
1690
		item = btrfs_item_nr(right, i);
1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}
		push_space -= btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
C
Chris Mason 已提交
1705
	}
1706
	left_nritems -= push_items;
1707
	btrfs_set_header_nritems(left, left_nritems);
C
Chris Mason 已提交
1708

1709 1710
	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
1711
	btrfs_mark_buffer_dirty(right);
1712

1713 1714
	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
C
Chris Mason 已提交
1715
	btrfs_mark_buffer_dirty(upper);
C
Chris Mason 已提交
1716

C
Chris Mason 已提交
1717
	/* then fixup the leaf pointer in the path */
1718 1719
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
1720 1721
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
C
Chris Mason 已提交
1722 1723
		path->slots[1] += 1;
	} else {
1724
		free_extent_buffer(right);
C
Chris Mason 已提交
1725 1726 1727
	}
	return 0;
}
C
Chris Mason 已提交
1728 1729 1730 1731
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
1732
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1733 1734
			  *root, struct btrfs_path *path, int data_size,
			  int empty)
1735
{
1736 1737 1738
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
1739 1740 1741 1742 1743
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
C
Chris Mason 已提交
1744
	struct btrfs_item *item;
1745
	u32 old_left_nritems;
1746
	u32 right_nritems;
1747
	u32 nr;
C
Chris Mason 已提交
1748 1749
	int ret = 0;
	int wret;
1750 1751
	u32 this_item_size;
	u32 old_left_item_size;
1752 1753

	slot = path->slots[1];
1754
	if (slot == 0)
1755
		return 1;
1756
	if (!path->nodes[1])
1757
		return 1;
1758

1759 1760 1761 1762 1763
	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0) {
		return 1;
	}

1764
	left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1765
			       slot - 1), root->leafsize);
C
Chris Mason 已提交
1766
	free_space = btrfs_leaf_free_space(root, left);
C
Chris Mason 已提交
1767
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1768
		free_extent_buffer(left);
1769 1770
		return 1;
	}
C
Chris Mason 已提交
1771 1772

	/* cow and double check */
1773 1774
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left);
1775 1776
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
1777
		free_extent_buffer(left);
1778 1779
		return 1;
	}
1780

C
Chris Mason 已提交
1781
	free_space = btrfs_leaf_free_space(root, left);
C
Chris Mason 已提交
1782
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1783
		free_extent_buffer(left);
C
Chris Mason 已提交
1784 1785 1786
		return 1;
	}

1787 1788 1789 1790 1791 1792
	if (empty)
		nr = right_nritems;
	else
		nr = right_nritems - 1;

	for (i = 0; i < nr; i++) {
1793
		item = btrfs_item_nr(right, i);
1794 1795 1796 1797 1798 1799 1800 1801
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

1802 1803
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
1804 1805 1806

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
1807
			break;
1808

1809
		push_items++;
1810 1811 1812 1813 1814 1815
		push_space += this_item_size + sizeof(*item);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
1816
	}
1817

1818
	if (push_items == 0) {
1819
		free_extent_buffer(left);
1820 1821
		return 1;
	}
1822
	if (!empty && push_items == btrfs_header_nritems(right))
1823
		WARN_ON(1);
1824

1825
	/* push data from right to left */
1826 1827 1828 1829 1830
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

C
Chris Mason 已提交
1831
	push_space = BTRFS_LEAF_DATA_SIZE(root) -
1832 1833 1834
		     btrfs_item_offset_nr(right, push_items -1);

	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
C
Chris Mason 已提交
1835 1836
		     leaf_data_end(root, left) - push_space,
		     btrfs_leaf_data(right) +
1837
		     btrfs_item_offset_nr(right, push_items - 1),
C
Chris Mason 已提交
1838
		     push_space);
1839
	old_left_nritems = btrfs_header_nritems(left);
1840 1841
	BUG_ON(old_left_nritems < 0);

1842
	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
C
Chris Mason 已提交
1843
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1844
		u32 ioff;
1845

1846
		item = btrfs_item_nr(left, i);
1847 1848 1849 1850 1851 1852 1853 1854
		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

1855 1856
		ioff = btrfs_item_offset(left, item);
		btrfs_set_item_offset(left, item,
1857
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1858
	}
1859
	btrfs_set_header_nritems(left, old_left_nritems + push_items);
1860 1861 1862 1863
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
	}
1864 1865

	/* fixup right node */
1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879
	if (push_items > right_nritems) {
		printk("push items %d nr %u\n", push_items, right_nritems);
		WARN_ON(1);
	}

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
						  leaf_data_end(root, right);
		memmove_extent_buffer(right, btrfs_leaf_data(right) +
				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
				      btrfs_leaf_data(right) +
				      leaf_data_end(root, right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1880 1881 1882
			      btrfs_item_nr_offset(push_items),
			     (btrfs_header_nritems(right) - push_items) *
			     sizeof(struct btrfs_item));
1883
	}
1884 1885
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
C
Chris Mason 已提交
1886
	push_space = BTRFS_LEAF_DATA_SIZE(root);
1887 1888
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		push_space = push_space - btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
1904
	}
1905

1906
	btrfs_mark_buffer_dirty(left);
1907 1908
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
1909

1910 1911
	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
C
Chris Mason 已提交
1912 1913
	if (wret)
		ret = wret;
1914 1915 1916 1917

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
1918 1919
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
1920 1921
		path->slots[1] -= 1;
	} else {
1922
		free_extent_buffer(left);
1923 1924
		path->slots[0] -= push_items;
	}
1925
	BUG_ON(path->slots[0] < 0);
C
Chris Mason 已提交
1926
	return ret;
1927 1928
}

C
Chris Mason 已提交
1929 1930 1931
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
C
Chris Mason 已提交
1932 1933
 *
 * returns 0 if all went well and < 0 on failure.
C
Chris Mason 已提交
1934
 */
1935
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1936
		      *root, struct btrfs_key *ins_key,
1937
		      struct btrfs_path *path, int data_size, int extend)
1938
{
1939
	u64 root_gen;
1940
	struct extent_buffer *l;
1941
	u32 nritems;
1942 1943
	int mid;
	int slot;
1944
	struct extent_buffer *right;
C
Chris Mason 已提交
1945
	int space_needed = data_size + sizeof(struct btrfs_item);
1946 1947 1948
	int data_copy_size;
	int rt_data_off;
	int i;
1949
	int ret = 0;
C
Chris Mason 已提交
1950
	int wret;
1951 1952
	int double_split;
	int num_doubles = 0;
1953
	struct btrfs_disk_key disk_key;
C
Chris Mason 已提交
1954

1955 1956 1957
	if (extend)
		space_needed = data_size;

1958 1959 1960 1961 1962
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

C
Chris Mason 已提交
1963
	/* first try to make some room by pushing left and right */
1964
	if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1965
		wret = push_leaf_right(trans, root, path, data_size, 0);
1966
		if (wret < 0) {
C
Chris Mason 已提交
1967
			return wret;
1968 1969
		}
		if (wret) {
1970
			wret = push_leaf_left(trans, root, path, data_size, 0);
1971 1972 1973 1974
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];
C
Chris Mason 已提交
1975

1976
		/* did the pushes work? */
1977
		if (btrfs_leaf_free_space(root, l) >= space_needed)
1978
			return 0;
1979
	}
C
Chris Mason 已提交
1980

C
Chris Mason 已提交
1981
	if (!path->nodes[1]) {
1982
		ret = insert_new_root(trans, root, path, 1);
C
Chris Mason 已提交
1983 1984 1985
		if (ret)
			return ret;
	}
1986 1987 1988
again:
	double_split = 0;
	l = path->nodes[0];
1989
	slot = path->slots[0];
1990
	nritems = btrfs_header_nritems(l);
1991
	mid = (nritems + 1)/ 2;
1992

1993 1994 1995 1996 1997 1998
	btrfs_item_key(l, &disk_key, 0);

	right = __btrfs_alloc_free_block(trans, root, root->leafsize,
					 root->root_key.objectid,
					 root_gen, disk_key.objectid, 0,
					 l->start, 0);
1999 2000 2001 2002
	if (IS_ERR(right))
		return PTR_ERR(right);

	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2003
	btrfs_set_header_bytenr(right, right->start);
2004 2005 2006 2007 2008 2009
	btrfs_set_header_generation(right, trans->transid);
	btrfs_set_header_owner(right, root->root_key.objectid);
	btrfs_set_header_level(right, 0);
	write_extent_buffer(right, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(right),
			    BTRFS_FSID_SIZE);
2010 2011 2012 2013 2014 2015
	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + space_needed >
			BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
2016
				btrfs_set_header_nritems(right, 0);
2017
				wret = insert_ptr(trans, root, path,
2018
						  &disk_key, right->start,
2019 2020 2021
						  path->slots[1] + 1, 1);
				if (wret)
					ret = wret;
2022 2023
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
2024 2025 2026 2027 2028
				path->slots[0] = 0;
				path->slots[1] += 1;
				return ret;
			}
			mid = slot;
2029 2030 2031 2032 2033
			if (mid != nritems &&
			    leaf_space_used(l, mid, nritems - mid) +
			    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
				double_split = 1;
			}
2034 2035 2036 2037
		}
	} else {
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
			BTRFS_LEAF_DATA_SIZE(root)) {
2038
			if (!extend && slot == 0) {
2039
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
2040
				btrfs_set_header_nritems(right, 0);
2041 2042
				wret = insert_ptr(trans, root, path,
						  &disk_key,
2043
						  right->start,
2044
						  path->slots[1], 1);
2045 2046
				if (wret)
					ret = wret;
2047 2048
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
2049
				path->slots[0] = 0;
2050 2051 2052 2053 2054 2055
				if (path->slots[1] == 0) {
					wret = fixup_low_keys(trans, root,
					           path, &disk_key, 1);
					if (wret)
						ret = wret;
				}
2056
				return ret;
2057 2058 2059 2060 2061 2062 2063 2064 2065
			} else if (extend && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
					double_split = 1;
				}
2066
			}
2067 2068
		}
	}
2069 2070 2071 2072 2073 2074 2075 2076 2077
	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);

	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
C
Chris Mason 已提交
2078 2079 2080
		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
		     data_copy_size, btrfs_leaf_data(l) +
		     leaf_data_end(root, l), data_copy_size);
2081

C
Chris Mason 已提交
2082
	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2083
		      btrfs_item_end_nr(l, mid);
C
Chris Mason 已提交
2084

2085 2086
	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(right, i);
2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097
		u32 ioff;

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(right, item);
2098
		btrfs_set_item_offset(right, item, ioff + rt_data_off);
C
Chris Mason 已提交
2099
	}
C
Chris Mason 已提交
2100

2101 2102 2103 2104 2105
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

2106
	btrfs_set_header_nritems(l, mid);
C
Chris Mason 已提交
2107
	ret = 0;
2108
	btrfs_item_key(right, &disk_key, 0);
2109 2110
	wret = insert_ptr(trans, root, path, &disk_key, right->start,
			  path->slots[1] + 1, 1);
C
Chris Mason 已提交
2111 2112
	if (wret)
		ret = wret;
2113 2114 2115

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
2116
	BUG_ON(path->slots[0] != slot);
2117

2118
	if (mid <= slot) {
2119 2120
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
2121 2122
		path->slots[0] -= mid;
		path->slots[1] += 1;
2123
	} else
2124 2125
		free_extent_buffer(right);

2126
	BUG_ON(path->slots[0] < 0);
2127

2128 2129 2130 2131
	if (double_split) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
2132
	}
2133 2134 2135
	return ret;
}

C
Chris Mason 已提交
2136 2137 2138
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
2139
			u32 new_size, int from_end)
C
Chris Mason 已提交
2140 2141 2142 2143
{
	int ret = 0;
	int slot;
	int slot_orig;
2144 2145
	struct extent_buffer *leaf;
	struct btrfs_item *item;
C
Chris Mason 已提交
2146 2147 2148 2149 2150 2151 2152 2153
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;

	slot_orig = path->slots[0];
2154
	leaf = path->nodes[0];
2155 2156 2157 2158 2159
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return 0;
C
Chris Mason 已提交
2160

2161
	nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
2162 2163
	data_end = leaf_data_end(root, leaf);

2164
	old_data_start = btrfs_item_offset_nr(leaf, slot);
2165

C
Chris Mason 已提交
2166 2167 2168 2169 2170 2171 2172 2173 2174 2175
	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
2176 2177
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
2178 2179 2180 2181 2182 2183 2184 2185 2186

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}

2187 2188
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff + size_diff);
C
Chris Mason 已提交
2189
	}
2190 2191 2192 2193 2194 2195

	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

C
Chris Mason 已提交
2196
	/* shift the data */
2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235
	if (from_end) {
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				        (unsigned long)fi,
				        offsetof(struct btrfs_file_extent_item,
						 disk_bytenr));
			}
		}

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start - data_end);

		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(trans, root, path, &disk_key, 1);
	}
2236 2237 2238 2239

	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
2240 2241

	ret = 0;
2242 2243
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
C
Chris Mason 已提交
2244
		BUG();
2245
	}
C
Chris Mason 已提交
2246 2247 2248
	return ret;
}

2249 2250 2251
int btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
2252 2253 2254 2255
{
	int ret = 0;
	int slot;
	int slot_orig;
2256 2257
	struct extent_buffer *leaf;
	struct btrfs_item *item;
2258 2259 2260 2261 2262 2263 2264
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;

	slot_orig = path->slots[0];
2265
	leaf = path->nodes[0];
2266

2267
	nritems = btrfs_header_nritems(leaf);
2268 2269
	data_end = leaf_data_end(root, leaf);

2270 2271
	if (btrfs_leaf_free_space(root, leaf) < data_size) {
		btrfs_print_leaf(root, leaf);
2272
		BUG();
2273
	}
2274
	slot = path->slots[0];
2275
	old_data = btrfs_item_end_nr(leaf, slot);
2276 2277

	BUG_ON(slot < 0);
2278 2279 2280 2281 2282
	if (slot >= nritems) {
		btrfs_print_leaf(root, leaf);
		printk("slot %d too large, nritems %d\n", slot, nritems);
		BUG_ON(1);
	}
2283 2284 2285 2286 2287 2288

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
2289 2290
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
2291 2292 2293 2294 2295 2296 2297 2298

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}
2299 2300
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff - data_size);
2301
	}
2302

2303 2304 2305 2306 2307
	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

2308
	/* shift the data */
2309
	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2310 2311
		      data_end - data_size, btrfs_leaf_data(leaf) +
		      data_end, old_data - data_end);
2312

2313
	data_end = old_data;
2314 2315 2316 2317
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);
2318 2319

	ret = 0;
2320 2321
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
2322
		BUG();
2323
	}
2324 2325 2326
	return ret;
}

C
Chris Mason 已提交
2327 2328 2329 2330
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
2331 2332 2333 2334
int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    struct btrfs_path *path,
			    struct btrfs_key *cpu_key, u32 data_size)
2335
{
2336 2337
	struct extent_buffer *leaf;
	struct btrfs_item *item;
C
Chris Mason 已提交
2338
	int ret = 0;
2339
	int slot;
2340
	int slot_orig;
2341
	u32 nritems;
2342
	unsigned int data_end;
C
Chris Mason 已提交
2343 2344 2345
	struct btrfs_disk_key disk_key;

	btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2346

C
Chris Mason 已提交
2347
	/* create a root if there isn't one */
C
Chris Mason 已提交
2348
	if (!root->node)
C
Chris Mason 已提交
2349
		BUG();
2350

2351
	ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2352
	if (ret == 0) {
2353
		return -EEXIST;
C
Chris Mason 已提交
2354
	}
2355 2356
	if (ret < 0)
		goto out;
2357

2358
	slot_orig = path->slots[0];
2359
	leaf = path->nodes[0];
C
Chris Mason 已提交
2360

2361
	nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
2362
	data_end = leaf_data_end(root, leaf);
2363

C
Chris Mason 已提交
2364
	if (btrfs_leaf_free_space(root, leaf) <
2365
	    sizeof(struct btrfs_item) + data_size) {
2366 2367 2368
		btrfs_print_leaf(root, leaf);
		printk("not enough freespace need %u have %d\n",
		       data_size, btrfs_leaf_free_space(root, leaf));
2369
		BUG();
2370
	}
2371

2372
	slot = path->slots[0];
2373
	BUG_ON(slot < 0);
2374

2375 2376
	if (slot != nritems) {
		int i;
2377
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2378

2379 2380 2381 2382 2383 2384
		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
2385 2386 2387 2388
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
2389
		WARN_ON(leaf->map_token);
C
Chris Mason 已提交
2390
		for (i = slot; i < nritems; i++) {
2391
			u32 ioff;
2392

2393
			item = btrfs_item_nr(leaf, i);
2394 2395 2396 2397 2398 2399 2400 2401
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}

2402 2403
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - data_size);
C
Chris Mason 已提交
2404
		}
2405 2406 2407 2408
		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}
2409 2410

		/* shift the items */
2411 2412
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
			      btrfs_item_nr_offset(slot),
C
Chris Mason 已提交
2413
			      (nritems - slot) * sizeof(struct btrfs_item));
2414 2415

		/* shift the data */
2416
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
C
Chris Mason 已提交
2417 2418
			      data_end - data_size, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
2419 2420
		data_end = old_data;
	}
2421

2422
	/* setup the item for the new data */
2423 2424 2425 2426 2427 2428
	btrfs_set_item_key(leaf, &disk_key, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_offset(leaf, item, data_end - data_size);
	btrfs_set_item_size(leaf, item, data_size);
	btrfs_set_header_nritems(leaf, nritems + 1);
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
2429 2430

	ret = 0;
2431
	if (slot == 0)
2432
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
C
Chris Mason 已提交
2433

2434 2435
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
2436
		BUG();
2437
	}
2438
out:
2439 2440 2441 2442 2443 2444 2445
	return ret;
}

/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
2446 2447 2448
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *cpu_key, void *data, u32
		      data_size)
2449 2450
{
	int ret = 0;
C
Chris Mason 已提交
2451
	struct btrfs_path *path;
2452 2453
	struct extent_buffer *leaf;
	unsigned long ptr;
2454

C
Chris Mason 已提交
2455 2456 2457
	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2458
	if (!ret) {
2459 2460 2461 2462
		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, data, ptr, data_size);
		btrfs_mark_buffer_dirty(leaf);
2463
	}
C
Chris Mason 已提交
2464
	btrfs_free_path(path);
C
Chris Mason 已提交
2465
	return ret;
2466 2467
}

C
Chris Mason 已提交
2468
/*
C
Chris Mason 已提交
2469
 * delete the pointer from a given node.
C
Chris Mason 已提交
2470 2471 2472 2473 2474
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way the root if required.  The root is converted into
 * a leaf if all the nodes are emptied.
 */
2475 2476
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot)
2477
{
2478
	struct extent_buffer *parent = path->nodes[level];
2479
	u32 nritems;
C
Chris Mason 已提交
2480
	int ret = 0;
2481
	int wret;
2482

2483
	nritems = btrfs_header_nritems(parent);
2484
	if (slot != nritems -1) {
2485 2486 2487
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
C
Chris Mason 已提交
2488 2489
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
2490
	}
2491
	nritems--;
2492
	btrfs_set_header_nritems(parent, nritems);
2493
	if (nritems == 0 && parent == root->node) {
2494
		BUG_ON(btrfs_header_level(root->node) != 1);
2495
		/* just turn the root into a leaf and break */
2496
		btrfs_set_header_level(root->node, 0);
2497
	} else if (slot == 0) {
2498 2499 2500 2501
		struct btrfs_disk_key disk_key;

		btrfs_node_key(parent, &disk_key, 0);
		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
C
Chris Mason 已提交
2502 2503
		if (wret)
			ret = wret;
2504
	}
C
Chris Mason 已提交
2505
	btrfs_mark_buffer_dirty(parent);
C
Chris Mason 已提交
2506
	return ret;
2507 2508
}

C
Chris Mason 已提交
2509 2510 2511 2512
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
2513 2514
int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path)
2515 2516
{
	int slot;
2517 2518
	struct extent_buffer *leaf;
	struct btrfs_item *item;
2519 2520
	int doff;
	int dsize;
C
Chris Mason 已提交
2521 2522
	int ret = 0;
	int wret;
2523
	u32 nritems;
2524

2525
	leaf = path->nodes[0];
2526
	slot = path->slots[0];
2527 2528 2529
	doff = btrfs_item_offset_nr(leaf, slot);
	dsize = btrfs_item_size_nr(leaf, slot);
	nritems = btrfs_header_nritems(leaf);
2530

2531
	if (slot != nritems - 1) {
2532
		int i;
C
Chris Mason 已提交
2533
		int data_end = leaf_data_end(root, leaf);
2534 2535

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
C
Chris Mason 已提交
2536 2537 2538
			      data_end + dsize,
			      btrfs_leaf_data(leaf) + data_end,
			      doff - data_end);
2539

C
Chris Mason 已提交
2540
		for (i = slot + 1; i < nritems; i++) {
2541
			u32 ioff;
2542

2543
			item = btrfs_item_nr(leaf, i);
2544 2545 2546 2547 2548 2549 2550
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}
2551 2552
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff + dsize);
C
Chris Mason 已提交
2553
		}
2554 2555 2556 2557 2558 2559

		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

2560 2561
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
C
Chris Mason 已提交
2562 2563
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
2564
	}
2565
	btrfs_set_header_nritems(leaf, nritems - 1);
2566
	nritems--;
2567

C
Chris Mason 已提交
2568
	/* delete the leaf if we've emptied it */
2569
	if (nritems == 0) {
2570 2571
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
2572
		} else {
2573
			u64 root_gen = btrfs_header_generation(path->nodes[1]);
2574 2575
			clean_tree_block(trans, root, leaf);
			wait_on_tree_block_writeback(root, leaf);
2576
			wret = del_ptr(trans, root, path, 1, path->slots[1]);
C
Chris Mason 已提交
2577 2578
			if (wret)
				ret = wret;
2579
			wret = btrfs_free_extent(trans, root,
2580 2581 2582
					 leaf->start, leaf->len,
					 btrfs_header_owner(path->nodes[1]),
					 root_gen, 0, 0, 1);
C
Chris Mason 已提交
2583 2584
			if (wret)
				ret = wret;
2585
		}
2586
	} else {
2587
		int used = leaf_space_used(leaf, 0, nritems);
C
Chris Mason 已提交
2588
		if (slot == 0) {
2589 2590 2591
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
2592
			wret = fixup_low_keys(trans, root, path,
2593
					      &disk_key, 1);
C
Chris Mason 已提交
2594 2595 2596 2597
			if (wret)
				ret = wret;
		}

C
Chris Mason 已提交
2598
		/* delete the leaf if it is mostly empty */
2599
		if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2600 2601 2602 2603
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
2604
			slot = path->slots[1];
2605 2606
			extent_buffer_get(leaf);

2607
			wret = push_leaf_right(trans, root, path, 1, 1);
2608
			if (wret < 0 && wret != -ENOSPC)
C
Chris Mason 已提交
2609
				ret = wret;
2610 2611 2612

			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
2613
				wret = push_leaf_left(trans, root, path, 1, 1);
2614
				if (wret < 0 && wret != -ENOSPC)
C
Chris Mason 已提交
2615 2616
					ret = wret;
			}
2617 2618

			if (btrfs_header_nritems(leaf) == 0) {
2619
				u64 root_gen;
2620 2621
				u64 bytenr = leaf->start;
				u32 blocksize = leaf->len;
2622

2623 2624 2625
				root_gen = btrfs_header_generation(
							   path->nodes[1]);

2626 2627 2628
				clean_tree_block(trans, root, leaf);
				wait_on_tree_block_writeback(root, leaf);

2629
				wret = del_ptr(trans, root, path, 1, slot);
C
Chris Mason 已提交
2630 2631
				if (wret)
					ret = wret;
2632 2633

				free_extent_buffer(leaf);
2634
				wret = btrfs_free_extent(trans, root, bytenr,
2635 2636 2637
					     blocksize,
					     btrfs_header_owner(path->nodes[1]),
					     root_gen, 0, 0, 1);
C
Chris Mason 已提交
2638 2639
				if (wret)
					ret = wret;
C
Chris Mason 已提交
2640
			} else {
2641 2642
				btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
2643
			}
2644
		} else {
2645
			btrfs_mark_buffer_dirty(leaf);
2646 2647
		}
	}
C
Chris Mason 已提交
2648
	return ret;
2649 2650
}

2651 2652 2653 2654 2655 2656 2657
/*
 * walk up the tree as far as required to find the previous leaf.
 * returns 0 if it found something or 1 if there are no lesser leaves.
 * returns < 0 on io errors.
 */
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
2658
	u64 bytenr;
2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690
	int slot;
	int level = 1;
	struct extent_buffer *c;
	struct extent_buffer *next = NULL;

	while(level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		slot = path->slots[level];
		c = path->nodes[level];
		if (slot == 0) {
			level++;
			if (level == BTRFS_MAX_LEVEL)
				return 1;
			continue;
		}
		slot--;

		bytenr = btrfs_node_blockptr(c, slot);
		if (next)
			free_extent_buffer(next);

		next = read_tree_block(root, bytenr,
				       btrfs_level_size(root, level - 1));
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
		free_extent_buffer(c);
2691 2692 2693
		slot = btrfs_header_nritems(next);
		if (slot != 0)
			slot--;
2694
		path->nodes[level] = next;
2695
		path->slots[level] = slot;
2696 2697
		if (!level)
			break;
2698
		next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2699 2700 2701 2702 2703
				       btrfs_level_size(root, level - 1));
	}
	return 0;
}

C
Chris Mason 已提交
2704 2705
/*
 * walk up the tree as far as required to find the next leaf.
C
Chris Mason 已提交
2706 2707
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on io errors.
C
Chris Mason 已提交
2708
 */
C
Chris Mason 已提交
2709
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2710 2711 2712
{
	int slot;
	int level = 1;
2713
	u64 bytenr;
2714 2715
	struct extent_buffer *c;
	struct extent_buffer *next = NULL;
2716

C
Chris Mason 已提交
2717
	while(level < BTRFS_MAX_LEVEL) {
2718
		if (!path->nodes[level])
C
Chris Mason 已提交
2719
			return 1;
2720

2721 2722
		slot = path->slots[level] + 1;
		c = path->nodes[level];
2723
		if (slot >= btrfs_header_nritems(c)) {
2724
			level++;
2725 2726
			if (level == BTRFS_MAX_LEVEL)
				return 1;
2727 2728
			continue;
		}
2729

2730
		bytenr = btrfs_node_blockptr(c, slot);
C
Chris Mason 已提交
2731
		if (next)
2732 2733
			free_extent_buffer(next);

2734
		if (path->reada)
2735
			reada_for_search(root, path, level, slot, 0);
2736

2737 2738
		next = read_tree_block(root, bytenr,
				       btrfs_level_size(root, level -1));
2739 2740 2741 2742 2743 2744
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
2745
		free_extent_buffer(c);
2746 2747 2748 2749
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!level)
			break;
2750
		if (path->reada)
2751
			reada_for_search(root, path, level, 0, 0);
2752 2753
		next = read_tree_block(root, btrfs_node_blockptr(next, 0),
				       btrfs_level_size(root, level - 1));
2754 2755 2756
	}
	return 0;
}