ctree.c 65.0 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19
#include <linux/sched.h>
20 21
#include "ctree.h"
#include "disk-io.h"
22
#include "transaction.h"
23
#include "print-tree.h"
24

25 26 27
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28
		      *root, struct btrfs_key *ins_key,
29
		      struct btrfs_path *path, int data_size, int extend);
30 31 32 33 34 35 36
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
37 38
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);
39

C
Chris Mason 已提交
40
inline void btrfs_init_path(struct btrfs_path *p)
C
Chris Mason 已提交
41
{
C
Chris Mason 已提交
42
	memset(p, 0, sizeof(*p));
C
Chris Mason 已提交
43 44
}

C
Chris Mason 已提交
45
struct btrfs_path *btrfs_alloc_path(void)
C
Chris Mason 已提交
46
{
C
Chris Mason 已提交
47 48
	struct btrfs_path *path;
	path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49
	if (path) {
C
Chris Mason 已提交
50
		btrfs_init_path(path);
51 52
		path->reada = 1;
	}
C
Chris Mason 已提交
53
	return path;
C
Chris Mason 已提交
54 55
}

C
Chris Mason 已提交
56
void btrfs_free_path(struct btrfs_path *p)
57
{
C
Chris Mason 已提交
58 59
	btrfs_release_path(NULL, p);
	kmem_cache_free(btrfs_path_cachep, p);
60 61
}

C
Chris Mason 已提交
62
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 64
{
	int i;
C
Chris Mason 已提交
65
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66 67
		if (!p->nodes[i])
			break;
68
		free_extent_buffer(p->nodes[i]);
69
	}
C
Chris Mason 已提交
70
	memset(p, 0, sizeof(*p));
71 72
}

73 74 75 76 77 78
static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size)
C
Chris Mason 已提交
79
{
80
	struct extent_buffer *cow;
81 82
	int ret = 0;
	int different_trans = 0;
C
Chris Mason 已提交
83

84
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
85

86 87
	cow = btrfs_alloc_free_block(trans, root, buf->len,
				     search_start, empty_size);
88 89
	if (IS_ERR(cow))
		return PTR_ERR(cow);
90

91
	copy_extent_buffer(cow, buf, 0, 0, cow->len);
92
	btrfs_set_header_bytenr(cow, cow->start);
93 94
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, root->root_key.objectid);
95

96 97
	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (btrfs_header_generation(buf) != trans->transid) {
98 99 100 101 102 103 104 105
		different_trans = 1;
		ret = btrfs_inc_ref(trans, root, buf);
		if (ret)
			return ret;
	} else {
		clean_tree_block(trans, root, buf);
	}

C
Chris Mason 已提交
106 107
	if (buf == root->node) {
		root->node = cow;
108
		extent_buffer_get(cow);
C
Chris Mason 已提交
109
		if (buf != root->commit_root) {
110 111
			btrfs_free_extent(trans, root, buf->start,
					  buf->len, 1);
C
Chris Mason 已提交
112
		}
113
		free_extent_buffer(buf);
C
Chris Mason 已提交
114
	} else {
115
		btrfs_set_node_blockptr(parent, parent_slot,
116
					cow->start);
117 118 119
		WARN_ON(trans->transid == 0);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
C
Chris Mason 已提交
120
		btrfs_mark_buffer_dirty(parent);
121
		WARN_ON(btrfs_header_generation(parent) != trans->transid);
122
		btrfs_free_extent(trans, root, buf->start, buf->len, 1);
C
Chris Mason 已提交
123
	}
124
	free_extent_buffer(buf);
C
Chris Mason 已提交
125
	btrfs_mark_buffer_dirty(cow);
C
Chris Mason 已提交
126
	*cow_ret = cow;
C
Chris Mason 已提交
127 128 129
	return 0;
}

130 131 132 133
int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret)
134 135
{
	u64 search_start;
136
	int ret;
137 138 139 140 141 142 143 144 145 146
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}
147
	if (btrfs_header_generation(buf) == trans->transid) {
148 149 150 151
		*cow_ret = buf;
		return 0;
	}

152
	search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
153
	ret = __btrfs_cow_block(trans, root, buf, parent,
154
				 parent_slot, cow_ret, search_start, 0);
155
	return ret;
156 157
}

158
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
159
{
160
	if (blocknr < other && other - (blocknr + blocksize) < 32768)
161
		return 1;
162
	if (blocknr > other && blocknr - (other + blocksize) < 32768)
163 164 165 166
		return 1;
	return 0;
}

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	if (k1.objectid > k2->objectid)
		return 1;
	if (k1.objectid < k2->objectid)
		return -1;
	if (k1.type > k2->type)
		return 1;
	if (k1.type < k2->type)
		return -1;
	if (k1.offset > k2->offset)
		return 1;
	if (k1.offset < k2->offset)
		return -1;
	return 0;
}


192
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
193
		       struct btrfs_root *root, struct extent_buffer *parent,
194 195
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
196
{
197 198
	struct extent_buffer *cur;
	struct extent_buffer *tmp;
199
	u64 blocknr;
200 201
	u64 search_start = *last_ret;
	u64 last_block = 0;
202 203 204 205 206
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
207
	int parent_level;
208 209
	int uptodate;
	u32 blocksize;
210 211
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;
212

213 214 215 216
	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

217 218 219 220 221 222 223 224 225 226
	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}
227

228 229
	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
230 231 232 233 234 235 236
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;
237

238 239 240 241 242 243 244 245
		if (!parent->map_token) {
			map_extent_buffer(parent,
					btrfs_node_key_ptr_offset(i),
					sizeof(struct btrfs_key_ptr),
					&parent->map_token, &parent->kaddr,
					&parent->map_start, &parent->map_len,
					KM_USER1);
		}
246 247 248 249 250
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
251
		blocknr = btrfs_node_blockptr(parent, i);
252 253
		if (last_block == 0)
			last_block = blocknr;
254

255
		if (i > 0) {
256 257
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
258
		}
259
		if (close && i < end_slot - 2) {
260 261
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
262
		}
263 264
		if (close) {
			last_block = blocknr;
265
			continue;
266
		}
267 268 269 270 271
		if (parent->map_token) {
			unmap_extent_buffer(parent, parent->map_token,
					    KM_USER1);
			parent->map_token = NULL;
		}
272

273 274 275 276 277
		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur);
		else
			uptodate = 0;
278
		if (!cur || !uptodate) {
279
			if (cache_only) {
280
				free_extent_buffer(cur);
281 282
				continue;
			}
283 284 285 286 287
			if (!cur) {
				cur = read_tree_block(root, blocknr,
							 blocksize);
			} else if (!uptodate) {
				btrfs_read_buffer(cur);
288
			}
289
		}
290
		if (search_start == 0)
291
			search_start = last_block;
292

293 294 295 296
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&tmp, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize));
Y
Yan 已提交
297
		if (err) {
298
			free_extent_buffer(cur);
299
			break;
Y
Yan 已提交
300
		}
301
		search_start = tmp->start;
302
		last_block = tmp->start;
303 304
		*last_ret = search_start;
		if (parent_level == 1)
305 306
			btrfs_clear_buffer_defrag(tmp);
		free_extent_buffer(tmp);
307
	}
308 309 310 311 312
	if (parent->map_token) {
		unmap_extent_buffer(parent, parent->map_token,
				    KM_USER1);
		parent->map_token = NULL;
	}
313 314 315
	return err;
}

C
Chris Mason 已提交
316 317 318 319 320
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
C
Chris Mason 已提交
321
static inline unsigned int leaf_data_end(struct btrfs_root *root,
322
					 struct extent_buffer *leaf)
323
{
324
	u32 nr = btrfs_header_nritems(leaf);
325
	if (nr == 0)
C
Chris Mason 已提交
326
		return BTRFS_LEAF_DATA_SIZE(root);
327
	return btrfs_item_offset_nr(leaf, nr - 1);
328 329
}

C
Chris Mason 已提交
330 331
static int check_node(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
C
Chris Mason 已提交
332
{
333 334 335 336
	struct extent_buffer *parent = NULL;
	struct extent_buffer *node = path->nodes[level];
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key node_key;
C
Chris Mason 已提交
337
	int parent_slot;
338 339
	int slot;
	struct btrfs_key cpukey;
340
	u32 nritems = btrfs_header_nritems(node);
C
Chris Mason 已提交
341 342

	if (path->nodes[level + 1])
343
		parent = path->nodes[level + 1];
A
Aneesh 已提交
344

345
	slot = path->slots[level];
346 347
	BUG_ON(nritems == 0);
	if (parent) {
A
Aneesh 已提交
348
		parent_slot = path->slots[level + 1];
349 350 351
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_node_key(node, &node_key, 0);
		BUG_ON(memcmp(&parent_key, &node_key,
C
Chris Mason 已提交
352
			      sizeof(struct btrfs_disk_key)));
353
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
354
		       btrfs_header_bytenr(node));
C
Chris Mason 已提交
355
	}
C
Chris Mason 已提交
356
	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
357
	if (slot != 0) {
358 359 360
		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
361 362
	}
	if (slot < nritems - 1) {
363 364 365
		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
C
Chris Mason 已提交
366 367 368 369
	}
	return 0;
}

C
Chris Mason 已提交
370 371
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
C
Chris Mason 已提交
372
{
373 374
	struct extent_buffer *leaf = path->nodes[level];
	struct extent_buffer *parent = NULL;
C
Chris Mason 已提交
375
	int parent_slot;
376
	struct btrfs_key cpukey;
377 378 379
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key leaf_key;
	int slot = path->slots[0];
380

381
	u32 nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
382 383

	if (path->nodes[level + 1])
384
		parent = path->nodes[level + 1];
385 386 387 388 389

	if (nritems == 0)
		return 0;

	if (parent) {
A
Aneesh 已提交
390
		parent_slot = path->slots[level + 1];
391 392
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_item_key(leaf, &leaf_key, 0);
393

394
		BUG_ON(memcmp(&parent_key, &leaf_key,
C
Chris Mason 已提交
395
		       sizeof(struct btrfs_disk_key)));
396
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
397
		       btrfs_header_bytenr(leaf));
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
	}
#if 0
	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
		btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
		btrfs_item_key(leaf, &leaf_key, i);
		if (comp_keys(&leaf_key, &cpukey) >= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", i);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, i) !=
			btrfs_item_end_nr(leaf, i + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", i);
			BUG_ON(1);
		}
		if (i == 0) {
			if (btrfs_item_offset_nr(leaf, i) +
			       btrfs_item_size_nr(leaf, i) !=
			       BTRFS_LEAF_DATA_SIZE(root)) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d first offset bad\n", i);
				BUG_ON(1);
			}
		}
C
Chris Mason 已提交
423
	}
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
	if (nritems > 0) {
		if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d bad size \n", nritems - 1);
				BUG_ON(1);
		}
	}
#endif
	if (slot != 0 && slot < nritems - 1) {
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
		if (comp_keys(&leaf_key, &cpukey) <= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", slot);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, slot - 1) !=
		       btrfs_item_end_nr(leaf, slot)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
446 447
	}
	if (slot < nritems - 1) {
448 449 450 451 452 453 454 455 456
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
		if (btrfs_item_offset_nr(leaf, slot) !=
			btrfs_item_end_nr(leaf, slot + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
C
Chris Mason 已提交
457
	}
458 459
	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
C
Chris Mason 已提交
460 461 462
	return 0;
}

C
Chris Mason 已提交
463 464
static int check_block(struct btrfs_root *root, struct btrfs_path *path,
			int level)
C
Chris Mason 已提交
465
{
466
	return 0;
467
#if 0
468 469
	struct extent_buffer *buf = path->nodes[level];

470 471 472
	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
				 (unsigned long)btrfs_header_fsid(buf),
				 BTRFS_FSID_SIZE)) {
473
		printk("warning bad block %Lu\n", buf->start);
474
		return 1;
475
	}
476
#endif
C
Chris Mason 已提交
477
	if (level == 0)
C
Chris Mason 已提交
478 479
		return check_leaf(root, path, level);
	return check_node(root, path, level);
C
Chris Mason 已提交
480 481
}

C
Chris Mason 已提交
482
/*
483 484 485
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
C
Chris Mason 已提交
486 487 488 489 490 491
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
492 493 494
static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
			      int item_size, struct btrfs_key *key,
			      int max, int *slot)
495 496 497 498 499
{
	int low = 0;
	int high = max;
	int mid;
	int ret;
500
	struct btrfs_disk_key *tmp = NULL;
501 502 503 504 505 506
	struct btrfs_disk_key unaligned;
	unsigned long offset;
	char *map_token = NULL;
	char *kaddr = NULL;
	unsigned long map_start = 0;
	unsigned long map_len = 0;
507
	int err;
508 509 510

	while(low < high) {
		mid = (low + high) / 2;
511 512 513 514 515
		offset = p + mid * item_size;

		if (!map_token || offset < map_start ||
		    (offset + sizeof(struct btrfs_disk_key)) >
		    map_start + map_len) {
516
			if (map_token) {
517
				unmap_extent_buffer(eb, map_token, KM_USER0);
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
				map_token = NULL;
			}
			err = map_extent_buffer(eb, offset,
						sizeof(struct btrfs_disk_key),
						&map_token, &kaddr,
						&map_start, &map_len, KM_USER0);

			if (!err) {
				tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
			} else {
				read_extent_buffer(eb, &unaligned,
						   offset, sizeof(unaligned));
				tmp = &unaligned;
			}
533 534 535 536 537

		} else {
			tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
		}
538 539 540 541 542 543 544 545
		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
546 547
			if (map_token)
				unmap_extent_buffer(eb, map_token, KM_USER0);
548 549 550 551
			return 0;
		}
	}
	*slot = low;
552 553
	if (map_token)
		unmap_extent_buffer(eb, map_token, KM_USER0);
554 555 556
	return 1;
}

C
Chris Mason 已提交
557 558 559 560
/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
561 562
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		      int level, int *slot)
563
{
564 565 566
	if (level == 0) {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_leaf, items),
C
Chris Mason 已提交
567
					  sizeof(struct btrfs_item),
568
					  key, btrfs_header_nritems(eb),
569
					  slot);
570
	} else {
571 572
		return generic_bin_search(eb,
					  offsetof(struct btrfs_node, ptrs),
C
Chris Mason 已提交
573
					  sizeof(struct btrfs_key_ptr),
574
					  key, btrfs_header_nritems(eb),
575
					  slot);
576 577 578 579
	}
	return -1;
}

580 581
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
582 583 584
{
	if (slot < 0)
		return NULL;
585
	if (slot >= btrfs_header_nritems(parent))
586
		return NULL;
587 588
	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
		       btrfs_level_size(root, btrfs_header_level(parent) - 1));
589 590
}

591 592
static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
			 *root, struct btrfs_path *path, int level)
593
{
594 595 596 597
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
598 599 600 601
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
602
	int err_on_enospc = 0;
603
	u64 orig_ptr;
604 605 606 607

	if (level == 0)
		return 0;

608
	mid = path->nodes[level];
609
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
610

C
Chris Mason 已提交
611
	if (level < BTRFS_MAX_LEVEL - 1)
612
		parent = path->nodes[level + 1];
613 614
	pslot = path->slots[level + 1];

C
Chris Mason 已提交
615 616 617 618
	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
619 620
	if (!parent) {
		struct extent_buffer *child;
621

622
		if (btrfs_header_nritems(mid) != 1)
623 624 625
			return 0;

		/* promote the child to a root */
626
		child = read_node_slot(root, mid, 0);
627 628 629
		BUG_ON(!child);
		root->node = child;
		path->nodes[level] = NULL;
630 631
		clean_tree_block(trans, root, mid);
		wait_on_tree_block_writeback(root, mid);
632
		/* once for the path */
633
		free_extent_buffer(mid);
634
		ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1);
635
		/* once for the root ptr */
636
		free_extent_buffer(mid);
637
		return ret;
638
	}
639
	if (btrfs_header_nritems(mid) >
C
Chris Mason 已提交
640
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
641 642
		return 0;

643
	if (btrfs_header_nritems(mid) < 2)
644 645
		err_on_enospc = 1;

646 647 648 649
	left = read_node_slot(root, parent, pslot - 1);
	if (left) {
		wret = btrfs_cow_block(trans, root, left,
				       parent, pslot - 1, &left);
650 651 652 653
		if (wret) {
			ret = wret;
			goto enospc;
		}
654
	}
655 656 657 658
	right = read_node_slot(root, parent, pslot + 1);
	if (right) {
		wret = btrfs_cow_block(trans, root, right,
				       parent, pslot + 1, &right);
659 660 661 662 663 664 665
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
666 667 668
	if (left) {
		orig_slot += btrfs_header_nritems(left);
		wret = push_node_left(trans, root, left, mid);
669 670
		if (wret < 0)
			ret = wret;
671
		if (btrfs_header_nritems(mid) < 2)
672
			err_on_enospc = 1;
673
	}
674 675 676 677

	/*
	 * then try to empty the right most buffer into the middle
	 */
678 679
	if (right) {
		wret = push_node_left(trans, root, mid, right);
680
		if (wret < 0 && wret != -ENOSPC)
681
			ret = wret;
682
		if (btrfs_header_nritems(right) == 0) {
683 684 685
			u64 bytenr = right->start;
			u32 blocksize = right->len;

686 687 688
			clean_tree_block(trans, root, right);
			wait_on_tree_block_writeback(root, right);
			free_extent_buffer(right);
689
			right = NULL;
690 691
			wret = del_ptr(trans, root, path, level + 1, pslot +
				       1);
692 693
			if (wret)
				ret = wret;
694 695
			wret = btrfs_free_extent(trans, root, bytenr,
						 blocksize, 1);
696 697 698
			if (wret)
				ret = wret;
		} else {
699 700 701 702
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
703 704
		}
	}
705
	if (btrfs_header_nritems(mid) == 1) {
706 707 708 709 710 711 712 713 714
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
715 716
		BUG_ON(!left);
		wret = balance_node_right(trans, root, mid, left);
717
		if (wret < 0) {
718
			ret = wret;
719 720
			goto enospc;
		}
721 722
		BUG_ON(wret == 1);
	}
723
	if (btrfs_header_nritems(mid) == 0) {
724
		/* we've managed to empty the middle node, drop it */
725 726
		u64 bytenr = mid->start;
		u32 blocksize = mid->len;
727 728 729
		clean_tree_block(trans, root, mid);
		wait_on_tree_block_writeback(root, mid);
		free_extent_buffer(mid);
730
		mid = NULL;
731
		wret = del_ptr(trans, root, path, level + 1, pslot);
732 733
		if (wret)
			ret = wret;
734
		wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1);
735 736
		if (wret)
			ret = wret;
737 738
	} else {
		/* update the parent key to reflect our changes */
739 740 741 742
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
743
	}
744

745
	/* update the path */
746 747 748 749
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			extent_buffer_get(left);
			path->nodes[level] = left;
750 751
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
752 753
			if (mid)
				free_extent_buffer(mid);
754
		} else {
755
			orig_slot -= btrfs_header_nritems(left);
756 757 758
			path->slots[level] = orig_slot;
		}
	}
759
	/* double check we haven't messed things up */
C
Chris Mason 已提交
760
	check_block(root, path, level);
C
Chris Mason 已提交
761
	if (orig_ptr !=
762
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
763
		BUG();
764
enospc:
765 766 767 768
	if (right)
		free_extent_buffer(right);
	if (left)
		free_extent_buffer(left);
769 770 771
	return ret;
}

772 773 774 775 776
/* returns zero if the push worked, non-zero otherwise */
static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path, int level)
{
777 778 779 780
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
781 782 783 784 785 786 787 788 789
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	if (level == 0)
		return 1;

790
	mid = path->nodes[level];
791 792 793
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
794
		parent = path->nodes[level + 1];
795 796
	pslot = path->slots[level + 1];

797
	if (!parent)
798 799
		return 1;

800
	left = read_node_slot(root, parent, pslot - 1);
801 802

	/* first, try to make some room in the middle buffer */
803
	if (left) {
804
		u32 left_nr;
805
		left_nr = btrfs_header_nritems(left);
C
Chris Mason 已提交
806 807 808
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
809 810
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left);
811 812 813 814
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
815
						      left, mid);
816
			}
C
Chris Mason 已提交
817
		}
818 819 820
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
821
			struct btrfs_disk_key disk_key;
822
			orig_slot += left_nr;
823 824 825 826 827
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				path->nodes[level] = left;
828 829
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
830
				free_extent_buffer(mid);
831 832
			} else {
				orig_slot -=
833
					btrfs_header_nritems(left);
834
				path->slots[level] = orig_slot;
835
				free_extent_buffer(left);
836 837 838
			}
			return 0;
		}
839
		free_extent_buffer(left);
840
	}
841
	right= read_node_slot(root, parent, pslot + 1);
842 843 844 845

	/*
	 * then try to empty the right most buffer into the middle
	 */
846
	if (right) {
C
Chris Mason 已提交
847
		u32 right_nr;
848
		right_nr = btrfs_header_nritems(right);
C
Chris Mason 已提交
849 850 851
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
852 853 854
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right);
855 856 857 858
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
859
							  right, mid);
860
			}
C
Chris Mason 已提交
861
		}
862 863 864
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
865 866 867 868 869 870 871 872
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				path->nodes[level] = right;
873 874
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
875 876
					btrfs_header_nritems(mid);
				free_extent_buffer(mid);
877
			} else {
878
				free_extent_buffer(right);
879 880 881
			}
			return 0;
		}
882
		free_extent_buffer(right);
883 884 885 886
	}
	return 1;
}

887 888 889 890
/*
 * readahead one full node of leaves
 */
static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
891
			     int level, int slot)
892
{
893
	struct extent_buffer *node;
894 895
	u32 nritems;
	u64 search;
896 897 898
	u64 lowest_read;
	u64 highest_read;
	u64 nread = 0;
899
	int direction = path->reada;
900
	struct extent_buffer *eb;
901 902 903
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;
904

905
	if (level != 1)
906 907 908
		return;

	if (!path->nodes[level])
909 910
		return;

911
	node = path->nodes[level];
912
	search = btrfs_node_blockptr(node, slot);
913 914
	blocksize = btrfs_level_size(root, level - 1);
	eb = btrfs_find_tree_block(root, search, blocksize);
915 916
	if (eb) {
		free_extent_buffer(eb);
917 918 919
		return;
	}

920 921 922
	highest_read = search;
	lowest_read = search;

923
	nritems = btrfs_header_nritems(node);
924
	nr = slot;
925
	while(1) {
926 927 928 929 930 931 932 933
		if (direction < 0) {
			if (nr == 0)
				break;
			nr--;
		} else if (direction > 0) {
			nr++;
			if (nr >= nritems)
				break;
934
		}
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951
		search = btrfs_node_blockptr(node, nr);
		if ((search >= lowest_read && search <= highest_read) ||
		    (search < lowest_read && lowest_read - search <= 32768) ||
		    (search > highest_read && search - highest_read <= 32768)) {
			readahead_tree_block(root, search, blocksize);
			nread += blocksize;
		}
		nscan++;
		if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
			break;
		if(nread > (1024 * 1024) || nscan > 128)
			break;

		if (search < lowest_read)
			lowest_read = search;
		if (search > highest_read)
			highest_read = search;
952 953
	}
}
C
Chris Mason 已提交
954 955 956 957 958 959
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
C
Chris Mason 已提交
960 961
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
C
Chris Mason 已提交
962 963 964 965
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
C
Chris Mason 已提交
966
 */
967 968 969
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
970
{
971
	struct extent_buffer *b;
972
	u64 bytenr;
973
	u64 ptr_gen;
974 975 976
	int slot;
	int ret;
	int level;
977
	int should_reada = p->reada;
978 979
	u8 lowest_level = 0;

980 981
	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len);
C
Chris Mason 已提交
982 983
	WARN_ON(p->nodes[0] != NULL);
	WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
984 985
again:
	b = root->node;
986
	extent_buffer_get(b);
987
	while (b) {
988
		level = btrfs_header_level(b);
C
Chris Mason 已提交
989 990
		if (cow) {
			int wret;
C
Chris Mason 已提交
991 992 993
			wret = btrfs_cow_block(trans, root, b,
					       p->nodes[level + 1],
					       p->slots[level + 1],
Y
Yan 已提交
994
					       &b);
995
			if (wret) {
996
				free_extent_buffer(b);
997 998
				return wret;
			}
C
Chris Mason 已提交
999 1000
		}
		BUG_ON(!cow && ins_len);
1001
		if (level != btrfs_header_level(b))
C
Chris Mason 已提交
1002
			WARN_ON(1);
1003
		level = btrfs_header_level(b);
1004
		p->nodes[level] = b;
C
Chris Mason 已提交
1005
		ret = check_block(root, p, level);
C
Chris Mason 已提交
1006 1007
		if (ret)
			return -1;
1008 1009
		ret = bin_search(b, key, level, &slot);
		if (level != 0) {
1010 1011 1012
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
1013
			if (ins_len > 0 && btrfs_header_nritems(b) >=
1014
			    BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1015
				int sret = split_node(trans, root, p, level);
C
Chris Mason 已提交
1016 1017 1018 1019 1020
				BUG_ON(sret > 0);
				if (sret)
					return sret;
				b = p->nodes[level];
				slot = p->slots[level];
1021
			} else if (ins_len < 0) {
1022 1023
				int sret = balance_level(trans, root, p,
							 level);
1024 1025 1026
				if (sret)
					return sret;
				b = p->nodes[level];
1027 1028
				if (!b) {
					btrfs_release_path(NULL, p);
1029
					goto again;
1030
				}
1031
				slot = p->slots[level];
1032
				BUG_ON(btrfs_header_nritems(b) == 1);
C
Chris Mason 已提交
1033
			}
1034 1035 1036
			/* this is only true while dropping a snapshot */
			if (level == lowest_level)
				break;
1037
			bytenr = btrfs_node_blockptr(b, slot);
1038
			ptr_gen = btrfs_node_ptr_generation(b, slot);
1039 1040
			if (should_reada)
				reada_for_search(root, p, level, slot);
1041 1042
			b = read_tree_block(root, bytenr,
					    btrfs_level_size(root, level - 1));
1043 1044 1045 1046 1047 1048 1049
			if (ptr_gen != btrfs_header_generation(b)) {
				printk("block %llu bad gen wanted %llu "
				       "found %llu\n",
			        (unsigned long long)b->start,
				(unsigned long long)ptr_gen,
			        (unsigned long long)btrfs_header_generation(b));
			}
1050 1051
		} else {
			p->slots[level] = slot;
1052
			if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
C
Chris Mason 已提交
1053
			    sizeof(struct btrfs_item) + ins_len) {
1054
				int sret = split_leaf(trans, root, key,
1055
						      p, ins_len, ret == 0);
C
Chris Mason 已提交
1056 1057 1058 1059
				BUG_ON(sret > 0);
				if (sret)
					return sret;
			}
1060 1061 1062
			return ret;
		}
	}
C
Chris Mason 已提交
1063
	return 1;
1064 1065
}

C
Chris Mason 已提交
1066 1067 1068 1069 1070 1071
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
C
Chris Mason 已提交
1072 1073 1074
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
C
Chris Mason 已提交
1075
 */
1076 1077 1078
static int fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
1079 1080
{
	int i;
C
Chris Mason 已提交
1081
	int ret = 0;
1082 1083
	struct extent_buffer *t;

C
Chris Mason 已提交
1084
	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1085
		int tslot = path->slots[i];
1086
		if (!path->nodes[i])
1087
			break;
1088 1089
		t = path->nodes[i];
		btrfs_set_node_key(t, key, tslot);
C
Chris Mason 已提交
1090
		btrfs_mark_buffer_dirty(path->nodes[i]);
1091 1092 1093
		if (tslot != 0)
			break;
	}
C
Chris Mason 已提交
1094
	return ret;
1095 1096
}

C
Chris Mason 已提交
1097 1098
/*
 * try to push data from one node into the next node left in the
1099
 * tree.
C
Chris Mason 已提交
1100 1101 1102
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
C
Chris Mason 已提交
1103
 */
1104
static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1105 1106
			  *root, struct extent_buffer *dst,
			  struct extent_buffer *src)
1107 1108
{
	int push_items = 0;
1109 1110
	int src_nritems;
	int dst_nritems;
C
Chris Mason 已提交
1111
	int ret = 0;
1112

1113 1114
	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
C
Chris Mason 已提交
1115
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1116

1117
	if (push_items <= 0) {
1118
		return 1;
1119
	}
1120

1121
	if (src_nritems < push_items)
1122 1123
		push_items = src_nritems;

1124 1125 1126 1127 1128
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
		           push_items * sizeof(struct btrfs_key_ptr));

1129
	if (push_items < src_nritems) {
1130 1131 1132 1133 1134 1135 1136 1137 1138
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150
	return ret;
}

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will  only push up to 1/2 the contents of the left node over
 */
1151 1152 1153 1154
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst,
			      struct extent_buffer *src)
1155 1156 1157 1158 1159 1160 1161
{
	int push_items = 0;
	int max_push;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

1162 1163
	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
C
Chris Mason 已提交
1164
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1165
	if (push_items <= 0)
1166 1167 1168 1169
		return 1;

	max_push = src_nritems / 2 + 1;
	/* don't try to empty the node */
Y
Yan 已提交
1170
	if (max_push >= src_nritems)
1171
		return 1;
Y
Yan 已提交
1172

1173 1174 1175
	if (max_push < push_items)
		push_items = max_push;

1176 1177 1178 1179
	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
				      btrfs_node_key_ptr_offset(0),
				      (dst_nritems) *
				      sizeof(struct btrfs_key_ptr));
C
Chris Mason 已提交
1180

1181 1182 1183 1184
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(src_nritems - push_items),
		           push_items * sizeof(struct btrfs_key_ptr));
1185

1186 1187
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
1188

1189 1190
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
C
Chris Mason 已提交
1191
	return ret;
1192 1193
}

C
Chris Mason 已提交
1194 1195 1196 1197
/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
C
Chris Mason 已提交
1198 1199
 *
 * returns zero on success or < 0 on failure.
C
Chris Mason 已提交
1200
 */
1201 1202 1203
static int insert_new_root(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path, int level)
C
Chris Mason 已提交
1204
{
1205 1206 1207
	struct extent_buffer *lower;
	struct extent_buffer *c;
	struct btrfs_disk_key lower_key;
C
Chris Mason 已提交
1208 1209 1210 1211

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

1212 1213
	c = btrfs_alloc_free_block(trans, root, root->nodesize,
				   root->node->start, 0);
1214 1215 1216 1217 1218
	if (IS_ERR(c))
		return PTR_ERR(c);
	memset_extent_buffer(c, 0, 0, root->nodesize);
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
1219
	btrfs_set_header_bytenr(c, c->start);
1220 1221 1222 1223 1224 1225 1226 1227 1228
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_owner(c, root->root_key.objectid);
	lower = path->nodes[level-1];

	write_extent_buffer(c, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(c),
			    BTRFS_FSID_SIZE);
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
C
Chris Mason 已提交
1229
	else
1230 1231
		btrfs_node_key(lower, &lower_key, 0);
	btrfs_set_node_key(c, &lower_key, 0);
1232
	btrfs_set_node_blockptr(c, 0, lower->start);
1233 1234
	WARN_ON(btrfs_header_generation(lower) == 0);
	btrfs_set_node_ptr_generation(c, 0, btrfs_header_generation(lower));
1235

1236
	btrfs_mark_buffer_dirty(c);
1237

C
Chris Mason 已提交
1238
	/* the super has an extra ref to root->node */
1239 1240 1241 1242
	free_extent_buffer(root->node);
	root->node = c;
	extent_buffer_get(c);
	path->nodes[level] = c;
C
Chris Mason 已提交
1243 1244 1245 1246
	path->slots[level] = 0;
	return 0;
}

C
Chris Mason 已提交
1247 1248 1249
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
C
Chris Mason 已提交
1250
 *
C
Chris Mason 已提交
1251 1252
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
C
Chris Mason 已提交
1253 1254
 *
 * returns zero on success and < 0 on any error
C
Chris Mason 已提交
1255
 */
1256 1257
static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, struct btrfs_disk_key
1258
		      *key, u64 bytenr, int slot, int level)
C
Chris Mason 已提交
1259
{
1260
	struct extent_buffer *lower;
C
Chris Mason 已提交
1261
	int nritems;
C
Chris Mason 已提交
1262 1263

	BUG_ON(!path->nodes[level]);
1264 1265
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
C
Chris Mason 已提交
1266 1267
	if (slot > nritems)
		BUG();
C
Chris Mason 已提交
1268
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
C
Chris Mason 已提交
1269 1270
		BUG();
	if (slot != nritems) {
1271 1272 1273
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
			      btrfs_node_key_ptr_offset(slot),
C
Chris Mason 已提交
1274
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
C
Chris Mason 已提交
1275
	}
1276
	btrfs_set_node_key(lower, key, slot);
1277
	btrfs_set_node_blockptr(lower, slot, bytenr);
1278 1279
	WARN_ON(trans->transid == 0);
	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1280 1281
	btrfs_set_header_nritems(lower, nritems + 1);
	btrfs_mark_buffer_dirty(lower);
C
Chris Mason 已提交
1282 1283 1284
	return 0;
}

C
Chris Mason 已提交
1285 1286 1287 1288 1289 1290
/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
C
Chris Mason 已提交
1291 1292
 *
 * returns 0 on success and < 0 on failure
C
Chris Mason 已提交
1293
 */
1294 1295
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level)
1296
{
1297 1298 1299
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
1300
	int mid;
C
Chris Mason 已提交
1301
	int ret;
C
Chris Mason 已提交
1302
	int wret;
1303
	u32 c_nritems;
1304

1305 1306
	c = path->nodes[level];
	if (c == root->node) {
C
Chris Mason 已提交
1307
		/* trying to split the root, lets make a new one */
1308
		ret = insert_new_root(trans, root, path, level + 1);
C
Chris Mason 已提交
1309 1310
		if (ret)
			return ret;
1311 1312
	} else {
		ret = push_nodes_for_insert(trans, root, path, level);
1313 1314
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
1315 1316
		    BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
			return 0;
1317 1318
		if (ret < 0)
			return ret;
1319
	}
1320

1321
	c_nritems = btrfs_header_nritems(c);
1322 1323
	split = btrfs_alloc_free_block(trans, root, root->nodesize,
				       c->start, 0);
1324 1325 1326 1327 1328
	if (IS_ERR(split))
		return PTR_ERR(split);

	btrfs_set_header_flags(split, btrfs_header_flags(c));
	btrfs_set_header_level(split, btrfs_header_level(c));
1329
	btrfs_set_header_bytenr(split, split->start);
1330 1331 1332 1333 1334
	btrfs_set_header_generation(split, trans->transid);
	btrfs_set_header_owner(split, root->root_key.objectid);
	write_extent_buffer(split, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(split),
			    BTRFS_FSID_SIZE);
1335

1336
	mid = (c_nritems + 1) / 2;
1337 1338 1339 1340 1341 1342 1343

	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
C
Chris Mason 已提交
1344 1345
	ret = 0;

1346 1347 1348 1349
	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	btrfs_node_key(split, &disk_key, 0);
1350
	wret = insert_ptr(trans, root, path, &disk_key, split->start,
1351
			  path->slots[level + 1] + 1,
C
Chris Mason 已提交
1352
			  level + 1);
C
Chris Mason 已提交
1353 1354 1355
	if (wret)
		ret = wret;

C
Chris Mason 已提交
1356
	if (path->slots[level] >= mid) {
C
Chris Mason 已提交
1357
		path->slots[level] -= mid;
1358 1359
		free_extent_buffer(c);
		path->nodes[level] = split;
C
Chris Mason 已提交
1360 1361
		path->slots[level + 1] += 1;
	} else {
1362
		free_extent_buffer(split);
1363
	}
C
Chris Mason 已提交
1364
	return ret;
1365 1366
}

C
Chris Mason 已提交
1367 1368 1369 1370 1371
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
1372
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1373 1374
{
	int data_len;
1375
	int nritems = btrfs_header_nritems(l);
1376
	int end = min(nritems, start + nr) - 1;
1377 1378 1379

	if (!nr)
		return 0;
1380 1381
	data_len = btrfs_item_end_nr(l, start);
	data_len = data_len - btrfs_item_offset_nr(l, end);
C
Chris Mason 已提交
1382
	data_len += sizeof(struct btrfs_item) * nr;
1383
	WARN_ON(data_len < 0);
1384 1385 1386
	return data_len;
}

1387 1388 1389 1390 1391
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
1392
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1393
{
1394 1395 1396 1397 1398
	int nritems = btrfs_header_nritems(leaf);
	int ret;
	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
	if (ret < 0) {
		printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
J
Jens Axboe 已提交
1399
		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1400 1401 1402
		       leaf_space_used(leaf, 0, nritems), nritems);
	}
	return ret;
1403 1404
}

C
Chris Mason 已提交
1405 1406 1407
/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
C
Chris Mason 已提交
1408 1409 1410
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
C
Chris Mason 已提交
1411
 */
1412
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1413 1414
			   *root, struct btrfs_path *path, int data_size,
			   int empty)
C
Chris Mason 已提交
1415
{
1416 1417 1418 1419
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	struct btrfs_disk_key disk_key;
C
Chris Mason 已提交
1420
	int slot;
1421
	u32 i;
C
Chris Mason 已提交
1422 1423 1424
	int free_space;
	int push_space = 0;
	int push_items = 0;
C
Chris Mason 已提交
1425
	struct btrfs_item *item;
1426
	u32 left_nritems;
1427
	u32 nr;
1428
	u32 right_nritems;
1429
	u32 data_end;
1430
	u32 this_item_size;
1431
	int ret;
C
Chris Mason 已提交
1432 1433 1434 1435 1436 1437

	slot = path->slots[1];
	if (!path->nodes[1]) {
		return 1;
	}
	upper = path->nodes[1];
1438
	if (slot >= btrfs_header_nritems(upper) - 1)
C
Chris Mason 已提交
1439
		return 1;
1440

1441 1442
	right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
				root->leafsize);
C
Chris Mason 已提交
1443
	free_space = btrfs_leaf_free_space(root, right);
C
Chris Mason 已提交
1444
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1445
		free_extent_buffer(right);
C
Chris Mason 已提交
1446 1447
		return 1;
	}
1448

C
Chris Mason 已提交
1449
	/* cow and double check */
1450 1451
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right);
1452
	if (ret) {
1453
		free_extent_buffer(right);
1454 1455
		return 1;
	}
C
Chris Mason 已提交
1456
	free_space = btrfs_leaf_free_space(root, right);
C
Chris Mason 已提交
1457
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1458
		free_extent_buffer(right);
C
Chris Mason 已提交
1459 1460 1461
		return 1;
	}

1462
	left_nritems = btrfs_header_nritems(left);
1463
	if (left_nritems == 0) {
1464
		free_extent_buffer(right);
1465 1466
		return 1;
	}
1467

1468 1469 1470 1471 1472 1473 1474
	if (empty)
		nr = 0;
	else
		nr = 1;

	i = left_nritems - 1;
	while (i >= nr) {
1475
		item = btrfs_item_nr(left, i);
1476

C
Chris Mason 已提交
1477 1478
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489

		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
C
Chris Mason 已提交
1490 1491
			break;
		push_items++;
1492
		push_space += this_item_size + sizeof(*item);
1493 1494 1495
		if (i == 0)
			break;
		i--;
1496 1497 1498 1499
	}
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
C
Chris Mason 已提交
1500
	}
1501

C
Chris Mason 已提交
1502
	if (push_items == 0) {
1503
		free_extent_buffer(right);
C
Chris Mason 已提交
1504 1505
		return 1;
	}
1506

1507
	if (!empty && push_items == left_nritems)
1508
		WARN_ON(1);
1509

C
Chris Mason 已提交
1510
	/* push left to right */
1511
	right_nritems = btrfs_header_nritems(right);
1512

1513
	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
C
Chris Mason 已提交
1514
	push_space -= leaf_data_end(root, left);
1515

C
Chris Mason 已提交
1516
	/* make room in the right data area */
1517 1518 1519 1520 1521 1522
	data_end = leaf_data_end(root, right);
	memmove_extent_buffer(right,
			      btrfs_leaf_data(right) + data_end - push_space,
			      btrfs_leaf_data(right) + data_end,
			      BTRFS_LEAF_DATA_SIZE(root) - data_end);

C
Chris Mason 已提交
1523
	/* copy from the left data area */
1524
	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
C
Chris Mason 已提交
1525 1526 1527
		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
		     btrfs_leaf_data(left) + leaf_data_end(root, left),
		     push_space);
1528 1529 1530 1531 1532

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

C
Chris Mason 已提交
1533
	/* copy the items from left to right */
1534 1535 1536
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
		   btrfs_item_nr_offset(left_nritems - push_items),
		   push_items * sizeof(struct btrfs_item));
C
Chris Mason 已提交
1537 1538

	/* update the item pointers */
1539
	right_nritems += push_items;
1540
	btrfs_set_header_nritems(right, right_nritems);
C
Chris Mason 已提交
1541
	push_space = BTRFS_LEAF_DATA_SIZE(root);
1542
	for (i = 0; i < right_nritems; i++) {
1543
		item = btrfs_item_nr(right, i);
1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}
		push_space -= btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
C
Chris Mason 已提交
1558
	}
1559
	left_nritems -= push_items;
1560
	btrfs_set_header_nritems(left, left_nritems);
C
Chris Mason 已提交
1561

1562 1563
	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
1564
	btrfs_mark_buffer_dirty(right);
1565

1566 1567
	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
C
Chris Mason 已提交
1568
	btrfs_mark_buffer_dirty(upper);
C
Chris Mason 已提交
1569

C
Chris Mason 已提交
1570
	/* then fixup the leaf pointer in the path */
1571 1572
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
1573 1574
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
C
Chris Mason 已提交
1575 1576
		path->slots[1] += 1;
	} else {
1577
		free_extent_buffer(right);
C
Chris Mason 已提交
1578 1579 1580
	}
	return 0;
}
C
Chris Mason 已提交
1581 1582 1583 1584
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
1585
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1586 1587
			  *root, struct btrfs_path *path, int data_size,
			  int empty)
1588
{
1589 1590 1591
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
1592 1593 1594 1595 1596
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
C
Chris Mason 已提交
1597
	struct btrfs_item *item;
1598
	u32 old_left_nritems;
1599
	u32 right_nritems;
1600
	u32 nr;
C
Chris Mason 已提交
1601 1602
	int ret = 0;
	int wret;
1603 1604
	u32 this_item_size;
	u32 old_left_item_size;
1605 1606

	slot = path->slots[1];
1607
	if (slot == 0)
1608
		return 1;
1609
	if (!path->nodes[1])
1610
		return 1;
1611

1612 1613 1614 1615 1616
	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0) {
		return 1;
	}

1617
	left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1618
			       slot - 1), root->leafsize);
C
Chris Mason 已提交
1619
	free_space = btrfs_leaf_free_space(root, left);
C
Chris Mason 已提交
1620
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1621
		free_extent_buffer(left);
1622 1623
		return 1;
	}
C
Chris Mason 已提交
1624 1625

	/* cow and double check */
1626 1627
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left);
1628 1629
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
1630
		free_extent_buffer(left);
1631 1632
		return 1;
	}
1633

C
Chris Mason 已提交
1634
	free_space = btrfs_leaf_free_space(root, left);
C
Chris Mason 已提交
1635
	if (free_space < data_size + sizeof(struct btrfs_item)) {
1636
		free_extent_buffer(left);
C
Chris Mason 已提交
1637 1638 1639
		return 1;
	}

1640 1641 1642 1643 1644 1645
	if (empty)
		nr = right_nritems;
	else
		nr = right_nritems - 1;

	for (i = 0; i < nr; i++) {
1646
		item = btrfs_item_nr(right, i);
1647 1648 1649 1650 1651 1652 1653 1654
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

1655 1656
		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);
1657 1658 1659

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
1660
			break;
1661

1662
		push_items++;
1663 1664 1665 1666 1667 1668
		push_space += this_item_size + sizeof(*item);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
1669
	}
1670

1671
	if (push_items == 0) {
1672
		free_extent_buffer(left);
1673 1674
		return 1;
	}
1675
	if (!empty && push_items == btrfs_header_nritems(right))
1676
		WARN_ON(1);
1677

1678
	/* push data from right to left */
1679 1680 1681 1682 1683
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

C
Chris Mason 已提交
1684
	push_space = BTRFS_LEAF_DATA_SIZE(root) -
1685 1686 1687
		     btrfs_item_offset_nr(right, push_items -1);

	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
C
Chris Mason 已提交
1688 1689
		     leaf_data_end(root, left) - push_space,
		     btrfs_leaf_data(right) +
1690
		     btrfs_item_offset_nr(right, push_items - 1),
C
Chris Mason 已提交
1691
		     push_space);
1692
	old_left_nritems = btrfs_header_nritems(left);
1693 1694
	BUG_ON(old_left_nritems < 0);

1695
	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
C
Chris Mason 已提交
1696
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1697
		u32 ioff;
1698

1699
		item = btrfs_item_nr(left, i);
1700 1701 1702 1703 1704 1705 1706 1707
		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

1708 1709
		ioff = btrfs_item_offset(left, item);
		btrfs_set_item_offset(left, item,
1710
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1711
	}
1712
	btrfs_set_header_nritems(left, old_left_nritems + push_items);
1713 1714 1715 1716
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
	}
1717 1718

	/* fixup right node */
1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732
	if (push_items > right_nritems) {
		printk("push items %d nr %u\n", push_items, right_nritems);
		WARN_ON(1);
	}

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
						  leaf_data_end(root, right);
		memmove_extent_buffer(right, btrfs_leaf_data(right) +
				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
				      btrfs_leaf_data(right) +
				      leaf_data_end(root, right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1733 1734 1735
			      btrfs_item_nr_offset(push_items),
			     (btrfs_header_nritems(right) - push_items) *
			     sizeof(struct btrfs_item));
1736
	}
1737 1738
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
C
Chris Mason 已提交
1739
	push_space = BTRFS_LEAF_DATA_SIZE(root);
1740 1741
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		push_space = push_space - btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
1757
	}
1758

1759
	btrfs_mark_buffer_dirty(left);
1760 1761
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
1762

1763 1764
	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
C
Chris Mason 已提交
1765 1766
	if (wret)
		ret = wret;
1767 1768 1769 1770

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
1771 1772
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
1773 1774
		path->slots[1] -= 1;
	} else {
1775
		free_extent_buffer(left);
1776 1777
		path->slots[0] -= push_items;
	}
1778
	BUG_ON(path->slots[0] < 0);
C
Chris Mason 已提交
1779
	return ret;
1780 1781
}

C
Chris Mason 已提交
1782 1783 1784
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
C
Chris Mason 已提交
1785 1786
 *
 * returns 0 if all went well and < 0 on failure.
C
Chris Mason 已提交
1787
 */
1788
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1789
		      *root, struct btrfs_key *ins_key,
1790
		      struct btrfs_path *path, int data_size, int extend)
1791
{
1792
	struct extent_buffer *l;
1793
	u32 nritems;
1794 1795
	int mid;
	int slot;
1796
	struct extent_buffer *right;
C
Chris Mason 已提交
1797
	int space_needed = data_size + sizeof(struct btrfs_item);
1798 1799 1800
	int data_copy_size;
	int rt_data_off;
	int i;
1801
	int ret = 0;
C
Chris Mason 已提交
1802
	int wret;
1803 1804
	int double_split;
	int num_doubles = 0;
1805
	struct btrfs_disk_key disk_key;
C
Chris Mason 已提交
1806

1807 1808 1809
	if (extend)
		space_needed = data_size;

C
Chris Mason 已提交
1810
	/* first try to make some room by pushing left and right */
1811
	if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1812
		wret = push_leaf_right(trans, root, path, data_size, 0);
1813
		if (wret < 0) {
C
Chris Mason 已提交
1814
			return wret;
1815 1816
		}
		if (wret) {
1817
			wret = push_leaf_left(trans, root, path, data_size, 0);
1818 1819 1820 1821
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];
C
Chris Mason 已提交
1822

1823
		/* did the pushes work? */
1824
		if (btrfs_leaf_free_space(root, l) >= space_needed)
1825
			return 0;
1826
	}
C
Chris Mason 已提交
1827

C
Chris Mason 已提交
1828
	if (!path->nodes[1]) {
1829
		ret = insert_new_root(trans, root, path, 1);
C
Chris Mason 已提交
1830 1831 1832
		if (ret)
			return ret;
	}
1833 1834 1835
again:
	double_split = 0;
	l = path->nodes[0];
1836
	slot = path->slots[0];
1837
	nritems = btrfs_header_nritems(l);
1838
	mid = (nritems + 1)/ 2;
1839

1840 1841
	right = btrfs_alloc_free_block(trans, root, root->leafsize,
				       l->start, 0);
1842 1843 1844 1845
	if (IS_ERR(right))
		return PTR_ERR(right);

	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1846
	btrfs_set_header_bytenr(right, right->start);
1847 1848 1849 1850 1851 1852
	btrfs_set_header_generation(right, trans->transid);
	btrfs_set_header_owner(right, root->root_key.objectid);
	btrfs_set_header_level(right, 0);
	write_extent_buffer(right, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(right),
			    BTRFS_FSID_SIZE);
1853 1854 1855 1856 1857 1858
	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + space_needed >
			BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
1859
				btrfs_set_header_nritems(right, 0);
1860
				wret = insert_ptr(trans, root, path,
1861
						  &disk_key, right->start,
1862 1863 1864
						  path->slots[1] + 1, 1);
				if (wret)
					ret = wret;
1865 1866
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
1867 1868 1869 1870 1871
				path->slots[0] = 0;
				path->slots[1] += 1;
				return ret;
			}
			mid = slot;
1872 1873 1874 1875 1876
			if (mid != nritems &&
			    leaf_space_used(l, mid, nritems - mid) +
			    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
				double_split = 1;
			}
1877 1878 1879 1880
		}
	} else {
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
			BTRFS_LEAF_DATA_SIZE(root)) {
1881
			if (!extend && slot == 0) {
1882
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
1883
				btrfs_set_header_nritems(right, 0);
1884 1885
				wret = insert_ptr(trans, root, path,
						  &disk_key,
1886
						  right->start,
1887
						  path->slots[1], 1);
1888 1889
				if (wret)
					ret = wret;
1890 1891
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
1892
				path->slots[0] = 0;
1893 1894 1895 1896 1897 1898
				if (path->slots[1] == 0) {
					wret = fixup_low_keys(trans, root,
					           path, &disk_key, 1);
					if (wret)
						ret = wret;
				}
1899
				return ret;
1900 1901 1902 1903 1904 1905 1906 1907 1908
			} else if (extend && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
					double_split = 1;
				}
1909
			}
1910 1911
		}
	}
1912 1913 1914 1915 1916 1917 1918 1919 1920
	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);

	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
C
Chris Mason 已提交
1921 1922 1923
		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
		     data_copy_size, btrfs_leaf_data(l) +
		     leaf_data_end(root, l), data_copy_size);
1924

C
Chris Mason 已提交
1925
	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1926
		      btrfs_item_end_nr(l, mid);
C
Chris Mason 已提交
1927

1928 1929
	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(right, i);
1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940
		u32 ioff;

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(right, item);
1941
		btrfs_set_item_offset(right, item, ioff + rt_data_off);
C
Chris Mason 已提交
1942
	}
C
Chris Mason 已提交
1943

1944 1945 1946 1947 1948
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

1949
	btrfs_set_header_nritems(l, mid);
C
Chris Mason 已提交
1950
	ret = 0;
1951
	btrfs_item_key(right, &disk_key, 0);
1952 1953
	wret = insert_ptr(trans, root, path, &disk_key, right->start,
			  path->slots[1] + 1, 1);
C
Chris Mason 已提交
1954 1955
	if (wret)
		ret = wret;
1956 1957 1958

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
1959
	BUG_ON(path->slots[0] != slot);
1960

1961
	if (mid <= slot) {
1962 1963
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
1964 1965
		path->slots[0] -= mid;
		path->slots[1] += 1;
1966
	} else
1967 1968
		free_extent_buffer(right);

1969
	BUG_ON(path->slots[0] < 0);
1970

1971 1972 1973 1974
	if (double_split) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
1975
	}
1976 1977 1978
	return ret;
}

C
Chris Mason 已提交
1979 1980 1981
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
1982
			u32 new_size, int from_end)
C
Chris Mason 已提交
1983 1984 1985 1986
{
	int ret = 0;
	int slot;
	int slot_orig;
1987 1988
	struct extent_buffer *leaf;
	struct btrfs_item *item;
C
Chris Mason 已提交
1989 1990 1991 1992 1993 1994 1995 1996
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;

	slot_orig = path->slots[0];
1997
	leaf = path->nodes[0];
1998 1999 2000 2001 2002
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return 0;
C
Chris Mason 已提交
2003

2004
	nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
2005 2006
	data_end = leaf_data_end(root, leaf);

2007
	old_data_start = btrfs_item_offset_nr(leaf, slot);
2008

C
Chris Mason 已提交
2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
2019 2020
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
2021 2022 2023 2024 2025 2026 2027 2028 2029

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}

2030 2031
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff + size_diff);
C
Chris Mason 已提交
2032
	}
2033 2034 2035 2036 2037 2038

	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

C
Chris Mason 已提交
2039
	/* shift the data */
2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078
	if (from_end) {
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				        (unsigned long)fi,
				        offsetof(struct btrfs_file_extent_item,
						 disk_bytenr));
			}
		}

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start - data_end);

		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(trans, root, path, &disk_key, 1);
	}
2079 2080 2081 2082

	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
2083 2084

	ret = 0;
2085 2086
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
C
Chris Mason 已提交
2087
		BUG();
2088
	}
C
Chris Mason 已提交
2089 2090 2091
	return ret;
}

2092 2093 2094
int btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
2095 2096 2097 2098
{
	int ret = 0;
	int slot;
	int slot_orig;
2099 2100
	struct extent_buffer *leaf;
	struct btrfs_item *item;
2101 2102 2103 2104 2105 2106 2107
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;

	slot_orig = path->slots[0];
2108
	leaf = path->nodes[0];
2109

2110
	nritems = btrfs_header_nritems(leaf);
2111 2112
	data_end = leaf_data_end(root, leaf);

2113 2114
	if (btrfs_leaf_free_space(root, leaf) < data_size) {
		btrfs_print_leaf(root, leaf);
2115
		BUG();
2116
	}
2117
	slot = path->slots[0];
2118
	old_data = btrfs_item_end_nr(leaf, slot);
2119 2120

	BUG_ON(slot < 0);
2121 2122 2123 2124 2125
	if (slot >= nritems) {
		btrfs_print_leaf(root, leaf);
		printk("slot %d too large, nritems %d\n", slot, nritems);
		BUG_ON(1);
	}
2126 2127 2128 2129 2130 2131

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
2132 2133
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
2134 2135 2136 2137 2138 2139 2140 2141

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}
2142 2143
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff - data_size);
2144
	}
2145

2146 2147 2148 2149 2150
	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

2151
	/* shift the data */
2152
	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2153 2154
		      data_end - data_size, btrfs_leaf_data(leaf) +
		      data_end, old_data - data_end);
2155

2156
	data_end = old_data;
2157 2158 2159 2160
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);
2161 2162

	ret = 0;
2163 2164
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
2165
		BUG();
2166
	}
2167 2168 2169
	return ret;
}

C
Chris Mason 已提交
2170 2171 2172 2173
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
2174 2175 2176 2177
int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    struct btrfs_path *path,
			    struct btrfs_key *cpu_key, u32 data_size)
2178
{
2179 2180
	struct extent_buffer *leaf;
	struct btrfs_item *item;
C
Chris Mason 已提交
2181
	int ret = 0;
2182
	int slot;
2183
	int slot_orig;
2184
	u32 nritems;
2185
	unsigned int data_end;
C
Chris Mason 已提交
2186 2187 2188
	struct btrfs_disk_key disk_key;

	btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2189

C
Chris Mason 已提交
2190
	/* create a root if there isn't one */
C
Chris Mason 已提交
2191
	if (!root->node)
C
Chris Mason 已提交
2192
		BUG();
2193

2194
	ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2195
	if (ret == 0) {
2196
		return -EEXIST;
C
Chris Mason 已提交
2197
	}
2198 2199
	if (ret < 0)
		goto out;
2200

2201
	slot_orig = path->slots[0];
2202
	leaf = path->nodes[0];
C
Chris Mason 已提交
2203

2204
	nritems = btrfs_header_nritems(leaf);
C
Chris Mason 已提交
2205
	data_end = leaf_data_end(root, leaf);
2206

C
Chris Mason 已提交
2207
	if (btrfs_leaf_free_space(root, leaf) <
2208
	    sizeof(struct btrfs_item) + data_size) {
2209 2210 2211
		btrfs_print_leaf(root, leaf);
		printk("not enough freespace need %u have %d\n",
		       data_size, btrfs_leaf_free_space(root, leaf));
2212
		BUG();
2213
	}
2214

2215
	slot = path->slots[0];
2216
	BUG_ON(slot < 0);
2217

2218 2219
	if (slot != nritems) {
		int i;
2220
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2221

2222 2223 2224 2225 2226 2227
		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
2228 2229 2230 2231
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
2232
		WARN_ON(leaf->map_token);
C
Chris Mason 已提交
2233
		for (i = slot; i < nritems; i++) {
2234
			u32 ioff;
2235

2236
			item = btrfs_item_nr(leaf, i);
2237 2238 2239 2240 2241 2242 2243 2244
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}

2245 2246
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - data_size);
C
Chris Mason 已提交
2247
		}
2248 2249 2250 2251
		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}
2252 2253

		/* shift the items */
2254 2255
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
			      btrfs_item_nr_offset(slot),
C
Chris Mason 已提交
2256
			      (nritems - slot) * sizeof(struct btrfs_item));
2257 2258

		/* shift the data */
2259
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
C
Chris Mason 已提交
2260 2261
			      data_end - data_size, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
2262 2263
		data_end = old_data;
	}
2264

2265
	/* setup the item for the new data */
2266 2267 2268 2269 2270 2271
	btrfs_set_item_key(leaf, &disk_key, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_offset(leaf, item, data_end - data_size);
	btrfs_set_item_size(leaf, item, data_size);
	btrfs_set_header_nritems(leaf, nritems + 1);
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
2272 2273

	ret = 0;
2274
	if (slot == 0)
2275
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
C
Chris Mason 已提交
2276

2277 2278
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
2279
		BUG();
2280
	}
2281
out:
2282 2283 2284 2285 2286 2287 2288
	return ret;
}

/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
2289 2290 2291
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *cpu_key, void *data, u32
		      data_size)
2292 2293
{
	int ret = 0;
C
Chris Mason 已提交
2294
	struct btrfs_path *path;
2295 2296
	struct extent_buffer *leaf;
	unsigned long ptr;
2297

C
Chris Mason 已提交
2298 2299 2300
	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2301
	if (!ret) {
2302 2303 2304 2305
		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, data, ptr, data_size);
		btrfs_mark_buffer_dirty(leaf);
2306
	}
C
Chris Mason 已提交
2307
	btrfs_free_path(path);
C
Chris Mason 已提交
2308
	return ret;
2309 2310
}

C
Chris Mason 已提交
2311
/*
C
Chris Mason 已提交
2312
 * delete the pointer from a given node.
C
Chris Mason 已提交
2313 2314 2315 2316 2317
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way the root if required.  The root is converted into
 * a leaf if all the nodes are emptied.
 */
2318 2319
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot)
2320
{
2321
	struct extent_buffer *parent = path->nodes[level];
2322
	u32 nritems;
C
Chris Mason 已提交
2323
	int ret = 0;
2324
	int wret;
2325

2326
	nritems = btrfs_header_nritems(parent);
2327
	if (slot != nritems -1) {
2328 2329 2330
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
C
Chris Mason 已提交
2331 2332
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
2333
	}
2334
	nritems--;
2335
	btrfs_set_header_nritems(parent, nritems);
2336
	if (nritems == 0 && parent == root->node) {
2337
		BUG_ON(btrfs_header_level(root->node) != 1);
2338
		/* just turn the root into a leaf and break */
2339
		btrfs_set_header_level(root->node, 0);
2340
	} else if (slot == 0) {
2341 2342 2343 2344
		struct btrfs_disk_key disk_key;

		btrfs_node_key(parent, &disk_key, 0);
		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
C
Chris Mason 已提交
2345 2346
		if (wret)
			ret = wret;
2347
	}
C
Chris Mason 已提交
2348
	btrfs_mark_buffer_dirty(parent);
C
Chris Mason 已提交
2349
	return ret;
2350 2351
}

C
Chris Mason 已提交
2352 2353 2354 2355
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
2356 2357
int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path)
2358 2359
{
	int slot;
2360 2361
	struct extent_buffer *leaf;
	struct btrfs_item *item;
2362 2363
	int doff;
	int dsize;
C
Chris Mason 已提交
2364 2365
	int ret = 0;
	int wret;
2366
	u32 nritems;
2367

2368
	leaf = path->nodes[0];
2369
	slot = path->slots[0];
2370 2371 2372
	doff = btrfs_item_offset_nr(leaf, slot);
	dsize = btrfs_item_size_nr(leaf, slot);
	nritems = btrfs_header_nritems(leaf);
2373

2374
	if (slot != nritems - 1) {
2375
		int i;
C
Chris Mason 已提交
2376
		int data_end = leaf_data_end(root, leaf);
2377 2378

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
C
Chris Mason 已提交
2379 2380 2381
			      data_end + dsize,
			      btrfs_leaf_data(leaf) + data_end,
			      doff - data_end);
2382

C
Chris Mason 已提交
2383
		for (i = slot + 1; i < nritems; i++) {
2384
			u32 ioff;
2385

2386
			item = btrfs_item_nr(leaf, i);
2387 2388 2389 2390 2391 2392 2393
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}
2394 2395
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff + dsize);
C
Chris Mason 已提交
2396
		}
2397 2398 2399 2400 2401 2402

		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

2403 2404
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
C
Chris Mason 已提交
2405 2406
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
2407
	}
2408
	btrfs_set_header_nritems(leaf, nritems - 1);
2409
	nritems--;
2410

C
Chris Mason 已提交
2411
	/* delete the leaf if we've emptied it */
2412
	if (nritems == 0) {
2413 2414
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
2415
		} else {
2416 2417
			clean_tree_block(trans, root, leaf);
			wait_on_tree_block_writeback(root, leaf);
2418
			wret = del_ptr(trans, root, path, 1, path->slots[1]);
C
Chris Mason 已提交
2419 2420
			if (wret)
				ret = wret;
2421
			wret = btrfs_free_extent(trans, root,
2422
						 leaf->start, leaf->len, 1);
C
Chris Mason 已提交
2423 2424
			if (wret)
				ret = wret;
2425
		}
2426
	} else {
2427
		int used = leaf_space_used(leaf, 0, nritems);
C
Chris Mason 已提交
2428
		if (slot == 0) {
2429 2430 2431
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
2432
			wret = fixup_low_keys(trans, root, path,
2433
					      &disk_key, 1);
C
Chris Mason 已提交
2434 2435 2436 2437
			if (wret)
				ret = wret;
		}

C
Chris Mason 已提交
2438
		/* delete the leaf if it is mostly empty */
2439
		if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2440 2441 2442 2443
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
2444
			slot = path->slots[1];
2445 2446
			extent_buffer_get(leaf);

2447
			wret = push_leaf_right(trans, root, path, 1, 1);
2448
			if (wret < 0 && wret != -ENOSPC)
C
Chris Mason 已提交
2449
				ret = wret;
2450 2451 2452

			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
2453
				wret = push_leaf_left(trans, root, path, 1, 1);
2454
				if (wret < 0 && wret != -ENOSPC)
C
Chris Mason 已提交
2455 2456
					ret = wret;
			}
2457 2458

			if (btrfs_header_nritems(leaf) == 0) {
2459 2460
				u64 bytenr = leaf->start;
				u32 blocksize = leaf->len;
2461 2462 2463 2464

				clean_tree_block(trans, root, leaf);
				wait_on_tree_block_writeback(root, leaf);

2465
				wret = del_ptr(trans, root, path, 1, slot);
C
Chris Mason 已提交
2466 2467
				if (wret)
					ret = wret;
2468 2469

				free_extent_buffer(leaf);
2470 2471
				wret = btrfs_free_extent(trans, root, bytenr,
							 blocksize, 1);
C
Chris Mason 已提交
2472 2473
				if (wret)
					ret = wret;
C
Chris Mason 已提交
2474
			} else {
2475 2476
				btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
2477
			}
2478
		} else {
2479
			btrfs_mark_buffer_dirty(leaf);
2480 2481
		}
	}
C
Chris Mason 已提交
2482
	return ret;
2483 2484
}

C
Chris Mason 已提交
2485 2486
/*
 * walk up the tree as far as required to find the next leaf.
C
Chris Mason 已提交
2487 2488
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on io errors.
C
Chris Mason 已提交
2489
 */
C
Chris Mason 已提交
2490
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2491 2492 2493
{
	int slot;
	int level = 1;
2494
	u64 bytenr;
2495 2496
	struct extent_buffer *c;
	struct extent_buffer *next = NULL;
2497

C
Chris Mason 已提交
2498
	while(level < BTRFS_MAX_LEVEL) {
2499
		if (!path->nodes[level])
C
Chris Mason 已提交
2500
			return 1;
2501

2502 2503
		slot = path->slots[level] + 1;
		c = path->nodes[level];
2504
		if (slot >= btrfs_header_nritems(c)) {
2505 2506 2507
			level++;
			continue;
		}
2508

2509
		bytenr = btrfs_node_blockptr(c, slot);
C
Chris Mason 已提交
2510
		if (next)
2511 2512
			free_extent_buffer(next);

2513 2514
		if (path->reada)
			reada_for_search(root, path, level, slot);
2515

2516 2517
		next = read_tree_block(root, bytenr,
				       btrfs_level_size(root, level -1));
2518 2519 2520 2521 2522 2523
		break;
	}
	path->slots[level] = slot;
	while(1) {
		level--;
		c = path->nodes[level];
2524
		free_extent_buffer(c);
2525 2526 2527 2528
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!level)
			break;
2529
		if (path->reada)
Y
Yan 已提交
2530
			reada_for_search(root, path, level, 0);
2531 2532
		next = read_tree_block(root, btrfs_node_blockptr(next, 0),
				       btrfs_level_size(root, level - 1));
2533 2534 2535
	}
	return 0;
}