extent-tree.c 68.0 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Z
Zach Brown 已提交
19
#include <linux/sched.h>
20
#include <linux/crc32c.h>
21
#include <linux/pagemap.h>
22
#include "hash.h"
23 24 25
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
26
#include "transaction.h"
27

28 29 30 31
#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
#define BLOCK_GROUP_DIRTY EXTENT_DIRTY

32 33
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
34 35
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
static int find_previous_extent(struct btrfs_root *root,
				struct btrfs_path *path)
{
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret;

	while(1) {
		if (path->slots[0] == 0) {
			ret = btrfs_prev_leaf(root, path);
			if (ret != 0)
				return ret;
		} else {
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
			return 0;
	}
	return 1;
}
58

59 60 61 62 63 64
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
65
	struct extent_buffer *leaf;
66
	struct extent_map_tree *free_space_cache;
67 68 69
	int slot;
	u64 last = 0;
	u64 hole_size;
70
	u64 first_free;
71 72
	int found = 0;

73 74 75
	if (!block_group)
		return 0;

76
	root = root->fs_info->extent_root;
77
	free_space_cache = &root->fs_info->free_space_cache;
78 79 80

	if (block_group->cached)
		return 0;
81

82 83 84
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
85

86
	path->reada = 2;
87
	first_free = block_group->key.objectid;
88 89 90 91 92 93
	key.objectid = block_group->key.objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
94 95 96 97 98 99 100 101 102
	ret = find_previous_extent(root, path);
	if (ret < 0)
		return ret;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid + key.offset > first_free)
			first_free = key.objectid + key.offset;
	}
103
	while(1) {
104
		leaf = path->nodes[0];
105
		slot = path->slots[0];
106
		if (slot >= btrfs_header_nritems(leaf)) {
107
			ret = btrfs_next_leaf(root, path);
108 109
			if (ret < 0)
				goto err;
110
			if (ret == 0) {
111
				continue;
112
			} else {
113 114 115
				break;
			}
		}
116
		btrfs_item_key_to_cpu(leaf, &key, slot);
117 118 119
		if (key.objectid < block_group->key.objectid) {
			goto next;
		}
120 121 122 123
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			break;
		}
124

125 126
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
127
				last = first_free;
128 129
				found = 1;
			}
130 131 132 133 134
			if (key.objectid > last) {
				hole_size = key.objectid - last;
				set_extent_dirty(free_space_cache, last,
						 last + hole_size - 1,
						 GFP_NOFS);
135 136
			}
			last = key.objectid + key.offset;
137
		}
138
next:
139 140 141
		path->slots[0]++;
	}

142 143 144 145 146 147
	if (!found)
		last = first_free;
	if (block_group->key.objectid +
	    block_group->key.offset > last) {
		hole_size = block_group->key.objectid +
			block_group->key.offset - last;
148 149
		set_extent_dirty(free_space_cache, last,
				 last + hole_size - 1, GFP_NOFS);
150
	}
151
	block_group->cached = 1;
152
err:
153 154 155 156
	btrfs_free_path(path);
	return 0;
}

157 158
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
159
							 u64 bytenr)
C
Chris Mason 已提交
160
{
161 162 163 164 165
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 ptr;
	u64 start;
	u64 end;
C
Chris Mason 已提交
166 167
	int ret;

168 169
	block_group_cache = &info->block_group_cache;
	ret = find_first_extent_bit(block_group_cache,
170
				    bytenr, &start, &end,
171
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
C
Chris Mason 已提交
172
	if (ret) {
173
		return NULL;
C
Chris Mason 已提交
174
	}
175 176 177 178
	ret = get_state_private(block_group_cache, start, &ptr);
	if (ret)
		return NULL;

J
Jens Axboe 已提交
179
	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
Y
Yan 已提交
180
	if (block_group->key.objectid <= bytenr && bytenr <
181 182
	    block_group->key.objectid + block_group->key.offset)
		return block_group;
C
Chris Mason 已提交
183 184
	return NULL;
}
185 186
static u64 noinline find_search_start(struct btrfs_root *root,
			      struct btrfs_block_group_cache **cache_ret,
187
			      u64 search_start, int num, int data)
188 189 190
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
191
	u64 last;
192 193
	u64 start = 0;
	u64 end = 0;
194
	u64 cache_miss = 0;
195
	int wrapped = 0;
196

197
	if (!cache) {
198
		goto out;
199
	}
200
again:
201 202 203
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;
204

205 206
	last = max(search_start, cache->key.objectid);

207
	while(1) {
208 209
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
210
		if (ret) {
211 212
			if (!cache_miss)
				cache_miss = last;
213 214
			goto new_group;
		}
215 216 217

		start = max(last, start);
		last = end + 1;
218 219 220
		if (last - start < num) {
			if (last == cache->key.objectid + cache->key.offset)
				cache_miss = start;
221
			continue;
222 223
		}
		if (data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
224
		    start + num > cache->key.objectid + cache->key.offset)
225
			goto new_group;
226
		return start;
227 228
	}
out:
229 230 231 232 233 234 235
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n",
		       search_start);
		WARN_ON(1);
		return search_start;
	}
236
	return search_start;
237 238

new_group:
239
	last = cache->key.objectid + cache->key.offset;
240
wrapped:
241
	cache = btrfs_lookup_block_group(root->fs_info, last);
242
	if (!cache) {
243
no_cache:
244 245 246 247 248 249
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			data = BTRFS_BLOCK_GROUP_MIXED;
			goto wrapped;
		}
250
		goto out;
251
	}
252 253 254 255 256
	if (cache_miss && !cache->cached) {
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_block_group(root->fs_info, last);
	}
257
	cache = btrfs_find_block_group(root, cache, last, data, 0);
258 259
	if (!cache)
		goto no_cache;
260
	*cache_ret = cache;
261
	cache_miss = 0;
262 263 264
	goto again;
}

C
Chris Mason 已提交
265 266
static u64 div_factor(u64 num, int factor)
{
267 268
	if (factor == 10)
		return num;
C
Chris Mason 已提交
269 270 271 272 273
	num *= factor;
	do_div(num, 10);
	return num;
}

274 275
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
276
						 *hint, u64 search_start,
277
						 int data, int owner)
C
Chris Mason 已提交
278
{
279 280
	struct btrfs_block_group_cache *cache;
	struct extent_map_tree *block_group_cache;
281
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
282 283
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
284 285
	u64 last = 0;
	u64 hint_last;
286 287 288 289 290
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	int bit;
C
Chris Mason 已提交
291
	int ret;
292
	int full_search = 0;
293
	int factor = 8;
C
Chris Mason 已提交
294
	int data_swap = 0;
295

296 297
	block_group_cache = &info->block_group_cache;

298
	if (!owner)
299
		factor = 8;
C
Chris Mason 已提交
300

301
	if (data == BTRFS_BLOCK_GROUP_MIXED) {
302
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
303 304
		factor = 10;
	} else if (data)
305 306 307
		bit = BLOCK_GROUP_DATA;
	else
		bit = BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
308 309 310

	if (search_start) {
		struct btrfs_block_group_cache *shint;
311
		shint = btrfs_lookup_block_group(info, search_start);
312 313
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
C
Chris Mason 已提交
314
			used = btrfs_block_group_used(&shint->item);
315 316
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
C
Chris Mason 已提交
317 318 319 320
				return shint;
			}
		}
	}
321 322
	if (hint && (hint->data == data ||
		     hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
323
		used = btrfs_block_group_used(&hint->item);
324 325
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
326 327
			return hint;
		}
328
		last = hint->key.objectid + hint->key.offset;
329 330
		hint_last = last;
	} else {
331 332 333 334 335 336
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
337 338
	}
again:
C
Chris Mason 已提交
339
	while(1) {
340 341 342
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
C
Chris Mason 已提交
343
			break;
344 345 346 347 348

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
349
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
350 351 352 353 354 355 356
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (full_search)
			free_check = cache->key.offset;
		else
			free_check = div_factor(cache->key.offset, factor);
357
		if (used + cache->pinned < free_check) {
358 359
			found_group = cache;
			goto found;
C
Chris Mason 已提交
360
		}
361
		cond_resched();
C
Chris Mason 已提交
362
	}
363
	if (!full_search) {
C
Chris Mason 已提交
364
		last = search_start;
365 366 367
		full_search = 1;
		goto again;
	}
C
Chris Mason 已提交
368 369
	if (!data_swap) {
		data_swap = 1;
370
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
371 372 373
		last = search_start;
		goto again;
	}
C
Chris Mason 已提交
374
found:
375
	return found_group;
C
Chris Mason 已提交
376 377
}

378
static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
379 380 381 382 383 384 385 386
			   u64 owner, u64 owner_offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
387 388
	lenum = cpu_to_le64(ref_generation);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
389

390
#if 0
391 392 393 394
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner_offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
395
#endif
396 397 398
	return ((u64)high_crc << 32) | (u64)low_crc;
}

399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
static int match_extent_ref(struct extent_buffer *leaf,
			    struct btrfs_extent_ref *disk_ref,
			    struct btrfs_extent_ref *cpu_ref)
{
	int ret;
	int len;

	if (cpu_ref->objectid)
		len = sizeof(*cpu_ref);
	else
		len = 2 * sizeof(u64);
	ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
				   len);
	return ret == 0;
}

415 416 417 418 419 420
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 bytenr,
					  u64 root_objectid,
					  u64 ref_generation, u64 owner,
					  u64 owner_offset, int del)
421 422 423
{
	u64 hash;
	struct btrfs_key key;
424
	struct btrfs_key found_key;
425
	struct btrfs_extent_ref ref;
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;
	int ret;
	int ret2;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path,
					del ? -1 : 0, del);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		if (ret != 0) {
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				if (ret2)
					goto out;
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
				goto out;
			key.offset = found_key.offset;
			if (del) {
				btrfs_release_path(root, path);
				continue;
			}
		}
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
			ret = 0;
			goto out;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
	}
out:
	return ret;
}

481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume (in theory, not implemented yet)
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks the same as the create case, but trans->transid
 * will be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a file extent is removed either during snapshot deletion or file
 * truncation, the corresponding back reference is found
 * by searching for:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * Storing sufficient information for a full reverse mapping of a btree
 * block would require storing the lowest key of the block in the backref,
 * and it would require updating that lowest key either before write out or
 * every time it changed.  Instead, the objectid of the lowest key is stored
 * along with the level of the tree block.  This provides a hint
 * about where in the btree the block can be found.  Searches through the
 * btree only need to look for a pointer to that block, so they stop one
 * level higher than the level recorded in the backref.
 *
 * Some btrees do not do reference counting on their extents.  These
 * include the extent tree and the tree of tree roots.  Backrefs for these
 * trees always have a generation of zero.
 *
 * When a tree block is created, back references are inserted:
 *
544
 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
545 546 547 548 549
 *
 * When a tree block is cow'd in a reference counted root,
 * new back references are added for all the blocks it points to.
 * These are of the form (trans->transid will have increased since creation):
 *
550
 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568
 *
 * Because the lowest_key_objectid and the level are just hints
 * they are not used when backrefs are deleted.  When a backref is deleted:
 *
 * if backref was for a tree root:
 *     root_objectid = root->root_key.objectid
 * else
 *     root_objectid = btrfs_header_owner(parent)
 *
 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
 *
 * Back Reference Key hashing:
 *
 * Back references have four fields, each 64 bits long.  Unfortunately,
 * This is hashed into a single 64 bit number and placed into the key offset.
 * The key objectid corresponds to the first byte in the extent, and the
 * key type is set to BTRFS_EXTENT_REF_KEY
 */
569 570 571 572 573 574 575 576 577 578
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, u64 bytenr,
				 u64 root_objectid, u64 ref_generation,
				 u64 owner, u64 owner_offset)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_extent_ref ref;
	struct btrfs_extent_ref *disk_ref;
579 580 581
	int ret;

	btrfs_set_stack_ref_root(&ref, root_objectid);
582
	btrfs_set_stack_ref_generation(&ref, ref_generation);
583 584 585
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

586 587
	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
588 589 590 591 592 593
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
	while (ret == -EEXIST) {
594 595 596 597 598 599 600 601
		disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref))
			goto out;
		key.offset++;
		btrfs_release_path(root, path);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      sizeof(ref));
602
	}
603 604 605 606 607 608 609 610 611 612
	if (ret)
		goto out;
	disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_extent_ref);
	write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
			    sizeof(ref));
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
613 614
}

C
Chris Mason 已提交
615 616
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
617
				u64 bytenr, u64 num_bytes,
618
				u64 root_objectid, u64 ref_generation,
619
				u64 owner, u64 owner_offset)
C
Chris Mason 已提交
620
{
621
	struct btrfs_path *path;
C
Chris Mason 已提交
622
	int ret;
C
Chris Mason 已提交
623
	struct btrfs_key key;
624
	struct extent_buffer *l;
C
Chris Mason 已提交
625
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
626
	u32 refs;
C
Chris Mason 已提交
627

628
	WARN_ON(num_bytes < root->sectorsize);
629
	path = btrfs_alloc_path();
630 631
	if (!path)
		return -ENOMEM;
632

633
	path->reada = 0;
634
	key.objectid = bytenr;
635
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
636
	key.offset = num_bytes;
637
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
638
				0, 1);
639 640
	if (ret < 0)
		return ret;
641
	if (ret != 0) {
642
		BUG();
643
	}
C
Chris Mason 已提交
644
	BUG_ON(ret != 0);
645
	l = path->nodes[0];
646
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
647 648
	refs = btrfs_extent_refs(l, item);
	btrfs_set_extent_refs(l, item, refs + 1);
649
	btrfs_mark_buffer_dirty(path->nodes[0]);
650

651
	btrfs_release_path(root->fs_info->extent_root, path);
652

653
	path->reada = 0;
654 655 656 657
	ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
					  path, bytenr, root_objectid,
					  ref_generation, owner, owner_offset);
	BUG_ON(ret);
658
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
659
	del_pending_extents(trans, root->fs_info->extent_root);
660 661

	btrfs_free_path(path);
C
Chris Mason 已提交
662 663 664
	return 0;
}

665 666 667 668 669 670 671 672
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
{
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	return 0;
}

C
Chris Mason 已提交
673
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
674 675
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u32 *refs)
676
{
677
	struct btrfs_path *path;
678
	int ret;
C
Chris Mason 已提交
679
	struct btrfs_key key;
680
	struct extent_buffer *l;
C
Chris Mason 已提交
681
	struct btrfs_extent_item *item;
682

683
	WARN_ON(num_bytes < root->sectorsize);
684
	path = btrfs_alloc_path();
685
	path->reada = 0;
686 687
	key.objectid = bytenr;
	key.offset = num_bytes;
688
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
689
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
690
				0, 0);
691 692
	if (ret < 0)
		goto out;
693 694
	if (ret != 0) {
		btrfs_print_leaf(root, path->nodes[0]);
695
		printk("failed to find block number %Lu\n", bytenr);
696
		BUG();
697 698
	}
	l = path->nodes[0];
699
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
700
	*refs = btrfs_extent_refs(l, item);
701
out:
702
	btrfs_free_path(path);
703 704 705
	return 0;
}

706 707 708 709 710 711 712 713
u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
				  struct btrfs_path *count_path,
				  u64 first_extent)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;
	u64 bytenr;
	u64 found_objectid;
714
	u64 root_objectid = root->root_key.objectid;
715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
	u32 total_count = 0;
	u32 cur_count;
	u32 nritems;
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	struct btrfs_extent_ref *ref_item;
	int level = -1;

	path = btrfs_alloc_path();
again:
	if (level == -1)
		bytenr = first_extent;
	else
		bytenr = count_path->nodes[level]->start;

	cur_count = 0;
	key.objectid = bytenr;
	key.offset = 0;

	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0);

	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);

	if (found_key.objectid != bytenr ||
	    found_key.type != BTRFS_EXTENT_ITEM_KEY) {
		goto out;
	}

	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	while (1) {
753
		l = path->nodes[0];
754 755 756 757 758 759 760 761 762 763
		nritems = btrfs_header_nritems(l);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret == 0)
				continue;
			break;
		}
		btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr)
			break;
764

765 766 767 768 769 770 771 772 773 774
		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
			path->slots[0]++;
			continue;
		}

		cur_count++;
		ref_item = btrfs_item_ptr(l, path->slots[0],
					  struct btrfs_extent_ref);
		found_objectid = btrfs_ref_root(l, ref_item);

775 776
		if (found_objectid != root_objectid) {
			total_count = 2;
777
			goto out;
778 779
		}
		total_count = 1;
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795
		path->slots[0]++;
	}
	if (cur_count == 0) {
		total_count = 0;
		goto out;
	}
	if (level >= 0 && root->node == count_path->nodes[level])
		goto out;
	level++;
	btrfs_release_path(root, path);
	goto again;

out:
	btrfs_free_path(path);
	return total_count;
}
C
Chris Mason 已提交
796
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
797
		       struct btrfs_root *root, u64 owner_objectid)
C
Chris Mason 已提交
798
{
799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816
	u64 generation;
	u64 key_objectid;
	u64 level;
	u32 nritems;
	struct btrfs_disk_key disk_key;

	level = btrfs_header_level(root->node);
	generation = trans->transid;
	nritems = btrfs_header_nritems(root->node);
	if (nritems > 0) {
		if (level == 0)
			btrfs_item_key(root->node, &disk_key, 0);
		else
			btrfs_node_key(root->node, &disk_key, 0);
		key_objectid = btrfs_disk_key_objectid(&disk_key);
	} else {
		key_objectid = 0;
	}
817
	return btrfs_inc_extent_ref(trans, root, root->node->start,
818
				    root->node->len, owner_objectid,
819
				    generation, level, key_objectid);
C
Chris Mason 已提交
820 821
}

822
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
823
		  struct extent_buffer *buf)
C
Chris Mason 已提交
824
{
825
	u64 bytenr;
826 827
	u32 nritems;
	struct btrfs_key key;
828
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
829
	int i;
830
	int level;
831
	int ret;
832
	int faili;
833

834
	if (!root->ref_cows)
835
		return 0;
836

837
	level = btrfs_header_level(buf);
838 839
	nritems = btrfs_header_nritems(buf);
	for (i = 0; i < nritems; i++) {
840 841
		if (level == 0) {
			u64 disk_bytenr;
842 843
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
844
				continue;
845
			fi = btrfs_item_ptr(buf, i,
846
					    struct btrfs_file_extent_item);
847
			if (btrfs_file_extent_type(buf, fi) ==
848 849
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
850 851
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
C
Chris Mason 已提交
852
				continue;
853
			ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
854 855 856
				    btrfs_file_extent_disk_num_bytes(buf, fi),
				    root->root_key.objectid, trans->transid,
				    key.objectid, key.offset);
857 858 859 860
			if (ret) {
				faili = i;
				goto fail;
			}
861
		} else {
862
			bytenr = btrfs_node_blockptr(buf, i);
863
			btrfs_node_key_to_cpu(buf, &key, i);
864
			ret = btrfs_inc_extent_ref(trans, root, bytenr,
865 866
					   btrfs_level_size(root, level - 1),
					   root->root_key.objectid,
867 868
					   trans->transid,
					   level - 1, key.objectid);
869 870 871 872
			if (ret) {
				faili = i;
				goto fail;
			}
873
		}
C
Chris Mason 已提交
874 875
	}
	return 0;
876
fail:
C
Chris Mason 已提交
877
	WARN_ON(1);
878
#if 0
879
	for (i =0; i < faili; i++) {
880 881
		if (level == 0) {
			u64 disk_bytenr;
882 883
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
884
				continue;
885
			fi = btrfs_item_ptr(buf, i,
886
					    struct btrfs_file_extent_item);
887
			if (btrfs_file_extent_type(buf, fi) ==
888 889
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
890 891
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
892
				continue;
893 894
			err = btrfs_free_extent(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf,
895
								      fi), 0);
896 897
			BUG_ON(err);
		} else {
898 899 900
			bytenr = btrfs_node_blockptr(buf, i);
			err = btrfs_free_extent(trans, root, bytenr,
					btrfs_level_size(root, level - 1), 0);
901 902 903
			BUG_ON(err);
		}
	}
904
#endif
905
	return ret;
C
Chris Mason 已提交
906 907
}

C
Chris Mason 已提交
908 909 910 911 912 913 914 915
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
916 917
	unsigned long bi;
	struct extent_buffer *leaf;
C
Chris Mason 已提交
918 919

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
920 921
	if (ret < 0)
		goto fail;
C
Chris Mason 已提交
922
	BUG_ON(ret);
923 924 925 926 927

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
928
	btrfs_release_path(extent_root, path);
929
fail:
C
Chris Mason 已提交
930 931 932 933 934 935 936 937 938 939
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}

940 941
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
C
Chris Mason 已提交
942
{
943 944
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
945 946 947 948
	int ret;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
949 950 951 952
	u64 last = 0;
	u64 start;
	u64 end;
	u64 ptr;
C
Chris Mason 已提交
953

954
	block_group_cache = &root->fs_info->block_group_cache;
C
Chris Mason 已提交
955 956 957 958 959
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
960 961 962
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, BLOCK_GROUP_DIRTY);
		if (ret)
C
Chris Mason 已提交
963
			break;
964

965 966 967 968 969
		last = end + 1;
		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
970
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
971 972 973 974 975 976 977 978 979 980
		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		if (err) {
			werr = err;
			continue;
C
Chris Mason 已提交
981
		}
982 983
		clear_extent_bits(block_group_cache, start, end,
				  BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
984 985 986 987 988 989 990
	}
	btrfs_free_path(path);
	return werr;
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
991 992
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free, int data)
C
Chris Mason 已提交
993 994 995
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
996
	u64 total = num_bytes;
C
Chris Mason 已提交
997
	u64 old_val;
998
	u64 byte_in_group;
999 1000
	u64 start;
	u64 end;
C
Chris Mason 已提交
1001

C
Chris Mason 已提交
1002
	while(total) {
1003
		cache = btrfs_lookup_block_group(info, bytenr);
C
Chris Mason 已提交
1004
		if (!cache) {
C
Chris Mason 已提交
1005
			return -1;
C
Chris Mason 已提交
1006
		}
1007 1008
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
1009 1010 1011 1012
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
1013 1014

		old_val = btrfs_block_group_used(&cache->item);
1015
		num_bytes = min(total, cache->key.offset - byte_in_group);
C
Chris Mason 已提交
1016
		if (alloc) {
C
Chris Mason 已提交
1017
			if (cache->data != data &&
C
Chris Mason 已提交
1018
			    old_val < (cache->key.offset >> 1)) {
1019 1020 1021
				int bit_to_clear;
				int bit_to_set;
				cache->data = data;
C
Chris Mason 已提交
1022
				if (data) {
1023 1024
					bit_to_clear = BLOCK_GROUP_METADATA;
					bit_to_set = BLOCK_GROUP_DATA;
1025 1026
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
1027 1028 1029
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
1030 1031
					bit_to_clear = BLOCK_GROUP_DATA;
					bit_to_set = BLOCK_GROUP_METADATA;
1032 1033
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
1034 1035 1036
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
1037 1038 1039 1040 1041 1042
				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
1043 1044 1045 1046 1047 1048 1049 1050
			} else if (cache->data != data &&
				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
				cache->data = BTRFS_BLOCK_GROUP_MIXED;
				set_extent_bits(&info->block_group_cache,
						start, end,
						BLOCK_GROUP_DATA |
						BLOCK_GROUP_METADATA,
						GFP_NOFS);
C
Chris Mason 已提交
1051
			}
1052
			old_val += num_bytes;
C
Chris Mason 已提交
1053
		} else {
1054
			old_val -= num_bytes;
1055 1056
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
1057
						 bytenr, bytenr + num_bytes - 1,
1058
						 GFP_NOFS);
1059
			}
C
Chris Mason 已提交
1060
		}
C
Chris Mason 已提交
1061
		btrfs_set_block_group_used(&cache->item, old_val);
1062 1063
		total -= num_bytes;
		bytenr += num_bytes;
C
Chris Mason 已提交
1064 1065 1066
	}
	return 0;
}
1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097
static int update_pinned_extents(struct btrfs_root *root,
				u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	}
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		WARN_ON(!cache);
		len = min(num, cache->key.offset -
			  (bytenr - cache->key.objectid));
		if (pin) {
			cache->pinned += len;
			fs_info->total_pinned += len;
		} else {
			cache->pinned -= len;
			fs_info->total_pinned -= len;
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
C
Chris Mason 已提交
1098

1099
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
C
Chris Mason 已提交
1100 1101
{
	u64 last = 0;
1102 1103 1104
	u64 start;
	u64 end;
	struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
C
Chris Mason 已提交
1105 1106 1107
	int ret;

	while(1) {
1108 1109 1110
		ret = find_first_extent_bit(pinned_extents, last,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
C
Chris Mason 已提交
1111
			break;
1112 1113
		set_extent_dirty(copy, start, end, GFP_NOFS);
		last = end + 1;
C
Chris Mason 已提交
1114 1115 1116 1117 1118 1119
	}
	return 0;
}

int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
1120
			       struct extent_map_tree *unpin)
1121
{
1122 1123
	u64 start;
	u64 end;
1124
	int ret;
1125 1126
	struct extent_map_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;
1127 1128

	while(1) {
1129 1130 1131
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
1132
			break;
1133
		update_pinned_extents(root, start, end + 1 - start, 0);
1134 1135
		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1136 1137 1138 1139
	}
	return 0;
}

1140 1141
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root)
C
Chris Mason 已提交
1142
{
1143 1144 1145
	u64 start;
	u64 end;
	struct btrfs_fs_info *info = extent_root->fs_info;
1146
	struct extent_buffer *eb;
1147
	struct btrfs_path *path;
C
Chris Mason 已提交
1148
	struct btrfs_key ins;
1149
	struct btrfs_disk_key first;
C
Chris Mason 已提交
1150
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
1151
	int ret;
1152
	int level;
1153
	int err = 0;
C
Chris Mason 已提交
1154

1155
	btrfs_set_stack_extent_refs(&extent_item, 1);
1156
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1157
	path = btrfs_alloc_path();
C
Chris Mason 已提交
1158

1159
	while(1) {
1160 1161 1162
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
1163 1164
			break;

1165 1166 1167 1168 1169 1170
		ins.objectid = start;
		ins.offset = end + 1 - start;
		err = btrfs_insert_item(trans, extent_root, &ins,
					&extent_item, sizeof(extent_item));
		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
1171 1172 1173 1174 1175 1176 1177
		eb = read_tree_block(extent_root, ins.objectid, ins.offset);
		level = btrfs_header_level(eb);
		if (level == 0) {
			btrfs_item_key(eb, &first, 0);
		} else {
			btrfs_node_key(eb, &first, 0);
		}
1178 1179
		err = btrfs_insert_extent_backref(trans, extent_root, path,
					  start, extent_root->root_key.objectid,
1180 1181
					  0, level,
					  btrfs_disk_key_objectid(&first));
1182
		BUG_ON(err);
1183
		free_extent_buffer(eb);
C
Chris Mason 已提交
1184
	}
1185
	btrfs_free_path(path);
C
Chris Mason 已提交
1186 1187 1188
	return 0;
}

1189 1190
static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
			  int pending)
C
Chris Mason 已提交
1191
{
1192
	int err = 0;
1193
	struct extent_buffer *buf;
C
Chris Mason 已提交
1194

C
Chris Mason 已提交
1195
	if (!pending) {
1196
		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1197 1198
		if (buf) {
			if (btrfs_buffer_uptodate(buf)) {
C
Chris Mason 已提交
1199 1200
				u64 transid =
				    root->fs_info->running_transaction->transid;
C
Chris Mason 已提交
1201 1202 1203
				u64 header_transid =
					btrfs_header_generation(buf);
				if (header_transid == transid) {
1204
					free_extent_buffer(buf);
1205
					return 1;
C
Chris Mason 已提交
1206
				}
C
Chris Mason 已提交
1207
			}
1208
			free_extent_buffer(buf);
C
Chris Mason 已提交
1209
		}
1210
		update_pinned_extents(root, bytenr, num_bytes, 1);
C
Chris Mason 已提交
1211
	} else {
1212
		set_extent_bits(&root->fs_info->pending_del,
1213 1214
				bytenr, bytenr + num_bytes - 1,
				EXTENT_LOCKED, GFP_NOFS);
C
Chris Mason 已提交
1215
	}
C
Chris Mason 已提交
1216
	BUG_ON(err < 0);
C
Chris Mason 已提交
1217 1218 1219
	return 0;
}

1220
/*
1221
 * remove an extent from the root, returns 0 on success
1222
 */
1223
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1224 1225 1226
			 *root, u64 bytenr, u64 num_bytes,
			 u64 root_objectid, u64 ref_generation,
			 u64 owner_objectid, u64 owner_offset, int pin,
1227
			 int mark_free)
1228
{
1229
	struct btrfs_path *path;
C
Chris Mason 已提交
1230
	struct btrfs_key key;
1231 1232
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
1233
	struct extent_buffer *leaf;
1234
	int ret;
C
Chris Mason 已提交
1235
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
1236
	u32 refs;
C
Chris Mason 已提交
1237

1238
	key.objectid = bytenr;
1239
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1240
	key.offset = num_bytes;
1241

1242
	path = btrfs_alloc_path();
1243 1244
	if (!path)
		return -ENOMEM;
1245

1246
	path->reada = 0;
1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261
	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, root_objectid,
				    ref_generation,
				    owner_objectid, owner_offset, 1);
	if (ret == 0) {
		ret = btrfs_del_item(trans, extent_root, path);
	} else {
		btrfs_print_leaf(extent_root, path->nodes[0]);
		WARN_ON(1);
		printk("Unable to find ref byte nr %Lu root %Lu "
		       " gen %Lu owner %Lu offset %Lu\n", bytenr,
		       root_objectid, ref_generation, owner_objectid,
		       owner_offset);
	}
	btrfs_release_path(extent_root, path);
1262 1263 1264 1265
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);
1266 1267 1268

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
C
Chris Mason 已提交
1269
			    struct btrfs_extent_item);
1270 1271 1272 1273 1274 1275
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs -= 1;
	btrfs_set_extent_refs(leaf, ei, refs);
	btrfs_mark_buffer_dirty(leaf);

C
Chris Mason 已提交
1276
	if (refs == 0) {
1277 1278
		u64 super_used;
		u64 root_used;
C
Chris Mason 已提交
1279 1280

		if (pin) {
1281
			ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1282 1283 1284
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
C
Chris Mason 已提交
1285 1286
		}

1287
		/* block accounting for super block */
1288 1289 1290
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - num_bytes);
1291 1292

		/* block accounting for root item */
1293
		root_used = btrfs_root_used(&root->root_item);
1294
		btrfs_set_root_used(&root->root_item,
1295
					   root_used - num_bytes);
1296

1297
		ret = btrfs_del_item(trans, extent_root, path);
1298 1299 1300
		if (ret) {
			return ret;
		}
1301
		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
C
Chris Mason 已提交
1302
					 mark_free, 0);
C
Chris Mason 已提交
1303
		BUG_ON(ret);
1304
	}
1305
	btrfs_free_path(path);
1306
	finish_current_insert(trans, extent_root);
1307 1308 1309 1310 1311 1312 1313
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
1314 1315
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
1316 1317
{
	int ret;
C
Chris Mason 已提交
1318
	int err = 0;
1319 1320 1321 1322
	u64 start;
	u64 end;
	struct extent_map_tree *pending_del;
	struct extent_map_tree *pinned_extents;
C
Chris Mason 已提交
1323

1324 1325
	pending_del = &extent_root->fs_info->pending_del;
	pinned_extents = &extent_root->fs_info->pinned_extents;
1326 1327

	while(1) {
1328 1329 1330
		ret = find_first_extent_bit(pending_del, 0, &start, &end,
					    EXTENT_LOCKED);
		if (ret)
1331
			break;
1332
		update_pinned_extents(extent_root, start, end + 1 - start, 1);
1333 1334 1335
		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
		ret = __free_extent(trans, extent_root,
1336 1337 1338
				     start, end + 1 - start,
				     extent_root->root_key.objectid,
				     0, 0, 0, 0, 0);
1339 1340
		if (ret)
			err = ret;
1341
	}
C
Chris Mason 已提交
1342
	return err;
1343 1344 1345 1346 1347
}

/*
 * remove an extent from the root, returns 0 on success
 */
1348
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1349 1350 1351
		      *root, u64 bytenr, u64 num_bytes,
		      u64 root_objectid, u64 ref_generation,
		      u64 owner_objectid, u64 owner_offset, int pin)
1352
{
1353
	struct btrfs_root *extent_root = root->fs_info->extent_root;
1354 1355
	int pending_ret;
	int ret;
1356

1357
	WARN_ON(num_bytes < root->sectorsize);
1358 1359 1360
	if (!root->ref_cows)
		ref_generation = 0;

1361
	if (root == extent_root) {
1362
		pin_down_bytes(root, bytenr, num_bytes, 1);
1363 1364
		return 0;
	}
1365 1366 1367
	ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
			    ref_generation, owner_objectid, owner_offset,
			    pin, pin == 0);
C
Chris Mason 已提交
1368
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1369 1370 1371
	return ret ? ret : pending_ret;
}

1372 1373 1374 1375 1376 1377 1378
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
	u64 mask = ((u64)root->stripesize - 1);
	u64 ret = (val + mask) & ~mask;
	return ret;
}

1379 1380 1381 1382
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
1383
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
1384 1385 1386
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
1387 1388 1389 1390 1391 1392 1393
static int noinline find_free_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 search_start, u64 search_end,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 exclude_start, u64 exclude_nr,
				     int data)
1394
{
1395
	struct btrfs_path *path;
C
Chris Mason 已提交
1396
	struct btrfs_key key;
1397
	u64 hole_size = 0;
1398 1399
	u64 aligned;
	int ret;
1400
	int slot = 0;
1401
	u64 last_byte = 0;
C
Chris Mason 已提交
1402
	u64 orig_search_start = search_start;
1403
	int start_found;
1404
	struct extent_buffer *l;
1405
	struct btrfs_root * root = orig_root->fs_info->extent_root;
1406
	struct btrfs_fs_info *info = root->fs_info;
1407
	u64 total_needed = num_bytes;
C
Chris Mason 已提交
1408
	int level;
C
Chris Mason 已提交
1409
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
1410
	int full_scan = 0;
1411
	int wrapped = 0;
1412
	u64 cached_start;
1413

1414
	WARN_ON(num_bytes < root->sectorsize);
1415 1416
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

1417 1418
	level = btrfs_header_level(root->node);

1419
	if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1420 1421 1422
		data = BTRFS_BLOCK_GROUP_MIXED;
	}

C
Chris Mason 已提交
1423
	if (search_end == (u64)-1)
1424 1425 1426
		search_end = btrfs_super_total_bytes(&info->super_copy);
	if (hint_byte) {
		block_group = btrfs_lookup_block_group(info, hint_byte);
1427 1428
		if (!block_group)
			hint_byte = search_start;
C
Chris Mason 已提交
1429
		block_group = btrfs_find_block_group(root, block_group,
1430
						     hint_byte, data, 1);
C
Chris Mason 已提交
1431 1432
	} else {
		block_group = btrfs_find_block_group(root,
1433 1434
						     trans->block_group,
						     search_start, data, 1);
C
Chris Mason 已提交
1435 1436
	}

1437
	total_needed += empty_size;
1438
	path = btrfs_alloc_path();
C
Chris Mason 已提交
1439
check_failed:
1440 1441 1442 1443 1444 1445
	if (!block_group) {
		block_group = btrfs_lookup_block_group(info, search_start);
		if (!block_group)
			block_group = btrfs_lookup_block_group(info,
						       orig_search_start);
	}
1446
	search_start = find_search_start(root, &block_group, search_start,
1447
					 total_needed, data);
1448
	search_start = stripe_align(root, search_start);
1449
	cached_start = search_start;
1450
	btrfs_init_path(path);
1451 1452 1453
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
1454
	path->reada = 2;
1455

1456
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
1457 1458
	if (ret < 0)
		goto error;
1459 1460 1461
	ret = find_previous_extent(root, path);
	if (ret < 0)
		goto error;
1462 1463
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1464
	while (1) {
1465
		l = path->nodes[0];
1466
		slot = path->slots[0];
1467
		if (slot >= btrfs_header_nritems(l)) {
1468
			ret = btrfs_next_leaf(root, path);
1469 1470
			if (ret == 0)
				continue;
C
Chris Mason 已提交
1471 1472
			if (ret < 0)
				goto error;
1473 1474 1475

			search_start = max(search_start,
					   block_group->key.objectid);
1476
			if (!start_found) {
1477 1478 1479 1480 1481 1482 1483
				aligned = stripe_align(root, search_start);
				ins->objectid = aligned;
				if (aligned >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				ins->offset = search_end - aligned;
1484 1485 1486
				start_found = 1;
				goto check_pending;
			}
1487 1488 1489 1490 1491 1492 1493
			ins->objectid = stripe_align(root,
						     last_byte > search_start ?
						     last_byte : search_start);
			if (search_end <= ins->objectid) {
				ret = -ENOSPC;
				goto error;
			}
C
Chris Mason 已提交
1494
			ins->offset = search_end - ins->objectid;
1495
			BUG_ON(ins->objectid >= search_end);
1496 1497
			goto check_pending;
		}
1498
		btrfs_item_key_to_cpu(l, &key, slot);
1499

1500
		if (key.objectid >= search_start && key.objectid > last_byte &&
1501
		    start_found) {
1502 1503
			if (last_byte < search_start)
				last_byte = search_start;
1504 1505 1506 1507
			aligned = stripe_align(root, last_byte);
			hole_size = key.objectid - aligned;
			if (key.objectid > aligned && hole_size >= num_bytes) {
				ins->objectid = aligned;
1508 1509
				ins->offset = hole_size;
				goto check_pending;
1510
			}
1511
		}
1512
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1513 1514
			if (!start_found && btrfs_key_type(&key) ==
			    BTRFS_BLOCK_GROUP_ITEM_KEY) {
1515
				last_byte = key.objectid;
1516 1517
				start_found = 1;
			}
1518
			goto next;
1519 1520
		}

1521

1522
		start_found = 1;
1523
		last_byte = key.objectid + key.offset;
1524

1525 1526
		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
		    last_byte >= block_group->key.objectid +
C
Chris Mason 已提交
1527 1528 1529
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
1530
				block_group->key.offset;
C
Chris Mason 已提交
1531 1532
			goto new_group;
		}
C
Chris Mason 已提交
1533
next:
1534
		path->slots[0]++;
1535
		cond_resched();
1536 1537 1538 1539 1540
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1541
	btrfs_release_path(root, path);
1542
	BUG_ON(ins->objectid < search_start);
1543

1544
	if (ins->objectid + num_bytes >= search_end)
1545
		goto enospc;
1546
	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
1547
	    ins->objectid + num_bytes > block_group->
1548 1549 1550 1551 1552
	    key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}
1553
	if (test_range_bit(&info->extent_ins, ins->objectid,
1554 1555
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
1556 1557 1558
		goto new_group;
	}
	if (test_range_bit(&info->pinned_extents, ins->objectid,
1559 1560
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
1561
		goto new_group;
1562
	}
1563
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1564 1565 1566 1567
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}
1568
	if (!data) {
1569
		block_group = btrfs_lookup_block_group(info, ins->objectid);
1570 1571
		if (block_group)
			trans->block_group = block_group;
1572
	}
1573
	ins->offset = num_bytes;
1574
	btrfs_free_path(path);
1575
	return 0;
C
Chris Mason 已提交
1576 1577

new_group:
1578
	if (search_start + num_bytes >= search_end) {
1579
enospc:
C
Chris Mason 已提交
1580
		search_start = orig_search_start;
1581 1582 1583 1584
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
1585 1586 1587
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
1588
			full_scan = 1;
1589
			data = BTRFS_BLOCK_GROUP_MIXED;
1590
		} else
1591
			wrapped = 1;
C
Chris Mason 已提交
1592
	}
1593
	block_group = btrfs_lookup_block_group(info, search_start);
1594
	cond_resched();
1595 1596
	block_group = btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
C
Chris Mason 已提交
1597 1598
	goto check_failed;

C
Chris Mason 已提交
1599
error:
1600 1601
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1602
	return ret;
1603 1604 1605 1606 1607 1608 1609 1610
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1611
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1612 1613 1614 1615
		       struct btrfs_root *root,
		       u64 num_bytes, u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 owner_offset,
		       u64 empty_size, u64 hint_byte,
C
Chris Mason 已提交
1616
		       u64 search_end, struct btrfs_key *ins, int data)
1617 1618 1619
{
	int ret;
	int pending_ret;
1620
	u64 super_used, root_used;
1621
	u64 search_start = 0;
1622
	u64 new_hint;
1623 1624
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1625
	struct btrfs_extent_item extent_item;
1626
	struct btrfs_path *path;
C
Chris Mason 已提交
1627

1628
	btrfs_set_stack_extent_refs(&extent_item, 1);
1629 1630

	new_hint = max(hint_byte, root->fs_info->alloc_start);
1631 1632
	if (new_hint < btrfs_super_total_bytes(&info->super_copy))
		hint_byte = new_hint;
1633

1634 1635 1636
	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
1637 1638
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);
C
Chris Mason 已提交
1639
	BUG_ON(ret);
1640 1641
	if (ret)
		return ret;
1642

1643
	/* block accounting for super block */
1644 1645
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1646

1647
	/* block accounting for root item */
1648 1649
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1650

1651 1652 1653 1654
	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);

1655
	if (root == extent_root) {
1656 1657 1658
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
1659
		WARN_ON(data == 1);
1660 1661 1662 1663 1664 1665
		goto update_block;
	}

	WARN_ON(trans->alloc_exclude_nr);
	trans->alloc_exclude_start = ins->objectid;
	trans->alloc_exclude_nr = ins->offset;
1666 1667
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1668

1669 1670
	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;
1671 1672 1673 1674 1675 1676 1677
	BUG_ON(ret);

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_extent_backref(trans, extent_root, path,
					  ins->objectid, root_objectid,
					  ref_generation, owner, owner_offset);
1678

C
Chris Mason 已提交
1679
	BUG_ON(ret);
1680
	btrfs_free_path(path);
1681
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1682
	pending_ret = del_pending_extents(trans, extent_root);
1683

1684
	if (ret) {
C
Chris Mason 已提交
1685
		return ret;
1686 1687
	}
	if (pending_ret) {
C
Chris Mason 已提交
1688
		return pending_ret;
1689
	}
1690 1691

update_block:
C
Chris Mason 已提交
1692 1693
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
C
Chris Mason 已提交
1694
	BUG_ON(ret);
C
Chris Mason 已提交
1695
	return 0;
1696 1697 1698 1699 1700 1701
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
1702
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1703
					     struct btrfs_root *root,
1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731
					     u32 blocksize,
					     u64 root_objectid, u64 hint,
					     u64 empty_size)
{
	u64 ref_generation;

	if (root->ref_cows)
		ref_generation = trans->transid;
	else
		ref_generation = 0;


	return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
					ref_generation, 0, 0, hint, empty_size);
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize,
					     u64 root_objectid,
					     u64 ref_generation,
					     u64 first_objectid,
					     int level,
					     u64 hint,
1732
					     u64 empty_size)
1733
{
C
Chris Mason 已提交
1734
	struct btrfs_key ins;
1735
	int ret;
1736
	struct extent_buffer *buf;
1737

1738 1739
	ret = btrfs_alloc_extent(trans, root, blocksize,
				 root_objectid, ref_generation,
1740
				 level, first_objectid, empty_size, hint,
1741
				 (u64)-1, &ins, 0);
1742
	if (ret) {
1743 1744
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
1745
	}
1746
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1747
	if (!buf) {
1748 1749 1750
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  root->root_key.objectid, ref_generation,
				  0, 0, 0);
1751 1752
		return ERR_PTR(-ENOMEM);
	}
1753 1754 1755
	btrfs_set_buffer_uptodate(buf);
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
1756 1757 1758 1759
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
1760
	btrfs_set_buffer_defrag(buf);
1761
	trans->blocks_used++;
1762 1763
	return buf;
}
1764

1765 1766 1767
static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct extent_buffer *leaf)
1768
{
1769 1770
	u64 leaf_owner;
	u64 leaf_generation;
1771
	struct btrfs_key key;
1772 1773 1774 1775 1776
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

1777 1778
	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
1779 1780 1781
	leaf_owner = btrfs_header_owner(leaf);
	leaf_generation = btrfs_header_generation(leaf);

1782
	for (i = 0; i < nritems; i++) {
1783
		u64 disk_bytenr;
1784 1785 1786

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1787 1788
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1789 1790
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
1791
			continue;
1792 1793 1794 1795
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
1796 1797
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (disk_bytenr == 0)
C
Chris Mason 已提交
1798
			continue;
1799
		ret = btrfs_free_extent(trans, root, disk_bytenr,
1800 1801 1802
				btrfs_file_extent_disk_num_bytes(leaf, fi),
				leaf_owner, leaf_generation,
				key.objectid, key.offset, 0);
1803 1804 1805 1806 1807
		BUG_ON(ret);
	}
	return 0;
}

1808 1809
static void noinline reada_walk_down(struct btrfs_root *root,
				     struct extent_buffer *node)
1810 1811 1812
{
	int i;
	u32 nritems;
1813
	u64 bytenr;
1814 1815
	int ret;
	u32 refs;
1816 1817
	int level;
	u32 blocksize;
1818

1819
	nritems = btrfs_header_nritems(node);
1820
	level = btrfs_header_level(node);
1821
	for (i = 0; i < nritems; i++) {
1822 1823 1824
		bytenr = btrfs_node_blockptr(node, i);
		blocksize = btrfs_level_size(root, level - 1);
		ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1825 1826 1827
		BUG_ON(ret);
		if (refs != 1)
			continue;
1828
		mutex_unlock(&root->fs_info->fs_mutex);
1829
		ret = readahead_tree_block(root, bytenr, blocksize);
1830 1831
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
1832 1833 1834 1835 1836
		if (ret)
			break;
	}
}

C
Chris Mason 已提交
1837 1838 1839 1840
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1841 1842 1843
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1844
{
1845 1846 1847
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
1848 1849
	struct extent_buffer *next;
	struct extent_buffer *cur;
1850
	struct extent_buffer *parent;
1851
	u32 blocksize;
C
Chris Mason 已提交
1852 1853 1854
	int ret;
	u32 refs;

1855 1856
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1857
	ret = lookup_extent_ref(trans, root,
1858 1859
				path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
C
Chris Mason 已提交
1860 1861 1862
	BUG_ON(ret);
	if (refs > 1)
		goto out;
1863

C
Chris Mason 已提交
1864 1865 1866
	/*
	 * walk down to the last node level and free all the leaves
	 */
1867
	while(*level >= 0) {
1868 1869
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1870
		cur = path->nodes[*level];
1871 1872

		if (*level > 0 && path->slots[*level] == 0)
1873
			reada_walk_down(root, cur);
1874

1875
		if (btrfs_header_level(cur) != *level)
C
Chris Mason 已提交
1876
			WARN_ON(1);
1877

1878
		if (path->slots[*level] >=
1879
		    btrfs_header_nritems(cur))
C
Chris Mason 已提交
1880
			break;
1881 1882 1883 1884 1885
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
1886 1887 1888
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1889 1890
		BUG_ON(ret);
		if (refs != 1) {
1891 1892 1893
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
C
Chris Mason 已提交
1894
			path->slots[*level]++;
1895
			ret = btrfs_free_extent(trans, root, bytenr,
1896 1897
						blocksize, root_owner,
						root_gen, 0, 0, 1);
C
Chris Mason 已提交
1898 1899 1900
			BUG_ON(ret);
			continue;
		}
1901
		next = btrfs_find_tree_block(root, bytenr, blocksize);
1902 1903
		if (!next || !btrfs_buffer_uptodate(next)) {
			free_extent_buffer(next);
1904
			mutex_unlock(&root->fs_info->fs_mutex);
1905
			next = read_tree_block(root, bytenr, blocksize);
1906 1907 1908
			mutex_lock(&root->fs_info->fs_mutex);

			/* we dropped the lock, check one more time */
1909 1910
			ret = lookup_extent_ref(trans, root, bytenr,
						blocksize, &refs);
1911 1912
			BUG_ON(ret);
			if (refs != 1) {
1913 1914 1915 1916
				parent = path->nodes[*level];
				root_owner = btrfs_header_owner(parent);
				root_gen = btrfs_header_generation(parent);

1917
				path->slots[*level]++;
1918
				free_extent_buffer(next);
1919 1920 1921 1922
				ret = btrfs_free_extent(trans, root, bytenr,
							blocksize,
							root_owner,
							root_gen, 0, 0, 1);
1923 1924 1925 1926
				BUG_ON(ret);
				continue;
			}
		}
1927
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1928
		if (path->nodes[*level-1])
1929
			free_extent_buffer(path->nodes[*level-1]);
C
Chris Mason 已提交
1930
		path->nodes[*level-1] = next;
1931
		*level = btrfs_header_level(next);
C
Chris Mason 已提交
1932 1933 1934
		path->slots[*level] = 0;
	}
out:
1935 1936
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1937 1938 1939 1940 1941 1942 1943 1944 1945 1946

	if (path->nodes[*level] == root->node) {
		root_owner = root->root_key.objectid;
		parent = path->nodes[*level];
	} else {
		parent = path->nodes[*level + 1];
		root_owner = btrfs_header_owner(parent);
	}

	root_gen = btrfs_header_generation(parent);
1947
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1948 1949
				path->nodes[*level]->len,
				root_owner, root_gen, 0, 0, 1);
1950
	free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1951 1952 1953 1954 1955 1956
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1957 1958 1959 1960 1961
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1962 1963 1964
static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1965
{
1966 1967 1968
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
C
Chris Mason 已提交
1969 1970 1971
	int i;
	int slot;
	int ret;
1972

C
Chris Mason 已提交
1973
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1974
		slot = path->slots[i];
1975 1976 1977 1978
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			node = path->nodes[i];
C
Chris Mason 已提交
1979 1980
			path->slots[i]++;
			*level = i;
1981
			WARN_ON(*level == 0);
1982
			btrfs_node_key(node, &disk_key, path->slots[i]);
1983
			memcpy(&root_item->drop_progress,
1984
			       &disk_key, sizeof(disk_key));
1985
			root_item->drop_level = i;
C
Chris Mason 已提交
1986 1987
			return 0;
		} else {
1988 1989 1990 1991 1992 1993 1994 1995 1996 1997
			if (path->nodes[*level] == root->node) {
				root_owner = root->root_key.objectid;
				root_gen =
				   btrfs_header_generation(path->nodes[*level]);
			} else {
				struct extent_buffer *node;
				node = path->nodes[*level + 1];
				root_owner = btrfs_header_owner(node);
				root_gen = btrfs_header_generation(node);
			}
1998
			ret = btrfs_free_extent(trans, root,
1999
						path->nodes[*level]->start,
2000 2001
						path->nodes[*level]->len,
						root_owner, root_gen, 0, 0, 1);
2002
			BUG_ON(ret);
2003
			free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
2004
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
2005 2006 2007 2008 2009 2010
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
2011 2012 2013 2014 2015
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
2016
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2017
			*root)
C
Chris Mason 已提交
2018
{
2019
	int ret = 0;
C
Chris Mason 已提交
2020
	int wret;
C
Chris Mason 已提交
2021
	int level;
2022
	struct btrfs_path *path;
C
Chris Mason 已提交
2023 2024
	int i;
	int orig_level;
2025
	struct btrfs_root_item *root_item = &root->root_item;
C
Chris Mason 已提交
2026

2027 2028
	path = btrfs_alloc_path();
	BUG_ON(!path);
C
Chris Mason 已提交
2029

2030
	level = btrfs_header_level(root->node);
C
Chris Mason 已提交
2031
	orig_level = level;
2032 2033
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path->nodes[level] = root->node;
2034
		extent_buffer_get(root->node);
2035 2036 2037
		path->slots[level] = 0;
	} else {
		struct btrfs_key key;
2038 2039
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;
2040

2041
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2042 2043
		level = root_item->drop_level;
		path->lowest_level = level;
2044
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2045
		if (wret < 0) {
2046 2047 2048
			ret = wret;
			goto out;
		}
2049 2050 2051 2052
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
2053
	}
C
Chris Mason 已提交
2054
	while(1) {
2055
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
2056
		if (wret > 0)
C
Chris Mason 已提交
2057
			break;
C
Chris Mason 已提交
2058 2059 2060
		if (wret < 0)
			ret = wret;

2061
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
2062
		if (wret > 0)
C
Chris Mason 已提交
2063
			break;
C
Chris Mason 已提交
2064 2065
		if (wret < 0)
			ret = wret;
2066 2067
		ret = -EAGAIN;
		break;
C
Chris Mason 已提交
2068
	}
C
Chris Mason 已提交
2069
	for (i = 0; i <= orig_level; i++) {
2070
		if (path->nodes[i]) {
2071
			free_extent_buffer(path->nodes[i]);
2072
			path->nodes[i] = NULL;
C
Chris Mason 已提交
2073
		}
C
Chris Mason 已提交
2074
	}
2075
out:
2076
	btrfs_free_path(path);
C
Chris Mason 已提交
2077
	return ret;
C
Chris Mason 已提交
2078
}
C
Chris Mason 已提交
2079

2080
int btrfs_free_block_groups(struct btrfs_fs_info *info)
C
Chris Mason 已提交
2081
{
2082 2083
	u64 start;
	u64 end;
2084
	u64 ptr;
C
Chris Mason 已提交
2085 2086
	int ret;
	while(1) {
2087 2088 2089
		ret = find_first_extent_bit(&info->block_group_cache, 0,
					    &start, &end, (unsigned int)-1);
		if (ret)
C
Chris Mason 已提交
2090
			break;
2091 2092 2093
		ret = get_state_private(&info->block_group_cache, start, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);
2094 2095
		clear_extent_bits(&info->block_group_cache, start,
				  end, (unsigned int)-1, GFP_NOFS);
C
Chris Mason 已提交
2096
	}
2097
	while(1) {
2098 2099 2100
		ret = find_first_extent_bit(&info->free_space_cache, 0,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
2101
			break;
2102 2103
		clear_extent_dirty(&info->free_space_cache, start,
				   end, GFP_NOFS);
2104
	}
C
Chris Mason 已提交
2105 2106 2107
	return 0;
}

2108 2109
static int noinline relocate_inode_pages(struct inode *inode, u64 start,
					 u64 len)
2110 2111 2112 2113 2114 2115 2116 2117 2118 2119
{
	u64 page_start;
	u64 page_end;
	u64 delalloc_start;
	u64 existing_delalloc;
	unsigned long last_index;
	unsigned long i;
	struct page *page;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2120 2121 2122
	struct file_ra_state *ra;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
2123 2124

	mutex_lock(&inode->i_mutex);
2125
	i = start >> PAGE_CACHE_SHIFT;
2126 2127
	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;

2128 2129 2130
	file_ra_state_init(ra, inode->i_mapping);
	btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
	kfree(ra);
2131

2132
	for (; i <= last_index; i++) {
2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174
		page = grab_cache_page(inode->i_mapping, i);
		if (!page)
			goto out_unlock;
		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				goto out_unlock;
			}
		}
		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;

		lock_extent(em_tree, page_start, page_end, GFP_NOFS);

		delalloc_start = page_start;
		existing_delalloc =
			count_range_bits(&BTRFS_I(inode)->extent_tree,
					 &delalloc_start, page_end,
					 PAGE_CACHE_SIZE, EXTENT_DELALLOC);

		set_extent_delalloc(em_tree, page_start,
				    page_end, GFP_NOFS);

		spin_lock(&root->fs_info->delalloc_lock);
		root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
						 existing_delalloc;
		spin_unlock(&root->fs_info->delalloc_lock);

		unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
	}

out_unlock:
	mutex_unlock(&inode->i_mutex);
	return 0;
}

2175 2176 2177
/*
 * note, this releases the path
 */
2178
static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2179
				  struct btrfs_path *path,
2180
				  struct btrfs_key *extent_key)
2181 2182 2183
{
	struct inode *inode;
	struct btrfs_root *found_root;
2184 2185 2186 2187 2188 2189
	struct btrfs_key *root_location;
	struct btrfs_extent_ref *ref;
	u64 ref_root;
	u64 ref_gen;
	u64 ref_objectid;
	u64 ref_offset;
2190 2191
	int ret;

2192 2193 2194 2195 2196 2197 2198 2199 2200 2201
	ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
			     struct btrfs_extent_ref);
	ref_root = btrfs_ref_root(path->nodes[0], ref);
	ref_gen = btrfs_ref_generation(path->nodes[0], ref);
	ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
	ref_offset = btrfs_ref_offset(path->nodes[0], ref);
	btrfs_release_path(extent_root, path);

	root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
	root_location->objectid = ref_root;
2202
	if (ref_gen == 0)
2203
		root_location->offset = 0;
2204
	else
2205 2206
		root_location->offset = (u64)-1;
	root_location->type = BTRFS_ROOT_ITEM_KEY;
2207 2208

	found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2209
						root_location);
2210
	BUG_ON(!found_root);
2211
	kfree(root_location);
2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258

	if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		mutex_unlock(&extent_root->fs_info->fs_mutex);
		inode = btrfs_iget_locked(extent_root->fs_info->sb,
					  ref_objectid, found_root);
		if (inode->i_state & I_NEW) {
			/* the inode and parent dir are two different roots */
			BTRFS_I(inode)->root = found_root;
			BTRFS_I(inode)->location.objectid = ref_objectid;
			BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
			BTRFS_I(inode)->location.offset = 0;
			btrfs_read_locked_inode(inode);
			unlock_new_inode(inode);

		}
		/* this can happen if the reference is not against
		 * the latest version of the tree root
		 */
		if (is_bad_inode(inode)) {
			mutex_lock(&extent_root->fs_info->fs_mutex);
			goto out;
		}
		relocate_inode_pages(inode, ref_offset, extent_key->offset);
		/* FIXME, data=ordered will help get rid of this */
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
		mutex_lock(&extent_root->fs_info->fs_mutex);
	} else {
		struct btrfs_trans_handle *trans;
		struct btrfs_key found_key;
		struct extent_buffer *eb;
		int level;
		int i;

		trans = btrfs_start_transaction(found_root, 1);
		eb = read_tree_block(found_root, extent_key->objectid,
				     extent_key->offset);
		level = btrfs_header_level(eb);

		if (level == 0)
			btrfs_item_key_to_cpu(eb, &found_key, 0);
		else
			btrfs_node_key_to_cpu(eb, &found_key, 0);

		free_extent_buffer(eb);

		path->lowest_level = level;
2259
		path->reada = 2;
2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276
		ret = btrfs_search_slot(trans, found_root, &found_key, path,
					0, 1);
		path->lowest_level = 0;
		for (i = level; i < BTRFS_MAX_LEVEL; i++) {
			if (!path->nodes[i])
				break;
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}
		btrfs_release_path(found_root, path);
		btrfs_end_transaction(trans, found_root);
	}

out:
	return 0;
}

2277 2278 2279
static int noinline relocate_one_extent(struct btrfs_root *extent_root,
					struct btrfs_path *path,
					struct btrfs_key *extent_key)
2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u32 nritems;
	u32 item_size;
	int ret = 0;

	key.objectid = extent_key->objectid;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = 0;

	while(1) {
		ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);

		BUG_ON(ret == 0);

		if (ret < 0)
			goto out;

		ret = 0;
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] == nritems)
			goto out;

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != extent_key->objectid)
			break;

		if (found_key.type != BTRFS_EXTENT_REF_KEY)
			break;

		key.offset = found_key.offset + 1;
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);

2316
		ret = relocate_one_reference(extent_root, path, extent_key);
2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335
		if (ret)
			goto out;
	}
	ret = 0;
out:
	btrfs_release_path(extent_root, path);
	return ret;
}

int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_path *path;
	u64 cur_byte;
	u64 total_found;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_map_tree *block_group_cache;
	struct btrfs_key key;
2336
	struct btrfs_key found_key;
2337 2338 2339
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
2340
	int progress = 0;
2341 2342 2343 2344 2345

	btrfs_set_super_total_bytes(&info->super_copy, new_size);
	block_group_cache = &info->block_group_cache;
	path = btrfs_alloc_path();
	root = root->fs_info->extent_root;
2346
	path->reada = 2;
2347 2348 2349 2350 2351 2352

again:
	total_found = 0;
	key.objectid = new_size;
	key.offset = 0;
	key.type = 0;
2353
	cur_byte = key.objectid;
2354

2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	ret = find_previous_extent(root, path);
	if (ret < 0)
		goto out;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid + found_key.offset > new_size) {
			cur_byte = found_key.objectid;
			key.objectid = cur_byte;
		}
	}
	btrfs_release_path(root, path);

	while(1) {
2373 2374 2375
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
2376

2377
		leaf = path->nodes[0];
2378 2379 2380 2381 2382 2383 2384 2385 2386
		nritems = btrfs_header_nritems(leaf);
next:
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				ret = 0;
				break;
2387
			}
2388 2389
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
2390
		}
2391 2392

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405

		if (progress && need_resched()) {
			memcpy(&key, &found_key, sizeof(key));
			mutex_unlock(&root->fs_info->fs_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->fs_mutex);
			btrfs_release_path(root, path);
			btrfs_search_slot(NULL, root, &key, path, 0, 0);
			progress = 0;
			goto next;
		}
		progress = 1;

2406 2407
		if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
		    found_key.objectid + found_key.offset <= cur_byte) {
2408 2409 2410
			path->slots[0]++;
			goto next;
		}
2411

2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438
		total_found++;
		cur_byte = found_key.objectid + found_key.offset;
		key.objectid = cur_byte;
		btrfs_release_path(root, path);
		ret = relocate_one_extent(root, path, &found_key);
	}

	btrfs_release_path(root, path);

	if (total_found > 0) {
		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);

		mutex_unlock(&root->fs_info->fs_mutex);
		btrfs_clean_old_snapshots(tree_root);
		mutex_lock(&root->fs_info->fs_mutex);

		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);
		goto again;
	}

	trans = btrfs_start_transaction(root, 1);
	key.objectid = new_size;
	key.offset = 0;
	key.type = 0;
	while(1) {
2439 2440
		u64 ptr;

2441 2442 2443
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
2444

2445 2446
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
2447 2448 2449 2450 2451 2452 2453 2454
bg_next:
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret == 1) {
				ret = 0;
				break;
2455
			}
2456
			leaf = path->nodes[0];
2457 2458 2459 2460 2461 2462 2463 2464
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

			/*
			 * btrfs_next_leaf doesn't cow buffers, we have to
			 * do the search again
			 */
			memcpy(&key, &found_key, sizeof(key));
			btrfs_release_path(root, path);
2465
			goto resched_check;
2466 2467 2468 2469 2470 2471 2472 2473
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
			printk("shrinker found key %Lu %u %Lu\n",
				found_key.objectid, found_key.type,
				found_key.offset);
			path->slots[0]++;
2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487
			goto bg_next;
		}
		ret = get_state_private(&info->block_group_cache,
					found_key.objectid, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);

		clear_extent_bits(&info->block_group_cache, found_key.objectid,
				  found_key.objectid + found_key.offset - 1,
				  (unsigned int)-1, GFP_NOFS);

		key.objectid = found_key.objectid + 1;
		btrfs_del_item(trans, root, path);
		btrfs_release_path(root, path);
2488 2489 2490 2491 2492 2493
resched_check:
		if (need_resched()) {
			mutex_unlock(&root->fs_info->fs_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->fs_mutex);
		}
2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509
	}
	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
			   GFP_NOFS);
	btrfs_commit_transaction(trans, root);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 new_size)
{
	struct btrfs_path *path;
	u64 nr = 0;
	u64 cur_byte;
	u64 old_size;
2510
	unsigned long rem;
2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547
	struct btrfs_block_group_cache *cache;
	struct btrfs_block_group_item *item;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_map_tree *block_group_cache;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int ret;
	int bit;

	old_size = btrfs_super_total_bytes(&info->super_copy);
	block_group_cache = &info->block_group_cache;

	root = info->extent_root;

	cache = btrfs_lookup_block_group(root->fs_info, old_size - 1);

	cur_byte = cache->key.objectid + cache->key.offset;
	if (cur_byte >= new_size)
		goto set_size;

	key.offset = BTRFS_BLOCK_GROUP_SIZE;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(cur_byte < new_size) {
		key.objectid = cur_byte;
		ret = btrfs_insert_empty_item(trans, root, path, &key,
				        sizeof(struct btrfs_block_group_item));
		BUG_ON(ret);
		leaf = path->nodes[0];
		item = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_block_group_item);

		btrfs_set_disk_block_group_used(leaf, item, 0);
2548 2549
		div_long_long_rem(nr, 3, &rem);
		if (rem) {
2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589
			btrfs_set_disk_block_group_flags(leaf, item,
						 BTRFS_BLOCK_GROUP_DATA);
		} else {
			btrfs_set_disk_block_group_flags(leaf, item, 0);
		}
		nr++;

		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		BUG_ON(!cache);

		read_extent_buffer(leaf, &cache->item, (unsigned long)item,
				   sizeof(cache->item));

		memcpy(&cache->key, &key, sizeof(key));
		cache->cached = 0;
		cache->pinned = 0;
		cur_byte = key.objectid + key.offset;
		btrfs_release_path(root, path);

		if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
			bit = BLOCK_GROUP_DATA;
			cache->data = BTRFS_BLOCK_GROUP_DATA;
		} else {
			bit = BLOCK_GROUP_METADATA;
			cache->data = 0;
		}

		/* use EXTENT_LOCKED to prevent merging */
		set_extent_bits(block_group_cache, key.objectid,
				key.objectid + key.offset - 1,
				bit | EXTENT_LOCKED, GFP_NOFS);
		set_state_private(block_group_cache, key.objectid,
				  (unsigned long)cache);
	}
	btrfs_free_path(path);
set_size:
	btrfs_set_super_total_bytes(&info->super_copy, new_size);
	return 0;
}

C
Chris Mason 已提交
2590 2591 2592 2593 2594
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
2595
	int bit;
C
Chris Mason 已提交
2596
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
2597
	struct btrfs_fs_info *info = root->fs_info;
2598
	struct extent_map_tree *block_group_cache;
C
Chris Mason 已提交
2599 2600
	struct btrfs_key key;
	struct btrfs_key found_key;
2601
	struct extent_buffer *leaf;
2602 2603

	block_group_cache = &info->block_group_cache;
C
Chris Mason 已提交
2604

C
Chris Mason 已提交
2605
	root = info->extent_root;
C
Chris Mason 已提交
2606
	key.objectid = 0;
2607
	key.offset = BTRFS_BLOCK_GROUP_SIZE;
C
Chris Mason 已提交
2608 2609 2610 2611 2612 2613 2614
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
2615
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
2616 2617 2618 2619 2620
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
2621 2622
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
C
Chris Mason 已提交
2623 2624 2625 2626 2627
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
2628

2629 2630 2631
		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
C
Chris Mason 已提交
2632
		memcpy(&cache->key, &found_key, sizeof(found_key));
2633
		cache->cached = 0;
2634
		cache->pinned = 0;
C
Chris Mason 已提交
2635 2636
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
2637

2638 2639 2640 2641
		if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
			bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
			cache->data = BTRFS_BLOCK_GROUP_MIXED;
		} else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2642
			bit = BLOCK_GROUP_DATA;
2643
			cache->data = BTRFS_BLOCK_GROUP_DATA;
2644 2645 2646
		} else {
			bit = BLOCK_GROUP_METADATA;
			cache->data = 0;
2647
		}
2648 2649 2650 2651 2652 2653

		/* use EXTENT_LOCKED to prevent merging */
		set_extent_bits(block_group_cache, found_key.objectid,
				found_key.objectid + found_key.offset - 1,
				bit | EXTENT_LOCKED, GFP_NOFS);
		set_state_private(block_group_cache, found_key.objectid,
J
Jens Axboe 已提交
2654
				  (unsigned long)cache);
2655

C
Chris Mason 已提交
2656
		if (key.objectid >=
2657
		    btrfs_super_total_bytes(&info->super_copy))
C
Chris Mason 已提交
2658 2659 2660 2661 2662 2663
			break;
	}

	btrfs_free_path(path);
	return 0;
}