extent-tree.c 53.5 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Z
Zach Brown 已提交
19
#include <linux/sched.h>
20
#include <linux/crc32c.h>
21
#include "hash.h"
22 23 24
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
25
#include "transaction.h"
26

27 28 29 30
#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
#define BLOCK_GROUP_DIRTY EXTENT_DIRTY

31 32
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
33 34
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
35

36 37 38 39 40 41
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
42
	struct extent_buffer *leaf;
43
	struct extent_map_tree *free_space_cache;
44 45 46
	int slot;
	u64 last = 0;
	u64 hole_size;
47
	u64 first_free;
48 49
	int found = 0;

50 51 52
	if (!block_group)
		return 0;

53
	root = root->fs_info->extent_root;
54
	free_space_cache = &root->fs_info->free_space_cache;
55 56 57

	if (block_group->cached)
		return 0;
58

59 60 61
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
62

63
	path->reada = 2;
64
	first_free = block_group->key.objectid;
65 66
	key.objectid = block_group->key.objectid;
	key.offset = 0;
67

68 69
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
70

71 72
	if (ret < 0)
		return ret;
73

74 75
	if (ret && path->slots[0] > 0)
		path->slots[0]--;
76

77
	while(1) {
78
		leaf = path->nodes[0];
79
		slot = path->slots[0];
80
		if (slot >= btrfs_header_nritems(leaf)) {
81
			ret = btrfs_next_leaf(root, path);
82 83
			if (ret < 0)
				goto err;
84
			if (ret == 0) {
85
				continue;
86
			} else {
87 88 89
				break;
			}
		}
90

91
		btrfs_item_key_to_cpu(leaf, &key, slot);
92
		if (key.objectid < block_group->key.objectid) {
93 94
			if (btrfs_key_type(&key) != BTRFS_EXTENT_REF_KEY &&
			    key.objectid + key.offset > first_free)
95 96 97 98
				first_free = key.objectid + key.offset;
			goto next;
		}

99 100 101 102
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			break;
		}
103

104 105
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
106
				last = first_free;
107 108
				found = 1;
			}
109 110 111 112 113
			if (key.objectid > last) {
				hole_size = key.objectid - last;
				set_extent_dirty(free_space_cache, last,
						 last + hole_size - 1,
						 GFP_NOFS);
114 115
			}
			last = key.objectid + key.offset;
116
		}
117
next:
118 119 120
		path->slots[0]++;
	}

121 122 123 124 125 126
	if (!found)
		last = first_free;
	if (block_group->key.objectid +
	    block_group->key.offset > last) {
		hole_size = block_group->key.objectid +
			block_group->key.offset - last;
127 128
		set_extent_dirty(free_space_cache, last,
				 last + hole_size - 1, GFP_NOFS);
129
	}
130
	block_group->cached = 1;
131
err:
132 133 134 135
	btrfs_free_path(path);
	return 0;
}

136 137
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
138
							 u64 bytenr)
C
Chris Mason 已提交
139
{
140 141 142 143 144
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 ptr;
	u64 start;
	u64 end;
C
Chris Mason 已提交
145 146
	int ret;

147 148
	block_group_cache = &info->block_group_cache;
	ret = find_first_extent_bit(block_group_cache,
149
				    bytenr, &start, &end,
150
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
C
Chris Mason 已提交
151
	if (ret) {
152
		return NULL;
C
Chris Mason 已提交
153
	}
154 155 156 157
	ret = get_state_private(block_group_cache, start, &ptr);
	if (ret)
		return NULL;

J
Jens Axboe 已提交
158
	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
Y
Yan 已提交
159
	if (block_group->key.objectid <= bytenr && bytenr <
160 161
	    block_group->key.objectid + block_group->key.offset)
		return block_group;
C
Chris Mason 已提交
162 163
	return NULL;
}
164 165
static u64 find_search_start(struct btrfs_root *root,
			     struct btrfs_block_group_cache **cache_ret,
166 167
			     u64 search_start, int num,
			     int data, int full_scan)
168 169 170
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
171
	u64 last;
172 173
	u64 start = 0;
	u64 end = 0;
174
	u64 cache_miss = 0;
175
	int wrapped = 0;
176

177
	if (!cache) {
178
		goto out;
179
	}
180
again:
181 182 183
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;
184

185 186
	last = max(search_start, cache->key.objectid);

187
	while(1) {
188 189
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
190
		if (ret) {
191 192
			if (!cache_miss)
				cache_miss = last;
193 194
			goto new_group;
		}
195 196 197

		start = max(last, start);
		last = end + 1;
198 199 200
		if (last - start < num) {
			if (last == cache->key.objectid + cache->key.offset)
				cache_miss = start;
201
			continue;
202 203
		}
		if (data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
204
		    start + num > cache->key.objectid + cache->key.offset)
205
			goto new_group;
206
		return start;
207 208
	}
out:
209 210 211 212 213 214 215
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n",
		       search_start);
		WARN_ON(1);
		return search_start;
	}
216
	return search_start;
217 218

new_group:
219
	last = cache->key.objectid + cache->key.offset;
220
wrapped:
221
	cache = btrfs_lookup_block_group(root->fs_info, last);
222
	if (!cache) {
223
no_cache:
224 225 226 227 228 229
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			data = BTRFS_BLOCK_GROUP_MIXED;
			goto wrapped;
		}
230
		goto out;
231
	}
232 233 234 235 236
	if (cache_miss && !cache->cached) {
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_block_group(root->fs_info, last);
	}
237
	cache = btrfs_find_block_group(root, cache, last, data, 0);
238 239
	if (!cache)
		goto no_cache;
240
	*cache_ret = cache;
241
	cache_miss = 0;
242 243 244
	goto again;
}

C
Chris Mason 已提交
245 246
static u64 div_factor(u64 num, int factor)
{
247 248
	if (factor == 10)
		return num;
C
Chris Mason 已提交
249 250 251 252 253
	num *= factor;
	do_div(num, 10);
	return num;
}

254 255
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
256
						 *hint, u64 search_start,
257
						 int data, int owner)
C
Chris Mason 已提交
258
{
259 260
	struct btrfs_block_group_cache *cache;
	struct extent_map_tree *block_group_cache;
261
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
262 263
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
264 265
	u64 last = 0;
	u64 hint_last;
266 267 268 269 270
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	int bit;
C
Chris Mason 已提交
271
	int ret;
272
	int full_search = 0;
273
	int factor = 8;
C
Chris Mason 已提交
274
	int data_swap = 0;
275

276 277
	block_group_cache = &info->block_group_cache;

278
	if (!owner)
279
		factor = 8;
C
Chris Mason 已提交
280

281
	if (data == BTRFS_BLOCK_GROUP_MIXED) {
282
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
283 284
		factor = 10;
	} else if (data)
285 286 287
		bit = BLOCK_GROUP_DATA;
	else
		bit = BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
288 289 290

	if (search_start) {
		struct btrfs_block_group_cache *shint;
291
		shint = btrfs_lookup_block_group(info, search_start);
292 293
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
C
Chris Mason 已提交
294
			used = btrfs_block_group_used(&shint->item);
295 296
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
C
Chris Mason 已提交
297 298 299 300
				return shint;
			}
		}
	}
301 302
	if (hint && (hint->data == data ||
		     hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
303
		used = btrfs_block_group_used(&hint->item);
304 305
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
306 307
			return hint;
		}
308
		last = hint->key.objectid + hint->key.offset;
309 310
		hint_last = last;
	} else {
311 312 313 314 315 316
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
317 318
	}
again:
C
Chris Mason 已提交
319
	while(1) {
320 321 322
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
C
Chris Mason 已提交
323
			break;
324 325 326 327 328

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
329
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
330 331 332 333 334 335 336
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (full_search)
			free_check = cache->key.offset;
		else
			free_check = div_factor(cache->key.offset, factor);
337
		if (used + cache->pinned < free_check) {
338 339
			found_group = cache;
			goto found;
C
Chris Mason 已提交
340
		}
341
		cond_resched();
C
Chris Mason 已提交
342
	}
343
	if (!full_search) {
C
Chris Mason 已提交
344
		last = search_start;
345 346 347
		full_search = 1;
		goto again;
	}
C
Chris Mason 已提交
348 349
	if (!data_swap) {
		data_swap = 1;
350
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
351 352 353
		last = search_start;
		goto again;
	}
C
Chris Mason 已提交
354
found:
355
	return found_group;
C
Chris Mason 已提交
356 357
}

358
static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
359 360 361 362 363 364 365 366
			   u64 owner, u64 owner_offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
367 368
	lenum = cpu_to_le64(ref_generation);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
369

370
#if 0
371 372 373 374
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner_offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
375
#endif
376 377 378
	return ((u64)high_crc << 32) | (u64)low_crc;
}

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
static int match_extent_ref(struct extent_buffer *leaf,
			    struct btrfs_extent_ref *disk_ref,
			    struct btrfs_extent_ref *cpu_ref)
{
	int ret;
	int len;

	if (cpu_ref->objectid)
		len = sizeof(*cpu_ref);
	else
		len = 2 * sizeof(u64);
	ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
				   len);
	return ret == 0;
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, u64 bytenr,
				 u64 root_objectid, u64 ref_generation,
				 u64 owner, u64 owner_offset, int del)
400 401 402
{
	u64 hash;
	struct btrfs_key key;
403
	struct btrfs_key found_key;
404
	struct btrfs_extent_ref ref;
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;
	int ret;
	int ret2;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path,
					del ? -1 : 0, del);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		if (ret != 0) {
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				if (ret2)
					goto out;
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
				goto out;
			key.offset = found_key.offset;
			if (del) {
				btrfs_release_path(root, path);
				continue;
			}
		}
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
			ret = 0;
			goto out;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
	}
out:
	return ret;
}

460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume (in theory, not implemented yet)
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks the same as the create case, but trans->transid
 * will be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a file extent is removed either during snapshot deletion or file
 * truncation, the corresponding back reference is found
 * by searching for:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * Storing sufficient information for a full reverse mapping of a btree
 * block would require storing the lowest key of the block in the backref,
 * and it would require updating that lowest key either before write out or
 * every time it changed.  Instead, the objectid of the lowest key is stored
 * along with the level of the tree block.  This provides a hint
 * about where in the btree the block can be found.  Searches through the
 * btree only need to look for a pointer to that block, so they stop one
 * level higher than the level recorded in the backref.
 *
 * Some btrees do not do reference counting on their extents.  These
 * include the extent tree and the tree of tree roots.  Backrefs for these
 * trees always have a generation of zero.
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level)
 *
 * When a tree block is cow'd in a reference counted root,
 * new back references are added for all the blocks it points to.
 * These are of the form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, lowest_key_objectid, level)
 *
 * Because the lowest_key_objectid and the level are just hints
 * they are not used when backrefs are deleted.  When a backref is deleted:
 *
 * if backref was for a tree root:
 *     root_objectid = root->root_key.objectid
 * else
 *     root_objectid = btrfs_header_owner(parent)
 *
 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
 *
 * Back Reference Key hashing:
 *
 * Back references have four fields, each 64 bits long.  Unfortunately,
 * This is hashed into a single 64 bit number and placed into the key offset.
 * The key objectid corresponds to the first byte in the extent, and the
 * key type is set to BTRFS_EXTENT_REF_KEY
 */
548 549 550 551 552 553 554 555 556 557
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, u64 bytenr,
				 u64 root_objectid, u64 ref_generation,
				 u64 owner, u64 owner_offset)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_extent_ref ref;
	struct btrfs_extent_ref *disk_ref;
558 559 560
	int ret;

	btrfs_set_stack_ref_root(&ref, root_objectid);
561
	btrfs_set_stack_ref_generation(&ref, ref_generation);
562 563 564
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

565 566
	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
567 568 569 570 571 572
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
	while (ret == -EEXIST) {
573 574 575 576 577 578 579 580
		disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref))
			goto out;
		key.offset++;
		btrfs_release_path(root, path);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      sizeof(ref));
581
	}
582 583 584 585 586 587 588 589 590 591
	if (ret)
		goto out;
	disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_extent_ref);
	write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
			    sizeof(ref));
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
592 593
}

C
Chris Mason 已提交
594 595
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
596
				u64 bytenr, u64 num_bytes,
597
				u64 root_objectid, u64 ref_generation,
598
				u64 owner, u64 owner_offset)
C
Chris Mason 已提交
599
{
600
	struct btrfs_path *path;
C
Chris Mason 已提交
601
	int ret;
C
Chris Mason 已提交
602
	struct btrfs_key key;
603
	struct extent_buffer *l;
C
Chris Mason 已提交
604
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
605
	u32 refs;
C
Chris Mason 已提交
606

607
	WARN_ON(num_bytes < root->sectorsize);
608
	path = btrfs_alloc_path();
609 610
	if (!path)
		return -ENOMEM;
611

612
	key.objectid = bytenr;
613
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
614
	key.offset = num_bytes;
615
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
616
				0, 1);
617 618
	if (ret < 0)
		return ret;
619
	if (ret != 0) {
620
		BUG();
621
	}
C
Chris Mason 已提交
622
	BUG_ON(ret != 0);
623
	l = path->nodes[0];
624
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
625 626
	refs = btrfs_extent_refs(l, item);
	btrfs_set_extent_refs(l, item, refs + 1);
627
	btrfs_mark_buffer_dirty(path->nodes[0]);
628

629
	btrfs_release_path(root->fs_info->extent_root, path);
630 631 632 633 634

	ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
					  path, bytenr, root_objectid,
					  ref_generation, owner, owner_offset);
	BUG_ON(ret);
635
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
636
	del_pending_extents(trans, root->fs_info->extent_root);
637 638

	btrfs_free_path(path);
C
Chris Mason 已提交
639 640 641
	return 0;
}

642 643 644 645 646 647 648 649
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
{
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	return 0;
}

C
Chris Mason 已提交
650
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
651 652
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u32 *refs)
653
{
654
	struct btrfs_path *path;
655
	int ret;
C
Chris Mason 已提交
656
	struct btrfs_key key;
657
	struct extent_buffer *l;
C
Chris Mason 已提交
658
	struct btrfs_extent_item *item;
659

660
	WARN_ON(num_bytes < root->sectorsize);
661
	path = btrfs_alloc_path();
662 663
	key.objectid = bytenr;
	key.offset = num_bytes;
664
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
665
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
666
				0, 0);
667 668
	if (ret < 0)
		goto out;
669 670
	if (ret != 0) {
		btrfs_print_leaf(root, path->nodes[0]);
671
		printk("failed to find block number %Lu\n", bytenr);
672
		BUG();
673 674
	}
	l = path->nodes[0];
675
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
676
	*refs = btrfs_extent_refs(l, item);
677
out:
678
	btrfs_free_path(path);
679 680 681
	return 0;
}

C
Chris Mason 已提交
682
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
683
		       struct btrfs_root *root, u64 owner_objectid)
C
Chris Mason 已提交
684
{
685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
	u64 generation;
	u64 key_objectid;
	u64 level;
	u32 nritems;
	struct btrfs_disk_key disk_key;

	level = btrfs_header_level(root->node);
	generation = trans->transid;
	nritems = btrfs_header_nritems(root->node);
	if (nritems > 0) {
		if (level == 0)
			btrfs_item_key(root->node, &disk_key, 0);
		else
			btrfs_node_key(root->node, &disk_key, 0);
		key_objectid = btrfs_disk_key_objectid(&disk_key);
	} else {
		key_objectid = 0;
	}
703
	return btrfs_inc_extent_ref(trans, root, root->node->start,
704 705
				    root->node->len, owner_objectid,
				    generation, 0, 0);
C
Chris Mason 已提交
706 707
}

708
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
709
		  struct extent_buffer *buf)
C
Chris Mason 已提交
710
{
711
	u64 bytenr;
712 713
	u32 nritems;
	struct btrfs_key key;
714
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
715
	int i;
716
	int level;
717
	int ret;
718
	int faili;
719

720
	if (!root->ref_cows)
721
		return 0;
722

723
	level = btrfs_header_level(buf);
724 725
	nritems = btrfs_header_nritems(buf);
	for (i = 0; i < nritems; i++) {
726 727
		if (level == 0) {
			u64 disk_bytenr;
728 729
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
730
				continue;
731
			fi = btrfs_item_ptr(buf, i,
732
					    struct btrfs_file_extent_item);
733
			if (btrfs_file_extent_type(buf, fi) ==
734 735
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
736 737
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
C
Chris Mason 已提交
738
				continue;
739
			ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
740 741 742
				    btrfs_file_extent_disk_num_bytes(buf, fi),
				    root->root_key.objectid, trans->transid,
				    key.objectid, key.offset);
743 744 745 746
			if (ret) {
				faili = i;
				goto fail;
			}
747
		} else {
748 749
			bytenr = btrfs_node_blockptr(buf, i);
			ret = btrfs_inc_extent_ref(trans, root, bytenr,
750 751 752
					   btrfs_level_size(root, level - 1),
					   root->root_key.objectid,
					   trans->transid, 0, 0);
753 754 755 756
			if (ret) {
				faili = i;
				goto fail;
			}
757
		}
C
Chris Mason 已提交
758 759
	}
	return 0;
760
fail:
C
Chris Mason 已提交
761
	WARN_ON(1);
762
#if 0
763
	for (i =0; i < faili; i++) {
764 765
		if (level == 0) {
			u64 disk_bytenr;
766 767
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
768
				continue;
769
			fi = btrfs_item_ptr(buf, i,
770
					    struct btrfs_file_extent_item);
771
			if (btrfs_file_extent_type(buf, fi) ==
772 773
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
774 775
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
776
				continue;
777 778
			err = btrfs_free_extent(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf,
779
								      fi), 0);
780 781
			BUG_ON(err);
		} else {
782 783 784
			bytenr = btrfs_node_blockptr(buf, i);
			err = btrfs_free_extent(trans, root, bytenr,
					btrfs_level_size(root, level - 1), 0);
785 786 787
			BUG_ON(err);
		}
	}
788
#endif
789
	return ret;
C
Chris Mason 已提交
790 791
}

C
Chris Mason 已提交
792 793 794 795 796 797 798 799
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
800 801
	unsigned long bi;
	struct extent_buffer *leaf;
C
Chris Mason 已提交
802 803

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
804 805
	if (ret < 0)
		goto fail;
C
Chris Mason 已提交
806
	BUG_ON(ret);
807 808 809 810 811

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
812
	btrfs_release_path(extent_root, path);
813
fail:
C
Chris Mason 已提交
814 815 816 817 818 819 820 821 822 823
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}

824 825
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
C
Chris Mason 已提交
826
{
827 828
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
829 830 831 832
	int ret;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
833 834 835 836
	u64 last = 0;
	u64 start;
	u64 end;
	u64 ptr;
C
Chris Mason 已提交
837

838
	block_group_cache = &root->fs_info->block_group_cache;
C
Chris Mason 已提交
839 840 841 842 843
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
844 845 846
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, BLOCK_GROUP_DIRTY);
		if (ret)
C
Chris Mason 已提交
847
			break;
848

849 850 851 852 853
		last = end + 1;
		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
854
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
855 856 857 858 859 860 861 862 863 864
		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		if (err) {
			werr = err;
			continue;
C
Chris Mason 已提交
865
		}
866 867
		clear_extent_bits(block_group_cache, start, end,
				  BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
868 869 870 871 872 873 874
	}
	btrfs_free_path(path);
	return werr;
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
875 876
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free, int data)
C
Chris Mason 已提交
877 878 879
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
880
	u64 total = num_bytes;
C
Chris Mason 已提交
881
	u64 old_val;
882
	u64 byte_in_group;
883 884
	u64 start;
	u64 end;
C
Chris Mason 已提交
885

C
Chris Mason 已提交
886
	while(total) {
887
		cache = btrfs_lookup_block_group(info, bytenr);
C
Chris Mason 已提交
888
		if (!cache) {
C
Chris Mason 已提交
889
			return -1;
C
Chris Mason 已提交
890
		}
891 892
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
893 894 895 896
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
897 898

		old_val = btrfs_block_group_used(&cache->item);
899
		num_bytes = min(total, cache->key.offset - byte_in_group);
C
Chris Mason 已提交
900
		if (alloc) {
C
Chris Mason 已提交
901
			if (cache->data != data &&
C
Chris Mason 已提交
902
			    old_val < (cache->key.offset >> 1)) {
903 904 905
				int bit_to_clear;
				int bit_to_set;
				cache->data = data;
C
Chris Mason 已提交
906
				if (data) {
907 908
					bit_to_clear = BLOCK_GROUP_METADATA;
					bit_to_set = BLOCK_GROUP_DATA;
909 910
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
911 912 913
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
914 915
					bit_to_clear = BLOCK_GROUP_DATA;
					bit_to_set = BLOCK_GROUP_METADATA;
916 917
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
918 919 920
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
921 922 923 924 925 926
				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
927 928 929 930 931 932 933 934
			} else if (cache->data != data &&
				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
				cache->data = BTRFS_BLOCK_GROUP_MIXED;
				set_extent_bits(&info->block_group_cache,
						start, end,
						BLOCK_GROUP_DATA |
						BLOCK_GROUP_METADATA,
						GFP_NOFS);
C
Chris Mason 已提交
935
			}
936
			old_val += num_bytes;
C
Chris Mason 已提交
937
		} else {
938
			old_val -= num_bytes;
939 940
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
941
						 bytenr, bytenr + num_bytes - 1,
942
						 GFP_NOFS);
943
			}
C
Chris Mason 已提交
944
		}
C
Chris Mason 已提交
945
		btrfs_set_block_group_used(&cache->item, old_val);
946 947
		total -= num_bytes;
		bytenr += num_bytes;
C
Chris Mason 已提交
948 949 950
	}
	return 0;
}
951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981
static int update_pinned_extents(struct btrfs_root *root,
				u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	}
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		WARN_ON(!cache);
		len = min(num, cache->key.offset -
			  (bytenr - cache->key.objectid));
		if (pin) {
			cache->pinned += len;
			fs_info->total_pinned += len;
		} else {
			cache->pinned -= len;
			fs_info->total_pinned -= len;
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
C
Chris Mason 已提交
982

983
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
C
Chris Mason 已提交
984 985
{
	u64 last = 0;
986 987 988
	u64 start;
	u64 end;
	struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
C
Chris Mason 已提交
989 990 991
	int ret;

	while(1) {
992 993 994
		ret = find_first_extent_bit(pinned_extents, last,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
C
Chris Mason 已提交
995
			break;
996 997
		set_extent_dirty(copy, start, end, GFP_NOFS);
		last = end + 1;
C
Chris Mason 已提交
998 999 1000 1001 1002 1003
	}
	return 0;
}

int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
1004
			       struct extent_map_tree *unpin)
1005
{
1006 1007
	u64 start;
	u64 end;
1008
	int ret;
1009 1010
	struct extent_map_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;
1011 1012

	while(1) {
1013 1014 1015
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
1016
			break;
1017
		update_pinned_extents(root, start, end + 1 - start, 0);
1018 1019
		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1020 1021 1022 1023
	}
	return 0;
}

1024 1025
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
C
Chris Mason 已提交
1026
{
1027 1028 1029
	u64 start;
	u64 end;
	struct btrfs_fs_info *info = extent_root->fs_info;
1030
	struct extent_buffer *eb;
1031
	struct btrfs_path *path;
C
Chris Mason 已提交
1032
	struct btrfs_key ins;
1033
	struct btrfs_disk_key first;
C
Chris Mason 已提交
1034
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
1035
	int ret;
1036
	int level;
1037
	int err = 0;
C
Chris Mason 已提交
1038

1039
	btrfs_set_stack_extent_refs(&extent_item, 1);
1040
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1041
	path = btrfs_alloc_path();
C
Chris Mason 已提交
1042

1043
	while(1) {
1044 1045 1046
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
1047 1048
			break;

1049 1050 1051 1052 1053 1054
		ins.objectid = start;
		ins.offset = end + 1 - start;
		err = btrfs_insert_item(trans, extent_root, &ins,
					&extent_item, sizeof(extent_item));
		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
1055 1056 1057 1058 1059 1060 1061
		eb = read_tree_block(extent_root, ins.objectid, ins.offset);
		level = btrfs_header_level(eb);
		if (level == 0) {
			btrfs_item_key(eb, &first, 0);
		} else {
			btrfs_node_key(eb, &first, 0);
		}
1062 1063
		err = btrfs_insert_extent_backref(trans, extent_root, path,
					  start, extent_root->root_key.objectid,
1064 1065
					  0, btrfs_disk_key_objectid(&first),
					  level);
1066
		BUG_ON(err);
1067
		free_extent_buffer(eb);
C
Chris Mason 已提交
1068
	}
1069
	btrfs_free_path(path);
C
Chris Mason 已提交
1070 1071 1072
	return 0;
}

1073 1074
static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
			  int pending)
C
Chris Mason 已提交
1075
{
1076
	int err = 0;
1077
	struct extent_buffer *buf;
C
Chris Mason 已提交
1078

C
Chris Mason 已提交
1079
	if (!pending) {
1080
		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1081 1082
		if (buf) {
			if (btrfs_buffer_uptodate(buf)) {
C
Chris Mason 已提交
1083 1084
				u64 transid =
				    root->fs_info->running_transaction->transid;
1085 1086
				if (btrfs_header_generation(buf) == transid) {
					free_extent_buffer(buf);
1087
					return 1;
C
Chris Mason 已提交
1088
				}
C
Chris Mason 已提交
1089
			}
1090
			free_extent_buffer(buf);
C
Chris Mason 已提交
1091
		}
1092
		update_pinned_extents(root, bytenr, num_bytes, 1);
C
Chris Mason 已提交
1093
	} else {
1094
		set_extent_bits(&root->fs_info->pending_del,
1095 1096
				bytenr, bytenr + num_bytes - 1,
				EXTENT_LOCKED, GFP_NOFS);
C
Chris Mason 已提交
1097
	}
C
Chris Mason 已提交
1098
	BUG_ON(err < 0);
C
Chris Mason 已提交
1099 1100 1101
	return 0;
}

1102
/*
1103
 * remove an extent from the root, returns 0 on success
1104
 */
1105
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1106 1107 1108
			 *root, u64 bytenr, u64 num_bytes,
			 u64 root_objectid, u64 ref_generation,
			 u64 owner_objectid, u64 owner_offset, int pin,
1109
			 int mark_free)
1110
{
1111
	struct btrfs_path *path;
C
Chris Mason 已提交
1112
	struct btrfs_key key;
1113 1114
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
1115
	struct extent_buffer *leaf;
1116
	int ret;
C
Chris Mason 已提交
1117
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
1118
	u32 refs;
C
Chris Mason 已提交
1119

1120
	key.objectid = bytenr;
1121
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1122
	key.offset = num_bytes;
1123

1124
	path = btrfs_alloc_path();
1125 1126
	if (!path)
		return -ENOMEM;
1127

1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145
	if (ref_generation && owner_objectid == 0 && root_objectid == 3) {
//printk("drop backref root %Lu gen %Lu byte %Lu\n", root_objectid, ref_generation, bytenr );
	}
	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, root_objectid,
				    ref_generation,
				    owner_objectid, owner_offset, 1);
	if (ret == 0) {
		ret = btrfs_del_item(trans, extent_root, path);
	} else {
		btrfs_print_leaf(extent_root, path->nodes[0]);
		WARN_ON(1);
		printk("Unable to find ref byte nr %Lu root %Lu "
		       " gen %Lu owner %Lu offset %Lu\n", bytenr,
		       root_objectid, ref_generation, owner_objectid,
		       owner_offset);
	}
	btrfs_release_path(extent_root, path);
1146 1147 1148 1149
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);
1150 1151 1152

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
C
Chris Mason 已提交
1153
			    struct btrfs_extent_item);
1154 1155 1156 1157 1158 1159
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs -= 1;
	btrfs_set_extent_refs(leaf, ei, refs);
	btrfs_mark_buffer_dirty(leaf);

C
Chris Mason 已提交
1160
	if (refs == 0) {
1161 1162
		u64 super_used;
		u64 root_used;
C
Chris Mason 已提交
1163 1164

		if (pin) {
1165
			ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1166 1167 1168
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
C
Chris Mason 已提交
1169 1170
		}

1171
		/* block accounting for super block */
1172 1173 1174
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - num_bytes);
1175 1176

		/* block accounting for root item */
1177
		root_used = btrfs_root_used(&root->root_item);
1178
		btrfs_set_root_used(&root->root_item,
1179
					   root_used - num_bytes);
1180

1181
		ret = btrfs_del_item(trans, extent_root, path);
1182 1183 1184
		if (ret) {
			return ret;
		}
1185
		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
C
Chris Mason 已提交
1186
					 mark_free, 0);
C
Chris Mason 已提交
1187
		BUG_ON(ret);
1188
	}
1189
	btrfs_free_path(path);
1190
	finish_current_insert(trans, extent_root);
1191 1192 1193 1194 1195 1196 1197
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
1198 1199
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
1200 1201
{
	int ret;
C
Chris Mason 已提交
1202
	int err = 0;
1203 1204 1205 1206
	u64 start;
	u64 end;
	struct extent_map_tree *pending_del;
	struct extent_map_tree *pinned_extents;
C
Chris Mason 已提交
1207

1208 1209
	pending_del = &extent_root->fs_info->pending_del;
	pinned_extents = &extent_root->fs_info->pinned_extents;
1210 1211

	while(1) {
1212 1213 1214
		ret = find_first_extent_bit(pending_del, 0, &start, &end,
					    EXTENT_LOCKED);
		if (ret)
1215
			break;
1216
		update_pinned_extents(extent_root, start, end + 1 - start, 1);
1217 1218 1219
		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
		ret = __free_extent(trans, extent_root,
1220 1221 1222
				     start, end + 1 - start,
				     extent_root->root_key.objectid,
				     0, 0, 0, 0, 0);
1223 1224
		if (ret)
			err = ret;
1225
	}
C
Chris Mason 已提交
1226
	return err;
1227 1228 1229 1230 1231
}

/*
 * remove an extent from the root, returns 0 on success
 */
1232
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1233 1234 1235
		      *root, u64 bytenr, u64 num_bytes,
		      u64 root_objectid, u64 ref_generation,
		      u64 owner_objectid, u64 owner_offset, int pin)
1236
{
1237
	struct btrfs_root *extent_root = root->fs_info->extent_root;
1238 1239
	int pending_ret;
	int ret;
1240

1241
	WARN_ON(num_bytes < root->sectorsize);
1242 1243 1244
	if (!root->ref_cows)
		ref_generation = 0;

1245
	if (root == extent_root) {
1246
		pin_down_bytes(root, bytenr, num_bytes, 1);
1247 1248
		return 0;
	}
1249 1250 1251
	ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
			    ref_generation, owner_objectid, owner_offset,
			    pin, pin == 0);
C
Chris Mason 已提交
1252
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1253 1254 1255
	return ret ? ret : pending_ret;
}

1256 1257 1258 1259 1260 1261 1262
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
	u64 mask = ((u64)root->stripesize - 1);
	u64 ret = (val + mask) & ~mask;
	return ret;
}

1263 1264 1265 1266
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
1267
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
1268 1269 1270
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
1271
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1272 1273
			    *orig_root, u64 num_bytes, u64 empty_size,
			    u64 search_start, u64 search_end, u64 hint_byte,
1274 1275
			    struct btrfs_key *ins, u64 exclude_start,
			    u64 exclude_nr, int data)
1276
{
1277
	struct btrfs_path *path;
C
Chris Mason 已提交
1278
	struct btrfs_key key;
1279
	u64 hole_size = 0;
1280 1281
	u64 aligned;
	int ret;
1282
	int slot = 0;
1283
	u64 last_byte = 0;
C
Chris Mason 已提交
1284
	u64 orig_search_start = search_start;
1285
	int start_found;
1286
	struct extent_buffer *l;
1287
	struct btrfs_root * root = orig_root->fs_info->extent_root;
1288
	struct btrfs_fs_info *info = root->fs_info;
1289
	u64 total_needed = num_bytes;
C
Chris Mason 已提交
1290
	int level;
C
Chris Mason 已提交
1291
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
1292
	int full_scan = 0;
1293
	int wrapped = 0;
1294
	u64 cached_start;
1295

1296
	WARN_ON(num_bytes < root->sectorsize);
1297 1298
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

1299 1300
	level = btrfs_header_level(root->node);

1301
	if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1302 1303 1304
		data = BTRFS_BLOCK_GROUP_MIXED;
	}

C
Chris Mason 已提交
1305
	if (search_end == (u64)-1)
1306 1307 1308
		search_end = btrfs_super_total_bytes(&info->super_copy);
	if (hint_byte) {
		block_group = btrfs_lookup_block_group(info, hint_byte);
1309 1310
		if (!block_group)
			hint_byte = search_start;
C
Chris Mason 已提交
1311
		block_group = btrfs_find_block_group(root, block_group,
1312
						     hint_byte, data, 1);
C
Chris Mason 已提交
1313 1314
	} else {
		block_group = btrfs_find_block_group(root,
1315 1316
						     trans->block_group,
						     search_start, data, 1);
C
Chris Mason 已提交
1317 1318
	}

1319
	total_needed += empty_size;
1320
	path = btrfs_alloc_path();
C
Chris Mason 已提交
1321
check_failed:
1322 1323
	search_start = find_search_start(root, &block_group, search_start,
					 total_needed, data, full_scan);
1324
	search_start = stripe_align(root, search_start);
1325
	cached_start = search_start;
1326
	btrfs_init_path(path);
1327 1328 1329
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
1330
	path->reada = 2;
1331

1332
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
1333 1334
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
1335

1336
	if (path->slots[0] > 0) {
1337
		path->slots[0]--;
1338 1339
	}

1340 1341 1342
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);

1343
	/*
1344
	 * walk backwards to find the first extent item key
1345
	 */
1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358
	while(btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
		if (path->slots[0] == 0) {
			ret = btrfs_prev_leaf(root, path);
			if (ret != 0) {
				ret = btrfs_search_slot(trans, root, ins,
							path, 0, 0);
				if (ret < 0)
					goto error;
				if (path->slots[0] > 0)
					path->slots[0]--;
				break;
			}
		} else {
1359 1360
			path->slots[0]--;
		}
1361 1362
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1363
	}
1364
	while (1) {
1365
		l = path->nodes[0];
1366
		slot = path->slots[0];
1367
		if (slot >= btrfs_header_nritems(l)) {
1368
			ret = btrfs_next_leaf(root, path);
1369 1370
			if (ret == 0)
				continue;
C
Chris Mason 已提交
1371 1372
			if (ret < 0)
				goto error;
1373 1374 1375

			search_start = max(search_start,
					   block_group->key.objectid);
1376
			if (!start_found) {
1377 1378 1379 1380 1381 1382 1383
				aligned = stripe_align(root, search_start);
				ins->objectid = aligned;
				if (aligned >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				ins->offset = search_end - aligned;
1384 1385 1386
				start_found = 1;
				goto check_pending;
			}
1387 1388 1389 1390 1391 1392 1393
			ins->objectid = stripe_align(root,
						     last_byte > search_start ?
						     last_byte : search_start);
			if (search_end <= ins->objectid) {
				ret = -ENOSPC;
				goto error;
			}
C
Chris Mason 已提交
1394
			ins->offset = search_end - ins->objectid;
1395
			BUG_ON(ins->objectid >= search_end);
1396 1397
			goto check_pending;
		}
1398
		btrfs_item_key_to_cpu(l, &key, slot);
1399

1400
		if (key.objectid >= search_start && key.objectid > last_byte &&
1401
		    start_found) {
1402 1403
			if (last_byte < search_start)
				last_byte = search_start;
1404 1405 1406 1407
			aligned = stripe_align(root, last_byte);
			hole_size = key.objectid - aligned;
			if (key.objectid > aligned && hole_size >= num_bytes) {
				ins->objectid = aligned;
1408 1409
				ins->offset = hole_size;
				goto check_pending;
1410
			}
1411
		}
1412
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1413 1414
			if (!start_found && btrfs_key_type(&key) ==
			    BTRFS_BLOCK_GROUP_ITEM_KEY) {
1415
				last_byte = key.objectid;
1416 1417
				start_found = 1;
			}
1418
			goto next;
1419 1420
		}

1421

1422
		start_found = 1;
1423
		last_byte = key.objectid + key.offset;
1424

1425 1426
		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
		    last_byte >= block_group->key.objectid +
C
Chris Mason 已提交
1427 1428 1429
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
1430
				block_group->key.offset;
C
Chris Mason 已提交
1431 1432
			goto new_group;
		}
C
Chris Mason 已提交
1433
next:
1434
		path->slots[0]++;
1435
		cond_resched();
1436 1437 1438 1439 1440
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1441
	btrfs_release_path(root, path);
1442
	BUG_ON(ins->objectid < search_start);
1443

1444
	if (ins->objectid + num_bytes >= search_end)
1445
		goto enospc;
1446
	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
1447
	    ins->objectid + num_bytes > block_group->
1448 1449 1450 1451 1452
	    key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}
1453
	if (test_range_bit(&info->extent_ins, ins->objectid,
1454 1455
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
1456 1457 1458
		goto new_group;
	}
	if (test_range_bit(&info->pinned_extents, ins->objectid,
1459 1460
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
1461
		goto new_group;
1462
	}
1463
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1464 1465 1466 1467
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}
1468
	if (!data) {
1469
		block_group = btrfs_lookup_block_group(info, ins->objectid);
1470 1471
		if (block_group)
			trans->block_group = block_group;
1472
	}
1473
	ins->offset = num_bytes;
1474
	btrfs_free_path(path);
1475
	return 0;
C
Chris Mason 已提交
1476 1477

new_group:
1478
	if (search_start + num_bytes >= search_end) {
1479
enospc:
C
Chris Mason 已提交
1480
		search_start = orig_search_start;
1481 1482 1483 1484
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
1485 1486 1487
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
1488
			full_scan = 1;
1489
			data = BTRFS_BLOCK_GROUP_MIXED;
1490
		} else
1491
			wrapped = 1;
C
Chris Mason 已提交
1492
	}
1493
	block_group = btrfs_lookup_block_group(info, search_start);
1494
	cond_resched();
1495 1496
	block_group = btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
C
Chris Mason 已提交
1497 1498
	goto check_failed;

C
Chris Mason 已提交
1499
error:
1500 1501
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1502
	return ret;
1503 1504 1505 1506 1507 1508 1509 1510
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1511
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1512 1513 1514 1515
		       struct btrfs_root *root,
		       u64 num_bytes, u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 owner_offset,
		       u64 empty_size, u64 hint_byte,
C
Chris Mason 已提交
1516
		       u64 search_end, struct btrfs_key *ins, int data)
1517 1518 1519
{
	int ret;
	int pending_ret;
1520
	u64 super_used, root_used;
1521
	u64 search_start = 0;
1522 1523
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1524
	struct btrfs_extent_item extent_item;
1525
	struct btrfs_path *path;
C
Chris Mason 已提交
1526

1527
	btrfs_set_stack_extent_refs(&extent_item, 1);
1528

1529 1530 1531
	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
1532 1533
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);
C
Chris Mason 已提交
1534
	BUG_ON(ret);
1535 1536
	if (ret)
		return ret;
1537

1538
	/* block accounting for super block */
1539 1540
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1541

1542
	/* block accounting for root item */
1543 1544
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1545

1546 1547 1548 1549
	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);

1550
	if (root == extent_root) {
1551 1552 1553
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
1554
		WARN_ON(data == 1);
1555 1556 1557 1558 1559 1560
		goto update_block;
	}

	WARN_ON(trans->alloc_exclude_nr);
	trans->alloc_exclude_start = ins->objectid;
	trans->alloc_exclude_nr = ins->offset;
1561 1562
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1563

1564 1565
	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;
1566 1567 1568 1569 1570 1571 1572
	BUG_ON(ret);

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_extent_backref(trans, extent_root, path,
					  ins->objectid, root_objectid,
					  ref_generation, owner, owner_offset);
1573

C
Chris Mason 已提交
1574
	BUG_ON(ret);
1575
	btrfs_free_path(path);
1576
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1577
	pending_ret = del_pending_extents(trans, extent_root);
1578

1579
	if (ret) {
C
Chris Mason 已提交
1580
		return ret;
1581 1582
	}
	if (pending_ret) {
C
Chris Mason 已提交
1583
		return pending_ret;
1584
	}
1585 1586

update_block:
C
Chris Mason 已提交
1587 1588
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
C
Chris Mason 已提交
1589
	BUG_ON(ret);
C
Chris Mason 已提交
1590
	return 0;
1591 1592 1593 1594 1595 1596
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
1597
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1598
					     struct btrfs_root *root,
1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626
					     u32 blocksize,
					     u64 root_objectid, u64 hint,
					     u64 empty_size)
{
	u64 ref_generation;

	if (root->ref_cows)
		ref_generation = trans->transid;
	else
		ref_generation = 0;


	return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
					ref_generation, 0, 0, hint, empty_size);
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize,
					     u64 root_objectid,
					     u64 ref_generation,
					     u64 first_objectid,
					     int level,
					     u64 hint,
1627
					     u64 empty_size)
1628
{
C
Chris Mason 已提交
1629
	struct btrfs_key ins;
1630
	int ret;
1631
	struct extent_buffer *buf;
1632

1633 1634 1635
	ret = btrfs_alloc_extent(trans, root, blocksize,
				 root_objectid, ref_generation,
				 first_objectid, level, empty_size, hint,
1636
				 (u64)-1, &ins, 0);
1637
	if (ret) {
1638 1639
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
1640
	}
1641
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1642
	if (!buf) {
1643 1644 1645
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  root->root_key.objectid, ref_generation,
				  0, 0, 0);
1646 1647
		return ERR_PTR(-ENOMEM);
	}
1648 1649 1650
	btrfs_set_buffer_uptodate(buf);
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
1651 1652 1653 1654
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
1655
	btrfs_set_buffer_defrag(buf);
1656
	trans->blocks_used++;
1657 1658
	return buf;
}
1659

1660
static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1661
			 struct btrfs_root *root, struct extent_buffer *leaf)
1662
{
1663 1664
	u64 leaf_owner;
	u64 leaf_generation;
1665
	struct btrfs_key key;
1666 1667 1668 1669 1670
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

1671 1672
	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
1673 1674 1675
	leaf_owner = btrfs_header_owner(leaf);
	leaf_generation = btrfs_header_generation(leaf);

1676
	for (i = 0; i < nritems; i++) {
1677
		u64 disk_bytenr;
1678 1679 1680

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1681 1682
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1683 1684
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
1685
			continue;
1686 1687 1688 1689
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
1690 1691
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (disk_bytenr == 0)
C
Chris Mason 已提交
1692
			continue;
1693
		ret = btrfs_free_extent(trans, root, disk_bytenr,
1694 1695 1696
				btrfs_file_extent_disk_num_bytes(leaf, fi),
				leaf_owner, leaf_generation,
				key.objectid, key.offset, 0);
1697 1698 1699 1700 1701
		BUG_ON(ret);
	}
	return 0;
}

1702
static void reada_walk_down(struct btrfs_root *root,
1703
			    struct extent_buffer *node)
1704 1705 1706
{
	int i;
	u32 nritems;
1707
	u64 bytenr;
1708 1709
	int ret;
	u32 refs;
1710 1711
	int level;
	u32 blocksize;
1712

1713
	nritems = btrfs_header_nritems(node);
1714
	level = btrfs_header_level(node);
1715
	for (i = 0; i < nritems; i++) {
1716 1717 1718
		bytenr = btrfs_node_blockptr(node, i);
		blocksize = btrfs_level_size(root, level - 1);
		ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1719 1720 1721
		BUG_ON(ret);
		if (refs != 1)
			continue;
1722
		mutex_unlock(&root->fs_info->fs_mutex);
1723
		ret = readahead_tree_block(root, bytenr, blocksize);
1724 1725
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
1726 1727 1728 1729 1730
		if (ret)
			break;
	}
}

C
Chris Mason 已提交
1731 1732 1733 1734
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1735 1736
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1737
{
1738 1739 1740
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
1741 1742
	struct extent_buffer *next;
	struct extent_buffer *cur;
1743
	struct extent_buffer *parent;
1744
	u32 blocksize;
C
Chris Mason 已提交
1745 1746 1747
	int ret;
	u32 refs;

1748 1749
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1750
	ret = lookup_extent_ref(trans, root,
1751 1752
				path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
C
Chris Mason 已提交
1753 1754 1755
	BUG_ON(ret);
	if (refs > 1)
		goto out;
1756

C
Chris Mason 已提交
1757 1758 1759
	/*
	 * walk down to the last node level and free all the leaves
	 */
1760
	while(*level >= 0) {
1761 1762
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1763
		cur = path->nodes[*level];
1764 1765

		if (*level > 0 && path->slots[*level] == 0)
1766
			reada_walk_down(root, cur);
1767

1768
		if (btrfs_header_level(cur) != *level)
C
Chris Mason 已提交
1769
			WARN_ON(1);
1770

1771
		if (path->slots[*level] >=
1772
		    btrfs_header_nritems(cur))
C
Chris Mason 已提交
1773
			break;
1774 1775 1776 1777 1778
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
1779 1780 1781
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1782 1783
		BUG_ON(ret);
		if (refs != 1) {
1784 1785 1786
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
C
Chris Mason 已提交
1787
			path->slots[*level]++;
1788
			ret = btrfs_free_extent(trans, root, bytenr,
1789 1790
						blocksize, root_owner,
						root_gen, 0, 0, 1);
C
Chris Mason 已提交
1791 1792 1793
			BUG_ON(ret);
			continue;
		}
1794
		next = btrfs_find_tree_block(root, bytenr, blocksize);
1795 1796
		if (!next || !btrfs_buffer_uptodate(next)) {
			free_extent_buffer(next);
1797
			mutex_unlock(&root->fs_info->fs_mutex);
1798
			next = read_tree_block(root, bytenr, blocksize);
1799 1800 1801
			mutex_lock(&root->fs_info->fs_mutex);

			/* we dropped the lock, check one more time */
1802 1803
			ret = lookup_extent_ref(trans, root, bytenr,
						blocksize, &refs);
1804 1805
			BUG_ON(ret);
			if (refs != 1) {
1806 1807 1808 1809
				parent = path->nodes[*level];
				root_owner = btrfs_header_owner(parent);
				root_gen = btrfs_header_generation(parent);

1810
				path->slots[*level]++;
1811
				free_extent_buffer(next);
1812 1813 1814 1815
				ret = btrfs_free_extent(trans, root, bytenr,
							blocksize,
							root_owner,
							root_gen, 0, 0, 1);
1816 1817 1818 1819
				BUG_ON(ret);
				continue;
			}
		}
1820
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1821
		if (path->nodes[*level-1])
1822
			free_extent_buffer(path->nodes[*level-1]);
C
Chris Mason 已提交
1823
		path->nodes[*level-1] = next;
1824
		*level = btrfs_header_level(next);
C
Chris Mason 已提交
1825 1826 1827
		path->slots[*level] = 0;
	}
out:
1828 1829
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1830 1831 1832 1833 1834 1835 1836 1837 1838 1839

	if (path->nodes[*level] == root->node) {
		root_owner = root->root_key.objectid;
		parent = path->nodes[*level];
	} else {
		parent = path->nodes[*level + 1];
		root_owner = btrfs_header_owner(parent);
	}

	root_gen = btrfs_header_generation(parent);
1840
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1841 1842
				path->nodes[*level]->len,
				root_owner, root_gen, 0, 0, 1);
1843
	free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1844 1845 1846 1847 1848 1849
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1850 1851 1852 1853 1854
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1855 1856
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1857
{
1858 1859 1860
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
C
Chris Mason 已提交
1861 1862 1863
	int i;
	int slot;
	int ret;
1864

C
Chris Mason 已提交
1865
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1866
		slot = path->slots[i];
1867 1868 1869 1870
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			node = path->nodes[i];
C
Chris Mason 已提交
1871 1872
			path->slots[i]++;
			*level = i;
1873
			WARN_ON(*level == 0);
1874
			btrfs_node_key(node, &disk_key, path->slots[i]);
1875
			memcpy(&root_item->drop_progress,
1876
			       &disk_key, sizeof(disk_key));
1877
			root_item->drop_level = i;
C
Chris Mason 已提交
1878 1879
			return 0;
		} else {
1880 1881 1882 1883 1884 1885 1886 1887 1888 1889
			if (path->nodes[*level] == root->node) {
				root_owner = root->root_key.objectid;
				root_gen =
				   btrfs_header_generation(path->nodes[*level]);
			} else {
				struct extent_buffer *node;
				node = path->nodes[*level + 1];
				root_owner = btrfs_header_owner(node);
				root_gen = btrfs_header_generation(node);
			}
1890
			ret = btrfs_free_extent(trans, root,
1891
						path->nodes[*level]->start,
1892 1893
						path->nodes[*level]->len,
						root_owner, root_gen, 0, 0, 1);
1894
			BUG_ON(ret);
1895
			free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1896
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
1897 1898 1899 1900 1901 1902
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
1903 1904 1905 1906 1907
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
1908
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1909
			*root)
C
Chris Mason 已提交
1910
{
1911
	int ret = 0;
C
Chris Mason 已提交
1912
	int wret;
C
Chris Mason 已提交
1913
	int level;
1914
	struct btrfs_path *path;
C
Chris Mason 已提交
1915 1916
	int i;
	int orig_level;
1917
	struct btrfs_root_item *root_item = &root->root_item;
C
Chris Mason 已提交
1918

1919 1920
	path = btrfs_alloc_path();
	BUG_ON(!path);
C
Chris Mason 已提交
1921

1922
	level = btrfs_header_level(root->node);
C
Chris Mason 已提交
1923
	orig_level = level;
1924 1925
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path->nodes[level] = root->node;
1926
		extent_buffer_get(root->node);
1927 1928 1929
		path->slots[level] = 0;
	} else {
		struct btrfs_key key;
1930 1931
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;
1932

1933
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1934 1935
		level = root_item->drop_level;
		path->lowest_level = level;
1936
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1937
		if (wret < 0) {
1938 1939 1940
			ret = wret;
			goto out;
		}
1941 1942 1943 1944
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
1945
	}
C
Chris Mason 已提交
1946
	while(1) {
1947
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
1948
		if (wret > 0)
C
Chris Mason 已提交
1949
			break;
C
Chris Mason 已提交
1950 1951 1952
		if (wret < 0)
			ret = wret;

1953
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
1954
		if (wret > 0)
C
Chris Mason 已提交
1955
			break;
C
Chris Mason 已提交
1956 1957
		if (wret < 0)
			ret = wret;
1958 1959
		ret = -EAGAIN;
		break;
C
Chris Mason 已提交
1960
	}
C
Chris Mason 已提交
1961
	for (i = 0; i <= orig_level; i++) {
1962
		if (path->nodes[i]) {
1963
			free_extent_buffer(path->nodes[i]);
1964
			path->nodes[i] = NULL;
C
Chris Mason 已提交
1965
		}
C
Chris Mason 已提交
1966
	}
1967
out:
1968
	btrfs_free_path(path);
C
Chris Mason 已提交
1969
	return ret;
C
Chris Mason 已提交
1970
}
C
Chris Mason 已提交
1971

1972
int btrfs_free_block_groups(struct btrfs_fs_info *info)
C
Chris Mason 已提交
1973
{
1974 1975
	u64 start;
	u64 end;
1976
	u64 ptr;
C
Chris Mason 已提交
1977 1978
	int ret;
	while(1) {
1979 1980 1981
		ret = find_first_extent_bit(&info->block_group_cache, 0,
					    &start, &end, (unsigned int)-1);
		if (ret)
C
Chris Mason 已提交
1982
			break;
1983 1984 1985
		ret = get_state_private(&info->block_group_cache, start, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);
1986 1987
		clear_extent_bits(&info->block_group_cache, start,
				  end, (unsigned int)-1, GFP_NOFS);
C
Chris Mason 已提交
1988
	}
1989
	while(1) {
1990 1991 1992
		ret = find_first_extent_bit(&info->free_space_cache, 0,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
1993
			break;
1994 1995
		clear_extent_dirty(&info->free_space_cache, start,
				   end, GFP_NOFS);
1996
	}
C
Chris Mason 已提交
1997 1998 1999
	return 0;
}

C
Chris Mason 已提交
2000 2001 2002 2003 2004
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
2005
	int bit;
C
Chris Mason 已提交
2006
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
2007
	struct btrfs_fs_info *info = root->fs_info;
2008
	struct extent_map_tree *block_group_cache;
C
Chris Mason 已提交
2009 2010
	struct btrfs_key key;
	struct btrfs_key found_key;
2011
	struct extent_buffer *leaf;
2012 2013

	block_group_cache = &info->block_group_cache;
C
Chris Mason 已提交
2014

C
Chris Mason 已提交
2015
	root = info->extent_root;
C
Chris Mason 已提交
2016
	key.objectid = 0;
2017
	key.offset = BTRFS_BLOCK_GROUP_SIZE;
C
Chris Mason 已提交
2018 2019 2020 2021 2022 2023 2024
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
2025
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
2026 2027 2028 2029 2030
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
2031 2032
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
C
Chris Mason 已提交
2033 2034 2035 2036 2037
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
2038

2039 2040 2041
		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
C
Chris Mason 已提交
2042
		memcpy(&cache->key, &found_key, sizeof(found_key));
2043
		cache->cached = 0;
2044
		cache->pinned = 0;
C
Chris Mason 已提交
2045 2046
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
2047

2048 2049 2050 2051
		if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
			bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
			cache->data = BTRFS_BLOCK_GROUP_MIXED;
		} else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2052
			bit = BLOCK_GROUP_DATA;
2053
			cache->data = BTRFS_BLOCK_GROUP_DATA;
2054 2055 2056
		} else {
			bit = BLOCK_GROUP_METADATA;
			cache->data = 0;
2057
		}
2058 2059 2060 2061 2062 2063

		/* use EXTENT_LOCKED to prevent merging */
		set_extent_bits(block_group_cache, found_key.objectid,
				found_key.objectid + found_key.offset - 1,
				bit | EXTENT_LOCKED, GFP_NOFS);
		set_state_private(block_group_cache, found_key.objectid,
J
Jens Axboe 已提交
2064
				  (unsigned long)cache);
2065

C
Chris Mason 已提交
2066
		if (key.objectid >=
2067
		    btrfs_super_total_bytes(&info->super_copy))
C
Chris Mason 已提交
2068 2069 2070 2071 2072 2073
			break;
	}

	btrfs_free_path(path);
	return 0;
}