extent-tree.c 53.8 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Z
Zach Brown 已提交
19
#include <linux/sched.h>
20
#include <linux/crc32c.h>
21
#include "hash.h"
22 23 24
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
25
#include "transaction.h"
26

27 28 29 30
#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
#define BLOCK_GROUP_DIRTY EXTENT_DIRTY

31 32
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
33 34
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
35

36 37 38 39 40 41
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
42
	struct extent_buffer *leaf;
43
	struct extent_map_tree *free_space_cache;
44 45 46
	int slot;
	u64 last = 0;
	u64 hole_size;
47
	u64 first_free;
48 49
	int found = 0;

50 51 52
	if (!block_group)
		return 0;

53
	root = root->fs_info->extent_root;
54
	free_space_cache = &root->fs_info->free_space_cache;
55 56 57

	if (block_group->cached)
		return 0;
58

59 60 61
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
62

63
	path->reada = 2;
64
	first_free = block_group->key.objectid;
65 66
	key.objectid = block_group->key.objectid;
	key.offset = 0;
67

68 69
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
70

71 72
	if (ret < 0)
		return ret;
73

74 75
	if (ret && path->slots[0] > 0)
		path->slots[0]--;
76

77
	while(1) {
78
		leaf = path->nodes[0];
79
		slot = path->slots[0];
80
		if (slot >= btrfs_header_nritems(leaf)) {
81
			ret = btrfs_next_leaf(root, path);
82 83
			if (ret < 0)
				goto err;
84
			if (ret == 0) {
85
				continue;
86
			} else {
87 88 89
				break;
			}
		}
90

91
		btrfs_item_key_to_cpu(leaf, &key, slot);
92
		if (key.objectid < block_group->key.objectid) {
93 94
			if (btrfs_key_type(&key) != BTRFS_EXTENT_REF_KEY &&
			    key.objectid + key.offset > first_free)
95 96 97 98
				first_free = key.objectid + key.offset;
			goto next;
		}

99 100 101 102
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			break;
		}
103

104 105
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
106
				last = first_free;
107 108
				found = 1;
			}
109 110 111 112 113
			if (key.objectid > last) {
				hole_size = key.objectid - last;
				set_extent_dirty(free_space_cache, last,
						 last + hole_size - 1,
						 GFP_NOFS);
114 115
			}
			last = key.objectid + key.offset;
116
		}
117
next:
118 119 120
		path->slots[0]++;
	}

121 122 123 124 125 126
	if (!found)
		last = first_free;
	if (block_group->key.objectid +
	    block_group->key.offset > last) {
		hole_size = block_group->key.objectid +
			block_group->key.offset - last;
127 128
		set_extent_dirty(free_space_cache, last,
				 last + hole_size - 1, GFP_NOFS);
129
	}
130
	block_group->cached = 1;
131
err:
132 133 134 135
	btrfs_free_path(path);
	return 0;
}

136 137
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
138
							 u64 bytenr)
C
Chris Mason 已提交
139
{
140 141 142 143 144
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 ptr;
	u64 start;
	u64 end;
C
Chris Mason 已提交
145 146
	int ret;

147 148
	block_group_cache = &info->block_group_cache;
	ret = find_first_extent_bit(block_group_cache,
149
				    bytenr, &start, &end,
150
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
C
Chris Mason 已提交
151
	if (ret) {
152
		return NULL;
C
Chris Mason 已提交
153
	}
154 155 156 157
	ret = get_state_private(block_group_cache, start, &ptr);
	if (ret)
		return NULL;

J
Jens Axboe 已提交
158
	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
Y
Yan 已提交
159
	if (block_group->key.objectid <= bytenr && bytenr <
160 161
	    block_group->key.objectid + block_group->key.offset)
		return block_group;
C
Chris Mason 已提交
162 163
	return NULL;
}
164 165
static u64 find_search_start(struct btrfs_root *root,
			     struct btrfs_block_group_cache **cache_ret,
166 167
			     u64 search_start, int num,
			     int data, int full_scan)
168 169 170
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
171
	u64 last;
172 173
	u64 start = 0;
	u64 end = 0;
174
	u64 cache_miss = 0;
175
	int wrapped = 0;
176

177
	if (!cache) {
178
		goto out;
179
	}
180
again:
181 182 183
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;
184

185 186
	last = max(search_start, cache->key.objectid);

187
	while(1) {
188 189
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
190
		if (ret) {
191 192
			if (!cache_miss)
				cache_miss = last;
193 194
			goto new_group;
		}
195 196 197

		start = max(last, start);
		last = end + 1;
198 199 200
		if (last - start < num) {
			if (last == cache->key.objectid + cache->key.offset)
				cache_miss = start;
201
			continue;
202 203
		}
		if (data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
204
		    start + num > cache->key.objectid + cache->key.offset)
205
			goto new_group;
206
		return start;
207 208
	}
out:
209 210 211 212 213 214 215
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n",
		       search_start);
		WARN_ON(1);
		return search_start;
	}
216
	return search_start;
217 218

new_group:
219
	last = cache->key.objectid + cache->key.offset;
220
wrapped:
221
	cache = btrfs_lookup_block_group(root->fs_info, last);
222
	if (!cache) {
223
no_cache:
224 225 226 227 228 229
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			data = BTRFS_BLOCK_GROUP_MIXED;
			goto wrapped;
		}
230
		goto out;
231
	}
232 233 234 235 236
	if (cache_miss && !cache->cached) {
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_block_group(root->fs_info, last);
	}
237
	cache = btrfs_find_block_group(root, cache, last, data, 0);
238 239
	if (!cache)
		goto no_cache;
240
	*cache_ret = cache;
241
	cache_miss = 0;
242 243 244
	goto again;
}

C
Chris Mason 已提交
245 246
static u64 div_factor(u64 num, int factor)
{
247 248
	if (factor == 10)
		return num;
C
Chris Mason 已提交
249 250 251 252 253
	num *= factor;
	do_div(num, 10);
	return num;
}

254 255
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
256
						 *hint, u64 search_start,
257
						 int data, int owner)
C
Chris Mason 已提交
258
{
259 260
	struct btrfs_block_group_cache *cache;
	struct extent_map_tree *block_group_cache;
261
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
262 263
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
264 265
	u64 last = 0;
	u64 hint_last;
266 267 268 269 270
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	int bit;
C
Chris Mason 已提交
271
	int ret;
272
	int full_search = 0;
273
	int factor = 8;
C
Chris Mason 已提交
274
	int data_swap = 0;
275

276 277
	block_group_cache = &info->block_group_cache;

278
	if (!owner)
279
		factor = 8;
C
Chris Mason 已提交
280

281
	if (data == BTRFS_BLOCK_GROUP_MIXED) {
282
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
283 284
		factor = 10;
	} else if (data)
285 286 287
		bit = BLOCK_GROUP_DATA;
	else
		bit = BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
288 289 290

	if (search_start) {
		struct btrfs_block_group_cache *shint;
291
		shint = btrfs_lookup_block_group(info, search_start);
292 293
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
C
Chris Mason 已提交
294
			used = btrfs_block_group_used(&shint->item);
295 296
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
C
Chris Mason 已提交
297 298 299 300
				return shint;
			}
		}
	}
301 302
	if (hint && (hint->data == data ||
		     hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
303
		used = btrfs_block_group_used(&hint->item);
304 305
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
306 307
			return hint;
		}
308
		last = hint->key.objectid + hint->key.offset;
309 310
		hint_last = last;
	} else {
311 312 313 314 315 316
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
317 318
	}
again:
C
Chris Mason 已提交
319
	while(1) {
320 321 322
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
C
Chris Mason 已提交
323
			break;
324 325 326 327 328

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
329
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
330 331 332 333 334 335 336
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (full_search)
			free_check = cache->key.offset;
		else
			free_check = div_factor(cache->key.offset, factor);
337
		if (used + cache->pinned < free_check) {
338 339
			found_group = cache;
			goto found;
C
Chris Mason 已提交
340
		}
341
		cond_resched();
C
Chris Mason 已提交
342
	}
343
	if (!full_search) {
C
Chris Mason 已提交
344
		last = search_start;
345 346 347
		full_search = 1;
		goto again;
	}
C
Chris Mason 已提交
348 349
	if (!data_swap) {
		data_swap = 1;
350
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
351 352 353
		last = search_start;
		goto again;
	}
C
Chris Mason 已提交
354
found:
355
	return found_group;
C
Chris Mason 已提交
356 357
}

358
static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
359 360 361 362 363 364 365 366
			   u64 owner, u64 owner_offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
367 368
	lenum = cpu_to_le64(ref_generation);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
369

370
#if 0
371 372 373 374
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner_offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
375
#endif
376 377 378
	return ((u64)high_crc << 32) | (u64)low_crc;
}

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
static int match_extent_ref(struct extent_buffer *leaf,
			    struct btrfs_extent_ref *disk_ref,
			    struct btrfs_extent_ref *cpu_ref)
{
	int ret;
	int len;

	if (cpu_ref->objectid)
		len = sizeof(*cpu_ref);
	else
		len = 2 * sizeof(u64);
	ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
				   len);
	return ret == 0;
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, u64 bytenr,
				 u64 root_objectid, u64 ref_generation,
				 u64 owner, u64 owner_offset, int del)
400 401 402
{
	u64 hash;
	struct btrfs_key key;
403
	struct btrfs_key found_key;
404
	struct btrfs_extent_ref ref;
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;
	int ret;
	int ret2;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path,
					del ? -1 : 0, del);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		if (ret != 0) {
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				if (ret2)
					goto out;
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
				goto out;
			key.offset = found_key.offset;
			if (del) {
				btrfs_release_path(root, path);
				continue;
			}
		}
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
			ret = 0;
			goto out;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
	}
out:
	return ret;
}

460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume (in theory, not implemented yet)
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks the same as the create case, but trans->transid
 * will be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a file extent is removed either during snapshot deletion or file
 * truncation, the corresponding back reference is found
 * by searching for:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * Storing sufficient information for a full reverse mapping of a btree
 * block would require storing the lowest key of the block in the backref,
 * and it would require updating that lowest key either before write out or
 * every time it changed.  Instead, the objectid of the lowest key is stored
 * along with the level of the tree block.  This provides a hint
 * about where in the btree the block can be found.  Searches through the
 * btree only need to look for a pointer to that block, so they stop one
 * level higher than the level recorded in the backref.
 *
 * Some btrees do not do reference counting on their extents.  These
 * include the extent tree and the tree of tree roots.  Backrefs for these
 * trees always have a generation of zero.
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level)
 *
 * When a tree block is cow'd in a reference counted root,
 * new back references are added for all the blocks it points to.
 * These are of the form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, lowest_key_objectid, level)
 *
 * Because the lowest_key_objectid and the level are just hints
 * they are not used when backrefs are deleted.  When a backref is deleted:
 *
 * if backref was for a tree root:
 *     root_objectid = root->root_key.objectid
 * else
 *     root_objectid = btrfs_header_owner(parent)
 *
 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
 *
 * Back Reference Key hashing:
 *
 * Back references have four fields, each 64 bits long.  Unfortunately,
 * This is hashed into a single 64 bit number and placed into the key offset.
 * The key objectid corresponds to the first byte in the extent, and the
 * key type is set to BTRFS_EXTENT_REF_KEY
 */
548 549 550 551 552 553 554 555 556 557
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, u64 bytenr,
				 u64 root_objectid, u64 ref_generation,
				 u64 owner, u64 owner_offset)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_extent_ref ref;
	struct btrfs_extent_ref *disk_ref;
558 559 560
	int ret;

	btrfs_set_stack_ref_root(&ref, root_objectid);
561
	btrfs_set_stack_ref_generation(&ref, ref_generation);
562 563 564
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

565 566
	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
567 568 569 570 571 572
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
	while (ret == -EEXIST) {
573 574 575 576 577 578 579 580
		disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref))
			goto out;
		key.offset++;
		btrfs_release_path(root, path);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      sizeof(ref));
581
	}
582 583 584 585 586 587 588 589 590 591
	if (ret)
		goto out;
	disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_extent_ref);
	write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
			    sizeof(ref));
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
592 593
}

C
Chris Mason 已提交
594 595
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
596
				u64 bytenr, u64 num_bytes,
597
				u64 root_objectid, u64 ref_generation,
598
				u64 owner, u64 owner_offset)
C
Chris Mason 已提交
599
{
600
	struct btrfs_path *path;
C
Chris Mason 已提交
601
	int ret;
C
Chris Mason 已提交
602
	struct btrfs_key key;
603
	struct extent_buffer *l;
C
Chris Mason 已提交
604
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
605
	u32 refs;
C
Chris Mason 已提交
606

607
	WARN_ON(num_bytes < root->sectorsize);
608
	path = btrfs_alloc_path();
609 610
	if (!path)
		return -ENOMEM;
611

612
	key.objectid = bytenr;
613
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
614
	key.offset = num_bytes;
615
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
616
				0, 1);
617 618
	if (ret < 0)
		return ret;
619
	if (ret != 0) {
620
		BUG();
621
	}
C
Chris Mason 已提交
622
	BUG_ON(ret != 0);
623
	l = path->nodes[0];
624
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
625 626
	refs = btrfs_extent_refs(l, item);
	btrfs_set_extent_refs(l, item, refs + 1);
627
	btrfs_mark_buffer_dirty(path->nodes[0]);
628

629
	btrfs_release_path(root->fs_info->extent_root, path);
630 631 632 633 634

	ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
					  path, bytenr, root_objectid,
					  ref_generation, owner, owner_offset);
	BUG_ON(ret);
635
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
636
	del_pending_extents(trans, root->fs_info->extent_root);
637 638

	btrfs_free_path(path);
C
Chris Mason 已提交
639 640 641
	return 0;
}

642 643 644 645 646 647 648 649
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
{
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	return 0;
}

C
Chris Mason 已提交
650
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
651 652
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u32 *refs)
653
{
654
	struct btrfs_path *path;
655
	int ret;
C
Chris Mason 已提交
656
	struct btrfs_key key;
657
	struct extent_buffer *l;
C
Chris Mason 已提交
658
	struct btrfs_extent_item *item;
659

660
	WARN_ON(num_bytes < root->sectorsize);
661
	path = btrfs_alloc_path();
662 663
	key.objectid = bytenr;
	key.offset = num_bytes;
664
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
665
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
666
				0, 0);
667 668
	if (ret < 0)
		goto out;
669 670
	if (ret != 0) {
		btrfs_print_leaf(root, path->nodes[0]);
671
		printk("failed to find block number %Lu\n", bytenr);
672
		BUG();
673 674
	}
	l = path->nodes[0];
675
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
676
	*refs = btrfs_extent_refs(l, item);
677
out:
678
	btrfs_free_path(path);
679 680 681
	return 0;
}

C
Chris Mason 已提交
682
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
683
		       struct btrfs_root *root, u64 owner_objectid)
C
Chris Mason 已提交
684
{
685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
	u64 generation;
	u64 key_objectid;
	u64 level;
	u32 nritems;
	struct btrfs_disk_key disk_key;

	level = btrfs_header_level(root->node);
	generation = trans->transid;
	nritems = btrfs_header_nritems(root->node);
	if (nritems > 0) {
		if (level == 0)
			btrfs_item_key(root->node, &disk_key, 0);
		else
			btrfs_node_key(root->node, &disk_key, 0);
		key_objectid = btrfs_disk_key_objectid(&disk_key);
	} else {
		key_objectid = 0;
	}
703
	return btrfs_inc_extent_ref(trans, root, root->node->start,
704
				    root->node->len, owner_objectid,
705
				    generation, key_objectid, level);
C
Chris Mason 已提交
706 707
}

708
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
709
		  struct extent_buffer *buf)
C
Chris Mason 已提交
710
{
711
	u64 bytenr;
712 713
	u32 nritems;
	struct btrfs_key key;
714
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
715
	int i;
716
	int level;
717
	int ret;
718
	int faili;
719

720
	if (!root->ref_cows)
721
		return 0;
722

723
	level = btrfs_header_level(buf);
724 725
	nritems = btrfs_header_nritems(buf);
	for (i = 0; i < nritems; i++) {
726 727
		if (level == 0) {
			u64 disk_bytenr;
728 729
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
730
				continue;
731
			fi = btrfs_item_ptr(buf, i,
732
					    struct btrfs_file_extent_item);
733
			if (btrfs_file_extent_type(buf, fi) ==
734 735
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
736 737
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
C
Chris Mason 已提交
738
				continue;
739
			ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
740 741 742
				    btrfs_file_extent_disk_num_bytes(buf, fi),
				    root->root_key.objectid, trans->transid,
				    key.objectid, key.offset);
743 744 745 746
			if (ret) {
				faili = i;
				goto fail;
			}
747
		} else {
748
			bytenr = btrfs_node_blockptr(buf, i);
749
			btrfs_node_key_to_cpu(buf, &key, i);
750
			ret = btrfs_inc_extent_ref(trans, root, bytenr,
751 752
					   btrfs_level_size(root, level - 1),
					   root->root_key.objectid,
753 754
					   trans->transid, key.objectid,
					   level - 1);
755 756 757 758
			if (ret) {
				faili = i;
				goto fail;
			}
759
		}
C
Chris Mason 已提交
760 761
	}
	return 0;
762
fail:
C
Chris Mason 已提交
763
	WARN_ON(1);
764
#if 0
765
	for (i =0; i < faili; i++) {
766 767
		if (level == 0) {
			u64 disk_bytenr;
768 769
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
770
				continue;
771
			fi = btrfs_item_ptr(buf, i,
772
					    struct btrfs_file_extent_item);
773
			if (btrfs_file_extent_type(buf, fi) ==
774 775
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
776 777
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
778
				continue;
779 780
			err = btrfs_free_extent(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf,
781
								      fi), 0);
782 783
			BUG_ON(err);
		} else {
784 785 786
			bytenr = btrfs_node_blockptr(buf, i);
			err = btrfs_free_extent(trans, root, bytenr,
					btrfs_level_size(root, level - 1), 0);
787 788 789
			BUG_ON(err);
		}
	}
790
#endif
791
	return ret;
C
Chris Mason 已提交
792 793
}

C
Chris Mason 已提交
794 795 796 797 798 799 800 801
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
802 803
	unsigned long bi;
	struct extent_buffer *leaf;
C
Chris Mason 已提交
804 805

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
806 807
	if (ret < 0)
		goto fail;
C
Chris Mason 已提交
808
	BUG_ON(ret);
809 810 811 812 813

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
814
	btrfs_release_path(extent_root, path);
815
fail:
C
Chris Mason 已提交
816 817 818 819 820 821 822 823 824 825
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}

826 827
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
C
Chris Mason 已提交
828
{
829 830
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
831 832 833 834
	int ret;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
835 836 837 838
	u64 last = 0;
	u64 start;
	u64 end;
	u64 ptr;
C
Chris Mason 已提交
839

840
	block_group_cache = &root->fs_info->block_group_cache;
C
Chris Mason 已提交
841 842 843 844 845
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
846 847 848
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, BLOCK_GROUP_DIRTY);
		if (ret)
C
Chris Mason 已提交
849
			break;
850

851 852 853 854 855
		last = end + 1;
		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
856
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
857 858 859 860 861 862 863 864 865 866
		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		if (err) {
			werr = err;
			continue;
C
Chris Mason 已提交
867
		}
868 869
		clear_extent_bits(block_group_cache, start, end,
				  BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
870 871 872 873 874 875 876
	}
	btrfs_free_path(path);
	return werr;
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
877 878
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free, int data)
C
Chris Mason 已提交
879 880 881
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
882
	u64 total = num_bytes;
C
Chris Mason 已提交
883
	u64 old_val;
884
	u64 byte_in_group;
885 886
	u64 start;
	u64 end;
C
Chris Mason 已提交
887

C
Chris Mason 已提交
888
	while(total) {
889
		cache = btrfs_lookup_block_group(info, bytenr);
C
Chris Mason 已提交
890
		if (!cache) {
C
Chris Mason 已提交
891
			return -1;
C
Chris Mason 已提交
892
		}
893 894
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
895 896 897 898
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
899 900

		old_val = btrfs_block_group_used(&cache->item);
901
		num_bytes = min(total, cache->key.offset - byte_in_group);
C
Chris Mason 已提交
902
		if (alloc) {
C
Chris Mason 已提交
903
			if (cache->data != data &&
C
Chris Mason 已提交
904
			    old_val < (cache->key.offset >> 1)) {
905 906 907
				int bit_to_clear;
				int bit_to_set;
				cache->data = data;
C
Chris Mason 已提交
908
				if (data) {
909 910
					bit_to_clear = BLOCK_GROUP_METADATA;
					bit_to_set = BLOCK_GROUP_DATA;
911 912
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
913 914 915
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
916 917
					bit_to_clear = BLOCK_GROUP_DATA;
					bit_to_set = BLOCK_GROUP_METADATA;
918 919
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
920 921 922
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
923 924 925 926 927 928
				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
929 930 931 932 933 934 935 936
			} else if (cache->data != data &&
				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
				cache->data = BTRFS_BLOCK_GROUP_MIXED;
				set_extent_bits(&info->block_group_cache,
						start, end,
						BLOCK_GROUP_DATA |
						BLOCK_GROUP_METADATA,
						GFP_NOFS);
C
Chris Mason 已提交
937
			}
938
			old_val += num_bytes;
C
Chris Mason 已提交
939
		} else {
940
			old_val -= num_bytes;
941 942
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
943
						 bytenr, bytenr + num_bytes - 1,
944
						 GFP_NOFS);
945
			}
C
Chris Mason 已提交
946
		}
C
Chris Mason 已提交
947
		btrfs_set_block_group_used(&cache->item, old_val);
948 949
		total -= num_bytes;
		bytenr += num_bytes;
C
Chris Mason 已提交
950 951 952
	}
	return 0;
}
953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983
static int update_pinned_extents(struct btrfs_root *root,
				u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	}
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		WARN_ON(!cache);
		len = min(num, cache->key.offset -
			  (bytenr - cache->key.objectid));
		if (pin) {
			cache->pinned += len;
			fs_info->total_pinned += len;
		} else {
			cache->pinned -= len;
			fs_info->total_pinned -= len;
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
C
Chris Mason 已提交
984

985
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
C
Chris Mason 已提交
986 987
{
	u64 last = 0;
988 989 990
	u64 start;
	u64 end;
	struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
C
Chris Mason 已提交
991 992 993
	int ret;

	while(1) {
994 995 996
		ret = find_first_extent_bit(pinned_extents, last,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
C
Chris Mason 已提交
997
			break;
998 999
		set_extent_dirty(copy, start, end, GFP_NOFS);
		last = end + 1;
C
Chris Mason 已提交
1000 1001 1002 1003 1004 1005
	}
	return 0;
}

int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
1006
			       struct extent_map_tree *unpin)
1007
{
1008 1009
	u64 start;
	u64 end;
1010
	int ret;
1011 1012
	struct extent_map_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;
1013 1014

	while(1) {
1015 1016 1017
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
1018
			break;
1019
		update_pinned_extents(root, start, end + 1 - start, 0);
1020 1021
		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1022 1023 1024 1025
	}
	return 0;
}

1026 1027
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
C
Chris Mason 已提交
1028
{
1029 1030 1031
	u64 start;
	u64 end;
	struct btrfs_fs_info *info = extent_root->fs_info;
1032
	struct extent_buffer *eb;
1033
	struct btrfs_path *path;
C
Chris Mason 已提交
1034
	struct btrfs_key ins;
1035
	struct btrfs_disk_key first;
C
Chris Mason 已提交
1036
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
1037
	int ret;
1038
	int level;
1039
	int err = 0;
C
Chris Mason 已提交
1040

1041
	btrfs_set_stack_extent_refs(&extent_item, 1);
1042
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1043
	path = btrfs_alloc_path();
C
Chris Mason 已提交
1044

1045
	while(1) {
1046 1047 1048
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
1049 1050
			break;

1051 1052 1053 1054 1055 1056
		ins.objectid = start;
		ins.offset = end + 1 - start;
		err = btrfs_insert_item(trans, extent_root, &ins,
					&extent_item, sizeof(extent_item));
		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
1057 1058 1059 1060 1061 1062 1063
		eb = read_tree_block(extent_root, ins.objectid, ins.offset);
		level = btrfs_header_level(eb);
		if (level == 0) {
			btrfs_item_key(eb, &first, 0);
		} else {
			btrfs_node_key(eb, &first, 0);
		}
1064 1065
		err = btrfs_insert_extent_backref(trans, extent_root, path,
					  start, extent_root->root_key.objectid,
1066 1067
					  0, btrfs_disk_key_objectid(&first),
					  level);
1068
		BUG_ON(err);
1069
		free_extent_buffer(eb);
C
Chris Mason 已提交
1070
	}
1071
	btrfs_free_path(path);
C
Chris Mason 已提交
1072 1073 1074
	return 0;
}

1075 1076
static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
			  int pending)
C
Chris Mason 已提交
1077
{
1078
	int err = 0;
1079
	struct extent_buffer *buf;
C
Chris Mason 已提交
1080

C
Chris Mason 已提交
1081
	if (!pending) {
1082
		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1083 1084
		if (buf) {
			if (btrfs_buffer_uptodate(buf)) {
C
Chris Mason 已提交
1085 1086
				u64 transid =
				    root->fs_info->running_transaction->transid;
1087 1088
				if (btrfs_header_generation(buf) == transid) {
					free_extent_buffer(buf);
1089
					return 1;
C
Chris Mason 已提交
1090
				}
C
Chris Mason 已提交
1091
			}
1092
			free_extent_buffer(buf);
C
Chris Mason 已提交
1093
		}
1094
		update_pinned_extents(root, bytenr, num_bytes, 1);
C
Chris Mason 已提交
1095
	} else {
1096
		set_extent_bits(&root->fs_info->pending_del,
1097 1098
				bytenr, bytenr + num_bytes - 1,
				EXTENT_LOCKED, GFP_NOFS);
C
Chris Mason 已提交
1099
	}
C
Chris Mason 已提交
1100
	BUG_ON(err < 0);
C
Chris Mason 已提交
1101 1102 1103
	return 0;
}

1104
/*
1105
 * remove an extent from the root, returns 0 on success
1106
 */
1107
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1108 1109 1110
			 *root, u64 bytenr, u64 num_bytes,
			 u64 root_objectid, u64 ref_generation,
			 u64 owner_objectid, u64 owner_offset, int pin,
1111
			 int mark_free)
1112
{
1113
	struct btrfs_path *path;
C
Chris Mason 已提交
1114
	struct btrfs_key key;
1115 1116
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
1117
	struct extent_buffer *leaf;
1118
	int ret;
C
Chris Mason 已提交
1119
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
1120
	u32 refs;
C
Chris Mason 已提交
1121

1122
	key.objectid = bytenr;
1123
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1124
	key.offset = num_bytes;
1125

1126
	path = btrfs_alloc_path();
1127 1128
	if (!path)
		return -ENOMEM;
1129

1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147
	if (ref_generation && owner_objectid == 0 && root_objectid == 3) {
//printk("drop backref root %Lu gen %Lu byte %Lu\n", root_objectid, ref_generation, bytenr );
	}
	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, root_objectid,
				    ref_generation,
				    owner_objectid, owner_offset, 1);
	if (ret == 0) {
		ret = btrfs_del_item(trans, extent_root, path);
	} else {
		btrfs_print_leaf(extent_root, path->nodes[0]);
		WARN_ON(1);
		printk("Unable to find ref byte nr %Lu root %Lu "
		       " gen %Lu owner %Lu offset %Lu\n", bytenr,
		       root_objectid, ref_generation, owner_objectid,
		       owner_offset);
	}
	btrfs_release_path(extent_root, path);
1148 1149 1150 1151
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);
1152 1153 1154

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
C
Chris Mason 已提交
1155
			    struct btrfs_extent_item);
1156 1157 1158 1159 1160 1161
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs -= 1;
	btrfs_set_extent_refs(leaf, ei, refs);
	btrfs_mark_buffer_dirty(leaf);

C
Chris Mason 已提交
1162
	if (refs == 0) {
1163 1164
		u64 super_used;
		u64 root_used;
C
Chris Mason 已提交
1165 1166

		if (pin) {
1167
			ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1168 1169 1170
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
C
Chris Mason 已提交
1171 1172
		}

1173
		/* block accounting for super block */
1174 1175 1176
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - num_bytes);
1177 1178

		/* block accounting for root item */
1179
		root_used = btrfs_root_used(&root->root_item);
1180
		btrfs_set_root_used(&root->root_item,
1181
					   root_used - num_bytes);
1182

1183
		ret = btrfs_del_item(trans, extent_root, path);
1184 1185 1186
		if (ret) {
			return ret;
		}
1187
		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
C
Chris Mason 已提交
1188
					 mark_free, 0);
C
Chris Mason 已提交
1189
		BUG_ON(ret);
1190
	}
1191
	btrfs_free_path(path);
1192
	finish_current_insert(trans, extent_root);
1193 1194 1195 1196 1197 1198 1199
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
1200 1201
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
1202 1203
{
	int ret;
C
Chris Mason 已提交
1204
	int err = 0;
1205 1206 1207 1208
	u64 start;
	u64 end;
	struct extent_map_tree *pending_del;
	struct extent_map_tree *pinned_extents;
C
Chris Mason 已提交
1209

1210 1211
	pending_del = &extent_root->fs_info->pending_del;
	pinned_extents = &extent_root->fs_info->pinned_extents;
1212 1213

	while(1) {
1214 1215 1216
		ret = find_first_extent_bit(pending_del, 0, &start, &end,
					    EXTENT_LOCKED);
		if (ret)
1217
			break;
1218
		update_pinned_extents(extent_root, start, end + 1 - start, 1);
1219 1220 1221
		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
		ret = __free_extent(trans, extent_root,
1222 1223 1224
				     start, end + 1 - start,
				     extent_root->root_key.objectid,
				     0, 0, 0, 0, 0);
1225 1226
		if (ret)
			err = ret;
1227
	}
C
Chris Mason 已提交
1228
	return err;
1229 1230 1231 1232 1233
}

/*
 * remove an extent from the root, returns 0 on success
 */
1234
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1235 1236 1237
		      *root, u64 bytenr, u64 num_bytes,
		      u64 root_objectid, u64 ref_generation,
		      u64 owner_objectid, u64 owner_offset, int pin)
1238
{
1239
	struct btrfs_root *extent_root = root->fs_info->extent_root;
1240 1241
	int pending_ret;
	int ret;
1242

1243
	WARN_ON(num_bytes < root->sectorsize);
1244 1245 1246
	if (!root->ref_cows)
		ref_generation = 0;

1247
	if (root == extent_root) {
1248
		pin_down_bytes(root, bytenr, num_bytes, 1);
1249 1250
		return 0;
	}
1251 1252 1253
	ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
			    ref_generation, owner_objectid, owner_offset,
			    pin, pin == 0);
C
Chris Mason 已提交
1254
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1255 1256 1257
	return ret ? ret : pending_ret;
}

1258 1259 1260 1261 1262 1263 1264
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
	u64 mask = ((u64)root->stripesize - 1);
	u64 ret = (val + mask) & ~mask;
	return ret;
}

1265 1266 1267 1268
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
1269
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
1270 1271 1272
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
1273
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1274 1275
			    *orig_root, u64 num_bytes, u64 empty_size,
			    u64 search_start, u64 search_end, u64 hint_byte,
1276 1277
			    struct btrfs_key *ins, u64 exclude_start,
			    u64 exclude_nr, int data)
1278
{
1279
	struct btrfs_path *path;
C
Chris Mason 已提交
1280
	struct btrfs_key key;
1281
	u64 hole_size = 0;
1282 1283
	u64 aligned;
	int ret;
1284
	int slot = 0;
1285
	u64 last_byte = 0;
C
Chris Mason 已提交
1286
	u64 orig_search_start = search_start;
1287
	int start_found;
1288
	struct extent_buffer *l;
1289
	struct btrfs_root * root = orig_root->fs_info->extent_root;
1290
	struct btrfs_fs_info *info = root->fs_info;
1291
	u64 total_needed = num_bytes;
C
Chris Mason 已提交
1292
	int level;
C
Chris Mason 已提交
1293
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
1294
	int full_scan = 0;
1295
	int wrapped = 0;
1296
	u64 cached_start;
1297

1298
	WARN_ON(num_bytes < root->sectorsize);
1299 1300
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

1301 1302
	level = btrfs_header_level(root->node);

1303
	if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1304 1305 1306
		data = BTRFS_BLOCK_GROUP_MIXED;
	}

C
Chris Mason 已提交
1307
	if (search_end == (u64)-1)
1308 1309 1310
		search_end = btrfs_super_total_bytes(&info->super_copy);
	if (hint_byte) {
		block_group = btrfs_lookup_block_group(info, hint_byte);
1311 1312
		if (!block_group)
			hint_byte = search_start;
C
Chris Mason 已提交
1313
		block_group = btrfs_find_block_group(root, block_group,
1314
						     hint_byte, data, 1);
C
Chris Mason 已提交
1315 1316
	} else {
		block_group = btrfs_find_block_group(root,
1317 1318
						     trans->block_group,
						     search_start, data, 1);
C
Chris Mason 已提交
1319 1320
	}

1321
	total_needed += empty_size;
1322
	path = btrfs_alloc_path();
C
Chris Mason 已提交
1323
check_failed:
1324 1325 1326 1327 1328 1329
	if (!block_group) {
		block_group = btrfs_lookup_block_group(info, search_start);
		if (!block_group)
			block_group = btrfs_lookup_block_group(info,
						       orig_search_start);
	}
1330 1331
	search_start = find_search_start(root, &block_group, search_start,
					 total_needed, data, full_scan);
1332
	search_start = stripe_align(root, search_start);
1333
	cached_start = search_start;
1334
	btrfs_init_path(path);
1335 1336 1337
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
1338
	path->reada = 2;
1339

1340
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
1341 1342
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
1343

1344
	if (path->slots[0] > 0) {
1345
		path->slots[0]--;
1346 1347
	}

1348 1349 1350
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);

1351
	/*
1352
	 * walk backwards to find the first extent item key
1353
	 */
1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366
	while(btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
		if (path->slots[0] == 0) {
			ret = btrfs_prev_leaf(root, path);
			if (ret != 0) {
				ret = btrfs_search_slot(trans, root, ins,
							path, 0, 0);
				if (ret < 0)
					goto error;
				if (path->slots[0] > 0)
					path->slots[0]--;
				break;
			}
		} else {
1367 1368
			path->slots[0]--;
		}
1369 1370
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1371
	}
1372
	while (1) {
1373
		l = path->nodes[0];
1374
		slot = path->slots[0];
1375
		if (slot >= btrfs_header_nritems(l)) {
1376
			ret = btrfs_next_leaf(root, path);
1377 1378
			if (ret == 0)
				continue;
C
Chris Mason 已提交
1379 1380
			if (ret < 0)
				goto error;
1381 1382 1383

			search_start = max(search_start,
					   block_group->key.objectid);
1384
			if (!start_found) {
1385 1386 1387 1388 1389 1390 1391
				aligned = stripe_align(root, search_start);
				ins->objectid = aligned;
				if (aligned >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				ins->offset = search_end - aligned;
1392 1393 1394
				start_found = 1;
				goto check_pending;
			}
1395 1396 1397 1398 1399 1400 1401
			ins->objectid = stripe_align(root,
						     last_byte > search_start ?
						     last_byte : search_start);
			if (search_end <= ins->objectid) {
				ret = -ENOSPC;
				goto error;
			}
C
Chris Mason 已提交
1402
			ins->offset = search_end - ins->objectid;
1403
			BUG_ON(ins->objectid >= search_end);
1404 1405
			goto check_pending;
		}
1406
		btrfs_item_key_to_cpu(l, &key, slot);
1407

1408
		if (key.objectid >= search_start && key.objectid > last_byte &&
1409
		    start_found) {
1410 1411
			if (last_byte < search_start)
				last_byte = search_start;
1412 1413 1414 1415
			aligned = stripe_align(root, last_byte);
			hole_size = key.objectid - aligned;
			if (key.objectid > aligned && hole_size >= num_bytes) {
				ins->objectid = aligned;
1416 1417
				ins->offset = hole_size;
				goto check_pending;
1418
			}
1419
		}
1420
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1421 1422
			if (!start_found && btrfs_key_type(&key) ==
			    BTRFS_BLOCK_GROUP_ITEM_KEY) {
1423
				last_byte = key.objectid;
1424 1425
				start_found = 1;
			}
1426
			goto next;
1427 1428
		}

1429

1430
		start_found = 1;
1431
		last_byte = key.objectid + key.offset;
1432

1433 1434
		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
		    last_byte >= block_group->key.objectid +
C
Chris Mason 已提交
1435 1436 1437
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
1438
				block_group->key.offset;
C
Chris Mason 已提交
1439 1440
			goto new_group;
		}
C
Chris Mason 已提交
1441
next:
1442
		path->slots[0]++;
1443
		cond_resched();
1444 1445 1446 1447 1448
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1449
	btrfs_release_path(root, path);
1450
	BUG_ON(ins->objectid < search_start);
1451

1452
	if (ins->objectid + num_bytes >= search_end)
1453
		goto enospc;
1454
	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
1455
	    ins->objectid + num_bytes > block_group->
1456 1457 1458 1459 1460
	    key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}
1461
	if (test_range_bit(&info->extent_ins, ins->objectid,
1462 1463
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
1464 1465 1466
		goto new_group;
	}
	if (test_range_bit(&info->pinned_extents, ins->objectid,
1467 1468
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
1469
		goto new_group;
1470
	}
1471
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1472 1473 1474 1475
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}
1476
	if (!data) {
1477
		block_group = btrfs_lookup_block_group(info, ins->objectid);
1478 1479
		if (block_group)
			trans->block_group = block_group;
1480
	}
1481
	ins->offset = num_bytes;
1482
	btrfs_free_path(path);
1483
	return 0;
C
Chris Mason 已提交
1484 1485

new_group:
1486
	if (search_start + num_bytes >= search_end) {
1487
enospc:
C
Chris Mason 已提交
1488
		search_start = orig_search_start;
1489 1490 1491 1492
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
1493 1494 1495
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
1496
			full_scan = 1;
1497
			data = BTRFS_BLOCK_GROUP_MIXED;
1498
		} else
1499
			wrapped = 1;
C
Chris Mason 已提交
1500
	}
1501
	block_group = btrfs_lookup_block_group(info, search_start);
1502
	cond_resched();
1503 1504
	block_group = btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
C
Chris Mason 已提交
1505 1506
	goto check_failed;

C
Chris Mason 已提交
1507
error:
1508 1509
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1510
	return ret;
1511 1512 1513 1514 1515 1516 1517 1518
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1519
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1520 1521 1522 1523
		       struct btrfs_root *root,
		       u64 num_bytes, u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 owner_offset,
		       u64 empty_size, u64 hint_byte,
C
Chris Mason 已提交
1524
		       u64 search_end, struct btrfs_key *ins, int data)
1525 1526 1527
{
	int ret;
	int pending_ret;
1528
	u64 super_used, root_used;
1529
	u64 search_start = 0;
1530 1531
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1532
	struct btrfs_extent_item extent_item;
1533
	struct btrfs_path *path;
C
Chris Mason 已提交
1534

1535
	btrfs_set_stack_extent_refs(&extent_item, 1);
1536

1537 1538 1539
	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
1540 1541
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);
C
Chris Mason 已提交
1542
	BUG_ON(ret);
1543 1544
	if (ret)
		return ret;
1545

1546
	/* block accounting for super block */
1547 1548
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1549

1550
	/* block accounting for root item */
1551 1552
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1553

1554 1555 1556 1557
	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);

1558
	if (root == extent_root) {
1559 1560 1561
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
1562
		WARN_ON(data == 1);
1563 1564 1565 1566 1567 1568
		goto update_block;
	}

	WARN_ON(trans->alloc_exclude_nr);
	trans->alloc_exclude_start = ins->objectid;
	trans->alloc_exclude_nr = ins->offset;
1569 1570
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1571

1572 1573
	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;
1574 1575 1576 1577 1578 1579 1580
	BUG_ON(ret);

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_extent_backref(trans, extent_root, path,
					  ins->objectid, root_objectid,
					  ref_generation, owner, owner_offset);
1581

C
Chris Mason 已提交
1582
	BUG_ON(ret);
1583
	btrfs_free_path(path);
1584
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1585
	pending_ret = del_pending_extents(trans, extent_root);
1586

1587
	if (ret) {
C
Chris Mason 已提交
1588
		return ret;
1589 1590
	}
	if (pending_ret) {
C
Chris Mason 已提交
1591
		return pending_ret;
1592
	}
1593 1594

update_block:
C
Chris Mason 已提交
1595 1596
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
C
Chris Mason 已提交
1597
	BUG_ON(ret);
C
Chris Mason 已提交
1598
	return 0;
1599 1600 1601 1602 1603 1604
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
1605
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1606
					     struct btrfs_root *root,
1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634
					     u32 blocksize,
					     u64 root_objectid, u64 hint,
					     u64 empty_size)
{
	u64 ref_generation;

	if (root->ref_cows)
		ref_generation = trans->transid;
	else
		ref_generation = 0;


	return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
					ref_generation, 0, 0, hint, empty_size);
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize,
					     u64 root_objectid,
					     u64 ref_generation,
					     u64 first_objectid,
					     int level,
					     u64 hint,
1635
					     u64 empty_size)
1636
{
C
Chris Mason 已提交
1637
	struct btrfs_key ins;
1638
	int ret;
1639
	struct extent_buffer *buf;
1640

1641 1642 1643
	ret = btrfs_alloc_extent(trans, root, blocksize,
				 root_objectid, ref_generation,
				 first_objectid, level, empty_size, hint,
1644
				 (u64)-1, &ins, 0);
1645
	if (ret) {
1646 1647
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
1648
	}
1649
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1650
	if (!buf) {
1651 1652 1653
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  root->root_key.objectid, ref_generation,
				  0, 0, 0);
1654 1655
		return ERR_PTR(-ENOMEM);
	}
1656 1657 1658
	btrfs_set_buffer_uptodate(buf);
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
1659 1660 1661 1662
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
1663
	btrfs_set_buffer_defrag(buf);
1664
	trans->blocks_used++;
1665 1666
	return buf;
}
1667

1668
static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1669
			 struct btrfs_root *root, struct extent_buffer *leaf)
1670
{
1671 1672
	u64 leaf_owner;
	u64 leaf_generation;
1673
	struct btrfs_key key;
1674 1675 1676 1677 1678
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

1679 1680
	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
1681 1682 1683
	leaf_owner = btrfs_header_owner(leaf);
	leaf_generation = btrfs_header_generation(leaf);

1684
	for (i = 0; i < nritems; i++) {
1685
		u64 disk_bytenr;
1686 1687 1688

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1689 1690
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1691 1692
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
1693
			continue;
1694 1695 1696 1697
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
1698 1699
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (disk_bytenr == 0)
C
Chris Mason 已提交
1700
			continue;
1701
		ret = btrfs_free_extent(trans, root, disk_bytenr,
1702 1703 1704
				btrfs_file_extent_disk_num_bytes(leaf, fi),
				leaf_owner, leaf_generation,
				key.objectid, key.offset, 0);
1705 1706 1707 1708 1709
		BUG_ON(ret);
	}
	return 0;
}

1710
static void reada_walk_down(struct btrfs_root *root,
1711
			    struct extent_buffer *node)
1712 1713 1714
{
	int i;
	u32 nritems;
1715
	u64 bytenr;
1716 1717
	int ret;
	u32 refs;
1718 1719
	int level;
	u32 blocksize;
1720

1721
	nritems = btrfs_header_nritems(node);
1722
	level = btrfs_header_level(node);
1723
	for (i = 0; i < nritems; i++) {
1724 1725 1726
		bytenr = btrfs_node_blockptr(node, i);
		blocksize = btrfs_level_size(root, level - 1);
		ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1727 1728 1729
		BUG_ON(ret);
		if (refs != 1)
			continue;
1730
		mutex_unlock(&root->fs_info->fs_mutex);
1731
		ret = readahead_tree_block(root, bytenr, blocksize);
1732 1733
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
1734 1735 1736 1737 1738
		if (ret)
			break;
	}
}

C
Chris Mason 已提交
1739 1740 1741 1742
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1743 1744
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1745
{
1746 1747 1748
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
1749 1750
	struct extent_buffer *next;
	struct extent_buffer *cur;
1751
	struct extent_buffer *parent;
1752
	u32 blocksize;
C
Chris Mason 已提交
1753 1754 1755
	int ret;
	u32 refs;

1756 1757
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1758
	ret = lookup_extent_ref(trans, root,
1759 1760
				path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
C
Chris Mason 已提交
1761 1762 1763
	BUG_ON(ret);
	if (refs > 1)
		goto out;
1764

C
Chris Mason 已提交
1765 1766 1767
	/*
	 * walk down to the last node level and free all the leaves
	 */
1768
	while(*level >= 0) {
1769 1770
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1771
		cur = path->nodes[*level];
1772 1773

		if (*level > 0 && path->slots[*level] == 0)
1774
			reada_walk_down(root, cur);
1775

1776
		if (btrfs_header_level(cur) != *level)
C
Chris Mason 已提交
1777
			WARN_ON(1);
1778

1779
		if (path->slots[*level] >=
1780
		    btrfs_header_nritems(cur))
C
Chris Mason 已提交
1781
			break;
1782 1783 1784 1785 1786
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
1787 1788 1789
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1790 1791
		BUG_ON(ret);
		if (refs != 1) {
1792 1793 1794
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
C
Chris Mason 已提交
1795
			path->slots[*level]++;
1796
			ret = btrfs_free_extent(trans, root, bytenr,
1797 1798
						blocksize, root_owner,
						root_gen, 0, 0, 1);
C
Chris Mason 已提交
1799 1800 1801
			BUG_ON(ret);
			continue;
		}
1802
		next = btrfs_find_tree_block(root, bytenr, blocksize);
1803 1804
		if (!next || !btrfs_buffer_uptodate(next)) {
			free_extent_buffer(next);
1805
			mutex_unlock(&root->fs_info->fs_mutex);
1806
			next = read_tree_block(root, bytenr, blocksize);
1807 1808 1809
			mutex_lock(&root->fs_info->fs_mutex);

			/* we dropped the lock, check one more time */
1810 1811
			ret = lookup_extent_ref(trans, root, bytenr,
						blocksize, &refs);
1812 1813
			BUG_ON(ret);
			if (refs != 1) {
1814 1815 1816 1817
				parent = path->nodes[*level];
				root_owner = btrfs_header_owner(parent);
				root_gen = btrfs_header_generation(parent);

1818
				path->slots[*level]++;
1819
				free_extent_buffer(next);
1820 1821 1822 1823
				ret = btrfs_free_extent(trans, root, bytenr,
							blocksize,
							root_owner,
							root_gen, 0, 0, 1);
1824 1825 1826 1827
				BUG_ON(ret);
				continue;
			}
		}
1828
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1829
		if (path->nodes[*level-1])
1830
			free_extent_buffer(path->nodes[*level-1]);
C
Chris Mason 已提交
1831
		path->nodes[*level-1] = next;
1832
		*level = btrfs_header_level(next);
C
Chris Mason 已提交
1833 1834 1835
		path->slots[*level] = 0;
	}
out:
1836 1837
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1838 1839 1840 1841 1842 1843 1844 1845 1846 1847

	if (path->nodes[*level] == root->node) {
		root_owner = root->root_key.objectid;
		parent = path->nodes[*level];
	} else {
		parent = path->nodes[*level + 1];
		root_owner = btrfs_header_owner(parent);
	}

	root_gen = btrfs_header_generation(parent);
1848
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1849 1850
				path->nodes[*level]->len,
				root_owner, root_gen, 0, 0, 1);
1851
	free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1852 1853 1854 1855 1856 1857
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1858 1859 1860 1861 1862
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1863 1864
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1865
{
1866 1867 1868
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
C
Chris Mason 已提交
1869 1870 1871
	int i;
	int slot;
	int ret;
1872

C
Chris Mason 已提交
1873
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1874
		slot = path->slots[i];
1875 1876 1877 1878
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			node = path->nodes[i];
C
Chris Mason 已提交
1879 1880
			path->slots[i]++;
			*level = i;
1881
			WARN_ON(*level == 0);
1882
			btrfs_node_key(node, &disk_key, path->slots[i]);
1883
			memcpy(&root_item->drop_progress,
1884
			       &disk_key, sizeof(disk_key));
1885
			root_item->drop_level = i;
C
Chris Mason 已提交
1886 1887
			return 0;
		} else {
1888 1889 1890 1891 1892 1893 1894 1895 1896 1897
			if (path->nodes[*level] == root->node) {
				root_owner = root->root_key.objectid;
				root_gen =
				   btrfs_header_generation(path->nodes[*level]);
			} else {
				struct extent_buffer *node;
				node = path->nodes[*level + 1];
				root_owner = btrfs_header_owner(node);
				root_gen = btrfs_header_generation(node);
			}
1898
			ret = btrfs_free_extent(trans, root,
1899
						path->nodes[*level]->start,
1900 1901
						path->nodes[*level]->len,
						root_owner, root_gen, 0, 0, 1);
1902
			BUG_ON(ret);
1903
			free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1904
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
1905 1906 1907 1908 1909 1910
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
1911 1912 1913 1914 1915
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
1916
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1917
			*root)
C
Chris Mason 已提交
1918
{
1919
	int ret = 0;
C
Chris Mason 已提交
1920
	int wret;
C
Chris Mason 已提交
1921
	int level;
1922
	struct btrfs_path *path;
C
Chris Mason 已提交
1923 1924
	int i;
	int orig_level;
1925
	struct btrfs_root_item *root_item = &root->root_item;
C
Chris Mason 已提交
1926

1927 1928
	path = btrfs_alloc_path();
	BUG_ON(!path);
C
Chris Mason 已提交
1929

1930
	level = btrfs_header_level(root->node);
C
Chris Mason 已提交
1931
	orig_level = level;
1932 1933
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path->nodes[level] = root->node;
1934
		extent_buffer_get(root->node);
1935 1936 1937
		path->slots[level] = 0;
	} else {
		struct btrfs_key key;
1938 1939
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;
1940

1941
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1942 1943
		level = root_item->drop_level;
		path->lowest_level = level;
1944
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1945
		if (wret < 0) {
1946 1947 1948
			ret = wret;
			goto out;
		}
1949 1950 1951 1952
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
1953
	}
C
Chris Mason 已提交
1954
	while(1) {
1955
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
1956
		if (wret > 0)
C
Chris Mason 已提交
1957
			break;
C
Chris Mason 已提交
1958 1959 1960
		if (wret < 0)
			ret = wret;

1961
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
1962
		if (wret > 0)
C
Chris Mason 已提交
1963
			break;
C
Chris Mason 已提交
1964 1965
		if (wret < 0)
			ret = wret;
1966 1967
		ret = -EAGAIN;
		break;
C
Chris Mason 已提交
1968
	}
C
Chris Mason 已提交
1969
	for (i = 0; i <= orig_level; i++) {
1970
		if (path->nodes[i]) {
1971
			free_extent_buffer(path->nodes[i]);
1972
			path->nodes[i] = NULL;
C
Chris Mason 已提交
1973
		}
C
Chris Mason 已提交
1974
	}
1975
out:
1976
	btrfs_free_path(path);
C
Chris Mason 已提交
1977
	return ret;
C
Chris Mason 已提交
1978
}
C
Chris Mason 已提交
1979

1980
int btrfs_free_block_groups(struct btrfs_fs_info *info)
C
Chris Mason 已提交
1981
{
1982 1983
	u64 start;
	u64 end;
1984
	u64 ptr;
C
Chris Mason 已提交
1985 1986
	int ret;
	while(1) {
1987 1988 1989
		ret = find_first_extent_bit(&info->block_group_cache, 0,
					    &start, &end, (unsigned int)-1);
		if (ret)
C
Chris Mason 已提交
1990
			break;
1991 1992 1993
		ret = get_state_private(&info->block_group_cache, start, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);
1994 1995
		clear_extent_bits(&info->block_group_cache, start,
				  end, (unsigned int)-1, GFP_NOFS);
C
Chris Mason 已提交
1996
	}
1997
	while(1) {
1998 1999 2000
		ret = find_first_extent_bit(&info->free_space_cache, 0,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
2001
			break;
2002 2003
		clear_extent_dirty(&info->free_space_cache, start,
				   end, GFP_NOFS);
2004
	}
C
Chris Mason 已提交
2005 2006 2007
	return 0;
}

C
Chris Mason 已提交
2008 2009 2010 2011 2012
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
2013
	int bit;
C
Chris Mason 已提交
2014
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
2015
	struct btrfs_fs_info *info = root->fs_info;
2016
	struct extent_map_tree *block_group_cache;
C
Chris Mason 已提交
2017 2018
	struct btrfs_key key;
	struct btrfs_key found_key;
2019
	struct extent_buffer *leaf;
2020 2021

	block_group_cache = &info->block_group_cache;
C
Chris Mason 已提交
2022

C
Chris Mason 已提交
2023
	root = info->extent_root;
C
Chris Mason 已提交
2024
	key.objectid = 0;
2025
	key.offset = BTRFS_BLOCK_GROUP_SIZE;
C
Chris Mason 已提交
2026 2027 2028 2029 2030 2031 2032
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
2033
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
2034 2035 2036 2037 2038
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
2039 2040
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
C
Chris Mason 已提交
2041 2042 2043 2044 2045
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
2046

2047 2048 2049
		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
C
Chris Mason 已提交
2050
		memcpy(&cache->key, &found_key, sizeof(found_key));
2051
		cache->cached = 0;
2052
		cache->pinned = 0;
C
Chris Mason 已提交
2053 2054
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
2055

2056 2057 2058 2059
		if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
			bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
			cache->data = BTRFS_BLOCK_GROUP_MIXED;
		} else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2060
			bit = BLOCK_GROUP_DATA;
2061
			cache->data = BTRFS_BLOCK_GROUP_DATA;
2062 2063 2064
		} else {
			bit = BLOCK_GROUP_METADATA;
			cache->data = 0;
2065
		}
2066 2067 2068 2069 2070 2071

		/* use EXTENT_LOCKED to prevent merging */
		set_extent_bits(block_group_cache, found_key.objectid,
				found_key.objectid + found_key.offset - 1,
				bit | EXTENT_LOCKED, GFP_NOFS);
		set_state_private(block_group_cache, found_key.objectid,
J
Jens Axboe 已提交
2072
				  (unsigned long)cache);
2073

C
Chris Mason 已提交
2074
		if (key.objectid >=
2075
		    btrfs_super_total_bytes(&info->super_copy))
C
Chris Mason 已提交
2076 2077 2078 2079 2080 2081
			break;
	}

	btrfs_free_path(path);
	return 0;
}