extent-tree.c 42.6 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Z
Zach Brown 已提交
19
#include <linux/sched.h>
20
#include "hash.h"
21 22 23
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
24
#include "transaction.h"
25

26 27 28 29
#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
#define BLOCK_GROUP_DIRTY EXTENT_DIRTY

30 31
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
32 33
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
34

35 36 37 38 39 40
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
41
	struct extent_buffer *leaf;
42
	struct extent_map_tree *free_space_cache;
43 44 45
	int slot;
	u64 last = 0;
	u64 hole_size;
46
	u64 first_free;
47 48
	int found = 0;

49 50 51
	if (!block_group)
		return 0;

52
	root = root->fs_info->extent_root;
53
	free_space_cache = &root->fs_info->free_space_cache;
54 55 56

	if (block_group->cached)
		return 0;
57

58 59 60
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
61

62
	path->reada = 2;
63
	first_free = block_group->key.objectid;
64 65
	key.objectid = block_group->key.objectid;
	key.offset = 0;
66

67 68
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
69

70 71
	if (ret < 0)
		return ret;
72

73 74
	if (ret && path->slots[0] > 0)
		path->slots[0]--;
75

76
	while(1) {
77
		leaf = path->nodes[0];
78
		slot = path->slots[0];
79
		if (slot >= btrfs_header_nritems(leaf)) {
80
			ret = btrfs_next_leaf(root, path);
81 82
			if (ret < 0)
				goto err;
83
			if (ret == 0) {
84
				continue;
85
			} else {
86 87 88
				break;
			}
		}
89

90
		btrfs_item_key_to_cpu(leaf, &key, slot);
91 92 93 94 95 96
		if (key.objectid < block_group->key.objectid) {
			if (key.objectid + key.offset > first_free)
				first_free = key.objectid + key.offset;
			goto next;
		}

97 98 99 100
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			break;
		}
101

102 103
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
104
				last = first_free;
105 106
				found = 1;
			}
107 108 109 110 111
			if (key.objectid > last) {
				hole_size = key.objectid - last;
				set_extent_dirty(free_space_cache, last,
						 last + hole_size - 1,
						 GFP_NOFS);
112 113
			}
			last = key.objectid + key.offset;
114
		}
115
next:
116 117 118
		path->slots[0]++;
	}

119 120 121 122 123 124
	if (!found)
		last = first_free;
	if (block_group->key.objectid +
	    block_group->key.offset > last) {
		hole_size = block_group->key.objectid +
			block_group->key.offset - last;
125 126
		set_extent_dirty(free_space_cache, last,
				 last + hole_size - 1, GFP_NOFS);
127
	}
128
	block_group->cached = 1;
129
err:
130 131 132 133
	btrfs_free_path(path);
	return 0;
}

134 135
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
136
							 u64 bytenr)
C
Chris Mason 已提交
137
{
138 139 140 141 142
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 ptr;
	u64 start;
	u64 end;
C
Chris Mason 已提交
143 144
	int ret;

145 146
	block_group_cache = &info->block_group_cache;
	ret = find_first_extent_bit(block_group_cache,
147
				    bytenr, &start, &end,
148
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
C
Chris Mason 已提交
149
	if (ret) {
150
		return NULL;
C
Chris Mason 已提交
151
	}
152 153 154 155
	ret = get_state_private(block_group_cache, start, &ptr);
	if (ret)
		return NULL;

J
Jens Axboe 已提交
156
	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
Y
Yan 已提交
157
	if (block_group->key.objectid <= bytenr && bytenr <
158 159
	    block_group->key.objectid + block_group->key.offset)
		return block_group;
C
Chris Mason 已提交
160 161
	return NULL;
}
162 163
static u64 find_search_start(struct btrfs_root *root,
			     struct btrfs_block_group_cache **cache_ret,
164 165
			     u64 search_start, int num,
			     int data, int full_scan)
166 167 168
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
169
	u64 last;
170 171
	u64 start = 0;
	u64 end = 0;
172
	u64 cache_miss = 0;
173
	int wrapped = 0;
174

175
	if (!cache) {
176
		goto out;
177
	}
178
again:
179 180 181
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;
182

183 184
	last = max(search_start, cache->key.objectid);

185
	while(1) {
186 187
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
188
		if (ret) {
189 190
			if (!cache_miss)
				cache_miss = last;
191 192
			goto new_group;
		}
193 194 195

		start = max(last, start);
		last = end + 1;
196 197 198
		if (last - start < num) {
			if (last == cache->key.objectid + cache->key.offset)
				cache_miss = start;
199
			continue;
200 201
		}
		if (data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
202
		    start + num > cache->key.objectid + cache->key.offset)
203
			goto new_group;
204
		return start;
205 206
	}
out:
207 208 209 210 211 212 213
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n",
		       search_start);
		WARN_ON(1);
		return search_start;
	}
214
	return search_start;
215 216

new_group:
217
	last = cache->key.objectid + cache->key.offset;
218
wrapped:
219
	cache = btrfs_lookup_block_group(root->fs_info, last);
220
	if (!cache) {
221
no_cache:
222 223 224 225 226 227
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			data = BTRFS_BLOCK_GROUP_MIXED;
			goto wrapped;
		}
228
		goto out;
229
	}
230 231 232 233 234
	if (cache_miss && !cache->cached) {
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_block_group(root->fs_info, last);
	}
235
	cache = btrfs_find_block_group(root, cache, last, data, 0);
236 237
	if (!cache)
		goto no_cache;
238
	*cache_ret = cache;
239
	cache_miss = 0;
240 241 242
	goto again;
}

C
Chris Mason 已提交
243 244
static u64 div_factor(u64 num, int factor)
{
245 246
	if (factor == 10)
		return num;
C
Chris Mason 已提交
247 248 249 250 251
	num *= factor;
	do_div(num, 10);
	return num;
}

252 253
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
254
						 *hint, u64 search_start,
255
						 int data, int owner)
C
Chris Mason 已提交
256
{
257 258
	struct btrfs_block_group_cache *cache;
	struct extent_map_tree *block_group_cache;
259
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
260 261
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
262 263
	u64 last = 0;
	u64 hint_last;
264 265 266 267 268
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	int bit;
C
Chris Mason 已提交
269
	int ret;
270
	int full_search = 0;
271
	int factor = 8;
C
Chris Mason 已提交
272
	int data_swap = 0;
273

274 275
	block_group_cache = &info->block_group_cache;

276
	if (!owner)
277
		factor = 8;
C
Chris Mason 已提交
278

279
	if (data == BTRFS_BLOCK_GROUP_MIXED) {
280
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
281 282
		factor = 10;
	} else if (data)
283 284 285
		bit = BLOCK_GROUP_DATA;
	else
		bit = BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
286 287 288

	if (search_start) {
		struct btrfs_block_group_cache *shint;
289
		shint = btrfs_lookup_block_group(info, search_start);
290 291
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
C
Chris Mason 已提交
292
			used = btrfs_block_group_used(&shint->item);
293 294
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
C
Chris Mason 已提交
295 296 297 298
				return shint;
			}
		}
	}
299 300
	if (hint && (hint->data == data ||
		     hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
301
		used = btrfs_block_group_used(&hint->item);
302 303
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
304 305
			return hint;
		}
306
		last = hint->key.objectid + hint->key.offset;
307 308
		hint_last = last;
	} else {
309 310 311 312 313 314
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
315 316
	}
again:
C
Chris Mason 已提交
317
	while(1) {
318 319 320
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
C
Chris Mason 已提交
321
			break;
322 323 324 325 326

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
327
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
328 329 330 331 332 333 334
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (full_search)
			free_check = cache->key.offset;
		else
			free_check = div_factor(cache->key.offset, factor);
335
		if (used + cache->pinned < free_check) {
336 337
			found_group = cache;
			goto found;
C
Chris Mason 已提交
338
		}
339
		cond_resched();
C
Chris Mason 已提交
340
	}
341
	if (!full_search) {
C
Chris Mason 已提交
342
		last = search_start;
343 344 345
		full_search = 1;
		goto again;
	}
C
Chris Mason 已提交
346 347
	if (!data_swap) {
		data_swap = 1;
348
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
349 350 351
		last = search_start;
		goto again;
	}
C
Chris Mason 已提交
352
found:
353
	return found_group;
C
Chris Mason 已提交
354 355
}

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
static u64 hash_extent_ref(u64 root_objectid, u64 root_generation,
			   u64 owner, u64 owner_offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(root_generation);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));

	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	lenum = cpu_to_le64(owner_offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 32) | (u64)low_crc;
}

int insert_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				u64 bytenr,
				u64 root_objectid, u64 root_generation,
				u64 owner, u64 owner_offset)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_extent_ref ref;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	int ret;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, root_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	ret = btrfs_name_hash(&ref, sizeof(ref), &hash);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
	while (ret == -EEXIST) {

	}

}

C
Chris Mason 已提交
408 409
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
410 411 412
				u64 bytenr, u64 num_bytes,
				u64 root_objectid, u64 root_generation,
				u64 owner, u64 owner_offset)
C
Chris Mason 已提交
413
{
414
	struct btrfs_path *path;
C
Chris Mason 已提交
415
	int ret;
C
Chris Mason 已提交
416
	struct btrfs_key key;
417
	struct extent_buffer *l;
C
Chris Mason 已提交
418
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
419
	u32 refs;
C
Chris Mason 已提交
420

421
	WARN_ON(num_bytes < root->sectorsize);
422
	path = btrfs_alloc_path();
423 424
	if (!path)
		return -ENOMEM;
425

426
	key.objectid = bytenr;
427
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
428
	key.offset = num_bytes;
429
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
430
				0, 1);
431 432
	if (ret < 0)
		return ret;
433
	if (ret != 0) {
434
		BUG();
435
	}
C
Chris Mason 已提交
436
	BUG_ON(ret != 0);
437
	l = path->nodes[0];
438
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
439 440
	refs = btrfs_extent_refs(l, item);
	btrfs_set_extent_refs(l, item, refs + 1);
441
	btrfs_mark_buffer_dirty(path->nodes[0]);
442

443
	btrfs_release_path(root->fs_info->extent_root, path);
444
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
445
	del_pending_extents(trans, root->fs_info->extent_root);
446 447

	btrfs_free_path(path);
C
Chris Mason 已提交
448 449 450
	return 0;
}

451 452 453 454 455 456 457 458
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
{
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	return 0;
}

C
Chris Mason 已提交
459
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
460 461
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u32 *refs)
462
{
463
	struct btrfs_path *path;
464
	int ret;
C
Chris Mason 已提交
465
	struct btrfs_key key;
466
	struct extent_buffer *l;
C
Chris Mason 已提交
467
	struct btrfs_extent_item *item;
468

469
	WARN_ON(num_bytes < root->sectorsize);
470
	path = btrfs_alloc_path();
471 472
	key.objectid = bytenr;
	key.offset = num_bytes;
473
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
474
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
475
				0, 0);
476 477
	if (ret < 0)
		goto out;
478 479
	if (ret != 0) {
		btrfs_print_leaf(root, path->nodes[0]);
480
		printk("failed to find block number %Lu\n", bytenr);
481
		BUG();
482 483
	}
	l = path->nodes[0];
484
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
485
	*refs = btrfs_extent_refs(l, item);
486
out:
487
	btrfs_free_path(path);
488 489 490
	return 0;
}

C
Chris Mason 已提交
491 492 493
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root)
{
494 495
	return btrfs_inc_extent_ref(trans, root, root->node->start,
				    root->node->len);
C
Chris Mason 已提交
496 497
}

498
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
499
		  struct extent_buffer *buf)
C
Chris Mason 已提交
500
{
501
	u64 bytenr;
502 503
	u32 nritems;
	struct btrfs_key key;
504
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
505
	int i;
506
	int level;
507
	int ret;
508 509
	int faili;
	int err;
510

511
	if (!root->ref_cows)
512
		return 0;
513

514
	level = btrfs_header_level(buf);
515 516
	nritems = btrfs_header_nritems(buf);
	for (i = 0; i < nritems; i++) {
517 518
		if (level == 0) {
			u64 disk_bytenr;
519 520
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
521
				continue;
522
			fi = btrfs_item_ptr(buf, i,
523
					    struct btrfs_file_extent_item);
524
			if (btrfs_file_extent_type(buf, fi) ==
525 526
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
527 528
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
C
Chris Mason 已提交
529
				continue;
530 531
			ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf, fi));
532 533 534 535
			if (ret) {
				faili = i;
				goto fail;
			}
536
		} else {
537 538 539
			bytenr = btrfs_node_blockptr(buf, i);
			ret = btrfs_inc_extent_ref(trans, root, bytenr,
					   btrfs_level_size(root, level - 1));
540 541 542 543
			if (ret) {
				faili = i;
				goto fail;
			}
544
		}
C
Chris Mason 已提交
545 546
	}
	return 0;
547
fail:
C
Chris Mason 已提交
548
	WARN_ON(1);
549
	for (i =0; i < faili; i++) {
550 551
		if (level == 0) {
			u64 disk_bytenr;
552 553
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
554
				continue;
555
			fi = btrfs_item_ptr(buf, i,
556
					    struct btrfs_file_extent_item);
557
			if (btrfs_file_extent_type(buf, fi) ==
558 559
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
560 561
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
562
				continue;
563 564
			err = btrfs_free_extent(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf,
565
								      fi), 0);
566 567
			BUG_ON(err);
		} else {
568 569 570
			bytenr = btrfs_node_blockptr(buf, i);
			err = btrfs_free_extent(trans, root, bytenr,
					btrfs_level_size(root, level - 1), 0);
571 572 573 574
			BUG_ON(err);
		}
	}
	return ret;
C
Chris Mason 已提交
575 576
}

C
Chris Mason 已提交
577 578 579 580 581 582 583 584
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
585 586
	unsigned long bi;
	struct extent_buffer *leaf;
C
Chris Mason 已提交
587 588

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
589 590
	if (ret < 0)
		goto fail;
C
Chris Mason 已提交
591
	BUG_ON(ret);
592 593 594 595 596

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
597
	btrfs_release_path(extent_root, path);
598
fail:
C
Chris Mason 已提交
599 600 601 602 603 604 605 606 607 608
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}

609 610
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
C
Chris Mason 已提交
611
{
612 613
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
614 615 616 617
	int ret;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
618 619 620 621
	u64 last = 0;
	u64 start;
	u64 end;
	u64 ptr;
C
Chris Mason 已提交
622

623
	block_group_cache = &root->fs_info->block_group_cache;
C
Chris Mason 已提交
624 625 626 627 628
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
629 630 631
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, BLOCK_GROUP_DIRTY);
		if (ret)
C
Chris Mason 已提交
632
			break;
633

634 635 636 637 638
		last = end + 1;
		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
639
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
640 641 642 643 644 645 646 647 648 649
		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		if (err) {
			werr = err;
			continue;
C
Chris Mason 已提交
650
		}
651 652
		clear_extent_bits(block_group_cache, start, end,
				  BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
653 654 655 656 657 658 659
	}
	btrfs_free_path(path);
	return werr;
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
660 661
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free, int data)
C
Chris Mason 已提交
662 663 664
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
665
	u64 total = num_bytes;
C
Chris Mason 已提交
666
	u64 old_val;
667
	u64 byte_in_group;
668 669
	u64 start;
	u64 end;
C
Chris Mason 已提交
670

C
Chris Mason 已提交
671
	while(total) {
672
		cache = btrfs_lookup_block_group(info, bytenr);
C
Chris Mason 已提交
673
		if (!cache) {
C
Chris Mason 已提交
674
			return -1;
C
Chris Mason 已提交
675
		}
676 677
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
678 679 680 681
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
682 683

		old_val = btrfs_block_group_used(&cache->item);
684
		num_bytes = min(total, cache->key.offset - byte_in_group);
C
Chris Mason 已提交
685
		if (alloc) {
C
Chris Mason 已提交
686
			if (cache->data != data &&
C
Chris Mason 已提交
687
			    old_val < (cache->key.offset >> 1)) {
688 689 690
				int bit_to_clear;
				int bit_to_set;
				cache->data = data;
C
Chris Mason 已提交
691
				if (data) {
692 693
					bit_to_clear = BLOCK_GROUP_METADATA;
					bit_to_set = BLOCK_GROUP_DATA;
694 695
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
696 697 698
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
699 700
					bit_to_clear = BLOCK_GROUP_DATA;
					bit_to_set = BLOCK_GROUP_METADATA;
701 702
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
703 704 705
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
706 707 708 709 710 711
				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
712 713 714 715 716 717 718 719
			} else if (cache->data != data &&
				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
				cache->data = BTRFS_BLOCK_GROUP_MIXED;
				set_extent_bits(&info->block_group_cache,
						start, end,
						BLOCK_GROUP_DATA |
						BLOCK_GROUP_METADATA,
						GFP_NOFS);
C
Chris Mason 已提交
720
			}
721
			old_val += num_bytes;
C
Chris Mason 已提交
722
		} else {
723
			old_val -= num_bytes;
724 725
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
726
						 bytenr, bytenr + num_bytes - 1,
727
						 GFP_NOFS);
728
			}
C
Chris Mason 已提交
729
		}
C
Chris Mason 已提交
730
		btrfs_set_block_group_used(&cache->item, old_val);
731 732
		total -= num_bytes;
		bytenr += num_bytes;
C
Chris Mason 已提交
733 734 735
	}
	return 0;
}
736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766
static int update_pinned_extents(struct btrfs_root *root,
				u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	}
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		WARN_ON(!cache);
		len = min(num, cache->key.offset -
			  (bytenr - cache->key.objectid));
		if (pin) {
			cache->pinned += len;
			fs_info->total_pinned += len;
		} else {
			cache->pinned -= len;
			fs_info->total_pinned -= len;
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
C
Chris Mason 已提交
767

768
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
C
Chris Mason 已提交
769 770
{
	u64 last = 0;
771 772 773
	u64 start;
	u64 end;
	struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
C
Chris Mason 已提交
774 775 776
	int ret;

	while(1) {
777 778 779
		ret = find_first_extent_bit(pinned_extents, last,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
C
Chris Mason 已提交
780
			break;
781 782
		set_extent_dirty(copy, start, end, GFP_NOFS);
		last = end + 1;
C
Chris Mason 已提交
783 784 785 786 787 788
	}
	return 0;
}

int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
789
			       struct extent_map_tree *unpin)
790
{
791 792
	u64 start;
	u64 end;
793
	int ret;
794 795
	struct extent_map_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;
796 797

	while(1) {
798 799 800
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
801
			break;
802
		update_pinned_extents(root, start, end + 1 - start, 0);
803 804
		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
805 806 807 808
	}
	return 0;
}

809 810
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
C
Chris Mason 已提交
811
{
C
Chris Mason 已提交
812
	struct btrfs_key ins;
C
Chris Mason 已提交
813
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
814
	int ret;
815 816 817
	int err = 0;
	u64 start;
	u64 end;
818
	struct btrfs_fs_info *info = extent_root->fs_info;
C
Chris Mason 已提交
819

820
	btrfs_set_stack_extent_refs(&extent_item, 1);
821
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
822 823
	btrfs_set_stack_extent_owner(&extent_item,
				     extent_root->root_key.objectid);
C
Chris Mason 已提交
824

825
	while(1) {
826 827 828
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
829 830
			break;

831 832 833 834 835 836
		ins.objectid = start;
		ins.offset = end + 1 - start;
		err = btrfs_insert_item(trans, extent_root, &ins,
					&extent_item, sizeof(extent_item));
		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
C
Chris Mason 已提交
837 838 839 840
	}
	return 0;
}

841 842
static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
			  int pending)
C
Chris Mason 已提交
843
{
844
	int err = 0;
845
	struct extent_buffer *buf;
C
Chris Mason 已提交
846

C
Chris Mason 已提交
847
	if (!pending) {
848
		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
849 850
		if (buf) {
			if (btrfs_buffer_uptodate(buf)) {
C
Chris Mason 已提交
851 852
				u64 transid =
				    root->fs_info->running_transaction->transid;
853 854
				if (btrfs_header_generation(buf) == transid) {
					free_extent_buffer(buf);
855
					return 1;
C
Chris Mason 已提交
856
				}
C
Chris Mason 已提交
857
			}
858
			free_extent_buffer(buf);
C
Chris Mason 已提交
859
		}
860
		update_pinned_extents(root, bytenr, num_bytes, 1);
C
Chris Mason 已提交
861
	} else {
862
		set_extent_bits(&root->fs_info->pending_del,
863 864
				bytenr, bytenr + num_bytes - 1,
				EXTENT_LOCKED, GFP_NOFS);
C
Chris Mason 已提交
865
	}
C
Chris Mason 已提交
866
	BUG_ON(err < 0);
C
Chris Mason 已提交
867 868 869
	return 0;
}

870
/*
871
 * remove an extent from the root, returns 0 on success
872
 */
873
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
874
			 *root, u64 bytenr, u64 num_bytes, int pin,
875
			 int mark_free)
876
{
877
	struct btrfs_path *path;
C
Chris Mason 已提交
878
	struct btrfs_key key;
879 880
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
881
	struct extent_buffer *leaf;
882
	int ret;
C
Chris Mason 已提交
883
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
884
	u32 refs;
C
Chris Mason 已提交
885

886
	key.objectid = bytenr;
887
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
888
	key.offset = num_bytes;
889

890
	path = btrfs_alloc_path();
891 892
	if (!path)
		return -ENOMEM;
893

894 895 896 897
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);
898 899 900

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
C
Chris Mason 已提交
901
			    struct btrfs_extent_item);
902 903 904 905 906 907
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs -= 1;
	btrfs_set_extent_refs(leaf, ei, refs);
	btrfs_mark_buffer_dirty(leaf);

C
Chris Mason 已提交
908
	if (refs == 0) {
909 910
		u64 super_used;
		u64 root_used;
C
Chris Mason 已提交
911 912

		if (pin) {
913
			ret = pin_down_bytes(root, bytenr, num_bytes, 0);
914 915 916
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
C
Chris Mason 已提交
917 918
		}

919
		/* block accounting for super block */
920 921 922
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - num_bytes);
923 924

		/* block accounting for root item */
925
		root_used = btrfs_root_used(&root->root_item);
926
		btrfs_set_root_used(&root->root_item,
927
					   root_used - num_bytes);
928

929
		ret = btrfs_del_item(trans, extent_root, path);
930 931 932
		if (ret) {
			return ret;
		}
933
		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
C
Chris Mason 已提交
934
					 mark_free, 0);
C
Chris Mason 已提交
935
		BUG_ON(ret);
936
	}
937
	btrfs_free_path(path);
938
	finish_current_insert(trans, extent_root);
939 940 941 942 943 944 945
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
946 947
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
948 949
{
	int ret;
C
Chris Mason 已提交
950
	int err = 0;
951 952 953 954
	u64 start;
	u64 end;
	struct extent_map_tree *pending_del;
	struct extent_map_tree *pinned_extents;
C
Chris Mason 已提交
955

956 957
	pending_del = &extent_root->fs_info->pending_del;
	pinned_extents = &extent_root->fs_info->pinned_extents;
958 959

	while(1) {
960 961 962
		ret = find_first_extent_bit(pending_del, 0, &start, &end,
					    EXTENT_LOCKED);
		if (ret)
963
			break;
964
		update_pinned_extents(extent_root, start, end + 1 - start, 1);
965 966 967 968 969 970
		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
		ret = __free_extent(trans, extent_root,
				     start, end + 1 - start, 0, 0);
		if (ret)
			err = ret;
971
	}
C
Chris Mason 已提交
972
	return err;
973 974 975 976 977
}

/*
 * remove an extent from the root, returns 0 on success
 */
978
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
979
		      *root, u64 bytenr, u64 num_bytes, int pin)
980
{
981
	struct btrfs_root *extent_root = root->fs_info->extent_root;
982 983
	int pending_ret;
	int ret;
984

985
	WARN_ON(num_bytes < root->sectorsize);
986
	if (root == extent_root) {
987
		pin_down_bytes(root, bytenr, num_bytes, 1);
988 989
		return 0;
	}
990
	ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0);
C
Chris Mason 已提交
991
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
992 993 994
	return ret ? ret : pending_ret;
}

995 996 997 998 999 1000 1001
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
	u64 mask = ((u64)root->stripesize - 1);
	u64 ret = (val + mask) & ~mask;
	return ret;
}

1002 1003 1004 1005
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
1006
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
1007 1008 1009
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
1010
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1011 1012
			    *orig_root, u64 num_bytes, u64 empty_size,
			    u64 search_start, u64 search_end, u64 hint_byte,
1013 1014
			    struct btrfs_key *ins, u64 exclude_start,
			    u64 exclude_nr, int data)
1015
{
1016
	struct btrfs_path *path;
C
Chris Mason 已提交
1017
	struct btrfs_key key;
1018
	u64 hole_size = 0;
1019 1020
	u64 aligned;
	int ret;
1021
	int slot = 0;
1022
	u64 last_byte = 0;
C
Chris Mason 已提交
1023
	u64 orig_search_start = search_start;
1024
	int start_found;
1025
	struct extent_buffer *l;
1026
	struct btrfs_root * root = orig_root->fs_info->extent_root;
1027
	struct btrfs_fs_info *info = root->fs_info;
1028
	u64 total_needed = num_bytes;
C
Chris Mason 已提交
1029
	int level;
C
Chris Mason 已提交
1030
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
1031
	int full_scan = 0;
1032
	int wrapped = 0;
1033
	u64 cached_start;
1034

1035
	WARN_ON(num_bytes < root->sectorsize);
1036 1037
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

1038 1039
	level = btrfs_header_level(root->node);

1040
	if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1041 1042 1043
		data = BTRFS_BLOCK_GROUP_MIXED;
	}

C
Chris Mason 已提交
1044
	if (search_end == (u64)-1)
1045 1046 1047
		search_end = btrfs_super_total_bytes(&info->super_copy);
	if (hint_byte) {
		block_group = btrfs_lookup_block_group(info, hint_byte);
1048 1049
		if (!block_group)
			hint_byte = search_start;
C
Chris Mason 已提交
1050
		block_group = btrfs_find_block_group(root, block_group,
1051
						     hint_byte, data, 1);
C
Chris Mason 已提交
1052 1053
	} else {
		block_group = btrfs_find_block_group(root,
1054 1055
						     trans->block_group,
						     search_start, data, 1);
C
Chris Mason 已提交
1056 1057
	}

1058
	total_needed += empty_size;
1059
	path = btrfs_alloc_path();
C
Chris Mason 已提交
1060
check_failed:
1061 1062
	search_start = find_search_start(root, &block_group, search_start,
					 total_needed, data, full_scan);
1063
	search_start = stripe_align(root, search_start);
1064
	cached_start = search_start;
1065
	btrfs_init_path(path);
1066 1067 1068
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
1069
	path->reada = 2;
1070

1071
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
1072 1073
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
1074

1075
	if (path->slots[0] > 0) {
1076
		path->slots[0]--;
1077 1078
	}

1079 1080 1081
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);

1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098
	/*
	 * a rare case, go back one key if we hit a block group item
	 * instead of an extent item
	 */
	if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
	    key.objectid + key.offset >= search_start) {
		ins->objectid = key.objectid;
		ins->offset = key.offset - 1;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
		if (ret < 0)
			goto error;

		if (path->slots[0] > 0) {
			path->slots[0]--;
		}
	}
1099

1100
	while (1) {
1101
		l = path->nodes[0];
1102
		slot = path->slots[0];
1103
		if (slot >= btrfs_header_nritems(l)) {
1104
			ret = btrfs_next_leaf(root, path);
1105 1106
			if (ret == 0)
				continue;
C
Chris Mason 已提交
1107 1108
			if (ret < 0)
				goto error;
1109 1110 1111

			search_start = max(search_start,
					   block_group->key.objectid);
1112
			if (!start_found) {
1113 1114 1115 1116 1117 1118 1119
				aligned = stripe_align(root, search_start);
				ins->objectid = aligned;
				if (aligned >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				ins->offset = search_end - aligned;
1120 1121 1122
				start_found = 1;
				goto check_pending;
			}
1123 1124 1125 1126 1127 1128 1129
			ins->objectid = stripe_align(root,
						     last_byte > search_start ?
						     last_byte : search_start);
			if (search_end <= ins->objectid) {
				ret = -ENOSPC;
				goto error;
			}
C
Chris Mason 已提交
1130
			ins->offset = search_end - ins->objectid;
1131
			BUG_ON(ins->objectid >= search_end);
1132 1133
			goto check_pending;
		}
1134
		btrfs_item_key_to_cpu(l, &key, slot);
1135

1136
		if (key.objectid >= search_start && key.objectid > last_byte &&
1137
		    start_found) {
1138 1139
			if (last_byte < search_start)
				last_byte = search_start;
1140 1141 1142 1143
			aligned = stripe_align(root, last_byte);
			hole_size = key.objectid - aligned;
			if (key.objectid > aligned && hole_size >= num_bytes) {
				ins->objectid = aligned;
1144 1145
				ins->offset = hole_size;
				goto check_pending;
1146
			}
1147
		}
1148 1149
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
			if (!start_found) {
1150
				last_byte = key.objectid;
1151 1152
				start_found = 1;
			}
1153
			goto next;
1154 1155
		}

1156

1157
		start_found = 1;
1158
		last_byte = key.objectid + key.offset;
1159

1160 1161
		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
		    last_byte >= block_group->key.objectid +
C
Chris Mason 已提交
1162 1163 1164
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
1165
				block_group->key.offset;
C
Chris Mason 已提交
1166 1167
			goto new_group;
		}
C
Chris Mason 已提交
1168
next:
1169
		path->slots[0]++;
1170
		cond_resched();
1171 1172 1173 1174 1175
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1176
	btrfs_release_path(root, path);
1177
	BUG_ON(ins->objectid < search_start);
1178

1179
	if (ins->objectid + num_bytes >= search_end)
1180
		goto enospc;
1181
	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
1182
	    ins->objectid + num_bytes > block_group->
1183 1184 1185 1186 1187
	    key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}
1188
	if (test_range_bit(&info->extent_ins, ins->objectid,
1189 1190
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
1191 1192 1193
		goto new_group;
	}
	if (test_range_bit(&info->pinned_extents, ins->objectid,
1194 1195
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
1196
		goto new_group;
1197
	}
1198
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1199 1200 1201 1202
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}
1203
	if (!data) {
1204
		block_group = btrfs_lookup_block_group(info, ins->objectid);
1205 1206
		if (block_group)
			trans->block_group = block_group;
1207
	}
1208
	ins->offset = num_bytes;
1209
	btrfs_free_path(path);
1210
	return 0;
C
Chris Mason 已提交
1211 1212

new_group:
1213
	if (search_start + num_bytes >= search_end) {
1214
enospc:
C
Chris Mason 已提交
1215
		search_start = orig_search_start;
1216 1217 1218 1219
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
1220 1221 1222
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
1223
			full_scan = 1;
1224
			data = BTRFS_BLOCK_GROUP_MIXED;
1225
		} else
1226
			wrapped = 1;
C
Chris Mason 已提交
1227
	}
1228
	block_group = btrfs_lookup_block_group(info, search_start);
1229
	cond_resched();
1230 1231
	block_group = btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
C
Chris Mason 已提交
1232 1233
	goto check_failed;

C
Chris Mason 已提交
1234
error:
1235 1236
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1237
	return ret;
1238 1239 1240 1241 1242 1243 1244 1245
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1246 1247
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, u64 owner,
1248
		       u64 num_bytes, u64 empty_size, u64 hint_byte,
C
Chris Mason 已提交
1249
		       u64 search_end, struct btrfs_key *ins, int data)
1250 1251 1252
{
	int ret;
	int pending_ret;
1253
	u64 super_used, root_used;
1254
	u64 search_start = 0;
1255 1256
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1257
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
1258

1259 1260
	btrfs_set_stack_extent_refs(&extent_item, 1);
	btrfs_set_stack_extent_owner(&extent_item, owner);
1261

1262 1263 1264
	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
1265 1266
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);
C
Chris Mason 已提交
1267
	BUG_ON(ret);
1268 1269
	if (ret)
		return ret;
1270

1271
	/* block accounting for super block */
1272 1273
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1274

1275
	/* block accounting for root item */
1276 1277
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1278

1279 1280 1281 1282
	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);

1283
	if (root == extent_root) {
1284 1285 1286
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
1287
		WARN_ON(data == 1);
1288 1289 1290 1291 1292 1293
		goto update_block;
	}

	WARN_ON(trans->alloc_exclude_nr);
	trans->alloc_exclude_start = ins->objectid;
	trans->alloc_exclude_nr = ins->offset;
1294 1295
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1296

1297 1298 1299
	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;

C
Chris Mason 已提交
1300
	BUG_ON(ret);
1301
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1302
	pending_ret = del_pending_extents(trans, extent_root);
1303

1304
	if (ret) {
C
Chris Mason 已提交
1305
		return ret;
1306 1307
	}
	if (pending_ret) {
C
Chris Mason 已提交
1308
		return pending_ret;
1309
	}
1310 1311

update_block:
C
Chris Mason 已提交
1312 1313
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
C
Chris Mason 已提交
1314
	BUG_ON(ret);
C
Chris Mason 已提交
1315
	return 0;
1316 1317 1318 1319 1320 1321
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
1322
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1323 1324
					     struct btrfs_root *root,
					     u32 blocksize, u64 hint,
1325
					     u64 empty_size)
1326
{
C
Chris Mason 已提交
1327
	struct btrfs_key ins;
1328
	int ret;
1329
	struct extent_buffer *buf;
1330

1331
	ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1332 1333
				 blocksize, empty_size, hint,
				 (u64)-1, &ins, 0);
1334
	if (ret) {
1335 1336
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
1337
	}
1338
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1339
	if (!buf) {
1340
		btrfs_free_extent(trans, root, ins.objectid, blocksize, 0);
1341 1342
		return ERR_PTR(-ENOMEM);
	}
1343 1344 1345
	btrfs_set_buffer_uptodate(buf);
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
1346 1347 1348 1349
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
1350
	btrfs_set_buffer_defrag(buf);
1351
	trans->blocks_used++;
1352 1353
	return buf;
}
1354

1355
static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1356
			 struct btrfs_root *root, struct extent_buffer *leaf)
1357
{
1358
	struct btrfs_key key;
1359 1360 1361 1362 1363
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

1364 1365
	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
1366
	for (i = 0; i < nritems; i++) {
1367
		u64 disk_bytenr;
1368 1369 1370

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1371 1372
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1373 1374
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
1375
			continue;
1376 1377 1378 1379
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
1380 1381
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (disk_bytenr == 0)
C
Chris Mason 已提交
1382
			continue;
1383 1384
		ret = btrfs_free_extent(trans, root, disk_bytenr,
				btrfs_file_extent_disk_num_bytes(leaf, fi), 0);
1385 1386 1387 1388 1389
		BUG_ON(ret);
	}
	return 0;
}

1390
static void reada_walk_down(struct btrfs_root *root,
1391
			    struct extent_buffer *node)
1392 1393 1394
{
	int i;
	u32 nritems;
1395
	u64 bytenr;
1396 1397
	int ret;
	u32 refs;
1398 1399
	int level;
	u32 blocksize;
1400

1401
	nritems = btrfs_header_nritems(node);
1402
	level = btrfs_header_level(node);
1403
	for (i = 0; i < nritems; i++) {
1404 1405 1406
		bytenr = btrfs_node_blockptr(node, i);
		blocksize = btrfs_level_size(root, level - 1);
		ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1407 1408 1409
		BUG_ON(ret);
		if (refs != 1)
			continue;
1410
		mutex_unlock(&root->fs_info->fs_mutex);
1411
		ret = readahead_tree_block(root, bytenr, blocksize);
1412 1413
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
1414 1415 1416 1417 1418
		if (ret)
			break;
	}
}

C
Chris Mason 已提交
1419 1420 1421 1422
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1423 1424
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1425
{
1426 1427
	struct extent_buffer *next;
	struct extent_buffer *cur;
1428 1429
	u64 bytenr;
	u32 blocksize;
C
Chris Mason 已提交
1430 1431 1432
	int ret;
	u32 refs;

1433 1434
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1435
	ret = lookup_extent_ref(trans, root,
1436 1437
				path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
C
Chris Mason 已提交
1438 1439 1440
	BUG_ON(ret);
	if (refs > 1)
		goto out;
1441

C
Chris Mason 已提交
1442 1443 1444
	/*
	 * walk down to the last node level and free all the leaves
	 */
1445
	while(*level >= 0) {
1446 1447
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1448
		cur = path->nodes[*level];
1449 1450

		if (*level > 0 && path->slots[*level] == 0)
1451
			reada_walk_down(root, cur);
1452

1453
		if (btrfs_header_level(cur) != *level)
C
Chris Mason 已提交
1454
			WARN_ON(1);
1455

1456
		if (path->slots[*level] >=
1457
		    btrfs_header_nritems(cur))
C
Chris Mason 已提交
1458
			break;
1459 1460 1461 1462 1463
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
1464 1465 1466
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1467 1468
		BUG_ON(ret);
		if (refs != 1) {
C
Chris Mason 已提交
1469
			path->slots[*level]++;
1470 1471
			ret = btrfs_free_extent(trans, root, bytenr,
						blocksize, 1);
C
Chris Mason 已提交
1472 1473 1474
			BUG_ON(ret);
			continue;
		}
1475
		next = btrfs_find_tree_block(root, bytenr, blocksize);
1476 1477
		if (!next || !btrfs_buffer_uptodate(next)) {
			free_extent_buffer(next);
1478
			mutex_unlock(&root->fs_info->fs_mutex);
1479
			next = read_tree_block(root, bytenr, blocksize);
1480 1481 1482
			mutex_lock(&root->fs_info->fs_mutex);

			/* we dropped the lock, check one more time */
1483 1484
			ret = lookup_extent_ref(trans, root, bytenr,
						blocksize, &refs);
1485 1486 1487
			BUG_ON(ret);
			if (refs != 1) {
				path->slots[*level]++;
1488
				free_extent_buffer(next);
1489
				ret = btrfs_free_extent(trans, root,
1490
							bytenr, blocksize, 1);
1491 1492 1493 1494
				BUG_ON(ret);
				continue;
			}
		}
1495
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1496
		if (path->nodes[*level-1])
1497
			free_extent_buffer(path->nodes[*level-1]);
C
Chris Mason 已提交
1498
		path->nodes[*level-1] = next;
1499
		*level = btrfs_header_level(next);
C
Chris Mason 已提交
1500 1501 1502
		path->slots[*level] = 0;
	}
out:
1503 1504
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1505 1506
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
				path->nodes[*level]->len, 1);
1507
	free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1508 1509 1510 1511 1512 1513
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1514 1515 1516 1517 1518
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1519 1520
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1521 1522 1523 1524
{
	int i;
	int slot;
	int ret;
1525 1526
	struct btrfs_root_item *root_item = &root->root_item;

C
Chris Mason 已提交
1527
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1528
		slot = path->slots[i];
1529 1530 1531 1532
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			node = path->nodes[i];
C
Chris Mason 已提交
1533 1534
			path->slots[i]++;
			*level = i;
1535
			WARN_ON(*level == 0);
1536
			btrfs_node_key(node, &disk_key, path->slots[i]);
1537
			memcpy(&root_item->drop_progress,
1538
			       &disk_key, sizeof(disk_key));
1539
			root_item->drop_level = i;
C
Chris Mason 已提交
1540 1541
			return 0;
		} else {
1542
			ret = btrfs_free_extent(trans, root,
1543 1544
						path->nodes[*level]->start,
						path->nodes[*level]->len, 1);
1545
			BUG_ON(ret);
1546
			free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1547
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
1548 1549 1550 1551 1552 1553
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
1554 1555 1556 1557 1558
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
1559
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1560
			*root)
C
Chris Mason 已提交
1561
{
1562
	int ret = 0;
C
Chris Mason 已提交
1563
	int wret;
C
Chris Mason 已提交
1564
	int level;
1565
	struct btrfs_path *path;
C
Chris Mason 已提交
1566 1567
	int i;
	int orig_level;
1568
	struct btrfs_root_item *root_item = &root->root_item;
C
Chris Mason 已提交
1569

1570 1571
	path = btrfs_alloc_path();
	BUG_ON(!path);
C
Chris Mason 已提交
1572

1573
	level = btrfs_header_level(root->node);
C
Chris Mason 已提交
1574
	orig_level = level;
1575 1576
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path->nodes[level] = root->node;
1577
		extent_buffer_get(root->node);
1578 1579 1580
		path->slots[level] = 0;
	} else {
		struct btrfs_key key;
1581 1582
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;
1583

1584
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1585 1586
		level = root_item->drop_level;
		path->lowest_level = level;
1587
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1588
		if (wret < 0) {
1589 1590 1591
			ret = wret;
			goto out;
		}
1592 1593 1594 1595
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
1596
	}
C
Chris Mason 已提交
1597
	while(1) {
1598
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
1599
		if (wret > 0)
C
Chris Mason 已提交
1600
			break;
C
Chris Mason 已提交
1601 1602 1603
		if (wret < 0)
			ret = wret;

1604
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
1605
		if (wret > 0)
C
Chris Mason 已提交
1606
			break;
C
Chris Mason 已提交
1607 1608
		if (wret < 0)
			ret = wret;
1609 1610
		ret = -EAGAIN;
		break;
C
Chris Mason 已提交
1611
	}
C
Chris Mason 已提交
1612
	for (i = 0; i <= orig_level; i++) {
1613
		if (path->nodes[i]) {
1614
			free_extent_buffer(path->nodes[i]);
1615
			path->nodes[i] = NULL;
C
Chris Mason 已提交
1616
		}
C
Chris Mason 已提交
1617
	}
1618
out:
1619
	btrfs_free_path(path);
C
Chris Mason 已提交
1620
	return ret;
C
Chris Mason 已提交
1621
}
C
Chris Mason 已提交
1622

1623
int btrfs_free_block_groups(struct btrfs_fs_info *info)
C
Chris Mason 已提交
1624
{
1625 1626
	u64 start;
	u64 end;
1627
	u64 ptr;
C
Chris Mason 已提交
1628 1629
	int ret;
	while(1) {
1630 1631 1632
		ret = find_first_extent_bit(&info->block_group_cache, 0,
					    &start, &end, (unsigned int)-1);
		if (ret)
C
Chris Mason 已提交
1633
			break;
1634 1635 1636
		ret = get_state_private(&info->block_group_cache, start, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);
1637 1638
		clear_extent_bits(&info->block_group_cache, start,
				  end, (unsigned int)-1, GFP_NOFS);
C
Chris Mason 已提交
1639
	}
1640
	while(1) {
1641 1642 1643
		ret = find_first_extent_bit(&info->free_space_cache, 0,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
1644
			break;
1645 1646
		clear_extent_dirty(&info->free_space_cache, start,
				   end, GFP_NOFS);
1647
	}
C
Chris Mason 已提交
1648 1649 1650
	return 0;
}

C
Chris Mason 已提交
1651 1652 1653 1654 1655
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
1656
	int bit;
C
Chris Mason 已提交
1657
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
1658
	struct btrfs_fs_info *info = root->fs_info;
1659
	struct extent_map_tree *block_group_cache;
C
Chris Mason 已提交
1660 1661
	struct btrfs_key key;
	struct btrfs_key found_key;
1662
	struct extent_buffer *leaf;
1663 1664

	block_group_cache = &info->block_group_cache;
C
Chris Mason 已提交
1665

C
Chris Mason 已提交
1666
	root = info->extent_root;
C
Chris Mason 已提交
1667
	key.objectid = 0;
1668
	key.offset = BTRFS_BLOCK_GROUP_SIZE;
C
Chris Mason 已提交
1669 1670 1671 1672 1673 1674 1675
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
1676
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
1677 1678 1679 1680 1681
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
1682 1683
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
C
Chris Mason 已提交
1684 1685 1686 1687 1688
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
1689

1690 1691 1692
		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
C
Chris Mason 已提交
1693
		memcpy(&cache->key, &found_key, sizeof(found_key));
1694
		cache->cached = 0;
1695
		cache->pinned = 0;
C
Chris Mason 已提交
1696 1697
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
1698

1699 1700 1701 1702
		if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
			bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
			cache->data = BTRFS_BLOCK_GROUP_MIXED;
		} else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
1703
			bit = BLOCK_GROUP_DATA;
1704
			cache->data = BTRFS_BLOCK_GROUP_DATA;
1705 1706 1707
		} else {
			bit = BLOCK_GROUP_METADATA;
			cache->data = 0;
1708
		}
1709 1710 1711 1712 1713 1714

		/* use EXTENT_LOCKED to prevent merging */
		set_extent_bits(block_group_cache, found_key.objectid,
				found_key.objectid + found_key.offset - 1,
				bit | EXTENT_LOCKED, GFP_NOFS);
		set_state_private(block_group_cache, found_key.objectid,
J
Jens Axboe 已提交
1715
				  (unsigned long)cache);
1716

C
Chris Mason 已提交
1717
		if (key.objectid >=
1718
		    btrfs_super_total_bytes(&info->super_copy))
C
Chris Mason 已提交
1719 1720 1721 1722 1723 1724
			break;
	}

	btrfs_free_path(path);
	return 0;
}