extent-tree.c 40.3 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Z
Zach Brown 已提交
19
#include <linux/sched.h>
20 21 22
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
23
#include "transaction.h"
24

25 26 27 28
#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
#define BLOCK_GROUP_DIRTY EXTENT_DIRTY

29 30
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
31 32
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
33

34 35 36 37 38 39
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
40
	struct extent_buffer *leaf;
41
	struct extent_map_tree *free_space_cache;
42 43 44
	int slot;
	u64 last = 0;
	u64 hole_size;
45
	u64 first_free;
46 47 48
	int found = 0;

	root = root->fs_info->extent_root;
49
	free_space_cache = &root->fs_info->free_space_cache;
50 51 52

	if (block_group->cached)
		return 0;
53

54 55 56
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
57

58
	path->reada = 2;
59
	first_free = block_group->key.objectid;
60 61
	key.objectid = block_group->key.objectid;
	key.offset = 0;
62

63 64
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
65

66 67
	if (ret < 0)
		return ret;
68

69 70
	if (ret && path->slots[0] > 0)
		path->slots[0]--;
71

72
	while(1) {
73
		leaf = path->nodes[0];
74
		slot = path->slots[0];
75
		if (slot >= btrfs_header_nritems(leaf)) {
76
			ret = btrfs_next_leaf(root, path);
77 78
			if (ret < 0)
				goto err;
79
			if (ret == 0) {
80
				continue;
81
			} else {
82 83 84
				break;
			}
		}
85

86
		btrfs_item_key_to_cpu(leaf, &key, slot);
87 88 89 90 91 92
		if (key.objectid < block_group->key.objectid) {
			if (key.objectid + key.offset > first_free)
				first_free = key.objectid + key.offset;
			goto next;
		}

93 94 95 96
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			break;
		}
97

98 99
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
100
				last = first_free;
101 102
				found = 1;
			}
103 104 105 106 107
			if (key.objectid > last) {
				hole_size = key.objectid - last;
				set_extent_dirty(free_space_cache, last,
						 last + hole_size - 1,
						 GFP_NOFS);
108 109
			}
			last = key.objectid + key.offset;
110
		}
111
next:
112 113 114
		path->slots[0]++;
	}

115 116 117 118 119 120
	if (!found)
		last = first_free;
	if (block_group->key.objectid +
	    block_group->key.offset > last) {
		hole_size = block_group->key.objectid +
			block_group->key.offset - last;
121 122
		set_extent_dirty(free_space_cache, last,
				 last + hole_size - 1, GFP_NOFS);
123
	}
124
	block_group->cached = 1;
125
err:
126 127 128 129
	btrfs_free_path(path);
	return 0;
}

130 131
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
132
							 u64 bytenr)
C
Chris Mason 已提交
133
{
134 135 136 137 138
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 ptr;
	u64 start;
	u64 end;
C
Chris Mason 已提交
139 140
	int ret;

141 142
	block_group_cache = &info->block_group_cache;
	ret = find_first_extent_bit(block_group_cache,
143
				    bytenr, &start, &end,
144
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
C
Chris Mason 已提交
145
	if (ret) {
146
		return NULL;
C
Chris Mason 已提交
147
	}
148 149 150 151
	ret = get_state_private(block_group_cache, start, &ptr);
	if (ret)
		return NULL;

J
Jens Axboe 已提交
152
	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
Y
Yan 已提交
153
	if (block_group->key.objectid <= bytenr && bytenr <
154 155
	    block_group->key.objectid + block_group->key.offset)
		return block_group;
C
Chris Mason 已提交
156 157
	return NULL;
}
158 159
static u64 find_search_start(struct btrfs_root *root,
			     struct btrfs_block_group_cache **cache_ret,
160 161
			     u64 search_start, int num,
			     int data, int full_scan)
162 163 164
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
165
	u64 last;
166 167
	u64 start = 0;
	u64 end = 0;
168
	u64 cache_miss = 0;
169
	int wrapped = 0;
170 171

again:
172 173 174
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;
175

176 177
	last = max(search_start, cache->key.objectid);

178
	while(1) {
179 180
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
181
		if (ret) {
182 183
			if (!cache_miss)
				cache_miss = last;
184 185
			goto new_group;
		}
186 187 188

		start = max(last, start);
		last = end + 1;
189 190 191
		if (last - start < num) {
			if (last == cache->key.objectid + cache->key.offset)
				cache_miss = start;
192
			continue;
193 194
		}
		if (data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
195
		    start + num > cache->key.objectid + cache->key.offset)
196
			goto new_group;
197
		return start;
198 199
	}
out:
200
	return search_start;
201 202

new_group:
203
	last = cache->key.objectid + cache->key.offset;
204
wrapped:
205
	cache = btrfs_lookup_block_group(root->fs_info, last);
206
	if (!cache) {
207 208 209 210 211 212
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			data = BTRFS_BLOCK_GROUP_MIXED;
			goto wrapped;
		}
213
		return search_start;
214
	}
215 216 217 218 219
	if (cache_miss && !cache->cached) {
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_block_group(root->fs_info, last);
	}
220 221
	if (!full_scan)
		cache = btrfs_find_block_group(root, cache, last, data, 0);
222
	*cache_ret = cache;
223
	cache_miss = 0;
224 225 226
	goto again;
}

C
Chris Mason 已提交
227 228
static u64 div_factor(u64 num, int factor)
{
229 230
	if (factor == 10)
		return num;
C
Chris Mason 已提交
231 232 233 234 235
	num *= factor;
	do_div(num, 10);
	return num;
}

236 237
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
238
						 *hint, u64 search_start,
239
						 int data, int owner)
C
Chris Mason 已提交
240
{
241 242
	struct btrfs_block_group_cache *cache;
	struct extent_map_tree *block_group_cache;
243
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
244 245
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
246 247
	u64 last = 0;
	u64 hint_last;
248 249 250 251 252
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	int bit;
C
Chris Mason 已提交
253
	int ret;
254
	int full_search = 0;
255
	int factor = 8;
C
Chris Mason 已提交
256
	int data_swap = 0;
257

258 259
	block_group_cache = &info->block_group_cache;

260
	if (!owner)
261
		factor = 8;
C
Chris Mason 已提交
262

263
	if (data == BTRFS_BLOCK_GROUP_MIXED) {
264
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
265 266
		factor = 10;
	} else if (data)
267 268 269
		bit = BLOCK_GROUP_DATA;
	else
		bit = BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
270 271 272

	if (search_start) {
		struct btrfs_block_group_cache *shint;
273
		shint = btrfs_lookup_block_group(info, search_start);
274 275
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
C
Chris Mason 已提交
276
			used = btrfs_block_group_used(&shint->item);
277 278
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
C
Chris Mason 已提交
279 280 281 282
				return shint;
			}
		}
	}
283 284
	if (hint && (hint->data == data ||
		     hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
285
		used = btrfs_block_group_used(&hint->item);
286 287
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
288 289
			return hint;
		}
290
		last = hint->key.objectid + hint->key.offset;
291 292
		hint_last = last;
	} else {
293 294 295 296 297 298
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
299 300
	}
again:
C
Chris Mason 已提交
301
	while(1) {
302 303 304
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
C
Chris Mason 已提交
305
			break;
306 307 308 309 310

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
311
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
312 313 314 315 316 317 318
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (full_search)
			free_check = cache->key.offset;
		else
			free_check = div_factor(cache->key.offset, factor);
319
		if (used + cache->pinned < free_check) {
320 321
			found_group = cache;
			goto found;
C
Chris Mason 已提交
322
		}
323
		cond_resched();
C
Chris Mason 已提交
324
	}
325
	if (!full_search) {
C
Chris Mason 已提交
326
		last = search_start;
327 328 329
		full_search = 1;
		goto again;
	}
C
Chris Mason 已提交
330 331
	if (!data_swap) {
		data_swap = 1;
332
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
C
Chris Mason 已提交
333 334 335
		last = search_start;
		goto again;
	}
C
Chris Mason 已提交
336
found:
337
	return found_group;
C
Chris Mason 已提交
338 339
}

C
Chris Mason 已提交
340 341
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
342
				u64 bytenr, u64 num_bytes)
C
Chris Mason 已提交
343
{
344
	struct btrfs_path *path;
C
Chris Mason 已提交
345
	int ret;
C
Chris Mason 已提交
346
	struct btrfs_key key;
347
	struct extent_buffer *l;
C
Chris Mason 已提交
348
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
349
	u32 refs;
C
Chris Mason 已提交
350

351
	WARN_ON(num_bytes < root->sectorsize);
352
	path = btrfs_alloc_path();
353 354
	if (!path)
		return -ENOMEM;
355

356
	key.objectid = bytenr;
357
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
358
	key.offset = num_bytes;
359
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
360
				0, 1);
361 362
	if (ret < 0)
		return ret;
363
	if (ret != 0) {
364
		BUG();
365
	}
C
Chris Mason 已提交
366
	BUG_ON(ret != 0);
367
	l = path->nodes[0];
368
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
369 370
	refs = btrfs_extent_refs(l, item);
	btrfs_set_extent_refs(l, item, refs + 1);
371
	btrfs_mark_buffer_dirty(path->nodes[0]);
372

373 374
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
375
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
376
	del_pending_extents(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
377 378 379
	return 0;
}

380 381 382 383 384 385 386 387
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
{
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	return 0;
}

C
Chris Mason 已提交
388
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
389 390
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u32 *refs)
391
{
392
	struct btrfs_path *path;
393
	int ret;
C
Chris Mason 已提交
394
	struct btrfs_key key;
395
	struct extent_buffer *l;
C
Chris Mason 已提交
396
	struct btrfs_extent_item *item;
397

398
	WARN_ON(num_bytes < root->sectorsize);
399
	path = btrfs_alloc_path();
400 401
	key.objectid = bytenr;
	key.offset = num_bytes;
402
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
403
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
404
				0, 0);
405 406
	if (ret < 0)
		goto out;
407 408
	if (ret != 0) {
		btrfs_print_leaf(root, path->nodes[0]);
409
		printk("failed to find block number %Lu\n", bytenr);
410
		BUG();
411 412
	}
	l = path->nodes[0];
413
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
414
	*refs = btrfs_extent_refs(l, item);
415
out:
416
	btrfs_free_path(path);
417 418 419
	return 0;
}

C
Chris Mason 已提交
420 421 422
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root)
{
423 424
	return btrfs_inc_extent_ref(trans, root, root->node->start,
				    root->node->len);
C
Chris Mason 已提交
425 426
}

427
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
428
		  struct extent_buffer *buf)
C
Chris Mason 已提交
429
{
430
	u64 bytenr;
431 432
	u32 nritems;
	struct btrfs_key key;
433
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
434
	int i;
435
	int level;
436
	int ret;
437 438
	int faili;
	int err;
439

440
	if (!root->ref_cows)
441
		return 0;
442

443
	level = btrfs_header_level(buf);
444 445
	nritems = btrfs_header_nritems(buf);
	for (i = 0; i < nritems; i++) {
446 447
		if (level == 0) {
			u64 disk_bytenr;
448 449
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
450
				continue;
451
			fi = btrfs_item_ptr(buf, i,
452
					    struct btrfs_file_extent_item);
453
			if (btrfs_file_extent_type(buf, fi) ==
454 455
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
456 457
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
C
Chris Mason 已提交
458
				continue;
459 460
			ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf, fi));
461 462 463 464
			if (ret) {
				faili = i;
				goto fail;
			}
465
		} else {
466 467 468
			bytenr = btrfs_node_blockptr(buf, i);
			ret = btrfs_inc_extent_ref(trans, root, bytenr,
					   btrfs_level_size(root, level - 1));
469 470 471 472
			if (ret) {
				faili = i;
				goto fail;
			}
473
		}
C
Chris Mason 已提交
474 475
	}
	return 0;
476
fail:
C
Chris Mason 已提交
477
	WARN_ON(1);
478
	for (i =0; i < faili; i++) {
479 480
		if (level == 0) {
			u64 disk_bytenr;
481 482
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
483
				continue;
484
			fi = btrfs_item_ptr(buf, i,
485
					    struct btrfs_file_extent_item);
486
			if (btrfs_file_extent_type(buf, fi) ==
487 488
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
489 490
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
491
				continue;
492 493
			err = btrfs_free_extent(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf,
494
								      fi), 0);
495 496
			BUG_ON(err);
		} else {
497 498 499
			bytenr = btrfs_node_blockptr(buf, i);
			err = btrfs_free_extent(trans, root, bytenr,
					btrfs_level_size(root, level - 1), 0);
500 501 502 503
			BUG_ON(err);
		}
	}
	return ret;
C
Chris Mason 已提交
504 505
}

C
Chris Mason 已提交
506 507 508 509 510 511 512 513
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
514 515
	unsigned long bi;
	struct extent_buffer *leaf;
C
Chris Mason 已提交
516 517

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
518 519
	if (ret < 0)
		goto fail;
C
Chris Mason 已提交
520
	BUG_ON(ret);
521 522 523 524 525

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
C
Chris Mason 已提交
526
	btrfs_release_path(extent_root, path);
527
fail:
C
Chris Mason 已提交
528 529 530 531 532 533 534 535 536 537
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}

538 539
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
C
Chris Mason 已提交
540
{
541 542
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
543 544 545 546
	int ret;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
547 548 549 550
	u64 last = 0;
	u64 start;
	u64 end;
	u64 ptr;
C
Chris Mason 已提交
551

552
	block_group_cache = &root->fs_info->block_group_cache;
C
Chris Mason 已提交
553 554 555 556 557
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
558 559 560
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, BLOCK_GROUP_DIRTY);
		if (ret)
C
Chris Mason 已提交
561
			break;
562

563 564 565 566 567
		last = end + 1;
		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

J
Jens Axboe 已提交
568
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
569 570 571 572 573 574 575 576 577 578
		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		if (err) {
			werr = err;
			continue;
C
Chris Mason 已提交
579
		}
580 581
		clear_extent_bits(block_group_cache, start, end,
				  BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
582 583 584 585 586 587 588
	}
	btrfs_free_path(path);
	return werr;
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
589 590
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free, int data)
C
Chris Mason 已提交
591 592 593
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
594
	u64 total = num_bytes;
C
Chris Mason 已提交
595
	u64 old_val;
596
	u64 byte_in_group;
597 598
	u64 start;
	u64 end;
C
Chris Mason 已提交
599

C
Chris Mason 已提交
600
	while(total) {
601
		cache = btrfs_lookup_block_group(info, bytenr);
C
Chris Mason 已提交
602
		if (!cache) {
C
Chris Mason 已提交
603
			return -1;
C
Chris Mason 已提交
604
		}
605 606
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
607 608 609 610
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);
C
Chris Mason 已提交
611 612

		old_val = btrfs_block_group_used(&cache->item);
613
		num_bytes = min(total, cache->key.offset - byte_in_group);
C
Chris Mason 已提交
614
		if (alloc) {
C
Chris Mason 已提交
615
			if (cache->data != data &&
C
Chris Mason 已提交
616
			    old_val < (cache->key.offset >> 1)) {
617 618 619
				int bit_to_clear;
				int bit_to_set;
				cache->data = data;
C
Chris Mason 已提交
620
				if (data) {
621 622
					bit_to_clear = BLOCK_GROUP_METADATA;
					bit_to_set = BLOCK_GROUP_DATA;
623 624
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
625 626 627
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
628 629
					bit_to_clear = BLOCK_GROUP_DATA;
					bit_to_set = BLOCK_GROUP_METADATA;
630 631
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
C
Chris Mason 已提交
632 633 634
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
635 636 637 638 639 640
				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
641 642 643 644 645 646 647 648
			} else if (cache->data != data &&
				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
				cache->data = BTRFS_BLOCK_GROUP_MIXED;
				set_extent_bits(&info->block_group_cache,
						start, end,
						BLOCK_GROUP_DATA |
						BLOCK_GROUP_METADATA,
						GFP_NOFS);
C
Chris Mason 已提交
649
			}
650
			old_val += num_bytes;
C
Chris Mason 已提交
651
		} else {
652
			old_val -= num_bytes;
653 654
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
655
						 bytenr, bytenr + num_bytes - 1,
656
						 GFP_NOFS);
657
			}
C
Chris Mason 已提交
658
		}
C
Chris Mason 已提交
659
		btrfs_set_block_group_used(&cache->item, old_val);
660 661
		total -= num_bytes;
		bytenr += num_bytes;
C
Chris Mason 已提交
662 663 664
	}
	return 0;
}
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
static int update_pinned_extents(struct btrfs_root *root,
				u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	}
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		WARN_ON(!cache);
		len = min(num, cache->key.offset -
			  (bytenr - cache->key.objectid));
		if (pin) {
			cache->pinned += len;
			fs_info->total_pinned += len;
		} else {
			cache->pinned -= len;
			fs_info->total_pinned -= len;
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
C
Chris Mason 已提交
696

697
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
C
Chris Mason 已提交
698 699
{
	u64 last = 0;
700 701 702
	u64 start;
	u64 end;
	struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
C
Chris Mason 已提交
703 704 705
	int ret;

	while(1) {
706 707 708
		ret = find_first_extent_bit(pinned_extents, last,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
C
Chris Mason 已提交
709
			break;
710 711
		set_extent_dirty(copy, start, end, GFP_NOFS);
		last = end + 1;
C
Chris Mason 已提交
712 713 714 715 716 717
	}
	return 0;
}

int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
718
			       struct extent_map_tree *unpin)
719
{
720 721
	u64 start;
	u64 end;
722
	int ret;
723 724
	struct extent_map_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;
725 726

	while(1) {
727 728 729
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
730
			break;
731
		update_pinned_extents(root, start, end + 1 - start, 0);
732 733
		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
734 735 736 737
	}
	return 0;
}

738 739
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
C
Chris Mason 已提交
740
{
C
Chris Mason 已提交
741
	struct btrfs_key ins;
C
Chris Mason 已提交
742
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
743
	int ret;
744 745 746
	int err = 0;
	u64 start;
	u64 end;
747
	struct btrfs_fs_info *info = extent_root->fs_info;
C
Chris Mason 已提交
748

749
	btrfs_set_stack_extent_refs(&extent_item, 1);
750
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
751 752
	btrfs_set_stack_extent_owner(&extent_item,
				     extent_root->root_key.objectid);
C
Chris Mason 已提交
753

754
	while(1) {
755 756 757
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
758 759
			break;

760 761 762 763 764 765
		ins.objectid = start;
		ins.offset = end + 1 - start;
		err = btrfs_insert_item(trans, extent_root, &ins,
					&extent_item, sizeof(extent_item));
		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
C
Chris Mason 已提交
766 767 768 769
	}
	return 0;
}

770 771
static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
			  int pending)
C
Chris Mason 已提交
772
{
773
	int err = 0;
774
	struct extent_buffer *buf;
C
Chris Mason 已提交
775

C
Chris Mason 已提交
776
	if (!pending) {
777
		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
778 779
		if (buf) {
			if (btrfs_buffer_uptodate(buf)) {
C
Chris Mason 已提交
780 781
				u64 transid =
				    root->fs_info->running_transaction->transid;
782 783
				if (btrfs_header_generation(buf) == transid) {
					free_extent_buffer(buf);
784
					return 1;
C
Chris Mason 已提交
785
				}
C
Chris Mason 已提交
786
			}
787
			free_extent_buffer(buf);
C
Chris Mason 已提交
788
		}
789
		update_pinned_extents(root, bytenr, num_bytes, 1);
C
Chris Mason 已提交
790
	} else {
791
		set_extent_bits(&root->fs_info->pending_del,
792 793
				bytenr, bytenr + num_bytes - 1,
				EXTENT_LOCKED, GFP_NOFS);
C
Chris Mason 已提交
794
	}
C
Chris Mason 已提交
795
	BUG_ON(err < 0);
C
Chris Mason 已提交
796 797 798
	return 0;
}

799
/*
800
 * remove an extent from the root, returns 0 on success
801
 */
802
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
803
			 *root, u64 bytenr, u64 num_bytes, int pin,
804
			 int mark_free)
805
{
806
	struct btrfs_path *path;
C
Chris Mason 已提交
807
	struct btrfs_key key;
808 809
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
810
	struct extent_buffer *leaf;
811
	int ret;
C
Chris Mason 已提交
812
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
813
	u32 refs;
C
Chris Mason 已提交
814

815
	key.objectid = bytenr;
816
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
817
	key.offset = num_bytes;
818

819
	path = btrfs_alloc_path();
820 821
	if (!path)
		return -ENOMEM;
822

823 824 825 826
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);
827 828 829

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
C
Chris Mason 已提交
830
			    struct btrfs_extent_item);
831 832 833 834 835 836
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs -= 1;
	btrfs_set_extent_refs(leaf, ei, refs);
	btrfs_mark_buffer_dirty(leaf);

C
Chris Mason 已提交
837
	if (refs == 0) {
838 839
		u64 super_used;
		u64 root_used;
C
Chris Mason 已提交
840 841

		if (pin) {
842
			ret = pin_down_bytes(root, bytenr, num_bytes, 0);
843 844 845
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
C
Chris Mason 已提交
846 847
		}

848
		/* block accounting for super block */
849 850 851
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - num_bytes);
852 853

		/* block accounting for root item */
854
		root_used = btrfs_root_used(&root->root_item);
855
		btrfs_set_root_used(&root->root_item,
856
					   root_used - num_bytes);
857

858
		ret = btrfs_del_item(trans, extent_root, path);
859 860 861
		if (ret) {
			return ret;
		}
862
		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
C
Chris Mason 已提交
863
					 mark_free, 0);
C
Chris Mason 已提交
864
		BUG_ON(ret);
865
	}
866
	btrfs_free_path(path);
867
	finish_current_insert(trans, extent_root);
868 869 870 871 872 873 874
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
875 876
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
877 878
{
	int ret;
C
Chris Mason 已提交
879
	int err = 0;
880 881 882 883
	u64 start;
	u64 end;
	struct extent_map_tree *pending_del;
	struct extent_map_tree *pinned_extents;
C
Chris Mason 已提交
884

885 886
	pending_del = &extent_root->fs_info->pending_del;
	pinned_extents = &extent_root->fs_info->pinned_extents;
887 888

	while(1) {
889 890 891
		ret = find_first_extent_bit(pending_del, 0, &start, &end,
					    EXTENT_LOCKED);
		if (ret)
892
			break;
893
		update_pinned_extents(extent_root, start, end + 1 - start, 1);
894 895 896 897 898 899
		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
		ret = __free_extent(trans, extent_root,
				     start, end + 1 - start, 0, 0);
		if (ret)
			err = ret;
900
	}
C
Chris Mason 已提交
901
	return err;
902 903 904 905 906
}

/*
 * remove an extent from the root, returns 0 on success
 */
907
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
908
		      *root, u64 bytenr, u64 num_bytes, int pin)
909
{
910
	struct btrfs_root *extent_root = root->fs_info->extent_root;
911 912
	int pending_ret;
	int ret;
913

914
	WARN_ON(num_bytes < root->sectorsize);
915
	if (root == extent_root) {
916
		pin_down_bytes(root, bytenr, num_bytes, 1);
917 918
		return 0;
	}
919
	ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0);
C
Chris Mason 已提交
920
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
921 922 923 924 925 926 927
	return ret ? ret : pending_ret;
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
928
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
929 930 931
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
932
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
933 934
			    *orig_root, u64 num_bytes, u64 empty_size,
			    u64 search_start, u64 search_end, u64 hint_byte,
935 936
			    struct btrfs_key *ins, u64 exclude_start,
			    u64 exclude_nr, int data)
937
{
938
	struct btrfs_path *path;
C
Chris Mason 已提交
939
	struct btrfs_key key;
940 941 942
	int ret;
	u64 hole_size = 0;
	int slot = 0;
943
	u64 last_byte = 0;
C
Chris Mason 已提交
944
	u64 orig_search_start = search_start;
945
	int start_found;
946
	struct extent_buffer *l;
947
	struct btrfs_root * root = orig_root->fs_info->extent_root;
948
	struct btrfs_fs_info *info = root->fs_info;
949
	u64 total_needed = num_bytes;
C
Chris Mason 已提交
950
	int level;
C
Chris Mason 已提交
951
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
952
	int full_scan = 0;
953
	int wrapped = 0;
954
	u64 cached_start;
955

956
	WARN_ON(num_bytes < root->sectorsize);
957 958
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

959 960
	level = btrfs_header_level(root->node);

961 962 963 964
	if (num_bytes >= 96 * 1024 * 1024 && hint_byte) {
		data = BTRFS_BLOCK_GROUP_MIXED;
	}

C
Chris Mason 已提交
965
	if (search_end == (u64)-1)
966 967 968
		search_end = btrfs_super_total_bytes(&info->super_copy);
	if (hint_byte) {
		block_group = btrfs_lookup_block_group(info, hint_byte);
C
Chris Mason 已提交
969
		block_group = btrfs_find_block_group(root, block_group,
970
						     hint_byte, data, 1);
C
Chris Mason 已提交
971 972 973
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group, 0,
974
						     data, 1);
C
Chris Mason 已提交
975 976
	}

977
	total_needed += empty_size;
978
	path = btrfs_alloc_path();
C
Chris Mason 已提交
979
check_failed:
980 981
	search_start = find_search_start(root, &block_group, search_start,
					 total_needed, data, full_scan);
982
	cached_start = search_start;
983
	btrfs_init_path(path);
984 985 986
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
987
	path->reada = 2;
988

989
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
990 991
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
992

993
	if (path->slots[0] > 0) {
994
		path->slots[0]--;
995 996
	}

997 998 999
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);

1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
	/*
	 * a rare case, go back one key if we hit a block group item
	 * instead of an extent item
	 */
	if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
	    key.objectid + key.offset >= search_start) {
		ins->objectid = key.objectid;
		ins->offset = key.offset - 1;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
		if (ret < 0)
			goto error;

		if (path->slots[0] > 0) {
			path->slots[0]--;
		}
	}
1017

1018
	while (1) {
1019
		l = path->nodes[0];
1020
		slot = path->slots[0];
1021
		if (slot >= btrfs_header_nritems(l)) {
1022
			ret = btrfs_next_leaf(root, path);
1023 1024
			if (ret == 0)
				continue;
C
Chris Mason 已提交
1025 1026
			if (ret < 0)
				goto error;
1027 1028 1029

			search_start = max(search_start,
					   block_group->key.objectid);
1030 1031
			if (!start_found) {
				ins->objectid = search_start;
C
Chris Mason 已提交
1032
				ins->offset = search_end - search_start;
1033 1034 1035
				start_found = 1;
				goto check_pending;
			}
1036 1037
			ins->objectid = last_byte > search_start ?
					last_byte : search_start;
C
Chris Mason 已提交
1038
			ins->offset = search_end - ins->objectid;
1039
			BUG_ON(ins->objectid >= search_end);
1040 1041
			goto check_pending;
		}
1042
		btrfs_item_key_to_cpu(l, &key, slot);
1043

1044
		if (key.objectid >= search_start && key.objectid > last_byte &&
1045
		    start_found) {
1046 1047 1048 1049 1050
			if (last_byte < search_start)
				last_byte = search_start;
			hole_size = key.objectid - last_byte;
			if (hole_size >= num_bytes) {
				ins->objectid = last_byte;
1051 1052
				ins->offset = hole_size;
				goto check_pending;
1053
			}
1054
		}
1055 1056
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
			if (!start_found) {
1057
				last_byte = key.objectid;
1058 1059
				start_found = 1;
			}
1060
			goto next;
1061 1062
		}

1063

1064
		start_found = 1;
1065
		last_byte = key.objectid + key.offset;
1066

1067 1068
		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
		    last_byte >= block_group->key.objectid +
C
Chris Mason 已提交
1069 1070 1071
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
1072
				block_group->key.offset;
C
Chris Mason 已提交
1073 1074
			goto new_group;
		}
C
Chris Mason 已提交
1075
next:
1076
		path->slots[0]++;
1077
		cond_resched();
1078 1079 1080 1081 1082
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1083
	btrfs_release_path(root, path);
1084
	BUG_ON(ins->objectid < search_start);
1085

1086
	if (ins->objectid + num_bytes >= search_end)
1087
		goto enospc;
1088
	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
Y
Yan 已提交
1089
	    ins->objectid + num_bytes > block_group->
1090 1091 1092 1093 1094
	    key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}
1095
	if (test_range_bit(&info->extent_ins, ins->objectid,
1096 1097
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
1098 1099 1100
		goto new_group;
	}
	if (test_range_bit(&info->pinned_extents, ins->objectid,
1101 1102
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
1103
		goto new_group;
1104
	}
1105
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1106 1107 1108 1109
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}
1110
	if (!data) {
1111
		block_group = btrfs_lookup_block_group(info, ins->objectid);
1112 1113
		if (block_group)
			trans->block_group = block_group;
1114
	}
1115
	ins->offset = num_bytes;
1116
	btrfs_free_path(path);
1117
	return 0;
C
Chris Mason 已提交
1118 1119

new_group:
1120
	if (search_start + num_bytes >= search_end) {
1121
enospc:
C
Chris Mason 已提交
1122
		search_start = orig_search_start;
1123 1124 1125 1126
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
1127 1128 1129
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
1130
			full_scan = 1;
1131
		} else
1132
			wrapped = 1;
C
Chris Mason 已提交
1133
	}
1134
	block_group = btrfs_lookup_block_group(info, search_start);
1135
	cond_resched();
C
Chris Mason 已提交
1136 1137
	if (!full_scan)
		block_group = btrfs_find_block_group(root, block_group,
1138
						     search_start, data, 0);
C
Chris Mason 已提交
1139 1140
	goto check_failed;

C
Chris Mason 已提交
1141
error:
1142 1143
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1144
	return ret;
1145 1146 1147 1148 1149 1150 1151 1152
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1153 1154
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, u64 owner,
1155
		       u64 num_bytes, u64 empty_size, u64 hint_byte,
C
Chris Mason 已提交
1156
		       u64 search_end, struct btrfs_key *ins, int data)
1157 1158 1159
{
	int ret;
	int pending_ret;
1160
	u64 super_used, root_used;
1161
	u64 search_start = 0;
1162 1163
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1164
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
1165

1166 1167
	btrfs_set_stack_extent_refs(&extent_item, 1);
	btrfs_set_stack_extent_owner(&extent_item, owner);
1168

1169 1170 1171
	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
1172 1173
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);
C
Chris Mason 已提交
1174
	BUG_ON(ret);
1175 1176
	if (ret)
		return ret;
1177

1178
	/* block accounting for super block */
1179 1180
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1181

1182
	/* block accounting for root item */
1183 1184
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1185

1186 1187 1188 1189
	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);

1190
	if (root == extent_root) {
1191 1192 1193
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
1194
		WARN_ON(data == 1);
1195 1196 1197 1198 1199 1200
		goto update_block;
	}

	WARN_ON(trans->alloc_exclude_nr);
	trans->alloc_exclude_start = ins->objectid;
	trans->alloc_exclude_nr = ins->offset;
1201 1202
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1203

1204 1205 1206
	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;

C
Chris Mason 已提交
1207
	BUG_ON(ret);
1208
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1209
	pending_ret = del_pending_extents(trans, extent_root);
1210

1211
	if (ret) {
C
Chris Mason 已提交
1212
		return ret;
1213 1214
	}
	if (pending_ret) {
C
Chris Mason 已提交
1215
		return pending_ret;
1216
	}
1217 1218

update_block:
C
Chris Mason 已提交
1219 1220
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
C
Chris Mason 已提交
1221
	BUG_ON(ret);
C
Chris Mason 已提交
1222
	return 0;
1223 1224 1225 1226 1227 1228
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
1229
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1230 1231
					     struct btrfs_root *root,
					     u32 blocksize, u64 hint,
1232
					     u64 empty_size)
1233
{
C
Chris Mason 已提交
1234
	struct btrfs_key ins;
1235
	int ret;
1236
	struct extent_buffer *buf;
1237

1238
	ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1239 1240
				 blocksize, empty_size, hint,
				 (u64)-1, &ins, 0);
1241
	if (ret) {
1242 1243
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
1244
	}
1245
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1246
	if (!buf) {
1247
		btrfs_free_extent(trans, root, ins.objectid, blocksize, 0);
1248 1249
		return ERR_PTR(-ENOMEM);
	}
1250 1251 1252
	btrfs_set_buffer_uptodate(buf);
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
1253 1254 1255 1256
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
1257
	btrfs_set_buffer_defrag(buf);
1258
	trans->blocks_used++;
1259 1260
	return buf;
}
1261

1262
static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1263
			 struct btrfs_root *root, struct extent_buffer *leaf)
1264
{
1265
	struct btrfs_key key;
1266 1267 1268 1269 1270
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

1271 1272
	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
1273
	for (i = 0; i < nritems; i++) {
1274
		u64 disk_bytenr;
1275 1276 1277

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1278 1279
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1280 1281
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
1282
			continue;
1283 1284 1285 1286
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
1287 1288
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (disk_bytenr == 0)
C
Chris Mason 已提交
1289
			continue;
1290 1291
		ret = btrfs_free_extent(trans, root, disk_bytenr,
				btrfs_file_extent_disk_num_bytes(leaf, fi), 0);
1292 1293 1294 1295 1296
		BUG_ON(ret);
	}
	return 0;
}

1297
static void reada_walk_down(struct btrfs_root *root,
1298
			    struct extent_buffer *node)
1299 1300 1301
{
	int i;
	u32 nritems;
1302
	u64 bytenr;
1303 1304
	int ret;
	u32 refs;
1305 1306
	int level;
	u32 blocksize;
1307

1308
	nritems = btrfs_header_nritems(node);
1309
	level = btrfs_header_level(node);
1310
	for (i = 0; i < nritems; i++) {
1311 1312 1313
		bytenr = btrfs_node_blockptr(node, i);
		blocksize = btrfs_level_size(root, level - 1);
		ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1314 1315 1316
		BUG_ON(ret);
		if (refs != 1)
			continue;
1317
		mutex_unlock(&root->fs_info->fs_mutex);
1318
		ret = readahead_tree_block(root, bytenr, blocksize);
1319 1320
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
1321 1322 1323 1324 1325
		if (ret)
			break;
	}
}

C
Chris Mason 已提交
1326 1327 1328 1329
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1330 1331
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1332
{
1333 1334
	struct extent_buffer *next;
	struct extent_buffer *cur;
1335 1336
	u64 bytenr;
	u32 blocksize;
C
Chris Mason 已提交
1337 1338 1339
	int ret;
	u32 refs;

1340 1341
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1342
	ret = lookup_extent_ref(trans, root,
1343 1344
				path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
C
Chris Mason 已提交
1345 1346 1347
	BUG_ON(ret);
	if (refs > 1)
		goto out;
1348

C
Chris Mason 已提交
1349 1350 1351
	/*
	 * walk down to the last node level and free all the leaves
	 */
1352
	while(*level >= 0) {
1353 1354
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1355
		cur = path->nodes[*level];
1356 1357

		if (*level > 0 && path->slots[*level] == 0)
1358
			reada_walk_down(root, cur);
1359

1360
		if (btrfs_header_level(cur) != *level)
C
Chris Mason 已提交
1361
			WARN_ON(1);
1362

1363
		if (path->slots[*level] >=
1364
		    btrfs_header_nritems(cur))
C
Chris Mason 已提交
1365
			break;
1366 1367 1368 1369 1370
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
1371 1372 1373
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1374 1375
		BUG_ON(ret);
		if (refs != 1) {
C
Chris Mason 已提交
1376
			path->slots[*level]++;
1377 1378
			ret = btrfs_free_extent(trans, root, bytenr,
						blocksize, 1);
C
Chris Mason 已提交
1379 1380 1381
			BUG_ON(ret);
			continue;
		}
1382
		next = btrfs_find_tree_block(root, bytenr, blocksize);
1383 1384
		if (!next || !btrfs_buffer_uptodate(next)) {
			free_extent_buffer(next);
1385
			mutex_unlock(&root->fs_info->fs_mutex);
1386
			next = read_tree_block(root, bytenr, blocksize);
1387 1388 1389
			mutex_lock(&root->fs_info->fs_mutex);

			/* we dropped the lock, check one more time */
1390 1391
			ret = lookup_extent_ref(trans, root, bytenr,
						blocksize, &refs);
1392 1393 1394
			BUG_ON(ret);
			if (refs != 1) {
				path->slots[*level]++;
1395
				free_extent_buffer(next);
1396
				ret = btrfs_free_extent(trans, root,
1397
							bytenr, blocksize, 1);
1398 1399 1400 1401
				BUG_ON(ret);
				continue;
			}
		}
1402
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1403
		if (path->nodes[*level-1])
1404
			free_extent_buffer(path->nodes[*level-1]);
C
Chris Mason 已提交
1405
		path->nodes[*level-1] = next;
1406
		*level = btrfs_header_level(next);
C
Chris Mason 已提交
1407 1408 1409
		path->slots[*level] = 0;
	}
out:
1410 1411
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1412 1413
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
				path->nodes[*level]->len, 1);
1414
	free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1415 1416 1417 1418 1419 1420
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1421 1422 1423 1424 1425
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1426 1427
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1428 1429 1430 1431
{
	int i;
	int slot;
	int ret;
1432 1433
	struct btrfs_root_item *root_item = &root->root_item;

C
Chris Mason 已提交
1434
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1435
		slot = path->slots[i];
1436 1437 1438 1439
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			node = path->nodes[i];
C
Chris Mason 已提交
1440 1441
			path->slots[i]++;
			*level = i;
1442
			WARN_ON(*level == 0);
1443
			btrfs_node_key(node, &disk_key, path->slots[i]);
1444
			memcpy(&root_item->drop_progress,
1445
			       &disk_key, sizeof(disk_key));
1446
			root_item->drop_level = i;
C
Chris Mason 已提交
1447 1448
			return 0;
		} else {
1449
			ret = btrfs_free_extent(trans, root,
1450 1451
						path->nodes[*level]->start,
						path->nodes[*level]->len, 1);
1452
			BUG_ON(ret);
1453
			free_extent_buffer(path->nodes[*level]);
C
Chris Mason 已提交
1454
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
1455 1456 1457 1458 1459 1460
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
1461 1462 1463 1464 1465
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
1466
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1467
			*root)
C
Chris Mason 已提交
1468
{
1469
	int ret = 0;
C
Chris Mason 已提交
1470
	int wret;
C
Chris Mason 已提交
1471
	int level;
1472
	struct btrfs_path *path;
C
Chris Mason 已提交
1473 1474
	int i;
	int orig_level;
1475
	struct btrfs_root_item *root_item = &root->root_item;
C
Chris Mason 已提交
1476

1477 1478
	path = btrfs_alloc_path();
	BUG_ON(!path);
C
Chris Mason 已提交
1479

1480
	level = btrfs_header_level(root->node);
C
Chris Mason 已提交
1481
	orig_level = level;
1482 1483
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path->nodes[level] = root->node;
1484
		extent_buffer_get(root->node);
1485 1486 1487
		path->slots[level] = 0;
	} else {
		struct btrfs_key key;
1488 1489
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;
1490

1491
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1492 1493
		level = root_item->drop_level;
		path->lowest_level = level;
1494
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1495
		if (wret < 0) {
1496 1497 1498
			ret = wret;
			goto out;
		}
1499 1500 1501 1502
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
1503
	}
C
Chris Mason 已提交
1504
	while(1) {
1505
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
1506
		if (wret > 0)
C
Chris Mason 已提交
1507
			break;
C
Chris Mason 已提交
1508 1509 1510
		if (wret < 0)
			ret = wret;

1511
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
1512
		if (wret > 0)
C
Chris Mason 已提交
1513
			break;
C
Chris Mason 已提交
1514 1515
		if (wret < 0)
			ret = wret;
1516 1517
		ret = -EAGAIN;
		break;
C
Chris Mason 已提交
1518
	}
C
Chris Mason 已提交
1519
	for (i = 0; i <= orig_level; i++) {
1520
		if (path->nodes[i]) {
1521
			free_extent_buffer(path->nodes[i]);
1522
			path->nodes[i] = NULL;
C
Chris Mason 已提交
1523
		}
C
Chris Mason 已提交
1524
	}
1525
out:
1526
	btrfs_free_path(path);
C
Chris Mason 已提交
1527
	return ret;
C
Chris Mason 已提交
1528
}
C
Chris Mason 已提交
1529

1530
int btrfs_free_block_groups(struct btrfs_fs_info *info)
C
Chris Mason 已提交
1531
{
1532 1533
	u64 start;
	u64 end;
1534
	u64 ptr;
C
Chris Mason 已提交
1535 1536
	int ret;
	while(1) {
1537 1538 1539
		ret = find_first_extent_bit(&info->block_group_cache, 0,
					    &start, &end, (unsigned int)-1);
		if (ret)
C
Chris Mason 已提交
1540
			break;
1541 1542 1543
		ret = get_state_private(&info->block_group_cache, start, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);
1544 1545
		clear_extent_bits(&info->block_group_cache, start,
				  end, (unsigned int)-1, GFP_NOFS);
C
Chris Mason 已提交
1546
	}
1547
	while(1) {
1548 1549 1550
		ret = find_first_extent_bit(&info->free_space_cache, 0,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
1551
			break;
1552 1553
		clear_extent_dirty(&info->free_space_cache, start,
				   end, GFP_NOFS);
1554
	}
C
Chris Mason 已提交
1555 1556 1557
	return 0;
}

C
Chris Mason 已提交
1558 1559 1560 1561 1562
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
1563
	int bit;
C
Chris Mason 已提交
1564
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
1565
	struct btrfs_fs_info *info = root->fs_info;
1566
	struct extent_map_tree *block_group_cache;
C
Chris Mason 已提交
1567 1568
	struct btrfs_key key;
	struct btrfs_key found_key;
1569
	struct extent_buffer *leaf;
1570 1571

	block_group_cache = &info->block_group_cache;
C
Chris Mason 已提交
1572

C
Chris Mason 已提交
1573
	root = info->extent_root;
C
Chris Mason 已提交
1574
	key.objectid = 0;
1575
	key.offset = BTRFS_BLOCK_GROUP_SIZE;
C
Chris Mason 已提交
1576 1577 1578 1579 1580 1581 1582
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
1583
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
1584 1585 1586 1587 1588
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
1589 1590
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
C
Chris Mason 已提交
1591 1592 1593 1594 1595
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
1596

1597 1598 1599
		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
C
Chris Mason 已提交
1600
		memcpy(&cache->key, &found_key, sizeof(found_key));
1601
		cache->cached = 0;
1602
		cache->pinned = 0;
C
Chris Mason 已提交
1603 1604
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
1605

1606 1607 1608 1609
		if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
			bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
			cache->data = BTRFS_BLOCK_GROUP_MIXED;
		} else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
1610
			bit = BLOCK_GROUP_DATA;
1611
			cache->data = BTRFS_BLOCK_GROUP_DATA;
1612 1613 1614
		} else {
			bit = BLOCK_GROUP_METADATA;
			cache->data = 0;
1615
		}
1616 1617 1618 1619 1620 1621

		/* use EXTENT_LOCKED to prevent merging */
		set_extent_bits(block_group_cache, found_key.objectid,
				found_key.objectid + found_key.offset - 1,
				bit | EXTENT_LOCKED, GFP_NOFS);
		set_state_private(block_group_cache, found_key.objectid,
J
Jens Axboe 已提交
1622
				  (unsigned long)cache);
1623

C
Chris Mason 已提交
1624
		if (key.objectid >=
1625
		    btrfs_super_total_bytes(&info->super_copy))
C
Chris Mason 已提交
1626 1627 1628 1629 1630 1631
			break;
	}

	btrfs_free_path(path);
	return 0;
}