extent-tree.c 41.1 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19
#include <linux/module.h>
20 21 22
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
23
#include "transaction.h"
24

25
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26 27 28
			    *orig_root, u64 num_blocks, u64 search_start,
			    u64 search_end, u64 hint_block,
			    struct btrfs_key *ins, int data);
29 30
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
31 32
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
33

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
static void reada_extent_leaves(struct btrfs_root *root,
				struct btrfs_path *path, u64 limit)
{
	struct btrfs_node *node;
	int i;
	int nritems;
	u64 item_objectid;
	u64 blocknr;
	int slot;
	int ret;

	if (!path->nodes[1])
		return;
	node = btrfs_buffer_node(path->nodes[1]);
	slot = path->slots[1] + 1;
	nritems = btrfs_header_nritems(&node->header);
	for (i = slot; i < nritems && i < slot + 8; i++) {
		item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
		if (item_objectid > limit)
			break;
		blocknr = btrfs_node_blockptr(node, i);
		ret = readahead_tree_block(root, blocknr);
		if (ret)
			break;
	}
}

61 62 63 64 65 66 67 68 69 70 71 72
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct btrfs_leaf *leaf;
	struct radix_tree_root *extent_radix;
	int slot;
	u64 i;
	u64 last = 0;
	u64 hole_size;
73
	u64 limit;
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
	int found = 0;

	root = root->fs_info->extent_root;
	extent_radix = &root->fs_info->extent_map_radix;

	if (block_group->cached)
		return 0;
	if (block_group->data)
		return 0;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	key.objectid = block_group->key.objectid;
	key.flags = 0;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	if (ret && path->slots[0] > 0)
		path->slots[0]--;
95 96
	limit = block_group->key.objectid + block_group->key.offset;
	reada_extent_leaves(root, path, limit);
97 98 99 100
	while(1) {
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(&leaf->header)) {
101
			reada_extent_leaves(root, path, limit);
102
			ret = btrfs_next_leaf(root, path);
103
			if (ret == 0) {
104
				continue;
105
			} else {
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
				if (found) {
					hole_size = block_group->key.objectid +
						block_group->key.offset - last;
				} else {
					last = block_group->key.objectid;
					hole_size = block_group->key.offset;
				}
				for (i = 0; i < hole_size; i++) {
					set_radix_bit(extent_radix,
						      last + i);
				}
				break;
			}
		}
		btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			if (found) {
				hole_size = block_group->key.objectid +
					block_group->key.offset - last;
			} else {
				last = block_group->key.objectid;
				hole_size = block_group->key.offset;
			}
			for (i = 0; i < hole_size; i++) {
				set_radix_bit(extent_radix, last + i);
			}
			break;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
				last = key.objectid + key.offset;
				found = 1;
			} else {
				hole_size = key.objectid - last;
				for (i = 0; i < hole_size; i++) {
					set_radix_bit(extent_radix, last + i);
				}
				last = key.objectid + key.offset;
			}
		}
		path->slots[0]++;
	}

	block_group->cached = 1;
	btrfs_free_path(path);
	return 0;
}

155 156 157
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
							 u64 blocknr)
C
Chris Mason 已提交
158 159 160 161 162 163 164 165
{
	struct btrfs_block_group_cache *block_group;
	int ret;

	ret = radix_tree_gang_lookup(&info->block_group_radix,
				     (void **)&block_group,
				     blocknr, 1);
	if (ret) {
C
Chris Mason 已提交
166
		if (block_group->key.objectid <= blocknr && blocknr <=
C
Chris Mason 已提交
167 168 169 170 171 172 173
		    block_group->key.objectid + block_group->key.offset)
			return block_group;
	}
	ret = radix_tree_gang_lookup(&info->block_group_data_radix,
				     (void **)&block_group,
				     blocknr, 1);
	if (ret) {
C
Chris Mason 已提交
174
		if (block_group->key.objectid <= blocknr && blocknr <=
C
Chris Mason 已提交
175 176 177 178 179 180
		    block_group->key.objectid + block_group->key.offset)
			return block_group;
	}
	return NULL;
}

181 182 183
static u64 leaf_range(struct btrfs_root *root)
{
	u64 size = BTRFS_LEAF_DATA_SIZE(root);
C
Chris Mason 已提交
184 185
	do_div(size, sizeof(struct btrfs_extent_item) +
		sizeof(struct btrfs_item));
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
	return size;
}

static u64 find_search_start(struct btrfs_root *root,
			     struct btrfs_block_group_cache **cache_ret,
			     u64 search_start, int num)
{
	unsigned long gang[8];
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	u64 last = max(search_start, cache->key.objectid);

	if (cache->data)
		goto out;
	if (num > 1) {
		last = max(last, cache->last_prealloc);
	}
again:
	cache_block_group(root, cache);
	while(1) {
		ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
					   gang, last, ARRAY_SIZE(gang));
		if (!ret)
			goto out;
		last = gang[ret-1] + 1;
		if (num > 1) {
			if (ret != ARRAY_SIZE(gang)) {
				goto new_group;
			}
			if (gang[ret-1] - gang[0] > leaf_range(root)) {
				continue;
			}
		}
		if (gang[0] >= cache->key.objectid + cache->key.offset) {
			goto new_group;
		}
		return gang[0];
	}
out:
	return max(cache->last_alloc, search_start);

new_group:
228 229
	cache = btrfs_lookup_block_group(root->fs_info,
					 last + cache->key.offset - 1);
230 231 232 233
	if (!cache) {
		return max((*cache_ret)->last_alloc, search_start);
	}
	cache = btrfs_find_block_group(root, cache,
234
				       last + cache->key.offset - 1, 0, 0);
235 236 237 238
	*cache_ret = cache;
	goto again;
}

C
Chris Mason 已提交
239 240 241 242 243 244 245
static u64 div_factor(u64 num, int factor)
{
	num *= factor;
	do_div(num, 10);
	return num;
}

246 247
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
248
						 *hint, u64 search_start,
249
						 int data, int owner)
C
Chris Mason 已提交
250 251
{
	struct btrfs_block_group_cache *cache[8];
252
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
253
	struct btrfs_fs_info *info = root->fs_info;
C
Chris Mason 已提交
254
	struct radix_tree_root *radix;
C
Chris Mason 已提交
255
	struct radix_tree_root *swap_radix;
C
Chris Mason 已提交
256
	u64 used;
257 258
	u64 last = 0;
	u64 hint_last;
C
Chris Mason 已提交
259 260
	int i;
	int ret;
261
	int full_search = 0;
262
	int factor = 8;
C
Chris Mason 已提交
263
	int data_swap = 0;
264 265 266

	if (!owner)
		factor = 5;
C
Chris Mason 已提交
267

C
Chris Mason 已提交
268
	if (data) {
C
Chris Mason 已提交
269
		radix = &info->block_group_data_radix;
C
Chris Mason 已提交
270 271
		swap_radix = &info->block_group_radix;
	} else {
C
Chris Mason 已提交
272
		radix = &info->block_group_radix;
C
Chris Mason 已提交
273 274
		swap_radix = &info->block_group_data_radix;
	}
C
Chris Mason 已提交
275 276 277

	if (search_start) {
		struct btrfs_block_group_cache *shint;
278
		shint = btrfs_lookup_block_group(info, search_start);
C
Chris Mason 已提交
279 280 281
		if (shint->data == data) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
C
Chris Mason 已提交
282
			    div_factor(shint->key.offset, factor)) {
C
Chris Mason 已提交
283 284 285 286 287
				return shint;
			}
		}
	}
	if (hint && hint->data == data) {
288
		used = btrfs_block_group_used(&hint->item);
C
Chris Mason 已提交
289 290
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
291 292
			return hint;
		}
C
Chris Mason 已提交
293
		if (used >= div_factor(hint->key.offset, 8)) {
C
Chris Mason 已提交
294 295 296 297 298
			radix_tree_tag_clear(radix,
					     hint->key.objectid +
					     hint->key.offset - 1,
					     BTRFS_BLOCK_GROUP_AVAIL);
		}
299
		last = hint->key.offset * 3;
C
Chris Mason 已提交
300
		if (hint->key.objectid >= last)
301 302
			last = max(search_start + hint->key.offset - 1,
				   hint->key.objectid - last);
C
Chris Mason 已提交
303 304
		else
			last = hint->key.objectid + hint->key.offset;
305 306
		hint_last = last;
	} else {
307 308 309 310 311 312
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
313
	}
C
Chris Mason 已提交
314
	while(1) {
C
Chris Mason 已提交
315
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
C
Chris Mason 已提交
316
						 last, ARRAY_SIZE(cache),
317
						 BTRFS_BLOCK_GROUP_AVAIL);
C
Chris Mason 已提交
318 319 320
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
321 322
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
C
Chris Mason 已提交
323
			used = btrfs_block_group_used(&cache[i]->item);
C
Chris Mason 已提交
324
			if (used + cache[i]->pinned <
C
Chris Mason 已提交
325
			    div_factor(cache[i]->key.offset, factor)) {
326 327
				found_group = cache[i];
				goto found;
C
Chris Mason 已提交
328
			}
C
Chris Mason 已提交
329
			if (used >= div_factor(cache[i]->key.offset, 8)) {
C
Chris Mason 已提交
330 331 332 333 334
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
335
		}
336
		cond_resched();
C
Chris Mason 已提交
337
	}
338 339
	last = hint_last;
again:
C
Chris Mason 已提交
340
	while(1) {
C
Chris Mason 已提交
341 342
		ret = radix_tree_gang_lookup(radix, (void **)cache,
					     last, ARRAY_SIZE(cache));
C
Chris Mason 已提交
343 344 345
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
346 347
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
C
Chris Mason 已提交
348
			used = btrfs_block_group_used(&cache[i]->item);
C
Chris Mason 已提交
349
			if (used + cache[i]->pinned < cache[i]->key.offset) {
350 351
				found_group = cache[i];
				goto found;
C
Chris Mason 已提交
352
			}
C
Chris Mason 已提交
353 354 355 356 357 358
			if (used >= cache[i]->key.offset) {
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
359
		}
360
		cond_resched();
C
Chris Mason 已提交
361
	}
362
	if (!full_search) {
C
Chris Mason 已提交
363
		last = search_start;
364 365 366
		full_search = 1;
		goto again;
	}
C
Chris Mason 已提交
367 368 369 370 371 372 373 374
	if (!data_swap) {
		struct radix_tree_root *tmp = radix;
		data_swap = 1;
		radix = swap_radix;
		swap_radix = tmp;
		last = search_start;
		goto again;
	}
375
	if (!found_group) {
C
Chris Mason 已提交
376
		ret = radix_tree_gang_lookup(radix,
377
					     (void **)&found_group, 0, 1);
C
Chris Mason 已提交
378 379 380 381 382
		if (ret == 0) {
			ret = radix_tree_gang_lookup(swap_radix,
						     (void **)&found_group,
						     0, 1);
		}
383 384
		BUG_ON(ret != 1);
	}
C
Chris Mason 已提交
385
found:
386
	return found_group;
C
Chris Mason 已提交
387 388
}

C
Chris Mason 已提交
389 390 391
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 blocknr, u64 num_blocks)
C
Chris Mason 已提交
392
{
393
	struct btrfs_path *path;
C
Chris Mason 已提交
394
	int ret;
C
Chris Mason 已提交
395
	struct btrfs_key key;
C
Chris Mason 已提交
396 397
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
398
	struct btrfs_key ins;
C
Chris Mason 已提交
399
	u32 refs;
C
Chris Mason 已提交
400

401
	find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0,
C
Chris Mason 已提交
402
			 &ins, 0);
403 404
	path = btrfs_alloc_path();
	BUG_ON(!path);
C
Chris Mason 已提交
405 406
	key.objectid = blocknr;
	key.flags = 0;
407
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
408
	key.offset = num_blocks;
409
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
410
				0, 1);
411
	if (ret != 0) {
412
		BUG();
413
	}
C
Chris Mason 已提交
414
	BUG_ON(ret != 0);
415 416
	l = btrfs_buffer_leaf(path->nodes[0]);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
C
Chris Mason 已提交
417 418
	refs = btrfs_extent_refs(item);
	btrfs_set_extent_refs(item, refs + 1);
419
	btrfs_mark_buffer_dirty(path->nodes[0]);
420

421 422
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
423
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
424
	del_pending_extents(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
425 426 427
	return 0;
}

C
Chris Mason 已提交
428 429 430
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 blocknr,
			     u64 num_blocks, u32 *refs)
431
{
432
	struct btrfs_path *path;
433
	int ret;
C
Chris Mason 已提交
434
	struct btrfs_key key;
C
Chris Mason 已提交
435 436
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
437 438

	path = btrfs_alloc_path();
439
	key.objectid = blocknr;
440
	key.offset = num_blocks;
441 442
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
443
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
444
				0, 0);
445 446
	if (ret != 0)
		BUG();
447 448
	l = btrfs_buffer_leaf(path->nodes[0]);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
C
Chris Mason 已提交
449
	*refs = btrfs_extent_refs(item);
450 451
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
452 453 454
	return 0;
}

C
Chris Mason 已提交
455 456 457
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root)
{
C
Chris Mason 已提交
458
	return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
C
Chris Mason 已提交
459 460
}

461
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
C
Chris Mason 已提交
462
		  struct buffer_head *buf)
C
Chris Mason 已提交
463 464
{
	u64 blocknr;
C
Chris Mason 已提交
465
	struct btrfs_node *buf_node;
466 467 468
	struct btrfs_leaf *buf_leaf;
	struct btrfs_disk_key *key;
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
469
	int i;
470 471
	int leaf;
	int ret;
472

473
	if (!root->ref_cows)
474
		return 0;
C
Chris Mason 已提交
475
	buf_node = btrfs_buffer_node(buf);
476 477
	leaf = btrfs_is_leaf(buf_node);
	buf_leaf = btrfs_buffer_leaf(buf);
C
Chris Mason 已提交
478
	for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
479
		if (leaf) {
C
Chris Mason 已提交
480
			u64 disk_blocknr;
481 482 483 484 485
			key = &buf_leaf->items[i].key;
			if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf_leaf, i,
					    struct btrfs_file_extent_item);
486 487 488
			if (btrfs_file_extent_type(fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
C
Chris Mason 已提交
489 490 491 492
			disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
			if (disk_blocknr == 0)
				continue;
			ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
493 494 495 496
				    btrfs_file_extent_disk_num_blocks(fi));
			BUG_ON(ret);
		} else {
			blocknr = btrfs_node_blockptr(buf_node, i);
C
Chris Mason 已提交
497
			ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
498 499
			BUG_ON(ret);
		}
C
Chris Mason 已提交
500 501 502 503
	}
	return 0;
}

C
Chris Mason 已提交
504 505 506 507 508 509 510 511 512 513 514
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_block_group_item *bi;
	struct btrfs_key ins;

515
	find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0);
C
Chris Mason 已提交
516 517 518 519 520 521 522 523 524 525 526 527 528 529
	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	BUG_ON(ret);
	bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
			    struct btrfs_block_group_item);
	memcpy(bi, &cache->item, sizeof(*bi));
	mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(extent_root, path);

	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
C
Chris Mason 已提交
530 531
	if (cache->data)
		cache->last_alloc = cache->first_free;
C
Chris Mason 已提交
532 533 534 535
	return 0;

}

C
Chris Mason 已提交
536 537 538
static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct radix_tree_root *radix)
C
Chris Mason 已提交
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
{
	struct btrfs_block_group_cache *cache[8];
	int ret;
	int err = 0;
	int werr = 0;
	int i;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
						 0, ARRAY_SIZE(cache),
						 BTRFS_BLOCK_GROUP_DIRTY);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			radix_tree_tag_clear(radix, cache[i]->key.objectid +
					     cache[i]->key.offset - 1,
					     BTRFS_BLOCK_GROUP_DIRTY);
			err = write_one_cache_group(trans, root,
						    path, cache[i]);
			if (err)
				werr = err;
		}
	}
	btrfs_free_path(path);
	return werr;
}

C
Chris Mason 已提交
571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int ret;
	int ret2;
	ret = write_dirty_block_radix(trans, root,
				      &root->fs_info->block_group_radix);
	ret2 = write_dirty_block_radix(trans, root,
				      &root->fs_info->block_group_data_radix);
	if (ret)
		return ret;
	if (ret2)
		return ret2;
	return 0;
}

C
Chris Mason 已提交
587 588
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
C
Chris Mason 已提交
589 590
			      u64 blocknr, u64 num, int alloc, int mark_free,
			      int data)
C
Chris Mason 已提交
591 592 593 594 595 596
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num;
	u64 old_val;
	u64 block_in_group;
597
	u64 i;
C
Chris Mason 已提交
598
	int ret;
C
Chris Mason 已提交
599

C
Chris Mason 已提交
600
	while(total) {
601
		cache = btrfs_lookup_block_group(info, blocknr);
C
Chris Mason 已提交
602
		if (!cache) {
C
Chris Mason 已提交
603
			return -1;
C
Chris Mason 已提交
604
		}
C
Chris Mason 已提交
605 606
		block_in_group = blocknr - cache->key.objectid;
		WARN_ON(block_in_group > cache->key.offset);
C
Chris Mason 已提交
607
		radix_tree_tag_set(cache->radix, cache->key.objectid +
C
Chris Mason 已提交
608
				   cache->key.offset - 1,
C
Chris Mason 已提交
609 610 611 612
				   BTRFS_BLOCK_GROUP_DIRTY);

		old_val = btrfs_block_group_used(&cache->item);
		num = min(total, cache->key.offset - block_in_group);
C
Chris Mason 已提交
613 614 615
		if (alloc) {
			if (blocknr > cache->last_alloc)
				cache->last_alloc = blocknr;
616 617 618 619 620 621
			if (!cache->data) {
				for (i = 0; i < num; i++) {
					clear_radix_bit(&info->extent_map_radix,
						        blocknr + i);
				}
			}
C
Chris Mason 已提交
622
			if (cache->data != data &&
C
Chris Mason 已提交
623
			    old_val < (cache->key.offset >> 1)) {
C
Chris Mason 已提交
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
				cache->data = data;
				radix_tree_delete(cache->radix,
						  cache->key.objectid +
						  cache->key.offset - 1);

				if (data) {
					cache->radix =
						&info->block_group_data_radix;
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
					cache->radix = &info->block_group_radix;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
				ret = radix_tree_insert(cache->radix,
							cache->key.objectid +
							cache->key.offset - 1,
							(void *)cache);
			}
			old_val += num;
C
Chris Mason 已提交
645
		} else {
C
Chris Mason 已提交
646
			old_val -= num;
C
Chris Mason 已提交
647 648
			if (blocknr < cache->first_free)
				cache->first_free = blocknr;
649 650 651 652 653 654
			if (!cache->data && mark_free) {
				for (i = 0; i < num; i++) {
					set_radix_bit(&info->extent_map_radix,
						      blocknr + i);
				}
			}
C
Chris Mason 已提交
655 656
			if (old_val < (cache->key.offset >> 1) &&
			    old_val + num >= (cache->key.offset >> 1)) {
657 658 659 660 661
				radix_tree_tag_set(cache->radix,
						   cache->key.objectid +
						   cache->key.offset - 1,
						   BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
662
		}
C
Chris Mason 已提交
663
		btrfs_set_block_group_used(&cache->item, old_val);
664 665
		total -= num;
		blocknr += num;
C
Chris Mason 已提交
666 667 668 669
	}
	return 0;
}

C
Chris Mason 已提交
670 671 672 673 674 675 676
static int try_remove_page(struct address_space *mapping, unsigned long index)
{
	int ret;
	ret = invalidate_mapping_pages(mapping, index, index);
	return ret;
}

677 678
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
			       btrfs_root *root)
679
{
C
Chris Mason 已提交
680
	unsigned long gang[8];
C
Chris Mason 已提交
681
	struct inode *btree_inode = root->fs_info->btree_inode;
C
Chris Mason 已提交
682
	struct btrfs_block_group_cache *block_group;
683
	u64 first = 0;
684 685
	int ret;
	int i;
C
Chris Mason 已提交
686
	struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
687
	struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
688 689

	while(1) {
690
		ret = find_first_radix_bit(pinned_radix, gang, 0,
C
Chris Mason 已提交
691
					   ARRAY_SIZE(gang));
692 693
		if (!ret)
			break;
694
		if (!first)
C
Chris Mason 已提交
695
			first = gang[0];
696
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
697
			clear_radix_bit(pinned_radix, gang[i]);
698 699
			block_group = btrfs_lookup_block_group(root->fs_info,
							       gang[i]);
C
Chris Mason 已提交
700 701 702 703 704
			if (block_group) {
				WARN_ON(block_group->pinned == 0);
				block_group->pinned--;
				if (gang[i] < block_group->last_alloc)
					block_group->last_alloc = gang[i];
705 706 707 708
				if (gang[i] < block_group->last_prealloc)
					block_group->last_prealloc = gang[i];
				if (!block_group->data)
					set_radix_bit(extent_radix, gang[i]);
C
Chris Mason 已提交
709
			}
C
Chris Mason 已提交
710 711 712
			try_remove_page(btree_inode->i_mapping,
					gang[i] << (PAGE_CACHE_SHIFT -
						    btree_inode->i_blkbits));
713
		}
714 715 716 717
	}
	return 0;
}

718 719
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
C
Chris Mason 已提交
720
{
C
Chris Mason 已提交
721
	struct btrfs_key ins;
C
Chris Mason 已提交
722
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
723 724
	int i;
	int ret;
725 726
	u64 super_blocks_used;
	struct btrfs_fs_info *info = extent_root->fs_info;
C
Chris Mason 已提交
727

C
Chris Mason 已提交
728
	btrfs_set_extent_refs(&extent_item, 1);
C
Chris Mason 已提交
729 730
	ins.offset = 1;
	ins.flags = 0;
731
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
732
	btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
C
Chris Mason 已提交
733

734 735
	for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
		ins.objectid = extent_root->fs_info->extent_tree_insert[i];
736 737 738
		super_blocks_used = btrfs_super_blocks_used(info->disk_super);
		btrfs_set_super_blocks_used(info->disk_super,
					    super_blocks_used + 1);
739 740
		ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
					sizeof(extent_item));
C
Chris Mason 已提交
741 742
		BUG_ON(ret);
	}
743 744
	extent_root->fs_info->extent_tree_insert_nr = 0;
	extent_root->fs_info->extent_tree_prealloc_nr = 0;
C
Chris Mason 已提交
745 746 747
	return 0;
}

C
Chris Mason 已提交
748
static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
C
Chris Mason 已提交
749 750
{
	int err;
C
Chris Mason 已提交
751
	struct btrfs_header *header;
C
Chris Mason 已提交
752 753
	struct buffer_head *bh;

C
Chris Mason 已提交
754
	if (!pending) {
755
		bh = btrfs_find_tree_block(root, blocknr);
C
Chris Mason 已提交
756 757 758 759 760 761 762 763 764 765
		if (bh) {
			if (buffer_uptodate(bh)) {
				u64 transid =
				    root->fs_info->running_transaction->transid;
				header = btrfs_buffer_header(bh);
				if (btrfs_header_generation(header) ==
				    transid) {
					btrfs_block_release(root, bh);
					return 0;
				}
C
Chris Mason 已提交
766
			}
C
Chris Mason 已提交
767
			btrfs_block_release(root, bh);
C
Chris Mason 已提交
768 769
		}
		err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
C
Chris Mason 已提交
770 771
		if (!err) {
			struct btrfs_block_group_cache *cache;
772 773
			cache = btrfs_lookup_block_group(root->fs_info,
							 blocknr);
C
Chris Mason 已提交
774 775 776
			if (cache)
				cache->pinned++;
		}
C
Chris Mason 已提交
777 778 779
	} else {
		err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
	}
C
Chris Mason 已提交
780
	BUG_ON(err < 0);
C
Chris Mason 已提交
781 782 783
	return 0;
}

784
/*
785
 * remove an extent from the root, returns 0 on success
786
 */
787
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
788 789
			 *root, u64 blocknr, u64 num_blocks, int pin,
			 int mark_free)
790
{
791
	struct btrfs_path *path;
C
Chris Mason 已提交
792
	struct btrfs_key key;
793 794
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
795
	int ret;
C
Chris Mason 已提交
796
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
797
	struct btrfs_key ins;
C
Chris Mason 已提交
798
	u32 refs;
C
Chris Mason 已提交
799

800 801
	key.objectid = blocknr;
	key.flags = 0;
802
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
803 804
	key.offset = num_blocks;

805
	find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0);
806 807
	path = btrfs_alloc_path();
	BUG_ON(!path);
808

809
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
810 811 812
	if (ret) {
		BUG();
	}
813
	ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
C
Chris Mason 已提交
814
			    struct btrfs_extent_item);
815
	BUG_ON(ei->refs == 0);
C
Chris Mason 已提交
816 817
	refs = btrfs_extent_refs(ei) - 1;
	btrfs_set_extent_refs(ei, refs);
818
	btrfs_mark_buffer_dirty(path->nodes[0]);
C
Chris Mason 已提交
819
	if (refs == 0) {
820
		u64 super_blocks_used;
C
Chris Mason 已提交
821 822

		if (pin) {
C
Chris Mason 已提交
823
			ret = pin_down_block(root, blocknr, 0);
C
Chris Mason 已提交
824 825 826
			BUG_ON(ret);
		}

827 828 829
		super_blocks_used = btrfs_super_blocks_used(info->disk_super);
		btrfs_set_super_blocks_used(info->disk_super,
					    super_blocks_used - num_blocks);
830
		ret = btrfs_del_item(trans, extent_root, path);
831 832
		if (ret)
			BUG();
833
		ret = update_block_group(trans, root, blocknr, num_blocks, 0,
C
Chris Mason 已提交
834
					 mark_free, 0);
C
Chris Mason 已提交
835
		BUG_ON(ret);
836
	}
837
	btrfs_free_path(path);
838
	finish_current_insert(trans, extent_root);
839 840 841 842 843 844 845
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
846 847
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
848 849
{
	int ret;
C
Chris Mason 已提交
850 851
	int wret;
	int err = 0;
C
Chris Mason 已提交
852
	unsigned long gang[4];
853
	int i;
C
Chris Mason 已提交
854 855
	struct radix_tree_root *pending_radix;
	struct radix_tree_root *pinned_radix;
C
Chris Mason 已提交
856
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
857 858 859

	pending_radix = &extent_root->fs_info->pending_del_radix;
	pinned_radix = &extent_root->fs_info->pinned_radix;
860 861

	while(1) {
862
		ret = find_first_radix_bit(pending_radix, gang, 0,
C
Chris Mason 已提交
863
					   ARRAY_SIZE(gang));
864 865 866
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
867
			wret = set_radix_bit(pinned_radix, gang[i]);
C
Chris Mason 已提交
868
			if (wret == 0) {
869 870
				cache =
				  btrfs_lookup_block_group(extent_root->fs_info,
C
Chris Mason 已提交
871 872 873 874 875 876 877 878 879
							   gang[i]);
				if (cache)
					cache->pinned++;
			}
			if (wret < 0) {
				printk(KERN_CRIT "set_radix_bit, err %d\n",
				       wret);
				BUG_ON(wret < 0);
			}
C
Chris Mason 已提交
880 881
			wret = clear_radix_bit(pending_radix, gang[i]);
			BUG_ON(wret);
882
			wret = __free_extent(trans, extent_root,
883
					     gang[i], 1, 0, 0);
C
Chris Mason 已提交
884 885
			if (wret)
				err = wret;
886 887
		}
	}
C
Chris Mason 已提交
888
	return err;
889 890 891 892 893
}

/*
 * remove an extent from the root, returns 0 on success
 */
894 895
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, u64 blocknr, u64 num_blocks, int pin)
896
{
897
	struct btrfs_root *extent_root = root->fs_info->extent_root;
898 899
	int pending_ret;
	int ret;
900

901
	if (root == extent_root) {
C
Chris Mason 已提交
902
		pin_down_block(root, blocknr, 1);
903 904
		return 0;
	}
905
	ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
C
Chris Mason 已提交
906
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
907 908 909 910 911 912 913
	return ret ? ret : pending_ret;
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
914
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
915 916 917
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
918 919
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			    *orig_root, u64 num_blocks, u64 search_start, u64
920 921
			    search_end, u64 hint_block,
			    struct btrfs_key *ins, int data)
922
{
923
	struct btrfs_path *path;
C
Chris Mason 已提交
924
	struct btrfs_key key;
925 926 927
	int ret;
	u64 hole_size = 0;
	int slot = 0;
C
Chris Mason 已提交
928
	u64 last_block = 0;
C
Chris Mason 已提交
929
	u64 test_block;
C
Chris Mason 已提交
930
	u64 orig_search_start = search_start;
931
	int start_found;
C
Chris Mason 已提交
932
	struct btrfs_leaf *l;
933
	struct btrfs_root * root = orig_root->fs_info->extent_root;
934
	struct btrfs_fs_info *info = root->fs_info;
935
	int total_needed = num_blocks;
936 937
	int total_found = 0;
	int fill_prealloc = 0;
C
Chris Mason 已提交
938
	int level;
C
Chris Mason 已提交
939
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
940
	int full_scan = 0;
941
	int wrapped = 0;
942
	u64 limit;
943

944 945 946 947
	path = btrfs_alloc_path();
	ins->flags = 0;
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

C
Chris Mason 已提交
948
	level = btrfs_header_level(btrfs_buffer_header(root->node));
949 950 951
	if (num_blocks == 0) {
		fill_prealloc = 1;
		num_blocks = 1;
952
		total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
953
	}
C
Chris Mason 已提交
954 955
	if (search_end == (u64)-1)
		search_end = btrfs_super_total_blocks(info->disk_super);
956
	if (hint_block) {
957
		block_group = btrfs_lookup_block_group(info, hint_block);
C
Chris Mason 已提交
958
		block_group = btrfs_find_block_group(root, block_group,
959
						     hint_block, data, 1);
C
Chris Mason 已提交
960 961 962
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group, 0,
963
						     data, 1);
C
Chris Mason 已提交
964 965 966
	}

check_failed:
C
Chris Mason 已提交
967
	if (!block_group->data)
968 969
		search_start = find_search_start(root, &block_group,
						 search_start, total_needed);
970
	else if (!full_scan)
971 972
		search_start = max(block_group->last_alloc, search_start);

973
	btrfs_init_path(path);
974 975 976
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
977

978
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
979 980
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
981

982
	if (path->slots[0] > 0) {
983
		path->slots[0]--;
984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
	}

	l = btrfs_buffer_leaf(path->nodes[0]);
	btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
	/*
	 * a rare case, go back one key if we hit a block group item
	 * instead of an extent item
	 */
	if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
	    key.objectid + key.offset >= search_start) {
		ins->objectid = key.objectid;
		ins->offset = key.offset - 1;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
		if (ret < 0)
			goto error;

		if (path->slots[0] > 0) {
			path->slots[0]--;
		}
	}
1005

1006
	while (1) {
1007 1008
		l = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
1009
		if (slot >= btrfs_header_nritems(&l->header)) {
1010 1011 1012 1013
			if (fill_prealloc) {
				info->extent_tree_prealloc_nr = 0;
				total_found = 0;
			}
1014 1015
			if (start_found)
				limit = last_block +
C
Chris Mason 已提交
1016
					(block_group->key.offset >> 1);
1017 1018
			else
				limit = search_start +
C
Chris Mason 已提交
1019
					(block_group->key.offset >> 1);
1020
			ret = btrfs_next_leaf(root, path);
1021 1022
			if (ret == 0)
				continue;
C
Chris Mason 已提交
1023 1024
			if (ret < 0)
				goto error;
1025 1026
			if (!start_found) {
				ins->objectid = search_start;
C
Chris Mason 已提交
1027
				ins->offset = search_end - search_start;
1028 1029 1030 1031 1032
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = last_block > search_start ?
					last_block : search_start;
C
Chris Mason 已提交
1033
			ins->offset = search_end - ins->objectid;
1034 1035
			goto check_pending;
		}
1036

C
Chris Mason 已提交
1037
		btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1038 1039 1040 1041 1042 1043 1044 1045 1046
		if (key.objectid >= search_start && key.objectid > last_block &&
		    start_found) {
			if (last_block < search_start)
				last_block = search_start;
			hole_size = key.objectid - last_block;
			if (hole_size >= num_blocks) {
				ins->objectid = last_block;
				ins->offset = hole_size;
				goto check_pending;
1047
			}
1048
		}
1049 1050 1051 1052

		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
			goto next;

1053
		start_found = 1;
C
Chris Mason 已提交
1054
		last_block = key.objectid + key.offset;
1055
		if (!full_scan && last_block >= block_group->key.objectid +
C
Chris Mason 已提交
1056 1057 1058 1059 1060 1061
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
				block_group->key.offset * 2;
			goto new_group;
		}
C
Chris Mason 已提交
1062
next:
1063
		path->slots[0]++;
1064
		cond_resched();
1065 1066 1067 1068 1069 1070
	}
	// FIXME -ENOSPC
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1071
	btrfs_release_path(root, path);
1072
	BUG_ON(ins->objectid < search_start);
1073

C
Chris Mason 已提交
1074
	if (ins->objectid + num_blocks >= search_end) {
1075 1076 1077 1078
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
C
Chris Mason 已提交
1079
		search_start = orig_search_start;
1080 1081 1082 1083
		if (wrapped)
			full_scan = 1;
		else
			wrapped = 1;
C
Chris Mason 已提交
1084
		goto new_group;
1085
	}
C
Chris Mason 已提交
1086
	for (test_block = ins->objectid;
1087 1088
	     test_block < ins->objectid + num_blocks; test_block++) {
		if (test_radix_bit(&info->pinned_radix, test_block)) {
C
Chris Mason 已提交
1089
			search_start = test_block + 1;
C
Chris Mason 已提交
1090
			goto new_group;
1091 1092
		}
	}
1093 1094 1095 1096 1097 1098 1099
	if (!fill_prealloc && info->extent_tree_insert_nr) {
		u64 last =
		  info->extent_tree_insert[info->extent_tree_insert_nr - 1];
		if (ins->objectid + num_blocks >
		    info->extent_tree_insert[0] &&
		    ins->objectid <= last) {
			search_start = last + 1;
1100
			WARN_ON(!full_scan);
C
Chris Mason 已提交
1101
			goto new_group;
1102 1103 1104 1105 1106 1107 1108 1109
		}
	}
	if (!fill_prealloc && info->extent_tree_prealloc_nr) {
		u64 first =
		  info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
		if (ins->objectid + num_blocks > first &&
		    ins->objectid <= info->extent_tree_prealloc[0]) {
			search_start = info->extent_tree_prealloc[0] + 1;
1110
			WARN_ON(!full_scan);
C
Chris Mason 已提交
1111
			goto new_group;
1112 1113 1114 1115 1116
		}
	}
	if (fill_prealloc) {
		int nr;
		test_block = ins->objectid;
1117 1118 1119 1120 1121
		if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
		    leaf_range(root)) {
			total_found = 0;
			info->extent_tree_prealloc_nr = total_found;
		}
1122 1123 1124 1125
		while(test_block < ins->objectid + ins->offset &&
		      total_found < total_needed) {
			nr = total_needed - total_found - 1;
			BUG_ON(nr < 0);
C
Chris Mason 已提交
1126
			info->extent_tree_prealloc[nr] = test_block;
1127 1128 1129 1130 1131
			total_found++;
			test_block++;
		}
		if (total_found < total_needed) {
			search_start = test_block;
C
Chris Mason 已提交
1132
			goto new_group;
1133
		}
C
Chris Mason 已提交
1134 1135
		info->extent_tree_prealloc_nr = total_found;
	}
1136
	if (!data) {
1137
		block_group = btrfs_lookup_block_group(info, ins->objectid);
1138 1139 1140 1141 1142 1143 1144
		if (block_group) {
			if (fill_prealloc)
				block_group->last_prealloc =
				     info->extent_tree_prealloc[total_needed-1];
			else
				trans->block_group = block_group;
		}
1145
	}
C
Chris Mason 已提交
1146
	ins->offset = num_blocks;
1147
	btrfs_free_path(path);
1148
	return 0;
C
Chris Mason 已提交
1149 1150

new_group:
C
Chris Mason 已提交
1151
	if (search_start + num_blocks >= search_end) {
C
Chris Mason 已提交
1152
		search_start = orig_search_start;
1153 1154 1155 1156 1157 1158 1159 1160
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
		if (wrapped)
			full_scan = 1;
		else
			wrapped = 1;
C
Chris Mason 已提交
1161
	}
1162
	block_group = btrfs_lookup_block_group(info, search_start);
1163
	cond_resched();
C
Chris Mason 已提交
1164 1165
	if (!full_scan)
		block_group = btrfs_find_block_group(root, block_group,
1166
						     search_start, data, 0);
C
Chris Mason 已提交
1167 1168
	goto check_failed;

C
Chris Mason 已提交
1169
error:
1170 1171
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1172
	return ret;
1173 1174 1175 1176 1177 1178 1179 1180
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1181 1182
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, u64 owner,
1183
		       u64 num_blocks, u64 hint_block,
C
Chris Mason 已提交
1184
		       u64 search_end, struct btrfs_key *ins, int data)
1185 1186 1187
{
	int ret;
	int pending_ret;
1188
	u64 super_blocks_used;
1189
	u64 search_start = 0;
1190 1191
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1192
	struct btrfs_extent_item extent_item;
1193
	struct btrfs_key prealloc_key;
C
Chris Mason 已提交
1194

C
Chris Mason 已提交
1195
	btrfs_set_extent_refs(&extent_item, 1);
1196
	btrfs_set_extent_owner(&extent_item, owner);
1197

C
Chris Mason 已提交
1198
	if (root == extent_root) {
1199 1200
		int nr;
		BUG_ON(info->extent_tree_prealloc_nr == 0);
C
Chris Mason 已提交
1201 1202
		BUG_ON(num_blocks != 1);
		ins->offset = 1;
1203 1204 1205 1206 1207
		info->extent_tree_prealloc_nr--;
		nr = info->extent_tree_prealloc_nr;
		ins->objectid = info->extent_tree_prealloc[nr];
		info->extent_tree_insert[info->extent_tree_insert_nr++] =
			ins->objectid;
C
Chris Mason 已提交
1208
		ret = update_block_group(trans, root,
C
Chris Mason 已提交
1209
					 ins->objectid, ins->offset, 1, 0, 0);
C
Chris Mason 已提交
1210
		BUG_ON(ret);
1211 1212
		return 0;
	}
1213 1214 1215 1216 1217 1218 1219

	/*
	 * if we're doing a data allocation, preallocate room in the
	 * extent tree first.  This way the extent tree blocks end up
	 * in the correct block group.
	 */
	if (data) {
1220
		ret = find_free_extent(trans, root, 0, 0,
1221
				       search_end, 0, &prealloc_key, 0);
1222 1223 1224 1225 1226 1227 1228 1229 1230 1231
		if (ret) {
			return ret;
		}
		if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
			int nr = info->extent_tree_prealloc_nr;
			search_end = info->extent_tree_prealloc[nr - 1] - 1;
		} else {
			search_start = info->extent_tree_prealloc[0] + 1;
		}
	}
1232 1233
	if (hint_block < search_start)
		hint_block = search_start;
1234
	/* do the real allocation */
1235
	ret = find_free_extent(trans, root, num_blocks, search_start,
1236
			       search_end, hint_block, ins, data);
1237
	if (ret) {
C
Chris Mason 已提交
1238
		return ret;
1239
	}
1240

1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254
	/*
	 * if we're doing a metadata allocation, preallocate space in the
	 * extent tree second.  This way, we don't create a tiny hole
	 * in the allocation map between any unused preallocation blocks
	 * and the metadata block we're actually allocating.  On disk,
	 * it'll go:
	 * [block we've allocated], [used prealloc 1], [ unused prealloc ]
	 * The unused prealloc will get reused the next time around.
	 */
	if (!data) {
		if (ins->objectid + ins->offset >= search_end)
			search_end = ins->objectid - 1;
		else
			search_start = ins->objectid + ins->offset;
C
Chris Mason 已提交
1255

1256 1257 1258
		if (hint_block < search_start)
			hint_block = search_start;

1259
		ret = find_free_extent(trans, root, 0, search_start,
1260 1261
				       search_end, hint_block,
				       &prealloc_key, 0);
1262 1263 1264 1265
		if (ret) {
			return ret;
		}
	}
1266

1267 1268 1269
	super_blocks_used = btrfs_super_blocks_used(info->disk_super);
	btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
				    num_blocks);
1270 1271
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1272

1273
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1274
	pending_ret = del_pending_extents(trans, extent_root);
1275
	if (ret) {
C
Chris Mason 已提交
1276
		return ret;
1277 1278
	}
	if (pending_ret) {
C
Chris Mason 已提交
1279
		return pending_ret;
1280
	}
C
Chris Mason 已提交
1281 1282
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
C
Chris Mason 已提交
1283
	BUG_ON(ret);
C
Chris Mason 已提交
1284
	return 0;
1285 1286 1287 1288 1289 1290
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
C
Chris Mason 已提交
1291
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1292
					   struct btrfs_root *root, u64 hint)
1293
{
C
Chris Mason 已提交
1294
	struct btrfs_key ins;
1295
	int ret;
C
Chris Mason 已提交
1296
	struct buffer_head *buf;
1297

1298
	ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1299
				 1, hint, (unsigned long)-1, &ins, 0);
1300 1301 1302 1303
	if (ret) {
		BUG();
		return NULL;
	}
C
Chris Mason 已提交
1304
	BUG_ON(ret);
1305
	buf = btrfs_find_create_tree_block(root, ins.objectid);
1306
	set_buffer_uptodate(buf);
C
Chris Mason 已提交
1307
	set_buffer_checked(buf);
1308
	set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1309 1310
	return buf;
}
1311

1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325
static int drop_leaf_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct buffer_head *cur)
{
	struct btrfs_disk_key *key;
	struct btrfs_leaf *leaf;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
	leaf = btrfs_buffer_leaf(cur);
	nritems = btrfs_header_nritems(&leaf->header);
	for (i = 0; i < nritems; i++) {
C
Chris Mason 已提交
1326
		u64 disk_blocknr;
1327 1328 1329 1330
		key = &leaf->items[i].key;
		if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1331 1332
		if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
			continue;
1333 1334 1335 1336
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
C
Chris Mason 已提交
1337 1338 1339 1340
		disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
		if (disk_blocknr == 0)
			continue;
		ret = btrfs_free_extent(trans, root, disk_blocknr,
1341 1342 1343 1344 1345 1346 1347
					btrfs_file_extent_disk_num_blocks(fi),
					0);
		BUG_ON(ret);
	}
	return 0;
}

C
Chris Mason 已提交
1348 1349 1350 1351
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1352 1353
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1354
{
C
Chris Mason 已提交
1355 1356
	struct buffer_head *next;
	struct buffer_head *cur;
C
Chris Mason 已提交
1357 1358 1359 1360
	u64 blocknr;
	int ret;
	u32 refs;

1361 1362
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1363
	ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1364
			       1, &refs);
C
Chris Mason 已提交
1365 1366 1367
	BUG_ON(ret);
	if (refs > 1)
		goto out;
C
Chris Mason 已提交
1368 1369 1370
	/*
	 * walk down to the last node level and free all the leaves
	 */
1371
	while(*level >= 0) {
1372 1373
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1374
		cur = path->nodes[*level];
C
Chris Mason 已提交
1375 1376
		if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
			WARN_ON(1);
1377
		if (path->slots[*level] >=
C
Chris Mason 已提交
1378
		    btrfs_header_nritems(btrfs_buffer_header(cur)))
C
Chris Mason 已提交
1379
			break;
1380 1381 1382 1383 1384
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
C
Chris Mason 已提交
1385 1386
		blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
					      path->slots[*level]);
C
Chris Mason 已提交
1387
		ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1388 1389
		BUG_ON(ret);
		if (refs != 1) {
C
Chris Mason 已提交
1390
			path->slots[*level]++;
1391
			ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
C
Chris Mason 已提交
1392 1393 1394 1395
			BUG_ON(ret);
			continue;
		}
		next = read_tree_block(root, blocknr);
1396
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1397
		if (path->nodes[*level-1])
C
Chris Mason 已提交
1398
			btrfs_block_release(root, path->nodes[*level-1]);
C
Chris Mason 已提交
1399
		path->nodes[*level-1] = next;
C
Chris Mason 已提交
1400
		*level = btrfs_header_level(btrfs_buffer_header(next));
C
Chris Mason 已提交
1401 1402 1403
		path->slots[*level] = 0;
	}
out:
1404 1405
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1406
	ret = btrfs_free_extent(trans, root,
1407
				bh_blocknr(path->nodes[*level]), 1, 1);
C
Chris Mason 已提交
1408
	btrfs_block_release(root, path->nodes[*level]);
C
Chris Mason 已提交
1409 1410 1411 1412 1413 1414
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1415 1416 1417 1418 1419
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1420 1421
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1422 1423 1424 1425
{
	int i;
	int slot;
	int ret;
C
Chris Mason 已提交
1426
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1427
		slot = path->slots[i];
C
Chris Mason 已提交
1428 1429
		if (slot < btrfs_header_nritems(
		    btrfs_buffer_header(path->nodes[i])) - 1) {
C
Chris Mason 已提交
1430 1431 1432 1433
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
1434
			ret = btrfs_free_extent(trans, root,
1435
						bh_blocknr(path->nodes[*level]),
1436
						1, 1);
1437
			BUG_ON(ret);
C
Chris Mason 已提交
1438
			btrfs_block_release(root, path->nodes[*level]);
C
Chris Mason 已提交
1439
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
1440 1441 1442 1443 1444 1445
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
1446 1447 1448 1449 1450
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
1451
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
C
Chris Mason 已提交
1452
			*root, struct buffer_head *snap)
C
Chris Mason 已提交
1453
{
1454
	int ret = 0;
C
Chris Mason 已提交
1455
	int wret;
C
Chris Mason 已提交
1456
	int level;
1457
	struct btrfs_path *path;
C
Chris Mason 已提交
1458 1459 1460
	int i;
	int orig_level;

1461 1462
	path = btrfs_alloc_path();
	BUG_ON(!path);
C
Chris Mason 已提交
1463

C
Chris Mason 已提交
1464
	level = btrfs_header_level(btrfs_buffer_header(snap));
C
Chris Mason 已提交
1465
	orig_level = level;
1466 1467
	path->nodes[level] = snap;
	path->slots[level] = 0;
C
Chris Mason 已提交
1468
	while(1) {
1469
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
1470
		if (wret > 0)
C
Chris Mason 已提交
1471
			break;
C
Chris Mason 已提交
1472 1473 1474
		if (wret < 0)
			ret = wret;

1475
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
1476
		if (wret > 0)
C
Chris Mason 已提交
1477
			break;
C
Chris Mason 已提交
1478 1479
		if (wret < 0)
			ret = wret;
C
Chris Mason 已提交
1480
	}
C
Chris Mason 已提交
1481
	for (i = 0; i <= orig_level; i++) {
1482 1483
		if (path->nodes[i]) {
			btrfs_block_release(root, path->nodes[i]);
C
Chris Mason 已提交
1484
		}
C
Chris Mason 已提交
1485
	}
1486
	btrfs_free_path(path);
C
Chris Mason 已提交
1487
	return ret;
C
Chris Mason 已提交
1488
}
C
Chris Mason 已提交
1489

C
Chris Mason 已提交
1490
static int free_block_group_radix(struct radix_tree_root *radix)
C
Chris Mason 已提交
1491 1492 1493 1494 1495 1496
{
	int ret;
	struct btrfs_block_group_cache *cache[8];
	int i;

	while(1) {
C
Chris Mason 已提交
1497
		ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
C
Chris Mason 已提交
1498 1499 1500 1501
					     ARRAY_SIZE(cache));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
1502
			radix_tree_delete(radix, cache[i]->key.objectid +
C
Chris Mason 已提交
1503 1504 1505 1506 1507 1508 1509
					  cache[i]->key.offset - 1);
			kfree(cache[i]);
		}
	}
	return 0;
}

C
Chris Mason 已提交
1510 1511 1512 1513
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	int ret;
	int ret2;
1514 1515
	unsigned long gang[16];
	int i;
C
Chris Mason 已提交
1516 1517 1518 1519 1520 1521 1522

	ret = free_block_group_radix(&info->block_group_radix);
	ret2 = free_block_group_radix(&info->block_group_data_radix);
	if (ret)
		return ret;
	if (ret2)
		return ret2;
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532

	while(1) {
		ret = find_first_radix_bit(&info->extent_map_radix,
					   gang, 0, ARRAY_SIZE(gang));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			clear_radix_bit(&info->extent_map_radix, gang[i]);
		}
	}
C
Chris Mason 已提交
1533 1534 1535
	return 0;
}

C
Chris Mason 已提交
1536 1537 1538 1539 1540 1541 1542
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
	struct btrfs_block_group_item *bi;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
1543 1544
	struct btrfs_fs_info *info = root->fs_info;
	struct radix_tree_root *radix;
C
Chris Mason 已提交
1545 1546 1547
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_leaf *leaf;
C
Chris Mason 已提交
1548
	u64 group_size_blocks;
1549
	u64 used;
C
Chris Mason 已提交
1550

C
Chris Mason 已提交
1551 1552
	group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
		root->fs_info->sb->s_blocksize_bits;
C
Chris Mason 已提交
1553
	root = info->extent_root;
C
Chris Mason 已提交
1554 1555 1556 1557 1558 1559 1560 1561 1562 1563
	key.objectid = 0;
	key.offset = group_size_blocks;
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
1564
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		btrfs_disk_key_to_cpu(&found_key,
				      &leaf->items[path->slots[0]].key);
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
1578

C
Chris Mason 已提交
1579 1580 1581
		bi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_block_group_item);
		if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
C
Chris Mason 已提交
1582
			radix = &info->block_group_data_radix;
C
Chris Mason 已提交
1583 1584
			cache->data = 1;
		} else {
C
Chris Mason 已提交
1585
			radix = &info->block_group_radix;
C
Chris Mason 已提交
1586 1587
			cache->data = 0;
		}
C
Chris Mason 已提交
1588

C
Chris Mason 已提交
1589 1590
		memcpy(&cache->item, bi, sizeof(*bi));
		memcpy(&cache->key, &found_key, sizeof(found_key));
1591 1592
		cache->last_alloc = cache->key.objectid;
		cache->first_free = cache->key.objectid;
1593
		cache->last_prealloc = cache->key.objectid;
C
Chris Mason 已提交
1594
		cache->pinned = 0;
1595 1596
		cache->cached = 0;

C
Chris Mason 已提交
1597 1598
		cache->radix = radix;

C
Chris Mason 已提交
1599 1600
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
C
Chris Mason 已提交
1601
		ret = radix_tree_insert(radix, found_key.objectid +
C
Chris Mason 已提交
1602 1603 1604
					found_key.offset - 1,
					(void *)cache);
		BUG_ON(ret);
1605
		used = btrfs_block_group_used(bi);
C
Chris Mason 已提交
1606
		if (used < div_factor(key.offset, 8)) {
C
Chris Mason 已提交
1607
			radix_tree_tag_set(radix, found_key.objectid +
1608 1609 1610
					   found_key.offset - 1,
					   BTRFS_BLOCK_GROUP_AVAIL);
		}
C
Chris Mason 已提交
1611
		if (key.objectid >=
C
Chris Mason 已提交
1612
		    btrfs_super_total_blocks(info->disk_super))
C
Chris Mason 已提交
1613 1614 1615 1616 1617 1618
			break;
	}

	btrfs_free_path(path);
	return 0;
}