extent-tree.c 41.3 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19
#include <linux/module.h>
20 21 22
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
23
#include "transaction.h"
24

25
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26 27 28
			    *orig_root, u64 num_blocks, u64 search_start,
			    u64 search_end, u64 hint_block,
			    struct btrfs_key *ins, int data);
29 30
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
31 32
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
33

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
static void reada_extent_leaves(struct btrfs_root *root,
				struct btrfs_path *path, u64 limit)
{
	struct btrfs_node *node;
	int i;
	int nritems;
	u64 item_objectid;
	u64 blocknr;
	int slot;
	int ret;

	if (!path->nodes[1])
		return;
	node = btrfs_buffer_node(path->nodes[1]);
	slot = path->slots[1] + 1;
	nritems = btrfs_header_nritems(&node->header);
	for (i = slot; i < nritems && i < slot + 8; i++) {
		item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
		if (item_objectid > limit)
			break;
		blocknr = btrfs_node_blockptr(node, i);
		ret = readahead_tree_block(root, blocknr);
		if (ret)
			break;
	}
}

61 62 63 64 65 66 67 68 69 70 71 72
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct btrfs_leaf *leaf;
	struct radix_tree_root *extent_radix;
	int slot;
	u64 i;
	u64 last = 0;
	u64 hole_size;
73
	u64 limit;
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
	int found = 0;

	root = root->fs_info->extent_root;
	extent_radix = &root->fs_info->extent_map_radix;

	if (block_group->cached)
		return 0;
	if (block_group->data)
		return 0;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	key.objectid = block_group->key.objectid;
	key.flags = 0;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	if (ret && path->slots[0] > 0)
		path->slots[0]--;
95 96
	limit = block_group->key.objectid + block_group->key.offset;
	reada_extent_leaves(root, path, limit);
97 98 99 100
	while(1) {
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(&leaf->header)) {
101
			reada_extent_leaves(root, path, limit);
102
			ret = btrfs_next_leaf(root, path);
103
			if (ret == 0) {
104
				continue;
105
			} else {
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
				if (found) {
					hole_size = block_group->key.objectid +
						block_group->key.offset - last;
				} else {
					last = block_group->key.objectid;
					hole_size = block_group->key.offset;
				}
				for (i = 0; i < hole_size; i++) {
					set_radix_bit(extent_radix,
						      last + i);
				}
				break;
			}
		}
		btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			if (found) {
				hole_size = block_group->key.objectid +
					block_group->key.offset - last;
			} else {
				last = block_group->key.objectid;
				hole_size = block_group->key.offset;
			}
			for (i = 0; i < hole_size; i++) {
				set_radix_bit(extent_radix, last + i);
			}
			break;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
				last = key.objectid + key.offset;
				found = 1;
			} else {
				hole_size = key.objectid - last;
				for (i = 0; i < hole_size; i++) {
					set_radix_bit(extent_radix, last + i);
				}
				last = key.objectid + key.offset;
			}
		}
		path->slots[0]++;
	}

	block_group->cached = 1;
	btrfs_free_path(path);
	return 0;
}

155 156 157
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
							 u64 blocknr)
C
Chris Mason 已提交
158 159 160 161 162 163 164 165
{
	struct btrfs_block_group_cache *block_group;
	int ret;

	ret = radix_tree_gang_lookup(&info->block_group_radix,
				     (void **)&block_group,
				     blocknr, 1);
	if (ret) {
C
Chris Mason 已提交
166
		if (block_group->key.objectid <= blocknr && blocknr <=
C
Chris Mason 已提交
167 168 169 170 171 172 173
		    block_group->key.objectid + block_group->key.offset)
			return block_group;
	}
	ret = radix_tree_gang_lookup(&info->block_group_data_radix,
				     (void **)&block_group,
				     blocknr, 1);
	if (ret) {
C
Chris Mason 已提交
174
		if (block_group->key.objectid <= blocknr && blocknr <=
C
Chris Mason 已提交
175 176 177 178 179 180
		    block_group->key.objectid + block_group->key.offset)
			return block_group;
	}
	return NULL;
}

181 182 183
static u64 leaf_range(struct btrfs_root *root)
{
	u64 size = BTRFS_LEAF_DATA_SIZE(root);
C
Chris Mason 已提交
184 185
	do_div(size, sizeof(struct btrfs_extent_item) +
		sizeof(struct btrfs_item));
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
	return size;
}

static u64 find_search_start(struct btrfs_root *root,
			     struct btrfs_block_group_cache **cache_ret,
			     u64 search_start, int num)
{
	unsigned long gang[8];
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	u64 last = max(search_start, cache->key.objectid);

	if (cache->data)
		goto out;
	if (num > 1) {
		last = max(last, cache->last_prealloc);
	}
again:
	cache_block_group(root, cache);
	while(1) {
		ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
					   gang, last, ARRAY_SIZE(gang));
		if (!ret)
			goto out;
		last = gang[ret-1] + 1;
		if (num > 1) {
			if (ret != ARRAY_SIZE(gang)) {
				goto new_group;
			}
			if (gang[ret-1] - gang[0] > leaf_range(root)) {
				continue;
			}
		}
		if (gang[0] >= cache->key.objectid + cache->key.offset) {
			goto new_group;
		}
		return gang[0];
	}
out:
	return max(cache->last_alloc, search_start);

new_group:
228 229
	cache = btrfs_lookup_block_group(root->fs_info,
					 last + cache->key.offset - 1);
230 231 232 233
	if (!cache) {
		return max((*cache_ret)->last_alloc, search_start);
	}
	cache = btrfs_find_block_group(root, cache,
234
				       last + cache->key.offset - 1, 0, 0);
235 236 237 238
	*cache_ret = cache;
	goto again;
}

C
Chris Mason 已提交
239 240 241 242 243 244 245
static u64 div_factor(u64 num, int factor)
{
	num *= factor;
	do_div(num, 10);
	return num;
}

246 247
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
248
						 *hint, u64 search_start,
249
						 int data, int owner)
C
Chris Mason 已提交
250 251
{
	struct btrfs_block_group_cache *cache[8];
252
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
253
	struct btrfs_fs_info *info = root->fs_info;
C
Chris Mason 已提交
254
	struct radix_tree_root *radix;
C
Chris Mason 已提交
255
	struct radix_tree_root *swap_radix;
C
Chris Mason 已提交
256
	u64 used;
257 258
	u64 last = 0;
	u64 hint_last;
C
Chris Mason 已提交
259 260
	int i;
	int ret;
261
	int full_search = 0;
262
	int factor = 8;
C
Chris Mason 已提交
263
	int data_swap = 0;
264 265 266

	if (!owner)
		factor = 5;
C
Chris Mason 已提交
267

C
Chris Mason 已提交
268
	if (data) {
C
Chris Mason 已提交
269
		radix = &info->block_group_data_radix;
C
Chris Mason 已提交
270 271
		swap_radix = &info->block_group_radix;
	} else {
C
Chris Mason 已提交
272
		radix = &info->block_group_radix;
C
Chris Mason 已提交
273 274
		swap_radix = &info->block_group_data_radix;
	}
C
Chris Mason 已提交
275 276 277

	if (search_start) {
		struct btrfs_block_group_cache *shint;
278
		shint = btrfs_lookup_block_group(info, search_start);
C
Chris Mason 已提交
279 280 281
		if (shint->data == data) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
C
Chris Mason 已提交
282
			    div_factor(shint->key.offset, factor)) {
C
Chris Mason 已提交
283 284 285 286 287
				return shint;
			}
		}
	}
	if (hint && hint->data == data) {
288
		used = btrfs_block_group_used(&hint->item);
C
Chris Mason 已提交
289 290
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
291 292
			return hint;
		}
C
Chris Mason 已提交
293
		if (used >= div_factor(hint->key.offset, 8)) {
C
Chris Mason 已提交
294 295 296 297 298
			radix_tree_tag_clear(radix,
					     hint->key.objectid +
					     hint->key.offset - 1,
					     BTRFS_BLOCK_GROUP_AVAIL);
		}
299
		last = hint->key.offset * 3;
C
Chris Mason 已提交
300
		if (hint->key.objectid >= last)
301 302
			last = max(search_start + hint->key.offset - 1,
				   hint->key.objectid - last);
C
Chris Mason 已提交
303 304
		else
			last = hint->key.objectid + hint->key.offset;
305 306
		hint_last = last;
	} else {
307 308 309 310 311 312
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
313
	}
C
Chris Mason 已提交
314
	while(1) {
C
Chris Mason 已提交
315
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
C
Chris Mason 已提交
316
						 last, ARRAY_SIZE(cache),
317
						 BTRFS_BLOCK_GROUP_AVAIL);
C
Chris Mason 已提交
318 319 320
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
321 322
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
C
Chris Mason 已提交
323
			used = btrfs_block_group_used(&cache[i]->item);
C
Chris Mason 已提交
324
			if (used + cache[i]->pinned <
C
Chris Mason 已提交
325
			    div_factor(cache[i]->key.offset, factor)) {
326 327
				found_group = cache[i];
				goto found;
C
Chris Mason 已提交
328
			}
C
Chris Mason 已提交
329
			if (used >= div_factor(cache[i]->key.offset, 8)) {
C
Chris Mason 已提交
330 331 332 333 334
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
335
		}
336
		cond_resched();
C
Chris Mason 已提交
337
	}
338 339
	last = hint_last;
again:
C
Chris Mason 已提交
340
	while(1) {
C
Chris Mason 已提交
341 342
		ret = radix_tree_gang_lookup(radix, (void **)cache,
					     last, ARRAY_SIZE(cache));
C
Chris Mason 已提交
343 344 345
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
346 347
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
C
Chris Mason 已提交
348
			used = btrfs_block_group_used(&cache[i]->item);
C
Chris Mason 已提交
349
			if (used + cache[i]->pinned < cache[i]->key.offset) {
350 351
				found_group = cache[i];
				goto found;
C
Chris Mason 已提交
352
			}
C
Chris Mason 已提交
353 354 355 356 357 358
			if (used >= cache[i]->key.offset) {
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
359
		}
360
		cond_resched();
C
Chris Mason 已提交
361
	}
362
	if (!full_search) {
C
Chris Mason 已提交
363
		last = search_start;
364 365 366
		full_search = 1;
		goto again;
	}
C
Chris Mason 已提交
367 368 369 370 371 372 373 374
	if (!data_swap) {
		struct radix_tree_root *tmp = radix;
		data_swap = 1;
		radix = swap_radix;
		swap_radix = tmp;
		last = search_start;
		goto again;
	}
375
	if (!found_group) {
C
Chris Mason 已提交
376
		ret = radix_tree_gang_lookup(radix,
377
					     (void **)&found_group, 0, 1);
C
Chris Mason 已提交
378 379 380 381 382
		if (ret == 0) {
			ret = radix_tree_gang_lookup(swap_radix,
						     (void **)&found_group,
						     0, 1);
		}
383 384
		BUG_ON(ret != 1);
	}
C
Chris Mason 已提交
385
found:
386
	return found_group;
C
Chris Mason 已提交
387 388
}

C
Chris Mason 已提交
389 390 391
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 blocknr, u64 num_blocks)
C
Chris Mason 已提交
392
{
393
	struct btrfs_path *path;
C
Chris Mason 已提交
394
	int ret;
C
Chris Mason 已提交
395
	struct btrfs_key key;
C
Chris Mason 已提交
396 397
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
398
	struct btrfs_key ins;
C
Chris Mason 已提交
399
	u32 refs;
C
Chris Mason 已提交
400

401
	find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0,
C
Chris Mason 已提交
402
			 &ins, 0);
403 404 405
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
C
Chris Mason 已提交
406 407
	key.objectid = blocknr;
	key.flags = 0;
408
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
409
	key.offset = num_blocks;
410
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
411
				0, 1);
412
	if (ret != 0) {
413
		BUG();
414
	}
C
Chris Mason 已提交
415
	BUG_ON(ret != 0);
416 417
	l = btrfs_buffer_leaf(path->nodes[0]);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
C
Chris Mason 已提交
418 419
	refs = btrfs_extent_refs(item);
	btrfs_set_extent_refs(item, refs + 1);
420
	btrfs_mark_buffer_dirty(path->nodes[0]);
421

422 423
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
424
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
425
	del_pending_extents(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
426 427 428
	return 0;
}

C
Chris Mason 已提交
429 430 431
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 blocknr,
			     u64 num_blocks, u32 *refs)
432
{
433
	struct btrfs_path *path;
434
	int ret;
C
Chris Mason 已提交
435
	struct btrfs_key key;
C
Chris Mason 已提交
436 437
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
438 439 440

	path = btrfs_alloc_path();
	btrfs_init_path(path);
441
	key.objectid = blocknr;
442
	key.offset = num_blocks;
443 444
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
445
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
446
				0, 0);
447 448
	if (ret != 0)
		BUG();
449 450
	l = btrfs_buffer_leaf(path->nodes[0]);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
C
Chris Mason 已提交
451
	*refs = btrfs_extent_refs(item);
452 453
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
454 455 456
	return 0;
}

C
Chris Mason 已提交
457 458 459
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root)
{
C
Chris Mason 已提交
460
	return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
C
Chris Mason 已提交
461 462
}

463
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
C
Chris Mason 已提交
464
		  struct buffer_head *buf)
C
Chris Mason 已提交
465 466
{
	u64 blocknr;
C
Chris Mason 已提交
467
	struct btrfs_node *buf_node;
468 469 470
	struct btrfs_leaf *buf_leaf;
	struct btrfs_disk_key *key;
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
471
	int i;
472 473
	int leaf;
	int ret;
474

475
	if (!root->ref_cows)
476
		return 0;
C
Chris Mason 已提交
477
	buf_node = btrfs_buffer_node(buf);
478 479
	leaf = btrfs_is_leaf(buf_node);
	buf_leaf = btrfs_buffer_leaf(buf);
C
Chris Mason 已提交
480
	for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
481
		if (leaf) {
C
Chris Mason 已提交
482
			u64 disk_blocknr;
483 484 485 486 487
			key = &buf_leaf->items[i].key;
			if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf_leaf, i,
					    struct btrfs_file_extent_item);
488 489 490
			if (btrfs_file_extent_type(fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
C
Chris Mason 已提交
491 492 493 494
			disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
			if (disk_blocknr == 0)
				continue;
			ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
495 496 497 498
				    btrfs_file_extent_disk_num_blocks(fi));
			BUG_ON(ret);
		} else {
			blocknr = btrfs_node_blockptr(buf_node, i);
C
Chris Mason 已提交
499
			ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
500 501
			BUG_ON(ret);
		}
C
Chris Mason 已提交
502 503 504 505
	}
	return 0;
}

C
Chris Mason 已提交
506 507 508 509 510 511 512 513 514 515 516
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_block_group_item *bi;
	struct btrfs_key ins;

517
	find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0);
C
Chris Mason 已提交
518 519 520 521 522 523 524 525 526 527 528 529 530 531
	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	BUG_ON(ret);
	bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
			    struct btrfs_block_group_item);
	memcpy(bi, &cache->item, sizeof(*bi));
	mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(extent_root, path);

	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
C
Chris Mason 已提交
532 533
	if (cache->data)
		cache->last_alloc = cache->first_free;
C
Chris Mason 已提交
534 535 536 537
	return 0;

}

C
Chris Mason 已提交
538 539 540
static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct radix_tree_root *radix)
C
Chris Mason 已提交
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
{
	struct btrfs_block_group_cache *cache[8];
	int ret;
	int err = 0;
	int werr = 0;
	int i;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
						 0, ARRAY_SIZE(cache),
						 BTRFS_BLOCK_GROUP_DIRTY);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			radix_tree_tag_clear(radix, cache[i]->key.objectid +
					     cache[i]->key.offset - 1,
					     BTRFS_BLOCK_GROUP_DIRTY);
			err = write_one_cache_group(trans, root,
						    path, cache[i]);
			if (err)
				werr = err;
		}
	}
	btrfs_free_path(path);
	return werr;
}

C
Chris Mason 已提交
573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int ret;
	int ret2;
	ret = write_dirty_block_radix(trans, root,
				      &root->fs_info->block_group_radix);
	ret2 = write_dirty_block_radix(trans, root,
				      &root->fs_info->block_group_data_radix);
	if (ret)
		return ret;
	if (ret2)
		return ret2;
	return 0;
}

C
Chris Mason 已提交
589 590
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
C
Chris Mason 已提交
591 592
			      u64 blocknr, u64 num, int alloc, int mark_free,
			      int data)
C
Chris Mason 已提交
593 594 595 596 597 598
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num;
	u64 old_val;
	u64 block_in_group;
599
	u64 i;
C
Chris Mason 已提交
600
	int ret;
C
Chris Mason 已提交
601

C
Chris Mason 已提交
602
	while(total) {
603
		cache = btrfs_lookup_block_group(info, blocknr);
C
Chris Mason 已提交
604
		if (!cache) {
C
Chris Mason 已提交
605
			return -1;
C
Chris Mason 已提交
606
		}
C
Chris Mason 已提交
607 608
		block_in_group = blocknr - cache->key.objectid;
		WARN_ON(block_in_group > cache->key.offset);
C
Chris Mason 已提交
609
		radix_tree_tag_set(cache->radix, cache->key.objectid +
C
Chris Mason 已提交
610
				   cache->key.offset - 1,
C
Chris Mason 已提交
611 612 613 614
				   BTRFS_BLOCK_GROUP_DIRTY);

		old_val = btrfs_block_group_used(&cache->item);
		num = min(total, cache->key.offset - block_in_group);
C
Chris Mason 已提交
615 616 617
		if (alloc) {
			if (blocknr > cache->last_alloc)
				cache->last_alloc = blocknr;
618 619 620 621 622 623
			if (!cache->data) {
				for (i = 0; i < num; i++) {
					clear_radix_bit(&info->extent_map_radix,
						        blocknr + i);
				}
			}
C
Chris Mason 已提交
624
			if (cache->data != data &&
C
Chris Mason 已提交
625
			    old_val < (cache->key.offset >> 1)) {
C
Chris Mason 已提交
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
				cache->data = data;
				radix_tree_delete(cache->radix,
						  cache->key.objectid +
						  cache->key.offset - 1);

				if (data) {
					cache->radix =
						&info->block_group_data_radix;
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
					cache->radix = &info->block_group_radix;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
				ret = radix_tree_insert(cache->radix,
							cache->key.objectid +
							cache->key.offset - 1,
							(void *)cache);
			}
			old_val += num;
C
Chris Mason 已提交
647
		} else {
C
Chris Mason 已提交
648
			old_val -= num;
C
Chris Mason 已提交
649 650
			if (blocknr < cache->first_free)
				cache->first_free = blocknr;
651 652 653 654 655 656
			if (!cache->data && mark_free) {
				for (i = 0; i < num; i++) {
					set_radix_bit(&info->extent_map_radix,
						      blocknr + i);
				}
			}
C
Chris Mason 已提交
657 658
			if (old_val < (cache->key.offset >> 1) &&
			    old_val + num >= (cache->key.offset >> 1)) {
659 660 661 662 663
				radix_tree_tag_set(cache->radix,
						   cache->key.objectid +
						   cache->key.offset - 1,
						   BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
664
		}
C
Chris Mason 已提交
665
		btrfs_set_block_group_used(&cache->item, old_val);
666 667
		total -= num;
		blocknr += num;
C
Chris Mason 已提交
668 669 670 671
	}
	return 0;
}

C
Chris Mason 已提交
672 673 674 675 676 677 678
static int try_remove_page(struct address_space *mapping, unsigned long index)
{
	int ret;
	ret = invalidate_mapping_pages(mapping, index, index);
	return ret;
}

679 680
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
			       btrfs_root *root)
681
{
C
Chris Mason 已提交
682
	unsigned long gang[8];
C
Chris Mason 已提交
683
	struct inode *btree_inode = root->fs_info->btree_inode;
C
Chris Mason 已提交
684
	struct btrfs_block_group_cache *block_group;
685
	u64 first = 0;
686 687
	int ret;
	int i;
C
Chris Mason 已提交
688
	struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
689
	struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
690 691

	while(1) {
692
		ret = find_first_radix_bit(pinned_radix, gang, 0,
C
Chris Mason 已提交
693
					   ARRAY_SIZE(gang));
694 695
		if (!ret)
			break;
696
		if (!first)
C
Chris Mason 已提交
697
			first = gang[0];
698
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
699
			clear_radix_bit(pinned_radix, gang[i]);
700 701
			block_group = btrfs_lookup_block_group(root->fs_info,
							       gang[i]);
C
Chris Mason 已提交
702 703 704 705 706
			if (block_group) {
				WARN_ON(block_group->pinned == 0);
				block_group->pinned--;
				if (gang[i] < block_group->last_alloc)
					block_group->last_alloc = gang[i];
707 708 709 710
				if (gang[i] < block_group->last_prealloc)
					block_group->last_prealloc = gang[i];
				if (!block_group->data)
					set_radix_bit(extent_radix, gang[i]);
C
Chris Mason 已提交
711
			}
C
Chris Mason 已提交
712 713 714
			try_remove_page(btree_inode->i_mapping,
					gang[i] << (PAGE_CACHE_SHIFT -
						    btree_inode->i_blkbits));
715
		}
716 717 718 719
	}
	return 0;
}

720 721
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
C
Chris Mason 已提交
722
{
C
Chris Mason 已提交
723
	struct btrfs_key ins;
C
Chris Mason 已提交
724
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
725 726
	int i;
	int ret;
727 728
	u64 super_blocks_used;
	struct btrfs_fs_info *info = extent_root->fs_info;
C
Chris Mason 已提交
729

C
Chris Mason 已提交
730
	btrfs_set_extent_refs(&extent_item, 1);
C
Chris Mason 已提交
731 732
	ins.offset = 1;
	ins.flags = 0;
733
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
734
	btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
C
Chris Mason 已提交
735

736 737
	for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
		ins.objectid = extent_root->fs_info->extent_tree_insert[i];
738 739 740
		super_blocks_used = btrfs_super_blocks_used(info->disk_super);
		btrfs_set_super_blocks_used(info->disk_super,
					    super_blocks_used + 1);
741 742
		ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
					sizeof(extent_item));
C
Chris Mason 已提交
743 744
		BUG_ON(ret);
	}
745 746
	extent_root->fs_info->extent_tree_insert_nr = 0;
	extent_root->fs_info->extent_tree_prealloc_nr = 0;
C
Chris Mason 已提交
747 748 749
	return 0;
}

C
Chris Mason 已提交
750
static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
C
Chris Mason 已提交
751 752
{
	int err;
C
Chris Mason 已提交
753
	struct btrfs_header *header;
C
Chris Mason 已提交
754 755
	struct buffer_head *bh;

C
Chris Mason 已提交
756
	if (!pending) {
757
		bh = btrfs_find_tree_block(root, blocknr);
C
Chris Mason 已提交
758 759 760 761 762 763 764 765 766 767
		if (bh) {
			if (buffer_uptodate(bh)) {
				u64 transid =
				    root->fs_info->running_transaction->transid;
				header = btrfs_buffer_header(bh);
				if (btrfs_header_generation(header) ==
				    transid) {
					btrfs_block_release(root, bh);
					return 0;
				}
C
Chris Mason 已提交
768
			}
C
Chris Mason 已提交
769
			btrfs_block_release(root, bh);
C
Chris Mason 已提交
770 771
		}
		err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
C
Chris Mason 已提交
772 773
		if (!err) {
			struct btrfs_block_group_cache *cache;
774 775
			cache = btrfs_lookup_block_group(root->fs_info,
							 blocknr);
C
Chris Mason 已提交
776 777 778
			if (cache)
				cache->pinned++;
		}
C
Chris Mason 已提交
779 780 781
	} else {
		err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
	}
C
Chris Mason 已提交
782
	BUG_ON(err < 0);
C
Chris Mason 已提交
783 784 785
	return 0;
}

786
/*
787
 * remove an extent from the root, returns 0 on success
788
 */
789
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
790 791
			 *root, u64 blocknr, u64 num_blocks, int pin,
			 int mark_free)
792
{
793
	struct btrfs_path *path;
C
Chris Mason 已提交
794
	struct btrfs_key key;
795 796
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
797
	int ret;
C
Chris Mason 已提交
798
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
799
	struct btrfs_key ins;
C
Chris Mason 已提交
800
	u32 refs;
C
Chris Mason 已提交
801

802 803
	key.objectid = blocknr;
	key.flags = 0;
804
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
805 806
	key.offset = num_blocks;

807
	find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0);
808 809 810
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
811

812
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
813 814 815
	if (ret) {
		BUG();
	}
816
	ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
C
Chris Mason 已提交
817
			    struct btrfs_extent_item);
818
	BUG_ON(ei->refs == 0);
C
Chris Mason 已提交
819 820
	refs = btrfs_extent_refs(ei) - 1;
	btrfs_set_extent_refs(ei, refs);
821
	btrfs_mark_buffer_dirty(path->nodes[0]);
C
Chris Mason 已提交
822
	if (refs == 0) {
823
		u64 super_blocks_used;
C
Chris Mason 已提交
824 825

		if (pin) {
C
Chris Mason 已提交
826
			ret = pin_down_block(root, blocknr, 0);
C
Chris Mason 已提交
827 828 829
			BUG_ON(ret);
		}

830 831 832
		super_blocks_used = btrfs_super_blocks_used(info->disk_super);
		btrfs_set_super_blocks_used(info->disk_super,
					    super_blocks_used - num_blocks);
833
		ret = btrfs_del_item(trans, extent_root, path);
834 835
		if (ret)
			BUG();
836
		ret = update_block_group(trans, root, blocknr, num_blocks, 0,
C
Chris Mason 已提交
837
					 mark_free, 0);
C
Chris Mason 已提交
838
		BUG_ON(ret);
839
	}
840
	btrfs_free_path(path);
841
	finish_current_insert(trans, extent_root);
842 843 844 845 846 847 848
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
849 850
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
851 852
{
	int ret;
C
Chris Mason 已提交
853 854
	int wret;
	int err = 0;
C
Chris Mason 已提交
855
	unsigned long gang[4];
856
	int i;
C
Chris Mason 已提交
857 858
	struct radix_tree_root *pending_radix;
	struct radix_tree_root *pinned_radix;
C
Chris Mason 已提交
859
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
860 861 862

	pending_radix = &extent_root->fs_info->pending_del_radix;
	pinned_radix = &extent_root->fs_info->pinned_radix;
863 864

	while(1) {
865
		ret = find_first_radix_bit(pending_radix, gang, 0,
C
Chris Mason 已提交
866
					   ARRAY_SIZE(gang));
867 868 869
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
870
			wret = set_radix_bit(pinned_radix, gang[i]);
C
Chris Mason 已提交
871
			if (wret == 0) {
872 873
				cache =
				  btrfs_lookup_block_group(extent_root->fs_info,
C
Chris Mason 已提交
874 875 876 877 878 879 880 881 882
							   gang[i]);
				if (cache)
					cache->pinned++;
			}
			if (wret < 0) {
				printk(KERN_CRIT "set_radix_bit, err %d\n",
				       wret);
				BUG_ON(wret < 0);
			}
C
Chris Mason 已提交
883 884
			wret = clear_radix_bit(pending_radix, gang[i]);
			BUG_ON(wret);
885
			wret = __free_extent(trans, extent_root,
886
					     gang[i], 1, 0, 0);
C
Chris Mason 已提交
887 888
			if (wret)
				err = wret;
889 890
		}
	}
C
Chris Mason 已提交
891
	return err;
892 893 894 895 896
}

/*
 * remove an extent from the root, returns 0 on success
 */
897 898
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, u64 blocknr, u64 num_blocks, int pin)
899
{
900
	struct btrfs_root *extent_root = root->fs_info->extent_root;
901 902
	int pending_ret;
	int ret;
903

904
	if (root == extent_root) {
C
Chris Mason 已提交
905
		pin_down_block(root, blocknr, 1);
906 907
		return 0;
	}
908
	ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
C
Chris Mason 已提交
909
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
910 911 912 913 914 915 916
	return ret ? ret : pending_ret;
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
917
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
918 919 920
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
921 922
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			    *orig_root, u64 num_blocks, u64 search_start, u64
923 924
			    search_end, u64 hint_block,
			    struct btrfs_key *ins, int data)
925
{
926
	struct btrfs_path *path;
C
Chris Mason 已提交
927
	struct btrfs_key key;
928 929 930
	int ret;
	u64 hole_size = 0;
	int slot = 0;
C
Chris Mason 已提交
931
	u64 last_block = 0;
C
Chris Mason 已提交
932
	u64 test_block;
C
Chris Mason 已提交
933
	u64 orig_search_start = search_start;
934
	int start_found;
C
Chris Mason 已提交
935
	struct btrfs_leaf *l;
936
	struct btrfs_root * root = orig_root->fs_info->extent_root;
937
	struct btrfs_fs_info *info = root->fs_info;
938
	int total_needed = num_blocks;
939 940
	int total_found = 0;
	int fill_prealloc = 0;
C
Chris Mason 已提交
941
	int level;
C
Chris Mason 已提交
942
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
943
	int full_scan = 0;
944
	int wrapped = 0;
945
	u64 limit;
946

947 948 949 950
	path = btrfs_alloc_path();
	ins->flags = 0;
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

C
Chris Mason 已提交
951
	level = btrfs_header_level(btrfs_buffer_header(root->node));
952 953 954
	if (num_blocks == 0) {
		fill_prealloc = 1;
		num_blocks = 1;
955
		total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
956
	}
C
Chris Mason 已提交
957 958
	if (search_end == (u64)-1)
		search_end = btrfs_super_total_blocks(info->disk_super);
959
	if (hint_block) {
960
		block_group = btrfs_lookup_block_group(info, hint_block);
C
Chris Mason 已提交
961
		block_group = btrfs_find_block_group(root, block_group,
962
						     hint_block, data, 1);
C
Chris Mason 已提交
963 964 965
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group, 0,
966
						     data, 1);
C
Chris Mason 已提交
967 968 969
	}

check_failed:
C
Chris Mason 已提交
970
	if (!block_group->data)
971 972
		search_start = find_search_start(root, &block_group,
						 search_start, total_needed);
973
	else if (!full_scan)
974 975
		search_start = max(block_group->last_alloc, search_start);

976
	btrfs_init_path(path);
977 978 979
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
980

981
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
982 983
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
984

985
	if (path->slots[0] > 0) {
986
		path->slots[0]--;
987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007
	}

	l = btrfs_buffer_leaf(path->nodes[0]);
	btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
	/*
	 * a rare case, go back one key if we hit a block group item
	 * instead of an extent item
	 */
	if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
	    key.objectid + key.offset >= search_start) {
		ins->objectid = key.objectid;
		ins->offset = key.offset - 1;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
		if (ret < 0)
			goto error;

		if (path->slots[0] > 0) {
			path->slots[0]--;
		}
	}
1008

1009
	while (1) {
1010 1011
		l = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
1012
		if (slot >= btrfs_header_nritems(&l->header)) {
1013 1014 1015 1016
			if (fill_prealloc) {
				info->extent_tree_prealloc_nr = 0;
				total_found = 0;
			}
1017 1018
			if (start_found)
				limit = last_block +
C
Chris Mason 已提交
1019
					(block_group->key.offset >> 1);
1020 1021
			else
				limit = search_start +
C
Chris Mason 已提交
1022
					(block_group->key.offset >> 1);
1023
			ret = btrfs_next_leaf(root, path);
1024 1025
			if (ret == 0)
				continue;
C
Chris Mason 已提交
1026 1027
			if (ret < 0)
				goto error;
1028 1029
			if (!start_found) {
				ins->objectid = search_start;
C
Chris Mason 已提交
1030
				ins->offset = search_end - search_start;
1031 1032 1033 1034 1035
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = last_block > search_start ?
					last_block : search_start;
C
Chris Mason 已提交
1036
			ins->offset = search_end - ins->objectid;
1037 1038
			goto check_pending;
		}
1039

C
Chris Mason 已提交
1040
		btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1041 1042 1043 1044 1045 1046 1047 1048 1049
		if (key.objectid >= search_start && key.objectid > last_block &&
		    start_found) {
			if (last_block < search_start)
				last_block = search_start;
			hole_size = key.objectid - last_block;
			if (hole_size >= num_blocks) {
				ins->objectid = last_block;
				ins->offset = hole_size;
				goto check_pending;
1050
			}
1051
		}
1052 1053 1054 1055

		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
			goto next;

1056
		start_found = 1;
C
Chris Mason 已提交
1057
		last_block = key.objectid + key.offset;
1058
		if (!full_scan && last_block >= block_group->key.objectid +
C
Chris Mason 已提交
1059 1060 1061 1062 1063 1064
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
				block_group->key.offset * 2;
			goto new_group;
		}
C
Chris Mason 已提交
1065
next:
1066
		path->slots[0]++;
1067
		cond_resched();
1068 1069 1070 1071 1072 1073
	}
	// FIXME -ENOSPC
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1074
	btrfs_release_path(root, path);
1075
	BUG_ON(ins->objectid < search_start);
1076

C
Chris Mason 已提交
1077
	if (ins->objectid + num_blocks >= search_end) {
1078 1079 1080 1081
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
C
Chris Mason 已提交
1082
		search_start = orig_search_start;
1083 1084 1085 1086
		if (wrapped)
			full_scan = 1;
		else
			wrapped = 1;
C
Chris Mason 已提交
1087
		goto new_group;
1088
	}
C
Chris Mason 已提交
1089
	for (test_block = ins->objectid;
1090 1091
	     test_block < ins->objectid + num_blocks; test_block++) {
		if (test_radix_bit(&info->pinned_radix, test_block)) {
C
Chris Mason 已提交
1092
			search_start = test_block + 1;
C
Chris Mason 已提交
1093
			goto new_group;
1094 1095
		}
	}
1096 1097 1098 1099 1100 1101 1102
	if (!fill_prealloc && info->extent_tree_insert_nr) {
		u64 last =
		  info->extent_tree_insert[info->extent_tree_insert_nr - 1];
		if (ins->objectid + num_blocks >
		    info->extent_tree_insert[0] &&
		    ins->objectid <= last) {
			search_start = last + 1;
1103
			WARN_ON(!full_scan);
C
Chris Mason 已提交
1104
			goto new_group;
1105 1106 1107 1108 1109 1110 1111 1112
		}
	}
	if (!fill_prealloc && info->extent_tree_prealloc_nr) {
		u64 first =
		  info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
		if (ins->objectid + num_blocks > first &&
		    ins->objectid <= info->extent_tree_prealloc[0]) {
			search_start = info->extent_tree_prealloc[0] + 1;
1113
			WARN_ON(!full_scan);
C
Chris Mason 已提交
1114
			goto new_group;
1115 1116 1117 1118 1119
		}
	}
	if (fill_prealloc) {
		int nr;
		test_block = ins->objectid;
1120 1121 1122 1123 1124
		if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
		    leaf_range(root)) {
			total_found = 0;
			info->extent_tree_prealloc_nr = total_found;
		}
1125 1126 1127 1128
		while(test_block < ins->objectid + ins->offset &&
		      total_found < total_needed) {
			nr = total_needed - total_found - 1;
			BUG_ON(nr < 0);
C
Chris Mason 已提交
1129
			info->extent_tree_prealloc[nr] = test_block;
1130 1131 1132 1133 1134
			total_found++;
			test_block++;
		}
		if (total_found < total_needed) {
			search_start = test_block;
C
Chris Mason 已提交
1135
			goto new_group;
1136
		}
C
Chris Mason 已提交
1137 1138
		info->extent_tree_prealloc_nr = total_found;
	}
1139
	if (!data) {
1140
		block_group = btrfs_lookup_block_group(info, ins->objectid);
1141 1142 1143 1144 1145 1146 1147
		if (block_group) {
			if (fill_prealloc)
				block_group->last_prealloc =
				     info->extent_tree_prealloc[total_needed-1];
			else
				trans->block_group = block_group;
		}
1148
	}
C
Chris Mason 已提交
1149
	ins->offset = num_blocks;
1150
	btrfs_free_path(path);
1151
	return 0;
C
Chris Mason 已提交
1152 1153

new_group:
C
Chris Mason 已提交
1154
	if (search_start + num_blocks >= search_end) {
C
Chris Mason 已提交
1155
		search_start = orig_search_start;
1156 1157 1158 1159 1160 1161 1162 1163
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
		if (wrapped)
			full_scan = 1;
		else
			wrapped = 1;
C
Chris Mason 已提交
1164
	}
1165
	block_group = btrfs_lookup_block_group(info, search_start);
1166
	cond_resched();
C
Chris Mason 已提交
1167 1168
	if (!full_scan)
		block_group = btrfs_find_block_group(root, block_group,
1169
						     search_start, data, 0);
C
Chris Mason 已提交
1170 1171
	goto check_failed;

C
Chris Mason 已提交
1172
error:
1173 1174
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1175
	return ret;
1176 1177 1178 1179 1180 1181 1182 1183
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1184 1185
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, u64 owner,
1186
		       u64 num_blocks, u64 hint_block,
C
Chris Mason 已提交
1187
		       u64 search_end, struct btrfs_key *ins, int data)
1188 1189 1190
{
	int ret;
	int pending_ret;
1191
	u64 super_blocks_used;
1192
	u64 search_start = 0;
1193 1194
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1195
	struct btrfs_extent_item extent_item;
1196
	struct btrfs_key prealloc_key;
C
Chris Mason 已提交
1197

C
Chris Mason 已提交
1198
	btrfs_set_extent_refs(&extent_item, 1);
1199
	btrfs_set_extent_owner(&extent_item, owner);
1200

C
Chris Mason 已提交
1201
	if (root == extent_root) {
1202 1203
		int nr;
		BUG_ON(info->extent_tree_prealloc_nr == 0);
C
Chris Mason 已提交
1204 1205
		BUG_ON(num_blocks != 1);
		ins->offset = 1;
1206 1207 1208 1209 1210
		info->extent_tree_prealloc_nr--;
		nr = info->extent_tree_prealloc_nr;
		ins->objectid = info->extent_tree_prealloc[nr];
		info->extent_tree_insert[info->extent_tree_insert_nr++] =
			ins->objectid;
C
Chris Mason 已提交
1211
		ret = update_block_group(trans, root,
C
Chris Mason 已提交
1212
					 ins->objectid, ins->offset, 1, 0, 0);
C
Chris Mason 已提交
1213
		BUG_ON(ret);
1214 1215
		return 0;
	}
1216 1217 1218 1219 1220 1221 1222

	/*
	 * if we're doing a data allocation, preallocate room in the
	 * extent tree first.  This way the extent tree blocks end up
	 * in the correct block group.
	 */
	if (data) {
1223
		ret = find_free_extent(trans, root, 0, 0,
1224
				       search_end, 0, &prealloc_key, 0);
1225 1226 1227 1228 1229 1230 1231 1232 1233 1234
		if (ret) {
			return ret;
		}
		if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
			int nr = info->extent_tree_prealloc_nr;
			search_end = info->extent_tree_prealloc[nr - 1] - 1;
		} else {
			search_start = info->extent_tree_prealloc[0] + 1;
		}
	}
1235 1236
	if (hint_block < search_start)
		hint_block = search_start;
1237
	/* do the real allocation */
1238
	ret = find_free_extent(trans, root, num_blocks, search_start,
1239
			       search_end, hint_block, ins, data);
1240
	if (ret) {
C
Chris Mason 已提交
1241
		return ret;
1242
	}
1243

1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257
	/*
	 * if we're doing a metadata allocation, preallocate space in the
	 * extent tree second.  This way, we don't create a tiny hole
	 * in the allocation map between any unused preallocation blocks
	 * and the metadata block we're actually allocating.  On disk,
	 * it'll go:
	 * [block we've allocated], [used prealloc 1], [ unused prealloc ]
	 * The unused prealloc will get reused the next time around.
	 */
	if (!data) {
		if (ins->objectid + ins->offset >= search_end)
			search_end = ins->objectid - 1;
		else
			search_start = ins->objectid + ins->offset;
C
Chris Mason 已提交
1258

1259 1260 1261
		if (hint_block < search_start)
			hint_block = search_start;

1262
		ret = find_free_extent(trans, root, 0, search_start,
1263 1264
				       search_end, hint_block,
				       &prealloc_key, 0);
1265 1266 1267 1268
		if (ret) {
			return ret;
		}
	}
1269

1270 1271 1272
	super_blocks_used = btrfs_super_blocks_used(info->disk_super);
	btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
				    num_blocks);
1273 1274
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1275

1276
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1277
	pending_ret = del_pending_extents(trans, extent_root);
1278
	if (ret) {
C
Chris Mason 已提交
1279
		return ret;
1280 1281
	}
	if (pending_ret) {
C
Chris Mason 已提交
1282
		return pending_ret;
1283
	}
C
Chris Mason 已提交
1284 1285
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
C
Chris Mason 已提交
1286
	BUG_ON(ret);
C
Chris Mason 已提交
1287
	return 0;
1288 1289 1290 1291 1292 1293
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
C
Chris Mason 已提交
1294
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1295
					   struct btrfs_root *root, u64 hint)
1296
{
C
Chris Mason 已提交
1297
	struct btrfs_key ins;
1298
	int ret;
C
Chris Mason 已提交
1299
	struct buffer_head *buf;
1300

1301
	ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1302
				 1, hint, (unsigned long)-1, &ins, 0);
1303 1304 1305 1306
	if (ret) {
		BUG();
		return NULL;
	}
C
Chris Mason 已提交
1307
	BUG_ON(ret);
1308
	buf = btrfs_find_create_tree_block(root, ins.objectid);
1309
	set_buffer_uptodate(buf);
C
Chris Mason 已提交
1310
	set_buffer_checked(buf);
1311
	set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1312 1313
	return buf;
}
1314

1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328
static int drop_leaf_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct buffer_head *cur)
{
	struct btrfs_disk_key *key;
	struct btrfs_leaf *leaf;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
	leaf = btrfs_buffer_leaf(cur);
	nritems = btrfs_header_nritems(&leaf->header);
	for (i = 0; i < nritems; i++) {
C
Chris Mason 已提交
1329
		u64 disk_blocknr;
1330 1331 1332 1333
		key = &leaf->items[i].key;
		if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1334 1335
		if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
			continue;
1336 1337 1338 1339
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
C
Chris Mason 已提交
1340 1341 1342 1343
		disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
		if (disk_blocknr == 0)
			continue;
		ret = btrfs_free_extent(trans, root, disk_blocknr,
1344 1345 1346 1347 1348 1349 1350
					btrfs_file_extent_disk_num_blocks(fi),
					0);
		BUG_ON(ret);
	}
	return 0;
}

C
Chris Mason 已提交
1351 1352 1353 1354
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1355 1356
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1357
{
C
Chris Mason 已提交
1358 1359
	struct buffer_head *next;
	struct buffer_head *cur;
C
Chris Mason 已提交
1360 1361 1362 1363
	u64 blocknr;
	int ret;
	u32 refs;

1364 1365
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1366
	ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1367
			       1, &refs);
C
Chris Mason 已提交
1368 1369 1370
	BUG_ON(ret);
	if (refs > 1)
		goto out;
C
Chris Mason 已提交
1371 1372 1373
	/*
	 * walk down to the last node level and free all the leaves
	 */
1374
	while(*level >= 0) {
1375 1376
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1377
		cur = path->nodes[*level];
C
Chris Mason 已提交
1378 1379
		if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
			WARN_ON(1);
1380
		if (path->slots[*level] >=
C
Chris Mason 已提交
1381
		    btrfs_header_nritems(btrfs_buffer_header(cur)))
C
Chris Mason 已提交
1382
			break;
1383 1384 1385 1386 1387
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
C
Chris Mason 已提交
1388 1389
		blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
					      path->slots[*level]);
C
Chris Mason 已提交
1390
		ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1391 1392
		BUG_ON(ret);
		if (refs != 1) {
C
Chris Mason 已提交
1393
			path->slots[*level]++;
1394
			ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
C
Chris Mason 已提交
1395 1396 1397 1398
			BUG_ON(ret);
			continue;
		}
		next = read_tree_block(root, blocknr);
1399
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1400
		if (path->nodes[*level-1])
C
Chris Mason 已提交
1401
			btrfs_block_release(root, path->nodes[*level-1]);
C
Chris Mason 已提交
1402
		path->nodes[*level-1] = next;
C
Chris Mason 已提交
1403
		*level = btrfs_header_level(btrfs_buffer_header(next));
C
Chris Mason 已提交
1404 1405 1406
		path->slots[*level] = 0;
	}
out:
1407 1408
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1409
	ret = btrfs_free_extent(trans, root,
1410
				bh_blocknr(path->nodes[*level]), 1, 1);
C
Chris Mason 已提交
1411
	btrfs_block_release(root, path->nodes[*level]);
C
Chris Mason 已提交
1412 1413 1414 1415 1416 1417
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1418 1419 1420 1421 1422
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1423 1424
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1425 1426 1427 1428
{
	int i;
	int slot;
	int ret;
C
Chris Mason 已提交
1429
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1430
		slot = path->slots[i];
C
Chris Mason 已提交
1431 1432
		if (slot < btrfs_header_nritems(
		    btrfs_buffer_header(path->nodes[i])) - 1) {
C
Chris Mason 已提交
1433 1434 1435 1436
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
1437
			ret = btrfs_free_extent(trans, root,
1438
						bh_blocknr(path->nodes[*level]),
1439
						1, 1);
1440
			BUG_ON(ret);
C
Chris Mason 已提交
1441
			btrfs_block_release(root, path->nodes[*level]);
C
Chris Mason 已提交
1442
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
1443 1444 1445 1446 1447 1448
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
1449 1450 1451 1452 1453
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
1454
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
C
Chris Mason 已提交
1455
			*root, struct buffer_head *snap)
C
Chris Mason 已提交
1456
{
1457
	int ret = 0;
C
Chris Mason 已提交
1458
	int wret;
C
Chris Mason 已提交
1459
	int level;
1460
	struct btrfs_path *path;
C
Chris Mason 已提交
1461 1462 1463
	int i;
	int orig_level;

1464 1465 1466
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
C
Chris Mason 已提交
1467

C
Chris Mason 已提交
1468
	level = btrfs_header_level(btrfs_buffer_header(snap));
C
Chris Mason 已提交
1469
	orig_level = level;
1470 1471
	path->nodes[level] = snap;
	path->slots[level] = 0;
C
Chris Mason 已提交
1472
	while(1) {
1473
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
1474
		if (wret > 0)
C
Chris Mason 已提交
1475
			break;
C
Chris Mason 已提交
1476 1477 1478
		if (wret < 0)
			ret = wret;

1479
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
1480
		if (wret > 0)
C
Chris Mason 已提交
1481
			break;
C
Chris Mason 已提交
1482 1483
		if (wret < 0)
			ret = wret;
C
Chris Mason 已提交
1484
		btrfs_btree_balance_dirty(root);
C
Chris Mason 已提交
1485
	}
C
Chris Mason 已提交
1486
	for (i = 0; i <= orig_level; i++) {
1487 1488
		if (path->nodes[i]) {
			btrfs_block_release(root, path->nodes[i]);
C
Chris Mason 已提交
1489
		}
C
Chris Mason 已提交
1490
	}
1491
	btrfs_free_path(path);
C
Chris Mason 已提交
1492
	return ret;
C
Chris Mason 已提交
1493
}
C
Chris Mason 已提交
1494

C
Chris Mason 已提交
1495
static int free_block_group_radix(struct radix_tree_root *radix)
C
Chris Mason 已提交
1496 1497 1498 1499 1500 1501
{
	int ret;
	struct btrfs_block_group_cache *cache[8];
	int i;

	while(1) {
C
Chris Mason 已提交
1502
		ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
C
Chris Mason 已提交
1503 1504 1505 1506
					     ARRAY_SIZE(cache));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
1507
			radix_tree_delete(radix, cache[i]->key.objectid +
C
Chris Mason 已提交
1508 1509 1510 1511 1512 1513 1514
					  cache[i]->key.offset - 1);
			kfree(cache[i]);
		}
	}
	return 0;
}

C
Chris Mason 已提交
1515 1516 1517 1518
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	int ret;
	int ret2;
1519 1520
	unsigned long gang[16];
	int i;
C
Chris Mason 已提交
1521 1522 1523 1524 1525 1526 1527

	ret = free_block_group_radix(&info->block_group_radix);
	ret2 = free_block_group_radix(&info->block_group_data_radix);
	if (ret)
		return ret;
	if (ret2)
		return ret2;
1528 1529 1530 1531 1532 1533 1534 1535 1536 1537

	while(1) {
		ret = find_first_radix_bit(&info->extent_map_radix,
					   gang, 0, ARRAY_SIZE(gang));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			clear_radix_bit(&info->extent_map_radix, gang[i]);
		}
	}
C
Chris Mason 已提交
1538 1539 1540
	return 0;
}

C
Chris Mason 已提交
1541 1542 1543 1544 1545 1546 1547
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
	struct btrfs_block_group_item *bi;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
1548 1549
	struct btrfs_fs_info *info = root->fs_info;
	struct radix_tree_root *radix;
C
Chris Mason 已提交
1550 1551 1552
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_leaf *leaf;
C
Chris Mason 已提交
1553
	u64 group_size_blocks;
1554
	u64 used;
C
Chris Mason 已提交
1555

C
Chris Mason 已提交
1556 1557
	group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
		root->fs_info->sb->s_blocksize_bits;
C
Chris Mason 已提交
1558
	root = info->extent_root;
C
Chris Mason 已提交
1559 1560 1561 1562 1563 1564 1565 1566 1567 1568
	key.objectid = 0;
	key.offset = group_size_blocks;
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
1569
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		btrfs_disk_key_to_cpu(&found_key,
				      &leaf->items[path->slots[0]].key);
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
1583

C
Chris Mason 已提交
1584 1585 1586
		bi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_block_group_item);
		if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
C
Chris Mason 已提交
1587
			radix = &info->block_group_data_radix;
C
Chris Mason 已提交
1588 1589
			cache->data = 1;
		} else {
C
Chris Mason 已提交
1590
			radix = &info->block_group_radix;
C
Chris Mason 已提交
1591 1592
			cache->data = 0;
		}
C
Chris Mason 已提交
1593

C
Chris Mason 已提交
1594 1595
		memcpy(&cache->item, bi, sizeof(*bi));
		memcpy(&cache->key, &found_key, sizeof(found_key));
1596 1597
		cache->last_alloc = cache->key.objectid;
		cache->first_free = cache->key.objectid;
1598
		cache->last_prealloc = cache->key.objectid;
C
Chris Mason 已提交
1599
		cache->pinned = 0;
1600 1601
		cache->cached = 0;

C
Chris Mason 已提交
1602 1603
		cache->radix = radix;

C
Chris Mason 已提交
1604 1605
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
C
Chris Mason 已提交
1606
		ret = radix_tree_insert(radix, found_key.objectid +
C
Chris Mason 已提交
1607 1608 1609
					found_key.offset - 1,
					(void *)cache);
		BUG_ON(ret);
1610
		used = btrfs_block_group_used(bi);
C
Chris Mason 已提交
1611
		if (used < div_factor(key.offset, 8)) {
C
Chris Mason 已提交
1612
			radix_tree_tag_set(radix, found_key.objectid +
1613 1614 1615
					   found_key.offset - 1,
					   BTRFS_BLOCK_GROUP_AVAIL);
		}
C
Chris Mason 已提交
1616
		if (key.objectid >=
C
Chris Mason 已提交
1617
		    btrfs_super_total_blocks(info->disk_super))
C
Chris Mason 已提交
1618 1619 1620 1621 1622 1623
			break;
	}

	btrfs_free_path(path);
	return 0;
}