extent-tree.c 39.7 KB
Newer Older
1
#include <linux/module.h>
2 3 4
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
5
#include "transaction.h"
6

7 8
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			    *orig_root, u64 num_blocks, u64 search_start, u64
C
Chris Mason 已提交
9
			    search_end, struct btrfs_key *ins, int data);
10 11
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
12 13
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
14

15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
static void reada_extent_leaves(struct btrfs_root *root,
				struct btrfs_path *path, u64 limit)
{
	struct btrfs_node *node;
	int i;
	int nritems;
	u64 item_objectid;
	u64 blocknr;
	int slot;
	int ret;

	if (!path->nodes[1])
		return;
	node = btrfs_buffer_node(path->nodes[1]);
	slot = path->slots[1] + 1;
	nritems = btrfs_header_nritems(&node->header);
	for (i = slot; i < nritems && i < slot + 8; i++) {
		item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
		if (item_objectid > limit)
			break;
		blocknr = btrfs_node_blockptr(node, i);
		ret = readahead_tree_block(root, blocknr);
		if (ret)
			break;
	}
}

42 43 44 45 46 47 48 49 50 51 52 53
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct btrfs_leaf *leaf;
	struct radix_tree_root *extent_radix;
	int slot;
	u64 i;
	u64 last = 0;
	u64 hole_size;
54
	u64 limit;
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
	int found = 0;

	root = root->fs_info->extent_root;
	extent_radix = &root->fs_info->extent_map_radix;

	if (block_group->cached)
		return 0;
	if (block_group->data)
		return 0;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
printk("cache block group %Lu\n", block_group->key.objectid);
	key.objectid = block_group->key.objectid;
	key.flags = 0;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	if (ret && path->slots[0] > 0)
		path->slots[0]--;
77 78
	limit = block_group->key.objectid + block_group->key.offset;
	reada_extent_leaves(root, path, limit);
79 80 81 82
	while(1) {
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(&leaf->header)) {
83
			reada_extent_leaves(root, path, limit);
84
			ret = btrfs_next_leaf(root, path);
85
			if (ret == 0) {
86
				continue;
87
			} else {
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
				if (found) {
					hole_size = block_group->key.objectid +
						block_group->key.offset - last;
				} else {
					last = block_group->key.objectid;
					hole_size = block_group->key.offset;
				}
				for (i = 0; i < hole_size; i++) {
					set_radix_bit(extent_radix,
						      last + i);
				}
				break;
			}
		}
		btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			if (found) {
				hole_size = block_group->key.objectid +
					block_group->key.offset - last;
			} else {
				last = block_group->key.objectid;
				hole_size = block_group->key.offset;
			}
			for (i = 0; i < hole_size; i++) {
				set_radix_bit(extent_radix, last + i);
			}
			break;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
				last = key.objectid + key.offset;
				found = 1;
			} else {
				hole_size = key.objectid - last;
				for (i = 0; i < hole_size; i++) {
					set_radix_bit(extent_radix, last + i);
				}
				last = key.objectid + key.offset;
			}
		}
		path->slots[0]++;
	}

	block_group->cached = 1;
	btrfs_free_path(path);
	return 0;
}

C
Chris Mason 已提交
137 138 139 140 141 142 143 144 145 146 147
static struct btrfs_block_group_cache *lookup_block_group(struct
							  btrfs_fs_info *info,
							  u64 blocknr)
{
	struct btrfs_block_group_cache *block_group;
	int ret;

	ret = radix_tree_gang_lookup(&info->block_group_radix,
				     (void **)&block_group,
				     blocknr, 1);
	if (ret) {
C
Chris Mason 已提交
148
		if (block_group->key.objectid <= blocknr && blocknr <=
C
Chris Mason 已提交
149 150 151 152 153 154 155
		    block_group->key.objectid + block_group->key.offset)
			return block_group;
	}
	ret = radix_tree_gang_lookup(&info->block_group_data_radix,
				     (void **)&block_group,
				     blocknr, 1);
	if (ret) {
C
Chris Mason 已提交
156
		if (block_group->key.objectid <= blocknr && blocknr <=
C
Chris Mason 已提交
157 158 159
		    block_group->key.objectid + block_group->key.offset)
			return block_group;
	}
C
Chris Mason 已提交
160 161 162 163 164 165
	WARN_ON(1);
	printk("lookup_block_group fails for blocknr %Lu\n", blocknr);
	printk("last ret was %d\n", ret);
	if (ret) {
		printk("last block group was %Lu %Lu\n", block_group->key.objectid, block_group->key.offset);
	}
C
Chris Mason 已提交
166 167 168
	return NULL;
}

169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
static u64 leaf_range(struct btrfs_root *root)
{
	u64 size = BTRFS_LEAF_DATA_SIZE(root);
	size = size / (sizeof(struct btrfs_extent_item) +
		       sizeof(struct btrfs_item));
	return size;
}

static u64 find_search_start(struct btrfs_root *root,
			     struct btrfs_block_group_cache **cache_ret,
			     u64 search_start, int num)
{
	unsigned long gang[8];
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	u64 last = max(search_start, cache->key.objectid);

	if (cache->data)
		goto out;
	if (num > 1) {
		last = max(last, cache->last_prealloc);
	}
again:
	cache_block_group(root, cache);
	while(1) {
		ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
					   gang, last, ARRAY_SIZE(gang));
		if (!ret)
			goto out;
		last = gang[ret-1] + 1;
		if (num > 1) {
			if (ret != ARRAY_SIZE(gang)) {
				goto new_group;
			}
			if (gang[ret-1] - gang[0] > leaf_range(root)) {
				continue;
			}
		}
		if (gang[0] >= cache->key.objectid + cache->key.offset) {
			goto new_group;
		}
		return gang[0];
	}
out:
	return max(cache->last_alloc, search_start);

new_group:
	cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1);
	if (!cache) {
		return max((*cache_ret)->last_alloc, search_start);
	}
	cache = btrfs_find_block_group(root, cache,
221
				       last + cache->key.offset - 1, 0, 0);
222 223 224 225
	*cache_ret = cache;
	goto again;
}

226 227
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
228
						 *hint, u64 search_start,
229
						 int data, int owner)
C
Chris Mason 已提交
230 231
{
	struct btrfs_block_group_cache *cache[8];
232
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
233
	struct btrfs_fs_info *info = root->fs_info;
C
Chris Mason 已提交
234
	struct radix_tree_root *radix;
C
Chris Mason 已提交
235
	u64 used;
236 237
	u64 last = 0;
	u64 hint_last;
C
Chris Mason 已提交
238 239
	int i;
	int ret;
240
	int full_search = 0;
241 242 243 244
	int factor = 8;

	if (!owner)
		factor = 5;
C
Chris Mason 已提交
245 246 247 248 249 250 251 252 253 254 255 256

	if (data)
		radix = &info->block_group_data_radix;
	else
		radix = &info->block_group_radix;

	if (search_start) {
		struct btrfs_block_group_cache *shint;
		shint = lookup_block_group(info, search_start);
		if (shint->data == data) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
257
			    (shint->key.offset * factor) / 10) {
C
Chris Mason 已提交
258 259 260 261 262
				return shint;
			}
		}
	}
	if (hint && hint->data == data) {
263
		used = btrfs_block_group_used(&hint->item);
264
		if (used + hint->pinned < (hint->key.offset * factor) / 10) {
265 266
			return hint;
		}
C
Chris Mason 已提交
267 268 269 270 271 272
		if (used >= (hint->key.offset * 8) / 10) {
			radix_tree_tag_clear(radix,
					     hint->key.objectid +
					     hint->key.offset - 1,
					     BTRFS_BLOCK_GROUP_AVAIL);
		}
273
		last = hint->key.offset * 3;
C
Chris Mason 已提交
274
		if (hint->key.objectid >= last)
275 276
			last = max(search_start + hint->key.offset - 1,
				   hint->key.objectid - last);
C
Chris Mason 已提交
277 278
		else
			last = hint->key.objectid + hint->key.offset;
279 280
		hint_last = last;
	} else {
281 282 283 284 285 286
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
287
	}
C
Chris Mason 已提交
288
	while(1) {
C
Chris Mason 已提交
289
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
C
Chris Mason 已提交
290
						 last, ARRAY_SIZE(cache),
291
						 BTRFS_BLOCK_GROUP_AVAIL);
C
Chris Mason 已提交
292 293 294
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
295 296
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
C
Chris Mason 已提交
297
			used = btrfs_block_group_used(&cache[i]->item);
C
Chris Mason 已提交
298
			if (used + cache[i]->pinned <
299
			    (cache[i]->key.offset * factor) / 10) {
300 301
				found_group = cache[i];
				goto found;
C
Chris Mason 已提交
302
			}
C
Chris Mason 已提交
303 304 305 306 307 308
			if (used >= (cache[i]->key.offset * 8) / 10) {
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
309
		}
310
		cond_resched();
C
Chris Mason 已提交
311
	}
312 313
	last = hint_last;
again:
C
Chris Mason 已提交
314
	while(1) {
C
Chris Mason 已提交
315 316
		ret = radix_tree_gang_lookup(radix, (void **)cache,
					     last, ARRAY_SIZE(cache));
C
Chris Mason 已提交
317 318 319
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
320 321
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
C
Chris Mason 已提交
322
			used = btrfs_block_group_used(&cache[i]->item);
C
Chris Mason 已提交
323
			if (used + cache[i]->pinned < cache[i]->key.offset) {
324 325
				found_group = cache[i];
				goto found;
C
Chris Mason 已提交
326
			}
C
Chris Mason 已提交
327 328 329 330 331 332
			if (used >= cache[i]->key.offset) {
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
333
		}
334
		cond_resched();
C
Chris Mason 已提交
335
	}
336
	if (!full_search) {
337
printk("find block group doing full search data %d start %Lu\n", data, search_start);
C
Chris Mason 已提交
338
		last = search_start;
339 340 341 342
		full_search = 1;
		goto again;
	}
	if (!found_group) {
343
printk("find block group bailing to zero data %d\n", data);
C
Chris Mason 已提交
344
		ret = radix_tree_gang_lookup(radix,
345 346 347
					     (void **)&found_group, 0, 1);
		BUG_ON(ret != 1);
	}
C
Chris Mason 已提交
348
found:
349
	return found_group;
C
Chris Mason 已提交
350 351
}

C
Chris Mason 已提交
352 353 354
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 blocknr, u64 num_blocks)
C
Chris Mason 已提交
355
{
356
	struct btrfs_path *path;
C
Chris Mason 已提交
357
	int ret;
C
Chris Mason 已提交
358
	struct btrfs_key key;
C
Chris Mason 已提交
359 360
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
361
	struct btrfs_key ins;
C
Chris Mason 已提交
362
	u32 refs;
C
Chris Mason 已提交
363

364
	find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
C
Chris Mason 已提交
365
			 &ins, 0);
366 367 368
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
C
Chris Mason 已提交
369 370
	key.objectid = blocknr;
	key.flags = 0;
371
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
372
	key.offset = num_blocks;
373
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
374
				0, 1);
375 376
	if (ret != 0) {
printk("can't find block %Lu %Lu\n", blocknr, num_blocks);
377
		BUG();
378
	}
C
Chris Mason 已提交
379
	BUG_ON(ret != 0);
380 381
	l = btrfs_buffer_leaf(path->nodes[0]);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
C
Chris Mason 已提交
382 383
	refs = btrfs_extent_refs(item);
	btrfs_set_extent_refs(item, refs + 1);
384
	btrfs_mark_buffer_dirty(path->nodes[0]);
385

386 387
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
388
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
389
	del_pending_extents(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
390 391 392
	return 0;
}

C
Chris Mason 已提交
393 394 395
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 blocknr,
			     u64 num_blocks, u32 *refs)
396
{
397
	struct btrfs_path *path;
398
	int ret;
C
Chris Mason 已提交
399
	struct btrfs_key key;
C
Chris Mason 已提交
400 401
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
402 403 404

	path = btrfs_alloc_path();
	btrfs_init_path(path);
405
	key.objectid = blocknr;
406
	key.offset = num_blocks;
407 408
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
409
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
410
				0, 0);
411 412
	if (ret != 0)
		BUG();
413 414
	l = btrfs_buffer_leaf(path->nodes[0]);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
C
Chris Mason 已提交
415
	*refs = btrfs_extent_refs(item);
416 417
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
418 419 420
	return 0;
}

C
Chris Mason 已提交
421 422 423
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root)
{
C
Chris Mason 已提交
424
	return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
C
Chris Mason 已提交
425 426
}

427
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
C
Chris Mason 已提交
428
		  struct buffer_head *buf)
C
Chris Mason 已提交
429 430
{
	u64 blocknr;
C
Chris Mason 已提交
431
	struct btrfs_node *buf_node;
432 433 434
	struct btrfs_leaf *buf_leaf;
	struct btrfs_disk_key *key;
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
435
	int i;
436 437
	int leaf;
	int ret;
438

439
	if (!root->ref_cows)
440
		return 0;
C
Chris Mason 已提交
441
	buf_node = btrfs_buffer_node(buf);
442 443
	leaf = btrfs_is_leaf(buf_node);
	buf_leaf = btrfs_buffer_leaf(buf);
C
Chris Mason 已提交
444
	for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
445
		if (leaf) {
C
Chris Mason 已提交
446
			u64 disk_blocknr;
447 448 449 450 451
			key = &buf_leaf->items[i].key;
			if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf_leaf, i,
					    struct btrfs_file_extent_item);
452 453 454
			if (btrfs_file_extent_type(fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
C
Chris Mason 已提交
455 456 457 458
			disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
			if (disk_blocknr == 0)
				continue;
			ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
459 460 461 462
				    btrfs_file_extent_disk_num_blocks(fi));
			BUG_ON(ret);
		} else {
			blocknr = btrfs_node_blockptr(buf_node, i);
C
Chris Mason 已提交
463
			ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
464 465
			BUG_ON(ret);
		}
C
Chris Mason 已提交
466 467 468 469
	}
	return 0;
}

C
Chris Mason 已提交
470 471 472 473 474 475 476 477 478 479 480
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_block_group_item *bi;
	struct btrfs_key ins;

C
Chris Mason 已提交
481
	find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0);
C
Chris Mason 已提交
482 483 484 485 486 487 488 489 490 491 492 493 494 495
	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	BUG_ON(ret);
	bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
			    struct btrfs_block_group_item);
	memcpy(bi, &cache->item, sizeof(*bi));
	mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(extent_root, path);

	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
C
Chris Mason 已提交
496 497
	if (cache->data)
		cache->last_alloc = cache->first_free;
C
Chris Mason 已提交
498 499 500 501
	return 0;

}

C
Chris Mason 已提交
502 503 504
static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct radix_tree_root *radix)
C
Chris Mason 已提交
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
{
	struct btrfs_block_group_cache *cache[8];
	int ret;
	int err = 0;
	int werr = 0;
	int i;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
						 0, ARRAY_SIZE(cache),
						 BTRFS_BLOCK_GROUP_DIRTY);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			radix_tree_tag_clear(radix, cache[i]->key.objectid +
					     cache[i]->key.offset - 1,
					     BTRFS_BLOCK_GROUP_DIRTY);
			err = write_one_cache_group(trans, root,
						    path, cache[i]);
			if (err)
				werr = err;
		}
	}
	btrfs_free_path(path);
	return werr;
}

C
Chris Mason 已提交
537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int ret;
	int ret2;
	ret = write_dirty_block_radix(trans, root,
				      &root->fs_info->block_group_radix);
	ret2 = write_dirty_block_radix(trans, root,
				      &root->fs_info->block_group_data_radix);
	if (ret)
		return ret;
	if (ret2)
		return ret2;
	return 0;
}

C
Chris Mason 已提交
553 554
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
555
			      u64 blocknr, u64 num, int alloc, int mark_free)
C
Chris Mason 已提交
556 557 558 559 560 561
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num;
	u64 old_val;
	u64 block_in_group;
562
	u64 i;
C
Chris Mason 已提交
563

C
Chris Mason 已提交
564
	while(total) {
C
Chris Mason 已提交
565 566
		cache = lookup_block_group(info, blocknr);
		if (!cache) {
C
Chris Mason 已提交
567 568
			printk(KERN_CRIT "blocknr %Lu lookup failed\n",
			       blocknr);
C
Chris Mason 已提交
569
			return -1;
C
Chris Mason 已提交
570
		}
C
Chris Mason 已提交
571 572
		block_in_group = blocknr - cache->key.objectid;
		WARN_ON(block_in_group > cache->key.offset);
C
Chris Mason 已提交
573
		radix_tree_tag_set(cache->radix, cache->key.objectid +
C
Chris Mason 已提交
574
				   cache->key.offset - 1,
C
Chris Mason 已提交
575 576 577 578
				   BTRFS_BLOCK_GROUP_DIRTY);

		old_val = btrfs_block_group_used(&cache->item);
		num = min(total, cache->key.offset - block_in_group);
C
Chris Mason 已提交
579
		if (alloc) {
C
Chris Mason 已提交
580
			old_val += num;
C
Chris Mason 已提交
581 582
			if (blocknr > cache->last_alloc)
				cache->last_alloc = blocknr;
583 584 585 586 587 588
			if (!cache->data) {
				for (i = 0; i < num; i++) {
					clear_radix_bit(&info->extent_map_radix,
						        blocknr + i);
				}
			}
C
Chris Mason 已提交
589
		} else {
C
Chris Mason 已提交
590
			old_val -= num;
C
Chris Mason 已提交
591 592
			if (blocknr < cache->first_free)
				cache->first_free = blocknr;
593 594 595 596 597 598
			if (!cache->data && mark_free) {
				for (i = 0; i < num; i++) {
					set_radix_bit(&info->extent_map_radix,
						      blocknr + i);
				}
			}
599 600
			if (old_val < (cache->key.offset * 5) / 10 &&
			    old_val + num >= (cache->key.offset * 5) / 10) {
601 602 603 604 605 606
printk("group %Lu now available\n", cache->key.objectid);
				radix_tree_tag_set(cache->radix,
						   cache->key.objectid +
						   cache->key.offset - 1,
						   BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
607
		}
C
Chris Mason 已提交
608
		btrfs_set_block_group_used(&cache->item, old_val);
609 610
		total -= num;
		blocknr += num;
C
Chris Mason 已提交
611 612 613 614
	}
	return 0;
}

C
Chris Mason 已提交
615 616 617 618 619 620 621
static int try_remove_page(struct address_space *mapping, unsigned long index)
{
	int ret;
	ret = invalidate_mapping_pages(mapping, index, index);
	return ret;
}

622 623
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
			       btrfs_root *root)
624
{
C
Chris Mason 已提交
625
	unsigned long gang[8];
C
Chris Mason 已提交
626
	struct inode *btree_inode = root->fs_info->btree_inode;
C
Chris Mason 已提交
627
	struct btrfs_block_group_cache *block_group;
628
	u64 first = 0;
629 630
	int ret;
	int i;
C
Chris Mason 已提交
631
	struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
632
	struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
633 634

	while(1) {
635
		ret = find_first_radix_bit(pinned_radix, gang, 0,
C
Chris Mason 已提交
636
					   ARRAY_SIZE(gang));
637 638
		if (!ret)
			break;
639
		if (!first)
C
Chris Mason 已提交
640
			first = gang[0];
641
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
642
			clear_radix_bit(pinned_radix, gang[i]);
C
Chris Mason 已提交
643 644 645 646 647 648 649
			block_group = lookup_block_group(root->fs_info,
							 gang[i]);
			if (block_group) {
				WARN_ON(block_group->pinned == 0);
				block_group->pinned--;
				if (gang[i] < block_group->last_alloc)
					block_group->last_alloc = gang[i];
650 651 652 653
				if (gang[i] < block_group->last_prealloc)
					block_group->last_prealloc = gang[i];
				if (!block_group->data)
					set_radix_bit(extent_radix, gang[i]);
C
Chris Mason 已提交
654
			}
C
Chris Mason 已提交
655 656 657
			try_remove_page(btree_inode->i_mapping,
					gang[i] << (PAGE_CACHE_SHIFT -
						    btree_inode->i_blkbits));
658
		}
659 660 661 662
	}
	return 0;
}

663 664
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
C
Chris Mason 已提交
665
{
C
Chris Mason 已提交
666
	struct btrfs_key ins;
C
Chris Mason 已提交
667
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
668 669
	int i;
	int ret;
670 671
	u64 super_blocks_used;
	struct btrfs_fs_info *info = extent_root->fs_info;
C
Chris Mason 已提交
672

C
Chris Mason 已提交
673
	btrfs_set_extent_refs(&extent_item, 1);
C
Chris Mason 已提交
674 675
	ins.offset = 1;
	ins.flags = 0;
676
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
677
	btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
C
Chris Mason 已提交
678

679 680
	for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
		ins.objectid = extent_root->fs_info->extent_tree_insert[i];
681 682 683
		super_blocks_used = btrfs_super_blocks_used(info->disk_super);
		btrfs_set_super_blocks_used(info->disk_super,
					    super_blocks_used + 1);
684 685
		ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
					sizeof(extent_item));
C
Chris Mason 已提交
686 687
		BUG_ON(ret);
	}
688 689
	extent_root->fs_info->extent_tree_insert_nr = 0;
	extent_root->fs_info->extent_tree_prealloc_nr = 0;
C
Chris Mason 已提交
690 691 692
	return 0;
}

C
Chris Mason 已提交
693
static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
C
Chris Mason 已提交
694 695
{
	int err;
C
Chris Mason 已提交
696
	struct btrfs_header *header;
C
Chris Mason 已提交
697 698
	struct buffer_head *bh;

C
Chris Mason 已提交
699
	if (!pending) {
700
		bh = btrfs_find_tree_block(root, blocknr);
C
Chris Mason 已提交
701 702 703 704 705 706 707 708 709 710
		if (bh) {
			if (buffer_uptodate(bh)) {
				u64 transid =
				    root->fs_info->running_transaction->transid;
				header = btrfs_buffer_header(bh);
				if (btrfs_header_generation(header) ==
				    transid) {
					btrfs_block_release(root, bh);
					return 0;
				}
C
Chris Mason 已提交
711
			}
C
Chris Mason 已提交
712
			btrfs_block_release(root, bh);
C
Chris Mason 已提交
713 714
		}
		err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
C
Chris Mason 已提交
715 716 717 718 719 720
		if (!err) {
			struct btrfs_block_group_cache *cache;
			cache = lookup_block_group(root->fs_info, blocknr);
			if (cache)
				cache->pinned++;
		}
C
Chris Mason 已提交
721 722 723
	} else {
		err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
	}
C
Chris Mason 已提交
724
	BUG_ON(err < 0);
C
Chris Mason 已提交
725 726 727
	return 0;
}

728
/*
729
 * remove an extent from the root, returns 0 on success
730
 */
731
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
732 733
			 *root, u64 blocknr, u64 num_blocks, int pin,
			 int mark_free)
734
{
735
	struct btrfs_path *path;
C
Chris Mason 已提交
736
	struct btrfs_key key;
737 738
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
739
	int ret;
C
Chris Mason 已提交
740
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
741
	struct btrfs_key ins;
C
Chris Mason 已提交
742
	u32 refs;
C
Chris Mason 已提交
743

744 745
	key.objectid = blocknr;
	key.flags = 0;
746
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
747 748
	key.offset = num_blocks;

C
Chris Mason 已提交
749
	find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0);
750 751 752
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
753

754
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
755
	if (ret) {
756
		printk("failed to find %Lu\n", key.objectid);
C
Chris Mason 已提交
757
		btrfs_print_tree(extent_root, extent_root->node);
758
		printk("failed to find %Lu\n", key.objectid);
759 760
		BUG();
	}
761
	ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
C
Chris Mason 已提交
762
			    struct btrfs_extent_item);
763
	BUG_ON(ei->refs == 0);
C
Chris Mason 已提交
764 765
	refs = btrfs_extent_refs(ei) - 1;
	btrfs_set_extent_refs(ei, refs);
766
	btrfs_mark_buffer_dirty(path->nodes[0]);
C
Chris Mason 已提交
767
	if (refs == 0) {
768
		u64 super_blocks_used;
C
Chris Mason 已提交
769 770

		if (pin) {
C
Chris Mason 已提交
771
			ret = pin_down_block(root, blocknr, 0);
C
Chris Mason 已提交
772 773 774
			BUG_ON(ret);
		}

775 776 777
		super_blocks_used = btrfs_super_blocks_used(info->disk_super);
		btrfs_set_super_blocks_used(info->disk_super,
					    super_blocks_used - num_blocks);
778
		ret = btrfs_del_item(trans, extent_root, path);
779 780
		if (ret)
			BUG();
781 782
		ret = update_block_group(trans, root, blocknr, num_blocks, 0,
					 mark_free);
C
Chris Mason 已提交
783
		BUG_ON(ret);
784
	}
785
	btrfs_free_path(path);
786
	finish_current_insert(trans, extent_root);
787 788 789 790 791 792 793
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
794 795
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
796 797
{
	int ret;
C
Chris Mason 已提交
798 799
	int wret;
	int err = 0;
C
Chris Mason 已提交
800
	unsigned long gang[4];
801
	int i;
C
Chris Mason 已提交
802 803
	struct radix_tree_root *pending_radix;
	struct radix_tree_root *pinned_radix;
C
Chris Mason 已提交
804
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
805 806 807

	pending_radix = &extent_root->fs_info->pending_del_radix;
	pinned_radix = &extent_root->fs_info->pinned_radix;
808 809

	while(1) {
810
		ret = find_first_radix_bit(pending_radix, gang, 0,
C
Chris Mason 已提交
811
					   ARRAY_SIZE(gang));
812 813 814
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
815
			wret = set_radix_bit(pinned_radix, gang[i]);
C
Chris Mason 已提交
816 817 818 819 820 821 822 823 824 825 826
			if (wret == 0) {
				cache = lookup_block_group(extent_root->fs_info,
							   gang[i]);
				if (cache)
					cache->pinned++;
			}
			if (wret < 0) {
				printk(KERN_CRIT "set_radix_bit, err %d\n",
				       wret);
				BUG_ON(wret < 0);
			}
C
Chris Mason 已提交
827 828
			wret = clear_radix_bit(pending_radix, gang[i]);
			BUG_ON(wret);
829
			wret = __free_extent(trans, extent_root,
830
					     gang[i], 1, 0, 0);
C
Chris Mason 已提交
831 832
			if (wret)
				err = wret;
833 834
		}
	}
C
Chris Mason 已提交
835
	return err;
836 837 838 839 840
}

/*
 * remove an extent from the root, returns 0 on success
 */
841 842
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, u64 blocknr, u64 num_blocks, int pin)
843
{
844
	struct btrfs_root *extent_root = root->fs_info->extent_root;
845 846
	int pending_ret;
	int ret;
847

848
	if (root == extent_root) {
C
Chris Mason 已提交
849
		pin_down_block(root, blocknr, 1);
850 851
		return 0;
	}
852
	ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
C
Chris Mason 已提交
853
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
854 855 856 857 858 859 860
	return ret ? ret : pending_ret;
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
861
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
862 863 864
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
865 866
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			    *orig_root, u64 num_blocks, u64 search_start, u64
C
Chris Mason 已提交
867
			    search_end, struct btrfs_key *ins, int data)
868
{
869
	struct btrfs_path *path;
C
Chris Mason 已提交
870
	struct btrfs_key key;
871 872 873
	int ret;
	u64 hole_size = 0;
	int slot = 0;
C
Chris Mason 已提交
874
	u64 last_block = 0;
C
Chris Mason 已提交
875
	u64 test_block;
C
Chris Mason 已提交
876
	u64 orig_search_start = search_start;
877
	int start_found;
C
Chris Mason 已提交
878
	struct btrfs_leaf *l;
879
	struct btrfs_root * root = orig_root->fs_info->extent_root;
880
	struct btrfs_fs_info *info = root->fs_info;
881
	int total_needed = num_blocks;
882 883
	int total_found = 0;
	int fill_prealloc = 0;
C
Chris Mason 已提交
884
	int level;
C
Chris Mason 已提交
885
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
886
	int full_scan = 0;
887
	u64 limit;
888

889 890 891 892
	path = btrfs_alloc_path();
	ins->flags = 0;
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

C
Chris Mason 已提交
893
	level = btrfs_header_level(btrfs_buffer_header(root->node));
894 895 896
	if (num_blocks == 0) {
		fill_prealloc = 1;
		num_blocks = 1;
897
		total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
898
	}
C
Chris Mason 已提交
899 900
	if (search_end == (u64)-1)
		search_end = btrfs_super_total_blocks(info->disk_super);
C
Chris Mason 已提交
901 902 903
	if (search_start) {
		block_group = lookup_block_group(info, search_start);
		block_group = btrfs_find_block_group(root, block_group,
904
						     search_start, data, 1);
C
Chris Mason 已提交
905 906 907
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group, 0,
908
						     data, 1);
C
Chris Mason 已提交
909 910 911
	}

check_failed:
C
Chris Mason 已提交
912
	if (!full_scan && block_group->data != data)
C
Chris Mason 已提交
913
		WARN_ON(1);
914 915 916 917 918 919 920

	if (!data)
		search_start = find_search_start(root, &block_group,
						 search_start, total_needed);
	else
		search_start = max(block_group->last_alloc, search_start);

921
	btrfs_init_path(path);
922 923 924
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
925

926
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
927 928
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
929

930
	if (path->slots[0] > 0) {
931
		path->slots[0]--;
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952
	}

	l = btrfs_buffer_leaf(path->nodes[0]);
	btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
	/*
	 * a rare case, go back one key if we hit a block group item
	 * instead of an extent item
	 */
	if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
	    key.objectid + key.offset >= search_start) {
		ins->objectid = key.objectid;
		ins->offset = key.offset - 1;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
		if (ret < 0)
			goto error;

		if (path->slots[0] > 0) {
			path->slots[0]--;
		}
	}
953

954
	while (1) {
955 956
		l = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
957
		if (slot >= btrfs_header_nritems(&l->header)) {
958 959 960 961
			if (fill_prealloc) {
				info->extent_tree_prealloc_nr = 0;
				total_found = 0;
			}
962 963 964 965 966 967
			if (start_found)
				limit = last_block +
					block_group->key.offset / 2;
			else
				limit = search_start +
					block_group->key.offset / 2;
968
			ret = btrfs_next_leaf(root, path);
969 970
			if (ret == 0)
				continue;
C
Chris Mason 已提交
971 972
			if (ret < 0)
				goto error;
973 974
			if (!start_found) {
				ins->objectid = search_start;
C
Chris Mason 已提交
975
				ins->offset = search_end - search_start;
976 977 978 979 980
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = last_block > search_start ?
					last_block : search_start;
C
Chris Mason 已提交
981
			ins->offset = search_end - ins->objectid;
982 983
			goto check_pending;
		}
984

C
Chris Mason 已提交
985
		btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
986 987 988 989 990 991 992 993 994
		if (key.objectid >= search_start && key.objectid > last_block &&
		    start_found) {
			if (last_block < search_start)
				last_block = search_start;
			hole_size = key.objectid - last_block;
			if (hole_size >= num_blocks) {
				ins->objectid = last_block;
				ins->offset = hole_size;
				goto check_pending;
995
			}
996
		}
997 998 999 1000

		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
			goto next;

1001
		start_found = 1;
C
Chris Mason 已提交
1002
		last_block = key.objectid + key.offset;
C
Chris Mason 已提交
1003 1004 1005 1006 1007 1008 1009
		if (last_block >= block_group->key.objectid +
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
				block_group->key.offset * 2;
			goto new_group;
		}
C
Chris Mason 已提交
1010
next:
1011
		path->slots[0]++;
1012
		cond_resched();
1013 1014 1015 1016 1017 1018
	}
	// FIXME -ENOSPC
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1019
	btrfs_release_path(root, path);
1020
	BUG_ON(ins->objectid < search_start);
1021

C
Chris Mason 已提交
1022
	if (ins->objectid + num_blocks >= search_end) {
C
Chris Mason 已提交
1023
		if (full_scan)
1024
			return -ENOSPC;
C
Chris Mason 已提交
1025 1026 1027
		search_start = orig_search_start;
		full_scan = 1;
		goto new_group;
1028
	}
C
Chris Mason 已提交
1029
	for (test_block = ins->objectid;
1030 1031
	     test_block < ins->objectid + num_blocks; test_block++) {
		if (test_radix_bit(&info->pinned_radix, test_block)) {
C
Chris Mason 已提交
1032
			search_start = test_block + 1;
C
Chris Mason 已提交
1033
			goto new_group;
1034 1035
		}
	}
1036 1037 1038 1039 1040 1041 1042
	if (!fill_prealloc && info->extent_tree_insert_nr) {
		u64 last =
		  info->extent_tree_insert[info->extent_tree_insert_nr - 1];
		if (ins->objectid + num_blocks >
		    info->extent_tree_insert[0] &&
		    ins->objectid <= last) {
			search_start = last + 1;
1043
			WARN_ON(!full_scan);
C
Chris Mason 已提交
1044
			goto new_group;
1045 1046 1047 1048 1049 1050 1051 1052
		}
	}
	if (!fill_prealloc && info->extent_tree_prealloc_nr) {
		u64 first =
		  info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
		if (ins->objectid + num_blocks > first &&
		    ins->objectid <= info->extent_tree_prealloc[0]) {
			search_start = info->extent_tree_prealloc[0] + 1;
1053
			WARN_ON(!full_scan);
C
Chris Mason 已提交
1054
			goto new_group;
1055 1056 1057 1058 1059
		}
	}
	if (fill_prealloc) {
		int nr;
		test_block = ins->objectid;
1060 1061 1062 1063 1064
		if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
		    leaf_range(root)) {
			total_found = 0;
			info->extent_tree_prealloc_nr = total_found;
		}
1065 1066 1067 1068
		while(test_block < ins->objectid + ins->offset &&
		      total_found < total_needed) {
			nr = total_needed - total_found - 1;
			BUG_ON(nr < 0);
C
Chris Mason 已提交
1069
			info->extent_tree_prealloc[nr] = test_block;
1070 1071 1072 1073 1074
			total_found++;
			test_block++;
		}
		if (total_found < total_needed) {
			search_start = test_block;
C
Chris Mason 已提交
1075
			goto new_group;
1076
		}
C
Chris Mason 已提交
1077 1078
		info->extent_tree_prealloc_nr = total_found;
	}
1079 1080 1081 1082 1083 1084 1085 1086 1087
	if (!data) {
		block_group = lookup_block_group(info, ins->objectid);
		if (block_group) {
			if (fill_prealloc)
				block_group->last_prealloc =
				     info->extent_tree_prealloc[total_needed-1];
			else
				trans->block_group = block_group;
		}
1088
	}
C
Chris Mason 已提交
1089
	ins->offset = num_blocks;
1090
	btrfs_free_path(path);
1091
	return 0;
C
Chris Mason 已提交
1092 1093

new_group:
C
Chris Mason 已提交
1094
	if (search_start + num_blocks >= search_end) {
C
Chris Mason 已提交
1095
		search_start = orig_search_start;
1096
printk("doing full scan!\n");
C
Chris Mason 已提交
1097 1098 1099 1100 1101
		full_scan = 1;
	}
	block_group = lookup_block_group(info, search_start);
	if (!full_scan)
		block_group = btrfs_find_block_group(root, block_group,
1102 1103
						     search_start, data, 0);
	cond_resched();
C
Chris Mason 已提交
1104 1105
	goto check_failed;

C
Chris Mason 已提交
1106
error:
1107 1108
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1109
	return ret;
1110 1111 1112 1113 1114 1115 1116 1117
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1118 1119
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, u64 owner,
1120
		       u64 num_blocks, u64 search_start,
C
Chris Mason 已提交
1121
		       u64 search_end, struct btrfs_key *ins, int data)
1122 1123 1124
{
	int ret;
	int pending_ret;
1125 1126 1127
	u64 super_blocks_used;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1128
	struct btrfs_extent_item extent_item;
1129
	struct btrfs_key prealloc_key;
C
Chris Mason 已提交
1130

C
Chris Mason 已提交
1131
	btrfs_set_extent_refs(&extent_item, 1);
1132
	btrfs_set_extent_owner(&extent_item, owner);
1133

C
Chris Mason 已提交
1134
	if (root == extent_root) {
1135 1136
		int nr;
		BUG_ON(info->extent_tree_prealloc_nr == 0);
C
Chris Mason 已提交
1137 1138
		BUG_ON(num_blocks != 1);
		ins->offset = 1;
1139 1140 1141 1142 1143
		info->extent_tree_prealloc_nr--;
		nr = info->extent_tree_prealloc_nr;
		ins->objectid = info->extent_tree_prealloc[nr];
		info->extent_tree_insert[info->extent_tree_insert_nr++] =
			ins->objectid;
C
Chris Mason 已提交
1144
		ret = update_block_group(trans, root,
1145
					 ins->objectid, ins->offset, 1, 0);
C
Chris Mason 已提交
1146
		BUG_ON(ret);
1147 1148
		return 0;
	}
1149 1150 1151 1152 1153 1154 1155

	/*
	 * if we're doing a data allocation, preallocate room in the
	 * extent tree first.  This way the extent tree blocks end up
	 * in the correct block group.
	 */
	if (data) {
1156
		ret = find_free_extent(trans, root, 0, 0,
1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
				       search_end, &prealloc_key, 0);
		if (ret) {
			return ret;
		}
		if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
			int nr = info->extent_tree_prealloc_nr;
			search_end = info->extent_tree_prealloc[nr - 1] - 1;
		} else {
			search_start = info->extent_tree_prealloc[0] + 1;
		}
	}
1168
	/* do the real allocation */
1169
	ret = find_free_extent(trans, root, num_blocks, search_start,
C
Chris Mason 已提交
1170
			       search_end, ins, data);
1171
	if (ret) {
C
Chris Mason 已提交
1172
		return ret;
1173
	}
1174

1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188
	/*
	 * if we're doing a metadata allocation, preallocate space in the
	 * extent tree second.  This way, we don't create a tiny hole
	 * in the allocation map between any unused preallocation blocks
	 * and the metadata block we're actually allocating.  On disk,
	 * it'll go:
	 * [block we've allocated], [used prealloc 1], [ unused prealloc ]
	 * The unused prealloc will get reused the next time around.
	 */
	if (!data) {
		if (ins->objectid + ins->offset >= search_end)
			search_end = ins->objectid - 1;
		else
			search_start = ins->objectid + ins->offset;
C
Chris Mason 已提交
1189

1190 1191 1192 1193 1194 1195
		ret = find_free_extent(trans, root, 0, search_start,
				       search_end, &prealloc_key, 0);
		if (ret) {
			return ret;
		}
	}
1196

1197 1198 1199
	super_blocks_used = btrfs_super_blocks_used(info->disk_super);
	btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
				    num_blocks);
1200 1201
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1202

1203
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1204
	pending_ret = del_pending_extents(trans, extent_root);
1205
	if (ret) {
C
Chris Mason 已提交
1206
		return ret;
1207 1208
	}
	if (pending_ret) {
C
Chris Mason 已提交
1209
		return pending_ret;
1210 1211
	}
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
C
Chris Mason 已提交
1212
	return 0;
1213 1214 1215 1216 1217 1218
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
C
Chris Mason 已提交
1219
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1220
					   struct btrfs_root *root, u64 hint)
1221
{
C
Chris Mason 已提交
1222
	struct btrfs_key ins;
1223
	int ret;
C
Chris Mason 已提交
1224
	struct buffer_head *buf;
1225

1226
	ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1227
				 1, hint, (unsigned long)-1, &ins, 0);
1228 1229 1230 1231
	if (ret) {
		BUG();
		return NULL;
	}
C
Chris Mason 已提交
1232
	BUG_ON(ret);
1233
	buf = btrfs_find_create_tree_block(root, ins.objectid);
1234
	set_buffer_uptodate(buf);
C
Chris Mason 已提交
1235
	set_buffer_checked(buf);
1236
	set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1237 1238
	return buf;
}
1239

1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253
static int drop_leaf_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct buffer_head *cur)
{
	struct btrfs_disk_key *key;
	struct btrfs_leaf *leaf;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
	leaf = btrfs_buffer_leaf(cur);
	nritems = btrfs_header_nritems(&leaf->header);
	for (i = 0; i < nritems; i++) {
C
Chris Mason 已提交
1254
		u64 disk_blocknr;
1255 1256 1257 1258
		key = &leaf->items[i].key;
		if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1259 1260
		if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
			continue;
1261 1262 1263 1264
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
C
Chris Mason 已提交
1265 1266 1267 1268
		disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
		if (disk_blocknr == 0)
			continue;
		ret = btrfs_free_extent(trans, root, disk_blocknr,
1269 1270 1271 1272 1273 1274 1275
					btrfs_file_extent_disk_num_blocks(fi),
					0);
		BUG_ON(ret);
	}
	return 0;
}

C
Chris Mason 已提交
1276 1277 1278 1279
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1280 1281
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1282
{
C
Chris Mason 已提交
1283 1284
	struct buffer_head *next;
	struct buffer_head *cur;
C
Chris Mason 已提交
1285 1286 1287 1288
	u64 blocknr;
	int ret;
	u32 refs;

1289 1290
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1291
	ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1292
			       1, &refs);
C
Chris Mason 已提交
1293 1294 1295
	BUG_ON(ret);
	if (refs > 1)
		goto out;
C
Chris Mason 已提交
1296 1297 1298
	/*
	 * walk down to the last node level and free all the leaves
	 */
1299
	while(*level >= 0) {
1300 1301
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1302
		cur = path->nodes[*level];
C
Chris Mason 已提交
1303 1304
		if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
			WARN_ON(1);
1305
		if (path->slots[*level] >=
C
Chris Mason 已提交
1306
		    btrfs_header_nritems(btrfs_buffer_header(cur)))
C
Chris Mason 已提交
1307
			break;
1308 1309 1310 1311 1312
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
C
Chris Mason 已提交
1313 1314
		blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
					      path->slots[*level]);
C
Chris Mason 已提交
1315
		ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1316 1317
		BUG_ON(ret);
		if (refs != 1) {
C
Chris Mason 已提交
1318
			path->slots[*level]++;
1319
			ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
C
Chris Mason 已提交
1320 1321 1322 1323
			BUG_ON(ret);
			continue;
		}
		next = read_tree_block(root, blocknr);
1324
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1325
		if (path->nodes[*level-1])
C
Chris Mason 已提交
1326
			btrfs_block_release(root, path->nodes[*level-1]);
C
Chris Mason 已提交
1327
		path->nodes[*level-1] = next;
C
Chris Mason 已提交
1328
		*level = btrfs_header_level(btrfs_buffer_header(next));
C
Chris Mason 已提交
1329 1330 1331
		path->slots[*level] = 0;
	}
out:
1332 1333
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1334
	ret = btrfs_free_extent(trans, root,
1335
				bh_blocknr(path->nodes[*level]), 1, 1);
C
Chris Mason 已提交
1336
	btrfs_block_release(root, path->nodes[*level]);
C
Chris Mason 已提交
1337 1338 1339 1340 1341 1342
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1343 1344 1345 1346 1347
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1348 1349
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1350 1351 1352 1353
{
	int i;
	int slot;
	int ret;
C
Chris Mason 已提交
1354
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1355
		slot = path->slots[i];
C
Chris Mason 已提交
1356 1357
		if (slot < btrfs_header_nritems(
		    btrfs_buffer_header(path->nodes[i])) - 1) {
C
Chris Mason 已提交
1358 1359 1360 1361
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
1362
			ret = btrfs_free_extent(trans, root,
1363
						bh_blocknr(path->nodes[*level]),
1364
						1, 1);
1365
			BUG_ON(ret);
C
Chris Mason 已提交
1366
			btrfs_block_release(root, path->nodes[*level]);
C
Chris Mason 已提交
1367
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
1368 1369 1370 1371 1372 1373
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
1374 1375 1376 1377 1378
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
1379
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
C
Chris Mason 已提交
1380
			*root, struct buffer_head *snap)
C
Chris Mason 已提交
1381
{
1382
	int ret = 0;
C
Chris Mason 已提交
1383
	int wret;
C
Chris Mason 已提交
1384
	int level;
1385
	struct btrfs_path *path;
C
Chris Mason 已提交
1386 1387 1388
	int i;
	int orig_level;

1389 1390 1391
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
C
Chris Mason 已提交
1392

C
Chris Mason 已提交
1393
	level = btrfs_header_level(btrfs_buffer_header(snap));
C
Chris Mason 已提交
1394
	orig_level = level;
1395 1396
	path->nodes[level] = snap;
	path->slots[level] = 0;
C
Chris Mason 已提交
1397
	while(1) {
1398
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
1399
		if (wret > 0)
C
Chris Mason 已提交
1400
			break;
C
Chris Mason 已提交
1401 1402 1403
		if (wret < 0)
			ret = wret;

1404
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
1405
		if (wret > 0)
C
Chris Mason 已提交
1406
			break;
C
Chris Mason 已提交
1407 1408
		if (wret < 0)
			ret = wret;
C
Chris Mason 已提交
1409
		btrfs_btree_balance_dirty(root);
C
Chris Mason 已提交
1410
	}
C
Chris Mason 已提交
1411
	for (i = 0; i <= orig_level; i++) {
1412 1413
		if (path->nodes[i]) {
			btrfs_block_release(root, path->nodes[i]);
C
Chris Mason 已提交
1414
		}
C
Chris Mason 已提交
1415
	}
1416
	btrfs_free_path(path);
C
Chris Mason 已提交
1417
	return ret;
C
Chris Mason 已提交
1418
}
C
Chris Mason 已提交
1419

C
Chris Mason 已提交
1420
static int free_block_group_radix(struct radix_tree_root *radix)
C
Chris Mason 已提交
1421 1422 1423 1424 1425 1426
{
	int ret;
	struct btrfs_block_group_cache *cache[8];
	int i;

	while(1) {
C
Chris Mason 已提交
1427
		ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
C
Chris Mason 已提交
1428 1429 1430 1431
					     ARRAY_SIZE(cache));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
1432
			radix_tree_delete(radix, cache[i]->key.objectid +
C
Chris Mason 已提交
1433 1434 1435 1436 1437 1438 1439
					  cache[i]->key.offset - 1);
			kfree(cache[i]);
		}
	}
	return 0;
}

C
Chris Mason 已提交
1440 1441 1442 1443
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	int ret;
	int ret2;
1444 1445
	unsigned long gang[16];
	int i;
C
Chris Mason 已提交
1446 1447 1448 1449 1450 1451 1452

	ret = free_block_group_radix(&info->block_group_radix);
	ret2 = free_block_group_radix(&info->block_group_data_radix);
	if (ret)
		return ret;
	if (ret2)
		return ret2;
1453 1454 1455 1456 1457 1458 1459 1460 1461 1462

	while(1) {
		ret = find_first_radix_bit(&info->extent_map_radix,
					   gang, 0, ARRAY_SIZE(gang));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			clear_radix_bit(&info->extent_map_radix, gang[i]);
		}
	}
C
Chris Mason 已提交
1463 1464 1465
	return 0;
}

C
Chris Mason 已提交
1466 1467 1468 1469 1470 1471 1472
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
	struct btrfs_block_group_item *bi;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
1473 1474
	struct btrfs_fs_info *info = root->fs_info;
	struct radix_tree_root *radix;
C
Chris Mason 已提交
1475 1476 1477 1478
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_leaf *leaf;
	u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
1479
	u64 used;
C
Chris Mason 已提交
1480
	u64 nr = 0;
C
Chris Mason 已提交
1481

C
Chris Mason 已提交
1482
	root = info->extent_root;
C
Chris Mason 已提交
1483 1484 1485 1486 1487 1488 1489 1490 1491 1492
	key.objectid = 0;
	key.offset = group_size_blocks;
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
1493
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		btrfs_disk_key_to_cpu(&found_key,
				      &leaf->items[path->slots[0]].key);
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
1507

1508
		if (nr % 3)
C
Chris Mason 已提交
1509 1510 1511 1512
			radix = &info->block_group_data_radix;
		else
			radix = &info->block_group_radix;

C
Chris Mason 已提交
1513 1514 1515 1516
		bi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_block_group_item);
		memcpy(&cache->item, bi, sizeof(*bi));
		memcpy(&cache->key, &found_key, sizeof(found_key));
1517 1518
		cache->last_alloc = cache->key.objectid;
		cache->first_free = cache->key.objectid;
1519
		cache->last_prealloc = cache->key.objectid;
C
Chris Mason 已提交
1520
		cache->pinned = 0;
1521 1522 1523 1524 1525 1526
		cache->cached = 0;

		if (nr % 3)
			cache->data = 1;
		else
			cache->data = 0;
C
Chris Mason 已提交
1527 1528
		cache->radix = radix;

C
Chris Mason 已提交
1529 1530
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
C
Chris Mason 已提交
1531
		ret = radix_tree_insert(radix, found_key.objectid +
C
Chris Mason 已提交
1532 1533 1534
					found_key.offset - 1,
					(void *)cache);
		BUG_ON(ret);
1535
		used = btrfs_block_group_used(bi);
C
Chris Mason 已提交
1536 1537
		if (used < (key.offset * 8) / 10) {
			radix_tree_tag_set(radix, found_key.objectid +
1538 1539 1540
					   found_key.offset - 1,
					   BTRFS_BLOCK_GROUP_AVAIL);
		}
C
Chris Mason 已提交
1541
		if (key.objectid >=
C
Chris Mason 已提交
1542
		    btrfs_super_total_blocks(info->disk_super))
C
Chris Mason 已提交
1543
			break;
C
Chris Mason 已提交
1544
		nr++;
C
Chris Mason 已提交
1545 1546 1547 1548 1549
	}

	btrfs_free_path(path);
	return 0;
}