extent-tree.c 40.9 KB
Newer Older
1
#include <linux/module.h>
2 3 4
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
5
#include "transaction.h"
6

7
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
8 9 10
			    *orig_root, u64 num_blocks, u64 search_start,
			    u64 search_end, u64 hint_block,
			    struct btrfs_key *ins, int data);
11 12
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
C
Chris Mason 已提交
13 14
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
15

16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
static void reada_extent_leaves(struct btrfs_root *root,
				struct btrfs_path *path, u64 limit)
{
	struct btrfs_node *node;
	int i;
	int nritems;
	u64 item_objectid;
	u64 blocknr;
	int slot;
	int ret;

	if (!path->nodes[1])
		return;
	node = btrfs_buffer_node(path->nodes[1]);
	slot = path->slots[1] + 1;
	nritems = btrfs_header_nritems(&node->header);
	for (i = slot; i < nritems && i < slot + 8; i++) {
		item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
		if (item_objectid > limit)
			break;
		blocknr = btrfs_node_blockptr(node, i);
		ret = readahead_tree_block(root, blocknr);
		if (ret)
			break;
	}
}

43 44 45 46 47 48 49 50 51 52 53 54
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct btrfs_leaf *leaf;
	struct radix_tree_root *extent_radix;
	int slot;
	u64 i;
	u64 last = 0;
	u64 hole_size;
55
	u64 limit;
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
	int found = 0;

	root = root->fs_info->extent_root;
	extent_radix = &root->fs_info->extent_map_radix;

	if (block_group->cached)
		return 0;
	if (block_group->data)
		return 0;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
printk("cache block group %Lu\n", block_group->key.objectid);
	key.objectid = block_group->key.objectid;
	key.flags = 0;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	if (ret && path->slots[0] > 0)
		path->slots[0]--;
78 79
	limit = block_group->key.objectid + block_group->key.offset;
	reada_extent_leaves(root, path, limit);
80 81 82 83
	while(1) {
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(&leaf->header)) {
84
			reada_extent_leaves(root, path, limit);
85
			ret = btrfs_next_leaf(root, path);
86
			if (ret == 0) {
87
				continue;
88
			} else {
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
				if (found) {
					hole_size = block_group->key.objectid +
						block_group->key.offset - last;
				} else {
					last = block_group->key.objectid;
					hole_size = block_group->key.offset;
				}
				for (i = 0; i < hole_size; i++) {
					set_radix_bit(extent_radix,
						      last + i);
				}
				break;
			}
		}
		btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			if (found) {
				hole_size = block_group->key.objectid +
					block_group->key.offset - last;
			} else {
				last = block_group->key.objectid;
				hole_size = block_group->key.offset;
			}
			for (i = 0; i < hole_size; i++) {
				set_radix_bit(extent_radix, last + i);
			}
			break;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
				last = key.objectid + key.offset;
				found = 1;
			} else {
				hole_size = key.objectid - last;
				for (i = 0; i < hole_size; i++) {
					set_radix_bit(extent_radix, last + i);
				}
				last = key.objectid + key.offset;
			}
		}
		path->slots[0]++;
	}

	block_group->cached = 1;
	btrfs_free_path(path);
	return 0;
}

138 139 140
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
							 u64 blocknr)
C
Chris Mason 已提交
141 142 143 144 145 146 147 148
{
	struct btrfs_block_group_cache *block_group;
	int ret;

	ret = radix_tree_gang_lookup(&info->block_group_radix,
				     (void **)&block_group,
				     blocknr, 1);
	if (ret) {
C
Chris Mason 已提交
149
		if (block_group->key.objectid <= blocknr && blocknr <=
C
Chris Mason 已提交
150 151 152 153 154 155 156
		    block_group->key.objectid + block_group->key.offset)
			return block_group;
	}
	ret = radix_tree_gang_lookup(&info->block_group_data_radix,
				     (void **)&block_group,
				     blocknr, 1);
	if (ret) {
C
Chris Mason 已提交
157
		if (block_group->key.objectid <= blocknr && blocknr <=
C
Chris Mason 已提交
158 159 160 161 162 163
		    block_group->key.objectid + block_group->key.offset)
			return block_group;
	}
	return NULL;
}

164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
static u64 leaf_range(struct btrfs_root *root)
{
	u64 size = BTRFS_LEAF_DATA_SIZE(root);
	size = size / (sizeof(struct btrfs_extent_item) +
		       sizeof(struct btrfs_item));
	return size;
}

static u64 find_search_start(struct btrfs_root *root,
			     struct btrfs_block_group_cache **cache_ret,
			     u64 search_start, int num)
{
	unsigned long gang[8];
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	u64 last = max(search_start, cache->key.objectid);

	if (cache->data)
		goto out;
	if (num > 1) {
		last = max(last, cache->last_prealloc);
	}
again:
	cache_block_group(root, cache);
	while(1) {
		ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
					   gang, last, ARRAY_SIZE(gang));
		if (!ret)
			goto out;
		last = gang[ret-1] + 1;
		if (num > 1) {
			if (ret != ARRAY_SIZE(gang)) {
				goto new_group;
			}
			if (gang[ret-1] - gang[0] > leaf_range(root)) {
				continue;
			}
		}
		if (gang[0] >= cache->key.objectid + cache->key.offset) {
			goto new_group;
		}
		return gang[0];
	}
out:
	return max(cache->last_alloc, search_start);

new_group:
211 212
	cache = btrfs_lookup_block_group(root->fs_info,
					 last + cache->key.offset - 1);
213 214 215 216
	if (!cache) {
		return max((*cache_ret)->last_alloc, search_start);
	}
	cache = btrfs_find_block_group(root, cache,
217
				       last + cache->key.offset - 1, 0, 0);
218 219 220 221
	*cache_ret = cache;
	goto again;
}

222 223
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
C
Chris Mason 已提交
224
						 *hint, u64 search_start,
225
						 int data, int owner)
C
Chris Mason 已提交
226 227
{
	struct btrfs_block_group_cache *cache[8];
228
	struct btrfs_block_group_cache *found_group = NULL;
C
Chris Mason 已提交
229
	struct btrfs_fs_info *info = root->fs_info;
C
Chris Mason 已提交
230
	struct radix_tree_root *radix;
C
Chris Mason 已提交
231
	struct radix_tree_root *swap_radix;
C
Chris Mason 已提交
232
	u64 used;
233 234
	u64 last = 0;
	u64 hint_last;
C
Chris Mason 已提交
235 236
	int i;
	int ret;
237
	int full_search = 0;
238
	int factor = 8;
C
Chris Mason 已提交
239
	int data_swap = 0;
240 241 242

	if (!owner)
		factor = 5;
C
Chris Mason 已提交
243

C
Chris Mason 已提交
244
	if (data) {
C
Chris Mason 已提交
245
		radix = &info->block_group_data_radix;
C
Chris Mason 已提交
246 247
		swap_radix = &info->block_group_radix;
	} else {
C
Chris Mason 已提交
248
		radix = &info->block_group_radix;
C
Chris Mason 已提交
249 250
		swap_radix = &info->block_group_data_radix;
	}
C
Chris Mason 已提交
251 252 253

	if (search_start) {
		struct btrfs_block_group_cache *shint;
254
		shint = btrfs_lookup_block_group(info, search_start);
C
Chris Mason 已提交
255 256 257
		if (shint->data == data) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
258
			    (shint->key.offset * factor) / 10) {
C
Chris Mason 已提交
259 260 261 262 263
				return shint;
			}
		}
	}
	if (hint && hint->data == data) {
264
		used = btrfs_block_group_used(&hint->item);
265
		if (used + hint->pinned < (hint->key.offset * factor) / 10) {
266 267
			return hint;
		}
C
Chris Mason 已提交
268 269 270 271 272 273
		if (used >= (hint->key.offset * 8) / 10) {
			radix_tree_tag_clear(radix,
					     hint->key.objectid +
					     hint->key.offset - 1,
					     BTRFS_BLOCK_GROUP_AVAIL);
		}
274
		last = hint->key.offset * 3;
C
Chris Mason 已提交
275
		if (hint->key.objectid >= last)
276 277
			last = max(search_start + hint->key.offset - 1,
				   hint->key.objectid - last);
C
Chris Mason 已提交
278 279
		else
			last = hint->key.objectid + hint->key.offset;
280 281
		hint_last = last;
	} else {
282 283 284 285 286 287
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
288
	}
C
Chris Mason 已提交
289
	while(1) {
C
Chris Mason 已提交
290
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
C
Chris Mason 已提交
291
						 last, ARRAY_SIZE(cache),
292
						 BTRFS_BLOCK_GROUP_AVAIL);
C
Chris Mason 已提交
293 294 295
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
296 297
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
C
Chris Mason 已提交
298
			used = btrfs_block_group_used(&cache[i]->item);
C
Chris Mason 已提交
299
			if (used + cache[i]->pinned <
300
			    (cache[i]->key.offset * factor) / 10) {
301 302
				found_group = cache[i];
				goto found;
C
Chris Mason 已提交
303
			}
C
Chris Mason 已提交
304 305 306 307 308 309
			if (used >= (cache[i]->key.offset * 8) / 10) {
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
310
		}
311
		cond_resched();
C
Chris Mason 已提交
312
	}
313 314
	last = hint_last;
again:
C
Chris Mason 已提交
315
	while(1) {
C
Chris Mason 已提交
316 317
		ret = radix_tree_gang_lookup(radix, (void **)cache,
					     last, ARRAY_SIZE(cache));
C
Chris Mason 已提交
318 319 320
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
321 322
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
C
Chris Mason 已提交
323
			used = btrfs_block_group_used(&cache[i]->item);
C
Chris Mason 已提交
324
			if (used + cache[i]->pinned < cache[i]->key.offset) {
325 326
				found_group = cache[i];
				goto found;
C
Chris Mason 已提交
327
			}
C
Chris Mason 已提交
328 329 330 331 332 333
			if (used >= cache[i]->key.offset) {
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
334
		}
335
		cond_resched();
C
Chris Mason 已提交
336
	}
337
	if (!full_search) {
C
Chris Mason 已提交
338
		last = search_start;
339 340 341
		full_search = 1;
		goto again;
	}
C
Chris Mason 已提交
342 343 344 345 346 347 348 349
	if (!data_swap) {
		struct radix_tree_root *tmp = radix;
		data_swap = 1;
		radix = swap_radix;
		swap_radix = tmp;
		last = search_start;
		goto again;
	}
350
	if (!found_group) {
351
printk("find block group bailing to zero data %d\n", data);
C
Chris Mason 已提交
352
		ret = radix_tree_gang_lookup(radix,
353
					     (void **)&found_group, 0, 1);
C
Chris Mason 已提交
354 355 356 357 358
		if (ret == 0) {
			ret = radix_tree_gang_lookup(swap_radix,
						     (void **)&found_group,
						     0, 1);
		}
359 360
		BUG_ON(ret != 1);
	}
C
Chris Mason 已提交
361
found:
362
	return found_group;
C
Chris Mason 已提交
363 364
}

C
Chris Mason 已提交
365 366 367
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 blocknr, u64 num_blocks)
C
Chris Mason 已提交
368
{
369
	struct btrfs_path *path;
C
Chris Mason 已提交
370
	int ret;
C
Chris Mason 已提交
371
	struct btrfs_key key;
C
Chris Mason 已提交
372 373
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
C
Chris Mason 已提交
374
	struct btrfs_key ins;
C
Chris Mason 已提交
375
	u32 refs;
C
Chris Mason 已提交
376

377
	find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0,
C
Chris Mason 已提交
378
			 &ins, 0);
379 380 381
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
C
Chris Mason 已提交
382 383
	key.objectid = blocknr;
	key.flags = 0;
384
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
385
	key.offset = num_blocks;
386
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
387
				0, 1);
388 389
	if (ret != 0) {
printk("can't find block %Lu %Lu\n", blocknr, num_blocks);
390
		BUG();
391
	}
C
Chris Mason 已提交
392
	BUG_ON(ret != 0);
393 394
	l = btrfs_buffer_leaf(path->nodes[0]);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
C
Chris Mason 已提交
395 396
	refs = btrfs_extent_refs(item);
	btrfs_set_extent_refs(item, refs + 1);
397
	btrfs_mark_buffer_dirty(path->nodes[0]);
398

399 400
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
401
	finish_current_insert(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
402
	del_pending_extents(trans, root->fs_info->extent_root);
C
Chris Mason 已提交
403 404 405
	return 0;
}

C
Chris Mason 已提交
406 407 408
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 blocknr,
			     u64 num_blocks, u32 *refs)
409
{
410
	struct btrfs_path *path;
411
	int ret;
C
Chris Mason 已提交
412
	struct btrfs_key key;
C
Chris Mason 已提交
413 414
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
415 416 417

	path = btrfs_alloc_path();
	btrfs_init_path(path);
418
	key.objectid = blocknr;
419
	key.offset = num_blocks;
420 421
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
422
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
423
				0, 0);
424 425
	if (ret != 0)
		BUG();
426 427
	l = btrfs_buffer_leaf(path->nodes[0]);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
C
Chris Mason 已提交
428
	*refs = btrfs_extent_refs(item);
429 430
	btrfs_release_path(root->fs_info->extent_root, path);
	btrfs_free_path(path);
431 432 433
	return 0;
}

C
Chris Mason 已提交
434 435 436
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root)
{
C
Chris Mason 已提交
437
	return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
C
Chris Mason 已提交
438 439
}

440
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
C
Chris Mason 已提交
441
		  struct buffer_head *buf)
C
Chris Mason 已提交
442 443
{
	u64 blocknr;
C
Chris Mason 已提交
444
	struct btrfs_node *buf_node;
445 446 447
	struct btrfs_leaf *buf_leaf;
	struct btrfs_disk_key *key;
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
448
	int i;
449 450
	int leaf;
	int ret;
451

452
	if (!root->ref_cows)
453
		return 0;
C
Chris Mason 已提交
454
	buf_node = btrfs_buffer_node(buf);
455 456
	leaf = btrfs_is_leaf(buf_node);
	buf_leaf = btrfs_buffer_leaf(buf);
C
Chris Mason 已提交
457
	for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
458
		if (leaf) {
C
Chris Mason 已提交
459
			u64 disk_blocknr;
460 461 462 463 464
			key = &buf_leaf->items[i].key;
			if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf_leaf, i,
					    struct btrfs_file_extent_item);
465 466 467
			if (btrfs_file_extent_type(fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
C
Chris Mason 已提交
468 469 470 471
			disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
			if (disk_blocknr == 0)
				continue;
			ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
472 473 474 475
				    btrfs_file_extent_disk_num_blocks(fi));
			BUG_ON(ret);
		} else {
			blocknr = btrfs_node_blockptr(buf_node, i);
C
Chris Mason 已提交
476
			ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
477 478
			BUG_ON(ret);
		}
C
Chris Mason 已提交
479 480 481 482
	}
	return 0;
}

C
Chris Mason 已提交
483 484 485 486 487 488 489 490 491 492 493
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_block_group_item *bi;
	struct btrfs_key ins;

494
	find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0);
C
Chris Mason 已提交
495 496 497 498 499 500 501 502 503 504 505 506 507 508
	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	BUG_ON(ret);
	bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
			    struct btrfs_block_group_item);
	memcpy(bi, &cache->item, sizeof(*bi));
	mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(extent_root, path);

	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
C
Chris Mason 已提交
509 510
	if (cache->data)
		cache->last_alloc = cache->first_free;
C
Chris Mason 已提交
511 512 513 514
	return 0;

}

C
Chris Mason 已提交
515 516 517
static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct radix_tree_root *radix)
C
Chris Mason 已提交
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
{
	struct btrfs_block_group_cache *cache[8];
	int ret;
	int err = 0;
	int werr = 0;
	int i;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
						 0, ARRAY_SIZE(cache),
						 BTRFS_BLOCK_GROUP_DIRTY);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			radix_tree_tag_clear(radix, cache[i]->key.objectid +
					     cache[i]->key.offset - 1,
					     BTRFS_BLOCK_GROUP_DIRTY);
			err = write_one_cache_group(trans, root,
						    path, cache[i]);
			if (err)
				werr = err;
		}
	}
	btrfs_free_path(path);
	return werr;
}

C
Chris Mason 已提交
550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int ret;
	int ret2;
	ret = write_dirty_block_radix(trans, root,
				      &root->fs_info->block_group_radix);
	ret2 = write_dirty_block_radix(trans, root,
				      &root->fs_info->block_group_data_radix);
	if (ret)
		return ret;
	if (ret2)
		return ret2;
	return 0;
}

C
Chris Mason 已提交
566 567
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
C
Chris Mason 已提交
568 569
			      u64 blocknr, u64 num, int alloc, int mark_free,
			      int data)
C
Chris Mason 已提交
570 571 572 573 574 575
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num;
	u64 old_val;
	u64 block_in_group;
576
	u64 i;
C
Chris Mason 已提交
577
	int ret;
C
Chris Mason 已提交
578

C
Chris Mason 已提交
579
	while(total) {
580
		cache = btrfs_lookup_block_group(info, blocknr);
C
Chris Mason 已提交
581
		if (!cache) {
C
Chris Mason 已提交
582 583
			printk(KERN_CRIT "blocknr %Lu lookup failed\n",
			       blocknr);
C
Chris Mason 已提交
584
			return -1;
C
Chris Mason 已提交
585
		}
C
Chris Mason 已提交
586 587
		block_in_group = blocknr - cache->key.objectid;
		WARN_ON(block_in_group > cache->key.offset);
C
Chris Mason 已提交
588
		radix_tree_tag_set(cache->radix, cache->key.objectid +
C
Chris Mason 已提交
589
				   cache->key.offset - 1,
C
Chris Mason 已提交
590 591 592 593
				   BTRFS_BLOCK_GROUP_DIRTY);

		old_val = btrfs_block_group_used(&cache->item);
		num = min(total, cache->key.offset - block_in_group);
C
Chris Mason 已提交
594 595 596
		if (alloc) {
			if (blocknr > cache->last_alloc)
				cache->last_alloc = blocknr;
597 598 599 600 601 602
			if (!cache->data) {
				for (i = 0; i < num; i++) {
					clear_radix_bit(&info->extent_map_radix,
						        blocknr + i);
				}
			}
C
Chris Mason 已提交
603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626
			if (cache->data != data &&
			    old_val < cache->key.offset / 2) {
printk("changing block group %Lu from %d to %d\n", cache->key.objectid, cache->data, data);
				cache->data = data;
				radix_tree_delete(cache->radix,
						  cache->key.objectid +
						  cache->key.offset - 1);

				if (data) {
					cache->radix =
						&info->block_group_data_radix;
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
					cache->radix = &info->block_group_radix;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
				ret = radix_tree_insert(cache->radix,
							cache->key.objectid +
							cache->key.offset - 1,
							(void *)cache);
			}
			old_val += num;
C
Chris Mason 已提交
627
		} else {
C
Chris Mason 已提交
628
			old_val -= num;
C
Chris Mason 已提交
629 630
			if (blocknr < cache->first_free)
				cache->first_free = blocknr;
631 632 633 634 635 636
			if (!cache->data && mark_free) {
				for (i = 0; i < num; i++) {
					set_radix_bit(&info->extent_map_radix,
						      blocknr + i);
				}
			}
C
Chris Mason 已提交
637 638
			if (old_val < cache->key.offset / 2 &&
			    old_val + num >= cache->key.offset / 2) {
639 640 641 642 643 644
printk("group %Lu now available\n", cache->key.objectid);
				radix_tree_tag_set(cache->radix,
						   cache->key.objectid +
						   cache->key.offset - 1,
						   BTRFS_BLOCK_GROUP_AVAIL);
			}
C
Chris Mason 已提交
645
		}
C
Chris Mason 已提交
646
		btrfs_set_block_group_used(&cache->item, old_val);
647 648
		total -= num;
		blocknr += num;
C
Chris Mason 已提交
649 650 651 652
	}
	return 0;
}

C
Chris Mason 已提交
653 654 655 656 657 658 659
static int try_remove_page(struct address_space *mapping, unsigned long index)
{
	int ret;
	ret = invalidate_mapping_pages(mapping, index, index);
	return ret;
}

660 661
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
			       btrfs_root *root)
662
{
C
Chris Mason 已提交
663
	unsigned long gang[8];
C
Chris Mason 已提交
664
	struct inode *btree_inode = root->fs_info->btree_inode;
C
Chris Mason 已提交
665
	struct btrfs_block_group_cache *block_group;
666
	u64 first = 0;
667 668
	int ret;
	int i;
C
Chris Mason 已提交
669
	struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
670
	struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
671 672

	while(1) {
673
		ret = find_first_radix_bit(pinned_radix, gang, 0,
C
Chris Mason 已提交
674
					   ARRAY_SIZE(gang));
675 676
		if (!ret)
			break;
677
		if (!first)
C
Chris Mason 已提交
678
			first = gang[0];
679
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
680
			clear_radix_bit(pinned_radix, gang[i]);
681 682
			block_group = btrfs_lookup_block_group(root->fs_info,
							       gang[i]);
C
Chris Mason 已提交
683 684 685 686 687
			if (block_group) {
				WARN_ON(block_group->pinned == 0);
				block_group->pinned--;
				if (gang[i] < block_group->last_alloc)
					block_group->last_alloc = gang[i];
688 689 690 691
				if (gang[i] < block_group->last_prealloc)
					block_group->last_prealloc = gang[i];
				if (!block_group->data)
					set_radix_bit(extent_radix, gang[i]);
C
Chris Mason 已提交
692
			}
C
Chris Mason 已提交
693 694 695
			try_remove_page(btree_inode->i_mapping,
					gang[i] << (PAGE_CACHE_SHIFT -
						    btree_inode->i_blkbits));
696
		}
697 698 699 700
	}
	return 0;
}

701 702
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
C
Chris Mason 已提交
703
{
C
Chris Mason 已提交
704
	struct btrfs_key ins;
C
Chris Mason 已提交
705
	struct btrfs_extent_item extent_item;
C
Chris Mason 已提交
706 707
	int i;
	int ret;
708 709
	u64 super_blocks_used;
	struct btrfs_fs_info *info = extent_root->fs_info;
C
Chris Mason 已提交
710

C
Chris Mason 已提交
711
	btrfs_set_extent_refs(&extent_item, 1);
C
Chris Mason 已提交
712 713
	ins.offset = 1;
	ins.flags = 0;
714
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
715
	btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
C
Chris Mason 已提交
716

717 718
	for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
		ins.objectid = extent_root->fs_info->extent_tree_insert[i];
719 720 721
		super_blocks_used = btrfs_super_blocks_used(info->disk_super);
		btrfs_set_super_blocks_used(info->disk_super,
					    super_blocks_used + 1);
722 723
		ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
					sizeof(extent_item));
C
Chris Mason 已提交
724 725
		BUG_ON(ret);
	}
726 727
	extent_root->fs_info->extent_tree_insert_nr = 0;
	extent_root->fs_info->extent_tree_prealloc_nr = 0;
C
Chris Mason 已提交
728 729 730
	return 0;
}

C
Chris Mason 已提交
731
static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
C
Chris Mason 已提交
732 733
{
	int err;
C
Chris Mason 已提交
734
	struct btrfs_header *header;
C
Chris Mason 已提交
735 736
	struct buffer_head *bh;

C
Chris Mason 已提交
737
	if (!pending) {
738
		bh = btrfs_find_tree_block(root, blocknr);
C
Chris Mason 已提交
739 740 741 742 743 744 745 746 747 748
		if (bh) {
			if (buffer_uptodate(bh)) {
				u64 transid =
				    root->fs_info->running_transaction->transid;
				header = btrfs_buffer_header(bh);
				if (btrfs_header_generation(header) ==
				    transid) {
					btrfs_block_release(root, bh);
					return 0;
				}
C
Chris Mason 已提交
749
			}
C
Chris Mason 已提交
750
			btrfs_block_release(root, bh);
C
Chris Mason 已提交
751 752
		}
		err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
C
Chris Mason 已提交
753 754
		if (!err) {
			struct btrfs_block_group_cache *cache;
755 756
			cache = btrfs_lookup_block_group(root->fs_info,
							 blocknr);
C
Chris Mason 已提交
757 758 759
			if (cache)
				cache->pinned++;
		}
C
Chris Mason 已提交
760 761 762
	} else {
		err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
	}
C
Chris Mason 已提交
763
	BUG_ON(err < 0);
C
Chris Mason 已提交
764 765 766
	return 0;
}

767
/*
768
 * remove an extent from the root, returns 0 on success
769
 */
770
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
771 772
			 *root, u64 blocknr, u64 num_blocks, int pin,
			 int mark_free)
773
{
774
	struct btrfs_path *path;
C
Chris Mason 已提交
775
	struct btrfs_key key;
776 777
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
778
	int ret;
C
Chris Mason 已提交
779
	struct btrfs_extent_item *ei;
C
Chris Mason 已提交
780
	struct btrfs_key ins;
C
Chris Mason 已提交
781
	u32 refs;
C
Chris Mason 已提交
782

783 784
	key.objectid = blocknr;
	key.flags = 0;
785
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
786 787
	key.offset = num_blocks;

788
	find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0);
789 790 791
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
792

793
	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
794
	if (ret) {
795
		printk("failed to find %Lu\n", key.objectid);
C
Chris Mason 已提交
796
		btrfs_print_tree(extent_root, extent_root->node);
797
		printk("failed to find %Lu\n", key.objectid);
798 799
		BUG();
	}
800
	ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
C
Chris Mason 已提交
801
			    struct btrfs_extent_item);
802
	BUG_ON(ei->refs == 0);
C
Chris Mason 已提交
803 804
	refs = btrfs_extent_refs(ei) - 1;
	btrfs_set_extent_refs(ei, refs);
805
	btrfs_mark_buffer_dirty(path->nodes[0]);
C
Chris Mason 已提交
806
	if (refs == 0) {
807
		u64 super_blocks_used;
C
Chris Mason 已提交
808 809

		if (pin) {
C
Chris Mason 已提交
810
			ret = pin_down_block(root, blocknr, 0);
C
Chris Mason 已提交
811 812 813
			BUG_ON(ret);
		}

814 815 816
		super_blocks_used = btrfs_super_blocks_used(info->disk_super);
		btrfs_set_super_blocks_used(info->disk_super,
					    super_blocks_used - num_blocks);
817
		ret = btrfs_del_item(trans, extent_root, path);
818 819
		if (ret)
			BUG();
820
		ret = update_block_group(trans, root, blocknr, num_blocks, 0,
C
Chris Mason 已提交
821
					 mark_free, 0);
C
Chris Mason 已提交
822
		BUG_ON(ret);
823
	}
824
	btrfs_free_path(path);
825
	finish_current_insert(trans, extent_root);
826 827 828 829 830 831 832
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
833 834
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
835 836
{
	int ret;
C
Chris Mason 已提交
837 838
	int wret;
	int err = 0;
C
Chris Mason 已提交
839
	unsigned long gang[4];
840
	int i;
C
Chris Mason 已提交
841 842
	struct radix_tree_root *pending_radix;
	struct radix_tree_root *pinned_radix;
C
Chris Mason 已提交
843
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
844 845 846

	pending_radix = &extent_root->fs_info->pending_del_radix;
	pinned_radix = &extent_root->fs_info->pinned_radix;
847 848

	while(1) {
849
		ret = find_first_radix_bit(pending_radix, gang, 0,
C
Chris Mason 已提交
850
					   ARRAY_SIZE(gang));
851 852 853
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
854
			wret = set_radix_bit(pinned_radix, gang[i]);
C
Chris Mason 已提交
855
			if (wret == 0) {
856 857
				cache =
				  btrfs_lookup_block_group(extent_root->fs_info,
C
Chris Mason 已提交
858 859 860 861 862 863 864 865 866
							   gang[i]);
				if (cache)
					cache->pinned++;
			}
			if (wret < 0) {
				printk(KERN_CRIT "set_radix_bit, err %d\n",
				       wret);
				BUG_ON(wret < 0);
			}
C
Chris Mason 已提交
867 868
			wret = clear_radix_bit(pending_radix, gang[i]);
			BUG_ON(wret);
869
			wret = __free_extent(trans, extent_root,
870
					     gang[i], 1, 0, 0);
C
Chris Mason 已提交
871 872
			if (wret)
				err = wret;
873 874
		}
	}
C
Chris Mason 已提交
875
	return err;
876 877 878 879 880
}

/*
 * remove an extent from the root, returns 0 on success
 */
881 882
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, u64 blocknr, u64 num_blocks, int pin)
883
{
884
	struct btrfs_root *extent_root = root->fs_info->extent_root;
885 886
	int pending_ret;
	int ret;
887

888
	if (root == extent_root) {
C
Chris Mason 已提交
889
		pin_down_block(root, blocknr, 1);
890 891
		return 0;
	}
892
	ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
C
Chris Mason 已提交
893
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
894 895 896 897 898 899 900
	return ret ? ret : pending_ret;
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
901
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
902 903 904
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
905 906
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			    *orig_root, u64 num_blocks, u64 search_start, u64
907 908
			    search_end, u64 hint_block,
			    struct btrfs_key *ins, int data)
909
{
910
	struct btrfs_path *path;
C
Chris Mason 已提交
911
	struct btrfs_key key;
912 913 914
	int ret;
	u64 hole_size = 0;
	int slot = 0;
C
Chris Mason 已提交
915
	u64 last_block = 0;
C
Chris Mason 已提交
916
	u64 test_block;
C
Chris Mason 已提交
917
	u64 orig_search_start = search_start;
918
	int start_found;
C
Chris Mason 已提交
919
	struct btrfs_leaf *l;
920
	struct btrfs_root * root = orig_root->fs_info->extent_root;
921
	struct btrfs_fs_info *info = root->fs_info;
922
	int total_needed = num_blocks;
923 924
	int total_found = 0;
	int fill_prealloc = 0;
C
Chris Mason 已提交
925
	int level;
C
Chris Mason 已提交
926
	struct btrfs_block_group_cache *block_group;
C
Chris Mason 已提交
927
	int full_scan = 0;
928
	int wrapped = 0;
929
	u64 limit;
930

931 932 933 934
	path = btrfs_alloc_path();
	ins->flags = 0;
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

C
Chris Mason 已提交
935
	level = btrfs_header_level(btrfs_buffer_header(root->node));
936 937 938
	if (num_blocks == 0) {
		fill_prealloc = 1;
		num_blocks = 1;
939
		total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
940
	}
C
Chris Mason 已提交
941 942
	if (search_end == (u64)-1)
		search_end = btrfs_super_total_blocks(info->disk_super);
943
	if (hint_block) {
944
		block_group = btrfs_lookup_block_group(info, hint_block);
C
Chris Mason 已提交
945
		block_group = btrfs_find_block_group(root, block_group,
946
						     hint_block, data, 1);
C
Chris Mason 已提交
947 948 949
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group, 0,
950
						     data, 1);
C
Chris Mason 已提交
951 952 953
	}

check_failed:
C
Chris Mason 已提交
954
	if (!block_group->data)
955 956
		search_start = find_search_start(root, &block_group,
						 search_start, total_needed);
957
	else if (!full_scan)
958 959
		search_start = max(block_group->last_alloc, search_start);

960
	btrfs_init_path(path);
961 962 963
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
964

965
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
C
Chris Mason 已提交
966 967
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
968

969
	if (path->slots[0] > 0) {
970
		path->slots[0]--;
971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991
	}

	l = btrfs_buffer_leaf(path->nodes[0]);
	btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
	/*
	 * a rare case, go back one key if we hit a block group item
	 * instead of an extent item
	 */
	if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
	    key.objectid + key.offset >= search_start) {
		ins->objectid = key.objectid;
		ins->offset = key.offset - 1;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
		if (ret < 0)
			goto error;

		if (path->slots[0] > 0) {
			path->slots[0]--;
		}
	}
992

993
	while (1) {
994 995
		l = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
996
		if (slot >= btrfs_header_nritems(&l->header)) {
997 998 999 1000
			if (fill_prealloc) {
				info->extent_tree_prealloc_nr = 0;
				total_found = 0;
			}
1001 1002 1003 1004 1005 1006
			if (start_found)
				limit = last_block +
					block_group->key.offset / 2;
			else
				limit = search_start +
					block_group->key.offset / 2;
1007
			ret = btrfs_next_leaf(root, path);
1008 1009
			if (ret == 0)
				continue;
C
Chris Mason 已提交
1010 1011
			if (ret < 0)
				goto error;
1012 1013
			if (!start_found) {
				ins->objectid = search_start;
C
Chris Mason 已提交
1014
				ins->offset = search_end - search_start;
1015 1016 1017 1018 1019
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = last_block > search_start ?
					last_block : search_start;
C
Chris Mason 已提交
1020
			ins->offset = search_end - ins->objectid;
1021 1022
			goto check_pending;
		}
1023

C
Chris Mason 已提交
1024
		btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1025 1026 1027 1028 1029 1030 1031 1032 1033
		if (key.objectid >= search_start && key.objectid > last_block &&
		    start_found) {
			if (last_block < search_start)
				last_block = search_start;
			hole_size = key.objectid - last_block;
			if (hole_size >= num_blocks) {
				ins->objectid = last_block;
				ins->offset = hole_size;
				goto check_pending;
1034
			}
1035
		}
1036 1037 1038 1039

		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
			goto next;

1040
		start_found = 1;
C
Chris Mason 已提交
1041
		last_block = key.objectid + key.offset;
1042
		if (!full_scan && last_block >= block_group->key.objectid +
C
Chris Mason 已提交
1043 1044 1045 1046 1047 1048
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
				block_group->key.offset * 2;
			goto new_group;
		}
C
Chris Mason 已提交
1049
next:
1050
		path->slots[0]++;
1051
		cond_resched();
1052 1053 1054 1055 1056 1057
	}
	// FIXME -ENOSPC
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
1058
	btrfs_release_path(root, path);
1059
	BUG_ON(ins->objectid < search_start);
1060

C
Chris Mason 已提交
1061
	if (ins->objectid + num_blocks >= search_end) {
1062 1063 1064 1065
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
C
Chris Mason 已提交
1066
		search_start = orig_search_start;
1067 1068 1069 1070
		if (wrapped)
			full_scan = 1;
		else
			wrapped = 1;
C
Chris Mason 已提交
1071
		goto new_group;
1072
	}
C
Chris Mason 已提交
1073
	for (test_block = ins->objectid;
1074 1075
	     test_block < ins->objectid + num_blocks; test_block++) {
		if (test_radix_bit(&info->pinned_radix, test_block)) {
C
Chris Mason 已提交
1076
			search_start = test_block + 1;
C
Chris Mason 已提交
1077
			goto new_group;
1078 1079
		}
	}
1080 1081 1082 1083 1084 1085 1086
	if (!fill_prealloc && info->extent_tree_insert_nr) {
		u64 last =
		  info->extent_tree_insert[info->extent_tree_insert_nr - 1];
		if (ins->objectid + num_blocks >
		    info->extent_tree_insert[0] &&
		    ins->objectid <= last) {
			search_start = last + 1;
1087
			WARN_ON(!full_scan);
C
Chris Mason 已提交
1088
			goto new_group;
1089 1090 1091 1092 1093 1094 1095 1096
		}
	}
	if (!fill_prealloc && info->extent_tree_prealloc_nr) {
		u64 first =
		  info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
		if (ins->objectid + num_blocks > first &&
		    ins->objectid <= info->extent_tree_prealloc[0]) {
			search_start = info->extent_tree_prealloc[0] + 1;
1097
			WARN_ON(!full_scan);
C
Chris Mason 已提交
1098
			goto new_group;
1099 1100 1101 1102 1103
		}
	}
	if (fill_prealloc) {
		int nr;
		test_block = ins->objectid;
1104 1105 1106 1107 1108
		if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
		    leaf_range(root)) {
			total_found = 0;
			info->extent_tree_prealloc_nr = total_found;
		}
1109 1110 1111 1112
		while(test_block < ins->objectid + ins->offset &&
		      total_found < total_needed) {
			nr = total_needed - total_found - 1;
			BUG_ON(nr < 0);
C
Chris Mason 已提交
1113
			info->extent_tree_prealloc[nr] = test_block;
1114 1115 1116 1117 1118
			total_found++;
			test_block++;
		}
		if (total_found < total_needed) {
			search_start = test_block;
C
Chris Mason 已提交
1119
			goto new_group;
1120
		}
C
Chris Mason 已提交
1121 1122
		info->extent_tree_prealloc_nr = total_found;
	}
1123
	if (!data) {
1124
		block_group = btrfs_lookup_block_group(info, ins->objectid);
1125 1126 1127 1128 1129 1130 1131
		if (block_group) {
			if (fill_prealloc)
				block_group->last_prealloc =
				     info->extent_tree_prealloc[total_needed-1];
			else
				trans->block_group = block_group;
		}
1132
	}
C
Chris Mason 已提交
1133
	ins->offset = num_blocks;
1134
	btrfs_free_path(path);
1135
	return 0;
C
Chris Mason 已提交
1136 1137

new_group:
C
Chris Mason 已提交
1138
	if (search_start + num_blocks >= search_end) {
C
Chris Mason 已提交
1139
		search_start = orig_search_start;
1140 1141 1142 1143 1144 1145 1146 1147
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
		if (wrapped)
			full_scan = 1;
		else
			wrapped = 1;
C
Chris Mason 已提交
1148
	}
1149
	block_group = btrfs_lookup_block_group(info, search_start);
1150
	cond_resched();
C
Chris Mason 已提交
1151 1152
	if (!full_scan)
		block_group = btrfs_find_block_group(root, block_group,
1153
						     search_start, data, 0);
C
Chris Mason 已提交
1154 1155
	goto check_failed;

C
Chris Mason 已提交
1156
error:
1157 1158
	btrfs_release_path(root, path);
	btrfs_free_path(path);
C
Chris Mason 已提交
1159
	return ret;
1160 1161 1162 1163 1164 1165 1166 1167
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
1168 1169
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, u64 owner,
1170
		       u64 num_blocks, u64 hint_block,
C
Chris Mason 已提交
1171
		       u64 search_end, struct btrfs_key *ins, int data)
1172 1173 1174
{
	int ret;
	int pending_ret;
1175
	u64 super_blocks_used;
1176
	u64 search_start = 0;
1177 1178
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
C
Chris Mason 已提交
1179
	struct btrfs_extent_item extent_item;
1180
	struct btrfs_key prealloc_key;
C
Chris Mason 已提交
1181

C
Chris Mason 已提交
1182
	btrfs_set_extent_refs(&extent_item, 1);
1183
	btrfs_set_extent_owner(&extent_item, owner);
1184

C
Chris Mason 已提交
1185
	if (root == extent_root) {
1186 1187
		int nr;
		BUG_ON(info->extent_tree_prealloc_nr == 0);
C
Chris Mason 已提交
1188 1189
		BUG_ON(num_blocks != 1);
		ins->offset = 1;
1190 1191 1192 1193 1194
		info->extent_tree_prealloc_nr--;
		nr = info->extent_tree_prealloc_nr;
		ins->objectid = info->extent_tree_prealloc[nr];
		info->extent_tree_insert[info->extent_tree_insert_nr++] =
			ins->objectid;
C
Chris Mason 已提交
1195
		ret = update_block_group(trans, root,
C
Chris Mason 已提交
1196
					 ins->objectid, ins->offset, 1, 0, 0);
C
Chris Mason 已提交
1197
		BUG_ON(ret);
1198 1199
		return 0;
	}
1200 1201 1202 1203 1204 1205 1206

	/*
	 * if we're doing a data allocation, preallocate room in the
	 * extent tree first.  This way the extent tree blocks end up
	 * in the correct block group.
	 */
	if (data) {
1207
		ret = find_free_extent(trans, root, 0, 0,
1208
				       search_end, 0, &prealloc_key, 0);
1209 1210 1211 1212 1213 1214 1215 1216 1217 1218
		if (ret) {
			return ret;
		}
		if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
			int nr = info->extent_tree_prealloc_nr;
			search_end = info->extent_tree_prealloc[nr - 1] - 1;
		} else {
			search_start = info->extent_tree_prealloc[0] + 1;
		}
	}
1219 1220
	if (hint_block < search_start)
		hint_block = search_start;
1221
	/* do the real allocation */
1222
	ret = find_free_extent(trans, root, num_blocks, search_start,
1223
			       search_end, hint_block, ins, data);
1224
	if (ret) {
C
Chris Mason 已提交
1225
		return ret;
1226
	}
1227

1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241
	/*
	 * if we're doing a metadata allocation, preallocate space in the
	 * extent tree second.  This way, we don't create a tiny hole
	 * in the allocation map between any unused preallocation blocks
	 * and the metadata block we're actually allocating.  On disk,
	 * it'll go:
	 * [block we've allocated], [used prealloc 1], [ unused prealloc ]
	 * The unused prealloc will get reused the next time around.
	 */
	if (!data) {
		if (ins->objectid + ins->offset >= search_end)
			search_end = ins->objectid - 1;
		else
			search_start = ins->objectid + ins->offset;
C
Chris Mason 已提交
1242

1243 1244 1245
		if (hint_block < search_start)
			hint_block = search_start;

1246
		ret = find_free_extent(trans, root, 0, search_start,
1247 1248
				       search_end, hint_block,
				       &prealloc_key, 0);
1249 1250 1251 1252
		if (ret) {
			return ret;
		}
	}
1253

1254 1255 1256
	super_blocks_used = btrfs_super_blocks_used(info->disk_super);
	btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
				    num_blocks);
1257 1258
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));
C
Chris Mason 已提交
1259

1260
	finish_current_insert(trans, extent_root);
C
Chris Mason 已提交
1261
	pending_ret = del_pending_extents(trans, extent_root);
1262
	if (ret) {
C
Chris Mason 已提交
1263
		return ret;
1264 1265
	}
	if (pending_ret) {
C
Chris Mason 已提交
1266
		return pending_ret;
1267
	}
C
Chris Mason 已提交
1268 1269
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
C
Chris Mason 已提交
1270
	BUG_ON(ret);
C
Chris Mason 已提交
1271
	return 0;
1272 1273 1274 1275 1276 1277
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
C
Chris Mason 已提交
1278
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1279
					   struct btrfs_root *root, u64 hint)
1280
{
C
Chris Mason 已提交
1281
	struct btrfs_key ins;
1282
	int ret;
C
Chris Mason 已提交
1283
	struct buffer_head *buf;
1284

1285
	ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1286
				 1, hint, (unsigned long)-1, &ins, 0);
1287 1288 1289 1290
	if (ret) {
		BUG();
		return NULL;
	}
C
Chris Mason 已提交
1291
	BUG_ON(ret);
1292
	buf = btrfs_find_create_tree_block(root, ins.objectid);
1293
	set_buffer_uptodate(buf);
C
Chris Mason 已提交
1294
	set_buffer_checked(buf);
1295
	set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1296 1297
	return buf;
}
1298

1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312
static int drop_leaf_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct buffer_head *cur)
{
	struct btrfs_disk_key *key;
	struct btrfs_leaf *leaf;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
	leaf = btrfs_buffer_leaf(cur);
	nritems = btrfs_header_nritems(&leaf->header);
	for (i = 0; i < nritems; i++) {
C
Chris Mason 已提交
1313
		u64 disk_blocknr;
1314 1315 1316 1317
		key = &leaf->items[i].key;
		if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1318 1319
		if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
			continue;
1320 1321 1322 1323
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
C
Chris Mason 已提交
1324 1325 1326 1327
		disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
		if (disk_blocknr == 0)
			continue;
		ret = btrfs_free_extent(trans, root, disk_blocknr,
1328 1329 1330 1331 1332 1333 1334
					btrfs_file_extent_disk_num_blocks(fi),
					0);
		BUG_ON(ret);
	}
	return 0;
}

C
Chris Mason 已提交
1335 1336 1337 1338
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
1339 1340
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1341
{
C
Chris Mason 已提交
1342 1343
	struct buffer_head *next;
	struct buffer_head *cur;
C
Chris Mason 已提交
1344 1345 1346 1347
	u64 blocknr;
	int ret;
	u32 refs;

1348 1349
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1350
	ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1351
			       1, &refs);
C
Chris Mason 已提交
1352 1353 1354
	BUG_ON(ret);
	if (refs > 1)
		goto out;
C
Chris Mason 已提交
1355 1356 1357
	/*
	 * walk down to the last node level and free all the leaves
	 */
1358
	while(*level >= 0) {
1359 1360
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
C
Chris Mason 已提交
1361
		cur = path->nodes[*level];
C
Chris Mason 已提交
1362 1363
		if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
			WARN_ON(1);
1364
		if (path->slots[*level] >=
C
Chris Mason 已提交
1365
		    btrfs_header_nritems(btrfs_buffer_header(cur)))
C
Chris Mason 已提交
1366
			break;
1367 1368 1369 1370 1371
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
C
Chris Mason 已提交
1372 1373
		blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
					      path->slots[*level]);
C
Chris Mason 已提交
1374
		ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1375 1376
		BUG_ON(ret);
		if (refs != 1) {
C
Chris Mason 已提交
1377
			path->slots[*level]++;
1378
			ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
C
Chris Mason 已提交
1379 1380 1381 1382
			BUG_ON(ret);
			continue;
		}
		next = read_tree_block(root, blocknr);
1383
		WARN_ON(*level <= 0);
C
Chris Mason 已提交
1384
		if (path->nodes[*level-1])
C
Chris Mason 已提交
1385
			btrfs_block_release(root, path->nodes[*level-1]);
C
Chris Mason 已提交
1386
		path->nodes[*level-1] = next;
C
Chris Mason 已提交
1387
		*level = btrfs_header_level(btrfs_buffer_header(next));
C
Chris Mason 已提交
1388 1389 1390
		path->slots[*level] = 0;
	}
out:
1391 1392
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1393
	ret = btrfs_free_extent(trans, root,
1394
				bh_blocknr(path->nodes[*level]), 1, 1);
C
Chris Mason 已提交
1395
	btrfs_block_release(root, path->nodes[*level]);
C
Chris Mason 已提交
1396 1397 1398 1399 1400 1401
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

C
Chris Mason 已提交
1402 1403 1404 1405 1406
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
1407 1408
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
C
Chris Mason 已提交
1409 1410 1411 1412
{
	int i;
	int slot;
	int ret;
C
Chris Mason 已提交
1413
	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
C
Chris Mason 已提交
1414
		slot = path->slots[i];
C
Chris Mason 已提交
1415 1416
		if (slot < btrfs_header_nritems(
		    btrfs_buffer_header(path->nodes[i])) - 1) {
C
Chris Mason 已提交
1417 1418 1419 1420
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
1421
			ret = btrfs_free_extent(trans, root,
1422
						bh_blocknr(path->nodes[*level]),
1423
						1, 1);
1424
			BUG_ON(ret);
C
Chris Mason 已提交
1425
			btrfs_block_release(root, path->nodes[*level]);
C
Chris Mason 已提交
1426
			path->nodes[*level] = NULL;
C
Chris Mason 已提交
1427 1428 1429 1430 1431 1432
			*level = i + 1;
		}
	}
	return 1;
}

C
Chris Mason 已提交
1433 1434 1435 1436 1437
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
1438
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
C
Chris Mason 已提交
1439
			*root, struct buffer_head *snap)
C
Chris Mason 已提交
1440
{
1441
	int ret = 0;
C
Chris Mason 已提交
1442
	int wret;
C
Chris Mason 已提交
1443
	int level;
1444
	struct btrfs_path *path;
C
Chris Mason 已提交
1445 1446 1447
	int i;
	int orig_level;

1448 1449 1450
	path = btrfs_alloc_path();
	BUG_ON(!path);
	btrfs_init_path(path);
C
Chris Mason 已提交
1451

C
Chris Mason 已提交
1452
	level = btrfs_header_level(btrfs_buffer_header(snap));
C
Chris Mason 已提交
1453
	orig_level = level;
1454 1455
	path->nodes[level] = snap;
	path->slots[level] = 0;
C
Chris Mason 已提交
1456
	while(1) {
1457
		wret = walk_down_tree(trans, root, path, &level);
C
Chris Mason 已提交
1458
		if (wret > 0)
C
Chris Mason 已提交
1459
			break;
C
Chris Mason 已提交
1460 1461 1462
		if (wret < 0)
			ret = wret;

1463
		wret = walk_up_tree(trans, root, path, &level);
C
Chris Mason 已提交
1464
		if (wret > 0)
C
Chris Mason 已提交
1465
			break;
C
Chris Mason 已提交
1466 1467
		if (wret < 0)
			ret = wret;
C
Chris Mason 已提交
1468
		btrfs_btree_balance_dirty(root);
C
Chris Mason 已提交
1469
	}
C
Chris Mason 已提交
1470
	for (i = 0; i <= orig_level; i++) {
1471 1472
		if (path->nodes[i]) {
			btrfs_block_release(root, path->nodes[i]);
C
Chris Mason 已提交
1473
		}
C
Chris Mason 已提交
1474
	}
1475
	btrfs_free_path(path);
C
Chris Mason 已提交
1476
	return ret;
C
Chris Mason 已提交
1477
}
C
Chris Mason 已提交
1478

C
Chris Mason 已提交
1479
static int free_block_group_radix(struct radix_tree_root *radix)
C
Chris Mason 已提交
1480 1481 1482 1483 1484 1485
{
	int ret;
	struct btrfs_block_group_cache *cache[8];
	int i;

	while(1) {
C
Chris Mason 已提交
1486
		ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
C
Chris Mason 已提交
1487 1488 1489 1490
					     ARRAY_SIZE(cache));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
C
Chris Mason 已提交
1491
			radix_tree_delete(radix, cache[i]->key.objectid +
C
Chris Mason 已提交
1492 1493 1494 1495 1496 1497 1498
					  cache[i]->key.offset - 1);
			kfree(cache[i]);
		}
	}
	return 0;
}

C
Chris Mason 已提交
1499 1500 1501 1502
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	int ret;
	int ret2;
1503 1504
	unsigned long gang[16];
	int i;
C
Chris Mason 已提交
1505 1506 1507 1508 1509 1510 1511

	ret = free_block_group_radix(&info->block_group_radix);
	ret2 = free_block_group_radix(&info->block_group_data_radix);
	if (ret)
		return ret;
	if (ret2)
		return ret2;
1512 1513 1514 1515 1516 1517 1518 1519 1520 1521

	while(1) {
		ret = find_first_radix_bit(&info->extent_map_radix,
					   gang, 0, ARRAY_SIZE(gang));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			clear_radix_bit(&info->extent_map_radix, gang[i]);
		}
	}
C
Chris Mason 已提交
1522 1523 1524
	return 0;
}

C
Chris Mason 已提交
1525 1526 1527 1528 1529 1530 1531
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	int err = 0;
	struct btrfs_block_group_item *bi;
	struct btrfs_block_group_cache *cache;
C
Chris Mason 已提交
1532 1533
	struct btrfs_fs_info *info = root->fs_info;
	struct radix_tree_root *radix;
C
Chris Mason 已提交
1534 1535 1536 1537
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_leaf *leaf;
	u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
1538
	u64 used;
C
Chris Mason 已提交
1539

C
Chris Mason 已提交
1540
	root = info->extent_root;
C
Chris Mason 已提交
1541 1542 1543 1544 1545 1546 1547 1548 1549 1550
	key.objectid = 0;
	key.offset = group_size_blocks;
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
C
Chris Mason 已提交
1551
		ret = btrfs_search_slot(NULL, info->extent_root,
C
Chris Mason 已提交
1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
			break;
		}
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		btrfs_disk_key_to_cpu(&found_key,
				      &leaf->items[path->slots[0]].key);
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			err = -1;
			break;
		}
C
Chris Mason 已提交
1565

C
Chris Mason 已提交
1566 1567 1568
		bi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_block_group_item);
		if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
C
Chris Mason 已提交
1569
			radix = &info->block_group_data_radix;
C
Chris Mason 已提交
1570 1571
			cache->data = 1;
		} else {
C
Chris Mason 已提交
1572
			radix = &info->block_group_radix;
C
Chris Mason 已提交
1573 1574
			cache->data = 0;
		}
C
Chris Mason 已提交
1575

C
Chris Mason 已提交
1576 1577
		memcpy(&cache->item, bi, sizeof(*bi));
		memcpy(&cache->key, &found_key, sizeof(found_key));
1578 1579
		cache->last_alloc = cache->key.objectid;
		cache->first_free = cache->key.objectid;
1580
		cache->last_prealloc = cache->key.objectid;
C
Chris Mason 已提交
1581
		cache->pinned = 0;
1582 1583
		cache->cached = 0;

C
Chris Mason 已提交
1584 1585
		cache->radix = radix;

C
Chris Mason 已提交
1586 1587
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
C
Chris Mason 已提交
1588
		ret = radix_tree_insert(radix, found_key.objectid +
C
Chris Mason 已提交
1589 1590 1591
					found_key.offset - 1,
					(void *)cache);
		BUG_ON(ret);
1592
		used = btrfs_block_group_used(bi);
C
Chris Mason 已提交
1593 1594
		if (used < (key.offset * 8) / 10) {
			radix_tree_tag_set(radix, found_key.objectid +
1595 1596 1597
					   found_key.offset - 1,
					   BTRFS_BLOCK_GROUP_AVAIL);
		}
C
Chris Mason 已提交
1598
		if (key.objectid >=
C
Chris Mason 已提交
1599
		    btrfs_super_total_blocks(info->disk_super))
C
Chris Mason 已提交
1600 1601 1602 1603 1604 1605
			break;
	}

	btrfs_free_path(path);
	return 0;
}