extent-tree.c 12.9 KB
Newer Older
1 2 3 4 5 6 7 8
#include <stdio.h>
#include <stdlib.h>
#include "kerncompat.h"
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"

C
Chris Mason 已提交
9 10 11 12 13
static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks,
			    u64 search_start, u64 search_end, struct key *ins);
static int finish_current_insert(struct ctree_root *extent_root);
static int run_pending(struct ctree_root *extent_root);

14 15 16 17 18 19 20
/*
 * pending extents are blocks that we're trying to allocate in the extent
 * map while trying to grow the map because of other allocations.  To avoid
 * recursing, they are tagged in the radix tree and cleaned up after
 * other allocations are done.  The pending tag is also used in the same
 * manner for deletes.
 */
C
Chris Mason 已提交
21
#define CTREE_EXTENT_PENDING_DEL 0
22

C
Chris Mason 已提交
23 24 25 26 27 28 29
static int inc_block_ref(struct ctree_root *root, u64 blocknr)
{
	struct ctree_path path;
	int ret;
	struct key key;
	struct leaf *l;
	struct extent_item *item;
C
Chris Mason 已提交
30 31 32
	struct key ins;

	find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins);
C
Chris Mason 已提交
33 34 35 36 37
	init_path(&path);
	key.objectid = blocknr;
	key.flags = 0;
	key.offset = 1;
	ret = search_slot(root->extent_root, &key, &path, 0, 1);
38 39
	if (ret != 0)
		BUG();
C
Chris Mason 已提交
40 41 42 43 44
	BUG_ON(ret != 0);
	l = &path.nodes[0]->leaf;
	item = (struct extent_item *)(l->data +
				      l->items[path.slots[0]].offset);
	item->refs++;
45

C
Chris Mason 已提交
46 47
	BUG_ON(list_empty(&path.nodes[0]->dirty));
	release_path(root->extent_root, &path);
C
Chris Mason 已提交
48 49
	finish_current_insert(root->extent_root);
	run_pending(root->extent_root);
C
Chris Mason 已提交
50 51 52
	return 0;
}

C
Chris Mason 已提交
53
static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs)
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
{
	struct ctree_path path;
	int ret;
	struct key key;
	struct leaf *l;
	struct extent_item *item;
	init_path(&path);
	key.objectid = blocknr;
	key.flags = 0;
	key.offset = 1;
	ret = search_slot(root->extent_root, &key, &path, 0, 0);
	if (ret != 0)
		BUG();
	l = &path.nodes[0]->leaf;
	item = (struct extent_item *)(l->data +
				      l->items[path.slots[0]].offset);
	*refs = item->refs;
	release_path(root->extent_root, &path);
	return 0;
}

C
Chris Mason 已提交
75 76 77 78
int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf)
{
	u64 blocknr;
	int i;
79 80 81 82 83 84

	if (root == root->extent_root)
		return 0;
	if (is_leaf(buf->node.header.flags))
		return 0;

C
Chris Mason 已提交
85 86 87 88 89 90 91
	for (i = 0; i < buf->node.header.nritems; i++) {
		blocknr = buf->node.blockptrs[i];
		inc_block_ref(root, blocknr);
	}
	return 0;
}

92 93 94 95 96 97 98 99 100 101 102 103 104
int btrfs_finish_extent_commit(struct ctree_root *root)
{
	struct ctree_root *extent_root = root->extent_root;
	unsigned long gang[8];
	int ret;
	int i;

	while(1) {
		ret = radix_tree_gang_lookup(&extent_root->pinned_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang));
		if (!ret)
			break;
105
		for (i = 0; i < ret; i++) {
106
			radix_tree_delete(&extent_root->pinned_radix, gang[i]);
107
		}
108
	}
109 110
	extent_root->last_insert.objectid = 0;
	extent_root->last_insert.offset = 0;
111 112 113
	return 0;
}

C
Chris Mason 已提交
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
static int finish_current_insert(struct ctree_root *extent_root)
{
	struct key ins;
	struct extent_item extent_item;
	int i;
	int ret;

	extent_item.refs = 1;
	extent_item.owner = extent_root->node->node.header.parentid;
	ins.offset = 1;
	ins.flags = 0;

	for (i = 0; i < extent_root->current_insert.flags; i++) {
		ins.objectid = extent_root->current_insert.objectid + i;
		ret = insert_item(extent_root, &ins, &extent_item,
				  sizeof(extent_item));
		BUG_ON(ret);
	}
	extent_root->current_insert.offset = 0;
	return 0;
}

136
/*
137
 * remove an extent from the root, returns 0 on success
138
 */
139 140 141 142 143 144 145 146
int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks)
{
	struct ctree_path path;
	struct key key;
	struct ctree_root *extent_root = root->extent_root;
	int ret;
	struct item *item;
	struct extent_item *ei;
C
Chris Mason 已提交
147 148
	struct key ins;

149 150 151 152
	key.objectid = blocknr;
	key.flags = 0;
	key.offset = num_blocks;

C
Chris Mason 已提交
153
	find_free_extent(root, 0, 0, (u64)-1, &ins);
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
	init_path(&path);
	ret = search_slot(extent_root, &key, &path, -1, 1);
	if (ret) {
		printf("failed to find %Lu\n", key.objectid);
		print_tree(extent_root, extent_root->node);
		printf("failed to find %Lu\n", key.objectid);
		BUG();
	}
	item = path.nodes[0]->leaf.items + path.slots[0];
	ei = (struct extent_item *)(path.nodes[0]->leaf.data + item->offset);
	BUG_ON(ei->refs == 0);
	ei->refs--;
	if (ei->refs == 0) {
		if (root == extent_root) {
			int err;
			radix_tree_preload(GFP_KERNEL);
			err = radix_tree_insert(&extent_root->pinned_radix,
					  blocknr, (void *)blocknr);
			BUG_ON(err);
			radix_tree_preload_end();
		}
		ret = del_item(extent_root, &path);
176 177 178
		if (root != extent_root &&
		    extent_root->last_insert.objectid < blocknr)
			extent_root->last_insert.objectid = blocknr;
179 180 181 182
		if (ret)
			BUG();
	}
	release_path(extent_root, &path);
C
Chris Mason 已提交
183
	finish_current_insert(extent_root);
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
static int del_pending_extents(struct ctree_root *extent_root)
{
	int ret;
	struct tree_buffer *gang[4];
	int i;

	while(1) {
		ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 CTREE_EXTENT_PENDING_DEL);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			ret = __free_extent(extent_root, gang[i]->blocknr, 1);
206 207
			radix_tree_tag_clear(&extent_root->cache_radix,
						gang[i]->blocknr,
208
						CTREE_EXTENT_PENDING_DEL);
209 210 211 212 213 214
			tree_block_release(extent_root, gang[i]);
		}
	}
	return 0;
}

215 216 217
static int run_pending(struct ctree_root *extent_root)
{
	while(radix_tree_tagged(&extent_root->cache_radix,
C
Chris Mason 已提交
218
			        CTREE_EXTENT_PENDING_DEL))
219 220 221 222 223
		del_pending_extents(extent_root);
	return 0;
}


224 225 226 227 228 229 230 231 232 233
/*
 * remove an extent from the root, returns 0 on success
 */
int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks)
{
	struct key key;
	struct ctree_root *extent_root = root->extent_root;
	struct tree_buffer *t;
	int pending_ret;
	int ret;
234

235
	if (root == extent_root) {
236
		t = find_tree_block(root, blocknr);
C
Chris Mason 已提交
237
		radix_tree_tag_set(&root->cache_radix, blocknr,
238
				   CTREE_EXTENT_PENDING_DEL);
239 240
		return 0;
	}
241 242 243 244 245
	key.objectid = blocknr;
	key.flags = 0;
	key.offset = num_blocks;
	ret = __free_extent(root, blocknr, num_blocks);
	pending_ret = run_pending(root->extent_root);
246 247 248 249 250 251 252 253 254 255 256
	return ret ? ret : pending_ret;
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = 0
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
C
Chris Mason 已提交
257 258
static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks,
			    u64 search_start, u64 search_end, struct key *ins)
259 260 261 262 263 264 265
{
	struct ctree_path path;
	struct key *key;
	int ret;
	u64 hole_size = 0;
	int slot = 0;
	u64 last_block;
C
Chris Mason 已提交
266
	u64 test_block;
267 268 269
	int start_found;
	struct leaf *l;
	struct ctree_root * root = orig_root->extent_root;
270
	int total_needed = num_blocks;
271

272 273 274
	total_needed += (node_level(root->node->node.header.flags) + 1) * 3;
	if (root->last_insert.objectid > search_start)
		search_start = root->last_insert.objectid;
275 276 277 278 279 280
check_failed:
	init_path(&path);
	ins->objectid = search_start;
	ins->offset = 0;
	ins->flags = 0;
	start_found = 0;
C
Chris Mason 已提交
281
	ret = search_slot(root, ins, &path, 0, 0);
C
Chris Mason 已提交
282 283
	if (ret < 0)
		goto error;
C
Chris Mason 已提交
284

285 286 287
	if (path.slots[0] > 0)
		path.slots[0]--;

288 289 290 291 292 293 294
	while (1) {
		l = &path.nodes[0]->leaf;
		slot = path.slots[0];
		if (slot >= l->header.nritems) {
			ret = next_leaf(root, &path);
			if (ret == 0)
				continue;
C
Chris Mason 已提交
295 296
			if (ret < 0)
				goto error;
297 298
			if (!start_found) {
				ins->objectid = search_start;
C
Chris Mason 已提交
299
				ins->offset = (u64)-1;
300 301 302 303 304
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = last_block > search_start ?
					last_block : search_start;
C
Chris Mason 已提交
305
			ins->offset = (u64)-1;
306 307 308 309 310
			goto check_pending;
		}
		key = &l->items[slot].key;
		if (key->objectid >= search_start) {
			if (start_found) {
311 312
				if (last_block < search_start)
					last_block = search_start;
313
				hole_size = key->objectid - last_block;
C
Chris Mason 已提交
314
				if (hole_size > total_needed) {
315
					ins->objectid = last_block;
C
Chris Mason 已提交
316
					ins->offset = hole_size;
317 318
					goto check_pending;
				}
319
			}
320
		}
321 322
		start_found = 1;
		last_block = key->objectid + key->offset;
323 324 325 326 327 328 329 330 331
		path.slots[0]++;
	}
	// FIXME -ENOSPC
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	release_path(root, &path);
	BUG_ON(ins->objectid < search_start);
C
Chris Mason 已提交
332 333 334 335
	for (test_block = ins->objectid;
	     test_block < ins->objectid + total_needed; test_block++) {
		if (radix_tree_lookup(&root->pinned_radix, test_block)) {
			search_start = test_block + 1;
336 337 338
			goto check_failed;
		}
	}
C
Chris Mason 已提交
339
	BUG_ON(root->current_insert.offset);
340
	root->current_insert.offset = total_needed - num_blocks;
C
Chris Mason 已提交
341 342
	root->current_insert.objectid = ins->objectid + num_blocks;
	root->current_insert.flags = 0;
343
	root->last_insert.objectid = ins->objectid;
C
Chris Mason 已提交
344
	ins->offset = num_blocks;
345
	return 0;
C
Chris Mason 已提交
346 347 348
error:
	release_path(root, &path);
	return ret;
349 350 351 352 353 354 355 356 357 358
}

/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start,
C
Chris Mason 已提交
359
			 u64 search_end, u64 owner, struct key *ins)
360 361 362
{
	int ret;
	int pending_ret;
C
Chris Mason 已提交
363
	struct ctree_root *extent_root = root->extent_root;
364
	struct extent_item extent_item;
C
Chris Mason 已提交
365

366 367 368
	extent_item.refs = 1;
	extent_item.owner = owner;

C
Chris Mason 已提交
369 370 371 372 373 374 375 376
	if (root == extent_root) {
		BUG_ON(extent_root->current_insert.offset == 0);
		BUG_ON(num_blocks != 1);
		BUG_ON(extent_root->current_insert.flags ==
		       extent_root->current_insert.offset);
		ins->offset = 1;
		ins->objectid = extent_root->current_insert.objectid +
				extent_root->current_insert.flags++;
377 378
		return 0;
	}
C
Chris Mason 已提交
379 380 381 382
	ret = find_free_extent(root, num_blocks, search_start,
			       search_end, ins);
	if (ret)
		return ret;
383

C
Chris Mason 已提交
384 385 386 387 388 389 390 391 392 393
	ret = insert_item(extent_root, ins, &extent_item,
			  sizeof(extent_item));

	finish_current_insert(extent_root);
	pending_ret = run_pending(extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;
394 395 396 397 398 399 400 401 402 403
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
struct tree_buffer *alloc_free_block(struct ctree_root *root)
{
	struct key ins;
	int ret;
C
Chris Mason 已提交
404
	struct tree_buffer *buf;
405 406 407

	ret = alloc_extent(root, 1, 0, (unsigned long)-1,
			   root->node->node.header.parentid,
C
Chris Mason 已提交
408
			   &ins);
409 410 411 412
	if (ret) {
		BUG();
		return NULL;
	}
C
Chris Mason 已提交
413 414
	buf = find_tree_block(root, ins.objectid);
	dirty_tree_block(root, buf);
415 416
	return buf;
}
417

C
Chris Mason 已提交
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level)
{
	struct tree_buffer *next;
	struct tree_buffer *cur;
	u64 blocknr;
	int ret;
	u32 refs;

	ret = lookup_block_ref(root, path->nodes[*level]->blocknr, &refs);
	BUG_ON(ret);
	if (refs > 1)
		goto out;
	while(*level > 0) {
		cur = path->nodes[*level];
		if (path->slots[*level] >= cur->node.header.nritems)
			break;
		blocknr = cur->node.blockptrs[path->slots[*level]];
		ret = lookup_block_ref(root, blocknr, &refs);
		if (refs != 1 || *level == 1) {
			path->slots[*level]++;
			ret = free_extent(root, blocknr, 1);
			BUG_ON(ret);
			continue;
		}
		BUG_ON(ret);
		next = read_tree_block(root, blocknr);
		if (path->nodes[*level-1]) {
			tree_block_release(root, path->nodes[*level-1]);
		}
		path->nodes[*level-1] = next;
		*level = node_level(next->node.header.flags);
		path->slots[*level] = 0;
	}
out:
	ret = free_extent(root, path->nodes[*level]->blocknr, 1);
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level)
{
	int i;
	int slot;
	int ret;
	for(i = *level; i < MAX_LEVEL - 1 && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot < path->nodes[i]->node.header.nritems - 1) {
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
			ret = free_extent(root,
					  path->nodes[*level]->blocknr, 1);
			*level = i + 1;
			BUG_ON(ret);
		}
	}
	return 1;
}

int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap)
{
	int ret;
	int level;
	struct ctree_path path;
	int i;
	int orig_level;

	init_path(&path);

	level = node_level(snap->node.header.flags);
	orig_level = level;
	path.nodes[level] = snap;
	path.slots[level] = 0;
	while(1) {
		ret = walk_down_tree(root, &path, &level);
		if (ret > 0)
			break;
		ret = walk_up_tree(root, &path, &level);
		if (ret > 0)
			break;
	}
	for (i = 0; i < orig_level; i++) {
		if (path.nodes[i])
			tree_block_release(root, path.nodes[i]);
	}

	return 0;
}


#if 0
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537
int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap)
{
	int ret;
	int level;
	int refs;
	u64 blocknr = snap->blocknr;

	level = node_level(snap->node.header.flags);
	ret = lookup_block_ref(root, snap->blocknr, &refs);
	BUG_ON(ret);
	if (refs == 1 && level != 0) {
		struct node *n = &snap->node;
		struct tree_buffer *b;
		int i;
		for (i = 0; i < n->header.nritems; i++) {
			b = read_tree_block(root, n->blockptrs[i]);
			/* FIXME, don't recurse here */
			ret = btrfs_drop_snapshot(root, b);
			BUG_ON(ret);
			tree_block_release(root, b);
		}
	}
	ret = free_extent(root, blocknr, 1);
	BUG_ON(ret);
	return 0;
}
C
Chris Mason 已提交
538
#endif