/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	inode->i_mapping->flags &= ~__GFP_FS;

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
		printk(KERN_INFO "Old style space inode found, converting.\n");
		BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			      BTRFS_INODE_PREALLOC);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(root, trans, path, ino,
					 block_group->key.objectid);
}

int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	struct btrfs_block_rsv *rsv;
	loff_t oldsize;
	int ret = 0;

	rsv = trans->block_rsv;
	trans->block_rsv = root->orphan_block_rsv;
	ret = btrfs_block_rsv_check(root, root->orphan_block_rsv, 0, 5, 0);
	if (ret)
		return ret;

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);

	trans->block_rsv = rsv;
	if (ret) {
		WARN_ON(1);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	return ret;
}

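/*
 * Kick off readahead across the whole free space cache inode so that the
 * pages are likely to already be in the page cache when we parse them below.
 */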
static int readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);

	return 0;
}

int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct page *page;
	struct btrfs_key key;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	pgoff_t index = 0;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		goto out;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	else if (ret > 0) {
		btrfs_release_path(path);
		ret = 0;
		goto out;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu)\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation);
		goto out;
	}

	if (!num_entries)
		goto out;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_free_space_entry *entry;
		struct btrfs_free_space *e;
		void *addr;
		unsigned long offset = 0;
		int need_loop = 0;

		if (!num_entries && !num_bitmaps)
			break;

		page = find_or_create_page(inode->i_mapping, index, mask);
		if (!page)
			goto free_cache;

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				goto free_cache;
			}
		}
		addr = kmap(page);

		if (index == 0) {
			u64 *gen;

			/*
			 * We put a bogus crc in the front of the first page in
			 * case old kernels try to mount a fs with the new
			 * format to make sure they discard the cache.
			 */
			addr += sizeof(u64);
			offset += sizeof(u64);

			gen = addr;
			if (*gen != BTRFS_I(inode)->generation) {
				printk_ratelimited(KERN_ERR "btrfs: space cache"
					" generation (%llu) does not match "
					"inode (%llu)\n",
					(unsigned long long)*gen,
					(unsigned long long)
					BTRFS_I(inode)->generation);
				kunmap(page);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}
			addr += sizeof(u64);
			offset += sizeof(u64);
		}
		entry = addr;

		while (1) {
			if (!num_entries)
				break;

			need_loop = 1;
			e = kmem_cache_zalloc(btrfs_free_space_cachep,
					      GFP_NOFS);
			if (!e) {
				kunmap(page);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}

			e->offset = le64_to_cpu(entry->offset);
			e->bytes = le64_to_cpu(entry->bytes);
			if (!e->bytes) {
				kunmap(page);
				kmem_cache_free(btrfs_free_space_cachep, e);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}

			if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
				spin_lock(&ctl->tree_lock);
				ret = link_free_space(ctl, e);
				spin_unlock(&ctl->tree_lock);
				if (ret) {
					printk(KERN_ERR "Duplicate entries in "
					       "free space cache, dumping\n");
					kunmap(page);
					unlock_page(page);
					page_cache_release(page);
					goto free_cache;
				}
			} else {
				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
				if (!e->bitmap) {
					kunmap(page);
					kmem_cache_free(
						btrfs_free_space_cachep, e);
					unlock_page(page);
					page_cache_release(page);
					goto free_cache;
				}
				spin_lock(&ctl->tree_lock);
				ret = link_free_space(ctl, e);
				ctl->total_bitmaps++;
				ctl->op->recalc_thresholds(ctl);
				spin_unlock(&ctl->tree_lock);
				if (ret) {
					printk(KERN_ERR "Duplicate entries in "
					       "free space cache, dumping\n");
					kunmap(page);
					unlock_page(page);
					page_cache_release(page);
					goto free_cache;
				}
				list_add_tail(&e->list, &bitmaps);
			}

			num_entries--;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}

		/*
		 * We read an entry out of this page, we need to move on to the
		 * next page.
		 */
		if (need_loop) {
			kunmap(page);
			goto next;
		}

		/*
		 * We add the bitmaps at the end of the entries in order that
		 * the bitmap entries are added to the cache.
		 */
		e = list_entry(bitmaps.next, struct btrfs_free_space, list);
		list_del_init(&e->list);
		memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
		kunmap(page);
		num_bitmaps--;
next:
		unlock_page(page);
		page_cache_release(page);
		index++;
	}

	ret = 1;
out:
	return ret;
free_cache:
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has the wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * and -1 if it was not.
 */
int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
557
	struct page **pages;
J
Josef Bacik 已提交
558 559
	struct page *page;
	struct extent_state *cached_state = NULL;
560 561
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
J
Josef Bacik 已提交
562 563
	struct list_head bitmap_list;
	struct btrfs_key key;
564
	u64 start, end, len;
J
Josef Bacik 已提交
565
	u64 bytes = 0;
566
	u32 crc = ~(u32)0;
567
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
568
	int index = 0, num_pages = 0;
J
Josef Bacik 已提交
569 570
	int entries = 0;
	int bitmaps = 0;
571 572
	int ret;
	int err = -1;
573
	bool next_page = false;
574
	bool out_of_space = false;
J
Josef Bacik 已提交
575 576 577

	INIT_LIST_HEAD(&bitmap_list);

578 579
	if (!i_size_read(inode))
		return -1;
580

581 582
	num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT;
583

J
Josef Bacik 已提交
584 585 586 587
	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

588
	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
589
	if (!pages)
590
		return -1;
591

592
	/* Get the cluster for this block_group if it exists */
593
	if (block_group && !list_empty(&block_group->cluster_list))
594 595 596 597 598 599 600 601 602 603
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/*
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = root->fs_info->pinned_extents;

J
Josef Bacik 已提交
604 605 606 607 608 609 610 611
	/*
	 * Lock all pages first so we can lock the extent safely.
	 *
	 * NOTE: Because we hold the ref the entire time we're going to write to
	 * the page find_get_page should never fail, so we don't do a check
	 * after find_get_page at this point.  Just putting this here so people
	 * know and don't freak out.
	 */
612
	while (index < num_pages) {
613
		page = find_or_create_page(inode->i_mapping, index, mask);
J
Josef Bacik 已提交
614
		if (!page) {
615
			int i;
J
Josef Bacik 已提交
616

617 618 619
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
J
Josef Bacik 已提交
620
			}
621
			goto out;
J
Josef Bacik 已提交
622
		}
623
		pages[index] = page;
J
Josef Bacik 已提交
624 625 626 627 628 629 630
		index++;
	}

	index = 0;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

631 632 633 634
	/*
	 * When searching for pinned extents, we need to start at our start
	 * offset.
	 */
635 636
	if (block_group)
		start = block_group->key.objectid;
637

638 639 640 641 642 643
	node = rb_first(&ctl->free_space_offset);
	if (!node && cluster) {
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

J
Josef Bacik 已提交
644 645 646
	/* Write out the extent entries */
	do {
		struct btrfs_free_space_entry *entry;
647
		void *addr, *orig;
J
Josef Bacik 已提交
648 649
		unsigned long offset = 0;

650 651
		next_page = false;

652 653 654 655 656 657
		if (index >= num_pages) {
			out_of_space = true;
			break;
		}

		page = pages[index];
J
Josef Bacik 已提交
658

659 660 661
		orig = addr = kmap(page);
		if (index == 0) {
			u64 *gen;
J
Josef Bacik 已提交
662

663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678
			/*
			 * We're going to put in a bogus crc for this page to
			 * make sure that old kernels who aren't aware of this
			 * format will be sure to discard the cache.
			 */
			addr += sizeof(u64);
			offset += sizeof(u64);

			gen = addr;
			*gen = trans->transid;
			addr += sizeof(u64);
			offset += sizeof(u64);
		}
		entry = addr;

		memset(addr, 0, PAGE_CACHE_SIZE - offset);
679
		while (node && !next_page) {
J
Josef Bacik 已提交
680 681 682 683 684 685 686 687 688 689 690 691 692 693 694
			struct btrfs_free_space *e;

			e = rb_entry(node, struct btrfs_free_space, offset_index);
			entries++;

			entry->offset = cpu_to_le64(e->offset);
			entry->bytes = cpu_to_le64(e->bytes);
			if (e->bitmap) {
				entry->type = BTRFS_FREE_SPACE_BITMAP;
				list_add_tail(&e->list, &bitmap_list);
				bitmaps++;
			} else {
				entry->type = BTRFS_FREE_SPACE_EXTENT;
			}
			node = rb_next(node);
695 696 697 698
			if (!node && cluster) {
				node = rb_first(&cluster->root);
				cluster = NULL;
			}
J
Josef Bacik 已提交
699 700 701
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
702 703 704 705 706 707 708 709
				next_page = true;
			entry++;
		}

		/*
		 * We want to add any pinned extents to our free space cache
		 * so we don't leak the space
		 */
710 711 712
		while (block_group && !next_page &&
		       (start < block_group->key.objectid +
			block_group->key.offset)) {
713 714 715 716 717 718 719 720 721 722
			ret = find_first_extent_bit(unpin, start, &start, &end,
						    EXTENT_DIRTY);
			if (ret) {
				ret = 0;
				break;
			}

			/* This pinned extent is out of our range */
			if (start >= block_group->key.objectid +
			    block_group->key.offset)
J
Josef Bacik 已提交
723
				break;
724 725 726 727 728 729 730 731 732 733 734 735 736 737 738

			len = block_group->key.objectid +
				block_group->key.offset - start;
			len = min(len, end + 1 - start);

			entries++;
			entry->offset = cpu_to_le64(start);
			entry->bytes = cpu_to_le64(len);
			entry->type = BTRFS_FREE_SPACE_EXTENT;

			start = end + 1;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				next_page = true;
J
Josef Bacik 已提交
739 740 741
			entry++;
		}

742 743 744 745 746 747 748 749 750 751 752 753
		/* Generate bogus crc value */
		if (index == 0) {
			u32 *tmp;
			crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
					      PAGE_CACHE_SIZE - sizeof(u64));
			btrfs_csum_final(crc, (char *)&crc);
			crc++;
			tmp = orig;
			*tmp = crc;
		}

		kunmap(page);
J
Josef Bacik 已提交
754 755 756 757

		bytes += PAGE_CACHE_SIZE;

		index++;
758
	} while (node || next_page);
J
Josef Bacik 已提交
759 760 761 762 763 764 765

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		void *addr;
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

766 767 768 769
		if (index >= num_pages) {
			out_of_space = true;
			break;
		}
C
Chris Mason 已提交
770
		page = pages[index];
J
Josef Bacik 已提交
771 772 773 774 775 776 777 778 779 780

		addr = kmap(page);
		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
		kunmap(page);
		bytes += PAGE_CACHE_SIZE;

		list_del_init(&entry->list);
		index++;
	}

781 782 783 784 785
	if (out_of_space) {
		btrfs_drop_pages(pages, num_pages);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
				     i_size_read(inode) - 1, &cached_state,
				     GFP_NOFS);
786
		goto out;
787 788
	}

J
Josef Bacik 已提交
789
	/* Zero out the rest of the pages just to make sure */
790
	while (index < num_pages) {
J
Josef Bacik 已提交
791 792
		void *addr;

793
		page = pages[index];
J
Josef Bacik 已提交
794 795 796 797 798 799 800
		addr = kmap(page);
		memset(addr, 0, PAGE_CACHE_SIZE);
		kunmap(page);
		bytes += PAGE_CACHE_SIZE;
		index++;
	}

801 802 803
	ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
					    bytes, &cached_state);
	btrfs_drop_pages(pages, num_pages);
J
Josef Bacik 已提交
804 805 806
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

807
	if (ret)
808
		goto out;
809 810 811

	BTRFS_I(inode)->generation = trans->transid;

J
Josef Bacik 已提交
812 813 814
	filemap_write_and_wait(inode->i_mapping);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
815
	key.offset = offset;
J
Josef Bacik 已提交
816 817
	key.type = 0;

818
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
J
Josef Bacik 已提交
819 820 821 822
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
823
		goto out;
J
Josef Bacik 已提交
824 825 826 827 828 829 830 831
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
832
		    found_key.offset != offset) {
J
Josef Bacik 已提交
833 834 835 836
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
					 GFP_NOFS);
837
			btrfs_release_path(path);
838
			goto out;
J
Josef Bacik 已提交
839 840 841 842 843 844 845 846
		}
	}
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
847
	btrfs_release_path(path);
J
Josef Bacik 已提交
848

849
	err = 0;
850
out:
851
	kfree(pages);
852
	if (err) {
J
Josef Bacik 已提交
853 854 855 856
		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
857
	return err;
}

int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
				      path, block_group->key.objectid);
884 885
	if (ret) {
		btrfs_delalloc_release_metadata(inode, inode->i_size);
886 887 888
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
889
		ret = 0;
890
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free space cache "
		       "for block group %llu\n", block_group->key.objectid);
#endif
894 895
	}

J
Josef Bacik 已提交
896 897 898 899
	iput(inode);
	return ret;
}

900
static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
901
					  u64 offset)
J
Josef Bacik 已提交
902
{
903 904
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
905
	return (unsigned long)(div_u64(offset, unit));
906
}
J
Josef Bacik 已提交
907

908
static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
909
{
910
	return (unsigned long)(div_u64(bytes, unit));
911
}
J
Josef Bacik 已提交
912

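/*
 * Round an offset down to the start of the bitmap that covers it, so we can
 * find the bitmap entry responsible for any given byte in the block group.
 */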
static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
914 915 916 917
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;
J
Josef Bacik 已提交
918

919 920
	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
921 922
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
923
	bitmap_start += ctl->start;
J
Josef Bacik 已提交
924

925
	return bitmap_start;
J
Josef Bacik 已提交
926 927
}

928 929
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
J
Josef Bacik 已提交
930 931 932 933 934 935 936
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
937
		info = rb_entry(parent, struct btrfs_free_space, offset_index);
J
Josef Bacik 已提交
938

939
		if (offset < info->offset) {
J
Josef Bacik 已提交
940
			p = &(*p)->rb_left;
941
		} else if (offset > info->offset) {
J
Josef Bacik 已提交
942
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
958 959 960 961
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
962 963
				p = &(*p)->rb_right;
			} else {
964 965 966 967
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
968 969 970
				p = &(*p)->rb_left;
			}
		}
J
Josef Bacik 已提交
971 972 973 974 975 976 977 978 979
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
J
Josef Bacik 已提交
980 981
 * searches the tree for the given offset.
 *
982 983 984
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
J
Josef Bacik 已提交
985
 */
986
static struct btrfs_free_space *
987
tree_search_offset(struct btrfs_free_space_ctl *ctl,
988
		   u64 offset, int bitmap_only, int fuzzy)
J
Josef Bacik 已提交
989
{
990
	struct rb_node *n = ctl->free_space_offset.rb_node;
991 992 993 994 995 996 997 998
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}
J
Josef Bacik 已提交
999 1000

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1001
		prev = entry;
J
Josef Bacik 已提交
1002

1003
		if (offset < entry->offset)
J
Josef Bacik 已提交
1004
			n = n->rb_left;
1005
		else if (offset > entry->offset)
J
Josef Bacik 已提交
1006
			n = n->rb_right;
1007
		else
J
Josef Bacik 已提交
1008 1009 1010
			break;
	}

1011 1012 1013 1014 1015
	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;
J
Josef Bacik 已提交
1016

1017 1018 1019 1020 1021 1022 1023 1024 1025 1026
		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;
J
Josef Bacik 已提交
1027

1028 1029 1030 1031
		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
J
Josef Bacik 已提交
1032
			/*
1033 1034
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
J
Josef Bacik 已提交
1035
			 */
1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
J
Josef Bacik 已提交
1048
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					offset_index);
			BUG_ON(entry->offset > offset);
J
Josef Bacik 已提交
1064
		} else {
1065 1066 1067 1068
			if (fuzzy)
				return entry;
			else
				return NULL;
J
Josef Bacik 已提交
1069 1070 1071
		}
	}

1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
1086
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1087 1088 1089 1090 1091 1092 1093 1094 1095 1096
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
1097
			    ctl->unit > offset)
1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
J
Josef Bacik 已提交
1110 1111
}

1112
static inline void
1113
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1114
		    struct btrfs_free_space *info)
J
Josef Bacik 已提交
1115
{
1116 1117
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;
1118 1119
}

1120
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1121 1122
			      struct btrfs_free_space *info)
{
1123 1124
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
J
Josef Bacik 已提交
1125 1126
}

1127
static int link_free_space(struct btrfs_free_space_ctl *ctl,
J
Josef Bacik 已提交
1128 1129 1130 1131
			   struct btrfs_free_space *info)
{
	int ret = 0;

1132
	BUG_ON(!info->bitmap && !info->bytes);
1133
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1134
				 &info->offset_index, (info->bitmap != NULL));
J
Josef Bacik 已提交
1135 1136 1137
	if (ret)
		return ret;

1138 1139
	ctl->free_space += info->bytes;
	ctl->free_extents++;
1140 1141 1142
	return ret;
}

1143
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1144
{
1145
	struct btrfs_block_group_cache *block_group = ctl->private;
1146 1147 1148
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
1149
	u64 size = block_group->key.offset;
1150 1151 1152 1153
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);
1154 1155 1156 1157 1158 1159

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
1160 1161 1162 1163 1164
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);
1165

1166 1167 1168 1169 1170
	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
1171
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1172

1173
	if (bitmap_bytes >= max_bytes) {
1174
		ctl->extents_thresh = 0;
1175 1176
		return;
	}
1177

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1184

1185
	ctl->extents_thresh =
1186
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1187 1188
}

1189 1190 1191
static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
1192
{
L
Li Zefan 已提交
1193
	unsigned long start, count;
1194

1195 1196
	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
L
Li Zefan 已提交
1197
	BUG_ON(start + count > BITS_PER_BITMAP);
1198

L
Li Zefan 已提交
1199
	bitmap_clear(info->bitmap, start, count);
1200 1201

	info->bytes -= bytes;
1202 1203 1204 1205 1206 1207 1208
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
1209
	ctl->free_space -= bytes;
1210 1211
}

1212
static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
J
Josef Bacik 已提交
1213 1214
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
1215
{
L
Li Zefan 已提交
1216
	unsigned long start, count;
1217

1218 1219
	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
L
Li Zefan 已提交
1220
	BUG_ON(start + count > BITS_PER_BITMAP);
1221

L
Li Zefan 已提交
1222
	bitmap_set(info->bitmap, start, count);
1223 1224

	info->bytes += bytes;
1225
	ctl->free_space += bytes;
1226 1227
}

1228
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1229 1230 1231 1232 1233 1234 1235
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

1236
	i = offset_to_bit(bitmap_info->offset, ctl->unit,
1237
			  max_t(u64, *offset, bitmap_info->offset));
1238
	bits = bytes_to_bits(*bytes, ctl->unit);
1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
1253 1254
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
1255 1256 1257 1258 1259 1260
		return 0;
	}

	return -1;
}

1261 1262
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1263 1264 1265 1266 1267
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

1268
	if (!ctl->free_space_offset.rb_node)
1269 1270
		return NULL;

1271
	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1272 1273 1274 1275 1276 1277 1278 1279 1280
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
1281
			ret = search_bitmap(ctl, entry, offset, bytes);
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

1295
static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1296 1297
			   struct btrfs_free_space *info, u64 offset)
{
1298
	info->offset = offset_to_bitmap(ctl, offset);
J
Josef Bacik 已提交
1299
	info->bytes = 0;
1300 1301
	link_free_space(ctl, info);
	ctl->total_bitmaps++;
1302

1303
	ctl->op->recalc_thresholds(ctl);
1304 1305
}

1306
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1307 1308
			struct btrfs_free_space *bitmap_info)
{
1309
	unlink_free_space(ctl, bitmap_info);
1310
	kfree(bitmap_info->bitmap);
1311
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1312 1313
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
1314 1315
}

1316
static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1317 1318 1319 1320
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
1321 1322
	u64 search_start, search_bytes;
	int ret;
1323 1324

again:
1325
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1326

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
1339
	search_bytes = min(search_bytes, end - search_start + 1);
1340
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1341 1342
	BUG_ON(ret < 0 || search_start != *offset);

1343
	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1344
		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1345 1346 1347
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1348
		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1349 1350 1351 1352
		*bytes = 0;
	}

	if (*bytes) {
1353
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
1354
		if (!bitmap_info->bytes)
1355
			free_bitmap(ctl, bitmap_info);
1356

1357 1358 1359 1360 1361
		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
1362 1363
			return -EINVAL;

1364 1365 1366 1367 1368 1369 1370
		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
1371 1372 1373
		if (!bitmap_info->bitmap)
			return -EAGAIN;

1374 1375 1376 1377 1378 1379 1380 1381
		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
1382
		ret = search_bitmap(ctl, bitmap_info, &search_start,
1383 1384 1385 1386
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

1387
		goto again;
1388
	} else if (!bitmap_info->bytes)
1389
		free_bitmap(ctl, bitmap_info);
1390 1391 1392 1393

	return 0;
}

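/*
 * Set as much of the range [offset, offset + bytes) as fits inside this
 * bitmap and return the number of bytes that were actually accounted for;
 * the caller deals with any remainder that spills past the bitmap's end.
 */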
static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;

}

1411 1412
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		      struct btrfs_free_space *info)
1413
{
1414
	struct btrfs_block_group_cache *block_group = ctl->private;
1415 1416 1417 1418 1419

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
1420
	if (ctl->free_extents < ctl->extents_thresh) {
1421 1422 1423 1424 1425 1426 1427 1428
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents; however, if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
1429 1430
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
1431
		} else {
1432
			return false;
1433 1434
		}
	}
1435 1436 1437 1438 1439 1440 1441

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
1442 1443 1444 1445 1446
		return false;

	return true;
}

J
Josef Bacik 已提交
1447 1448 1449 1450 1451
static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};

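/*
 * Try to add the free range described by 'info' to one or more bitmaps,
 * creating a new bitmap entry if none covers the range yet.  Returns 1 if
 * the range was fully absorbed, 0 if the caller should fall back to a plain
 * extent entry, or a negative errno on allocation failure.
 */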
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
J
Josef Bacik 已提交
1456
	struct btrfs_block_group_cache *block_group = NULL;
1457
	int added = 0;
J
Josef Bacik 已提交
1458
	u64 bytes, offset, bytes_added;
1459
	int ret;
1460 1461 1462 1463

	bytes = info->bytes;
	offset = info->offset;

1464 1465 1466
	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

J
Josef Bacik 已提交
1467 1468
	if (ctl->op == &free_space_op)
		block_group = ctl->private;
1469
again:
J
Josef Bacik 已提交
1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
1487
			goto no_cluster_bitmap;
J
Josef Bacik 已提交
1488 1489 1490 1491 1492
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
1493
			goto no_cluster_bitmap;
J
Josef Bacik 已提交
1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507
		}

		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}
1508 1509

no_cluster_bitmap:
1510
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1511 1512 1513 1514 1515 1516
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

J
Josef Bacik 已提交
1517 1518 1519 1520
	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;
1521 1522 1523 1524 1525 1526 1527 1528 1529

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
1530
		add_new_bitmap(ctl, info, offset);
1531 1532 1533 1534
		added = 1;
		info = NULL;
		goto again;
	} else {
1535
		spin_unlock(&ctl->tree_lock);
1536 1537 1538

		/* no pre-allocated info, allocate a new one */
		if (!info) {
1539 1540
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
1541
			if (!info) {
1542
				spin_lock(&ctl->tree_lock);
1543 1544 1545 1546 1547 1548 1549
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1550
		spin_lock(&ctl->tree_lock);
1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
1562
		kmem_cache_free(btrfs_free_space_cachep, info);
1563
	}
J
Josef Bacik 已提交
1564 1565 1566 1567

	return ret;
}

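/*
 * See if the new entry can be coalesced with the extent entries immediately
 * before and after it in the tree; returns true if anything was merged.
 */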
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1569
			  struct btrfs_free_space *info, bool update_stat)
J
Josef Bacik 已提交
1570
{
1571 1572 1573 1574 1575
	struct btrfs_free_space *left_info;
	struct btrfs_free_space *right_info;
	bool merged = false;
	u64 offset = info->offset;
	u64 bytes = info->bytes;
1576

J
Josef Bacik 已提交
1577 1578 1579 1580 1581
	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
1582
	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1583 1584 1585 1586
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
1587
		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
J
Josef Bacik 已提交
1588

1589
	if (right_info && !right_info->bitmap) {
1590
		if (update_stat)
1591
			unlink_free_space(ctl, right_info);
1592
		else
1593
			__unlink_free_space(ctl, right_info);
1594
		info->bytes += right_info->bytes;
1595
		kmem_cache_free(btrfs_free_space_cachep, right_info);
1596
		merged = true;
J
Josef Bacik 已提交
1597 1598
	}

1599 1600
	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
1601
		if (update_stat)
1602
			unlink_free_space(ctl, left_info);
1603
		else
1604
			__unlink_free_space(ctl, left_info);
1605 1606
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
1607
		kmem_cache_free(btrfs_free_space_cachep, left_info);
1608
		merged = true;
J
Josef Bacik 已提交
1609 1610
	}

1611 1612 1613
	return merged;
}

1614 1615
int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
			   u64 offset, u64 bytes)
1616 1617 1618 1619
{
	struct btrfs_free_space *info;
	int ret = 0;

1620
	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1621 1622 1623 1624 1625 1626
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

1627
	spin_lock(&ctl->tree_lock);
1628

1629
	if (try_merge_free_space(ctl, info, true))
		goto link;

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
1637
	ret = insert_into_bitmap(ctl, info);
1638 1639 1640 1641 1642 1643 1644
	if (ret < 0) {
		goto out;
	} else if (ret) {
		ret = 0;
		goto out;
	}
link:
1645
	ret = link_free_space(ctl, info);
J
Josef Bacik 已提交
1646
	if (ret)
1647
		kmem_cache_free(btrfs_free_space_cachep, info);
1648
out:
1649
	spin_unlock(&ctl->tree_lock);
1650

J
Josef Bacik 已提交
1651
	if (ret) {
1652
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
S
Stoyan Gaydarov 已提交
1653
		BUG_ON(ret == -EEXIST);
J
Josef Bacik 已提交
1654 1655 1656 1657 1658
	}

	return ret;
}

1659 1660
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
J
Josef Bacik 已提交
1661
{
1662
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
J
Josef Bacik 已提交
1663
	struct btrfs_free_space *info;
1664
	struct btrfs_free_space *next_info = NULL;
J
Josef Bacik 已提交
1665 1666
	int ret = 0;

1667
	spin_lock(&ctl->tree_lock);
1668

1669
again:
1670
	info = tree_search_offset(ctl, offset, 0, 0);
1671
	if (!info) {
1672 1673 1674 1675
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
1676
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1677 1678 1679 1680 1681
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
1682 1683 1684 1685 1686 1687 1688 1689 1690
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);

		if (next_info->bitmap)
1691 1692
			end = next_info->offset +
			      BITS_PER_BITMAP * ctl->unit - 1;
1693 1694 1695 1696 1697 1698 1699 1700 1701 1702
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
J
Josef Bacik 已提交
1703 1704
			WARN_ON(1);
			ret = -EINVAL;
1705
			goto out_lock;
J
Josef Bacik 已提交
1706 1707
		}

1708 1709 1710 1711
		info = next_info;
	}

	if (info->bytes == bytes) {
1712
		unlink_free_space(ctl, info);
1713 1714
		if (info->bitmap) {
			kfree(info->bitmap);
1715
			ctl->total_bitmaps--;
J
Josef Bacik 已提交
1716
		}
1717
		kmem_cache_free(btrfs_free_space_cachep, info);
1718 1719
		goto out_lock;
	}
J
Josef Bacik 已提交
1720

1721
	if (!info->bitmap && info->offset == offset) {
1722
		unlink_free_space(ctl, info);
J
Josef Bacik 已提交
1723 1724
		info->offset += bytes;
		info->bytes -= bytes;
1725
		link_free_space(ctl, info);
1726 1727
		goto out_lock;
	}
J
Josef Bacik 已提交
1728

1729 1730
	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
1731 1732 1733 1734 1735 1736 1737 1738
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
1739
		unlink_free_space(ctl, info);
1740 1741 1742 1743 1744
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
1745
			ret = link_free_space(ctl, info);
1746 1747 1748
			WARN_ON(ret);
			if (ret)
				goto out_lock;
1749 1750 1751 1752
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
1753
			kmem_cache_free(btrfs_free_space_cachep, info);
1754
		}
1755
		spin_unlock(&ctl->tree_lock);
1756 1757 1758

		/* step two, insert a new info struct to cover
		 * anything before the hole
1759
		 */
1760 1761
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
1762 1763
		WARN_ON(ret);
		goto out;
J
Josef Bacik 已提交
1764
	}
1765

1766
	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1767 1768 1769 1770
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
1771
	spin_unlock(&ctl->tree_lock);
J
Josef Bacik 已提交
1772
out:
1773 1774 1775
	return ret;
}

J
Josef Bacik 已提交
1776 1777 1778
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
1779
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
J
Josef Bacik 已提交
1780 1781 1782 1783
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

1784
	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
J
Josef Bacik 已提交
1785 1786 1787
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
1788
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1789
		       (unsigned long long)info->offset,
1790 1791
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
J
Josef Bacik 已提交
1792
	}
1793 1794
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
J
Josef Bacik 已提交
1795 1796 1797 1798
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

1799
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
J
Josef Bacik 已提交
1800
{
1801
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
J
Josef Bacik 已提交
1802

1803 1804 1805 1806 1807
	spin_lock_init(&ctl->tree_lock);
	ctl->unit = block_group->sectorsize;
	ctl->start = block_group->key.objectid;
	ctl->private = block_group;
	ctl->op = &free_space_op;
J
Josef Bacik 已提交
1808

1809 1810 1811 1812 1813 1814 1815
	/*
	 * we only want to have 32k of ram per block group for keeping
	 * track of free space, and if we pass 1/2 of that we want to
	 * start converting things over to using bitmaps
	 */
	ctl->extents_thresh = ((1024 * 32) / 2) /
				sizeof(struct btrfs_free_space);
J
Josef Bacik 已提交
1816 1817
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);

	node = rb_first(&cluster->root);
	while (node) {
		bool bitmap;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);

		bitmap = (entry->bitmap != NULL);
		if (!bitmap)
			try_merge_free_space(ctl, entry, false);
		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

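/*
 * free every entry in the rbtree.  The caller must hold ctl->tree_lock;
 * the lock is dropped and retaken periodically so we don't hog the cpu.
 */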
void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			free_bitmap(ctl, info);
		}
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
}

void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}

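/*
 * empty a block group's free space cache: return any clusters it owns to
 * the free space rbtree, then free all remaining entries.
 */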
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&ctl->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);

}

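/*
 * find a free range of at least bytes + empty_size, take 'bytes' from it
 * and return the start of the allocation (0 if nothing suitable was found).
 */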
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&ctl->tree_lock);
	entry = find_free_space(ctl, &offset, &bytes_search);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(ctl, entry, offset, bytes);
		if (!entry->bytes)
			free_bitmap(ctl, entry);
	} else {
		unlink_free_space(ctl, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	}

out:
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl;
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	ctl = block_group->free_space_ctl;

	/* now return any extents the cluster had on it */
	spin_lock(&ctl->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&ctl->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

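/*
 * allocate 'bytes' from a bitmap entry that is part of a cluster.  Returns
 * the start of the allocation, or 0 if the bitmap has no suitable run of
 * free bits at or above min_start.
 */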
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   struct btrfs_free_space *entry,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	int err;
	u64 search_start = min_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	err = search_bitmap(ctl, entry, &search_start, &search_bytes);
	if (err)
		return 0;

	ret = search_start;
	__bitmap_clear_bits(ctl, entry, ret, bytes);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes ||
		    (!entry->bitmap && entry->offset < min_start)) {
			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		if (entry->bitmap) {
			ret = btrfs_alloc_from_bitmap(block_group,
						      cluster, entry, bytes,
						      min_start);
			if (ret == 0) {
				node = rb_next(&entry->offset_index);
				if (!node)
					break;
				entry = rb_entry(node, struct btrfs_free_space,
						 offset_index);
				continue;
			}
		} else {
			ret = entry->offset;

			entry->offset += bytes;
			entry->bytes -= bytes;
		}
		if (entry->bytes == 0)
			rb_erase(&entry->offset_index, &cluster->root);
		break;
	}
out:
	spin_unlock(&cluster->lock);

	if (!ret)
		return 0;

	spin_lock(&ctl->tree_lock);

	ctl->free_space -= bytes;
	if (entry->bytes == 0) {
		ctl->free_extents--;
		if (entry->bitmap) {
			kfree(entry->bitmap);
			ctl->total_bitmaps--;
			ctl->op->recalc_thresholds(ctl);
		}
		kmem_cache_free(btrfs_free_space_cachep, entry);
	}

	spin_unlock(&ctl->tree_lock);

	return ret;
}

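/*
 * try to build the cluster out of a single bitmap entry: find a run of at
 * least 'bytes' worth of free bits, keep accumulating until 'min_bytes' is
 * covered, then move the entry from the free space rbtree into the
 * cluster's rbtree.
 */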
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	int ret;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);

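	/*
	 * scan for a run of at least search_bits free bits; keep adding
	 * runs until total_bits worth have been found, resetting the
	 * window if it gets spread over too large an area.
	 */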
again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -ENOSPC;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	rb_erase(&entry->offset_index, &ctl->free_space_offset);
	ret = tree_insert_offset(&cluster->root, entry->offset,
				 &entry->offset_index, 1);
	BUG_ON(ret);

	return 0;
}

/*
 * This searches the block group for just extents to fill the cluster with.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *first = NULL;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_free_space *prev = NULL;
	struct btrfs_free_space *last;
	struct rb_node *node;
	u64 window_start;
	u64 window_free;
	u64 max_extent;
	u64 max_gap = 128 * 1024;

	entry = tree_search_offset(ctl, offset, 0, 1);
	if (!entry)
		return -ENOSPC;

	/*
	 * We don't want bitmaps, so just move along until we find a normal
	 * extent entry.
	 */
	while (entry->bitmap) {
		if (list_empty(&entry->list))
			list_add_tail(&entry->list, bitmaps);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	window_start = entry->offset;
	window_free = entry->bytes;
	max_extent = entry->bytes;
	first = entry;
	last = entry;
	prev = entry;

	while (window_free <= min_bytes) {
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			if (list_empty(&entry->list))
				list_add_tail(&entry->list, bitmaps);
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
		    entry->offset - window_start > (min_bytes * 2)) {
			first = entry;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = entry;
			window_free += entry->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
		prev = entry;
	}

	cluster->window_start = first->offset;

	node = &first->offset_index;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 */
	do {
		int ret;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (entry->bitmap)
			continue;

		rb_erase(&entry->offset_index, &ctl->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);
	} while (node && entry != last);

	cluster->max_size = max_extent;

	return 0;
}

/*
 * This specifically looks for bitmaps that may work in the cluster, we assume
 * that we have already failed to find extents that will work.
 */
static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
		     struct btrfs_free_cluster *cluster,
		     struct list_head *bitmaps, u64 offset, u64 bytes,
		     u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = -ENOSPC;

	if (ctl->total_bitmaps == 0)
		return -ENOSPC;

	/*
	 * First check our cached list of bitmaps and see if there is an entry
	 * here that will work.
	 */
	list_for_each_entry(entry, bitmaps, list) {
		if (entry->bytes < min_bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, min_bytes);
		if (!ret)
			return 0;
	}

	/*
	 * If we do have entries on our list and we are here then we didn't find
	 * anything, so go ahead and get the next entry after the last entry in
	 * this list and start the search from there.
	 */
	if (!list_empty(bitmaps)) {
		entry = list_entry(bitmaps->prev, struct btrfs_free_space,
				   list);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		goto search;
	}

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
	if (!entry)
		return -ENOSPC;

search:
	node = &entry->offset_index;
	do {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (!entry->bitmap)
			continue;
		if (entry->bytes < min_bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, min_bytes);
	} while (ret && node);

	return ret;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct list_head bitmaps;
	struct btrfs_free_space *entry, *tmp;
	u64 min_bytes;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&ctl->tree_lock);

	/*
	 * If we know we don't have enough space to make a cluster don't even
	 * bother doing all the work to try and find one.
	 */
	if (ctl->free_space < min_bytes) {
		spin_unlock(&ctl->tree_lock);
		return -ENOSPC;
	}

	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}

	INIT_LIST_HEAD(&bitmaps);
	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
				      bytes, min_bytes);
	if (ret)
		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
					   offset, bytes, min_bytes);

	/* Clear our temporary list */
	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
		list_del_init(&entry->list);

	if (!ret) {
		atomic_inc(&block_group->count);
		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
		cluster->block_group = block_group;
	}
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}

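/*
 * walk the free space in [start, end) and discard every chunk that is at
 * least 'minlen' bytes long, returning the total number of bytes actually
 * trimmed in *trimmed.
 */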
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
			   u64 *trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 bytes = 0;
	u64 actually_trimmed;
	int ret = 0;

	*trimmed = 0;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry)
			entry = tree_search_offset(ctl,
						   offset_to_bitmap(ctl, start),
						   1, 1);

		if (!entry || entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, &start, &bytes);
			if (!ret) {
				if (start >= end) {
					spin_unlock(&ctl->tree_lock);
					break;
				}
				bytes = min(bytes, end - start);
				bitmap_clear_bits(ctl, entry, start, bytes);
				if (entry->bytes == 0)
					free_bitmap(ctl, entry);
			} else {
				start = entry->offset + BITS_PER_BITMAP *
					block_group->sectorsize;
				spin_unlock(&ctl->tree_lock);
				ret = 0;
				continue;
			}
		} else {
			start = entry->offset;
			bytes = min(entry->bytes, end - start);
			unlink_free_space(ctl, entry);
			kmem_cache_free(btrfs_free_space_cachep, entry);
		}

		spin_unlock(&ctl->tree_lock);

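		/*
		 * account the range as reserved while it is being discarded;
		 * it is added back to the free space cache afterwards and
		 * the reservation dropped.
		 */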
		if (bytes >= minlen) {
			struct btrfs_space_info *space_info;
			int update = 0;

			space_info = block_group->space_info;
			spin_lock(&space_info->lock);
			spin_lock(&block_group->lock);
			if (!block_group->ro) {
				block_group->reserved += bytes;
				space_info->bytes_reserved += bytes;
				update = 1;
			}
			spin_unlock(&block_group->lock);
			spin_unlock(&space_info->lock);

			ret = btrfs_error_discard_extent(fs_info->extent_root,
							 start,
							 bytes,
							 &actually_trimmed);

			btrfs_add_free_space(block_group, start, bytes);
			if (update) {
				spin_lock(&space_info->lock);
				spin_lock(&block_group->lock);
				if (block_group->ro)
					space_info->bytes_readonly += bytes;
				block_group->reserved -= bytes;
				space_info->bytes_reserved -= bytes;
				spin_unlock(&space_info->lock);
				spin_unlock(&block_group->lock);
			}

			if (ret)
				break;
			*trimmed += actually_trimmed;
		}
		start += bytes;
		bytes = 0;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}

/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 ino = 0;

	spin_lock(&ctl->tree_lock);

	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
		goto out;

	entry = rb_entry(rb_first(&ctl->free_space_offset),
			 struct btrfs_free_space, offset_index);

	if (!entry->bitmap) {
		ino = entry->offset;

		unlink_free_space(ctl, entry);
		entry->offset++;
		entry->bytes--;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	} else {
		u64 offset = 0;
		u64 count = 1;
		int ret;

		ret = search_bitmap(ctl, entry, &offset, &count);
		BUG_ON(ret);

		ino = offset;
		bitmap_clear_bits(ctl, entry, offset, 1);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	return ino;
}

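/*
 * grab the cached free-ino cache inode for this root, or look it up on
 * disk and cache it if we don't have one yet.
 */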
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	if (!btrfs_fs_closing(root->fs_info))
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}

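/* create the inode that backs the free-ino cache for this root */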
int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}

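/*
 * load the cached free inode numbers for a root, provided the inode map
 * cache is enabled and the on-disk cache matches the root's generation.
 */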
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for "
		       "root %llu\n", root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

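/*
 * write the in-memory free-ino cache back to its inode.  On failure the
 * delalloc metadata reservation is released and (with DEBUG) an error is
 * logged.
 */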
int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret) {
		btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free ino cache "
		       "for root %llu\n", root->root_key.objectid);
#endif
	}

	iput(inode);
	return ret;
}