disk-io.c 27.0 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

C
Chris Mason 已提交
19
#include <linux/fs.h>
20
#include <linux/blkdev.h>
21
#include <linux/crc32c.h>
C
Chris Mason 已提交
22
#include <linux/scatterlist.h>
C
Chris Mason 已提交
23
#include <linux/swap.h>
24
#include <linux/radix-tree.h>
C
Chris Mason 已提交
25
#include <linux/writeback.h>
26
#include <linux/buffer_head.h> // for block_sync_page
27 28
#include "ctree.h"
#include "disk-io.h"
29
#include "transaction.h"
30
#include "btrfs_inode.h"
31
#include "print-tree.h"
32

33 34
#if 0
/*
 * Compiled out.  Compare the block number of the extent buffer with the
 * block number stored in its header.  Logs and returns 1 when they
 * differ, returns 0 when they agree.
 */
static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
{
	unsigned long long actual = extent_buffer_blocknr(buf);
	unsigned long long recorded = btrfs_header_blocknr(buf);

	if (actual == recorded)
		return 0;

	printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
	       actual, recorded);
	return 1;
}
#endif
45

46 47
/*
 * Forward declaration: open_ctree() installs these ops on the btree
 * inode's extent tree; the initializer is at the end of this file.
 */
static struct extent_map_ops btree_extent_map_ops;

48
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
49
					    u64 bytenr, u32 blocksize)
50
{
51
	struct inode *btree_inode = root->fs_info->btree_inode;
52 53
	struct extent_buffer *eb;
	eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
54
				bytenr, blocksize, GFP_NOFS);
55
	return eb;
56
}
57

58
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
59
						 u64 bytenr, u32 blocksize)
60 61
{
	struct inode *btree_inode = root->fs_info->btree_inode;
62
	struct extent_buffer *eb;
63

64
	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
65
				 bytenr, blocksize, NULL, GFP_NOFS);
66
	return eb;
67 68
}

69 70 71
/*
 * get_extent callback for the btree inode.
 *
 * Returns the extent map covering [@start, @end] if one is already in
 * the inode's extent tree.  Otherwise a single identity mapping is
 * created (file offset 0 maps to block offset 0) that runs up to the
 * page-aligned inode size, and that mapping is inserted and returned.
 *
 * @page, @page_offset and @create are not used here.
 *
 * Returns the extent map on success, ERR_PTR(-ENOMEM) when the map
 * cannot be allocated, or ERR_PTR(err) when insertion fails with
 * anything other than -EEXIST.
 */
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
				    size_t page_offset, u64 start, u64 end,
				    int create)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	int ret;

again:
	em = lookup_extent_mapping(em_tree, start, end);
	if (em)
		goto out;

	em = alloc_extent_map(GFP_NOFS);
	if (!em) {
		em = ERR_PTR(-ENOMEM);
		goto out;
	}
	em->start = 0;
	em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE - 1)) - 1;
	em->block_start = 0;
	em->block_end = em->end;
	em->bdev = inode->i_sb->s_bdev;

	ret = add_extent_mapping(em_tree, em);
	if (ret == -EEXIST) {
		/* somebody else inserted a mapping first, look it up again */
		free_extent_map(em);
		em = NULL;
		goto again;
	} else if (ret) {
		/* the map was not inserted, so drop it instead of leaking it */
		free_extent_map(em);
		em = ERR_PTR(ret);
	}
out:
	return em;
}

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
/*
 * Fold @len bytes at @data into the running crc32c value @seed and
 * return the updated value.  @root is not used.
 */
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
{
	u32 crc = crc32c(seed, data, len);

	return crc;
}

/*
 * Finish a checksum: convert @crc to little endian, invert every bit
 * and store the 32-bit result at @result.
 */
void btrfs_csum_final(u32 crc, char *result)
{
	__le32 *out = (__le32 *)result;

	*out = ~cpu_to_le32(crc);
}

/*
 * Checksum a tree block.
 *
 * The crc32c covers everything in @buf after the first BTRFS_CSUM_SIZE
 * bytes.  The buffer is mapped and summed piece by piece.
 *
 * @verify == 0: store the computed checksum in the first
 *               BTRFS_CRC32_SIZE bytes of the buffer.
 * @verify != 0: compare the computed checksum with the stored one.
 *               Buffers whose header generation matches the running
 *               transaction are not compared.
 *
 * Returns 0 on success, 1 if the buffer could not be mapped or the
 * checksum did not match.
 */
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
			   int verify)
{
	char result[BTRFS_CRC32_SIZE];
	unsigned long len;
	unsigned long cur_len;
	unsigned long offset = BTRFS_CSUM_SIZE;
	char *map_token = NULL;
	char *kaddr;
	unsigned long map_start;
	unsigned long map_len;
	int from_this_trans = 0;
	int err;
	u32 crc = ~(u32)0;

	len = buf->len - offset;
	while (len > 0) {
		err = map_private_extent_buffer(buf, offset, 32,
					&map_token, &kaddr,
					&map_start, &map_len, KM_USER0);
		if (err) {
			printk("failed to map extent buffer! %lu\n",
			       offset);
			return 1;
		}
		/* only sum the part of the remaining data inside this mapping */
		cur_len = min(len, map_len - (offset - map_start));
		crc = btrfs_csum_data(root, kaddr + offset - map_start,
				      crc, cur_len);
		len -= cur_len;
		offset += cur_len;
		unmap_extent_buffer(buf, map_token, KM_USER0);
	}
	btrfs_csum_final(crc, result);

	if (!verify) {
		write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
		return 0;
	}

	if (root->fs_info->running_transaction &&
	    btrfs_header_generation(buf) ==
	    root->fs_info->running_transaction->transid)
		from_this_trans = 1;

	/* FIXME, this is not good */
	if (from_this_trans == 0 &&
	    memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
		u32 val;
		u32 found = 0;

		memcpy(&found, result, BTRFS_CRC32_SIZE);
		read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
		/* u64 is not unsigned long long everywhere, cast for %llu */
		printk("btrfs: %s checksum verify failed on %llu "
		       "wanted %X found %X from_this_trans %d\n",
		       root->fs_info->sb->s_id,
		       (unsigned long long)buf->start, val, found,
		       from_this_trans);
		return 1;
	}
	return 0;
}


int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
	struct extent_map_tree *tree;
179
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
	u64 found_start;
	int found_level;
	unsigned long len;
	struct extent_buffer *eb;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;

	if (page->private == EXTENT_PAGE_PRIVATE)
		goto out;
	if (!page->private)
		goto out;
	len = page->private >> 2;
	if (len == 0) {
		WARN_ON(1);
	}
	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
	read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1);
	found_start = btrfs_header_bytenr(eb);
	if (found_start != start) {
		printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
		       start, found_start, len);
200 201 202 203 204 205 206 207 208 209 210 211 212
		WARN_ON(1);
		goto err;
	}
	if (eb->first_page != page) {
		printk("bad first page %lu %lu\n", eb->first_page->index,
		       page->index);
		WARN_ON(1);
		goto err;
	}
	if (!PageUptodate(page)) {
		printk("csum not up to date page %lu\n", page->index);
		WARN_ON(1);
		goto err;
213 214 215
	}
	found_level = btrfs_header_level(eb);
	csum_tree_block(root, eb, 0);
216
err:
217 218 219 220 221
	free_extent_buffer(eb);
out:
	return 0;
}

222
/*
 * writepage_io_hook for the btree extent tree: checksum the tree block
 * on @page.  @start and @end are unused.  Always returns 0.
 */
static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
{
	struct inode *host = page->mapping->host;

	csum_dirty_buffer(BTRFS_I(host)->root, page);
	return 0;
}

static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
234 235
	return extent_write_full_page(tree, page, btree_get_extent, wbc);
}
236 237 238 239 240 241

static int btree_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(mapping->host)->extent_tree;
242
	if (wbc->sync_mode == WB_SYNC_NONE) {
243 244 245
		u64 num_dirty;
		u64 start = 0;
		unsigned long thresh = 96 * 1024 * 1024;
246 247 248 249

		if (wbc->for_kupdate)
			return 0;

250 251 252 253 254
		if (current_is_pdflush()) {
			thresh = 96 * 1024 * 1024;
		} else {
			thresh = 8 * 1024 * 1024;
		}
255 256
		num_dirty = count_range_bits(tree, &start, (u64)-1,
					     thresh, EXTENT_DIRTY);
257 258 259 260
		if (num_dirty < thresh) {
			return 0;
		}
	}
261 262 263
	return extent_writepages(tree, mapping, btree_get_extent, wbc);
}

264 265 266 267 268 269
int btree_readpage(struct file *file, struct page *page)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
	return extent_read_full_page(tree, page, btree_get_extent);
}
C
Chris Mason 已提交
270

271 272 273 274
/*
 * ->releasepage for the btree inode.  When try_release_extent_mapping()
 * returns 1, the page's private state is cleared and one page reference
 * is dropped.  The value from try_release_extent_mapping() is returned.
 */
static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
	struct extent_map_tree *tree = &BTRFS_I(page->mapping->host)->extent_tree;
	int released = try_release_extent_mapping(tree, page);

	if (released != 1)
		return released;

	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
	return released;
}

286
static void btree_invalidatepage(struct page *page, unsigned long offset)
287
{
288 289 290 291
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
	extent_invalidatepage(tree, page, offset);
	btree_releasepage(page, GFP_NOFS);
292 293
}

294
#if 0
/*
 * Compiled out: older buffer_head based ->writepage.  It checksums
 * every dirty buffer on the page and then writes the page with
 * block_write_full_page().
 */
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct buffer_head *first;
	struct buffer_head *cur;

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
	}

	first = page_buffers(page);
	cur = first;
	do {
		if (buffer_dirty(cur))
			csum_tree_block(root, cur, 0);
		cur = cur->b_this_page;
	} while (cur != first);

	return block_write_full_page(page, btree_get_block, wbc);
}
#endif
314

315 316 317
static struct address_space_operations btree_aops = {
	.readpage	= btree_readpage,
	.writepage	= btree_writepage,
318
	.writepages	= btree_writepages,
319 320
	.releasepage	= btree_releasepage,
	.invalidatepage = btree_invalidatepage,
321 322 323
	.sync_page	= block_sync_page,
};

324
/*
 * Start reading the tree block at @bytenr.  The read is issued with the
 * last argument of read_extent_buffer_pages() set to 0, where
 * read_tree_block() passes 1.  The buffer reference is dropped right
 * away.  Always returns 0, including when no buffer could be obtained.
 */
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_buffer *eb;

	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (eb) {
		read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
					 eb, 0, 0);
		free_extent_buffer(eb);
	}
	return 0;
}

339 340
/*
 * Read the tree block at @bytenr and make sure its checksum has been
 * looked at once.
 *
 * EXTENT_CSUM on the buffer, or on the first page's range in the extent
 * tree, means the checksum was already handled.  Otherwise the range is
 * locked, re-tested, and csum_tree_block() is run in verify mode.  The
 * result of that verification is not acted upon: EXTENT_CSUM is set
 * either way.
 *
 * Returns the extent buffer, or NULL if none could be obtained.
 */
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
				      u32 blocksize)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_map_tree *tree = &BTRFS_I(btree_inode)->extent_tree;
	struct extent_buffer *eb;
	u64 range_end;

	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!eb)
		return NULL;
	read_extent_buffer_pages(tree, eb, 0, 1);

	if (eb->flags & EXTENT_CSUM)
		return eb;

	/* the csum state is tracked on the first page of the block only */
	range_end = eb->start + PAGE_CACHE_SIZE - 1;
	if (test_range_bit(tree, eb->start, range_end, EXTENT_CSUM, 1)) {
		eb->flags |= EXTENT_CSUM;
		return eb;
	}

	lock_extent(tree, eb->start, range_end, GFP_NOFS);

	/* test again now that the range is locked */
	if (!test_range_bit(tree, eb->start, range_end, EXTENT_CSUM, 1)) {
		csum_tree_block(root, eb, 1);
		set_extent_bits(tree, eb->start, range_end, EXTENT_CSUM,
				GFP_NOFS);
	}
	eb->flags |= EXTENT_CSUM;

	unlock_extent(tree, eb->start, range_end, GFP_NOFS);
	return eb;
}

381
int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
382
		     struct extent_buffer *buf)
383
{
384
	struct inode *btree_inode = root->fs_info->btree_inode;
385 386 387 388
	if (btrfs_header_generation(buf) ==
	    root->fs_info->running_transaction->transid)
		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree,
					  buf);
389 390 391 392 393 394 395 396 397 398 399 400
	return 0;
}

/*
 * Wait for writeback on @buf through the btree inode's extent tree.
 * Always returns 0.
 */
int wait_on_tree_block_writeback(struct btrfs_root *root,
				 struct extent_buffer *buf)
{
	struct extent_map_tree *tree;

	tree = &BTRFS_I(root->fs_info->btree_inode)->extent_tree;
	wait_on_extent_buffer_writeback(tree, buf);
	return 0;
}

401
static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
402
			u32 stripesize, struct btrfs_root *root,
403
			struct btrfs_fs_info *fs_info,
C
Chris Mason 已提交
404
			u64 objectid)
405
{
C
Chris Mason 已提交
406
	root->node = NULL;
407
	root->inode = NULL;
408
	root->commit_root = NULL;
409 410 411
	root->sectorsize = sectorsize;
	root->nodesize = nodesize;
	root->leafsize = leafsize;
412
	root->stripesize = stripesize;
C
Chris Mason 已提交
413
	root->ref_cows = 0;
414
	root->fs_info = fs_info;
415 416
	root->objectid = objectid;
	root->last_trans = 0;
C
Chris Mason 已提交
417 418
	root->highest_inode = 0;
	root->last_inode_alloc = 0;
419
	root->name = NULL;
420
	root->in_sysfs = 0;
421 422
	memset(&root->root_key, 0, sizeof(root->root_key));
	memset(&root->root_item, 0, sizeof(root->root_item));
423
	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
424 425
	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
	init_completion(&root->kobj_unregister);
426 427
	root->defrag_running = 0;
	root->defrag_level = 0;
428
	root->root_key.objectid = objectid;
429 430 431
	return 0;
}

432
/*
 * Initialise @root with the block sizes of @tree_root, find the last
 * root item for @objectid in @tree_root and read the root's top block.
 *
 * A failed lookup or a failed block read hits BUG_ON(), so the function
 * only ever returns 0.
 */
static int find_and_setup_root(struct btrfs_root *tree_root,
			       struct btrfs_fs_info *fs_info,
			       u64 objectid,
			       struct btrfs_root *root)
{
	u64 bytenr;
	u32 blocksize;
	int ret;

	__setup_root(tree_root->nodesize, tree_root->leafsize,
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, objectid);

	ret = btrfs_find_last_root(tree_root, objectid,
				   &root->root_item, &root->root_key);
	BUG_ON(ret);

	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	bytenr = btrfs_root_bytenr(&root->root_item);
	root->node = read_tree_block(root, bytenr, blocksize);
	BUG_ON(!root->node);
	return 0;
}

454 455
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info,
					       struct btrfs_key *location)
456 457 458 459
{
	struct btrfs_root *root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path;
460
	struct extent_buffer *l;
C
Chris Mason 已提交
461
	u64 highest_inode;
462
	u32 blocksize;
463 464
	int ret = 0;

465
	root = kzalloc(sizeof(*root), GFP_NOFS);
C
Chris Mason 已提交
466
	if (!root)
467 468
		return ERR_PTR(-ENOMEM);
	if (location->offset == (u64)-1) {
469
		ret = find_and_setup_root(tree_root, fs_info,
470 471 472 473 474 475 476 477
					  location->objectid, root);
		if (ret) {
			kfree(root);
			return ERR_PTR(ret);
		}
		goto insert;
	}

478
	__setup_root(tree_root->nodesize, tree_root->leafsize,
479 480
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, location->objectid);
481 482 483 484 485 486 487 488 489

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
	if (ret != 0) {
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}
490 491 492
	l = path->nodes[0];
	read_extent_buffer(l, &root->root_item,
	       btrfs_item_ptr_offset(l, path->slots[0]),
493
	       sizeof(root->root_item));
494
	memcpy(&root->root_key, location, sizeof(*location));
495 496 497 498 499 500 501 502
	ret = 0;
out:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	if (ret) {
		kfree(root);
		return ERR_PTR(ret);
	}
503 504 505
	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
				     blocksize);
506 507 508
	BUG_ON(!root->node);
insert:
	root->ref_cows = 1;
509 510 511 512 513 514 515 516
	ret = btrfs_find_highest_inode(root, &highest_inode);
	if (ret == 0) {
		root->highest_inode = highest_inode;
		root->last_inode_alloc = highest_inode;
	}
	return root;
}

C
Chris Mason 已提交
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
/*
 * Find an already loaded root by object id.  The tree root and extent
 * root are answered from fs_info directly; anything else is looked up
 * in fs_roots_radix, and that lookup result is returned as is.
 */
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
					u64 root_objectid)
{
	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;

	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

	return radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)root_objectid);
}

532 533
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
					      struct btrfs_key *location)
534 535 536 537
{
	struct btrfs_root *root;
	int ret;

538 539 540 541 542
	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;
	if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

543 544 545 546 547 548 549 550
	root = radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)location->objectid);
	if (root)
		return root;

	root = btrfs_read_fs_root_no_radix(fs_info, location);
	if (IS_ERR(root))
		return root;
C
Chris Mason 已提交
551 552
	ret = radix_tree_insert(&fs_info->fs_roots_radix,
				(unsigned long)root->root_key.objectid,
553 554
				root);
	if (ret) {
555
		free_extent_buffer(root->node);
556 557 558
		kfree(root);
		return ERR_PTR(ret);
	}
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
	ret = btrfs_find_dead_roots(fs_info->tree_root,
				    root->root_key.objectid, root);
	BUG_ON(ret);

	return root;
}

struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
				      struct btrfs_key *location,
				      const char *name, int namelen)
{
	struct btrfs_root *root;
	int ret;

	root = btrfs_read_fs_root_no_name(fs_info, location);
	if (!root)
		return NULL;
576

577 578 579
	if (root->in_sysfs)
		return root;

580 581
	ret = btrfs_set_root_name(root, name, namelen);
	if (ret) {
582
		free_extent_buffer(root->node);
583 584 585 586 587 588
		kfree(root);
		return ERR_PTR(ret);
	}

	ret = btrfs_sysfs_add_root(root);
	if (ret) {
589
		free_extent_buffer(root->node);
590 591 592 593
		kfree(root->name);
		kfree(root);
		return ERR_PTR(ret);
	}
594
	root->in_sysfs = 1;
595 596
	return root;
}
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
#if 0
/*
 * Compiled out.  Allocate a hasher of the given crypto @type and add it
 * to info->hashers under info->hash_lock.  Returns 0, -ENOMEM or
 * -EINVAL.
 */
static int add_hasher(struct btrfs_fs_info *info, char *type)
{
	struct btrfs_hasher *hasher = kmalloc(sizeof(*hasher), GFP_NOFS);

	if (!hasher)
		return -ENOMEM;

	hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC);
	if (hasher->hash_tfm) {
		spin_lock(&info->hash_lock);
		list_add(&hasher->list, &info->hashers);
		spin_unlock(&info->hash_lock);
		return 0;
	}

	kfree(hasher);
	return -EINVAL;
}
#endif
C
Chris Mason 已提交
615
struct btrfs_root *open_ctree(struct super_block *sb)
616
{
617 618 619 620
	u32 sectorsize;
	u32 nodesize;
	u32 leafsize;
	u32 blocksize;
621
	u32 stripesize;
C
Chris Mason 已提交
622 623 624 625 626 627
	struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
						 GFP_NOFS);
	struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
					       GFP_NOFS);
	struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
						GFP_NOFS);
628
	int ret;
C
Chris Mason 已提交
629
	int err = -EIO;
C
Chris Mason 已提交
630
	struct btrfs_super_block *disk_super;
631

C
Chris Mason 已提交
632 633 634 635
	if (!extent_root || !tree_root || !fs_info) {
		err = -ENOMEM;
		goto fail;
	}
636
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
C
Chris Mason 已提交
637
	INIT_LIST_HEAD(&fs_info->trans_list);
638
	INIT_LIST_HEAD(&fs_info->dead_roots);
639 640
	INIT_LIST_HEAD(&fs_info->hashers);
	spin_lock_init(&fs_info->hash_lock);
641
	spin_lock_init(&fs_info->delalloc_lock);
642

643 644
	memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
	init_completion(&fs_info->kobj_unregister);
C
Chris Mason 已提交
645
	sb_set_blocksize(sb, 4096);
646
	fs_info->running_transaction = NULL;
647
	fs_info->last_trans_committed = 0;
648 649
	fs_info->tree_root = tree_root;
	fs_info->extent_root = extent_root;
C
Chris Mason 已提交
650
	fs_info->sb = sb;
651
	fs_info->throttles = 0;
652
	fs_info->mount_opt = 0;
653
	fs_info->max_extent = (u64)-1;
654
	fs_info->delalloc_bytes = 0;
655 656
	fs_info->btree_inode = new_inode(sb);
	fs_info->btree_inode->i_ino = 1;
C
Chris Mason 已提交
657
	fs_info->btree_inode->i_nlink = 1;
658 659
	fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
660 661 662
	extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
			     fs_info->btree_inode->i_mapping,
			     GFP_NOFS);
663 664
	BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops;

665 666
	extent_map_tree_init(&fs_info->free_space_cache,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
667 668
	extent_map_tree_init(&fs_info->block_group_cache,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
669 670 671 672 673 674
	extent_map_tree_init(&fs_info->pinned_extents,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_map_tree_init(&fs_info->pending_del,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_map_tree_init(&fs_info->extent_ins,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
675
	fs_info->do_barriers = 1;
676
	fs_info->closing = 0;
677
	fs_info->total_pinned = 0;
678 679 680
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
#else
C
Chris Mason 已提交
681
	INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
682
#endif
683 684 685
	BTRFS_I(fs_info->btree_inode)->root = tree_root;
	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
	       sizeof(struct btrfs_key));
C
Chris Mason 已提交
686
	insert_inode_hash(fs_info->btree_inode);
687
	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
C
Chris Mason 已提交
688

C
Chris Mason 已提交
689
	mutex_init(&fs_info->trans_mutex);
C
Chris Mason 已提交
690
	mutex_init(&fs_info->fs_mutex);
691

692 693 694 695 696 697 698 699
#if 0
	ret = add_hasher(fs_info, "crc32c");
	if (ret) {
		printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
		err = -ENOMEM;
		goto fail_iput;
	}
#endif
700
	__setup_root(512, 512, 512, 512, tree_root,
C
Chris Mason 已提交
701
		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
702

C
Chris Mason 已提交
703
	fs_info->sb_buffer = read_tree_block(tree_root,
704 705
					     BTRFS_SUPER_INFO_OFFSET,
					     512);
706

707
	if (!fs_info->sb_buffer)
C
Chris Mason 已提交
708 709
		goto fail_iput;

710 711 712 713 714 715 716
	read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
			   sizeof(fs_info->super_copy));

	read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
			   (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
			   BTRFS_FSID_SIZE);
	disk_super = &fs_info->super_copy;
717
	if (!btrfs_super_root(disk_super))
C
Chris Mason 已提交
718
		goto fail_sb_buffer;
719

720 721 722
	nodesize = btrfs_super_nodesize(disk_super);
	leafsize = btrfs_super_leafsize(disk_super);
	sectorsize = btrfs_super_sectorsize(disk_super);
723
	stripesize = btrfs_super_stripesize(disk_super);
724 725 726
	tree_root->nodesize = nodesize;
	tree_root->leafsize = leafsize;
	tree_root->sectorsize = sectorsize;
727
	tree_root->stripesize = stripesize;
728
	sb_set_blocksize(sb, sectorsize);
729

730
	i_size_write(fs_info->btree_inode,
731
		     btrfs_super_total_bytes(disk_super));
732

C
Chris Mason 已提交
733 734 735 736 737
	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
		    sizeof(disk_super->magic))) {
		printk("btrfs: valid FS not found on %s\n", sb->s_id);
		goto fail_sb_buffer;
	}
738

739 740
	blocksize = btrfs_level_size(tree_root,
				     btrfs_super_root_level(disk_super));
741

C
Chris Mason 已提交
742
	tree_root->node = read_tree_block(tree_root,
743 744
					  btrfs_super_root(disk_super),
					  blocksize);
C
Chris Mason 已提交
745 746
	if (!tree_root->node)
		goto fail_sb_buffer;
747

C
Chris Mason 已提交
748
	mutex_lock(&fs_info->fs_mutex);
749 750

	ret = find_and_setup_root(tree_root, fs_info,
C
Chris Mason 已提交
751
				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
C
Chris Mason 已提交
752 753 754 755
	if (ret) {
		mutex_unlock(&fs_info->fs_mutex);
		goto fail_tree_root;
	}
756

C
Chris Mason 已提交
757 758
	btrfs_read_block_groups(extent_root);

759
	fs_info->generation = btrfs_super_generation(disk_super) + 1;
C
Chris Mason 已提交
760
	mutex_unlock(&fs_info->fs_mutex);
761
	return tree_root;
C
Chris Mason 已提交
762 763

fail_tree_root:
764
	free_extent_buffer(tree_root->node);
C
Chris Mason 已提交
765
fail_sb_buffer:
766
	free_extent_buffer(fs_info->sb_buffer);
C
Chris Mason 已提交
767 768 769 770 771 772 773
fail_iput:
	iput(fs_info->btree_inode);
fail:
	kfree(extent_root);
	kfree(tree_root);
	kfree(fs_info);
	return ERR_PTR(err);
774 775
}

776
int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
C
Chris Mason 已提交
777
		      *root)
778
{
779
	int ret;
780 781
	struct extent_buffer *super = root->fs_info->sb_buffer;
	struct inode *btree_inode = root->fs_info->btree_inode;
782
	struct super_block *sb = root->fs_info->sb;
783

784 785
	if (!btrfs_test_opt(root, NOBARRIER))
		blkdev_issue_flush(sb->s_bdev, NULL);
786 787 788
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super);
	ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
				     super->start, super->len);
789 790
	if (!btrfs_test_opt(root, NOBARRIER))
		blkdev_issue_flush(sb->s_bdev, NULL);
791
	return ret;
C
Chris Mason 已提交
792 793
}

794
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
C
Chris Mason 已提交
795 796 797
{
	radix_tree_delete(&fs_info->fs_roots_radix,
			  (unsigned long)root->root_key.objectid);
798
	btrfs_sysfs_del_root(root);
C
Chris Mason 已提交
799 800 801
	if (root->inode)
		iput(root->inode);
	if (root->node)
802
		free_extent_buffer(root->node);
C
Chris Mason 已提交
803
	if (root->commit_root)
804
		free_extent_buffer(root->commit_root);
805 806
	if (root->name)
		kfree(root->name);
C
Chris Mason 已提交
807 808 809 810
	kfree(root);
	return 0;
}

C
Chris Mason 已提交
811
static int del_fs_roots(struct btrfs_fs_info *fs_info)
812 813 814 815 816 817 818 819 820 821 822
{
	int ret;
	struct btrfs_root *gang[8];
	int i;

	while(1) {
		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					     (void **)gang, 0,
					     ARRAY_SIZE(gang));
		if (!ret)
			break;
C
Chris Mason 已提交
823
		for (i = 0; i < ret; i++)
824
			btrfs_free_fs_root(fs_info, gang[i]);
825 826 827
	}
	return 0;
}
828

C
Chris Mason 已提交
829
int close_ctree(struct btrfs_root *root)
C
Chris Mason 已提交
830
{
831
	int ret;
832
	struct btrfs_trans_handle *trans;
833
	struct btrfs_fs_info *fs_info = root->fs_info;
834

835
	fs_info->closing = 1;
C
Chris Mason 已提交
836
	btrfs_transaction_flush_work(root);
837
	mutex_lock(&fs_info->fs_mutex);
838
	btrfs_defrag_dirty_roots(root->fs_info);
C
Chris Mason 已提交
839
	trans = btrfs_start_transaction(root, 1);
840
	ret = btrfs_commit_transaction(trans, root);
C
Chris Mason 已提交
841 842 843 844
	/* run commit again to  drop the original snapshot */
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
845
	BUG_ON(ret);
C
Chris Mason 已提交
846
	write_ctree_super(NULL, root);
847 848 849
	mutex_unlock(&fs_info->fs_mutex);

	if (fs_info->extent_root->node)
850
		free_extent_buffer(fs_info->extent_root->node);
851

852
	if (fs_info->tree_root->node)
853
		free_extent_buffer(fs_info->tree_root->node);
854

855
	free_extent_buffer(fs_info->sb_buffer);
856

C
Chris Mason 已提交
857
	btrfs_free_block_groups(root->fs_info);
858
	del_fs_roots(fs_info);
859 860 861 862 863 864 865 866

	filemap_write_and_wait(fs_info->btree_inode->i_mapping);

	extent_map_tree_empty_lru(&fs_info->free_space_cache);
	extent_map_tree_empty_lru(&fs_info->block_group_cache);
	extent_map_tree_empty_lru(&fs_info->pinned_extents);
	extent_map_tree_empty_lru(&fs_info->pending_del);
	extent_map_tree_empty_lru(&fs_info->extent_ins);
867
	extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree);
868

869
	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
870

871
	iput(fs_info->btree_inode);
872 873 874 875 876 877 878 879 880 881
#if 0
	while(!list_empty(&fs_info->hashers)) {
		struct btrfs_hasher *hasher;
		hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
				    hashers);
		list_del(&hasher->hashers);
		crypto_free_hash(&fs_info->hash_tfm);
		kfree(hasher);
	}
#endif
882 883
	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
884 885 886
	return 0;
}

887 888
int btrfs_buffer_uptodate(struct extent_buffer *buf)
{
889
	struct inode *btree_inode = buf->first_page->mapping->host;
890 891 892 893
	return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
}

int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
C
Chris Mason 已提交
894
{
895
	struct inode *btree_inode = buf->first_page->mapping->host;
896 897 898
	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
					  buf);
}
899

900 901
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
902
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
903 904
	u64 transid = btrfs_header_generation(buf);
	struct inode *btree_inode = root->fs_info->btree_inode;
905

C
Chris Mason 已提交
906 907
	if (transid != root->fs_info->generation) {
		printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
908
			(unsigned long long)buf->start,
C
Chris Mason 已提交
909 910 911
			transid, root->fs_info->generation);
		WARN_ON(1);
	}
912
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf);
913 914
}

915 916
void btrfs_throttle(struct btrfs_root *root)
{
917 918 919 920 921
	struct backing_dev_info *bdi;

	bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
	if (root->fs_info->throttles && bdi_write_congested(bdi))
		congestion_wait(WRITE, HZ/20);
922 923
}

924
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
C
Chris Mason 已提交
925
{
926
	balance_dirty_pages_ratelimited_nr(
927
			root->fs_info->btree_inode->i_mapping, 1);
C
Chris Mason 已提交
928
}
929 930 931

void btrfs_set_buffer_defrag(struct extent_buffer *buf)
{
932
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
933 934 935 936 937 938 939
	struct inode *btree_inode = root->fs_info->btree_inode;
	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
			buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
}

void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
{
940
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
941 942 943 944 945 946 947 948
	struct inode *btree_inode = root->fs_info->btree_inode;
	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
			buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
			GFP_NOFS);
}

int btrfs_buffer_defrag(struct extent_buffer *buf)
{
949
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
950 951 952 953 954 955 956
	struct inode *btree_inode = root->fs_info->btree_inode;
	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
}

int btrfs_buffer_defrag_done(struct extent_buffer *buf)
{
957
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
958 959 960 961 962 963 964 965
	struct inode *btree_inode = root->fs_info->btree_inode;
	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG_DONE, 0);
}

int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
{
966
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
967 968 969 970 971 972 973 974
	struct inode *btree_inode = root->fs_info->btree_inode;
	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG_DONE, GFP_NOFS);
}

int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
{
975
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
976 977 978 979 980 981 982 983
	struct inode *btree_inode = root->fs_info->btree_inode;
	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG, GFP_NOFS);
}

int btrfs_read_buffer(struct extent_buffer *buf)
{
984
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
985 986
	struct inode *btree_inode = root->fs_info->btree_inode;
	return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
987
					buf, 0, 1);
988
}
989 990 991 992

/*
 * Extent map callbacks for the btree inode.  The only hook set is the
 * writepage I/O hook, which checksums the tree block on the page.
 */
static struct extent_map_ops btree_extent_map_ops = {
	.writepage_io_hook = btree_writepage_io_hook,
};