disk-io.c 27.2 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

C
Chris Mason 已提交
19
#include <linux/fs.h>
20
#include <linux/blkdev.h>
21
#include <linux/crc32c.h>
C
Chris Mason 已提交
22
#include <linux/scatterlist.h>
C
Chris Mason 已提交
23
#include <linux/swap.h>
24
#include <linux/radix-tree.h>
C
Chris Mason 已提交
25
#include <linux/writeback.h>
26
#include <linux/buffer_head.h> // for block_sync_page
27 28
#include "ctree.h"
#include "disk-io.h"
29
#include "transaction.h"
30
#include "btrfs_inode.h"
31
#include "print-tree.h"
32

33 34
#if 0
/*
 * Disabled sanity check: compare the block number of the extent buffer
 * with the block number recorded in its header.  Logs and returns 1 when
 * they differ, returns 0 when they match.
 */
static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
{
	unsigned long long buf_blocknr = extent_buffer_blocknr(buf);
	unsigned long long hdr_blocknr = btrfs_header_blocknr(buf);

	if (buf_blocknr == hdr_blocknr)
		return 0;

	printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
	       buf_blocknr, hdr_blocknr);
	return 1;
}
#endif
45

46 47
/*
 * Forward declaration: open_ctree() installs this ops table on the btree
 * inode's extent tree; the initializer lives at the bottom of the file.
 */
static struct extent_map_ops btree_extent_map_ops;

48
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
49
					    u64 bytenr, u32 blocksize)
50
{
51
	struct inode *btree_inode = root->fs_info->btree_inode;
52 53
	struct extent_buffer *eb;
	eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
54
				bytenr, blocksize, GFP_NOFS);
55
	return eb;
56
}
57

58
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
59
						 u64 bytenr, u32 blocksize)
60 61
{
	struct inode *btree_inode = root->fs_info->btree_inode;
62
	struct extent_buffer *eb;
63

64
	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
65
				 bytenr, blocksize, NULL, GFP_NOFS);
66
	return eb;
67 68
}

69 70 71
/*
 * get_extent callback for the btree inode.
 *
 * Returns the extent mapping that lookup_extent_mapping() finds for
 * [start, end].  When none exists, a single identity mapping is inserted
 * that runs from offset 0 to the last whole page below i_size
 * (block_start == start == 0, block_end == end) on the super block's
 * device, and that mapping is returned.
 *
 * page, page_offset and create are unused.
 *
 * Returns a valid extent_map, or an ERR_PTR() on allocation/insert failure.
 */
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
				    size_t page_offset, u64 start, u64 end,
				    int create)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	int ret;

	for (;;) {
		em = lookup_extent_mapping(em_tree, start, end);
		if (em)
			return em;

		em = alloc_extent_map(GFP_NOFS);
		if (!em)
			return ERR_PTR(-ENOMEM);

		em->start = 0;
		em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE - 1)) - 1;
		em->block_start = 0;
		em->block_end = em->end;
		em->bdev = inode->i_sb->s_bdev;

		ret = add_extent_mapping(em_tree, em);
		if (!ret)
			return em;

		/*
		 * The mapping was not inserted, so we still own it.  The
		 * original code only released it for -EEXIST and leaked it
		 * for every other error.
		 */
		free_extent_map(em);
		if (ret != -EEXIST)
			return ERR_PTR(ret);
		/* -EEXIST: a mapping is already there, go look it up */
	}
}

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
/*
 * Continue a crc32c over len bytes at data, starting from seed, and
 * return the updated value.  root is not used.
 */
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
{
	u32 crc = crc32c(seed, data, len);

	return crc;
}

/*
 * Finish a checksum: write the bitwise complement of the little-endian
 * encoding of crc into the first sizeof(__le32) bytes of result.
 *
 * result is a plain char buffer (csum_tree_block() passes a stack array)
 * with no alignment guarantee, so the value is copied with memcpy()
 * instead of being stored through a cast __le32 pointer.
 */
void btrfs_csum_final(u32 crc, char *result)
{
	__le32 csum = ~cpu_to_le32(crc);

	memcpy(result, &csum, sizeof(csum));
}

/*
 * Compute the crc32c of a tree block, skipping the first BTRFS_CSUM_SIZE
 * bytes (the checksum area itself).
 *
 * verify == 0: store the computed checksum at offset 0 of the buffer.
 * verify != 0: compare the computed checksum with the one stored at
 *              offset 0.  Blocks whose header generation matches the
 *              running transaction are not compared at all (see FIXME).
 *
 * Returns 0 on success, 1 if the buffer could not be mapped or the
 * verification failed.
 */
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
			   int verify)
{
	char result[BTRFS_CRC32_SIZE];
	unsigned long len;
	unsigned long cur_len;
	unsigned long offset = BTRFS_CSUM_SIZE;
	char *map_token = NULL;
	char *kaddr;
	unsigned long map_start;
	unsigned long map_len;
	int err;
	u32 crc = ~(u32)0;

	/* checksum the buffer one mapped chunk at a time */
	len = buf->len - offset;
	while (len > 0) {
		err = map_private_extent_buffer(buf, offset, 32,
					&map_token, &kaddr,
					&map_start, &map_len, KM_USER0);
		if (err) {
			printk("failed to map extent buffer! %lu\n",
			       offset);
			return 1;
		}
		/* never read past the end of the current mapping */
		cur_len = min(len, map_len - (offset - map_start));
		crc = btrfs_csum_data(root, kaddr + offset - map_start,
				      crc, cur_len);
		len -= cur_len;
		offset += cur_len;
		unmap_extent_buffer(buf, map_token, KM_USER0);
	}
	btrfs_csum_final(crc, result);

	if (verify) {
		int from_this_trans = 0;

		if (root->fs_info->running_transaction &&
		    btrfs_header_generation(buf) ==
		    root->fs_info->running_transaction->transid)
			from_this_trans = 1;

		/* FIXME, this is not good */
		if (from_this_trans == 0 &&
		    memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
			u32 val;
			u32 found = 0;
			memcpy(&found, result, BTRFS_CRC32_SIZE);

			read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
			/*
			 * buf->start is cast explicitly so the argument
			 * matches %llu, as btrfs_mark_buffer_dirty() does.
			 */
			printk("btrfs: %s checksum verify failed on %llu "
			       "wanted %X found %X from_this_trans %d\n",
			       root->fs_info->sb->s_id,
			       (unsigned long long)buf->start, val, found,
			       from_this_trans);
			return 1;
		}
	} else {
		write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
	}
	return 0;
}


int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
	struct extent_map_tree *tree;
179
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
	u64 found_start;
	int found_level;
	unsigned long len;
	struct extent_buffer *eb;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;

	if (page->private == EXTENT_PAGE_PRIVATE)
		goto out;
	if (!page->private)
		goto out;
	len = page->private >> 2;
	if (len == 0) {
		WARN_ON(1);
	}
	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
	read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1);
196
	btrfs_clear_buffer_defrag(eb);
197 198 199 200
	found_start = btrfs_header_bytenr(eb);
	if (found_start != start) {
		printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
		       start, found_start, len);
201 202 203 204 205 206 207 208 209 210 211 212 213
		WARN_ON(1);
		goto err;
	}
	if (eb->first_page != page) {
		printk("bad first page %lu %lu\n", eb->first_page->index,
		       page->index);
		WARN_ON(1);
		goto err;
	}
	if (!PageUptodate(page)) {
		printk("csum not up to date page %lu\n", page->index);
		WARN_ON(1);
		goto err;
214 215 216
	}
	found_level = btrfs_header_level(eb);
	csum_tree_block(root, eb, 0);
217
err:
218 219 220 221 222
	free_extent_buffer(eb);
out:
	return 0;
}

223
/*
 * writepage_io_hook for the btree extent tree: checksum the tree block
 * on @page.  start and end are unused.  Always returns 0.
 */
static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
{
	struct inode *host = page->mapping->host;

	csum_dirty_buffer(BTRFS_I(host)->root, page);
	return 0;
}

static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
235 236
	return extent_write_full_page(tree, page, btree_get_extent, wbc);
}
237 238 239 240 241 242

static int btree_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(mapping->host)->extent_tree;
243
	if (wbc->sync_mode == WB_SYNC_NONE) {
244 245 246
		u64 num_dirty;
		u64 start = 0;
		unsigned long thresh = 96 * 1024 * 1024;
247 248 249 250

		if (wbc->for_kupdate)
			return 0;

251 252 253 254 255
		if (current_is_pdflush()) {
			thresh = 96 * 1024 * 1024;
		} else {
			thresh = 8 * 1024 * 1024;
		}
256 257
		num_dirty = count_range_bits(tree, &start, (u64)-1,
					     thresh, EXTENT_DIRTY);
258 259 260 261
		if (num_dirty < thresh) {
			return 0;
		}
	}
262 263 264
	return extent_writepages(tree, mapping, btree_get_extent, wbc);
}

265 266 267 268 269 270
int btree_readpage(struct file *file, struct page *page)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
	return extent_read_full_page(tree, page, btree_get_extent);
}
C
Chris Mason 已提交
271

272 273 274 275
/*
 * ->releasepage for the btree inode.  When try_release_extent_mapping()
 * returns 1, the page's private state is cleared and the page reference
 * is dropped.  Returns the value from try_release_extent_mapping().
 */
static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
	struct extent_map_tree *tree = &BTRFS_I(page->mapping->host)->extent_tree;
	int released = try_release_extent_mapping(tree, page);

	if (released != 1)
		return released;

	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
	return released;
}

287
static void btree_invalidatepage(struct page *page, unsigned long offset)
288
{
289 290 291 292
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
	extent_invalidatepage(tree, page, offset);
	btree_releasepage(page, GFP_NOFS);
293 294
}

295
#if 0
/*
 * Disabled buffer_head based writepage: checksums every dirty buffer on
 * the page, then writes the page with block_write_full_page().
 */
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct buffer_head *head;
	struct buffer_head *bh;

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
	}

	/* walk the circular list of buffers attached to the page */
	bh = head = page_buffers(page);
	do {
		if (buffer_dirty(bh))
			csum_tree_block(root, bh, 0);
	} while ((bh = bh->b_this_page) != head);

	return block_write_full_page(page, btree_get_block, wbc);
}
#endif
315

316 317 318
static struct address_space_operations btree_aops = {
	.readpage	= btree_readpage,
	.writepage	= btree_writepage,
319
	.writepages	= btree_writepages,
320 321
	.releasepage	= btree_releasepage,
	.invalidatepage = btree_invalidatepage,
322 323 324
	.sync_page	= block_sync_page,
};

325
/*
 * Start reading the tree block at (bytenr, blocksize): the pages are
 * submitted with the last argument of read_extent_buffer_pages() set to 0
 * and the buffer reference is dropped right away.  Always returns 0, also
 * when no extent buffer could be obtained.
 */
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
{
	struct extent_map_tree *tree;
	struct extent_buffer *eb;

	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (eb) {
		tree = &BTRFS_I(root->fs_info->btree_inode)->extent_tree;
		read_extent_buffer_pages(tree, eb, 0, 0);
		free_extent_buffer(eb);
	}
	return 0;
}

340 341
/*
 * Read the tree block at (bytenr, blocksize) and run checksum
 * verification on it once.
 *
 * EXTENT_CSUM, both in buf->flags and as a range bit on the first page of
 * the buffer, marks blocks that already went through csum_tree_block();
 * the range bit is re-tested under the extent lock before the checksum is
 * computed.
 *
 * NOTE(review): the result of csum_tree_block() is discarded, so a block
 * that fails verification is still marked EXTENT_CSUM and returned.
 *
 * Returns the extent buffer, or NULL if none could be obtained.
 */
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
				      u32 blocksize)
{
	struct extent_map_tree *tree =
		&BTRFS_I(root->fs_info->btree_inode)->extent_tree;
	struct extent_buffer *eb;
	u64 first_page_end;

	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!eb)
		return NULL;
	read_extent_buffer_pages(tree, eb, 0, 1);

	if (eb->flags & EXTENT_CSUM)
		return eb;

	first_page_end = eb->start + PAGE_CACHE_SIZE - 1;
	if (test_range_bit(tree, eb->start, first_page_end, EXTENT_CSUM, 1)) {
		eb->flags |= EXTENT_CSUM;
		return eb;
	}

	lock_extent(tree, eb->start, first_page_end, GFP_NOFS);
	if (!test_range_bit(tree, eb->start, first_page_end, EXTENT_CSUM, 1)) {
		csum_tree_block(root, eb, 1);
		set_extent_bits(tree, eb->start, first_page_end, EXTENT_CSUM,
				GFP_NOFS);
	}
	eb->flags |= EXTENT_CSUM;
	unlock_extent(tree, eb->start, first_page_end, GFP_NOFS);
	return eb;
}

382
int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
383
		     struct extent_buffer *buf)
384
{
385
	struct inode *btree_inode = root->fs_info->btree_inode;
386 387 388 389
	if (btrfs_header_generation(buf) ==
	    root->fs_info->running_transaction->transid)
		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree,
					  buf);
390 391 392 393 394 395 396 397 398 399 400 401
	return 0;
}

/* Wait for writeback of @buf through the btree inode's extent tree. */
int wait_on_tree_block_writeback(struct btrfs_root *root,
				 struct extent_buffer *buf)
{
	struct extent_map_tree *tree;

	tree = &BTRFS_I(root->fs_info->btree_inode)->extent_tree;
	wait_on_extent_buffer_writeback(tree, buf);
	return 0;
}

402
static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
403
			u32 stripesize, struct btrfs_root *root,
404
			struct btrfs_fs_info *fs_info,
C
Chris Mason 已提交
405
			u64 objectid)
406
{
C
Chris Mason 已提交
407
	root->node = NULL;
408
	root->inode = NULL;
409
	root->commit_root = NULL;
410 411 412
	root->sectorsize = sectorsize;
	root->nodesize = nodesize;
	root->leafsize = leafsize;
413
	root->stripesize = stripesize;
C
Chris Mason 已提交
414
	root->ref_cows = 0;
415
	root->fs_info = fs_info;
416 417
	root->objectid = objectid;
	root->last_trans = 0;
C
Chris Mason 已提交
418 419
	root->highest_inode = 0;
	root->last_inode_alloc = 0;
420
	root->name = NULL;
421
	root->in_sysfs = 0;
422 423
	memset(&root->root_key, 0, sizeof(root->root_key));
	memset(&root->root_item, 0, sizeof(root->root_item));
424
	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
425 426
	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
	init_completion(&root->kobj_unregister);
427 428
	root->defrag_running = 0;
	root->defrag_level = 0;
429
	root->root_key.objectid = objectid;
430 431 432
	return 0;
}

433
/*
 * Initialize @root for @objectid with the geometry of @tree_root, load
 * the root item and key found by btrfs_find_last_root(), and read the
 * root's top tree block.
 *
 * Returns 0 on success or a negative errno.  Failures used to hit
 * BUG_ON(); both callers in this file already handle a non-zero return,
 * so the error is now propagated instead.
 */
static int find_and_setup_root(struct btrfs_root *tree_root,
			       struct btrfs_fs_info *fs_info,
			       u64 objectid,
			       struct btrfs_root *root)
{
	int ret;
	u32 blocksize;

	__setup_root(tree_root->nodesize, tree_root->leafsize,
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, objectid);
	ret = btrfs_find_last_root(tree_root, objectid,
				   &root->root_item, &root->root_key);
	if (ret) {
		/* callers feed this to ERR_PTR(); never hand back ret > 0 */
		return ret > 0 ? -ENOENT : ret;
	}

	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
				     blocksize);
	if (!root->node)
		return -EIO;
	return 0;
}

455 456
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info,
					       struct btrfs_key *location)
457 458 459 460
{
	struct btrfs_root *root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path;
461
	struct extent_buffer *l;
C
Chris Mason 已提交
462
	u64 highest_inode;
463
	u32 blocksize;
464 465
	int ret = 0;

466
	root = kzalloc(sizeof(*root), GFP_NOFS);
C
Chris Mason 已提交
467
	if (!root)
468 469
		return ERR_PTR(-ENOMEM);
	if (location->offset == (u64)-1) {
470
		ret = find_and_setup_root(tree_root, fs_info,
471 472 473 474 475 476 477 478
					  location->objectid, root);
		if (ret) {
			kfree(root);
			return ERR_PTR(ret);
		}
		goto insert;
	}

479
	__setup_root(tree_root->nodesize, tree_root->leafsize,
480 481
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, location->objectid);
482 483 484 485 486 487 488 489 490

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
	if (ret != 0) {
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}
491 492 493
	l = path->nodes[0];
	read_extent_buffer(l, &root->root_item,
	       btrfs_item_ptr_offset(l, path->slots[0]),
494
	       sizeof(root->root_item));
495
	memcpy(&root->root_key, location, sizeof(*location));
496 497 498 499 500 501 502 503
	ret = 0;
out:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	if (ret) {
		kfree(root);
		return ERR_PTR(ret);
	}
504 505 506
	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
				     blocksize);
507 508 509
	BUG_ON(!root->node);
insert:
	root->ref_cows = 1;
510 511 512 513 514 515 516 517
	ret = btrfs_find_highest_inode(root, &highest_inode);
	if (ret == 0) {
		root->highest_inode = highest_inode;
		root->last_inode_alloc = highest_inode;
	}
	return root;
}

C
Chris Mason 已提交
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
/*
 * Return the already-loaded root for @root_objectid: the tree root, the
 * extent root, or whatever fs_roots_radix holds for that id.
 */
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
					u64 root_objectid)
{
	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;

	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

	return radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)root_objectid);
}

533 534
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
					      struct btrfs_key *location)
535 536 537 538
{
	struct btrfs_root *root;
	int ret;

539 540 541 542 543
	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;
	if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

544 545 546 547 548 549 550 551
	root = radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)location->objectid);
	if (root)
		return root;

	root = btrfs_read_fs_root_no_radix(fs_info, location);
	if (IS_ERR(root))
		return root;
C
Chris Mason 已提交
552 553
	ret = radix_tree_insert(&fs_info->fs_roots_radix,
				(unsigned long)root->root_key.objectid,
554 555
				root);
	if (ret) {
556
		free_extent_buffer(root->node);
557 558 559
		kfree(root);
		return ERR_PTR(ret);
	}
560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
	ret = btrfs_find_dead_roots(fs_info->tree_root,
				    root->root_key.objectid, root);
	BUG_ON(ret);

	return root;
}

struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
				      struct btrfs_key *location,
				      const char *name, int namelen)
{
	struct btrfs_root *root;
	int ret;

	root = btrfs_read_fs_root_no_name(fs_info, location);
	if (!root)
		return NULL;
577

578 579 580
	if (root->in_sysfs)
		return root;

581 582
	ret = btrfs_set_root_name(root, name, namelen);
	if (ret) {
583
		free_extent_buffer(root->node);
584 585 586 587 588 589
		kfree(root);
		return ERR_PTR(ret);
	}

	ret = btrfs_sysfs_add_root(root);
	if (ret) {
590
		free_extent_buffer(root->node);
591 592 593 594
		kfree(root->name);
		kfree(root);
		return ERR_PTR(ret);
	}
595
	root->in_sysfs = 1;
596 597
	return root;
}
598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
#if 0
/*
 * Disabled: allocate a hash transform of the given type and add it to
 * info->hashers under hash_lock.  Returns 0, -ENOMEM or -EINVAL.
 */
static int add_hasher(struct btrfs_fs_info *info, char *type)
{
	struct btrfs_hasher *hasher = kmalloc(sizeof(*hasher), GFP_NOFS);

	if (!hasher)
		return -ENOMEM;

	hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC);
	if (!hasher->hash_tfm) {
		kfree(hasher);
		return -EINVAL;
	}

	spin_lock(&info->hash_lock);
	list_add(&hasher->list, &info->hashers);
	spin_unlock(&info->hash_lock);
	return 0;
}
#endif
C
Chris Mason 已提交
616
struct btrfs_root *open_ctree(struct super_block *sb)
617
{
618 619 620 621
	u32 sectorsize;
	u32 nodesize;
	u32 leafsize;
	u32 blocksize;
622
	u32 stripesize;
C
Chris Mason 已提交
623 624 625 626 627 628
	struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
						 GFP_NOFS);
	struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
					       GFP_NOFS);
	struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
						GFP_NOFS);
629
	int ret;
C
Chris Mason 已提交
630
	int err = -EIO;
C
Chris Mason 已提交
631
	struct btrfs_super_block *disk_super;
632

C
Chris Mason 已提交
633 634 635 636
	if (!extent_root || !tree_root || !fs_info) {
		err = -ENOMEM;
		goto fail;
	}
637
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
C
Chris Mason 已提交
638
	INIT_LIST_HEAD(&fs_info->trans_list);
639
	INIT_LIST_HEAD(&fs_info->dead_roots);
640 641
	INIT_LIST_HEAD(&fs_info->hashers);
	spin_lock_init(&fs_info->hash_lock);
642
	spin_lock_init(&fs_info->delalloc_lock);
643
	spin_lock_init(&fs_info->new_trans_lock);
644

645 646
	memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
	init_completion(&fs_info->kobj_unregister);
C
Chris Mason 已提交
647
	sb_set_blocksize(sb, 4096);
648
	fs_info->running_transaction = NULL;
649
	fs_info->last_trans_committed = 0;
650 651
	fs_info->tree_root = tree_root;
	fs_info->extent_root = extent_root;
C
Chris Mason 已提交
652
	fs_info->sb = sb;
653
	fs_info->throttles = 0;
654
	fs_info->mount_opt = 0;
655
	fs_info->max_extent = (u64)-1;
656
	fs_info->delalloc_bytes = 0;
657 658
	fs_info->btree_inode = new_inode(sb);
	fs_info->btree_inode->i_ino = 1;
C
Chris Mason 已提交
659
	fs_info->btree_inode->i_nlink = 1;
660 661
	fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
662 663 664
	extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
			     fs_info->btree_inode->i_mapping,
			     GFP_NOFS);
665 666
	BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops;

667 668
	extent_map_tree_init(&fs_info->free_space_cache,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
669 670
	extent_map_tree_init(&fs_info->block_group_cache,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
671 672 673 674 675 676
	extent_map_tree_init(&fs_info->pinned_extents,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_map_tree_init(&fs_info->pending_del,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_map_tree_init(&fs_info->extent_ins,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
677
	fs_info->do_barriers = 1;
678
	fs_info->closing = 0;
679
	fs_info->total_pinned = 0;
680 681
	fs_info->last_alloc = 0;

682 683 684
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
#else
C
Chris Mason 已提交
685
	INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
686
#endif
687 688 689
	BTRFS_I(fs_info->btree_inode)->root = tree_root;
	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
	       sizeof(struct btrfs_key));
C
Chris Mason 已提交
690
	insert_inode_hash(fs_info->btree_inode);
691
	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
C
Chris Mason 已提交
692

C
Chris Mason 已提交
693
	mutex_init(&fs_info->trans_mutex);
C
Chris Mason 已提交
694
	mutex_init(&fs_info->fs_mutex);
695

696 697 698 699 700 701 702 703
#if 0
	ret = add_hasher(fs_info, "crc32c");
	if (ret) {
		printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
		err = -ENOMEM;
		goto fail_iput;
	}
#endif
704
	__setup_root(512, 512, 512, 512, tree_root,
C
Chris Mason 已提交
705
		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
706

C
Chris Mason 已提交
707
	fs_info->sb_buffer = read_tree_block(tree_root,
708 709
					     BTRFS_SUPER_INFO_OFFSET,
					     512);
710

711
	if (!fs_info->sb_buffer)
C
Chris Mason 已提交
712 713
		goto fail_iput;

714 715 716 717 718 719 720
	read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
			   sizeof(fs_info->super_copy));

	read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
			   (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
			   BTRFS_FSID_SIZE);
	disk_super = &fs_info->super_copy;
721
	if (!btrfs_super_root(disk_super))
C
Chris Mason 已提交
722
		goto fail_sb_buffer;
723

724 725 726
	nodesize = btrfs_super_nodesize(disk_super);
	leafsize = btrfs_super_leafsize(disk_super);
	sectorsize = btrfs_super_sectorsize(disk_super);
727
	stripesize = btrfs_super_stripesize(disk_super);
728 729 730
	tree_root->nodesize = nodesize;
	tree_root->leafsize = leafsize;
	tree_root->sectorsize = sectorsize;
731
	tree_root->stripesize = stripesize;
732
	sb_set_blocksize(sb, sectorsize);
733

734
	i_size_write(fs_info->btree_inode,
735
		     btrfs_super_total_bytes(disk_super));
736

C
Chris Mason 已提交
737 738 739 740 741
	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
		    sizeof(disk_super->magic))) {
		printk("btrfs: valid FS not found on %s\n", sb->s_id);
		goto fail_sb_buffer;
	}
742

743 744
	blocksize = btrfs_level_size(tree_root,
				     btrfs_super_root_level(disk_super));
745

C
Chris Mason 已提交
746
	tree_root->node = read_tree_block(tree_root,
747 748
					  btrfs_super_root(disk_super),
					  blocksize);
C
Chris Mason 已提交
749 750
	if (!tree_root->node)
		goto fail_sb_buffer;
751

C
Chris Mason 已提交
752
	mutex_lock(&fs_info->fs_mutex);
753 754

	ret = find_and_setup_root(tree_root, fs_info,
C
Chris Mason 已提交
755
				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
C
Chris Mason 已提交
756 757 758 759
	if (ret) {
		mutex_unlock(&fs_info->fs_mutex);
		goto fail_tree_root;
	}
760

C
Chris Mason 已提交
761 762
	btrfs_read_block_groups(extent_root);

763
	fs_info->generation = btrfs_super_generation(disk_super) + 1;
C
Chris Mason 已提交
764
	mutex_unlock(&fs_info->fs_mutex);
765
	return tree_root;
C
Chris Mason 已提交
766 767

fail_tree_root:
768
	free_extent_buffer(tree_root->node);
C
Chris Mason 已提交
769
fail_sb_buffer:
770
	free_extent_buffer(fs_info->sb_buffer);
C
Chris Mason 已提交
771 772 773 774 775 776 777
fail_iput:
	iput(fs_info->btree_inode);
fail:
	kfree(extent_root);
	kfree(tree_root);
	kfree(fs_info);
	return ERR_PTR(err);
778 779
}

780
int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
C
Chris Mason 已提交
781
		      *root)
782
{
783
	int ret;
784 785
	struct extent_buffer *super = root->fs_info->sb_buffer;
	struct inode *btree_inode = root->fs_info->btree_inode;
786
	struct super_block *sb = root->fs_info->sb;
787

788 789
	if (!btrfs_test_opt(root, NOBARRIER))
		blkdev_issue_flush(sb->s_bdev, NULL);
790 791 792
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super);
	ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
				     super->start, super->len);
793 794
	if (!btrfs_test_opt(root, NOBARRIER))
		blkdev_issue_flush(sb->s_bdev, NULL);
795
	return ret;
C
Chris Mason 已提交
796 797
}

798
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
C
Chris Mason 已提交
799 800 801
{
	radix_tree_delete(&fs_info->fs_roots_radix,
			  (unsigned long)root->root_key.objectid);
802 803
	if (root->in_sysfs)
		btrfs_sysfs_del_root(root);
C
Chris Mason 已提交
804 805 806
	if (root->inode)
		iput(root->inode);
	if (root->node)
807
		free_extent_buffer(root->node);
C
Chris Mason 已提交
808
	if (root->commit_root)
809
		free_extent_buffer(root->commit_root);
810 811
	if (root->name)
		kfree(root->name);
C
Chris Mason 已提交
812 813 814 815
	kfree(root);
	return 0;
}

C
Chris Mason 已提交
816
static int del_fs_roots(struct btrfs_fs_info *fs_info)
817 818 819 820 821 822 823 824 825 826 827
{
	int ret;
	struct btrfs_root *gang[8];
	int i;

	while(1) {
		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					     (void **)gang, 0,
					     ARRAY_SIZE(gang));
		if (!ret)
			break;
C
Chris Mason 已提交
828
		for (i = 0; i < ret; i++)
829
			btrfs_free_fs_root(fs_info, gang[i]);
830 831 832
	}
	return 0;
}
833

C
Chris Mason 已提交
834
int close_ctree(struct btrfs_root *root)
C
Chris Mason 已提交
835
{
836
	int ret;
837
	struct btrfs_trans_handle *trans;
838
	struct btrfs_fs_info *fs_info = root->fs_info;
839

840
	fs_info->closing = 1;
C
Chris Mason 已提交
841
	btrfs_transaction_flush_work(root);
842
	mutex_lock(&fs_info->fs_mutex);
843
	btrfs_defrag_dirty_roots(root->fs_info);
C
Chris Mason 已提交
844
	trans = btrfs_start_transaction(root, 1);
845
	ret = btrfs_commit_transaction(trans, root);
C
Chris Mason 已提交
846 847 848 849
	/* run commit again to  drop the original snapshot */
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
850
	BUG_ON(ret);
C
Chris Mason 已提交
851
	write_ctree_super(NULL, root);
852 853 854
	mutex_unlock(&fs_info->fs_mutex);

	if (fs_info->extent_root->node)
855
		free_extent_buffer(fs_info->extent_root->node);
856

857
	if (fs_info->tree_root->node)
858
		free_extent_buffer(fs_info->tree_root->node);
859

860
	free_extent_buffer(fs_info->sb_buffer);
861

C
Chris Mason 已提交
862
	btrfs_free_block_groups(root->fs_info);
863
	del_fs_roots(fs_info);
864 865 866 867 868 869 870 871

	filemap_write_and_wait(fs_info->btree_inode->i_mapping);

	extent_map_tree_empty_lru(&fs_info->free_space_cache);
	extent_map_tree_empty_lru(&fs_info->block_group_cache);
	extent_map_tree_empty_lru(&fs_info->pinned_extents);
	extent_map_tree_empty_lru(&fs_info->pending_del);
	extent_map_tree_empty_lru(&fs_info->extent_ins);
872
	extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree);
873

874
	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
875

876
	iput(fs_info->btree_inode);
877 878 879 880 881 882 883 884 885 886
#if 0
	while(!list_empty(&fs_info->hashers)) {
		struct btrfs_hasher *hasher;
		hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
				    hashers);
		list_del(&hasher->hashers);
		crypto_free_hash(&fs_info->hash_tfm);
		kfree(hasher);
	}
#endif
887 888
	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
889 890 891
	return 0;
}

892 893
int btrfs_buffer_uptodate(struct extent_buffer *buf)
{
894
	struct inode *btree_inode = buf->first_page->mapping->host;
895 896 897 898
	return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
}

int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
C
Chris Mason 已提交
899
{
900
	struct inode *btree_inode = buf->first_page->mapping->host;
901 902 903
	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
					  buf);
}
904

905 906
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
907
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
908 909
	u64 transid = btrfs_header_generation(buf);
	struct inode *btree_inode = root->fs_info->btree_inode;
910

C
Chris Mason 已提交
911 912
	if (transid != root->fs_info->generation) {
		printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
913
			(unsigned long long)buf->start,
C
Chris Mason 已提交
914 915 916
			transid, root->fs_info->generation);
		WARN_ON(1);
	}
917
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf);
918 919
}

920 921
void btrfs_throttle(struct btrfs_root *root)
{
922 923 924
	struct backing_dev_info *bdi;

	bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
925 926
	if (root->fs_info->throttles && bdi_write_congested(bdi)) {
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
927
		congestion_wait(WRITE, HZ/20);
928 929 930 931
#else
		blk_congestion_wait(WRITE, HZ/20);
#endif
	}
932 933
}

934
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
C
Chris Mason 已提交
935
{
936
	balance_dirty_pages_ratelimited_nr(
937
			root->fs_info->btree_inode->i_mapping, 1);
C
Chris Mason 已提交
938
}
939 940 941

void btrfs_set_buffer_defrag(struct extent_buffer *buf)
{
942
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
943 944 945 946 947 948 949
	struct inode *btree_inode = root->fs_info->btree_inode;
	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
			buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
}

void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
{
950
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
951 952 953 954 955 956 957 958
	struct inode *btree_inode = root->fs_info->btree_inode;
	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
			buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
			GFP_NOFS);
}

int btrfs_buffer_defrag(struct extent_buffer *buf)
{
959
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
960 961 962 963 964 965 966
	struct inode *btree_inode = root->fs_info->btree_inode;
	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
}

int btrfs_buffer_defrag_done(struct extent_buffer *buf)
{
967
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
968 969 970 971 972 973 974 975
	struct inode *btree_inode = root->fs_info->btree_inode;
	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG_DONE, 0);
}

int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
{
976
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
977 978 979 980 981 982 983 984
	struct inode *btree_inode = root->fs_info->btree_inode;
	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG_DONE, GFP_NOFS);
}

int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
{
985
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
986 987 988 989 990 991 992 993
	struct inode *btree_inode = root->fs_info->btree_inode;
	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG, GFP_NOFS);
}

int btrfs_read_buffer(struct extent_buffer *buf)
{
994
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
995 996
	struct inode *btree_inode = root->fs_info->btree_inode;
	return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
997
					buf, 0, 1);
998
}
999 1000 1001 1002

/*
 * Extent map callbacks for the btree inode's extent tree (installed in
 * open_ctree()).  The write hook checksums tree blocks via
 * csum_dirty_buffer().
 */
static struct extent_map_ops btree_extent_map_ops = {
	.writepage_io_hook = btree_writepage_io_hook,
};