disk-io.c 27.1 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

C
Chris Mason 已提交
19
#include <linux/fs.h>
20
#include <linux/blkdev.h>
21
#include <linux/crc32c.h>
C
Chris Mason 已提交
22
#include <linux/scatterlist.h>
C
Chris Mason 已提交
23
#include <linux/swap.h>
24
#include <linux/radix-tree.h>
C
Chris Mason 已提交
25
#include <linux/writeback.h>
26
#include <linux/buffer_head.h> // for block_sync_page
27 28
#include "ctree.h"
#include "disk-io.h"
29
#include "transaction.h"
30
#include "btrfs_inode.h"
31
#include "print-tree.h"
32

33 34
#if 0
/*
 * Disabled sanity check: compare the block number reported for the extent
 * buffer with the one stored in its header.  Logs and returns 1 when they
 * differ, returns 0 when they match.
 */
static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
{
	if (extent_buffer_blocknr(buf) == btrfs_header_blocknr(buf))
		return 0;

	printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
	       (unsigned long long)extent_buffer_blocknr(buf),
	       (unsigned long long)btrfs_header_blocknr(buf));
	return 1;
}
#endif
45

46 47
/* Tentative definition so code below can take the address of the ops table. */
static struct extent_map_ops btree_extent_map_ops;

48
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
49
					    u64 bytenr, u32 blocksize)
50
{
51
	struct inode *btree_inode = root->fs_info->btree_inode;
52 53
	struct extent_buffer *eb;
	eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
54
				bytenr, blocksize, GFP_NOFS);
55
	return eb;
56
}
57

58
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
59
						 u64 bytenr, u32 blocksize)
60 61
{
	struct inode *btree_inode = root->fs_info->btree_inode;
62
	struct extent_buffer *eb;
63

64
	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
65
				 bytenr, blocksize, NULL, GFP_NOFS);
66
	return eb;
67 68
}

69 70 71
/*
 * get_extent callback for the btree inode.
 *
 * Returns the mapping found for [start, end] if one exists.  Otherwise a
 * single identity mapping (block_start == start == 0) spanning the inode
 * size rounded down to a page boundary is created and inserted.
 *
 * page, page_offset and create are unused here.
 *
 * Returns the extent map, or an ERR_PTR() on allocation/insertion failure.
 */
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
				    size_t page_offset, u64 start, u64 end,
				    int create)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	int ret;

again:
	em = lookup_extent_mapping(em_tree, start, end);
	if (em) {
		goto out;
	}
	em = alloc_extent_map(GFP_NOFS);
	if (!em) {
		em = ERR_PTR(-ENOMEM);
		goto out;
	}
	em->start = 0;
	em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1;
	em->block_start = 0;
	em->block_end = em->end;
	em->bdev = inode->i_sb->s_bdev;
	ret = add_extent_mapping(em_tree, em);
	if (ret == -EEXIST) {
		/* somebody else inserted a mapping first; use theirs */
		free_extent_map(em);
		em = NULL;
		goto again;
	} else if (ret) {
		/*
		 * The map was never inserted, so drop our reference before
		 * reporting the error; otherwise it is leaked.
		 */
		free_extent_map(em);
		em = ERR_PTR(ret);
	}
out:
	return em;
}

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
/*
 * Fold len bytes at data into the running crc32c value seed and return the
 * updated value.  root is not used.
 */
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
{
	u32 crc = crc32c(seed, data, len);

	return crc;
}

/*
 * Finish a checksum: store the bitwise inverse of crc, in little-endian byte
 * order, into the first sizeof(__le32) bytes of result.
 *
 * result is a plain char buffer with no alignment guarantee, so the value is
 * copied byte-wise instead of being stored through a casted __le32 pointer.
 */
void btrfs_csum_final(u32 crc, char *result)
{
	__le32 le_crc = ~cpu_to_le32(crc);

	memcpy(result, &le_crc, sizeof(le_crc));
}

/*
 * Checksum a tree block.
 *
 * The crc32c is computed over everything in buf after the first
 * BTRFS_CSUM_SIZE bytes, mapping the buffer piece by piece.
 *
 * verify == 0: the result is written into the start of buf.
 * verify != 0: the result is compared with the first BTRFS_CRC32_SIZE bytes
 *              of buf, unless the buffer's generation matches the running
 *              transaction, in which case the comparison is skipped.
 *
 * Returns 0 on success, 1 if the buffer could not be mapped or the
 * verification failed.
 */
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
			   int verify)
{
	char result[BTRFS_CRC32_SIZE];
	unsigned long len;
	unsigned long cur_len;
	unsigned long offset = BTRFS_CSUM_SIZE;
	char *map_token = NULL;
	char *kaddr;
	unsigned long map_start;
	unsigned long map_len;
	int err;
	u32 crc = ~(u32)0;

	len = buf->len - offset;
	while(len > 0) {
		err = map_private_extent_buffer(buf, offset, 32,
					&map_token, &kaddr,
					&map_start, &map_len, KM_USER0);
		if (err) {
			printk("failed to map extent buffer! %lu\n",
			       offset);
			return 1;
		}
		/* checksum only what this mapping covers from offset on */
		cur_len = min(len, map_len - (offset - map_start));
		crc = btrfs_csum_data(root, kaddr + offset - map_start,
				      crc, cur_len);
		len -= cur_len;
		offset += cur_len;
		unmap_extent_buffer(buf, map_token, KM_USER0);
	}
	btrfs_csum_final(crc, result);

	if (verify) {
		int from_this_trans = 0;

		if (root->fs_info->running_transaction &&
		    btrfs_header_generation(buf) ==
		    root->fs_info->running_transaction->transid)
			from_this_trans = 1;

		/* FIXME, this is not good */
		if (from_this_trans == 0 &&
		    memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
			u32 val;
			u32 found = 0;
			memcpy(&found, result, BTRFS_CRC32_SIZE);

			read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
			/* u64 is not unsigned long long everywhere: cast */
			printk("btrfs: %s checksum verify failed on %llu "
			       "wanted %X found %X from_this_trans %d\n",
			       root->fs_info->sb->s_id,
			       (unsigned long long)buf->start, val, found,
			       from_this_trans);
			return 1;
		}
	} else {
		write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
	}
	return 0;
}


int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
	struct extent_map_tree *tree;
179
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
	u64 found_start;
	int found_level;
	unsigned long len;
	struct extent_buffer *eb;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;

	if (page->private == EXTENT_PAGE_PRIVATE)
		goto out;
	if (!page->private)
		goto out;
	len = page->private >> 2;
	if (len == 0) {
		WARN_ON(1);
	}
	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
	read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1);
	found_start = btrfs_header_bytenr(eb);
	if (found_start != start) {
		printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
		       start, found_start, len);
200 201 202 203 204 205 206 207 208 209 210 211 212
		WARN_ON(1);
		goto err;
	}
	if (eb->first_page != page) {
		printk("bad first page %lu %lu\n", eb->first_page->index,
		       page->index);
		WARN_ON(1);
		goto err;
	}
	if (!PageUptodate(page)) {
		printk("csum not up to date page %lu\n", page->index);
		WARN_ON(1);
		goto err;
213 214 215
	}
	found_level = btrfs_header_level(eb);
	csum_tree_block(root, eb, 0);
216
err:
217 218 219 220 221
	free_extent_buffer(eb);
out:
	return 0;
}

222
/*
 * writepage_io_hook for the btree extent tree: checksum the block backed by
 * @page.  start and end are unused.  Always returns 0.
 */
static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
{
	struct inode *host = page->mapping->host;

	csum_dirty_buffer(BTRFS_I(host)->root, page);
	return 0;
}

static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
234 235
	return extent_write_full_page(tree, page, btree_get_extent, wbc);
}
236 237 238 239 240 241

static int btree_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(mapping->host)->extent_tree;
242
	if (wbc->sync_mode == WB_SYNC_NONE) {
243 244 245
		u64 num_dirty;
		u64 start = 0;
		unsigned long thresh = 96 * 1024 * 1024;
246 247 248 249

		if (wbc->for_kupdate)
			return 0;

250 251 252 253 254
		if (current_is_pdflush()) {
			thresh = 96 * 1024 * 1024;
		} else {
			thresh = 8 * 1024 * 1024;
		}
255 256
		num_dirty = count_range_bits(tree, &start, (u64)-1,
					     thresh, EXTENT_DIRTY);
257 258 259 260
		if (num_dirty < thresh) {
			return 0;
		}
	}
261 262 263
	return extent_writepages(tree, mapping, btree_get_extent, wbc);
}

264 265 266 267 268 269
int btree_readpage(struct file *file, struct page *page)
{
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
	return extent_read_full_page(tree, page, btree_get_extent);
}
C
Chris Mason 已提交
270

271 272 273 274
/*
 * ->releasepage for the btree inode.
 *
 * Asks try_release_extent_mapping() whether the page can go.  When it
 * answers 1, the page's private state is cleared and the page reference
 * is dropped.  The helper's return value is passed back unchanged.
 */
static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
	struct extent_map_tree *tree = &BTRFS_I(page->mapping->host)->extent_tree;
	int released = try_release_extent_mapping(tree, page);

	if (released != 1)
		return released;

	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
	return released;
}

286
static void btree_invalidatepage(struct page *page, unsigned long offset)
287
{
288 289 290 291
	struct extent_map_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->extent_tree;
	extent_invalidatepage(tree, page, offset);
	btree_releasepage(page, GFP_NOFS);
292 293
}

294
#if 0
/*
 * Disabled buffer_head based writepage, kept for reference only.  It walks
 * the page's buffer ring, checksums every dirty buffer and then writes the
 * page with block_write_full_page().
 */
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct buffer_head *bh;
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct buffer_head *head;
	if (!page_has_buffers(page)) {
		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_dirty(bh))
			csum_tree_block(root, bh, 0);
		bh = bh->b_this_page;
	} while (bh != head);
	return block_write_full_page(page, btree_get_block, wbc);
}
#endif
314

315 316 317
static struct address_space_operations btree_aops = {
	.readpage	= btree_readpage,
	.writepage	= btree_writepage,
318
	.writepages	= btree_writepages,
319 320
	.releasepage	= btree_releasepage,
	.invalidatepage = btree_invalidatepage,
321 322 323
	.sync_page	= block_sync_page,
};

324
/*
 * Start reading the tree block at bytenr without waiting for it (the wait
 * argument of read_extent_buffer_pages() is 0), then drop the buffer
 * reference.  Always returns 0, including when no buffer could be obtained.
 */
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
{
	struct extent_map_tree *tree;
	struct extent_buffer *eb;

	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!eb)
		return 0;

	tree = &BTRFS_I(root->fs_info->btree_inode)->extent_tree;
	read_extent_buffer_pages(tree, eb, 0, 0);
	free_extent_buffer(eb);
	return 0;
}

339 340
/*
 * Read the tree block at bytenr and make sure its checksum has been looked
 * at once.
 *
 * The EXTENT_CSUM bit, cached in buf->flags and tracked in the extent tree
 * for the first page of the buffer, records that the check already ran.  If
 * it is not set, the range is locked, rechecked, and csum_tree_block() is
 * run in verify mode before the bit is set.
 *
 * NOTE(review): the result of csum_tree_block() is not acted upon; the
 * buffer is returned and marked EXTENT_CSUM even if verification failed.
 *
 * Returns the buffer, or NULL if none could be obtained.
 */
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
				      u32 blocksize)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_map_tree *tree = &BTRFS_I(btree_inode)->extent_tree;
	struct extent_buffer *eb;
	u64 last;

	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!eb)
		return NULL;
	read_extent_buffer_pages(tree, eb, 0, 1);

	if (eb->flags & EXTENT_CSUM)
		return eb;

	last = eb->start + PAGE_CACHE_SIZE - 1;
	if (test_range_bit(tree, eb->start, last, EXTENT_CSUM, 1)) {
		eb->flags |= EXTENT_CSUM;
		return eb;
	}

	lock_extent(tree, eb->start, last, GFP_NOFS);

	/* recheck under the lock; someone may have verified it meanwhile */
	if (!test_range_bit(tree, eb->start, last, EXTENT_CSUM, 1)) {
		csum_tree_block(root, eb, 1);
		set_extent_bits(tree, eb->start, last, EXTENT_CSUM, GFP_NOFS);
	}
	eb->flags |= EXTENT_CSUM;

	unlock_extent(tree, eb->start, last, GFP_NOFS);
	return eb;
}

381
int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
382
		     struct extent_buffer *buf)
383
{
384
	struct inode *btree_inode = root->fs_info->btree_inode;
385 386 387 388
	if (btrfs_header_generation(buf) ==
	    root->fs_info->running_transaction->transid)
		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree,
					  buf);
389 390 391 392 393 394 395 396 397 398 399 400
	return 0;
}

/*
 * Wait for writeback of buf via wait_on_extent_buffer_writeback() on the
 * btree inode's extent tree.  Always returns 0.
 */
int wait_on_tree_block_writeback(struct btrfs_root *root,
				 struct extent_buffer *buf)
{
	struct extent_map_tree *tree;

	tree = &BTRFS_I(root->fs_info->btree_inode)->extent_tree;
	wait_on_extent_buffer_writeback(tree, buf);
	return 0;
}

401
static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
402
			u32 stripesize, struct btrfs_root *root,
403
			struct btrfs_fs_info *fs_info,
C
Chris Mason 已提交
404
			u64 objectid)
405
{
C
Chris Mason 已提交
406
	root->node = NULL;
407
	root->inode = NULL;
408
	root->commit_root = NULL;
409 410 411
	root->sectorsize = sectorsize;
	root->nodesize = nodesize;
	root->leafsize = leafsize;
412
	root->stripesize = stripesize;
C
Chris Mason 已提交
413
	root->ref_cows = 0;
414
	root->fs_info = fs_info;
415 416
	root->objectid = objectid;
	root->last_trans = 0;
C
Chris Mason 已提交
417 418
	root->highest_inode = 0;
	root->last_inode_alloc = 0;
419
	root->name = NULL;
420
	root->in_sysfs = 0;
421 422
	memset(&root->root_key, 0, sizeof(root->root_key));
	memset(&root->root_item, 0, sizeof(root->root_item));
423
	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
424 425
	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
	init_completion(&root->kobj_unregister);
426 427
	root->defrag_running = 0;
	root->defrag_level = 0;
428
	root->root_key.objectid = objectid;
429 430 431
	return 0;
}

432
/*
 * Initialise @root with the geometry of @tree_root, load the last root
 * item/key for @objectid from the tree of roots and read the root's top
 * node.  Lookup failure or a missing node is a BUG.  Returns 0.
 */
static int find_and_setup_root(struct btrfs_root *tree_root,
			       struct btrfs_fs_info *fs_info,
			       u64 objectid,
			       struct btrfs_root *root)
{
	u64 node_bytenr;
	u32 blocksize;
	int ret;

	__setup_root(tree_root->nodesize, tree_root->leafsize,
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, objectid);

	ret = btrfs_find_last_root(tree_root, objectid,
				   &root->root_item, &root->root_key);
	BUG_ON(ret);

	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	node_bytenr = btrfs_root_bytenr(&root->root_item);
	root->node = read_tree_block(root, node_bytenr, blocksize);
	BUG_ON(!root->node);
	return 0;
}

454 455
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info,
					       struct btrfs_key *location)
456 457 458 459
{
	struct btrfs_root *root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path;
460
	struct extent_buffer *l;
C
Chris Mason 已提交
461
	u64 highest_inode;
462
	u32 blocksize;
463 464
	int ret = 0;

465
	root = kzalloc(sizeof(*root), GFP_NOFS);
C
Chris Mason 已提交
466
	if (!root)
467 468
		return ERR_PTR(-ENOMEM);
	if (location->offset == (u64)-1) {
469
		ret = find_and_setup_root(tree_root, fs_info,
470 471 472 473 474 475 476 477
					  location->objectid, root);
		if (ret) {
			kfree(root);
			return ERR_PTR(ret);
		}
		goto insert;
	}

478
	__setup_root(tree_root->nodesize, tree_root->leafsize,
479 480
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, location->objectid);
481 482 483 484 485 486 487 488 489

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
	if (ret != 0) {
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}
490 491 492
	l = path->nodes[0];
	read_extent_buffer(l, &root->root_item,
	       btrfs_item_ptr_offset(l, path->slots[0]),
493
	       sizeof(root->root_item));
494
	memcpy(&root->root_key, location, sizeof(*location));
495 496 497 498 499 500 501 502
	ret = 0;
out:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	if (ret) {
		kfree(root);
		return ERR_PTR(ret);
	}
503 504 505
	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
				     blocksize);
506 507 508
	BUG_ON(!root->node);
insert:
	root->ref_cows = 1;
509 510 511 512 513 514 515 516
	ret = btrfs_find_highest_inode(root, &highest_inode);
	if (ret == 0) {
		root->highest_inode = highest_inode;
		root->last_inode_alloc = highest_inode;
	}
	return root;
}

C
Chris Mason 已提交
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
/*
 * Find an already loaded root by objectid.  The tree of roots and the extent
 * tree are returned straight from fs_info; anything else is looked up in the
 * fs_roots radix tree.  Returns the radix tree lookup result (NULL when
 * nothing is stored under that id).
 */
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
					u64 root_objectid)
{
	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;

	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

	return radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)root_objectid);
}

532 533
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
					      struct btrfs_key *location)
534 535 536 537
{
	struct btrfs_root *root;
	int ret;

538 539 540 541 542
	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;
	if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

543 544 545 546 547 548 549 550
	root = radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)location->objectid);
	if (root)
		return root;

	root = btrfs_read_fs_root_no_radix(fs_info, location);
	if (IS_ERR(root))
		return root;
C
Chris Mason 已提交
551 552
	ret = radix_tree_insert(&fs_info->fs_roots_radix,
				(unsigned long)root->root_key.objectid,
553 554
				root);
	if (ret) {
555
		free_extent_buffer(root->node);
556 557 558
		kfree(root);
		return ERR_PTR(ret);
	}
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
	ret = btrfs_find_dead_roots(fs_info->tree_root,
				    root->root_key.objectid, root);
	BUG_ON(ret);

	return root;
}

struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
				      struct btrfs_key *location,
				      const char *name, int namelen)
{
	struct btrfs_root *root;
	int ret;

	root = btrfs_read_fs_root_no_name(fs_info, location);
	if (!root)
		return NULL;
576

577 578 579
	if (root->in_sysfs)
		return root;

580 581
	ret = btrfs_set_root_name(root, name, namelen);
	if (ret) {
582
		free_extent_buffer(root->node);
583 584 585 586 587 588
		kfree(root);
		return ERR_PTR(ret);
	}

	ret = btrfs_sysfs_add_root(root);
	if (ret) {
589
		free_extent_buffer(root->node);
590 591 592 593
		kfree(root->name);
		kfree(root);
		return ERR_PTR(ret);
	}
594
	root->in_sysfs = 1;
595 596
	return root;
}
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
#if 0
/*
 * Disabled: allocate a hasher for the crypto algorithm named by type and add
 * it to info->hashers under info->hash_lock.
 * Returns 0 on success, -ENOMEM if the hasher cannot be allocated, -EINVAL
 * if the crypto transform cannot be allocated.
 */
static int add_hasher(struct btrfs_fs_info *info, char *type) {
	struct btrfs_hasher *hasher;

	hasher = kmalloc(sizeof(*hasher), GFP_NOFS);
	if (!hasher)
		return -ENOMEM;
	hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC);
	if (!hasher->hash_tfm) {
		kfree(hasher);
		return -EINVAL;
	}
	spin_lock(&info->hash_lock);
	list_add(&hasher->list, &info->hashers);
	spin_unlock(&info->hash_lock);
	return 0;
}
#endif
C
Chris Mason 已提交
615
struct btrfs_root *open_ctree(struct super_block *sb)
616
{
617 618 619 620
	u32 sectorsize;
	u32 nodesize;
	u32 leafsize;
	u32 blocksize;
621
	u32 stripesize;
C
Chris Mason 已提交
622 623 624 625 626 627
	struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
						 GFP_NOFS);
	struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
					       GFP_NOFS);
	struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
						GFP_NOFS);
628
	int ret;
C
Chris Mason 已提交
629
	int err = -EIO;
C
Chris Mason 已提交
630
	struct btrfs_super_block *disk_super;
631

C
Chris Mason 已提交
632 633 634 635
	if (!extent_root || !tree_root || !fs_info) {
		err = -ENOMEM;
		goto fail;
	}
636
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
C
Chris Mason 已提交
637
	INIT_LIST_HEAD(&fs_info->trans_list);
638
	INIT_LIST_HEAD(&fs_info->dead_roots);
639 640
	INIT_LIST_HEAD(&fs_info->hashers);
	spin_lock_init(&fs_info->hash_lock);
641
	spin_lock_init(&fs_info->delalloc_lock);
642
	spin_lock_init(&fs_info->new_trans_lock);
643

644 645
	memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
	init_completion(&fs_info->kobj_unregister);
C
Chris Mason 已提交
646
	sb_set_blocksize(sb, 4096);
647
	fs_info->running_transaction = NULL;
648
	fs_info->last_trans_committed = 0;
649 650
	fs_info->tree_root = tree_root;
	fs_info->extent_root = extent_root;
C
Chris Mason 已提交
651
	fs_info->sb = sb;
652
	fs_info->throttles = 0;
653
	fs_info->mount_opt = 0;
654
	fs_info->max_extent = (u64)-1;
655
	fs_info->delalloc_bytes = 0;
656 657
	fs_info->btree_inode = new_inode(sb);
	fs_info->btree_inode->i_ino = 1;
C
Chris Mason 已提交
658
	fs_info->btree_inode->i_nlink = 1;
659 660
	fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
661 662 663
	extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
			     fs_info->btree_inode->i_mapping,
			     GFP_NOFS);
664 665
	BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops;

666 667
	extent_map_tree_init(&fs_info->free_space_cache,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
668 669
	extent_map_tree_init(&fs_info->block_group_cache,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
670 671 672 673 674 675
	extent_map_tree_init(&fs_info->pinned_extents,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_map_tree_init(&fs_info->pending_del,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_map_tree_init(&fs_info->extent_ins,
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
676
	fs_info->do_barriers = 1;
677
	fs_info->closing = 0;
678
	fs_info->total_pinned = 0;
679 680 681
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
#else
C
Chris Mason 已提交
682
	INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
683
#endif
684 685 686
	BTRFS_I(fs_info->btree_inode)->root = tree_root;
	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
	       sizeof(struct btrfs_key));
C
Chris Mason 已提交
687
	insert_inode_hash(fs_info->btree_inode);
688
	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
C
Chris Mason 已提交
689

C
Chris Mason 已提交
690
	mutex_init(&fs_info->trans_mutex);
C
Chris Mason 已提交
691
	mutex_init(&fs_info->fs_mutex);
692

693 694 695 696 697 698 699 700
#if 0
	ret = add_hasher(fs_info, "crc32c");
	if (ret) {
		printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
		err = -ENOMEM;
		goto fail_iput;
	}
#endif
701
	__setup_root(512, 512, 512, 512, tree_root,
C
Chris Mason 已提交
702
		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
703

C
Chris Mason 已提交
704
	fs_info->sb_buffer = read_tree_block(tree_root,
705 706
					     BTRFS_SUPER_INFO_OFFSET,
					     512);
707

708
	if (!fs_info->sb_buffer)
C
Chris Mason 已提交
709 710
		goto fail_iput;

711 712 713 714 715 716 717
	read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
			   sizeof(fs_info->super_copy));

	read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
			   (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
			   BTRFS_FSID_SIZE);
	disk_super = &fs_info->super_copy;
718
	if (!btrfs_super_root(disk_super))
C
Chris Mason 已提交
719
		goto fail_sb_buffer;
720

721 722 723
	nodesize = btrfs_super_nodesize(disk_super);
	leafsize = btrfs_super_leafsize(disk_super);
	sectorsize = btrfs_super_sectorsize(disk_super);
724
	stripesize = btrfs_super_stripesize(disk_super);
725 726 727
	tree_root->nodesize = nodesize;
	tree_root->leafsize = leafsize;
	tree_root->sectorsize = sectorsize;
728
	tree_root->stripesize = stripesize;
729
	sb_set_blocksize(sb, sectorsize);
730

731
	i_size_write(fs_info->btree_inode,
732
		     btrfs_super_total_bytes(disk_super));
733

C
Chris Mason 已提交
734 735 736 737 738
	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
		    sizeof(disk_super->magic))) {
		printk("btrfs: valid FS not found on %s\n", sb->s_id);
		goto fail_sb_buffer;
	}
739

740 741
	blocksize = btrfs_level_size(tree_root,
				     btrfs_super_root_level(disk_super));
742

C
Chris Mason 已提交
743
	tree_root->node = read_tree_block(tree_root,
744 745
					  btrfs_super_root(disk_super),
					  blocksize);
C
Chris Mason 已提交
746 747
	if (!tree_root->node)
		goto fail_sb_buffer;
748

C
Chris Mason 已提交
749
	mutex_lock(&fs_info->fs_mutex);
750 751

	ret = find_and_setup_root(tree_root, fs_info,
C
Chris Mason 已提交
752
				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
C
Chris Mason 已提交
753 754 755 756
	if (ret) {
		mutex_unlock(&fs_info->fs_mutex);
		goto fail_tree_root;
	}
757

C
Chris Mason 已提交
758 759
	btrfs_read_block_groups(extent_root);

760
	fs_info->generation = btrfs_super_generation(disk_super) + 1;
C
Chris Mason 已提交
761
	mutex_unlock(&fs_info->fs_mutex);
762
	return tree_root;
C
Chris Mason 已提交
763 764

fail_tree_root:
765
	free_extent_buffer(tree_root->node);
C
Chris Mason 已提交
766
fail_sb_buffer:
767
	free_extent_buffer(fs_info->sb_buffer);
C
Chris Mason 已提交
768 769 770 771 772 773 774
fail_iput:
	iput(fs_info->btree_inode);
fail:
	kfree(extent_root);
	kfree(tree_root);
	kfree(fs_info);
	return ERR_PTR(err);
775 776
}

777
int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
C
Chris Mason 已提交
778
		      *root)
779
{
780
	int ret;
781 782
	struct extent_buffer *super = root->fs_info->sb_buffer;
	struct inode *btree_inode = root->fs_info->btree_inode;
783
	struct super_block *sb = root->fs_info->sb;
784

785 786
	if (!btrfs_test_opt(root, NOBARRIER))
		blkdev_issue_flush(sb->s_bdev, NULL);
787 788 789
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super);
	ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
				     super->start, super->len);
790 791
	if (!btrfs_test_opt(root, NOBARRIER))
		blkdev_issue_flush(sb->s_bdev, NULL);
792
	return ret;
C
Chris Mason 已提交
793 794
}

795
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
C
Chris Mason 已提交
796 797 798
{
	radix_tree_delete(&fs_info->fs_roots_radix,
			  (unsigned long)root->root_key.objectid);
799 800
	if (root->in_sysfs)
		btrfs_sysfs_del_root(root);
C
Chris Mason 已提交
801 802 803
	if (root->inode)
		iput(root->inode);
	if (root->node)
804
		free_extent_buffer(root->node);
C
Chris Mason 已提交
805
	if (root->commit_root)
806
		free_extent_buffer(root->commit_root);
807 808
	if (root->name)
		kfree(root->name);
C
Chris Mason 已提交
809 810 811 812
	kfree(root);
	return 0;
}

C
Chris Mason 已提交
813
static int del_fs_roots(struct btrfs_fs_info *fs_info)
814 815 816 817 818 819 820 821 822 823 824
{
	int ret;
	struct btrfs_root *gang[8];
	int i;

	while(1) {
		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					     (void **)gang, 0,
					     ARRAY_SIZE(gang));
		if (!ret)
			break;
C
Chris Mason 已提交
825
		for (i = 0; i < ret; i++)
826
			btrfs_free_fs_root(fs_info, gang[i]);
827 828 829
	}
	return 0;
}
830

C
Chris Mason 已提交
831
int close_ctree(struct btrfs_root *root)
C
Chris Mason 已提交
832
{
833
	int ret;
834
	struct btrfs_trans_handle *trans;
835
	struct btrfs_fs_info *fs_info = root->fs_info;
836

837
	fs_info->closing = 1;
C
Chris Mason 已提交
838
	btrfs_transaction_flush_work(root);
839
	mutex_lock(&fs_info->fs_mutex);
840
	btrfs_defrag_dirty_roots(root->fs_info);
C
Chris Mason 已提交
841
	trans = btrfs_start_transaction(root, 1);
842
	ret = btrfs_commit_transaction(trans, root);
C
Chris Mason 已提交
843 844 845 846
	/* run commit again to  drop the original snapshot */
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
847
	BUG_ON(ret);
C
Chris Mason 已提交
848
	write_ctree_super(NULL, root);
849 850 851
	mutex_unlock(&fs_info->fs_mutex);

	if (fs_info->extent_root->node)
852
		free_extent_buffer(fs_info->extent_root->node);
853

854
	if (fs_info->tree_root->node)
855
		free_extent_buffer(fs_info->tree_root->node);
856

857
	free_extent_buffer(fs_info->sb_buffer);
858

C
Chris Mason 已提交
859
	btrfs_free_block_groups(root->fs_info);
860
	del_fs_roots(fs_info);
861 862 863 864 865 866 867 868

	filemap_write_and_wait(fs_info->btree_inode->i_mapping);

	extent_map_tree_empty_lru(&fs_info->free_space_cache);
	extent_map_tree_empty_lru(&fs_info->block_group_cache);
	extent_map_tree_empty_lru(&fs_info->pinned_extents);
	extent_map_tree_empty_lru(&fs_info->pending_del);
	extent_map_tree_empty_lru(&fs_info->extent_ins);
869
	extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree);
870

871
	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
872

873
	iput(fs_info->btree_inode);
874 875 876 877 878 879 880 881 882 883
#if 0
	while(!list_empty(&fs_info->hashers)) {
		struct btrfs_hasher *hasher;
		hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
				    hashers);
		list_del(&hasher->hashers);
		crypto_free_hash(&fs_info->hash_tfm);
		kfree(hasher);
	}
#endif
884 885
	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
886 887 888
	return 0;
}

889 890
int btrfs_buffer_uptodate(struct extent_buffer *buf)
{
891
	struct inode *btree_inode = buf->first_page->mapping->host;
892 893 894 895
	return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
}

int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
C
Chris Mason 已提交
896
{
897
	struct inode *btree_inode = buf->first_page->mapping->host;
898 899 900
	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
					  buf);
}
901

902 903
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
904
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
905 906
	u64 transid = btrfs_header_generation(buf);
	struct inode *btree_inode = root->fs_info->btree_inode;
907

C
Chris Mason 已提交
908 909
	if (transid != root->fs_info->generation) {
		printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
910
			(unsigned long long)buf->start,
C
Chris Mason 已提交
911 912 913
			transid, root->fs_info->generation);
		WARN_ON(1);
	}
914
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf);
915 916
}

917 918
void btrfs_throttle(struct btrfs_root *root)
{
919 920 921 922 923
	struct backing_dev_info *bdi;

	bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
	if (root->fs_info->throttles && bdi_write_congested(bdi))
		congestion_wait(WRITE, HZ/20);
924 925
}

926
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
C
Chris Mason 已提交
927
{
928
	balance_dirty_pages_ratelimited_nr(
929
			root->fs_info->btree_inode->i_mapping, 1);
C
Chris Mason 已提交
930
}
931 932 933

void btrfs_set_buffer_defrag(struct extent_buffer *buf)
{
934
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
935 936 937 938 939 940 941
	struct inode *btree_inode = root->fs_info->btree_inode;
	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
			buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
}

void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
{
942
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
943 944 945 946 947 948 949 950
	struct inode *btree_inode = root->fs_info->btree_inode;
	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
			buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
			GFP_NOFS);
}

int btrfs_buffer_defrag(struct extent_buffer *buf)
{
951
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
952 953 954 955 956 957 958
	struct inode *btree_inode = root->fs_info->btree_inode;
	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
}

int btrfs_buffer_defrag_done(struct extent_buffer *buf)
{
959
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
960 961 962 963 964 965 966 967
	struct inode *btree_inode = root->fs_info->btree_inode;
	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG_DONE, 0);
}

int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
{
968
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
969 970 971 972 973 974 975 976
	struct inode *btree_inode = root->fs_info->btree_inode;
	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG_DONE, GFP_NOFS);
}

int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
{
977
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
978 979 980 981 982 983 984 985
	struct inode *btree_inode = root->fs_info->btree_inode;
	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
		     buf->start, buf->start + buf->len - 1,
		     EXTENT_DEFRAG, GFP_NOFS);
}

int btrfs_read_buffer(struct extent_buffer *buf)
{
986
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
987 988
	struct inode *btree_inode = root->fs_info->btree_inode;
	return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
989
					buf, 0, 1);
990
}
991 992 993 994

/*
 * Extent map callbacks for the btree inode's extent tree; the writepage hook
 * checksums a tree block via btree_writepage_io_hook().
 */
static struct extent_map_ops btree_extent_map_ops = {
	.writepage_io_hook = btree_writepage_io_hook,
};