super.c 29.8 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

Y
Yan 已提交
19
#include <linux/blkdev.h>
20
#include <linux/module.h>
C
Chris Mason 已提交
21
#include <linux/buffer_head.h>
22 23 24 25 26
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
E
Eric Paris 已提交
27
#include <linux/seq_file.h>
28 29
#include <linux/string.h>
#include <linux/backing-dev.h>
Y
Yan 已提交
30
#include <linux/mount.h>
C
Chris Mason 已提交
31
#include <linux/mpage.h>
C
Chris Mason 已提交
32 33
#include <linux/swap.h>
#include <linux/writeback.h>
C
Chris Mason 已提交
34
#include <linux/statfs.h>
C
Chris Mason 已提交
35
#include <linux/compat.h>
36
#include <linux/parser.h>
37
#include <linux/ctype.h>
38
#include <linux/namei.h>
39
#include <linux/miscdevice.h>
40
#include <linux/magic.h>
41
#include <linux/slab.h>
C
Chris Mason 已提交
42
#include "compat.h"
43
#include "ctree.h"
C
Chris Mason 已提交
44
#include "disk-io.h"
45
#include "transaction.h"
C
Chris Mason 已提交
46
#include "btrfs_inode.h"
C
Chris Mason 已提交
47
#include "ioctl.h"
C
Chris Mason 已提交
48
#include "print-tree.h"
J
Josef Bacik 已提交
49
#include "xattr.h"
50
#include "volumes.h"
51
#include "version.h"
B
Balaji Rao 已提交
52
#include "export.h"
C
Chris Mason 已提交
53
#include "compression.h"
54

55 56 57
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>

58
static const struct super_operations btrfs_super_ops;
C
Chris Mason 已提交
59

L
liubo 已提交
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
				      char nbuf[16])
{
	char *errstr = NULL;

	switch (errno) {
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		errstr = "Readonly filesystem";
		break;
	default:
		if (nbuf) {
			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				errstr = nbuf;
		}
		break;
	}

	return errstr;
}

/*
 * Record in memory that the filesystem has hit an error by setting
 * BTRFS_SUPER_FLAG_ERROR in fs_info->fs_state.
 */
static void __save_error_info(struct btrfs_fs_info *fs_info)
{
	/*
	 * today we only save the error info into ram.  Long term we'll
	 * also send it down to the disk
	 *
	 * fs_state is tested as a bitmask elsewhere in this file, so OR the
	 * flag in rather than overwriting whatever other bits may be set.
	 */
	fs_info->fs_state |= BTRFS_SUPER_FLAG_ERROR;
}

/*
 * Record the error state for this filesystem.
 *
 * NOTE: this wrapper only marks the error in memory through
 * __save_error_info(); it does not write the superblock.  Per the
 * original note, the write_super work was moved to umount in order to
 * avoid a deadlock.
 */
static void save_error_info(struct btrfs_fs_info *fs_info)
{
	__save_error_info(fs_info);
}

/*
 * btrfs handles an error by forcing the filesystem read-only: if the
 * superblock is still writable and the error flag is set in fs_state,
 * set MS_RDONLY on the superblock and log that this happened.
 */
static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
{
	struct super_block *sb = fs_info->sb;

	if (!(sb->s_flags & MS_RDONLY) &&
	    (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
		sb->s_flags |= MS_RDONLY;
		printk(KERN_INFO "btrfs is forced readonly\n");
	}
}

/*
 * __btrfs_std_error decodes expected errors from the caller and
 * invokes the appropriate error response: the error is logged together
 * with the reporting function and line, recorded in fs_info, and the
 * filesystem is then forced read-only by btrfs_handle_error().
 */
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
		     unsigned int line, int errno)
{
	char msgbuf[16];
	struct super_block *sb = fs_info->sb;

	/*
	 * -EROFS on a superblock that is already MS_RDONLY is harmless;
	 * there is nothing to report or do.
	 */
	if ((sb->s_flags & MS_RDONLY) && errno == -EROFS)
		return;

	printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
		sb->s_id, function, line,
		btrfs_decode_error(fs_info, errno, msgbuf));

	save_error_info(fs_info);
	btrfs_handle_error(fs_info);
}

C
Chris Mason 已提交
144
static void btrfs_put_super(struct super_block *sb)
C
Chris Mason 已提交
145
{
C
Chris Mason 已提交
146
	struct btrfs_root *root = btrfs_sb(sb);
C
Chris Mason 已提交
147 148
	int ret;

C
Chris Mason 已提交
149 150
	ret = close_ctree(root);
	sb->s_fs_info = NULL;
151 152

	(void)ret; /* FIXME: need to fix VFS to return error? */
C
Chris Mason 已提交
153 154
}

155
/* Token identifiers for the mount options matched through the tokens table. */
enum {
	Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
	Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
	Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
	Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
	Opt_enospc_debug, Opt_err,
};

static match_table_t tokens = {
166
	{Opt_degraded, "degraded"},
167
	{Opt_subvol, "subvol=%s"},
168
	{Opt_subvolid, "subvolid=%d"},
169
	{Opt_device, "device=%s"},
170
	{Opt_nodatasum, "nodatasum"},
171
	{Opt_nodatacow, "nodatacow"},
172
	{Opt_nobarrier, "nobarrier"},
173
	{Opt_max_inline, "max_inline=%s"},
174
	{Opt_alloc_start, "alloc_start=%s"},
175
	{Opt_thread_pool, "thread_pool=%d"},
C
Chris Mason 已提交
176
	{Opt_compress, "compress"},
177
	{Opt_compress_type, "compress=%s"},
C
Chris Mason 已提交
178
	{Opt_compress_force, "compress-force"},
179
	{Opt_compress_force_type, "compress-force=%s"},
180
	{Opt_ssd, "ssd"},
181
	{Opt_ssd_spread, "ssd_spread"},
C
Chris Mason 已提交
182
	{Opt_nossd, "nossd"},
J
Josef Bacik 已提交
183
	{Opt_noacl, "noacl"},
S
Sage Weil 已提交
184
	{Opt_notreelog, "notreelog"},
185
	{Opt_flushoncommit, "flushoncommit"},
186
	{Opt_ratio, "metadata_ratio=%d"},
C
Christoph Hellwig 已提交
187
	{Opt_discard, "discard"},
188
	{Opt_space_cache, "space_cache"},
189
	{Opt_clear_cache, "clear_cache"},
190
	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
191
	{Opt_enospc_debug, "enospc_debug"},
J
Josef Bacik 已提交
192
	{Opt_err, NULL},
193 194
};

195 196 197 198 199
/*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
 */
int btrfs_parse_options(struct btrfs_root *root, char *options)
200
{
201
	struct btrfs_fs_info *info = root->fs_info;
202
	substring_t args[MAX_OPT_ARGS];
203
	char *p, *num, *orig;
204
	int intarg;
S
Sage Weil 已提交
205
	int ret = 0;
206 207
	char *compress_type;
	bool compress_force = false;
208

209
	if (!options)
210
		return 0;
211

212 213 214 215 216 217 218 219
	/*
	 * strsep changes the string, duplicate it because parse_options
	 * gets called twice
	 */
	options = kstrdup(options, GFP_NOFS);
	if (!options)
		return -ENOMEM;

220
	orig = options;
221

222
	while ((p = strsep(&options, ",")) != NULL) {
223 224 225 226 227 228
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
229
		case Opt_degraded:
230 231
			printk(KERN_INFO "btrfs: allowing degraded mounts\n");
			btrfs_set_opt(info->mount_opt, DEGRADED);
232
			break;
233
		case Opt_subvol:
234
		case Opt_subvolid:
235
		case Opt_device:
236
			/*
237
			 * These are parsed by btrfs_parse_early_options
238 239
			 * and can be happily ignored here.
			 */
240 241
			break;
		case Opt_nodatasum:
242
			printk(KERN_INFO "btrfs: setting nodatasum\n");
243
			btrfs_set_opt(info->mount_opt, NODATASUM);
244 245
			break;
		case Opt_nodatacow:
246 247 248
			printk(KERN_INFO "btrfs: setting nodatacow\n");
			btrfs_set_opt(info->mount_opt, NODATACOW);
			btrfs_set_opt(info->mount_opt, NODATASUM);
249
			break;
C
Chris Mason 已提交
250
		case Opt_compress_force:
251 252 253 254 255 256 257 258 259
		case Opt_compress_force_type:
			compress_force = true;
		case Opt_compress:
		case Opt_compress_type:
			if (token == Opt_compress ||
			    token == Opt_compress_force ||
			    strcmp(args[0].from, "zlib") == 0) {
				compress_type = "zlib";
				info->compress_type = BTRFS_COMPRESS_ZLIB;
L
Li Zefan 已提交
260 261 262
			} else if (strcmp(args[0].from, "lzo") == 0) {
				compress_type = "lzo";
				info->compress_type = BTRFS_COMPRESS_LZO;
263 264 265 266 267
			} else {
				ret = -EINVAL;
				goto out;
			}

C
Chris Mason 已提交
268
			btrfs_set_opt(info->mount_opt, COMPRESS);
269 270 271 272 273 274 275
			if (compress_force) {
				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
				pr_info("btrfs: force %s compression\n",
					compress_type);
			} else
				pr_info("btrfs: use %s compression\n",
					compress_type);
C
Chris Mason 已提交
276
			break;
277
		case Opt_ssd:
278 279
			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
			btrfs_set_opt(info->mount_opt, SSD);
280
			break;
281 282 283 284 285 286
		case Opt_ssd_spread:
			printk(KERN_INFO "btrfs: use spread ssd "
			       "allocation scheme\n");
			btrfs_set_opt(info->mount_opt, SSD);
			btrfs_set_opt(info->mount_opt, SSD_SPREAD);
			break;
C
Chris Mason 已提交
287
		case Opt_nossd:
288 289
			printk(KERN_INFO "btrfs: not using ssd allocation "
			       "scheme\n");
C
Chris Mason 已提交
290
			btrfs_set_opt(info->mount_opt, NOSSD);
C
Chris Mason 已提交
291
			btrfs_clear_opt(info->mount_opt, SSD);
292
			btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
C
Chris Mason 已提交
293
			break;
294
		case Opt_nobarrier:
295 296
			printk(KERN_INFO "btrfs: turning off barriers\n");
			btrfs_set_opt(info->mount_opt, NOBARRIER);
297
			break;
298 299 300 301 302 303 304 305 306
		case Opt_thread_pool:
			intarg = 0;
			match_int(&args[0], &intarg);
			if (intarg) {
				info->thread_pool_size = intarg;
				printk(KERN_INFO "btrfs: thread pool %d\n",
				       info->thread_pool_size);
			}
			break;
307
		case Opt_max_inline:
308 309
			num = match_strdup(&args[0]);
			if (num) {
A
Akinobu Mita 已提交
310
				info->max_inline = memparse(num, NULL);
311 312
				kfree(num);

C
Chris Mason 已提交
313 314 315 316 317
				if (info->max_inline) {
					info->max_inline = max_t(u64,
						info->max_inline,
						root->sectorsize);
				}
318
				printk(KERN_INFO "btrfs: max_inline at %llu\n",
319
					(unsigned long long)info->max_inline);
320 321
			}
			break;
322
		case Opt_alloc_start:
323 324
			num = match_strdup(&args[0]);
			if (num) {
A
Akinobu Mita 已提交
325
				info->alloc_start = memparse(num, NULL);
326 327 328
				kfree(num);
				printk(KERN_INFO
					"btrfs: allocations start at %llu\n",
329
					(unsigned long long)info->alloc_start);
330 331
			}
			break;
J
Josef Bacik 已提交
332 333 334
		case Opt_noacl:
			root->fs_info->sb->s_flags &= ~MS_POSIXACL;
			break;
S
Sage Weil 已提交
335 336 337 338
		case Opt_notreelog:
			printk(KERN_INFO "btrfs: disabling tree log\n");
			btrfs_set_opt(info->mount_opt, NOTREELOG);
			break;
339 340 341 342
		case Opt_flushoncommit:
			printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
			btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
			break;
343 344 345 346 347 348 349 350 351
		case Opt_ratio:
			intarg = 0;
			match_int(&args[0], &intarg);
			if (intarg) {
				info->metadata_ratio = intarg;
				printk(KERN_INFO "btrfs: metadata ratio %d\n",
				       info->metadata_ratio);
			}
			break;
C
Christoph Hellwig 已提交
352 353 354
		case Opt_discard:
			btrfs_set_opt(info->mount_opt, DISCARD);
			break;
355 356 357
		case Opt_space_cache:
			printk(KERN_INFO "btrfs: enabling disk space caching\n");
			btrfs_set_opt(info->mount_opt, SPACE_CACHE);
358
			break;
359 360 361
		case Opt_clear_cache:
			printk(KERN_INFO "btrfs: force clearing of disk cache\n");
			btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
362
			break;
363 364 365
		case Opt_user_subvol_rm_allowed:
			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
			break;
366 367 368
		case Opt_enospc_debug:
			btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
S
Sage Weil 已提交
369 370 371 372 373
		case Opt_err:
			printk(KERN_INFO "btrfs: unrecognized mount option "
			       "'%s'\n", p);
			ret = -EINVAL;
			goto out;
374
		default:
375
			break;
376 377
		}
	}
S
Sage Weil 已提交
378
out:
379
	kfree(orig);
S
Sage Weil 已提交
380
	return ret;
381 382 383 384 385 386 387 388
}

/*
 * Parse mount options that are required early in the mount process.
 *
 * All other options will be parsed on much later in the mount process and
 * only when we need to allocate a new super block.
 */
389
static int btrfs_parse_early_options(const char *options, fmode_t flags,
390
		void *holder, char **subvol_name, u64 *subvol_objectid,
391
		struct btrfs_fs_devices **fs_devices)
392 393
{
	substring_t args[MAX_OPT_ARGS];
394
	char *opts, *orig, *p;
395
	int error = 0;
396
	int intarg;
397 398 399 400 401 402 403 404 405 406 407

	if (!options)
		goto out;

	/*
	 * strsep changes the string, duplicate it because parse_options
	 * gets called twice
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
408
	orig = opts;
409 410 411 412 413 414 415 416 417 418 419

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_subvol:
			*subvol_name = match_strdup(&args[0]);
			break;
420 421
		case Opt_subvolid:
			intarg = 0;
422 423 424 425 426 427 428 429 430
			error = match_int(&args[0], &intarg);
			if (!error) {
				/* we want the original fs_tree */
				if (!intarg)
					*subvol_objectid =
						BTRFS_FS_TREE_OBJECTID;
				else
					*subvol_objectid = intarg;
			}
431
			break;
432 433 434 435 436 437
		case Opt_device:
			error = btrfs_scan_one_device(match_strdup(&args[0]),
					flags, holder, fs_devices);
			if (error)
				goto out_free_opts;
			break;
438 439 440 441 442
		default:
			break;
		}
	}

443
 out_free_opts:
444
	kfree(orig);
445 446 447
 out:
	/*
	 * If no subvolume name is specified we use the default one.  Allocate
448
	 * a copy of the string "." here so that code later in the
449 450 451
	 * mount path doesn't care if it's the default volume or another one.
	 */
	if (!*subvol_name) {
452
		*subvol_name = kstrdup(".", GFP_KERNEL);
453 454 455 456
		if (!*subvol_name)
			return -ENOMEM;
	}
	return error;
457 458
}

459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
/*
 * Find the dentry that should serve as the root of this mount.
 *
 * When @subvol_objectid is non-zero the root with that objectid is looked
 * up directly.  Otherwise the "default" dir item in the tree root decides
 * which root to use, and if that item is missing the filesystem's fs_root
 * is used.
 *
 * Returns a referenced dentry, or an ERR_PTR() on failure.
 */
static struct dentry *get_default_root(struct super_block *sb,
				       u64 subvol_objectid)
{
	struct btrfs_root *root = sb->s_fs_info;
	struct btrfs_root *new_root;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	struct btrfs_key location;
	struct inode *inode;
	struct dentry *dentry;
	u64 dir_id;
	int new = 0;

	/*
	 * We have a specific subvol we want to mount, just setup location and
	 * go look up the root.
	 */
	if (subvol_objectid) {
		location.objectid = subvol_objectid;
		location.type = BTRFS_ROOT_ITEM_KEY;
		location.offset = (u64)-1;
		goto find_root;
	}

	path = btrfs_alloc_path();
	if (!path)
		return ERR_PTR(-ENOMEM);
	path->leave_spinning = 1;

	/*
	 * Find the "default" dir item which points to the root item that we
	 * will mount by default if we haven't been given a specific subvolume
	 * to mount.
	 */
	dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
	if (IS_ERR(di)) {
		/* the path must be released on this exit as well */
		btrfs_free_path(path);
		return ERR_CAST(di);
	}
	if (!di) {
		/*
		 * Ok the default dir item isn't there.  This is weird since
		 * it's always been there, but don't freak out, just try and
		 * mount to root most subvolume.
		 */
		btrfs_free_path(path);
		dir_id = BTRFS_FIRST_FREE_OBJECTID;
		new_root = root->fs_info->fs_root;
		goto setup_root;
	}

	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
	btrfs_free_path(path);

find_root:
	new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	if (IS_ERR(new_root))
		return ERR_CAST(new_root);

	/* a root with no references left is treated as not existing */
	if (btrfs_root_refs(&new_root->root_item) == 0)
		return ERR_PTR(-ENOENT);

	dir_id = btrfs_root_dirid(&new_root->root_item);
setup_root:
	location.objectid = dir_id;
	location.type = BTRFS_INODE_ITEM_KEY;
	location.offset = 0;

	inode = btrfs_iget(sb, &location, new_root, &new);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/*
	 * If we're just mounting the root most subvol put the inode and return
	 * a reference to the dentry.  We will have already gotten a reference
	 * to the inode in btrfs_fill_super so we're good to go.
	 */
	if (!new && sb->s_root->d_inode == inode) {
		iput(inode);
		return dget(sb->s_root);
	}

	if (new) {
		const struct qstr name = { .name = "/", .len = 1 };

		/*
		 * New inode, we need to make the dentry a sibling of s_root so
		 * everything gets cleaned up properly on unmount.
		 */
		dentry = d_alloc(sb->s_root, &name);
		if (!dentry) {
			iput(inode);
			return ERR_PTR(-ENOMEM);
		}
		d_splice_alias(inode, dentry);
	} else {
		/*
		 * We found the inode in cache, just find a dentry for it and
		 * put the reference to the inode we just got.
		 *
		 * NOTE(review): the result of d_find_alias() is returned
		 * unchecked; confirm it cannot be NULL on this path.
		 */
		dentry = d_find_alias(inode);
		iput(inode);
	}

	return dentry;
}

C
Chris Mason 已提交
565
static int btrfs_fill_super(struct super_block *sb,
566
			    struct btrfs_fs_devices *fs_devices,
C
Chris Mason 已提交
567
			    void *data, int silent)
C
Chris Mason 已提交
568
{
C
Chris Mason 已提交
569 570
	struct inode *inode;
	struct dentry *root_dentry;
C
Chris Mason 已提交
571
	struct btrfs_root *tree_root;
572
	struct btrfs_key key;
C
Chris Mason 已提交
573
	int err;
574

C
Chris Mason 已提交
575 576 577
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_magic = BTRFS_SUPER_MAGIC;
	sb->s_op = &btrfs_super_ops;
A
Al Viro 已提交
578
	sb->s_d_op = &btrfs_dentry_operations;
B
Balaji Rao 已提交
579
	sb->s_export_op = &btrfs_export_ops;
J
Josef Bacik 已提交
580
	sb->s_xattr = btrfs_xattr_handlers;
C
Chris Mason 已提交
581
	sb->s_time_gran = 1;
C
Chris Mason 已提交
582
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
J
Josef Bacik 已提交
583
	sb->s_flags |= MS_POSIXACL;
584
#endif
585

586
	tree_root = open_ctree(sb, fs_devices, (char *)data);
587

588
	if (IS_ERR(tree_root)) {
C
Chris Mason 已提交
589
		printk("btrfs: open_ctree failed\n");
590
		return PTR_ERR(tree_root);
591
	}
C
Chris Mason 已提交
592
	sb->s_fs_info = tree_root;
593

594 595 596
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
597
	inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL);
598 599
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
C
Chris Mason 已提交
600
		goto fail_close;
C
Chris Mason 已提交
601 602
	}

C
Chris Mason 已提交
603 604 605 606 607
	root_dentry = d_alloc_root(inode);
	if (!root_dentry) {
		iput(inode);
		err = -ENOMEM;
		goto fail_close;
C
Chris Mason 已提交
608
	}
609

C
Chris Mason 已提交
610
	sb->s_root = root_dentry;
C
Chris Mason 已提交
611 612

	save_mount_options(sb, data);
C
Chris Mason 已提交
613
	return 0;
C
Chris Mason 已提交
614 615 616 617

fail_close:
	close_ctree(tree_root);
	return err;
C
Chris Mason 已提交
618 619
}

S
Sage Weil 已提交
620
int btrfs_sync_fs(struct super_block *sb, int wait)
C
Chris Mason 已提交
621 622
{
	struct btrfs_trans_handle *trans;
623
	struct btrfs_root *root = btrfs_sb(sb);
C
Chris Mason 已提交
624
	int ret;
C
Chris Mason 已提交
625

626 627
	trace_btrfs_sync_fs(wait);

C
Chris Mason 已提交
628 629 630 631
	if (!wait) {
		filemap_flush(root->fs_info->btree_inode->i_mapping);
		return 0;
	}
632

Y
Yan, Zheng 已提交
633 634
	btrfs_start_delalloc_inodes(root, 0);
	btrfs_wait_ordered_extents(root, 0, 0);
635

636
	trans = btrfs_start_transaction(root, 0);
637 638
	if (IS_ERR(trans))
		return PTR_ERR(trans);
C
Chris Mason 已提交
639
	ret = btrfs_commit_transaction(trans, root);
640
	return ret;
C
Chris Mason 已提交
641 642
}

E
Eric Paris 已提交
643 644 645 646 647 648 649 650 651 652 653 654 655 656
static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
	struct btrfs_fs_info *info = root->fs_info;

	if (btrfs_test_opt(root, DEGRADED))
		seq_puts(seq, ",degraded");
	if (btrfs_test_opt(root, NODATASUM))
		seq_puts(seq, ",nodatasum");
	if (btrfs_test_opt(root, NODATACOW))
		seq_puts(seq, ",nodatacow");
	if (btrfs_test_opt(root, NOBARRIER))
		seq_puts(seq, ",nobarrier");
	if (info->max_inline != 8192 * 1024)
657 658
		seq_printf(seq, ",max_inline=%llu",
			   (unsigned long long)info->max_inline);
E
Eric Paris 已提交
659
	if (info->alloc_start != 0)
660 661
		seq_printf(seq, ",alloc_start=%llu",
			   (unsigned long long)info->alloc_start);
E
Eric Paris 已提交
662 663 664 665 666
	if (info->thread_pool_size !=  min_t(unsigned long,
					     num_online_cpus() + 2, 8))
		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
	if (btrfs_test_opt(root, COMPRESS))
		seq_puts(seq, ",compress");
C
Chris Mason 已提交
667 668
	if (btrfs_test_opt(root, NOSSD))
		seq_puts(seq, ",nossd");
669 670 671
	if (btrfs_test_opt(root, SSD_SPREAD))
		seq_puts(seq, ",ssd_spread");
	else if (btrfs_test_opt(root, SSD))
E
Eric Paris 已提交
672
		seq_puts(seq, ",ssd");
S
Sage Weil 已提交
673
	if (btrfs_test_opt(root, NOTREELOG))
674
		seq_puts(seq, ",notreelog");
675
	if (btrfs_test_opt(root, FLUSHONCOMMIT))
676
		seq_puts(seq, ",flushoncommit");
677 678
	if (btrfs_test_opt(root, DISCARD))
		seq_puts(seq, ",discard");
E
Eric Paris 已提交
679 680 681 682 683
	if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
		seq_puts(seq, ",noacl");
	return 0;
}

684
static int btrfs_test_super(struct super_block *s, void *data)
Y
Yan 已提交
685
{
686
	struct btrfs_root *test_root = data;
687
	struct btrfs_root *root = btrfs_sb(s);
Y
Yan 已提交
688

689 690 691 692 693 694
	/*
	 * If this super block is going away, return false as it
	 * can't match as an existing super block.
	 */
	if (!atomic_read(&s->s_active))
		return 0;
695
	return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
Y
Yan 已提交
696 697
}

698 699 700 701 702
/*
 * sget() set callback: attach @data as the super block's s_fs_info and
 * finish the setup through set_anon_super(), whose result is returned.
 */
static int btrfs_set_super(struct super_block *s, void *data)
{
	s->s_fs_info = data;

	return set_anon_super(s, data);
}

705

706 707 708 709 710 711
/*
 * Find a superblock for the given device / mount point.
 *
 * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
 *	  for multiple device setup.  Make sure to keep it in sync.
 */
A
Al Viro 已提交
712 713
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
		const char *dev_name, void *data)
Y
Yan 已提交
714 715 716 717
{
	struct block_device *bdev = NULL;
	struct super_block *s;
	struct dentry *root;
718
	struct btrfs_fs_devices *fs_devices = NULL;
719 720
	struct btrfs_root *tree_root = NULL;
	struct btrfs_fs_info *fs_info = NULL;
721
	fmode_t mode = FMODE_READ;
722 723
	char *subvol_name = NULL;
	u64 subvol_objectid = 0;
Y
Yan 已提交
724 725
	int error = 0;

726 727 728 729
	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;

	error = btrfs_parse_early_options(data, mode, fs_type,
730 731
					  &subvol_name, &subvol_objectid,
					  &fs_devices);
732
	if (error)
A
Al Viro 已提交
733
		return ERR_PTR(error);
734

735
	error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
736
	if (error)
737
		goto error_free_subvol_name;
Y
Yan 已提交
738

739
	error = btrfs_open_devices(fs_devices, mode, fs_type);
740
	if (error)
741
		goto error_free_subvol_name;
742

Y
Yan Zheng 已提交
743 744 745 746 747
	if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
		error = -EACCES;
		goto error_close_devices;
	}

748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763
	/*
	 * Setup a dummy root and fs_info for test/set super.  This is because
	 * we don't actually fill this stuff out until open_ctree, but we need
	 * it for searching for existing supers, so this lets us do that and
	 * then open_ctree will properly initialize everything later.
	 */
	fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
	tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
	if (!fs_info || !tree_root) {
		error = -ENOMEM;
		goto error_close_devices;
	}
	fs_info->tree_root = tree_root;
	fs_info->fs_devices = fs_devices;
	tree_root->fs_info = fs_info;

764
	bdev = fs_devices->latest_bdev;
765
	s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
Y
Yan 已提交
766 767 768 769 770
	if (IS_ERR(s))
		goto error_s;

	if (s->s_root) {
		if ((flags ^ s->s_flags) & MS_RDONLY) {
771
			deactivate_locked_super(s);
Y
Yan 已提交
772
			error = -EBUSY;
Y
Yan Zheng 已提交
773
			goto error_close_devices;
Y
Yan 已提交
774 775
		}

Y
Yan Zheng 已提交
776
		btrfs_close_devices(fs_devices);
777 778
		kfree(fs_info);
		kfree(tree_root);
Y
Yan 已提交
779 780 781 782 783
	} else {
		char b[BDEVNAME_SIZE];

		s->s_flags = flags;
		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
784 785
		error = btrfs_fill_super(s, fs_devices, data,
					 flags & MS_SILENT ? 1 : 0);
Y
Yan 已提交
786
		if (error) {
787
			deactivate_locked_super(s);
788
			goto error_free_subvol_name;
Y
Yan 已提交
789 790
		}

791
		btrfs_sb(s)->fs_info->bdev_holder = fs_type;
Y
Yan 已提交
792 793 794
		s->s_flags |= MS_ACTIVE;
	}

795 796 797 798
	root = get_default_root(s, subvol_objectid);
	if (IS_ERR(root)) {
		error = PTR_ERR(root);
		deactivate_locked_super(s);
799
		goto error_free_subvol_name;
800 801 802 803 804 805
	}
	/* if they gave us a subvolume name bind mount into that */
	if (strcmp(subvol_name, ".")) {
		struct dentry *new_root;
		mutex_lock(&root->d_inode->i_mutex);
		new_root = lookup_one_len(subvol_name, root,
C
Chris Mason 已提交
806
				      strlen(subvol_name));
807
		mutex_unlock(&root->d_inode->i_mutex);
C
Chris Mason 已提交
808

809
		if (IS_ERR(new_root)) {
810
			dput(root);
811
			deactivate_locked_super(s);
812
			error = PTR_ERR(new_root);
813
			goto error_free_subvol_name;
814
		}
815
		if (!new_root->d_inode) {
816
			dput(root);
817
			dput(new_root);
818
			deactivate_locked_super(s);
819
			error = -ENXIO;
820
			goto error_free_subvol_name;
821
		}
822 823
		dput(root);
		root = new_root;
Y
Yan 已提交
824 825
	}

826
	kfree(subvol_name);
A
Al Viro 已提交
827
	return root;
Y
Yan 已提交
828 829 830

error_s:
	error = PTR_ERR(s);
Y
Yan Zheng 已提交
831
error_close_devices:
832
	btrfs_close_devices(fs_devices);
833 834
	kfree(fs_info);
	kfree(tree_root);
835 836
error_free_subvol_name:
	kfree(subvol_name);
A
Al Viro 已提交
837
	return ERR_PTR(error);
Y
Yan 已提交
838
}
839

Y
Yan Zheng 已提交
840 841 842 843 844
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
	struct btrfs_root *root = btrfs_sb(sb);
	int ret;

845 846 847 848
	ret = btrfs_parse_options(root, data);
	if (ret)
		return -EINVAL;

Y
Yan Zheng 已提交
849 850 851 852 853 854 855 856 857
	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
		return 0;

	if (*flags & MS_RDONLY) {
		sb->s_flags |= MS_RDONLY;

		ret =  btrfs_commit_super(root);
		WARN_ON(ret);
	} else {
Y
Yan Zheng 已提交
858 859 860
		if (root->fs_info->fs_devices->rw_devices == 0)
			return -EACCES;

Y
Yan Zheng 已提交
861 862 863
		if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
			return -EINVAL;

864
		ret = btrfs_cleanup_fs_roots(root->fs_info);
Y
Yan Zheng 已提交
865 866
		WARN_ON(ret);

867 868
		/* recover relocation */
		ret = btrfs_recover_relocation(root);
Y
Yan Zheng 已提交
869 870 871 872 873 874 875 876
		WARN_ON(ret);

		sb->s_flags &= ~MS_RDONLY;
	}

	return 0;
}

877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997
/*
 * The helper to calc the free space on the devices that can be used to store
 * file data.
 *
 * On success stores the computed byte count in *free_bytes and returns 0.
 * Returns -ENOMEM if the temporary per-device array cannot be allocated,
 * or the error reported by btrfs_account_dev_extents_size().
 */
static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_device_info *devices_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 skip_space;
	u64 type;
	u64 avail_space;
	u64 used_space;
	u64 min_stripe_size;
	int min_stripes = 1;
	int i = 0, nr_devices;
	int ret;

	nr_devices = fs_info->fs_devices->rw_devices;
	BUG_ON(!nr_devices);

	/* one slot per writable device */
	devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
			       GFP_NOFS);
	if (!devices_info)
		return -ENOMEM;

	/* calc min stripe number for data space allocation */
	type = btrfs_get_alloc_profile(root, 1);
	if (type & BTRFS_BLOCK_GROUP_RAID0)
		min_stripes = 2;
	else if (type & BTRFS_BLOCK_GROUP_RAID1)
		min_stripes = 2;
	else if (type & BTRFS_BLOCK_GROUP_RAID10)
		min_stripes = 4;

	/* the DUP profile needs room for two stripes on one device */
	if (type & BTRFS_BLOCK_GROUP_DUP)
		min_stripe_size = 2 * BTRFS_STRIPE_LEN;
	else
		min_stripe_size = BTRFS_STRIPE_LEN;

	/* collect the usable space of every device on the alloc list */
	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
		if (!device->in_fs_metadata)
			continue;

		avail_space = device->total_bytes - device->bytes_used;

		/* align with stripe_len */
		do_div(avail_space, BTRFS_STRIPE_LEN);
		avail_space *= BTRFS_STRIPE_LEN;

		/*
		 * In order to avoid overwriting the superblock on the drive,
		 * btrfs starts at an offset of at least 1MB when doing chunk
		 * allocation.
		 */
		skip_space = 1024 * 1024;

		/* user can set the offset in fs_info->alloc_start. */
		if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
		    device->total_bytes)
			skip_space = max(fs_info->alloc_start, skip_space);

		/*
		 * btrfs can not use the free space in [0, skip_space - 1],
		 * we must subtract it from the total. In order to implement
		 * it, we account the used space in this range first.
		 */
		ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
						     &used_space);
		if (ret) {
			kfree(devices_info);
			return ret;
		}

		/* calc the free space in [0, skip_space - 1] */
		skip_space -= used_space;

		/*
		 * we can not use the free space in [0, skip_space - 1],
		 * subtract it from the total.
		 */
		if (avail_space && avail_space >= skip_space)
			avail_space -= skip_space;
		else
			avail_space = 0;

		/* a device that cannot hold one minimal stripe is ignored */
		if (avail_space < min_stripe_size)
			continue;

		devices_info[i].dev = device;
		devices_info[i].max_avail = avail_space;

		i++;
	}

	/* from here on nr_devices counts only the devices recorded above */
	nr_devices = i;

	btrfs_descending_sort_devices(devices_info, nr_devices);

	/*
	 * Walk from the last entry towards the first for as long as at least
	 * min_stripes entries remain.  Each entry that can still hold a
	 * minimal stripe contributes max_avail * min_stripes bytes, and that
	 * amount is then subtracted from it and from the min_stripes - 1
	 * entries just before it.
	 */
	i = nr_devices - 1;
	avail_space = 0;
	while (nr_devices >= min_stripes) {
		if (devices_info[i].max_avail >= min_stripe_size) {
			int j;
			u64 alloc_size;

			avail_space += devices_info[i].max_avail * min_stripes;
			alloc_size = devices_info[i].max_avail;
			for (j = i + 1 - min_stripes; j <= i; j++)
				devices_info[j].max_avail -= alloc_size;
		}
		i--;
		nr_devices--;
	}

	kfree(devices_info);
	*free_bytes = avail_space;
	return 0;
}

C
Chris Mason 已提交
998 999 1000
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct btrfs_root *root = btrfs_sb(dentry->d_sb);
1001
	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
1002 1003 1004
	struct list_head *head = &root->fs_info->space_info;
	struct btrfs_space_info *found;
	u64 total_used = 0;
1005
	u64 total_free_data = 0;
1006
	int bits = dentry->d_sb->s_blocksize_bits;
1007
	__be32 *fsid = (__be32 *)root->fs_info->fsid;
1008
	int ret;
C
Chris Mason 已提交
1009

1010 1011
	/* holding chunk_muext to avoid allocating new chunks */
	mutex_lock(&root->fs_info->chunk_mutex);
1012
	rcu_read_lock();
J
Josef Bacik 已提交
1013
	list_for_each_entry_rcu(found, head, list) {
1014 1015 1016 1017 1018 1019
		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
			total_free_data += found->disk_total - found->disk_used;
			total_free_data -=
				btrfs_account_ro_block_groups_free_space(found);
		}

1020
		total_used += found->disk_used;
J
Josef Bacik 已提交
1021
	}
1022 1023
	rcu_read_unlock();

C
Chris Mason 已提交
1024
	buf->f_namelen = BTRFS_NAME_LEN;
1025
	buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
1026
	buf->f_bfree = buf->f_blocks - (total_used >> bits);
C
Chris Mason 已提交
1027 1028
	buf->f_bsize = dentry->d_sb->s_blocksize;
	buf->f_type = BTRFS_SUPER_MAGIC;
1029 1030 1031 1032 1033 1034 1035 1036 1037
	buf->f_bavail = total_free_data;
	ret = btrfs_calc_avail_data_space(root, &total_free_data);
	if (ret) {
		mutex_unlock(&root->fs_info->chunk_mutex);
		return ret;
	}
	buf->f_bavail += total_free_data;
	buf->f_bavail = buf->f_bavail >> bits;
	mutex_unlock(&root->fs_info->chunk_mutex);
C
Chris Mason 已提交
1038

1039
	/* We treat it as constant endianness (it doesn't matter _which_)
C
Chris Mason 已提交
1040
	   because we want the fsid to come out the same whether mounted
1041 1042 1043
	   on a big-endian or little-endian host */
	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
1044 1045 1046 1047
	/* Mask in the root object ID too, to disambiguate subvols */
	buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
	buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;

C
Chris Mason 已提交
1048 1049
	return 0;
}
static struct file_system_type btrfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "btrfs",
A
Al Viro 已提交
1054
	.mount		= btrfs_mount,
1055
	.kill_sb	= kill_anon_super,
1056 1057
	.fs_flags	= FS_REQUIRES_DEV,
};
/*
 * used by btrfsctl to scan devices when no FS is mounted
 */
1062 1063 1064 1065 1066
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
				unsigned long arg)
{
	struct btrfs_ioctl_vol_args *vol;
	struct btrfs_fs_devices *fs_devices;
1067
	int ret = -ENOTTY;
1068

1069 1070 1071
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

L
Li Zefan 已提交
1072 1073 1074
	vol = memdup_user((void __user *)arg, sizeof(*vol));
	if (IS_ERR(vol))
		return PTR_ERR(vol);
1075

1076 1077
	switch (cmd) {
	case BTRFS_IOC_SCAN_DEV:
1078
		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
1079 1080 1081
					    &btrfs_fs_type, &fs_devices);
		break;
	}
L
Li Zefan 已提交
1082

1083
	kfree(vol);
L
Linda Knippers 已提交
1084
	return ret;
1085 1086
}

static int btrfs_freeze(struct super_block *sb)
Y
Yan 已提交
1088 1089
{
	struct btrfs_root *root = btrfs_sb(sb);
1090 1091
	mutex_lock(&root->fs_info->transaction_kthread_mutex);
	mutex_lock(&root->fs_info->cleaner_mutex);
1092
	return 0;
Y
Yan 已提交
1093 1094
}

static int btrfs_unfreeze(struct super_block *sb)
Y
Yan 已提交
1096 1097
{
	struct btrfs_root *root = btrfs_sb(sb);
1098 1099
	mutex_unlock(&root->fs_info->cleaner_mutex);
	mutex_unlock(&root->fs_info->transaction_kthread_mutex);
1100
	return 0;
Y
Yan 已提交
1101
}
static const struct super_operations btrfs_super_ops = {
1104
	.drop_inode	= btrfs_drop_inode,
A
Al Viro 已提交
1105
	.evict_inode	= btrfs_evict_inode,
C
Chris Mason 已提交
1106
	.put_super	= btrfs_put_super,
1107
	.sync_fs	= btrfs_sync_fs,
E
Eric Paris 已提交
1108
	.show_options	= btrfs_show_options,
C
Chris Mason 已提交
1109
	.write_inode	= btrfs_write_inode,
C
Chris Mason 已提交
1110
	.dirty_inode	= btrfs_dirty_inode,
C
Chris Mason 已提交
1111 1112
	.alloc_inode	= btrfs_alloc_inode,
	.destroy_inode	= btrfs_destroy_inode,
C
Chris Mason 已提交
1113
	.statfs		= btrfs_statfs,
Y
Yan Zheng 已提交
1114
	.remount_fs	= btrfs_remount,
1115 1116
	.freeze_fs	= btrfs_freeze,
	.unfreeze_fs	= btrfs_unfreeze,
C
Chris Mason 已提交
1117
};

static const struct file_operations btrfs_ctl_fops = {
	.unlocked_ioctl	 = btrfs_control_ioctl,
	.compat_ioctl = btrfs_control_ioctl,
	.owner	 = THIS_MODULE,
1123
	.llseek = noop_llseek,
1124 1125 1126
};

static struct miscdevice btrfs_misc = {
1127
	.minor		= BTRFS_MINOR,
1128 1129 1130 1131
	.name		= "btrfs-control",
	.fops		= &btrfs_ctl_fops
};

/*
 * Module aliases for the control device: one keyed on its misc minor and
 * one on its device name (the same "btrfs-control" used in btrfs_misc).
 * NOTE(review): presumably these allow the module to be auto-loaded when
 * the device node is first accessed - confirm.
 */
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");

/*
 * Register the btrfs control misc device (btrfs_misc).
 * Returns whatever misc_register() returns.
 */
static int btrfs_interface_init(void)
{
	int ret = misc_register(&btrfs_misc);

	return ret;
}

static void btrfs_interface_exit(void)
1141 1142
{
	if (misc_deregister(&btrfs_misc) < 0)
C
Chris Mason 已提交
1143
		printk(KERN_INFO "misc_deregister failed for control device");
1144 1145
}

/*
 * Module init: bring up each subsystem in turn (sysfs, compression, slab
 * caches, extent_io, extent_map, the control device) and finally register
 * the filesystem type.
 *
 * If any step fails, every step that already succeeded is undone in
 * reverse order through the label chain at the bottom and the error is
 * returned.  Returns 0 on success.
 */
static int __init init_btrfs_fs(void)
{
	int err;

	err = btrfs_init_sysfs();
	if (err)
		return err;

	err = btrfs_init_compress();
	if (err)
		goto free_sysfs;

	err = btrfs_init_cachep();
	if (err)
		goto free_compress;

	err = extent_io_init();
	if (err)
		goto free_cachep;

	err = extent_map_init();
	if (err)
		goto free_extent_io;

	err = btrfs_interface_init();
	if (err)
		goto free_extent_map;

	err = register_filesystem(&btrfs_fs_type);
	if (err)
		goto unregister_ioctl;

	printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
	return 0;

	/* unwind: each label undoes the step preceding the failed one */
unregister_ioctl:
	btrfs_interface_exit();
free_extent_map:
	extent_map_exit();
free_extent_io:
	extent_io_exit();
free_cachep:
	btrfs_destroy_cachep();
free_compress:
	btrfs_exit_compress();
free_sysfs:
	btrfs_exit_sysfs();
	return err;
}

/*
 * Module exit: tear down everything init_btrfs_fs() set up, plus the
 * fs_uuids state released by btrfs_cleanup_fs_uuids().
 *
 * NOTE(review): the teardown order here is not the strict reverse of
 * init_btrfs_fs() (the caches are destroyed before the filesystem type is
 * unregistered); it is kept exactly as it was - confirm before reordering.
 */
static void __exit exit_btrfs_fs(void)
{
	btrfs_destroy_cachep();
	extent_map_exit();
	extent_io_exit();
	btrfs_interface_exit();
	unregister_filesystem(&btrfs_fs_type);
	btrfs_exit_sysfs();
	btrfs_cleanup_fs_uuids();
	btrfs_exit_compress();
}

/* Module entry/exit points and license declaration. */
module_init(init_btrfs_fs)
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");