super.c 64.6 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

Y
Yan 已提交
19
#include <linux/blkdev.h>
20
#include <linux/module.h>
C
Chris Mason 已提交
21
#include <linux/buffer_head.h>
22 23 24 25 26
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
E
Eric Paris 已提交
27
#include <linux/seq_file.h>
28 29
#include <linux/string.h>
#include <linux/backing-dev.h>
Y
Yan 已提交
30
#include <linux/mount.h>
C
Chris Mason 已提交
31
#include <linux/mpage.h>
C
Chris Mason 已提交
32 33
#include <linux/swap.h>
#include <linux/writeback.h>
C
Chris Mason 已提交
34
#include <linux/statfs.h>
C
Chris Mason 已提交
35
#include <linux/compat.h>
36
#include <linux/parser.h>
37
#include <linux/ctype.h>
38
#include <linux/namei.h>
39
#include <linux/miscdevice.h>
40
#include <linux/magic.h>
41
#include <linux/slab.h>
D
Dan Magenheimer 已提交
42
#include <linux/cleancache.h>
43
#include <linux/ratelimit.h>
44
#include <linux/btrfs.h>
45
#include "delayed-inode.h"
46
#include "ctree.h"
C
Chris Mason 已提交
47
#include "disk-io.h"
48
#include "transaction.h"
C
Chris Mason 已提交
49
#include "btrfs_inode.h"
C
Chris Mason 已提交
50
#include "print-tree.h"
51
#include "hash.h"
52
#include "props.h"
J
Josef Bacik 已提交
53
#include "xattr.h"
54
#include "volumes.h"
B
Balaji Rao 已提交
55
#include "export.h"
C
Chris Mason 已提交
56
#include "compression.h"
J
Josef Bacik 已提交
57
#include "rcu-string.h"
58
#include "dev-replace.h"
59
#include "free-space-cache.h"
60
#include "backref.h"
61
#include "tests/btrfs-tests.h"
62

63
#include "qgroup.h"
64 65 66
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>

67
static const struct super_operations btrfs_super_ops;
68
static struct file_system_type btrfs_fs_type;
C
Chris Mason 已提交
69

70 71
static int btrfs_remount(struct super_block *sb, int *flags, char *data);

72
/*
 * Translate a negative errno value into a short human-readable description.
 *
 * @errno: negative error code (e.g. -EIO).
 *
 * Returns a pointer to a string literal; codes without a dedicated message
 * yield "unknown".  The result must not be modified or freed.
 */
const char *btrfs_decode_error(int errno)
{
	/* Every candidate is a string literal, so the pointer is const. */
	const char *errstr = "unknown";

	switch (errno) {
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		errstr = "Readonly filesystem";
		break;
	case -EEXIST:
		errstr = "Object already exists";
		break;
	case -ENOSPC:
		errstr = "No space left";
		break;
	case -ENOENT:
		errstr = "No such entry";
		break;
	default:
		/* Keep the "unknown" placeholder. */
		break;
	}

	return errstr;
}

/* btrfs handle error by forcing the filesystem readonly */
static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
{
	struct super_block *sb = fs_info->sb;

	if (sb->s_flags & MS_RDONLY)
		return;

108
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
L
liubo 已提交
109
		sb->s_flags |= MS_RDONLY;
110
		btrfs_info(fs_info, "forced readonly");
111 112 113 114
		/*
		 * Note that a running device replace operation is not
		 * canceled here although there is no way to update
		 * the progress. It would add the risk of a deadlock,
115
		 * therefore the canceling is omitted. The only penalty
116 117 118 119 120
		 * is that some I/O remains active until the procedure
		 * completes. The next time when the filesystem is
		 * mounted writeable again, the device replace
		 * operation continues.
		 */
L
liubo 已提交
121 122 123 124
	}
}

/*
125
 * __btrfs_handle_fs_error decodes expected errors from the caller and
L
liubo 已提交
126 127
 * invokes the approciate error response.
 */
128
__cold
129
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
J
Jeff Mahoney 已提交
130
		       unsigned int line, int errno, const char *fmt, ...)
L
liubo 已提交
131 132
{
	struct super_block *sb = fs_info->sb;
133
#ifdef CONFIG_PRINTK
L
liubo 已提交
134
	const char *errstr;
135
#endif
L
liubo 已提交
136 137 138 139 140 141

	/*
	 * Special case: if the error is EROFS, and we're already
	 * under MS_RDONLY, then it is safe here.
	 */
	if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
J
Jeff Mahoney 已提交
142 143
  		return;

144
#ifdef CONFIG_PRINTK
145
	errstr = btrfs_decode_error(errno);
J
Jeff Mahoney 已提交
146
	if (fmt) {
147 148 149 150 151 152
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
J
Jeff Mahoney 已提交
153

154 155
		printk(KERN_CRIT
			"BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
156
			sb->s_id, function, line, errno, errstr, &vaf);
157
		va_end(args);
J
Jeff Mahoney 已提交
158
	} else {
159
		printk(KERN_CRIT "BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
160
			sb->s_id, function, line, errno, errstr);
J
Jeff Mahoney 已提交
161
	}
162
#endif
L
liubo 已提交
163

A
Anand Jain 已提交
164 165 166 167 168 169
	/*
	 * Today we only save the error info to memory.  Long term we'll
	 * also send it down to the disk
	 */
	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);

J
Jeff Mahoney 已提交
170
	/* Don't go through full error handling during mount */
171
	if (sb->s_flags & MS_BORN)
J
Jeff Mahoney 已提交
172 173
		btrfs_handle_error(fs_info);
}
L
liubo 已提交
174

175
#ifdef CONFIG_PRINTK
176
/*
 * Human-readable names for the numeric log levels; btrfs_printk() indexes
 * this table with (level character - '0').
 */
static const char * const logtypes[] = {
	[0] = "emergency",
	[1] = "alert",
	[2] = "critical",
	[3] = "error",
	[4] = "warning",
	[5] = "notice",
	[6] = "info",
	[7] = "debug",
};

187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202

/*
 * One ratelimit state per log level (same indexing as logtypes[]), so that a
 * flood of less important messages doesn't cause more important ones to be
 * dropped.  Each state uses DEFAULT_RATELIMIT_INTERVAL with a burst of 100.
 */
static struct ratelimit_state printk_limits[] = {
	[0] = RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
	[1] = RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
	[2] = RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
	[3] = RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
	[4] = RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
	[5] = RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
	[6] = RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
	[7] = RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};

203
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
J
Jeff Mahoney 已提交
204 205 206 207 208 209
{
	struct super_block *sb = fs_info->sb;
	char lvl[4];
	struct va_format vaf;
	va_list args;
	const char *type = logtypes[4];
210
	int kern_level;
211
	struct ratelimit_state *ratelimit;
J
Jeff Mahoney 已提交
212 213 214

	va_start(args, fmt);

215 216 217 218 219 220 221
	kern_level = printk_get_level(fmt);
	if (kern_level) {
		size_t size = printk_skip_level(fmt) - fmt;
		memcpy(lvl, fmt,  size);
		lvl[size] = '\0';
		fmt += size;
		type = logtypes[kern_level - '0'];
222 223
		ratelimit = &printk_limits[kern_level - '0'];
	} else {
J
Jeff Mahoney 已提交
224
		*lvl = '\0';
225 226 227
		/* Default to debug output */
		ratelimit = &printk_limits[7];
	}
J
Jeff Mahoney 已提交
228 229 230

	vaf.fmt = fmt;
	vaf.va = &args;
231

232 233
	if (__ratelimit(ratelimit))
		printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
234 235 236 237

	va_end(args);
}
#endif
L
liubo 已提交
238

239 240 241 242 243 244 245 246 247 248 249 250 251
/*
 * We only mark the transaction aborted and then set the file system read-only.
 * This will prevent new transactions from starting or trying to join this
 * one.
 *
 * This means that error recovery at the call site is limited to freeing
 * any local memory allocations and passing the error code up without
 * further cleanup. The transaction should complete as it normally would
 * in the call path but will return -EIO.
 *
 * We'll complete the cleanup in btrfs_end_transaction and
 * btrfs_commit_transaction.
 */
252
__cold
253
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
254
			       const char *function,
255 256
			       unsigned int line, int errno)
{
257 258
	struct btrfs_fs_info *fs_info = trans->fs_info;

259 260 261
	trans->aborted = errno;
	/* Nothing used. The other threads that have joined this
	 * transaction may be able to continue. */
262
	if (!trans->dirty && list_empty(&trans->new_bgs)) {
263 264
		const char *errstr;

265
		errstr = btrfs_decode_error(errno);
266
		btrfs_warn(fs_info,
267 268
		           "%s:%d: Aborting unused transaction(%s).",
		           function, line, errstr);
L
liubo 已提交
269
		return;
270
	}
271
	ACCESS_ONCE(trans->transaction->aborted) = errno;
272
	/* Wake up anybody who may be waiting on this transaction */
273 274 275
	wake_up(&fs_info->transaction_wait);
	wake_up(&fs_info->transaction_blocked_wait);
	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
276
}
J
Jeff Mahoney 已提交
277 278 279 280
/*
 * __btrfs_panic decodes unexpected, fatal errors from the caller,
 * issues an alert, and either panics or BUGs, depending on mount options.
 */
281
__cold
J
Jeff Mahoney 已提交
282 283 284 285 286 287 288
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
		   unsigned int line, int errno, const char *fmt, ...)
{
	char *s_id = "<unknown>";
	const char *errstr;
	struct va_format vaf = { .fmt = fmt };
	va_list args;
L
liubo 已提交
289

J
Jeff Mahoney 已提交
290 291
	if (fs_info)
		s_id = fs_info->sb->s_id;
L
liubo 已提交
292

J
Jeff Mahoney 已提交
293 294 295
	va_start(args, fmt);
	vaf.va = &args;

296
	errstr = btrfs_decode_error(errno);
297
	if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
298 299
		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
			s_id, function, line, &vaf, errno, errstr);
J
Jeff Mahoney 已提交
300

301 302
	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
		   function, line, &vaf, errno, errstr);
J
Jeff Mahoney 已提交
303 304
	va_end(args);
	/* Caller calls BUG() */
L
liubo 已提交
305 306
}

C
Chris Mason 已提交
307
static void btrfs_put_super(struct super_block *sb)
C
Chris Mason 已提交
308
{
309
	close_ctree(btrfs_sb(sb)->tree_root);
C
Chris Mason 已提交
310 311
}

312
/*
 * Mount option identifiers returned by match_token() for the tokens[] table.
 * Opt_err is the last entry and is what the table's NULL pattern maps to.
 */
enum {
	Opt_degraded,
	Opt_subvol,
	Opt_subvolid,
	Opt_device,
	Opt_nodatasum,
	Opt_nodatacow,
	Opt_max_inline,
	Opt_alloc_start,
	Opt_nobarrier,
	Opt_ssd,
	Opt_nossd,
	Opt_ssd_spread,
	Opt_thread_pool,
	Opt_noacl,
	Opt_compress,
	Opt_compress_type,
	Opt_compress_force,
	Opt_compress_force_type,
	Opt_notreelog,
	Opt_ratio,
	Opt_flushoncommit,
	Opt_discard,
	Opt_space_cache,
	Opt_space_cache_version,
	Opt_clear_cache,
	Opt_user_subvol_rm_allowed,
	Opt_enospc_debug,
	Opt_subvolrootid,
	Opt_defrag,
	Opt_inode_cache,
	Opt_no_space_cache,
	Opt_recovery,
	Opt_skip_balance,
	Opt_check_integrity,
	Opt_check_integrity_including_extent_data,
	Opt_check_integrity_print_mask,
	Opt_fatal_errors,
	Opt_rescan_uuid_tree,
	Opt_commit_interval,
	Opt_barrier,
	Opt_nodefrag,
	Opt_nodiscard,
	Opt_noenospc_debug,
	Opt_noflushoncommit,
	Opt_acl,
	Opt_datacow,
	Opt_datasum,
	Opt_treelog,
	Opt_noinode_cache,
	Opt_usebackuproot,
	Opt_nologreplay,
	Opt_norecovery,
#ifdef CONFIG_BTRFS_DEBUG
	Opt_fragment_data,
	Opt_fragment_metadata,
	Opt_fragment_all,
#endif
	Opt_err,
};

D
David Sterba 已提交
334
static const match_table_t tokens = {
335
	{Opt_degraded, "degraded"},
336
	{Opt_subvol, "subvol=%s"},
337
	{Opt_subvolid, "subvolid=%s"},
338
	{Opt_device, "device=%s"},
339
	{Opt_nodatasum, "nodatasum"},
Q
Qu Wenruo 已提交
340
	{Opt_datasum, "datasum"},
341
	{Opt_nodatacow, "nodatacow"},
Q
Qu Wenruo 已提交
342
	{Opt_datacow, "datacow"},
343
	{Opt_nobarrier, "nobarrier"},
344
	{Opt_barrier, "barrier"},
345
	{Opt_max_inline, "max_inline=%s"},
346
	{Opt_alloc_start, "alloc_start=%s"},
347
	{Opt_thread_pool, "thread_pool=%d"},
C
Chris Mason 已提交
348
	{Opt_compress, "compress"},
349
	{Opt_compress_type, "compress=%s"},
C
Chris Mason 已提交
350
	{Opt_compress_force, "compress-force"},
351
	{Opt_compress_force_type, "compress-force=%s"},
352
	{Opt_ssd, "ssd"},
353
	{Opt_ssd_spread, "ssd_spread"},
C
Chris Mason 已提交
354
	{Opt_nossd, "nossd"},
Q
Qu Wenruo 已提交
355
	{Opt_acl, "acl"},
J
Josef Bacik 已提交
356
	{Opt_noacl, "noacl"},
S
Sage Weil 已提交
357
	{Opt_notreelog, "notreelog"},
Q
Qu Wenruo 已提交
358
	{Opt_treelog, "treelog"},
359
	{Opt_nologreplay, "nologreplay"},
360
	{Opt_norecovery, "norecovery"},
361
	{Opt_flushoncommit, "flushoncommit"},
362
	{Opt_noflushoncommit, "noflushoncommit"},
363
	{Opt_ratio, "metadata_ratio=%d"},
C
Christoph Hellwig 已提交
364
	{Opt_discard, "discard"},
Q
Qu Wenruo 已提交
365
	{Opt_nodiscard, "nodiscard"},
366
	{Opt_space_cache, "space_cache"},
367
	{Opt_space_cache_version, "space_cache=%s"},
368
	{Opt_clear_cache, "clear_cache"},
369
	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
370
	{Opt_enospc_debug, "enospc_debug"},
371
	{Opt_noenospc_debug, "noenospc_debug"},
372
	{Opt_subvolrootid, "subvolrootid=%d"},
C
Chris Mason 已提交
373
	{Opt_defrag, "autodefrag"},
374
	{Opt_nodefrag, "noautodefrag"},
C
Chris Mason 已提交
375
	{Opt_inode_cache, "inode_cache"},
376
	{Opt_noinode_cache, "noinode_cache"},
377
	{Opt_no_space_cache, "nospace_cache"},
378 379
	{Opt_recovery, "recovery"}, /* deprecated */
	{Opt_usebackuproot, "usebackuproot"},
380
	{Opt_skip_balance, "skip_balance"},
381 382 383
	{Opt_check_integrity, "check_int"},
	{Opt_check_integrity_including_extent_data, "check_int_data"},
	{Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
384
	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
J
Jeff Mahoney 已提交
385
	{Opt_fatal_errors, "fatal_errors=%s"},
386
	{Opt_commit_interval, "commit=%d"},
387 388 389 390 391
#ifdef CONFIG_BTRFS_DEBUG
	{Opt_fragment_data, "fragment=data"},
	{Opt_fragment_metadata, "fragment=metadata"},
	{Opt_fragment_all, "fragment=all"},
#endif
J
Josef Bacik 已提交
392
	{Opt_err, NULL},
393 394
};

395 396 397
/*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
398
 * XXX JDM: This needs to be cleaned up for remount.
399
 */
400 401
int btrfs_parse_options(struct btrfs_root *root, char *options,
			unsigned long new_flags)
402
{
403
	struct btrfs_fs_info *info = root->fs_info;
404
	substring_t args[MAX_OPT_ARGS];
405 406
	char *p, *num, *orig = NULL;
	u64 cache_gen;
407
	int intarg;
S
Sage Weil 已提交
408
	int ret = 0;
409 410
	char *compress_type;
	bool compress_force = false;
411 412 413
	enum btrfs_compression_type saved_compress_type;
	bool saved_compress_force;
	int no_compress = 0;
414

415
	cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
416 417 418
	if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
		btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
	else if (cache_gen)
419 420
		btrfs_set_opt(info->mount_opt, SPACE_CACHE);

421 422 423 424
	/*
	 * Even the options are empty, we still need to do extra check
	 * against new flags
	 */
425
	if (!options)
426
		goto check;
427

428 429 430 431 432 433 434 435
	/*
	 * strsep changes the string, duplicate it because parse_options
	 * gets called twice
	 */
	options = kstrdup(options, GFP_NOFS);
	if (!options)
		return -ENOMEM;

436
	orig = options;
437

438
	while ((p = strsep(&options, ",")) != NULL) {
439 440 441 442 443 444
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
445
		case Opt_degraded:
446
			btrfs_info(root->fs_info, "allowing degraded mounts");
447
			btrfs_set_opt(info->mount_opt, DEGRADED);
448
			break;
449
		case Opt_subvol:
450
		case Opt_subvolid:
451
		case Opt_subvolrootid:
452
		case Opt_device:
453
			/*
454
			 * These are parsed by btrfs_parse_early_options
455 456
			 * and can be happily ignored here.
			 */
457 458
			break;
		case Opt_nodatasum:
459
			btrfs_set_and_info(info, NODATASUM,
460
					   "setting nodatasum");
461
			break;
Q
Qu Wenruo 已提交
462
		case Opt_datasum:
463 464
			if (btrfs_test_opt(info, NODATASUM)) {
				if (btrfs_test_opt(info, NODATACOW))
465 466 467 468
					btrfs_info(root->fs_info, "setting datasum, datacow enabled");
				else
					btrfs_info(root->fs_info, "setting datasum");
			}
Q
Qu Wenruo 已提交
469 470 471
			btrfs_clear_opt(info->mount_opt, NODATACOW);
			btrfs_clear_opt(info->mount_opt, NODATASUM);
			break;
472
		case Opt_nodatacow:
473 474 475
			if (!btrfs_test_opt(info, NODATACOW)) {
				if (!btrfs_test_opt(info, COMPRESS) ||
				    !btrfs_test_opt(info, FORCE_COMPRESS)) {
476
					btrfs_info(root->fs_info,
477 478 479 480
						   "setting nodatacow, compression disabled");
				} else {
					btrfs_info(root->fs_info, "setting nodatacow");
				}
481 482 483
			}
			btrfs_clear_opt(info->mount_opt, COMPRESS);
			btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
484 485
			btrfs_set_opt(info->mount_opt, NODATACOW);
			btrfs_set_opt(info->mount_opt, NODATASUM);
486
			break;
Q
Qu Wenruo 已提交
487
		case Opt_datacow:
488
			btrfs_clear_and_info(info, NODATACOW,
489
					     "setting datacow");
Q
Qu Wenruo 已提交
490
			break;
C
Chris Mason 已提交
491
		case Opt_compress_force:
492 493
		case Opt_compress_force_type:
			compress_force = true;
494
			/* Fallthrough */
495 496
		case Opt_compress:
		case Opt_compress_type:
497 498
			saved_compress_type = btrfs_test_opt(info,
							     COMPRESS) ?
499 500
				info->compress_type : BTRFS_COMPRESS_NONE;
			saved_compress_force =
501
				btrfs_test_opt(info, FORCE_COMPRESS);
502 503 504 505 506
			if (token == Opt_compress ||
			    token == Opt_compress_force ||
			    strcmp(args[0].from, "zlib") == 0) {
				compress_type = "zlib";
				info->compress_type = BTRFS_COMPRESS_ZLIB;
507
				btrfs_set_opt(info->mount_opt, COMPRESS);
508 509
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
510
				no_compress = 0;
L
Li Zefan 已提交
511 512 513
			} else if (strcmp(args[0].from, "lzo") == 0) {
				compress_type = "lzo";
				info->compress_type = BTRFS_COMPRESS_LZO;
514
				btrfs_set_opt(info->mount_opt, COMPRESS);
515 516
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
517
				btrfs_set_fs_incompat(info, COMPRESS_LZO);
518
				no_compress = 0;
519 520 521 522 523
			} else if (strncmp(args[0].from, "no", 2) == 0) {
				compress_type = "no";
				btrfs_clear_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				compress_force = false;
524
				no_compress++;
525 526 527 528 529 530
			} else {
				ret = -EINVAL;
				goto out;
			}

			if (compress_force) {
531
				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
532
			} else {
533 534 535 536 537 538 539
				/*
				 * If we remount from compress-force=xxx to
				 * compress=xxx, we need clear FORCE_COMPRESS
				 * flag, otherwise, there is no way for users
				 * to disable forcible compression separately.
				 */
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
540
			}
541
			if ((btrfs_test_opt(info, COMPRESS) &&
542 543
			     (info->compress_type != saved_compress_type ||
			      compress_force != saved_compress_force)) ||
544
			    (!btrfs_test_opt(info, COMPRESS) &&
545 546 547 548 549 550 551
			     no_compress == 1)) {
				btrfs_info(root->fs_info,
					   "%s %s compression",
					   (compress_force) ? "force" : "use",
					   compress_type);
			}
			compress_force = false;
C
Chris Mason 已提交
552
			break;
553
		case Opt_ssd:
554
			btrfs_set_and_info(info, SSD,
555
					   "use ssd allocation scheme");
556
			break;
557
		case Opt_ssd_spread:
558
			btrfs_set_and_info(info, SSD_SPREAD,
559
					   "use spread ssd allocation scheme");
560
			btrfs_set_opt(info->mount_opt, SSD);
561
			break;
C
Chris Mason 已提交
562
		case Opt_nossd:
563
			btrfs_set_and_info(info, NOSSD,
564
					     "not using ssd allocation scheme");
C
Chris Mason 已提交
565 566
			btrfs_clear_opt(info->mount_opt, SSD);
			break;
567
		case Opt_barrier:
568
			btrfs_clear_and_info(info, NOBARRIER,
569
					     "turning on barriers");
570
			break;
571
		case Opt_nobarrier:
572
			btrfs_set_and_info(info, NOBARRIER,
573
					   "turning off barriers");
574
			break;
575
		case Opt_thread_pool:
576 577 578 579
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
			} else if (intarg > 0) {
580
				info->thread_pool_size = intarg;
581 582 583 584
			} else {
				ret = -EINVAL;
				goto out;
			}
585
			break;
586
		case Opt_max_inline:
587 588
			num = match_strdup(&args[0]);
			if (num) {
A
Akinobu Mita 已提交
589
				info->max_inline = memparse(num, NULL);
590 591
				kfree(num);

C
Chris Mason 已提交
592
				if (info->max_inline) {
593
					info->max_inline = min_t(u64,
C
Chris Mason 已提交
594 595 596
						info->max_inline,
						root->sectorsize);
				}
597
				btrfs_info(root->fs_info, "max_inline at %llu",
598
					info->max_inline);
599 600 601
			} else {
				ret = -ENOMEM;
				goto out;
602 603
			}
			break;
604
		case Opt_alloc_start:
605 606
			num = match_strdup(&args[0]);
			if (num) {
M
Miao Xie 已提交
607
				mutex_lock(&info->chunk_mutex);
A
Akinobu Mita 已提交
608
				info->alloc_start = memparse(num, NULL);
M
Miao Xie 已提交
609
				mutex_unlock(&info->chunk_mutex);
610
				kfree(num);
611
				btrfs_info(root->fs_info, "allocations start at %llu",
612
					info->alloc_start);
613 614 615
			} else {
				ret = -ENOMEM;
				goto out;
616 617
			}
			break;
Q
Qu Wenruo 已提交
618
		case Opt_acl:
619
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
Q
Qu Wenruo 已提交
620 621
			root->fs_info->sb->s_flags |= MS_POSIXACL;
			break;
622 623 624 625 626 627
#else
			btrfs_err(root->fs_info,
				"support for ACL not compiled in!");
			ret = -EINVAL;
			goto out;
#endif
J
Josef Bacik 已提交
628 629 630
		case Opt_noacl:
			root->fs_info->sb->s_flags &= ~MS_POSIXACL;
			break;
S
Sage Weil 已提交
631
		case Opt_notreelog:
632
			btrfs_set_and_info(info, NOTREELOG,
633
					   "disabling tree log");
Q
Qu Wenruo 已提交
634 635
			break;
		case Opt_treelog:
636
			btrfs_clear_and_info(info, NOTREELOG,
637
					     "enabling tree log");
S
Sage Weil 已提交
638
			break;
639
		case Opt_norecovery:
640
		case Opt_nologreplay:
641
			btrfs_set_and_info(info, NOLOGREPLAY,
642 643
					   "disabling log replay at mount time");
			break;
644
		case Opt_flushoncommit:
645
			btrfs_set_and_info(info, FLUSHONCOMMIT,
646
					   "turning on flush-on-commit");
647
			break;
648
		case Opt_noflushoncommit:
649
			btrfs_clear_and_info(info, FLUSHONCOMMIT,
650
					     "turning off flush-on-commit");
651
			break;
652
		case Opt_ratio:
653 654 655 656
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
			} else if (intarg >= 0) {
657
				info->metadata_ratio = intarg;
658
				btrfs_info(root->fs_info, "metadata ratio %d",
659
				       info->metadata_ratio);
660 661 662
			} else {
				ret = -EINVAL;
				goto out;
663 664
			}
			break;
C
Christoph Hellwig 已提交
665
		case Opt_discard:
666
			btrfs_set_and_info(info, DISCARD,
667
					   "turning on discard");
C
Christoph Hellwig 已提交
668
			break;
Q
Qu Wenruo 已提交
669
		case Opt_nodiscard:
670
			btrfs_clear_and_info(info, DISCARD,
671
					     "turning off discard");
Q
Qu Wenruo 已提交
672
			break;
673
		case Opt_space_cache:
674 675 676 677 678
		case Opt_space_cache_version:
			if (token == Opt_space_cache ||
			    strcmp(args[0].from, "v1") == 0) {
				btrfs_clear_opt(root->fs_info->mount_opt,
						FREE_SPACE_TREE);
679
				btrfs_set_and_info(info, SPACE_CACHE,
680 681 682 683
						   "enabling disk space caching");
			} else if (strcmp(args[0].from, "v2") == 0) {
				btrfs_clear_opt(root->fs_info->mount_opt,
						SPACE_CACHE);
684 685
				btrfs_set_and_info(info,
						   FREE_SPACE_TREE,
686 687 688 689 690
						   "enabling free space tree");
			} else {
				ret = -EINVAL;
				goto out;
			}
691
			break;
692 693 694
		case Opt_rescan_uuid_tree:
			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
			break;
695
		case Opt_no_space_cache:
696 697 698
			if (btrfs_test_opt(info, SPACE_CACHE)) {
				btrfs_clear_and_info(info,
						     SPACE_CACHE,
699 700
						     "disabling disk space caching");
			}
701 702 703
			if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
				btrfs_clear_and_info(info,
						     FREE_SPACE_TREE,
704 705
						     "disabling free space tree");
			}
706
			break;
C
Chris Mason 已提交
707
		case Opt_inode_cache:
708
			btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
709
					   "enabling inode map caching");
710 711
			break;
		case Opt_noinode_cache:
712
			btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
713
					     "disabling inode map caching");
C
Chris Mason 已提交
714
			break;
715
		case Opt_clear_cache:
716
			btrfs_set_and_info(info, CLEAR_CACHE,
717
					   "force clearing of disk cache");
718
			break;
719 720 721
		case Opt_user_subvol_rm_allowed:
			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
			break;
722 723 724
		case Opt_enospc_debug:
			btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
725 726 727
		case Opt_noenospc_debug:
			btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
C
Chris Mason 已提交
728
		case Opt_defrag:
729
			btrfs_set_and_info(info, AUTO_DEFRAG,
730
					   "enabling auto defrag");
C
Chris Mason 已提交
731
			break;
732
		case Opt_nodefrag:
733
			btrfs_clear_and_info(info, AUTO_DEFRAG,
734
					     "disabling auto defrag");
735
			break;
C
Chris Mason 已提交
736
		case Opt_recovery:
737 738 739 740 741 742
			btrfs_warn(root->fs_info,
				   "'recovery' is deprecated, use 'usebackuproot' instead");
		case Opt_usebackuproot:
			btrfs_info(root->fs_info,
				   "trying to use backup root at mount time");
			btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
C
Chris Mason 已提交
743
			break;
744 745 746
		case Opt_skip_balance:
			btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
			break;
747 748
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		case Opt_check_integrity_including_extent_data:
749 750
			btrfs_info(root->fs_info,
				   "enabling check integrity including extent data");
751 752 753 754 755
			btrfs_set_opt(info->mount_opt,
				      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity:
756
			btrfs_info(root->fs_info, "enabling check integrity");
757 758 759
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity_print_mask:
760 761 762 763
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
			} else if (intarg >= 0) {
764
				info->check_integrity_print_mask = intarg;
765
				btrfs_info(root->fs_info, "check_integrity_print_mask 0x%x",
766
				       info->check_integrity_print_mask);
767 768 769
			} else {
				ret = -EINVAL;
				goto out;
770 771 772 773 774 775
			}
			break;
#else
		case Opt_check_integrity_including_extent_data:
		case Opt_check_integrity:
		case Opt_check_integrity_print_mask:
776 777
			btrfs_err(root->fs_info,
				"support for check_integrity* not compiled in!");
778 779 780
			ret = -EINVAL;
			goto out;
#endif
J
Jeff Mahoney 已提交
781 782 783 784 785 786 787 788 789 790 791 792
		case Opt_fatal_errors:
			if (strcmp(args[0].from, "panic") == 0)
				btrfs_set_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else if (strcmp(args[0].from, "bug") == 0)
				btrfs_clear_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else {
				ret = -EINVAL;
				goto out;
			}
			break;
793 794 795 796
		case Opt_commit_interval:
			intarg = 0;
			ret = match_int(&args[0], &intarg);
			if (ret < 0) {
797
				btrfs_err(root->fs_info, "invalid commit interval");
798 799 800 801 802
				ret = -EINVAL;
				goto out;
			}
			if (intarg > 0) {
				if (intarg > 300) {
803
					btrfs_warn(root->fs_info, "excessive commit interval %d",
804 805 806 807
							intarg);
				}
				info->commit_interval = intarg;
			} else {
808
				btrfs_info(root->fs_info, "using default commit interval %ds",
809 810 811 812
				    BTRFS_DEFAULT_COMMIT_INTERVAL);
				info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
			}
			break;
813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
#ifdef CONFIG_BTRFS_DEBUG
		case Opt_fragment_all:
			btrfs_info(root->fs_info, "fragmenting all space");
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
			break;
		case Opt_fragment_metadata:
			btrfs_info(root->fs_info, "fragmenting metadata");
			btrfs_set_opt(info->mount_opt,
				      FRAGMENT_METADATA);
			break;
		case Opt_fragment_data:
			btrfs_info(root->fs_info, "fragmenting data");
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			break;
#endif
S
Sage Weil 已提交
829
		case Opt_err:
830
			btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
S
Sage Weil 已提交
831 832
			ret = -EINVAL;
			goto out;
833
		default:
834
			break;
835 836
		}
	}
837 838 839 840
check:
	/*
	 * Extra check for current option against current flag
	 */
841
	if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
842 843 844 845
		btrfs_err(root->fs_info,
			  "nologreplay must be used with ro mount option");
		ret = -EINVAL;
	}
S
Sage Weil 已提交
846
out:
847
	if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
848 849
	    !btrfs_test_opt(info, FREE_SPACE_TREE) &&
	    !btrfs_test_opt(info, CLEAR_CACHE)) {
850 851 852 853
		btrfs_err(root->fs_info, "cannot disable free space tree");
		ret = -EINVAL;

	}
854
	if (!ret && btrfs_test_opt(info, SPACE_CACHE))
855
		btrfs_info(root->fs_info, "disk space caching is enabled");
856
	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
857
		btrfs_info(root->fs_info, "using free space tree");
858
	kfree(orig);
S
Sage Weil 已提交
859
	return ret;
860 861 862 863 864 865 866 867
}

/*
 * Parse mount options that are required early in the mount process.
 *
 * All other options will be parsed on much later in the mount process and
 * only when we need to allocate a new super block.
 */
868
static int btrfs_parse_early_options(const char *options, fmode_t flags,
869
		void *holder, char **subvol_name, u64 *subvol_objectid,
870
		struct btrfs_fs_devices **fs_devices)
871 872
{
	substring_t args[MAX_OPT_ARGS];
873
	char *device_name, *opts, *orig, *p;
874
	char *num = NULL;
875 876 877
	int error = 0;

	if (!options)
878
		return 0;
879 880 881 882 883 884 885 886

	/*
	 * strsep changes the string, duplicate it because parse_options
	 * gets called twice
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
887
	orig = opts;
888 889 890 891 892 893 894 895 896

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_subvol:
897
			kfree(*subvol_name);
898
			*subvol_name = match_strdup(&args[0]);
899 900 901 902
			if (!*subvol_name) {
				error = -ENOMEM;
				goto out;
			}
903
			break;
904
		case Opt_subvolid:
905 906 907 908
			num = match_strdup(&args[0]);
			if (num) {
				*subvol_objectid = memparse(num, NULL);
				kfree(num);
909
				/* we want the original fs_tree */
910
				if (!*subvol_objectid)
911 912
					*subvol_objectid =
						BTRFS_FS_TREE_OBJECTID;
913 914 915
			} else {
				error = -EINVAL;
				goto out;
916
			}
917
			break;
918
		case Opt_subvolrootid:
919
			printk(KERN_WARNING
920 921
				"BTRFS: 'subvolrootid' mount option is deprecated and has "
				"no effect\n");
922
			break;
923
		case Opt_device:
924 925 926 927 928 929
			device_name = match_strdup(&args[0]);
			if (!device_name) {
				error = -ENOMEM;
				goto out;
			}
			error = btrfs_scan_one_device(device_name,
930
					flags, holder, fs_devices);
931
			kfree(device_name);
932
			if (error)
933
				goto out;
934
			break;
935 936 937 938 939
		default:
			break;
		}
	}

940
out:
941
	kfree(orig);
942
	return error;
943 944
}

945 946
/*
 * Build the path of the subvolume @subvol_objectid by walking root backrefs
 * and inode refs upwards, prepending one "/<component>" per step.
 *
 * The string is assembled from the end of a PATH_MAX sized buffer towards its
 * start and finally moved to the front of the buffer.
 *
 * Returns a kmalloc'ed string that the caller must kfree(), or an ERR_PTR()
 * (-ENOMEM, -ENOENT, -ENAMETOOLONG or an error from the tree lookups).
 */
static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
					   u64 subvol_objectid)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_root *fs_root;
	struct btrfs_root_ref *root_ref;
	struct btrfs_inode_ref *inode_ref;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	char *name = NULL, *ptr;
	u64 dirid;
	int len;
	int ret;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto err;
	}
	path->leave_spinning = 1;

	name = kmalloc(PATH_MAX, GFP_NOFS);
	if (!name) {
		ret = -ENOMEM;
		goto err;
	}
	/* Start with an empty string at the very end of the buffer. */
	ptr = name + PATH_MAX - 1;
	ptr[0] = '\0';

	/*
	 * Walk up the subvolume trees in the tree of tree roots by root
	 * backrefs until we hit the top-level subvolume.
	 */
	while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_BACKREF_KEY;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
			goto err;
		} else if (ret > 0) {
			ret = btrfs_previous_item(root, path, subvol_objectid,
						  BTRFS_ROOT_BACKREF_KEY);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = -ENOENT;
				goto err;
			}
		}

		/* The backref's key offset becomes the next subvolume to visit. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		subvol_objectid = key.offset;

		root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_root_ref);
		len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
		/*
		 * Check the remaining room before moving ptr so that we never
		 * form a pointer below the start of the buffer.
		 */
		if (ptr - name < len + 1) {
			ret = -ENAMETOOLONG;
			goto err;
		}
		ptr -= len + 1;
		read_extent_buffer(path->nodes[0], ptr + 1,
				   (unsigned long)(root_ref + 1), len);
		ptr[0] = '/';
		dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
		btrfs_release_path(path);

		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;
		fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
		if (IS_ERR(fs_root)) {
			ret = PTR_ERR(fs_root);
			goto err;
		}

		/*
		 * Walk up the filesystem tree by inode refs until we hit the
		 * root directory.
		 */
		while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
			key.objectid = dirid;
			key.type = BTRFS_INODE_REF_KEY;
			key.offset = (u64)-1;

			ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = btrfs_previous_item(fs_root, path, dirid,
							  BTRFS_INODE_REF_KEY);
				if (ret < 0) {
					goto err;
				} else if (ret > 0) {
					ret = -ENOENT;
					goto err;
				}
			}

			/* The inode ref's key offset becomes the next directory. */
			btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
			dirid = key.offset;

			inode_ref = btrfs_item_ptr(path->nodes[0],
						   path->slots[0],
						   struct btrfs_inode_ref);
			len = btrfs_inode_ref_name_len(path->nodes[0],
						       inode_ref);
			/* Same bounds check as for the root ref name above. */
			if (ptr - name < len + 1) {
				ret = -ENAMETOOLONG;
				goto err;
			}
			ptr -= len + 1;
			read_extent_buffer(path->nodes[0], ptr + 1,
					   (unsigned long)(inode_ref + 1), len);
			ptr[0] = '/';
			btrfs_release_path(path);
		}
	}

	btrfs_free_path(path);
	if (ptr == name + PATH_MAX - 1) {
		/* Nothing was prepended: the result is the top level, "/". */
		name[0] = '/';
		name[1] = '\0';
	} else {
		/* Move the assembled string (with its NUL) to the buffer start. */
		memmove(name, ptr, name + PATH_MAX - ptr);
	}
	return name;

err:
	btrfs_free_path(path);
	kfree(name);
	return ERR_PTR(ret);
}

/*
 * Look up the "default" dir item in the tree root and store the objectid it
 * points at in @objectid.  When the item does not exist, @objectid is set to
 * BTRFS_FS_TREE_OBJECTID.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * returned by the dir item lookup.
 */
static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	struct btrfs_key location;
	u64 dir_id;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	/*
	 * Find the "default" dir item which points to the root item that we
	 * will mount by default if we haven't been given a specific subvolume
	 * to mount.
	 */
	dir_id = btrfs_super_root_dir(fs_info->super_copy);
	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto out;
	}
	if (!di) {
		/*
		 * Ok the default dir item isn't there.  This is weird since
		 * it's always been there, but don't freak out, just try and
		 * mount the top-level subvolume.
		 */
		*objectid = BTRFS_FS_TREE_OBJECTID;
		goto out;
	}

	/* Read the key out of the leaf before the path is released. */
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
	*objectid = location.objectid;
out:
	btrfs_free_path(path);
	return ret;
}

C
Chris Mason 已提交
1122
static int btrfs_fill_super(struct super_block *sb,
1123
			    struct btrfs_fs_devices *fs_devices,
C
Chris Mason 已提交
1124
			    void *data, int silent)
C
Chris Mason 已提交
1125
{
C
Chris Mason 已提交
1126
	struct inode *inode;
1127
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1128
	struct btrfs_key key;
C
Chris Mason 已提交
1129
	int err;
1130

C
Chris Mason 已提交
1131 1132 1133
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_magic = BTRFS_SUPER_MAGIC;
	sb->s_op = &btrfs_super_ops;
A
Al Viro 已提交
1134
	sb->s_d_op = &btrfs_dentry_operations;
B
Balaji Rao 已提交
1135
	sb->s_export_op = &btrfs_export_ops;
J
Josef Bacik 已提交
1136
	sb->s_xattr = btrfs_xattr_handlers;
C
Chris Mason 已提交
1137
	sb->s_time_gran = 1;
C
Chris Mason 已提交
1138
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
J
Josef Bacik 已提交
1139
	sb->s_flags |= MS_POSIXACL;
1140
#endif
1141
	sb->s_flags |= MS_I_VERSION;
1142
	sb->s_iflags |= SB_I_CGROUPWB;
A
Al Viro 已提交
1143 1144
	err = open_ctree(sb, fs_devices, (char *)data);
	if (err) {
1145
		printk(KERN_ERR "BTRFS: open_ctree failed\n");
A
Al Viro 已提交
1146
		return err;
1147 1148
	}

1149 1150 1151
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
1152
	inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
1153 1154
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
C
Chris Mason 已提交
1155
		goto fail_close;
C
Chris Mason 已提交
1156 1157
	}

1158 1159
	sb->s_root = d_make_root(inode);
	if (!sb->s_root) {
C
Chris Mason 已提交
1160 1161
		err = -ENOMEM;
		goto fail_close;
C
Chris Mason 已提交
1162
	}
1163

C
Chris Mason 已提交
1164
	save_mount_options(sb, data);
D
Dan Magenheimer 已提交
1165
	cleancache_init_fs(sb);
1166
	sb->s_flags |= MS_ACTIVE;
C
Chris Mason 已提交
1167
	return 0;
C
Chris Mason 已提交
1168 1169

fail_close:
1170
	close_ctree(fs_info->tree_root);
C
Chris Mason 已提交
1171
	return err;
C
Chris Mason 已提交
1172 1173
}

S
Sage Weil 已提交
1174
int btrfs_sync_fs(struct super_block *sb, int wait)
C
Chris Mason 已提交
1175 1176
{
	struct btrfs_trans_handle *trans;
1177 1178
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
C
Chris Mason 已提交
1179

1180
	trace_btrfs_sync_fs(fs_info, wait);
1181

C
Chris Mason 已提交
1182
	if (!wait) {
1183
		filemap_flush(fs_info->btree_inode->i_mapping);
C
Chris Mason 已提交
1184 1185
		return 0;
	}
1186

1187
	btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
1188

M
Miao Xie 已提交
1189
	trans = btrfs_attach_transaction_barrier(root);
1190
	if (IS_ERR(trans)) {
1191
		/* no transaction, don't bother */
1192 1193 1194 1195 1196 1197 1198
		if (PTR_ERR(trans) == -ENOENT) {
			/*
			 * Exit unless we have some pending changes
			 * that need to go through commit
			 */
			if (fs_info->pending_changes == 0)
				return 0;
1199 1200 1201 1202 1203 1204 1205 1206 1207 1208
			/*
			 * A non-blocking test if the fs is frozen. We must not
			 * start a new transaction here otherwise a deadlock
			 * happens. The pending operations are delayed to the
			 * next commit after thawing.
			 */
			if (__sb_start_write(sb, SB_FREEZE_WRITE, false))
				__sb_end_write(sb, SB_FREEZE_WRITE);
			else
				return 0;
1209 1210
			trans = btrfs_start_transaction(root, 0);
		}
1211 1212
		if (IS_ERR(trans))
			return PTR_ERR(trans);
1213
	}
1214
	return btrfs_commit_transaction(trans, root);
C
Chris Mason 已提交
1215 1216
}

1217
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
E
Eric Paris 已提交
1218
{
1219 1220
	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
	struct btrfs_root *root = info->tree_root;
T
Tsutomu Itoh 已提交
1221
	char *compress_type;
E
Eric Paris 已提交
1222

1223
	if (btrfs_test_opt(info, DEGRADED))
E
Eric Paris 已提交
1224
		seq_puts(seq, ",degraded");
1225
	if (btrfs_test_opt(info, NODATASUM))
E
Eric Paris 已提交
1226
		seq_puts(seq, ",nodatasum");
1227
	if (btrfs_test_opt(info, NODATACOW))
E
Eric Paris 已提交
1228
		seq_puts(seq, ",nodatacow");
1229
	if (btrfs_test_opt(info, NOBARRIER))
E
Eric Paris 已提交
1230
		seq_puts(seq, ",nobarrier");
1231
	if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1232
		seq_printf(seq, ",max_inline=%llu", info->max_inline);
E
Eric Paris 已提交
1233
	if (info->alloc_start != 0)
1234
		seq_printf(seq, ",alloc_start=%llu", info->alloc_start);
E
Eric Paris 已提交
1235 1236 1237
	if (info->thread_pool_size !=  min_t(unsigned long,
					     num_online_cpus() + 2, 8))
		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
1238
	if (btrfs_test_opt(info, COMPRESS)) {
T
Tsutomu Itoh 已提交
1239 1240 1241 1242
		if (info->compress_type == BTRFS_COMPRESS_ZLIB)
			compress_type = "zlib";
		else
			compress_type = "lzo";
1243
		if (btrfs_test_opt(info, FORCE_COMPRESS))
T
Tsutomu Itoh 已提交
1244 1245 1246 1247
			seq_printf(seq, ",compress-force=%s", compress_type);
		else
			seq_printf(seq, ",compress=%s", compress_type);
	}
1248
	if (btrfs_test_opt(info, NOSSD))
C
Chris Mason 已提交
1249
		seq_puts(seq, ",nossd");
1250
	if (btrfs_test_opt(info, SSD_SPREAD))
1251
		seq_puts(seq, ",ssd_spread");
1252
	else if (btrfs_test_opt(info, SSD))
E
Eric Paris 已提交
1253
		seq_puts(seq, ",ssd");
1254
	if (btrfs_test_opt(info, NOTREELOG))
1255
		seq_puts(seq, ",notreelog");
1256
	if (btrfs_test_opt(info, NOLOGREPLAY))
1257
		seq_puts(seq, ",nologreplay");
1258
	if (btrfs_test_opt(info, FLUSHONCOMMIT))
1259
		seq_puts(seq, ",flushoncommit");
1260
	if (btrfs_test_opt(info, DISCARD))
1261
		seq_puts(seq, ",discard");
E
Eric Paris 已提交
1262 1263
	if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
		seq_puts(seq, ",noacl");
1264
	if (btrfs_test_opt(info, SPACE_CACHE))
T
Tsutomu Itoh 已提交
1265
		seq_puts(seq, ",space_cache");
1266
	else if (btrfs_test_opt(info, FREE_SPACE_TREE))
1267
		seq_puts(seq, ",space_cache=v2");
1268
	else
1269
		seq_puts(seq, ",nospace_cache");
1270
	if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1271
		seq_puts(seq, ",rescan_uuid_tree");
1272
	if (btrfs_test_opt(info, CLEAR_CACHE))
T
Tsutomu Itoh 已提交
1273
		seq_puts(seq, ",clear_cache");
1274
	if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
T
Tsutomu Itoh 已提交
1275
		seq_puts(seq, ",user_subvol_rm_allowed");
1276
	if (btrfs_test_opt(info, ENOSPC_DEBUG))
1277
		seq_puts(seq, ",enospc_debug");
1278
	if (btrfs_test_opt(info, AUTO_DEFRAG))
1279
		seq_puts(seq, ",autodefrag");
1280
	if (btrfs_test_opt(info, INODE_MAP_CACHE))
1281
		seq_puts(seq, ",inode_cache");
1282
	if (btrfs_test_opt(info, SKIP_BALANCE))
1283
		seq_puts(seq, ",skip_balance");
1284
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1285
	if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1286
		seq_puts(seq, ",check_int_data");
1287
	else if (btrfs_test_opt(info, CHECK_INTEGRITY))
1288 1289 1290 1291 1292 1293 1294 1295
		seq_puts(seq, ",check_int");
	if (info->check_integrity_print_mask)
		seq_printf(seq, ",check_int_print_mask=%d",
				info->check_integrity_print_mask);
#endif
	if (info->metadata_ratio)
		seq_printf(seq, ",metadata_ratio=%d",
				info->metadata_ratio);
1296
	if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
J
Jeff Mahoney 已提交
1297
		seq_puts(seq, ",fatal_errors=panic");
1298 1299
	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
		seq_printf(seq, ",commit=%d", info->commit_interval);
1300
#ifdef CONFIG_BTRFS_DEBUG
1301
	if (btrfs_test_opt(info, FRAGMENT_DATA))
1302
		seq_puts(seq, ",fragment=data");
1303
	if (btrfs_test_opt(info, FRAGMENT_METADATA))
1304 1305
		seq_puts(seq, ",fragment=metadata");
#endif
1306 1307 1308 1309
	seq_printf(seq, ",subvolid=%llu",
		  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
	seq_puts(seq, ",subvol=");
	seq_dentry(seq, dentry, " \t\n\\");
E
Eric Paris 已提交
1310 1311 1312
	return 0;
}

1313
static int btrfs_test_super(struct super_block *s, void *data)
Y
Yan 已提交
1314
{
1315 1316
	struct btrfs_fs_info *p = data;
	struct btrfs_fs_info *fs_info = btrfs_sb(s);
Y
Yan 已提交
1317

1318
	return fs_info->fs_devices == p->fs_devices;
Y
Yan 已提交
1319 1320
}

1321 1322
/*
 * Setup callback handed to sget(): initialise @s via set_anon_super() and,
 * on success, attach @data as its s_fs_info.  Returns set_anon_super()'s
 * result.
 */
static int btrfs_set_super(struct super_block *s, void *data)
{
	int err = set_anon_super(s, data);

	if (!err)
		s->s_fs_info = data;
	return err;
}

1329 1330 1331 1332 1333 1334 1335 1336 1337 1338
/*
 * A subvolume root is recognised by its inode number, which is always
 * BTRFS_FIRST_FREE_OBJECTID (256).  Returns 1 for such an inode and 0
 * otherwise, including for a NULL @inode.
 */
static inline int is_subvolume_inode(struct inode *inode)
{
	return inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID;
}

1339
/*
1340 1341 1342
 * This will add subvolid=0 to the argument string while removing any subvol=
 * and subvolid= arguments to make sure we get the top-level root for path
 * walking to the subvol we want.
1343 1344 1345
 */
static char *setup_root_args(char *args)
{
1346
	char *buf, *dst, *sep;
1347

1348 1349
	if (!args)
		return kstrdup("subvolid=0", GFP_NOFS);
1350

1351 1352
	/* The worst case is that we add ",subvolid=0" to the end. */
	buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS);
1353
	if (!buf)
1354 1355
		return NULL;

1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367
	while (1) {
		sep = strchrnul(args, ',');
		if (!strstarts(args, "subvol=") &&
		    !strstarts(args, "subvolid=")) {
			memcpy(dst, args, sep - args);
			dst += sep - args;
			*dst++ = ',';
		}
		if (*sep)
			args = sep + 1;
		else
			break;
1368
	}
1369
	strcpy(dst, "subvolid=0");
1370

1371
	return buf;
1372 1373
}

1374 1375 1376
/*
 * Mount the subvolume given by @subvol_name and/or @subvol_objectid.
 *
 * The top-level root is mounted internally first (with "subvolid=0" added to
 * the options), then the path of the requested subvolume is resolved inside
 * that mount with mount_subtree().
 *
 * @subvol_name is freed by this function on every path.
 *
 * Returns the root dentry of the subvolume, or an ERR_PTR().
 */
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
				   int flags, const char *device_name,
				   char *data)
{
	struct dentry *root;
	struct vfsmount *mnt = NULL;
	char *newargs;
	int ret;

	newargs = setup_root_args(data);
	if (!newargs) {
		root = ERR_PTR(-ENOMEM);
		goto out;
	}

	mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
	if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
		/* -EBUSY: retry the mount with the opposite MS_RDONLY setting. */
		if (flags & MS_RDONLY) {
			mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY,
					     device_name, newargs);
		} else {
			mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY,
					     device_name, newargs);
			if (IS_ERR(mnt)) {
				root = ERR_CAST(mnt);
				mnt = NULL;
				goto out;
			}

			/* The read-only retry worked: remount with the requested flags. */
			down_write(&mnt->mnt_sb->s_umount);
			ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
			up_write(&mnt->mnt_sb->s_umount);
			if (ret < 0) {
				root = ERR_PTR(ret);
				goto out;
			}
		}
	}
	if (IS_ERR(mnt)) {
		root = ERR_CAST(mnt);
		/* Do not hand an error pointer to mntput() below. */
		mnt = NULL;
		goto out;
	}

	if (!subvol_name) {
		if (!subvol_objectid) {
			ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
							  &subvol_objectid);
			if (ret) {
				root = ERR_PTR(ret);
				goto out;
			}
		}
		subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
							    subvol_objectid);
		if (IS_ERR(subvol_name)) {
			root = ERR_CAST(subvol_name);
			/* Do not hand an error pointer to kfree() below. */
			subvol_name = NULL;
			goto out;
		}

	}

	root = mount_subtree(mnt, subvol_name);
	/* mount_subtree() drops our reference on the vfsmount. */
	mnt = NULL;

	if (!IS_ERR(root)) {
		struct super_block *s = root->d_sb;
		struct inode *root_inode = d_inode(root);
		u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;

		ret = 0;
		if (!is_subvolume_inode(root_inode)) {
			pr_err("BTRFS: '%s' is not a valid subvolume\n",
			       subvol_name);
			ret = -EINVAL;
		}
		if (subvol_objectid && root_objectid != subvol_objectid) {
			/*
			 * This will also catch a race condition where a
			 * subvolume which was passed by ID is renamed and
			 * another subvolume is renamed over the old location.
			 */
			pr_err("BTRFS: subvol '%s' does not match subvolid %llu\n",
			       subvol_name, subvol_objectid);
			ret = -EINVAL;
		}
		if (ret) {
			dput(root);
			root = ERR_PTR(ret);
			deactivate_locked_super(s);
		}
	}

out:
	mntput(mnt);
	kfree(newargs);
	kfree(subvol_name);
	return root;
}
1475

1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508
/*
 * Copy the security related mount options out of @orig_opts into a scratch
 * buffer and parse them into @sec_opts.  The scratch buffer is released on
 * every path.  Returns 0, -ENOMEM, or the error of the failing LSM call.
 */
static int parse_security_options(char *orig_opts,
				  struct security_mnt_opts *sec_opts)
{
	char *scratch = alloc_secdata();
	int err;

	if (!scratch)
		return -ENOMEM;

	err = security_sb_copy_data(orig_opts, scratch);
	if (!err)
		err = security_sb_parse_opts_str(scratch, sec_opts);

	free_secdata(scratch);
	return err;
}

/*
 * Apply @sec_opts to @sb and, with CONFIG_SECURITY, record them in @fs_info
 * the first time through.  On later calls the options are freed instead.
 *
 * Returns 0, or the error from security_sb_set_mnt_opts(); in the error case
 * @sec_opts is left untouched for the caller to release.
 */
static int setup_security_options(struct btrfs_fs_info *fs_info,
				  struct super_block *sb,
				  struct security_mnt_opts *sec_opts)
{
	int ret = 0;

	/*
	 * Call security_sb_set_mnt_opts() to check whether new sec_opts
	 * is valid.
	 */
	ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
	if (ret)
		return ret;

#ifdef CONFIG_SECURITY
	if (!fs_info->security_opts.num_mnt_opts) {
		/* first time security setup, copy sec_opts to fs_info */
		memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
	} else {
		/*
		 * Since SELinux (the only one supporting security_mnt_opts)
		 * does NOT support changing context during remount/mount of
		 * the same sb, this must be the same or part of the same
		 * security options, just free it.
		 */
		security_free_mnt_opts(sec_opts);
	}
#endif
	return ret;
}

1526 1527 1528 1529 1530 1531
/*
 * Find a superblock for the given device / mount point.
 *
 * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
 *	  for multiple device setup.  Make sure to keep it in sync.
 */
A
Al Viro 已提交
1532
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1533
		const char *device_name, void *data)
Y
Yan 已提交
1534 1535 1536
{
	struct block_device *bdev = NULL;
	struct super_block *s;
1537
	struct btrfs_fs_devices *fs_devices = NULL;
1538
	struct btrfs_fs_info *fs_info = NULL;
1539
	struct security_mnt_opts new_sec_opts;
1540
	fmode_t mode = FMODE_READ;
1541 1542
	char *subvol_name = NULL;
	u64 subvol_objectid = 0;
Y
Yan 已提交
1543 1544
	int error = 0;

1545 1546 1547 1548
	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;

	error = btrfs_parse_early_options(data, mode, fs_type,
1549
					  &subvol_name, &subvol_objectid,
1550
					  &fs_devices);
1551 1552
	if (error) {
		kfree(subvol_name);
A
Al Viro 已提交
1553
		return ERR_PTR(error);
1554
	}
1555

1556
	if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
1557
		/* mount_subvol() will free subvol_name. */
1558 1559
		return mount_subvol(subvol_name, subvol_objectid, flags,
				    device_name, data);
1560 1561
	}

1562 1563 1564 1565 1566 1567 1568
	security_init_mnt_opts(&new_sec_opts);
	if (data) {
		error = parse_security_options(data, &new_sec_opts);
		if (error)
			return ERR_PTR(error);
	}

1569
	error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
1570
	if (error)
1571
		goto error_sec_opts;
Y
Yan 已提交
1572

1573 1574 1575 1576 1577 1578 1579
	/*
	 * Setup a dummy root and fs_info for test/set super.  This is because
	 * we don't actually fill this stuff out until open_ctree, but we need
	 * it for searching for existing supers, so this lets us do that and
	 * then open_ctree will properly initialize everything later.
	 */
	fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
1580 1581 1582 1583
	if (!fs_info) {
		error = -ENOMEM;
		goto error_sec_opts;
	}
1584

1585 1586
	fs_info->fs_devices = fs_devices;

1587 1588
	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
1589
	security_init_mnt_opts(&fs_info->security_opts);
1590 1591
	if (!fs_info->super_copy || !fs_info->super_for_commit) {
		error = -ENOMEM;
1592 1593 1594 1595 1596 1597 1598 1599 1600
		goto error_fs_info;
	}

	error = btrfs_open_devices(fs_devices, mode, fs_type);
	if (error)
		goto error_fs_info;

	if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
		error = -EACCES;
1601 1602 1603
		goto error_close_devices;
	}

1604
	bdev = fs_devices->latest_bdev;
D
David Howells 已提交
1605 1606
	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
		 fs_info);
1607 1608 1609 1610
	if (IS_ERR(s)) {
		error = PTR_ERR(s);
		goto error_close_devices;
	}
Y
Yan 已提交
1611 1612

	if (s->s_root) {
Y
Yan Zheng 已提交
1613
		btrfs_close_devices(fs_devices);
1614
		free_fs_info(fs_info);
1615 1616
		if ((flags ^ s->s_flags) & MS_RDONLY)
			error = -EBUSY;
Y
Yan 已提交
1617
	} else {
1618
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
1619
		btrfs_sb(s)->bdev_holder = fs_type;
1620 1621
		error = btrfs_fill_super(s, fs_devices, data,
					 flags & MS_SILENT ? 1 : 0);
Y
Yan 已提交
1622
	}
1623
	if (error) {
1624 1625 1626 1627 1628 1629 1630
		deactivate_locked_super(s);
		goto error_sec_opts;
	}

	fs_info = btrfs_sb(s);
	error = setup_security_options(fs_info, s, &new_sec_opts);
	if (error) {
1631
		deactivate_locked_super(s);
1632 1633
		goto error_sec_opts;
	}
Y
Yan 已提交
1634

1635
	return dget(s->s_root);
Y
Yan 已提交
1636

Y
Yan Zheng 已提交
1637
error_close_devices:
1638
	btrfs_close_devices(fs_devices);
1639
error_fs_info:
1640
	free_fs_info(fs_info);
1641 1642
error_sec_opts:
	security_free_mnt_opts(&new_sec_opts);
A
Al Viro 已提交
1643
	return ERR_PTR(error);
Y
Yan 已提交
1644
}
1645

1646 1647 1648 1649 1650 1651 1652 1653
/*
 * Record @new_pool_size in @fs_info and apply it as the new maximum to each
 * of the work queues listed below.  Does nothing when the size is unchanged.
 */
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
				     int new_pool_size, int old_pool_size)
{
	if (new_pool_size == old_pool_size)
		return;

	fs_info->thread_pool_size = new_pool_size;

	btrfs_info(fs_info, "resize thread pool %d -> %d",
	       old_pool_size, new_pool_size);

	btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
				new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
				new_pool_size);
}

1673
static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
M
Miao Xie 已提交
1674 1675
{
	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1676
}
M
Miao Xie 已提交
1677

1678 1679 1680
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
				       unsigned long old_opts, int flags)
{
M
Miao Xie 已提交
1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
	     (flags & MS_RDONLY))) {
		/* wait for any defraggers to finish */
		wait_event(fs_info->transaction_wait,
			   (atomic_read(&fs_info->defrag_running) == 0));
		if (flags & MS_RDONLY)
			sync_filesystem(fs_info->sb);
	}
}

/*
 * Finish a remount: drop the defrag inode records when they are no longer
 * wanted and clear the REMOUNTING state bit.
 */
static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
					 unsigned long old_opts)
{
	/*
	 * We need to clean up all defraggable inodes if autodefrag has been
	 * turned off or the filesystem is read only.
	 */
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
	     (fs_info->sb->s_flags & MS_RDONLY))) {
		btrfs_cleanup_defrag_inodes(fs_info);
	}

	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
}

Y
Yan Zheng 已提交
1708 1709
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
1710 1711
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
1712 1713 1714 1715 1716 1717 1718
	unsigned old_flags = sb->s_flags;
	unsigned long old_opts = fs_info->mount_opt;
	unsigned long old_compress_type = fs_info->compress_type;
	u64 old_max_inline = fs_info->max_inline;
	u64 old_alloc_start = fs_info->alloc_start;
	int old_thread_pool_size = fs_info->thread_pool_size;
	unsigned int old_metadata_ratio = fs_info->metadata_ratio;
Y
Yan Zheng 已提交
1719 1720
	int ret;

1721
	sync_filesystem(sb);
1722
	btrfs_remount_prepare(fs_info);
M
Miao Xie 已提交
1723

1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738
	if (data) {
		struct security_mnt_opts new_sec_opts;

		security_init_mnt_opts(&new_sec_opts);
		ret = parse_security_options(data, &new_sec_opts);
		if (ret)
			goto restore;
		ret = setup_security_options(fs_info, sb,
					     &new_sec_opts);
		if (ret) {
			security_free_mnt_opts(&new_sec_opts);
			goto restore;
		}
	}

1739
	ret = btrfs_parse_options(root, data, *flags);
1740 1741 1742 1743
	if (ret) {
		ret = -EINVAL;
		goto restore;
	}
1744

1745
	btrfs_remount_begin(fs_info, old_opts, *flags);
1746 1747 1748
	btrfs_resize_thread_pool(fs_info,
		fs_info->thread_pool_size, old_thread_pool_size);

Y
Yan Zheng 已提交
1749
	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
M
Miao Xie 已提交
1750
		goto out;
Y
Yan Zheng 已提交
1751 1752

	if (*flags & MS_RDONLY) {
1753 1754 1755 1756
		/*
		 * this also happens on 'umount -rf' or on shutdown, when
		 * the filesystem is busy.
		 */
1757
		cancel_work_sync(&fs_info->async_reclaim_work);
1758 1759 1760 1761 1762 1763

		/* wait for the uuid_scan task to finish */
		down(&fs_info->uuid_tree_rescan_sem);
		/* avoid complains from lockdep et al. */
		up(&fs_info->uuid_tree_rescan_sem);

Y
Yan Zheng 已提交
1764 1765
		sb->s_flags |= MS_RDONLY;

1766 1767 1768 1769 1770 1771 1772 1773 1774
		/*
		 * Setting MS_RDONLY will put the cleaner thread to
		 * sleep at the next loop if it's already active.
		 * If it's already asleep, we'll leave unused block
		 * groups on disk until we're mounted read-write again
		 * unless we clean them up here.
		 */
		btrfs_delete_unused_bgs(fs_info);

1775 1776
		btrfs_dev_replace_suspend_for_unmount(fs_info);
		btrfs_scrub_cancel(fs_info);
1777
		btrfs_pause_balance(fs_info);
1778

1779 1780 1781
		ret = btrfs_commit_super(root);
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1782
	} else {
1783 1784
		if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
			btrfs_err(fs_info,
1785
				"Remounting read-write after error is not allowed");
1786 1787 1788
			ret = -EINVAL;
			goto restore;
		}
1789
		if (fs_info->fs_devices->rw_devices == 0) {
1790 1791
			ret = -EACCES;
			goto restore;
1792
		}
Y
Yan Zheng 已提交
1793

1794 1795 1796
		if (fs_info->fs_devices->missing_devices >
		     fs_info->num_tolerated_disk_barrier_failures &&
		    !(*flags & MS_RDONLY)) {
1797 1798
			btrfs_warn(fs_info,
				"too many missing devices, writeable remount is not allowed");
1799 1800 1801 1802
			ret = -EACCES;
			goto restore;
		}

1803
		if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1804 1805
			ret = -EINVAL;
			goto restore;
1806
		}
Y
Yan Zheng 已提交
1807

1808
		ret = btrfs_cleanup_fs_roots(fs_info);
1809 1810
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1811

1812
		/* recover relocation */
1813
		mutex_lock(&fs_info->cleaner_mutex);
1814
		ret = btrfs_recover_relocation(root);
1815
		mutex_unlock(&fs_info->cleaner_mutex);
1816 1817
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1818

1819 1820 1821 1822
		ret = btrfs_resume_balance_async(fs_info);
		if (ret)
			goto restore;

1823 1824
		ret = btrfs_resume_dev_replace_async(fs_info);
		if (ret) {
1825
			btrfs_warn(fs_info, "failed to resume dev_replace");
1826 1827
			goto restore;
		}
1828 1829

		if (!fs_info->uuid_root) {
1830
			btrfs_info(fs_info, "creating UUID tree");
1831 1832
			ret = btrfs_create_uuid_tree(fs_info);
			if (ret) {
1833
				btrfs_warn(fs_info, "failed to create the UUID tree %d", ret);
1834 1835 1836
				goto restore;
			}
		}
Y
Yan Zheng 已提交
1837
		sb->s_flags &= ~MS_RDONLY;
1838 1839

		fs_info->open = 1;
Y
Yan Zheng 已提交
1840
	}
M
Miao Xie 已提交
1841
out:
1842
	wake_up_process(fs_info->transaction_kthread);
M
Miao Xie 已提交
1843
	btrfs_remount_cleanup(fs_info, old_opts);
Y
Yan Zheng 已提交
1844
	return 0;
1845 1846 1847 1848 1849 1850 1851 1852 1853

restore:
	/* We've hit an error - don't reset MS_RDONLY */
	if (sb->s_flags & MS_RDONLY)
		old_flags |= MS_RDONLY;
	sb->s_flags = old_flags;
	fs_info->mount_opt = old_opts;
	fs_info->compress_type = old_compress_type;
	fs_info->max_inline = old_max_inline;
M
Miao Xie 已提交
1854
	mutex_lock(&fs_info->chunk_mutex);
1855
	fs_info->alloc_start = old_alloc_start;
M
Miao Xie 已提交
1856
	mutex_unlock(&fs_info->chunk_mutex);
1857 1858
	btrfs_resize_thread_pool(fs_info,
		old_thread_pool_size, fs_info->thread_pool_size);
1859
	fs_info->metadata_ratio = old_metadata_ratio;
M
Miao Xie 已提交
1860
	btrfs_remount_cleanup(fs_info, old_opts);
1861
	return ret;
Y
Yan Zheng 已提交
1862 1863
}

1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889
/* Used to sort the devices by max_avail(descending sort) */
static int btrfs_cmp_device_free_bytes(const void *dev_info1,
				       const void *dev_info2)
{
	if (((struct btrfs_device_info *)dev_info1)->max_avail >
	    ((struct btrfs_device_info *)dev_info2)->max_avail)
		return -1;
	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
		 ((struct btrfs_device_info *)dev_info2)->max_avail)
		return 1;
	else
	return 0;
}

/*
 * Sort @devices in place by max_avail in descending order; max_avail holds
 * the max free extent size of each device.
 */
static inline void btrfs_descending_sort_devices(
					struct btrfs_device_info *devices,
					size_t nr_devices)
{
	sort(devices, nr_devices, sizeof(*devices),
	     btrfs_cmp_device_free_bytes, NULL);
}

1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904
/*
 * The helper to calc the free space on the devices that can be used to store
 * file data.
 *
 * @root:       root whose fs_info is inspected; also passed to
 *              btrfs_get_alloc_profile() to find the data profile
 * @free_bytes: set to the computed amount of free space on success
 *
 * Returns 0 on success, -ENOMEM if the temporary device array cannot be
 * allocated, or the error returned by btrfs_account_dev_extents_size().
 */
static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_device_info *devices_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 alloc_start;
	u64 skip_space;
	u64 type;
	u64 avail_space;
	u64 used_space;
	u64 min_stripe_size;
	int min_stripes = 1, num_stripes = 1;
	int i = 0, nr_devices;
	int ret;

	/*
	 * We aren't under the device list lock, so this is racy-ish, but good
	 * enough for our purposes.
	 */
	nr_devices = fs_info->fs_devices->open_devices;
	if (!nr_devices) {
		smp_mb();
		nr_devices = fs_info->fs_devices->open_devices;
		ASSERT(nr_devices);
		if (!nr_devices) {
			*free_bytes = 0;
			return 0;
		}
	}

	devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
			       GFP_NOFS);
	if (!devices_info)
		return -ENOMEM;

	/* calc min stripe number for data space allocation */
	type = btrfs_get_alloc_profile(root, 1);
	if (type & BTRFS_BLOCK_GROUP_RAID0) {
		min_stripes = 2;
		num_stripes = nr_devices;
	} else if (type & BTRFS_BLOCK_GROUP_RAID1) {
		min_stripes = 2;
		num_stripes = 2;
	} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
		min_stripes = 4;
		num_stripes = 4;
	}

	if (type & BTRFS_BLOCK_GROUP_DUP)
		min_stripe_size = 2 * BTRFS_STRIPE_LEN;
	else
		min_stripe_size = BTRFS_STRIPE_LEN;

	/*
	 * Sample alloc_start exactly once.  The value decides whether
	 * device_list_mutex is taken, whether it is dropped again and whether
	 * the loop body leaves the RCU section, so all three decisions must
	 * see the same value even if it is changed concurrently.
	 */
	alloc_start = fs_info->alloc_start;

	if (alloc_start)
		mutex_lock(&fs_devices->device_list_mutex);
	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
		if (!device->in_fs_metadata || !device->bdev ||
		    device->is_tgtdev_for_dev_replace)
			continue;

		/* devices_info only has room for nr_devices entries */
		if (i >= nr_devices)
			break;

		avail_space = device->total_bytes - device->bytes_used;

		/* align with stripe_len */
		avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
		avail_space *= BTRFS_STRIPE_LEN;

		/*
		 * In order to avoid overwriting the superblock on the drive,
		 * btrfs starts at an offset of at least 1MB when doing chunk
		 * allocation.
		 */
		skip_space = SZ_1M;

		/* user can set the offset in fs_info->alloc_start. */
		if (alloc_start &&
		    alloc_start + BTRFS_STRIPE_LEN <= device->total_bytes) {
			/*
			 * The RCU read section is left around the accounting
			 * call; device_list_mutex is held here because
			 * alloc_start is non-zero.
			 */
			rcu_read_unlock();
			skip_space = max(alloc_start, skip_space);

			/*
			 * btrfs can not use the free space in
			 * [0, skip_space - 1], we must subtract it from the
			 * total. In order to implement it, we account the used
			 * space in this range first.
			 */
			ret = btrfs_account_dev_extents_size(device, 0,
							     skip_space - 1,
							     &used_space);
			if (ret) {
				kfree(devices_info);
				mutex_unlock(&fs_devices->device_list_mutex);
				return ret;
			}

			rcu_read_lock();

			/* calc the free space in [0, skip_space - 1] */
			skip_space -= used_space;
		}

		/*
		 * we can not use the free space in [0, skip_space - 1],
		 * subtract it from the total.
		 */
		if (avail_space && avail_space >= skip_space)
			avail_space -= skip_space;
		else
			avail_space = 0;

		if (avail_space < min_stripe_size)
			continue;

		devices_info[i].dev = device;
		devices_info[i].max_avail = avail_space;

		i++;
	}
	rcu_read_unlock();
	if (alloc_start)
		mutex_unlock(&fs_devices->device_list_mutex);

	/* from here on only the entries that were filled in are considered */
	nr_devices = i;

	btrfs_descending_sort_devices(devices_info, nr_devices);

	/*
	 * Walk from the device with the least space towards the one with the
	 * most: take that amount from num_stripes devices at a time, then
	 * drop the exhausted device from consideration.
	 */
	i = nr_devices - 1;
	avail_space = 0;
	while (nr_devices >= min_stripes) {
		if (num_stripes > nr_devices)
			num_stripes = nr_devices;

		if (devices_info[i].max_avail >= min_stripe_size) {
			int j;
			u64 alloc_size;

			avail_space += devices_info[i].max_avail * num_stripes;
			alloc_size = devices_info[i].max_avail;
			for (j = i + 1 - num_stripes; j <= i; j++)
				devices_info[j].max_avail -= alloc_size;
		}
		i--;
		nr_devices--;
	}

	kfree(devices_info);
	*free_bytes = avail_space;
	return 0;
}

2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059
/*
 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
 *
 * If there's a redundant raid level at DATA block groups, use the respective
 * multiplier to scale the sizes.
 *
 * Unused device space usage is based on simulating the chunk allocator
 * algorithm that respects the device sizes, order of allocations and the
 * 'alloc_start' value, this is a close approximation of the actual use but
 * there are other factors that may change the result (like a new metadata
 * chunk).
 *
2060
 * If metadata is exhausted, f_bavail will be 0.
2061
 */
C
Chris Mason 已提交
2062 2063
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
2064 2065 2066
	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	struct list_head *head = &fs_info->space_info;
2067 2068
	struct btrfs_space_info *found;
	u64 total_used = 0;
2069
	u64 total_free_data = 0;
2070
	u64 total_free_meta = 0;
2071
	int bits = dentry->d_sb->s_blocksize_bits;
2072
	__be32 *fsid = (__be32 *)fs_info->fsid;
2073 2074
	unsigned factor = 1;
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2075
	int ret;
2076
	u64 thresh = 0;
2077
	int mixed = 0;
C
Chris Mason 已提交
2078

2079
	/*
2080
	 * holding chunk_mutex to avoid allocating new chunks, holding
2081 2082
	 * device_list_mutex to avoid the device being removed
	 */
2083
	rcu_read_lock();
J
Josef Bacik 已提交
2084
	list_for_each_entry_rcu(found, head, list) {
2085
		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2086 2087
			int i;

2088 2089 2090
			total_free_data += found->disk_total - found->disk_used;
			total_free_data -=
				btrfs_account_ro_block_groups_free_space(found);
2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101

			for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
				if (!list_empty(&found->block_groups[i])) {
					switch (i) {
					case BTRFS_RAID_DUP:
					case BTRFS_RAID_RAID1:
					case BTRFS_RAID_RAID10:
						factor = 2;
					}
				}
			}
2102
		}
2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113

		/*
		 * Metadata in mixed block goup profiles are accounted in data
		 */
		if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
			if (found->flags & BTRFS_BLOCK_GROUP_DATA)
				mixed = 1;
			else
				total_free_meta += found->disk_total -
					found->disk_used;
		}
2114

2115
		total_used += found->disk_used;
J
Josef Bacik 已提交
2116
	}
2117

2118 2119
	rcu_read_unlock();

2120 2121 2122 2123 2124 2125
	buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
	buf->f_blocks >>= bits;
	buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);

	/* Account global block reserve as used, it's in logical size already */
	spin_lock(&block_rsv->lock);
2126 2127 2128 2129 2130
	/* Mixed block groups accounting is not byte-accurate, avoid overflow */
	if (buf->f_bfree >= block_rsv->size >> bits)
		buf->f_bfree -= block_rsv->size >> bits;
	else
		buf->f_bfree = 0;
2131 2132
	spin_unlock(&block_rsv->lock);

2133
	buf->f_bavail = div_u64(total_free_data, factor);
2134
	ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
2135
	if (ret)
2136
		return ret;
2137
	buf->f_bavail += div_u64(total_free_data, factor);
2138
	buf->f_bavail = buf->f_bavail >> bits;
C
Chris Mason 已提交
2139

2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154
	/*
	 * We calculate the remaining metadata space minus global reserve. If
	 * this is (supposedly) smaller than zero, there's no space. But this
	 * does not hold in practice, the exhausted state happens where's still
	 * some positive delta. So we apply some guesswork and compare the
	 * delta to a 4M threshold.  (Practically observed delta was ~2M.)
	 *
	 * We probably cannot calculate the exact threshold value because this
	 * depends on the internal reservations requested by various
	 * operations, so some operations that consume a few metadata will
	 * succeed even if the Avail is zero. But this is better than the other
	 * way around.
	 */
	thresh = 4 * 1024 * 1024;

2155
	if (!mixed && total_free_meta - thresh < block_rsv->size)
2156 2157
		buf->f_bavail = 0;

2158 2159 2160 2161
	buf->f_type = BTRFS_SUPER_MAGIC;
	buf->f_bsize = dentry->d_sb->s_blocksize;
	buf->f_namelen = BTRFS_NAME_LEN;

2162
	/* We treat it as constant endianness (it doesn't matter _which_)
C
Chris Mason 已提交
2163
	   because we want the fsid to come out the same whether mounted
2164 2165 2166
	   on a big-endian or little-endian host */
	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2167
	/* Mask in the root object ID too, to disambiguate subvols */
2168 2169
	buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
	buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
2170

C
Chris Mason 已提交
2171 2172
	return 0;
}
C
Chris Mason 已提交
2173

A
Al Viro 已提交
2174 2175
/*
 * kill_sb callback of btrfs_fs_type: shut down the superblock, then free the
 * btrfs_fs_info that was attached to it.
 */
static void btrfs_kill_super(struct super_block *sb)
{
	/* fetch the pointer before the superblock is torn down */
	struct btrfs_fs_info *info = btrfs_sb(sb);

	kill_anon_super(sb);
	free_fs_info(info);
}

2181 2182 2183
static struct file_system_type btrfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "btrfs",
A
Al Viro 已提交
2184
	.mount		= btrfs_mount,
A
Al Viro 已提交
2185
	.kill_sb	= btrfs_kill_super,
2186
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
2187
};
2188
MODULE_ALIAS_FS("btrfs");
2189

2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200
/*
 * open handler of the control device.  Always succeeds and returns 0.
 */
static int btrfs_control_open(struct inode *inode, struct file *file)
{
	/*
	 * private_data of the control file holds the transaction once one is
	 * started and doubles as the "transaction already in progress"
	 * marker, so a freshly opened file starts out with none.
	 */
	file->private_data = NULL;

	return 0;
}

C
Chris Mason 已提交
2201 2202 2203
/*
 * used by btrfsctl to scan devices when no FS is mounted
 */
2204 2205 2206 2207 2208
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
				unsigned long arg)
{
	struct btrfs_ioctl_vol_args *vol;
	struct btrfs_fs_devices *fs_devices;
2209
	int ret = -ENOTTY;
2210

2211 2212 2213
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

L
Li Zefan 已提交
2214 2215 2216
	vol = memdup_user((void __user *)arg, sizeof(*vol));
	if (IS_ERR(vol))
		return PTR_ERR(vol);
2217

2218 2219
	switch (cmd) {
	case BTRFS_IOC_SCAN_DEV:
2220
		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
2221 2222
					    &btrfs_fs_type, &fs_devices);
		break;
J
Josef Bacik 已提交
2223 2224 2225 2226 2227 2228 2229
	case BTRFS_IOC_DEVICES_READY:
		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
					    &btrfs_fs_type, &fs_devices);
		if (ret)
			break;
		ret = !(fs_devices->num_devices == fs_devices->total_devices);
		break;
2230
	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2231
		ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2232
		break;
2233
	}
L
Li Zefan 已提交
2234

2235
	kfree(vol);
L
Linda Knippers 已提交
2236
	return ret;
2237 2238
}

2239
static int btrfs_freeze(struct super_block *sb)
Y
Yan 已提交
2240
{
2241 2242 2243
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = btrfs_sb(sb)->tree_root;

M
Miao Xie 已提交
2244
	trans = btrfs_attach_transaction_barrier(root);
2245 2246 2247 2248 2249 2250 2251
	if (IS_ERR(trans)) {
		/* no transaction, don't bother */
		if (PTR_ERR(trans) == -ENOENT)
			return 0;
		return PTR_ERR(trans);
	}
	return btrfs_commit_transaction(trans, root);
Y
Yan 已提交
2252 2253
}

J
Josef Bacik 已提交
2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266
/*
 * show_devname callback: print the name of the device with the lowest devid
 * among all devices of the filesystem and of its chain of seed filesystems.
 * Devices that are missing or have no name are not considered.
 *
 * Warns if no device qualifies.  Always returns 0.
 */
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
	struct btrfs_fs_devices *fsdevs;
	struct btrfs_device *cand, *lowest = NULL;
	struct rcu_string *devname;

	mutex_lock(&fs_info->fs_devices->device_list_mutex);

	for (fsdevs = fs_info->fs_devices; fsdevs; fsdevs = fsdevs->seed) {
		list_for_each_entry(cand, &fsdevs->devices, dev_list) {
			if (cand->missing || !cand->name)
				continue;
			if (!lowest || cand->devid < lowest->devid)
				lowest = cand;
		}
	}

	if (!lowest) {
		WARN_ON(1);
	} else {
		/* the name is an RCU-managed string */
		rcu_read_lock();
		devname = rcu_dereference(lowest->name);
		seq_escape(m, devname->str, " \t\n\\");
		rcu_read_unlock();
	}

	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
	return 0;
}

2289
static const struct super_operations btrfs_super_ops = {
2290
	.drop_inode	= btrfs_drop_inode,
A
Al Viro 已提交
2291
	.evict_inode	= btrfs_evict_inode,
C
Chris Mason 已提交
2292
	.put_super	= btrfs_put_super,
2293
	.sync_fs	= btrfs_sync_fs,
E
Eric Paris 已提交
2294
	.show_options	= btrfs_show_options,
J
Josef Bacik 已提交
2295
	.show_devname	= btrfs_show_devname,
C
Chris Mason 已提交
2296
	.write_inode	= btrfs_write_inode,
C
Chris Mason 已提交
2297 2298
	.alloc_inode	= btrfs_alloc_inode,
	.destroy_inode	= btrfs_destroy_inode,
C
Chris Mason 已提交
2299
	.statfs		= btrfs_statfs,
Y
Yan Zheng 已提交
2300
	.remount_fs	= btrfs_remount,
2301
	.freeze_fs	= btrfs_freeze,
C
Chris Mason 已提交
2302
};
2303 2304

static const struct file_operations btrfs_ctl_fops = {
2305
	.open = btrfs_control_open,
2306 2307 2308
	.unlocked_ioctl	 = btrfs_control_ioctl,
	.compat_ioctl = btrfs_control_ioctl,
	.owner	 = THIS_MODULE,
2309
	.llseek = noop_llseek,
2310 2311 2312
};

static struct miscdevice btrfs_misc = {
2313
	.minor		= BTRFS_MINOR,
2314 2315 2316 2317
	.name		= "btrfs-control",
	.fops		= &btrfs_ctl_fops
};

2318 2319 2320
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");

2321 2322 2323 2324 2325
/*
 * Register the btrfs-control misc device.
 *
 * Returns the result of misc_register().
 */
static int btrfs_interface_init(void)
{
	int err = misc_register(&btrfs_misc);

	return err;
}

2326
static void btrfs_interface_exit(void)
2327
{
2328
	misc_deregister(&btrfs_misc);
2329 2330
}

2331
static void btrfs_print_mod_info(void)
2332
{
2333
	printk(KERN_INFO "Btrfs loaded, crc32c=%s"
2334 2335 2336
#ifdef CONFIG_BTRFS_DEBUG
			", debug=on"
#endif
2337 2338 2339
#ifdef CONFIG_BTRFS_ASSERT
			", assert=on"
#endif
2340 2341 2342
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
			", integrity-checker=on"
#endif
2343 2344
			"\n",
			btrfs_crc32c_impl());
2345 2346
}

2347 2348
/*
 * Module initialisation: bring up the btrfs subsystems one after another
 * and finally register the filesystem type.
 *
 * A step that fails jumps to the label that undoes the steps completed
 * before it, so the labels at the bottom run in reverse order of set-up.
 *
 * Returns 0 on success or the error code of the step that failed.
 */
static int __init init_btrfs_fs(void)
{
	int err;

	err = btrfs_hash_init();
	if (err)
		return err;

	btrfs_props_init();

	err = btrfs_init_sysfs();
	if (err)
		goto free_hash;

	btrfs_init_compress();

	err = btrfs_init_cachep();
	if (err)
		goto free_compress;

	err = extent_io_init();
	if (err)
		goto free_cachep;

	err = extent_map_init();
	if (err)
		goto free_extent_io;

	err = ordered_data_init();
	if (err)
		goto free_extent_map;

	err = btrfs_delayed_inode_init();
	if (err)
		goto free_ordered_data;

	err = btrfs_auto_defrag_init();
	if (err)
		goto free_delayed_inode;

	err = btrfs_delayed_ref_init();
	if (err)
		goto free_auto_defrag;

	err = btrfs_prelim_ref_init();
	if (err)
		goto free_delayed_ref;

	err = btrfs_end_io_wq_init();
	if (err)
		goto free_prelim_ref;

	err = btrfs_interface_init();
	if (err)
		goto free_end_io_wq;

	btrfs_init_lockdep();

	btrfs_print_mod_info();

	/* the sanity tests run before the filesystem type becomes visible */
	err = btrfs_run_sanity_tests();
	if (err)
		goto unregister_ioctl;

	err = register_filesystem(&btrfs_fs_type);
	if (err)
		goto unregister_ioctl;

	return 0;

unregister_ioctl:
	btrfs_interface_exit();
free_end_io_wq:
	btrfs_end_io_wq_exit();
free_prelim_ref:
	btrfs_prelim_ref_exit();
free_delayed_ref:
	btrfs_delayed_ref_exit();
free_auto_defrag:
	btrfs_auto_defrag_exit();
free_delayed_inode:
	btrfs_delayed_inode_exit();
free_ordered_data:
	ordered_data_exit();
free_extent_map:
	extent_map_exit();
free_extent_io:
	extent_io_exit();
free_cachep:
	btrfs_destroy_cachep();
free_compress:
	/* compression and sysfs are torn down together under this label */
	btrfs_exit_compress();
	btrfs_exit_sysfs();
free_hash:
	btrfs_hash_exit();
	return err;
}

static void __exit exit_btrfs_fs(void)
{
C
Chris Mason 已提交
2447
	btrfs_destroy_cachep();
2448
	btrfs_delayed_ref_exit();
2449
	btrfs_auto_defrag_exit();
2450
	btrfs_delayed_inode_exit();
2451
	btrfs_prelim_ref_exit();
2452
	ordered_data_exit();
2453
	extent_map_exit();
2454
	extent_io_exit();
2455
	btrfs_interface_exit();
2456
	btrfs_end_io_wq_exit();
2457
	unregister_filesystem(&btrfs_fs_type);
2458
	btrfs_exit_sysfs();
2459
	btrfs_cleanup_fs_uuids();
2460
	btrfs_exit_compress();
2461
	btrfs_hash_exit();
2462 2463
}

2464
late_initcall(init_btrfs_fs);
2465 2466 2467
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");