super.c 64.4 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
C
Chris Mason 已提交
2 3 4 5
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

Y
Yan 已提交
6
#include <linux/blkdev.h>
7 8 9 10 11 12
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
E
Eric Paris 已提交
13
#include <linux/seq_file.h>
14 15
#include <linux/string.h>
#include <linux/backing-dev.h>
Y
Yan 已提交
16
#include <linux/mount.h>
C
Chris Mason 已提交
17
#include <linux/writeback.h>
C
Chris Mason 已提交
18
#include <linux/statfs.h>
C
Chris Mason 已提交
19
#include <linux/compat.h>
20
#include <linux/parser.h>
21
#include <linux/ctype.h>
22
#include <linux/namei.h>
23
#include <linux/miscdevice.h>
24
#include <linux/magic.h>
25
#include <linux/slab.h>
D
Dan Magenheimer 已提交
26
#include <linux/cleancache.h>
27
#include <linux/ratelimit.h>
28
#include <linux/crc32c.h>
29
#include <linux/btrfs.h>
30
#include "delayed-inode.h"
31
#include "ctree.h"
C
Chris Mason 已提交
32
#include "disk-io.h"
33
#include "transaction.h"
C
Chris Mason 已提交
34
#include "btrfs_inode.h"
C
Chris Mason 已提交
35
#include "print-tree.h"
36
#include "props.h"
J
Josef Bacik 已提交
37
#include "xattr.h"
38
#include "volumes.h"
B
Balaji Rao 已提交
39
#include "export.h"
C
Chris Mason 已提交
40
#include "compression.h"
J
Josef Bacik 已提交
41
#include "rcu-string.h"
42
#include "dev-replace.h"
43
#include "free-space-cache.h"
44
#include "backref.h"
45
#include "space-info.h"
46
#include "sysfs.h"
47
#include "tests/btrfs-tests.h"
48
#include "block-group.h"
49

50
#include "qgroup.h"
51 52 53
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>

54
static const struct super_operations btrfs_super_ops;
55 56 57 58 59 60

/*
 * Types for mounting the default subvolume and a subvolume explicitly
 * requested by subvol=/path. That way the callchain is straightforward and we
 * don't have to play tricks with the mount options and recursive calls to
 * btrfs_mount.
61 62
 *
 * The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
63
 */
64
static struct file_system_type btrfs_fs_type;
65
static struct file_system_type btrfs_root_fs_type;
C
Chris Mason 已提交
66

67 68
static int btrfs_remount(struct super_block *sb, int *flags, char *data);

69
/*
 * Translate a negative errno value into a short human-readable description.
 *
 * @error: negative errno value (e.g. -EIO)
 *
 * Only the codes listed in the switch have a dedicated string; any other
 * value, including zero and positive numbers, yields "unknown".
 *
 * Returns a pointer to a string literal; the caller must not modify or free
 * it.
 */
const char *btrfs_decode_error(int error)
{
	const char *errstr = "unknown";

	switch (error) {
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		errstr = "Readonly filesystem";
		break;
	case -EEXIST:
		errstr = "Object already exists";
		break;
	case -ENOSPC:
		errstr = "No space left";
		break;
	case -ENOENT:
		errstr = "No such entry";
		break;
	}

	return errstr;
}

/*
98
 * __btrfs_handle_fs_error decodes expected errors from the caller and
99
 * invokes the appropriate error response.
L
liubo 已提交
100
 */
101
__cold
102
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
J
Jeff Mahoney 已提交
103
		       unsigned int line, int errno, const char *fmt, ...)
L
liubo 已提交
104 105
{
	struct super_block *sb = fs_info->sb;
106
#ifdef CONFIG_PRINTK
L
liubo 已提交
107
	const char *errstr;
108
#endif
L
liubo 已提交
109 110 111

	/*
	 * Special case: if the error is EROFS, and we're already
112
	 * under SB_RDONLY, then it is safe here.
L
liubo 已提交
113
	 */
114
	if (errno == -EROFS && sb_rdonly(sb))
J
Jeff Mahoney 已提交
115 116
  		return;

117
#ifdef CONFIG_PRINTK
118
	errstr = btrfs_decode_error(errno);
J
Jeff Mahoney 已提交
119
	if (fmt) {
120 121 122 123 124 125
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
J
Jeff Mahoney 已提交
126

127
		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
128
			sb->s_id, function, line, errno, errstr, &vaf);
129
		va_end(args);
J
Jeff Mahoney 已提交
130
	} else {
131
		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
132
			sb->s_id, function, line, errno, errstr);
J
Jeff Mahoney 已提交
133
	}
134
#endif
L
liubo 已提交
135

A
Anand Jain 已提交
136 137 138 139 140 141
	/*
	 * Today we only save the error info to memory.  Long term we'll
	 * also send it down to the disk
	 */
	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);

J
Jeff Mahoney 已提交
142
	/* Don't go through full error handling during mount */
143 144 145 146 147 148 149 150 151 152 153 154 155 156
	if (!(sb->s_flags & SB_BORN))
		return;

	if (sb_rdonly(sb))
		return;

	/* btrfs handle error by forcing the filesystem readonly */
	sb->s_flags |= SB_RDONLY;
	btrfs_info(fs_info, "forced readonly");
	/*
	 * Note that a running device replace operation is not canceled here
	 * although there is no way to update the progress. It would add the
	 * risk of a deadlock, therefore the canceling is omitted. The only
	 * penalty is that some I/O remains active until the procedure
157
	 * completes. The next time when the filesystem is mounted writable
158 159
	 * again, the device replace operation continues.
	 */
J
Jeff Mahoney 已提交
160
}
L
liubo 已提交
161

162
#ifdef CONFIG_PRINTK
163
/*
 * Names printed by btrfs_printk() for each kernel log level.  The table is
 * indexed by the level digit minus '0', so entry 0 corresponds to level '0'
 * and entry 7 to level '7'.
 */
static const char * const logtypes[] = {
	"emergency",
	"alert",
	"critical",
	"error",
	"warning",
	"notice",
	"info",
	"debug",
};

174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189

/*
 * Use one ratelimit state per log level so that a flood of less important
 * messages doesn't cause more important ones to be dropped.
 *
 * The array has eight entries and is indexed the same way as logtypes[]:
 * btrfs_printk() picks entry (level - '0') for a message carrying kernel log
 * level '0'..'7', and entry 4 when the format string carries no level.
 * Every entry is initialised with DEFAULT_RATELIMIT_INTERVAL and 100 as the
 * interval and burst arguments of RATELIMIT_STATE_INIT.
 */
static struct ratelimit_state printk_limits[] = {
	RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};

190
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
J
Jeff Mahoney 已提交
191
{
192
	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
J
Jeff Mahoney 已提交
193 194
	struct va_format vaf;
	va_list args;
195
	int kern_level;
196 197
	const char *type = logtypes[4];
	struct ratelimit_state *ratelimit = &printk_limits[4];
J
Jeff Mahoney 已提交
198 199 200

	va_start(args, fmt);

201
	while ((kern_level = printk_get_level(fmt)) != 0) {
202
		size_t size = printk_skip_level(fmt) - fmt;
203 204 205 206 207 208 209

		if (kern_level >= '0' && kern_level <= '7') {
			memcpy(lvl, fmt,  size);
			lvl[size] = '\0';
			type = logtypes[kern_level - '0'];
			ratelimit = &printk_limits[kern_level - '0'];
		}
210
		fmt += size;
211 212
	}

J
Jeff Mahoney 已提交
213 214
	vaf.fmt = fmt;
	vaf.va = &args;
215

216
	if (__ratelimit(ratelimit))
217 218
		printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
			fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);
219 220 221 222

	va_end(args);
}
#endif
L
liubo 已提交
223

224 225 226 227 228 229 230 231 232 233 234 235 236
/*
 * We only mark the transaction aborted and then set the file system read-only.
 * This will prevent new transactions from starting or trying to join this
 * one.
 *
 * This means that error recovery at the call site is limited to freeing
 * any local memory allocations and passing the error code up without
 * further cleanup. The transaction should complete as it normally would
 * in the call path but will return -EIO.
 *
 * We'll complete the cleanup in btrfs_end_transaction and
 * btrfs_commit_transaction.
 */
237
__cold
238
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
239
			       const char *function,
240 241
			       unsigned int line, int errno)
{
242 243
	struct btrfs_fs_info *fs_info = trans->fs_info;

244 245 246
	trans->aborted = errno;
	/* Nothing used. The other threads that have joined this
	 * transaction may be able to continue. */
247
	if (!trans->dirty && list_empty(&trans->new_bgs)) {
248 249
		const char *errstr;

250
		errstr = btrfs_decode_error(errno);
251
		btrfs_warn(fs_info,
252 253
		           "%s:%d: Aborting unused transaction(%s).",
		           function, line, errstr);
L
liubo 已提交
254
		return;
255
	}
S
Seraphime Kirkovski 已提交
256
	WRITE_ONCE(trans->transaction->aborted, errno);
257
	/* Wake up anybody who may be waiting on this transaction */
258 259 260
	wake_up(&fs_info->transaction_wait);
	wake_up(&fs_info->transaction_blocked_wait);
	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
261
}
J
Jeff Mahoney 已提交
262 263 264 265
/*
 * __btrfs_panic decodes unexpected, fatal errors from the caller,
 * issues an alert, and either panics or BUGs, depending on mount options.
 */
266
__cold
J
Jeff Mahoney 已提交
267 268 269 270 271 272 273
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
		   unsigned int line, int errno, const char *fmt, ...)
{
	char *s_id = "<unknown>";
	const char *errstr;
	struct va_format vaf = { .fmt = fmt };
	va_list args;
L
liubo 已提交
274

J
Jeff Mahoney 已提交
275 276
	if (fs_info)
		s_id = fs_info->sb->s_id;
L
liubo 已提交
277

J
Jeff Mahoney 已提交
278 279 280
	va_start(args, fmt);
	vaf.va = &args;

281
	errstr = btrfs_decode_error(errno);
282
	if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
283 284
		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
			s_id, function, line, &vaf, errno, errstr);
J
Jeff Mahoney 已提交
285

286 287
	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
		   function, line, &vaf, errno, errstr);
J
Jeff Mahoney 已提交
288 289
	va_end(args);
	/* Caller calls BUG() */
L
liubo 已提交
290 291
}

C
Chris Mason 已提交
292
/*
 * Tear down the filesystem behind @sb by passing the value returned by
 * btrfs_sb(sb) to close_ctree().
 */
static void btrfs_put_super(struct super_block *sb)
{
	close_ctree(btrfs_sb(sb));
}

297
/*
 * Token identifiers for the mount options listed in the tokens[] match table.
 * Opt_err is the last enumerator and is the value attached to the NULL
 * pattern that terminates tokens[].
 */
enum {
	Opt_acl, Opt_noacl,
	Opt_clear_cache,
	Opt_commit_interval,
	Opt_compress,
	Opt_compress_force,
	Opt_compress_force_type,
	Opt_compress_type,
	Opt_degraded,
	Opt_device,
	Opt_fatal_errors,
	Opt_flushoncommit, Opt_noflushoncommit,
	Opt_inode_cache, Opt_noinode_cache,
	Opt_max_inline,
	Opt_barrier, Opt_nobarrier,
	Opt_datacow, Opt_nodatacow,
	Opt_datasum, Opt_nodatasum,
	Opt_defrag, Opt_nodefrag,
	Opt_discard, Opt_nodiscard,
	Opt_nologreplay,
	Opt_norecovery,
	Opt_ratio,
	Opt_rescan_uuid_tree,
	Opt_skip_balance,
	Opt_space_cache, Opt_no_space_cache,
	Opt_space_cache_version,
	Opt_ssd, Opt_nossd,
	Opt_ssd_spread, Opt_nossd_spread,
	Opt_subvol,
	Opt_subvol_empty,
	Opt_subvolid,
	Opt_thread_pool,
	Opt_treelog, Opt_notreelog,
	Opt_usebackuproot,
	Opt_user_subvol_rm_allowed,

	/* Deprecated options */
	Opt_alloc_start,
	Opt_recovery,
	Opt_subvolrootid,

	/* Debugging options */
	Opt_check_integrity,
	Opt_check_integrity_including_extent_data,
	Opt_check_integrity_print_mask,
	Opt_enospc_debug, Opt_noenospc_debug,
#ifdef CONFIG_BTRFS_DEBUG
	Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	Opt_ref_verify,
#endif
	Opt_err,
};

D
David Sterba 已提交
352
static const match_table_t tokens = {
353 354 355 356
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_clear_cache, "clear_cache"},
	{Opt_commit_interval, "commit=%u"},
C
Chris Mason 已提交
357
	{Opt_compress, "compress"},
358
	{Opt_compress_type, "compress=%s"},
C
Chris Mason 已提交
359
	{Opt_compress_force, "compress-force"},
360
	{Opt_compress_force_type, "compress-force=%s"},
361 362 363
	{Opt_degraded, "degraded"},
	{Opt_device, "device=%s"},
	{Opt_fatal_errors, "fatal_errors=%s"},
364
	{Opt_flushoncommit, "flushoncommit"},
365
	{Opt_noflushoncommit, "noflushoncommit"},
366 367 368 369 370 371 372 373 374 375 376
	{Opt_inode_cache, "inode_cache"},
	{Opt_noinode_cache, "noinode_cache"},
	{Opt_max_inline, "max_inline=%s"},
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_datacow, "datacow"},
	{Opt_nodatacow, "nodatacow"},
	{Opt_datasum, "datasum"},
	{Opt_nodatasum, "nodatasum"},
	{Opt_defrag, "autodefrag"},
	{Opt_nodefrag, "noautodefrag"},
C
Christoph Hellwig 已提交
377
	{Opt_discard, "discard"},
Q
Qu Wenruo 已提交
378
	{Opt_nodiscard, "nodiscard"},
379 380 381 382 383
	{Opt_nologreplay, "nologreplay"},
	{Opt_norecovery, "norecovery"},
	{Opt_ratio, "metadata_ratio=%u"},
	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
	{Opt_skip_balance, "skip_balance"},
384
	{Opt_space_cache, "space_cache"},
385
	{Opt_no_space_cache, "nospace_cache"},
386 387 388 389 390 391
	{Opt_space_cache_version, "space_cache=%s"},
	{Opt_ssd, "ssd"},
	{Opt_nossd, "nossd"},
	{Opt_ssd_spread, "ssd_spread"},
	{Opt_nossd_spread, "nossd_spread"},
	{Opt_subvol, "subvol=%s"},
O
Omar Sandoval 已提交
392
	{Opt_subvol_empty, "subvol="},
393 394 395 396
	{Opt_subvolid, "subvolid=%s"},
	{Opt_thread_pool, "thread_pool=%u"},
	{Opt_treelog, "treelog"},
	{Opt_notreelog, "notreelog"},
397
	{Opt_usebackuproot, "usebackuproot"},
398 399 400 401 402 403 404 405
	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},

	/* Deprecated options */
	{Opt_alloc_start, "alloc_start=%s"},
	{Opt_recovery, "recovery"},
	{Opt_subvolrootid, "subvolrootid=%d"},

	/* Debugging options */
406 407
	{Opt_check_integrity, "check_int"},
	{Opt_check_integrity_including_extent_data, "check_int_data"},
408
	{Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
409 410
	{Opt_enospc_debug, "enospc_debug"},
	{Opt_noenospc_debug, "noenospc_debug"},
411 412 413 414
#ifdef CONFIG_BTRFS_DEBUG
	{Opt_fragment_data, "fragment=data"},
	{Opt_fragment_metadata, "fragment=metadata"},
	{Opt_fragment_all, "fragment=all"},
J
Josef Bacik 已提交
415 416 417
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	{Opt_ref_verify, "ref_verify"},
418
#endif
J
Josef Bacik 已提交
419
	{Opt_err, NULL},
420 421
};

422 423 424
/*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
425
 * XXX JDM: This needs to be cleaned up for remount.
426
 */
427
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
428
			unsigned long new_flags)
429 430
{
	substring_t args[MAX_OPT_ARGS];
431
	char *p, *num;
432
	u64 cache_gen;
433
	int intarg;
S
Sage Weil 已提交
434
	int ret = 0;
435 436
	char *compress_type;
	bool compress_force = false;
437 438 439
	enum btrfs_compression_type saved_compress_type;
	bool saved_compress_force;
	int no_compress = 0;
440

441 442
	cache_gen = btrfs_super_cache_generation(info->super_copy);
	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
443 444
		btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
	else if (cache_gen)
445 446
		btrfs_set_opt(info->mount_opt, SPACE_CACHE);

447 448 449 450
	/*
	 * Even the options are empty, we still need to do extra check
	 * against new flags
	 */
451
	if (!options)
452
		goto check;
453

454
	while ((p = strsep(&options, ",")) != NULL) {
455 456 457 458 459 460
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
461
		case Opt_degraded:
462
			btrfs_info(info, "allowing degraded mounts");
463
			btrfs_set_opt(info->mount_opt, DEGRADED);
464
			break;
465
		case Opt_subvol:
O
Omar Sandoval 已提交
466
		case Opt_subvol_empty:
467
		case Opt_subvolid:
468
		case Opt_subvolrootid:
469
		case Opt_device:
470
			/*
471 472
			 * These are parsed by btrfs_parse_subvol_options or
			 * btrfs_parse_device_options and can be ignored here.
473
			 */
474 475
			break;
		case Opt_nodatasum:
476
			btrfs_set_and_info(info, NODATASUM,
477
					   "setting nodatasum");
478
			break;
Q
Qu Wenruo 已提交
479
		case Opt_datasum:
480 481
			if (btrfs_test_opt(info, NODATASUM)) {
				if (btrfs_test_opt(info, NODATACOW))
482
					btrfs_info(info,
J
Jeff Mahoney 已提交
483
						   "setting datasum, datacow enabled");
484
				else
485
					btrfs_info(info, "setting datasum");
486
			}
Q
Qu Wenruo 已提交
487 488 489
			btrfs_clear_opt(info->mount_opt, NODATACOW);
			btrfs_clear_opt(info->mount_opt, NODATASUM);
			break;
490
		case Opt_nodatacow:
491 492 493
			if (!btrfs_test_opt(info, NODATACOW)) {
				if (!btrfs_test_opt(info, COMPRESS) ||
				    !btrfs_test_opt(info, FORCE_COMPRESS)) {
494
					btrfs_info(info,
495 496
						   "setting nodatacow, compression disabled");
				} else {
497
					btrfs_info(info, "setting nodatacow");
498
				}
499 500 501
			}
			btrfs_clear_opt(info->mount_opt, COMPRESS);
			btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
502 503
			btrfs_set_opt(info->mount_opt, NODATACOW);
			btrfs_set_opt(info->mount_opt, NODATASUM);
504
			break;
Q
Qu Wenruo 已提交
505
		case Opt_datacow:
506
			btrfs_clear_and_info(info, NODATACOW,
507
					     "setting datacow");
Q
Qu Wenruo 已提交
508
			break;
C
Chris Mason 已提交
509
		case Opt_compress_force:
510 511
		case Opt_compress_force_type:
			compress_force = true;
512
			/* Fallthrough */
513 514
		case Opt_compress:
		case Opt_compress_type:
515 516
			saved_compress_type = btrfs_test_opt(info,
							     COMPRESS) ?
517 518
				info->compress_type : BTRFS_COMPRESS_NONE;
			saved_compress_force =
519
				btrfs_test_opt(info, FORCE_COMPRESS);
520 521
			if (token == Opt_compress ||
			    token == Opt_compress_force ||
522
			    strncmp(args[0].from, "zlib", 4) == 0) {
523
				compress_type = "zlib";
524

525
				info->compress_type = BTRFS_COMPRESS_ZLIB;
526 527 528 529 530 531 532 533 534
				info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
				/*
				 * args[0] contains uninitialized data since
				 * for these tokens we don't expect any
				 * parameter.
				 */
				if (token != Opt_compress &&
				    token != Opt_compress_force)
					info->compress_level =
535 536 537
					  btrfs_compress_str2level(
							BTRFS_COMPRESS_ZLIB,
							args[0].from + 4);
538
				btrfs_set_opt(info->mount_opt, COMPRESS);
539 540
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
541
				no_compress = 0;
542
			} else if (strncmp(args[0].from, "lzo", 3) == 0) {
L
Li Zefan 已提交
543 544
				compress_type = "lzo";
				info->compress_type = BTRFS_COMPRESS_LZO;
545
				btrfs_set_opt(info->mount_opt, COMPRESS);
546 547
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
548
				btrfs_set_fs_incompat(info, COMPRESS_LZO);
549
				no_compress = 0;
550
			} else if (strncmp(args[0].from, "zstd", 4) == 0) {
N
Nick Terrell 已提交
551 552
				compress_type = "zstd";
				info->compress_type = BTRFS_COMPRESS_ZSTD;
553 554 555 556
				info->compress_level =
					btrfs_compress_str2level(
							 BTRFS_COMPRESS_ZSTD,
							 args[0].from + 4);
N
Nick Terrell 已提交
557 558 559 560 561
				btrfs_set_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
				btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
				no_compress = 0;
562 563 564 565 566
			} else if (strncmp(args[0].from, "no", 2) == 0) {
				compress_type = "no";
				btrfs_clear_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				compress_force = false;
567
				no_compress++;
568 569 570 571 572 573
			} else {
				ret = -EINVAL;
				goto out;
			}

			if (compress_force) {
574
				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
575
			} else {
576 577 578 579 580 581 582
				/*
				 * If we remount from compress-force=xxx to
				 * compress=xxx, we need clear FORCE_COMPRESS
				 * flag, otherwise, there is no way for users
				 * to disable forcible compression separately.
				 */
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
583
			}
584
			if ((btrfs_test_opt(info, COMPRESS) &&
585 586
			     (info->compress_type != saved_compress_type ||
			      compress_force != saved_compress_force)) ||
587
			    (!btrfs_test_opt(info, COMPRESS) &&
588
			     no_compress == 1)) {
589
				btrfs_info(info, "%s %s compression, level %d",
590
					   (compress_force) ? "force" : "use",
591
					   compress_type, info->compress_level);
592 593
			}
			compress_force = false;
C
Chris Mason 已提交
594
			break;
595
		case Opt_ssd:
596
			btrfs_set_and_info(info, SSD,
597
					   "enabling ssd optimizations");
598
			btrfs_clear_opt(info->mount_opt, NOSSD);
599
			break;
600
		case Opt_ssd_spread:
601 602
			btrfs_set_and_info(info, SSD,
					   "enabling ssd optimizations");
603
			btrfs_set_and_info(info, SSD_SPREAD,
604
					   "using spread ssd allocation scheme");
605
			btrfs_clear_opt(info->mount_opt, NOSSD);
606
			break;
C
Chris Mason 已提交
607
		case Opt_nossd:
608 609 610
			btrfs_set_opt(info->mount_opt, NOSSD);
			btrfs_clear_and_info(info, SSD,
					     "not using ssd optimizations");
611 612
			/* Fallthrough */
		case Opt_nossd_spread:
613 614
			btrfs_clear_and_info(info, SSD_SPREAD,
					     "not using spread ssd allocation scheme");
C
Chris Mason 已提交
615
			break;
616
		case Opt_barrier:
617
			btrfs_clear_and_info(info, NOBARRIER,
618
					     "turning on barriers");
619
			break;
620
		case Opt_nobarrier:
621
			btrfs_set_and_info(info, NOBARRIER,
622
					   "turning off barriers");
623
			break;
624
		case Opt_thread_pool:
625 626 627
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
628
			} else if (intarg == 0) {
629 630 631
				ret = -EINVAL;
				goto out;
			}
632
			info->thread_pool_size = intarg;
633
			break;
634
		case Opt_max_inline:
635 636
			num = match_strdup(&args[0]);
			if (num) {
A
Akinobu Mita 已提交
637
				info->max_inline = memparse(num, NULL);
638 639
				kfree(num);

C
Chris Mason 已提交
640
				if (info->max_inline) {
641
					info->max_inline = min_t(u64,
C
Chris Mason 已提交
642
						info->max_inline,
643
						info->sectorsize);
C
Chris Mason 已提交
644
				}
645 646
				btrfs_info(info, "max_inline at %llu",
					   info->max_inline);
647 648 649
			} else {
				ret = -ENOMEM;
				goto out;
650 651
			}
			break;
652
		case Opt_alloc_start:
653 654
			btrfs_info(info,
				"option alloc_start is obsolete, ignored");
655
			break;
Q
Qu Wenruo 已提交
656
		case Opt_acl:
657
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
658
			info->sb->s_flags |= SB_POSIXACL;
Q
Qu Wenruo 已提交
659
			break;
660
#else
661
			btrfs_err(info, "support for ACL not compiled in!");
662 663 664
			ret = -EINVAL;
			goto out;
#endif
J
Josef Bacik 已提交
665
		case Opt_noacl:
666
			info->sb->s_flags &= ~SB_POSIXACL;
J
Josef Bacik 已提交
667
			break;
S
Sage Weil 已提交
668
		case Opt_notreelog:
669
			btrfs_set_and_info(info, NOTREELOG,
670
					   "disabling tree log");
Q
Qu Wenruo 已提交
671 672
			break;
		case Opt_treelog:
673
			btrfs_clear_and_info(info, NOTREELOG,
674
					     "enabling tree log");
S
Sage Weil 已提交
675
			break;
676
		case Opt_norecovery:
677
		case Opt_nologreplay:
678
			btrfs_set_and_info(info, NOLOGREPLAY,
679 680
					   "disabling log replay at mount time");
			break;
681
		case Opt_flushoncommit:
682
			btrfs_set_and_info(info, FLUSHONCOMMIT,
683
					   "turning on flush-on-commit");
684
			break;
685
		case Opt_noflushoncommit:
686
			btrfs_clear_and_info(info, FLUSHONCOMMIT,
687
					     "turning off flush-on-commit");
688
			break;
689
		case Opt_ratio:
690
			ret = match_int(&args[0], &intarg);
691
			if (ret)
692
				goto out;
693 694 695
			info->metadata_ratio = intarg;
			btrfs_info(info, "metadata ratio %u",
				   info->metadata_ratio);
696
			break;
C
Christoph Hellwig 已提交
697
		case Opt_discard:
698
			btrfs_set_and_info(info, DISCARD,
699
					   "turning on discard");
C
Christoph Hellwig 已提交
700
			break;
Q
Qu Wenruo 已提交
701
		case Opt_nodiscard:
702
			btrfs_clear_and_info(info, DISCARD,
703
					     "turning off discard");
Q
Qu Wenruo 已提交
704
			break;
705
		case Opt_space_cache:
706 707 708
		case Opt_space_cache_version:
			if (token == Opt_space_cache ||
			    strcmp(args[0].from, "v1") == 0) {
709
				btrfs_clear_opt(info->mount_opt,
710
						FREE_SPACE_TREE);
711
				btrfs_set_and_info(info, SPACE_CACHE,
712
					   "enabling disk space caching");
713
			} else if (strcmp(args[0].from, "v2") == 0) {
714
				btrfs_clear_opt(info->mount_opt,
715
						SPACE_CACHE);
716
				btrfs_set_and_info(info, FREE_SPACE_TREE,
717 718 719 720 721
						   "enabling free space tree");
			} else {
				ret = -EINVAL;
				goto out;
			}
722
			break;
723 724 725
		case Opt_rescan_uuid_tree:
			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
			break;
726
		case Opt_no_space_cache:
727
			if (btrfs_test_opt(info, SPACE_CACHE)) {
728 729
				btrfs_clear_and_info(info, SPACE_CACHE,
					     "disabling disk space caching");
730
			}
731
			if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
732 733
				btrfs_clear_and_info(info, FREE_SPACE_TREE,
					     "disabling free space tree");
734
			}
735
			break;
C
Chris Mason 已提交
736
		case Opt_inode_cache:
737
			btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
738
					   "enabling inode map caching");
739 740
			break;
		case Opt_noinode_cache:
741
			btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
742
					     "disabling inode map caching");
C
Chris Mason 已提交
743
			break;
744
		case Opt_clear_cache:
745
			btrfs_set_and_info(info, CLEAR_CACHE,
746
					   "force clearing of disk cache");
747
			break;
748 749 750
		case Opt_user_subvol_rm_allowed:
			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
			break;
751 752 753
		case Opt_enospc_debug:
			btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
754 755 756
		case Opt_noenospc_debug:
			btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
C
Chris Mason 已提交
757
		case Opt_defrag:
758
			btrfs_set_and_info(info, AUTO_DEFRAG,
759
					   "enabling auto defrag");
C
Chris Mason 已提交
760
			break;
761
		case Opt_nodefrag:
762
			btrfs_clear_and_info(info, AUTO_DEFRAG,
763
					     "disabling auto defrag");
764
			break;
C
Chris Mason 已提交
765
		case Opt_recovery:
766
			btrfs_warn(info,
767
				   "'recovery' is deprecated, use 'usebackuproot' instead");
768
			/* fall through */
769
		case Opt_usebackuproot:
770
			btrfs_info(info,
771 772
				   "trying to use backup root at mount time");
			btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
C
Chris Mason 已提交
773
			break;
774 775 776
		case Opt_skip_balance:
			btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
			break;
777 778
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		case Opt_check_integrity_including_extent_data:
779
			btrfs_info(info,
780
				   "enabling check integrity including extent data");
781 782 783 784 785
			btrfs_set_opt(info->mount_opt,
				      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity:
786
			btrfs_info(info, "enabling check integrity");
787 788 789
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity_print_mask:
790
			ret = match_int(&args[0], &intarg);
791
			if (ret)
792
				goto out;
793 794 795
			info->check_integrity_print_mask = intarg;
			btrfs_info(info, "check_integrity_print_mask 0x%x",
				   info->check_integrity_print_mask);
796 797 798 799 800
			break;
#else
		case Opt_check_integrity_including_extent_data:
		case Opt_check_integrity:
		case Opt_check_integrity_print_mask:
801 802
			btrfs_err(info,
				  "support for check_integrity* not compiled in!");
803 804 805
			ret = -EINVAL;
			goto out;
#endif
J
Jeff Mahoney 已提交
806 807 808 809 810 811 812 813 814 815 816 817
		case Opt_fatal_errors:
			if (strcmp(args[0].from, "panic") == 0)
				btrfs_set_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else if (strcmp(args[0].from, "bug") == 0)
				btrfs_clear_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else {
				ret = -EINVAL;
				goto out;
			}
			break;
818 819 820
		case Opt_commit_interval:
			intarg = 0;
			ret = match_int(&args[0], &intarg);
821
			if (ret)
822
				goto out;
823
			if (intarg == 0) {
824
				btrfs_info(info,
825
					   "using default commit interval %us",
J
Jeff Mahoney 已提交
826
					   BTRFS_DEFAULT_COMMIT_INTERVAL);
827 828 829 830
				intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
			} else if (intarg > 300) {
				btrfs_warn(info, "excessive commit interval %d",
					   intarg);
831
			}
832
			info->commit_interval = intarg;
833
			break;
834 835
#ifdef CONFIG_BTRFS_DEBUG
		case Opt_fragment_all:
836
			btrfs_info(info, "fragmenting all space");
837 838 839 840
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
			break;
		case Opt_fragment_metadata:
841
			btrfs_info(info, "fragmenting metadata");
842 843 844 845
			btrfs_set_opt(info->mount_opt,
				      FRAGMENT_METADATA);
			break;
		case Opt_fragment_data:
846
			btrfs_info(info, "fragmenting data");
847 848
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			break;
J
Josef Bacik 已提交
849 850 851 852 853 854
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
		case Opt_ref_verify:
			btrfs_info(info, "doing ref verification");
			btrfs_set_opt(info->mount_opt, REF_VERIFY);
			break;
855
#endif
S
Sage Weil 已提交
856
		case Opt_err:
857
			btrfs_info(info, "unrecognized mount option '%s'", p);
S
Sage Weil 已提交
858 859
			ret = -EINVAL;
			goto out;
860
		default:
861
			break;
862 863
		}
	}
864 865 866 867
check:
	/*
	 * Extra check for current option against current flag
	 */
868
	if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) {
869
		btrfs_err(info,
870 871 872
			  "nologreplay must be used with ro mount option");
		ret = -EINVAL;
	}
S
Sage Weil 已提交
873
out:
874
	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
875 876
	    !btrfs_test_opt(info, FREE_SPACE_TREE) &&
	    !btrfs_test_opt(info, CLEAR_CACHE)) {
877
		btrfs_err(info, "cannot disable free space tree");
878 879 880
		ret = -EINVAL;

	}
881
	if (!ret && btrfs_test_opt(info, SPACE_CACHE))
882
		btrfs_info(info, "disk space caching is enabled");
883
	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
884
		btrfs_info(info, "using free space tree");
S
Sage Weil 已提交
885
	return ret;
886 887 888 889 890 891 892 893
}

/*
 * Parse mount options that are required early in the mount process.
 *
 * All other options will be parsed on much later in the mount process and
 * only when we need to allocate a new super block.
 */
894 895
static int btrfs_parse_device_options(const char *options, fmode_t flags,
				      void *holder)
896 897
{
	substring_t args[MAX_OPT_ARGS];
898
	char *device_name, *opts, *orig, *p;
899
	struct btrfs_device *device = NULL;
900 901
	int error = 0;

902 903
	lockdep_assert_held(&uuid_mutex);

904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928
	if (!options)
		return 0;

	/*
	 * strsep changes the string, duplicate it because btrfs_parse_options
	 * gets called later
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
	orig = opts;

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;

		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		if (token == Opt_device) {
			device_name = match_strdup(&args[0]);
			if (!device_name) {
				error = -ENOMEM;
				goto out;
			}
929 930
			device = btrfs_scan_one_device(device_name, flags,
					holder);
931
			kfree(device_name);
932 933
			if (IS_ERR(device)) {
				error = PTR_ERR(device);
934
				goto out;
935
			}
936 937 938 939 940 941 942 943 944 945 946 947 948
		}
	}

out:
	kfree(orig);
	return error;
}

/*
 * Parse mount options that are related to subvolume id
 *
 * The value is later passed to mount_subvol()
 */
949 950
static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
		u64 *subvol_objectid)
951 952 953
{
	substring_t args[MAX_OPT_ARGS];
	char *opts, *orig, *p;
954
	int error = 0;
955
	u64 subvolid;
956 957

	if (!options)
958
		return 0;
959 960

	/*
961
	 * strsep changes the string, duplicate it because
962
	 * btrfs_parse_device_options gets called later
963 964 965 966
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
967
	orig = opts;
968 969 970 971 972 973 974 975 976

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_subvol:
977
			kfree(*subvol_name);
978
			*subvol_name = match_strdup(&args[0]);
979 980 981 982
			if (!*subvol_name) {
				error = -ENOMEM;
				goto out;
			}
983
			break;
984
		case Opt_subvolid:
985 986
			error = match_u64(&args[0], &subvolid);
			if (error)
987
				goto out;
988 989 990 991 992 993

			/* we want the original fs_tree */
			if (subvolid == 0)
				subvolid = BTRFS_FS_TREE_OBJECTID;

			*subvol_objectid = subvolid;
994
			break;
995
		case Opt_subvolrootid:
996
			pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
997
			break;
998 999 1000 1001 1002
		default:
			break;
		}
	}

1003
out:
1004
	kfree(orig);
1005
	return error;
1006 1007
}

1008 1009
/*
 * Build the path of the subvolume @subvol_objectid, relative to the top-level
 * subvolume (BTRFS_FS_TREE_OBJECTID).
 *
 * The path is assembled back to front in a PATH_MAX sized buffer: each name
 * that is found is copied in front of what is already there, preceded by a
 * '/'.  Once the top-level subvolume is reached the result is moved to the
 * start of the buffer.
 *
 * Returns the kmalloc()ed path ("/" when @subvol_objectid already is
 * BTRFS_FS_TREE_OBJECTID), or an ERR_PTR(): -ENOMEM on allocation failure,
 * -ENOENT when an expected back reference is missing, -ENAMETOOLONG when the
 * path does not fit into PATH_MAX, or an error from the tree lookups.
 */
static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
					   u64 subvol_objectid)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_root *fs_root;
	struct btrfs_root_ref *root_ref;
	struct btrfs_inode_ref *inode_ref;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	char *name = NULL, *ptr;
	u64 dirid;
	int len;
	int ret;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto err;
	}
	path->leave_spinning = 1;

	name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!name) {
		ret = -ENOMEM;
		goto err;
	}
	/* Start with an empty string in the last byte of the buffer */
	ptr = name + PATH_MAX - 1;
	ptr[0] = '\0';

	/*
	 * Walk up the subvolume trees in the tree of tree roots by root
	 * backrefs until we hit the top-level subvolume.
	 */
	while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_BACKREF_KEY;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
			goto err;
		} else if (ret > 0) {
			ret = btrfs_previous_item(root, path, subvol_objectid,
						  BTRFS_ROOT_BACKREF_KEY);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = -ENOENT;
				goto err;
			}
		}

		/* The offset of the backref key is the next id to look up */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		subvol_objectid = key.offset;

		root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_root_ref);
		len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
		/* Make room for the name and its leading '/' */
		ptr -= len + 1;
		if (ptr < name) {
			ret = -ENAMETOOLONG;
			goto err;
		}
		/* The name is read from right behind the root_ref item */
		read_extent_buffer(path->nodes[0], ptr + 1,
				   (unsigned long)(root_ref + 1), len);
		ptr[0] = '/';
		dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
		btrfs_release_path(path);

		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;
		fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
		if (IS_ERR(fs_root)) {
			ret = PTR_ERR(fs_root);
			goto err;
		}

		/*
		 * Walk up the filesystem tree by inode refs until we hit the
		 * root directory.
		 */
		while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
			key.objectid = dirid;
			key.type = BTRFS_INODE_REF_KEY;
			key.offset = (u64)-1;

			ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = btrfs_previous_item(fs_root, path, dirid,
							  BTRFS_INODE_REF_KEY);
				if (ret < 0) {
					goto err;
				} else if (ret > 0) {
					ret = -ENOENT;
					goto err;
				}
			}

			/* The offset of the inode ref key is the next dirid */
			btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
			dirid = key.offset;

			inode_ref = btrfs_item_ptr(path->nodes[0],
						   path->slots[0],
						   struct btrfs_inode_ref);
			len = btrfs_inode_ref_name_len(path->nodes[0],
						       inode_ref);
			/* Make room for the name and its leading '/' */
			ptr -= len + 1;
			if (ptr < name) {
				ret = -ENAMETOOLONG;
				goto err;
			}
			read_extent_buffer(path->nodes[0], ptr + 1,
					   (unsigned long)(inode_ref + 1), len);
			ptr[0] = '/';
			btrfs_release_path(path);
		}
	}

	btrfs_free_path(path);
	if (ptr == name + PATH_MAX - 1) {
		/* Nothing was prepended, the loop above never ran */
		name[0] = '/';
		name[1] = '\0';
	} else {
		/* Move the assembled path, including its NUL, to the front */
		memmove(name, ptr, name + PATH_MAX - ptr);
	}
	return name;

err:
	btrfs_free_path(path);
	kfree(name);
	return ERR_PTR(ret);
}

/*
 * Look up the objectid of the subvolume that is mounted by default.
 *
 * @objectid is set to the objectid stored in the location key of the "default"
 * dir item, or to BTRFS_FS_TREE_OBJECTID when that dir item does not exist.
 *
 * Returns 0 on success, -ENOMEM when no path can be allocated, or the error
 * returned by btrfs_lookup_dir_item().
 */
static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	struct btrfs_key location;
	u64 dir_id;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	/*
	 * Find the "default" dir item which points to the root item that we
	 * will mount by default if we haven't been given a specific subvolume
	 * to mount.
	 */
	dir_id = btrfs_super_root_dir(fs_info->super_copy);
	/* 7 is the length of the name "default" */
	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
	if (IS_ERR(di)) {
		btrfs_free_path(path);
		return PTR_ERR(di);
	}
	if (!di) {
		/*
		 * Ok the default dir item isn't there.  This is weird since
		 * it's always been there, but don't freak out, just try and
		 * mount the top-level subvolume.
		 */
		btrfs_free_path(path);
		*objectid = BTRFS_FS_TREE_OBJECTID;
		return 0;
	}

	/* Read the location key before the path is released */
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
	btrfs_free_path(path);
	*objectid = location.objectid;
	return 0;
}

C
Chris Mason 已提交
1185
static int btrfs_fill_super(struct super_block *sb,
1186
			    struct btrfs_fs_devices *fs_devices,
1187
			    void *data)
C
Chris Mason 已提交
1188
{
C
Chris Mason 已提交
1189
	struct inode *inode;
1190
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1191
	struct btrfs_key key;
C
Chris Mason 已提交
1192
	int err;
1193

C
Chris Mason 已提交
1194 1195 1196
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_magic = BTRFS_SUPER_MAGIC;
	sb->s_op = &btrfs_super_ops;
A
Al Viro 已提交
1197
	sb->s_d_op = &btrfs_dentry_operations;
B
Balaji Rao 已提交
1198
	sb->s_export_op = &btrfs_export_ops;
J
Josef Bacik 已提交
1199
	sb->s_xattr = btrfs_xattr_handlers;
C
Chris Mason 已提交
1200
	sb->s_time_gran = 1;
C
Chris Mason 已提交
1201
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
1202
	sb->s_flags |= SB_POSIXACL;
1203
#endif
M
Matthew Garrett 已提交
1204
	sb->s_flags |= SB_I_VERSION;
1205
	sb->s_iflags |= SB_I_CGROUPWB;
1206 1207 1208 1209 1210 1211 1212

	err = super_setup_bdi(sb);
	if (err) {
		btrfs_err(fs_info, "super_setup_bdi failed");
		return err;
	}

A
Al Viro 已提交
1213 1214
	err = open_ctree(sb, fs_devices, (char *)data);
	if (err) {
1215
		btrfs_err(fs_info, "open_ctree failed");
A
Al Viro 已提交
1216
		return err;
1217 1218
	}

1219 1220 1221
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
1222
	inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
1223 1224
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
C
Chris Mason 已提交
1225
		goto fail_close;
C
Chris Mason 已提交
1226 1227
	}

1228 1229
	sb->s_root = d_make_root(inode);
	if (!sb->s_root) {
C
Chris Mason 已提交
1230 1231
		err = -ENOMEM;
		goto fail_close;
C
Chris Mason 已提交
1232
	}
1233

D
Dan Magenheimer 已提交
1234
	cleancache_init_fs(sb);
1235
	sb->s_flags |= SB_ACTIVE;
C
Chris Mason 已提交
1236
	return 0;
C
Chris Mason 已提交
1237 1238

fail_close:
1239
	close_ctree(fs_info);
C
Chris Mason 已提交
1240
	return err;
C
Chris Mason 已提交
1241 1242
}

S
Sage Weil 已提交
1243
int btrfs_sync_fs(struct super_block *sb, int wait)
C
Chris Mason 已提交
1244 1245
{
	struct btrfs_trans_handle *trans;
1246 1247
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
C
Chris Mason 已提交
1248

1249
	trace_btrfs_sync_fs(fs_info, wait);
1250

C
Chris Mason 已提交
1251
	if (!wait) {
1252
		filemap_flush(fs_info->btree_inode->i_mapping);
C
Chris Mason 已提交
1253 1254
		return 0;
	}
1255

1256
	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
1257

M
Miao Xie 已提交
1258
	trans = btrfs_attach_transaction_barrier(root);
1259
	if (IS_ERR(trans)) {
1260
		/* no transaction, don't bother */
1261 1262 1263 1264 1265 1266 1267
		if (PTR_ERR(trans) == -ENOENT) {
			/*
			 * Exit unless we have some pending changes
			 * that need to go through commit
			 */
			if (fs_info->pending_changes == 0)
				return 0;
1268 1269 1270 1271 1272 1273
			/*
			 * A non-blocking test if the fs is frozen. We must not
			 * start a new transaction here otherwise a deadlock
			 * happens. The pending operations are delayed to the
			 * next commit after thawing.
			 */
1274 1275
			if (sb_start_write_trylock(sb))
				sb_end_write(sb);
1276 1277
			else
				return 0;
1278 1279
			trans = btrfs_start_transaction(root, 0);
		}
1280 1281
		if (IS_ERR(trans))
			return PTR_ERR(trans);
1282
	}
1283
	return btrfs_commit_transaction(trans);
C
Chris Mason 已提交
1284 1285
}

1286
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
E
Eric Paris 已提交
1287
{
1288
	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1289
	const char *compress_type;
E
Eric Paris 已提交
1290

1291
	if (btrfs_test_opt(info, DEGRADED))
E
Eric Paris 已提交
1292
		seq_puts(seq, ",degraded");
1293
	if (btrfs_test_opt(info, NODATASUM))
E
Eric Paris 已提交
1294
		seq_puts(seq, ",nodatasum");
1295
	if (btrfs_test_opt(info, NODATACOW))
E
Eric Paris 已提交
1296
		seq_puts(seq, ",nodatacow");
1297
	if (btrfs_test_opt(info, NOBARRIER))
E
Eric Paris 已提交
1298
		seq_puts(seq, ",nobarrier");
1299
	if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1300
		seq_printf(seq, ",max_inline=%llu", info->max_inline);
E
Eric Paris 已提交
1301 1302
	if (info->thread_pool_size !=  min_t(unsigned long,
					     num_online_cpus() + 2, 8))
1303
		seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
1304
	if (btrfs_test_opt(info, COMPRESS)) {
1305
		compress_type = btrfs_compress_type2str(info->compress_type);
1306
		if (btrfs_test_opt(info, FORCE_COMPRESS))
T
Tsutomu Itoh 已提交
1307 1308 1309
			seq_printf(seq, ",compress-force=%s", compress_type);
		else
			seq_printf(seq, ",compress=%s", compress_type);
1310
		if (info->compress_level)
1311
			seq_printf(seq, ":%d", info->compress_level);
T
Tsutomu Itoh 已提交
1312
	}
1313
	if (btrfs_test_opt(info, NOSSD))
C
Chris Mason 已提交
1314
		seq_puts(seq, ",nossd");
1315
	if (btrfs_test_opt(info, SSD_SPREAD))
1316
		seq_puts(seq, ",ssd_spread");
1317
	else if (btrfs_test_opt(info, SSD))
E
Eric Paris 已提交
1318
		seq_puts(seq, ",ssd");
1319
	if (btrfs_test_opt(info, NOTREELOG))
1320
		seq_puts(seq, ",notreelog");
1321
	if (btrfs_test_opt(info, NOLOGREPLAY))
1322
		seq_puts(seq, ",nologreplay");
1323
	if (btrfs_test_opt(info, FLUSHONCOMMIT))
1324
		seq_puts(seq, ",flushoncommit");
1325
	if (btrfs_test_opt(info, DISCARD))
1326
		seq_puts(seq, ",discard");
1327
	if (!(info->sb->s_flags & SB_POSIXACL))
E
Eric Paris 已提交
1328
		seq_puts(seq, ",noacl");
1329
	if (btrfs_test_opt(info, SPACE_CACHE))
T
Tsutomu Itoh 已提交
1330
		seq_puts(seq, ",space_cache");
1331
	else if (btrfs_test_opt(info, FREE_SPACE_TREE))
1332
		seq_puts(seq, ",space_cache=v2");
1333
	else
1334
		seq_puts(seq, ",nospace_cache");
1335
	if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1336
		seq_puts(seq, ",rescan_uuid_tree");
1337
	if (btrfs_test_opt(info, CLEAR_CACHE))
T
Tsutomu Itoh 已提交
1338
		seq_puts(seq, ",clear_cache");
1339
	if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
T
Tsutomu Itoh 已提交
1340
		seq_puts(seq, ",user_subvol_rm_allowed");
1341
	if (btrfs_test_opt(info, ENOSPC_DEBUG))
1342
		seq_puts(seq, ",enospc_debug");
1343
	if (btrfs_test_opt(info, AUTO_DEFRAG))
1344
		seq_puts(seq, ",autodefrag");
1345
	if (btrfs_test_opt(info, INODE_MAP_CACHE))
1346
		seq_puts(seq, ",inode_cache");
1347
	if (btrfs_test_opt(info, SKIP_BALANCE))
1348
		seq_puts(seq, ",skip_balance");
1349
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1350
	if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1351
		seq_puts(seq, ",check_int_data");
1352
	else if (btrfs_test_opt(info, CHECK_INTEGRITY))
1353 1354 1355 1356 1357 1358
		seq_puts(seq, ",check_int");
	if (info->check_integrity_print_mask)
		seq_printf(seq, ",check_int_print_mask=%d",
				info->check_integrity_print_mask);
#endif
	if (info->metadata_ratio)
1359
		seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
1360
	if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
J
Jeff Mahoney 已提交
1361
		seq_puts(seq, ",fatal_errors=panic");
1362
	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1363
		seq_printf(seq, ",commit=%u", info->commit_interval);
1364
#ifdef CONFIG_BTRFS_DEBUG
1365
	if (btrfs_test_opt(info, FRAGMENT_DATA))
1366
		seq_puts(seq, ",fragment=data");
1367
	if (btrfs_test_opt(info, FRAGMENT_METADATA))
1368 1369
		seq_puts(seq, ",fragment=metadata");
#endif
J
Josef Bacik 已提交
1370 1371
	if (btrfs_test_opt(info, REF_VERIFY))
		seq_puts(seq, ",ref_verify");
1372 1373 1374 1375
	seq_printf(seq, ",subvolid=%llu",
		  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
	seq_puts(seq, ",subvol=");
	seq_dentry(seq, dentry, " \t\n\\");
E
Eric Paris 已提交
1376 1377 1378
	return 0;
}

1379
static int btrfs_test_super(struct super_block *s, void *data)
Y
Yan 已提交
1380
{
1381 1382
	struct btrfs_fs_info *p = data;
	struct btrfs_fs_info *fs_info = btrfs_sb(s);
Y
Yan 已提交
1383

1384
	return fs_info->fs_devices == p->fs_devices;
Y
Yan 已提交
1385 1386
}

1387 1388
/*
 * Set callback handed to sget(): runs set_anon_super() on the new super block
 * @s and, on success, stores @data as its s_fs_info.
 *
 * Returns the result of set_anon_super().
 */
static int btrfs_set_super(struct super_block *s, void *data)
{
	int err = set_anon_super(s, data);

	if (!err)
		s->s_fs_info = data;
	return err;
}

1395 1396 1397 1398 1399 1400 1401 1402 1403 1404
/*
 * A subvolume is recognised by the inode number of its root directory, which
 * is BTRFS_FIRST_FREE_OBJECTID (256).
 *
 * Returns 1 for a non-NULL inode carrying that number, 0 otherwise.
 */
static inline int is_subvolume_inode(struct inode *inode)
{
	return inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID;
}

1405
/*
 * Get the dentry of the subvolume to mount from the internal mount @mnt of
 * the device's root.
 *
 * When @subvol_name is NULL the path is derived from @subvol_objectid, and
 * when that is 0 as well the default subvolume is looked up first.  The
 * dentry found by mount_subtree() must be a subvolume root and, when
 * @subvol_objectid is non-zero, must belong to the root with that objectid.
 *
 * @subvol_name is freed and the reference on @mnt is dropped on every path.
 *
 * Returns the dentry on success or an ERR_PTR() on failure.
 */
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
				   struct vfsmount *mnt)
{
	struct dentry *root;
	int ret;

	if (!subvol_name) {
		if (!subvol_objectid) {
			ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
							  &subvol_objectid);
			if (ret) {
				root = ERR_PTR(ret);
				goto out;
			}
		}
		subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
							    subvol_objectid);
		if (IS_ERR(subvol_name)) {
			root = ERR_CAST(subvol_name);
			/* Do not hand an error pointer to kfree() below */
			subvol_name = NULL;
			goto out;
		}

	}

	root = mount_subtree(mnt, subvol_name);
	/* mount_subtree() drops our reference on the vfsmount. */
	mnt = NULL;

	if (!IS_ERR(root)) {
		struct super_block *s = root->d_sb;
		struct btrfs_fs_info *fs_info = btrfs_sb(s);
		struct inode *root_inode = d_inode(root);
		u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;

		ret = 0;
		if (!is_subvolume_inode(root_inode)) {
			btrfs_err(fs_info, "'%s' is not a valid subvolume",
			       subvol_name);
			ret = -EINVAL;
		}
		if (subvol_objectid && root_objectid != subvol_objectid) {
			/*
			 * This will also catch a race condition where a
			 * subvolume which was passed by ID is renamed and
			 * another subvolume is renamed over the old location.
			 */
			btrfs_err(fs_info,
				  "subvol '%s' does not match subvolid %llu",
				  subvol_name, subvol_objectid);
			ret = -EINVAL;
		}
		if (ret) {
			dput(root);
			root = ERR_PTR(ret);
			deactivate_locked_super(s);
		}
	}

out:
	/* mnt is NULL here when mount_subtree() already consumed it */
	mntput(mnt);
	kfree(subvol_name);
	return root;
}
1469

1470 1471 1472 1473 1474 1475
/*
 * Find a superblock for the given device / mount point.
 *
 * Note: This is based on mount_bdev from fs/super.c with a few additions
 *       for multiple device setup.  Make sure to keep it in sync.
 */
1476 1477 1478 1479 1480
static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
		int flags, const char *device_name, void *data)
{
	struct block_device *bdev = NULL;
	struct super_block *s;
1481
	struct btrfs_device *device = NULL;
1482 1483
	struct btrfs_fs_devices *fs_devices = NULL;
	struct btrfs_fs_info *fs_info = NULL;
1484
	void *new_sec_opts = NULL;
1485 1486 1487 1488 1489 1490 1491
	fmode_t mode = FMODE_READ;
	int error = 0;

	if (!(flags & SB_RDONLY))
		mode |= FMODE_WRITE;

	if (data) {
A
Al Viro 已提交
1492
		error = security_sb_eat_lsm_opts(data, &new_sec_opts);
1493 1494 1495 1496 1497 1498 1499 1500 1501 1502
		if (error)
			return ERR_PTR(error);
	}

	/*
	 * Setup a dummy root and fs_info for test/set super.  This is because
	 * we don't actually fill this stuff out until open_ctree, but we need
	 * it for searching for existing supers, so this lets us do that and
	 * then open_ctree will properly initialize everything later.
	 */
1503
	fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515
	if (!fs_info) {
		error = -ENOMEM;
		goto error_sec_opts;
	}

	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
	if (!fs_info->super_copy || !fs_info->super_for_commit) {
		error = -ENOMEM;
		goto error_fs_info;
	}

1516
	mutex_lock(&uuid_mutex);
1517
	error = btrfs_parse_device_options(data, mode, fs_type);
1518 1519
	if (error) {
		mutex_unlock(&uuid_mutex);
1520
		goto error_fs_info;
1521
	}
1522

1523 1524
	device = btrfs_scan_one_device(device_name, mode, fs_type);
	if (IS_ERR(device)) {
1525
		mutex_unlock(&uuid_mutex);
1526
		error = PTR_ERR(device);
1527
		goto error_fs_info;
1528
	}
1529

1530
	fs_devices = device->fs_devices;
1531 1532
	fs_info->fs_devices = fs_devices;

1533
	error = btrfs_open_devices(fs_devices, mode, fs_type);
1534
	mutex_unlock(&uuid_mutex);
1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558
	if (error)
		goto error_fs_info;

	if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
		error = -EACCES;
		goto error_close_devices;
	}

	bdev = fs_devices->latest_bdev;
	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
		 fs_info);
	if (IS_ERR(s)) {
		error = PTR_ERR(s);
		goto error_close_devices;
	}

	if (s->s_root) {
		btrfs_close_devices(fs_devices);
		free_fs_info(fs_info);
		if ((flags ^ s->s_flags) & SB_RDONLY)
			error = -EBUSY;
	} else {
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
		btrfs_sb(s)->bdev_holder = fs_type;
1559 1560
		if (!strstr(crc32c_impl(), "generic"))
			set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
1561 1562
		error = btrfs_fill_super(s, fs_devices, data);
	}
A
Al Viro 已提交
1563
	if (!error)
1564
		error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
A
Al Viro 已提交
1565
	security_free_mnt_opts(&new_sec_opts);
1566 1567
	if (error) {
		deactivate_locked_super(s);
A
Al Viro 已提交
1568
		return ERR_PTR(error);
1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580
	}

	return dget(s->s_root);

error_close_devices:
	btrfs_close_devices(fs_devices);
error_fs_info:
	free_fs_info(fs_info);
error_sec_opts:
	security_free_mnt_opts(&new_sec_opts);
	return ERR_PTR(error);
}
1581

1582
/*
1583
 * Mount function which is called by VFS layer.
1584
 *
1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602
 * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
 * which needs vfsmount* of device's root (/).  This means device's root has to
 * be mounted internally in any case.
 *
 * Operation flow:
 *   1. Parse subvol id related options for later use in mount_subvol().
 *
 *   2. Mount device's root (/) by calling vfs_kern_mount().
 *
 *      NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
 *      first place. In order to avoid calling btrfs_mount() again, we use
 *      different file_system_type which is not registered to VFS by
 *      register_filesystem() (btrfs_root_fs_type). As a result,
 *      btrfs_mount_root() is called. The return value will be used by
 *      mount_subtree() in mount_subvol().
 *
 *   3. Call mount_subvol() to get the dentry of subvolume. Since there is
 *      "btrfs subvolume set-default", mount_subvol() is called always.
1603
 */
A
Al Viro 已提交
1604
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1605
		const char *device_name, void *data)
Y
Yan 已提交
1606
{
1607 1608
	struct vfsmount *mnt_root;
	struct dentry *root;
1609 1610
	char *subvol_name = NULL;
	u64 subvol_objectid = 0;
Y
Yan 已提交
1611 1612
	int error = 0;

1613 1614
	error = btrfs_parse_subvol_options(data, &subvol_name,
					&subvol_objectid);
1615 1616
	if (error) {
		kfree(subvol_name);
A
Al Viro 已提交
1617
		return ERR_PTR(error);
1618
	}
1619

1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630
	/* mount device's root (/) */
	mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
	if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
		if (flags & SB_RDONLY) {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags & ~SB_RDONLY, device_name, data);
		} else {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags | SB_RDONLY, device_name, data);
			if (IS_ERR(mnt_root)) {
				root = ERR_CAST(mnt_root);
1631
				kfree(subvol_name);
1632 1633
				goto out;
			}
Y
Yan 已提交
1634

1635 1636 1637 1638 1639 1640
			down_write(&mnt_root->mnt_sb->s_umount);
			error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
			up_write(&mnt_root->mnt_sb->s_umount);
			if (error < 0) {
				root = ERR_PTR(error);
				mntput(mnt_root);
1641
				kfree(subvol_name);
1642 1643 1644
				goto out;
			}
		}
1645
	}
1646 1647
	if (IS_ERR(mnt_root)) {
		root = ERR_CAST(mnt_root);
1648
		kfree(subvol_name);
1649
		goto out;
1650
	}
Y
Yan 已提交
1651

1652
	/* mount_subvol() will free subvol_name and mnt_root */
1653
	root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
Y
Yan 已提交
1654

1655 1656
out:
	return root;
Y
Yan 已提交
1657
}
1658

1659
/*
 * Change the thread pool size of the filesystem from @old_pool_size to
 * @new_pool_size.
 *
 * Does nothing when both sizes are equal.  Otherwise the new size is stored
 * in fs_info->thread_pool_size, the change is logged and the new maximum is
 * applied to the work queues listed below.
 */
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
				     u32 new_pool_size, u32 old_pool_size)
{
	if (new_pool_size == old_pool_size)
		return;

	fs_info->thread_pool_size = new_pool_size;

	/* Both sizes are u32, print them as unsigned values */
	btrfs_info(fs_info, "resize thread pool %u -> %u",
	       old_pool_size, new_pool_size);

	btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
				new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
				new_pool_size);
}

1686
static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
M
Miao Xie 已提交
1687 1688
{
	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1689
}
M
Miao Xie 已提交
1690

1691 1692 1693
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
				       unsigned long old_opts, int flags)
{
M
Miao Xie 已提交
1694 1695
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1696
	     (flags & SB_RDONLY))) {
M
Miao Xie 已提交
1697 1698 1699
		/* wait for any defraggers to finish */
		wait_event(fs_info->transaction_wait,
			   (atomic_read(&fs_info->defrag_running) == 0));
1700
		if (flags & SB_RDONLY)
M
Miao Xie 已提交
1701 1702 1703 1704 1705 1706 1707 1708
			sync_filesystem(fs_info->sb);
	}
}

/*
 * Finish a remount: drop the queued defrag inodes when auto defrag is no
 * longer in effect and clear BTRFS_FS_STATE_REMOUNTING.
 *
 * @old_opts holds the mount options that were in effect before the remount.
 */
static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
					 unsigned long old_opts)
{
	/*
	 * We need to clean up all defraggable inodes if auto defrag has been
	 * turned off or the filesystem is read-only.
	 */
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) {
		btrfs_cleanup_defrag_inodes(fs_info);
	}

	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
}

Y
Yan Zheng 已提交
1720 1721
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
1722 1723
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
1724 1725 1726 1727
	unsigned old_flags = sb->s_flags;
	unsigned long old_opts = fs_info->mount_opt;
	unsigned long old_compress_type = fs_info->compress_type;
	u64 old_max_inline = fs_info->max_inline;
1728
	u32 old_thread_pool_size = fs_info->thread_pool_size;
1729
	u32 old_metadata_ratio = fs_info->metadata_ratio;
Y
Yan Zheng 已提交
1730 1731
	int ret;

1732
	sync_filesystem(sb);
1733
	btrfs_remount_prepare(fs_info);
M
Miao Xie 已提交
1734

1735
	if (data) {
1736
		void *new_sec_opts = NULL;
1737

A
Al Viro 已提交
1738 1739
		ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
		if (!ret)
1740
			ret = security_sb_remount(sb, new_sec_opts);
A
Al Viro 已提交
1741
		security_free_mnt_opts(&new_sec_opts);
1742 1743 1744 1745
		if (ret)
			goto restore;
	}

1746
	ret = btrfs_parse_options(fs_info, data, *flags);
1747
	if (ret)
1748
		goto restore;
1749

1750
	btrfs_remount_begin(fs_info, old_opts, *flags);
1751 1752 1753
	btrfs_resize_thread_pool(fs_info,
		fs_info->thread_pool_size, old_thread_pool_size);

1754
	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
M
Miao Xie 已提交
1755
		goto out;
Y
Yan Zheng 已提交
1756

1757
	if (*flags & SB_RDONLY) {
1758 1759 1760 1761
		/*
		 * this also happens on 'umount -rf' or on shutdown, when
		 * the filesystem is busy.
		 */
1762
		cancel_work_sync(&fs_info->async_reclaim_work);
1763 1764 1765 1766 1767 1768

		/* wait for the uuid_scan task to finish */
		down(&fs_info->uuid_tree_rescan_sem);
		/* avoid complains from lockdep et al. */
		up(&fs_info->uuid_tree_rescan_sem);

1769
		sb->s_flags |= SB_RDONLY;
Y
Yan Zheng 已提交
1770

1771
		/*
1772
		 * Setting SB_RDONLY will put the cleaner thread to
1773 1774 1775 1776 1777 1778 1779
		 * sleep at the next loop if it's already active.
		 * If it's already asleep, we'll leave unused block
		 * groups on disk until we're mounted read-write again
		 * unless we clean them up here.
		 */
		btrfs_delete_unused_bgs(fs_info);

1780 1781
		btrfs_dev_replace_suspend_for_unmount(fs_info);
		btrfs_scrub_cancel(fs_info);
1782
		btrfs_pause_balance(fs_info);
1783

1784
		ret = btrfs_commit_super(fs_info);
1785 1786
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1787
	} else {
1788
		if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
1789
			btrfs_err(fs_info,
1790
				"Remounting read-write after error is not allowed");
1791 1792 1793
			ret = -EINVAL;
			goto restore;
		}
1794
		if (fs_info->fs_devices->rw_devices == 0) {
1795 1796
			ret = -EACCES;
			goto restore;
1797
		}
Y
Yan Zheng 已提交
1798

1799
		if (!btrfs_check_rw_degradable(fs_info, NULL)) {
1800
			btrfs_warn(fs_info,
1801
		"too many missing devices, writable remount is not allowed");
1802 1803 1804 1805
			ret = -EACCES;
			goto restore;
		}

1806
		if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1807 1808
			ret = -EINVAL;
			goto restore;
1809
		}
Y
Yan Zheng 已提交
1810

1811
		ret = btrfs_cleanup_fs_roots(fs_info);
1812 1813
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1814

1815
		/* recover relocation */
1816
		mutex_lock(&fs_info->cleaner_mutex);
1817
		ret = btrfs_recover_relocation(root);
1818
		mutex_unlock(&fs_info->cleaner_mutex);
1819 1820
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1821

1822 1823 1824 1825
		ret = btrfs_resume_balance_async(fs_info);
		if (ret)
			goto restore;

1826 1827
		ret = btrfs_resume_dev_replace_async(fs_info);
		if (ret) {
1828
			btrfs_warn(fs_info, "failed to resume dev_replace");
1829 1830
			goto restore;
		}
1831

1832 1833
		btrfs_qgroup_rescan_resume(fs_info);

1834
		if (!fs_info->uuid_root) {
1835
			btrfs_info(fs_info, "creating UUID tree");
1836 1837
			ret = btrfs_create_uuid_tree(fs_info);
			if (ret) {
J
Jeff Mahoney 已提交
1838 1839 1840
				btrfs_warn(fs_info,
					   "failed to create the UUID tree %d",
					   ret);
1841 1842 1843
				goto restore;
			}
		}
1844
		sb->s_flags &= ~SB_RDONLY;
1845

1846
		set_bit(BTRFS_FS_OPEN, &fs_info->flags);
Y
Yan Zheng 已提交
1847
	}
M
Miao Xie 已提交
1848
out:
1849
	wake_up_process(fs_info->transaction_kthread);
M
Miao Xie 已提交
1850
	btrfs_remount_cleanup(fs_info, old_opts);
Y
Yan Zheng 已提交
1851
	return 0;
1852 1853

restore:
1854
	/* We've hit an error - don't reset SB_RDONLY */
1855
	if (sb_rdonly(sb))
1856
		old_flags |= SB_RDONLY;
1857 1858 1859 1860
	sb->s_flags = old_flags;
	fs_info->mount_opt = old_opts;
	fs_info->compress_type = old_compress_type;
	fs_info->max_inline = old_max_inline;
1861 1862
	btrfs_resize_thread_pool(fs_info,
		old_thread_pool_size, fs_info->thread_pool_size);
1863
	fs_info->metadata_ratio = old_metadata_ratio;
M
Miao Xie 已提交
1864
	btrfs_remount_cleanup(fs_info, old_opts);
1865
	return ret;
Y
Yan Zheng 已提交
1866 1867
}

1868
/* Used to sort the devices by max_avail(descending sort) */
1869
static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893
				       const void *dev_info2)
{
	if (((struct btrfs_device_info *)dev_info1)->max_avail >
	    ((struct btrfs_device_info *)dev_info2)->max_avail)
		return -1;
	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
		 ((struct btrfs_device_info *)dev_info2)->max_avail)
		return 1;
	else
	return 0;
}

/*
 * Order @devices in place by max_avail, which holds the max free extent size
 * of each device, so that the largest value comes first (descending sort).
 *
 * @nr_devices is the number of entries in @devices.
 */
static inline void btrfs_descending_sort_devices(
					struct btrfs_device_info *devices,
					size_t nr_devices)
{
	const size_t entry_size = sizeof(*devices);

	sort(devices, nr_devices, entry_size, btrfs_cmp_device_free_bytes,
	     NULL);
}

1894 1895 1896 1897
/*
 * The helper to calc the free space on the devices that can be used to store
 * file data.
 */
1898 1899
static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
					      u64 *free_bytes)
1900 1901 1902 1903 1904 1905 1906
{
	struct btrfs_device_info *devices_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 type;
	u64 avail_space;
	u64 min_stripe_size;
1907
	int num_stripes = 1;
1908
	int i = 0, nr_devices;
1909
	const struct btrfs_raid_attr *rattr;
1910

1911
	/*
1912
	 * We aren't under the device list lock, so this is racy-ish, but good
1913 1914
	 * enough for our purposes.
	 */
1915
	nr_devices = fs_info->fs_devices->open_devices;
1916 1917 1918 1919 1920 1921 1922 1923 1924
	if (!nr_devices) {
		smp_mb();
		nr_devices = fs_info->fs_devices->open_devices;
		ASSERT(nr_devices);
		if (!nr_devices) {
			*free_bytes = 0;
			return 0;
		}
	}
1925

1926
	devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
1927
			       GFP_KERNEL);
1928 1929 1930
	if (!devices_info)
		return -ENOMEM;

1931
	/* calc min stripe number for data space allocation */
1932
	type = btrfs_data_alloc_profile(fs_info);
1933 1934
	rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];

1935
	if (type & BTRFS_BLOCK_GROUP_RAID0)
1936
		num_stripes = nr_devices;
1937
	else if (type & BTRFS_BLOCK_GROUP_RAID1)
1938
		num_stripes = 2;
1939
	else if (type & BTRFS_BLOCK_GROUP_RAID10)
1940
		num_stripes = 4;
1941

1942 1943
	/* Adjust for more than 1 stripe per device */
	min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
1944

1945 1946
	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
1947 1948
		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
						&device->dev_state) ||
1949 1950
		    !device->bdev ||
		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
1951 1952
			continue;

1953 1954 1955
		if (i >= nr_devices)
			break;

1956 1957 1958
		avail_space = device->total_bytes - device->bytes_used;

		/* align with stripe_len */
1959
		avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
1960 1961

		/*
1962
		 * In order to avoid overwriting the superblock on the drive,
1963 1964
		 * btrfs starts at an offset of at least 1MB when doing chunk
		 * allocation.
1965 1966 1967
		 *
		 * This ensures we have at least min_stripe_size free space
		 * after excluding 1MB.
1968
		 */
1969
		if (avail_space <= SZ_1M + min_stripe_size)
1970 1971
			continue;

1972 1973
		avail_space -= SZ_1M;

1974 1975 1976 1977 1978
		devices_info[i].dev = device;
		devices_info[i].max_avail = avail_space;

		i++;
	}
1979
	rcu_read_unlock();
1980 1981 1982 1983 1984 1985 1986

	nr_devices = i;

	btrfs_descending_sort_devices(devices_info, nr_devices);

	i = nr_devices - 1;
	avail_space = 0;
1987 1988
	while (nr_devices >= rattr->devs_min) {
		num_stripes = min(num_stripes, nr_devices);
1989

1990 1991 1992 1993
		if (devices_info[i].max_avail >= min_stripe_size) {
			int j;
			u64 alloc_size;

1994
			avail_space += devices_info[i].max_avail * num_stripes;
1995
			alloc_size = devices_info[i].max_avail;
1996
			for (j = i + 1 - num_stripes; j <= i; j++)
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
				devices_info[j].max_avail -= alloc_size;
		}
		i--;
		nr_devices--;
	}

	kfree(devices_info);
	*free_bytes = avail_space;
	return 0;
}

2008 2009 2010 2011 2012 2013 2014
/*
 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
 *
 * If there's a redundant raid level at DATA block groups, use the respective
 * multiplier to scale the sizes.
 *
 * Unused device space usage is based on simulating the chunk allocator
2015 2016 2017
 * algorithm that respects the device sizes and order of allocations.  This is
 * a close approximation of the actual use but there are other factors that may
 * change the result (like a new metadata chunk).
2018
 *
2019
 * If metadata is exhausted, f_bavail will be 0.
2020
 */
C
Chris Mason 已提交
2021 2022
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
2023 2024 2025
	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	struct list_head *head = &fs_info->space_info;
2026 2027
	struct btrfs_space_info *found;
	u64 total_used = 0;
2028
	u64 total_free_data = 0;
2029
	u64 total_free_meta = 0;
2030
	int bits = dentry->d_sb->s_blocksize_bits;
2031
	__be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
2032 2033
	unsigned factor = 1;
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2034
	int ret;
2035
	u64 thresh = 0;
2036
	int mixed = 0;
C
Chris Mason 已提交
2037

2038
	rcu_read_lock();
J
Josef Bacik 已提交
2039
	list_for_each_entry_rcu(found, head, list) {
2040
		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2041 2042
			int i;

2043 2044 2045
			total_free_data += found->disk_total - found->disk_used;
			total_free_data -=
				btrfs_account_ro_block_groups_free_space(found);
2046 2047

			for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2048 2049 2050
				if (!list_empty(&found->block_groups[i]))
					factor = btrfs_bg_type_to_factor(
						btrfs_raid_array[i].bg_flag);
2051
			}
2052
		}
2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063

		/*
		 * Metadata in mixed block goup profiles are accounted in data
		 */
		if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
			if (found->flags & BTRFS_BLOCK_GROUP_DATA)
				mixed = 1;
			else
				total_free_meta += found->disk_total -
					found->disk_used;
		}
2064

2065
		total_used += found->disk_used;
J
Josef Bacik 已提交
2066
	}
2067

2068 2069
	rcu_read_unlock();

2070 2071 2072 2073 2074 2075
	buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
	buf->f_blocks >>= bits;
	buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);

	/* Account global block reserve as used, it's in logical size already */
	spin_lock(&block_rsv->lock);
2076 2077 2078 2079 2080
	/* Mixed block groups accounting is not byte-accurate, avoid overflow */
	if (buf->f_bfree >= block_rsv->size >> bits)
		buf->f_bfree -= block_rsv->size >> bits;
	else
		buf->f_bfree = 0;
2081 2082
	spin_unlock(&block_rsv->lock);

2083
	buf->f_bavail = div_u64(total_free_data, factor);
2084
	ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
2085
	if (ret)
2086
		return ret;
2087
	buf->f_bavail += div_u64(total_free_data, factor);
2088
	buf->f_bavail = buf->f_bavail >> bits;
C
Chris Mason 已提交
2089

2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102
	/*
	 * We calculate the remaining metadata space minus global reserve. If
	 * this is (supposedly) smaller than zero, there's no space. But this
	 * does not hold in practice, the exhausted state happens where's still
	 * some positive delta. So we apply some guesswork and compare the
	 * delta to a 4M threshold.  (Practically observed delta was ~2M.)
	 *
	 * We probably cannot calculate the exact threshold value because this
	 * depends on the internal reservations requested by various
	 * operations, so some operations that consume a few metadata will
	 * succeed even if the Avail is zero. But this is better than the other
	 * way around.
	 */
2103
	thresh = SZ_4M;
2104

2105
	if (!mixed && total_free_meta - thresh < block_rsv->size)
2106 2107
		buf->f_bavail = 0;

2108 2109 2110 2111
	buf->f_type = BTRFS_SUPER_MAGIC;
	buf->f_bsize = dentry->d_sb->s_blocksize;
	buf->f_namelen = BTRFS_NAME_LEN;

2112
	/* We treat it as constant endianness (it doesn't matter _which_)
C
Chris Mason 已提交
2113
	   because we want the fsid to come out the same whether mounted
2114 2115 2116
	   on a big-endian or little-endian host */
	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2117
	/* Mask in the root object ID too, to disambiguate subvols */
2118 2119 2120 2121
	buf->f_fsid.val[0] ^=
		BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
	buf->f_fsid.val[1] ^=
		BTRFS_I(d_inode(dentry))->root->root_key.objectid;
2122

C
Chris Mason 已提交
2123 2124
	return 0;
}
C
Chris Mason 已提交
2125

A
Al Viro 已提交
2126 2127
/*
 * ->kill_sb callback: tear the superblock down with kill_anon_super() and
 * then release the btrfs_fs_info that was attached to it.
 */
static void btrfs_kill_super(struct super_block *sb)
{
	/* Look the fs_info up before the superblock is torn down. */
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	kill_anon_super(sb);
	free_fs_info(fs_info);
}

2133 2134 2135
static struct file_system_type btrfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "btrfs",
A
Al Viro 已提交
2136
	.mount		= btrfs_mount,
A
Al Viro 已提交
2137
	.kill_sb	= btrfs_kill_super,
2138
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
2139
};
2140 2141 2142 2143 2144 2145 2146 2147 2148

/*
 * Same name and flags as btrfs_fs_type, but ->mount goes to
 * btrfs_mount_root().
 */
static struct file_system_type btrfs_root_fs_type = {
	.name		= "btrfs",
	.owner		= THIS_MODULE,
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
	.mount		= btrfs_mount_root,
	.kill_sb	= btrfs_kill_super,
};

2149
MODULE_ALIAS_FS("btrfs");
2150

2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161
/*
 * ->open for the control device.  Always succeeds and only resets the file's
 * private_data.
 */
static int btrfs_control_open(struct inode *inode, struct file *file)
{
	/*
	 * private_data of the control file holds the transaction once one is
	 * started and doubles as the "transaction already in progress"
	 * marker, so it has to start out empty.
	 */
	file->private_data = NULL;

	return 0;
}

C
Chris Mason 已提交
2162 2163 2164
/*
 * used by btrfsctl to scan devices when no FS is mounted
 */
2165 2166 2167 2168
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
				unsigned long arg)
{
	struct btrfs_ioctl_vol_args *vol;
2169
	struct btrfs_device *device = NULL;
2170
	int ret = -ENOTTY;
2171

2172 2173 2174
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

L
Li Zefan 已提交
2175 2176 2177
	vol = memdup_user((void __user *)arg, sizeof(*vol));
	if (IS_ERR(vol))
		return PTR_ERR(vol);
2178
	vol->name[BTRFS_PATH_NAME_MAX] = '\0';
2179

2180 2181
	switch (cmd) {
	case BTRFS_IOC_SCAN_DEV:
2182
		mutex_lock(&uuid_mutex);
2183 2184 2185
		device = btrfs_scan_one_device(vol->name, FMODE_READ,
					       &btrfs_root_fs_type);
		ret = PTR_ERR_OR_ZERO(device);
2186
		mutex_unlock(&uuid_mutex);
2187
		break;
2188 2189 2190
	case BTRFS_IOC_FORGET_DEV:
		ret = btrfs_forget_devices(vol->name);
		break;
J
Josef Bacik 已提交
2191
	case BTRFS_IOC_DEVICES_READY:
2192
		mutex_lock(&uuid_mutex);
2193 2194 2195
		device = btrfs_scan_one_device(vol->name, FMODE_READ,
					       &btrfs_root_fs_type);
		if (IS_ERR(device)) {
2196
			mutex_unlock(&uuid_mutex);
2197
			ret = PTR_ERR(device);
J
Josef Bacik 已提交
2198
			break;
2199
		}
2200 2201
		ret = !(device->fs_devices->num_devices ==
			device->fs_devices->total_devices);
2202
		mutex_unlock(&uuid_mutex);
J
Josef Bacik 已提交
2203
		break;
2204
	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2205
		ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2206
		break;
2207
	}
L
Li Zefan 已提交
2208

2209
	kfree(vol);
L
Linda Knippers 已提交
2210
	return ret;
2211 2212
}

2213
static int btrfs_freeze(struct super_block *sb)
Y
Yan 已提交
2214
{
2215
	struct btrfs_trans_handle *trans;
2216 2217
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
2218

2219
	set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2220 2221 2222 2223 2224 2225
	/*
	 * We don't need a barrier here, we'll wait for any transaction that
	 * could be in progress on other threads (and do delayed iputs that
	 * we want to avoid on a frozen filesystem), or do the commit
	 * ourselves.
	 */
M
Miao Xie 已提交
2226
	trans = btrfs_attach_transaction_barrier(root);
2227 2228 2229 2230 2231 2232
	if (IS_ERR(trans)) {
		/* no transaction, don't bother */
		if (PTR_ERR(trans) == -ENOENT)
			return 0;
		return PTR_ERR(trans);
	}
2233
	return btrfs_commit_transaction(trans);
Y
Yan 已提交
2234 2235
}

2236 2237
static int btrfs_unfreeze(struct super_block *sb)
{
2238 2239 2240
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2241 2242 2243
	return 0;
}

J
Josef Bacik 已提交
2244 2245 2246 2247 2248 2249 2250
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
	struct btrfs_fs_devices *cur_devices;
	struct btrfs_device *dev, *first_dev = NULL;
	struct list_head *head;

2251 2252 2253 2254 2255
	/*
	 * Lightweight locking of the devices. We should not need
	 * device_list_mutex here as we only read the device data and the list
	 * is protected by RCU.  Even if a device is deleted during the list
	 * traversals, we'll get valid data, the freeing callback will wait at
2256
	 * least until the rcu_read_unlock.
2257 2258
	 */
	rcu_read_lock();
J
Josef Bacik 已提交
2259 2260 2261
	cur_devices = fs_info->fs_devices;
	while (cur_devices) {
		head = &cur_devices->devices;
2262
		list_for_each_entry_rcu(dev, head, dev_list) {
2263
			if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
2264
				continue;
2265 2266
			if (!dev->name)
				continue;
J
Josef Bacik 已提交
2267 2268 2269 2270 2271 2272
			if (!first_dev || dev->devid < first_dev->devid)
				first_dev = dev;
		}
		cur_devices = cur_devices->seed;
	}

2273 2274 2275
	if (first_dev)
		seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
	else
J
Josef Bacik 已提交
2276
		WARN_ON(1);
2277
	rcu_read_unlock();
J
Josef Bacik 已提交
2278 2279 2280
	return 0;
}

2281
static const struct super_operations btrfs_super_ops = {
2282
	.drop_inode	= btrfs_drop_inode,
A
Al Viro 已提交
2283
	.evict_inode	= btrfs_evict_inode,
C
Chris Mason 已提交
2284
	.put_super	= btrfs_put_super,
2285
	.sync_fs	= btrfs_sync_fs,
E
Eric Paris 已提交
2286
	.show_options	= btrfs_show_options,
J
Josef Bacik 已提交
2287
	.show_devname	= btrfs_show_devname,
C
Chris Mason 已提交
2288 2289
	.alloc_inode	= btrfs_alloc_inode,
	.destroy_inode	= btrfs_destroy_inode,
A
Al Viro 已提交
2290
	.free_inode	= btrfs_free_inode,
C
Chris Mason 已提交
2291
	.statfs		= btrfs_statfs,
Y
Yan Zheng 已提交
2292
	.remount_fs	= btrfs_remount,
2293
	.freeze_fs	= btrfs_freeze,
2294
	.unfreeze_fs	= btrfs_unfreeze,
C
Chris Mason 已提交
2295
};
2296 2297

static const struct file_operations btrfs_ctl_fops = {
2298
	.open = btrfs_control_open,
2299 2300 2301
	.unlocked_ioctl	 = btrfs_control_ioctl,
	.compat_ioctl = btrfs_control_ioctl,
	.owner	 = THIS_MODULE,
2302
	.llseek = noop_llseek,
2303 2304 2305
};

static struct miscdevice btrfs_misc = {
2306
	.minor		= BTRFS_MINOR,
2307 2308 2309 2310
	.name		= "btrfs-control",
	.fops		= &btrfs_ctl_fops
};

2311 2312 2313
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");

2314
/* Register the control misc device; returns misc_register()'s result. */
static int __init btrfs_interface_init(void)
{
	int ret = misc_register(&btrfs_misc);

	return ret;
}

2319
static __cold void btrfs_interface_exit(void)
2320
{
2321
	misc_deregister(&btrfs_misc);
2322 2323
}

2324
/*
 * Log a one-line banner with the crc32c implementation in use followed by the
 * optional features that were compiled in.
 */
static void __init btrfs_print_mod_info(void)
{
	/* Concatenation of one ", name=on" fragment per enabled config. */
	static const char features[] = ""
#ifdef CONFIG_BTRFS_DEBUG
		", debug=on"
#endif
#ifdef CONFIG_BTRFS_ASSERT
		", assert=on"
#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		", integrity-checker=on"
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
		", ref-verify=on"
#endif
		;

	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), features);
}

2343 2344
/*
 * Module init: bring up every btrfs subsystem in order and finally register
 * the filesystem type.  When a step fails, the steps that already succeeded
 * are undone in reverse order through the label chain at the bottom and the
 * error of the failing step is returned.
 */
static int __init init_btrfs_fs(void)
{
	int err;

	btrfs_props_init();

	err = btrfs_init_sysfs();
	if (err)
		return err;

	btrfs_init_compress();

	err = btrfs_init_cachep();
	if (err)
		goto free_compress;

	err = extent_io_init();
	if (err)
		goto free_cachep;

	err = extent_map_init();
	if (err)
		goto free_extent_io;

	err = ordered_data_init();
	if (err)
		goto free_extent_map;

	err = btrfs_delayed_inode_init();
	if (err)
		goto free_ordered_data;

	err = btrfs_auto_defrag_init();
	if (err)
		goto free_delayed_inode;

	err = btrfs_delayed_ref_init();
	if (err)
		goto free_auto_defrag;

	err = btrfs_prelim_ref_init();
	if (err)
		goto free_delayed_ref;

	err = btrfs_end_io_wq_init();
	if (err)
		goto free_prelim_ref;

	err = btrfs_interface_init();
	if (err)
		goto free_end_io_wq;

	btrfs_init_lockdep();

	btrfs_print_mod_info();

	err = btrfs_run_sanity_tests();
	if (err)
		goto unregister_ioctl;

	err = register_filesystem(&btrfs_fs_type);
	if (err)
		goto unregister_ioctl;

	return 0;

	/* Unwind: each label undoes one step and falls through to the next. */
unregister_ioctl:
	btrfs_interface_exit();
free_end_io_wq:
	btrfs_end_io_wq_exit();
free_prelim_ref:
	btrfs_prelim_ref_exit();
free_delayed_ref:
	btrfs_delayed_ref_exit();
free_auto_defrag:
	btrfs_auto_defrag_exit();
free_delayed_inode:
	btrfs_delayed_inode_exit();
free_ordered_data:
	ordered_data_exit();
free_extent_map:
	extent_map_exit();
free_extent_io:
	extent_io_exit();
free_cachep:
	btrfs_destroy_cachep();
free_compress:
	btrfs_exit_compress();
	btrfs_exit_sysfs();

	return err;
}

/*
 * Module exit: shut down the subsystems, unregister the filesystem type and
 * release the remaining global state.
 */
static void __exit exit_btrfs_fs(void)
{
	/* caches and helper subsystems */
	btrfs_destroy_cachep();
	btrfs_delayed_ref_exit();
	btrfs_auto_defrag_exit();
	btrfs_delayed_inode_exit();
	btrfs_prelim_ref_exit();
	ordered_data_exit();
	extent_map_exit();
	extent_io_exit();

	/* user visible interfaces */
	btrfs_interface_exit();
	btrfs_end_io_wq_exit();
	unregister_filesystem(&btrfs_fs_type);
	btrfs_exit_sysfs();

	/* remaining global state */
	btrfs_cleanup_fs_uuids();
	btrfs_exit_compress();
}

2454
late_initcall(init_btrfs_fs);
2455 2456 2457
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");
2458
MODULE_SOFTDEP("pre: crc32c");