super.c 64.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
C
Chris Mason 已提交
2 3 4 5
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

Y
Yan 已提交
6
#include <linux/blkdev.h>
7 8 9 10 11 12
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
E
Eric Paris 已提交
13
#include <linux/seq_file.h>
14 15
#include <linux/string.h>
#include <linux/backing-dev.h>
Y
Yan 已提交
16
#include <linux/mount.h>
C
Chris Mason 已提交
17
#include <linux/writeback.h>
C
Chris Mason 已提交
18
#include <linux/statfs.h>
C
Chris Mason 已提交
19
#include <linux/compat.h>
20
#include <linux/parser.h>
21
#include <linux/ctype.h>
22
#include <linux/namei.h>
23
#include <linux/miscdevice.h>
24
#include <linux/magic.h>
25
#include <linux/slab.h>
D
Dan Magenheimer 已提交
26
#include <linux/cleancache.h>
27
#include <linux/ratelimit.h>
28
#include <linux/crc32c.h>
29
#include <linux/btrfs.h>
30
#include "delayed-inode.h"
31
#include "ctree.h"
C
Chris Mason 已提交
32
#include "disk-io.h"
33
#include "transaction.h"
C
Chris Mason 已提交
34
#include "btrfs_inode.h"
C
Chris Mason 已提交
35
#include "print-tree.h"
36
#include "props.h"
J
Josef Bacik 已提交
37
#include "xattr.h"
38
#include "volumes.h"
B
Balaji Rao 已提交
39
#include "export.h"
C
Chris Mason 已提交
40
#include "compression.h"
J
Josef Bacik 已提交
41
#include "rcu-string.h"
42
#include "dev-replace.h"
43
#include "free-space-cache.h"
44
#include "backref.h"
45
#include "space-info.h"
46
#include "sysfs.h"
47
#include "tests/btrfs-tests.h"
48
#include "block-group.h"
49

50
#include "qgroup.h"
51 52 53
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>

54
static const struct super_operations btrfs_super_ops;
55 56 57 58 59 60

/*
 * Types for mounting the default subvolume and a subvolume explicitly
 * requested by subvol=/path. That way the callchain is straightforward and we
 * don't have to play tricks with the mount options and recursive calls to
 * btrfs_mount.
61 62
 *
 * The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
63
 */
64
static struct file_system_type btrfs_fs_type;
65
static struct file_system_type btrfs_root_fs_type;
C
Chris Mason 已提交
66

67 68
static int btrfs_remount(struct super_block *sb, int *flags, char *data);

D
David Sterba 已提交
69
/*
 * Translate a negative errno value into a short, human-readable description
 * for use in log messages.
 *
 * Only the codes listed in the switch below have a dedicated description;
 * every other value yields "unknown".  The returned pointer refers to a
 * string literal, so it must be neither modified nor freed.
 */
const char * __attribute_const__ btrfs_decode_error(int errno)
{
	/* const: every value assigned below is a string literal */
	const char *errstr = "unknown";

	switch (errno) {
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		errstr = "Readonly filesystem";
		break;
	case -EEXIST:
		errstr = "Object already exists";
		break;
	case -ENOSPC:
		errstr = "No space left";
		break;
	case -ENOENT:
		errstr = "No such entry";
		break;
	}

	return errstr;
}

/*
98
 * __btrfs_handle_fs_error decodes expected errors from the caller and
99
 * invokes the appropriate error response.
L
liubo 已提交
100
 */
101
__cold
102
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
J
Jeff Mahoney 已提交
103
		       unsigned int line, int errno, const char *fmt, ...)
L
liubo 已提交
104 105
{
	struct super_block *sb = fs_info->sb;
106
#ifdef CONFIG_PRINTK
L
liubo 已提交
107
	const char *errstr;
108
#endif
L
liubo 已提交
109 110 111

	/*
	 * Special case: if the error is EROFS, and we're already
112
	 * under SB_RDONLY, then it is safe here.
L
liubo 已提交
113
	 */
114
	if (errno == -EROFS && sb_rdonly(sb))
J
Jeff Mahoney 已提交
115 116
  		return;

117
#ifdef CONFIG_PRINTK
118
	errstr = btrfs_decode_error(errno);
J
Jeff Mahoney 已提交
119
	if (fmt) {
120 121 122 123 124 125
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
J
Jeff Mahoney 已提交
126

127
		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
128
			sb->s_id, function, line, errno, errstr, &vaf);
129
		va_end(args);
J
Jeff Mahoney 已提交
130
	} else {
131
		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
132
			sb->s_id, function, line, errno, errstr);
J
Jeff Mahoney 已提交
133
	}
134
#endif
L
liubo 已提交
135

A
Anand Jain 已提交
136 137 138 139 140 141
	/*
	 * Today we only save the error info to memory.  Long term we'll
	 * also send it down to the disk
	 */
	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);

J
Jeff Mahoney 已提交
142
	/* Don't go through full error handling during mount */
143 144 145 146 147 148 149 150 151 152 153 154 155 156
	if (!(sb->s_flags & SB_BORN))
		return;

	if (sb_rdonly(sb))
		return;

	/* btrfs handle error by forcing the filesystem readonly */
	sb->s_flags |= SB_RDONLY;
	btrfs_info(fs_info, "forced readonly");
	/*
	 * Note that a running device replace operation is not canceled here
	 * although there is no way to update the progress. It would add the
	 * risk of a deadlock, therefore the canceling is omitted. The only
	 * penalty is that some I/O remains active until the procedure
157
	 * completes. The next time when the filesystem is mounted writable
158 159
	 * again, the device replace operation continues.
	 */
J
Jeff Mahoney 已提交
160
}
L
liubo 已提交
161

162
#ifdef CONFIG_PRINTK
163
/* Human-readable names of the printk log levels, indexed by level digit. */
static const char * const logtypes[] = {
	[0] = "emergency",
	[1] = "alert",
	[2] = "critical",
	[3] = "error",
	[4] = "warning",
	[5] = "notice",
	[6] = "info",
	[7] = "debug",
};

174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189

/*
 * One ratelimit state per log level, indexed like logtypes[], so that a
 * flood of less important messages cannot cause more important ones to be
 * dropped.
 */
static struct ratelimit_state printk_limits[] = {
	[0] = RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
	[1] = RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
	[2] = RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
	[3] = RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
	[4] = RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
	[5] = RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
	[6] = RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
	[7] = RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};

190
/*
 * Print a btrfs message, prefixed with the log level name and the device id.
 *
 * Any printk level headers at the start of @fmt are stripped.  A header
 * carrying a level digit '0'..'7' selects both the level name that is printed
 * and the per-level ratelimit state; without one the message is treated as a
 * warning (index 4).  @fs_info may be NULL, in which case the device is
 * reported as "<unknown>".
 */
void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
	/* Index into logtypes[] and printk_limits[]; 4 is "warning". */
	int level_idx = 4;
	struct va_format vaf;
	va_list args;
	int kern_level;

	va_start(args, fmt);

	/* Consume every leading level header, remembering the last digit. */
	for (;;) {
		size_t hdr_len;

		kern_level = printk_get_level(fmt);
		if (kern_level == 0)
			break;

		hdr_len = printk_skip_level(fmt) - fmt;
		if ('0' <= kern_level && kern_level <= '7') {
			memcpy(lvl, fmt, hdr_len);
			lvl[hdr_len] = '\0';
			level_idx = kern_level - '0';
		}
		fmt += hdr_len;
	}

	vaf.fmt = fmt;
	vaf.va = &args;

	if (__ratelimit(&printk_limits[level_idx]))
		printk("%sBTRFS %s (device %s): %pV\n", lvl,
			logtypes[level_idx],
			fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);

	va_end(args);
}
#endif
L
liubo 已提交
223

224 225 226 227 228 229 230 231 232 233 234 235 236
/*
 * We only mark the transaction aborted and then set the file system read-only.
 * This will prevent new transactions from starting or trying to join this
 * one.
 *
 * This means that error recovery at the call site is limited to freeing
 * any local memory allocations and passing the error code up without
 * further cleanup. The transaction should complete as it normally would
 * in the call path but will return -EIO.
 *
 * We'll complete the cleanup in btrfs_end_transaction and
 * btrfs_commit_transaction.
 */
237
__cold
238
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
239
			       const char *function,
240 241
			       unsigned int line, int errno)
{
242 243
	struct btrfs_fs_info *fs_info = trans->fs_info;

244 245 246
	trans->aborted = errno;
	/* Nothing used. The other threads that have joined this
	 * transaction may be able to continue. */
247
	if (!trans->dirty && list_empty(&trans->new_bgs)) {
248 249
		const char *errstr;

250
		errstr = btrfs_decode_error(errno);
251
		btrfs_warn(fs_info,
252 253
		           "%s:%d: Aborting unused transaction(%s).",
		           function, line, errstr);
L
liubo 已提交
254
		return;
255
	}
S
Seraphime Kirkovski 已提交
256
	WRITE_ONCE(trans->transaction->aborted, errno);
257
	/* Wake up anybody who may be waiting on this transaction */
258 259 260
	wake_up(&fs_info->transaction_wait);
	wake_up(&fs_info->transaction_blocked_wait);
	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
261
}
J
Jeff Mahoney 已提交
262 263 264 265
/*
 * __btrfs_panic decodes unexpected, fatal errors from the caller,
 * issues an alert, and either panics or BUGs, depending on mount options.
 */
266
__cold
J
Jeff Mahoney 已提交
267 268 269 270 271 272 273
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
		   unsigned int line, int errno, const char *fmt, ...)
{
	char *s_id = "<unknown>";
	const char *errstr;
	struct va_format vaf = { .fmt = fmt };
	va_list args;
L
liubo 已提交
274

J
Jeff Mahoney 已提交
275 276
	if (fs_info)
		s_id = fs_info->sb->s_id;
L
liubo 已提交
277

J
Jeff Mahoney 已提交
278 279 280
	va_start(args, fmt);
	vaf.va = &args;

281
	errstr = btrfs_decode_error(errno);
282
	if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
283 284
		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
			s_id, function, line, &vaf, errno, errstr);
J
Jeff Mahoney 已提交
285

286 287
	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
		   function, line, &vaf, errno, errstr);
J
Jeff Mahoney 已提交
288 289
	va_end(args);
	/* Caller calls BUG() */
L
liubo 已提交
290 291
}

C
Chris Mason 已提交
292
/* Release a superblock by closing the btrfs fs_info attached to it. */
static void btrfs_put_super(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	close_ctree(fs_info);
}

297
/* Identifiers for the mount options listed in the tokens[] match table. */
enum {
	Opt_acl,
	Opt_noacl,
	Opt_clear_cache,
	Opt_commit_interval,
	Opt_compress,
	Opt_compress_force,
	Opt_compress_force_type,
	Opt_compress_type,
	Opt_degraded,
	Opt_device,
	Opt_fatal_errors,
	Opt_flushoncommit,
	Opt_noflushoncommit,
	Opt_inode_cache,
	Opt_noinode_cache,
	Opt_max_inline,
	Opt_barrier,
	Opt_nobarrier,
	Opt_datacow,
	Opt_nodatacow,
	Opt_datasum,
	Opt_nodatasum,
	Opt_defrag,
	Opt_nodefrag,
	Opt_discard,
	Opt_nodiscard,
	Opt_nologreplay,
	Opt_norecovery,
	Opt_ratio,
	Opt_rescan_uuid_tree,
	Opt_skip_balance,
	Opt_space_cache,
	Opt_no_space_cache,
	Opt_space_cache_version,
	Opt_ssd,
	Opt_nossd,
	Opt_ssd_spread,
	Opt_nossd_spread,
	Opt_subvol,
	Opt_subvol_empty,
	Opt_subvolid,
	Opt_thread_pool,
	Opt_treelog,
	Opt_notreelog,
	Opt_usebackuproot,
	Opt_user_subvol_rm_allowed,

	/* Deprecated options */
	Opt_alloc_start,
	Opt_recovery,
	Opt_subvolrootid,

	/* Debugging options */
	Opt_check_integrity,
	Opt_check_integrity_including_extent_data,
	Opt_check_integrity_print_mask,
	Opt_enospc_debug,
	Opt_noenospc_debug,
#ifdef CONFIG_BTRFS_DEBUG
	Opt_fragment_data,
	Opt_fragment_metadata,
	Opt_fragment_all,
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	Opt_ref_verify,
#endif
	/* Sentinel: paired with the NULL pattern that ends tokens[] */
	Opt_err,
};

D
David Sterba 已提交
352
static const match_table_t tokens = {
353 354 355 356
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_clear_cache, "clear_cache"},
	{Opt_commit_interval, "commit=%u"},
C
Chris Mason 已提交
357
	{Opt_compress, "compress"},
358
	{Opt_compress_type, "compress=%s"},
C
Chris Mason 已提交
359
	{Opt_compress_force, "compress-force"},
360
	{Opt_compress_force_type, "compress-force=%s"},
361 362 363
	{Opt_degraded, "degraded"},
	{Opt_device, "device=%s"},
	{Opt_fatal_errors, "fatal_errors=%s"},
364
	{Opt_flushoncommit, "flushoncommit"},
365
	{Opt_noflushoncommit, "noflushoncommit"},
366 367 368 369 370 371 372 373 374 375 376
	{Opt_inode_cache, "inode_cache"},
	{Opt_noinode_cache, "noinode_cache"},
	{Opt_max_inline, "max_inline=%s"},
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_datacow, "datacow"},
	{Opt_nodatacow, "nodatacow"},
	{Opt_datasum, "datasum"},
	{Opt_nodatasum, "nodatasum"},
	{Opt_defrag, "autodefrag"},
	{Opt_nodefrag, "noautodefrag"},
C
Christoph Hellwig 已提交
377
	{Opt_discard, "discard"},
Q
Qu Wenruo 已提交
378
	{Opt_nodiscard, "nodiscard"},
379 380 381 382 383
	{Opt_nologreplay, "nologreplay"},
	{Opt_norecovery, "norecovery"},
	{Opt_ratio, "metadata_ratio=%u"},
	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
	{Opt_skip_balance, "skip_balance"},
384
	{Opt_space_cache, "space_cache"},
385
	{Opt_no_space_cache, "nospace_cache"},
386 387 388 389 390 391
	{Opt_space_cache_version, "space_cache=%s"},
	{Opt_ssd, "ssd"},
	{Opt_nossd, "nossd"},
	{Opt_ssd_spread, "ssd_spread"},
	{Opt_nossd_spread, "nossd_spread"},
	{Opt_subvol, "subvol=%s"},
O
Omar Sandoval 已提交
392
	{Opt_subvol_empty, "subvol="},
393 394 395 396
	{Opt_subvolid, "subvolid=%s"},
	{Opt_thread_pool, "thread_pool=%u"},
	{Opt_treelog, "treelog"},
	{Opt_notreelog, "notreelog"},
397
	{Opt_usebackuproot, "usebackuproot"},
398 399 400 401 402 403 404 405
	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},

	/* Deprecated options */
	{Opt_alloc_start, "alloc_start=%s"},
	{Opt_recovery, "recovery"},
	{Opt_subvolrootid, "subvolrootid=%d"},

	/* Debugging options */
406 407
	{Opt_check_integrity, "check_int"},
	{Opt_check_integrity_including_extent_data, "check_int_data"},
408
	{Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
409 410
	{Opt_enospc_debug, "enospc_debug"},
	{Opt_noenospc_debug, "noenospc_debug"},
411 412 413 414
#ifdef CONFIG_BTRFS_DEBUG
	{Opt_fragment_data, "fragment=data"},
	{Opt_fragment_metadata, "fragment=metadata"},
	{Opt_fragment_all, "fragment=all"},
J
Josef Bacik 已提交
415 416 417
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	{Opt_ref_verify, "ref_verify"},
418
#endif
J
Josef Bacik 已提交
419
	{Opt_err, NULL},
420 421
};

422 423 424
/*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
425
 * XXX JDM: This needs to be cleaned up for remount.
426
 */
427
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
428
			unsigned long new_flags)
429 430
{
	substring_t args[MAX_OPT_ARGS];
431
	char *p, *num;
432
	u64 cache_gen;
433
	int intarg;
S
Sage Weil 已提交
434
	int ret = 0;
435 436
	char *compress_type;
	bool compress_force = false;
437 438 439
	enum btrfs_compression_type saved_compress_type;
	bool saved_compress_force;
	int no_compress = 0;
440

441 442
	cache_gen = btrfs_super_cache_generation(info->super_copy);
	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
443 444
		btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
	else if (cache_gen)
445 446
		btrfs_set_opt(info->mount_opt, SPACE_CACHE);

447 448 449 450
	/*
	 * Even the options are empty, we still need to do extra check
	 * against new flags
	 */
451
	if (!options)
452
		goto check;
453

454
	while ((p = strsep(&options, ",")) != NULL) {
455 456 457 458 459 460
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
461
		case Opt_degraded:
462
			btrfs_info(info, "allowing degraded mounts");
463
			btrfs_set_opt(info->mount_opt, DEGRADED);
464
			break;
465
		case Opt_subvol:
O
Omar Sandoval 已提交
466
		case Opt_subvol_empty:
467
		case Opt_subvolid:
468
		case Opt_subvolrootid:
469
		case Opt_device:
470
			/*
471 472
			 * These are parsed by btrfs_parse_subvol_options or
			 * btrfs_parse_device_options and can be ignored here.
473
			 */
474 475
			break;
		case Opt_nodatasum:
476
			btrfs_set_and_info(info, NODATASUM,
477
					   "setting nodatasum");
478
			break;
Q
Qu Wenruo 已提交
479
		case Opt_datasum:
480 481
			if (btrfs_test_opt(info, NODATASUM)) {
				if (btrfs_test_opt(info, NODATACOW))
482
					btrfs_info(info,
J
Jeff Mahoney 已提交
483
						   "setting datasum, datacow enabled");
484
				else
485
					btrfs_info(info, "setting datasum");
486
			}
Q
Qu Wenruo 已提交
487 488 489
			btrfs_clear_opt(info->mount_opt, NODATACOW);
			btrfs_clear_opt(info->mount_opt, NODATASUM);
			break;
490
		case Opt_nodatacow:
491 492 493
			if (!btrfs_test_opt(info, NODATACOW)) {
				if (!btrfs_test_opt(info, COMPRESS) ||
				    !btrfs_test_opt(info, FORCE_COMPRESS)) {
494
					btrfs_info(info,
495 496
						   "setting nodatacow, compression disabled");
				} else {
497
					btrfs_info(info, "setting nodatacow");
498
				}
499 500 501
			}
			btrfs_clear_opt(info->mount_opt, COMPRESS);
			btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
502 503
			btrfs_set_opt(info->mount_opt, NODATACOW);
			btrfs_set_opt(info->mount_opt, NODATASUM);
504
			break;
Q
Qu Wenruo 已提交
505
		case Opt_datacow:
506
			btrfs_clear_and_info(info, NODATACOW,
507
					     "setting datacow");
Q
Qu Wenruo 已提交
508
			break;
C
Chris Mason 已提交
509
		case Opt_compress_force:
510 511
		case Opt_compress_force_type:
			compress_force = true;
512
			/* Fallthrough */
513 514
		case Opt_compress:
		case Opt_compress_type:
515 516
			saved_compress_type = btrfs_test_opt(info,
							     COMPRESS) ?
517 518
				info->compress_type : BTRFS_COMPRESS_NONE;
			saved_compress_force =
519
				btrfs_test_opt(info, FORCE_COMPRESS);
520 521
			if (token == Opt_compress ||
			    token == Opt_compress_force ||
522
			    strncmp(args[0].from, "zlib", 4) == 0) {
523
				compress_type = "zlib";
524

525
				info->compress_type = BTRFS_COMPRESS_ZLIB;
526 527 528 529 530 531 532 533 534
				info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
				/*
				 * args[0] contains uninitialized data since
				 * for these tokens we don't expect any
				 * parameter.
				 */
				if (token != Opt_compress &&
				    token != Opt_compress_force)
					info->compress_level =
535 536 537
					  btrfs_compress_str2level(
							BTRFS_COMPRESS_ZLIB,
							args[0].from + 4);
538
				btrfs_set_opt(info->mount_opt, COMPRESS);
539 540
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
541
				no_compress = 0;
542
			} else if (strncmp(args[0].from, "lzo", 3) == 0) {
L
Li Zefan 已提交
543 544
				compress_type = "lzo";
				info->compress_type = BTRFS_COMPRESS_LZO;
545
				btrfs_set_opt(info->mount_opt, COMPRESS);
546 547
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
548
				btrfs_set_fs_incompat(info, COMPRESS_LZO);
549
				no_compress = 0;
550
			} else if (strncmp(args[0].from, "zstd", 4) == 0) {
N
Nick Terrell 已提交
551 552
				compress_type = "zstd";
				info->compress_type = BTRFS_COMPRESS_ZSTD;
553 554 555 556
				info->compress_level =
					btrfs_compress_str2level(
							 BTRFS_COMPRESS_ZSTD,
							 args[0].from + 4);
N
Nick Terrell 已提交
557 558 559 560 561
				btrfs_set_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
				btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
				no_compress = 0;
562 563 564 565 566
			} else if (strncmp(args[0].from, "no", 2) == 0) {
				compress_type = "no";
				btrfs_clear_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				compress_force = false;
567
				no_compress++;
568 569 570 571 572 573
			} else {
				ret = -EINVAL;
				goto out;
			}

			if (compress_force) {
574
				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
575
			} else {
576 577 578 579 580 581 582
				/*
				 * If we remount from compress-force=xxx to
				 * compress=xxx, we need clear FORCE_COMPRESS
				 * flag, otherwise, there is no way for users
				 * to disable forcible compression separately.
				 */
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
583
			}
584
			if ((btrfs_test_opt(info, COMPRESS) &&
585 586
			     (info->compress_type != saved_compress_type ||
			      compress_force != saved_compress_force)) ||
587
			    (!btrfs_test_opt(info, COMPRESS) &&
588
			     no_compress == 1)) {
589
				btrfs_info(info, "%s %s compression, level %d",
590
					   (compress_force) ? "force" : "use",
591
					   compress_type, info->compress_level);
592 593
			}
			compress_force = false;
C
Chris Mason 已提交
594
			break;
595
		case Opt_ssd:
596
			btrfs_set_and_info(info, SSD,
597
					   "enabling ssd optimizations");
598
			btrfs_clear_opt(info->mount_opt, NOSSD);
599
			break;
600
		case Opt_ssd_spread:
601 602
			btrfs_set_and_info(info, SSD,
					   "enabling ssd optimizations");
603
			btrfs_set_and_info(info, SSD_SPREAD,
604
					   "using spread ssd allocation scheme");
605
			btrfs_clear_opt(info->mount_opt, NOSSD);
606
			break;
C
Chris Mason 已提交
607
		case Opt_nossd:
608 609 610
			btrfs_set_opt(info->mount_opt, NOSSD);
			btrfs_clear_and_info(info, SSD,
					     "not using ssd optimizations");
611 612
			/* Fallthrough */
		case Opt_nossd_spread:
613 614
			btrfs_clear_and_info(info, SSD_SPREAD,
					     "not using spread ssd allocation scheme");
C
Chris Mason 已提交
615
			break;
616
		case Opt_barrier:
617
			btrfs_clear_and_info(info, NOBARRIER,
618
					     "turning on barriers");
619
			break;
620
		case Opt_nobarrier:
621
			btrfs_set_and_info(info, NOBARRIER,
622
					   "turning off barriers");
623
			break;
624
		case Opt_thread_pool:
625 626 627
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
628
			} else if (intarg == 0) {
629 630 631
				ret = -EINVAL;
				goto out;
			}
632
			info->thread_pool_size = intarg;
633
			break;
634
		case Opt_max_inline:
635 636
			num = match_strdup(&args[0]);
			if (num) {
A
Akinobu Mita 已提交
637
				info->max_inline = memparse(num, NULL);
638 639
				kfree(num);

C
Chris Mason 已提交
640
				if (info->max_inline) {
641
					info->max_inline = min_t(u64,
C
Chris Mason 已提交
642
						info->max_inline,
643
						info->sectorsize);
C
Chris Mason 已提交
644
				}
645 646
				btrfs_info(info, "max_inline at %llu",
					   info->max_inline);
647 648 649
			} else {
				ret = -ENOMEM;
				goto out;
650 651
			}
			break;
652
		case Opt_alloc_start:
653 654
			btrfs_info(info,
				"option alloc_start is obsolete, ignored");
655
			break;
Q
Qu Wenruo 已提交
656
		case Opt_acl:
657
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
658
			info->sb->s_flags |= SB_POSIXACL;
Q
Qu Wenruo 已提交
659
			break;
660
#else
661
			btrfs_err(info, "support for ACL not compiled in!");
662 663 664
			ret = -EINVAL;
			goto out;
#endif
J
Josef Bacik 已提交
665
		case Opt_noacl:
666
			info->sb->s_flags &= ~SB_POSIXACL;
J
Josef Bacik 已提交
667
			break;
S
Sage Weil 已提交
668
		case Opt_notreelog:
669
			btrfs_set_and_info(info, NOTREELOG,
670
					   "disabling tree log");
Q
Qu Wenruo 已提交
671 672
			break;
		case Opt_treelog:
673
			btrfs_clear_and_info(info, NOTREELOG,
674
					     "enabling tree log");
S
Sage Weil 已提交
675
			break;
676
		case Opt_norecovery:
677
		case Opt_nologreplay:
678
			btrfs_set_and_info(info, NOLOGREPLAY,
679 680
					   "disabling log replay at mount time");
			break;
681
		case Opt_flushoncommit:
682
			btrfs_set_and_info(info, FLUSHONCOMMIT,
683
					   "turning on flush-on-commit");
684
			break;
685
		case Opt_noflushoncommit:
686
			btrfs_clear_and_info(info, FLUSHONCOMMIT,
687
					     "turning off flush-on-commit");
688
			break;
689
		case Opt_ratio:
690
			ret = match_int(&args[0], &intarg);
691
			if (ret)
692
				goto out;
693 694 695
			info->metadata_ratio = intarg;
			btrfs_info(info, "metadata ratio %u",
				   info->metadata_ratio);
696
			break;
C
Christoph Hellwig 已提交
697
		case Opt_discard:
698
			btrfs_set_and_info(info, DISCARD,
699
					   "turning on discard");
C
Christoph Hellwig 已提交
700
			break;
Q
Qu Wenruo 已提交
701
		case Opt_nodiscard:
702
			btrfs_clear_and_info(info, DISCARD,
703
					     "turning off discard");
Q
Qu Wenruo 已提交
704
			break;
705
		case Opt_space_cache:
706 707 708
		case Opt_space_cache_version:
			if (token == Opt_space_cache ||
			    strcmp(args[0].from, "v1") == 0) {
709
				btrfs_clear_opt(info->mount_opt,
710
						FREE_SPACE_TREE);
711
				btrfs_set_and_info(info, SPACE_CACHE,
712
					   "enabling disk space caching");
713
			} else if (strcmp(args[0].from, "v2") == 0) {
714
				btrfs_clear_opt(info->mount_opt,
715
						SPACE_CACHE);
716
				btrfs_set_and_info(info, FREE_SPACE_TREE,
717 718 719 720 721
						   "enabling free space tree");
			} else {
				ret = -EINVAL;
				goto out;
			}
722
			break;
723 724 725
		case Opt_rescan_uuid_tree:
			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
			break;
726
		case Opt_no_space_cache:
727
			if (btrfs_test_opt(info, SPACE_CACHE)) {
728 729
				btrfs_clear_and_info(info, SPACE_CACHE,
					     "disabling disk space caching");
730
			}
731
			if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
732 733
				btrfs_clear_and_info(info, FREE_SPACE_TREE,
					     "disabling free space tree");
734
			}
735
			break;
C
Chris Mason 已提交
736
		case Opt_inode_cache:
737
			btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
738
					   "enabling inode map caching");
739 740
			break;
		case Opt_noinode_cache:
741
			btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
742
					     "disabling inode map caching");
C
Chris Mason 已提交
743
			break;
744
		case Opt_clear_cache:
745
			btrfs_set_and_info(info, CLEAR_CACHE,
746
					   "force clearing of disk cache");
747
			break;
748 749 750
		case Opt_user_subvol_rm_allowed:
			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
			break;
751 752 753
		case Opt_enospc_debug:
			btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
754 755 756
		case Opt_noenospc_debug:
			btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
C
Chris Mason 已提交
757
		case Opt_defrag:
758
			btrfs_set_and_info(info, AUTO_DEFRAG,
759
					   "enabling auto defrag");
C
Chris Mason 已提交
760
			break;
761
		case Opt_nodefrag:
762
			btrfs_clear_and_info(info, AUTO_DEFRAG,
763
					     "disabling auto defrag");
764
			break;
C
Chris Mason 已提交
765
		case Opt_recovery:
766
			btrfs_warn(info,
767
				   "'recovery' is deprecated, use 'usebackuproot' instead");
768
			/* fall through */
769
		case Opt_usebackuproot:
770
			btrfs_info(info,
771 772
				   "trying to use backup root at mount time");
			btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
C
Chris Mason 已提交
773
			break;
774 775 776
		case Opt_skip_balance:
			btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
			break;
777 778
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		case Opt_check_integrity_including_extent_data:
779
			btrfs_info(info,
780
				   "enabling check integrity including extent data");
781 782 783 784 785
			btrfs_set_opt(info->mount_opt,
				      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity:
786
			btrfs_info(info, "enabling check integrity");
787 788 789
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity_print_mask:
790
			ret = match_int(&args[0], &intarg);
791
			if (ret)
792
				goto out;
793 794 795
			info->check_integrity_print_mask = intarg;
			btrfs_info(info, "check_integrity_print_mask 0x%x",
				   info->check_integrity_print_mask);
796 797 798 799 800
			break;
#else
		case Opt_check_integrity_including_extent_data:
		case Opt_check_integrity:
		case Opt_check_integrity_print_mask:
801 802
			btrfs_err(info,
				  "support for check_integrity* not compiled in!");
803 804 805
			ret = -EINVAL;
			goto out;
#endif
J
Jeff Mahoney 已提交
806 807 808 809 810 811 812 813 814 815 816 817
		case Opt_fatal_errors:
			if (strcmp(args[0].from, "panic") == 0)
				btrfs_set_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else if (strcmp(args[0].from, "bug") == 0)
				btrfs_clear_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else {
				ret = -EINVAL;
				goto out;
			}
			break;
818 819 820
		case Opt_commit_interval:
			intarg = 0;
			ret = match_int(&args[0], &intarg);
821
			if (ret)
822
				goto out;
823
			if (intarg == 0) {
824
				btrfs_info(info,
825
					   "using default commit interval %us",
J
Jeff Mahoney 已提交
826
					   BTRFS_DEFAULT_COMMIT_INTERVAL);
827 828 829 830
				intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
			} else if (intarg > 300) {
				btrfs_warn(info, "excessive commit interval %d",
					   intarg);
831
			}
832
			info->commit_interval = intarg;
833
			break;
834 835
#ifdef CONFIG_BTRFS_DEBUG
		case Opt_fragment_all:
836
			btrfs_info(info, "fragmenting all space");
837 838 839 840
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
			break;
		case Opt_fragment_metadata:
841
			btrfs_info(info, "fragmenting metadata");
842 843 844 845
			btrfs_set_opt(info->mount_opt,
				      FRAGMENT_METADATA);
			break;
		case Opt_fragment_data:
846
			btrfs_info(info, "fragmenting data");
847 848
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			break;
J
Josef Bacik 已提交
849 850 851 852 853 854
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
		case Opt_ref_verify:
			btrfs_info(info, "doing ref verification");
			btrfs_set_opt(info->mount_opt, REF_VERIFY);
			break;
855
#endif
S
Sage Weil 已提交
856
		case Opt_err:
857
			btrfs_info(info, "unrecognized mount option '%s'", p);
S
Sage Weil 已提交
858 859
			ret = -EINVAL;
			goto out;
860
		default:
861
			break;
862 863
		}
	}
864 865 866 867
check:
	/*
	 * Extra check for current option against current flag
	 */
868
	if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) {
869
		btrfs_err(info,
870 871 872
			  "nologreplay must be used with ro mount option");
		ret = -EINVAL;
	}
S
Sage Weil 已提交
873
out:
874
	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
875 876
	    !btrfs_test_opt(info, FREE_SPACE_TREE) &&
	    !btrfs_test_opt(info, CLEAR_CACHE)) {
877
		btrfs_err(info, "cannot disable free space tree");
878 879 880
		ret = -EINVAL;

	}
881
	if (!ret && btrfs_test_opt(info, SPACE_CACHE))
882
		btrfs_info(info, "disk space caching is enabled");
883
	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
884
		btrfs_info(info, "using free space tree");
S
Sage Weil 已提交
885
	return ret;
886 887 888 889 890 891 892 893
}

/*
 * Parse mount options that are required early in the mount process.
 *
 * All other options will be parsed on much later in the mount process and
 * only when we need to allocate a new super block.
 */
894 895
static int btrfs_parse_device_options(const char *options, fmode_t flags,
				      void *holder)
896 897
{
	substring_t args[MAX_OPT_ARGS];
898
	char *device_name, *opts, *orig, *p;
899
	struct btrfs_device *device = NULL;
900 901
	int error = 0;

902 903
	lockdep_assert_held(&uuid_mutex);

904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928
	if (!options)
		return 0;

	/*
	 * strsep changes the string, duplicate it because btrfs_parse_options
	 * gets called later
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
	orig = opts;

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;

		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		if (token == Opt_device) {
			device_name = match_strdup(&args[0]);
			if (!device_name) {
				error = -ENOMEM;
				goto out;
			}
929 930
			device = btrfs_scan_one_device(device_name, flags,
					holder);
931
			kfree(device_name);
932 933
			if (IS_ERR(device)) {
				error = PTR_ERR(device);
934
				goto out;
935
			}
936 937 938 939 940 941 942 943 944 945 946 947 948
		}
	}

out:
	kfree(orig);
	return error;
}

/*
 * Parse mount options that are related to subvolume id
 *
 * The value is later passed to mount_subvol()
 */
949 950
static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
		u64 *subvol_objectid)
951 952 953
{
	substring_t args[MAX_OPT_ARGS];
	char *opts, *orig, *p;
954
	int error = 0;
955
	u64 subvolid;
956 957

	if (!options)
958
		return 0;
959 960

	/*
961
	 * strsep changes the string, duplicate it because
962
	 * btrfs_parse_device_options gets called later
963 964 965 966
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
967
	orig = opts;
968 969 970 971 972 973 974 975 976

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_subvol:
977
			kfree(*subvol_name);
978
			*subvol_name = match_strdup(&args[0]);
979 980 981 982
			if (!*subvol_name) {
				error = -ENOMEM;
				goto out;
			}
983
			break;
984
		case Opt_subvolid:
985 986
			error = match_u64(&args[0], &subvolid);
			if (error)
987
				goto out;
988 989 990 991 992 993

			/* we want the original fs_tree */
			if (subvolid == 0)
				subvolid = BTRFS_FS_TREE_OBJECTID;

			*subvol_objectid = subvolid;
994
			break;
995
		case Opt_subvolrootid:
996
			pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
997
			break;
998 999 1000 1001 1002
		default:
			break;
		}
	}

1003
out:
1004
	kfree(orig);
1005
	return error;
1006 1007
}

1008 1009
/*
 * Build the path of the subvolume with id @subvol_objectid by walking
 * backwards from the subvolume to BTRFS_FS_TREE_OBJECTID.
 *
 * The path is assembled right-to-left at the end of a PATH_MAX sized buffer
 * and moved to the front of the buffer once it is complete.
 *
 * Returns a kmalloc()ed, NUL terminated string that the caller must kfree(),
 * or an ERR_PTR(): -ENOMEM on allocation failure, -ENOENT when a back
 * reference is missing, -ENAMETOOLONG when the path does not fit in PATH_MAX,
 * or the error of a failed tree search / root lookup.
 */
static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
					   u64 subvol_objectid)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_root *fs_root;
	struct btrfs_root_ref *root_ref;
	struct btrfs_inode_ref *inode_ref;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	char *name = NULL, *ptr;
	u64 dirid;
	int len;
	int ret;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto err;
	}
	path->leave_spinning = 1;

	name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!name) {
		ret = -ENOMEM;
		goto err;
	}
	/* Start with an empty string at the very end of the buffer. */
	ptr = name + PATH_MAX - 1;
	ptr[0] = '\0';

	/*
	 * Walk up the subvolume trees in the tree of tree roots by root
	 * backrefs until we hit the top-level subvolume.
	 */
	while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_BACKREF_KEY;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
			goto err;
		} else if (ret > 0) {
			ret = btrfs_previous_item(root, path, subvol_objectid,
						  BTRFS_ROOT_BACKREF_KEY);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = -ENOENT;
				goto err;
			}
		}

		/* The backref key offset is the id of the parent subvolume. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		subvol_objectid = key.offset;

		root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_root_ref);
		len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
		/*
		 * Check the remaining room before moving ptr so that we never
		 * form a pointer in front of the buffer.
		 */
		if (len + 1 > ptr - name) {
			ret = -ENAMETOOLONG;
			goto err;
		}
		ptr -= len + 1;
		read_extent_buffer(path->nodes[0], ptr + 1,
				   (unsigned long)(root_ref + 1), len);
		ptr[0] = '/';
		dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
		btrfs_release_path(path);

		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;
		fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
		if (IS_ERR(fs_root)) {
			ret = PTR_ERR(fs_root);
			goto err;
		}

		/*
		 * Walk up the filesystem tree by inode refs until we hit the
		 * root directory.
		 */
		while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
			key.objectid = dirid;
			key.type = BTRFS_INODE_REF_KEY;
			key.offset = (u64)-1;

			ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = btrfs_previous_item(fs_root, path, dirid,
							  BTRFS_INODE_REF_KEY);
				if (ret < 0) {
					goto err;
				} else if (ret > 0) {
					ret = -ENOENT;
					goto err;
				}
			}

			/* The inode ref key offset is the parent directory. */
			btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
			dirid = key.offset;

			inode_ref = btrfs_item_ptr(path->nodes[0],
						   path->slots[0],
						   struct btrfs_inode_ref);
			len = btrfs_inode_ref_name_len(path->nodes[0],
						       inode_ref);
			/* Same room check as for the root ref name above. */
			if (len + 1 > ptr - name) {
				ret = -ENAMETOOLONG;
				goto err;
			}
			ptr -= len + 1;
			read_extent_buffer(path->nodes[0], ptr + 1,
					   (unsigned long)(inode_ref + 1), len);
			ptr[0] = '/';
			btrfs_release_path(path);
		}
	}

	btrfs_free_path(path);
	if (ptr == name + PATH_MAX - 1) {
		/* Nothing was prepended: this is the top-level subvolume. */
		name[0] = '/';
		name[1] = '\0';
	} else {
		/* Move the assembled path, including the NUL, to the front. */
		memmove(name, ptr, name + PATH_MAX - ptr);
	}
	return name;

err:
	btrfs_free_path(path);
	kfree(name);
	return ERR_PTR(ret);
}

/*
 * Look up the id of the subvolume that is mounted when no subvolume was
 * requested explicitly and store it in *@objectid.
 *
 * Returns 0 on success or a negative errno.
 */
static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_dir_item *default_item;
	struct btrfs_path *path;
	struct btrfs_key location;
	u64 dir_id;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	/*
	 * Find the "default" dir item which points to the root item that we
	 * will mount by default if we haven't been given a specific subvolume
	 * to mount.
	 */
	dir_id = btrfs_super_root_dir(fs_info->super_copy);
	default_item = btrfs_lookup_dir_item(NULL, tree_root, path, dir_id,
					     "default", 7, 0);
	if (IS_ERR(default_item)) {
		ret = PTR_ERR(default_item);
	} else if (!default_item) {
		/*
		 * The default dir item isn't there.  That is unusual since it
		 * has always been there, but it is not fatal: fall back to
		 * the top-level subvolume.
		 */
		*objectid = BTRFS_FS_TREE_OBJECTID;
	} else {
		btrfs_dir_item_key_to_cpu(path->nodes[0], default_item,
					  &location);
		*objectid = location.objectid;
	}

	btrfs_free_path(path);
	return ret;
}

C
Chris Mason 已提交
1185
static int btrfs_fill_super(struct super_block *sb,
1186
			    struct btrfs_fs_devices *fs_devices,
1187
			    void *data)
C
Chris Mason 已提交
1188
{
C
Chris Mason 已提交
1189
	struct inode *inode;
1190
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1191
	struct btrfs_key key;
C
Chris Mason 已提交
1192
	int err;
1193

C
Chris Mason 已提交
1194 1195 1196
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_magic = BTRFS_SUPER_MAGIC;
	sb->s_op = &btrfs_super_ops;
A
Al Viro 已提交
1197
	sb->s_d_op = &btrfs_dentry_operations;
B
Balaji Rao 已提交
1198
	sb->s_export_op = &btrfs_export_ops;
J
Josef Bacik 已提交
1199
	sb->s_xattr = btrfs_xattr_handlers;
C
Chris Mason 已提交
1200
	sb->s_time_gran = 1;
C
Chris Mason 已提交
1201
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
1202
	sb->s_flags |= SB_POSIXACL;
1203
#endif
M
Matthew Garrett 已提交
1204
	sb->s_flags |= SB_I_VERSION;
1205
	sb->s_iflags |= SB_I_CGROUPWB;
1206 1207 1208 1209 1210 1211 1212

	err = super_setup_bdi(sb);
	if (err) {
		btrfs_err(fs_info, "super_setup_bdi failed");
		return err;
	}

A
Al Viro 已提交
1213 1214
	err = open_ctree(sb, fs_devices, (char *)data);
	if (err) {
1215
		btrfs_err(fs_info, "open_ctree failed");
A
Al Viro 已提交
1216
		return err;
1217 1218
	}

1219 1220 1221
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
1222
	inode = btrfs_iget(sb, &key, fs_info->fs_root);
1223 1224
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
C
Chris Mason 已提交
1225
		goto fail_close;
C
Chris Mason 已提交
1226 1227
	}

1228 1229
	sb->s_root = d_make_root(inode);
	if (!sb->s_root) {
C
Chris Mason 已提交
1230 1231
		err = -ENOMEM;
		goto fail_close;
C
Chris Mason 已提交
1232
	}
1233

D
Dan Magenheimer 已提交
1234
	cleancache_init_fs(sb);
1235
	sb->s_flags |= SB_ACTIVE;
C
Chris Mason 已提交
1236
	return 0;
C
Chris Mason 已提交
1237 1238

fail_close:
1239
	close_ctree(fs_info);
C
Chris Mason 已提交
1240
	return err;
C
Chris Mason 已提交
1241 1242
}

S
Sage Weil 已提交
1243
int btrfs_sync_fs(struct super_block *sb, int wait)
C
Chris Mason 已提交
1244 1245
{
	struct btrfs_trans_handle *trans;
1246 1247
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
C
Chris Mason 已提交
1248

1249
	trace_btrfs_sync_fs(fs_info, wait);
1250

C
Chris Mason 已提交
1251
	if (!wait) {
1252
		filemap_flush(fs_info->btree_inode->i_mapping);
C
Chris Mason 已提交
1253 1254
		return 0;
	}
1255

1256
	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
1257

M
Miao Xie 已提交
1258
	trans = btrfs_attach_transaction_barrier(root);
1259
	if (IS_ERR(trans)) {
1260
		/* no transaction, don't bother */
1261 1262 1263 1264 1265 1266 1267
		if (PTR_ERR(trans) == -ENOENT) {
			/*
			 * Exit unless we have some pending changes
			 * that need to go through commit
			 */
			if (fs_info->pending_changes == 0)
				return 0;
1268 1269 1270 1271 1272 1273
			/*
			 * A non-blocking test if the fs is frozen. We must not
			 * start a new transaction here otherwise a deadlock
			 * happens. The pending operations are delayed to the
			 * next commit after thawing.
			 */
1274 1275
			if (sb_start_write_trylock(sb))
				sb_end_write(sb);
1276 1277
			else
				return 0;
1278 1279
			trans = btrfs_start_transaction(root, 0);
		}
1280 1281
		if (IS_ERR(trans))
			return PTR_ERR(trans);
1282
	}
1283
	return btrfs_commit_transaction(trans);
C
Chris Mason 已提交
1284 1285
}

1286
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
E
Eric Paris 已提交
1287
{
1288
	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1289
	const char *compress_type;
E
Eric Paris 已提交
1290

1291
	if (btrfs_test_opt(info, DEGRADED))
E
Eric Paris 已提交
1292
		seq_puts(seq, ",degraded");
1293
	if (btrfs_test_opt(info, NODATASUM))
E
Eric Paris 已提交
1294
		seq_puts(seq, ",nodatasum");
1295
	if (btrfs_test_opt(info, NODATACOW))
E
Eric Paris 已提交
1296
		seq_puts(seq, ",nodatacow");
1297
	if (btrfs_test_opt(info, NOBARRIER))
E
Eric Paris 已提交
1298
		seq_puts(seq, ",nobarrier");
1299
	if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1300
		seq_printf(seq, ",max_inline=%llu", info->max_inline);
E
Eric Paris 已提交
1301 1302
	if (info->thread_pool_size !=  min_t(unsigned long,
					     num_online_cpus() + 2, 8))
1303
		seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
1304
	if (btrfs_test_opt(info, COMPRESS)) {
1305
		compress_type = btrfs_compress_type2str(info->compress_type);
1306
		if (btrfs_test_opt(info, FORCE_COMPRESS))
T
Tsutomu Itoh 已提交
1307 1308 1309
			seq_printf(seq, ",compress-force=%s", compress_type);
		else
			seq_printf(seq, ",compress=%s", compress_type);
1310
		if (info->compress_level)
1311
			seq_printf(seq, ":%d", info->compress_level);
T
Tsutomu Itoh 已提交
1312
	}
1313
	if (btrfs_test_opt(info, NOSSD))
C
Chris Mason 已提交
1314
		seq_puts(seq, ",nossd");
1315
	if (btrfs_test_opt(info, SSD_SPREAD))
1316
		seq_puts(seq, ",ssd_spread");
1317
	else if (btrfs_test_opt(info, SSD))
E
Eric Paris 已提交
1318
		seq_puts(seq, ",ssd");
1319
	if (btrfs_test_opt(info, NOTREELOG))
1320
		seq_puts(seq, ",notreelog");
1321
	if (btrfs_test_opt(info, NOLOGREPLAY))
1322
		seq_puts(seq, ",nologreplay");
1323
	if (btrfs_test_opt(info, FLUSHONCOMMIT))
1324
		seq_puts(seq, ",flushoncommit");
1325
	if (btrfs_test_opt(info, DISCARD))
1326
		seq_puts(seq, ",discard");
1327
	if (!(info->sb->s_flags & SB_POSIXACL))
E
Eric Paris 已提交
1328
		seq_puts(seq, ",noacl");
1329
	if (btrfs_test_opt(info, SPACE_CACHE))
T
Tsutomu Itoh 已提交
1330
		seq_puts(seq, ",space_cache");
1331
	else if (btrfs_test_opt(info, FREE_SPACE_TREE))
1332
		seq_puts(seq, ",space_cache=v2");
1333
	else
1334
		seq_puts(seq, ",nospace_cache");
1335
	if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1336
		seq_puts(seq, ",rescan_uuid_tree");
1337
	if (btrfs_test_opt(info, CLEAR_CACHE))
T
Tsutomu Itoh 已提交
1338
		seq_puts(seq, ",clear_cache");
1339
	if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
T
Tsutomu Itoh 已提交
1340
		seq_puts(seq, ",user_subvol_rm_allowed");
1341
	if (btrfs_test_opt(info, ENOSPC_DEBUG))
1342
		seq_puts(seq, ",enospc_debug");
1343
	if (btrfs_test_opt(info, AUTO_DEFRAG))
1344
		seq_puts(seq, ",autodefrag");
1345
	if (btrfs_test_opt(info, INODE_MAP_CACHE))
1346
		seq_puts(seq, ",inode_cache");
1347
	if (btrfs_test_opt(info, SKIP_BALANCE))
1348
		seq_puts(seq, ",skip_balance");
1349
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1350
	if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1351
		seq_puts(seq, ",check_int_data");
1352
	else if (btrfs_test_opt(info, CHECK_INTEGRITY))
1353 1354 1355 1356 1357 1358
		seq_puts(seq, ",check_int");
	if (info->check_integrity_print_mask)
		seq_printf(seq, ",check_int_print_mask=%d",
				info->check_integrity_print_mask);
#endif
	if (info->metadata_ratio)
1359
		seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
1360
	if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
J
Jeff Mahoney 已提交
1361
		seq_puts(seq, ",fatal_errors=panic");
1362
	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1363
		seq_printf(seq, ",commit=%u", info->commit_interval);
1364
#ifdef CONFIG_BTRFS_DEBUG
1365
	if (btrfs_test_opt(info, FRAGMENT_DATA))
1366
		seq_puts(seq, ",fragment=data");
1367
	if (btrfs_test_opt(info, FRAGMENT_METADATA))
1368 1369
		seq_puts(seq, ",fragment=metadata");
#endif
J
Josef Bacik 已提交
1370 1371
	if (btrfs_test_opt(info, REF_VERIFY))
		seq_puts(seq, ",ref_verify");
1372 1373 1374 1375
	seq_printf(seq, ",subvolid=%llu",
		  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
	seq_puts(seq, ",subvol=");
	seq_dentry(seq, dentry, " \t\n\\");
E
Eric Paris 已提交
1376 1377 1378
	return 0;
}

1379
static int btrfs_test_super(struct super_block *s, void *data)
Y
Yan 已提交
1380
{
1381 1382
	struct btrfs_fs_info *p = data;
	struct btrfs_fs_info *fs_info = btrfs_sb(s);
Y
Yan 已提交
1383

1384
	return fs_info->fs_devices == p->fs_devices;
Y
Yan 已提交
1385 1386
}

1387 1388
/*
 * sget() set callback: set up @s as an anonymous super block and, on
 * success, attach @data as its s_fs_info.
 */
static int btrfs_set_super(struct super_block *s, void *data)
{
	int ret = set_anon_super(s, data);

	if (ret)
		return ret;

	s->s_fs_info = data;
	return 0;
}

1395 1396 1397 1398 1399 1400 1401 1402 1403 1404
/*
 * Subvolumes are identified by ino 256 (BTRFS_FIRST_FREE_OBJECTID).
 * Returns 1 if @inode is non-NULL and has that inode number, 0 otherwise.
 */
static inline int is_subvolume_inode(struct inode *inode)
{
	return inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID;
}

1405
/*
 * Turn the internally mounted device root @mnt into the dentry of the
 * requested subvolume.
 *
 * If @subvol_name is NULL the path is derived from @subvol_objectid, or from
 * the default subvolume when that is 0 as well.  After mount_subtree() the
 * result is verified to be a subvolume root and, if an id was given or
 * looked up, to match that id.
 *
 * Consumes both @subvol_name (kfree) and the reference on @mnt.  Returns the
 * subvolume dentry or an ERR_PTR().
 */
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
				   struct vfsmount *mnt)
{
	struct btrfs_fs_info *fs_info;
	struct super_block *sb;
	struct inode *root_inode;
	struct dentry *root;
	u64 root_objectid;
	int ret;

	if (!subvol_name) {
		fs_info = btrfs_sb(mnt->mnt_sb);

		if (!subvol_objectid) {
			ret = get_default_subvol_objectid(fs_info,
							  &subvol_objectid);
			if (ret) {
				root = ERR_PTR(ret);
				goto out;
			}
		}

		subvol_name = get_subvol_name_from_objectid(fs_info,
							    subvol_objectid);
		if (IS_ERR(subvol_name)) {
			root = ERR_CAST(subvol_name);
			/* Nothing to free at out. */
			subvol_name = NULL;
			goto out;
		}
	}

	root = mount_subtree(mnt, subvol_name);
	/* mount_subtree() drops our reference on the vfsmount. */
	mnt = NULL;
	if (IS_ERR(root))
		goto out;

	sb = root->d_sb;
	fs_info = btrfs_sb(sb);
	root_inode = d_inode(root);
	root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;

	ret = 0;
	if (!is_subvolume_inode(root_inode)) {
		btrfs_err(fs_info, "'%s' is not a valid subvolume",
		       subvol_name);
		ret = -EINVAL;
	}
	if (subvol_objectid && root_objectid != subvol_objectid) {
		/*
		 * This will also catch a race condition where a subvolume
		 * which was passed by ID is renamed and another subvolume is
		 * renamed over the old location.
		 */
		btrfs_err(fs_info,
			  "subvol '%s' does not match subvolid %llu",
			  subvol_name, subvol_objectid);
		ret = -EINVAL;
	}
	if (ret) {
		dput(root);
		root = ERR_PTR(ret);
		deactivate_locked_super(sb);
	}

out:
	mntput(mnt);
	kfree(subvol_name);
	return root;
}
1469

1470 1471 1472 1473 1474 1475
/*
 * Find a superblock for the given device / mount point.
 *
 * Note: This is based on mount_bdev from fs/super.c with a few additions
 *       for multiple device setup.  Make sure to keep it in sync.
 */
1476 1477 1478 1479 1480
static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
		int flags, const char *device_name, void *data)
{
	struct block_device *bdev = NULL;
	struct super_block *s;
1481
	struct btrfs_device *device = NULL;
1482 1483
	struct btrfs_fs_devices *fs_devices = NULL;
	struct btrfs_fs_info *fs_info = NULL;
1484
	void *new_sec_opts = NULL;
1485 1486 1487 1488 1489 1490 1491
	fmode_t mode = FMODE_READ;
	int error = 0;

	if (!(flags & SB_RDONLY))
		mode |= FMODE_WRITE;

	if (data) {
A
Al Viro 已提交
1492
		error = security_sb_eat_lsm_opts(data, &new_sec_opts);
1493 1494 1495 1496 1497 1498 1499 1500 1501 1502
		if (error)
			return ERR_PTR(error);
	}

	/*
	 * Setup a dummy root and fs_info for test/set super.  This is because
	 * we don't actually fill this stuff out until open_ctree, but we need
	 * it for searching for existing supers, so this lets us do that and
	 * then open_ctree will properly initialize everything later.
	 */
1503
	fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515
	if (!fs_info) {
		error = -ENOMEM;
		goto error_sec_opts;
	}

	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
	if (!fs_info->super_copy || !fs_info->super_for_commit) {
		error = -ENOMEM;
		goto error_fs_info;
	}

1516
	mutex_lock(&uuid_mutex);
1517
	error = btrfs_parse_device_options(data, mode, fs_type);
1518 1519
	if (error) {
		mutex_unlock(&uuid_mutex);
1520
		goto error_fs_info;
1521
	}
1522

1523 1524
	device = btrfs_scan_one_device(device_name, mode, fs_type);
	if (IS_ERR(device)) {
1525
		mutex_unlock(&uuid_mutex);
1526
		error = PTR_ERR(device);
1527
		goto error_fs_info;
1528
	}
1529

1530
	fs_devices = device->fs_devices;
1531 1532
	fs_info->fs_devices = fs_devices;

1533
	error = btrfs_open_devices(fs_devices, mode, fs_type);
1534
	mutex_unlock(&uuid_mutex);
1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558
	if (error)
		goto error_fs_info;

	if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
		error = -EACCES;
		goto error_close_devices;
	}

	bdev = fs_devices->latest_bdev;
	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
		 fs_info);
	if (IS_ERR(s)) {
		error = PTR_ERR(s);
		goto error_close_devices;
	}

	if (s->s_root) {
		btrfs_close_devices(fs_devices);
		free_fs_info(fs_info);
		if ((flags ^ s->s_flags) & SB_RDONLY)
			error = -EBUSY;
	} else {
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
		btrfs_sb(s)->bdev_holder = fs_type;
1559 1560
		if (!strstr(crc32c_impl(), "generic"))
			set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
1561 1562
		error = btrfs_fill_super(s, fs_devices, data);
	}
A
Al Viro 已提交
1563
	if (!error)
1564
		error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
A
Al Viro 已提交
1565
	security_free_mnt_opts(&new_sec_opts);
1566 1567
	if (error) {
		deactivate_locked_super(s);
A
Al Viro 已提交
1568
		return ERR_PTR(error);
1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580
	}

	return dget(s->s_root);

error_close_devices:
	btrfs_close_devices(fs_devices);
error_fs_info:
	free_fs_info(fs_info);
error_sec_opts:
	security_free_mnt_opts(&new_sec_opts);
	return ERR_PTR(error);
}
1581

1582
/*
1583
 * Mount function which is called by VFS layer.
1584
 *
1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602
 * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
 * which needs vfsmount* of device's root (/).  This means device's root has to
 * be mounted internally in any case.
 *
 * Operation flow:
 *   1. Parse subvol id related options for later use in mount_subvol().
 *
 *   2. Mount device's root (/) by calling vfs_kern_mount().
 *
 *      NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
 *      first place. In order to avoid calling btrfs_mount() again, we use
 *      different file_system_type which is not registered to VFS by
 *      register_filesystem() (btrfs_root_fs_type). As a result,
 *      btrfs_mount_root() is called. The return value will be used by
 *      mount_subtree() in mount_subvol().
 *
 *   3. Call mount_subvol() to get the dentry of subvolume. Since there is
 *      "btrfs subvolume set-default", mount_subvol() is called always.
1603
 */
A
Al Viro 已提交
1604
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1605
		const char *device_name, void *data)
Y
Yan 已提交
1606
{
1607 1608
	struct vfsmount *mnt_root;
	struct dentry *root;
1609 1610
	char *subvol_name = NULL;
	u64 subvol_objectid = 0;
Y
Yan 已提交
1611 1612
	int error = 0;

1613 1614
	error = btrfs_parse_subvol_options(data, &subvol_name,
					&subvol_objectid);
1615 1616
	if (error) {
		kfree(subvol_name);
A
Al Viro 已提交
1617
		return ERR_PTR(error);
1618
	}
1619

1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630
	/* mount device's root (/) */
	mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
	if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
		if (flags & SB_RDONLY) {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags & ~SB_RDONLY, device_name, data);
		} else {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags | SB_RDONLY, device_name, data);
			if (IS_ERR(mnt_root)) {
				root = ERR_CAST(mnt_root);
1631
				kfree(subvol_name);
1632 1633
				goto out;
			}
Y
Yan 已提交
1634

1635 1636 1637 1638 1639 1640
			down_write(&mnt_root->mnt_sb->s_umount);
			error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
			up_write(&mnt_root->mnt_sb->s_umount);
			if (error < 0) {
				root = ERR_PTR(error);
				mntput(mnt_root);
1641
				kfree(subvol_name);
1642 1643 1644
				goto out;
			}
		}
1645
	}
1646 1647
	if (IS_ERR(mnt_root)) {
		root = ERR_CAST(mnt_root);
1648
		kfree(subvol_name);
1649
		goto out;
1650
	}
Y
Yan 已提交
1651

1652
	/* mount_subvol() will free subvol_name and mnt_root */
1653
	root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
Y
Yan 已提交
1654

1655 1656
out:
	return root;
Y
Yan 已提交
1657
}
1658

1659
/*
 * Change the thread pool size of the filesystem from @old_pool_size to
 * @new_pool_size.
 *
 * Records the new size in fs_info->thread_pool_size and applies it as the
 * maximum of the worker queues listed below.  Does nothing when the size is
 * unchanged.
 */
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
				     u32 new_pool_size, u32 old_pool_size)
{
	if (new_pool_size == old_pool_size)
		return;

	fs_info->thread_pool_size = new_pool_size;

	/* Both sizes are u32, so print them as unsigned. */
	btrfs_info(fs_info, "resize thread pool %u -> %u",
	       old_pool_size, new_pool_size);

	btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
				new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
				new_pool_size);
}

1685
static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
M
Miao Xie 已提交
1686 1687
{
	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1688
}
M
Miao Xie 已提交
1689

1690 1691 1692
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
				       unsigned long old_opts, int flags)
{
M
Miao Xie 已提交
1693 1694
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1695
	     (flags & SB_RDONLY))) {
M
Miao Xie 已提交
1696 1697 1698
		/* wait for any defraggers to finish */
		wait_event(fs_info->transaction_wait,
			   (atomic_read(&fs_info->defrag_running) == 0));
1699
		if (flags & SB_RDONLY)
M
Miao Xie 已提交
1700 1701 1702 1703 1704 1705 1706 1707
			sync_filesystem(fs_info->sb);
	}
}

/*
 * Finish a remount, whether it succeeded or not, and clear the remounting
 * flag set by btrfs_remount_prepare().
 */
static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
					 unsigned long old_opts)
{
	/*
	 * We need to clean up all defraggable inodes if autodefrag has been
	 * turned off or the filesystem is read-only.
	 */
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG)) {
		if (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
		    sb_rdonly(fs_info->sb))
			btrfs_cleanup_defrag_inodes(fs_info);
	}

	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
}

Y
Yan Zheng 已提交
1719 1720
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
1721 1722
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
1723 1724 1725 1726
	unsigned old_flags = sb->s_flags;
	unsigned long old_opts = fs_info->mount_opt;
	unsigned long old_compress_type = fs_info->compress_type;
	u64 old_max_inline = fs_info->max_inline;
1727
	u32 old_thread_pool_size = fs_info->thread_pool_size;
1728
	u32 old_metadata_ratio = fs_info->metadata_ratio;
Y
Yan Zheng 已提交
1729 1730
	int ret;

1731
	sync_filesystem(sb);
1732
	btrfs_remount_prepare(fs_info);
M
Miao Xie 已提交
1733

1734
	if (data) {
1735
		void *new_sec_opts = NULL;
1736

A
Al Viro 已提交
1737 1738
		ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
		if (!ret)
1739
			ret = security_sb_remount(sb, new_sec_opts);
A
Al Viro 已提交
1740
		security_free_mnt_opts(&new_sec_opts);
1741 1742 1743 1744
		if (ret)
			goto restore;
	}

1745
	ret = btrfs_parse_options(fs_info, data, *flags);
1746
	if (ret)
1747
		goto restore;
1748

1749
	btrfs_remount_begin(fs_info, old_opts, *flags);
1750 1751 1752
	btrfs_resize_thread_pool(fs_info,
		fs_info->thread_pool_size, old_thread_pool_size);

1753
	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
M
Miao Xie 已提交
1754
		goto out;
Y
Yan Zheng 已提交
1755

1756
	if (*flags & SB_RDONLY) {
1757 1758 1759 1760
		/*
		 * this also happens on 'umount -rf' or on shutdown, when
		 * the filesystem is busy.
		 */
1761
		cancel_work_sync(&fs_info->async_reclaim_work);
1762 1763 1764 1765 1766 1767

		/* wait for the uuid_scan task to finish */
		down(&fs_info->uuid_tree_rescan_sem);
		/* avoid complains from lockdep et al. */
		up(&fs_info->uuid_tree_rescan_sem);

1768
		sb->s_flags |= SB_RDONLY;
Y
Yan Zheng 已提交
1769

1770
		/*
1771
		 * Setting SB_RDONLY will put the cleaner thread to
1772 1773 1774 1775 1776 1777 1778
		 * sleep at the next loop if it's already active.
		 * If it's already asleep, we'll leave unused block
		 * groups on disk until we're mounted read-write again
		 * unless we clean them up here.
		 */
		btrfs_delete_unused_bgs(fs_info);

1779 1780
		btrfs_dev_replace_suspend_for_unmount(fs_info);
		btrfs_scrub_cancel(fs_info);
1781
		btrfs_pause_balance(fs_info);
1782

1783
		ret = btrfs_commit_super(fs_info);
1784 1785
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1786
	} else {
1787
		if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
1788
			btrfs_err(fs_info,
1789
				"Remounting read-write after error is not allowed");
1790 1791 1792
			ret = -EINVAL;
			goto restore;
		}
1793
		if (fs_info->fs_devices->rw_devices == 0) {
1794 1795
			ret = -EACCES;
			goto restore;
1796
		}
Y
Yan Zheng 已提交
1797

1798
		if (!btrfs_check_rw_degradable(fs_info, NULL)) {
1799
			btrfs_warn(fs_info,
1800
		"too many missing devices, writable remount is not allowed");
1801 1802 1803 1804
			ret = -EACCES;
			goto restore;
		}

1805
		if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1806 1807
			ret = -EINVAL;
			goto restore;
1808
		}
Y
Yan Zheng 已提交
1809

1810
		ret = btrfs_cleanup_fs_roots(fs_info);
1811 1812
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1813

1814
		/* recover relocation */
1815
		mutex_lock(&fs_info->cleaner_mutex);
1816
		ret = btrfs_recover_relocation(root);
1817
		mutex_unlock(&fs_info->cleaner_mutex);
1818 1819
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1820

1821 1822 1823 1824
		ret = btrfs_resume_balance_async(fs_info);
		if (ret)
			goto restore;

1825 1826
		ret = btrfs_resume_dev_replace_async(fs_info);
		if (ret) {
1827
			btrfs_warn(fs_info, "failed to resume dev_replace");
1828 1829
			goto restore;
		}
1830

1831 1832
		btrfs_qgroup_rescan_resume(fs_info);

1833
		if (!fs_info->uuid_root) {
1834
			btrfs_info(fs_info, "creating UUID tree");
1835 1836
			ret = btrfs_create_uuid_tree(fs_info);
			if (ret) {
J
Jeff Mahoney 已提交
1837 1838 1839
				btrfs_warn(fs_info,
					   "failed to create the UUID tree %d",
					   ret);
1840 1841 1842
				goto restore;
			}
		}
1843
		sb->s_flags &= ~SB_RDONLY;
1844

1845
		set_bit(BTRFS_FS_OPEN, &fs_info->flags);
Y
Yan Zheng 已提交
1846
	}
M
Miao Xie 已提交
1847
out:
1848
	wake_up_process(fs_info->transaction_kthread);
M
Miao Xie 已提交
1849
	btrfs_remount_cleanup(fs_info, old_opts);
Y
Yan Zheng 已提交
1850
	return 0;
1851 1852

restore:
1853
	/* We've hit an error - don't reset SB_RDONLY */
1854
	if (sb_rdonly(sb))
1855
		old_flags |= SB_RDONLY;
1856 1857 1858 1859
	sb->s_flags = old_flags;
	fs_info->mount_opt = old_opts;
	fs_info->compress_type = old_compress_type;
	fs_info->max_inline = old_max_inline;
1860 1861
	btrfs_resize_thread_pool(fs_info,
		old_thread_pool_size, fs_info->thread_pool_size);
1862
	fs_info->metadata_ratio = old_metadata_ratio;
M
Miao Xie 已提交
1863
	btrfs_remount_cleanup(fs_info, old_opts);
1864
	return ret;
Y
Yan Zheng 已提交
1865 1866
}

1867
/* Used to sort the devices by max_avail(descending sort) */
1868
static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892
				       const void *dev_info2)
{
	if (((struct btrfs_device_info *)dev_info1)->max_avail >
	    ((struct btrfs_device_info *)dev_info2)->max_avail)
		return -1;
	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
		 ((struct btrfs_device_info *)dev_info2)->max_avail)
		return 1;
	else
	return 0;
}

/*
 * Sort the @nr_devices entries of @devices in place by max_avail, which
 * holds the max free extent size of each device, in descending order.
 */
static inline void btrfs_descending_sort_devices(
					struct btrfs_device_info *devices,
					size_t nr_devices)
{
	sort(devices, nr_devices, sizeof(*devices),
	     btrfs_cmp_device_free_bytes, NULL);
}

1893 1894 1895 1896
/*
 * The helper to calc the free space on the devices that can be used to store
 * file data.
 */
1897 1898
static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
					      u64 *free_bytes)
1899 1900 1901 1902 1903 1904 1905
{
	struct btrfs_device_info *devices_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 type;
	u64 avail_space;
	u64 min_stripe_size;
1906
	int num_stripes = 1;
1907
	int i = 0, nr_devices;
1908
	const struct btrfs_raid_attr *rattr;
1909

1910
	/*
1911
	 * We aren't under the device list lock, so this is racy-ish, but good
1912 1913
	 * enough for our purposes.
	 */
1914
	nr_devices = fs_info->fs_devices->open_devices;
1915 1916 1917 1918 1919 1920 1921 1922 1923
	if (!nr_devices) {
		smp_mb();
		nr_devices = fs_info->fs_devices->open_devices;
		ASSERT(nr_devices);
		if (!nr_devices) {
			*free_bytes = 0;
			return 0;
		}
	}
1924

1925
	devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
1926
			       GFP_KERNEL);
1927 1928 1929
	if (!devices_info)
		return -ENOMEM;

1930
	/* calc min stripe number for data space allocation */
1931
	type = btrfs_data_alloc_profile(fs_info);
1932 1933
	rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];

1934
	if (type & BTRFS_BLOCK_GROUP_RAID0)
1935
		num_stripes = nr_devices;
1936
	else if (type & BTRFS_BLOCK_GROUP_RAID1)
1937
		num_stripes = 2;
1938
	else if (type & BTRFS_BLOCK_GROUP_RAID10)
1939
		num_stripes = 4;
1940

1941 1942
	/* Adjust for more than 1 stripe per device */
	min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
1943

1944 1945
	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
1946 1947
		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
						&device->dev_state) ||
1948 1949
		    !device->bdev ||
		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
1950 1951
			continue;

1952 1953 1954
		if (i >= nr_devices)
			break;

1955 1956 1957
		avail_space = device->total_bytes - device->bytes_used;

		/* align with stripe_len */
1958
		avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
1959 1960

		/*
1961
		 * In order to avoid overwriting the superblock on the drive,
1962 1963
		 * btrfs starts at an offset of at least 1MB when doing chunk
		 * allocation.
1964 1965 1966
		 *
		 * This ensures we have at least min_stripe_size free space
		 * after excluding 1MB.
1967
		 */
1968
		if (avail_space <= SZ_1M + min_stripe_size)
1969 1970
			continue;

1971 1972
		avail_space -= SZ_1M;

1973 1974 1975 1976 1977
		devices_info[i].dev = device;
		devices_info[i].max_avail = avail_space;

		i++;
	}
1978
	rcu_read_unlock();
1979 1980 1981 1982 1983 1984 1985

	nr_devices = i;

	btrfs_descending_sort_devices(devices_info, nr_devices);

	i = nr_devices - 1;
	avail_space = 0;
1986 1987
	while (nr_devices >= rattr->devs_min) {
		num_stripes = min(num_stripes, nr_devices);
1988

1989 1990 1991 1992
		if (devices_info[i].max_avail >= min_stripe_size) {
			int j;
			u64 alloc_size;

1993
			avail_space += devices_info[i].max_avail * num_stripes;
1994
			alloc_size = devices_info[i].max_avail;
1995
			for (j = i + 1 - num_stripes; j <= i; j++)
1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006
				devices_info[j].max_avail -= alloc_size;
		}
		i--;
		nr_devices--;
	}

	kfree(devices_info);
	*free_bytes = avail_space;
	return 0;
}

2007 2008 2009 2010 2011 2012 2013
/*
 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
 *
 * If there's a redundant raid level at DATA block groups, use the respective
 * multiplier to scale the sizes.
 *
 * Unused device space usage is based on simulating the chunk allocator
2014 2015 2016
 * algorithm that respects the device sizes and order of allocations.  This is
 * a close approximation of the actual use but there are other factors that may
 * change the result (like a new metadata chunk).
2017
 *
2018
 * If metadata is exhausted, f_bavail will be 0.
2019
 */
C
Chris Mason 已提交
2020 2021
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
2022 2023 2024
	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	struct list_head *head = &fs_info->space_info;
2025 2026
	struct btrfs_space_info *found;
	u64 total_used = 0;
2027
	u64 total_free_data = 0;
2028
	u64 total_free_meta = 0;
2029
	int bits = dentry->d_sb->s_blocksize_bits;
2030
	__be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
2031 2032
	unsigned factor = 1;
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2033
	int ret;
2034
	u64 thresh = 0;
2035
	int mixed = 0;
C
Chris Mason 已提交
2036

2037
	rcu_read_lock();
J
Josef Bacik 已提交
2038
	list_for_each_entry_rcu(found, head, list) {
2039
		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2040 2041
			int i;

2042 2043 2044
			total_free_data += found->disk_total - found->disk_used;
			total_free_data -=
				btrfs_account_ro_block_groups_free_space(found);
2045 2046

			for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2047 2048 2049
				if (!list_empty(&found->block_groups[i]))
					factor = btrfs_bg_type_to_factor(
						btrfs_raid_array[i].bg_flag);
2050
			}
2051
		}
2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062

		/*
		 * Metadata in mixed block goup profiles are accounted in data
		 */
		if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
			if (found->flags & BTRFS_BLOCK_GROUP_DATA)
				mixed = 1;
			else
				total_free_meta += found->disk_total -
					found->disk_used;
		}
2063

2064
		total_used += found->disk_used;
J
Josef Bacik 已提交
2065
	}
2066

2067 2068
	rcu_read_unlock();

2069 2070 2071 2072 2073 2074
	buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
	buf->f_blocks >>= bits;
	buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);

	/* Account global block reserve as used, it's in logical size already */
	spin_lock(&block_rsv->lock);
2075 2076 2077 2078 2079
	/* Mixed block groups accounting is not byte-accurate, avoid overflow */
	if (buf->f_bfree >= block_rsv->size >> bits)
		buf->f_bfree -= block_rsv->size >> bits;
	else
		buf->f_bfree = 0;
2080 2081
	spin_unlock(&block_rsv->lock);

2082
	buf->f_bavail = div_u64(total_free_data, factor);
2083
	ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
2084
	if (ret)
2085
		return ret;
2086
	buf->f_bavail += div_u64(total_free_data, factor);
2087
	buf->f_bavail = buf->f_bavail >> bits;
C
Chris Mason 已提交
2088

2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101
	/*
	 * We calculate the remaining metadata space minus global reserve. If
	 * this is (supposedly) smaller than zero, there's no space. But this
	 * does not hold in practice, the exhausted state happens where's still
	 * some positive delta. So we apply some guesswork and compare the
	 * delta to a 4M threshold.  (Practically observed delta was ~2M.)
	 *
	 * We probably cannot calculate the exact threshold value because this
	 * depends on the internal reservations requested by various
	 * operations, so some operations that consume a few metadata will
	 * succeed even if the Avail is zero. But this is better than the other
	 * way around.
	 */
2102
	thresh = SZ_4M;
2103

2104
	if (!mixed && total_free_meta - thresh < block_rsv->size)
2105 2106
		buf->f_bavail = 0;

2107 2108 2109 2110
	buf->f_type = BTRFS_SUPER_MAGIC;
	buf->f_bsize = dentry->d_sb->s_blocksize;
	buf->f_namelen = BTRFS_NAME_LEN;

2111
	/* We treat it as constant endianness (it doesn't matter _which_)
C
Chris Mason 已提交
2112
	   because we want the fsid to come out the same whether mounted
2113 2114 2115
	   on a big-endian or little-endian host */
	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2116
	/* Mask in the root object ID too, to disambiguate subvols */
2117 2118 2119 2120
	buf->f_fsid.val[0] ^=
		BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
	buf->f_fsid.val[1] ^=
		BTRFS_I(d_inode(dentry))->root->root_key.objectid;
2121

C
Chris Mason 已提交
2122 2123
	return 0;
}
C
Chris Mason 已提交
2124

A
Al Viro 已提交
2125 2126
/*
 * ->kill_sb callback: tear down the superblock with kill_anon_super() and
 * then release the btrfs_fs_info that was attached to it.
 *
 * fs_info is looked up before the superblock is killed and freed afterwards.
 */
static void btrfs_kill_super(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	kill_anon_super(sb);
	free_fs_info(fs_info);
}

2132 2133 2134
static struct file_system_type btrfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "btrfs",
A
Al Viro 已提交
2135
	.mount		= btrfs_mount,
A
Al Viro 已提交
2136
	.kill_sb	= btrfs_kill_super,
2137
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
2138
};
2139 2140 2141 2142 2143 2144 2145 2146 2147

/*
 * Second "btrfs" filesystem type whose mount callback is btrfs_mount_root().
 * It shares the name, kill_sb callback and flags of btrfs_fs_type and is the
 * holder handed to btrfs_scan_one_device() by the control ioctl.
 */
static struct file_system_type btrfs_root_fs_type = {
	.name		= "btrfs",
	.owner		= THIS_MODULE,
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
	.mount		= btrfs_mount_root,
	.kill_sb	= btrfs_kill_super,
};

2148
MODULE_ALIAS_FS("btrfs");
2149

2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160
/*
 * open() handler for the control device.
 *
 * The control file's private_data is used to hold the transaction when it is
 * started and is used to keep track of whether a transaction is already in
 * progress, so every open starts out with it cleared.
 *
 * Always returns 0.
 */
static int btrfs_control_open(struct inode *inode, struct file *file)
{
	file->private_data = NULL;

	return 0;
}

C
Chris Mason 已提交
2161 2162 2163
/*
 * used by btrfsctl to scan devices when no FS is mounted
 */
2164 2165 2166 2167
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
				unsigned long arg)
{
	struct btrfs_ioctl_vol_args *vol;
2168
	struct btrfs_device *device = NULL;
2169
	int ret = -ENOTTY;
2170

2171 2172 2173
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

L
Li Zefan 已提交
2174 2175 2176
	vol = memdup_user((void __user *)arg, sizeof(*vol));
	if (IS_ERR(vol))
		return PTR_ERR(vol);
2177
	vol->name[BTRFS_PATH_NAME_MAX] = '\0';
2178

2179 2180
	switch (cmd) {
	case BTRFS_IOC_SCAN_DEV:
2181
		mutex_lock(&uuid_mutex);
2182 2183 2184
		device = btrfs_scan_one_device(vol->name, FMODE_READ,
					       &btrfs_root_fs_type);
		ret = PTR_ERR_OR_ZERO(device);
2185
		mutex_unlock(&uuid_mutex);
2186
		break;
2187 2188 2189
	case BTRFS_IOC_FORGET_DEV:
		ret = btrfs_forget_devices(vol->name);
		break;
J
Josef Bacik 已提交
2190
	case BTRFS_IOC_DEVICES_READY:
2191
		mutex_lock(&uuid_mutex);
2192 2193 2194
		device = btrfs_scan_one_device(vol->name, FMODE_READ,
					       &btrfs_root_fs_type);
		if (IS_ERR(device)) {
2195
			mutex_unlock(&uuid_mutex);
2196
			ret = PTR_ERR(device);
J
Josef Bacik 已提交
2197
			break;
2198
		}
2199 2200
		ret = !(device->fs_devices->num_devices ==
			device->fs_devices->total_devices);
2201
		mutex_unlock(&uuid_mutex);
J
Josef Bacik 已提交
2202
		break;
2203
	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2204
		ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2205
		break;
2206
	}
L
Li Zefan 已提交
2207

2208
	kfree(vol);
L
Linda Knippers 已提交
2209
	return ret;
2210 2211
}

2212
static int btrfs_freeze(struct super_block *sb)
Y
Yan 已提交
2213
{
2214
	struct btrfs_trans_handle *trans;
2215 2216
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
2217

2218
	set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2219 2220 2221 2222 2223 2224
	/*
	 * We don't need a barrier here, we'll wait for any transaction that
	 * could be in progress on other threads (and do delayed iputs that
	 * we want to avoid on a frozen filesystem), or do the commit
	 * ourselves.
	 */
M
Miao Xie 已提交
2225
	trans = btrfs_attach_transaction_barrier(root);
2226 2227 2228 2229 2230 2231
	if (IS_ERR(trans)) {
		/* no transaction, don't bother */
		if (PTR_ERR(trans) == -ENOENT)
			return 0;
		return PTR_ERR(trans);
	}
2232
	return btrfs_commit_transaction(trans);
Y
Yan 已提交
2233 2234
}

2235 2236
static int btrfs_unfreeze(struct super_block *sb)
{
2237 2238 2239
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2240 2241 2242
	return 0;
}

J
Josef Bacik 已提交
2243 2244 2245 2246 2247 2248 2249
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
	struct btrfs_fs_devices *cur_devices;
	struct btrfs_device *dev, *first_dev = NULL;
	struct list_head *head;

2250 2251 2252 2253 2254
	/*
	 * Lightweight locking of the devices. We should not need
	 * device_list_mutex here as we only read the device data and the list
	 * is protected by RCU.  Even if a device is deleted during the list
	 * traversals, we'll get valid data, the freeing callback will wait at
2255
	 * least until the rcu_read_unlock.
2256 2257
	 */
	rcu_read_lock();
J
Josef Bacik 已提交
2258 2259 2260
	cur_devices = fs_info->fs_devices;
	while (cur_devices) {
		head = &cur_devices->devices;
2261
		list_for_each_entry_rcu(dev, head, dev_list) {
2262
			if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
2263
				continue;
2264 2265
			if (!dev->name)
				continue;
J
Josef Bacik 已提交
2266 2267 2268 2269 2270 2271
			if (!first_dev || dev->devid < first_dev->devid)
				first_dev = dev;
		}
		cur_devices = cur_devices->seed;
	}

2272 2273 2274
	if (first_dev)
		seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
	else
J
Josef Bacik 已提交
2275
		WARN_ON(1);
2276
	rcu_read_unlock();
J
Josef Bacik 已提交
2277 2278 2279
	return 0;
}

2280
static const struct super_operations btrfs_super_ops = {
2281
	.drop_inode	= btrfs_drop_inode,
A
Al Viro 已提交
2282
	.evict_inode	= btrfs_evict_inode,
C
Chris Mason 已提交
2283
	.put_super	= btrfs_put_super,
2284
	.sync_fs	= btrfs_sync_fs,
E
Eric Paris 已提交
2285
	.show_options	= btrfs_show_options,
J
Josef Bacik 已提交
2286
	.show_devname	= btrfs_show_devname,
C
Chris Mason 已提交
2287 2288
	.alloc_inode	= btrfs_alloc_inode,
	.destroy_inode	= btrfs_destroy_inode,
A
Al Viro 已提交
2289
	.free_inode	= btrfs_free_inode,
C
Chris Mason 已提交
2290
	.statfs		= btrfs_statfs,
Y
Yan Zheng 已提交
2291
	.remount_fs	= btrfs_remount,
2292
	.freeze_fs	= btrfs_freeze,
2293
	.unfreeze_fs	= btrfs_unfreeze,
C
Chris Mason 已提交
2294
};
2295 2296

static const struct file_operations btrfs_ctl_fops = {
2297
	.open = btrfs_control_open,
2298 2299 2300
	.unlocked_ioctl	 = btrfs_control_ioctl,
	.compat_ioctl = btrfs_control_ioctl,
	.owner	 = THIS_MODULE,
2301
	.llseek = noop_llseek,
2302 2303 2304
};

static struct miscdevice btrfs_misc = {
2305
	.minor		= BTRFS_MINOR,
2306 2307 2308 2309
	.name		= "btrfs-control",
	.fops		= &btrfs_ctl_fops
};

2310 2311 2312
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");

2313
/* Register the control misc device; returns misc_register()'s result. */
static int __init btrfs_interface_init(void)
{
	return misc_register(&btrfs_misc);
}

2318
static __cold void btrfs_interface_exit(void)
2319
{
2320
	misc_deregister(&btrfs_misc);
2321 2322
}

2323
/*
 * Log a one-line banner with the crc32c implementation in use and the
 * optional build-time features that were compiled in.
 */
static void __init btrfs_print_mod_info(void)
{
	/* Adjacent string literals concatenate; each #ifdef adds one token. */
	static const char options[] = ""
#ifdef CONFIG_BTRFS_DEBUG
			", debug=on"
#endif
#ifdef CONFIG_BTRFS_ASSERT
			", assert=on"
#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
			", integrity-checker=on"
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			", ref-verify=on"
#endif
			;
	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
}

2342 2343
/*
 * Module init: bring up each btrfs subsystem in turn and finally register
 * the filesystem type.
 *
 * On failure the labels at the bottom undo the steps that already succeeded,
 * in reverse order of initialization.  Returns 0 on success or the error of
 * the step that failed.
 */
static int __init init_btrfs_fs(void)
{
	int err;

	btrfs_props_init();

	err = btrfs_init_sysfs();
	if (err)
		return err;

	btrfs_init_compress();

	err = btrfs_init_cachep();
	if (err)
		goto free_compress;

	err = extent_io_init();
	if (err)
		goto free_cachep;

	err = extent_state_cache_init();
	if (err)
		goto free_extent_io;

	err = extent_map_init();
	if (err)
		goto free_extent_state_cache;

	err = ordered_data_init();
	if (err)
		goto free_extent_map;

	err = btrfs_delayed_inode_init();
	if (err)
		goto free_ordered_data;

	err = btrfs_auto_defrag_init();
	if (err)
		goto free_delayed_inode;

	err = btrfs_delayed_ref_init();
	if (err)
		goto free_auto_defrag;

	err = btrfs_prelim_ref_init();
	if (err)
		goto free_delayed_ref;

	err = btrfs_end_io_wq_init();
	if (err)
		goto free_prelim_ref;

	err = btrfs_interface_init();
	if (err)
		goto free_end_io_wq;

	btrfs_init_lockdep();

	btrfs_print_mod_info();

	err = btrfs_run_sanity_tests();
	if (err)
		goto unregister_ioctl;

	err = register_filesystem(&btrfs_fs_type);
	if (err)
		goto unregister_ioctl;

	return 0;

unregister_ioctl:
	btrfs_interface_exit();
free_end_io_wq:
	btrfs_end_io_wq_exit();
free_prelim_ref:
	btrfs_prelim_ref_exit();
free_delayed_ref:
	btrfs_delayed_ref_exit();
free_auto_defrag:
	btrfs_auto_defrag_exit();
free_delayed_inode:
	btrfs_delayed_inode_exit();
free_ordered_data:
	ordered_data_exit();
free_extent_map:
	extent_map_exit();
free_extent_state_cache:
	extent_state_cache_exit();
free_extent_io:
	extent_io_exit();
free_cachep:
	btrfs_destroy_cachep();
free_compress:
	btrfs_exit_compress();
	btrfs_exit_sysfs();

	return err;
}

/*
 * Module exit: tear down everything init_btrfs_fs() set up, unregister the
 * filesystem type and drop the cached fs uuids.
 */
static void __exit exit_btrfs_fs(void)
{
	btrfs_destroy_cachep();
	btrfs_delayed_ref_exit();
	btrfs_auto_defrag_exit();
	btrfs_delayed_inode_exit();
	btrfs_prelim_ref_exit();
	ordered_data_exit();
	extent_map_exit();
	extent_state_cache_exit();
	extent_io_exit();
	btrfs_interface_exit();
	btrfs_end_io_wq_exit();
	unregister_filesystem(&btrfs_fs_type);
	btrfs_exit_sysfs();
	btrfs_cleanup_fs_uuids();
	btrfs_exit_compress();
}

2460
late_initcall(init_btrfs_fs);
2461 2462 2463
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");
2464
MODULE_SOFTDEP("pre: crc32c");