super.c 66.4 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
C
Chris Mason 已提交
2 3 4 5
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

Y
Yan 已提交
6
#include <linux/blkdev.h>
7 8 9 10 11 12
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
E
Eric Paris 已提交
13
#include <linux/seq_file.h>
14 15
#include <linux/string.h>
#include <linux/backing-dev.h>
Y
Yan 已提交
16
#include <linux/mount.h>
C
Chris Mason 已提交
17
#include <linux/writeback.h>
C
Chris Mason 已提交
18
#include <linux/statfs.h>
C
Chris Mason 已提交
19
#include <linux/compat.h>
20
#include <linux/parser.h>
21
#include <linux/ctype.h>
22
#include <linux/namei.h>
23
#include <linux/miscdevice.h>
24
#include <linux/magic.h>
25
#include <linux/slab.h>
D
Dan Magenheimer 已提交
26
#include <linux/cleancache.h>
27
#include <linux/ratelimit.h>
28
#include <linux/crc32c.h>
29
#include <linux/btrfs.h>
30
#include "delayed-inode.h"
31
#include "ctree.h"
C
Chris Mason 已提交
32
#include "disk-io.h"
33
#include "transaction.h"
C
Chris Mason 已提交
34
#include "btrfs_inode.h"
C
Chris Mason 已提交
35
#include "print-tree.h"
36
#include "props.h"
J
Josef Bacik 已提交
37
#include "xattr.h"
38
#include "volumes.h"
B
Balaji Rao 已提交
39
#include "export.h"
C
Chris Mason 已提交
40
#include "compression.h"
J
Josef Bacik 已提交
41
#include "rcu-string.h"
42
#include "dev-replace.h"
43
#include "free-space-cache.h"
44
#include "backref.h"
45
#include "space-info.h"
46
#include "sysfs.h"
47
#include "tests/btrfs-tests.h"
48
#include "block-group.h"
49
#include "discard.h"
50

51
#include "qgroup.h"
52 53 54
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>

55
static const struct super_operations btrfs_super_ops;
56 57 58 59 60 61

/*
 * Types for mounting the default subvolume and a subvolume explicitly
 * requested by subvol=/path. That way the callchain is straightforward and we
 * don't have to play tricks with the mount options and recursive calls to
 * btrfs_mount.
62 63
 *
 * The new btrfs_root_fs_type also servers as a tag for the bdev_holder.
64
 */
65
static struct file_system_type btrfs_fs_type;
66
static struct file_system_type btrfs_root_fs_type;
C
Chris Mason 已提交
67

68 69
static int btrfs_remount(struct super_block *sb, int *flags, char *data);

D
David Sterba 已提交
70
/*
 * Translate a negative errno value into a short human readable string.
 *
 * Only the handful of codes listed below are decoded; every other value
 * (including positive errno values) yields "unknown".  The returned pointer
 * refers to a string literal and must not be modified or freed.
 */
const char * __attribute_const__ btrfs_decode_error(int errno)
{
	const char *errstr = "unknown";

	switch (errno) {
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		errstr = "Readonly filesystem";
		break;
	case -EEXIST:
		errstr = "Object already exists";
		break;
	case -ENOSPC:
		errstr = "No space left";
		break;
	case -ENOENT:
		errstr = "No such entry";
		break;
	default:
		break;
	}

	return errstr;
}

/*
99
 * __btrfs_handle_fs_error decodes expected errors from the caller and
100
 * invokes the appropriate error response.
L
liubo 已提交
101
 */
102
__cold
103
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
J
Jeff Mahoney 已提交
104
		       unsigned int line, int errno, const char *fmt, ...)
L
liubo 已提交
105 106
{
	struct super_block *sb = fs_info->sb;
107
#ifdef CONFIG_PRINTK
L
liubo 已提交
108
	const char *errstr;
109
#endif
L
liubo 已提交
110 111 112

	/*
	 * Special case: if the error is EROFS, and we're already
113
	 * under SB_RDONLY, then it is safe here.
L
liubo 已提交
114
	 */
115
	if (errno == -EROFS && sb_rdonly(sb))
J
Jeff Mahoney 已提交
116 117
  		return;

118
#ifdef CONFIG_PRINTK
119
	errstr = btrfs_decode_error(errno);
J
Jeff Mahoney 已提交
120
	if (fmt) {
121 122 123 124 125 126
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
J
Jeff Mahoney 已提交
127

128
		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
129
			sb->s_id, function, line, errno, errstr, &vaf);
130
		va_end(args);
J
Jeff Mahoney 已提交
131
	} else {
132
		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
133
			sb->s_id, function, line, errno, errstr);
J
Jeff Mahoney 已提交
134
	}
135
#endif
L
liubo 已提交
136

A
Anand Jain 已提交
137 138 139 140 141 142
	/*
	 * Today we only save the error info to memory.  Long term we'll
	 * also send it down to the disk
	 */
	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);

J
Jeff Mahoney 已提交
143
	/* Don't go through full error handling during mount */
144 145 146 147 148 149
	if (!(sb->s_flags & SB_BORN))
		return;

	if (sb_rdonly(sb))
		return;

150 151
	btrfs_discard_stop(fs_info);

152 153 154 155 156 157 158 159
	/* btrfs handle error by forcing the filesystem readonly */
	sb->s_flags |= SB_RDONLY;
	btrfs_info(fs_info, "forced readonly");
	/*
	 * Note that a running device replace operation is not canceled here
	 * although there is no way to update the progress. It would add the
	 * risk of a deadlock, therefore the canceling is omitted. The only
	 * penalty is that some I/O remains active until the procedure
160
	 * completes. The next time when the filesystem is mounted writable
161 162
	 * again, the device replace operation continues.
	 */
J
Jeff Mahoney 已提交
163
}
L
liubo 已提交
164

165
#ifdef CONFIG_PRINTK
166
/*
 * Human readable names for the printk log levels.  btrfs_printk() indexes
 * this table with the level digit ('0'..'7') minus '0'.
 */
static const char * const logtypes[] = {
	"emergency",
	"alert",
	"critical",
	"error",
	"warning",
	"notice",
	"info",
	"debug",
};

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192

/*
 * Use one ratelimit state per log level so that a flood of less important
 * messages doesn't cause more important ones to be dropped.
 *
 * The array has one entry per log level and is indexed the same way as
 * logtypes[] by btrfs_printk().
 */
static struct ratelimit_state printk_limits[] = {
	RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};

193
/*
 * Print a rate-limited, btrfs-prefixed message.
 *
 * @fs_info: filesystem the message relates to; may be NULL, in which case
 *           the device is reported as "<unknown>"
 * @fmt:     printf-style format, optionally prefixed with printk level
 *           headers
 *
 * Leading level headers are stripped from @fmt.  A header carrying a digit
 * '0'..'7' selects the level name and ratelimit state; without one the
 * entries at index 4 ("warning") are used.
 */
void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
	struct va_format vaf;
	va_list args;
	int kern_level;
	const char *type = logtypes[4];
	struct ratelimit_state *ratelimit = &printk_limits[4];

	va_start(args, fmt);

	/* Consume every level header at the front of the format string */
	while ((kern_level = printk_get_level(fmt)) != 0) {
		size_t size = printk_skip_level(fmt) - fmt;

		if (kern_level >= '0' && kern_level <= '7') {
			/* Keep the header so it can be re-emitted below */
			memcpy(lvl, fmt,  size);
			lvl[size] = '\0';
			type = logtypes[kern_level - '0'];
			ratelimit = &printk_limits[kern_level - '0'];
		}
		fmt += size;
	}

	vaf.fmt = fmt;
	vaf.va = &args;

	if (__ratelimit(ratelimit))
		printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
			fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);

	va_end(args);
}
#endif
L
liubo 已提交
226

227 228 229 230 231 232 233 234 235 236 237 238 239
/*
 * We only mark the transaction aborted and then set the file system read-only.
 * This will prevent new transactions from starting or trying to join this
 * one.
 *
 * This means that error recovery at the call site is limited to freeing
 * any local memory allocations and passing the error code up without
 * further cleanup. The transaction should complete as it normally would
 * in the call path but will return -EIO.
 *
 * We'll complete the cleanup in btrfs_end_transaction and
 * btrfs_commit_transaction.
 */
240
__cold
241
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
242
			       const char *function,
243 244
			       unsigned int line, int errno)
{
245 246
	struct btrfs_fs_info *fs_info = trans->fs_info;

247
	WRITE_ONCE(trans->aborted, errno);
248 249
	/* Nothing used. The other threads that have joined this
	 * transaction may be able to continue. */
250
	if (!trans->dirty && list_empty(&trans->new_bgs)) {
251 252
		const char *errstr;

253
		errstr = btrfs_decode_error(errno);
254
		btrfs_warn(fs_info,
255 256
		           "%s:%d: Aborting unused transaction(%s).",
		           function, line, errstr);
L
liubo 已提交
257
		return;
258
	}
S
Seraphime Kirkovski 已提交
259
	WRITE_ONCE(trans->transaction->aborted, errno);
260
	/* Wake up anybody who may be waiting on this transaction */
261 262 263
	wake_up(&fs_info->transaction_wait);
	wake_up(&fs_info->transaction_blocked_wait);
	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
264
}
J
Jeff Mahoney 已提交
265 266 267 268
/*
 * __btrfs_panic decodes unexpected, fatal errors from the caller,
 * issues an alert, and either panics or BUGs, depending on mount options.
 */
269
__cold
J
Jeff Mahoney 已提交
270 271 272 273 274 275 276
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
		   unsigned int line, int errno, const char *fmt, ...)
{
	char *s_id = "<unknown>";
	const char *errstr;
	struct va_format vaf = { .fmt = fmt };
	va_list args;
L
liubo 已提交
277

J
Jeff Mahoney 已提交
278 279
	if (fs_info)
		s_id = fs_info->sb->s_id;
L
liubo 已提交
280

J
Jeff Mahoney 已提交
281 282 283
	va_start(args, fmt);
	vaf.va = &args;

284
	errstr = btrfs_decode_error(errno);
285
	if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
286 287
		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
			s_id, function, line, &vaf, errno, errstr);
J
Jeff Mahoney 已提交
288

289 290
	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
		   function, line, &vaf, errno, errstr);
J
Jeff Mahoney 已提交
291 292
	va_end(args);
	/* Caller calls BUG() */
L
liubo 已提交
293 294
}

C
Chris Mason 已提交
295
/*
 * Tear down the filesystem behind @sb by calling close_ctree() on the
 * btrfs_fs_info returned by btrfs_sb().
 */
static void btrfs_put_super(struct super_block *sb)
{
	close_ctree(btrfs_sb(sb));
}

300
/*
 * Identifiers for the mount options listed in the tokens[] table below.
 * Opt_err is the last entry and marks an unrecognized option.
 */
enum {
	Opt_acl, Opt_noacl,
	Opt_clear_cache,
	Opt_commit_interval,
	Opt_compress,
	Opt_compress_force,
	Opt_compress_force_type,
	Opt_compress_type,
	Opt_degraded,
	Opt_device,
	Opt_fatal_errors,
	Opt_flushoncommit, Opt_noflushoncommit,
	Opt_inode_cache, Opt_noinode_cache,
	Opt_max_inline,
	Opt_barrier, Opt_nobarrier,
	Opt_datacow, Opt_nodatacow,
	Opt_datasum, Opt_nodatasum,
	Opt_defrag, Opt_nodefrag,
	Opt_discard, Opt_nodiscard,
	Opt_discard_mode,
	Opt_nologreplay,
	Opt_norecovery,
	Opt_ratio,
	Opt_rescan_uuid_tree,
	Opt_skip_balance,
	Opt_space_cache, Opt_no_space_cache,
	Opt_space_cache_version,
	Opt_ssd, Opt_nossd,
	Opt_ssd_spread, Opt_nossd_spread,
	Opt_subvol,
	Opt_subvol_empty,
	Opt_subvolid,
	Opt_thread_pool,
	Opt_treelog, Opt_notreelog,
	Opt_usebackuproot,
	Opt_user_subvol_rm_allowed,

	/* Deprecated options */
	Opt_alloc_start,
	Opt_recovery,
	Opt_subvolrootid,

	/* Debugging options */
	Opt_check_integrity,
	Opt_check_integrity_including_extent_data,
	Opt_check_integrity_print_mask,
	Opt_enospc_debug, Opt_noenospc_debug,
#ifdef CONFIG_BTRFS_DEBUG
	Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	Opt_ref_verify,
#endif
	Opt_err,
};

D
David Sterba 已提交
356
static const match_table_t tokens = {
357 358 359 360
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_clear_cache, "clear_cache"},
	{Opt_commit_interval, "commit=%u"},
C
Chris Mason 已提交
361
	{Opt_compress, "compress"},
362
	{Opt_compress_type, "compress=%s"},
C
Chris Mason 已提交
363
	{Opt_compress_force, "compress-force"},
364
	{Opt_compress_force_type, "compress-force=%s"},
365 366 367
	{Opt_degraded, "degraded"},
	{Opt_device, "device=%s"},
	{Opt_fatal_errors, "fatal_errors=%s"},
368
	{Opt_flushoncommit, "flushoncommit"},
369
	{Opt_noflushoncommit, "noflushoncommit"},
370 371 372 373 374 375 376 377 378 379 380
	{Opt_inode_cache, "inode_cache"},
	{Opt_noinode_cache, "noinode_cache"},
	{Opt_max_inline, "max_inline=%s"},
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_datacow, "datacow"},
	{Opt_nodatacow, "nodatacow"},
	{Opt_datasum, "datasum"},
	{Opt_nodatasum, "nodatasum"},
	{Opt_defrag, "autodefrag"},
	{Opt_nodefrag, "noautodefrag"},
C
Christoph Hellwig 已提交
381
	{Opt_discard, "discard"},
382
	{Opt_discard_mode, "discard=%s"},
Q
Qu Wenruo 已提交
383
	{Opt_nodiscard, "nodiscard"},
384 385 386 387 388
	{Opt_nologreplay, "nologreplay"},
	{Opt_norecovery, "norecovery"},
	{Opt_ratio, "metadata_ratio=%u"},
	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
	{Opt_skip_balance, "skip_balance"},
389
	{Opt_space_cache, "space_cache"},
390
	{Opt_no_space_cache, "nospace_cache"},
391 392 393 394 395 396
	{Opt_space_cache_version, "space_cache=%s"},
	{Opt_ssd, "ssd"},
	{Opt_nossd, "nossd"},
	{Opt_ssd_spread, "ssd_spread"},
	{Opt_nossd_spread, "nossd_spread"},
	{Opt_subvol, "subvol=%s"},
O
Omar Sandoval 已提交
397
	{Opt_subvol_empty, "subvol="},
398 399 400 401
	{Opt_subvolid, "subvolid=%s"},
	{Opt_thread_pool, "thread_pool=%u"},
	{Opt_treelog, "treelog"},
	{Opt_notreelog, "notreelog"},
402
	{Opt_usebackuproot, "usebackuproot"},
403 404 405 406 407 408 409 410
	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},

	/* Deprecated options */
	{Opt_alloc_start, "alloc_start=%s"},
	{Opt_recovery, "recovery"},
	{Opt_subvolrootid, "subvolrootid=%d"},

	/* Debugging options */
411 412
	{Opt_check_integrity, "check_int"},
	{Opt_check_integrity_including_extent_data, "check_int_data"},
413
	{Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
414 415
	{Opt_enospc_debug, "enospc_debug"},
	{Opt_noenospc_debug, "noenospc_debug"},
416 417 418 419
#ifdef CONFIG_BTRFS_DEBUG
	{Opt_fragment_data, "fragment=data"},
	{Opt_fragment_metadata, "fragment=metadata"},
	{Opt_fragment_all, "fragment=all"},
J
Josef Bacik 已提交
420 421 422
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	{Opt_ref_verify, "ref_verify"},
423
#endif
J
Josef Bacik 已提交
424
	{Opt_err, NULL},
425 426
};

427 428 429
/*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
430
 * XXX JDM: This needs to be cleaned up for remount.
431
 */
432
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
433
			unsigned long new_flags)
434 435
{
	substring_t args[MAX_OPT_ARGS];
436
	char *p, *num;
437
	u64 cache_gen;
438
	int intarg;
S
Sage Weil 已提交
439
	int ret = 0;
440 441
	char *compress_type;
	bool compress_force = false;
442 443 444
	enum btrfs_compression_type saved_compress_type;
	bool saved_compress_force;
	int no_compress = 0;
445

446 447
	cache_gen = btrfs_super_cache_generation(info->super_copy);
	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
448 449
		btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
	else if (cache_gen)
450 451
		btrfs_set_opt(info->mount_opt, SPACE_CACHE);

452 453 454 455
	/*
	 * Even the options are empty, we still need to do extra check
	 * against new flags
	 */
456
	if (!options)
457
		goto check;
458

459
	while ((p = strsep(&options, ",")) != NULL) {
460 461 462 463 464 465
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
466
		case Opt_degraded:
467
			btrfs_info(info, "allowing degraded mounts");
468
			btrfs_set_opt(info->mount_opt, DEGRADED);
469
			break;
470
		case Opt_subvol:
O
Omar Sandoval 已提交
471
		case Opt_subvol_empty:
472
		case Opt_subvolid:
473
		case Opt_subvolrootid:
474
		case Opt_device:
475
			/*
476 477
			 * These are parsed by btrfs_parse_subvol_options or
			 * btrfs_parse_device_options and can be ignored here.
478
			 */
479 480
			break;
		case Opt_nodatasum:
481
			btrfs_set_and_info(info, NODATASUM,
482
					   "setting nodatasum");
483
			break;
Q
Qu Wenruo 已提交
484
		case Opt_datasum:
485 486
			if (btrfs_test_opt(info, NODATASUM)) {
				if (btrfs_test_opt(info, NODATACOW))
487
					btrfs_info(info,
J
Jeff Mahoney 已提交
488
						   "setting datasum, datacow enabled");
489
				else
490
					btrfs_info(info, "setting datasum");
491
			}
Q
Qu Wenruo 已提交
492 493 494
			btrfs_clear_opt(info->mount_opt, NODATACOW);
			btrfs_clear_opt(info->mount_opt, NODATASUM);
			break;
495
		case Opt_nodatacow:
496 497 498
			if (!btrfs_test_opt(info, NODATACOW)) {
				if (!btrfs_test_opt(info, COMPRESS) ||
				    !btrfs_test_opt(info, FORCE_COMPRESS)) {
499
					btrfs_info(info,
500 501
						   "setting nodatacow, compression disabled");
				} else {
502
					btrfs_info(info, "setting nodatacow");
503
				}
504 505 506
			}
			btrfs_clear_opt(info->mount_opt, COMPRESS);
			btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
507 508
			btrfs_set_opt(info->mount_opt, NODATACOW);
			btrfs_set_opt(info->mount_opt, NODATASUM);
509
			break;
Q
Qu Wenruo 已提交
510
		case Opt_datacow:
511
			btrfs_clear_and_info(info, NODATACOW,
512
					     "setting datacow");
Q
Qu Wenruo 已提交
513
			break;
C
Chris Mason 已提交
514
		case Opt_compress_force:
515 516
		case Opt_compress_force_type:
			compress_force = true;
517
			/* Fallthrough */
518 519
		case Opt_compress:
		case Opt_compress_type:
520 521
			saved_compress_type = btrfs_test_opt(info,
							     COMPRESS) ?
522 523
				info->compress_type : BTRFS_COMPRESS_NONE;
			saved_compress_force =
524
				btrfs_test_opt(info, FORCE_COMPRESS);
525 526
			if (token == Opt_compress ||
			    token == Opt_compress_force ||
527
			    strncmp(args[0].from, "zlib", 4) == 0) {
528
				compress_type = "zlib";
529

530
				info->compress_type = BTRFS_COMPRESS_ZLIB;
531 532 533 534 535 536 537 538 539
				info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
				/*
				 * args[0] contains uninitialized data since
				 * for these tokens we don't expect any
				 * parameter.
				 */
				if (token != Opt_compress &&
				    token != Opt_compress_force)
					info->compress_level =
540 541 542
					  btrfs_compress_str2level(
							BTRFS_COMPRESS_ZLIB,
							args[0].from + 4);
543
				btrfs_set_opt(info->mount_opt, COMPRESS);
544 545
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
546
				no_compress = 0;
547
			} else if (strncmp(args[0].from, "lzo", 3) == 0) {
L
Li Zefan 已提交
548 549
				compress_type = "lzo";
				info->compress_type = BTRFS_COMPRESS_LZO;
550
				btrfs_set_opt(info->mount_opt, COMPRESS);
551 552
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
553
				btrfs_set_fs_incompat(info, COMPRESS_LZO);
554
				no_compress = 0;
555
			} else if (strncmp(args[0].from, "zstd", 4) == 0) {
N
Nick Terrell 已提交
556 557
				compress_type = "zstd";
				info->compress_type = BTRFS_COMPRESS_ZSTD;
558 559 560 561
				info->compress_level =
					btrfs_compress_str2level(
							 BTRFS_COMPRESS_ZSTD,
							 args[0].from + 4);
N
Nick Terrell 已提交
562 563 564 565 566
				btrfs_set_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
				btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
				no_compress = 0;
567 568 569 570 571
			} else if (strncmp(args[0].from, "no", 2) == 0) {
				compress_type = "no";
				btrfs_clear_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				compress_force = false;
572
				no_compress++;
573 574 575 576 577 578
			} else {
				ret = -EINVAL;
				goto out;
			}

			if (compress_force) {
579
				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
580
			} else {
581 582 583 584 585 586 587
				/*
				 * If we remount from compress-force=xxx to
				 * compress=xxx, we need clear FORCE_COMPRESS
				 * flag, otherwise, there is no way for users
				 * to disable forcible compression separately.
				 */
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
588
			}
589
			if ((btrfs_test_opt(info, COMPRESS) &&
590 591
			     (info->compress_type != saved_compress_type ||
			      compress_force != saved_compress_force)) ||
592
			    (!btrfs_test_opt(info, COMPRESS) &&
593
			     no_compress == 1)) {
594
				btrfs_info(info, "%s %s compression, level %d",
595
					   (compress_force) ? "force" : "use",
596
					   compress_type, info->compress_level);
597 598
			}
			compress_force = false;
C
Chris Mason 已提交
599
			break;
600
		case Opt_ssd:
601
			btrfs_set_and_info(info, SSD,
602
					   "enabling ssd optimizations");
603
			btrfs_clear_opt(info->mount_opt, NOSSD);
604
			break;
605
		case Opt_ssd_spread:
606 607
			btrfs_set_and_info(info, SSD,
					   "enabling ssd optimizations");
608
			btrfs_set_and_info(info, SSD_SPREAD,
609
					   "using spread ssd allocation scheme");
610
			btrfs_clear_opt(info->mount_opt, NOSSD);
611
			break;
C
Chris Mason 已提交
612
		case Opt_nossd:
613 614 615
			btrfs_set_opt(info->mount_opt, NOSSD);
			btrfs_clear_and_info(info, SSD,
					     "not using ssd optimizations");
616 617
			/* Fallthrough */
		case Opt_nossd_spread:
618 619
			btrfs_clear_and_info(info, SSD_SPREAD,
					     "not using spread ssd allocation scheme");
C
Chris Mason 已提交
620
			break;
621
		case Opt_barrier:
622
			btrfs_clear_and_info(info, NOBARRIER,
623
					     "turning on barriers");
624
			break;
625
		case Opt_nobarrier:
626
			btrfs_set_and_info(info, NOBARRIER,
627
					   "turning off barriers");
628
			break;
629
		case Opt_thread_pool:
630 631 632
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
633
			} else if (intarg == 0) {
634 635 636
				ret = -EINVAL;
				goto out;
			}
637
			info->thread_pool_size = intarg;
638
			break;
639
		case Opt_max_inline:
640 641
			num = match_strdup(&args[0]);
			if (num) {
A
Akinobu Mita 已提交
642
				info->max_inline = memparse(num, NULL);
643 644
				kfree(num);

C
Chris Mason 已提交
645
				if (info->max_inline) {
646
					info->max_inline = min_t(u64,
C
Chris Mason 已提交
647
						info->max_inline,
648
						info->sectorsize);
C
Chris Mason 已提交
649
				}
650 651
				btrfs_info(info, "max_inline at %llu",
					   info->max_inline);
652 653 654
			} else {
				ret = -ENOMEM;
				goto out;
655 656
			}
			break;
657
		case Opt_alloc_start:
658 659
			btrfs_info(info,
				"option alloc_start is obsolete, ignored");
660
			break;
Q
Qu Wenruo 已提交
661
		case Opt_acl:
662
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
663
			info->sb->s_flags |= SB_POSIXACL;
Q
Qu Wenruo 已提交
664
			break;
665
#else
666
			btrfs_err(info, "support for ACL not compiled in!");
667 668 669
			ret = -EINVAL;
			goto out;
#endif
J
Josef Bacik 已提交
670
		case Opt_noacl:
671
			info->sb->s_flags &= ~SB_POSIXACL;
J
Josef Bacik 已提交
672
			break;
S
Sage Weil 已提交
673
		case Opt_notreelog:
674
			btrfs_set_and_info(info, NOTREELOG,
675
					   "disabling tree log");
Q
Qu Wenruo 已提交
676 677
			break;
		case Opt_treelog:
678
			btrfs_clear_and_info(info, NOTREELOG,
679
					     "enabling tree log");
S
Sage Weil 已提交
680
			break;
681
		case Opt_norecovery:
682
		case Opt_nologreplay:
683
			btrfs_set_and_info(info, NOLOGREPLAY,
684 685
					   "disabling log replay at mount time");
			break;
686
		case Opt_flushoncommit:
687
			btrfs_set_and_info(info, FLUSHONCOMMIT,
688
					   "turning on flush-on-commit");
689
			break;
690
		case Opt_noflushoncommit:
691
			btrfs_clear_and_info(info, FLUSHONCOMMIT,
692
					     "turning off flush-on-commit");
693
			break;
694
		case Opt_ratio:
695
			ret = match_int(&args[0], &intarg);
696
			if (ret)
697
				goto out;
698 699 700
			info->metadata_ratio = intarg;
			btrfs_info(info, "metadata ratio %u",
				   info->metadata_ratio);
701
			break;
C
Christoph Hellwig 已提交
702
		case Opt_discard:
703 704 705 706 707 708 709 710 711 712 713 714 715 716
		case Opt_discard_mode:
			if (token == Opt_discard ||
			    strcmp(args[0].from, "sync") == 0) {
				btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
				btrfs_set_and_info(info, DISCARD_SYNC,
						   "turning on sync discard");
			} else if (strcmp(args[0].from, "async") == 0) {
				btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
				btrfs_set_and_info(info, DISCARD_ASYNC,
						   "turning on async discard");
			} else {
				ret = -EINVAL;
				goto out;
			}
C
Christoph Hellwig 已提交
717
			break;
Q
Qu Wenruo 已提交
718
		case Opt_nodiscard:
719
			btrfs_clear_and_info(info, DISCARD_SYNC,
720
					     "turning off discard");
721 722
			btrfs_clear_and_info(info, DISCARD_ASYNC,
					     "turning off async discard");
Q
Qu Wenruo 已提交
723
			break;
724
		case Opt_space_cache:
725 726 727
		case Opt_space_cache_version:
			if (token == Opt_space_cache ||
			    strcmp(args[0].from, "v1") == 0) {
728
				btrfs_clear_opt(info->mount_opt,
729
						FREE_SPACE_TREE);
730
				btrfs_set_and_info(info, SPACE_CACHE,
731
					   "enabling disk space caching");
732
			} else if (strcmp(args[0].from, "v2") == 0) {
733
				btrfs_clear_opt(info->mount_opt,
734
						SPACE_CACHE);
735
				btrfs_set_and_info(info, FREE_SPACE_TREE,
736 737 738 739 740
						   "enabling free space tree");
			} else {
				ret = -EINVAL;
				goto out;
			}
741
			break;
742 743 744
		case Opt_rescan_uuid_tree:
			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
			break;
745
		case Opt_no_space_cache:
746
			if (btrfs_test_opt(info, SPACE_CACHE)) {
747 748
				btrfs_clear_and_info(info, SPACE_CACHE,
					     "disabling disk space caching");
749
			}
750
			if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
751 752
				btrfs_clear_and_info(info, FREE_SPACE_TREE,
					     "disabling free space tree");
753
			}
754
			break;
C
Chris Mason 已提交
755
		case Opt_inode_cache:
756
			btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
757
					   "enabling inode map caching");
758 759
			break;
		case Opt_noinode_cache:
760
			btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
761
					     "disabling inode map caching");
C
Chris Mason 已提交
762
			break;
763
		case Opt_clear_cache:
764
			btrfs_set_and_info(info, CLEAR_CACHE,
765
					   "force clearing of disk cache");
766
			break;
767 768 769
		case Opt_user_subvol_rm_allowed:
			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
			break;
770 771 772
		case Opt_enospc_debug:
			btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
773 774 775
		case Opt_noenospc_debug:
			btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
C
Chris Mason 已提交
776
		case Opt_defrag:
777
			btrfs_set_and_info(info, AUTO_DEFRAG,
778
					   "enabling auto defrag");
C
Chris Mason 已提交
779
			break;
780
		case Opt_nodefrag:
781
			btrfs_clear_and_info(info, AUTO_DEFRAG,
782
					     "disabling auto defrag");
783
			break;
C
Chris Mason 已提交
784
		case Opt_recovery:
785
			btrfs_warn(info,
786
				   "'recovery' is deprecated, use 'usebackuproot' instead");
787
			/* fall through */
788
		case Opt_usebackuproot:
789
			btrfs_info(info,
790 791
				   "trying to use backup root at mount time");
			btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
C
Chris Mason 已提交
792
			break;
793 794 795
		case Opt_skip_balance:
			btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
			break;
796 797
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		case Opt_check_integrity_including_extent_data:
798
			btrfs_info(info,
799
				   "enabling check integrity including extent data");
800 801 802 803 804
			btrfs_set_opt(info->mount_opt,
				      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity:
805
			btrfs_info(info, "enabling check integrity");
806 807 808
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity_print_mask:
809
			ret = match_int(&args[0], &intarg);
810
			if (ret)
811
				goto out;
812 813 814
			info->check_integrity_print_mask = intarg;
			btrfs_info(info, "check_integrity_print_mask 0x%x",
				   info->check_integrity_print_mask);
815 816 817 818 819
			break;
#else
		case Opt_check_integrity_including_extent_data:
		case Opt_check_integrity:
		case Opt_check_integrity_print_mask:
820 821
			btrfs_err(info,
				  "support for check_integrity* not compiled in!");
822 823 824
			ret = -EINVAL;
			goto out;
#endif
J
Jeff Mahoney 已提交
825 826 827 828 829 830 831 832 833 834 835 836
		case Opt_fatal_errors:
			if (strcmp(args[0].from, "panic") == 0)
				btrfs_set_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else if (strcmp(args[0].from, "bug") == 0)
				btrfs_clear_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else {
				ret = -EINVAL;
				goto out;
			}
			break;
837 838 839
		case Opt_commit_interval:
			intarg = 0;
			ret = match_int(&args[0], &intarg);
840
			if (ret)
841
				goto out;
842
			if (intarg == 0) {
843
				btrfs_info(info,
844
					   "using default commit interval %us",
J
Jeff Mahoney 已提交
845
					   BTRFS_DEFAULT_COMMIT_INTERVAL);
846 847 848 849
				intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
			} else if (intarg > 300) {
				btrfs_warn(info, "excessive commit interval %d",
					   intarg);
850
			}
851
			info->commit_interval = intarg;
852
			break;
853 854
#ifdef CONFIG_BTRFS_DEBUG
		case Opt_fragment_all:
855
			btrfs_info(info, "fragmenting all space");
856 857 858 859
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
			break;
		case Opt_fragment_metadata:
860
			btrfs_info(info, "fragmenting metadata");
861 862 863 864
			btrfs_set_opt(info->mount_opt,
				      FRAGMENT_METADATA);
			break;
		case Opt_fragment_data:
865
			btrfs_info(info, "fragmenting data");
866 867
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			break;
J
Josef Bacik 已提交
868 869 870 871 872 873
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
		case Opt_ref_verify:
			btrfs_info(info, "doing ref verification");
			btrfs_set_opt(info->mount_opt, REF_VERIFY);
			break;
874
#endif
S
Sage Weil 已提交
875
		case Opt_err:
876
			btrfs_err(info, "unrecognized mount option '%s'", p);
S
Sage Weil 已提交
877 878
			ret = -EINVAL;
			goto out;
879
		default:
880
			break;
881 882
		}
	}
883 884 885 886
check:
	/*
	 * Extra check for current option against current flag
	 */
887
	if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) {
888
		btrfs_err(info,
889 890 891
			  "nologreplay must be used with ro mount option");
		ret = -EINVAL;
	}
S
Sage Weil 已提交
892
out:
893
	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
894 895
	    !btrfs_test_opt(info, FREE_SPACE_TREE) &&
	    !btrfs_test_opt(info, CLEAR_CACHE)) {
896
		btrfs_err(info, "cannot disable free space tree");
897 898 899
		ret = -EINVAL;

	}
900
	if (!ret && btrfs_test_opt(info, SPACE_CACHE))
901
		btrfs_info(info, "disk space caching is enabled");
902
	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
903
		btrfs_info(info, "using free space tree");
S
Sage Weil 已提交
904
	return ret;
905 906 907 908 909 910 911 912
}

/*
 * Parse mount options that are required early in the mount process.
 *
 * All other options will be parsed on much later in the mount process and
 * only when we need to allocate a new super block.
 */
913 914
static int btrfs_parse_device_options(const char *options, fmode_t flags,
				      void *holder)
915 916
{
	substring_t args[MAX_OPT_ARGS];
917
	char *device_name, *opts, *orig, *p;
918
	struct btrfs_device *device = NULL;
919 920
	int error = 0;

921 922
	lockdep_assert_held(&uuid_mutex);

923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
	if (!options)
		return 0;

	/*
	 * strsep changes the string, duplicate it because btrfs_parse_options
	 * gets called later
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
	orig = opts;

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;

		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		if (token == Opt_device) {
			device_name = match_strdup(&args[0]);
			if (!device_name) {
				error = -ENOMEM;
				goto out;
			}
948 949
			device = btrfs_scan_one_device(device_name, flags,
					holder);
950
			kfree(device_name);
951 952
			if (IS_ERR(device)) {
				error = PTR_ERR(device);
953
				goto out;
954
			}
955 956 957 958 959 960 961 962 963 964 965 966 967
		}
	}

out:
	kfree(orig);
	return error;
}

/*
 * Parse mount options that are related to subvolume id
 *
 * The value is later passed to mount_subvol()
 */
968 969
static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
		u64 *subvol_objectid)
970 971 972
{
	substring_t args[MAX_OPT_ARGS];
	char *opts, *orig, *p;
973
	int error = 0;
974
	u64 subvolid;
975 976

	if (!options)
977
		return 0;
978 979

	/*
980
	 * strsep changes the string, duplicate it because
981
	 * btrfs_parse_device_options gets called later
982 983 984 985
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
986
	orig = opts;
987 988 989 990 991 992 993 994 995

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_subvol:
996
			kfree(*subvol_name);
997
			*subvol_name = match_strdup(&args[0]);
998 999 1000 1001
			if (!*subvol_name) {
				error = -ENOMEM;
				goto out;
			}
1002
			break;
1003
		case Opt_subvolid:
1004 1005
			error = match_u64(&args[0], &subvolid);
			if (error)
1006
				goto out;
1007 1008 1009 1010 1011 1012

			/* we want the original fs_tree */
			if (subvolid == 0)
				subvolid = BTRFS_FS_TREE_OBJECTID;

			*subvol_objectid = subvolid;
1013
			break;
1014
		case Opt_subvolrootid:
1015
			pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
1016
			break;
1017 1018 1019 1020 1021
		default:
			break;
		}
	}

1022
out:
1023
	kfree(orig);
1024
	return error;
1025 1026
}

1027 1028
/*
 * Build the path of the subvolume @subvol_objectid, starting at the top-level
 * subvolume.
 *
 * The path is assembled back to front in a PATH_MAX sized buffer: each
 * component is copied in front of the ones already collected and prefixed
 * with '/'.  Once the top-level subvolume is reached the result is moved to
 * the start of the buffer.
 *
 * Returns a kmalloc()ed, NUL terminated string owned by the caller (to be
 * released with kfree()); the string is "/" when no component was collected.
 * On failure an ERR_PTR() is returned: -ENOMEM, -ENOENT when an expected
 * back reference is missing, -ENAMETOOLONG when the path does not fit, or
 * the error of the failing tree search / root lookup.
 */
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
					  u64 subvol_objectid)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_root *fs_root = NULL;
	struct btrfs_root_ref *root_ref;
	struct btrfs_inode_ref *inode_ref;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	char *name = NULL, *ptr;
	u64 dirid;
	int len;
	int ret;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto err;
	}
	path->leave_spinning = 1;

	name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!name) {
		ret = -ENOMEM;
		goto err;
	}
	/* Start with an empty string at the very end of the buffer */
	ptr = name + PATH_MAX - 1;
	ptr[0] = '\0';

	/*
	 * Walk up the subvolume trees in the tree of tree roots by root
	 * backrefs until we hit the top-level subvolume.
	 */
	while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_BACKREF_KEY;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
			goto err;
		} else if (ret > 0) {
			ret = btrfs_previous_item(root, path, subvol_objectid,
						  BTRFS_ROOT_BACKREF_KEY);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = -ENOENT;
				goto err;
			}
		}

		/* The offset of the backref key is the parent subvolume */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		subvol_objectid = key.offset;

		root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_root_ref);
		len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
		/*
		 * Check the remaining room before moving the cursor so that
		 * no pointer in front of the buffer is ever formed.
		 */
		if (len + 1 > ptr - name) {
			ret = -ENAMETOOLONG;
			goto err;
		}
		ptr -= len + 1;
		read_extent_buffer(path->nodes[0], ptr + 1,
				   (unsigned long)(root_ref + 1), len);
		ptr[0] = '/';
		dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
		btrfs_release_path(path);

		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;
		fs_root = btrfs_get_fs_root(fs_info, &key, true);
		if (IS_ERR(fs_root)) {
			ret = PTR_ERR(fs_root);
			/* Nothing to put on the error path */
			fs_root = NULL;
			goto err;
		}

		/*
		 * Walk up the filesystem tree by inode refs until we hit the
		 * root directory.
		 */
		while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
			key.objectid = dirid;
			key.type = BTRFS_INODE_REF_KEY;
			key.offset = (u64)-1;

			ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = btrfs_previous_item(fs_root, path, dirid,
							  BTRFS_INODE_REF_KEY);
				if (ret < 0) {
					goto err;
				} else if (ret > 0) {
					ret = -ENOENT;
					goto err;
				}
			}

			/* The offset of the inode ref key is the parent dir */
			btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
			dirid = key.offset;

			inode_ref = btrfs_item_ptr(path->nodes[0],
						   path->slots[0],
						   struct btrfs_inode_ref);
			len = btrfs_inode_ref_name_len(path->nodes[0],
						       inode_ref);
			/* Same room check as for the subvolume name above */
			if (len + 1 > ptr - name) {
				ret = -ENAMETOOLONG;
				goto err;
			}
			ptr -= len + 1;
			read_extent_buffer(path->nodes[0], ptr + 1,
					   (unsigned long)(inode_ref + 1), len);
			ptr[0] = '/';
			btrfs_release_path(path);
		}
		btrfs_put_root(fs_root);
		fs_root = NULL;
	}

	btrfs_free_path(path);
	if (ptr == name + PATH_MAX - 1) {
		/* No component was collected: this is the top-level subvolume */
		name[0] = '/';
		name[1] = '\0';
	} else {
		/* Move the path, including its terminator, to the buffer start */
		memmove(name, ptr, name + PATH_MAX - ptr);
	}
	return name;

err:
	btrfs_put_root(fs_root);
	btrfs_free_path(path);
	kfree(name);
	return ERR_PTR(ret);
}

/*
 * Look up the objectid of the subvolume that is mounted when neither
 * "subvol=" nor "subvolid=" was given.
 *
 * @fs_info:  filesystem to query
 * @objectid: out; objectid stored in the "default" dir item, or
 *            BTRFS_FS_TREE_OBJECTID when that item does not exist
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * reported by btrfs_lookup_dir_item().
 */
static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
{
	struct btrfs_dir_item *default_item;
	struct btrfs_key location;
	struct btrfs_path *path;
	u64 root_dir;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	/*
	 * Find the "default" dir item which points to the root item that we
	 * will mount by default if we haven't been given a specific subvolume
	 * to mount.
	 */
	root_dir = btrfs_super_root_dir(fs_info->super_copy);
	default_item = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
					     root_dir, "default", 7, 0);
	if (IS_ERR(default_item)) {
		ret = PTR_ERR(default_item);
	} else if (!default_item) {
		/*
		 * Ok the default dir item isn't there.  This is weird since
		 * it's always been there, but don't freak out, just try and
		 * mount the top-level subvolume.
		 */
		*objectid = BTRFS_FS_TREE_OBJECTID;
	} else {
		btrfs_dir_item_key_to_cpu(path->nodes[0], default_item,
					  &location);
		*objectid = location.objectid;
	}

	btrfs_free_path(path);
	return ret;
}

C
Chris Mason 已提交
1208
static int btrfs_fill_super(struct super_block *sb,
1209
			    struct btrfs_fs_devices *fs_devices,
1210
			    void *data)
C
Chris Mason 已提交
1211
{
C
Chris Mason 已提交
1212
	struct inode *inode;
1213
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1214
	struct btrfs_key key;
C
Chris Mason 已提交
1215
	int err;
1216

C
Chris Mason 已提交
1217 1218 1219
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_magic = BTRFS_SUPER_MAGIC;
	sb->s_op = &btrfs_super_ops;
A
Al Viro 已提交
1220
	sb->s_d_op = &btrfs_dentry_operations;
B
Balaji Rao 已提交
1221
	sb->s_export_op = &btrfs_export_ops;
J
Josef Bacik 已提交
1222
	sb->s_xattr = btrfs_xattr_handlers;
C
Chris Mason 已提交
1223
	sb->s_time_gran = 1;
C
Chris Mason 已提交
1224
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
1225
	sb->s_flags |= SB_POSIXACL;
1226
#endif
M
Matthew Garrett 已提交
1227
	sb->s_flags |= SB_I_VERSION;
1228
	sb->s_iflags |= SB_I_CGROUPWB;
1229 1230 1231 1232 1233 1234 1235

	err = super_setup_bdi(sb);
	if (err) {
		btrfs_err(fs_info, "super_setup_bdi failed");
		return err;
	}

A
Al Viro 已提交
1236 1237
	err = open_ctree(sb, fs_devices, (char *)data);
	if (err) {
1238
		btrfs_err(fs_info, "open_ctree failed");
A
Al Viro 已提交
1239
		return err;
1240 1241
	}

1242 1243 1244
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
1245
	inode = btrfs_iget(sb, &key, fs_info->fs_root);
1246 1247
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
C
Chris Mason 已提交
1248
		goto fail_close;
C
Chris Mason 已提交
1249 1250
	}

1251 1252
	sb->s_root = d_make_root(inode);
	if (!sb->s_root) {
C
Chris Mason 已提交
1253 1254
		err = -ENOMEM;
		goto fail_close;
C
Chris Mason 已提交
1255
	}
1256

D
Dan Magenheimer 已提交
1257
	cleancache_init_fs(sb);
1258
	sb->s_flags |= SB_ACTIVE;
C
Chris Mason 已提交
1259
	return 0;
C
Chris Mason 已提交
1260 1261

fail_close:
1262
	close_ctree(fs_info);
C
Chris Mason 已提交
1263
	return err;
C
Chris Mason 已提交
1264 1265
}

S
Sage Weil 已提交
1266
int btrfs_sync_fs(struct super_block *sb, int wait)
C
Chris Mason 已提交
1267 1268
{
	struct btrfs_trans_handle *trans;
1269 1270
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
C
Chris Mason 已提交
1271

1272
	trace_btrfs_sync_fs(fs_info, wait);
1273

C
Chris Mason 已提交
1274
	if (!wait) {
1275
		filemap_flush(fs_info->btree_inode->i_mapping);
C
Chris Mason 已提交
1276 1277
		return 0;
	}
1278

1279
	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
1280

M
Miao Xie 已提交
1281
	trans = btrfs_attach_transaction_barrier(root);
1282
	if (IS_ERR(trans)) {
1283
		/* no transaction, don't bother */
1284 1285 1286 1287 1288 1289 1290
		if (PTR_ERR(trans) == -ENOENT) {
			/*
			 * Exit unless we have some pending changes
			 * that need to go through commit
			 */
			if (fs_info->pending_changes == 0)
				return 0;
1291 1292 1293 1294 1295 1296
			/*
			 * A non-blocking test if the fs is frozen. We must not
			 * start a new transaction here otherwise a deadlock
			 * happens. The pending operations are delayed to the
			 * next commit after thawing.
			 */
1297 1298
			if (sb_start_write_trylock(sb))
				sb_end_write(sb);
1299 1300
			else
				return 0;
1301 1302
			trans = btrfs_start_transaction(root, 0);
		}
1303 1304
		if (IS_ERR(trans))
			return PTR_ERR(trans);
1305
	}
1306
	return btrfs_commit_transaction(trans);
C
Chris Mason 已提交
1307 1308
}

1309
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
E
Eric Paris 已提交
1310
{
1311
	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1312
	const char *compress_type;
E
Eric Paris 已提交
1313

1314
	if (btrfs_test_opt(info, DEGRADED))
E
Eric Paris 已提交
1315
		seq_puts(seq, ",degraded");
1316
	if (btrfs_test_opt(info, NODATASUM))
E
Eric Paris 已提交
1317
		seq_puts(seq, ",nodatasum");
1318
	if (btrfs_test_opt(info, NODATACOW))
E
Eric Paris 已提交
1319
		seq_puts(seq, ",nodatacow");
1320
	if (btrfs_test_opt(info, NOBARRIER))
E
Eric Paris 已提交
1321
		seq_puts(seq, ",nobarrier");
1322
	if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1323
		seq_printf(seq, ",max_inline=%llu", info->max_inline);
E
Eric Paris 已提交
1324 1325
	if (info->thread_pool_size !=  min_t(unsigned long,
					     num_online_cpus() + 2, 8))
1326
		seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
1327
	if (btrfs_test_opt(info, COMPRESS)) {
1328
		compress_type = btrfs_compress_type2str(info->compress_type);
1329
		if (btrfs_test_opt(info, FORCE_COMPRESS))
T
Tsutomu Itoh 已提交
1330 1331 1332
			seq_printf(seq, ",compress-force=%s", compress_type);
		else
			seq_printf(seq, ",compress=%s", compress_type);
1333
		if (info->compress_level)
1334
			seq_printf(seq, ":%d", info->compress_level);
T
Tsutomu Itoh 已提交
1335
	}
1336
	if (btrfs_test_opt(info, NOSSD))
C
Chris Mason 已提交
1337
		seq_puts(seq, ",nossd");
1338
	if (btrfs_test_opt(info, SSD_SPREAD))
1339
		seq_puts(seq, ",ssd_spread");
1340
	else if (btrfs_test_opt(info, SSD))
E
Eric Paris 已提交
1341
		seq_puts(seq, ",ssd");
1342
	if (btrfs_test_opt(info, NOTREELOG))
1343
		seq_puts(seq, ",notreelog");
1344
	if (btrfs_test_opt(info, NOLOGREPLAY))
1345
		seq_puts(seq, ",nologreplay");
1346
	if (btrfs_test_opt(info, FLUSHONCOMMIT))
1347
		seq_puts(seq, ",flushoncommit");
1348
	if (btrfs_test_opt(info, DISCARD_SYNC))
1349
		seq_puts(seq, ",discard");
1350 1351
	if (btrfs_test_opt(info, DISCARD_ASYNC))
		seq_puts(seq, ",discard=async");
1352
	if (!(info->sb->s_flags & SB_POSIXACL))
E
Eric Paris 已提交
1353
		seq_puts(seq, ",noacl");
1354
	if (btrfs_test_opt(info, SPACE_CACHE))
T
Tsutomu Itoh 已提交
1355
		seq_puts(seq, ",space_cache");
1356
	else if (btrfs_test_opt(info, FREE_SPACE_TREE))
1357
		seq_puts(seq, ",space_cache=v2");
1358
	else
1359
		seq_puts(seq, ",nospace_cache");
1360
	if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1361
		seq_puts(seq, ",rescan_uuid_tree");
1362
	if (btrfs_test_opt(info, CLEAR_CACHE))
T
Tsutomu Itoh 已提交
1363
		seq_puts(seq, ",clear_cache");
1364
	if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
T
Tsutomu Itoh 已提交
1365
		seq_puts(seq, ",user_subvol_rm_allowed");
1366
	if (btrfs_test_opt(info, ENOSPC_DEBUG))
1367
		seq_puts(seq, ",enospc_debug");
1368
	if (btrfs_test_opt(info, AUTO_DEFRAG))
1369
		seq_puts(seq, ",autodefrag");
1370
	if (btrfs_test_opt(info, INODE_MAP_CACHE))
1371
		seq_puts(seq, ",inode_cache");
1372
	if (btrfs_test_opt(info, SKIP_BALANCE))
1373
		seq_puts(seq, ",skip_balance");
1374
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1375
	if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1376
		seq_puts(seq, ",check_int_data");
1377
	else if (btrfs_test_opt(info, CHECK_INTEGRITY))
1378 1379 1380 1381 1382 1383
		seq_puts(seq, ",check_int");
	if (info->check_integrity_print_mask)
		seq_printf(seq, ",check_int_print_mask=%d",
				info->check_integrity_print_mask);
#endif
	if (info->metadata_ratio)
1384
		seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
1385
	if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
J
Jeff Mahoney 已提交
1386
		seq_puts(seq, ",fatal_errors=panic");
1387
	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1388
		seq_printf(seq, ",commit=%u", info->commit_interval);
1389
#ifdef CONFIG_BTRFS_DEBUG
1390
	if (btrfs_test_opt(info, FRAGMENT_DATA))
1391
		seq_puts(seq, ",fragment=data");
1392
	if (btrfs_test_opt(info, FRAGMENT_METADATA))
1393 1394
		seq_puts(seq, ",fragment=metadata");
#endif
J
Josef Bacik 已提交
1395 1396
	if (btrfs_test_opt(info, REF_VERIFY))
		seq_puts(seq, ",ref_verify");
1397 1398 1399 1400
	seq_printf(seq, ",subvolid=%llu",
		  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
	seq_puts(seq, ",subvol=");
	seq_dentry(seq, dentry, " \t\n\\");
E
Eric Paris 已提交
1401 1402 1403
	return 0;
}

1404
static int btrfs_test_super(struct super_block *s, void *data)
Y
Yan 已提交
1405
{
1406 1407
	struct btrfs_fs_info *p = data;
	struct btrfs_fs_info *fs_info = btrfs_sb(s);
Y
Yan 已提交
1408

1409
	return fs_info->fs_devices == p->fs_devices;
Y
Yan 已提交
1410 1411
}

1412 1413
/*
 * sget() set callback: run set_anon_super() on the new super block and, if
 * that succeeds, attach @data as its s_fs_info.
 *
 * Returns 0 on success or the error from set_anon_super().
 */
static int btrfs_set_super(struct super_block *s, void *data)
{
	int ret;

	ret = set_anon_super(s, data);
	if (ret)
		return ret;

	s->s_fs_info = data;
	return 0;
}

1420 1421 1422 1423 1424 1425 1426 1427 1428 1429
/*
 * Subvolumes are identified by ino 256: return 1 when @inode is non-NULL and
 * its inode number equals BTRFS_FIRST_FREE_OBJECTID, 0 otherwise.
 */
static inline int is_subvolume_inode(struct inode *inode)
{
	return inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID;
}

1430
/*
 * Resolve the requested subvolume inside an already mounted filesystem root
 * and return its dentry.
 *
 * @subvol_name:     path of the subvolume, or NULL to derive it from
 *                   @subvol_objectid
 * @subvol_objectid: id of the subvolume, or 0 to use the default subvolume
 *                   when no name was given
 * @mnt:             mount of the filesystem root
 *
 * Both @subvol_name and the reference on @mnt are consumed: the name is
 * freed and the mount is dropped (by mount_subtree() or by mntput() here) on
 * every path.
 *
 * Returns the dentry of the subvolume or an ERR_PTR().
 */
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
				   struct vfsmount *mnt)
{
	struct btrfs_fs_info *fs_info;
	struct super_block *sb;
	struct inode *root_inode;
	struct dentry *root;
	u64 found_objectid;
	int ret;

	if (!subvol_name) {
		/* Neither name nor id given: fall back to the default */
		if (!subvol_objectid) {
			ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
							  &subvol_objectid);
			if (ret) {
				root = ERR_PTR(ret);
				goto out;
			}
		}
		subvol_name = btrfs_get_subvol_name_from_objectid(
					btrfs_sb(mnt->mnt_sb), subvol_objectid);
		if (IS_ERR(subvol_name)) {
			root = ERR_CAST(subvol_name);
			/* Do not hand an error pointer to kfree() below */
			subvol_name = NULL;
			goto out;
		}
	}

	root = mount_subtree(mnt, subvol_name);
	/* mount_subtree() drops our reference on the vfsmount. */
	mnt = NULL;
	if (IS_ERR(root))
		goto out;

	sb = root->d_sb;
	fs_info = btrfs_sb(sb);
	root_inode = d_inode(root);
	found_objectid = BTRFS_I(root_inode)->root->root_key.objectid;

	ret = 0;
	if (!is_subvolume_inode(root_inode)) {
		btrfs_err(fs_info, "'%s' is not a valid subvolume",
		       subvol_name);
		ret = -EINVAL;
	}
	if (subvol_objectid && found_objectid != subvol_objectid) {
		/*
		 * This will also catch a race condition where a
		 * subvolume which was passed by ID is renamed and
		 * another subvolume is renamed over the old location.
		 */
		btrfs_err(fs_info,
			  "subvol '%s' does not match subvolid %llu",
			  subvol_name, subvol_objectid);
		ret = -EINVAL;
	}
	if (ret) {
		dput(root);
		root = ERR_PTR(ret);
		deactivate_locked_super(sb);
	}

out:
	mntput(mnt);
	kfree(subvol_name);
	return root;
}
1494

1495 1496 1497 1498 1499 1500
/*
 * Find a superblock for the given device / mount point.
 *
 * Note: This is based on mount_bdev from fs/super.c with a few additions
 *       for multiple device setup.  Make sure to keep it in sync.
 */
1501 1502 1503 1504 1505
static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
		int flags, const char *device_name, void *data)
{
	struct block_device *bdev = NULL;
	struct super_block *s;
1506
	struct btrfs_device *device = NULL;
1507 1508
	struct btrfs_fs_devices *fs_devices = NULL;
	struct btrfs_fs_info *fs_info = NULL;
1509
	void *new_sec_opts = NULL;
1510 1511 1512 1513 1514 1515 1516
	fmode_t mode = FMODE_READ;
	int error = 0;

	if (!(flags & SB_RDONLY))
		mode |= FMODE_WRITE;

	if (data) {
A
Al Viro 已提交
1517
		error = security_sb_eat_lsm_opts(data, &new_sec_opts);
1518 1519 1520 1521 1522 1523 1524
		if (error)
			return ERR_PTR(error);
	}

	/*
	 * Setup a dummy root and fs_info for test/set super.  This is because
	 * we don't actually fill this stuff out until open_ctree, but we need
1525 1526 1527 1528
	 * then open_ctree will properly initialize the file system specific
	 * settings later.  btrfs_init_fs_info initializes the static elements
	 * of the fs_info (locks and such) to make cleanup easier if we find a
	 * superblock with our given fs_devices later on at sget() time.
1529
	 */
1530
	fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
1531 1532 1533 1534
	if (!fs_info) {
		error = -ENOMEM;
		goto error_sec_opts;
	}
1535
	btrfs_init_fs_info(fs_info);
1536 1537 1538 1539 1540 1541 1542 1543

	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
	if (!fs_info->super_copy || !fs_info->super_for_commit) {
		error = -ENOMEM;
		goto error_fs_info;
	}

1544
	mutex_lock(&uuid_mutex);
1545
	error = btrfs_parse_device_options(data, mode, fs_type);
1546 1547
	if (error) {
		mutex_unlock(&uuid_mutex);
1548
		goto error_fs_info;
1549
	}
1550

1551 1552
	device = btrfs_scan_one_device(device_name, mode, fs_type);
	if (IS_ERR(device)) {
1553
		mutex_unlock(&uuid_mutex);
1554
		error = PTR_ERR(device);
1555
		goto error_fs_info;
1556
	}
1557

1558
	fs_devices = device->fs_devices;
1559 1560
	fs_info->fs_devices = fs_devices;

1561
	error = btrfs_open_devices(fs_devices, mode, fs_type);
1562
	mutex_unlock(&uuid_mutex);
1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580
	if (error)
		goto error_fs_info;

	if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
		error = -EACCES;
		goto error_close_devices;
	}

	bdev = fs_devices->latest_bdev;
	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
		 fs_info);
	if (IS_ERR(s)) {
		error = PTR_ERR(s);
		goto error_close_devices;
	}

	if (s->s_root) {
		btrfs_close_devices(fs_devices);
1581
		btrfs_free_fs_info(fs_info);
1582 1583 1584 1585 1586
		if ((flags ^ s->s_flags) & SB_RDONLY)
			error = -EBUSY;
	} else {
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
		btrfs_sb(s)->bdev_holder = fs_type;
1587 1588
		if (!strstr(crc32c_impl(), "generic"))
			set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
1589 1590
		error = btrfs_fill_super(s, fs_devices, data);
	}
A
Al Viro 已提交
1591
	if (!error)
1592
		error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
A
Al Viro 已提交
1593
	security_free_mnt_opts(&new_sec_opts);
1594 1595
	if (error) {
		deactivate_locked_super(s);
A
Al Viro 已提交
1596
		return ERR_PTR(error);
1597 1598 1599 1600 1601 1602 1603
	}

	return dget(s->s_root);

error_close_devices:
	btrfs_close_devices(fs_devices);
error_fs_info:
1604
	btrfs_free_fs_info(fs_info);
1605 1606 1607 1608
error_sec_opts:
	security_free_mnt_opts(&new_sec_opts);
	return ERR_PTR(error);
}
1609

1610
/*
1611
 * Mount function which is called by VFS layer.
1612
 *
1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630
 * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
 * which needs vfsmount* of device's root (/).  This means device's root has to
 * be mounted internally in any case.
 *
 * Operation flow:
 *   1. Parse subvol id related options for later use in mount_subvol().
 *
 *   2. Mount device's root (/) by calling vfs_kern_mount().
 *
 *      NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
 *      first place. In order to avoid calling btrfs_mount() again, we use
 *      different file_system_type which is not registered to VFS by
 *      register_filesystem() (btrfs_root_fs_type). As a result,
 *      btrfs_mount_root() is called. The return value will be used by
 *      mount_subtree() in mount_subvol().
 *
 *   3. Call mount_subvol() to get the dentry of subvolume. Since there is
 *      "btrfs subvolume set-default", mount_subvol() is called always.
1631
 */
A
Al Viro 已提交
1632
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1633
		const char *device_name, void *data)
Y
Yan 已提交
1634
{
1635 1636
	struct vfsmount *mnt_root;
	struct dentry *root;
1637 1638
	char *subvol_name = NULL;
	u64 subvol_objectid = 0;
Y
Yan 已提交
1639 1640
	int error = 0;

1641 1642
	error = btrfs_parse_subvol_options(data, &subvol_name,
					&subvol_objectid);
1643 1644
	if (error) {
		kfree(subvol_name);
A
Al Viro 已提交
1645
		return ERR_PTR(error);
1646
	}
1647

1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658
	/* mount device's root (/) */
	mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
	if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
		if (flags & SB_RDONLY) {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags & ~SB_RDONLY, device_name, data);
		} else {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags | SB_RDONLY, device_name, data);
			if (IS_ERR(mnt_root)) {
				root = ERR_CAST(mnt_root);
1659
				kfree(subvol_name);
1660 1661
				goto out;
			}
Y
Yan 已提交
1662

1663 1664 1665 1666 1667 1668
			down_write(&mnt_root->mnt_sb->s_umount);
			error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
			up_write(&mnt_root->mnt_sb->s_umount);
			if (error < 0) {
				root = ERR_PTR(error);
				mntput(mnt_root);
1669
				kfree(subvol_name);
1670 1671 1672
				goto out;
			}
		}
1673
	}
1674 1675
	if (IS_ERR(mnt_root)) {
		root = ERR_CAST(mnt_root);
1676
		kfree(subvol_name);
1677
		goto out;
1678
	}
Y
Yan 已提交
1679

1680
	/* mount_subvol() will free subvol_name and mnt_root */
1681
	root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
Y
Yan 已提交
1682

1683 1684
out:
	return root;
Y
Yan 已提交
1685
}
1686

1687
/*
 * Apply a new thread pool size to the filesystem's work queues.
 *
 * @fs_info:       filesystem whose work queues are resized
 * @new_pool_size: value stored in fs_info->thread_pool_size and passed to
 *                 btrfs_workqueue_set_max() for each queue below
 * @old_pool_size: previous size; nothing is done when it equals
 *                 @new_pool_size
 */
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
				     u32 new_pool_size, u32 old_pool_size)
{
	if (new_pool_size == old_pool_size)
		return;

	fs_info->thread_pool_size = new_pool_size;

	/* Both sizes are u32, so print them as unsigned */
	btrfs_info(fs_info, "resize thread pool %u -> %u",
	       old_pool_size, new_pool_size);

	btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
				new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
				new_pool_size);
}

1713
static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
M
Miao Xie 已提交
1714 1715
{
	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1716
}
M
Miao Xie 已提交
1717

1718 1719 1720
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
				       unsigned long old_opts, int flags)
{
M
Miao Xie 已提交
1721 1722
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1723
	     (flags & SB_RDONLY))) {
M
Miao Xie 已提交
1724 1725 1726
		/* wait for any defraggers to finish */
		wait_event(fs_info->transaction_wait,
			   (atomic_read(&fs_info->defrag_running) == 0));
1727
		if (flags & SB_RDONLY)
M
Miao Xie 已提交
1728 1729 1730 1731 1732 1733 1734 1735
			sync_filesystem(fs_info->sb);
	}
}

static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
					 unsigned long old_opts)
{
	/*
1736 1737
	 * We need to cleanup all defragable inodes if the autodefragment is
	 * close or the filesystem is read only.
M
Miao Xie 已提交
1738 1739
	 */
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1740
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) {
M
Miao Xie 已提交
1741 1742 1743
		btrfs_cleanup_defrag_inodes(fs_info);
	}

1744 1745 1746 1747 1748 1749 1750 1751
	/* If we toggled discard async */
	if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
	    btrfs_test_opt(fs_info, DISCARD_ASYNC))
		btrfs_discard_resume(fs_info);
	else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
		 !btrfs_test_opt(fs_info, DISCARD_ASYNC))
		btrfs_discard_cleanup(fs_info);

M
Miao Xie 已提交
1752 1753 1754
	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
}

Y
Yan Zheng 已提交
1755 1756
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
1757 1758
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
1759 1760 1761 1762
	unsigned old_flags = sb->s_flags;
	unsigned long old_opts = fs_info->mount_opt;
	unsigned long old_compress_type = fs_info->compress_type;
	u64 old_max_inline = fs_info->max_inline;
1763
	u32 old_thread_pool_size = fs_info->thread_pool_size;
1764
	u32 old_metadata_ratio = fs_info->metadata_ratio;
Y
Yan Zheng 已提交
1765 1766
	int ret;

1767
	sync_filesystem(sb);
1768
	btrfs_remount_prepare(fs_info);
M
Miao Xie 已提交
1769

1770
	if (data) {
1771
		void *new_sec_opts = NULL;
1772

A
Al Viro 已提交
1773 1774
		ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
		if (!ret)
1775
			ret = security_sb_remount(sb, new_sec_opts);
A
Al Viro 已提交
1776
		security_free_mnt_opts(&new_sec_opts);
1777 1778 1779 1780
		if (ret)
			goto restore;
	}

1781
	ret = btrfs_parse_options(fs_info, data, *flags);
1782
	if (ret)
1783
		goto restore;
1784

1785
	btrfs_remount_begin(fs_info, old_opts, *flags);
1786 1787 1788
	btrfs_resize_thread_pool(fs_info,
		fs_info->thread_pool_size, old_thread_pool_size);

1789
	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
M
Miao Xie 已提交
1790
		goto out;
Y
Yan Zheng 已提交
1791

1792
	if (*flags & SB_RDONLY) {
1793 1794 1795 1796
		/*
		 * this also happens on 'umount -rf' or on shutdown, when
		 * the filesystem is busy.
		 */
1797
		cancel_work_sync(&fs_info->async_reclaim_work);
1798

1799 1800
		btrfs_discard_cleanup(fs_info);

1801 1802 1803 1804 1805
		/* wait for the uuid_scan task to finish */
		down(&fs_info->uuid_tree_rescan_sem);
		/* avoid complains from lockdep et al. */
		up(&fs_info->uuid_tree_rescan_sem);

1806
		sb->s_flags |= SB_RDONLY;
Y
Yan Zheng 已提交
1807

1808
		/*
1809
		 * Setting SB_RDONLY will put the cleaner thread to
1810 1811 1812 1813 1814 1815 1816
		 * sleep at the next loop if it's already active.
		 * If it's already asleep, we'll leave unused block
		 * groups on disk until we're mounted read-write again
		 * unless we clean them up here.
		 */
		btrfs_delete_unused_bgs(fs_info);

1817 1818
		btrfs_dev_replace_suspend_for_unmount(fs_info);
		btrfs_scrub_cancel(fs_info);
1819
		btrfs_pause_balance(fs_info);
1820

1821
		ret = btrfs_commit_super(fs_info);
1822 1823
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1824
	} else {
1825
		if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
1826
			btrfs_err(fs_info,
1827
				"Remounting read-write after error is not allowed");
1828 1829 1830
			ret = -EINVAL;
			goto restore;
		}
1831
		if (fs_info->fs_devices->rw_devices == 0) {
1832 1833
			ret = -EACCES;
			goto restore;
1834
		}
Y
Yan Zheng 已提交
1835

1836
		if (!btrfs_check_rw_degradable(fs_info, NULL)) {
1837
			btrfs_warn(fs_info,
1838
		"too many missing devices, writable remount is not allowed");
1839 1840 1841 1842
			ret = -EACCES;
			goto restore;
		}

1843
		if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1844 1845
			btrfs_warn(fs_info,
		"mount required to replay tree-log, cannot remount read-write");
1846 1847
			ret = -EINVAL;
			goto restore;
1848
		}
Y
Yan Zheng 已提交
1849

1850
		ret = btrfs_cleanup_fs_roots(fs_info);
1851 1852
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1853

1854
		/* recover relocation */
1855
		mutex_lock(&fs_info->cleaner_mutex);
1856
		ret = btrfs_recover_relocation(root);
1857
		mutex_unlock(&fs_info->cleaner_mutex);
1858 1859
		if (ret)
			goto restore;
Y
Yan Zheng 已提交
1860

1861 1862 1863 1864
		ret = btrfs_resume_balance_async(fs_info);
		if (ret)
			goto restore;

1865 1866
		ret = btrfs_resume_dev_replace_async(fs_info);
		if (ret) {
1867
			btrfs_warn(fs_info, "failed to resume dev_replace");
1868 1869
			goto restore;
		}
1870

1871 1872
		btrfs_qgroup_rescan_resume(fs_info);

1873
		if (!fs_info->uuid_root) {
1874
			btrfs_info(fs_info, "creating UUID tree");
1875 1876
			ret = btrfs_create_uuid_tree(fs_info);
			if (ret) {
J
Jeff Mahoney 已提交
1877 1878 1879
				btrfs_warn(fs_info,
					   "failed to create the UUID tree %d",
					   ret);
1880 1881 1882
				goto restore;
			}
		}
1883
		sb->s_flags &= ~SB_RDONLY;
1884

1885
		set_bit(BTRFS_FS_OPEN, &fs_info->flags);
Y
Yan Zheng 已提交
1886
	}
M
Miao Xie 已提交
1887
out:
1888
	wake_up_process(fs_info->transaction_kthread);
M
Miao Xie 已提交
1889
	btrfs_remount_cleanup(fs_info, old_opts);
Y
Yan Zheng 已提交
1890
	return 0;
1891 1892

restore:
1893
	/* We've hit an error - don't reset SB_RDONLY */
1894
	if (sb_rdonly(sb))
1895
		old_flags |= SB_RDONLY;
1896 1897 1898 1899
	sb->s_flags = old_flags;
	fs_info->mount_opt = old_opts;
	fs_info->compress_type = old_compress_type;
	fs_info->max_inline = old_max_inline;
1900 1901
	btrfs_resize_thread_pool(fs_info,
		old_thread_pool_size, fs_info->thread_pool_size);
1902
	fs_info->metadata_ratio = old_metadata_ratio;
M
Miao Xie 已提交
1903
	btrfs_remount_cleanup(fs_info, old_opts);
1904
	return ret;
Y
Yan Zheng 已提交
1905 1906
}

1907
/* Used to sort the devices by max_avail(descending sort) */
1908
static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
				       const void *dev_info2)
{
	if (((struct btrfs_device_info *)dev_info1)->max_avail >
	    ((struct btrfs_device_info *)dev_info2)->max_avail)
		return -1;
	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
		 ((struct btrfs_device_info *)dev_info2)->max_avail)
		return 1;
	else
	return 0;
}

/*
 * Sort @nr_devices entries of @devices in place by max_avail, which holds
 * the max free extent size of each device, in descending order.
 */
static inline void btrfs_descending_sort_devices(
					struct btrfs_device_info *devices,
					size_t nr_devices)
{
	sort(devices, nr_devices, sizeof(*devices),
	     btrfs_cmp_device_free_bytes, NULL);
}

1933 1934 1935 1936
/*
 * The helper to calc the free space on the devices that can be used to store
 * file data.
 */
1937 1938
static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
					      u64 *free_bytes)
1939 1940 1941 1942 1943 1944 1945
{
	struct btrfs_device_info *devices_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 type;
	u64 avail_space;
	u64 min_stripe_size;
1946
	int num_stripes = 1;
1947
	int i = 0, nr_devices;
1948
	const struct btrfs_raid_attr *rattr;
1949

1950
	/*
1951
	 * We aren't under the device list lock, so this is racy-ish, but good
1952 1953
	 * enough for our purposes.
	 */
1954
	nr_devices = fs_info->fs_devices->open_devices;
1955 1956 1957 1958 1959 1960 1961 1962 1963
	if (!nr_devices) {
		smp_mb();
		nr_devices = fs_info->fs_devices->open_devices;
		ASSERT(nr_devices);
		if (!nr_devices) {
			*free_bytes = 0;
			return 0;
		}
	}
1964

1965
	devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
1966
			       GFP_KERNEL);
1967 1968 1969
	if (!devices_info)
		return -ENOMEM;

1970
	/* calc min stripe number for data space allocation */
1971
	type = btrfs_data_alloc_profile(fs_info);
1972 1973
	rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];

1974
	if (type & BTRFS_BLOCK_GROUP_RAID0)
1975
		num_stripes = nr_devices;
1976
	else if (type & BTRFS_BLOCK_GROUP_RAID1)
1977
		num_stripes = 2;
1978 1979
	else if (type & BTRFS_BLOCK_GROUP_RAID1C3)
		num_stripes = 3;
1980 1981
	else if (type & BTRFS_BLOCK_GROUP_RAID1C4)
		num_stripes = 4;
1982
	else if (type & BTRFS_BLOCK_GROUP_RAID10)
1983
		num_stripes = 4;
1984

1985 1986
	/* Adjust for more than 1 stripe per device */
	min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
1987

1988 1989
	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
1990 1991
		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
						&device->dev_state) ||
1992 1993
		    !device->bdev ||
		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
1994 1995
			continue;

1996 1997 1998
		if (i >= nr_devices)
			break;

1999 2000 2001
		avail_space = device->total_bytes - device->bytes_used;

		/* align with stripe_len */
2002
		avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
2003 2004

		/*
2005
		 * In order to avoid overwriting the superblock on the drive,
2006 2007
		 * btrfs starts at an offset of at least 1MB when doing chunk
		 * allocation.
2008 2009 2010
		 *
		 * This ensures we have at least min_stripe_size free space
		 * after excluding 1MB.
2011
		 */
2012
		if (avail_space <= SZ_1M + min_stripe_size)
2013 2014
			continue;

2015 2016
		avail_space -= SZ_1M;

2017 2018 2019 2020 2021
		devices_info[i].dev = device;
		devices_info[i].max_avail = avail_space;

		i++;
	}
2022
	rcu_read_unlock();
2023 2024 2025 2026 2027 2028 2029

	nr_devices = i;

	btrfs_descending_sort_devices(devices_info, nr_devices);

	i = nr_devices - 1;
	avail_space = 0;
2030 2031
	while (nr_devices >= rattr->devs_min) {
		num_stripes = min(num_stripes, nr_devices);
2032

2033 2034 2035 2036
		if (devices_info[i].max_avail >= min_stripe_size) {
			int j;
			u64 alloc_size;

2037
			avail_space += devices_info[i].max_avail * num_stripes;
2038
			alloc_size = devices_info[i].max_avail;
2039
			for (j = i + 1 - num_stripes; j <= i; j++)
2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050
				devices_info[j].max_avail -= alloc_size;
		}
		i--;
		nr_devices--;
	}

	kfree(devices_info);
	*free_bytes = avail_space;
	return 0;
}

2051 2052 2053 2054 2055 2056 2057
/*
 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
 *
 * If there's a redundant raid level at DATA block groups, use the respective
 * multiplier to scale the sizes.
 *
 * Unused device space usage is based on simulating the chunk allocator
2058 2059 2060
 * algorithm that respects the device sizes and order of allocations.  This is
 * a close approximation of the actual use but there are other factors that may
 * change the result (like a new metadata chunk).
2061
 *
2062
 * If metadata is exhausted, f_bavail will be 0.
2063
 */
C
Chris Mason 已提交
2064 2065
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
2066 2067
	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
	struct btrfs_super_block *disk_super = fs_info->super_copy;
2068 2069
	struct btrfs_space_info *found;
	u64 total_used = 0;
2070
	u64 total_free_data = 0;
2071
	u64 total_free_meta = 0;
2072
	int bits = dentry->d_sb->s_blocksize_bits;
2073
	__be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
2074 2075
	unsigned factor = 1;
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2076
	int ret;
2077
	u64 thresh = 0;
2078
	int mixed = 0;
C
Chris Mason 已提交
2079

2080
	rcu_read_lock();
2081
	list_for_each_entry_rcu(found, &fs_info->space_info, list) {
2082
		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2083 2084
			int i;

2085 2086 2087
			total_free_data += found->disk_total - found->disk_used;
			total_free_data -=
				btrfs_account_ro_block_groups_free_space(found);
2088 2089

			for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2090 2091 2092
				if (!list_empty(&found->block_groups[i]))
					factor = btrfs_bg_type_to_factor(
						btrfs_raid_array[i].bg_flag);
2093
			}
2094
		}
2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105

		/*
		 * Metadata in mixed block goup profiles are accounted in data
		 */
		if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
			if (found->flags & BTRFS_BLOCK_GROUP_DATA)
				mixed = 1;
			else
				total_free_meta += found->disk_total -
					found->disk_used;
		}
2106

2107
		total_used += found->disk_used;
J
Josef Bacik 已提交
2108
	}
2109

2110 2111
	rcu_read_unlock();

2112 2113 2114 2115 2116 2117
	buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
	buf->f_blocks >>= bits;
	buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);

	/* Account global block reserve as used, it's in logical size already */
	spin_lock(&block_rsv->lock);
2118 2119 2120 2121 2122
	/* Mixed block groups accounting is not byte-accurate, avoid overflow */
	if (buf->f_bfree >= block_rsv->size >> bits)
		buf->f_bfree -= block_rsv->size >> bits;
	else
		buf->f_bfree = 0;
2123 2124
	spin_unlock(&block_rsv->lock);

2125
	buf->f_bavail = div_u64(total_free_data, factor);
2126
	ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
2127
	if (ret)
2128
		return ret;
2129
	buf->f_bavail += div_u64(total_free_data, factor);
2130
	buf->f_bavail = buf->f_bavail >> bits;
C
Chris Mason 已提交
2131

2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144
	/*
	 * We calculate the remaining metadata space minus global reserve. If
	 * this is (supposedly) smaller than zero, there's no space. But this
	 * does not hold in practice, the exhausted state happens where's still
	 * some positive delta. So we apply some guesswork and compare the
	 * delta to a 4M threshold.  (Practically observed delta was ~2M.)
	 *
	 * We probably cannot calculate the exact threshold value because this
	 * depends on the internal reservations requested by various
	 * operations, so some operations that consume a few metadata will
	 * succeed even if the Avail is zero. But this is better than the other
	 * way around.
	 */
2145
	thresh = SZ_4M;
2146

2147 2148 2149 2150 2151 2152 2153 2154 2155
	/*
	 * We only want to claim there's no available space if we can no longer
	 * allocate chunks for our metadata profile and our global reserve will
	 * not fit in the free metadata space.  If we aren't ->full then we
	 * still can allocate chunks and thus are fine using the currently
	 * calculated f_bavail.
	 */
	if (!mixed && block_rsv->space_info->full &&
	    total_free_meta - thresh < block_rsv->size)
2156 2157
		buf->f_bavail = 0;

2158 2159 2160 2161
	buf->f_type = BTRFS_SUPER_MAGIC;
	buf->f_bsize = dentry->d_sb->s_blocksize;
	buf->f_namelen = BTRFS_NAME_LEN;

2162
	/* We treat it as constant endianness (it doesn't matter _which_)
C
Chris Mason 已提交
2163
	   because we want the fsid to come out the same whether mounted
2164 2165 2166
	   on a big-endian or little-endian host */
	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2167
	/* Mask in the root object ID too, to disambiguate subvols */
2168 2169 2170 2171
	buf->f_fsid.val[0] ^=
		BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
	buf->f_fsid.val[1] ^=
		BTRFS_I(d_inode(dentry))->root->root_key.objectid;
2172

C
Chris Mason 已提交
2173 2174
	return 0;
}
C
Chris Mason 已提交
2175

A
Al Viro 已提交
2176 2177
/*
 * ->kill_sb callback shared by both btrfs filesystem types: kill the
 * anonymous super block, then free the fs_info that was attached to it.
 */
static void btrfs_kill_super(struct super_block *sb)
{
	/* Look the fs_info up before the super block is killed. */
	struct btrfs_fs_info *info = btrfs_sb(sb);

	kill_anon_super(sb);
	btrfs_free_fs_info(info);
}

2183 2184 2185
static struct file_system_type btrfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "btrfs",
A
Al Viro 已提交
2186
	.mount		= btrfs_mount,
A
Al Viro 已提交
2187
	.kill_sb	= btrfs_kill_super,
2188
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
2189
};
2190 2191 2192 2193 2194 2195 2196 2197 2198

static struct file_system_type btrfs_root_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "btrfs",
	.mount		= btrfs_mount_root,
	.kill_sb	= btrfs_kill_super,
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
};

2199
MODULE_ALIAS_FS("btrfs");
2200

2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211
/*
 * open() handler of the control file.
 *
 * The file's private_data is used to hold the transaction when it is started
 * and to keep track of whether a transaction is already in progress, so it
 * starts out as NULL.
 */
static int btrfs_control_open(struct inode *inode, struct file *file)
{
	file->private_data = NULL;

	return 0;
}

C
Chris Mason 已提交
2212
/*
2213
 * Used by /dev/btrfs-control for devices ioctls.
C
Chris Mason 已提交
2214
 */
2215 2216 2217 2218
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
				unsigned long arg)
{
	struct btrfs_ioctl_vol_args *vol;
2219
	struct btrfs_device *device = NULL;
2220
	int ret = -ENOTTY;
2221

2222 2223 2224
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

L
Li Zefan 已提交
2225 2226 2227
	vol = memdup_user((void __user *)arg, sizeof(*vol));
	if (IS_ERR(vol))
		return PTR_ERR(vol);
2228
	vol->name[BTRFS_PATH_NAME_MAX] = '\0';
2229

2230 2231
	switch (cmd) {
	case BTRFS_IOC_SCAN_DEV:
2232
		mutex_lock(&uuid_mutex);
2233 2234 2235
		device = btrfs_scan_one_device(vol->name, FMODE_READ,
					       &btrfs_root_fs_type);
		ret = PTR_ERR_OR_ZERO(device);
2236
		mutex_unlock(&uuid_mutex);
2237
		break;
2238 2239 2240
	case BTRFS_IOC_FORGET_DEV:
		ret = btrfs_forget_devices(vol->name);
		break;
J
Josef Bacik 已提交
2241
	case BTRFS_IOC_DEVICES_READY:
2242
		mutex_lock(&uuid_mutex);
2243 2244 2245
		device = btrfs_scan_one_device(vol->name, FMODE_READ,
					       &btrfs_root_fs_type);
		if (IS_ERR(device)) {
2246
			mutex_unlock(&uuid_mutex);
2247
			ret = PTR_ERR(device);
J
Josef Bacik 已提交
2248
			break;
2249
		}
2250 2251
		ret = !(device->fs_devices->num_devices ==
			device->fs_devices->total_devices);
2252
		mutex_unlock(&uuid_mutex);
J
Josef Bacik 已提交
2253
		break;
2254
	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2255
		ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2256
		break;
2257
	}
L
Li Zefan 已提交
2258

2259
	kfree(vol);
L
Linda Knippers 已提交
2260
	return ret;
2261 2262
}

2263
static int btrfs_freeze(struct super_block *sb)
Y
Yan 已提交
2264
{
2265
	struct btrfs_trans_handle *trans;
2266 2267
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
2268

2269
	set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2270 2271 2272 2273 2274 2275
	/*
	 * We don't need a barrier here, we'll wait for any transaction that
	 * could be in progress on other threads (and do delayed iputs that
	 * we want to avoid on a frozen filesystem), or do the commit
	 * ourselves.
	 */
M
Miao Xie 已提交
2276
	trans = btrfs_attach_transaction_barrier(root);
2277 2278 2279 2280 2281 2282
	if (IS_ERR(trans)) {
		/* no transaction, don't bother */
		if (PTR_ERR(trans) == -ENOENT)
			return 0;
		return PTR_ERR(trans);
	}
2283
	return btrfs_commit_transaction(trans);
Y
Yan 已提交
2284 2285
}

2286 2287
static int btrfs_unfreeze(struct super_block *sb)
{
2288 2289 2290
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2291 2292 2293
	return 0;
}

J
Josef Bacik 已提交
2294 2295 2296 2297 2298 2299 2300
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
	struct btrfs_fs_devices *cur_devices;
	struct btrfs_device *dev, *first_dev = NULL;
	struct list_head *head;

2301 2302 2303 2304 2305
	/*
	 * Lightweight locking of the devices. We should not need
	 * device_list_mutex here as we only read the device data and the list
	 * is protected by RCU.  Even if a device is deleted during the list
	 * traversals, we'll get valid data, the freeing callback will wait at
2306
	 * least until the rcu_read_unlock.
2307 2308
	 */
	rcu_read_lock();
J
Josef Bacik 已提交
2309 2310 2311
	cur_devices = fs_info->fs_devices;
	while (cur_devices) {
		head = &cur_devices->devices;
2312
		list_for_each_entry_rcu(dev, head, dev_list) {
2313
			if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
2314
				continue;
2315 2316
			if (!dev->name)
				continue;
J
Josef Bacik 已提交
2317 2318 2319 2320 2321 2322
			if (!first_dev || dev->devid < first_dev->devid)
				first_dev = dev;
		}
		cur_devices = cur_devices->seed;
	}

2323 2324 2325
	if (first_dev)
		seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
	else
J
Josef Bacik 已提交
2326
		WARN_ON(1);
2327
	rcu_read_unlock();
J
Josef Bacik 已提交
2328 2329 2330
	return 0;
}

2331
static const struct super_operations btrfs_super_ops = {
2332
	.drop_inode	= btrfs_drop_inode,
A
Al Viro 已提交
2333
	.evict_inode	= btrfs_evict_inode,
C
Chris Mason 已提交
2334
	.put_super	= btrfs_put_super,
2335
	.sync_fs	= btrfs_sync_fs,
E
Eric Paris 已提交
2336
	.show_options	= btrfs_show_options,
J
Josef Bacik 已提交
2337
	.show_devname	= btrfs_show_devname,
C
Chris Mason 已提交
2338 2339
	.alloc_inode	= btrfs_alloc_inode,
	.destroy_inode	= btrfs_destroy_inode,
A
Al Viro 已提交
2340
	.free_inode	= btrfs_free_inode,
C
Chris Mason 已提交
2341
	.statfs		= btrfs_statfs,
Y
Yan Zheng 已提交
2342
	.remount_fs	= btrfs_remount,
2343
	.freeze_fs	= btrfs_freeze,
2344
	.unfreeze_fs	= btrfs_unfreeze,
C
Chris Mason 已提交
2345
};
2346 2347

static const struct file_operations btrfs_ctl_fops = {
2348
	.open = btrfs_control_open,
2349
	.unlocked_ioctl	 = btrfs_control_ioctl,
2350
	.compat_ioctl = compat_ptr_ioctl,
2351
	.owner	 = THIS_MODULE,
2352
	.llseek = noop_llseek,
2353 2354 2355
};

static struct miscdevice btrfs_misc = {
2356
	.minor		= BTRFS_MINOR,
2357 2358 2359 2360
	.name		= "btrfs-control",
	.fops		= &btrfs_ctl_fops
};

2361 2362 2363
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");

2364
/* Register the btrfs-control misc device; returns misc_register()'s result. */
static int __init btrfs_interface_init(void)
{
	int ret = misc_register(&btrfs_misc);

	return ret;
}

2369
static __cold void btrfs_interface_exit(void)
2370
{
2371
	misc_deregister(&btrfs_misc);
2372 2373
}

2374
/*
 * Print the module load banner: the crc32c implementation in use followed by
 * one ", <feature>=on" entry per optional feature that was compiled in.
 */
static void __init btrfs_print_mod_info(void)
{
	/* Built by string literal concatenation; empty if nothing is enabled. */
	static const char features[] = ""
#ifdef CONFIG_BTRFS_DEBUG
			", debug=on"
#endif
#ifdef CONFIG_BTRFS_ASSERT
			", assert=on"
#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
			", integrity-checker=on"
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			", ref-verify=on"
#endif
			;

	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), features);
}

2393 2394
/*
 * Module init: bring up every btrfs subsystem in order and finally register
 * the filesystem. If a step fails, everything set up before it is torn down
 * again in reverse order and the error of the failing step is returned.
 */
static int __init init_btrfs_fs(void)
{
	int ret;

	btrfs_props_init();

	ret = btrfs_init_sysfs();
	if (ret)
		return ret;

	btrfs_init_compress();

	ret = btrfs_init_cachep();
	if (ret)
		goto undo_compress;

	ret = extent_io_init();
	if (ret)
		goto undo_cachep;

	ret = extent_state_cache_init();
	if (ret)
		goto undo_extent_io;

	ret = extent_map_init();
	if (ret)
		goto undo_extent_state_cache;

	ret = ordered_data_init();
	if (ret)
		goto undo_extent_map;

	ret = btrfs_delayed_inode_init();
	if (ret)
		goto undo_ordered_data;

	ret = btrfs_auto_defrag_init();
	if (ret)
		goto undo_delayed_inode;

	ret = btrfs_delayed_ref_init();
	if (ret)
		goto undo_auto_defrag;

	ret = btrfs_prelim_ref_init();
	if (ret)
		goto undo_delayed_ref;

	ret = btrfs_end_io_wq_init();
	if (ret)
		goto undo_prelim_ref;

	ret = btrfs_interface_init();
	if (ret)
		goto undo_end_io_wq;

	btrfs_init_lockdep();

	btrfs_print_mod_info();

	/* Only register the filesystem once the sanity tests have passed. */
	ret = btrfs_run_sanity_tests();
	if (!ret)
		ret = register_filesystem(&btrfs_fs_type);
	if (ret)
		goto undo_interface;

	return 0;

undo_interface:
	btrfs_interface_exit();
undo_end_io_wq:
	btrfs_end_io_wq_exit();
undo_prelim_ref:
	btrfs_prelim_ref_exit();
undo_delayed_ref:
	btrfs_delayed_ref_exit();
undo_auto_defrag:
	btrfs_auto_defrag_exit();
undo_delayed_inode:
	btrfs_delayed_inode_exit();
undo_ordered_data:
	ordered_data_exit();
undo_extent_map:
	extent_map_exit();
undo_extent_state_cache:
	extent_state_cache_exit();
undo_extent_io:
	extent_io_exit();
undo_cachep:
	btrfs_destroy_cachep();
undo_compress:
	btrfs_exit_compress();
	btrfs_exit_sysfs();

	return ret;
}

/* Module exit: release everything that init_btrfs_fs() set up. */
static void __exit exit_btrfs_fs(void)
{
	/* caches and per-subsystem state */
	btrfs_destroy_cachep();
	btrfs_delayed_ref_exit();
	btrfs_auto_defrag_exit();
	btrfs_delayed_inode_exit();
	btrfs_prelim_ref_exit();
	ordered_data_exit();
	extent_map_exit();
	extent_state_cache_exit();
	extent_io_exit();

	/* control device, end_io workqueue and filesystem registration */
	btrfs_interface_exit();
	btrfs_end_io_wq_exit();
	unregister_filesystem(&btrfs_fs_type);

	/* sysfs, scanned device list and compression */
	btrfs_exit_sysfs();
	btrfs_cleanup_fs_uuids();
	btrfs_exit_compress();
}

2511
late_initcall(init_btrfs_fs);
2512 2513 2514
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");
2515
MODULE_SOFTDEP("pre: crc32c");
2516
MODULE_SOFTDEP("pre: xxhash64");
2517
MODULE_SOFTDEP("pre: sha256");
2518
MODULE_SOFTDEP("pre: blake2b-256");