xfs_super.c 50.4 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3
 * All Rights Reserved.
L
Linus Torvalds 已提交
4
 *
5 6
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
L
Linus Torvalds 已提交
7 8
 * published by the Free Software Foundation.
 *
9 10 11 12
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
L
Linus Torvalds 已提交
13
 *
14 15 16
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
L
Linus Torvalds 已提交
17
 */
C
Christoph Hellwig 已提交
18

L
Linus Torvalds 已提交
19
#include "xfs.h"
20
#include "xfs_shared.h"
21
#include "xfs_format.h"
22 23
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
L
Linus Torvalds 已提交
24 25
#include "xfs_sb.h"
#include "xfs_mount.h"
26
#include "xfs_da_format.h"
L
Linus Torvalds 已提交
27
#include "xfs_inode.h"
28
#include "xfs_btree.h"
L
Linus Torvalds 已提交
29
#include "xfs_bmap.h"
30
#include "xfs_alloc.h"
L
Linus Torvalds 已提交
31
#include "xfs_error.h"
C
Christoph Hellwig 已提交
32
#include "xfs_fsops.h"
33
#include "xfs_trans.h"
L
Linus Torvalds 已提交
34
#include "xfs_buf_item.h"
35
#include "xfs_log.h"
36
#include "xfs_log_priv.h"
37
#include "xfs_da_btree.h"
38
#include "xfs_dir2.h"
39 40 41
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
42
#include "xfs_icache.h"
C
Christoph Hellwig 已提交
43
#include "xfs_trace.h"
D
Dave Chinner 已提交
44
#include "xfs_icreate_item.h"
45 46
#include "xfs_filestream.h"
#include "xfs_quota.h"
47
#include "xfs_sysfs.h"
L
Linus Torvalds 已提交
48 49 50

#include <linux/namei.h>
#include <linux/init.h>
51
#include <linux/slab.h>
L
Linus Torvalds 已提交
52
#include <linux/mount.h>
53
#include <linux/mempool.h>
L
Linus Torvalds 已提交
54
#include <linux/writeback.h>
55
#include <linux/kthread.h>
56
#include <linux/freezer.h>
57
#include <linux/parser.h>
L
Linus Torvalds 已提交
58

59
static const struct super_operations xfs_super_operations;
60
static kmem_zone_t *xfs_ioend_zone;
61
mempool_t *xfs_ioend_pool;
62

D
Dave Chinner 已提交
63
static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
64 65 66
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
#endif
L
Linus Torvalds 已提交
67

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
#define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
#define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
#define MNTOPT_LOGDEV	"logdev"	/* log device */
#define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
#define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
#define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
#define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
#define MNTOPT_SWALLOC	"swalloc"	/* turn on stripe width allocation */
#define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
#define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
#define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
#define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
#define MNTOPT_GRPID	"grpid"		/* group-ID from parent directory */
#define MNTOPT_NOGRPID	"nogrpid"	/* group-ID from current process */
#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
#define MNTOPT_BARRIER	"barrier"	/* use writer barriers for log write and
					 * unwritten extent conversion */
#define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
#define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
90 91
#define MNTOPT_32BITINODE   "inode32"	/* inode allocation limited to
					 * XFS_MAXINUMBER_32 */
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
#define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
#define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
#define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
#define MNTOPT_NOLARGEIO   "nolargeio"	/* do not report large I/O sizes
					 * in stat(). */
#define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
#define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
#define MNTOPT_QUOTA	"quota"		/* disk quotas (user) */
#define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
#define MNTOPT_USRQUOTA	"usrquota"	/* user quota enabled */
#define MNTOPT_GRPQUOTA	"grpquota"	/* group quota enabled */
#define MNTOPT_PRJQUOTA	"prjquota"	/* project quota enabled */
#define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
#define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
#define MNTOPT_PQUOTA	"pquota"	/* project quota (IRIX variant) */
#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
#define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
112 113
#define MNTOPT_DISCARD	   "discard"	/* Discard unused blocks */
#define MNTOPT_NODISCARD   "nodiscard"	/* Do not discard unused blocks */
114

D
Dave Chinner 已提交
115 116
#define MNTOPT_DAX	"dax"		/* Enable direct access to bdev pages */

117 118 119 120 121 122 123
/*
 * Table driven mount option parser.
 *
 * Currently only used for remount, but it will be used for mount
 * in the future, too.
 */
enum {
124 125 126 127 128
	Opt_barrier,
	Opt_nobarrier,
	Opt_inode64,
	Opt_inode32,
	Opt_err
129 130
};

131
static const match_table_t tokens = {
132 133
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
134
	{Opt_inode64, "inode64"},
135
	{Opt_inode32, "inode32"},
136 137 138 139
	{Opt_err, NULL}
};


140
STATIC unsigned long
141
suffix_kstrtoint(char *s, unsigned int base, int *res)
142
{
143
	int	last, shift_left_factor = 0, _res;
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
	char	*value = s;

	last = strlen(value) - 1;
	if (value[last] == 'K' || value[last] == 'k') {
		shift_left_factor = 10;
		value[last] = '\0';
	}
	if (value[last] == 'M' || value[last] == 'm') {
		shift_left_factor = 20;
		value[last] = '\0';
	}
	if (value[last] == 'G' || value[last] == 'g') {
		shift_left_factor = 30;
		value[last] = '\0';
	}

160 161 162 163
	if (kstrtoint(s, base, &_res))
		return -EINVAL;
	*res = _res << shift_left_factor;
	return 0;
164 165
}

166 167 168 169 170 171 172
/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock has _not_ yet been read in.
 *
 * Note that this function leaks the various device name allocations on
 * failure.  The caller takes care of them.
 */
173 174 175
STATIC int
xfs_parseargs(
	struct xfs_mount	*mp,
C
Christoph Hellwig 已提交
176
	char			*options)
177
{
178
	struct super_block	*sb = mp->m_super;
179
	char			*this_char, *value;
180 181 182
	int			dsunit = 0;
	int			dswidth = 0;
	int			iosize = 0;
183
	__uint8_t		iosizelog = 0;
184

185 186 187 188 189 190
	/*
	 * set up the mount name first so all the errors will refer to the
	 * correct device.
	 */
	mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
	if (!mp->m_fsname)
D
Dave Chinner 已提交
191
		return -ENOMEM;
192 193
	mp->m_fsname_len = strlen(mp->m_fsname) + 1;

194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
	/*
	 * Copy binary VFS mount flags we are interested in.
	 */
	if (sb->s_flags & MS_RDONLY)
		mp->m_flags |= XFS_MOUNT_RDONLY;
	if (sb->s_flags & MS_DIRSYNC)
		mp->m_flags |= XFS_MOUNT_DIRSYNC;
	if (sb->s_flags & MS_SYNCHRONOUS)
		mp->m_flags |= XFS_MOUNT_WSYNC;

	/*
	 * Set some default flags that could be cleared by the mount option
	 * parsing.
	 */
	mp->m_flags |= XFS_MOUNT_BARRIER;
	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
210

211 212 213 214 215
	/*
	 * These can be overridden by the mount option parsing.
	 */
	mp->m_logbufs = -1;
	mp->m_logbsize = -1;
216 217 218 219 220 221 222 223 224 225 226 227

	if (!options)
		goto done;

	while ((this_char = strsep(&options, ",")) != NULL) {
		if (!*this_char)
			continue;
		if ((value = strchr(this_char, '=')) != NULL)
			*value++ = 0;

		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
			if (!value || !*value) {
228
				xfs_warn(mp, "%s option requires an argument",
229
					this_char);
D
Dave Chinner 已提交
230
				return -EINVAL;
231
			}
232
			if (kstrtoint(value, 10, &mp->m_logbufs))
D
Dave Chinner 已提交
233
				return -EINVAL;
234 235
		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
			if (!value || !*value) {
236
				xfs_warn(mp, "%s option requires an argument",
237
					this_char);
D
Dave Chinner 已提交
238
				return -EINVAL;
239
			}
240
			if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
D
Dave Chinner 已提交
241
				return -EINVAL;
242 243
		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
			if (!value || !*value) {
244
				xfs_warn(mp, "%s option requires an argument",
245
					this_char);
D
Dave Chinner 已提交
246
				return -EINVAL;
247
			}
248 249
			mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
			if (!mp->m_logname)
D
Dave Chinner 已提交
250
				return -ENOMEM;
251
		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
252
			xfs_warn(mp, "%s option not allowed on this system",
C
Christoph Hellwig 已提交
253
				this_char);
D
Dave Chinner 已提交
254
			return -EINVAL;
255 256
		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
			if (!value || !*value) {
257
				xfs_warn(mp, "%s option requires an argument",
258
					this_char);
D
Dave Chinner 已提交
259
				return -EINVAL;
260
			}
261 262
			mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
			if (!mp->m_rtname)
D
Dave Chinner 已提交
263
				return -ENOMEM;
264 265
		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE) ||
			   !strcmp(this_char, MNTOPT_BIOSIZE)) {
266
			if (!value || !*value) {
267
				xfs_warn(mp, "%s option requires an argument",
268
					this_char);
D
Dave Chinner 已提交
269
				return -EINVAL;
270
			}
271
			if (suffix_kstrtoint(value, 10, &iosize))
D
Dave Chinner 已提交
272
				return -EINVAL;
273
			iosizelog = ffs(iosize) - 1;
274 275 276 277 278 279 280
		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
			mp->m_flags |= XFS_MOUNT_GRPID;
		} else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
			mp->m_flags &= ~XFS_MOUNT_GRPID;
		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
281
			mp->m_flags |= XFS_MOUNT_WSYNC;
282
		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
283
			mp->m_flags |= XFS_MOUNT_NORECOVERY;
284
		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
285
			mp->m_flags |= XFS_MOUNT_NOALIGN;
286
		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
287
			mp->m_flags |= XFS_MOUNT_SWALLOC;
288 289
		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
			if (!value || !*value) {
290
				xfs_warn(mp, "%s option requires an argument",
291
					this_char);
D
Dave Chinner 已提交
292
				return -EINVAL;
293
			}
294
			if (kstrtoint(value, 10, &dsunit))
D
Dave Chinner 已提交
295
				return -EINVAL;
296 297
		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
			if (!value || !*value) {
298
				xfs_warn(mp, "%s option requires an argument",
299
					this_char);
D
Dave Chinner 已提交
300
				return -EINVAL;
301
			}
302
			if (kstrtoint(value, 10, &dswidth))
D
Dave Chinner 已提交
303
				return -EINVAL;
304 305
		} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
306
		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
307
			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
308
		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
309
			mp->m_flags |= XFS_MOUNT_NOUUID;
310
		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
311
			mp->m_flags |= XFS_MOUNT_BARRIER;
312
		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
313
			mp->m_flags &= ~XFS_MOUNT_BARRIER;
314
		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
315
			mp->m_flags |= XFS_MOUNT_IKEEP;
316
		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
317
			mp->m_flags &= ~XFS_MOUNT_IKEEP;
318
		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
319
			mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
320
		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
321
			mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
322
		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
323
			mp->m_flags |= XFS_MOUNT_ATTR2;
324
		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
325 326
			mp->m_flags &= ~XFS_MOUNT_ATTR2;
			mp->m_flags |= XFS_MOUNT_NOATTR2;
327
		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
328
			mp->m_flags |= XFS_MOUNT_FILESTREAMS;
329
		} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
330 331 332
			mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
			mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
			mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
333 334 335
		} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
			   !strcmp(this_char, MNTOPT_UQUOTA) ||
			   !strcmp(this_char, MNTOPT_USRQUOTA)) {
336 337
			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
					 XFS_UQUOTA_ENFD);
338 339
		} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
			   !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
340 341
			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
			mp->m_qflags &= ~XFS_UQUOTA_ENFD;
342 343
		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
344
			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
345
					 XFS_PQUOTA_ENFD);
346
		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
347
			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
348
			mp->m_qflags &= ~XFS_PQUOTA_ENFD;
349 350
		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
351
			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
352
					 XFS_GQUOTA_ENFD);
353
		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
354
			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
355
			mp->m_qflags &= ~XFS_GQUOTA_ENFD;
356 357 358 359
		} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
			mp->m_flags |= XFS_MOUNT_DISCARD;
		} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
			mp->m_flags &= ~XFS_MOUNT_DISCARD;
D
Dave Chinner 已提交
360 361 362 363
#ifdef CONFIG_FS_DAX
		} else if (!strcmp(this_char, MNTOPT_DAX)) {
			mp->m_flags |= XFS_MOUNT_DAX;
#endif
364
		} else {
365
			xfs_warn(mp, "unknown mount option [%s].", this_char);
D
Dave Chinner 已提交
366
			return -EINVAL;
367 368 369
		}
	}

370 371 372 373 374
	/*
	 * no recovery flag requires a read-only mount
	 */
	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
375
		xfs_warn(mp, "no-recovery mounts must be read-only.");
D
Dave Chinner 已提交
376
		return -EINVAL;
377 378
	}

379
	if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
380 381
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
D
Dave Chinner 已提交
382
		return -EINVAL;
383 384
	}

C
Christoph Hellwig 已提交
385 386
#ifndef CONFIG_XFS_QUOTA
	if (XFS_IS_QUOTA_RUNNING(mp)) {
387
		xfs_warn(mp, "quota support not available in this kernel.");
D
Dave Chinner 已提交
388
		return -EINVAL;
C
Christoph Hellwig 已提交
389 390 391
	}
#endif

392
	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
393
		xfs_warn(mp, "sunit and swidth must be specified together");
D
Dave Chinner 已提交
394
		return -EINVAL;
395 396 397
	}

	if (dsunit && (dswidth % dsunit != 0)) {
398 399
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
400
			dswidth, dsunit);
D
Dave Chinner 已提交
401
		return -EINVAL;
402 403
	}

404
done:
J
Jie Liu 已提交
405
	if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
406 407 408 409 410 411
		/*
		 * At this point the superblock has not been read
		 * in, therefore we do not know the block size.
		 * Before the mount call ends we will convert
		 * these to FSBs.
		 */
J
Jie Liu 已提交
412 413
		mp->m_dalign = dsunit;
		mp->m_swidth = dswidth;
414 415 416 417 418 419
	}

	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
420
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
421
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
D
Dave Chinner 已提交
422
		return -EINVAL;
423 424 425 426 427 428
	}
	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize !=  0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
429 430
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
431
			mp->m_logbsize);
D
Dave Chinner 已提交
432
		return -EINVAL;
433 434 435 436 437
	}

	if (iosizelog) {
		if (iosizelog > XFS_MAX_IO_LOG ||
		    iosizelog < XFS_MIN_IO_LOG) {
438
			xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
439 440
				iosizelog, XFS_MIN_IO_LOG,
				XFS_MAX_IO_LOG);
D
Dave Chinner 已提交
441
			return -EINVAL;
442 443 444 445 446
		}

		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
		mp->m_readio_log = iosizelog;
		mp->m_writeio_log = iosizelog;
447 448 449 450 451 452
	}

	return 0;
}

struct proc_xfs_info {
D
Dave Chinner 已提交
453 454
	uint64_t	flag;
	char		*str;
455 456 457 458 459 460 461 462 463
};

STATIC int
xfs_showargs(
	struct xfs_mount	*mp,
	struct seq_file		*m)
{
	static struct proc_xfs_info xfs_info_set[] = {
		/* the few simple ones we can get from the mount struct */
464
		{ XFS_MOUNT_IKEEP,		"," MNTOPT_IKEEP },
465 466 467 468 469 470 471 472
		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
		{ XFS_MOUNT_SWALLOC,		"," MNTOPT_SWALLOC },
		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
		{ XFS_MOUNT_ATTR2,		"," MNTOPT_ATTR2 },
		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
473
		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
474
		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_32BITINODE },
D
Dave Chinner 已提交
475
		{ XFS_MOUNT_DAX,		"," MNTOPT_DAX },
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
		{ 0, NULL }
	};
	static struct proc_xfs_info xfs_info_unset[] = {
		/* the few simple ones we can get from the mount struct */
		{ XFS_MOUNT_COMPAT_IOSIZE,	"," MNTOPT_LARGEIO },
		{ XFS_MOUNT_BARRIER,		"," MNTOPT_NOBARRIER },
		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_64BITINODE },
		{ 0, NULL }
	};
	struct proc_xfs_info	*xfs_infop;

	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
		if (mp->m_flags & xfs_infop->flag)
			seq_puts(m, xfs_infop->str);
	}
	for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
		if (!(mp->m_flags & xfs_infop->flag))
			seq_puts(m, xfs_infop->str);
	}

	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
				(int)(1 << mp->m_writeio_log) >> 10);

	if (mp->m_logbufs > 0)
		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
	if (mp->m_logbsize > 0)
		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);

	if (mp->m_logname)
506
		seq_show_option(m, MNTOPT_LOGDEV, mp->m_logname);
507
	if (mp->m_rtname)
508
		seq_show_option(m, MNTOPT_RTDEV, mp->m_rtname);
509 510 511 512 513 514 515 516 517 518 519 520 521

	if (mp->m_dalign > 0)
		seq_printf(m, "," MNTOPT_SUNIT "=%d",
				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
	if (mp->m_swidth > 0)
		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));

	if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
		seq_puts(m, "," MNTOPT_USRQUOTA);
	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
		seq_puts(m, "," MNTOPT_UQUOTANOENF);

522
	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
523
		if (mp->m_qflags & XFS_PQUOTA_ENFD)
524 525 526
			seq_puts(m, "," MNTOPT_PRJQUOTA);
		else
			seq_puts(m, "," MNTOPT_PQUOTANOENF);
527 528
	}
	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
529
		if (mp->m_qflags & XFS_GQUOTA_ENFD)
530 531 532 533
			seq_puts(m, "," MNTOPT_GRPQUOTA);
		else
			seq_puts(m, "," MNTOPT_GQUOTANOENF);
	}
534 535 536 537 538 539

	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
		seq_puts(m, "," MNTOPT_NOQUOTA);

	return 0;
}
L
Linus Torvalds 已提交
540 541 542 543 544 545 546 547 548
/*
 * Return the largest file offset supported for a block size of
 * 2^@blockshift bytes, computed as (factor << shift) - 1.
 *
 * With 64-bit longs the result is (1 << (BITS_PER_LONG - 1)) - 1 and
 * @blockshift is not consulted.  With 32-bit longs the factor depends on
 * CONFIG_LBDAF, see below.
 */
__uint64_t
xfs_max_file_offset(
	unsigned int		blockshift)
{
	unsigned int		factor = 1;
	unsigned int		shift = BITS_PER_LONG - 1;
	__uint64_t		limit;

	/*
	 * On Linux the maximum file size can depend on the filesystem
	 * block size (on 32 bit platforms).
	 * __block_write_begin does this in an [unsigned] long...
	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
	 * So, for page sized blocks (4K on 32 bit platforms),
	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
	 *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
	 * but for smaller blocksizes it is less (bbits = log2 bsize).
	 * Note1: get_block_t takes a long (implicit cast from above)
	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
	 * can optionally convert the [unsigned] long from above into
	 * an [unsigned] long long.
	 */
#if BITS_PER_LONG == 32
# ifdef CONFIG_LBDAF
	ASSERT(sizeof(sector_t) == 8);
	factor = PAGE_CACHE_SIZE;
	shift = BITS_PER_LONG;
# else
	factor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
# endif
#endif

	limit = (__uint64_t)factor;
	limit <<= shift;
	return limit - 1;
}

574 575 576 577 578
/*
 * xfs_set_inode32() and xfs_set_inode64() are passed an agcount
 * because in the growfs case, mp->m_sb.sb_agcount is not updated
 * yet to the potentially higher ag count.
 */
579
xfs_agnumber_t
580
xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
581 582
{
	xfs_agnumber_t	index = 0;
583
	xfs_agnumber_t	maxagi = 0;
584 585
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_agnumber_t	max_metadata;
E
Eric Sandeen 已提交
586 587
	xfs_agino_t	agino;
	xfs_ino_t	ino;
588 589 590 591 592 593 594 595 596 597 598 599 600 601
	xfs_perag_t	*pag;

	/* Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.
	 */
	if (mp->m_maxicount) {
		__uint64_t	icount;

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
602
		max_metadata = agcount;
603 604
	}

E
Eric Sandeen 已提交
605 606
	agino =	XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);

607
	for (index = 0; index < agcount; index++) {
608
		ino = XFS_AGINO_TO_INO(mp, index, agino);
609

610
		if (ino > XFS_MAXINUMBER_32) {
611 612 613 614 615
			pag = xfs_perag_get(mp, index);
			pag->pagi_inodeok = 0;
			pag->pagf_metadata = 0;
			xfs_perag_put(pag);
			continue;
616 617 618 619
		}

		pag = xfs_perag_get(mp, index);
		pag->pagi_inodeok = 1;
620
		maxagi++;
621 622 623 624
		if (index < max_metadata)
			pag->pagf_metadata = 1;
		xfs_perag_put(pag);
	}
625 626 627 628
	mp->m_flags |= (XFS_MOUNT_32BITINODES |
			XFS_MOUNT_SMALL_INUMS);

	return maxagi;
629 630 631
}

xfs_agnumber_t
632
xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount)
633 634 635
{
	xfs_agnumber_t index = 0;

636
	for (index = 0; index < agcount; index++) {
637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
		struct xfs_perag	*pag;

		pag = xfs_perag_get(mp, index);
		pag->pagi_inodeok = 1;
		pag->pagf_metadata = 0;
		xfs_perag_put(pag);
	}

	/* There is no need for lock protection on m_flags,
	 * the rw_semaphore of the VFS superblock is locked
	 * during mount/umount/remount operations, so this is
	 * enough to avoid concurency on the m_flags field
	 */
	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
			 XFS_MOUNT_SMALL_INUMS);
	return index;
}

H
Hannes Eder 已提交
655
STATIC int
L
Linus Torvalds 已提交
656 657 658 659 660 661 662
xfs_blkdev_get(
	xfs_mount_t		*mp,
	const char		*name,
	struct block_device	**bdevp)
{
	int			error = 0;

663 664
	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				    mp);
L
Linus Torvalds 已提交
665 666
	if (IS_ERR(*bdevp)) {
		error = PTR_ERR(*bdevp);
667
		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
L
Linus Torvalds 已提交
668 669
	}

D
Dave Chinner 已提交
670
	return error;
L
Linus Torvalds 已提交
671 672
}

H
Hannes Eder 已提交
673
STATIC void
L
Linus Torvalds 已提交
674 675 676 677
xfs_blkdev_put(
	struct block_device	*bdev)
{
	if (bdev)
678
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
L
Linus Torvalds 已提交
679 680
}

681 682 683 684
/*
 * Issue a flush to the block device behind @buftarg.  The return value of
 * blkdev_issue_flush() is not checked.
 */
void
xfs_blkdev_issue_flush(
	xfs_buftarg_t		*buftarg)
{
	struct block_device	*bdev = buftarg->bt_bdev;

	blkdev_issue_flush(bdev, GFP_NOFS, NULL);
}
L
Linus Torvalds 已提交
687

688 689 690 691 692
/*
 * Free the buffer targets of @mp and release the block devices opened by
 * xfs_open_devices().
 *
 * The log target is only torn down when it is distinct from the data
 * target.  The data device itself is not released here; only its buffer
 * target is freed.
 */
STATIC void
xfs_close_devices(
	struct xfs_mount	*mp)
{
	struct block_device	*bdev;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		/* grab the bdev first: the target is gone after the free */
		bdev = mp->m_logdev_targp->bt_bdev;
		xfs_free_buftarg(mp, mp->m_logdev_targp);
		xfs_blkdev_put(bdev);
	}

	if (mp->m_rtdev_targp) {
		bdev = mp->m_rtdev_targp->bt_bdev;
		xfs_free_buftarg(mp, mp->m_rtdev_targp);
		xfs_blkdev_put(bdev);
	}

	xfs_free_buftarg(mp, mp->m_ddev_targp);
}

/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 */
STATIC int
xfs_open_devices(
717
	struct xfs_mount	*mp)
718 719 720 721 722 723 724 725
{
	struct block_device	*ddev = mp->m_super->s_bdev;
	struct block_device	*logdev = NULL, *rtdev = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
	 */
726 727
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
728 729 730 731
		if (error)
			goto out;
	}

732 733
	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
734 735 736 737
		if (error)
			goto out_close_logdev;

		if (rtdev == ddev || rtdev == logdev) {
738 739
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
D
Dave Chinner 已提交
740
			error = -EINVAL;
741 742 743 744 745 746 747
			goto out_close_rtdev;
		}
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
D
Dave Chinner 已提交
748
	error = -ENOMEM;
749
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
750 751 752 753
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev) {
754
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
755 756 757 758 759
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	if (logdev && logdev != ddev) {
760
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
761 762 763 764 765 766 767 768 769 770
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
	}

	return 0;

 out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
771
		xfs_free_buftarg(mp, mp->m_rtdev_targp);
772
 out_free_ddev_targ:
773
	xfs_free_buftarg(mp, mp->m_ddev_targp);
774
 out_close_rtdev:
775
	xfs_blkdev_put(rtdev);
776 777 778 779 780 781 782
 out_close_logdev:
	if (logdev && logdev != ddev)
		xfs_blkdev_put(logdev);
 out:
	return error;
}

783 784 785 786 787 788 789 790
/*
 * Setup xfs_mount buffer target pointers based on superblock
 */
STATIC int
xfs_setup_devices(
	struct xfs_mount	*mp)
{
	int			error;
791

792
	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814
	if (error)
		return error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		unsigned int	log_sector_size = BBSIZE;

		if (xfs_sb_version_hassector(&mp->m_sb))
			log_sector_size = mp->m_sb.sb_logsectsize;
		error = xfs_setsize_buftarg(mp->m_logdev_targp,
					    log_sector_size);
		if (error)
			return error;
	}
	if (mp->m_rtdev_targp) {
		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
					    mp->m_sb.sb_sectsize);
		if (error)
			return error;
	}

	return 0;
}
815

816 817 818 819
STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount	*mp)
{
820
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
821
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
822 823 824
	if (!mp->m_buf_workqueue)
		goto out;

825
	mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
826
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
827
	if (!mp->m_data_workqueue)
828
		goto out_destroy_buf;
829 830

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
831
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
832 833 834
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_data_iodone_queue;

835
	mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
836
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
837 838
	if (!mp->m_cil_workqueue)
		goto out_destroy_unwritten;
D
Dave Chinner 已提交
839 840

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
841
			WQ_FREEZABLE, 0, mp->m_fsname);
D
Dave Chinner 已提交
842 843 844 845
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_cil;

	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
846
			WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname);
D
Dave Chinner 已提交
847 848 849
	if (!mp->m_log_workqueue)
		goto out_destroy_reclaim;

850
	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
851
			WQ_FREEZABLE, 0, mp->m_fsname);
852 853 854
	if (!mp->m_eofblocks_workqueue)
		goto out_destroy_log;

855 856
	return 0;

857 858
out_destroy_log:
	destroy_workqueue(mp->m_log_workqueue);
D
Dave Chinner 已提交
859 860 861 862
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_cil:
	destroy_workqueue(mp->m_cil_workqueue);
863 864
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
865 866
out_destroy_data_iodone_queue:
	destroy_workqueue(mp->m_data_workqueue);
867 868
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
869 870 871 872 873 874 875 876
out:
	return -ENOMEM;
}

/*
 * Destroy every workqueue created by xfs_init_mount_workqueues().
 *
 * The order below is the order in which the queues are destroyed; note
 * that the data queue goes before the unwritten-conversion queue.
 */
STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	struct workqueue_struct	*queues[] = {
		mp->m_eofblocks_workqueue,
		mp->m_log_workqueue,
		mp->m_reclaim_workqueue,
		mp->m_cil_workqueue,
		mp->m_data_workqueue,
		mp->m_unwritten_workqueue,
		mp->m_buf_workqueue,
	};
	unsigned int		i;

	for (i = 0; i < ARRAY_SIZE(queues); i++)
		destroy_workqueue(queues[i]);
}

D
Dave Chinner 已提交
886 887 888 889 890 891 892 893 894 895 896 897 898
/*
 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 * or a page lock. sync_inodes_sb() is used so that the caller blocks until
 * the IO completes, which effectively throttles multiple callers to the rate
 * at which IO is completing.
 *
 * If sb->s_umount cannot be taken for reading without blocking, nothing is
 * flushed and the function returns immediately.
 */
void
xfs_flush_inodes(
	struct xfs_mount	*mp)
{
	struct super_block	*sb = mp->m_super;

	if (!down_read_trylock(&sb->s_umount))
		return;

	sync_inodes_sb(sb);
	up_read(&sb->s_umount);
}

904
/* Catch misguided souls that try to use this interface on XFS */
L
Linus Torvalds 已提交
905
STATIC struct inode *
906
xfs_fs_alloc_inode(
L
Linus Torvalds 已提交
907 908
	struct super_block	*sb)
{
909
	BUG();
910
	return NULL;
L
Linus Torvalds 已提交
911 912
}

913
/*
914 915
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can reclaim the inode.
916
 */
L
Linus Torvalds 已提交
917
STATIC void
918
xfs_fs_destroy_inode(
C
Christoph Hellwig 已提交
919
	struct inode		*inode)
L
Linus Torvalds 已提交
920
{
C
Christoph Hellwig 已提交
921 922
	struct xfs_inode	*ip = XFS_I(inode);

C
Christoph Hellwig 已提交
923
	trace_xfs_destroy_inode(ip);
924 925

	XFS_STATS_INC(vn_reclaim);
C
Christoph Hellwig 已提交
926 927 928 929 930 931 932 933 934 935

	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);

	/*
	 * We should never get here with one of the reclaim flags already set.
	 */
	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));

	/*
936 937 938 939 940
	 * We always use background reclaim here because even if the
	 * inode is clean, it still may be under IO and hence we have
	 * to take the flush lock. The background reclaim path handles
	 * this more efficiently than we can here, so simply let background
	 * reclaim tear down all inodes.
C
Christoph Hellwig 已提交
941
	 */
942
	xfs_inode_set_reclaim_tag(ip);
L
Linus Torvalds 已提交
943 944
}

945 946 947 948
/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
949
 * from the slab. This avoids the need to repeatedly initialise
950 951 952
 * fields in the xfs inode that left in the initialise state
 * when freeing the inode.
 */
953 954
STATIC void
xfs_fs_inode_init_once(
955 956 957 958 959
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	memset(ip, 0, sizeof(struct xfs_inode));
960 961 962 963 964

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
965 966 967
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);

D
Dave Chinner 已提交
968 969
	mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
970 971 972 973
	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
}

L
Linus Torvalds 已提交
974
STATIC void
975
xfs_fs_evict_inode(
L
Linus Torvalds 已提交
976 977
	struct inode		*inode)
{
978
	xfs_inode_t		*ip = XFS_I(inode);
979

980 981
	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));

982
	trace_xfs_evict_inode(ip);
C
Christoph Hellwig 已提交
983

984
	truncate_inode_pages_final(&inode->i_data);
985
	clear_inode(inode);
986 987 988 989
	XFS_STATS_INC(vn_rele);
	XFS_STATS_INC(vn_remove);

	xfs_inactive(ip);
990
}
L
Linus Torvalds 已提交
991

992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007
/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits here via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 * racing with us, and it avoids needing to grab a spinlock here for every inode
 * we drop the final reference on.
 */
STATIC int
xfs_fs_drop_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
}

1008 1009 1010 1011 1012 1013 1014 1015 1016
/*
 * Free the name strings (filesystem, log and realtime) held by the mount.
 */
STATIC void
xfs_free_fsname(
	struct xfs_mount	*mp)
{
	kfree(mp->m_logname);
	kfree(mp->m_rtname);
	kfree(mp->m_fsname);
}

L
Linus Torvalds 已提交
1017
STATIC int
C
Christoph Hellwig 已提交
1018
xfs_fs_sync_fs(
L
Linus Torvalds 已提交
1019 1020 1021
	struct super_block	*sb,
	int			wait)
{
1022
	struct xfs_mount	*mp = XFS_M(sb);
L
Linus Torvalds 已提交
1023

1024
	/*
C
Christoph Hellwig 已提交
1025
	 * Doing anything during the async pass would be counterproductive.
1026
	 */
C
Christoph Hellwig 已提交
1027
	if (!wait)
C
Christoph Hellwig 已提交
1028 1029
		return 0;

D
Dave Chinner 已提交
1030
	xfs_log_force(mp, XFS_LOG_SYNC);
C
Christoph Hellwig 已提交
1031
	if (laptop_mode) {
L
Linus Torvalds 已提交
1032 1033
		/*
		 * The disk must be active because we're syncing.
1034
		 * We schedule log work now (now that the disk is
L
Linus Torvalds 已提交
1035 1036
		 * active) instead of later (when it might not be).
		 */
1037
		flush_delayed_work(&mp->m_log->l_work);
L
Linus Torvalds 已提交
1038 1039
	}

C
Christoph Hellwig 已提交
1040
	return 0;
L
Linus Torvalds 已提交
1041 1042 1043
}

STATIC int
1044
xfs_fs_statfs(
1045
	struct dentry		*dentry,
L
Linus Torvalds 已提交
1046 1047
	struct kstatfs		*statp)
{
C
Christoph Hellwig 已提交
1048 1049
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	xfs_sb_t		*sbp = &mp->m_sb;
1050
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
C
Christoph Hellwig 已提交
1051
	__uint64_t		fakeinos, id;
1052
	__uint64_t		icount;
1053
	__uint64_t		ifree;
1054
	__uint64_t		fdblocks;
C
Christoph Hellwig 已提交
1055
	xfs_extlen_t		lsize;
1056
	__int64_t		ffree;
C
Christoph Hellwig 已提交
1057 1058 1059 1060 1061 1062 1063 1064

	statp->f_type = XFS_SB_MAGIC;
	statp->f_namelen = MAXNAMELEN - 1;

	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
	statp->f_fsid.val[0] = (u32)id;
	statp->f_fsid.val[1] = (u32)(id >> 32);

1065
	icount = percpu_counter_sum(&mp->m_icount);
1066
	ifree = percpu_counter_sum(&mp->m_ifree);
1067
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
C
Christoph Hellwig 已提交
1068 1069 1070 1071 1072

	spin_lock(&mp->m_sb_lock);
	statp->f_bsize = sbp->sb_blocksize;
	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
	statp->f_blocks = sbp->sb_dblocks - lsize;
1073 1074 1075 1076 1077
	spin_unlock(&mp->m_sb_lock);

	statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
	statp->f_bavail = statp->f_bfree;

C
Christoph Hellwig 已提交
1078
	fakeinos = statp->f_bfree << sbp->sb_inopblog;
1079
	statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
C
Christoph Hellwig 已提交
1080
	if (mp->m_maxicount)
C
Christoph Hellwig 已提交
1081 1082 1083
		statp->f_files = min_t(typeof(statp->f_files),
					statp->f_files,
					mp->m_maxicount);
1084

1085 1086 1087 1088 1089
	/* If sb_icount overshot maxicount, report actual allocation */
	statp->f_files = max_t(typeof(statp->f_files),
					statp->f_files,
					sbp->sb_icount);

1090
	/* make sure statp->f_ffree does not underflow */
1091
	ffree = statp->f_files - (icount - ifree);
1092 1093
	statp->f_ffree = max_t(__int64_t, ffree, 0);

C
Christoph Hellwig 已提交
1094

1095
	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1096 1097
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
C
Christoph Hellwig 已提交
1098
		xfs_qm_statvfs(ip, statp);
C
Christoph Hellwig 已提交
1099
	return 0;
L
Linus Torvalds 已提交
1100 1101
}

E
Eric Sandeen 已提交
1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124
/*
 * Remember the current reserve pool size in m_resblks_save, then ask for
 * the pool to be resized to zero blocks.
 */
STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
	__uint64_t no_reserve = 0;

	mp->m_resblks_save = mp->m_resblks;
	xfs_reserve_blocks(mp, &no_reserve, NULL);
}

/*
 * Refill the reserve pool: use (and clear) the stashed size if it is
 * non-zero, otherwise fall back to the default size.
 */
STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
	__uint64_t resblks = mp->m_resblks_save;

	if (resblks)
		mp->m_resblks_save = 0;
	else
		resblks = xfs_default_resblks(mp);

	xfs_reserve_blocks(mp, &resblks, NULL);
}

1125 1126 1127 1128 1129
/*
 * Trigger writeback of all the dirty metadata in the file system.
 *
 * This ensures that the metadata is written to their location on disk rather
 * than just existing in transactions in the log. This means after a quiesce
1130 1131
 * there is no log replay required to write the inodes to disk - this is the
 * primary difference between a sync and a quiesce.
1132
 *
1133 1134
 * Note: xfs_log_quiesce() stops background log work - the callers must ensure
 * it is started again when appropriate.
1135
 */
1136
static void
1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158
xfs_quiesce_attr(
	struct xfs_mount	*mp)
{
	int	error = 0;

	/* wait for all modifications to complete */
	while (atomic_read(&mp->m_active_trans) > 0)
		delay(100);

	/* force the log to unpin objects from the now complete transactions */
	xfs_log_force(mp, XFS_LOG_SYNC);

	/* reclaim inodes to do any IO before the freeze completes */
	xfs_reclaim_inodes(mp, 0);
	xfs_reclaim_inodes(mp, SYNC_WAIT);

	/* Push the superblock and write an unmount record */
	error = xfs_log_sbcount(mp);
	if (error)
		xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
				"Frozen image may not be consistent.");
	/*
1159 1160
	 * Just warn here till VFS can correctly support
	 * read-only remount without racing.
1161
	 */
1162
	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
1163

1164
	xfs_log_quiesce(mp);
1165 1166
}

L
Linus Torvalds 已提交
1167
STATIC int
1168
xfs_fs_remount(
L
Linus Torvalds 已提交
1169 1170 1171 1172
	struct super_block	*sb,
	int			*flags,
	char			*options)
{
1173
	struct xfs_mount	*mp = XFS_M(sb);
1174
	xfs_sb_t		*sbp = &mp->m_sb;
1175 1176
	substring_t		args[MAX_OPT_ARGS];
	char			*p;
1177
	int			error;
L
Linus Torvalds 已提交
1178

1179
	sync_filesystem(sb);
1180 1181
	while ((p = strsep(&options, ",")) != NULL) {
		int token;
1182

1183 1184
		if (!*p)
			continue;
1185

1186 1187 1188
		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_barrier:
1189
			mp->m_flags |= XFS_MOUNT_BARRIER;
1190 1191
			break;
		case Opt_nobarrier:
1192
			mp->m_flags &= ~XFS_MOUNT_BARRIER;
1193
			break;
1194
		case Opt_inode64:
1195
			mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount);
1196
			break;
1197
		case Opt_inode32:
1198
			mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount);
1199
			break;
1200
		default:
1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217
			/*
			 * Logically we would return an error here to prevent
			 * users from believing they might have changed
			 * mount options using remount which can't be changed.
			 *
			 * But unfortunately mount(8) adds all options from
			 * mtab and fstab to the mount arguments in some cases
			 * so we can't blindly reject options, but have to
			 * check for each specified option if it actually
			 * differs from the currently set option and only
			 * reject it if that's the case.
			 *
			 * Until that is implemented we return success for
			 * every remount request, and silently ignore all
			 * options that we can't actually change.
			 */
#if 0
1218
			xfs_info(mp,
1219
		"mount option \"%s\" not supported for remount", p);
1220
			return -EINVAL;
1221
#else
1222
			break;
1223
#endif
1224
		}
1225 1226
	}

1227
	/* ro -> rw */
1228
	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1229 1230 1231 1232 1233 1234
		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
			xfs_warn(mp,
		"ro->rw transition prohibited on norecovery mount");
			return -EINVAL;
		}

1235
		mp->m_flags &= ~XFS_MOUNT_RDONLY;
1236 1237 1238 1239 1240

		/*
		 * If this is the first remount to writeable state we
		 * might have some superblock changes to update.
		 */
1241 1242
		if (mp->m_update_sb) {
			error = xfs_sync_sb(mp, false);
1243
			if (error) {
1244
				xfs_warn(mp, "failed to write sb changes");
1245 1246
				return error;
			}
1247
			mp->m_update_sb = false;
1248
		}
1249 1250 1251 1252 1253

		/*
		 * Fill out the reserve pool if it is empty. Use the stashed
		 * value if it is non-zero, otherwise go with the default.
		 */
E
Eric Sandeen 已提交
1254
		xfs_restore_resvblks(mp);
1255
		xfs_log_work_queue(mp);
1256 1257 1258 1259
	}

	/* rw -> ro */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1260
		/*
D
Dave Chinner 已提交
1261 1262 1263 1264 1265
		 * Before we sync the metadata, we need to free up the reserve
		 * block pool so that the used block count in the superblock on
		 * disk is correct at the end of the remount. Stash the current
		 * reserve pool size so that if we get remounted rw, we can
		 * return it to the same size.
1266
		 */
E
Eric Sandeen 已提交
1267
		xfs_save_resvblks(mp);
D
David Chinner 已提交
1268
		xfs_quiesce_attr(mp);
1269 1270 1271
		mp->m_flags |= XFS_MOUNT_RDONLY;
	}

1272
	return 0;
L
Linus Torvalds 已提交
1273 1274
}

C
Christoph Hellwig 已提交
1275 1276
/*
 * Second stage of a freeze. The data is already frozen so we only
1277 1278 1279
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
C
Christoph Hellwig 已提交
1280
 */
1281 1282
STATIC int
xfs_fs_freeze(
L
Linus Torvalds 已提交
1283 1284
	struct super_block	*sb)
{
C
Christoph Hellwig 已提交
1285 1286
	struct xfs_mount	*mp = XFS_M(sb);

E
Eric Sandeen 已提交
1287
	xfs_save_resvblks(mp);
D
David Chinner 已提交
1288
	xfs_quiesce_attr(mp);
1289
	return xfs_sync_sb(mp, true);
L
Linus Torvalds 已提交
1290 1291
}

E
Eric Sandeen 已提交
1292 1293 1294 1295 1296 1297 1298
/*
 * ->unfreeze_fs: refill the reserve pool and queue the log work again.
 *
 * Always returns 0.
 */
STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);

	return 0;
}

L
Linus Torvalds 已提交
1303
STATIC int
1304
xfs_fs_show_options(
L
Linus Torvalds 已提交
1305
	struct seq_file		*m,
1306
	struct dentry		*root)
L
Linus Torvalds 已提交
1307
{
D
Dave Chinner 已提交
1308
	return xfs_showargs(XFS_M(root->d_sb), m);
L
Linus Torvalds 已提交
1309 1310
}

1311 1312 1313 1314 1315 1316 1317 1318 1319 1320
/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
STATIC int
xfs_finish_flags(
	struct xfs_mount	*mp)
{
	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);

1321
	/* Fail a mount where the logbuf is smaller than the log stripe */
1322
	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1323 1324
		if (mp->m_logbsize <= 0 &&
		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1325
			mp->m_logbsize = mp->m_sb.sb_logsunit;
1326 1327
		} else if (mp->m_logbsize > 0 &&
			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
1328 1329
			xfs_warn(mp,
		"logbuf size must be greater than or equal to log stripe size");
D
Dave Chinner 已提交
1330
			return -EINVAL;
1331 1332 1333
		}
	} else {
		/* Fail a mount if the logbuf is larger than 32K */
1334
		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1335 1336
			xfs_warn(mp,
		"logbuf size for version 1 logs must be 16K or 32K");
D
Dave Chinner 已提交
1337
			return -EINVAL;
1338 1339 1340
		}
	}

1341 1342 1343 1344 1345 1346 1347 1348
	/*
	 * V5 filesystems always use attr2 format for attributes.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    (mp->m_flags & XFS_MOUNT_NOATTR2)) {
		xfs_warn(mp,
"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
			MNTOPT_NOATTR2, MNTOPT_ATTR2);
D
Dave Chinner 已提交
1349
		return -EINVAL;
1350 1351
	}

1352 1353 1354 1355 1356
	/*
	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
	 * told by noattr2 to turn it off
	 */
	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1357
	    !(mp->m_flags & XFS_MOUNT_NOATTR2))
1358 1359 1360 1361 1362 1363
		mp->m_flags |= XFS_MOUNT_ATTR2;

	/*
	 * prohibit r/w mounts of read-only filesystems
	 */
	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1364 1365
		xfs_warn(mp,
			"cannot mount a read-only filesystem as read-write");
D
Dave Chinner 已提交
1366
		return -EROFS;
1367 1368
	}

1369 1370 1371 1372 1373
	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
	    !xfs_sb_version_has_pquotino(&mp->m_sb)) {
		xfs_warn(mp,
		  "Super block does not support project and group quota together");
D
Dave Chinner 已提交
1374
		return -EINVAL;
1375 1376
	}

1377 1378 1379
	return 0;
}

D
Dave Chinner 已提交
1380 1381 1382 1383 1384 1385 1386 1387
/*
 * Set up the per-cpu inode and free block counters of the mount, all
 * starting at zero.
 *
 * Returns 0 on success. Any failure is reported as -ENOMEM after the
 * counters that were already initialised have been destroyed again.
 */
static int
xfs_init_percpu_counters(
	struct xfs_mount	*mp)
{
	if (percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL))
		return -ENOMEM;

	if (percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL))
		goto free_icount;

	if (percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL))
		goto free_ifree;

	return 0;

free_ifree:
	percpu_counter_destroy(&mp->m_ifree);
free_icount:
	percpu_counter_destroy(&mp->m_icount);
	return -ENOMEM;
}

void
xfs_reinit_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
}

/*
 * Destroy the three per-cpu counters set up by xfs_init_percpu_counters().
 */
static void
xfs_destroy_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	percpu_counter_destroy(&mp->m_fdblocks);
}

L
Linus Torvalds 已提交
1425
STATIC int
1426
xfs_fs_fill_super(
L
Linus Torvalds 已提交
1427 1428 1429 1430
	struct super_block	*sb,
	void			*data,
	int			silent)
{
1431
	struct inode		*root;
1432
	struct xfs_mount	*mp = NULL;
D
Dave Chinner 已提交
1433
	int			flags = 0, error = -ENOMEM;
1434

C
Christoph Hellwig 已提交
1435 1436
	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
	if (!mp)
1437
		goto out;
L
Linus Torvalds 已提交
1438

C
Christoph Hellwig 已提交
1439 1440 1441
	spin_lock_init(&mp->m_sb_lock);
	mutex_init(&mp->m_growlock);
	atomic_set(&mp->m_active_trans, 0);
D
Dave Chinner 已提交
1442
	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1443
	INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
D
Dave Chinner 已提交
1444
	mp->m_kobj.kobject.kset = xfs_kset;
1445

C
Christoph Hellwig 已提交
1446 1447
	mp->m_super = sb;
	sb->s_fs_info = mp;
L
Linus Torvalds 已提交
1448

C
Christoph Hellwig 已提交
1449
	error = xfs_parseargs(mp, (char *)data);
1450
	if (error)
1451
		goto out_free_fsname;
L
Linus Torvalds 已提交
1452 1453

	sb_min_blocksize(sb, BBSIZE);
1454
	sb->s_xattr = xfs_xattr_handlers;
1455
	sb->s_export_op = &xfs_export_operations;
1456
#ifdef CONFIG_XFS_QUOTA
1457
	sb->s_qcop = &xfs_quotactl_operations;
J
Jan Kara 已提交
1458
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
1459
#endif
1460
	sb->s_op = &xfs_super_operations;
L
Linus Torvalds 已提交
1461

1462
	if (silent)
1463 1464
		flags |= XFS_MFSI_QUIET;

1465
	error = xfs_open_devices(mp);
1466
	if (error)
C
Christoph Hellwig 已提交
1467
		goto out_free_fsname;
1468

D
Dave Chinner 已提交
1469
	error = xfs_init_mount_workqueues(mp);
1470 1471
	if (error)
		goto out_close_devices;
C
Christoph Hellwig 已提交
1472

D
Dave Chinner 已提交
1473
	error = xfs_init_percpu_counters(mp);
1474 1475 1476
	if (error)
		goto out_destroy_workqueues;

1477 1478 1479 1480 1481 1482 1483
	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = PTR_ERR(mp->m_stats.xs_stats);
		goto out_destroy_counters;
	}

1484 1485
	error = xfs_readsb(mp, flags);
	if (error)
1486
		goto out_free_stats;
1487 1488

	error = xfs_finish_flags(mp);
1489
	if (error)
1490
		goto out_free_sb;
1491

1492
	error = xfs_setup_devices(mp);
1493
	if (error)
1494
		goto out_free_sb;
1495 1496 1497

	error = xfs_filestream_mount(mp);
	if (error)
1498
		goto out_free_sb;
1499

1500 1501 1502 1503
	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
C
Christoph Hellwig 已提交
1504 1505 1506
	sb->s_magic = XFS_SB_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
L
Linus Torvalds 已提交
1507
	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1508
	sb->s_max_links = XFS_MAXLINK;
L
Linus Torvalds 已提交
1509 1510 1511
	sb->s_time_gran = 1;
	set_posix_acl_flag(sb);

D
Dave Chinner 已提交
1512 1513 1514 1515
	/* version 5 superblocks support inode version counters. */
	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
		sb->s_flags |= MS_I_VERSION;

D
Dave Chinner 已提交
1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529
	if (mp->m_flags & XFS_MOUNT_DAX) {
		xfs_warn(mp,
	"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
		if (sb->s_blocksize != PAGE_SIZE) {
			xfs_alert(mp,
		"Filesystem block size invalid for DAX Turning DAX off.");
			mp->m_flags &= ~XFS_MOUNT_DAX;
		} else if (!sb->s_bdev->bd_disk->fops->direct_access) {
			xfs_alert(mp,
		"Block device does not support DAX Turning DAX off.");
			mp->m_flags &= ~XFS_MOUNT_DAX;
		}
	}

1530 1531 1532 1533
	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
		xfs_alert(mp,
	"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");

1534
	error = xfs_mountfs(mp);
1535
	if (error)
D
Dave Chinner 已提交
1536
		goto out_filestream_unmount;
1537

1538
	root = igrab(VFS_I(mp->m_rootip));
1539
	if (!root) {
D
Dave Chinner 已提交
1540
		error = -ENOENT;
1541
		goto out_unmount;
C
Christoph Hellwig 已提交
1542
	}
1543
	sb->s_root = d_make_root(root);
1544
	if (!sb->s_root) {
D
Dave Chinner 已提交
1545
		error = -ENOMEM;
1546
		goto out_unmount;
L
Linus Torvalds 已提交
1547
	}
1548

L
Linus Torvalds 已提交
1549
	return 0;
D
Dave Chinner 已提交
1550

D
Dave Chinner 已提交
1551
 out_filestream_unmount:
1552
	xfs_filestream_unmount(mp);
1553 1554
 out_free_sb:
	xfs_freesb(mp);
1555 1556
 out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
1557
 out_destroy_counters:
D
Dave Chinner 已提交
1558
	xfs_destroy_percpu_counters(mp);
1559
 out_destroy_workqueues:
1560
	xfs_destroy_mount_workqueues(mp);
1561
 out_close_devices:
1562
	xfs_close_devices(mp);
1563 1564
 out_free_fsname:
	xfs_free_fsname(mp);
C
Christoph Hellwig 已提交
1565
	kfree(mp);
1566
 out:
D
Dave Chinner 已提交
1567
	return error;
1568

1569
 out_unmount:
1570
	xfs_filestream_unmount(mp);
1571
	xfs_unmountfs(mp);
1572
	goto out_free_sb;
L
Linus Torvalds 已提交
1573 1574
}

D
Dave Chinner 已提交
1575 1576 1577 1578 1579 1580
STATIC void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

1581
	xfs_notice(mp, "Unmounting Filesystem");
D
Dave Chinner 已提交
1582 1583 1584 1585
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	xfs_freesb(mp);
1586
	free_percpu(mp->m_stats.xs_stats);
D
Dave Chinner 已提交
1587 1588 1589 1590 1591 1592 1593
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_close_devices(mp);
	xfs_free_fsname(mp);
	kfree(mp);
}

A
Al Viro 已提交
1594 1595
STATIC struct dentry *
xfs_fs_mount(
L
Linus Torvalds 已提交
1596 1597 1598
	struct file_system_type	*fs_type,
	int			flags,
	const char		*dev_name,
A
Al Viro 已提交
1599
	void			*data)
L
Linus Torvalds 已提交
1600
{
A
Al Viro 已提交
1601
	return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1602 1603
}

1604
/*
 * ->nr_cached_objects: report the count returned by
 * xfs_reclaim_inodes_count() for this mount. @sc is not used.
 */
static long
xfs_fs_nr_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	struct xfs_mount	*mp = XFS_M(sb);

	return xfs_reclaim_inodes_count(mp);
}

1612
static long
1613 1614
xfs_fs_free_cached_objects(
	struct super_block	*sb,
1615
	struct shrink_control	*sc)
1616
{
1617
	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1618 1619
}

1620
static const struct super_operations xfs_super_operations = {
1621 1622
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
1623
	.evict_inode		= xfs_fs_evict_inode,
1624
	.drop_inode		= xfs_fs_drop_inode,
1625
	.put_super		= xfs_fs_put_super,
C
Christoph Hellwig 已提交
1626
	.sync_fs		= xfs_fs_sync_fs,
1627
	.freeze_fs		= xfs_fs_freeze,
E
Eric Sandeen 已提交
1628
	.unfreeze_fs		= xfs_fs_unfreeze,
1629 1630 1631
	.statfs			= xfs_fs_statfs,
	.remount_fs		= xfs_fs_remount,
	.show_options		= xfs_fs_show_options,
1632 1633
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
L
Linus Torvalds 已提交
1634 1635
};

A
Andrew Morton 已提交
1636
static struct file_system_type xfs_fs_type = {
L
Linus Torvalds 已提交
1637 1638
	.owner			= THIS_MODULE,
	.name			= "xfs",
A
Al Viro 已提交
1639
	.mount			= xfs_fs_mount,
L
Linus Torvalds 已提交
1640 1641 1642
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV,
};
1643
MODULE_ALIAS_FS("xfs");
L
Linus Torvalds 已提交
1644

1645 1646 1647 1648 1649 1650
/*
 * Create the memory zones and the ioend mempool used by XFS.
 *
 * Returns 0 on success. If any allocation fails, everything created so far
 * is destroyed in reverse order and -ENOMEM is returned.
 */
STATIC int __init
xfs_init_zones(void)
{
	/* I/O completion structures and their mempool */
	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
	if (!xfs_ioend_zone)
		goto out;

	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
						  xfs_ioend_zone);
	if (!xfs_ioend_pool)
		goto out_destroy_ioend_zone;

	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
						"xfs_log_ticket");
	if (!xfs_log_ticket_zone)
		goto out_destroy_ioend_pool;

	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
						"xfs_bmap_free_item");
	if (!xfs_bmap_free_item_zone)
		goto out_destroy_log_ticket_zone;

	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
						"xfs_btree_cur");
	if (!xfs_btree_cur_zone)
		goto out_destroy_bmap_free_item_zone;

	xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
						"xfs_da_state");
	if (!xfs_da_state_zone)
		goto out_destroy_btree_cur_zone;

	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
	if (!xfs_ifork_zone)
		goto out_destroy_da_state_zone;

	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
	if (!xfs_trans_zone)
		goto out_destroy_ifork_zone;

	xfs_log_item_desc_zone =
		kmem_zone_init(sizeof(struct xfs_log_item_desc),
			       "xfs_log_item_desc");
	if (!xfs_log_item_desc_zone)
		goto out_destroy_trans_zone;

	/*
	 * The size of the zone allocated buf log item is the maximum
	 * size possible under XFS.  This wastes a little bit of memory,
	 * but it is much faster.
	 */
	xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
					   "xfs_buf_item");
	if (!xfs_buf_item_zone)
		goto out_destroy_log_item_desc_zone;

	/* EFD/EFI items are sized with room for the fast extent arrays */
	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
			((XFS_EFD_MAX_FAST_EXTENTS - 1) *
				 sizeof(xfs_extent_t))), "xfs_efd_item");
	if (!xfs_efd_zone)
		goto out_destroy_buf_item_zone;

	xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
			((XFS_EFI_MAX_FAST_EXTENTS - 1) *
				sizeof(xfs_extent_t))), "xfs_efi_item");
	if (!xfs_efi_zone)
		goto out_destroy_efd_zone;

	/* the inode zone has xfs_fs_inode_init_once() as its constructor */
	xfs_inode_zone =
		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
			KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
			xfs_fs_inode_init_once);
	if (!xfs_inode_zone)
		goto out_destroy_efi_zone;

	xfs_ili_zone =
		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
					KM_ZONE_SPREAD, NULL);
	if (!xfs_ili_zone)
		goto out_destroy_inode_zone;

	xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
					"xfs_icr");
	if (!xfs_icreate_zone)
		goto out_destroy_ili_zone;

	return 0;

	/* error unwinding, in reverse order of creation */
 out_destroy_ili_zone:
	kmem_zone_destroy(xfs_ili_zone);
 out_destroy_inode_zone:
	kmem_zone_destroy(xfs_inode_zone);
 out_destroy_efi_zone:
	kmem_zone_destroy(xfs_efi_zone);
 out_destroy_efd_zone:
	kmem_zone_destroy(xfs_efd_zone);
 out_destroy_buf_item_zone:
	kmem_zone_destroy(xfs_buf_item_zone);
 out_destroy_log_item_desc_zone:
	kmem_zone_destroy(xfs_log_item_desc_zone);
 out_destroy_trans_zone:
	kmem_zone_destroy(xfs_trans_zone);
 out_destroy_ifork_zone:
	kmem_zone_destroy(xfs_ifork_zone);
 out_destroy_da_state_zone:
	kmem_zone_destroy(xfs_da_state_zone);
 out_destroy_btree_cur_zone:
	kmem_zone_destroy(xfs_btree_cur_zone);
 out_destroy_bmap_free_item_zone:
	kmem_zone_destroy(xfs_bmap_free_item_zone);
 out_destroy_log_ticket_zone:
	kmem_zone_destroy(xfs_log_ticket_zone);
 out_destroy_ioend_pool:
	mempool_destroy(xfs_ioend_pool);
 out_destroy_ioend_zone:
	kmem_zone_destroy(xfs_ioend_zone);
 out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_zones(void)
{
1768 1769 1770 1771 1772
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
D
Dave Chinner 已提交
1773
	kmem_zone_destroy(xfs_icreate_zone);
1774 1775 1776 1777 1778
	kmem_zone_destroy(xfs_ili_zone);
	kmem_zone_destroy(xfs_inode_zone);
	kmem_zone_destroy(xfs_efi_zone);
	kmem_zone_destroy(xfs_efd_zone);
	kmem_zone_destroy(xfs_buf_item_zone);
1779
	kmem_zone_destroy(xfs_log_item_desc_zone);
1780 1781 1782 1783 1784 1785 1786 1787 1788 1789
	kmem_zone_destroy(xfs_trans_zone);
	kmem_zone_destroy(xfs_ifork_zone);
	kmem_zone_destroy(xfs_da_state_zone);
	kmem_zone_destroy(xfs_btree_cur_zone);
	kmem_zone_destroy(xfs_bmap_free_item_zone);
	kmem_zone_destroy(xfs_log_ticket_zone);
	mempool_destroy(xfs_ioend_pool);
	kmem_zone_destroy(xfs_ioend_zone);

}
L
Linus Torvalds 已提交
1790

1791 1792 1793
/*
 * Create the global XFS allocation workqueue.
 *
 * Returns 0 on success or -ENOMEM if the workqueue cannot be allocated.
 */
STATIC int __init
xfs_init_workqueues(void)
{
	/*
	 * The allocation workqueue can be used in memory reclaim situations
	 * (writepage path), and parallelism is only limited by the number of
	 * AGs in all the filesystems mounted. Hence use the default large
	 * max_active value for this workqueue.
	 */
	xfs_alloc_wq = alloc_workqueue("xfsalloc",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);

	return xfs_alloc_wq ? 0 : -ENOMEM;
}

1808
STATIC void
1809 1810
xfs_destroy_workqueues(void)
{
1811
	destroy_workqueue(xfs_alloc_wq);
1812 1813
}

L
Linus Torvalds 已提交
1814
STATIC int __init
1815
init_xfs_fs(void)
L
Linus Torvalds 已提交
1816 1817 1818
{
	int			error;

1819 1820
	printk(KERN_INFO XFS_VERSION_STRING " with "
			 XFS_BUILD_OPTIONS " enabled\n");
L
Linus Torvalds 已提交
1821

1822
	xfs_dir_startup();
L
Linus Torvalds 已提交
1823

1824
	error = xfs_init_zones();
1825 1826 1827
	if (error)
		goto out;

1828
	error = xfs_init_workqueues();
1829
	if (error)
C
Christoph Hellwig 已提交
1830
		goto out_destroy_zones;
1831

1832 1833 1834 1835
	error = xfs_mru_cache_init();
	if (error)
		goto out_destroy_wq;

1836
	error = xfs_buf_init();
1837
	if (error)
1838
		goto out_mru_cache_uninit;
1839 1840 1841 1842 1843 1844 1845 1846

	error = xfs_init_procfs();
	if (error)
		goto out_buf_terminate;

	error = xfs_sysctl_register();
	if (error)
		goto out_cleanup_procfs;
L
Linus Torvalds 已提交
1847

B
Brian Foster 已提交
1848 1849 1850
	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
	if (!xfs_kset) {
		error = -ENOMEM;
1851
		goto out_sysctl_unregister;
B
Brian Foster 已提交
1852 1853
	}

1854 1855 1856 1857 1858 1859 1860 1861 1862
	xfsstats.xs_kobj.kobject.kset = xfs_kset;

	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
	if (!xfsstats.xs_stats) {
		error = -ENOMEM;
		goto out_kset_unregister;
	}

	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
1863 1864
			       "stats");
	if (error)
1865
		goto out_free_stats;
1866

1867 1868 1869
#ifdef DEBUG
	xfs_dbg_kobj.kobject.kset = xfs_kset;
	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
1870
	if (error)
1871
		goto out_remove_stats_kobj;
1872 1873 1874 1875
#endif

	error = xfs_qm_init();
	if (error)
1876
		goto out_remove_dbg_kobj;
L
Linus Torvalds 已提交
1877 1878 1879

	error = register_filesystem(&xfs_fs_type);
	if (error)
1880
		goto out_qm_exit;
L
Linus Torvalds 已提交
1881 1882
	return 0;

1883 1884
 out_qm_exit:
	xfs_qm_exit();
1885
 out_remove_dbg_kobj:
1886 1887
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
1888
 out_remove_stats_kobj:
1889
#endif
1890 1891 1892
	xfs_sysfs_del(&xfsstats.xs_kobj);
 out_free_stats:
	free_percpu(xfsstats.xs_stats);
1893
 out_kset_unregister:
B
Brian Foster 已提交
1894
	kset_unregister(xfs_kset);
1895 1896 1897 1898 1899
 out_sysctl_unregister:
	xfs_sysctl_unregister();
 out_cleanup_procfs:
	xfs_cleanup_procfs();
 out_buf_terminate:
1900
	xfs_buf_terminate();
1901 1902
 out_mru_cache_uninit:
	xfs_mru_cache_uninit();
1903 1904
 out_destroy_wq:
	xfs_destroy_workqueues();
1905
 out_destroy_zones:
1906
	xfs_destroy_zones();
1907
 out:
L
Linus Torvalds 已提交
1908 1909 1910 1911
	return error;
}

STATIC void __exit
1912
exit_xfs_fs(void)
L
Linus Torvalds 已提交
1913
{
1914
	xfs_qm_exit();
L
Linus Torvalds 已提交
1915
	unregister_filesystem(&xfs_fs_type);
1916 1917 1918
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
1919 1920
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
B
Brian Foster 已提交
1921
	kset_unregister(xfs_kset);
1922 1923
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
1924
	xfs_buf_terminate();
1925
	xfs_mru_cache_uninit();
1926
	xfs_destroy_workqueues();
1927
	xfs_destroy_zones();
L
Linus Torvalds 已提交
1928 1929 1930 1931 1932 1933 1934 1935
}

/* Module entry and exit points. */
module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

/* Module metadata. */
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");