// SPDX-License-Identifier: GPL-2.0+
/*
 * super.c - NILFS module and super block management.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 */
/*
 *  linux/fs/ext2/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
#include <linux/crc32.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
36 37
#include <linux/seq_file.h>
#include <linux/mount.h>
R
Ryusuke Konishi 已提交
38
#include "nilfs.h"
39
#include "export.h"
R
Ryusuke Konishi 已提交
40 41
#include "mdt.h"
#include "alloc.h"
42 43
#include "btree.h"
#include "btnode.h"
R
Ryusuke Konishi 已提交
44 45
#include "page.h"
#include "cpfile.h"
R
Ryusuke Konishi 已提交
46
#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
R
Ryusuke Konishi 已提交
47 48 49 50 51 52 53 54 55 56
#include "ifile.h"
#include "dat.h"
#include "segment.h"
#include "segbuf.h"

/* Module metadata. */
MODULE_AUTHOR("NTT Corp.");
MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
		   "(NILFS)");
MODULE_LICENSE("GPL");

/* Slab cache used by nilfs_alloc_inode() / nilfs_free_inode() in this file. */
static struct kmem_cache *nilfs_inode_cachep;

/* Slab caches with external linkage; their users are outside this file. */
struct kmem_cache *nilfs_transaction_cachep;
struct kmem_cache *nilfs_segbuf_cachep;
struct kmem_cache *nilfs_btree_path_cache;

/* Forward declarations of functions referenced before their definition. */
static int nilfs_remount(struct super_block *sb, int *flags, char *data);
static int nilfs_setup_super(struct super_block *sb, int is_mount);

65
void __nilfs_msg(struct super_block *sb, const char *fmt, ...)
66 67 68
{
	struct va_format vaf;
	va_list args;
69
	int level;
70 71

	va_start(args, fmt);
72 73 74

	level = printk_get_level(fmt);
	vaf.fmt = printk_skip_level(fmt);
75
	vaf.va = &args;
76

77
	if (sb)
78 79
		printk("%c%cNILFS (%s): %pV\n",
		       KERN_SOH_ASCII, level, sb->s_id, &vaf);
80
	else
81 82 83
		printk("%c%cNILFS: %pV\n",
		       KERN_SOH_ASCII, level, &vaf);

84 85 86
	va_end(args);
}

87
static void nilfs_set_error(struct super_block *sb)
R
Ryusuke Konishi 已提交
88
{
89
	struct the_nilfs *nilfs = sb->s_fs_info;
90
	struct nilfs_super_block **sbp;
R
Ryusuke Konishi 已提交
91 92 93 94

	down_write(&nilfs->ns_sem);
	if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
		nilfs->ns_mount_state |= NILFS_ERROR_FS;
95
		sbp = nilfs_prepare_super(sb, 0);
96 97
		if (likely(sbp)) {
			sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
J
Jiro SEKIBA 已提交
98 99
			if (sbp[1])
				sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
100
			nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
101
		}
R
Ryusuke Konishi 已提交
102 103 104 105
	}
	up_write(&nilfs->ns_sem);
}

R
Ryusuke Konishi 已提交
106
/**
107
 * __nilfs_error() - report failure condition on a filesystem
R
Ryusuke Konishi 已提交
108
 *
109 110 111
 * __nilfs_error() sets an ERROR_FS flag on the superblock as well as
 * reporting an error message.  This function should be called when
 * NILFS detects incoherences or defects of meta data on disk.
R
Ryusuke Konishi 已提交
112
 *
113 114
 * This implements the body of nilfs_error() macro.  Normally,
 * nilfs_error() should be used.  As for sustainable errors such as a
115
 * single-shot I/O error, nilfs_err() should be used instead.
116 117
 *
 * Callers should not add a trailing newline since this will do it.
R
Ryusuke Konishi 已提交
118
 */
119 120
void __nilfs_error(struct super_block *sb, const char *function,
		   const char *fmt, ...)
R
Ryusuke Konishi 已提交
121
{
122
	struct the_nilfs *nilfs = sb->s_fs_info;
123
	struct va_format vaf;
R
Ryusuke Konishi 已提交
124 125 126
	va_list args;

	va_start(args, fmt);
127 128 129 130 131 132 133

	vaf.fmt = fmt;
	vaf.va = &args;

	printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n",
	       sb->s_id, function, &vaf);

R
Ryusuke Konishi 已提交
134 135
	va_end(args);

136
	if (!sb_rdonly(sb)) {
137
		nilfs_set_error(sb);
R
Ryusuke Konishi 已提交
138

139
		if (nilfs_test_opt(nilfs, ERRORS_RO)) {
R
Ryusuke Konishi 已提交
140
			printk(KERN_CRIT "Remounting filesystem read-only\n");
141
			sb->s_flags |= SB_RDONLY;
R
Ryusuke Konishi 已提交
142 143 144
		}
	}

145
	if (nilfs_test_opt(nilfs, ERRORS_PANIC))
R
Ryusuke Konishi 已提交
146 147 148 149
		panic("NILFS (device %s): panic forced after error\n",
		      sb->s_id);
}

150
struct inode *nilfs_alloc_inode(struct super_block *sb)
R
Ryusuke Konishi 已提交
151 152 153 154 155 156 157 158
{
	struct nilfs_inode_info *ii;

	ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS);
	if (!ii)
		return NULL;
	ii->i_bh = NULL;
	ii->i_state = 0;
159
	ii->i_cno = 0;
160
	nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode);
R
Ryusuke Konishi 已提交
161 162 163
	return &ii->vfs_inode;
}

A
Al Viro 已提交
164
static void nilfs_free_inode(struct inode *inode)
R
Ryusuke Konishi 已提交
165
{
166 167 168
	if (nilfs_is_metadata_file_inode(inode))
		nilfs_mdt_destroy(inode);

R
Ryusuke Konishi 已提交
169 170 171
	kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
}

172
static int nilfs_sync_super(struct super_block *sb, int flag)
R
Ryusuke Konishi 已提交
173
{
174
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
175 176 177
	int err;

 retry:
178
	set_buffer_dirty(nilfs->ns_sbh[0]);
179
	if (nilfs_test_opt(nilfs, BARRIER)) {
C
Christoph Hellwig 已提交
180
		err = __sync_dirty_buffer(nilfs->ns_sbh[0],
181
					  REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
C
Christoph Hellwig 已提交
182 183
	} else {
		err = sync_dirty_buffer(nilfs->ns_sbh[0]);
R
Ryusuke Konishi 已提交
184
	}
C
Christoph Hellwig 已提交
185

186
	if (unlikely(err)) {
187
		nilfs_err(sb, "unable to write superblock: err=%d", err);
188
		if (err == -EIO && nilfs->ns_sbh[1]) {
J
Jiro SEKIBA 已提交
189 190 191 192 193 194
			/*
			 * sbp[0] points to newer log than sbp[1],
			 * so copy sbp[0] to sbp[1] to take over sbp[0].
			 */
			memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0],
			       nilfs->ns_sbsize);
195 196 197 198 199 200
			nilfs_fall_back_super_block(nilfs);
			goto retry;
		}
	} else {
		struct nilfs_super_block *sbp = nilfs->ns_sbp[0];

J
Jiro SEKIBA 已提交
201 202
		nilfs->ns_sbwcount++;

203 204 205 206
		/*
		 * The latest segment becomes trailable from the position
		 * written in superblock.
		 */
R
Ryusuke Konishi 已提交
207
		clear_nilfs_discontinued(nilfs);
208 209 210

		/* update GC protection for recent segments */
		if (nilfs->ns_sbh[1]) {
J
Jiro SEKIBA 已提交
211
			if (flag == NILFS_SB_COMMIT_ALL) {
212
				set_buffer_dirty(nilfs->ns_sbh[1]);
J
Jiro SEKIBA 已提交
213 214
				if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0)
					goto out;
215
			}
J
Jiro SEKIBA 已提交
216 217 218
			if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) <
			    le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno))
				sbp = nilfs->ns_sbp[1];
219
		}
R
Ryusuke Konishi 已提交
220

J
Jiro SEKIBA 已提交
221 222 223 224 225
		spin_lock(&nilfs->ns_last_segment_lock);
		nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq);
		spin_unlock(&nilfs->ns_last_segment_lock);
	}
 out:
R
Ryusuke Konishi 已提交
226 227 228
	return err;
}

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
/*
 * nilfs_set_log_cursor - store free block count and log position
 * @sbp: super block image to update
 * @nilfs: nilfs object supplying the values
 *
 * Fills s_free_blocks_count from nilfs_count_free_blocks() (its return
 * value is not checked here) and copies ns_last_seq, ns_last_pseg and
 * ns_last_cno under ns_last_segment_lock.
 *
 * nilfs->ns_sem must be locked by the caller.
 */
void nilfs_set_log_cursor(struct nilfs_super_block *sbp,
			  struct the_nilfs *nilfs)
{
	sector_t nfree;

	nilfs_count_free_blocks(nilfs, &nfree);
	sbp->s_free_blocks_count = cpu_to_le64(nfree);

	spin_lock(&nilfs->ns_last_segment_lock);
	sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno);
	sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg);
	sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq);
	spin_unlock(&nilfs->ns_last_segment_lock);
}

245
struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
J
Jiro SEKIBA 已提交
246
					       int flip)
R
Ryusuke Konishi 已提交
247
{
248
	struct the_nilfs *nilfs = sb->s_fs_info;
249
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
R
Ryusuke Konishi 已提交
250

251
	/* nilfs->ns_sem must be locked by the caller. */
252
	if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
253 254
		if (sbp[1] &&
		    sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) {
J
Jiro SEKIBA 已提交
255
			memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
256
		} else {
257
			nilfs_crit(sb, "superblock broke");
258
			return NULL;
259
		}
J
Jiro SEKIBA 已提交
260 261
	} else if (sbp[1] &&
		   sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
262
		memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
263
	}
J
Jiro SEKIBA 已提交
264 265 266 267

	if (flip && sbp[1])
		nilfs_swap_super_block(nilfs);

268 269 270
	return sbp;
}

271
int nilfs_commit_super(struct super_block *sb, int flag)
272
{
273
	struct the_nilfs *nilfs = sb->s_fs_info;
274
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
A
Arnd Bergmann 已提交
275
	time64_t t;
276 277

	/* nilfs->ns_sem must be locked by the caller. */
A
Arnd Bergmann 已提交
278
	t = ktime_get_real_seconds();
J
Jiro SEKIBA 已提交
279
	nilfs->ns_sbwtime = t;
280 281 282 283 284
	sbp[0]->s_wtime = cpu_to_le64(t);
	sbp[0]->s_sum = 0;
	sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
					     (unsigned char *)sbp[0],
					     nilfs->ns_sbsize));
J
Jiro SEKIBA 已提交
285 286 287 288 289 290
	if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) {
		sbp[1]->s_wtime = sbp[0]->s_wtime;
		sbp[1]->s_sum = 0;
		sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
					    (unsigned char *)sbp[1],
					    nilfs->ns_sbsize));
291
	}
292
	clear_nilfs_sb_dirty(nilfs);
293 294 295
	nilfs->ns_flushed_device = 1;
	/* make sure store to ns_flushed_device cannot be reordered */
	smp_wmb();
296
	return nilfs_sync_super(sb, flag);
R
Ryusuke Konishi 已提交
297 298
}

299 300
/**
 * nilfs_cleanup_super() - write filesystem state for cleanup
301
 * @sb: super block instance to be unmounted or degraded to read-only
302 303 304 305 306
 *
 * This function restores state flags in the on-disk super block.
 * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the
 * filesystem was not clean previously.
 */
307
int nilfs_cleanup_super(struct super_block *sb)
308
{
309
	struct the_nilfs *nilfs = sb->s_fs_info;
310
	struct nilfs_super_block **sbp;
J
Jiro SEKIBA 已提交
311
	int flag = NILFS_SB_COMMIT;
312
	int ret = -EIO;
313

314
	sbp = nilfs_prepare_super(sb, 0);
315
	if (sbp) {
316 317
		sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
		nilfs_set_log_cursor(sbp[0], nilfs);
J
Jiro SEKIBA 已提交
318 319 320 321 322 323 324 325 326
		if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) {
			/*
			 * make the "clean" flag also to the opposite
			 * super block if both super blocks point to
			 * the same checkpoint.
			 */
			sbp[1]->s_state = sbp[0]->s_state;
			flag = NILFS_SB_COMMIT_ALL;
		}
327
		ret = nilfs_commit_super(sb, flag);
328
	}
329 330 331
	return ret;
}

332 333 334 335 336 337 338 339 340 341 342 343
/**
 * nilfs_move_2nd_super - relocate secondary super block
 * @sb: super block instance
 * @sb2off: new offset of the secondary super block (in bytes)
 *
 * Finds which slot of ns_sbh[] currently holds the secondary super block
 * (the one located after ns_first_data_block), obtains a buffer for the
 * block containing @sb2off and moves the super block image there.  If no
 * secondary super block is currently held and slot 0 holds the primary
 * one, a zero-filled buffer is installed in slot 1 instead.
 *
 * The new buffer is filled so that every byte outside the copied super
 * block image is zero.  The image is copied before the zero fill because
 * the old and the new position may lie in the same block.
 *
 * nilfs->ns_sem must be locked by the caller.
 *
 * Return: 0 on success (including when the location is unchanged), or
 * -EIO if the new buffer could not be obtained.
 */
static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *nsbh;
	struct nilfs_super_block *nsbp;
	sector_t blocknr, newblocknr;
	unsigned long offset;
	int sb2i;  /* array index of the secondary superblock */
	int ret = 0;

	/* nilfs->ns_sem must be locked by the caller. */
	if (nilfs->ns_sbh[1] &&
	    nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
		sb2i = 1;
		blocknr = nilfs->ns_sbh[1]->b_blocknr;
	} else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
		sb2i = 0;
		blocknr = nilfs->ns_sbh[0]->b_blocknr;
	} else {
		sb2i = -1;
		blocknr = 0;
	}
	if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
		goto out;  /* super block location is unchanged */

	/* Get new super block buffer */
	newblocknr = sb2off >> nilfs->ns_blocksize_bits;
	offset = sb2off & (nilfs->ns_blocksize - 1);
	nsbh = sb_getblk(sb, newblocknr);
	if (!nsbh) {
		nilfs_warn(sb,
			   "unable to move secondary superblock to block %llu",
			   (unsigned long long)newblocknr);
		ret = -EIO;
		goto out;
	}
	nsbp = (void *)nsbh->b_data + offset;

	if (sb2i >= 0) {
		/*
		 * Copy first: when the super block moves within the same
		 * block, source and destination share this buffer, so a
		 * zero fill done beforehand would destroy the source.
		 * memmove() tolerates any overlap between the two.
		 */
		memmove(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);

		/*
		 * Zero everything in the buffer except the copied image.
		 * The fill is bounded by the buffer size; starting a
		 * block-sized memset at a nonzero @offset would run past
		 * the end of the buffer.
		 */
		memset(nsbh->b_data, 0, offset);
		memset((void *)nsbp + nilfs->ns_sbsize, 0,
		       nsbh->b_size - offset - nilfs->ns_sbsize);

		brelse(nilfs->ns_sbh[sb2i]);
		nilfs->ns_sbh[sb2i] = nsbh;
		nilfs->ns_sbp[sb2i] = nsbp;
	} else {
		/* No image to carry over: start from an all-zero block. */
		memset(nsbh->b_data, 0, nsbh->b_size);

		if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
			/* secondary super block will be restored to index 1 */
			nilfs->ns_sbh[1] = nsbh;
			nilfs->ns_sbp[1] = nsbp;
		} else {
			brelse(nsbh);
		}
	}
out:
	return ret;
}

R
Ryusuke Konishi 已提交
392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
/**
 * nilfs_resize_fs - resize the filesystem
 * @sb: super block instance
 * @newsize: new size of the filesystem (in bytes)
 *
 * Resizes the segment usage file to the number of segments that fit
 * below the new secondary super block position, writes out a log,
 * relocates the secondary super block and commits both super blocks
 * with the new device size and segment count.  The range of allocatable
 * segments is updated last, and only if everything before it succeeded.
 *
 * Return: 0 on success, -ERANGE if @newsize exceeds the size of the
 * underlying block device, -ENOSPC if @newsize is too small to hold a
 * secondary super block, -EIO if no valid super block could be
 * prepared, or an error from nilfs_sufile_resize(),
 * nilfs_construct_segment() or nilfs_commit_super().
 */
int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_super_block **sbp;
	__u64 devsize, newnsegs;
	loff_t sb2off;
	int ret;

	ret = -ERANGE;
	devsize = i_size_read(sb->s_bdev->bd_inode);
	if (newsize > devsize)
		goto out;

	/*
	 * Prevent underflow in the secondary super block position:
	 * NILFS_SB2_OFFSET_BYTES() steps back one 4 KiB unit from the
	 * end of the device, which wraps around for sizes below 4096.
	 */
	if (newsize < 4096) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * Write lock is required to protect some functions depending
	 * on the number of segments, the number of reserved segments,
	 * and so forth.
	 */
	down_write(&nilfs->ns_segctor_sem);

	sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
	newnsegs = sb2off >> nilfs->ns_blocksize_bits;
	do_div(newnsegs, nilfs->ns_blocks_per_segment);

	ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
	up_write(&nilfs->ns_segctor_sem);
	if (ret < 0)
		goto out;

	ret = nilfs_construct_segment(sb);
	if (ret < 0)
		goto out;

	down_write(&nilfs->ns_sem);
	/*
	 * NOTE(review): the result of nilfs_move_2nd_super() is not
	 * checked; the super blocks are committed regardless.
	 */
	nilfs_move_2nd_super(sb, sb2off);
	ret = -EIO;
	sbp = nilfs_prepare_super(sb, 0);
	if (likely(sbp)) {
		nilfs_set_log_cursor(sbp[0], nilfs);
		/*
		 * Drop NILFS_RESIZE_FS flag for compatibility with
		 * mount-time resize which may be implemented in a
		 * future release.
		 */
		sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
					      ~NILFS_RESIZE_FS);
		sbp[0]->s_dev_size = cpu_to_le64(newsize);
		sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
		if (sbp[1])
			memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
		ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
	}
	up_write(&nilfs->ns_sem);

	/*
	 * Reset the range of allocatable segments last.  This order
	 * is important in the case of expansion because the secondary
	 * superblock must be protected from log write until migration
	 * completes.
	 */
	if (!ret)
		nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
out:
	return ret;
}

R
Ryusuke Konishi 已提交
463 464
static void nilfs_put_super(struct super_block *sb)
{
465
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
466

467
	nilfs_detach_log_writer(sb);
R
Ryusuke Konishi 已提交
468

469
	if (!sb_rdonly(sb)) {
R
Ryusuke Konishi 已提交
470
		down_write(&nilfs->ns_sem);
471
		nilfs_cleanup_super(sb);
R
Ryusuke Konishi 已提交
472 473 474
		up_write(&nilfs->ns_sem);
	}

475 476 477 478
	iput(nilfs->ns_sufile);
	iput(nilfs->ns_cpfile);
	iput(nilfs->ns_dat);

479
	destroy_nilfs(nilfs);
R
Ryusuke Konishi 已提交
480 481 482 483 484
	sb->s_fs_info = NULL;
}

static int nilfs_sync_fs(struct super_block *sb, int wait)
{
485
	struct the_nilfs *nilfs = sb->s_fs_info;
486
	struct nilfs_super_block **sbp;
R
Ryusuke Konishi 已提交
487 488 489 490 491
	int err = 0;

	/* This function is called when super block should be written back */
	if (wait)
		err = nilfs_construct_segment(sb);
492 493

	down_write(&nilfs->ns_sem);
494
	if (nilfs_sb_dirty(nilfs)) {
495
		sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs));
J
Jiro SEKIBA 已提交
496 497
		if (likely(sbp)) {
			nilfs_set_log_cursor(sbp[0], nilfs);
498
			nilfs_commit_super(sb, NILFS_SB_COMMIT);
J
Jiro SEKIBA 已提交
499
		}
500
	}
501 502
	up_write(&nilfs->ns_sem);

503 504 505
	if (!err)
		err = nilfs_flush_device(nilfs);

R
Ryusuke Konishi 已提交
506 507 508
	return err;
}

509
/*
 * nilfs_attach_checkpoint - attach a checkpoint to a root object
 * @sb: super block instance
 * @cno: checkpoint number to read
 * @curr_mnt: if nonzero, the root is keyed by NILFS_CPTREE_CURRENT_CNO
 *            instead of @cno
 * @rootp: place to store the resulting root object
 *
 * Looks up or creates the root object.  If it already has an ifile it
 * is reused as is.  Otherwise the checkpoint entry is read from the
 * cpfile, the ifile is set up from it and the inode/block counters are
 * loaded.  On failure the reference on the root object is dropped.
 *
 * Return: 0 on success, -ENOMEM if no root object could be obtained,
 * -EINVAL if the checkpoint lookup returned -ENOENT or -EINVAL, or
 * another error from the cpfile/ifile helpers.
 */
int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
			    struct nilfs_root **rootp)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_checkpoint *raw_cp;
	struct buffer_head *bh_cp;
	struct nilfs_root *root;
	int err;

	root = nilfs_find_or_create_root(
		nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno);
	if (!root)
		return -ENOMEM;

	if (root->ifile) {
		/* already attached checkpoint */
		*rootp = root;
		return 0;
	}

	down_read(&nilfs->ns_segctor_sem);
	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
					  &bh_cp);
	up_read(&nilfs->ns_segctor_sem);
	if (unlikely(err)) {
		if (err == -ENOENT || err == -EINVAL) {
			nilfs_err(sb,
				  "Invalid checkpoint (checkpoint number=%llu)",
				  (unsigned long long)cno);
			err = -EINVAL;
		}
		goto put_root;
	}

	err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size,
			       &raw_cp->cp_ifile_inode, &root->ifile);
	if (!err) {
		atomic64_set(&root->inodes_count,
			     le64_to_cpu(raw_cp->cp_inodes_count));
		atomic64_set(&root->blocks_count,
			     le64_to_cpu(raw_cp->cp_blocks_count));
	}

	/* the checkpoint entry is released on success and failure alike */
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
	if (err)
		goto put_root;

	*rootp = root;
	return 0;

put_root:
	nilfs_put_root(root);
	return err;
}

564 565
static int nilfs_freeze(struct super_block *sb)
{
566
	struct the_nilfs *nilfs = sb->s_fs_info;
567 568
	int err;

569
	if (sb_rdonly(sb))
570
		return 0;
R
Ryusuke Konishi 已提交
571

572 573
	/* Mark super block clean */
	down_write(&nilfs->ns_sem);
574
	err = nilfs_cleanup_super(sb);
575
	up_write(&nilfs->ns_sem);
R
Ryusuke Konishi 已提交
576 577 578
	return err;
}

579
static int nilfs_unfreeze(struct super_block *sb)
R
Ryusuke Konishi 已提交
580
{
581
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
582

583
	if (sb_rdonly(sb))
584 585 586
		return 0;

	down_write(&nilfs->ns_sem);
587
	nilfs_setup_super(sb, false);
588 589
	up_write(&nilfs->ns_sem);
	return 0;
R
Ryusuke Konishi 已提交
590 591 592 593 594
}

static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
595
	struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root;
596
	struct the_nilfs *nilfs = root->nilfs;
597
	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
R
Ryusuke Konishi 已提交
598 599 600 601
	unsigned long long blocks;
	unsigned long overhead;
	unsigned long nrsvblocks;
	sector_t nfreeblocks;
602
	u64 nmaxinodes, nfreeinodes;
R
Ryusuke Konishi 已提交
603 604 605 606 607 608 609 610 611 612 613 614 615 616 617
	int err;

	/*
	 * Compute all of the segment blocks
	 *
	 * The blocks before first segment and after last segment
	 * are excluded.
	 */
	blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments
		- nilfs->ns_first_data_block;
	nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment;

	/*
	 * Compute the overhead
	 *
618
	 * When distributing meta data blocks outside segment structure,
R
Ryusuke Konishi 已提交
619 620 621 622 623 624 625 626
	 * We must count them as the overhead.
	 */
	overhead = 0;

	err = nilfs_count_free_blocks(nilfs, &nfreeblocks);
	if (unlikely(err))
		return err;

627 628 629
	err = nilfs_ifile_count_free_inodes(root->ifile,
					    &nmaxinodes, &nfreeinodes);
	if (unlikely(err)) {
630
		nilfs_warn(sb, "failed to count free inodes: err=%d", err);
631 632 633 634 635 636 637
		if (err == -ERANGE) {
			/*
			 * If nilfs_palloc_count_max_entries() returns
			 * -ERANGE error code then we simply treat
			 * curent inodes count as maximum possible and
			 * zero as free inodes value.
			 */
638
			nmaxinodes = atomic64_read(&root->inodes_count);
639 640 641 642 643 644
			nfreeinodes = 0;
			err = 0;
		} else
			return err;
	}

R
Ryusuke Konishi 已提交
645 646 647 648 649 650
	buf->f_type = NILFS_SUPER_MAGIC;
	buf->f_bsize = sb->s_blocksize;
	buf->f_blocks = blocks - overhead;
	buf->f_bfree = nfreeblocks;
	buf->f_bavail = (buf->f_bfree >= nrsvblocks) ?
		(buf->f_bfree - nrsvblocks) : 0;
651 652
	buf->f_files = nmaxinodes;
	buf->f_ffree = nfreeinodes;
R
Ryusuke Konishi 已提交
653
	buf->f_namelen = NILFS_NAME_LEN;
654
	buf->f_fsid = u64_to_fsid(id);
655

R
Ryusuke Konishi 已提交
656 657 658
	return 0;
}

659
static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
660
{
661
	struct super_block *sb = dentry->d_sb;
662
	struct the_nilfs *nilfs = sb->s_fs_info;
663
	struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root;
664

665
	if (!nilfs_test_opt(nilfs, BARRIER))
666
		seq_puts(seq, ",nobarrier");
667 668
	if (root->cno != NILFS_CPTREE_CURRENT_CNO)
		seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno);
669
	if (nilfs_test_opt(nilfs, ERRORS_PANIC))
670
		seq_puts(seq, ",errors=panic");
671
	if (nilfs_test_opt(nilfs, ERRORS_CONT))
672
		seq_puts(seq, ",errors=continue");
673
	if (nilfs_test_opt(nilfs, STRICT_ORDER))
674
		seq_puts(seq, ",order=strict");
675
	if (nilfs_test_opt(nilfs, NORECOVERY))
676
		seq_puts(seq, ",norecovery");
677
	if (nilfs_test_opt(nilfs, DISCARD))
678
		seq_puts(seq, ",discard");
679 680 681 682

	return 0;
}

683
static const struct super_operations nilfs_sops = {
R
Ryusuke Konishi 已提交
684
	.alloc_inode    = nilfs_alloc_inode,
A
Al Viro 已提交
685
	.free_inode     = nilfs_free_inode,
R
Ryusuke Konishi 已提交
686
	.dirty_inode    = nilfs_dirty_inode,
A
Al Viro 已提交
687
	.evict_inode    = nilfs_evict_inode,
R
Ryusuke Konishi 已提交
688 689
	.put_super      = nilfs_put_super,
	.sync_fs        = nilfs_sync_fs,
690 691
	.freeze_fs	= nilfs_freeze,
	.unfreeze_fs	= nilfs_unfreeze,
R
Ryusuke Konishi 已提交
692 693
	.statfs         = nilfs_statfs,
	.remount_fs     = nilfs_remount,
694
	.show_options = nilfs_show_options
R
Ryusuke Konishi 已提交
695 696 697 698
};

/*
 * Mount option tokens, matched against the patterns in the tokens[]
 * table and dispatched on in parse_options().  Opt_err terminates the
 * table.
 */
enum {
	Opt_err_cont,		/* errors=continue */
	Opt_err_panic,		/* errors=panic */
	Opt_err_ro,		/* errors=remount-ro */
	Opt_barrier,		/* barrier */
	Opt_nobarrier,		/* nobarrier */
	Opt_snapshot,		/* cp=<number> */
	Opt_order,		/* order=<mode> */
	Opt_norecovery,		/* norecovery */
	Opt_discard,		/* discard */
	Opt_nodiscard,		/* nodiscard */
	Opt_err,		/* table terminator */
};

static match_table_t tokens = {
	{Opt_err_cont, "errors=continue"},
	{Opt_err_panic, "errors=panic"},
	{Opt_err_ro, "errors=remount-ro"},
707
	{Opt_barrier, "barrier"},
708
	{Opt_nobarrier, "nobarrier"},
R
Ryusuke Konishi 已提交
709 710
	{Opt_snapshot, "cp=%u"},
	{Opt_order, "order=%s"},
711
	{Opt_norecovery, "norecovery"},
712
	{Opt_discard, "discard"},
713
	{Opt_nodiscard, "nodiscard"},
R
Ryusuke Konishi 已提交
714 715 716
	{Opt_err, NULL}
};

717
static int parse_options(char *options, struct super_block *sb, int is_remount)
R
Ryusuke Konishi 已提交
718
{
719
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
720 721 722 723 724 725 726 727
	char *p;
	substring_t args[MAX_OPT_ARGS];

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		int token;
728

R
Ryusuke Konishi 已提交
729 730 731 732 733
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
734
		case Opt_barrier:
735
			nilfs_set_opt(nilfs, BARRIER);
736
			break;
737
		case Opt_nobarrier:
738
			nilfs_clear_opt(nilfs, BARRIER);
R
Ryusuke Konishi 已提交
739 740 741 742
			break;
		case Opt_order:
			if (strcmp(args[0].from, "relaxed") == 0)
				/* Ordered data semantics */
743
				nilfs_clear_opt(nilfs, STRICT_ORDER);
R
Ryusuke Konishi 已提交
744 745
			else if (strcmp(args[0].from, "strict") == 0)
				/* Strict in-order semantics */
746
				nilfs_set_opt(nilfs, STRICT_ORDER);
R
Ryusuke Konishi 已提交
747 748 749 750
			else
				return 0;
			break;
		case Opt_err_panic:
751
			nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC);
R
Ryusuke Konishi 已提交
752 753
			break;
		case Opt_err_ro:
754
			nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO);
R
Ryusuke Konishi 已提交
755 756
			break;
		case Opt_err_cont:
757
			nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT);
R
Ryusuke Konishi 已提交
758 759
			break;
		case Opt_snapshot:
760
			if (is_remount) {
761
				nilfs_err(sb,
762 763
					  "\"%s\" option is invalid for remount",
					  p);
R
Ryusuke Konishi 已提交
764
				return 0;
765
			}
R
Ryusuke Konishi 已提交
766
			break;
767
		case Opt_norecovery:
768
			nilfs_set_opt(nilfs, NORECOVERY);
769
			break;
770
		case Opt_discard:
771
			nilfs_set_opt(nilfs, DISCARD);
772
			break;
773
		case Opt_nodiscard:
774
			nilfs_clear_opt(nilfs, DISCARD);
775
			break;
R
Ryusuke Konishi 已提交
776
		default:
777
			nilfs_err(sb, "unrecognized mount option \"%s\"", p);
R
Ryusuke Konishi 已提交
778 779 780 781 782 783 784
			return 0;
		}
	}
	return 1;
}

static inline void
785
nilfs_set_default_options(struct super_block *sb,
R
Ryusuke Konishi 已提交
786 787
			  struct nilfs_super_block *sbp)
{
788
	struct the_nilfs *nilfs = sb->s_fs_info;
789 790

	nilfs->ns_mount_opt =
791
		NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
R
Ryusuke Konishi 已提交
792 793
}

794
static int nilfs_setup_super(struct super_block *sb, int is_mount)
R
Ryusuke Konishi 已提交
795
{
796
	struct the_nilfs *nilfs = sb->s_fs_info;
797 798 799 800 801
	struct nilfs_super_block **sbp;
	int max_mnt_count;
	int mnt_count;

	/* nilfs->ns_sem must be locked by the caller. */
802
	sbp = nilfs_prepare_super(sb, 0);
803 804 805
	if (!sbp)
		return -EIO;

806 807 808
	if (!is_mount)
		goto skip_mount_setup;

809 810
	max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count);
	mnt_count = le16_to_cpu(sbp[0]->s_mnt_count);
R
Ryusuke Konishi 已提交
811

812
	if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
813
		nilfs_warn(sb, "mounting fs with errors");
R
Ryusuke Konishi 已提交
814 815
#if 0
	} else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) {
816
		nilfs_warn(sb, "maximal mount count reached");
R
Ryusuke Konishi 已提交
817 818 819
#endif
	}
	if (!max_mnt_count)
820
		sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
R
Ryusuke Konishi 已提交
821

822
	sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1);
823
	sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds());
824 825

skip_mount_setup:
826 827
	sbp[0]->s_state =
		cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS);
J
Jiro SEKIBA 已提交
828
	/* synchronize sbp[1] with sbp[0] */
829 830
	if (sbp[1])
		memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
831
	return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
R
Ryusuke Konishi 已提交
832 833
}

834 835 836
/*
 * nilfs_read_super_block - read the block containing a super block
 * @sb: super block instance
 * @pos: byte position of the super block on the device
 * @blocksize: block size used to split @pos into block number and offset
 * @pbh: place to store the buffer head of the block that was read
 *
 * Return: pointer to the super block image inside the buffer, or NULL
 * if the block could not be read (*@pbh is NULL in that case).
 */
struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
						 u64 pos, int blocksize,
						 struct buffer_head **pbh)
{
	unsigned long long blkno = pos;
	unsigned long in_block;
	struct buffer_head *bh;

	/* do_div() leaves the quotient in blkno and returns the remainder */
	in_block = do_div(blkno, blocksize);

	bh = sb_bread(sb, blkno);
	*pbh = bh;
	if (!bh)
		return NULL;

	return (struct nilfs_super_block *)((char *)bh->b_data + in_block);
}

int nilfs_store_magic_and_option(struct super_block *sb,
				 struct nilfs_super_block *sbp,
				 char *data)
{
852
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
853 854 855 856 857

	sb->s_magic = le16_to_cpu(sbp->s_magic);

	/* FS independent flags */
#ifdef NILFS_ATIME_DISABLE
858
	sb->s_flags |= SB_NOATIME;
R
Ryusuke Konishi 已提交
859 860
#endif

861
	nilfs_set_default_options(sb, sbp);
R
Ryusuke Konishi 已提交
862

863 864 865 866
	nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid);
	nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid);
	nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval);
	nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max);
R
Ryusuke Konishi 已提交
867

868
	return !parse_options(data, sb, 0) ? -EINVAL : 0;
R
Ryusuke Konishi 已提交
869 870
}

871 872 873 874 875 876 877 878
int nilfs_check_feature_compatibility(struct super_block *sb,
				      struct nilfs_super_block *sbp)
{
	__u64 features;

	features = le64_to_cpu(sbp->s_feature_incompat) &
		~NILFS_FEATURE_INCOMPAT_SUPP;
	if (features) {
879
		nilfs_err(sb,
880 881
			  "couldn't mount because of unsupported optional features (%llx)",
			  (unsigned long long)features);
882 883 884 885
		return -EINVAL;
	}
	features = le64_to_cpu(sbp->s_feature_compat_ro) &
		~NILFS_FEATURE_COMPAT_RO_SUPP;
886
	if (!sb_rdonly(sb) && features) {
887
		nilfs_err(sb,
888 889
			  "couldn't mount RDWR because of unsupported optional features (%llx)",
			  (unsigned long long)features);
890 891 892 893 894
		return -EINVAL;
	}
	return 0;
}

895 896 897 898 899 900 901 902 903 904 905
static int nilfs_get_root_dentry(struct super_block *sb,
				 struct nilfs_root *root,
				 struct dentry **root_dentry)
{
	struct inode *inode;
	struct dentry *dentry;
	int ret = 0;

	inode = nilfs_iget(sb, root, NILFS_ROOT_INO);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
906
		nilfs_err(sb, "error %d getting root inode", ret);
907 908 909 910
		goto out;
	}
	if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) {
		iput(inode);
911
		nilfs_err(sb, "corrupt root inode");
912 913 914 915
		ret = -EINVAL;
		goto out;
	}

916 917 918
	if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
		dentry = d_find_alias(inode);
		if (!dentry) {
919
			dentry = d_make_root(inode);
920 921 922 923 924 925 926 927
			if (!dentry) {
				ret = -ENOMEM;
				goto failed_dentry;
			}
		} else {
			iput(inode);
		}
	} else {
928
		dentry = d_obtain_root(inode);
929 930 931 932
		if (IS_ERR(dentry)) {
			ret = PTR_ERR(dentry);
			goto failed_dentry;
		}
933 934 935 936
	}
	*root_dentry = dentry;
 out:
	return ret;
937 938

 failed_dentry:
939
	nilfs_err(sb, "error %d getting root dentry", ret);
940
	goto out;
941 942
}

943 944 945
/*
 * nilfs_attach_snapshot - attach a snapshot and get its root dentry
 * @s: super block instance
 * @cno: checkpoint number of the snapshot
 * @root_dentry: place to store the root dentry of the snapshot tree
 *
 * Runs under ns_snapshot_mount_mutex.  The checkpoint must be marked as
 * a snapshot in the cpfile; it is then attached and its root dentry is
 * looked up.  The reference on the root object taken by the attach is
 * dropped before returning.
 *
 * Return: 0 on success, -EINVAL if the checkpoint does not exist or is
 * not a snapshot, or an error from the attach / dentry lookup.
 */
static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
				 struct dentry **root_dentry)
{
	struct the_nilfs *nilfs = s->s_fs_info;
	struct nilfs_root *root;
	int ret;

	mutex_lock(&nilfs->ns_snapshot_mount_mutex);

	down_read(&nilfs->ns_segctor_sem);
	ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno);
	up_read(&nilfs->ns_segctor_sem);

	if (ret < 0) {
		if (ret == -ENOENT)
			ret = -EINVAL;
		goto unlock;
	}
	if (ret == 0) {
		nilfs_err(s,
			  "The specified checkpoint is not a snapshot (checkpoint number=%llu)",
			  (unsigned long long)cno);
		ret = -EINVAL;
		goto unlock;
	}

	ret = nilfs_attach_checkpoint(s, cno, false, &root);
	if (ret) {
		nilfs_err(s,
			  "error %d while loading snapshot (checkpoint number=%llu)",
			  ret, (unsigned long long)cno);
		goto unlock;
	}

	ret = nilfs_get_root_dentry(s, root, root_dentry);
	nilfs_put_root(root);

unlock:
	mutex_unlock(&nilfs->ns_snapshot_mount_mutex);
	return ret;
}

980
/**
981
 * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint
982 983 984 985
 * @root_dentry: root dentry of the tree to be shrunk
 *
 * This function returns true if the tree was in-use.
 */
986
static bool nilfs_tree_is_busy(struct dentry *root_dentry)
987 988
{
	shrink_dcache_parent(root_dentry);
989
	return d_count(root_dentry) > 1;
990 991
}

992 993
/**
 * nilfs_checkpoint_is_mounted() - test whether a checkpoint is in use
 * @sb: super block instance
 * @cno: checkpoint number to test
 *
 * Checkpoints newer than ns_cno are reported as not mounted, and
 * checkpoints at or after nilfs_last_cno() are always reported as mounted
 * so that recent checkpoints are protected.  For any other checkpoint the
 * answer is that of nilfs_tree_is_busy() on the root dentry, provided a
 * nilfs_root, a root inode and a dentry alias can all be found for @cno;
 * otherwise the checkpoint is reported as not mounted.
 *
 * Return: nonzero (true) if the checkpoint is considered mounted,
 * zero (false) otherwise.
 */
int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_root *root;
	struct inode *inode;
	struct dentry *dentry;
	int ret;

	if (cno > nilfs->ns_cno)
		return false;

	if (cno >= nilfs_last_cno(nilfs))
		return true;	/* protect recent checkpoints */

	ret = false;
	root = nilfs_lookup_root(nilfs, cno);
	if (root) {
		inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO);
		if (inode) {
			dentry = d_find_alias(inode);
			if (dentry) {
				ret = nilfs_tree_is_busy(dentry);
				dput(dentry);
			}
			iput(inode);
		}
		nilfs_put_root(root);
	}
	return ret;
}

R
Ryusuke Konishi 已提交
1023 1024 1025 1026 1027 1028
/**
 * nilfs_fill_super() - initialize a super block instance
 * @sb: super_block
 * @data: mount options
 * @silent: silent mode flag
 *
1029
 * This function is called exclusively by nilfs->ns_mount_mutex.
R
Ryusuke Konishi 已提交
1030 1031 1032
 * So, the recovery process is protected from other simultaneous mounts.
 */
static int
1033
nilfs_fill_super(struct super_block *sb, void *data, int silent)
R
Ryusuke Konishi 已提交
1034
{
1035
	struct the_nilfs *nilfs;
1036
	struct nilfs_root *fsroot;
R
Ryusuke Konishi 已提交
1037 1038 1039
	__u64 cno;
	int err;

1040
	nilfs = alloc_nilfs(sb);
1041
	if (!nilfs)
R
Ryusuke Konishi 已提交
1042 1043
		return -ENOMEM;

1044
	sb->s_fs_info = nilfs;
R
Ryusuke Konishi 已提交
1045

1046
	err = init_nilfs(nilfs, sb, (char *)data);
R
Ryusuke Konishi 已提交
1047
	if (err)
1048
		goto failed_nilfs;
R
Ryusuke Konishi 已提交
1049 1050 1051 1052

	sb->s_op = &nilfs_sops;
	sb->s_export_op = &nilfs_export_ops;
	sb->s_root = NULL;
1053
	sb->s_time_gran = 1;
1054
	sb->s_max_links = NILFS_LINK_MAX;
1055

1056
	sb->s_bdi = bdi_get(sb->s_bdev->bd_bdi);
R
Ryusuke Konishi 已提交
1057

1058
	err = load_nilfs(nilfs, sb);
1059
	if (err)
1060
		goto failed_nilfs;
1061

R
Ryusuke Konishi 已提交
1062
	cno = nilfs_last_cno(nilfs);
1063
	err = nilfs_attach_checkpoint(sb, cno, true, &fsroot);
R
Ryusuke Konishi 已提交
1064
	if (err) {
1065
		nilfs_err(sb,
1066 1067
			  "error %d while loading last checkpoint (checkpoint number=%llu)",
			  err, (unsigned long long)cno);
1068
		goto failed_unload;
R
Ryusuke Konishi 已提交
1069 1070
	}

1071
	if (!sb_rdonly(sb)) {
1072
		err = nilfs_attach_log_writer(sb, fsroot);
R
Ryusuke Konishi 已提交
1073 1074 1075 1076
		if (err)
			goto failed_checkpoint;
	}

1077 1078
	err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root);
	if (err)
R
Ryusuke Konishi 已提交
1079 1080
		goto failed_segctor;

1081
	nilfs_put_root(fsroot);
R
Ryusuke Konishi 已提交
1082

1083
	if (!sb_rdonly(sb)) {
R
Ryusuke Konishi 已提交
1084
		down_write(&nilfs->ns_sem);
1085
		nilfs_setup_super(sb, true);
R
Ryusuke Konishi 已提交
1086 1087 1088 1089 1090 1091
		up_write(&nilfs->ns_sem);
	}

	return 0;

 failed_segctor:
1092
	nilfs_detach_log_writer(sb);
R
Ryusuke Konishi 已提交
1093 1094

 failed_checkpoint:
1095
	nilfs_put_root(fsroot);
R
Ryusuke Konishi 已提交
1096

1097 1098 1099 1100 1101
 failed_unload:
	iput(nilfs->ns_sufile);
	iput(nilfs->ns_cpfile);
	iput(nilfs->ns_dat);

1102 1103
 failed_nilfs:
	destroy_nilfs(nilfs);
R
Ryusuke Konishi 已提交
1104 1105 1106 1107 1108
	return err;
}

static int nilfs_remount(struct super_block *sb, int *flags, char *data)
{
1109
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
1110
	unsigned long old_sb_flags;
1111
	unsigned long old_mount_opt;
1112
	int err;
R
Ryusuke Konishi 已提交
1113

1114
	sync_filesystem(sb);
R
Ryusuke Konishi 已提交
1115
	old_sb_flags = sb->s_flags;
1116
	old_mount_opt = nilfs->ns_mount_opt;
R
Ryusuke Konishi 已提交
1117

1118
	if (!parse_options(data, sb, 1)) {
R
Ryusuke Konishi 已提交
1119 1120 1121
		err = -EINVAL;
		goto restore_opts;
	}
1122
	sb->s_flags = (sb->s_flags & ~SB_POSIXACL);
R
Ryusuke Konishi 已提交
1123

1124
	err = -EINVAL;
R
Ryusuke Konishi 已提交
1125

1126
	if (!nilfs_valid_fs(nilfs)) {
1127 1128
		nilfs_warn(sb,
			   "couldn't remount because the filesystem is in an incomplete recovery state");
1129 1130 1131
		goto restore_opts;
	}

1132
	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
R
Ryusuke Konishi 已提交
1133
		goto out;
1134
	if (*flags & SB_RDONLY) {
1135 1136
		/* Shutting down log writer */
		nilfs_detach_log_writer(sb);
1137
		sb->s_flags |= SB_RDONLY;
R
Ryusuke Konishi 已提交
1138 1139 1140 1141 1142 1143

		/*
		 * Remounting a valid RW partition RDONLY, so set
		 * the RDONLY flag and then mark the partition as valid again.
		 */
		down_write(&nilfs->ns_sem);
1144
		nilfs_cleanup_super(sb);
R
Ryusuke Konishi 已提交
1145 1146
		up_write(&nilfs->ns_sem);
	} else {
1147
		__u64 features;
1148
		struct nilfs_root *root;
1149

R
Ryusuke Konishi 已提交
1150 1151 1152 1153 1154
		/*
		 * Mounting a RDONLY partition read-write, so reread and
		 * store the current valid flag.  (It may have been changed
		 * by fsck since we originally mounted the partition.)
		 */
1155 1156 1157 1158 1159
		down_read(&nilfs->ns_sem);
		features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
			~NILFS_FEATURE_COMPAT_RO_SUPP;
		up_read(&nilfs->ns_sem);
		if (features) {
1160 1161 1162
			nilfs_warn(sb,
				   "couldn't remount RDWR because of unsupported optional features (%llx)",
				   (unsigned long long)features);
1163 1164 1165 1166
			err = -EROFS;
			goto restore_opts;
		}

1167
		sb->s_flags &= ~SB_RDONLY;
R
Ryusuke Konishi 已提交
1168

1169
		root = NILFS_I(d_inode(sb->s_root))->i_root;
1170
		err = nilfs_attach_log_writer(sb, root);
R
Ryusuke Konishi 已提交
1171
		if (err)
1172
			goto restore_opts;
R
Ryusuke Konishi 已提交
1173 1174

		down_write(&nilfs->ns_sem);
1175
		nilfs_setup_super(sb, true);
R
Ryusuke Konishi 已提交
1176 1177 1178 1179 1180 1181 1182
		up_write(&nilfs->ns_sem);
	}
 out:
	return 0;

 restore_opts:
	sb->s_flags = old_sb_flags;
1183
	nilfs->ns_mount_opt = old_mount_opt;
R
Ryusuke Konishi 已提交
1184 1185 1186 1187 1188 1189 1190 1191 1192
	return err;
}

/* Mount parameters gathered by nilfs_mount() before the super block lookup. */
struct nilfs_super_data {
	struct block_device *bdev;	/* block device opened for this mount */
	__u64 cno;	/* snapshot checkpoint number; 0 if no snapshot option */
	int flags;	/* mount flags as passed to nilfs_mount() */
};

1193 1194 1195 1196 1197 1198 1199 1200
static int nilfs_parse_snapshot_option(const char *option,
				       const substring_t *arg,
				       struct nilfs_super_data *sd)
{
	unsigned long long val;
	const char *msg = NULL;
	int err;

1201
	if (!(sd->flags & SB_RDONLY)) {
1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220
		msg = "read-only option is not specified";
		goto parse_error;
	}

	err = kstrtoull(arg->from, 0, &val);
	if (err) {
		if (err == -ERANGE)
			msg = "too large checkpoint number";
		else
			msg = "malformed argument";
		goto parse_error;
	} else if (val == 0) {
		msg = "invalid checkpoint number 0";
		goto parse_error;
	}
	sd->cno = val;
	return 0;

parse_error:
1221
	nilfs_err(NULL, "invalid option \"%s\": %s", option, msg);
1222 1223 1224
	return 1;
}

R
Ryusuke Konishi 已提交
1225 1226 1227 1228 1229 1230 1231 1232 1233
/**
 * nilfs_identify - pre-read mount options needed to identify mount instance
 * @data: mount options
 * @sd: nilfs_super_data
 */
static int nilfs_identify(char *data, struct nilfs_super_data *sd)
{
	char *p, *options = data;
	substring_t args[MAX_OPT_ARGS];
1234
	int token;
R
Ryusuke Konishi 已提交
1235 1236 1237 1238 1239 1240
	int ret = 0;

	do {
		p = strsep(&options, ",");
		if (p != NULL && *p) {
			token = match_token(p, tokens, args);
1241 1242 1243
			if (token == Opt_snapshot)
				ret = nilfs_parse_snapshot_option(p, &args[0],
								  sd);
R
Ryusuke Konishi 已提交
1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254
		}
		if (!options)
			break;
		BUG_ON(options == data);
		*(options - 1) = ',';
	} while (!ret);
	return ret;
}

static int nilfs_set_bdev_super(struct super_block *s, void *data)
{
1255
	s->s_bdev = data;
R
Ryusuke Konishi 已提交
1256 1257 1258 1259 1260 1261
	s->s_dev = s->s_bdev->bd_dev;
	return 0;
}

static int nilfs_test_bdev_super(struct super_block *s, void *data)
{
1262
	return (void *)s->s_bdev == data;
R
Ryusuke Konishi 已提交
1263 1264
}

A
Al Viro 已提交
1265 1266 1267
static struct dentry *
nilfs_mount(struct file_system_type *fs_type, int flags,
	     const char *dev_name, void *data)
R
Ryusuke Konishi 已提交
1268 1269
{
	struct nilfs_super_data sd;
1270
	struct super_block *s;
1271
	fmode_t mode = FMODE_READ | FMODE_EXCL;
1272 1273
	struct dentry *root_dentry;
	int err, s_new = false;
R
Ryusuke Konishi 已提交
1274

1275
	if (!(flags & SB_RDONLY))
1276 1277
		mode |= FMODE_WRITE;

1278
	sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
J
Jan Blunck 已提交
1279
	if (IS_ERR(sd.bdev))
A
Al Viro 已提交
1280
		return ERR_CAST(sd.bdev);
R
Ryusuke Konishi 已提交
1281 1282 1283 1284 1285 1286 1287 1288

	sd.cno = 0;
	sd.flags = flags;
	if (nilfs_identify((char *)data, &sd)) {
		err = -EINVAL;
		goto failed;
	}

1289
	/*
1290 1291 1292
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
1293
	 */
1294 1295 1296 1297 1298 1299
	mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
	if (sd.bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
		err = -EBUSY;
		goto failed;
	}
D
David Howells 已提交
1300 1301
	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
		 sd.bdev);
1302
	mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
1303 1304
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
1305
		goto failed;
R
Ryusuke Konishi 已提交
1306 1307 1308
	}

	if (!s->s_root) {
1309
		s_new = true;
1310

1311
		/* New superblock instance created */
1312
		s->s_mode = mode;
1313
		snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
R
Ryusuke Konishi 已提交
1314 1315
		sb_set_blocksize(s, block_size(sd.bdev));

1316
		err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
R
Ryusuke Konishi 已提交
1317
		if (err)
1318
			goto failed_super;
R
Ryusuke Konishi 已提交
1319

1320
		s->s_flags |= SB_ACTIVE;
1321
	} else if (!sd.cno) {
1322
		if (nilfs_tree_is_busy(s->s_root)) {
1323
			if ((flags ^ s->s_flags) & SB_RDONLY) {
1324
				nilfs_err(s,
1325
					  "the device already has a %s mount.",
1326
					  sb_rdonly(s) ? "read-only" : "read/write");
1327 1328 1329
				err = -EBUSY;
				goto failed_super;
			}
1330
		} else {
1331 1332 1333 1334 1335 1336 1337 1338
			/*
			 * Try remount to setup mount states if the current
			 * tree is not mounted and only snapshots use this sb.
			 */
			err = nilfs_remount(s, &flags, data);
			if (err)
				goto failed_super;
		}
R
Ryusuke Konishi 已提交
1339 1340
	}

1341 1342
	if (sd.cno) {
		err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
1343
		if (err)
1344 1345 1346
			goto failed_super;
	} else {
		root_dentry = dget(s->s_root);
R
Ryusuke Konishi 已提交
1347 1348
	}

1349
	if (!s_new)
1350
		blkdev_put(sd.bdev, mode);
R
Ryusuke Konishi 已提交
1351

A
Al Viro 已提交
1352
	return root_dentry;
R
Ryusuke Konishi 已提交
1353

1354
 failed_super:
1355
	deactivate_locked_super(s);
R
Ryusuke Konishi 已提交
1356

1357 1358
 failed:
	if (!s_new)
1359
		blkdev_put(sd.bdev, mode);
A
Al Viro 已提交
1360
	return ERR_PTR(err);
R
Ryusuke Konishi 已提交
1361 1362 1363 1364 1365
}

struct file_system_type nilfs_fs_type = {
	.owner    = THIS_MODULE,
	.name     = "nilfs2",
A
Al Viro 已提交
1366
	.mount    = nilfs_mount,
R
Ryusuke Konishi 已提交
1367 1368 1369
	.kill_sb  = kill_block_super,
	.fs_flags = FS_REQUIRES_DEV,
};
1370
MODULE_ALIAS_FS("nilfs2");
R
Ryusuke Konishi 已提交
1371

1372
static void nilfs_inode_init_once(void *obj)
R
Ryusuke Konishi 已提交
1373
{
1374
	struct nilfs_inode_info *ii = obj;
R
Ryusuke Konishi 已提交
1375

1376 1377 1378 1379
	INIT_LIST_HEAD(&ii->i_dirty);
#ifdef CONFIG_NILFS_XATTR
	init_rwsem(&ii->xattr_sem);
#endif
1380
	address_space_init_once(&ii->i_btnode_cache);
1381
	ii->i_bmap = &ii->i_bmap_data;
1382 1383
	inode_init_once(&ii->vfs_inode);
}
R
Ryusuke Konishi 已提交
1384

1385 1386 1387 1388
/*
 * Constructor passed to kmem_cache_create() for nilfs_segbuf_cachep:
 * zero-fills the segment buffer object.
 */
static void nilfs_segbuf_init_once(void *obj)
{
	struct nilfs_segment_buffer *segbuf = obj;

	memset(segbuf, 0, sizeof(*segbuf));
}
R
Ryusuke Konishi 已提交
1389

1390 1391
static void nilfs_destroy_cachep(void)
{
1392 1393 1394 1395 1396 1397
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();

1398 1399 1400 1401
	kmem_cache_destroy(nilfs_inode_cachep);
	kmem_cache_destroy(nilfs_transaction_cachep);
	kmem_cache_destroy(nilfs_segbuf_cachep);
	kmem_cache_destroy(nilfs_btree_path_cache);
1402
}
R
Ryusuke Konishi 已提交
1403

1404 1405 1406 1407
/*
 * Create the slab caches for inodes, transactions, segment buffers and
 * btree paths.  If any creation fails, the caches created so far are
 * released through nilfs_destroy_cachep().
 *
 * Return: 0 on success, -ENOMEM if a cache could not be created.
 */
static int __init nilfs_init_cachep(void)
{
	nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache",
			sizeof(struct nilfs_inode_info), 0,
			SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
			nilfs_inode_init_once);
	if (!nilfs_inode_cachep)
		goto fail;

	nilfs_transaction_cachep = kmem_cache_create("nilfs2_transaction_cache",
			sizeof(struct nilfs_transaction_info), 0,
			SLAB_RECLAIM_ACCOUNT, NULL);
	if (!nilfs_transaction_cachep)
		goto fail;

	nilfs_segbuf_cachep = kmem_cache_create("nilfs2_segbuf_cache",
			sizeof(struct nilfs_segment_buffer), 0,
			SLAB_RECLAIM_ACCOUNT, nilfs_segbuf_init_once);
	if (!nilfs_segbuf_cachep)
		goto fail;

	/* One object holds a whole path: NILFS_BTREE_LEVEL_MAX entries */
	nilfs_btree_path_cache = kmem_cache_create("nilfs2_btree_path_cache",
			sizeof(struct nilfs_btree_path) * NILFS_BTREE_LEVEL_MAX,
			0, 0, NULL);
	if (!nilfs_btree_path_cache)
		goto fail;

	return 0;

fail:
	nilfs_destroy_cachep();
	return -ENOMEM;
}

/*
 * Module entry point: creates the slab caches, initializes the sysfs
 * interface and registers the filesystem type, in that order.  A failing
 * step undoes the steps completed before it.
 *
 * Return: 0 on success, or the error returned by the failing step.
 */
static int __init init_nilfs_fs(void)
{
	int err;

	err = nilfs_init_cachep();
	if (err)
		goto fail;

	err = nilfs_sysfs_init();
	if (err)
		goto free_cachep;

	err = register_filesystem(&nilfs_fs_type);
	if (err)
		goto deinit_sysfs_entry;

	printk(KERN_INFO "NILFS version 2 loaded\n");
	return 0;

deinit_sysfs_entry:
	nilfs_sysfs_exit();
free_cachep:
	nilfs_destroy_cachep();
fail:
	return err;
}

/*
 * Module exit point.  Tears things down in the reverse order of
 * init_nilfs_fs(): the filesystem type is unregistered first so that it
 * is no longer reachable while the sysfs interface and the slab caches it
 * depends on are being released.
 */
static void __exit exit_nilfs_fs(void)
{
	unregister_filesystem(&nilfs_fs_type);
	nilfs_sysfs_exit();
	nilfs_destroy_cachep();
}

/* Hook init_nilfs_fs() and exit_nilfs_fs() up as the module entry/exit points. */
module_init(init_nilfs_fs)
module_exit(exit_nilfs_fs)