super.c 35.7 KB
Newer Older
R
Ryusuke Konishi 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
/*
 * super.c - NILFS module and super block management.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 */
/*
 *  linux/fs/ext2/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
#include <linux/crc32.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
49 50
#include <linux/seq_file.h>
#include <linux/mount.h>
R
Ryusuke Konishi 已提交
51
#include "nilfs.h"
52
#include "export.h"
R
Ryusuke Konishi 已提交
53 54
#include "mdt.h"
#include "alloc.h"
55 56
#include "btree.h"
#include "btnode.h"
R
Ryusuke Konishi 已提交
57 58
#include "page.h"
#include "cpfile.h"
R
Ryusuke Konishi 已提交
59
#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
R
Ryusuke Konishi 已提交
60 61 62 63 64 65 66 67 68 69
#include "ifile.h"
#include "dat.h"
#include "segment.h"
#include "segbuf.h"

MODULE_AUTHOR("NTT Corp.");
MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
		   "(NILFS)");
MODULE_LICENSE("GPL");

70
/* Slab cache from which struct nilfs_inode_info objects are allocated. */
static struct kmem_cache *nilfs_inode_cachep;

/* Further slab caches; non-static, so visible outside this file. */
struct kmem_cache *nilfs_transaction_cachep;
struct kmem_cache *nilfs_segbuf_cachep;
struct kmem_cache *nilfs_btree_path_cache;

/* Forward declarations: both are referenced before their definitions. */
static int nilfs_setup_super(struct super_block *sb, int is_mount);
static int nilfs_remount(struct super_block *sb, int *flags, char *data);

78
static void nilfs_set_error(struct super_block *sb)
R
Ryusuke Konishi 已提交
79
{
80
	struct the_nilfs *nilfs = sb->s_fs_info;
81
	struct nilfs_super_block **sbp;
R
Ryusuke Konishi 已提交
82 83 84 85

	down_write(&nilfs->ns_sem);
	if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
		nilfs->ns_mount_state |= NILFS_ERROR_FS;
86
		sbp = nilfs_prepare_super(sb, 0);
87 88
		if (likely(sbp)) {
			sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
J
Jiro SEKIBA 已提交
89 90
			if (sbp[1])
				sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
91
			nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
92
		}
R
Ryusuke Konishi 已提交
93 94 95 96
	}
	up_write(&nilfs->ns_sem);
}

R
Ryusuke Konishi 已提交
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
/**
 * nilfs_error() - report failure condition on a filesystem
 *
 * nilfs_error() sets an ERROR_FS flag on the superblock as well as
 * reporting an error message.  It should be called when NILFS detects
 * incoherences or defects of meta data on disk.  As for sustainable
 * errors such as a single-shot I/O error, nilfs_warning() or the printk()
 * function should be used instead.
 *
 * The segment constructor must not call this function because it can
 * kill itself.
 */
void nilfs_error(struct super_block *sb, const char *function,
		 const char *fmt, ...)
{
112
	struct the_nilfs *nilfs = sb->s_fs_info;
113
	struct va_format vaf;
R
Ryusuke Konishi 已提交
114 115 116
	va_list args;

	va_start(args, fmt);
117 118 119 120 121 122 123

	vaf.fmt = fmt;
	vaf.va = &args;

	printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n",
	       sb->s_id, function, &vaf);

R
Ryusuke Konishi 已提交
124 125 126
	va_end(args);

	if (!(sb->s_flags & MS_RDONLY)) {
127
		nilfs_set_error(sb);
R
Ryusuke Konishi 已提交
128

129
		if (nilfs_test_opt(nilfs, ERRORS_RO)) {
R
Ryusuke Konishi 已提交
130 131 132 133 134
			printk(KERN_CRIT "Remounting filesystem read-only\n");
			sb->s_flags |= MS_RDONLY;
		}
	}

135
	if (nilfs_test_opt(nilfs, ERRORS_PANIC))
R
Ryusuke Konishi 已提交
136 137 138 139 140 141 142
		panic("NILFS (device %s): panic forced after error\n",
		      sb->s_id);
}

void nilfs_warning(struct super_block *sb, const char *function,
		   const char *fmt, ...)
{
143
	struct va_format vaf;
R
Ryusuke Konishi 已提交
144 145 146
	va_list args;

	va_start(args, fmt);
147 148 149 150 151 152 153

	vaf.fmt = fmt;
	vaf.va = &args;

	printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n",
	       sb->s_id, function, &vaf);

R
Ryusuke Konishi 已提交
154 155 156 157
	va_end(args);
}


158
struct inode *nilfs_alloc_inode(struct super_block *sb)
R
Ryusuke Konishi 已提交
159 160 161 162 163 164 165 166
{
	struct nilfs_inode_info *ii;

	ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS);
	if (!ii)
		return NULL;
	ii->i_bh = NULL;
	ii->i_state = 0;
167
	ii->i_cno = 0;
R
Ryusuke Konishi 已提交
168
	ii->vfs_inode.i_version = 1;
169
	nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi);
R
Ryusuke Konishi 已提交
170 171 172
	return &ii->vfs_inode;
}

N
Nick Piggin 已提交
173
static void nilfs_i_callback(struct rcu_head *head)
R
Ryusuke Konishi 已提交
174
{
N
Nick Piggin 已提交
175
	struct inode *inode = container_of(head, struct inode, i_rcu);
176 177
	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

N
Nick Piggin 已提交
178 179
	INIT_LIST_HEAD(&inode->i_dentry);

180 181 182 183
	if (mdi) {
		kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
		kfree(mdi);
	}
R
Ryusuke Konishi 已提交
184 185 186
	kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
}

N
Nick Piggin 已提交
187 188 189 190 191
/**
 * nilfs_destroy_inode() - release an inode
 * @inode: inode to release
 *
 * The actual freeing is deferred: nilfs_i_callback() is queued through
 * call_rcu() on the inode's i_rcu head.
 */
void nilfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, nilfs_i_callback);
}

192
static int nilfs_sync_super(struct super_block *sb, int flag)
R
Ryusuke Konishi 已提交
193
{
194
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
195 196 197
	int err;

 retry:
198
	set_buffer_dirty(nilfs->ns_sbh[0]);
199
	if (nilfs_test_opt(nilfs, BARRIER)) {
C
Christoph Hellwig 已提交
200
		err = __sync_dirty_buffer(nilfs->ns_sbh[0],
201
					  WRITE_SYNC | WRITE_FLUSH_FUA);
C
Christoph Hellwig 已提交
202 203
	} else {
		err = sync_dirty_buffer(nilfs->ns_sbh[0]);
R
Ryusuke Konishi 已提交
204
	}
C
Christoph Hellwig 已提交
205

206
	if (unlikely(err)) {
R
Ryusuke Konishi 已提交
207 208
		printk(KERN_ERR
		       "NILFS: unable to write superblock (err=%d)\n", err);
209
		if (err == -EIO && nilfs->ns_sbh[1]) {
J
Jiro SEKIBA 已提交
210 211 212 213 214 215
			/*
			 * sbp[0] points to newer log than sbp[1],
			 * so copy sbp[0] to sbp[1] to take over sbp[0].
			 */
			memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0],
			       nilfs->ns_sbsize);
216 217 218 219 220 221
			nilfs_fall_back_super_block(nilfs);
			goto retry;
		}
	} else {
		struct nilfs_super_block *sbp = nilfs->ns_sbp[0];

J
Jiro SEKIBA 已提交
222 223
		nilfs->ns_sbwcount++;

224 225 226 227
		/*
		 * The latest segment becomes trailable from the position
		 * written in superblock.
		 */
R
Ryusuke Konishi 已提交
228
		clear_nilfs_discontinued(nilfs);
229 230 231

		/* update GC protection for recent segments */
		if (nilfs->ns_sbh[1]) {
J
Jiro SEKIBA 已提交
232
			if (flag == NILFS_SB_COMMIT_ALL) {
233
				set_buffer_dirty(nilfs->ns_sbh[1]);
J
Jiro SEKIBA 已提交
234 235
				if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0)
					goto out;
236
			}
J
Jiro SEKIBA 已提交
237 238 239
			if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) <
			    le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno))
				sbp = nilfs->ns_sbp[1];
240
		}
R
Ryusuke Konishi 已提交
241

J
Jiro SEKIBA 已提交
242 243 244 245 246
		spin_lock(&nilfs->ns_last_segment_lock);
		nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq);
		spin_unlock(&nilfs->ns_last_segment_lock);
	}
 out:
R
Ryusuke Konishi 已提交
247 248 249
	return err;
}

250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
/**
 * nilfs_set_log_cursor() - store the current log position in a super block
 * @sbp: super block image to update
 * @nilfs: nilfs object supplying the values
 *
 * Fills in the free block count and, under ns_last_segment_lock, the
 * last sequence number, partial segment and checkpoint number.
 * nilfs->ns_sem must be locked by the caller.  The return value of
 * nilfs_count_free_blocks() is not checked here.
 */
void nilfs_set_log_cursor(struct nilfs_super_block *sbp,
			  struct the_nilfs *nilfs)
{
	sector_t free_blocks;

	nilfs_count_free_blocks(nilfs, &free_blocks);
	sbp->s_free_blocks_count = cpu_to_le64(free_blocks);

	spin_lock(&nilfs->ns_last_segment_lock);
	sbp->s_last_seq  = cpu_to_le64(nilfs->ns_last_seq);
	sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg);
	sbp->s_last_cno  = cpu_to_le64(nilfs->ns_last_cno);
	spin_unlock(&nilfs->ns_last_segment_lock);
}

266
struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
J
Jiro SEKIBA 已提交
267
					       int flip)
R
Ryusuke Konishi 已提交
268
{
269
	struct the_nilfs *nilfs = sb->s_fs_info;
270
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
R
Ryusuke Konishi 已提交
271

272
	/* nilfs->ns_sem must be locked by the caller. */
273
	if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
274 275
		if (sbp[1] &&
		    sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) {
J
Jiro SEKIBA 已提交
276
			memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
277
		} else {
278
			printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
279
			       sb->s_id);
280
			return NULL;
281
		}
J
Jiro SEKIBA 已提交
282 283 284
	} else if (sbp[1] &&
		   sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
			memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
285
	}
J
Jiro SEKIBA 已提交
286 287 288 289

	if (flip && sbp[1])
		nilfs_swap_super_block(nilfs);

290 291 292
	return sbp;
}

293
int nilfs_commit_super(struct super_block *sb, int flag)
294
{
295
	struct the_nilfs *nilfs = sb->s_fs_info;
296 297 298 299
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
	time_t t;

	/* nilfs->ns_sem must be locked by the caller. */
300
	t = get_seconds();
J
Jiro SEKIBA 已提交
301
	nilfs->ns_sbwtime = t;
302 303 304 305 306
	sbp[0]->s_wtime = cpu_to_le64(t);
	sbp[0]->s_sum = 0;
	sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
					     (unsigned char *)sbp[0],
					     nilfs->ns_sbsize));
J
Jiro SEKIBA 已提交
307 308 309 310 311 312
	if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) {
		sbp[1]->s_wtime = sbp[0]->s_wtime;
		sbp[1]->s_sum = 0;
		sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
					    (unsigned char *)sbp[1],
					    nilfs->ns_sbsize));
313
	}
314
	clear_nilfs_sb_dirty(nilfs);
315
	return nilfs_sync_super(sb, flag);
R
Ryusuke Konishi 已提交
316 317
}

318 319
/**
 * nilfs_cleanup_super() - write filesystem state for cleanup
320
 * @sb: super block instance to be unmounted or degraded to read-only
321 322 323 324 325
 *
 * This function restores state flags in the on-disk super block.
 * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the
 * filesystem was not clean previously.
 */
326
int nilfs_cleanup_super(struct super_block *sb)
327
{
328
	struct the_nilfs *nilfs = sb->s_fs_info;
329
	struct nilfs_super_block **sbp;
J
Jiro SEKIBA 已提交
330
	int flag = NILFS_SB_COMMIT;
331
	int ret = -EIO;
332

333
	sbp = nilfs_prepare_super(sb, 0);
334
	if (sbp) {
335 336
		sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
		nilfs_set_log_cursor(sbp[0], nilfs);
J
Jiro SEKIBA 已提交
337 338 339 340 341 342 343 344 345
		if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) {
			/*
			 * make the "clean" flag also to the opposite
			 * super block if both super blocks point to
			 * the same checkpoint.
			 */
			sbp[1]->s_state = sbp[0]->s_state;
			flag = NILFS_SB_COMMIT_ALL;
		}
346
		ret = nilfs_commit_super(sb, flag);
347
	}
348 349 350
	return ret;
}

351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
/**
 * nilfs_move_2nd_super - relocate secondary super block
 * @sb: super block instance
 * @sb2off: new offset of the secondary super block (in bytes)
 *
 * nilfs->ns_sem must be locked by the caller.
 *
 * Return: 0 on success (including the case where the location is
 * unchanged), or -EIO if the buffer for the new location cannot be
 * obtained.
 */
static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *nsbh;
	struct nilfs_super_block *nsbp;
	sector_t blocknr, newblocknr;
	unsigned long offset;
	int sb2i = -1;  /* array index of the secondary superblock */
	int ret = 0;

	/*
	 * A super block buffer located beyond the first data block is
	 * taken to be the secondary one.
	 */
	if (nilfs->ns_sbh[1] &&
	    nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
		sb2i = 1;
		blocknr = nilfs->ns_sbh[1]->b_blocknr;
	} else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
		sb2i = 0;
		blocknr = nilfs->ns_sbh[0]->b_blocknr;
	}
	if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
		goto out;  /* super block location is unchanged */

	/* Get new super block buffer */
	newblocknr = sb2off >> nilfs->ns_blocksize_bits;
	offset = sb2off & (nilfs->ns_blocksize - 1);
	nsbh = sb_getblk(sb, newblocknr);
	if (!nsbh) {
		printk(KERN_WARNING
		       "NILFS warning: unable to move secondary superblock "
		       "to block %llu\n", (unsigned long long)newblocknr);
		ret = -EIO;
		goto out;
	}
	nsbp = (void *)nsbh->b_data + offset;

	if (sb2i >= 0) {
		/*
		 * Copy first and zero-fill afterwards: the old image may
		 * live in the very buffer being filled (a move within one
		 * block), so clearing before the copy could wipe the
		 * source.  memmove() keeps this safe even if the two
		 * images overlap or coincide.
		 */
		memmove(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);

		/*
		 * Clear only the parts of the block around the image.
		 * Zeroing ns_blocksize bytes starting at nsbp would run
		 * past the end of the buffer whenever offset is nonzero.
		 * NOTE(review): assumes offset + ns_sbsize never exceeds
		 * ns_blocksize - confirm against the super block size
		 * limit.
		 */
		memset(nsbh->b_data, 0, offset);
		memset((void *)nsbp + nilfs->ns_sbsize, 0,
		       nilfs->ns_blocksize - offset - nilfs->ns_sbsize);

		brelse(nilfs->ns_sbh[sb2i]);
		nilfs->ns_sbh[sb2i] = nsbh;
		nilfs->ns_sbp[sb2i] = nsbp;
	} else {
		/* no image to carry over: clear the whole block */
		memset(nsbh->b_data, 0, nilfs->ns_blocksize);

		if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
			/* secondary super block will be restored to index 1 */
			nilfs->ns_sbh[1] = nsbh;
			nilfs->ns_sbp[1] = nsbp;
		} else {
			brelse(nsbh);
		}
	}
out:
	return ret;
}

R
Ryusuke Konishi 已提交
408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
/**
 * nilfs_resize_fs - resize the filesystem
 * @sb: super block instance
 * @newsize: new size of the filesystem (in bytes)
 *
 * Resizes the segment usage file, writes a log, relocates the secondary
 * super block and commits both super blocks with the new device size
 * and segment count.  The range of allocatable segments is reset last.
 *
 * Return: 0 on success, -ERANGE if @newsize exceeds the device size,
 * -ENOSPC if @newsize is below 4 KiB, -EIO if the super block cannot be
 * prepared, or the error returned by nilfs_sufile_resize(),
 * nilfs_construct_segment() or nilfs_commit_super().
 */
int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_super_block **sbp;
	__u64 devsize, newnsegs;
	loff_t sb2off;
	int ret;

	ret = -ERANGE;
	devsize = i_size_read(sb->s_bdev->bd_inode);
	if (newsize > devsize)
		goto out;

	/*
	 * Prevent underflow in the secondary super block position
	 * calculation below.
	 * NOTE(review): assumes NILFS_SB2_OFFSET_BYTES() subtracts one
	 * 4 KiB unit from the size; its definition is outside this file -
	 * confirm.
	 */
	if (newsize < 4096) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * Write lock is required to protect some functions depending
	 * on the number of segments, the number of reserved segments,
	 * and so forth.
	 */
	down_write(&nilfs->ns_segctor_sem);

	sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
	newnsegs = sb2off >> nilfs->ns_blocksize_bits;
	do_div(newnsegs, nilfs->ns_blocks_per_segment);

	ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
	up_write(&nilfs->ns_segctor_sem);
	if (ret < 0)
		goto out;

	ret = nilfs_construct_segment(sb);
	if (ret < 0)
		goto out;

	down_write(&nilfs->ns_sem);
	/* the result of the relocation is not checked here */
	nilfs_move_2nd_super(sb, sb2off);
	ret = -EIO;
	sbp = nilfs_prepare_super(sb, 0);
	if (likely(sbp)) {
		nilfs_set_log_cursor(sbp[0], nilfs);
		/*
		 * Drop NILFS_RESIZE_FS flag for compatibility with
		 * mount-time resize which may be implemented in a
		 * future release.
		 */
		sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
					      ~NILFS_RESIZE_FS);
		sbp[0]->s_dev_size = cpu_to_le64(newsize);
		sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
		if (sbp[1])
			memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
		ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
	}
	up_write(&nilfs->ns_sem);

	/*
	 * Reset the range of allocatable segments last.  This order
	 * is important in the case of expansion because the secondary
	 * superblock must be protected from log write until migration
	 * completes.
	 */
	if (!ret)
		nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
out:
	return ret;
}

R
Ryusuke Konishi 已提交
479 480
static void nilfs_put_super(struct super_block *sb)
{
481
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
482

483
	nilfs_detach_log_writer(sb);
R
Ryusuke Konishi 已提交
484 485 486

	if (!(sb->s_flags & MS_RDONLY)) {
		down_write(&nilfs->ns_sem);
487
		nilfs_cleanup_super(sb);
R
Ryusuke Konishi 已提交
488 489 490
		up_write(&nilfs->ns_sem);
	}

491 492 493 494
	iput(nilfs->ns_sufile);
	iput(nilfs->ns_cpfile);
	iput(nilfs->ns_dat);

495
	destroy_nilfs(nilfs);
R
Ryusuke Konishi 已提交
496 497 498 499 500
	sb->s_fs_info = NULL;
}

static int nilfs_sync_fs(struct super_block *sb, int wait)
{
501
	struct the_nilfs *nilfs = sb->s_fs_info;
502
	struct nilfs_super_block **sbp;
R
Ryusuke Konishi 已提交
503 504 505 506 507
	int err = 0;

	/* This function is called when super block should be written back */
	if (wait)
		err = nilfs_construct_segment(sb);
508 509

	down_write(&nilfs->ns_sem);
510
	if (nilfs_sb_dirty(nilfs)) {
511
		sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs));
J
Jiro SEKIBA 已提交
512 513
		if (likely(sbp)) {
			nilfs_set_log_cursor(sbp[0], nilfs);
514
			nilfs_commit_super(sb, NILFS_SB_COMMIT);
J
Jiro SEKIBA 已提交
515
		}
516
	}
517 518
	up_write(&nilfs->ns_sem);

R
Ryusuke Konishi 已提交
519 520 521
	return err;
}

522
/**
 * nilfs_attach_checkpoint() - attach a checkpoint to a root object
 * @sb: super block instance
 * @cno: checkpoint number to read
 * @curr_mnt: if nonzero, the root is keyed by NILFS_CPTREE_CURRENT_CNO
 *            instead of @cno
 * @rootp: where the root object is stored on success
 *
 * A root that already has an ifile is reused as is.  Otherwise the
 * checkpoint is read from the cpfile and the ifile and the inode/block
 * counters of the root are set up from it.
 *
 * Return: 0 on success, -ENOMEM if no root object could be obtained,
 * -EINVAL if the checkpoint lookup failed with -ENOENT or -EINVAL, or
 * another error from the cpfile or ifile read.
 */
int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
			    struct nilfs_root **rootp)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	__u64 root_cno = curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno;
	struct nilfs_checkpoint *raw_cp;
	struct buffer_head *bh_cp;
	struct nilfs_root *root;
	int err;

	root = nilfs_find_or_create_root(nilfs, root_cno);
	if (!root)
		return -ENOMEM;

	if (root->ifile)
		goto attached; /* already attached checkpoint */

	down_read(&nilfs->ns_segctor_sem);
	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
					  &bh_cp);
	up_read(&nilfs->ns_segctor_sem);
	if (unlikely(err)) {
		if (err == -ENOENT || err == -EINVAL) {
			printk(KERN_ERR
			       "NILFS: Invalid checkpoint "
			       "(checkpoint number=%llu)\n",
			       (unsigned long long)cno);
			err = -EINVAL;
		}
		goto put_root;
	}

	err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size,
			       &raw_cp->cp_ifile_inode, &root->ifile);
	if (err)
		goto put_checkpoint;

	atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count));
	atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count));

	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);

 attached:
	*rootp = root;
	return 0;

 put_checkpoint:
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
 put_root:
	nilfs_put_root(root);

	return err;
}

576 577
static int nilfs_freeze(struct super_block *sb)
{
578
	struct the_nilfs *nilfs = sb->s_fs_info;
579 580 581 582
	int err;

	if (sb->s_flags & MS_RDONLY)
		return 0;
R
Ryusuke Konishi 已提交
583

584 585
	/* Mark super block clean */
	down_write(&nilfs->ns_sem);
586
	err = nilfs_cleanup_super(sb);
587
	up_write(&nilfs->ns_sem);
R
Ryusuke Konishi 已提交
588 589 590
	return err;
}

591
static int nilfs_unfreeze(struct super_block *sb)
R
Ryusuke Konishi 已提交
592
{
593
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
594

595 596 597 598
	if (sb->s_flags & MS_RDONLY)
		return 0;

	down_write(&nilfs->ns_sem);
599
	nilfs_setup_super(sb, false);
600 601
	up_write(&nilfs->ns_sem);
	return 0;
R
Ryusuke Konishi 已提交
602 603 604 605 606
}

static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
607 608
	struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
	struct the_nilfs *nilfs = root->nilfs;
609
	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
R
Ryusuke Konishi 已提交
610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
	unsigned long long blocks;
	unsigned long overhead;
	unsigned long nrsvblocks;
	sector_t nfreeblocks;
	int err;

	/*
	 * Compute all of the segment blocks
	 *
	 * The blocks before first segment and after last segment
	 * are excluded.
	 */
	blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments
		- nilfs->ns_first_data_block;
	nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment;

	/*
	 * Compute the overhead
	 *
629
	 * When distributing meta data blocks outside segment structure,
R
Ryusuke Konishi 已提交
630 631 632 633 634 635 636 637 638 639 640 641 642 643
	 * We must count them as the overhead.
	 */
	overhead = 0;

	err = nilfs_count_free_blocks(nilfs, &nfreeblocks);
	if (unlikely(err))
		return err;

	buf->f_type = NILFS_SUPER_MAGIC;
	buf->f_bsize = sb->s_blocksize;
	buf->f_blocks = blocks - overhead;
	buf->f_bfree = nfreeblocks;
	buf->f_bavail = (buf->f_bfree >= nrsvblocks) ?
		(buf->f_bfree - nrsvblocks) : 0;
644
	buf->f_files = atomic_read(&root->inodes_count);
R
Ryusuke Konishi 已提交
645 646
	buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */
	buf->f_namelen = NILFS_NAME_LEN;
647 648 649
	buf->f_fsid.val[0] = (u32)id;
	buf->f_fsid.val[1] = (u32)(id >> 32);

R
Ryusuke Konishi 已提交
650 651 652
	return 0;
}

653 654 655
static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct super_block *sb = vfs->mnt_sb;
656
	struct the_nilfs *nilfs = sb->s_fs_info;
657
	struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root;
658

659
	if (!nilfs_test_opt(nilfs, BARRIER))
660
		seq_puts(seq, ",nobarrier");
661 662
	if (root->cno != NILFS_CPTREE_CURRENT_CNO)
		seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno);
663
	if (nilfs_test_opt(nilfs, ERRORS_PANIC))
664
		seq_puts(seq, ",errors=panic");
665
	if (nilfs_test_opt(nilfs, ERRORS_CONT))
666
		seq_puts(seq, ",errors=continue");
667
	if (nilfs_test_opt(nilfs, STRICT_ORDER))
668
		seq_puts(seq, ",order=strict");
669
	if (nilfs_test_opt(nilfs, NORECOVERY))
670
		seq_puts(seq, ",norecovery");
671
	if (nilfs_test_opt(nilfs, DISCARD))
672
		seq_puts(seq, ",discard");
673 674 675 676

	return 0;
}

677
static const struct super_operations nilfs_sops = {
R
Ryusuke Konishi 已提交
678 679 680 681 682 683
	.alloc_inode    = nilfs_alloc_inode,
	.destroy_inode  = nilfs_destroy_inode,
	.dirty_inode    = nilfs_dirty_inode,
	/* .write_inode    = nilfs_write_inode, */
	/* .put_inode      = nilfs_put_inode, */
	/* .drop_inode	  = nilfs_drop_inode, */
A
Al Viro 已提交
684
	.evict_inode    = nilfs_evict_inode,
R
Ryusuke Konishi 已提交
685
	.put_super      = nilfs_put_super,
686
	/* .write_super    = nilfs_write_super, */
R
Ryusuke Konishi 已提交
687
	.sync_fs        = nilfs_sync_fs,
688 689
	.freeze_fs	= nilfs_freeze,
	.unfreeze_fs	= nilfs_unfreeze,
R
Ryusuke Konishi 已提交
690 691 692 693 694
	/* .write_super_lockfs */
	/* .unlockfs */
	.statfs         = nilfs_statfs,
	.remount_fs     = nilfs_remount,
	/* .umount_begin */
695
	.show_options = nilfs_show_options
R
Ryusuke Konishi 已提交
696 697 698 699
};

/* Tokens returned by match_token() for the entries of tokens[]. */
enum {
	Opt_err_cont,
	Opt_err_panic,
	Opt_err_ro,
	Opt_barrier,
	Opt_nobarrier,
	Opt_snapshot,
	Opt_order,
	Opt_norecovery,
	Opt_discard,
	Opt_nodiscard,
	Opt_err,
};

static match_table_t tokens = {
	{Opt_err_cont, "errors=continue"},
	{Opt_err_panic, "errors=panic"},
	{Opt_err_ro, "errors=remount-ro"},
708
	{Opt_barrier, "barrier"},
709
	{Opt_nobarrier, "nobarrier"},
R
Ryusuke Konishi 已提交
710 711
	{Opt_snapshot, "cp=%u"},
	{Opt_order, "order=%s"},
712
	{Opt_norecovery, "norecovery"},
713
	{Opt_discard, "discard"},
714
	{Opt_nodiscard, "nodiscard"},
R
Ryusuke Konishi 已提交
715 716 717
	{Opt_err, NULL}
};

718
static int parse_options(char *options, struct super_block *sb, int is_remount)
R
Ryusuke Konishi 已提交
719
{
720
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
721 722 723 724 725 726 727 728 729 730 731 732 733
	char *p;
	substring_t args[MAX_OPT_ARGS];

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
734
		case Opt_barrier:
735
			nilfs_set_opt(nilfs, BARRIER);
736
			break;
737
		case Opt_nobarrier:
738
			nilfs_clear_opt(nilfs, BARRIER);
R
Ryusuke Konishi 已提交
739 740 741 742
			break;
		case Opt_order:
			if (strcmp(args[0].from, "relaxed") == 0)
				/* Ordered data semantics */
743
				nilfs_clear_opt(nilfs, STRICT_ORDER);
R
Ryusuke Konishi 已提交
744 745
			else if (strcmp(args[0].from, "strict") == 0)
				/* Strict in-order semantics */
746
				nilfs_set_opt(nilfs, STRICT_ORDER);
R
Ryusuke Konishi 已提交
747 748 749 750
			else
				return 0;
			break;
		case Opt_err_panic:
751
			nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC);
R
Ryusuke Konishi 已提交
752 753
			break;
		case Opt_err_ro:
754
			nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO);
R
Ryusuke Konishi 已提交
755 756
			break;
		case Opt_err_cont:
757
			nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT);
R
Ryusuke Konishi 已提交
758 759
			break;
		case Opt_snapshot:
760
			if (is_remount) {
761 762 763
				printk(KERN_ERR
				       "NILFS: \"%s\" option is invalid "
				       "for remount.\n", p);
R
Ryusuke Konishi 已提交
764
				return 0;
765
			}
R
Ryusuke Konishi 已提交
766
			break;
767
		case Opt_norecovery:
768
			nilfs_set_opt(nilfs, NORECOVERY);
769
			break;
770
		case Opt_discard:
771
			nilfs_set_opt(nilfs, DISCARD);
772
			break;
773
		case Opt_nodiscard:
774
			nilfs_clear_opt(nilfs, DISCARD);
775
			break;
R
Ryusuke Konishi 已提交
776 777 778 779 780 781 782 783 784 785
		default:
			printk(KERN_ERR
			       "NILFS: Unrecognized mount option \"%s\"\n", p);
			return 0;
		}
	}
	return 1;
}

static inline void
786
nilfs_set_default_options(struct super_block *sb,
R
Ryusuke Konishi 已提交
787 788
			  struct nilfs_super_block *sbp)
{
789
	struct the_nilfs *nilfs = sb->s_fs_info;
790 791

	nilfs->ns_mount_opt =
792
		NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
R
Ryusuke Konishi 已提交
793 794
}

795
static int nilfs_setup_super(struct super_block *sb, int is_mount)
R
Ryusuke Konishi 已提交
796
{
797
	struct the_nilfs *nilfs = sb->s_fs_info;
798 799 800 801 802
	struct nilfs_super_block **sbp;
	int max_mnt_count;
	int mnt_count;

	/* nilfs->ns_sem must be locked by the caller. */
803
	sbp = nilfs_prepare_super(sb, 0);
804 805 806
	if (!sbp)
		return -EIO;

807 808 809
	if (!is_mount)
		goto skip_mount_setup;

810 811
	max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count);
	mnt_count = le16_to_cpu(sbp[0]->s_mnt_count);
R
Ryusuke Konishi 已提交
812

813
	if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
R
Ryusuke Konishi 已提交
814 815 816 817 818 819 820 821 822
		printk(KERN_WARNING
		       "NILFS warning: mounting fs with errors\n");
#if 0
	} else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) {
		printk(KERN_WARNING
		       "NILFS warning: maximal mount count reached\n");
#endif
	}
	if (!max_mnt_count)
823
		sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
R
Ryusuke Konishi 已提交
824

825
	sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1);
826 827 828
	sbp[0]->s_mtime = cpu_to_le64(get_seconds());

skip_mount_setup:
829 830
	sbp[0]->s_state =
		cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS);
J
Jiro SEKIBA 已提交
831
	/* synchronize sbp[1] with sbp[0] */
832 833
	if (sbp[1])
		memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
834
	return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
R
Ryusuke Konishi 已提交
835 836
}

837 838 839
/**
 * nilfs_read_super_block() - read the block holding a super block
 * @sb: super block instance
 * @pos: byte position of the super block on the device
 * @blocksize: block size used to split @pos into block and offset
 * @pbh: where the buffer head of the block is stored
 *
 * Return: pointer to the super block inside the buffer data, or NULL if
 * the block could not be read (*@pbh is NULL in that case).
 */
struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
						 u64 pos, int blocksize,
						 struct buffer_head **pbh)
{
	unsigned long long blkno = pos;
	unsigned long in_block;
	struct buffer_head *bh;

	/* do_div() leaves the quotient in blkno and yields the remainder */
	in_block = do_div(blkno, blocksize);

	bh = sb_bread(sb, blkno);
	*pbh = bh;
	if (!bh)
		return NULL;

	return (struct nilfs_super_block *)((char *)bh->b_data + in_block);
}

int nilfs_store_magic_and_option(struct super_block *sb,
				 struct nilfs_super_block *sbp,
				 char *data)
{
855
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
856 857 858 859 860 861 862 863

	sb->s_magic = le16_to_cpu(sbp->s_magic);

	/* FS independent flags */
#ifdef NILFS_ATIME_DISABLE
	sb->s_flags |= MS_NOATIME;
#endif

864
	nilfs_set_default_options(sb, sbp);
R
Ryusuke Konishi 已提交
865

866 867 868 869
	nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid);
	nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid);
	nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval);
	nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max);
R
Ryusuke Konishi 已提交
870

871
	return !parse_options(data, sb, 0) ? -EINVAL : 0 ;
R
Ryusuke Konishi 已提交
872 873
}

874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897
/**
 * nilfs_check_feature_compatibility() - check on-disk feature flags
 * @sb: super block instance
 * @sbp: on-disk super block carrying the feature flags
 *
 * Return: 0 if the filesystem can be mounted, -EINVAL if it carries an
 * unsupported incompatible feature, or an unsupported read-only
 * compatible feature while @sb is not read-only.
 */
int nilfs_check_feature_compatibility(struct super_block *sb,
				      struct nilfs_super_block *sbp)
{
	__u64 unsupported;

	unsupported = le64_to_cpu(sbp->s_feature_incompat) &
		~NILFS_FEATURE_INCOMPAT_SUPP;
	if (unsupported != 0) {
		printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
		       "optional features (%llx)\n",
		       (unsigned long long)unsupported);
		return -EINVAL;
	}

	unsupported = le64_to_cpu(sbp->s_feature_compat_ro) &
		~NILFS_FEATURE_COMPAT_RO_SUPP;
	if (unsupported != 0 && !(sb->s_flags & MS_RDONLY)) {
		printk(KERN_ERR "NILFS: couldn't mount RDWR because of "
		       "unsupported optional features (%llx)\n",
		       (unsigned long long)unsupported);
		return -EINVAL;
	}

	return 0;
}

898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
static int nilfs_get_root_dentry(struct super_block *sb,
				 struct nilfs_root *root,
				 struct dentry **root_dentry)
{
	struct inode *inode;
	struct dentry *dentry;
	int ret = 0;

	inode = nilfs_iget(sb, root, NILFS_ROOT_INO);
	if (IS_ERR(inode)) {
		printk(KERN_ERR "NILFS: get root inode failed\n");
		ret = PTR_ERR(inode);
		goto out;
	}
	if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) {
		iput(inode);
		printk(KERN_ERR "NILFS: corrupt root inode.\n");
		ret = -EINVAL;
		goto out;
	}

919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936
	if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
		dentry = d_find_alias(inode);
		if (!dentry) {
			dentry = d_alloc_root(inode);
			if (!dentry) {
				iput(inode);
				ret = -ENOMEM;
				goto failed_dentry;
			}
		} else {
			iput(inode);
		}
	} else {
		dentry = d_obtain_alias(inode);
		if (IS_ERR(dentry)) {
			ret = PTR_ERR(dentry);
			goto failed_dentry;
		}
937 938 939 940
	}
	*root_dentry = dentry;
 out:
	return ret;
941 942 943 944

 failed_dentry:
	printk(KERN_ERR "NILFS: get root dentry failed\n");
	goto out;
945 946
}

947 948 949
/*
 * nilfs_attach_snapshot - attach a snapshot and get its root dentry
 * @s: super block instance
 * @cno: checkpoint number of the snapshot
 * @root_dentry: where the root dentry is stored on success
 *
 * Return: 0 on success; -EINVAL if @cno does not exist or is not a
 * snapshot; otherwise the error of the cpfile lookup,
 * nilfs_attach_checkpoint() or nilfs_get_root_dentry().
 */
static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
				 struct dentry **root_dentry)
{
	struct the_nilfs *nilfs = s->s_fs_info;
	struct nilfs_root *root;
	int ret;

	down_read(&nilfs->ns_segctor_sem);
	ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno);
	up_read(&nilfs->ns_segctor_sem);

	if (ret < 0)
		return (ret == -ENOENT) ? -EINVAL : ret;

	if (ret == 0) {
		printk(KERN_ERR "NILFS: The specified checkpoint is "
		       "not a snapshot (checkpoint number=%llu).\n",
		       (unsigned long long)cno);
		return -EINVAL;
	}

	ret = nilfs_attach_checkpoint(s, cno, false, &root);
	if (ret) {
		printk(KERN_ERR "NILFS: error loading snapshot "
		       "(checkpoint number=%llu).\n",
		       (unsigned long long)cno);
		return ret;
	}

	ret = nilfs_get_root_dentry(s, root, root_dentry);
	/* drop the reference taken by nilfs_attach_checkpoint() */
	nilfs_put_root(root);

	return ret;
}

981 982
static int nilfs_tree_was_touched(struct dentry *root_dentry)
{
N
Nick Piggin 已提交
983
	return root_dentry->d_count > 1;
984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999
}

/**
 * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint
 * @root_dentry: root dentry of the tree to be shrunk
 *
 * A tree that has submounts is reported as in-use without being shrunk.
 * Otherwise the dentries below @root_dentry are pruned with
 * shrink_dcache_parent() and the tree is tested again with
 * nilfs_tree_was_touched().
 *
 * This function returns true if the tree was in-use.
 */
static int nilfs_try_to_shrink_tree(struct dentry *root_dentry)
{
	if (!have_submounts(root_dentry)) {
		shrink_dcache_parent(root_dentry);
		return nilfs_tree_was_touched(root_dentry);
	}
	return true;
}

1000 1001
/**
 * nilfs_checkpoint_is_mounted() - test whether a checkpoint is in use
 * @sb: super block instance
 * @cno: checkpoint number to test
 *
 * This function returns false if @cno is larger than nilfs->ns_cno, and
 * true if @cno is not smaller than the value of nilfs_last_cno().  For
 * any other checkpoint, it looks up the nilfs_root of @cno and the root
 * inode and dentry belonging to it; if the dentry tree was touched, the
 * result of nilfs_try_to_shrink_tree() is returned.  False is returned
 * when no root, root inode or root dentry is found, or when the tree was
 * not touched.
 */
int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_root *root;
	struct inode *inode;
	struct dentry *dentry;
	int ret = false;

	/*
	 * @cno is unsigned, so it can never be negative; only the upper
	 * bound has to be checked.
	 */
	if (cno > nilfs->ns_cno)
		return false;

	if (cno >= nilfs_last_cno(nilfs))
		return true;	/* protect recent checkpoints */

	root = nilfs_lookup_root(nilfs, cno);
	if (!root)
		return false;

	inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO);
	if (inode) {
		dentry = d_find_alias(inode);
		if (dentry) {
			if (nilfs_tree_was_touched(dentry))
				ret = nilfs_try_to_shrink_tree(dentry);
			dput(dentry);
		}
		iput(inode);
	}
	nilfs_put_root(root);
	return ret;
}

R
Ryusuke Konishi 已提交
1032 1033 1034 1035 1036 1037
/**
 * nilfs_fill_super() - initialize a super block instance
 * @sb: super_block
 * @data: mount options
 * @silent: silent mode flag
 *
1038
 * This function is called exclusively by nilfs->ns_mount_mutex.
R
Ryusuke Konishi 已提交
1039 1040 1041
 * So, the recovery process is protected from other simultaneous mounts.
 */
static int
1042
nilfs_fill_super(struct super_block *sb, void *data, int silent)
R
Ryusuke Konishi 已提交
1043
{
1044
	struct the_nilfs *nilfs;
1045
	struct nilfs_root *fsroot;
1046
	struct backing_dev_info *bdi;
R
Ryusuke Konishi 已提交
1047 1048 1049
	__u64 cno;
	int err;

1050 1051
	nilfs = alloc_nilfs(sb->s_bdev);
	if (!nilfs)
R
Ryusuke Konishi 已提交
1052 1053
		return -ENOMEM;

1054
	sb->s_fs_info = nilfs;
R
Ryusuke Konishi 已提交
1055

1056
	err = init_nilfs(nilfs, sb, (char *)data);
R
Ryusuke Konishi 已提交
1057
	if (err)
1058
		goto failed_nilfs;
R
Ryusuke Konishi 已提交
1059 1060 1061 1062

	sb->s_op = &nilfs_sops;
	sb->s_export_op = &nilfs_export_ops;
	sb->s_root = NULL;
1063
	sb->s_time_gran = 1;
1064 1065 1066

	bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
	sb->s_bdi = bdi ? : &default_backing_dev_info;
R
Ryusuke Konishi 已提交
1067

1068
	err = load_nilfs(nilfs, sb);
1069
	if (err)
1070
		goto failed_nilfs;
1071

R
Ryusuke Konishi 已提交
1072
	cno = nilfs_last_cno(nilfs);
1073
	err = nilfs_attach_checkpoint(sb, cno, true, &fsroot);
R
Ryusuke Konishi 已提交
1074
	if (err) {
1075 1076
		printk(KERN_ERR "NILFS: error loading last checkpoint "
		       "(checkpoint number=%llu).\n", (unsigned long long)cno);
1077
		goto failed_unload;
R
Ryusuke Konishi 已提交
1078 1079 1080
	}

	if (!(sb->s_flags & MS_RDONLY)) {
1081
		err = nilfs_attach_log_writer(sb, fsroot);
R
Ryusuke Konishi 已提交
1082 1083 1084 1085
		if (err)
			goto failed_checkpoint;
	}

1086 1087
	err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root);
	if (err)
R
Ryusuke Konishi 已提交
1088 1089
		goto failed_segctor;

1090
	nilfs_put_root(fsroot);
R
Ryusuke Konishi 已提交
1091 1092 1093

	if (!(sb->s_flags & MS_RDONLY)) {
		down_write(&nilfs->ns_sem);
1094
		nilfs_setup_super(sb, true);
R
Ryusuke Konishi 已提交
1095 1096 1097 1098 1099 1100
		up_write(&nilfs->ns_sem);
	}

	return 0;

 failed_segctor:
1101
	nilfs_detach_log_writer(sb);
R
Ryusuke Konishi 已提交
1102 1103

 failed_checkpoint:
1104
	nilfs_put_root(fsroot);
R
Ryusuke Konishi 已提交
1105

1106 1107 1108 1109 1110
 failed_unload:
	iput(nilfs->ns_sufile);
	iput(nilfs->ns_cpfile);
	iput(nilfs->ns_dat);

1111 1112
 failed_nilfs:
	destroy_nilfs(nilfs);
R
Ryusuke Konishi 已提交
1113 1114 1115 1116 1117
	return err;
}

static int nilfs_remount(struct super_block *sb, int *flags, char *data)
{
1118
	struct the_nilfs *nilfs = sb->s_fs_info;
R
Ryusuke Konishi 已提交
1119
	unsigned long old_sb_flags;
1120
	unsigned long old_mount_opt;
1121
	int err;
R
Ryusuke Konishi 已提交
1122 1123

	old_sb_flags = sb->s_flags;
1124
	old_mount_opt = nilfs->ns_mount_opt;
R
Ryusuke Konishi 已提交
1125

1126
	if (!parse_options(data, sb, 1)) {
R
Ryusuke Konishi 已提交
1127 1128 1129 1130 1131
		err = -EINVAL;
		goto restore_opts;
	}
	sb->s_flags = (sb->s_flags & ~MS_POSIXACL);

1132
	err = -EINVAL;
R
Ryusuke Konishi 已提交
1133

1134 1135 1136 1137 1138 1139 1140
	if (!nilfs_valid_fs(nilfs)) {
		printk(KERN_WARNING "NILFS (device %s): couldn't "
		       "remount because the filesystem is in an "
		       "incomplete recovery state.\n", sb->s_id);
		goto restore_opts;
	}

R
Ryusuke Konishi 已提交
1141 1142 1143
	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
		goto out;
	if (*flags & MS_RDONLY) {
1144 1145
		/* Shutting down log writer */
		nilfs_detach_log_writer(sb);
R
Ryusuke Konishi 已提交
1146 1147 1148 1149 1150 1151 1152
		sb->s_flags |= MS_RDONLY;

		/*
		 * Remounting a valid RW partition RDONLY, so set
		 * the RDONLY flag and then mark the partition as valid again.
		 */
		down_write(&nilfs->ns_sem);
1153
		nilfs_cleanup_super(sb);
R
Ryusuke Konishi 已提交
1154 1155
		up_write(&nilfs->ns_sem);
	} else {
1156
		__u64 features;
1157
		struct nilfs_root *root;
1158

R
Ryusuke Konishi 已提交
1159 1160 1161 1162 1163
		/*
		 * Mounting a RDONLY partition read-write, so reread and
		 * store the current valid flag.  (It may have been changed
		 * by fsck since we originally mounted the partition.)
		 */
1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
		down_read(&nilfs->ns_sem);
		features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
			~NILFS_FEATURE_COMPAT_RO_SUPP;
		up_read(&nilfs->ns_sem);
		if (features) {
			printk(KERN_WARNING "NILFS (device %s): couldn't "
			       "remount RDWR because of unsupported optional "
			       "features (%llx)\n",
			       sb->s_id, (unsigned long long)features);
			err = -EROFS;
			goto restore_opts;
		}

R
Ryusuke Konishi 已提交
1177 1178
		sb->s_flags &= ~MS_RDONLY;

1179
		root = NILFS_I(sb->s_root->d_inode)->i_root;
1180
		err = nilfs_attach_log_writer(sb, root);
R
Ryusuke Konishi 已提交
1181
		if (err)
1182
			goto restore_opts;
R
Ryusuke Konishi 已提交
1183 1184

		down_write(&nilfs->ns_sem);
1185
		nilfs_setup_super(sb, true);
R
Ryusuke Konishi 已提交
1186 1187 1188 1189 1190 1191 1192
		up_write(&nilfs->ns_sem);
	}
 out:
	return 0;

 restore_opts:
	sb->s_flags = old_sb_flags;
1193
	nilfs->ns_mount_opt = old_mount_opt;
R
Ryusuke Konishi 已提交
1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211
	return err;
}

/*
 * Parameters collected by nilfs_mount() to identify a mount instance.
 */
struct nilfs_super_data {
	struct block_device *bdev;	/* block device opened for the mount */
	__u64 cno;	/* snapshot checkpoint number parsed by nilfs_identify(); 0 if none */
	int flags;	/* mount flags given to nilfs_mount() */
};

/**
 * nilfs_identify - pre-read mount options needed to identify mount instance
 * @data: mount options
 * @sd: nilfs_super_data
 */
static int nilfs_identify(char *data, struct nilfs_super_data *sd)
{
	char *p, *options = data;
	substring_t args[MAX_OPT_ARGS];
1212
	int token;
R
Ryusuke Konishi 已提交
1213 1214 1215 1216 1217 1218 1219
	int ret = 0;

	do {
		p = strsep(&options, ",");
		if (p != NULL && *p) {
			token = match_token(p, tokens, args);
			if (token == Opt_snapshot) {
1220
				if (!(sd->flags & MS_RDONLY)) {
R
Ryusuke Konishi 已提交
1221
					ret++;
1222 1223 1224 1225 1226 1227 1228 1229 1230 1231
				} else {
					sd->cno = simple_strtoull(args[0].from,
								  NULL, 0);
					/*
					 * No need to see the end pointer;
					 * match_token() has done syntax
					 * checking.
					 */
					if (sd->cno == 0)
						ret++;
R
Ryusuke Konishi 已提交
1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247
				}
			}
			if (ret)
				printk(KERN_ERR
				       "NILFS: invalid mount option: %s\n", p);
		}
		if (!options)
			break;
		BUG_ON(options == data);
		*(options - 1) = ',';
	} while (!ret);
	return ret;
}

static int nilfs_set_bdev_super(struct super_block *s, void *data)
{
1248
	s->s_bdev = data;
R
Ryusuke Konishi 已提交
1249 1250 1251 1252 1253 1254
	s->s_dev = s->s_bdev->bd_dev;
	return 0;
}

static int nilfs_test_bdev_super(struct super_block *s, void *data)
{
1255
	return (void *)s->s_bdev == data;
R
Ryusuke Konishi 已提交
1256 1257
}

A
Al Viro 已提交
1258 1259 1260
static struct dentry *
nilfs_mount(struct file_system_type *fs_type, int flags,
	     const char *dev_name, void *data)
R
Ryusuke Konishi 已提交
1261 1262
{
	struct nilfs_super_data sd;
1263
	struct super_block *s;
1264
	fmode_t mode = FMODE_READ | FMODE_EXCL;
1265 1266
	struct dentry *root_dentry;
	int err, s_new = false;
R
Ryusuke Konishi 已提交
1267

1268 1269 1270
	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;

1271
	sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
J
Jan Blunck 已提交
1272
	if (IS_ERR(sd.bdev))
A
Al Viro 已提交
1273
		return ERR_CAST(sd.bdev);
R
Ryusuke Konishi 已提交
1274 1275 1276 1277 1278 1279 1280 1281

	sd.cno = 0;
	sd.flags = flags;
	if (nilfs_identify((char *)data, &sd)) {
		err = -EINVAL;
		goto failed;
	}

1282
	/*
1283 1284 1285
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
1286
	 */
1287 1288 1289 1290 1291 1292
	mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
	if (sd.bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
		err = -EBUSY;
		goto failed;
	}
1293
	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev);
1294
	mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
1295 1296
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
1297
		goto failed;
R
Ryusuke Konishi 已提交
1298 1299 1300 1301 1302
	}

	if (!s->s_root) {
		char b[BDEVNAME_SIZE];

1303 1304
		s_new = true;

1305
		/* New superblock instance created */
R
Ryusuke Konishi 已提交
1306
		s->s_flags = flags;
1307
		s->s_mode = mode;
R
Ryusuke Konishi 已提交
1308 1309 1310
		strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
		sb_set_blocksize(s, block_size(sd.bdev));

1311
		err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
R
Ryusuke Konishi 已提交
1312
		if (err)
1313
			goto failed_super;
R
Ryusuke Konishi 已提交
1314 1315

		s->s_flags |= MS_ACTIVE;
1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338
	} else if (!sd.cno) {
		int busy = false;

		if (nilfs_tree_was_touched(s->s_root)) {
			busy = nilfs_try_to_shrink_tree(s->s_root);
			if (busy && (flags ^ s->s_flags) & MS_RDONLY) {
				printk(KERN_ERR "NILFS: the device already "
				       "has a %s mount.\n",
				       (s->s_flags & MS_RDONLY) ?
				       "read-only" : "read/write");
				err = -EBUSY;
				goto failed_super;
			}
		}
		if (!busy) {
			/*
			 * Try remount to setup mount states if the current
			 * tree is not mounted and only snapshots use this sb.
			 */
			err = nilfs_remount(s, &flags, data);
			if (err)
				goto failed_super;
		}
R
Ryusuke Konishi 已提交
1339 1340
	}

1341 1342
	if (sd.cno) {
		err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
1343
		if (err)
1344 1345 1346
			goto failed_super;
	} else {
		root_dentry = dget(s->s_root);
R
Ryusuke Konishi 已提交
1347 1348
	}

1349
	if (!s_new)
1350
		blkdev_put(sd.bdev, mode);
R
Ryusuke Konishi 已提交
1351

A
Al Viro 已提交
1352
	return root_dentry;
R
Ryusuke Konishi 已提交
1353

1354
 failed_super:
1355
	deactivate_locked_super(s);
R
Ryusuke Konishi 已提交
1356

1357 1358
 failed:
	if (!s_new)
1359
		blkdev_put(sd.bdev, mode);
A
Al Viro 已提交
1360
	return ERR_PTR(err);
R
Ryusuke Konishi 已提交
1361 1362 1363 1364 1365
}

struct file_system_type nilfs_fs_type = {
	.owner    = THIS_MODULE,
	.name     = "nilfs2",
A
Al Viro 已提交
1366
	.mount    = nilfs_mount,
R
Ryusuke Konishi 已提交
1367 1368 1369 1370
	.kill_sb  = kill_block_super,
	.fs_flags = FS_REQUIRES_DEV,
};

1371
static void nilfs_inode_init_once(void *obj)
R
Ryusuke Konishi 已提交
1372
{
1373
	struct nilfs_inode_info *ii = obj;
R
Ryusuke Konishi 已提交
1374

1375 1376 1377 1378
	INIT_LIST_HEAD(&ii->i_dirty);
#ifdef CONFIG_NILFS_XATTR
	init_rwsem(&ii->xattr_sem);
#endif
1379
	address_space_init_once(&ii->i_btnode_cache);
1380
	ii->i_bmap = &ii->i_bmap_data;
1381 1382
	inode_init_once(&ii->vfs_inode);
}
R
Ryusuke Konishi 已提交
1383

1384 1385 1386 1387
/*
 * Constructor given to kmem_cache_create() for "nilfs2_segbuf_cache":
 * clears the whole nilfs_segment_buffer object to zero.
 */
static void nilfs_segbuf_init_once(void *obj)
{
	struct nilfs_segment_buffer *segbuf = obj;

	memset(segbuf, 0, sizeof(*segbuf));
}
R
Ryusuke Konishi 已提交
1388

1389 1390
static void nilfs_destroy_cachep(void)
{
1391
	if (nilfs_inode_cachep)
1392
		kmem_cache_destroy(nilfs_inode_cachep);
1393
	if (nilfs_transaction_cachep)
1394
		kmem_cache_destroy(nilfs_transaction_cachep);
1395
	if (nilfs_segbuf_cachep)
1396
		kmem_cache_destroy(nilfs_segbuf_cachep);
1397
	if (nilfs_btree_path_cache)
1398 1399
		kmem_cache_destroy(nilfs_btree_path_cache);
}
R
Ryusuke Konishi 已提交
1400

1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425
/*
 * Create the slab caches for inodes, transaction info, segment buffers
 * and btree paths.  If any of them cannot be created, the caches created
 * so far are destroyed with nilfs_destroy_cachep().
 *
 * Returns zero on success, or -ENOMEM on failure.
 */
static int __init nilfs_init_cachep(void)
{
	nilfs_inode_cachep =
		kmem_cache_create("nilfs2_inode_cache",
				  sizeof(struct nilfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, nilfs_inode_init_once);
	if (nilfs_inode_cachep == NULL)
		goto out_nomem;

	nilfs_transaction_cachep =
		kmem_cache_create("nilfs2_transaction_cache",
				  sizeof(struct nilfs_transaction_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (nilfs_transaction_cachep == NULL)
		goto out_nomem;

	nilfs_segbuf_cachep =
		kmem_cache_create("nilfs2_segbuf_cache",
				  sizeof(struct nilfs_segment_buffer), 0,
				  SLAB_RECLAIM_ACCOUNT, nilfs_segbuf_init_once);
	if (nilfs_segbuf_cachep == NULL)
		goto out_nomem;

	/* One object holds a path entry for every btree level */
	nilfs_btree_path_cache =
		kmem_cache_create("nilfs2_btree_path_cache",
				  sizeof(struct nilfs_btree_path) *
				  NILFS_BTREE_LEVEL_MAX,
				  0, 0, NULL);
	if (nilfs_btree_path_cache == NULL)
		goto out_nomem;

	return 0;

out_nomem:
	nilfs_destroy_cachep();
	return -ENOMEM;
}

/*
 * Module initialization: create the slab caches and register the nilfs2
 * filesystem type.  The caches are destroyed again if the registration
 * fails.
 *
 * Returns zero on success, or the error code of the step that failed.
 */
static int __init init_nilfs_fs(void)
{
	int ret;

	ret = nilfs_init_cachep();
	if (ret)
		return ret;

	ret = register_filesystem(&nilfs_fs_type);
	if (ret) {
		nilfs_destroy_cachep();
		return ret;
	}

	printk(KERN_INFO "NILFS version 2 loaded\n");
	return 0;
}

/*
 * Module cleanup: undo init_nilfs_fs() in reverse order.  The filesystem
 * type is unregistered first, so that the slab caches are only destroyed
 * once the type is no longer registered.
 */
static void __exit exit_nilfs_fs(void)
{
	unregister_filesystem(&nilfs_fs_type);
	nilfs_destroy_cachep();
}

/* Entry points run when the module is loaded and unloaded */
module_init(init_nilfs_fs)
module_exit(exit_nilfs_fs)