/*
 * the_nilfs.h - the_nilfs shared structure.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Written by Ryusuke Konishi.
 *
 */

#ifndef _THE_NILFS_H
#define _THE_NILFS_H

#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/rbtree.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>

/* Only pointers to these types are stored in this header. */
struct nilfs_sc_info;
struct nilfs_sysfs_dev_subgroups;

/*
 * Bit numbers of the state flags kept in the_nilfs.ns_flags.  They are
 * set, cleared and tested through the helpers generated by
 * THE_NILFS_FNS().
 */
enum {
	THE_NILFS_INIT = 0,     /* Information from super_block is set */
	THE_NILFS_DISCONTINUED,	/* 'next' pointer chain has broken */
	THE_NILFS_GC_RUNNING,	/* gc process is running */
	THE_NILFS_SB_DIRTY,	/* super block is dirty */
};

/**
 * struct the_nilfs - struct to supervise multiple nilfs mount points
 * @ns_flags: flags
45
 * @ns_flushed_device: flag indicating if all volatile data was flushed
46 47
 * @ns_bdev: block device
 * @ns_sem: semaphore for shared states
48
 * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts
49 50
 * @ns_sbh: buffer heads of on-disk super blocks
 * @ns_sbp: pointers to super block data
J
Jiro SEKIBA 已提交
51 52
 * @ns_sbwtime: previous write time of super block
 * @ns_sbwcount: write count of super block
53
 * @ns_sbsize: size of valid data in super block
54
 * @ns_mount_state: file system state
55
 * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
56 57 58 59 60 61 62 63 64 65 66 67
 * @ns_seg_seq: segment sequence counter
 * @ns_segnum: index number of the latest full segment.
 * @ns_nextnum: index number of the full segment index to be used next
 * @ns_pseg_offset: offset of next partial segment in the current full segment
 * @ns_cno: next checkpoint number
 * @ns_ctime: write time of the last segment
 * @ns_nongc_ctime: write time of the last segment not for cleaner operation
 * @ns_ndirtyblks: Number of dirty data blocks
 * @ns_last_segment_lock: lock protecting fields for the latest segment
 * @ns_last_pseg: start block number of the latest segment
 * @ns_last_seq: sequence value of the latest segment
 * @ns_last_cno: checkpoint number of the latest segment
68
 * @ns_prot_seq: least sequence number of segments which must not be reclaimed
69
 * @ns_prev_seq: base sequence number used to decide if advance log cursor
70 71
 * @ns_writer: log writer
 * @ns_segctor_sem: semaphore protecting log write
72 73 74
 * @ns_dat: DAT file inode
 * @ns_cpfile: checkpoint file inode
 * @ns_sufile: segusage file inode
75 76
 * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root)
 * @ns_cptree_lock: lock protecting @ns_cptree
77 78
 * @ns_dirty_files: list of dirty files
 * @ns_inode_lock: lock protecting @ns_dirty_files
79
 * @ns_gc_inodes: dummy inodes to keep live blocks
80 81
 * @ns_next_generation: next generation number for inodes
 * @ns_next_gen_lock: lock protecting @ns_next_generation
82
 * @ns_mount_opt: mount options
83 84 85 86
 * @ns_resuid: uid for reserved blocks
 * @ns_resgid: gid for reserved blocks
 * @ns_interval: checkpoint creation interval
 * @ns_watermark: watermark for the number of dirty buffers
87
 * @ns_blocksize_bits: bit length of block size
88
 * @ns_blocksize: block size
89 90 91 92 93 94 95 96
 * @ns_nsegments: number of segments in filesystem
 * @ns_blocks_per_segment: number of blocks per segment
 * @ns_r_segments_percentage: reserved segments percentage
 * @ns_nrsvsegs: number of reserved segments
 * @ns_first_data_block: block number of first data block
 * @ns_inode_size: size of on-disk inode
 * @ns_first_ino: first not-special inode number
 * @ns_crc_seed: seed value of CRC32 calculation
97 98
 * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
 * @ns_dev_kobj_unregister: completion state
99
 * @ns_dev_subgroups: <device> subgroups pointer
100 101 102
 */
struct the_nilfs {
	unsigned long		ns_flags;
103
	int			ns_flushed_device;
104 105 106

	struct block_device    *ns_bdev;
	struct rw_semaphore	ns_sem;
107
	struct mutex		ns_snapshot_mount_mutex;
108 109 110 111 112 113

	/*
	 * used for
	 * - loading the latest checkpoint exclusively.
	 * - allocating a new full segment.
	 */
114 115
	struct buffer_head     *ns_sbh[2];
	struct nilfs_super_block *ns_sbp[2];
J
Jiro SEKIBA 已提交
116 117
	time_t			ns_sbwtime;
	unsigned		ns_sbwcount;
118
	unsigned		ns_sbsize;
119
	unsigned		ns_mount_state;
120
	unsigned		ns_sb_update_freq;
121 122 123 124

	/*
	 * Following fields are dedicated to a writable FS-instance.
	 * Except for the period seeking checkpoint, code outside the segment
125 126
	 * constructor must lock a segment semaphore while accessing these
	 * fields.
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
	 * The writable FS-instance is sole during a lifetime of the_nilfs.
	 */
	u64			ns_seg_seq;
	__u64			ns_segnum;
	__u64			ns_nextnum;
	unsigned long		ns_pseg_offset;
	__u64			ns_cno;
	time_t			ns_ctime;
	time_t			ns_nongc_ctime;
	atomic_t		ns_ndirtyblks;

	/*
	 * The following fields hold information on the latest partial segment
	 * written to disk with a super root.  These fields are protected by
	 * ns_last_segment_lock.
	 */
	spinlock_t		ns_last_segment_lock;
	sector_t		ns_last_pseg;
	u64			ns_last_seq;
	__u64			ns_last_cno;
147
	u64			ns_prot_seq;
148
	u64			ns_prev_seq;
149

150
	struct nilfs_sc_info   *ns_writer;
151 152 153 154 155 156 157 158 159 160
	struct rw_semaphore	ns_segctor_sem;

	/*
	 * Following fields are lock free except for the period before
	 * the_nilfs is initialized.
	 */
	struct inode	       *ns_dat;
	struct inode	       *ns_cpfile;
	struct inode	       *ns_sufile;

161 162 163 164
	/* Checkpoint tree */
	struct rb_root		ns_cptree;
	spinlock_t		ns_cptree_lock;

165 166 167 168
	/* Dirty inode list */
	struct list_head	ns_dirty_files;
	spinlock_t		ns_inode_lock;

169
	/* GC inode list */
170 171
	struct list_head	ns_gc_inodes;

172 173 174 175
	/* Inode allocator */
	u32			ns_next_generation;
	spinlock_t		ns_next_gen_lock;

176 177 178
	/* Mount options */
	unsigned long		ns_mount_opt;

179 180 181 182 183
	uid_t			ns_resuid;
	gid_t			ns_resgid;
	unsigned long		ns_interval;
	unsigned long		ns_watermark;

184 185
	/* Disk layout information (static) */
	unsigned int		ns_blocksize_bits;
186
	unsigned int		ns_blocksize;
187 188 189 190 191 192 193 194
	unsigned long		ns_nsegments;
	unsigned long		ns_blocks_per_segment;
	unsigned long		ns_r_segments_percentage;
	unsigned long		ns_nrsvsegs;
	unsigned long		ns_first_data_block;
	int			ns_inode_size;
	int			ns_first_ino;
	u32			ns_crc_seed;
195 196 197 198

	/* /sys/fs/<nilfs>/<device> */
	struct kobject ns_dev_kobj;
	struct completion ns_dev_kobj_unregister;
199
	struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
};

/*
 * THE_NILFS_FNS(bit, name) - generate accessors for one ns_flags bit
 *
 * Expands to three inline helpers that operate on bit THE_NILFS_<bit> of
 * nilfs->ns_flags:
 *   nilfs_<name>()       - returns the result of test_bit() on the bit
 *   set_nilfs_<name>()   - sets the bit with set_bit()
 *   clear_nilfs_<name>() - clears the bit with clear_bit()
 */
#define THE_NILFS_FNS(bit, name)					\
static inline int nilfs_##name(struct the_nilfs *nilfs)			\
{									\
	return test_bit(THE_NILFS_##bit, &nilfs->ns_flags);		\
}									\
static inline void set_nilfs_##name(struct the_nilfs *nilfs)		\
{									\
	set_bit(THE_NILFS_##bit, &nilfs->ns_flags);			\
}									\
static inline void clear_nilfs_##name(struct the_nilfs *nilfs)		\
{									\
	clear_bit(THE_NILFS_##bit, &nilfs->ns_flags);			\
}

/* One set of accessors per THE_NILFS_* flag bit */
THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)

/*
 * Mount option operations
 */
/* Clear the NILFS_MOUNT_<opt> bits in (nilfs)->ns_mount_opt. */
#define nilfs_clear_opt(nilfs, opt)					\
	do {								\
		(nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt;		\
	} while (0)

/* Set the NILFS_MOUNT_<opt> bits in (nilfs)->ns_mount_opt. */
#define nilfs_set_opt(nilfs, opt)					\
	do {								\
		(nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt;		\
	} while (0)

/*
 * Evaluate to the NILFS_MOUNT_<opt> bits currently set in
 * (nilfs)->ns_mount_opt; the result is the masked value, not 0/1.
 */
#define nilfs_test_opt(nilfs, opt)					\
	((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
/*
 * nilfs_write_opt - replace a group of mount option bits
 *
 * Clears the NILFS_MOUNT_<mask> bits of (nilfs)->ns_mount_opt and then
 * sets the NILFS_MOUNT_<opt> bits.  The @nilfs argument is evaluated
 * exactly once, so passing an expression with side effects is safe.
 */
#define nilfs_write_opt(nilfs, mask, opt)				\
	do {								\
		unsigned long *optp_ = &(nilfs)->ns_mount_opt;		\
									\
		*optp_ = (*optp_ & ~NILFS_MOUNT_##mask) |		\
			 NILFS_MOUNT_##opt;				\
	} while (0)

235 236 237 238 239 240 241 242
/**
 * struct nilfs_root - nilfs root object
 * @cno: checkpoint number
 * @rb_node: red-black tree node
 * @count: refcount of this structure
 * @nilfs: nilfs object
 * @ifile: inode file
 * @inodes_count: number of inodes
243
 * @blocks_count: number of blocks
244 245
 * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
 * @snapshot_kobj_unregister: completion state for kernel object
246 247 248 249 250 251 252 253 254
 */
struct nilfs_root {
	__u64 cno;
	struct rb_node rb_node;

	atomic_t count;
	struct the_nilfs *nilfs;
	struct inode *ifile;

255 256
	atomic64_t inodes_count;
	atomic64_t blocks_count;
257 258 259 260

	/* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
	struct kobject snapshot_kobj;
	struct completion snapshot_kobj_unregister;
261 262 263 264 265
};

/* Special checkpoint number */
#define NILFS_CPTREE_CURRENT_CNO	0

/* Minimum interval of periodical update of superblocks (in seconds) */
#define NILFS_SB_FREQ		10

static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
{
	u64 t = get_seconds();
272

273 274
	return t < nilfs->ns_sbwtime ||
		t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
J
Jiro SEKIBA 已提交
275 276
}

J
Jiro SEKIBA 已提交
277
static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
J
Jiro SEKIBA 已提交
278
{
J
Jiro SEKIBA 已提交
279
	int flip_bits = nilfs->ns_sbwcount & 0x0FL;
280

J
Jiro SEKIBA 已提交
281
	return (flip_bits != 0x08 && flip_bits != 0x0F);
J
Jiro SEKIBA 已提交
282 283
}

284
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
285 286
struct the_nilfs *alloc_nilfs(struct block_device *bdev);
void destroy_nilfs(struct the_nilfs *nilfs);
287 288
int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
R
Ryusuke Konishi 已提交
289 290
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
291
int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
292
int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
293 294 295 296
struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs,
					     __u64 cno);
void nilfs_put_root(struct nilfs_root *root);
297
int nilfs_near_disk_full(struct the_nilfs *);
298 299
void nilfs_fall_back_super_block(struct the_nilfs *);
void nilfs_swap_super_block(struct the_nilfs *);
300 301


302 303 304 305 306
static inline void nilfs_get_root(struct nilfs_root *root)
{
	atomic_inc(&root->count);
}

/**
 * nilfs_valid_fs - read the NILFS_VALID_FS bit of the mount state
 * @nilfs: nilfs object
 *
 * ns_mount_state is sampled while holding ns_sem for reading.
 *
 * Return: ns_mount_state masked with NILFS_VALID_FS.
 */
static inline int nilfs_valid_fs(struct the_nilfs *nilfs)
{
	struct rw_semaphore *sem = &nilfs->ns_sem;
	unsigned int state;

	down_read(sem);
	state = nilfs->ns_mount_state & NILFS_VALID_FS;
	up_read(sem);

	return state;
}

/**
 * nilfs_get_segment_range - compute the block range of a full segment
 * @nilfs: nilfs object
 * @segnum: segment number
 * @seg_start: place to store the first block number of the segment
 * @seg_end: place to store the last block number of the segment
 *
 * The range is derived from ns_blocks_per_segment.  For segment 0 the
 * start is ns_first_data_block instead of block 0; the end is unaffected.
 */
static inline void
nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
			sector_t *seg_start, sector_t *seg_end)
{
	const unsigned long nblocks = nilfs->ns_blocks_per_segment;
	const sector_t base = (sector_t)nblocks * segnum;

	*seg_start = base;
	*seg_end = base + nblocks - 1;

	if (!segnum)
		*seg_start = nilfs->ns_first_data_block;
}

/**
 * nilfs_get_segment_start_blocknr - get the first block number of a segment
 * @nilfs: nilfs object
 * @segnum: segment number
 *
 * Return: ns_first_data_block for segment 0, otherwise
 * ns_blocks_per_segment multiplied by @segnum.
 */
static inline sector_t
nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum)
{
	if (segnum == 0)
		return nilfs->ns_first_data_block;

	return (sector_t)nilfs->ns_blocks_per_segment * segnum;
}

/**
 * nilfs_get_segnum_of_block - get the segment number containing a block
 * @nilfs: nilfs object
 * @blocknr: block number
 *
 * Return: @blocknr divided by ns_blocks_per_segment.
 */
static inline __u64
nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr)
{
	/* sector_div() leaves the quotient in its first argument */
	sector_div(blocknr, nilfs->ns_blocks_per_segment);

	return blocknr;
}

/**
 * nilfs_terminate_segment - terminate the current full segment
 * @nilfs: nilfs object
 * @seg_start: first block number of the segment
 * @seg_end: last block number of the segment
 *
 * Used in case of I/O error: ns_pseg_offset is set to the number of
 * blocks in the range [@seg_start, @seg_end].
 */
static inline void
nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start,
			sector_t seg_end)
{
	const sector_t seg_len = seg_end - seg_start + 1;

	nilfs->ns_pseg_offset = seg_len;
}

/**
 * nilfs_shift_to_next_segment - move forward with a full segment
 * @nilfs: nilfs object
 *
 * Makes ns_nextnum the current segment number, resets the partial
 * segment offset to zero and increments the segment sequence counter.
 */
static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs)
{
	nilfs->ns_seg_seq += 1;
	nilfs->ns_pseg_offset = 0;
	nilfs->ns_segnum = nilfs->ns_nextnum;
}

static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs)
{
	__u64 cno;

	spin_lock(&nilfs->ns_last_segment_lock);
	cno = nilfs->ns_last_cno;
	spin_unlock(&nilfs->ns_last_segment_lock);
	return cno;
}

/**
 * nilfs_segment_is_active - test whether a segment number is in use
 * @nilfs: nilfs object
 * @n: segment number to test
 *
 * Return: 1 if @n equals ns_segnum or ns_nextnum, 0 otherwise.
 */
static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n)
{
	if (n == nilfs->ns_segnum)
		return 1;

	return n == nilfs->ns_nextnum;
}

/**
 * nilfs_flush_device - issue a flush to the block device if needed
 * @nilfs: nilfs object
 *
 * Nothing is done when the BARRIER mount option is not set or when
 * ns_flushed_device is already nonzero.  Otherwise ns_flushed_device is
 * set to 1 and blkdev_issue_flush() is called on ns_bdev.
 *
 * Return: -EIO if blkdev_issue_flush() returned -EIO; 0 in every other
 * case, including other error codes from the flush.
 */
static inline int nilfs_flush_device(struct the_nilfs *nilfs)
{
	int ret;

	if (!nilfs_test_opt(nilfs, BARRIER))
		return 0;
	if (nilfs->ns_flushed_device)
		return 0;

	nilfs->ns_flushed_device = 1;
	/*
	 * the store to ns_flushed_device must not be reordered after
	 * blkdev_issue_flush().
	 */
	smp_wmb();

	ret = blkdev_issue_flush(nilfs->ns_bdev, GFP_KERNEL, NULL);

	return ret == -EIO ? ret : 0;
}

#endif /* _THE_NILFS_H */