xfs_mount.h 15.6 KB
Newer Older
D
Dave Chinner 已提交
1
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#ifndef __XFS_MOUNT_H__
#define	__XFS_MOUNT_H__

9
/*
 * Forward declarations.  This header only refers to these types through
 * pointers, so their full definitions are not needed here.
 */
struct xlog;
struct xfs_inode;
struct xfs_mru_cache;
struct xfs_ail;
struct xfs_quotainfo;
struct xfs_da_geometry;
C
Christoph Hellwig 已提交
15

16 17 18 19 20 21 22 23 24 25
/*
 * Slots for the dynamic preallocation free space thresholds, one per
 * percentage step from 1% up to 5%.  XFS_LOWSP_MAX is the number of slots.
 */
enum {
	XFS_LOWSP_1_PCNT	= 0,
	XFS_LOWSP_2_PCNT	= 1,
	XFS_LOWSP_3_PCNT	= 2,
	XFS_LOWSP_4_PCNT	= 3,
	XFS_LOWSP_5_PCNT	= 4,
	XFS_LOWSP_MAX		= 5,
};

26 27 28 29 30 31 32
/*
 * Error Configuration
 *
 * Error classes define the subsystem the configuration belongs to.
 * Error numbers define the errors that are configurable.
 */
enum {
	XFS_ERR_METADATA,
	XFS_ERR_CLASS_MAX,	/* number of error classes, keep last */
};
/* Configurable error numbers within an error class. */
enum {
	XFS_ERR_DEFAULT,
	XFS_ERR_EIO,
	XFS_ERR_ENOSPC,
	XFS_ERR_ENODEV,
	XFS_ERR_ERRNO_MAX,	/* number of error numbers, keep last */
};

44 45
/*
 * Special value meaning "retry forever".  The expansion is parenthesised so
 * the negative constant stays a single operand wherever the macro is used.
 */
#define XFS_ERR_RETRY_FOREVER	(-1)

46 47 48 49 50 51
/*
 * Although retry_timeout is in jiffies which is normally an unsigned long,
 * we limit the retry timeout to 86400 seconds, or one day.  So even a
 * signed 32-bit long is sufficient for a HZ value up to 24855.  Making it
 * signed lets us store the special "-1" value, meaning retry forever.
 */
52 53 54
struct xfs_error_cfg {
	struct xfs_kobj	kobj;
	int		max_retries;
55
	long		retry_timeout;	/* in jiffies, -1 = infinite */
56 57
};

58 59 60 61 62 63 64 65 66
/*
 * The struct xfsmount layout is optimised to separate read-mostly variables
 * from variables that are frequently modified. We put the read-mostly variables
 * first, then place all the other variables at the end.
 *
 * Typically, read-mostly variables are those that are set at mount time and
 * never changed again, or only change rarely as a result of things like sysfs
 * knobs being tweaked.
 */
L
Linus Torvalds 已提交
67
typedef struct xfs_mount {
68
	struct xfs_sb		m_sb;		/* copy of fs superblock */
C
Christoph Hellwig 已提交
69
	struct super_block	*m_super;
D
David Chinner 已提交
70
	struct xfs_ail		*m_ail;		/* fs active log item list */
L
Linus Torvalds 已提交
71
	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
72 73
	char			*m_rtname;	/* realtime device name */
	char			*m_logname;	/* external log device name */
74 75
	struct xfs_da_geometry	*m_dir_geo;	/* directory block geometry */
	struct xfs_da_geometry	*m_attr_geo;	/* attribute block geometry */
76
	struct xlog		*m_log;		/* log specific stuff */
L
Linus Torvalds 已提交
77 78 79 80 81 82 83
	struct xfs_inode	*m_rbmip;	/* pointer to bitmap inode */
	struct xfs_inode	*m_rsumip;	/* pointer to summary inode */
	struct xfs_inode	*m_rootip;	/* pointer to root directory */
	struct xfs_quotainfo	*m_quotainfo;	/* disk quota information */
	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
84 85 86 87 88 89 90 91 92 93 94 95
	/*
	 * Optional cache of rt summary level per bitmap block with the
	 * invariant that m_rsum_cache[bbno] <= the minimum i for which
	 * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
	 * inode lock.
	 */
	uint8_t			*m_rsum_cache;
	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
	struct workqueue_struct *m_buf_workqueue;
	struct workqueue_struct	*m_unwritten_workqueue;
	struct workqueue_struct	*m_cil_workqueue;
	struct workqueue_struct	*m_reclaim_workqueue;
96
	struct workqueue_struct *m_gc_workqueue;
97 98 99
	struct workqueue_struct	*m_sync_workqueue;

	int			m_bsize;	/* fs logical block size */
100 101 102
	uint8_t			m_blkbit_log;	/* blocklog + NBBY */
	uint8_t			m_blkbb_log;	/* blocklog - BBSHIFT */
	uint8_t			m_agno_log;	/* log #ag's */
103
	uint8_t			m_sectbb_log;	/* sectlog - BBSHIFT */
L
Linus Torvalds 已提交
104 105 106
	uint			m_blockmask;	/* sb_blocksize-1 */
	uint			m_blockwsize;	/* sb_blocksize in words */
	uint			m_blockwmask;	/* blockwsize-1 */
107 108 109 110
	uint			m_alloc_mxr[2];	/* max alloc btree records */
	uint			m_alloc_mnr[2];	/* min alloc btree records */
	uint			m_bmap_dmxr[2];	/* max bmap btree records */
	uint			m_bmap_dmnr[2];	/* min bmap btree records */
111 112
	uint			m_rmap_mxr[2];	/* max rmap btree records */
	uint			m_rmap_mnr[2];	/* min rmap btree records */
113 114
	uint			m_refc_mxr[2];	/* max refc btree records */
	uint			m_refc_mnr[2];	/* min refc btree records */
L
Linus Torvalds 已提交
115 116
	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */
	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
117
	uint			m_rmap_maxlevels; /* max rmap btree levels */
118
	uint			m_refc_maxlevels; /* max refcount btree level */
119
	xfs_extlen_t		m_ag_prealloc_blocks; /* reserved ag blocks */
120 121
	uint			m_alloc_set_aside; /* space we can't use */
	uint			m_ag_max_usable; /* max space per AG */
122 123 124 125 126 127 128 129 130
	int			m_dalign;	/* stripe unit */
	int			m_swidth;	/* stripe width */
	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */
	uint			m_allocsize_log;/* min write size log bytes */
	uint			m_allocsize_blocks; /* min write size blocks */
	int			m_logbufs;	/* number of log buffers */
	int			m_logbsize;	/* size of each log buffer */
	uint			m_rsumlevels;	/* rt summary levels */
	uint			m_rsumsize;	/* size of rt summary, bytes */
L
Linus Torvalds 已提交
131 132
	int			m_fixedfsid[2];	/* unchanged for life of FS */
	uint			m_qflags;	/* quota status flags */
133 134 135
	uint64_t		m_flags;	/* global mount flags */
	int64_t			m_low_space[XFS_LOWSP_MAX];
	struct xfs_ino_geometry	m_ino_geo;	/* inode geometry */
136
	struct xfs_trans_resv	m_resv;		/* precomputed res values */
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
						/* low free space thresholds */
	bool			m_always_cow;
	bool			m_fail_unmount;
	bool			m_finobt_nores; /* no per-AG finobt resv. */
	bool			m_update_sb;	/* sb needs update in mount */

	/*
	 * Bitsets of per-fs metadata that have been checked and/or are sick.
	 * Callers must hold m_sb_lock to access these two fields.
	 */
	uint8_t			m_fs_checked;
	uint8_t			m_fs_sick;
	/*
	 * Bitsets of rt metadata that have been checked and/or are sick.
	 * Callers must hold m_sb_lock to access this field.
	 */
	uint8_t			m_rt_checked;
	uint8_t			m_rt_sick;

	/*
	 * End of read-mostly variables. Frequently written variables and locks
	 * should be placed below this comment from now on. The first variable
	 * here is marked as cacheline aligned so they it is separated from
	 * the read-mostly variables.
	 */

	spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
	struct percpu_counter	m_icount;	/* allocated inodes counter */
	struct percpu_counter	m_ifree;	/* free inodes counter */
	struct percpu_counter	m_fdblocks;	/* free block counter */
	/*
	 * Count of data device blocks reserved for delayed allocations,
	 * including indlen blocks.  Does not include allocated CoW staging
	 * extents or anything related to the rt device.
	 */
	struct percpu_counter	m_delalloc_blks;
173 174 175 176 177 178
	/*
	 * Global count of allocation btree blocks in use across all AGs. Only
	 * used when perag reservation is enabled. Helps prevent block
	 * reservation from attempting to reserve allocation btree blocks.
	 */
	atomic64_t		m_allocbt_blks;
179 180 181

	struct radix_tree_root	m_perag_tree;	/* per-ag accounting info */
	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */
182 183 184
	uint64_t		m_resblks;	/* total reserved blocks */
	uint64_t		m_resblks_avail;/* available reserved blocks */
	uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
185
	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
B
Brian Foster 已提交
186
	struct xfs_kobj		m_kobj;
187
	struct xfs_kobj		m_error_kobj;
188
	struct xfs_kobj		m_error_meta_kobj;
189
	struct xfs_error_cfg	m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
190
	struct xstats		m_stats;	/* per-fs stats */
191 192 193
	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */
	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */
194

195 196 197 198 199
	/*
	 * Workqueue item so that we can coalesce multiple inode flush attempts
	 * into a single flush.
	 */
	struct work_struct	m_flush_inodes_work;
200 201 202 203 204 205 206 207 208 209

	/*
	 * Generation of the filesysyem layout.  This is incremented by each
	 * growfs, and used by the pNFS server to ensure the client updates
	 * its view of the block device once it gets a layout that might
	 * reference the newly added blocks.  Does not need to be persistent
	 * as long as we only allow file system size increments, but if we
	 * ever support shrinks it would have to be persisted in addition
	 * to various other kinds of pain inflicted on the pNFS server.
	 */
210
	uint32_t		m_generation;
211
	struct mutex		m_growlock;	/* growfs mutex */
212 213

#ifdef DEBUG
214 215 216 217 218 219
	/*
	 * Frequency with which errors are injected.  Replaces xfs_etest; the
	 * value stored in here is the inverse of the frequency with which the
	 * error triggers.  1 = always, 2 = half the time, etc.
	 */
	unsigned int		*m_errortag;
220
	struct xfs_kobj		m_errortag_kobj;
221
#endif
L
Linus Torvalds 已提交
222 223
} xfs_mount_t;

D
Darrick J. Wong 已提交
224 225
/* Address of the inode geometry (m_ino_geo) embedded in the mount @mp. */
#define M_IGEO(mp)		(&(mp)->m_ino_geo)

L
Linus Torvalds 已提交
226 227 228
/*
 * Flags for m_flags.
 *
 * The bit positions are not contiguous; unused positions are left as gaps.
 */
#define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
						 * must be synchronous except
						 * for space allocations */
#define XFS_MOUNT_UNMOUNTING	(1ULL << 1)	/* filesystem is unmounting */
#define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
#define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
						 * operations, typically for
						 * disk errors in metadata */
#define XFS_MOUNT_DISCARD	(1ULL << 5)	/* discard unused blocks */
#define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
						 * allocations */
#define XFS_MOUNT_ATTR2		(1ULL << 8)	/* allow use of attr2 format */
#define XFS_MOUNT_GRPID		(1ULL << 9)	/* group-ID assigned from directory */
#define XFS_MOUNT_NORECOVERY	(1ULL << 10)	/* no recovery - dirty fs */
#define XFS_MOUNT_ALLOCSIZE	(1ULL << 12)	/* specified allocation size */
#define XFS_MOUNT_SMALL_INUMS	(1ULL << 14)	/* user wants 32bit inodes */
#define XFS_MOUNT_32BITINODES	(1ULL << 15)	/* inode32 allocator active */
#define XFS_MOUNT_NOUUID	(1ULL << 16)	/* ignore uuid during mount */
#define XFS_MOUNT_IKEEP		(1ULL << 18)	/* keep empty inode clusters */
#define XFS_MOUNT_SWALLOC	(1ULL << 19)	/* turn on stripe width
						 * allocation */
#define XFS_MOUNT_RDONLY	(1ULL << 20)	/* read-only fs */
#define XFS_MOUNT_DIRSYNC	(1ULL << 21)	/* synchronous directory ops */
#define XFS_MOUNT_LARGEIO	(1ULL << 22)	/* report large preferred
						 * I/O size in stat() */
#define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
						 * allocator */
#define XFS_MOUNT_NOATTR2	(1ULL << 25)	/* disable use of attr2 format */
#define XFS_MOUNT_DAX_ALWAYS	(1ULL << 26)
#define XFS_MOUNT_DAX_NEVER	(1ULL << 27)
D
Dave Chinner 已提交
259

L
Linus Torvalds 已提交
260
/*
 * Max and min values for mount-option defined I/O
 * preallocation sizes, expressed as log2 of the size.
 */
#define XFS_MAX_IO_LOG		30	/* 1G */
#define XFS_MIN_IO_LOG		PAGE_SHIFT

D
David Chinner 已提交
267 268
/* Test the XFS_MOUNT_WAS_CLEAN bit in the mount flags of @mp. */
#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\
				((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
/* Test the XFS_MOUNT_FS_SHUTDOWN bit in the mount flags of @mp. */
#define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
		int lnnum);
/*
 * Call xfs_do_force_shutdown() with the source file and line of the call
 * site filled in.
 */
#define xfs_force_shutdown(m,f)	\
	xfs_do_force_shutdown(m, f, __FILE__, __LINE__)
L
Linus Torvalds 已提交
274

C
Christoph Hellwig 已提交
275 276 277 278 279
/* Reason bits for a shutdown request; each reason is a distinct bit. */
/* write attempt to metadata failed */
#define SHUTDOWN_META_IO_ERROR	(1 << 0)
/* write attempt to the log failed */
#define SHUTDOWN_LOG_IO_ERROR	(1 << 1)
/* shutdown from a forced unmount */
#define SHUTDOWN_FORCE_UMOUNT	(1 << 2)
/* corrupt in-memory data structures */
#define SHUTDOWN_CORRUPT_INCORE	(1 << 3)

L
Linus Torvalds 已提交
280 281 282
/*
 * Flags for xfs_mountfs
 */
#define XFS_MFSI_QUIET		0x40	/* Be silent if mount errors found */
L
Linus Torvalds 已提交
284

285 286
/*
 * Return the allocation group number for disk address @d: convert @d to a
 * filesystem block with XFS_BB_TO_FSBT() and divide by the number of blocks
 * per AG (sb_agblocks).
 */
static inline xfs_agnumber_t
xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
{
	xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);

	/* do_div() divides in place: ld holds the quotient afterwards. */
	do_div(ld, mp->m_sb.sb_agblocks);
	return (xfs_agnumber_t) ld;
}

293 294
/*
 * Return the block offset within its allocation group for disk address @d:
 * convert @d to a filesystem block with XFS_BB_TO_FSBT() and take the
 * remainder of dividing by the number of blocks per AG (sb_agblocks).
 */
static inline xfs_agblock_t
xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
{
	xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);

	/* The value of do_div() is the remainder of the division. */
	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}

300 301 302 303 304 305 306 307 308 309
/* Per-AG block reservation data structure. */
struct xfs_ag_resv {
	xfs_extlen_t			ar_orig_reserved; /* blocks originally reserved here */
	xfs_extlen_t			ar_reserved;	  /* blocks reserved here */
	xfs_extlen_t			ar_asked;	  /* blocks originally asked for */
};

310 311
/*
 * Per-ag incore structure, copies of information in agf and agi, to improve the
312
 * performance of allocation group selection.
313 314 315 316 317 318 319 320 321
 */
typedef struct xfs_perag {
	struct xfs_mount *pag_mount;	/* owner filesystem */
	xfs_agnumber_t	pag_agno;	/* AG this structure belongs to */
	atomic_t	pag_ref;	/* perag reference count */
	char		pagf_init;	/* this agf's entry is initialized */
	char		pagi_init;	/* this agi's entry is initialized */
	char		pagf_metadata;	/* the agf is preferred to be metadata */
	char		pagi_inodeok;	/* The agi is ok for inodes */
322
	uint8_t		pagf_levels[XFS_BTNUM_AGF];
323
					/* # of levels in bno & cnt btree */
324
	bool		pagf_agflreset; /* agfl requires reset before use */
325
	uint32_t	pagf_flcount;	/* count of blocks in freelist */
326 327
	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
	xfs_extlen_t	pagf_longest;	/* longest free space */
328
	uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
329 330 331 332 333 334 335 336 337 338 339
	xfs_agino_t	pagi_freecount;	/* number of free inodes */
	xfs_agino_t	pagi_count;	/* number of allocated inodes */

	/*
	 * Inode allocation search lookup optimisation.
	 * If the pagino matches, the search for new inodes
	 * doesn't need to search the near ones again straight away
	 */
	xfs_agino_t	pagl_pagino;
	xfs_agino_t	pagl_leftrec;
	xfs_agino_t	pagl_rightrec;
340

341 342 343 344 345 346 347 348 349 350
	int		pagb_count;	/* pagb slots in use */
	uint8_t		pagf_refcount_level; /* recount btree height */

	/* Blocks reserved for all kinds of metadata. */
	struct xfs_ag_resv	pag_meta_resv;
	/* Blocks reserved for the reverse mapping btree. */
	struct xfs_ag_resv	pag_rmapbt_resv;

	/* -- kernel only structures below this line -- */

351 352 353 354 355 356 357 358
	/*
	 * Bitsets of per-ag metadata that have been checked and/or are sick.
	 * Callers should hold pag_state_lock before accessing this field.
	 */
	uint16_t	pag_checked;
	uint16_t	pag_sick;
	spinlock_t	pag_state_lock;

359 360
	spinlock_t	pagb_lock;	/* lock for pagb_tree */
	struct rb_root	pagb_tree;	/* ordered tree of busy extents */
361 362
	unsigned int	pagb_gen;	/* generation count for pagb_tree */
	wait_queue_head_t pagb_wait;	/* woken when pagb_gen changes */
363 364 365 366 367 368 369 370 371

	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */

	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
	int		pag_ici_reclaimable;	/* reclaimable inodes */
	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */

	/* buffer cache index */
372 373
	spinlock_t	pag_buf_lock;	/* lock for pag_buf_hash */
	struct rhashtable pag_buf_hash;
374 375 376

	/* for rcu-safe freeing */
	struct rcu_head	rcu_head;
377

378 379 380
	/* background prealloc block trimming */
	struct delayed_work	pag_blockgc_work;

381 382 383 384 385 386
	/*
	 * Unlinked inode information.  This incore information reflects
	 * data stored in the AGI, so callers must hold the AGI buffer lock
	 * or have some other means to control concurrency.
	 */
	struct rhashtable	pagi_unlinked_hash;
387 388
} xfs_perag_t;

389 390 391 392 393 394 395 396
static inline struct xfs_ag_resv *
xfs_perag_resv(
	struct xfs_perag	*pag,
	enum xfs_ag_resv_type	type)
{
	switch (type) {
	case XFS_AG_RESV_METADATA:
		return &pag->pag_meta_resv;
397 398
	case XFS_AG_RESV_RMAPBT:
		return &pag->pag_rmapbt_resv;
399 400 401 402 403
	default:
		return NULL;
	}
}

404 405 406
int xfs_buf_hash_init(xfs_perag_t *pag);
void xfs_buf_hash_destroy(xfs_perag_t *pag);

407
extern void	xfs_uuid_table_free(void);
408
extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
C
Christoph Hellwig 已提交
409
extern int	xfs_mountfs(xfs_mount_t *mp);
410 411
extern int	xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
				     xfs_agnumber_t *maxagi);
412
extern void	xfs_unmountfs(xfs_mount_t *);
D
Dave Chinner 已提交
413

414 415
extern int	xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
				 bool reserved);
D
Dave Chinner 已提交
416 417
extern int	xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);

418
extern int	xfs_readsb(xfs_mount_t *, int);
L
Linus Torvalds 已提交
419
extern void	xfs_freesb(xfs_mount_t *);
420
extern bool	xfs_fs_writable(struct xfs_mount *mp, int level);
421
extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t);
L
Linus Torvalds 已提交
422

C
Christoph Hellwig 已提交
423 424
extern int	xfs_dev_is_read_only(struct xfs_mount *, char *);

425 426
extern void	xfs_set_low_space_thresholds(struct xfs_mount *);

427 428 429
int	xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
			xfs_off_t count_fsb);

430 431
struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
		int error_class, int error);
432
void xfs_force_summary_recalc(struct xfs_mount *mp);
433
void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta);
434

L
Linus Torvalds 已提交
435
#endif	/* __XFS_MOUNT_H__ */