md_k.h 11.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
   md_k.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
	  
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.
   
   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
*/

#ifndef _MD_K_H
#define _MD_K_H

18 19 20
/* and dm-bio-list.h is not under include/linux because.... ??? */
#include "../../../drivers/md/dm-bio-list.h"

21 22
#ifdef CONFIG_BLOCK

L
Linus Torvalds 已提交
23 24 25 26
/*
 * Negative 'level' codes.
 * NOTE(review): presumably these select the non-RAID personalities
 * (multipath, linear, faulty) via the 'level' fields of mddev_s and
 * mdk_personality -- confirm against the personality modules.
 */
#define	LEVEL_MULTIPATH		(-4)
#define	LEVEL_LINEAR		(-1)
#define	LEVEL_FAULTY		(-5)

27 28 29 30 31 32
/* we need a value for 'no level specified' and 0
 * means 'raid0', so we need something else.  This is
 * for internal use only
 */
#define	LEVEL_NONE		(-1000000)

L
Linus Torvalds 已提交
33 34 35 36 37 38 39 40 41
/* All-ones value of sector_t, i.e. the largest value the type can hold. */
#define MaxSector (~(sector_t)0)

/* Forward typedefs so the two structures below can refer to each other. */
typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;

/*
 * options passed in raidrun:
 */

42
/*
 * Currently this must fit in an 'int' (mddev_s.chunk_size is declared int).
 * 1<<30 is the largest power of two that a 32-bit signed int can hold.
 */
#define MAX_CHUNK_SIZE (1<<30)
L
Linus Torvalds 已提交
44 45 46 47 48 49 50 51 52 53

/*
 * MD's 'extended' device
 */
struct mdk_rdev_s
{
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t size;			/* Device size (in blocks) */
	mddev_t *mddev;			/* RAID array if running */
54
	long last_events;		/* IO event timestamp */
L
Linus Torvalds 已提交
55 56 57 58 59

	struct block_device *bdev;	/* block device handle */

	struct page	*sb_page;
	int		sb_loaded;
60
	__u64		sb_events;
L
Linus Torvalds 已提交
61 62
	sector_t	data_offset;	/* start of data in array */
	sector_t	sb_offset;
63
	int		sb_size;	/* bytes in the superblock */
L
Linus Torvalds 已提交
64 65
	int		preferred_minor;	/* autorun support */

66 67
	struct kobject	kobj;

L
Linus Torvalds 已提交
68 69 70 71 72 73 74 75 76 77 78
	/* A device can be in one of three states based on two flags:
	 * Not working:   faulty==1 in_sync==0
	 * Fully working: faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array
	 *                faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

79 80 81
	unsigned long	flags;
#define	Faulty		1		/* device is known to have a fault */
#define	In_sync		2		/* device is in_sync with rest of array */
82
#define	WriteMostly	4		/* Avoid reading if at all possible */
83
#define	BarriersNotsupp	5		/* BIO_RW_BARRIER is not supported */
84 85
#define	AllReserved	6		/* If whole device is reserved for
					 * one array */
86
#define	AutoDetected	7		/* added by auto-detect */
87 88 89 90
#define Blocked		8		/* An error occured on an externally
					 * managed array, don't allow writes
					 * until it is cleared */
	wait_queue_head_t blocked_wait;
91

L
Linus Torvalds 已提交
92 93
	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
94 95 96 97
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a partial
					 * resync from the bitmap
					 */
98 99 100 101
	sector_t	recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
L
Linus Torvalds 已提交
102 103 104 105 106

	atomic_t	nr_pending;	/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
107 108 109
	atomic_t	read_errors;	/* number of consecutive read errors that
					 * we have tried to ignore.
					 */
110 111 112 113
	atomic_t	corrected_errors; /* number of corrected read errors,
					   * for reporting to userspace and storing
					   * in superblock.
					   */
114
	struct work_struct del_work;	/* used for delayed sysfs removal */
L
Linus Torvalds 已提交
115 116 117 118 119
};

struct mddev_s
{
	void				*private;
120
	struct mdk_personality		*pers;
L
Linus Torvalds 已提交
121 122 123
	dev_t				unit;
	int				md_minor;
	struct list_head 		disks;
124 125 126 127 128
	unsigned long			flags;
#define MD_CHANGE_DEVS	0	/* Some device status has changed */
#define MD_CHANGE_CLEAN 1	/* transition to or from 'clean' */
#define MD_CHANGE_PENDING 2	/* superblock update in progress */

L
Linus Torvalds 已提交
129 130 131 132
	int				ro;

	struct gendisk			*gendisk;

133 134
	struct kobject			kobj;

L
Linus Torvalds 已提交
135 136 137 138 139
	/* Superblock information */
	int				major_version,
					minor_version,
					patch_version;
	int				persistent;
140 141 142
	int 				external;	/* metadata is
							 * managed externally */
	char				metadata_type[17]; /* externally set*/
L
Linus Torvalds 已提交
143 144 145
	int				chunk_size;
	time_t				ctime, utime;
	int				level, layout;
146
	char				clevel[16];
L
Linus Torvalds 已提交
147 148 149 150 151 152 153 154
	int				raid_disks;
	int				max_disks;
	sector_t			size; /* used size of component devices */
	sector_t			array_size; /* exported array size */
	__u64				events;

	char				uuid[16];

155 156 157 158 159 160 161 162
	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is happening (yet).
	 */
	sector_t			reshape_position;
	int				delta_disks, new_level, new_layout, new_chunk;

L
Linus Torvalds 已提交
163 164
	struct mdk_thread_s		*thread;	/* management thread */
	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
165
	sector_t			curr_resync;	/* last block scheduled */
L
Linus Torvalds 已提交
166 167
	unsigned long			resync_mark;	/* a recent timestamp */
	sector_t			resync_mark_cnt;/* blocks written at resync_mark */
168
	sector_t			curr_mark_cnt; /* blocks scheduled now */
L
Linus Torvalds 已提交
169 170

	sector_t			resync_max_sectors; /* may be set by personality */
171 172 173 174

	sector_t			resync_mismatches; /* count of sectors where
							    * parity/replica mismatch found
							    */
175 176 177 178

	/* allow user-space to request suspension of IO to regions of the array */
	sector_t			suspend_lo;
	sector_t			suspend_hi;
179 180 181 182
	/* if zero, use the system-wide default */
	int				sync_speed_min;
	int				sync_speed_max;

183 184 185
	/* resync even though the same disks are shared among md-devices */
	int				parallel_resync;

186
	int				ok_start_degraded;
L
Linus Torvalds 已提交
187 188 189 190
	/* recovery/resync flags 
	 * NEEDED:   we might need to start a resync/recover
	 * RUNNING:  a thread is running, or about to be started
	 * SYNC:     actually doing a resync, not a recovery
191
	 * INTR:     resync needs to be aborted for some reason
L
Linus Torvalds 已提交
192
	 * DONE:     thread is done and is waiting to be reaped
193 194
	 * REQUEST:  user-space has requested a sync (used with SYNC)
	 * CHECK:    user-space request for for check-only, no repair
195 196 197
	 * RESHAPE:  A reshape is happening
	 *
	 * If neither SYNC or RESHAPE are set, then it is a recovery.
L
Linus Torvalds 已提交
198 199 200 201 202 203
	 */
#define	MD_RECOVERY_RUNNING	0
#define	MD_RECOVERY_SYNC	1
#define	MD_RECOVERY_INTR	3
#define	MD_RECOVERY_DONE	4
#define	MD_RECOVERY_NEEDED	5
204 205
#define	MD_RECOVERY_REQUESTED	6
#define	MD_RECOVERY_CHECK	7
206
#define MD_RECOVERY_RESHAPE	8
207 208
#define	MD_RECOVERY_FROZEN	9

L
Linus Torvalds 已提交
209 210 211
	unsigned long			recovery;

	int				in_sync;	/* know to not need resync */
212
	struct mutex			reconfig_mutex;
L
Linus Torvalds 已提交
213 214
	atomic_t			active;

215
	int				changed;	/* true if we might need to reread partition info */
L
Linus Torvalds 已提交
216 217 218
	int				degraded;	/* whether md should consider
							 * adding a spare
							 */
219 220 221 222 223 224 225
	int				barriers_work;	/* initialised to true, cleared as soon
							 * as a barrier request to slave
							 * fails.  Only supported
							 */
	struct bio			*biolist; 	/* bios that need to be retried
							 * because BIO_RW_BARRIER is not supported
							 */
L
Linus Torvalds 已提交
226 227 228 229

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	wait_queue_head_t		recovery_wait;
	sector_t			recovery_cp;
230 231
	sector_t			resync_max;	/* resync should pause
							 * when it gets here */
232 233

	spinlock_t			write_lock;
234
	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
235
	atomic_t			pending_writes;	/* number of active superblock writes */
236

L
Linus Torvalds 已提交
237 238 239 240 241 242
	unsigned int			safemode;	/* if set, update "clean" superblock
							 * when no writes pending.
							 */ 
	unsigned int			safemode_delay;
	struct timer_list		safemode_timer;
	atomic_t			writes_pending; 
243
	struct request_queue		*queue;	/* for plugging ... */
L
Linus Torvalds 已提交
244

245 246 247
	atomic_t                        write_behind; /* outstanding async IO */
	unsigned int                    max_write_behind; /* 0 = sync */

248 249
	struct bitmap                   *bitmap; /* the bitmap for the device */
	struct file			*bitmap_file; /* the bitmap file */
250 251 252 253
	long				bitmap_offset; /* offset from superblock of
							* start of bitmap. May be
							* negative, but not '0'
							*/
254 255 256 257
	long				default_bitmap_offset; /* this is the offset to use when
								* hot-adding a bitmap.  It should
								* eventually be settable by sysfs.
								*/
258

L
Linus Torvalds 已提交
259 260 261 262 263 264
	struct list_head		all_mddevs;
};


/*
 * Drop one pending-request reference on @rdev.  If that was the last
 * pending request and the device was marked Faulty, set
 * MD_RECOVERY_NEEDED in @mddev->recovery.
 *
 * The Faulty bit is sampled before the counter is decremented.
 * NOTE(review): presumably because @rdev may be torn down by hot-removal
 * as soon as nr_pending reaches zero -- confirm before reordering.
 */
static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
{
	int faulty = test_bit(Faulty, &rdev->flags);
	if (atomic_dec_and_test(&rdev->nr_pending) && faulty)
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}

/*
 * Account @nr_sectors of resync I/O: the count is atomically added to the
 * sync_io counter of the disk reached through @bdev->bd_contains.
 * NOTE(review): bd_contains is presumably the whole-disk device -- confirm.
 */
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	struct block_device *container = bdev->bd_contains;

	atomic_add(nr_sectors, &container->bd_disk->sync_io);
}

275
struct mdk_personality
L
Linus Torvalds 已提交
276 277
{
	char *name;
278 279
	int level;
	struct list_head list;
L
Linus Torvalds 已提交
280
	struct module *owner;
281
	int (*make_request)(struct request_queue *q, struct bio *bio);
L
Linus Torvalds 已提交
282 283 284 285 286 287 288 289 290 291
	int (*run)(mddev_t *mddev);
	int (*stop)(mddev_t *mddev);
	void (*status)(struct seq_file *seq, mddev_t *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed 
	 */
	void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
	int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
	int (*hot_remove_disk) (mddev_t *mddev, int number);
	int (*spare_active) (mddev_t *mddev);
292
	sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
L
Linus Torvalds 已提交
293
	int (*resize) (mddev_t *mddev, sector_t sectors);
294 295
	int (*check_reshape) (mddev_t *mddev);
	int (*start_reshape) (mddev_t *mddev);
L
Linus Torvalds 已提交
296
	int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
297 298 299 300 301 302
	/* quiesce moves between quiescence states
	 * 0 - fully active
	 * 1 - no new requests allowed
	 * others - reserved
	 */
	void (*quiesce) (mddev_t *mddev, int state);
L
Linus Torvalds 已提交
303 304 305
};


306 307 308 309 310 311 312
/*
 * A sysfs attribute together with the callbacks that read ('show') and
 * write ('store') it on behalf of an mddev.
 * NOTE(review): 'buf' is presumably the sysfs text buffer and 'len' the
 * number of bytes supplied by the writer -- confirm against md.c.
 */
struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(mddev_t *mddev, char *buf);
	ssize_t (*store)(mddev_t *mddev, const char *buf, size_t len);
};


L
Linus Torvalds 已提交
313 314 315 316 317 318 319 320 321
/*
 * Name to print for @mddev: the gendisk's disk_name when a gendisk is
 * attached, otherwise the placeholder "mdX".
 */
static inline char * mdname (mddev_t * mddev)
{
	if (mddev->gendisk)
		return mddev->gendisk->disk_name;

	return "mdX";
}

/*
 * Iterates through an rdev ring list. It is safe to remove the
 * current 'rdev' while iterating. Don't touch 'tmp', though.
 */
322
/*
 * rdev_for_each_list(rdev, tmp, list)
 *   rdev: mdk_rdev_t * cursor, set to each entry in turn
 *   tmp:  struct list_head * scratch cursor owned by the macro
 *   list: the struct list_head that heads the ring (an lvalue, not a pointer)
 *
 * 'tmp' is advanced to the next node before the body runs, which is why
 * the current 'rdev' may be unlinked inside the body.  The loop stops once
 * the node just left behind is the list head itself.
 */
#define rdev_for_each_list(rdev, tmp, list)				\
	for ((tmp) = (list).next;					\
		(rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),	\
			(tmp) = (tmp)->next, (tmp)->prev != &(list)	\
		; )
/*
 * iterates through the 'same array disks' ringlist
 */
331
/*
 * rdev_for_each(rdev, tmp, mddev): visit every rdev linked on
 * (mddev)->disks.  Same cursor rules as rdev_for_each_list().
 */
#define rdev_for_each(rdev, tmp, mddev)				\
	rdev_for_each_list(rdev, tmp, (mddev)->disks)
L
Linus Torvalds 已提交
333 334 335 336 337 338 339

/*
 * Descriptor for an md helper thread: the function to run ('run'), the
 * array it is run on ('mddev'), and the task/wait-queue bookkeeping.
 * NOTE(review): 'flags' presumably carries THREAD_WAKEUP and 'timeout' the
 * sleep limit between runs -- confirm against md.c.
 */
typedef struct mdk_thread_s {
	void			(*run) (mddev_t *mddev);
	mddev_t			*mddev;
	wait_queue_head_t	wqueue;
	unsigned long           flags;
	struct task_struct	*tsk;
	unsigned long		timeout;
} mdk_thread_t;

/* NOTE(review): presumably a bit number within mdk_thread_s.flags -- confirm. */
#define THREAD_WAKEUP  0

/*
 * Sleep on wait queue 'wq' until 'condition' becomes true.
 *
 * Each pass sets the task state to TASK_UNINTERRUPTIBLE and tests
 * 'condition'.  If it is still false, the macro releases 'lock' with
 * spin_unlock_irq(), executes 'cmd', calls schedule(), and re-takes
 * 'lock' with spin_lock_irq() before testing again.  On exit the task
 * state is TASK_RUNNING and the wait-queue entry has been removed.
 *
 * 'wq' and 'lock' are passed as objects (the macro takes their address).
 * 'condition' is evaluated repeatedly, so it must be free of side effects.
 * NOTE(review): the unlock-before-sleep sequence implies the caller enters
 * with 'lock' held and interrupts disabled -- confirm at call sites.
 */
#define __wait_event_lock_irq(wq, condition, lock, cmd) 		\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&lock);					\
		cmd;							\
		schedule();						\
		spin_lock_irq(&lock);					\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

/*
 * Fast-path wrapper: when 'condition' already holds nothing happens;
 * otherwise fall through to __wait_event_lock_irq() with the same
 * arguments.
 */
#define wait_event_lock_irq(wq, condition, lock, cmd) 			\
do {									\
	if (!(condition))						\
		__wait_event_lock_irq(wq, condition, lock, cmd);	\
} while (0)

371 372 373 374 375
/* put_page() wrapper that tolerates a NULL page pointer. */
static inline void safe_put_page(struct page *p)
{
	if (!p)
		return;

	put_page(p);
}

376
#endif /* CONFIG_BLOCK */
L
Linus Torvalds 已提交
377 378
#endif