/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RAID10_H
#define _RAID10_H

/* Note: raid10_info.rdev can be set to NULL asynchronously by
 * raid10_remove_disk.
 * There are three safe ways to access raid10_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
 *    that is called as part of performing resync/recovery/reshape.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
 *    and if it is non-NULL, increment rdev->nr_pending before dropping the
 *    RCU lock.
 * When .rdev is set to NULL, the nr_pending count is checked again and if it
 * has been incremented, the pointer is put back in .rdev.
 */

struct raid10_info {
19
	struct md_rdev	*rdev, *replacement;
L
Linus Torvalds 已提交
20
	sector_t	head_position;
21 22 23 24 25
	int		recovery_disabled;	/* matches
						 * mddev->recovery_disabled
						 * when we shouldn't try
						 * recovering this device.
						 */
L
Linus Torvalds 已提交
26 27
};

28
struct r10conf {
29
	struct mddev		*mddev;
30 31
	struct raid10_info	*mirrors;
	struct raid10_info	*mirrors_new, *mirrors_old;
L
Linus Torvalds 已提交
32 33 34
	spinlock_t		device_lock;

	/* geometry */
35 36 37
	struct geom {
		int		raid_disks;
		int		near_copies;  /* number of copies laid out
38
					       * raid0 style */
39
		int		far_copies;   /* number of copies laid out
L
Linus Torvalds 已提交
40 41
					       * at large strides across drives
					       */
42
		int		far_offset;   /* far_copies are offset by 1
43
					       * stripe instead of many
44
					       */
45
		sector_t	stride;	      /* distance between far copies.
46 47 48
					       * This is size / far_copies unless
					       * far_offset, in which case it is
					       * 1 stripe.
L
Linus Torvalds 已提交
49
					       */
50 51 52 53 54
		int             far_set_size; /* The number of devices in a set,
					       * where a 'set' are devices that
					       * contain far/offset copies of
					       * each other.
					       */
55 56
		int		chunk_shift; /* shift from chunks to sectors */
		sector_t	chunk_mask;
57
	} prev, geo;
58 59 60
	int			copies;	      /* near_copies * far_copies.
					       * must be <= raid_disks
					       */
L
Linus Torvalds 已提交
61

62 63
	sector_t		dev_sectors;  /* temp copy of
					       * mddev->dev_sectors */
64
	sector_t		reshape_progress;
N
NeilBrown 已提交
65 66 67
	sector_t		reshape_safe;
	unsigned long		reshape_checkpoint;
	sector_t		offset_diff;
68

L
Linus Torvalds 已提交
69
	struct list_head	retry_list;
70 71 72 73 74 75
	/* A separate list of r1bio which just need raid_end_bio_io called.
	 * This mustn't happen for writes which had any errors if the superblock
	 * needs to be written.
	 */
	struct list_head	bio_end_io_list;

76 77
	/* queue pending writes and submit them on unplug */
	struct bio_list		pending_bio_list;
78
	int			pending_count;
L
Linus Torvalds 已提交
79 80

	spinlock_t		resync_lock;
81
	atomic_t		nr_pending;
82 83 84
	int			nr_waiting;
	int			nr_queued;
	int			barrier;
85
	int			array_freeze_pending;
L
Linus Torvalds 已提交
86
	sector_t		next_resync;
87 88 89 90
	int			fullsync;  /* set to 1 if a full sync is needed,
					    * (fresh device added).
					    * Cleared when a sync completes.
					    */
91 92 93
	int			have_replacement; /* There is at least one
						   * replacement device.
						   */
94
	wait_queue_head_t	wait_barrier;
L
Linus Torvalds 已提交
95

96 97
	mempool_t		r10bio_pool;
	mempool_t		r10buf_pool;
98
	struct page		*tmppage;
99
	struct bio_set		bio_split;
100 101 102 103

	/* When taking over an array from a different personality, we store
	 * the new thread here until we fully activate the array.
	 */
104
	struct md_thread	*thread;
105 106 107 108 109 110

	/*
	 * Keep track of cluster resync window to send to other nodes.
	 */
	sector_t		cluster_sync_low;
	sector_t		cluster_sync_high;
L
Linus Torvalds 已提交
111 112 113 114 115 116 117 118 119
};

/*
 * This is our 'private' RAID10 bio.
 *
 * It contains information about what kind of IO operations were started
 * for this RAID10 operation, and about their status:
 */

struct r10bio {
L
Linus Torvalds 已提交
121 122 123 124 125 126
	atomic_t		remaining; /* 'have we finished' count,
					    * used from IRQ handlers
					    */
	sector_t		sector;	/* virtual sector number */
	int			sectors;
	unsigned long		state;
G
Guoqing Jiang 已提交
127
	unsigned long		start_time;
128
	struct mddev		*mddev;
L
Linus Torvalds 已提交
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
	/*
	 * original bio going to /dev/mdx
	 */
	struct bio		*master_bio;
	/*
	 * if the IO is in READ direction, then this is where we read
	 */
	int			read_slot;

	struct list_head	retry_list;
	/*
	 * if the IO is in WRITE direction, then multiple bios are used,
	 * one for each copy.
	 * When resyncing we also use one for each copy.
	 * When reconstructing, we use 2 bios, one for read, one for write.
	 * We choose the number when they are allocated.
145
	 * We sometimes need an extra bio to write to the replacement.
L
Linus Torvalds 已提交
146
	 */
147
	struct r10dev {
148
		struct bio	*bio;
149 150 151 152 153 154
		/* Currently just used for normal reads and writes */
		struct md_rdev	*rdev;
		/* used for resync and writes */
		struct bio	*repl_bio;
		/* Currently just used for normal writes */
		struct md_rdev	*replacement;
155 156
		sector_t	addr;
		int		devnum;
157
	} devs[];
L
Linus Torvalds 已提交
158 159 160
};

/* bits for r10bio.state */
/*
 * Flags kept in r10bio.state.  Enumerators are numbered implicitly from 0,
 * so their order must not change.
 */
enum r10bio_state {
	R10BIO_Uptodate,
	R10BIO_IsSync,
	R10BIO_IsRecover,
	R10BIO_IsReshape,
	R10BIO_Degraded,
	/* Set ReadError on bios that experience a read error
	 * so that raid10d knows what to do with them.
	 */
	R10BIO_ReadError,
	/* If a write for this request means we can clear some
	 * known-bad-block records, we set this flag.
	 */
	R10BIO_MadeGood,
	R10BIO_WriteError,
	/* During a reshape we might be performing IO on the
	 * 'previous' part of the array, in which case this
	 * flag is set
	 */
	R10BIO_Previous,
	/* failfast devices did receive failfast requests. */
	R10BIO_FailFast,
};
#endif