dm-stripe.c 10.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6
/*
 * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

7
#include <linux/device-mapper.h>
L
Linus Torvalds 已提交
8 9 10 11 12 13

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>
V
vignesh babu 已提交
14
#include <linux/log2.h>
L
Linus Torvalds 已提交
15

16
/*
 * Message prefix for this target.  Presumably picked up by the DM logging
 * macros such as DMWARN() -- confirm against linux/device-mapper.h.
 */
#define DM_MSG_PREFIX "striped"
B
Brian Wood 已提交
17
/*
 * Per-stripe error count limit: stripe_end_io() only schedules a table
 * event while a stripe's error_count is still below this value.
 */
#define DM_IO_ERROR_THRESHOLD 15
18

L
Linus Torvalds 已提交
19 20 21
struct stripe {
	struct dm_dev *dev;
	sector_t physical_start;
B
Brian Wood 已提交
22 23

	atomic_t error_count;
L
Linus Torvalds 已提交
24 25 26 27
};

struct stripe_c {
	uint32_t stripes;
28 29
	int stripes_shift;
	sector_t stripes_mask;
L
Linus Torvalds 已提交
30 31 32 33 34 35 36 37

	/* The size of this target / num. stripes */
	sector_t stripe_width;

	/* stripe chunk size */
	uint32_t chunk_shift;
	sector_t chunk_mask;

B
Brian Wood 已提交
38 39 40 41
	/* Needed for handling events */
	struct dm_target *ti;

	/* Work struct used for triggering events*/
42
	struct work_struct trigger_event;
B
Brian Wood 已提交
43

L
Linus Torvalds 已提交
44 45 46
	struct stripe stripe[0];
};

B
Brian Wood 已提交
47 48 49 50 51 52
/*
 * An event is triggered whenever a drive
 * drops out of a stripe volume.
 */
static void trigger_event(struct work_struct *work)
{
53 54
	struct stripe_c *sc = container_of(work, struct stripe_c,
					   trigger_event);
B
Brian Wood 已提交
55 56 57
	dm_table_event(sc->ti->table);
}

L
Linus Torvalds 已提交
58 59 60 61
/*
 * Allocate a stripe context with room for @stripes trailing
 * struct stripe entries.
 *
 * Returns NULL when dm_array_too_big() rejects the requested size or
 * when kmalloc() fails.  The returned memory is not zeroed.
 */
static inline struct stripe_c *alloc_context(unsigned int stripes)
{
	const size_t header_size = sizeof(struct stripe_c);
	const size_t entry_size = sizeof(struct stripe);

	if (dm_array_too_big(header_size, entry_size, stripes))
		return NULL;

	return kmalloc(header_size + (entry_size * stripes), GFP_KERNEL);
}

/*
 * Parse a single <dev> <sector> pair
 */
static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
		      unsigned int stripe, char **argv)
{
A
Andrew Morton 已提交
77
	unsigned long long start;
78
	char dummy;
L
Linus Torvalds 已提交
79

80
	if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1)
L
Linus Torvalds 已提交
81 82
		return -EINVAL;

83
	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
L
Linus Torvalds 已提交
84 85 86 87
			  &sc->stripe[stripe].dev))
		return -ENXIO;

	sc->stripe[stripe].physical_start = start;
B
Brian Wood 已提交
88

L
Linus Torvalds 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
	return 0;
}

/*
 * Construct a striped mapping.
 * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
 */
static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct stripe_c *sc;
	sector_t width;
	uint32_t stripes;
	uint32_t chunk_size;
	char *end;
	int r;
	unsigned int i;

	if (argc < 2) {
107
		ti->error = "Not enough arguments";
L
Linus Torvalds 已提交
108 109 110 111
		return -EINVAL;
	}

	stripes = simple_strtoul(argv[0], &end, 10);
112
	if (!stripes || *end) {
113
		ti->error = "Invalid stripe count";
L
Linus Torvalds 已提交
114 115 116 117 118
		return -EINVAL;
	}

	chunk_size = simple_strtoul(argv[1], &end, 10);
	if (*end) {
119
		ti->error = "Invalid chunk_size";
L
Linus Torvalds 已提交
120 121 122 123 124 125
		return -EINVAL;
	}

	/*
	 * chunk_size is a power of two
	 */
V
vignesh babu 已提交
126
	if (!is_power_of_2(chunk_size) ||
L
Linus Torvalds 已提交
127
	    (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) {
128
		ti->error = "Invalid chunk size";
L
Linus Torvalds 已提交
129 130 131
		return -EINVAL;
	}

132
	if (ti->len & (chunk_size - 1)) {
133
		ti->error = "Target length not divisible by "
K
Kevin Corry 已提交
134 135 136 137
		    "chunk size";
		return -EINVAL;
	}

L
Linus Torvalds 已提交
138 139
	width = ti->len;
	if (sector_div(width, stripes)) {
140
		ti->error = "Target length not divisible by "
L
Linus Torvalds 已提交
141 142 143 144 145 146 147 148
		    "number of stripes";
		return -EINVAL;
	}

	/*
	 * Do we have enough arguments for that many stripes ?
	 */
	if (argc != (2 + 2 * stripes)) {
149
		ti->error = "Not enough destinations "
L
Linus Torvalds 已提交
150 151 152 153 154 155
			"specified";
		return -EINVAL;
	}

	sc = alloc_context(stripes);
	if (!sc) {
156
		ti->error = "Memory allocation for striped context "
L
Linus Torvalds 已提交
157 158 159 160
		    "failed";
		return -ENOMEM;
	}

161
	INIT_WORK(&sc->trigger_event, trigger_event);
B
Brian Wood 已提交
162 163 164

	/* Set pointer to dm target; used in trigger_event */
	sc->ti = ti;
L
Linus Torvalds 已提交
165 166
	sc->stripes = stripes;
	sc->stripe_width = width;
167 168 169 170 171 172 173 174

	if (stripes & (stripes - 1))
		sc->stripes_shift = -1;
	else {
		sc->stripes_shift = ffs(stripes) - 1;
		sc->stripes_mask = ((sector_t) stripes) - 1;
	}

L
Linus Torvalds 已提交
175
	ti->split_io = chunk_size;
M
Mikulas Patocka 已提交
176
	ti->num_flush_requests = stripes;
M
Mikulas Patocka 已提交
177
	ti->num_discard_requests = stripes;
L
Linus Torvalds 已提交
178

179
	sc->chunk_shift = ffs(chunk_size) - 1;
L
Linus Torvalds 已提交
180 181 182 183 184 185 186 187 188 189
	sc->chunk_mask = ((sector_t) chunk_size) - 1;

	/*
	 * Get the stripe destinations.
	 */
	for (i = 0; i < stripes; i++) {
		argv += 2;

		r = get_stripe(ti, sc, i, argv);
		if (r < 0) {
190
			ti->error = "Couldn't parse stripe destination";
L
Linus Torvalds 已提交
191 192 193 194 195
			while (i--)
				dm_put_device(ti, sc->stripe[i].dev);
			kfree(sc);
			return r;
		}
B
Brian Wood 已提交
196
		atomic_set(&(sc->stripe[i].error_count), 0);
L
Linus Torvalds 已提交
197 198 199
	}

	ti->private = sc;
B
Brian Wood 已提交
200

L
Linus Torvalds 已提交
201 202 203 204 205 206 207 208 209 210 211
	return 0;
}

/*
 * Destructor: drop every stripe device reference, flush the
 * trigger_event work item and free the context stored in ti->private.
 */
static void stripe_dtr(struct dm_target *ti)
{
	struct stripe_c *sc = ti->private;
	unsigned int idx = 0;

	while (idx < sc->stripes) {
		dm_put_device(ti, sc->stripe[idx].dev);
		idx++;
	}

	/* The work item lives inside sc, so flush it before freeing. */
	flush_work_sync(&sc->trigger_event);
	kfree(sc);
}

216 217 218 219 220 221
/*
 * Translate a target sector into a stripe index and a sector within
 * that stripe.
 *
 * @sector:  sector as seen by the target (converted to a target-relative
 *           offset with dm_target_offset())
 * @stripe:  out, index of the stripe holding the sector
 * @result:  out, sector within that stripe (physical_start not yet added)
 */
static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
			      uint32_t *stripe, sector_t *result)
{
	sector_t rel = dm_target_offset(sc->ti, sector);
	sector_t chunk_nr = rel >> sc->chunk_shift;

	if (sc->stripes_shift >= 0) {
		/* Power-of-two stripe count: mask and shift. */
		*stripe = chunk_nr & sc->stripes_mask;
		chunk_nr >>= sc->stripes_shift;
	} else {
		/* chunk_nr becomes the quotient; the remainder is returned. */
		*stripe = sector_div(chunk_nr, sc->stripes);
	}

	/* Chunk index within the stripe plus the offset inside the chunk. */
	*result = (chunk_nr << sc->chunk_shift) | (rel & sc->chunk_mask);
}

M
Mikulas Patocka 已提交
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
/*
 * Map a range boundary @sector onto @target_stripe.
 *
 * If @sector lies on @target_stripe the mapped sector is returned as is.
 * Otherwise the mapped sector is rounded down to a chunk boundary, and
 * advanced to the next chunk when @target_stripe precedes the stripe
 * that actually holds @sector.
 */
static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
				    uint32_t target_stripe, sector_t *result)
{
	uint32_t owner;
	sector_t mapped;

	stripe_map_sector(sc, sector, &owner, &mapped);

	if (owner != target_stripe) {
		mapped &= ~sc->chunk_mask;		/* round down */
		if (target_stripe < owner)
			mapped += sc->chunk_mask + 1;	/* next chunk */
	}

	*result = mapped;
}

static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
			      uint32_t target_stripe)
{
	sector_t begin, end;

	stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
	stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio),
				target_stripe, &end);
	if (begin < end) {
		bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
		bio->bi_sector = begin + sc->stripe[target_stripe].physical_start;
		bio->bi_size = to_bytes(end - begin);
		return DM_MAPIO_REMAPPED;
	} else {
		/* The range doesn't map to the target stripe */
		bio_endio(bio, 0);
		return DM_MAPIO_SUBMITTED;
	}
}

L
Linus Torvalds 已提交
265 266 267
static int stripe_map(struct dm_target *ti, struct bio *bio,
		      union map_info *map_context)
{
268
	struct stripe_c *sc = ti->private;
M
Mikulas Patocka 已提交
269
	uint32_t stripe;
270
	unsigned target_request_nr;
L
Linus Torvalds 已提交
271

272
	if (bio->bi_rw & REQ_FLUSH) {
273 274 275
		target_request_nr = map_context->target_request_nr;
		BUG_ON(target_request_nr >= sc->stripes);
		bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
M
Mikulas Patocka 已提交
276 277
		return DM_MAPIO_REMAPPED;
	}
M
Mikulas Patocka 已提交
278 279 280 281 282
	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
		target_request_nr = map_context->target_request_nr;
		BUG_ON(target_request_nr >= sc->stripes);
		return stripe_map_discard(sc, bio, target_request_nr);
	}
M
Mikulas Patocka 已提交
283

284
	stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
L
Linus Torvalds 已提交
285

286
	bio->bi_sector += sc->stripe[stripe].physical_start;
L
Linus Torvalds 已提交
287
	bio->bi_bdev = sc->stripe[stripe].dev->bdev;
288

289
	return DM_MAPIO_REMAPPED;
L
Linus Torvalds 已提交
290 291
}

B
Brian Wood 已提交
292 293 294 295 296 297 298 299 300 301 302 303 304
/*
 * Stripe status:
 *
 * INFO
 * #stripes [stripe_name <stripe_name>] [group word count]
 * [error count 'A|D' <error count 'A|D'>]
 *
 * TABLE
 * #stripes [stripe chunk size]
 * [stripe_name physical_start <stripe_name physical_start>]
 *
 */

L
Linus Torvalds 已提交
305 306 307 308
static int stripe_status(struct dm_target *ti,
			 status_type_t type, char *result, unsigned int maxlen)
{
	struct stripe_c *sc = (struct stripe_c *) ti->private;
B
Brian Wood 已提交
309
	char buffer[sc->stripes + 1];
L
Linus Torvalds 已提交
310 311 312 313 314
	unsigned int sz = 0;
	unsigned int i;

	switch (type) {
	case STATUSTYPE_INFO:
B
Brian Wood 已提交
315 316 317 318 319 320 321 322
		DMEMIT("%d ", sc->stripes);
		for (i = 0; i < sc->stripes; i++)  {
			DMEMIT("%s ", sc->stripe[i].dev->name);
			buffer[i] = atomic_read(&(sc->stripe[i].error_count)) ?
				'D' : 'A';
		}
		buffer[i] = '\0';
		DMEMIT("1 %s", buffer);
L
Linus Torvalds 已提交
323 324 325
		break;

	case STATUSTYPE_TABLE:
A
Andrew Morton 已提交
326 327
		DMEMIT("%d %llu", sc->stripes,
			(unsigned long long)sc->chunk_mask + 1);
L
Linus Torvalds 已提交
328
		for (i = 0; i < sc->stripes; i++)
A
Andrew Morton 已提交
329 330
			DMEMIT(" %s %llu", sc->stripe[i].dev->name,
			    (unsigned long long)sc->stripe[i].physical_start);
L
Linus Torvalds 已提交
331 332 333 334 335
		break;
	}
	return 0;
}

B
Brian Wood 已提交
336 337 338 339 340 341 342 343 344 345
static int stripe_end_io(struct dm_target *ti, struct bio *bio,
			 int error, union map_info *map_context)
{
	unsigned i;
	char major_minor[16];
	struct stripe_c *sc = ti->private;

	if (!error)
		return 0; /* I/O complete */

346
	if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
B
Brian Wood 已提交
347 348 349 350 351 352 353
		return error;

	if (error == -EOPNOTSUPP)
		return error;

	memset(major_minor, 0, sizeof(major_minor));
	sprintf(major_minor, "%d:%d",
354 355
		MAJOR(disk_devt(bio->bi_bdev->bd_disk)),
		MINOR(disk_devt(bio->bi_bdev->bd_disk)));
B
Brian Wood 已提交
356 357 358 359 360 361 362 363 364 365 366 367

	/*
	 * Test to see which stripe drive triggered the event
	 * and increment error count for all stripes on that device.
	 * If the error count for a given device exceeds the threshold
	 * value we will no longer trigger any further events.
	 */
	for (i = 0; i < sc->stripes; i++)
		if (!strcmp(sc->stripe[i].dev->name, major_minor)) {
			atomic_inc(&(sc->stripe[i].error_count));
			if (atomic_read(&(sc->stripe[i].error_count)) <
			    DM_IO_ERROR_THRESHOLD)
368
				schedule_work(&sc->trigger_event);
B
Brian Wood 已提交
369 370 371 372 373
		}

	return error;
}

374 375 376 377 378 379 380
/*
 * Call @fn for each stripe device with its start sector and the
 * per-stripe width, stopping at the first non-zero return.
 *
 * Returns the last value returned by @fn.
 */
static int stripe_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct stripe_c *sc = ti->private;
	unsigned idx = 0;
	int rc;

	/* The first stripe is always visited, as in a do/while loop. */
	for (;;) {
		rc = fn(ti, sc->stripe[idx].dev,
			sc->stripe[idx].physical_start,
			sc->stripe_width, data);
		if (rc)
			break;
		if (++idx >= sc->stripes)
			break;
	}

	return rc;
}

390 391 392 393 394 395 396
/*
 * Advertise I/O size hints: minimum I/O is one chunk, optimal I/O is
 * one chunk on every stripe.
 */
static void stripe_io_hints(struct dm_target *ti,
			    struct queue_limits *limits)
{
	struct stripe_c *sc = ti->private;
	/* chunk_mask + 1 is the chunk size in sectors; << 9 converts to bytes. */
	unsigned chunk_bytes = (sc->chunk_mask + 1) << 9;
	unsigned full_stripe_bytes = chunk_bytes * sc->stripes;

	blk_limits_io_min(limits, chunk_bytes);
	blk_limits_io_opt(limits, full_stripe_bytes);
}

400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
			struct bio_vec *biovec, int max_size)
{
	struct stripe_c *sc = ti->private;
	sector_t bvm_sector = bvm->bi_sector;
	uint32_t stripe;
	struct request_queue *q;

	stripe_map_sector(sc, bvm_sector, &stripe, &bvm_sector);

	q = bdev_get_queue(sc->stripe[stripe].dev->bdev);
	if (!q->merge_bvec_fn)
		return max_size;

	bvm->bi_bdev = sc->stripe[stripe].dev->bdev;
	bvm->bi_sector = sc->stripe[stripe].physical_start + bvm_sector;

	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

L
Linus Torvalds 已提交
420 421
static struct target_type stripe_target = {
	.name   = "striped",
422
	.version = {1, 4, 0},
L
Linus Torvalds 已提交
423 424 425 426
	.module = THIS_MODULE,
	.ctr    = stripe_ctr,
	.dtr    = stripe_dtr,
	.map    = stripe_map,
B
Brian Wood 已提交
427
	.end_io = stripe_end_io,
L
Linus Torvalds 已提交
428
	.status = stripe_status,
429
	.iterate_devices = stripe_iterate_devices,
430
	.io_hints = stripe_io_hints,
431
	.merge  = stripe_merge,
L
Linus Torvalds 已提交
432 433 434 435 436 437 438
};

/*
 * Register the striped target.
 *
 * Returns the result of dm_register_target(); a warning is logged when
 * it is negative.
 */
int __init dm_stripe_init(void)
{
	int r = dm_register_target(&stripe_target);

	if (r < 0) {
		DMWARN("target registration failed");
	}

	return r;
}

void dm_stripe_exit(void)
{
449
	dm_unregister_target(&stripe_target);
L
Linus Torvalds 已提交
450
}