/*
   raid0.c : Multiple Devices driver for Linux
             Copyright (C) 1994-96 Marc ZYNGIER
	     <zyngier@ufr-info-p7.ibp.fr> or
	     <maz@gloups.fdn.fr>
             Copyright (C) 1999, 2000 Ingo Molnar, Red Hat


   RAID-0 management functions.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.
   
   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
*/

#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "raid0.h"
#include "raid5.h"

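/*
 * raid0 queues no requests of its own, so unplugging the array just
 * unplugs the request queue of every member device.
 */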
static void raid0_unplug(struct request_queue *q)
{
	mddev_t *mddev = q->queuedata;
	raid0_conf_t *conf = mddev->private;
	mdk_rdev_t **devlist = conf->devlist;
	int raid_disks = conf->strip_zone[0].nb_dev;
	int i;

	for (i=0; i < raid_disks; i++) {
		struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);

		blk_unplug(r_queue);
	}
}

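/*
 * raid0 does no buffering of its own, so the array counts as congested
 * as soon as any member device's backing device reports congestion.
 */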
static int raid0_congested(void *data, int bits)
{
	mddev_t *mddev = data;
	raid0_conf_t *conf = mddev->private;
	mdk_rdev_t **devlist = conf->devlist;
	int raid_disks = conf->strip_zone[0].nb_dev;
	int i, ret = 0;

	if (mddev_congested(mddev, bits))
		return 1;

	for (i = 0; i < raid_disks && !ret ; i++) {
		struct request_queue *q = bdev_get_queue(devlist[i]->bdev);

		ret |= bdi_congested(&q->backing_dev_info, bits);
	}
	return ret;
}

/*
 * inform the user of the raid configuration
*/
static void dump_zones(mddev_t *mddev)
{
	int j, k, h;
	sector_t zone_size = 0;
	sector_t zone_start = 0;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;
	printk(KERN_INFO "******* %s configuration *********\n",
		mdname(mddev));
	h = 0;
	for (j = 0; j < conf->nr_strip_zones; j++) {
		printk(KERN_INFO "zone%d=[", j);
		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
			printk("%s/",
			bdevname(conf->devlist[j*raid_disks
						+ k]->bdev, b));
		printk("]\n");

		zone_size  = conf->strip_zone[j].zone_end - zone_start;
		printk(KERN_INFO "        zone offset=%llukb "
				"device offset=%llukb size=%llukb\n",
			(unsigned long long)zone_start>>1,
			(unsigned long long)conf->strip_zone[j].dev_start>>1,
			(unsigned long long)zone_size>>1);
		zone_start = conf->strip_zone[j].zone_end;
	}
	printk(KERN_INFO "**********************************\n\n");
}

static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
{
	int i, c, err;
	sector_t curr_zone_end, sectors;
	mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
	struct strip_zone *zone;
	int cnt;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL);

	if (!conf)
		return -ENOMEM;
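	/*
	 * Pass 1: count the zones.  Each distinct device size starts a new
	 * zone, because striping can only continue across the devices that
	 * still have capacity left at a given offset.
	 */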
	list_for_each_entry(rdev1, &mddev->disks, same_set) {
		printk(KERN_INFO "raid0: looking at %s\n",
			bdevname(rdev1->bdev,b));
		c = 0;

		/* round size to chunk_size */
		sectors = rdev1->sectors;
		sector_div(sectors, mddev->chunk_sectors);
		rdev1->sectors = sectors * mddev->chunk_sectors;

		list_for_each_entry(rdev2, &mddev->disks, same_set) {
			printk(KERN_INFO "raid0:   comparing %s(%llu)",
			       bdevname(rdev1->bdev,b),
			       (unsigned long long)rdev1->sectors);
			printk(KERN_INFO " with %s(%llu)\n",
			       bdevname(rdev2->bdev,b),
			       (unsigned long long)rdev2->sectors);
			if (rdev2 == rdev1) {
				printk(KERN_INFO "raid0:   END\n");
				break;
			}
			if (rdev2->sectors == rdev1->sectors) {
				/*
				 * Not unique, don't count it as a new
				 * group
				 */
				printk(KERN_INFO "raid0:   EQUAL\n");
				c = 1;
				break;
			}
			printk(KERN_INFO "raid0:   NOT EQUAL\n");
		}
		if (!c) {
			printk(KERN_INFO "raid0:   ==> UNIQUE\n");
			conf->nr_strip_zones++;
			printk(KERN_INFO "raid0: %d zones\n",
				conf->nr_strip_zones);
		}
	}
	printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones);
	err = -ENOMEM;
	conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
				conf->nr_strip_zones, GFP_KERNEL);
	if (!conf->strip_zone)
		goto abort;
	conf->devlist = kzalloc(sizeof(mdk_rdev_t*)*
				conf->nr_strip_zones*mddev->raid_disks,
				GFP_KERNEL);
	if (!conf->devlist)
		goto abort;

	/* The first zone must contain all devices, so here we check that
	 * there is a proper alignment of slots to devices and find them all
	 */
	zone = &conf->strip_zone[0];
	cnt = 0;
	smallest = NULL;
	dev = conf->devlist;
	err = -EINVAL;
	list_for_each_entry(rdev1, &mddev->disks, same_set) {
		int j = rdev1->raid_disk;

		if (mddev->level == 10)
			/* taking over a raid10-n2 array */
			j /= 2;

		if (j < 0 || j >= mddev->raid_disks) {
			printk(KERN_ERR "raid0: bad disk number %d - "
				"aborting!\n", j);
			goto abort;
		}
		if (dev[j]) {
			printk(KERN_ERR "raid0: multiple devices for %d - "
				"aborting!\n", j);
			goto abort;
		}
		dev[j] = rdev1;

		disk_stack_limits(mddev->gendisk, rdev1->bdev,
				  rdev1->data_offset << 9);
		/* as we don't honour merge_bvec_fn, we must never risk
		 * violating it, so limit ->max_segments to 1, lying within
		 * a single page.
		 */

		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) {
			blk_queue_max_segments(mddev->queue, 1);
			blk_queue_segment_boundary(mddev->queue,
						   PAGE_CACHE_SIZE - 1);
		}
		if (!smallest || (rdev1->sectors < smallest->sectors))
			smallest = rdev1;
		cnt++;
	}
	if (cnt != mddev->raid_disks) {
		printk(KERN_ERR "raid0: too few disks (%d of %d) - "
			"aborting!\n", cnt, mddev->raid_disks);
		goto abort;
	}
	zone->nb_dev = cnt;
	zone->zone_end = smallest->sectors * cnt;

	curr_zone_end = zone->zone_end;

	/* now do the other zones */
	for (i = 1; i < conf->nr_strip_zones; i++)
	{
		int j;

		zone = conf->strip_zone + i;
		dev = conf->devlist + i * mddev->raid_disks;

		printk(KERN_INFO "raid0: zone %d\n", i);
		zone->dev_start = smallest->sectors;
		smallest = NULL;
		c = 0;

		for (j=0; j<cnt; j++) {
			rdev = conf->devlist[j];
			printk(KERN_INFO "raid0: checking %s ...",
				bdevname(rdev->bdev, b));
			if (rdev->sectors <= zone->dev_start) {
				printk(KERN_INFO " nope.\n");
				continue;
			}
			printk(KERN_INFO " contained as device %d\n", c);
			dev[c] = rdev;
			c++;
			if (!smallest || rdev->sectors < smallest->sectors) {
				smallest = rdev;
				printk(KERN_INFO "  (%llu) is smallest!.\n",
					(unsigned long long)rdev->sectors);
			}
		}

		zone->nb_dev = c;
		sectors = (smallest->sectors - zone->dev_start) * c;
		printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
			zone->nb_dev, (unsigned long long)sectors);

		curr_zone_end += sectors;
		zone->zone_end = curr_zone_end;

		printk(KERN_INFO "raid0: current zone start: %llu\n",
			(unsigned long long)smallest->sectors);
	}
	mddev->queue->unplug_fn = raid0_unplug;
	mddev->queue->backing_dev_info.congested_fn = raid0_congested;
	mddev->queue->backing_dev_info.congested_data = mddev;

	/*
	 * now since we have the hard sector sizes, we can make sure
	 * chunk size is a multiple of that sector size
	 */
	if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
		printk(KERN_ERR "%s chunk_size of %d not valid\n",
		       mdname(mddev),
		       mddev->chunk_sectors << 9);
		goto abort;
	}

	blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
	blk_queue_io_opt(mddev->queue,
			 (mddev->chunk_sectors << 9) * mddev->raid_disks);

	printk(KERN_INFO "raid0: done.\n");
	*private_conf = conf;

	return 0;
abort:
	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
	*private_conf = NULL;
	return err;
}

/**
 *	raid0_mergeable_bvec -- tell the bio layer if two requests can be merged
 *	@q: request queue
 *	@bvm: properties of new bio
 *	@biovec: the bio vector that could be merged to it.
 *
 *	Return the number of bytes we can accept at this offset
 */
static int raid0_mergeable_bvec(struct request_queue *q,
				struct bvec_merge_data *bvm,
				struct bio_vec *biovec)
{
	mddev_t *mddev = q->queuedata;
	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
	int max;
	unsigned int chunk_sectors = mddev->chunk_sectors;
	unsigned int bio_sectors = bvm->bi_size >> 9;

	if (is_power_of_2(chunk_sectors))
		max =  (chunk_sectors - ((sector & (chunk_sectors-1))
						+ bio_sectors)) << 9;
	else
		max =  (chunk_sectors - (sector_div(sector, chunk_sectors)
						+ bio_sectors)) << 9;
	if (max < 0) max = 0; /* bio_add cannot handle a negative return */
	if (max <= biovec->bv_len && bio_sectors == 0)
		return biovec->bv_len;
	else
		return max;
}

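/*
 * raid0 has no redundancy, so the array size is simply the sum of the
 * (chunk-rounded) sizes of all member devices.
 */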
static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	sector_t array_sectors = 0;
	mdk_rdev_t *rdev;

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

	list_for_each_entry(rdev, &mddev->disks, same_set)
		array_sectors += rdev->sectors;

	return array_sectors;
}

static int raid0_run(mddev_t *mddev)
{
	raid0_conf_t *conf;
	int ret;

	if (mddev->chunk_sectors == 0) {
		printk(KERN_ERR "md/raid0: chunk size must be set.\n");
		return -EINVAL;
	}
	if (md_check_no_bitmap(mddev))
		return -EINVAL;
	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
	mddev->queue->queue_lock = &mddev->queue->__queue_lock;

	/* if private is not null, we are here after takeover */
	if (mddev->private == NULL) {
		ret = create_strip_zones(mddev, &conf);
		if (ret < 0)
			return ret;
		mddev->private = conf;
	}
	conf = mddev->private;
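	/*
	 * After a raid10-n2 takeover the surviving devices still carry
	 * their raid10 slot numbers; dividing by scale_raid_disks folds
	 * each mirror pair onto a single raid0 slot.
	 */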
	if (conf->scale_raid_disks) {
		int i;
		for (i=0; i < conf->strip_zone[0].nb_dev; i++)
			conf->devlist[i]->raid_disk /= conf->scale_raid_disks;
		/* FIXME update sysfs rd links */
	}

	/* calculate array device size */
	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));

	printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
		(unsigned long long)mddev->array_sectors);
	/* calculate the max read-ahead size.
	 * For read-ahead of large files to be effective, we need to
	 * readahead at least twice a whole stripe. i.e. number of devices
	 * multiplied by chunk size times 2.
	 * If an individual device has an ra_pages greater than the
	 * chunk size, then we will not drive that device as hard as it
	 * wants.  We consider this a configuration error: a larger
	 * chunksize should be used in that case.
	 */
	{
		int stripe = mddev->raid_disks *
			(mddev->chunk_sectors << 9) / PAGE_SIZE;
		if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
			mddev->queue->backing_dev_info.ra_pages = 2* stripe;
	}

	blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
	dump_zones(mddev);
	md_integrity_register(mddev);
	return 0;
}

static int raid0_stop(mddev_t *mddev)
{
	raid0_conf_t *conf = mddev->private;

	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
	mddev->private = NULL;
	return 0;
}

/* Find the zone which holds a particular offset
 * Update *sectorp to be an offset in that zone
 */
static struct strip_zone *find_zone(struct raid0_private_data *conf,
				    sector_t *sectorp)
{
	int i;
	struct strip_zone *z = conf->strip_zone;
	sector_t sector = *sectorp;

	for (i = 0; i < conf->nr_strip_zones; i++)
		if (sector < z[i].zone_end) {
			if (i)
				*sectorp = sector - z[i-1].zone_end;
			return z + i;
		}
	BUG();
}

/*
 * remaps the bio to the target device. we separate two flows:
 * a power-of-2 flow and a general flow, for the sake of performance.
 */
static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone,
				sector_t sector, sector_t *sector_offset)
{
	unsigned int sect_in_chunk;
	sector_t chunk;
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;
	unsigned int chunk_sects = mddev->chunk_sectors;

	if (is_power_of_2(chunk_sects)) {
		int chunksect_bits = ffz(~chunk_sects);
		/* find the sector offset inside the chunk */
		sect_in_chunk  = sector & (chunk_sects - 1);
		sector >>= chunksect_bits;
		/* chunk in zone */
		chunk = *sector_offset;
		/* quotient is the chunk in real device*/
		sector_div(chunk, zone->nb_dev << chunksect_bits);
	} else{
		sect_in_chunk = sector_div(sector, chunk_sects);
		chunk = *sector_offset;
		sector_div(chunk, chunk_sects * zone->nb_dev);
	}
	/*
	*  position the bio over the real device
	*  real sector = chunk in device + starting of zone
	*	+ the position in the chunk
	*/
	*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
	return conf->devlist[(zone - conf->strip_zone)*raid_disks
			     + sector_div(sector, zone->nb_dev)];
}

/*
 * Is the io distributed over 1 or more chunks ?
 */
static inline int is_io_in_chunk_boundary(mddev_t *mddev,
			unsigned int chunk_sects, struct bio *bio)
{
	if (likely(is_power_of_2(chunk_sects))) {
		return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
					+ (bio->bi_size >> 9));
	} else{
		sector_t sector = bio->bi_sector;
		return chunk_sects >= (sector_div(sector, chunk_sects)
						+ (bio->bi_size >> 9));
	}
}

static int raid0_make_request(struct request_queue *q, struct bio *bio)
{
	mddev_t *mddev = q->queuedata;
	unsigned int chunk_sects;
	sector_t sector_offset;
	struct strip_zone *zone;
	mdk_rdev_t *tmp_dev;

	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
		md_barrier_request(mddev, bio);
		return 0;
	}

	chunk_sects = mddev->chunk_sectors;
	if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
		sector_t sector = bio->bi_sector;
		struct bio_pair *bp;
		/* Sanity check -- queue functions should prevent this happening */
		if (bio->bi_vcnt != 1 ||
		    bio->bi_idx != 0)
			goto bad_map;
		/* This is a one page bio that upper layers
		 * refuse to split for us, so we need to split it.
		 */
		if (likely(is_power_of_2(chunk_sects)))
			bp = bio_split(bio, chunk_sects - (sector &
							   (chunk_sects-1)));
		else
			bp = bio_split(bio, chunk_sects -
				       sector_div(sector, chunk_sects));
		if (raid0_make_request(q, &bp->bio1))
			generic_make_request(&bp->bio1);
		if (raid0_make_request(q, &bp->bio2))
			generic_make_request(&bp->bio2);

		bio_pair_release(bp);
		return 0;
	}

	sector_offset = bio->bi_sector;
	zone =  find_zone(mddev->private, &sector_offset);
	tmp_dev = map_sector(mddev, zone, bio->bi_sector,
			     &sector_offset);
	bio->bi_bdev = tmp_dev->bdev;
	bio->bi_sector = sector_offset + zone->dev_start +
		tmp_dev->data_offset;
	/*
	 * Let the main block layer submit the IO and resolve recursion:
	 */
	return 1;

bad_map:
	printk("raid0_make_request bug: can't convert block across chunks"
		" or bigger than %dk %llu %d\n", chunk_sects / 2,
		(unsigned long long)bio->bi_sector, bio->bi_size >> 10);

	bio_io_error(bio);
	return 0;
}

static void raid0_status(struct seq_file *seq, mddev_t *mddev)
{
#undef MD_DEBUG
#ifdef MD_DEBUG
	int j, k, h;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;

	sector_t zone_size;
	sector_t zone_start = 0;
	h = 0;

	for (j = 0; j < conf->nr_strip_zones; j++) {
		seq_printf(seq, "      z%d", j);
		seq_printf(seq, "=[");
		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
			seq_printf(seq, "%s/", bdevname(
				conf->devlist[j*raid_disks + k]
						->bdev, b));

		zone_size  = conf->strip_zone[j].zone_end - zone_start;
		seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n",
			(unsigned long long)zone_start>>1,
			(unsigned long long)conf->strip_zone[j].dev_start>>1,
			(unsigned long long)zone_size>>1);
		zone_start = conf->strip_zone[j].zone_end;
	}
#endif
	seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
	return;
}

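/*
 * A raid5 array using the raid4-style PARITY_N layout keeps all parity on
 * the last disk.  Once that disk has failed, the remaining disks hold the
 * data exactly as a raid0 array would, so takeover only has to build the
 * raid0 zone description.
 */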
static void *raid0_takeover_raid5(mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	raid0_conf_t *priv_conf;

	if (mddev->degraded != 1) {
		printk(KERN_ERR "md: raid5 must be degraded! Degraded disks: %d\n",
		       mddev->degraded);
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(rdev, &mddev->disks, same_set) {
		/* check slot number for a disk */
		if (rdev->raid_disk == mddev->raid_disks-1) {
			printk(KERN_ERR "md: raid5 must have missing parity disk!\n");
			return ERR_PTR(-EINVAL);
		}
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->raid_disks--;
	mddev->delta_disks = -1;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
}

static void *raid0_takeover_raid10(mddev_t *mddev)
{
	raid0_conf_t *priv_conf;

	/* Check layout:
	 *  - far_copies must be 1
	 *  - near_copies must be 2
	 *  - disks number must be even
	 *  - all mirrors must be already degraded
	 */
	if (mddev->layout != ((1 << 8) + 2)) {
		printk(KERN_ERR "md: Raid0 cannot takeover layout: %x\n",
		       mddev->layout);
		return ERR_PTR(-EINVAL);
	}
	if (mddev->raid_disks & 1) {
		printk(KERN_ERR "md: Raid0 cannot takeover Raid10 with odd disk number.\n");
		return ERR_PTR(-EINVAL);
	}
	if (mddev->degraded != (mddev->raid_disks>>1)) {
		printk(KERN_ERR "md: All mirrors must be already degraded!\n");
		return ERR_PTR(-EINVAL);
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->delta_disks = - mddev->raid_disks / 2;
	mddev->raid_disks += mddev->delta_disks;
	mddev->degraded = 0;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	priv_conf->scale_raid_disks = 2;
	return priv_conf;
}

static void *raid0_takeover(mddev_t *mddev)
{
	/* raid0 can take over:
	 *  raid5 - providing it is Raid4 layout and one disk is faulty
	 *  raid10 - assuming we have all necessary active disks
	 */
	if (mddev->level == 5) {
		if (mddev->layout == ALGORITHM_PARITY_N)
			return raid0_takeover_raid5(mddev);

		printk(KERN_ERR "md: Raid0 can only takeover Raid5 with layout: %d\n",
		       ALGORITHM_PARITY_N);
	}

	if (mddev->level == 10)
		return raid0_takeover_raid10(mddev);

	return ERR_PTR(-EINVAL);
}

static void raid0_quiesce(mddev_t *mddev, int state)
{
}

static struct mdk_personality raid0_personality=
{
	.name		= "raid0",
	.level		= 0,
	.owner		= THIS_MODULE,
	.make_request	= raid0_make_request,
	.run		= raid0_run,
	.stop		= raid0_stop,
	.status		= raid0_status,
	.size		= raid0_size,
	.takeover	= raid0_takeover,
	.quiesce	= raid0_quiesce,
};

static int __init raid0_init (void)
{
	return register_md_personality (&raid0_personality);
}

static void raid0_exit (void)
{
	unregister_md_personality (&raid0_personality);
}

module_init(raid0_init);
module_exit(raid0_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
MODULE_ALIAS("md-personality-2"); /* RAID0 */
MODULE_ALIAS("md-raid0");
MODULE_ALIAS("md-level-0");