/*
   raid0.c : Multiple Devices driver for Linux
	     Copyright (C) 1994-96 Marc ZYNGIER
	     <zyngier@ufr-info-p7.ibp.fr> or
	     <maz@gloups.fdn.fr>
	     Copyright (C) 1999, 2000 Ingo Molnar, Red Hat

   RAID-0 management functions.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
#include "md.h"
#include "raid0.h"
#include "raid5.h"

28
static int raid0_congested(struct mddev *mddev, int bits)
29
{
30
	struct r0conf *conf = mddev->private;
31
	struct md_rdev **devlist = conf->devlist;
32
	int raid_disks = conf->strip_zone[0].nb_dev;
33 34
	int i, ret = 0;

35
	for (i = 0; i < raid_disks && !ret ; i++) {
36
		struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
37 38 39 40 41 42

		ret |= bdi_congested(&q->backing_dev_info, bits);
	}
	return ret;
}

/*
 * inform the user of the raid configuration
 *
 * Walks every strip zone and pr_debug()s its member devices plus the
 * zone's offset and size.  Sector counts are printed in KB, hence the
 * ">>1" (one 512-byte sector is half a KB).
 */
static void dump_zones(struct mddev *mddev)
{
	int j, k;
	sector_t zone_size = 0;
	sector_t zone_start = 0;
	char b[BDEVNAME_SIZE];
	struct r0conf *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;
	pr_debug("md: RAID0 configuration for %s - %d zone%s\n",
		 mdname(mddev),
		 conf->nr_strip_zones, conf->nr_strip_zones==1?"":"s");
	for (j = 0; j < conf->nr_strip_zones; j++) {
		char line[200];
		int len = 0;

		/* build a "/"-separated list of this zone's member devices */
		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
			len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
					bdevname(conf->devlist[j*raid_disks
							       + k]->bdev, b));
		pr_debug("md: zone%d=[%s]\n", j, line);

		/* zone_end is cumulative across zones, so subtract the
		 * previous zone's end to get this zone's own size */
		zone_size  = conf->strip_zone[j].zone_end - zone_start;
		pr_debug("      zone-offset=%10lluKB, device-offset=%10lluKB, size=%10lluKB\n",
			(unsigned long long)zone_start>>1,
			(unsigned long long)conf->strip_zone[j].dev_start>>1,
			(unsigned long long)zone_size>>1);
		zone_start = conf->strip_zone[j].zone_end;
	}
}

76
static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
L
Linus Torvalds 已提交
77
{
78
	int i, c, err;
79
	sector_t curr_zone_end, sectors;
80
	struct md_rdev *smallest, *rdev1, *rdev2, *rdev, **dev;
L
Linus Torvalds 已提交
81 82 83
	struct strip_zone *zone;
	int cnt;
	char b[BDEVNAME_SIZE];
84
	char b2[BDEVNAME_SIZE];
85
	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
86
	unsigned short blksize = 512;
87

88
	*private_conf = ERR_PTR(-ENOMEM);
89 90
	if (!conf)
		return -ENOMEM;
N
NeilBrown 已提交
91
	rdev_for_each(rdev1, mddev) {
92 93 94
		pr_debug("md/raid0:%s: looking at %s\n",
			 mdname(mddev),
			 bdevname(rdev1->bdev, b));
L
Linus Torvalds 已提交
95
		c = 0;
96 97 98 99 100 101

		/* round size to chunk_size */
		sectors = rdev1->sectors;
		sector_div(sectors, mddev->chunk_sectors);
		rdev1->sectors = sectors * mddev->chunk_sectors;

102 103 104
		blksize = max(blksize, queue_logical_block_size(
				      rdev1->bdev->bd_disk->queue));

N
NeilBrown 已提交
105
		rdev_for_each(rdev2, mddev) {
106 107 108 109 110 111 112
			pr_debug("md/raid0:%s:   comparing %s(%llu)"
				 " with %s(%llu)\n",
				 mdname(mddev),
				 bdevname(rdev1->bdev,b),
				 (unsigned long long)rdev1->sectors,
				 bdevname(rdev2->bdev,b2),
				 (unsigned long long)rdev2->sectors);
L
Linus Torvalds 已提交
113
			if (rdev2 == rdev1) {
114 115
				pr_debug("md/raid0:%s:   END\n",
					 mdname(mddev));
L
Linus Torvalds 已提交
116 117
				break;
			}
118
			if (rdev2->sectors == rdev1->sectors) {
L
Linus Torvalds 已提交
119 120 121 122
				/*
				 * Not unique, don't count it as a new
				 * group
				 */
123 124
				pr_debug("md/raid0:%s:   EQUAL\n",
					 mdname(mddev));
L
Linus Torvalds 已提交
125 126 127
				c = 1;
				break;
			}
128 129
			pr_debug("md/raid0:%s:   NOT EQUAL\n",
				 mdname(mddev));
L
Linus Torvalds 已提交
130 131
		}
		if (!c) {
132 133
			pr_debug("md/raid0:%s:   ==> UNIQUE\n",
				 mdname(mddev));
L
Linus Torvalds 已提交
134
			conf->nr_strip_zones++;
135 136
			pr_debug("md/raid0:%s: %d zones\n",
				 mdname(mddev), conf->nr_strip_zones);
L
Linus Torvalds 已提交
137 138
		}
	}
139 140
	pr_debug("md/raid0:%s: FINAL %d zones\n",
		 mdname(mddev), conf->nr_strip_zones);
141 142 143 144 145
	/*
	 * now since we have the hard sector sizes, we can make sure
	 * chunk size is a multiple of that sector size
	 */
	if ((mddev->chunk_sectors << 9) % blksize) {
N
NeilBrown 已提交
146 147 148
		pr_warn("md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
			mdname(mddev),
			mddev->chunk_sectors << 9, blksize);
149 150 151 152
		err = -EINVAL;
		goto abort;
	}

153
	err = -ENOMEM;
154
	conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
L
Linus Torvalds 已提交
155 156
				conf->nr_strip_zones, GFP_KERNEL);
	if (!conf->strip_zone)
157
		goto abort;
158
	conf->devlist = kzalloc(sizeof(struct md_rdev*)*
L
Linus Torvalds 已提交
159 160 161
				conf->nr_strip_zones*mddev->raid_disks,
				GFP_KERNEL);
	if (!conf->devlist)
162
		goto abort;
L
Linus Torvalds 已提交
163 164 165 166 167 168 169

	/* The first zone must contain all devices, so here we check that
	 * there is a proper alignment of slots to devices and find them all
	 */
	zone = &conf->strip_zone[0];
	cnt = 0;
	smallest = NULL;
170
	dev = conf->devlist;
171
	err = -EINVAL;
N
NeilBrown 已提交
172
	rdev_for_each(rdev1, mddev) {
L
Linus Torvalds 已提交
173 174
		int j = rdev1->raid_disk;

175
		if (mddev->level == 10) {
176 177
			/* taking over a raid10-n2 array */
			j /= 2;
178 179
			rdev1->new_raid_disk = j;
		}
180

181 182 183 184 185 186 187 188
		if (mddev->level == 1) {
			/* taiking over a raid1 array-
			 * we have only one active disk
			 */
			j = 0;
			rdev1->new_raid_disk = j;
		}

189
		if (j < 0) {
N
NeilBrown 已提交
190 191
			pr_warn("md/raid0:%s: remove inactive devices before converting to RAID0\n",
				mdname(mddev));
192 193 194
			goto abort;
		}
		if (j >= mddev->raid_disks) {
N
NeilBrown 已提交
195 196
			pr_warn("md/raid0:%s: bad disk number %d - aborting!\n",
				mdname(mddev), j);
L
Linus Torvalds 已提交
197 198
			goto abort;
		}
199
		if (dev[j]) {
N
NeilBrown 已提交
200 201
			pr_warn("md/raid0:%s: multiple devices for %d - aborting!\n",
				mdname(mddev), j);
L
Linus Torvalds 已提交
202 203
			goto abort;
		}
204
		dev[j] = rdev1;
L
Linus Torvalds 已提交
205

206
		if (!smallest || (rdev1->sectors < smallest->sectors))
L
Linus Torvalds 已提交
207 208 209 210
			smallest = rdev1;
		cnt++;
	}
	if (cnt != mddev->raid_disks) {
N
NeilBrown 已提交
211 212
		pr_warn("md/raid0:%s: too few disks (%d of %d) - aborting!\n",
			mdname(mddev), cnt, mddev->raid_disks);
L
Linus Torvalds 已提交
213 214 215
		goto abort;
	}
	zone->nb_dev = cnt;
216
	zone->zone_end = smallest->sectors * cnt;
L
Linus Torvalds 已提交
217

218
	curr_zone_end = zone->zone_end;
L
Linus Torvalds 已提交
219 220 221 222

	/* now do the other zones */
	for (i = 1; i < conf->nr_strip_zones; i++)
	{
223 224
		int j;

L
Linus Torvalds 已提交
225
		zone = conf->strip_zone + i;
226
		dev = conf->devlist + i * mddev->raid_disks;
L
Linus Torvalds 已提交
227

228
		pr_debug("md/raid0:%s: zone %d\n", mdname(mddev), i);
229
		zone->dev_start = smallest->sectors;
L
Linus Torvalds 已提交
230 231 232 233
		smallest = NULL;
		c = 0;

		for (j=0; j<cnt; j++) {
234
			rdev = conf->devlist[j];
235
			if (rdev->sectors <= zone->dev_start) {
236 237 238
				pr_debug("md/raid0:%s: checking %s ... nope\n",
					 mdname(mddev),
					 bdevname(rdev->bdev, b));
239 240
				continue;
			}
241 242 243 244
			pr_debug("md/raid0:%s: checking %s ..."
				 " contained as device %d\n",
				 mdname(mddev),
				 bdevname(rdev->bdev, b), c);
245
			dev[c] = rdev;
246 247 248
			c++;
			if (!smallest || rdev->sectors < smallest->sectors) {
				smallest = rdev;
249 250 251
				pr_debug("md/raid0:%s:  (%llu) is smallest!.\n",
					 mdname(mddev),
					 (unsigned long long)rdev->sectors);
252
			}
L
Linus Torvalds 已提交
253 254 255
		}

		zone->nb_dev = c;
256
		sectors = (smallest->sectors - zone->dev_start) * c;
257 258 259
		pr_debug("md/raid0:%s: zone->nb_dev: %d, sectors: %llu\n",
			 mdname(mddev),
			 zone->nb_dev, (unsigned long long)sectors);
L
Linus Torvalds 已提交
260

261
		curr_zone_end += sectors;
262
		zone->zone_end = curr_zone_end;
L
Linus Torvalds 已提交
263

264 265 266
		pr_debug("md/raid0:%s: current zone start: %llu\n",
			 mdname(mddev),
			 (unsigned long long)smallest->sectors);
L
Linus Torvalds 已提交
267 268
	}

269
	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
270 271
	*private_conf = conf;

L
Linus Torvalds 已提交
272
	return 0;
273
abort:
274 275 276
	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
277
	*private_conf = ERR_PTR(err);
278
	return err;
L
Linus Torvalds 已提交
279 280
}

281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
/* Find the zone which holds a particular offset
 * Update *sectorp to be an offset in that zone
 */
static struct strip_zone *find_zone(struct r0conf *conf,
				    sector_t *sectorp)
{
	int i;
	struct strip_zone *z = conf->strip_zone;
	sector_t sector = *sectorp;

	for (i = 0; i < conf->nr_strip_zones; i++)
		if (sector < z[i].zone_end) {
			if (i)
				*sectorp = sector - z[i-1].zone_end;
			return z + i;
		}
	BUG();
}

/*
 * remaps the bio to the target device. we separate two flows.
 * power 2 flow and a general flow for the sake of performance
 *
 * @sector is the array-relative sector; *@sector_offset comes in as the
 * offset within @zone (as produced by find_zone()) and is rewritten to
 * the offset within the returned member device.
 */
static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
				sector_t sector, sector_t *sector_offset)
{
	unsigned int sect_in_chunk;
	sector_t chunk;
	struct r0conf *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;
	unsigned int chunk_sects = mddev->chunk_sectors;

	if (is_power_of_2(chunk_sects)) {
		/* power-of-2 chunk: use masks/shifts instead of division */
		int chunksect_bits = ffz(~chunk_sects);
		/* find the sector offset inside the chunk */
		sect_in_chunk  = sector & (chunk_sects - 1);
		sector >>= chunksect_bits;
		/* chunk in zone */
		chunk = *sector_offset;
		/* quotient is the chunk in real device*/
		sector_div(chunk, zone->nb_dev << chunksect_bits);
	} else{
		/* general case: sector_div divides in place and returns
		 * the remainder */
		sect_in_chunk = sector_div(sector, chunk_sects);
		chunk = *sector_offset;
		sector_div(chunk, chunk_sects * zone->nb_dev);
	}
	/*
	*  position the bio over the real device
	*  real sector = chunk in device + starting of zone
	*	+ the position in the chunk
	*/
	*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
	/* pick the member device: zone's row in devlist, column chosen by
	 * chunk number modulo the number of devices in the zone */
	return conf->devlist[(zone - conf->strip_zone)*raid_disks
			     + sector_div(sector, zone->nb_dev)];
}

337
static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks)
338 339
{
	sector_t array_sectors = 0;
340
	struct md_rdev *rdev;
341 342 343 344

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

N
NeilBrown 已提交
345
	rdev_for_each(rdev, mddev)
346 347
		array_sectors += (rdev->sectors &
				  ~(sector_t)(mddev->chunk_sectors-1));
348 349 350 351

	return array_sectors;
}

/* forward declaration; defined after raid0_run() */
static void raid0_free(struct mddev *mddev, void *priv);

/*
 * Assemble and activate a raid0 array.
 *
 * Validates that a chunk size is set and no bitmap is attached, builds
 * the strip-zone map (unless a takeover already stored one in
 * mddev->private), then configures queue limits, the array size and
 * read-ahead.  Returns 0 on success or a negative errno.
 */
static int raid0_run(struct mddev *mddev)
{
	struct r0conf *conf;
	int ret;

	if (mddev->chunk_sectors == 0) {
		pr_warn("md/raid0:%s: chunk size must be set.\n", mdname(mddev));
		return -EINVAL;
	}
	if (md_check_no_bitmap(mddev))
		return -EINVAL;

	/* if private is not null, we are here after takeover */
	if (mddev->private == NULL) {
		ret = create_strip_zones(mddev, &conf);
		if (ret < 0)
			return ret;
		mddev->private = conf;
	}
	conf = mddev->private;
	if (mddev->queue) {
		struct md_rdev *rdev;
		bool discard_supported = false;

		/* cap single requests at one chunk so no request spans
		 * a chunk boundary */
		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
		blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
		blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);

		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
		blk_queue_io_opt(mddev->queue,
				 (mddev->chunk_sectors << 9) * mddev->raid_disks);

		rdev_for_each(rdev, mddev) {
			disk_stack_limits(mddev->gendisk, rdev->bdev,
					  rdev->data_offset << 9);
			if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
				discard_supported = true;
		}
		/* advertise discard only if at least one member supports it */
		if (!discard_supported)
			queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
		else
			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
	}

	/* calculate array device size */
	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));

	pr_debug("md/raid0:%s: md_size is %llu sectors.\n",
		 mdname(mddev),
		 (unsigned long long)mddev->array_sectors);

	if (mddev->queue) {
		/* calculate the max read-ahead size.
		 * For read-ahead of large files to be effective, we need to
		 * readahead at least twice a whole stripe. i.e. number of devices
		 * multiplied by chunk size times 2.
		 * If an individual device has an ra_pages greater than the
		 * chunk size, then we will not drive that device as hard as it
		 * wants.  We consider this a configuration error: a larger
		 * chunksize should be used in that case.
		 */
		int stripe = mddev->raid_disks *
			(mddev->chunk_sectors << 9) / PAGE_SIZE;
		if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
			mddev->queue->backing_dev_info.ra_pages = 2* stripe;
	}

	dump_zones(mddev);

	ret = md_integrity_register(mddev);

	return ret;
}

/*
 * Tear down the raid0 private data: the zone table, the per-zone
 * device list, and the conf structure itself.
 */
static void raid0_free(struct mddev *mddev, void *priv)
{
	struct r0conf *conf = priv;

	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
}

437 438 439
/*
 * Is io distribute over 1 or more chunks ?
*/
440
static inline int is_io_in_chunk_boundary(struct mddev *mddev,
441 442
			unsigned int chunk_sects, struct bio *bio)
{
N
NeilBrown 已提交
443
	if (likely(is_power_of_2(chunk_sects))) {
444 445
		return chunk_sects >=
			((bio->bi_iter.bi_sector & (chunk_sects-1))
446
					+ bio_sectors(bio));
447
	} else{
448
		sector_t sector = bio->bi_iter.bi_sector;
449
		return chunk_sects >= (sector_div(sector, chunk_sects)
450
						+ bio_sectors(bio));
451 452 453
	}
}

/*
 * Main I/O entry point: route a bio to the member device(s) backing its
 * sector range.  A bio that crosses a chunk boundary is carved up with
 * bio_split()/bio_chain() and each piece mapped and submitted on its
 * own; the loop runs until the final piece (the original bio) is sent.
 */
static void raid0_make_request(struct mddev *mddev, struct bio *bio)
{
	struct strip_zone *zone;
	struct md_rdev *tmp_dev;
	struct bio *split;

	/* flush requests are handled by the md core, not striped */
	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
		md_flush_request(mddev, bio);
		return;
	}

	do {
		sector_t sector = bio->bi_iter.bi_sector;
		unsigned chunk_sects = mddev->chunk_sectors;

		/* sectors remaining in the current chunk */
		unsigned sectors = chunk_sects -
			(likely(is_power_of_2(chunk_sects))
			 ? (sector & (chunk_sects-1))
			 : sector_div(sector, chunk_sects));

		/* Restore due to sector_div */
		sector = bio->bi_iter.bi_sector;

		if (sectors < bio_sectors(bio)) {
			/* bio spills past the chunk: split off the part
			 * that fits and chain it to the remainder */
			split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
			bio_chain(split, bio);
		} else {
			split = bio;
		}

		/* find_zone/map_sector rewrite 'sector' into an offset
		 * on the chosen member device */
		zone = find_zone(mddev->private, &sector);
		tmp_dev = map_sector(mddev, zone, sector, &sector);
		split->bi_bdev = tmp_dev->bdev;
		split->bi_iter.bi_sector = sector + zone->dev_start +
			tmp_dev->data_offset;

		if (unlikely((bio_op(split) == REQ_OP_DISCARD) &&
			 !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
			/* Just ignore it */
			bio_endio(split);
		} else
			generic_make_request(split);
	} while (split != bio);
}
N
NeilBrown 已提交
498

499
static void raid0_status(struct seq_file *seq, struct mddev *mddev)
L
Linus Torvalds 已提交
500
{
501
	seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
L
Linus Torvalds 已提交
502 503 504
	return;
}

/*
 * Convert a degraded raid4/raid5 array into raid0.  Exactly one disk
 * must be failed, and it must be the parity disk (the last slot) so
 * the surviving disks already hold a raid0 data layout.  Returns the
 * new r0conf, or an ERR_PTR (possibly propagated from
 * create_strip_zones()) on error.
 */
static void *raid0_takeover_raid45(struct mddev *mddev)
{
	struct md_rdev *rdev;
	struct r0conf *priv_conf;

	if (mddev->degraded != 1) {
		pr_warn("md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
			mdname(mddev),
			mddev->degraded);
		return ERR_PTR(-EINVAL);
	}

	rdev_for_each(rdev, mddev) {
		/* check slot number for a disk */
		if (rdev->raid_disk == mddev->raid_disks-1) {
			pr_warn("md/raid0:%s: raid5 must have missing parity disk!\n",
				mdname(mddev));
			return ERR_PTR(-EINVAL);
		}
		rdev->sectors = mddev->dev_sectors;
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_layout = 0;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->raid_disks--;
	mddev->delta_disks = -1;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;

	/* on failure create_strip_zones() stores an ERR_PTR in priv_conf,
	 * which we return as-is */
	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
}

/*
 * Convert a fully-degraded raid10 near-2 array into raid0: one disk of
 * each mirror pair survives and together they form the raid0.  Returns
 * the new r0conf, or an ERR_PTR on error.
 */
static void *raid0_takeover_raid10(struct mddev *mddev)
{
	struct r0conf *priv_conf;

	/* Check layout:
	 *  - far_copies must be 1
	 *  - near_copies must be 2
	 *  - disks number must be even
	 *  - all mirrors must be already degraded
	 */
	if (mddev->layout != ((1 << 8) + 2)) {
		/* 0x102 == far_copies 1 in the high byte, near_copies 2 */
		pr_warn("md/raid0:%s:: Raid0 cannot takeover layout: 0x%x\n",
			mdname(mddev),
			mddev->layout);
		return ERR_PTR(-EINVAL);
	}
	if (mddev->raid_disks & 1) {
		pr_warn("md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
			mdname(mddev));
		return ERR_PTR(-EINVAL);
	}
	if (mddev->degraded != (mddev->raid_disks>>1)) {
		pr_warn("md/raid0:%s: All mirrors must be already degraded!\n",
			mdname(mddev));
		return ERR_PTR(-EINVAL);
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_layout = 0;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->delta_disks = - mddev->raid_disks / 2;
	mddev->raid_disks += mddev->delta_disks;
	mddev->degraded = 0;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
}

/*
 * Convert a raid1 array with a single surviving disk into a one-disk
 * raid0.  Since raid1 has no chunk size, pick the largest power-of-2
 * chunk (starting at 64K) that divides the array size exactly.
 * Returns the new r0conf, or an ERR_PTR on error.
 */
static void *raid0_takeover_raid1(struct mddev *mddev)
{
	struct r0conf *priv_conf;
	int chunksect;

	/* Check layout:
	 *  - (N - 1) mirror drives must be already faulty
	 */
	if ((mddev->raid_disks - 1) != mddev->degraded) {
		pr_err("md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
		       mdname(mddev));
		return ERR_PTR(-EINVAL);
	}

	/*
	 * a raid1 doesn't have the notion of chunk size, so
	 * figure out the largest suitable size we can use.
	 */
	chunksect = 64 * 2; /* 64K by default */

	/* The array must be an exact multiple of chunksize */
	while (chunksect && (mddev->array_sectors & (chunksect - 1)))
		chunksect >>= 1;

	if ((chunksect << 9) < PAGE_SIZE)
		/* array size does not allow a suitable chunk size */
		return ERR_PTR(-EINVAL);

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_layout = 0;
	mddev->new_chunk_sectors = chunksect;
	mddev->chunk_sectors = chunksect;
	mddev->delta_disks = 1 - mddev->raid_disks;
	mddev->raid_disks = 1;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
}

/*
 * Dispatch an in-place conversion from another raid level to raid0.
 * Returns the new private conf on success, or an ERR_PTR on failure.
 */
static void *raid0_takeover(struct mddev *mddev)
{
	/* raid0 can take over:
	 *  raid4 - if all data disks are active.
	 *  raid5 - providing it is Raid4 layout and one disk is faulty
	 *  raid10 - assuming we have all necessary active disks
	 *  raid1 - with (N -1) mirror drives faulty
	 */

	if (mddev->bitmap) {
		pr_warn("md/raid0: %s: cannot takeover array with bitmap\n",
			mdname(mddev));
		return ERR_PTR(-EBUSY);
	}
	if (mddev->level == 4)
		return raid0_takeover_raid45(mddev);

	if (mddev->level == 5) {
		if (mddev->layout == ALGORITHM_PARITY_N)
			return raid0_takeover_raid45(mddev);

		pr_warn("md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
			mdname(mddev), ALGORITHM_PARITY_N);
	}

	if (mddev->level == 10)
		return raid0_takeover_raid10(mddev);

	if (mddev->level == 1)
		return raid0_takeover_raid1(mddev);

	pr_warn("Takeover from raid%i to raid0 not supported\n",
		mddev->level);

	return ERR_PTR(-EINVAL);
}

/* Nothing to flush or suspend for raid0; quiesce is intentionally a no-op. */
static void raid0_quiesce(struct mddev *mddev, int state)
{
}

/* Personality descriptor wiring the raid0 hooks into the md core. */
static struct md_personality raid0_personality=
{
	.name		= "raid0",
	.level		= 0,
	.owner		= THIS_MODULE,
	.make_request	= raid0_make_request,
	.run		= raid0_run,
	.free		= raid0_free,
	.status		= raid0_status,
	.size		= raid0_size,
	.takeover	= raid0_takeover,
	.quiesce	= raid0_quiesce,
	.congested	= raid0_congested,
};

/* Module entry point: register the raid0 personality with the md core. */
static int __init raid0_init(void)
{
	return register_md_personality(&raid0_personality);
}

static void raid0_exit (void)
{
686
	unregister_md_personality (&raid0_personality);
L
Linus Torvalds 已提交
687 688 689 690 691
}

module_init(raid0_init);
module_exit(raid0_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
/* module aliases under which this personality can be requested */
MODULE_ALIAS("md-personality-2"); /* RAID0 */
MODULE_ALIAS("md-raid0");
MODULE_ALIAS("md-level-0");