Merge tag 'md-3.4-fixes' of git://neil.brown.name/md

Pull assorted md fixes from Neil Brown: - some RAID levels didn't clear up properly if md_integrity_register failed - a 'check' of RAID5/RAID6 doesn't actually read any data since a recent patch - so fix that (and mark for -stable) - a couple of other minor bugs. * tag 'md-3.4-fixes' of git://neil.brown.name/md: md/raid1,raid10: don't compare excess byte during consistency check. md/raid5: Fix a bug about judging if the operation is syncing or replacing md/raid1:Remove unnecessary rcu_dereference(conf->mirrors[i].rdev). md: Avoid OOPS when reshaping raid1 to raid0 md/raid5: fix handling of bad blocks during recovery. md/raid1: If md_integrity_register() failed,run() must free the mem md/raid0: If md_integrity_register() fails, raid0_run() must free the mem. md/linear: If md_integrity_register() fails, linear_run() must free the mem.

Merge tag 'md-3.4-fixes' of git://neil.brown.name/md
Pull assorted md fixes from Neil Brown: - some RAID levels didn't clear up properly if md_integrity_register failed - a 'check' of RAID5/RAID6 doesn't actually read any data since a recent patch - so fix that (and mark for -stable) - a couple of other minor bugs. * tag 'md-3.4-fixes' of git://neil.brown.name/md: md/raid1,raid10: don't compare excess byte during consistency check. md/raid5: Fix a bug about judging if the operation is syncing or replacing md/raid1:Remove unnecessary rcu_dereference(conf->mirrors[i].rdev). md: Avoid OOPS when reshaping raid1 to raid0 md/raid5: fix handling of bad blocks during recovery. md/raid1: If md_integrity_register() failed,run() must free the mem md/raid0: If md_integrity_register() fails, raid0_run() must free the mem. md/linear: If md_integrity_register() fails, linear_run() must free the mem.
36bbffc0 · Linus Torvalds · 20a2a811 · 5020ad7d · 36bbffc0 · 36bbffc0
5 changed file
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -198,6 +198,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 static int linear_run (struct mddev *mddev)
 {
 	struct linear_conf *conf;
+	int ret;

 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;
@@ -211,7 +212,13 @@ static int linear_run (struct mddev *mddev)
 	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
 	mddev->queue->backing_dev_info.congested_fn = linear_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
-	return md_integrity_register(mddev);
+
+	ret =  md_integrity_register(mddev);
+	if (ret) {
+		kfree(conf);
+		mddev->private = NULL;
+	}
+	return ret;
 }

 static int linear_add(struct mddev *mddev, struct md_rdev *rdev)

--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -407,6 +407,8 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
 	return array_sectors;
 }

+static int raid0_stop(struct mddev *mddev);
+
 static int raid0_run(struct mddev *mddev)
 {
 	struct r0conf *conf;
@@ -454,7 +456,12 @@ static int raid0_run(struct mddev *mddev)

 	blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
 	dump_zones(mddev);
-	return md_integrity_register(mddev);
+
+	ret = md_integrity_register(mddev);
+	if (ret)
+		raid0_stop(mddev);
+
+	return ret;
 }

 static int raid0_stop(struct mddev *mddev)
@@ -625,6 +632,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
 static void *raid0_takeover_raid1(struct mddev *mddev)
 {
 	struct r0conf *priv_conf;
+	int chunksect;

 	/* Check layout:
 	 *  - (N - 1) mirror drives must be already faulty
@@ -635,10 +643,25 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
 		return ERR_PTR(-EINVAL);
 	}

+	/*
+	 * a raid1 doesn't have the notion of chunk size, so
+	 * figure out the largest suitable size we can use.
+	 */
+	chunksect = 64 * 2; /* 64K by default */
+
+	/* The array must be an exact multiple of chunksize */
+	while (chunksect && (mddev->array_sectors & (chunksect - 1)))
+		chunksect >>= 1;
+
+	if ((chunksect << 9) < PAGE_SIZE)
+		/* array size does not allow a suitable chunk size */
+		return ERR_PTR(-EINVAL);
+
 	/* Set new parameters */
 	mddev->new_level = 0;
 	mddev->new_layout = 0;
-	mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
+	mddev->new_chunk_sectors = chunksect;
+	mddev->chunk_sectors = chunksect;
 	mddev->delta_disks = 1 - mddev->raid_disks;
 	mddev->raid_disks = 1;
 	/* make sure it will be not marked as dirty */

--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1738,7 +1738,7 @@ static int process_checks(struct r1bio *r1_bio)
 				s = sbio->bi_io_vec[j].bv_page;
 				if (memcmp(page_address(p),
 					   page_address(s),
-					   PAGE_SIZE))
+					   sbio->bi_io_vec[j].bv_len))
 					break;
 			}
 		} else
@@ -2386,8 +2386,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 		int ok = 1;
 		for (i = 0 ; i < conf->raid_disks * 2 ; i++)
 			if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
-				struct md_rdev *rdev =
-					rcu_dereference(conf->mirrors[i].rdev);
+				struct md_rdev *rdev = conf->mirrors[i].rdev;
 				ok = rdev_set_badblocks(rdev, sector_nr,
 							min_bad, 0
 					) && ok;
@@ -2636,11 +2635,13 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	return ERR_PTR(err);
 }

+static int stop(struct mddev *mddev);
 static int run(struct mddev *mddev)
 {
 	struct r1conf *conf;
 	int i;
 	struct md_rdev *rdev;
+	int ret;

 	if (mddev->level != 1) {
 		printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -2705,7 +2706,11 @@ static int run(struct mddev *mddev)
 		mddev->queue->backing_dev_info.congested_data = mddev;
 		blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
 	}
-	return md_integrity_register(mddev);
+
+	ret =  md_integrity_register(mddev);
+	if (ret)
+		stop(mddev);
+	return ret;
 }

 static int stop(struct mddev *mddev)

--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1821,7 +1821,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 			for (j = 0; j < vcnt; j++)
 				if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
 					   page_address(tbio->bi_io_vec[j].bv_page),
-					   PAGE_SIZE))
+					   fbio->bi_io_vec[j].bv_len))
 					break;
 			if (j == vcnt)
 				continue;

--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2471,18 +2471,19 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 	int abort = 0;
 	int i;

-	md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
 	clear_bit(STRIPE_SYNCING, &sh->state);
 	s->syncing = 0;
 	s->replacing = 0;
 	/* There is nothing more to do for sync/check/repair.
+	 * Don't even need to abort as that is handled elsewhere
+	 * if needed, and not always wanted e.g. if there is a known
+	 * bad block here.
 	 * For recover/replace we need to record a bad block on all
 	 * non-sync devices, or abort the recovery
 	 */
-	if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
-		return;
-	/* During recovery devices cannot be removed, so locking and
-	 * refcounting of rdevs is not needed
+	if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
+		/* During recovery devices cannot be removed, so
+		 * locking and refcounting of rdevs is not needed
 		 */
 		for (i = 0; i < conf->raid_disks; i++) {
 			struct md_rdev *rdev = conf->disks[i].rdev;
@@ -2500,10 +2501,11 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 						   STRIPE_SECTORS, 0))
 				abort = 1;
 		}
-	if (abort) {
-		conf->recovery_disabled = conf->mddev->recovery_disabled;
-		set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+		if (abort)
+			conf->recovery_disabled =
+				conf->mddev->recovery_disabled;
 	}
+	md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
 }

 static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 			/* Not in-sync */;
 		else if (is_bad) {
 			/* also not in-sync */
-			if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+			if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+			    test_bit(R5_UPTODATE, &dev->flags)) {
 				/* treat as in-sync, but with a read error
 				 * which we can now try to correct
 				 */
@@ -3276,12 +3279,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 		/* If there is a failed device being replaced,
 		 *     we must be recovering.
 		 * else if we are after recovery_cp, we must be syncing
+		 * else if MD_RECOVERY_REQUESTED is set, we also are syncing.
 		 * else we can only be replacing
 		 * sync and recovery both need to read all devices, and so
 		 * use the same flag.
 		 */
 		if (do_recovery ||
-		    sh->sector >= conf->mddev->recovery_cp)
+		    sh->sector >= conf->mddev->recovery_cp ||
+		    test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery)))
 			s->syncing = 1;
 		else
 			s->replacing = 1;