diff --git a/drivers/md/md.c b/drivers/md/md.c index 295be1a688065b06b1367ab947f6793c9fce1ecd..51c19f86ff99c3785b5d27838e755d04dabe5fe3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5434,7 +5434,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) atomic_sub(blocks, &mddev->recovery_active); wake_up(&mddev->recovery_wait); if (!ok) { - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_wakeup_thread(mddev->thread); // stop recovery, signal do_sync .... } @@ -5690,7 +5690,7 @@ void md_do_sync(mddev_t *mddev) sectors = mddev->pers->sync_request(mddev, j, &skipped, currspeed < speed_min(mddev)); if (sectors == 0) { - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); goto out; } @@ -5713,8 +5713,7 @@ void md_do_sync(mddev_t *mddev) last_check = io_sectors; - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || - test_bit(MD_RECOVERY_ERR, &mddev->recovery)) + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) break; repeat: @@ -5768,8 +5767,7 @@ void md_do_sync(mddev_t *mddev) /* tell personality that we are finished */ mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); - if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && - !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && + if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && mddev->curr_resync > 2) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -5838,7 +5836,10 @@ static int remove_and_add_spares(mddev_t *mddev) } if (mddev->degraded) { - rdev_for_each(rdev, rtmp, mddev) + rdev_for_each(rdev, rtmp, mddev) { + if (rdev->raid_disk >= 0 && + !test_bit(In_sync, &rdev->flags)) + spares++; if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) { rdev->recovery_offset = 0; @@ -5856,6 +5857,7 @@ static int remove_and_add_spares(mddev_t *mddev) } else break; } + } } return spares; } @@ -5869,7 +5871,7 @@ static int remove_and_add_spares(mddev_t *mddev) * to do that as needed. * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in * "->recovery" and create a thread at ->sync_thread. - * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR) + * When the thread finishes it sets MD_RECOVERY_DONE * and wakeups up this thread which will reap the thread and finish up. * This thread also removes any faulty devices (with nr_pending == 0). * @@ -5944,8 +5946,7 @@ void md_check_recovery(mddev_t *mddev) /* resync has finished, collect result */ md_unregister_thread(mddev->sync_thread); mddev->sync_thread = NULL; - if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && - !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { /* success...*/ /* activate any spares */ mddev->pers->spare_active(mddev); @@ -5969,7 +5970,6 @@ void md_check_recovery(mddev_t *mddev) * might be left set */ clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - clear_bit(MD_RECOVERY_ERR, &mddev->recovery); clear_bit(MD_RECOVERY_INTR, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 4f4d1f383842c9ca41cb3e89e914cdace48e60c1..e968116e0de9699d2bef0f114ae79f638506bf1c 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -327,7 +327,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number) if (rdev) { if (test_bit(In_sync, &rdev->flags) || atomic_read(&rdev->nr_pending)) { - printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number); + printk(KERN_ERR "hot-remove-disk, slot %d is identified" + " but is still operational!\n", number); err = -EBUSY; goto abort; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d0f4021bbc2ecb483c7b30676478c5403f84f018..c610b947218afb73f49982d59dd178a7fdbd0959 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1027,7 +1027,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) /* * if recovery is running, make sure it aborts. */ - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); } else set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); @@ -1148,6 +1148,14 @@ static int raid1_remove_disk(mddev_t *mddev, int number) err = -EBUSY; goto abort; } + /* Only remove non-faulty devices is recovery + * is not possible. + */ + if (!test_bit(Faulty, &rdev->flags) && + mddev->degraded < conf->raid_disks) { + err = -EBUSY; + goto abort; + } p->rdev = NULL; synchronize_rcu(); if (atomic_read(&rdev->nr_pending)) { diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8536ede1e7129c37fc30303e15577c383e68569c..1de17da34a956e8383efb1b42474c3c4d3d15dbe 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1020,7 +1020,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) /* * if recovery is running, make sure it aborts. */ - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); } set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); @@ -1171,6 +1171,14 @@ static int raid10_remove_disk(mddev_t *mddev, int number) err = -EBUSY; goto abort; } + /* Only remove faulty devices in recovery + * is not possible. + */ + if (!test_bit(Faulty, &rdev->flags) && + enough(conf)) { + err = -EBUSY; + goto abort; + } p->rdev = NULL; synchronize_rcu(); if (atomic_read(&rdev->nr_pending)) { @@ -1237,6 +1245,7 @@ static void end_sync_write(struct bio *bio, int error) if (!uptodate) md_error(mddev, conf->mirrors[d].rdev); + update_head_pos(i, r10_bio); while (atomic_dec_and_test(&r10_bio->remaining)) { @@ -1844,7 +1853,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (rb2) atomic_dec(&rb2->remaining); r10_bio = rb2; - if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) + if (!test_and_set_bit(MD_RECOVERY_INTR, + &mddev->recovery)) printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", mdname(mddev)); break; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2f28745dacf974b509ae019231973f4de95c8227..425958a76b84afc9d7608509f6c06954db342745 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1268,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) /* * if recovery was running, make sure it aborts. */ - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); } set_bit(Faulty, &rdev->flags); printk (KERN_ALERT @@ -4574,6 +4574,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number) err = -EBUSY; goto abort; } + /* Only remove non-faulty devices if recovery + * isn't possible. + */ + if (!test_bit(Faulty, &rdev->flags) && + mddev->degraded <= conf->max_degraded) { + err = -EBUSY; + goto abort; + } p->rdev = NULL; synchronize_rcu(); if (atomic_read(&rdev->nr_pending)) { diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index a6d7ab688edec5699887975d691576b369a97af9..3dea9f545c8f337c516b1713a39e06f59ec767d3 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -188,8 +188,7 @@ struct mddev_s * NEEDED: we might need to start a resync/recover * RUNNING: a thread is running, or about to be started * SYNC: actually doing a resync, not a recovery - * ERR: and IO error was detected - abort the resync/recovery - * INTR: someone requested a (clean) early abort. + * INTR: resync needs to be aborted for some reason * DONE: thread is done and is waiting to be reaped * REQUEST: user-space has requested a sync (used with SYNC) * CHECK: user-space request for for check-only, no repair @@ -199,7 +198,6 @@ struct mddev_s */ #define MD_RECOVERY_RUNNING 0 #define MD_RECOVERY_SYNC 1 -#define MD_RECOVERY_ERR 2 #define MD_RECOVERY_INTR 3 #define MD_RECOVERY_DONE 4 #define MD_RECOVERY_NEEDED 5