提交 24afd80d 编写于 作者: N NeilBrown

md/raid10: handle recovery of replacement devices.

If there is a replacement device, then recover to it,
reading from any drives - maybe the one being replaced, maybe not.
Signed-off-by: NNeilBrown <neilb@suse.de>
上级 9ad1aefc
...@@ -1843,7 +1843,7 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1843,7 +1843,7 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
{ {
struct r10conf *conf = mddev->private; struct r10conf *conf = mddev->private;
int d; int d;
struct bio *wbio; struct bio *wbio, *wbio2;
if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) { if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
fix_recovery_read_error(r10_bio); fix_recovery_read_error(r10_bio);
...@@ -1855,12 +1855,20 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1855,12 +1855,20 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* share the pages with the first bio * share the pages with the first bio
* and submit the write request * and submit the write request
*/ */
wbio = r10_bio->devs[1].bio;
d = r10_bio->devs[1].devnum; d = r10_bio->devs[1].devnum;
wbio = r10_bio->devs[1].bio;
wbio2 = r10_bio->devs[1].repl_bio;
if (wbio->bi_end_io) {
atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&conf->mirrors[d].rdev->nr_pending);
md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
generic_make_request(wbio); generic_make_request(wbio);
}
if (wbio2 && wbio2->bi_end_io) {
atomic_inc(&conf->mirrors[d].replacement->nr_pending);
md_sync_acct(conf->mirrors[d].replacement->bdev,
wbio2->bi_size >> 9);
generic_make_request(wbio2);
}
} }
...@@ -2590,23 +2598,30 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -2590,23 +2598,30 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t sect; sector_t sect;
int must_sync; int must_sync;
int any_working; int any_working;
struct mirror_info *mirror = &conf->mirrors[i];
if (conf->mirrors[i].rdev == NULL || if ((mirror->rdev == NULL ||
test_bit(In_sync, &conf->mirrors[i].rdev->flags)) test_bit(In_sync, &mirror->rdev->flags))
&&
(mirror->replacement == NULL ||
test_bit(Faulty,
&mirror->replacement->flags)))
continue; continue;
still_degraded = 0; still_degraded = 0;
/* want to reconstruct this device */ /* want to reconstruct this device */
rb2 = r10_bio; rb2 = r10_bio;
sect = raid10_find_virt(conf, sector_nr, i); sect = raid10_find_virt(conf, sector_nr, i);
/* Unless we are doing a full sync, we only need /* Unless we are doing a full sync, or a replacement
* to recover the block if it is set in the bitmap * we only need to recover the block if it is set in
* the bitmap
*/ */
must_sync = bitmap_start_sync(mddev->bitmap, sect, must_sync = bitmap_start_sync(mddev->bitmap, sect,
&sync_blocks, 1); &sync_blocks, 1);
if (sync_blocks < max_sync) if (sync_blocks < max_sync)
max_sync = sync_blocks; max_sync = sync_blocks;
if (!must_sync && if (!must_sync &&
mirror->replacement == NULL &&
!conf->fullsync) { !conf->fullsync) {
/* yep, skip the sync_blocks here, but don't assume /* yep, skip the sync_blocks here, but don't assume
* that there will never be anything to do here * that there will never be anything to do here
...@@ -2676,33 +2691,60 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -2676,33 +2691,60 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_end_io = end_sync_read; bio->bi_end_io = end_sync_read;
bio->bi_rw = READ; bio->bi_rw = READ;
from_addr = r10_bio->devs[j].addr; from_addr = r10_bio->devs[j].addr;
bio->bi_sector = from_addr + bio->bi_sector = from_addr + rdev->data_offset;
conf->mirrors[d].rdev->data_offset; bio->bi_bdev = rdev->bdev;
bio->bi_bdev = conf->mirrors[d].rdev->bdev; atomic_inc(&rdev->nr_pending);
atomic_inc(&conf->mirrors[d].rdev->nr_pending); /* and we write to 'i' (if not in_sync) */
atomic_inc(&r10_bio->remaining);
/* and we write to 'i' */
for (k=0; k<conf->copies; k++) for (k=0; k<conf->copies; k++)
if (r10_bio->devs[k].devnum == i) if (r10_bio->devs[k].devnum == i)
break; break;
BUG_ON(k == conf->copies); BUG_ON(k == conf->copies);
to_addr = r10_bio->devs[k].addr;
r10_bio->devs[0].devnum = d;
r10_bio->devs[0].addr = from_addr;
r10_bio->devs[1].devnum = i;
r10_bio->devs[1].addr = to_addr;
rdev = mirror->rdev;
if (!test_bit(In_sync, &rdev->flags)) {
bio = r10_bio->devs[1].bio; bio = r10_bio->devs[1].bio;
bio->bi_next = biolist; bio->bi_next = biolist;
biolist = bio; biolist = bio;
bio->bi_private = r10_bio; bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write; bio->bi_end_io = end_sync_write;
bio->bi_rw = WRITE; bio->bi_rw = WRITE;
to_addr = r10_bio->devs[k].addr; bio->bi_sector = to_addr
bio->bi_sector = to_addr + + rdev->data_offset;
conf->mirrors[i].rdev->data_offset; bio->bi_bdev = rdev->bdev;
bio->bi_bdev = conf->mirrors[i].rdev->bdev; atomic_inc(&r10_bio->remaining);
} else
r10_bio->devs[0].devnum = d; r10_bio->devs[1].bio->bi_end_io = NULL;
r10_bio->devs[0].addr = from_addr;
r10_bio->devs[1].devnum = i;
r10_bio->devs[1].addr = to_addr;
/* and maybe write to replacement */
bio = r10_bio->devs[1].repl_bio;
if (bio)
bio->bi_end_io = NULL;
rdev = mirror->replacement;
/* Note: if rdev != NULL, then bio
* cannot be NULL as r10buf_pool_alloc will
* have allocated it.
* So the second test here is pointless.
* But it keeps semantic-checkers happy, and
* this comment keeps human reviewers
* happy.
*/
if (rdev == NULL || bio == NULL ||
test_bit(Faulty, &rdev->flags))
break;
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
bio->bi_rw = WRITE;
bio->bi_sector = to_addr + rdev->data_offset;
bio->bi_bdev = rdev->bdev;
atomic_inc(&r10_bio->remaining);
break; break;
} }
if (j == conf->copies) { if (j == conf->copies) {
...@@ -2720,8 +2762,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -2720,8 +2762,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
for (k = 0; k < conf->copies; k++) for (k = 0; k < conf->copies; k++)
if (r10_bio->devs[k].devnum == i) if (r10_bio->devs[k].devnum == i)
break; break;
if (!rdev_set_badblocks( if (!test_bit(In_sync,
conf->mirrors[i].rdev, &mirror->rdev->flags)
&& !rdev_set_badblocks(
mirror->rdev,
r10_bio->devs[k].addr,
max_sync, 0))
any_working = 0;
if (mirror->replacement &&
!rdev_set_badblocks(
mirror->replacement,
r10_bio->devs[k].addr, r10_bio->devs[k].addr,
max_sync, 0)) max_sync, 0))
any_working = 0; any_working = 0;
...@@ -2732,7 +2782,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -2732,7 +2782,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
printk(KERN_INFO "md/raid10:%s: insufficient " printk(KERN_INFO "md/raid10:%s: insufficient "
"working devices for recovery.\n", "working devices for recovery.\n",
mdname(mddev)); mdname(mddev));
conf->mirrors[i].recovery_disabled mirror->recovery_disabled
= mddev->recovery_disabled; = mddev->recovery_disabled;
} }
break; break;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册