提交 ceff49d9 编写于 作者: Y Yufen Yu 提交者: Zheng Zengkai

md/raid1: fix a race between removing rdev and access conf->mirrors[i].rdev

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4JYYO?from=project-issue
CVE: NA

---------------------------

We get a NULL pointer dereference oops when test raid1 as follow:

mdadm -CR /dev/md1 -l 1 -n 2 /dev/sd[ab]

mdadm /dev/md1 -f /dev/sda
mdadm /dev/md1 -r /dev/sda
mdadm /dev/md1 -a /dev/sda
sleep 5
mdadm /dev/md1 -f /dev/sdb
mdadm /dev/md1 -r /dev/sdb
mdadm /dev/md1 -a /dev/sdb

After a disk(/dev/sda) has been removed, we add the disk to
raid array again, which would trigger recovery action.
Since the rdev current state is 'spare', read/write bio can
be issued to the disk.

Then we set the other disk (/dev/sdb) faulty. Since the raid
array is now in degraded state and /dev/sdb is the only
'In_sync' disk, raid1_error() will return but without set
faulty success.

However, that can interrupt the recovery action and
md_check_recovery will try to call remove_and_add_spares()
to remove the spare disk. And the race condition between
remove_and_add_spares() and raid1_write_request() in follow
can cause NULL pointer dereference for conf->mirrors[i].rdev:

raid1_write_request()   md_check_recovery    raid1_error()
rcu_read_lock()
rdev != NULL
!test_bit(Faulty, &rdev->flags)

                                           conf->recovery_disabled=
                                             mddev->recovery_disabled;
                                            return busy

                        remove_and_add_spares
                        raid1_remove_disk
                        rdev->nr_pending == 0

atomic_inc(&rdev->nr_pending);
rcu_read_unlock()

                        p->rdev=NULL

conf->mirrors[i].rdev->data_offset
NULL pointer deref!!!

                        if (!test_bit(RemoveSynchronized,
                          &rdev->flags))
                         synchronize_rcu();
                         p->rdev=rdev

To fix the race condition, we add a new flag 'WantRemove' for rdev.
Before access conf->mirrors[i].rdev, we need to ensure the rdev
without 'WantRemove' bit.

Link: https://marc.info/?l=linux-raid&m=156412052717709&w=2Reported-by: NZou Wei <zou_wei@huawei.com>
Signed-off-by: NYufen Yu <yuyufen@huawei.com>
Confilct:
        drivers/md/md.h
Signed-off-by: NLaibin Qiu <qiulaibin@huawei.com>
Reviewed-by: Nyuyufen <yuyufen@huawei.com>
Reviewed-by: NJason Yan <yanaijie@huawei.com>
Signed-off-by: NZheng Zengkai <zhengzengkai@huawei.com>
上级 78987190
......@@ -213,6 +213,10 @@ enum flag_bits {
* check if there is collision between raid1
* serial bios.
*/
WantRemove, /* Before set conf->mirrors[i] as NULL,
* we set the bit first, avoiding access the
* conf->mirrors[i] after it set NULL.
*/
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
......
......@@ -641,7 +641,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL
|| test_bit(Faulty, &rdev->flags))
|| test_bit(Faulty, &rdev->flags)
|| test_bit(WantRemove, &rdev->flags))
continue;
if (!test_bit(In_sync, &rdev->flags) &&
rdev->recovery_offset < this_sector + sectors)
......@@ -770,7 +771,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
if (best_disk >= 0) {
rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
if (!rdev)
if (!rdev || test_bit(Faulty, &rdev->flags)
|| test_bit(WantRemove, &rdev->flags))
goto retry;
atomic_inc(&rdev->nr_pending);
sectors = best_good_sectors;
......@@ -1382,7 +1384,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
break;
}
r1_bio->bios[i] = NULL;
if (!rdev || test_bit(Faulty, &rdev->flags)) {
if (!rdev || test_bit(Faulty, &rdev->flags)
|| test_bit(WantRemove, &rdev->flags)) {
if (i < conf->raid_disks)
set_bit(R1BIO_Degraded, &r1_bio->state);
continue;
......@@ -1759,6 +1762,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
p->head_position = 0;
rdev->raid_disk = mirror;
clear_bit(WantRemove, &rdev->flags);
err = 0;
/* As all devices are equivalent, we don't need a full recovery
* if this was recently any drive of the array
......@@ -1773,6 +1777,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
/* Add this device as a replacement */
clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags);
clear_bit(WantRemove, &rdev->flags);
rdev->raid_disk = mirror;
err = 0;
conf->fullsync = 1;
......@@ -1812,16 +1817,26 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY;
goto abort;
}
p->rdev = NULL;
/*
* Before set p->rdev = NULL, we set WantRemove bit avoiding
* race between rdev remove and issue bio, which can cause
* NULL pointer deference of rdev by conf->mirrors[i].rdev.
*/
set_bit(WantRemove, &rdev->flags);
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
/* lost the race, try later */
err = -EBUSY;
p->rdev = rdev;
clear_bit(WantRemove, &rdev->flags);
goto abort;
}
}
p->rdev = NULL;
if (conf->mirrors[conf->raid_disks + number].rdev) {
/* We just removed a device that is being replaced.
* Move down the replacement. We drain all IO before
......@@ -2716,7 +2731,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev == NULL ||
test_bit(Faulty, &rdev->flags)) {
test_bit(Faulty, &rdev->flags) ||
test_bit(WantRemove, &rdev->flags)) {
if (i < conf->raid_disks)
still_degraded = 1;
} else if (!test_bit(In_sync, &rdev->flags)) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册