From 432bc30040120d508fd45dd0d140a8345a8132cc Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Wed, 16 Oct 2019 16:00:03 +0800 Subject: [PATCH] md: no longer compare spare disk superblock events in super_load commit 6a5cb53aaa4ef515ddeffa04ce18b771121127b4 upstream. We have a test case as follow: mdadm -CR /dev/md1 -l 1 -n 4 /dev/sd[a-d] \ --assume-clean --bitmap=internal mdadm -S /dev/md1 mdadm -A /dev/md1 /dev/sd[b-c] --run --force mdadm --zero /dev/sda mdadm /dev/md1 -a /dev/sda echo offline > /sys/block/sdc/device/state echo offline > /sys/block/sdb/device/state sleep 5 mdadm -S /dev/md1 echo running > /sys/block/sdb/device/state echo running > /sys/block/sdc/device/state mdadm -A /dev/md1 /dev/sd[a-c] --run --force When we readd /dev/sda to the array, it started to do recovery. After offline the other two disks in md1, the recovery have been interrupted and superblock update info cannot be written to the offline disks. While the spare disk (/dev/sda) can continue to update superblock info. After stopping the array and assemble it, we found the array run fail, with the follow kernel message: [ 172.986064] md: kicking non-fresh sdb from array! [ 173.004210] md: kicking non-fresh sdc from array! [ 173.022383] md/raid1:md1: active with 0 out of 4 mirrors [ 173.022406] md1: failed to create bitmap (-5) [ 173.023466] md: md1 stopped. Since both sdb and sdc have the value of 'sb->events' smaller than that in sda, they have been kicked from the array. However, the only remained disk sda is in 'spare' state before stop and it cannot be added to conf->mirrors[] array. In the end, raid array assemble and run fail. In fact, we can use the older disk sdb or sdc to assemble the array. That means we should not choose the 'spare' disk as the fresh disk in analyze_sbs(). To fix the problem, we do not compare superblock events when it is a spare disk, as same as validate_super. Signed-off-by: Yufen Yu Signed-off-by: Song Liu Signed-off-by: Joseph Qi Acked-by: Caspar Zhang --- drivers/md/md.c | 57 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 0a1daf2cde1f..caedc11f9020 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1093,7 +1093,15 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor rdev->desc_nr = sb->this_disk.number; if (!refdev) { - ret = 1; + /* + * Insist on good event counter while assembling, except + * for spares (which don't need an event count) + */ + if (sb->disks[rdev->desc_nr].state & ( + (1<sb_page); @@ -1109,7 +1117,14 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor } ev1 = md_event(sb); ev2 = md_event(refsb); - if (ev1 > ev2) + + /* + * Insist on good event counter while assembling, except + * for spares (which don't need an event count) + */ + if (sb->disks[rdev->desc_nr].state & ( + (1< ev2)) ret = 1; else ret = 0; @@ -1469,6 +1484,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ sector_t sectors; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; int bmask; + __u64 role; /* * Calculate the position of the superblock in 512byte sectors. @@ -1598,8 +1614,20 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset; } + role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + if (!refdev) { - ret = 1; + /* + * Insist of good event counter while assembling, except for + * spares (which don't need an event count) + */ + if (rdev->desc_nr >= 0 && + rdev->desc_nr < le32_to_cpu(sb->max_dev) && + (role < MD_DISK_ROLE_MAX || + role == MD_DISK_ROLE_JOURNAL)) + ret = 1; + else + ret = 0; } else { __u64 ev1, ev2; struct mdp_superblock_1 *refsb = page_address(refdev->sb_page); @@ -1616,7 +1644,14 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ ev1 = le64_to_cpu(sb->events); ev2 = le64_to_cpu(refsb->events); - if (ev1 > ev2) + /* + * Insist of good event counter while assembling, except for + * spares (which don't need an event count) + */ + if (rdev->desc_nr >= 0 && + rdev->desc_nr < le32_to_cpu(sb->max_dev) && + (role < MD_DISK_ROLE_MAX || + role == MD_DISK_ROLE_JOURNAL) && ev1 > ev2) ret = 1; else ret = 0; @@ -3529,7 +3564,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe * Check a full RAID array for plausibility */ -static void analyze_sbs(struct mddev *mddev) +static int analyze_sbs(struct mddev *mddev) { int i; struct md_rdev *rdev, *freshest, *tmp; @@ -3550,6 +3585,12 @@ static void analyze_sbs(struct mddev *mddev) md_kick_rdev_from_array(rdev); } + /* Cannot find a valid fresh disk */ + if (!freshest) { + pr_warn("md: cannot find a valid disk\n"); + return -EINVAL; + } + super_types[mddev->major_version]. validate_super(mddev, freshest); @@ -3584,6 +3625,8 @@ static void analyze_sbs(struct mddev *mddev) clear_bit(In_sync, &rdev->flags); } } + + return 0; } /* Read a fixed-point number. @@ -5467,7 +5510,9 @@ int md_run(struct mddev *mddev) if (!mddev->raid_disks) { if (!mddev->persistent) return -EINVAL; - analyze_sbs(mddev); + err = analyze_sbs(mddev); + if (err) + return -EINVAL; } if (mddev->level != LEVEL_NONE) -- GitLab