提交 e464eafd 编写于 作者: N NeilBrown 提交者: Linus Torvalds

[PATCH] md: Support suspending of IO to regions of an md array

This allows user-space to access data safely.  This is needed for raid5
reshape as user-space needs to take a backup of the first few stripes before
allowing reshape to commence.

It will also be useful in cluster-aware raid1 configurations so that all
cluster members can leave a section of the array untouched while a
resync/recovery happens.

A 'start' and 'end' of the suspended range are written to 2 sysfs attributes.
Note that only one range can be suspended at a time.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
上级 16484bf5
...@@ -2365,6 +2365,63 @@ sync_completed_show(mddev_t *mddev, char *page) ...@@ -2365,6 +2365,63 @@ sync_completed_show(mddev_t *mddev, char *page)
static struct md_sysfs_entry static struct md_sysfs_entry
md_sync_completed = __ATTR_RO(sync_completed); md_sync_completed = __ATTR_RO(sync_completed);
/*
 * sysfs 'show' handler for suspend_lo: prints the current lower bound of
 * the suspended range as an unsigned decimal followed by a newline.
 * Returns the number of characters written into page.
 */
static ssize_t
suspend_lo_show(mddev_t *mddev, char *page)
{
	unsigned long long lo = mddev->suspend_lo;

	return sprintf(page, "%llu\n", lo);
}
/*
 * sysfs 'store' handler for suspend_lo: sets the lower bound of the
 * suspended I/O range from a decimal string.
 *
 * The write is rejected with -EINVAL when:
 *  - the array has no personality attached, or the personality does not
 *    provide a quiesce method;
 *  - buf does not start with a decimal number, or the number is followed
 *    by anything other than an optional newline;
 *  - the new value is neither >= suspend_hi nor strictly between the
 *    current suspend_lo and suspend_hi.
 *
 * On success the new bound is stored, the personality's quiesce method is
 * called with state 2, and len is returned.
 */
static ssize_t
suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
{
	char *e;
	unsigned long long new = simple_strtoull(buf, &e, 10);

	/* pers is dereferenced below, so it must be checked before ->quiesce */
	if (mddev->pers == NULL || mddev->pers->quiesce == NULL)
		return -EINVAL;
	if (buf == e || (*e && *e != '\n'))
		return -EINVAL;
	if (new >= mddev->suspend_hi ||
	    (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
		mddev->suspend_lo = new;
		mddev->pers->quiesce(mddev, 2);
		return len;
	}
	return -EINVAL;
}
/*
 * sysfs attribute "suspend_lo": mode S_IRUGO|S_IWUSR, read through
 * suspend_lo_show and written through suspend_lo_store.
 */
static struct md_sysfs_entry md_suspend_lo =
__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
/*
 * sysfs 'show' handler for suspend_hi: prints the current upper bound of
 * the suspended range as an unsigned decimal followed by a newline.
 * Returns the number of characters written into page.
 */
static ssize_t
suspend_hi_show(mddev_t *mddev, char *page)
{
	unsigned long long hi = mddev->suspend_hi;

	return sprintf(page, "%llu\n", hi);
}
/*
 * sysfs 'store' handler for suspend_hi: sets the upper bound of the
 * suspended I/O range from a decimal string.
 *
 * The write is rejected with -EINVAL when:
 *  - the array has no personality attached, or the personality does not
 *    provide a quiesce method;
 *  - buf does not start with a decimal number, or the number is followed
 *    by anything other than an optional newline;
 *  - the new value is neither (<= suspend_lo while suspend_lo >= suspend_hi)
 *    nor greater than both suspend_lo and suspend_hi.
 *
 * On success the new bound is stored, the personality's quiesce method is
 * called with state 1 and then state 0, and len is returned.
 */
static ssize_t
suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
{
	char *e;
	unsigned long long new = simple_strtoull(buf, &e, 10);

	/* pers is dereferenced below, so it must be checked before ->quiesce */
	if (mddev->pers == NULL || mddev->pers->quiesce == NULL)
		return -EINVAL;
	if (buf == e || (*e && *e != '\n'))
		return -EINVAL;
	if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
	    (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
		mddev->suspend_hi = new;
		/* quiesce, then immediately release, after publishing the new bound */
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
		return len;
	}
	return -EINVAL;
}
/*
 * sysfs attribute "suspend_hi": mode S_IRUGO|S_IWUSR, read through
 * suspend_hi_show and written through suspend_hi_store.
 */
static struct md_sysfs_entry md_suspend_hi =
__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
static struct attribute *md_default_attrs[] = { static struct attribute *md_default_attrs[] = {
&md_level.attr, &md_level.attr,
&md_raid_disks.attr, &md_raid_disks.attr,
...@@ -2382,6 +2439,8 @@ static struct attribute *md_redundancy_attrs[] = { ...@@ -2382,6 +2439,8 @@ static struct attribute *md_redundancy_attrs[] = {
&md_sync_max.attr, &md_sync_max.attr,
&md_sync_speed.attr, &md_sync_speed.attr,
&md_sync_completed.attr, &md_sync_completed.attr,
&md_suspend_lo.attr,
&md_suspend_hi.attr,
NULL, NULL,
}; };
static struct attribute_group md_redundancy_group = { static struct attribute_group md_redundancy_group = {
......
...@@ -1805,6 +1805,15 @@ static int make_request(request_queue_t *q, struct bio * bi) ...@@ -1805,6 +1805,15 @@ static int make_request(request_queue_t *q, struct bio * bi)
goto retry; goto retry;
} }
} }
/* FIXME what if we get a false positive because these
* are being updated.
*/
if (logical_sector >= mddev->suspend_lo &&
logical_sector < mddev->suspend_hi) {
release_stripe(sh);
schedule();
goto retry;
}
if (test_bit(STRIPE_EXPANDING, &sh->state) || if (test_bit(STRIPE_EXPANDING, &sh->state) ||
!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
...@@ -2725,6 +2734,10 @@ static void raid5_quiesce(mddev_t *mddev, int state) ...@@ -2725,6 +2734,10 @@ static void raid5_quiesce(mddev_t *mddev, int state)
raid5_conf_t *conf = mddev_to_conf(mddev); raid5_conf_t *conf = mddev_to_conf(mddev);
switch(state) { switch(state) {
case 2: /* resume for a suspend */
wake_up(&conf->wait_for_overlap);
break;
case 1: /* stop all writes */ case 1: /* stop all writes */
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
conf->quiesce = 1; conf->quiesce = 1;
...@@ -2738,6 +2751,7 @@ static void raid5_quiesce(mddev_t *mddev, int state) ...@@ -2738,6 +2751,7 @@ static void raid5_quiesce(mddev_t *mddev, int state)
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
conf->quiesce = 0; conf->quiesce = 0;
wake_up(&conf->wait_for_stripe); wake_up(&conf->wait_for_stripe);
wake_up(&conf->wait_for_overlap);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
break; break;
} }
......
...@@ -151,6 +151,10 @@ struct mddev_s ...@@ -151,6 +151,10 @@ struct mddev_s
sector_t resync_mismatches; /* count of sectors where sector_t resync_mismatches; /* count of sectors where
* parity/replica mismatch found * parity/replica mismatch found
*/ */
/* allow user-space to request suspension of IO to regions of the array */
sector_t suspend_lo;
sector_t suspend_hi;
/* if zero, use the system-wide default */ /* if zero, use the system-wide default */
int sync_speed_min; int sync_speed_min;
int sync_speed_max; int sync_speed_max;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册