提交 4526b710 编写于 作者: L Linus Torvalds

Merge tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD updates from Shaohua Li:
 "This update mainly fixes bugs.

   - a raid5 discard related fix from Jes
   - a MD multipath bio clone fix from Ming
   - raid1 error handling deadlock fix from Nate and corresponding
     raid10 fix from myself
   - a raid5 stripe batch fix from Neil
   - a patch from Sebastian to avoid unnecessary uevent
   - several cleanup/debug patches"

* tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/raid5: Cleanup cpu hotplug notifier
  raid10: include bio_end_io_list in nr_queued to prevent freeze_array hang
  raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang
  md: fix typos for stipe
  md/bitmap: remove redundant return in bitmap_checkpage
  md/raid1: remove unnecessary BUG_ON
  md: multipath: don't hardcopy bio in .make_request path
  md/raid5: output stripe state for debug
  md/raid5: preserve STRIPE_PREREAD_ACTIVE in break_stripe_batch_list
  Update MD git tree URL
  md/bitmap: remove redundant check
  MD: warn for potential deadlock
  md: Drop sending a change uevent when stopping
  RAID5: revert e9e4c377 to fix a livelock
  RAID5: check_reshape() shouldn't call mddev_suspend
  md/raid5: Compare apples to apples (or sectors to sectors)
......@@ -10291,7 +10291,7 @@ F: drivers/media/pci/solo6x10/
SOFTWARE RAID (Multiple Disks) SUPPORT
M: Shaohua Li <shli@kernel.org>
L: linux-raid@vger.kernel.org
T: git git://neil.brown.name/md
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
S: Supported
F: drivers/md/
F: include/linux/raid/
......
......@@ -98,7 +98,6 @@ __acquires(bitmap->lock)
bitmap->bp[page].hijacked) {
/* somebody beat us to getting the page */
kfree(mappage);
return 0;
} else {
/* no page was in place and we have one, so install it */
......@@ -510,8 +509,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
sb->chunksize = cpu_to_le32(chunksize);
daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
if (!daemon_sleep ||
(daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
daemon_sleep = 5 * HZ;
}
......
......@@ -49,8 +49,8 @@
* When we set a bit, or in the counter (to start a write), if the fields is
* 0, we first set the disk bit and set the counter to 1.
*
* If the counter is 0, the on-disk bit is clear and the stipe is clean
* Anything that dirties the stipe pushes the counter to 2 (at least)
* If the counter is 0, the on-disk bit is clear and the stripe is clean
* Anything that dirties the stripe pushes the counter to 2 (at least)
* and sets the on-disk bit (lazily).
* If a periodic sweep find the counter at 2, it is decremented to 1.
* If the sweep find the counter at 1, the on-disk bit is cleared and the
......
......@@ -305,6 +305,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
*/
void mddev_suspend(struct mddev *mddev)
{
WARN_ON_ONCE(current == mddev->thread->tsk);
if (mddev->suspended++)
return;
synchronize_rcu();
......@@ -5671,7 +5672,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
export_array(mddev);
md_clean(mddev);
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
if (mddev->hold_active == UNTIL_STOP)
mddev->hold_active = 0;
}
......
......@@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
}
multipath = conf->multipaths + mp_bh->path;
mp_bh->bio = *bio;
bio_init(&mp_bh->bio);
__bio_clone_fast(&mp_bh->bio, bio);
mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
mp_bh->bio.bi_bdev = multipath->rdev->bdev;
mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
......
......@@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
if (fail) {
spin_lock_irq(&conf->device_lock);
list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
conf->nr_queued++;
spin_unlock_irq(&conf->device_lock);
md_wakeup_thread(conf->mddev->thread);
} else {
......@@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread)
LIST_HEAD(tmp);
spin_lock_irqsave(&conf->device_lock, flags);
if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
list_add(&tmp, &conf->bio_end_io_list);
list_del_init(&conf->bio_end_io_list);
while (!list_empty(&conf->bio_end_io_list)) {
list_move(conf->bio_end_io_list.prev, &tmp);
conf->nr_queued--;
}
}
spin_unlock_irqrestore(&conf->device_lock, flags);
while (!list_empty(&tmp)) {
......@@ -2695,7 +2698,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
!conf->fullsync &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
break;
BUG_ON(sync_blocks < (PAGE_SIZE>>9));
if ((len >> 9) > sync_blocks)
len = sync_blocks<<9;
}
......
......@@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
if (fail) {
spin_lock_irq(&conf->device_lock);
list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
conf->nr_queued++;
spin_unlock_irq(&conf->device_lock);
md_wakeup_thread(conf->mddev->thread);
} else {
......@@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread)
LIST_HEAD(tmp);
spin_lock_irqsave(&conf->device_lock, flags);
if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
list_add(&tmp, &conf->bio_end_io_list);
list_del_init(&conf->bio_end_io_list);
while (!list_empty(&conf->bio_end_io_list)) {
list_move(conf->bio_end_io_list.prev, &tmp);
conf->nr_queued--;
}
}
spin_unlock_irqrestore(&conf->device_lock, flags);
while (!list_empty(&tmp)) {
......
......@@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf,
int hash)
{
int size;
unsigned long do_wakeup = 0;
int i = 0;
bool do_wakeup = false;
unsigned long flags;
if (hash == NR_STRIPE_HASH_LOCKS) {
......@@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf,
!list_empty(list))
atomic_dec(&conf->empty_inactive_list_nr);
list_splice_tail_init(list, conf->inactive_list + hash);
do_wakeup |= 1 << hash;
do_wakeup = true;
spin_unlock_irqrestore(conf->hash_locks + hash, flags);
}
size--;
hash--;
}
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
if (do_wakeup & (1 << i))
wake_up(&conf->wait_for_stripe[i]);
}
if (do_wakeup) {
wake_up(&conf->wait_for_stripe);
if (atomic_read(&conf->active_stripes) == 0)
wake_up(&conf->wait_for_quiescent);
if (conf->retry_read_aligned)
......@@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
if (!sh) {
set_bit(R5_INACTIVE_BLOCKED,
&conf->cache_state);
wait_event_exclusive_cmd(
conf->wait_for_stripe[hash],
wait_event_lock_irq(
conf->wait_for_stripe,
!list_empty(conf->inactive_list + hash) &&
(atomic_read(&conf->active_stripes)
< (conf->max_nr_stripes * 3 / 4)
|| !test_bit(R5_INACTIVE_BLOCKED,
&conf->cache_state)),
spin_unlock_irq(conf->hash_locks + hash),
spin_lock_irq(conf->hash_locks + hash));
*(conf->hash_locks + hash));
clear_bit(R5_INACTIVE_BLOCKED,
&conf->cache_state);
} else {
......@@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
}
} while (sh == NULL);
if (!list_empty(conf->inactive_list + hash))
wake_up(&conf->wait_for_stripe[hash]);
spin_unlock_irq(conf->hash_locks + hash);
return sh;
}
......@@ -2089,6 +2080,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
unsigned long cpu;
int err = 0;
/*
* Never shrink. And mddev_suspend() could deadlock if this is called
* from raid5d. In that case, scribble_disks and scribble_sectors
* should equal to new_disks and new_sectors
*/
if (conf->scribble_disks >= new_disks &&
conf->scribble_sectors >= new_sectors)
return 0;
mddev_suspend(conf->mddev);
get_online_cpus();
for_each_present_cpu(cpu) {
......@@ -2110,6 +2109,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
}
put_online_cpus();
mddev_resume(conf->mddev);
if (!err) {
conf->scribble_disks = new_disks;
conf->scribble_sectors = new_sectors;
}
return err;
}
......@@ -2190,7 +2193,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
cnt = 0;
list_for_each_entry(nsh, &newstripes, lru) {
lock_device_hash_lock(conf, hash);
wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
wait_event_cmd(conf->wait_for_stripe,
!list_empty(conf->inactive_list + hash),
unlock_device_hash_lock(conf, hash),
lock_device_hash_lock(conf, hash));
......@@ -4233,10 +4236,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
list_del_init(&sh->batch_list);
WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
(1 << STRIPE_SYNCING) |
(1 << STRIPE_REPLACED) |
(1 << STRIPE_PREREAD_ACTIVE) |
(1 << STRIPE_DELAYED) |
(1 << STRIPE_BIT_DELAY) |
(1 << STRIPE_FULL_WRITE) |
......@@ -4246,11 +4248,14 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
(1 << STRIPE_DISCARD) |
(1 << STRIPE_BATCH_READY) |
(1 << STRIPE_BATCH_ERR) |
(1 << STRIPE_BITMAP_PENDING)));
WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
(1 << STRIPE_REPLACED)));
(1 << STRIPE_BITMAP_PENDING)),
"stripe state: %lx\n", sh->state);
WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
(1 << STRIPE_REPLACED)),
"head stripe state: %lx\n", head_sh->state);
set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
(1 << STRIPE_PREREAD_ACTIVE) |
(1 << STRIPE_DEGRADED)),
head_sh->state & (1 << STRIPE_INSYNC));
......@@ -6376,6 +6381,8 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
break;
default:
......@@ -6413,6 +6420,12 @@ static int raid5_alloc_percpu(struct r5conf *conf)
}
put_online_cpus();
if (!err) {
conf->scribble_disks = max(conf->raid_disks,
conf->previous_raid_disks);
conf->scribble_sectors = max(conf->chunk_sectors,
conf->prev_chunk_sectors);
}
return err;
}
......@@ -6503,9 +6516,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
seqcount_init(&conf->gen_lock);
mutex_init(&conf->cache_size_mutex);
init_waitqueue_head(&conf->wait_for_quiescent);
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
init_waitqueue_head(&conf->wait_for_stripe[i]);
}
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
INIT_LIST_HEAD(&conf->handle_list);
INIT_LIST_HEAD(&conf->hold_list);
......@@ -7014,7 +7025,7 @@ static int raid5_run(struct mddev *mddev)
}
if (discard_supported &&
mddev->queue->limits.max_discard_sectors >= stripe &&
mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
mddev->queue->limits.discard_granularity >= stripe)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
mddev->queue);
......
......@@ -510,6 +510,8 @@ struct r5conf {
* conversions
*/
} __percpu *percpu;
int scribble_disks;
int scribble_sectors;
#ifdef CONFIG_HOTPLUG_CPU
struct notifier_block cpu_notify;
#endif
......@@ -522,7 +524,7 @@ struct r5conf {
atomic_t empty_inactive_list_nr;
struct llist_head released_stripes;
wait_queue_head_t wait_for_quiescent;
wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS];
wait_queue_head_t wait_for_stripe;
wait_queue_head_t wait_for_overlap;
unsigned long cache_state;
#define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册