提交 5626196a 作者: Jens Axboe

Merge branch 'md-next' of...

Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.2/block

Pull MD fixes from Song.

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/raid1: stop mdx_raid1 thread when raid1 array run failed
  md/raid5: use bdev_write_cache instead of open coding it
  md: fix a crash in mempool_free
  md/raid0, raid10: Don't set discard sectors for request queue
  md/bitmap: Fix bitmap chunk size overflow issues
  md: introduce md_ro_state
  md: factor out __md_set_array_info()
  lib/raid6: drop RAID6_USE_EMPTY_ZERO_PAGE
  raid5-cache: use try_cmpxchg in r5l_wake_reclaim
  drivers/md/md-bitmap: check the return value of md_bitmap_get_counter()
...@@ -486,7 +486,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap) ...@@ -486,7 +486,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap)
sb = kmap_atomic(bitmap->storage.sb_page); sb = kmap_atomic(bitmap->storage.sb_page);
pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic));
pr_debug(" version: %d\n", le32_to_cpu(sb->version)); pr_debug(" version: %u\n", le32_to_cpu(sb->version));
pr_debug(" uuid: %08x.%08x.%08x.%08x\n", pr_debug(" uuid: %08x.%08x.%08x.%08x\n",
le32_to_cpu(*(__le32 *)(sb->uuid+0)), le32_to_cpu(*(__le32 *)(sb->uuid+0)),
le32_to_cpu(*(__le32 *)(sb->uuid+4)), le32_to_cpu(*(__le32 *)(sb->uuid+4)),
...@@ -497,11 +497,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap) ...@@ -497,11 +497,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap)
pr_debug("events cleared: %llu\n", pr_debug("events cleared: %llu\n",
(unsigned long long) le64_to_cpu(sb->events_cleared)); (unsigned long long) le64_to_cpu(sb->events_cleared));
pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); pr_debug(" state: %08x\n", le32_to_cpu(sb->state));
pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize)); pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize));
pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
pr_debug(" sync size: %llu KB\n", pr_debug(" sync size: %llu KB\n",
(unsigned long long)le64_to_cpu(sb->sync_size)/2); (unsigned long long)le64_to_cpu(sb->sync_size)/2);
pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind)); pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
kunmap_atomic(sb); kunmap_atomic(sb);
} }
...@@ -2105,7 +2105,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, ...@@ -2105,7 +2105,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
bytes = DIV_ROUND_UP(chunks, 8); bytes = DIV_ROUND_UP(chunks, 8);
if (!bitmap->mddev->bitmap_info.external) if (!bitmap->mddev->bitmap_info.external)
bytes += sizeof(bitmap_super_t); bytes += sizeof(bitmap_super_t);
} while (bytes > (space << 9)); } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
(BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
} else } else
chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
...@@ -2150,7 +2151,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, ...@@ -2150,7 +2151,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
bitmap->counts.missing_pages = pages; bitmap->counts.missing_pages = pages;
bitmap->counts.chunkshift = chunkshift; bitmap->counts.chunkshift = chunkshift;
bitmap->counts.chunks = chunks; bitmap->counts.chunks = chunks;
bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift + bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
BITMAP_BLOCK_SHIFT); BITMAP_BLOCK_SHIFT);
blocks = min(old_counts.chunks << old_counts.chunkshift, blocks = min(old_counts.chunks << old_counts.chunkshift,
...@@ -2176,8 +2177,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, ...@@ -2176,8 +2177,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
bitmap->counts.missing_pages = old_counts.pages; bitmap->counts.missing_pages = old_counts.pages;
bitmap->counts.chunkshift = old_counts.chunkshift; bitmap->counts.chunkshift = old_counts.chunkshift;
bitmap->counts.chunks = old_counts.chunks; bitmap->counts.chunks = old_counts.chunks;
bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift + bitmap->mddev->bitmap_info.chunksize =
BITMAP_BLOCK_SHIFT); 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
blocks = old_counts.chunks << old_counts.chunkshift; blocks = old_counts.chunks << old_counts.chunkshift;
pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
break; break;
...@@ -2195,20 +2196,23 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, ...@@ -2195,20 +2196,23 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
if (set) { if (set) {
bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
if (*bmc_new == 0) { if (bmc_new) {
/* need to set on-disk bits too. */ if (*bmc_new == 0) {
sector_t end = block + new_blocks; /* need to set on-disk bits too. */
sector_t start = block >> chunkshift; sector_t end = block + new_blocks;
start <<= chunkshift; sector_t start = block >> chunkshift;
while (start < end) {
md_bitmap_file_set_bit(bitmap, block); start <<= chunkshift;
start += 1 << chunkshift; while (start < end) {
md_bitmap_file_set_bit(bitmap, block);
start += 1 << chunkshift;
}
*bmc_new = 2;
md_bitmap_count_page(&bitmap->counts, block, 1);
md_bitmap_set_pending(&bitmap->counts, block);
} }
*bmc_new = 2; *bmc_new |= NEEDED_MASK;
md_bitmap_count_page(&bitmap->counts, block, 1);
md_bitmap_set_pending(&bitmap->counts, block);
} }
*bmc_new |= NEEDED_MASK;
if (new_blocks < old_blocks) if (new_blocks < old_blocks)
old_blocks = new_blocks; old_blocks = new_blocks;
} }
...@@ -2534,6 +2538,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -2534,6 +2538,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len)
if (csize < 512 || if (csize < 512 ||
!is_power_of_2(csize)) !is_power_of_2(csize))
return -EINVAL; return -EINVAL;
if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
sizeof(((bitmap_super_t *)0)->chunksize))))
return -EOVERFLOW;
mddev->bitmap_info.chunksize = csize; mddev->bitmap_info.chunksize = csize;
return len; return len;
} }
......
...@@ -93,6 +93,18 @@ static int remove_and_add_spares(struct mddev *mddev, ...@@ -93,6 +93,18 @@ static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this); struct md_rdev *this);
static void mddev_detach(struct mddev *mddev); static void mddev_detach(struct mddev *mddev);
/*
 * Read/write mode of an md array, as stored in mddev->ro.
 *
 * The numeric values are spelled out because they match the bare
 * 0/1/2 literals that were compared against mddev->ro before the
 * enum existed.
 */
enum md_ro_state {
	/* Array accepts writes. */
	MD_RDWR		= 0,
	/* Array is read-only; md_submit_bio() fails non-empty writes. */
	MD_RDONLY	= 1,
	/* Read-only for now, switched to MD_RDWR on the first write. */
	MD_AUTO_READ	= 2,
	/* NOTE(review): presumably a "number of states" sentinel - confirm. */
	MD_MAX_STATE	= 3
};
/*
 * md_is_rdwr() - test whether an array is in read-write mode.
 * @mddev: array to inspect.
 *
 * Returns true only when mddev->ro equals MD_RDWR; both MD_RDONLY and
 * MD_AUTO_READ yield false.
 */
static bool md_is_rdwr(struct mddev *mddev)
{
	return mddev->ro == MD_RDWR;
}
/* /*
* Default number of read corrections we'll attempt on an rdev * Default number of read corrections we'll attempt on an rdev
* before ejecting it from the array. We divide the read error * before ejecting it from the array. We divide the read error
...@@ -444,7 +456,7 @@ static void md_submit_bio(struct bio *bio) ...@@ -444,7 +456,7 @@ static void md_submit_bio(struct bio *bio)
bio = bio_split_to_limits(bio); bio = bio_split_to_limits(bio);
if (mddev->ro == 1 && unlikely(rw == WRITE)) { if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0) if (bio_sectors(bio) != 0)
bio->bi_status = BLK_STS_IOERR; bio->bi_status = BLK_STS_IOERR;
bio_endio(bio); bio_endio(bio);
...@@ -509,13 +521,14 @@ static void md_end_flush(struct bio *bio) ...@@ -509,13 +521,14 @@ static void md_end_flush(struct bio *bio)
struct md_rdev *rdev = bio->bi_private; struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev; struct mddev *mddev = rdev->mddev;
bio_put(bio);
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&mddev->flush_pending)) { if (atomic_dec_and_test(&mddev->flush_pending)) {
/* The pre-request flush has finished */ /* The pre-request flush has finished */
queue_work(md_wq, &mddev->flush_work); queue_work(md_wq, &mddev->flush_work);
} }
bio_put(bio);
} }
static void md_submit_flush_data(struct work_struct *ws); static void md_submit_flush_data(struct work_struct *ws);
...@@ -913,10 +926,12 @@ static void super_written(struct bio *bio) ...@@ -913,10 +926,12 @@ static void super_written(struct bio *bio)
} else } else
clear_bit(LastDev, &rdev->flags); clear_bit(LastDev, &rdev->flags);
bio_put(bio);
rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&mddev->pending_writes)) if (atomic_dec_and_test(&mddev->pending_writes))
wake_up(&mddev->sb_wait); wake_up(&mddev->sb_wait);
rdev_dec_pending(rdev, mddev);
bio_put(bio);
} }
void md_super_write(struct mddev *mddev, struct md_rdev *rdev, void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
...@@ -2639,7 +2654,7 @@ void md_update_sb(struct mddev *mddev, int force_change) ...@@ -2639,7 +2654,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
int any_badblocks_changed = 0; int any_badblocks_changed = 0;
int ret = -1; int ret = -1;
if (mddev->ro) { if (!md_is_rdwr(mddev)) {
if (force_change) if (force_change)
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
return; return;
...@@ -3901,7 +3916,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -3901,7 +3916,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
goto out_unlock; goto out_unlock;
} }
rv = -EROFS; rv = -EROFS;
if (mddev->ro) if (!md_is_rdwr(mddev))
goto out_unlock; goto out_unlock;
/* request to change the personality. Need to ensure: /* request to change the personality. Need to ensure:
...@@ -4107,7 +4122,7 @@ layout_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -4107,7 +4122,7 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) { if (mddev->pers) {
if (mddev->pers->check_reshape == NULL) if (mddev->pers->check_reshape == NULL)
err = -EBUSY; err = -EBUSY;
else if (mddev->ro) else if (!md_is_rdwr(mddev))
err = -EROFS; err = -EROFS;
else { else {
mddev->new_layout = n; mddev->new_layout = n;
...@@ -4216,7 +4231,7 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -4216,7 +4231,7 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) { if (mddev->pers) {
if (mddev->pers->check_reshape == NULL) if (mddev->pers->check_reshape == NULL)
err = -EBUSY; err = -EBUSY;
else if (mddev->ro) else if (!md_is_rdwr(mddev))
err = -EROFS; err = -EROFS;
else { else {
mddev->new_chunk_sectors = n >> 9; mddev->new_chunk_sectors = n >> 9;
...@@ -4339,13 +4354,13 @@ array_state_show(struct mddev *mddev, char *page) ...@@ -4339,13 +4354,13 @@ array_state_show(struct mddev *mddev, char *page)
if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) { if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
switch(mddev->ro) { switch(mddev->ro) {
case 1: case MD_RDONLY:
st = readonly; st = readonly;
break; break;
case 2: case MD_AUTO_READ:
st = read_auto; st = read_auto;
break; break;
case 0: case MD_RDWR:
spin_lock(&mddev->lock); spin_lock(&mddev->lock);
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
st = write_pending; st = write_pending;
...@@ -4381,7 +4396,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -4381,7 +4396,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
int err = 0; int err = 0;
enum array_state st = match_word(buf, array_states); enum array_state st = match_word(buf, array_states);
if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) { if (mddev->pers && (st == active || st == clean) &&
mddev->ro != MD_RDONLY) {
/* don't take reconfig_mutex when toggling between /* don't take reconfig_mutex when toggling between
* clean and active * clean and active
*/ */
...@@ -4425,23 +4441,23 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -4425,23 +4441,23 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) if (mddev->pers)
err = md_set_readonly(mddev, NULL); err = md_set_readonly(mddev, NULL);
else { else {
mddev->ro = 1; mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1); set_disk_ro(mddev->gendisk, 1);
err = do_md_run(mddev); err = do_md_run(mddev);
} }
break; break;
case read_auto: case read_auto:
if (mddev->pers) { if (mddev->pers) {
if (mddev->ro == 0) if (md_is_rdwr(mddev))
err = md_set_readonly(mddev, NULL); err = md_set_readonly(mddev, NULL);
else if (mddev->ro == 1) else if (mddev->ro == MD_RDONLY)
err = restart_array(mddev); err = restart_array(mddev);
if (err == 0) { if (err == 0) {
mddev->ro = 2; mddev->ro = MD_AUTO_READ;
set_disk_ro(mddev->gendisk, 0); set_disk_ro(mddev->gendisk, 0);
} }
} else { } else {
mddev->ro = 2; mddev->ro = MD_AUTO_READ;
err = do_md_run(mddev); err = do_md_run(mddev);
} }
break; break;
...@@ -4466,7 +4482,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -4466,7 +4482,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
wake_up(&mddev->sb_wait); wake_up(&mddev->sb_wait);
err = 0; err = 0;
} else { } else {
mddev->ro = 0; mddev->ro = MD_RDWR;
set_disk_ro(mddev->gendisk, 0); set_disk_ro(mddev->gendisk, 0);
err = do_md_run(mddev); err = do_md_run(mddev);
} }
...@@ -4765,7 +4781,7 @@ action_show(struct mddev *mddev, char *page) ...@@ -4765,7 +4781,7 @@ action_show(struct mddev *mddev, char *page)
if (test_bit(MD_RECOVERY_FROZEN, &recovery)) if (test_bit(MD_RECOVERY_FROZEN, &recovery))
type = "frozen"; type = "frozen";
else if (test_bit(MD_RECOVERY_RUNNING, &recovery) || else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) { (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
type = "reshape"; type = "reshape";
else if (test_bit(MD_RECOVERY_SYNC, &recovery)) { else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
...@@ -4851,11 +4867,11 @@ action_store(struct mddev *mddev, const char *page, size_t len) ...@@ -4851,11 +4867,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
} }
if (mddev->ro == 2) { if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify /* A write to sync_action is enough to justify
* canceling read-auto mode * canceling read-auto mode
*/ */
mddev->ro = 0; mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread); md_wakeup_thread(mddev->sync_thread);
} }
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
...@@ -5083,8 +5099,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -5083,8 +5099,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
goto out_unlock; goto out_unlock;
err = -EBUSY; err = -EBUSY;
if (max < mddev->resync_max && if (max < mddev->resync_max && md_is_rdwr(mddev) &&
mddev->ro == 0 &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
goto out_unlock; goto out_unlock;
...@@ -5813,8 +5828,8 @@ int md_run(struct mddev *mddev) ...@@ -5813,8 +5828,8 @@ int md_run(struct mddev *mddev)
continue; continue;
sync_blockdev(rdev->bdev); sync_blockdev(rdev->bdev);
invalidate_bdev(rdev->bdev); invalidate_bdev(rdev->bdev);
if (mddev->ro != 1 && rdev_read_only(rdev)) { if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
mddev->ro = 1; mddev->ro = MD_RDONLY;
if (mddev->gendisk) if (mddev->gendisk)
set_disk_ro(mddev->gendisk, 1); set_disk_ro(mddev->gendisk, 1);
} }
...@@ -5917,8 +5932,8 @@ int md_run(struct mddev *mddev) ...@@ -5917,8 +5932,8 @@ int md_run(struct mddev *mddev)
mddev->ok_start_degraded = start_dirty_degraded; mddev->ok_start_degraded = start_dirty_degraded;
if (start_readonly && mddev->ro == 0) if (start_readonly && md_is_rdwr(mddev))
mddev->ro = 2; /* read-only, but switch on first write */ mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */
err = pers->run(mddev); err = pers->run(mddev);
if (err) if (err)
...@@ -5996,8 +6011,8 @@ int md_run(struct mddev *mddev) ...@@ -5996,8 +6011,8 @@ int md_run(struct mddev *mddev)
mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed"); mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded"); mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
} else if (mddev->ro == 2) /* auto-readonly not meaningful */ } else if (mddev->ro == MD_AUTO_READ)
mddev->ro = 0; mddev->ro = MD_RDWR;
atomic_set(&mddev->max_corr_read_errors, atomic_set(&mddev->max_corr_read_errors,
MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
...@@ -6015,7 +6030,7 @@ int md_run(struct mddev *mddev) ...@@ -6015,7 +6030,7 @@ int md_run(struct mddev *mddev)
if (rdev->raid_disk >= 0) if (rdev->raid_disk >= 0)
sysfs_link_rdev(mddev, rdev); /* failure here is OK */ sysfs_link_rdev(mddev, rdev); /* failure here is OK */
if (mddev->degraded && !mddev->ro) if (mddev->degraded && md_is_rdwr(mddev))
/* This ensures that recovering status is reported immediately /* This ensures that recovering status is reported immediately
* via sysfs - until a lack of spares is confirmed. * via sysfs - until a lack of spares is confirmed.
*/ */
...@@ -6105,7 +6120,7 @@ static int restart_array(struct mddev *mddev) ...@@ -6105,7 +6120,7 @@ static int restart_array(struct mddev *mddev)
return -ENXIO; return -ENXIO;
if (!mddev->pers) if (!mddev->pers)
return -EINVAL; return -EINVAL;
if (!mddev->ro) if (md_is_rdwr(mddev))
return -EBUSY; return -EBUSY;
rcu_read_lock(); rcu_read_lock();
...@@ -6124,7 +6139,7 @@ static int restart_array(struct mddev *mddev) ...@@ -6124,7 +6139,7 @@ static int restart_array(struct mddev *mddev)
return -EROFS; return -EROFS;
mddev->safemode = 0; mddev->safemode = 0;
mddev->ro = 0; mddev->ro = MD_RDWR;
set_disk_ro(disk, 0); set_disk_ro(disk, 0);
pr_debug("md: %s switched to read-write mode.\n", mdname(mddev)); pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
/* Kick recovery or resync if necessary */ /* Kick recovery or resync if necessary */
...@@ -6151,7 +6166,7 @@ static void md_clean(struct mddev *mddev) ...@@ -6151,7 +6166,7 @@ static void md_clean(struct mddev *mddev)
mddev->clevel[0] = 0; mddev->clevel[0] = 0;
mddev->flags = 0; mddev->flags = 0;
mddev->sb_flags = 0; mddev->sb_flags = 0;
mddev->ro = 0; mddev->ro = MD_RDWR;
mddev->metadata_type[0] = 0; mddev->metadata_type[0] = 0;
mddev->chunk_sectors = 0; mddev->chunk_sectors = 0;
mddev->ctime = mddev->utime = 0; mddev->ctime = mddev->utime = 0;
...@@ -6203,7 +6218,7 @@ static void __md_stop_writes(struct mddev *mddev) ...@@ -6203,7 +6218,7 @@ static void __md_stop_writes(struct mddev *mddev)
} }
md_bitmap_flush(mddev); md_bitmap_flush(mddev);
if (mddev->ro == 0 && if (md_is_rdwr(mddev) &&
((!mddev->in_sync && !mddev_is_clustered(mddev)) || ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
mddev->sb_flags)) { mddev->sb_flags)) {
/* mark array as shutdown cleanly */ /* mark array as shutdown cleanly */
...@@ -6312,9 +6327,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) ...@@ -6312,9 +6327,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
__md_stop_writes(mddev); __md_stop_writes(mddev);
err = -ENXIO; err = -ENXIO;
if (mddev->ro==1) if (mddev->ro == MD_RDONLY)
goto out; goto out;
mddev->ro = 1; mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1); set_disk_ro(mddev->gendisk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
...@@ -6371,7 +6386,7 @@ static int do_md_stop(struct mddev *mddev, int mode, ...@@ -6371,7 +6386,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
return -EBUSY; return -EBUSY;
} }
if (mddev->pers) { if (mddev->pers) {
if (mddev->ro) if (!md_is_rdwr(mddev))
set_disk_ro(disk, 0); set_disk_ro(disk, 0);
__md_stop_writes(mddev); __md_stop_writes(mddev);
...@@ -6388,8 +6403,8 @@ static int do_md_stop(struct mddev *mddev, int mode, ...@@ -6388,8 +6403,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
mutex_unlock(&mddev->open_mutex); mutex_unlock(&mddev->open_mutex);
mddev->changed = 1; mddev->changed = 1;
if (mddev->ro) if (!md_is_rdwr(mddev))
mddev->ro = 0; mddev->ro = MD_RDWR;
} else } else
mutex_unlock(&mddev->open_mutex); mutex_unlock(&mddev->open_mutex);
/* /*
...@@ -7204,7 +7219,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) ...@@ -7204,7 +7219,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
mddev->sync_thread) mddev->sync_thread)
return -EBUSY; return -EBUSY;
if (mddev->ro) if (!md_is_rdwr(mddev))
return -EROFS; return -EROFS;
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
...@@ -7234,7 +7249,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) ...@@ -7234,7 +7249,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
/* change the number of raid disks */ /* change the number of raid disks */
if (mddev->pers->check_reshape == NULL) if (mddev->pers->check_reshape == NULL)
return -EINVAL; return -EINVAL;
if (mddev->ro) if (!md_is_rdwr(mddev))
return -EROFS; return -EROFS;
if (raid_disks <= 0 || if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks)) (mddev->max_disks && raid_disks >= mddev->max_disks))
...@@ -7464,6 +7479,40 @@ static inline bool md_ioctl_valid(unsigned int cmd) ...@@ -7464,6 +7479,40 @@ static inline bool md_ioctl_valid(unsigned int cmd)
} }
} }
/*
 * __md_set_array_info() - handle the body of the SET_ARRAY_INFO ioctl.
 * @mddev: array being configured.
 * @argp:  user pointer to an mdu_array_info_t, or NULL to use an
 *         all-zero info structure.
 *
 * If the array already has a personality (mddev->pers is set) the info
 * is passed to update_array_info().  Otherwise the array must have no
 * member disks and a zero raid_disks count, and the info is passed to
 * md_set_array_info().
 *
 * Returns 0 on success, -EFAULT if the user structure cannot be copied
 * in, -EBUSY if a non-running array already has disks or is already
 * initialised, or the error returned by the update/set helper.  Every
 * failure other than -EFAULT is reported with pr_warn().
 */
static int __md_set_array_info(struct mddev *mddev, void __user *argp)
{
	mdu_array_info_t array_info;
	int ret;

	if (argp) {
		if (copy_from_user(&array_info, argp, sizeof(array_info)))
			return -EFAULT;
	} else {
		/* No user buffer supplied: start from a zeroed description. */
		memset(&array_info, 0, sizeof(array_info));
	}

	if (!mddev->pers) {
		/* Not running yet: only a completely fresh array may be set up. */
		if (!list_empty(&mddev->disks)) {
			pr_warn("md: array %s already has disks!\n", mdname(mddev));
			return -EBUSY;
		}
		if (mddev->raid_disks) {
			pr_warn("md: array %s already initialised!\n", mdname(mddev));
			return -EBUSY;
		}

		ret = md_set_array_info(mddev, &array_info);
		if (ret)
			pr_warn("md: couldn't set array info. %d\n", ret);
		return ret;
	}

	/* Array is running: apply the request as an update. */
	ret = update_array_info(mddev, &array_info);
	if (ret)
		pr_warn("md: couldn't update array info. %d\n", ret);
	return ret;
}
static int md_ioctl(struct block_device *bdev, fmode_t mode, static int md_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg) unsigned int cmd, unsigned long arg)
{ {
...@@ -7569,36 +7618,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -7569,36 +7618,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
} }
if (cmd == SET_ARRAY_INFO) { if (cmd == SET_ARRAY_INFO) {
mdu_array_info_t info; err = __md_set_array_info(mddev, argp);
if (!arg)
memset(&info, 0, sizeof(info));
else if (copy_from_user(&info, argp, sizeof(info))) {
err = -EFAULT;
goto unlock;
}
if (mddev->pers) {
err = update_array_info(mddev, &info);
if (err) {
pr_warn("md: couldn't update array info. %d\n", err);
goto unlock;
}
goto unlock;
}
if (!list_empty(&mddev->disks)) {
pr_warn("md: array %s already has disks!\n", mdname(mddev));
err = -EBUSY;
goto unlock;
}
if (mddev->raid_disks) {
pr_warn("md: array %s already initialised!\n", mdname(mddev));
err = -EBUSY;
goto unlock;
}
err = md_set_array_info(mddev, &info);
if (err) {
pr_warn("md: couldn't set array info. %d\n", err);
goto unlock;
}
goto unlock; goto unlock;
} }
...@@ -7658,26 +7678,25 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -7658,26 +7678,25 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
* The remaining ioctls are changing the state of the * The remaining ioctls are changing the state of the
* superblock, so we do not allow them on read-only arrays. * superblock, so we do not allow them on read-only arrays.
*/ */
if (mddev->ro && mddev->pers) { if (!md_is_rdwr(mddev) && mddev->pers) {
if (mddev->ro == 2) { if (mddev->ro != MD_AUTO_READ) {
mddev->ro = 0;
sysfs_notify_dirent_safe(mddev->sysfs_state);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
/* mddev_unlock will wake thread */
/* If a device failed while we were read-only, we
* need to make sure the metadata is updated now.
*/
if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
mddev_unlock(mddev);
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
}
} else {
err = -EROFS; err = -EROFS;
goto unlock; goto unlock;
} }
mddev->ro = MD_RDWR;
sysfs_notify_dirent_safe(mddev->sysfs_state);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
/* mddev_unlock will wake thread */
/* If a device failed while we were read-only, we
* need to make sure the metadata is updated now.
*/
if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
mddev_unlock(mddev);
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
}
} }
switch (cmd) { switch (cmd) {
...@@ -7763,11 +7782,11 @@ static int md_set_read_only(struct block_device *bdev, bool ro) ...@@ -7763,11 +7782,11 @@ static int md_set_read_only(struct block_device *bdev, bool ro)
* Transitioning to read-auto need only happen for arrays that call * Transitioning to read-auto need only happen for arrays that call
* md_write_start and which are not ready for writes yet. * md_write_start and which are not ready for writes yet.
*/ */
if (!ro && mddev->ro == 1 && mddev->pers) { if (!ro && mddev->ro == MD_RDONLY && mddev->pers) {
err = restart_array(mddev); err = restart_array(mddev);
if (err) if (err)
goto out_unlock; goto out_unlock;
mddev->ro = 2; mddev->ro = MD_AUTO_READ;
} }
out_unlock: out_unlock:
...@@ -8241,9 +8260,9 @@ static int md_seq_show(struct seq_file *seq, void *v) ...@@ -8241,9 +8260,9 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%s : %sactive", mdname(mddev), seq_printf(seq, "%s : %sactive", mdname(mddev),
mddev->pers ? "" : "in"); mddev->pers ? "" : "in");
if (mddev->pers) { if (mddev->pers) {
if (mddev->ro==1) if (mddev->ro == MD_RDONLY)
seq_printf(seq, " (read-only)"); seq_printf(seq, " (read-only)");
if (mddev->ro==2) if (mddev->ro == MD_AUTO_READ)
seq_printf(seq, " (auto-read-only)"); seq_printf(seq, " (auto-read-only)");
seq_printf(seq, " %s", mddev->pers->name); seq_printf(seq, " %s", mddev->pers->name);
} }
...@@ -8502,10 +8521,10 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) ...@@ -8502,10 +8521,10 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
if (bio_data_dir(bi) != WRITE) if (bio_data_dir(bi) != WRITE)
return true; return true;
BUG_ON(mddev->ro == 1); BUG_ON(mddev->ro == MD_RDONLY);
if (mddev->ro == 2) { if (mddev->ro == MD_AUTO_READ) {
/* need to switch to read/write */ /* need to switch to read/write */
mddev->ro = 0; mddev->ro = MD_RDWR;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); md_wakeup_thread(mddev->sync_thread);
...@@ -8556,7 +8575,7 @@ void md_write_inc(struct mddev *mddev, struct bio *bi) ...@@ -8556,7 +8575,7 @@ void md_write_inc(struct mddev *mddev, struct bio *bi)
{ {
if (bio_data_dir(bi) != WRITE) if (bio_data_dir(bi) != WRITE)
return; return;
WARN_ON_ONCE(mddev->in_sync || mddev->ro); WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev));
percpu_ref_get(&mddev->writes_pending); percpu_ref_get(&mddev->writes_pending);
} }
EXPORT_SYMBOL(md_write_inc); EXPORT_SYMBOL(md_write_inc);
...@@ -8661,7 +8680,7 @@ void md_allow_write(struct mddev *mddev) ...@@ -8661,7 +8680,7 @@ void md_allow_write(struct mddev *mddev)
{ {
if (!mddev->pers) if (!mddev->pers)
return; return;
if (mddev->ro) if (!md_is_rdwr(mddev))
return; return;
if (!mddev->pers->sync_request) if (!mddev->pers->sync_request)
return; return;
...@@ -8709,7 +8728,7 @@ void md_do_sync(struct md_thread *thread) ...@@ -8709,7 +8728,7 @@ void md_do_sync(struct md_thread *thread)
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) || if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
test_bit(MD_RECOVERY_WAIT, &mddev->recovery)) test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
return; return;
if (mddev->ro) {/* never try to sync a read-only array */ if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
return; return;
} }
...@@ -9178,9 +9197,9 @@ static int remove_and_add_spares(struct mddev *mddev, ...@@ -9178,9 +9197,9 @@ static int remove_and_add_spares(struct mddev *mddev,
if (test_bit(Faulty, &rdev->flags)) if (test_bit(Faulty, &rdev->flags))
continue; continue;
if (!test_bit(Journal, &rdev->flags)) { if (!test_bit(Journal, &rdev->flags)) {
if (mddev->ro && if (!md_is_rdwr(mddev) &&
! (rdev->saved_raid_disk >= 0 && !(rdev->saved_raid_disk >= 0 &&
!test_bit(Bitmap_sync, &rdev->flags))) !test_bit(Bitmap_sync, &rdev->flags)))
continue; continue;
rdev->recovery_offset = 0; rdev->recovery_offset = 0;
...@@ -9278,7 +9297,8 @@ void md_check_recovery(struct mddev *mddev) ...@@ -9278,7 +9297,8 @@ void md_check_recovery(struct mddev *mddev)
flush_signals(current); flush_signals(current);
} }
if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) if (!md_is_rdwr(mddev) &&
!test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return; return;
if ( ! ( if ( ! (
(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) || (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
...@@ -9297,7 +9317,7 @@ void md_check_recovery(struct mddev *mddev) ...@@ -9297,7 +9317,7 @@ void md_check_recovery(struct mddev *mddev)
if (!mddev->external && mddev->safemode == 1) if (!mddev->external && mddev->safemode == 1)
mddev->safemode = 0; mddev->safemode = 0;
if (mddev->ro) { if (!md_is_rdwr(mddev)) {
struct md_rdev *rdev; struct md_rdev *rdev;
if (!mddev->external && mddev->in_sync) if (!mddev->external && mddev->in_sync)
/* 'Blocked' flag not needed as failed devices /* 'Blocked' flag not needed as failed devices
......
...@@ -398,7 +398,6 @@ static int raid0_run(struct mddev *mddev) ...@@ -398,7 +398,6 @@ static int raid0_run(struct mddev *mddev)
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
blk_queue_io_opt(mddev->queue, blk_queue_io_opt(mddev->queue,
......
...@@ -3159,6 +3159,7 @@ static int raid1_run(struct mddev *mddev) ...@@ -3159,6 +3159,7 @@ static int raid1_run(struct mddev *mddev)
* RAID1 needs at least one disk in active * RAID1 needs at least one disk in active
*/ */
if (conf->raid_disks - mddev->degraded < 1) { if (conf->raid_disks - mddev->degraded < 1) {
md_unregister_thread(&conf->thread);
ret = -EINVAL; ret = -EINVAL;
goto abort; goto abort;
} }
......
...@@ -4145,8 +4145,6 @@ static int raid10_run(struct mddev *mddev) ...@@ -4145,8 +4145,6 @@ static int raid10_run(struct mddev *mddev)
conf->thread = NULL; conf->thread = NULL;
if (mddev->queue) { if (mddev->queue) {
blk_queue_max_discard_sectors(mddev->queue,
UINT_MAX);
blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
raid10_set_io_opt(conf); raid10_set_io_opt(conf);
......
...@@ -1565,11 +1565,12 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space) ...@@ -1565,11 +1565,12 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
if (!log) if (!log)
return; return;
target = READ_ONCE(log->reclaim_target);
do { do {
target = log->reclaim_target;
if (new < target) if (new < target)
return; return;
} while (cmpxchg(&log->reclaim_target, target, new) != target); } while (!try_cmpxchg(&log->reclaim_target, &target, new));
md_wakeup_thread(log->reclaim_thread); md_wakeup_thread(log->reclaim_thread);
} }
...@@ -3061,7 +3062,6 @@ void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev) ...@@ -3061,7 +3062,6 @@ void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{ {
struct request_queue *q = bdev_get_queue(rdev->bdev);
struct r5l_log *log; struct r5l_log *log;
int ret; int ret;
...@@ -3090,9 +3090,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) ...@@ -3090,9 +3090,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
if (!log) if (!log)
return -ENOMEM; return -ENOMEM;
log->rdev = rdev; log->rdev = rdev;
log->need_cache_flush = bdev_write_cache(rdev->bdev);
log->need_cache_flush = test_bit(QUEUE_FLAG_WC, &q->queue_flags) != 0;
log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid, log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid,
sizeof(rdev->mddev->uuid)); sizeof(rdev->mddev->uuid));
......
...@@ -1301,8 +1301,6 @@ static int ppl_validate_rdev(struct md_rdev *rdev) ...@@ -1301,8 +1301,6 @@ static int ppl_validate_rdev(struct md_rdev *rdev)
static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
{ {
struct request_queue *q;
if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE + if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
PPL_HEADER_SIZE) * 2) { PPL_HEADER_SIZE) * 2) {
log->use_multippl = true; log->use_multippl = true;
...@@ -1316,8 +1314,7 @@ static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) ...@@ -1316,8 +1314,7 @@ static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
} }
log->next_io_sector = rdev->ppl.sector; log->next_io_sector = rdev->ppl.sector;
q = bdev_get_queue(rdev->bdev); if (bdev_write_cache(rdev->bdev))
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
log->wb_cache_on = true; log->wb_cache_on = true;
} }
......
...@@ -10,17 +10,9 @@ ...@@ -10,17 +10,9 @@
#ifdef __KERNEL__ #ifdef __KERNEL__
/* Set to 1 to use kernel-wide empty_zero_page */
#define RAID6_USE_EMPTY_ZERO_PAGE 0
#include <linux/blkdev.h> #include <linux/blkdev.h>
/* We need a pre-zeroed page... if we don't want to use the kernel-provided
one define it here */
#if RAID6_USE_EMPTY_ZERO_PAGE
# define raid6_empty_zero_page empty_zero_page
#else
extern const char raid6_empty_zero_page[PAGE_SIZE]; extern const char raid6_empty_zero_page[PAGE_SIZE];
#endif
#else /* ! __KERNEL__ */ #else /* ! __KERNEL__ */
/* Used for testing in user space */ /* Used for testing in user space */
......
...@@ -18,12 +18,10 @@ ...@@ -18,12 +18,10 @@
#else #else
#include <linux/module.h> #include <linux/module.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#if !RAID6_USE_EMPTY_ZERO_PAGE
/* In .bss so it's zeroed */ /* In .bss so it's zeroed */
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
EXPORT_SYMBOL(raid6_empty_zero_page); EXPORT_SYMBOL(raid6_empty_zero_page);
#endif #endif
#endif
struct raid6_calls raid6_call; struct raid6_calls raid6_call;
EXPORT_SYMBOL_GPL(raid6_call); EXPORT_SYMBOL_GPL(raid6_call);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册