提交 5d8e7fb6 编写于 作者: L Linus Torvalds

Merge tag 'md/3.20' of git://neil.brown.name/md

Pull md updates from Neil Brown:

 - assorted locking changes so that access to /proc/mdstat
   and much of /sys/block/mdXX/md/* is protected by a spinlock
   rather than a mutex and will never block indefinitely.

 - Make an 'if' condition in RAID5 - which has been implicated
   in recent bugs - more readable.

 - misc minor fixes

* tag 'md/3.20' of git://neil.brown.name/md: (28 commits)
  md/raid10: fix conversion from RAID0 to RAID10
  md: wakeup thread upon rdev_dec_pending()
  md: make reconfig_mutex optional for writes to md sysfs files.
  md: move mddev_lock and related to md.h
  md: use mddev->lock to protect updates to resync_{min,max}.
  md: minor cleanup in safe_delay_store.
  md: move GET_BITMAP_FILE ioctl out from mddev_lock.
  md: tidy up set_bitmap_file
  md: remove unnecessary 'buf' from get_bitmap_file.
  md: remove mddev_lock from rdev_attr_show()
  md: remove mddev_lock() from md_attr_show()
  md/raid5: use ->lock to protect accessing raid5 sysfs attributes.
  md: remove need for mddev_lock() in md_seq_show()
  md/bitmap: protect clearing of ->bitmap by mddev->lock
  md: protect ->pers changes with mddev->lock
  md: level_store: group all important changes into one place.
  md: rename ->stop to ->free
  md: split detach operation out from ->stop.
  md/linear: remove rcu protections in favour of suspend/resume
  md: make merge_bvec_fn more robust in face of personality changes.
  ...
......@@ -148,6 +148,7 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
......
......@@ -1619,7 +1619,9 @@ void bitmap_destroy(struct mddev *mddev)
return;
mutex_lock(&mddev->bitmap_info.mutex);
spin_lock(&mddev->lock);
mddev->bitmap = NULL; /* disconnect from the md device */
spin_unlock(&mddev->lock);
mutex_unlock(&mddev->bitmap_info.mutex);
if (mddev->thread)
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
......@@ -2209,11 +2211,13 @@ __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
static ssize_t can_clear_show(struct mddev *mddev, char *page)
{
int len;
spin_lock(&mddev->lock);
if (mddev->bitmap)
len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
"false" : "true"));
else
len = sprintf(page, "\n");
spin_unlock(&mddev->lock);
return len;
}
......@@ -2238,10 +2242,15 @@ __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
static ssize_t
behind_writes_used_show(struct mddev *mddev, char *page)
{
ssize_t ret;
spin_lock(&mddev->lock);
if (mddev->bitmap == NULL)
return sprintf(page, "0\n");
return sprintf(page, "%lu\n",
mddev->bitmap->behind_writes_used);
ret = sprintf(page, "0\n");
else
ret = sprintf(page, "%lu\n",
mddev->bitmap->behind_writes_used);
spin_unlock(&mddev->lock);
return ret;
}
static ssize_t
......
......@@ -746,13 +746,7 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
{
struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
if (rs->raid_type->level == 1)
return md_raid1_congested(&rs->md, bits);
if (rs->raid_type->level == 10)
return md_raid10_congested(&rs->md, bits);
return md_raid5_congested(&rs->md, bits);
return mddev_congested(&rs->md, bits);
}
/*
......
......@@ -332,13 +332,11 @@ static int run(struct mddev *mddev)
return 0;
}
static int stop(struct mddev *mddev)
static void faulty_free(struct mddev *mddev, void *priv)
{
struct faulty_conf *conf = mddev->private;
struct faulty_conf *conf = priv;
kfree(conf);
mddev->private = NULL;
return 0;
}
static struct md_personality faulty_personality =
......@@ -348,7 +346,7 @@ static struct md_personality faulty_personality =
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
.stop = stop,
.free = faulty_free,
.status = status,
.check_reshape = reshape,
.size = faulty_size,
......
......@@ -34,7 +34,7 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
lo = 0;
hi = mddev->raid_disks - 1;
conf = rcu_dereference(mddev->private);
conf = mddev->private;
/*
* Binary Search
......@@ -60,18 +60,16 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
*
* Return amount of bytes we can take at this offset
*/
static int linear_mergeable_bvec(struct request_queue *q,
static int linear_mergeable_bvec(struct mddev *mddev,
struct bvec_merge_data *bvm,
struct bio_vec *biovec)
{
struct mddev *mddev = q->queuedata;
struct dev_info *dev0;
unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int maxbytes = biovec->bv_len;
struct request_queue *subq;
rcu_read_lock();
dev0 = which_dev(mddev, sector);
maxsectors = dev0->end_sector - sector;
subq = bdev_get_queue(dev0->rdev->bdev);
......@@ -81,7 +79,6 @@ static int linear_mergeable_bvec(struct request_queue *q,
maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm,
biovec));
}
rcu_read_unlock();
if (maxsectors < bio_sectors)
maxsectors = 0;
......@@ -97,24 +94,18 @@ static int linear_mergeable_bvec(struct request_queue *q,
return maxsectors << 9;
}
static int linear_congested(void *data, int bits)
static int linear_congested(struct mddev *mddev, int bits)
{
struct mddev *mddev = data;
struct linear_conf *conf;
int i, ret = 0;
if (mddev_congested(mddev, bits))
return 1;
rcu_read_lock();
conf = rcu_dereference(mddev->private);
conf = mddev->private;
for (i = 0; i < mddev->raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
ret |= bdi_congested(&q->backing_dev_info, bits);
}
rcu_read_unlock();
return ret;
}
......@@ -123,12 +114,10 @@ static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disk
struct linear_conf *conf;
sector_t array_sectors;
rcu_read_lock();
conf = rcu_dereference(mddev->private);
conf = mddev->private;
WARN_ONCE(sectors || raid_disks,
"%s does not support generic reshape\n", __func__);
array_sectors = conf->array_sectors;
rcu_read_unlock();
return array_sectors;
}
......@@ -217,10 +206,6 @@ static int linear_run (struct mddev *mddev)
mddev->private = conf;
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->backing_dev_info.congested_fn = linear_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
ret = md_integrity_register(mddev);
if (ret) {
kfree(conf);
......@@ -252,38 +237,23 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
if (!newconf)
return -ENOMEM;
oldconf = rcu_dereference_protected(mddev->private,
lockdep_is_held(
&mddev->reconfig_mutex));
mddev_suspend(mddev);
oldconf = mddev->private;
mddev->raid_disks++;
rcu_assign_pointer(mddev->private, newconf);
mddev->private = newconf;
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev_resume(mddev);
revalidate_disk(mddev->gendisk);
kfree_rcu(oldconf, rcu);
kfree(oldconf);
return 0;
}
static int linear_stop (struct mddev *mddev)
static void linear_free(struct mddev *mddev, void *priv)
{
struct linear_conf *conf =
rcu_dereference_protected(mddev->private,
lockdep_is_held(
&mddev->reconfig_mutex));
struct linear_conf *conf = priv;
/*
* We do not require rcu protection here since
* we hold reconfig_mutex for both linear_add and
* linear_stop, so they cannot race.
* We should make sure any old 'conf's are properly
* freed though.
*/
rcu_barrier();
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
kfree(conf);
mddev->private = NULL;
return 0;
}
static void linear_make_request(struct mddev *mddev, struct bio *bio)
......@@ -299,16 +269,12 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
}
do {
rcu_read_lock();
tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
end_sector = tmp_dev->end_sector;
data_offset = tmp_dev->rdev->data_offset;
bio->bi_bdev = tmp_dev->rdev->bdev;
rcu_read_unlock();
if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
bio->bi_iter.bi_sector < start_sector))
goto out_of_bounds;
......@@ -355,6 +321,10 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}
static void linear_quiesce(struct mddev *mddev, int state)
{
}
static struct md_personality linear_personality =
{
.name = "linear",
......@@ -362,10 +332,13 @@ static struct md_personality linear_personality =
.owner = THIS_MODULE,
.make_request = linear_make_request,
.run = linear_run,
.stop = linear_stop,
.free = linear_free,
.status = linear_status,
.hot_add_disk = linear_add,
.size = linear_size,
.quiesce = linear_quiesce,
.congested = linear_congested,
.mergeable_bvec = linear_mergeable_bvec,
};
static int __init linear_init (void)
......
此差异已折叠。
......@@ -386,7 +386,18 @@ struct mddev {
struct work_struct del_work; /* used for delayed sysfs removal */
spinlock_t write_lock;
/* "lock" protects:
* flush_bio transition from NULL to !NULL
* rdev superblocks, events
* clearing MD_CHANGE_*
* in_sync - and related safemode and MD_CHANGE changes
* pers (also protected by reconfig_mutex and pending IO).
* clearing ->bitmap
* clearing ->bitmap_info.file
* changing ->resync_{min,max}
* setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
*/
spinlock_t lock;
wait_queue_head_t sb_wait; /* for waiting on superblock updates */
atomic_t pending_writes; /* number of active superblock writes */
......@@ -439,13 +450,30 @@ struct mddev {
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
};
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
static inline int __must_check mddev_lock(struct mddev *mddev)
{
int faulty = test_bit(Faulty, &rdev->flags);
if (atomic_dec_and_test(&rdev->nr_pending) && faulty)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
return mutex_lock_interruptible(&mddev->reconfig_mutex);
}
/* Sometimes we need to take the lock in a situation where
* failure due to interrupts is not acceptable.
*/
static inline void mddev_lock_nointr(struct mddev *mddev)
{
mutex_lock(&mddev->reconfig_mutex);
}
static inline int mddev_is_locked(struct mddev *mddev)
{
return mutex_is_locked(&mddev->reconfig_mutex);
}
static inline int mddev_trylock(struct mddev *mddev)
{
return mutex_trylock(&mddev->reconfig_mutex);
}
extern void mddev_unlock(struct mddev *mddev);
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
......@@ -459,7 +487,7 @@ struct md_personality
struct module *owner;
void (*make_request)(struct mddev *mddev, struct bio *bio);
int (*run)(struct mddev *mddev);
int (*stop)(struct mddev *mddev);
void (*free)(struct mddev *mddev, void *priv);
void (*status)(struct seq_file *seq, struct mddev *mddev);
/* error_handler must set ->faulty and clear ->in_sync
* if appropriate, and should abort recovery if needed
......@@ -490,6 +518,13 @@ struct md_personality
* array.
*/
void *(*takeover) (struct mddev *mddev);
/* congested implements bdi.congested_fn().
* Will not be called while array is 'suspended' */
int (*congested)(struct mddev *mddev, int bits);
/* mergeable_bvec is use to implement ->merge_bvec_fn */
int (*mergeable_bvec)(struct mddev *mddev,
struct bvec_merge_data *bvm,
struct bio_vec *biovec);
};
struct md_sysfs_entry {
......@@ -624,4 +659,14 @@ static inline int mddev_check_plugged(struct mddev *mddev)
return !!blk_check_plugged(md_unplug, mddev,
sizeof(struct blk_plug_cb));
}
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
int faulty = test_bit(Faulty, &rdev->flags);
if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
}
#endif /* _MD_MD_H */
......@@ -153,15 +153,11 @@ static void multipath_status (struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "]");
}
static int multipath_congested(void *data, int bits)
static int multipath_congested(struct mddev *mddev, int bits)
{
struct mddev *mddev = data;
struct mpconf *conf = mddev->private;
int i, ret = 0;
if (mddev_congested(mddev, bits))
return 1;
rcu_read_lock();
for (i = 0; i < mddev->raid_disks ; i++) {
struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
......@@ -403,7 +399,7 @@ static int multipath_run (struct mddev *mddev)
/*
* copy the already verified devices into our private MULTIPATH
* bookkeeping area. [whatever we allocate in multipath_run(),
* should be freed in multipath_stop()]
* should be freed in multipath_free()]
*/
conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
......@@ -489,9 +485,6 @@ static int multipath_run (struct mddev *mddev)
*/
md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
if (md_integrity_register(mddev))
goto out_free_conf;
......@@ -507,17 +500,13 @@ static int multipath_run (struct mddev *mddev)
return -EIO;
}
static int multipath_stop (struct mddev *mddev)
static void multipath_free(struct mddev *mddev, void *priv)
{
struct mpconf *conf = mddev->private;
struct mpconf *conf = priv;
md_unregister_thread(&mddev->thread);
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
mempool_destroy(conf->pool);
kfree(conf->multipaths);
kfree(conf);
mddev->private = NULL;
return 0;
}
static struct md_personality multipath_personality =
......@@ -527,12 +516,13 @@ static struct md_personality multipath_personality =
.owner = THIS_MODULE,
.make_request = multipath_make_request,
.run = multipath_run,
.stop = multipath_stop,
.free = multipath_free,
.status = multipath_status,
.error_handler = multipath_error,
.hot_add_disk = multipath_add_disk,
.hot_remove_disk= multipath_remove_disk,
.size = multipath_size,
.congested = multipath_congested,
};
static int __init multipath_init (void)
......
......@@ -25,17 +25,13 @@
#include "raid0.h"
#include "raid5.h"
static int raid0_congested(void *data, int bits)
static int raid0_congested(struct mddev *mddev, int bits)
{
struct mddev *mddev = data;
struct r0conf *conf = mddev->private;
struct md_rdev **devlist = conf->devlist;
int raid_disks = conf->strip_zone[0].nb_dev;
int i, ret = 0;
if (mddev_congested(mddev, bits))
return 1;
for (i = 0; i < raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
......@@ -263,8 +259,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
mdname(mddev),
(unsigned long long)smallest->sectors);
}
mddev->queue->backing_dev_info.congested_fn = raid0_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
/*
* now since we have the hard sector sizes, we can make sure
......@@ -356,17 +350,16 @@ static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
/**
* raid0_mergeable_bvec -- tell bio layer if two requests can be merged
* @q: request queue
* @mddev: the md device
* @bvm: properties of new bio
* @biovec: the request that could be merged to it.
*
* Return amount of bytes we can accept at this offset
*/
static int raid0_mergeable_bvec(struct request_queue *q,
static int raid0_mergeable_bvec(struct mddev *mddev,
struct bvec_merge_data *bvm,
struct bio_vec *biovec)
{
struct mddev *mddev = q->queuedata;
struct r0conf *conf = mddev->private;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
sector_t sector_offset = sector;
......@@ -422,7 +415,7 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
return array_sectors;
}
static int raid0_stop(struct mddev *mddev);
static void raid0_free(struct mddev *mddev, void *priv);
static int raid0_run(struct mddev *mddev)
{
......@@ -471,26 +464,22 @@ static int raid0_run(struct mddev *mddev)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
}
blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
dump_zones(mddev);
ret = md_integrity_register(mddev);
if (ret)
raid0_stop(mddev);
raid0_free(mddev, conf);
return ret;
}
static int raid0_stop(struct mddev *mddev)
static void raid0_free(struct mddev *mddev, void *priv)
{
struct r0conf *conf = mddev->private;
struct r0conf *conf = priv;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
kfree(conf->strip_zone);
kfree(conf->devlist);
kfree(conf);
mddev->private = NULL;
return 0;
}
/*
......@@ -724,11 +713,13 @@ static struct md_personality raid0_personality=
.owner = THIS_MODULE,
.make_request = raid0_make_request,
.run = raid0_run,
.stop = raid0_stop,
.free = raid0_free,
.status = raid0_status,
.size = raid0_size,
.takeover = raid0_takeover,
.quiesce = raid0_quiesce,
.congested = raid0_congested,
.mergeable_bvec = raid0_mergeable_bvec,
};
static int __init raid0_init (void)
......
......@@ -701,11 +701,10 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
return best_disk;
}
static int raid1_mergeable_bvec(struct request_queue *q,
static int raid1_mergeable_bvec(struct mddev *mddev,
struct bvec_merge_data *bvm,
struct bio_vec *biovec)
{
struct mddev *mddev = q->queuedata;
struct r1conf *conf = mddev->private;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max = biovec->bv_len;
......@@ -734,7 +733,7 @@ static int raid1_mergeable_bvec(struct request_queue *q,
}
int md_raid1_congested(struct mddev *mddev, int bits)
static int raid1_congested(struct mddev *mddev, int bits)
{
struct r1conf *conf = mddev->private;
int i, ret = 0;
......@@ -763,15 +762,6 @@ int md_raid1_congested(struct mddev *mddev, int bits)
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(md_raid1_congested);
static int raid1_congested(void *data, int bits)
{
struct mddev *mddev = data;
return mddev_congested(mddev, bits) ||
md_raid1_congested(mddev, bits);
}
static void flush_pending_writes(struct r1conf *conf)
{
......@@ -2882,7 +2872,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
return ERR_PTR(err);
}
static int stop(struct mddev *mddev);
static void raid1_free(struct mddev *mddev, void *priv);
static int run(struct mddev *mddev)
{
struct r1conf *conf;
......@@ -2904,7 +2894,7 @@ static int run(struct mddev *mddev)
/*
* copy the already verified devices into our private RAID1
* bookkeeping area. [whatever we allocate in run(),
* should be freed in stop()]
* should be freed in raid1_free()]
*/
if (mddev->private == NULL)
conf = setup_conf(mddev);
......@@ -2955,10 +2945,6 @@ static int run(struct mddev *mddev)
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
if (mddev->queue) {
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
if (discard_supported)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
mddev->queue);
......@@ -2968,37 +2954,23 @@ static int run(struct mddev *mddev)
}
ret = md_integrity_register(mddev);
if (ret)
stop(mddev);
if (ret) {
md_unregister_thread(&mddev->thread);
raid1_free(mddev, conf);
}
return ret;
}
static int stop(struct mddev *mddev)
static void raid1_free(struct mddev *mddev, void *priv)
{
struct r1conf *conf = mddev->private;
struct bitmap *bitmap = mddev->bitmap;
struct r1conf *conf = priv;
/* wait for behind writes to complete */
if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
printk(KERN_INFO "md/raid1:%s: behind writes in progress - waiting to stop.\n",
mdname(mddev));
/* need to kick something here to make sure I/O goes? */
wait_event(bitmap->behind_wait,
atomic_read(&bitmap->behind_writes) == 0);
}
freeze_array(conf, 0);
unfreeze_array(conf);
md_unregister_thread(&mddev->thread);
if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
kfree(conf);
mddev->private = NULL;
return 0;
}
static int raid1_resize(struct mddev *mddev, sector_t sectors)
......@@ -3181,7 +3153,7 @@ static struct md_personality raid1_personality =
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
.stop = stop,
.free = raid1_free,
.status = status,
.error_handler = error,
.hot_add_disk = raid1_add_disk,
......@@ -3193,6 +3165,8 @@ static struct md_personality raid1_personality =
.check_reshape = raid1_reshape,
.quiesce = raid1_quiesce,
.takeover = raid1_takeover,
.congested = raid1_congested,
.mergeable_bvec = raid1_mergeable_bvec,
};
static int __init raid_init(void)
......
......@@ -170,7 +170,4 @@ struct r1bio {
*/
#define R1BIO_MadeGood 7
#define R1BIO_WriteError 8
extern int md_raid1_congested(struct mddev *mddev, int bits);
#endif
......@@ -674,7 +674,7 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
/**
* raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
* @q: request queue
* @mddev: the md device
* @bvm: properties of new bio
* @biovec: the request that could be merged to it.
*
......@@ -682,11 +682,10 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
* This requires checking for end-of-chunk if near_copies != raid_disks,
* and for subordinate merge_bvec_fns if merge_check_needed.
*/
static int raid10_mergeable_bvec(struct request_queue *q,
static int raid10_mergeable_bvec(struct mddev *mddev,
struct bvec_merge_data *bvm,
struct bio_vec *biovec)
{
struct mddev *mddev = q->queuedata;
struct r10conf *conf = mddev->private;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
......@@ -910,7 +909,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
return rdev;
}
int md_raid10_congested(struct mddev *mddev, int bits)
static int raid10_congested(struct mddev *mddev, int bits)
{
struct r10conf *conf = mddev->private;
int i, ret = 0;
......@@ -934,15 +933,6 @@ int md_raid10_congested(struct mddev *mddev, int bits)
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(md_raid10_congested);
static int raid10_congested(void *data, int bits)
{
struct mddev *mddev = data;
return mddev_congested(mddev, bits) ||
md_raid10_congested(mddev, bits);
}
static void flush_pending_writes(struct r10conf *conf)
{
......@@ -3757,8 +3747,6 @@ static int run(struct mddev *mddev)
if (mddev->queue) {
int stripe = conf->geo.raid_disks *
((mddev->chunk_sectors << 9) / PAGE_SIZE);
mddev->queue->backing_dev_info.congested_fn = raid10_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
/* Calculate max read-ahead size.
* We need to readahead at least twice a whole stripe....
......@@ -3767,7 +3755,6 @@ static int run(struct mddev *mddev)
stripe /= conf->geo.near_copies;
if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
}
if (md_integrity_register(mddev))
......@@ -3811,17 +3798,9 @@ static int run(struct mddev *mddev)
return -EIO;
}
static int stop(struct mddev *mddev)
static void raid10_free(struct mddev *mddev, void *priv)
{
struct r10conf *conf = mddev->private;
raise_barrier(conf, 0);
lower_barrier(conf);
md_unregister_thread(&mddev->thread);
if (mddev->queue)
/* the unplug fn references 'conf'*/
blk_sync_queue(mddev->queue);
struct r10conf *conf = priv;
if (conf->r10bio_pool)
mempool_destroy(conf->r10bio_pool);
......@@ -3830,8 +3809,6 @@ static int stop(struct mddev *mddev)
kfree(conf->mirrors_old);
kfree(conf->mirrors_new);
kfree(conf);
mddev->private = NULL;
return 0;
}
static void raid10_quiesce(struct mddev *mddev, int state)
......@@ -3895,7 +3872,7 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
return 0;
}
static void *raid10_takeover_raid0(struct mddev *mddev)
static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
{
struct md_rdev *rdev;
struct r10conf *conf;
......@@ -3905,6 +3882,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev)
mdname(mddev));
return ERR_PTR(-EINVAL);
}
sector_div(size, devs);
/* Set new parameters */
mddev->new_level = 10;
......@@ -3915,12 +3893,15 @@ static void *raid10_takeover_raid0(struct mddev *mddev)
mddev->raid_disks *= 2;
/* make sure it will be not marked as dirty */
mddev->recovery_cp = MaxSector;
mddev->dev_sectors = size;
conf = setup_conf(mddev);
if (!IS_ERR(conf)) {
rdev_for_each(rdev, mddev)
if (rdev->raid_disk >= 0)
if (rdev->raid_disk >= 0) {
rdev->new_raid_disk = rdev->raid_disk * 2;
rdev->sectors = size;
}
conf->barrier = 1;
}
......@@ -3943,7 +3924,9 @@ static void *raid10_takeover(struct mddev *mddev)
mdname(mddev));
return ERR_PTR(-EINVAL);
}
return raid10_takeover_raid0(mddev);
return raid10_takeover_raid0(mddev,
raid0_conf->strip_zone->zone_end,
raid0_conf->strip_zone->nb_dev);
}
return ERR_PTR(-EINVAL);
}
......@@ -4713,7 +4696,7 @@ static struct md_personality raid10_personality =
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
.stop = stop,
.free = raid10_free,
.status = status,
.error_handler = error,
.hot_add_disk = raid10_add_disk,
......@@ -4727,6 +4710,8 @@ static struct md_personality raid10_personality =
.check_reshape = raid10_check_reshape,
.start_reshape = raid10_start_reshape,
.finish_reshape = raid10_finish_reshape,
.congested = raid10_congested,
.mergeable_bvec = raid10_mergeable_bvec,
};
static int __init raid_init(void)
......
......@@ -150,7 +150,4 @@ enum r10bio_state {
*/
R10BIO_Previous,
};
extern int md_raid10_congested(struct mddev *mddev, int bits);
#endif
......@@ -296,12 +296,9 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
BUG_ON(atomic_read(&conf->active_stripes)==0);
if (test_bit(STRIPE_HANDLE, &sh->state)) {
if (test_bit(STRIPE_DELAYED, &sh->state) &&
!test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
!test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
list_add_tail(&sh->lru, &conf->delayed_list);
if (atomic_read(&conf->preread_active_stripes)
< IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0)
list_add_tail(&sh->lru, &conf->bitmap_list);
else {
......@@ -2898,31 +2895,102 @@ static int want_replace(struct stripe_head *sh, int disk_idx)
* Returns 1 when no more member devices need to be checked, otherwise returns
* 0 to tell the loop in handle_stripe_fill to continue
*/
static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
int disk_idx, int disks)
static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
int disk_idx, int disks)
{
struct r5dev *dev = &sh->dev[disk_idx];
struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
&sh->dev[s->failed_num[1]] };
int i;
if (test_bit(R5_LOCKED, &dev->flags) ||
test_bit(R5_UPTODATE, &dev->flags))
/* No point reading this as we already have it or have
* decided to get it.
*/
return 0;
if (dev->toread ||
(dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)))
/* We need this block to directly satisfy a request */
return 1;
if (s->syncing || s->expanding ||
(s->replacing && want_replace(sh, disk_idx)))
/* When syncing, or expanding we read everything.
* When replacing, we need the replaced block.
*/
return 1;
if ((s->failed >= 1 && fdev[0]->toread) ||
(s->failed >= 2 && fdev[1]->toread))
/* If we want to read from a failed device, then
* we need to actually read every other device.
*/
return 1;
/* Sometimes neither read-modify-write nor reconstruct-write
* cycles can work. In those cases we read every block we
* can. Then the parity-update is certain to have enough to
* work with.
* This can only be a problem when we need to write something,
* and some device has failed. If either of those tests
* fail we need look no further.
*/
if (!s->failed || !s->to_write)
return 0;
if (test_bit(R5_Insync, &dev->flags) &&
!test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
/* Pre-reads at not permitted until after short delay
* to gather multiple requests. However if this
* device is no Insync, the block could only be be computed
* and there is no need to delay that.
*/
return 0;
for (i = 0; i < s->failed; i++) {
if (fdev[i]->towrite &&
!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
!test_bit(R5_OVERWRITE, &fdev[i]->flags))
/* If we have a partial write to a failed
* device, then we will need to reconstruct
* the content of that device, so all other
* devices must be read.
*/
return 1;
}
/* If we are forced to do a reconstruct-write, either because
* the current RAID6 implementation only supports that, or
* or because parity cannot be trusted and we are currently
* recovering it, there is extra need to be careful.
* If one of the devices that we would need to read, because
* it is not being overwritten (and maybe not written at all)
* is missing/faulty, then we need to read everything we can.
*/
if (sh->raid_conf->level != 6 &&
sh->sector < sh->raid_conf->mddev->recovery_cp)
/* reconstruct-write isn't being forced */
return 0;
for (i = 0; i < s->failed; i++) {
if (!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
!test_bit(R5_OVERWRITE, &fdev[i]->flags))
return 1;
}
return 0;
}
static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
int disk_idx, int disks)
{
struct r5dev *dev = &sh->dev[disk_idx];
/* is the data in this block needed, and can we get it? */
if (!test_bit(R5_LOCKED, &dev->flags) &&
!test_bit(R5_UPTODATE, &dev->flags) &&
(dev->toread ||
(dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
s->syncing || s->expanding ||
(s->replacing && want_replace(sh, disk_idx)) ||
(s->failed >= 1 && fdev[0]->toread) ||
(s->failed >= 2 && fdev[1]->toread) ||
(sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
(!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
!test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
((sh->raid_conf->level == 6 ||
sh->sector >= sh->raid_conf->mddev->recovery_cp)
&& s->failed && s->to_write &&
(s->to_write - s->non_overwrite <
sh->raid_conf->raid_disks - sh->raid_conf->max_degraded) &&
(!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
if (need_this_block(sh, s, disk_idx, disks)) {
/* we would like to get this block, possibly by computing it,
* otherwise read it if the backing disk is insync
*/
......@@ -4081,7 +4149,7 @@ static void activate_bit_delay(struct r5conf *conf,
}
}
int md_raid5_congested(struct mddev *mddev, int bits)
static int raid5_congested(struct mddev *mddev, int bits)
{
struct r5conf *conf = mddev->private;
......@@ -4098,24 +4166,14 @@ int md_raid5_congested(struct mddev *mddev, int bits)
return 0;
}
EXPORT_SYMBOL_GPL(md_raid5_congested);
static int raid5_congested(void *data, int bits)
{
struct mddev *mddev = data;
return mddev_congested(mddev, bits) ||
md_raid5_congested(mddev, bits);
}
/* We want read requests to align with chunks where possible,
* but write requests don't need to.
*/
static int raid5_mergeable_bvec(struct request_queue *q,
static int raid5_mergeable_bvec(struct mddev *mddev,
struct bvec_merge_data *bvm,
struct bio_vec *biovec)
{
struct mddev *mddev = q->queuedata;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_sectors;
......@@ -5296,11 +5354,14 @@ static void raid5d(struct md_thread *thread)
static ssize_t
raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf;
int ret = 0;
spin_lock(&mddev->lock);
conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->max_nr_stripes);
else
return 0;
ret = sprintf(page, "%d\n", conf->max_nr_stripes);
spin_unlock(&mddev->lock);
return ret;
}
int
......@@ -5339,21 +5400,25 @@ EXPORT_SYMBOL(raid5_set_cache_size);
static ssize_t
raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf;
unsigned long new;
int err;
if (len >= PAGE_SIZE)
return -EINVAL;
if (!conf)
return -ENODEV;
if (kstrtoul(page, 10, &new))
return -EINVAL;
err = raid5_set_cache_size(mddev, new);
err = mddev_lock(mddev);
if (err)
return err;
return len;
conf = mddev->private;
if (!conf)
err = -ENODEV;
else
err = raid5_set_cache_size(mddev, new);
mddev_unlock(mddev);
return err ?: len;
}
static struct md_sysfs_entry
......@@ -5364,29 +5429,40 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
static ssize_t
raid5_show_preread_threshold(struct mddev *mddev, char *page)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf;
int ret = 0;
spin_lock(&mddev->lock);
conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->bypass_threshold);
else
return 0;
ret = sprintf(page, "%d\n", conf->bypass_threshold);
spin_unlock(&mddev->lock);
return ret;
}
static ssize_t
raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf;
unsigned long new;
int err;
if (len >= PAGE_SIZE)
return -EINVAL;
if (!conf)
return -ENODEV;
if (kstrtoul(page, 10, &new))
return -EINVAL;
if (new > conf->max_nr_stripes)
return -EINVAL;
conf->bypass_threshold = new;
return len;
err = mddev_lock(mddev);
if (err)
return err;
conf = mddev->private;
if (!conf)
err = -ENODEV;
else if (new > conf->max_nr_stripes)
err = -EINVAL;
else
conf->bypass_threshold = new;
mddev_unlock(mddev);
return err ?: len;
}
static struct md_sysfs_entry
......@@ -5398,39 +5474,48 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
static ssize_t
raid5_show_skip_copy(struct mddev *mddev, char *page)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf;
int ret = 0;
spin_lock(&mddev->lock);
conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->skip_copy);
else
return 0;
ret = sprintf(page, "%d\n", conf->skip_copy);
spin_unlock(&mddev->lock);
return ret;
}
static ssize_t
raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf;
unsigned long new;
int err;
if (len >= PAGE_SIZE)
return -EINVAL;
if (!conf)
return -ENODEV;
if (kstrtoul(page, 10, &new))
return -EINVAL;
new = !!new;
if (new == conf->skip_copy)
return len;
mddev_suspend(mddev);
conf->skip_copy = new;
if (new)
mddev->queue->backing_dev_info.capabilities |=
BDI_CAP_STABLE_WRITES;
else
mddev->queue->backing_dev_info.capabilities &=
~BDI_CAP_STABLE_WRITES;
mddev_resume(mddev);
return len;
err = mddev_lock(mddev);
if (err)
return err;
conf = mddev->private;
if (!conf)
err = -ENODEV;
else if (new != conf->skip_copy) {
mddev_suspend(mddev);
conf->skip_copy = new;
if (new)
mddev->queue->backing_dev_info.capabilities |=
BDI_CAP_STABLE_WRITES;
else
mddev->queue->backing_dev_info.capabilities &=
~BDI_CAP_STABLE_WRITES;
mddev_resume(mddev);
}
mddev_unlock(mddev);
return err ?: len;
}
static struct md_sysfs_entry
......@@ -5454,11 +5539,14 @@ raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
static ssize_t
raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf;
int ret = 0;
spin_lock(&mddev->lock);
conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->worker_cnt_per_group);
else
return 0;
ret = sprintf(page, "%d\n", conf->worker_cnt_per_group);
spin_unlock(&mddev->lock);
return ret;
}
static int alloc_thread_groups(struct r5conf *conf, int cnt,
......@@ -5468,7 +5556,7 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt,
static ssize_t
raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf;
unsigned long new;
int err;
struct r5worker_group *new_groups, *old_groups;
......@@ -5476,41 +5564,41 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
if (len >= PAGE_SIZE)
return -EINVAL;
if (!conf)
return -ENODEV;
if (kstrtoul(page, 10, &new))
return -EINVAL;
if (new == conf->worker_cnt_per_group)
return len;
mddev_suspend(mddev);
err = mddev_lock(mddev);
if (err)
return err;
conf = mddev->private;
if (!conf)
err = -ENODEV;
else if (new != conf->worker_cnt_per_group) {
mddev_suspend(mddev);
old_groups = conf->worker_groups;
if (old_groups)
flush_workqueue(raid5_wq);
old_groups = conf->worker_groups;
if (old_groups)
flush_workqueue(raid5_wq);
err = alloc_thread_groups(conf, new,
&group_cnt, &worker_cnt_per_group,
&new_groups);
if (!err) {
spin_lock_irq(&conf->device_lock);
conf->group_cnt = group_cnt;
conf->worker_cnt_per_group = worker_cnt_per_group;
conf->worker_groups = new_groups;
spin_unlock_irq(&conf->device_lock);
err = alloc_thread_groups(conf, new,
&group_cnt, &worker_cnt_per_group,
&new_groups);
if (!err) {
spin_lock_irq(&conf->device_lock);
conf->group_cnt = group_cnt;
conf->worker_cnt_per_group = worker_cnt_per_group;
conf->worker_groups = new_groups;
spin_unlock_irq(&conf->device_lock);
if (old_groups)
kfree(old_groups[0].workers);
kfree(old_groups);
if (old_groups)
kfree(old_groups[0].workers);
kfree(old_groups);
}
mddev_resume(mddev);
}
mddev_unlock(mddev);
mddev_resume(mddev);
if (err)
return err;
return len;
return err ?: len;
}
static struct md_sysfs_entry
......@@ -6178,11 +6266,6 @@ static int run(struct mddev *mddev)
if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
blk_queue_io_opt(mddev->queue, chunk_size *
......@@ -6260,17 +6343,12 @@ static int run(struct mddev *mddev)
return -EIO;
}
static int stop(struct mddev *mddev)
static void raid5_free(struct mddev *mddev, void *priv)
{
struct r5conf *conf = mddev->private;
struct r5conf *conf = priv;
md_unregister_thread(&mddev->thread);
if (mddev->queue)
mddev->queue->backing_dev_info.congested_fn = NULL;
free_conf(conf);
mddev->private = NULL;
mddev->to_remove = &raid5_attrs_group;
return 0;
}
static void status(struct seq_file *seq, struct mddev *mddev)
......@@ -7044,7 +7122,7 @@ static struct md_personality raid6_personality =
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
.stop = stop,
.free = raid5_free,
.status = status,
.error_handler = error,
.hot_add_disk = raid5_add_disk,
......@@ -7058,6 +7136,8 @@ static struct md_personality raid6_personality =
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid6_takeover,
.congested = raid5_congested,
.mergeable_bvec = raid5_mergeable_bvec,
};
static struct md_personality raid5_personality =
{
......@@ -7066,7 +7146,7 @@ static struct md_personality raid5_personality =
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
.stop = stop,
.free = raid5_free,
.status = status,
.error_handler = error,
.hot_add_disk = raid5_add_disk,
......@@ -7080,6 +7160,8 @@ static struct md_personality raid5_personality =
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid5_takeover,
.congested = raid5_congested,
.mergeable_bvec = raid5_mergeable_bvec,
};
static struct md_personality raid4_personality =
......@@ -7089,7 +7171,7 @@ static struct md_personality raid4_personality =
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
.stop = stop,
.free = raid5_free,
.status = status,
.error_handler = error,
.hot_add_disk = raid5_add_disk,
......@@ -7103,6 +7185,8 @@ static struct md_personality raid4_personality =
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid4_takeover,
.congested = raid5_congested,
.mergeable_bvec = raid5_mergeable_bvec,
};
static int __init raid5_init(void)
......
......@@ -558,7 +558,6 @@ static inline int algorithm_is_DDF(int layout)
return layout >= 8 && layout <= 10;
}
extern int md_raid5_congested(struct mddev *mddev, int bits);
extern void md_raid5_kick_device(struct r5conf *conf);
extern int raid5_set_cache_size(struct mddev *mddev, int size);
#endif
......@@ -89,10 +89,10 @@ void (*raid6_datap_recov)(int, size_t, int, void **);
EXPORT_SYMBOL_GPL(raid6_datap_recov);
const struct raid6_recov_calls *const raid6_recov_algos[] = {
#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
#ifdef CONFIG_AS_AVX2
&raid6_recov_avx2,
#endif
#ifdef CONFIG_AS_SSSE3
&raid6_recov_ssse3,
#endif
&raid6_recov_intx1,
......
......@@ -8,7 +8,7 @@
* of the License.
*/
#if CONFIG_AS_AVX2
#ifdef CONFIG_AS_AVX2
#include <linux/raid/pq.h>
#include "x86.h"
......
......@@ -7,6 +7,8 @@
* of the License.
*/
#ifdef CONFIG_AS_SSSE3
#include <linux/raid/pq.h>
#include "x86.h"
......@@ -330,3 +332,7 @@ const struct raid6_recov_calls raid6_recov_ssse3 = {
#endif
.priority = 1,
};
#else
#warning "your version of binutils lacks SSSE3 support"
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册