提交 34bbce9e 编写于 作者: Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Sending this a bit sooner than I otherwise would have, as a fix in the
  merge window had some unfortunate issues and side effects for some
  folks.

  This contains:

   - Fixes from Jan for the bdi registration/unregistration. These have
     been tested by the various parties reporting issues, and should be
     solid at this point.

   - Also from Jan, fix for axonram gendisk registration.

   - A stable fix for zram from Johannes.

   - A small series from Ming, fixing up some long-standing issues with
     blk-mq hardware queue kobject initialization and registration.

   - A fix for sed opal from Jon, fixing a nonsensical range check and
     some set-but-not-used variables.

   - A fix from Neil for a long-standing deadlock issue for stacking
     device drivers. With this in place, dm/md don't have to work around
     the issue anymore, and can be properly fixed up"

* 'for-linus' of git://git.kernel.dk/linux-block:
  axonram: Fix gendisk handling
  blk: improve order of bio handling in generic_make_request()
  Revert "scsi, block: fix duplicate bdi name registration crashes"
  block: Make del_gendisk() safer for disks without queues
  bdi: Fix use-after-free in wb_congested_put()
  block: Allow bdi re-registration
  block/sed: Fix opal user range check and unused variables
  zram: set physical queue limits to avoid array out of bounds accesses
  blk-mq: free hctx->cpumask in release handler of hctx's kobject
  blk-mq: make lifetime consistent between hctx and its kobject
  blk-mq: make lifetime consistent between q/ctx and its kobject
  blk-mq: initialize mq kobjects in blk_mq_init_allocated_queue()
......@@ -274,7 +274,9 @@ static int axon_ram_probe(struct platform_device *device)
if (bank->disk->major > 0)
unregister_blkdev(bank->disk->major,
bank->disk->disk_name);
if (bank->disk->flags & GENHD_FL_UP)
del_gendisk(bank->disk);
put_disk(bank->disk);
}
device->dev.platform_data = NULL;
if (bank->io_addr != 0)
......@@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device)
device_remove_file(&device->dev, &dev_attr_ecc);
free_irq(bank->irq_id, device);
del_gendisk(bank->disk);
put_disk(bank->disk);
iounmap((void __iomem *) bank->io_addr);
kfree(bank);
......
......@@ -578,8 +578,6 @@ void blk_cleanup_queue(struct request_queue *q)
q->queue_lock = &q->__queue_lock;
spin_unlock_irq(lock);
put_disk_devt(q->disk_devt);
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
......@@ -2017,17 +2015,34 @@ blk_qc_t generic_make_request(struct bio *bio)
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
if (likely(blk_queue_enter(q, false) == 0)) {
struct bio_list hold;
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
hold = bio_list_on_stack;
bio_list_init(&bio_list_on_stack);
ret = q->make_request_fn(q, bio);
blk_queue_exit(q);
bio = bio_list_pop(current->bio_list);
/* sort new bios into those for a lower level
* and those for the same level
*/
bio_list_init(&lower);
bio_list_init(&same);
while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
if (q == bdev_get_queue(bio->bi_bdev))
bio_list_add(&same, bio);
else
bio_list_add(&lower, bio);
/* now assemble so we handle the lowest level first */
bio_list_merge(&bio_list_on_stack, &lower);
bio_list_merge(&bio_list_on_stack, &same);
bio_list_merge(&bio_list_on_stack, &hold);
} else {
struct bio *bio_next = bio_list_pop(current->bio_list);
bio_io_error(bio);
bio = bio_next;
}
bio = bio_list_pop(current->bio_list);
} while (bio);
current->bio_list = NULL; /* deactivate */
......
......@@ -17,6 +17,15 @@ static void blk_mq_sysfs_release(struct kobject *kobj)
{
}
/*
 * Release handler for the kobject embedded in a hardware queue context.
 *
 * Given @kobj, looks up the struct blk_mq_hw_ctx that contains it and frees
 * its cpumask, its ctxs array and finally the context itself.
 */
static void blk_mq_hw_sysfs_release(struct kobject *kobj)
{
	struct blk_mq_hw_ctx *hw_ctx;

	/* @kobj is the 'kobj' member of the hardware context being released. */
	hw_ctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);

	free_cpumask_var(hw_ctx->cpumask);
	kfree(hw_ctx->ctxs);

	/* Freed last: the two pointers above are read out of this structure. */
	kfree(hw_ctx);
}
struct blk_mq_ctx_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct blk_mq_ctx *, char *);
......@@ -200,7 +209,7 @@ static struct kobj_type blk_mq_ctx_ktype = {
static struct kobj_type blk_mq_hw_ktype = {
.sysfs_ops = &blk_mq_hw_sysfs_ops,
.default_attrs = default_hw_ctx_attrs,
.release = blk_mq_sysfs_release,
.release = blk_mq_hw_sysfs_release,
};
static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
......@@ -242,24 +251,15 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
int i, j;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_unregister_hctx(hctx);
hctx_for_each_ctx(hctx, ctx, j)
kobject_put(&ctx->kobj);
kobject_put(&hctx->kobj);
}
blk_mq_debugfs_unregister_hctxs(q);
kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
kobject_del(&q->mq_kobj);
kobject_put(&q->mq_kobj);
kobject_put(&dev->kobj);
q->mq_sysfs_init_done = false;
......@@ -277,7 +277,19 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
}
static void blk_mq_sysfs_init(struct request_queue *q)
/*
 * Drop one reference on the kobject of the software queue context of every
 * possible CPU of @q, then drop one reference on @q's mq kobject.
 */
void blk_mq_sysfs_deinit(struct request_queue *q)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct blk_mq_ctx *sw_ctx = per_cpu_ptr(q->queue_ctx, cpu);

		kobject_put(&sw_ctx->kobj);
	}

	kobject_put(&q->mq_kobj);
}
void blk_mq_sysfs_init(struct request_queue *q)
{
struct blk_mq_ctx *ctx;
int cpu;
......@@ -297,8 +309,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
blk_mq_disable_hotplug();
blk_mq_sysfs_init(q);
ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
if (ret < 0)
goto out;
......
......@@ -1955,16 +1955,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
}
}
static void blk_mq_free_hw_queues(struct request_queue *q,
struct blk_mq_tag_set *set)
{
struct blk_mq_hw_ctx *hctx;
unsigned int i;
queue_for_each_hw_ctx(q, hctx, i)
free_cpumask_var(hctx->cpumask);
}
static int blk_mq_init_hctx(struct request_queue *q,
struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
......@@ -2045,7 +2035,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
struct blk_mq_hw_ctx *hctx;
memset(__ctx, 0, sizeof(*__ctx));
__ctx->cpu = i;
spin_lock_init(&__ctx->lock);
INIT_LIST_HEAD(&__ctx->rq_list);
......@@ -2257,15 +2246,19 @@ void blk_mq_release(struct request_queue *q)
queue_for_each_hw_ctx(q, hctx, i) {
if (!hctx)
continue;
kfree(hctx->ctxs);
kfree(hctx);
kobject_put(&hctx->kobj);
}
q->mq_map = NULL;
kfree(q->queue_hw_ctx);
/* ctx kobj stays in queue_ctx */
/*
* release .mq_kobj and sw queue's kobject now because
* both share lifetime with request queue.
*/
blk_mq_sysfs_deinit(q);
free_percpu(q->queue_ctx);
}
......@@ -2330,10 +2323,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
if (hctx->tags)
blk_mq_free_map_and_requests(set, j);
blk_mq_exit_hctx(q, set, hctx, j);
free_cpumask_var(hctx->cpumask);
kobject_put(&hctx->kobj);
kfree(hctx->ctxs);
kfree(hctx);
hctxs[j] = NULL;
}
......@@ -2352,6 +2342,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
if (!q->queue_ctx)
goto err_exit;
/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);
q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
GFP_KERNEL, set->numa_node);
if (!q->queue_hw_ctx)
......@@ -2442,7 +2435,6 @@ void blk_mq_free_queue(struct request_queue *q)
blk_mq_del_queue_tag_set(q);
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
blk_mq_free_hw_queues(q, set);
}
/* Basically redo blk_mq_init_queue with queue frozen */
......
......@@ -77,6 +77,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
/*
* sysfs helpers
*/
extern void blk_mq_sysfs_init(struct request_queue *q);
extern void blk_mq_sysfs_deinit(struct request_queue *q);
extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
......
......@@ -572,20 +572,6 @@ static void register_disk(struct device *parent, struct gendisk *disk)
disk_part_iter_exit(&piter);
}
/*
 * Drop one reference on @disk_devt; a NULL pointer is ignored.
 * If this was the last reference, the object's ->release callback is
 * invoked on it.
 */
void put_disk_devt(struct disk_devt *disk_devt)
{
	if (!disk_devt)
		return;

	if (atomic_dec_and_test(&disk_devt->count))
		disk_devt->release(disk_devt);
}
EXPORT_SYMBOL(put_disk_devt);
/* Take one reference on @disk_devt; a NULL pointer is ignored. */
void get_disk_devt(struct disk_devt *disk_devt)
{
	if (!disk_devt)
		return;

	atomic_inc(&disk_devt->count);
}
EXPORT_SYMBOL(get_disk_devt);
/**
* device_add_disk - add partitioning information to kernel list
* @parent: parent device for the disk
......@@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
disk_alloc_events(disk);
/*
* Take a reference on the devt and assign it to queue since it
* must not be reallocated while the bdi is registered
*/
disk->queue->disk_devt = disk->disk_devt;
get_disk_devt(disk->disk_devt);
/* Register BDI before referencing it from bdev */
bdi = disk->queue->backing_dev_info;
bdi_register_owner(bdi, disk_to_dev(disk));
......@@ -681,12 +660,16 @@ void del_gendisk(struct gendisk *disk)
disk->flags &= ~GENHD_FL_UP;
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
if (disk->queue) {
/*
* Unregister bdi before releasing device numbers (as they can get
* reused and we'd get clashes in sysfs).
* Unregister bdi before releasing device numbers (as they can
* get reused and we'd get clashes in sysfs).
*/
bdi_unregister(disk->queue->backing_dev_info);
blk_unregister_queue(disk);
} else {
WARN_ON(1);
}
blk_unregister_region(disk_devt(disk), disk->minors);
part_stat_set_all(&disk->part0, 0);
......
......@@ -1023,7 +1023,6 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
static int gen_key(struct opal_dev *dev, void *data)
{
const u8 *method;
u8 uid[OPAL_UID_LENGTH];
int err = 0;
......@@ -1031,7 +1030,6 @@ static int gen_key(struct opal_dev *dev, void *data)
set_comid(dev, dev->comid);
memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len));
method = opalmethod[OPAL_GENKEY];
kfree(dev->prev_data);
dev->prev_data = NULL;
......@@ -1669,7 +1667,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
{
u8 lr_buffer[OPAL_UID_LENGTH];
const u8 *method;
struct opal_lock_unlock *lkul = data;
u8 read_locked = 1, write_locked = 1;
int err = 0;
......@@ -1677,7 +1674,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
clear_opal_cmd(dev);
set_comid(dev, dev->comid);
method = opalmethod[OPAL_SET];
if (build_locking_range(lr_buffer, sizeof(lr_buffer),
lkul->session.opal_key.lr) < 0)
return -ERANGE;
......@@ -1733,14 +1729,12 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data)
{
u8 lr_buffer[OPAL_UID_LENGTH];
u8 read_locked = 1, write_locked = 1;
const u8 *method;
struct opal_lock_unlock *lkul = data;
int ret;
clear_opal_cmd(dev);
set_comid(dev, dev->comid);
method = opalmethod[OPAL_SET];
if (build_locking_range(lr_buffer, sizeof(lr_buffer),
lkul->session.opal_key.lr) < 0)
return -ERANGE;
......@@ -2133,7 +2127,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
pr_err("Locking state was not RO or RW\n");
return -EINVAL;
}
if (lk_unlk->session.who < OPAL_USER1 &&
if (lk_unlk->session.who < OPAL_USER1 ||
lk_unlk->session.who > OPAL_USER9) {
pr_err("Authority was not within the range of users: %d\n",
lk_unlk->session.who);
......@@ -2316,7 +2310,7 @@ static int opal_activate_user(struct opal_dev *dev,
int ret;
/* We can't activate Admin1 it's active as manufactured */
if (opal_session->who < OPAL_USER1 &&
if (opal_session->who < OPAL_USER1 ||
opal_session->who > OPAL_USER9) {
pr_err("Who was not a valid user: %d\n", opal_session->who);
return -EINVAL;
......
......@@ -1189,6 +1189,8 @@ static int zram_add(void)
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
zram->disk->queue->limits.chunk_sectors = 0;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
......
......@@ -3075,23 +3075,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
put_device(&sdkp->dev);
}
/* Pairs an sd index with the struct disk_devt embedded next to it. */
struct sd_devt {
/* Index that sd_devt_release() removes from sd_index_ida. */
int idx;
/* Embedded object; sd_devt_release() maps it back here via container_of(). */
struct disk_devt disk_devt;
};
/*
 * Release callback for the disk_devt embedded in a struct sd_devt.
 *
 * Removes the stored index from sd_index_ida while holding sd_index_lock,
 * then frees the enclosing struct sd_devt.
 */
static void sd_devt_release(struct disk_devt *disk_devt)
{
	struct sd_devt *owner;

	owner = container_of(disk_devt, struct sd_devt, disk_devt);

	spin_lock(&sd_index_lock);
	ida_remove(&sd_index_ida, owner->idx);
	spin_unlock(&sd_index_lock);

	kfree(owner);
}
/**
* sd_probe - called during driver initialization and whenever a
* new scsi device is attached to the system. It is called once
......@@ -3113,7 +3096,6 @@ static void sd_devt_release(struct disk_devt *disk_devt)
static int sd_probe(struct device *dev)
{
struct scsi_device *sdp = to_scsi_device(dev);
struct sd_devt *sd_devt;
struct scsi_disk *sdkp;
struct gendisk *gd;
int index;
......@@ -3139,13 +3121,9 @@ static int sd_probe(struct device *dev)
if (!sdkp)
goto out;
sd_devt = kzalloc(sizeof(*sd_devt), GFP_KERNEL);
if (!sd_devt)
goto out_free;
gd = alloc_disk(SD_MINORS);
if (!gd)
goto out_free_devt;
goto out_free;
do {
if (!ida_pre_get(&sd_index_ida, GFP_KERNEL))
......@@ -3161,11 +3139,6 @@ static int sd_probe(struct device *dev)
goto out_put;
}
atomic_set(&sd_devt->disk_devt.count, 1);
sd_devt->disk_devt.release = sd_devt_release;
sd_devt->idx = index;
gd->disk_devt = &sd_devt->disk_devt;
error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
if (error) {
sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n");
......@@ -3205,12 +3178,11 @@ static int sd_probe(struct device *dev)
return 0;
out_free_index:
put_disk_devt(&sd_devt->disk_devt);
sd_devt = NULL;
spin_lock(&sd_index_lock);
ida_remove(&sd_index_ida, index);
spin_unlock(&sd_index_lock);
out_put:
put_disk(gd);
out_free_devt:
kfree(sd_devt);
out_free:
kfree(sdkp);
out:
......@@ -3271,7 +3243,10 @@ static void scsi_disk_release(struct device *dev)
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct gendisk *disk = sdkp->disk;
put_disk_devt(disk->disk_devt);
spin_lock(&sd_index_lock);
ida_remove(&sd_index_ida, sdkp->index);
spin_unlock(&sd_index_lock);
disk->private_data = NULL;
put_disk(disk);
put_device(&sdkp->device->sdev_gendev);
......
......@@ -435,7 +435,6 @@ struct request_queue {
struct delayed_work delay_work;
struct backing_dev_info *backing_dev_info;
struct disk_devt *disk_devt;
/*
* The queue owner gets to use this for whatever they like.
......
......@@ -167,13 +167,6 @@ struct blk_integrity {
};
#endif /* CONFIG_BLK_DEV_INTEGRITY */
/* Reference-counted object with a caller-supplied release callback. */
struct disk_devt {
/* Reference count; raised by get_disk_devt(), lowered by put_disk_devt(). */
atomic_t count;
/* Invoked by put_disk_devt() when the count drops to zero. */
void (*release)(struct disk_devt *disk_devt);
};
void put_disk_devt(struct disk_devt *disk_devt);
void get_disk_devt(struct disk_devt *disk_devt);
struct gendisk {
/* major, first_minor and minors are input parameters only,
......@@ -183,7 +176,6 @@ struct gendisk {
int first_minor;
int minors; /* maximum number of minors, =1 for
* disks that can't be partitioned. */
struct disk_devt *disk_devt;
char disk_name[DISK_NAME_LEN]; /* name of major driver */
char *(*devnode)(struct gendisk *gd, umode_t *mode);
......
......@@ -683,33 +683,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
{
struct radix_tree_iter iter;
struct rb_node *rbn;
void **slot;
WARN_ON(test_bit(WB_registered, &bdi->wb.state));
spin_lock_irq(&cgwb_lock);
radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
cgwb_kill(*slot);
while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
struct bdi_writeback_congested *congested =
rb_entry(rbn, struct bdi_writeback_congested, rb_node);
rb_erase(rbn, &bdi->cgwb_congested_tree);
congested->bdi = NULL; /* mark @congested unlinked */
}
spin_unlock_irq(&cgwb_lock);
/*
* All cgwb's and their congested states must be shutdown and
* released before returning. Drain the usage counter to wait for
* all cgwb's and cgwb_congested's ever created on @bdi.
* All cgwb's must be shutdown and released before returning. Drain
* the usage counter to wait for all cgwb's ever created on @bdi.
*/
atomic_dec(&bdi->usage_cnt);
wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
/*
* Grab back our reference so that we hold it when @bdi gets
* re-registered.
*/
atomic_inc(&bdi->usage_cnt);
}
/**
......@@ -749,6 +742,21 @@ void wb_blkcg_offline(struct blkcg *blkcg)
spin_unlock_irq(&cgwb_lock);
}
/*
 * Empty @bdi's cgwb_congested_tree: with cgwb_lock held, repeatedly take the
 * first node, erase it from the tree and clear the entry's ->bdi pointer.
 */
static void cgwb_bdi_exit(struct backing_dev_info *bdi)
{
	struct bdi_writeback_congested *congested;
	struct rb_node *node;

	spin_lock_irq(&cgwb_lock);
	for (node = rb_first(&bdi->cgwb_congested_tree); node;
	     node = rb_first(&bdi->cgwb_congested_tree)) {
		congested = rb_entry(node, struct bdi_writeback_congested,
				     rb_node);
		rb_erase(node, &bdi->cgwb_congested_tree);
		congested->bdi = NULL;	/* mark @congested unlinked */
	}
	spin_unlock_irq(&cgwb_lock);
}
#else /* CONFIG_CGROUP_WRITEBACK */
static int cgwb_bdi_init(struct backing_dev_info *bdi)
......@@ -769,7 +777,9 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
return 0;
}
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
/*
 * Variant used when CONFIG_CGROUP_WRITEBACK is not set: passes @bdi's
 * wb_congested to wb_congested_put().
 */
static void cgwb_bdi_exit(struct backing_dev_info *bdi)
{
wb_congested_put(bdi->wb_congested);
}
......@@ -857,6 +867,8 @@ int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
MINOR(owner->devt));
if (rc)
return rc;
/* Leaking owner reference... */
WARN_ON(bdi->owner);
bdi->owner = owner;
get_device(owner);
return 0;
......@@ -898,6 +910,7 @@ static void bdi_exit(struct backing_dev_info *bdi)
{
WARN_ON_ONCE(bdi->dev);
wb_exit(&bdi->wb);
cgwb_bdi_exit(bdi);
}
static void release_bdi(struct kref *ref)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册