diff --git a/block/blk-core.c b/block/blk-core.c index 2ce4ad4619c19269c94e8cacb7e67a7a8d7214cd..dbfcf2dc735da10c8ece1bb361dc479381ab76d3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1337,6 +1337,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, mutex_init(&q->blk_trace_mutex); #endif mutex_init(&q->sysfs_lock); + mutex_init(&q->sysfs_dir_lock); spin_lock_init(&q->__queue_lock); q->queue_lock = lock ? : &q->__queue_lock; diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 3209979e105bd93217da4bfb099e069cafe38c8b..87fe2708d188fbae8a90a4366066f875b16afe34 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -283,7 +283,7 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; - lockdep_assert_held(&q->sysfs_lock); + lockdep_assert_held(&q->sysfs_dir_lock); queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); @@ -333,7 +333,7 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q) int ret, i; WARN_ON_ONCE(!q->kobj.parent); - lockdep_assert_held(&q->sysfs_lock); + lockdep_assert_held(&q->sysfs_dir_lock); ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) @@ -366,9 +366,9 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) { int ret; - mutex_lock(&q->sysfs_lock); + mutex_lock(&q->sysfs_dir_lock); ret = __blk_mq_register_dev(dev, q); - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); return ret; } @@ -379,7 +379,7 @@ void blk_mq_sysfs_unregister(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; - mutex_lock(&q->sysfs_lock); + mutex_lock(&q->sysfs_dir_lock); if (!q->mq_sysfs_init_done) goto unlock; @@ -387,7 +387,7 @@ void blk_mq_sysfs_unregister(struct request_queue *q) blk_mq_unregister_hctx(hctx); unlock: - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); } int blk_mq_sysfs_register(struct request_queue *q) @@ -395,7 +395,7 @@ int blk_mq_sysfs_register(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i, ret = 0; - mutex_lock(&q->sysfs_lock); + mutex_lock(&q->sysfs_dir_lock); if (!q->mq_sysfs_init_done) goto unlock; @@ -406,7 +406,7 @@ int blk_mq_sysfs_register(struct request_queue *q) } unlock: - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); return ret; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5bdae7c8fa27b891f9d093619778dfc40b751525..d925b75702e3c004d2234ac5a736d3557040651f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -933,6 +933,7 @@ int blk_register_queue(struct gendisk *disk) int ret; struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; + bool has_elevator = false; if (WARN_ON(!q)) return -ENXIO; @@ -940,14 +941,12 @@ int blk_register_queue(struct gendisk *disk) WARN_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags), "%s is registering an already registered queue\n", kobject_name(&dev->kobj)); - queue_flag_set_unlocked(QUEUE_FLAG_REGISTERED, q); ret = blk_trace_init_sysfs(dev); if (ret) return ret; - /* Prevent changes through sysfs until registration is completed. */ - mutex_lock(&q->sysfs_lock); + mutex_lock(&q->sysfs_dir_lock); ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) { @@ -960,28 +959,37 @@ int blk_register_queue(struct gendisk *disk) blk_mq_debugfs_register(q); } - kobject_uevent(&q->kobj, KOBJ_ADD); - - wbt_enable_default(q); - - blk_throtl_register_queue(q); - blk_queue_flag_set(QUEUE_FLAG_THROTL_INIT_DONE, q); - + /* + * The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator + * switch won't happen at all. + */ if (q->request_fn || (q->mq_ops && q->elevator)) { - ret = elv_register_queue(q); + ret = elv_register_queue(q, false); if (ret) { - mutex_unlock(&q->sysfs_lock); - kobject_uevent(&q->kobj, KOBJ_REMOVE); + mutex_unlock(&q->sysfs_dir_lock); kobject_del(&q->kobj); blk_trace_remove_sysfs(dev); kobject_put(&dev->kobj); return ret; } + has_elevator = true; } + mutex_lock(&q->sysfs_lock); + blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); + wbt_enable_default(q); + blk_throtl_register_queue(q); + blk_queue_flag_set(QUEUE_FLAG_THROTL_INIT_DONE, q); + + /* Now everything is ready and send out KOBJ_ADD uevent */ + kobject_uevent(&q->kobj, KOBJ_ADD); + if (has_elevator) + kobject_uevent(&q->elevator->kobj, KOBJ_ADD); + mutex_unlock(&q->sysfs_lock); + ret = 0; unlock: - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); /* * SCSI probing may synchronously create and destroy a lot of @@ -1012,6 +1020,7 @@ EXPORT_SYMBOL_GPL(blk_register_queue); void blk_unregister_queue(struct gendisk *disk) { struct request_queue *q = disk->queue; + bool has_elevator; if (WARN_ON(!q)) return; @@ -1029,21 +1038,22 @@ void blk_unregister_queue(struct gendisk *disk) mutex_lock(&q->sysfs_lock); blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); + has_elevator = !!q->elevator; + mutex_unlock(&q->sysfs_lock); + mutex_lock(&q->sysfs_dir_lock); /* * Remove the sysfs attributes before unregistering the queue data * structures that can be modified through sysfs. */ if (q->mq_ops) blk_mq_unregister_dev(disk_to_dev(disk), q); - mutex_unlock(&q->sysfs_lock); blk_trace_remove_sysfs(disk_to_dev(disk)); - mutex_lock(&q->sysfs_lock); - if (q->request_fn || (q->mq_ops && q->elevator)) + if (q->request_fn || (q->mq_ops && has_elevator)) elv_unregister_queue(q); - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&q->kobj, KOBJ_REMOVE); diff --git a/block/blk.h b/block/blk.h index 985e20980aff2b319f84f7b698b845c787cb60a1..2c73160dff6c3f352ead4d1038bc39a74ad58974 100644 --- a/block/blk.h +++ b/block/blk.h @@ -262,7 +262,7 @@ int elevator_init_mq(struct request_queue *q); int elevator_switch_mq(struct request_queue *q, struct elevator_type *new_e); void elevator_exit(struct request_queue *, struct elevator_queue *); -int elv_register_queue(struct request_queue *q); +int elv_register_queue(struct request_queue *q, bool uevent); void elv_unregister_queue(struct request_queue *q); void __blk_rq_init(struct request_queue *q, struct request *rq); diff --git a/block/elevator.c b/block/elevator.c index 9621a812319e74bf077ce2132c270f87d88b5380..a1e62af8bf338e9222d3ba05d600f4f1f260e1e0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -841,13 +841,16 @@ static struct kobj_type elv_ktype = { .release = elevator_release, }; -int elv_register_queue(struct request_queue *q) +/* + * elv_register_queue is called from either blk_register_queue or + * elevator_switch, elevator switch is prevented from being happen + * in the two paths, so it is safe to not hold q->sysfs_lock. + */ +int elv_register_queue(struct request_queue *q, bool uevent) { struct elevator_queue *e = q->elevator; int error; - lockdep_assert_held(&q->sysfs_lock); - error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); if (!error) { struct elv_fs_entry *attr = e->type->elevator_attrs; @@ -858,24 +861,34 @@ int elv_register_queue(struct request_queue *q) attr++; } } - kobject_uevent(&e->kobj, KOBJ_ADD); + if (uevent) + kobject_uevent(&e->kobj, KOBJ_ADD); + + mutex_lock(&q->sysfs_lock); e->registered = 1; if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn) e->type->ops.sq.elevator_registered_fn(q); + mutex_unlock(&q->sysfs_lock); } return error; } +/* + * elv_unregister_queue is called from either blk_unregister_queue or + * elevator_switch, elevator switch is prevented from being happen + * in the two paths, so it is safe to not hold q->sysfs_lock. + */ void elv_unregister_queue(struct request_queue *q) { - lockdep_assert_held(&q->sysfs_lock); - if (q) { struct elevator_queue *e = q->elevator; kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj); + + mutex_lock(&q->sysfs_lock); e->registered = 0; + mutex_unlock(&q->sysfs_lock); } } @@ -946,10 +959,32 @@ int elevator_switch_mq(struct request_queue *q, lockdep_assert_held(&q->sysfs_lock); if (q->elevator) { - if (q->elevator->registered) + if (q->elevator->registered) { + mutex_unlock(&q->sysfs_lock); + + /* + * Concurrent elevator switch can't happen becasue + * sysfs write is always exclusively on same file. + * + * Also the elevator queue won't be freed after + * sysfs_lock is released becasue kobject_del() in + * blk_unregister_queue() waits for completion of + * .store & .show on its attributes. + */ elv_unregister_queue(q); + + mutex_lock(&q->sysfs_lock); + } ioc_clear_queue(q); elevator_exit(q, q->elevator); + + /* + * sysfs_lock may be dropped, so re-check if queue is + * unregistered. If yes, don't switch to new elevator + * any more + */ + if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)) + return 0; } ret = blk_mq_init_sched(q, new_e); @@ -957,7 +992,11 @@ int elevator_switch_mq(struct request_queue *q, goto out; if (new_e) { - ret = elv_register_queue(q); + mutex_unlock(&q->sysfs_lock); + + ret = elv_register_queue(q, true); + + mutex_lock(&q->sysfs_lock); if (ret) { elevator_exit(q, q->elevator); goto out; @@ -1053,7 +1092,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) if (err) goto fail_init; - err = elv_register_queue(q); + err = elv_register_queue(q, true); if (err) goto fail_register; @@ -1073,7 +1112,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) /* switch failed, restore and re-register old elevator */ if (old) { q->elevator = old; - elv_register_queue(q); + elv_register_queue(q, true); blk_queue_bypass_end(q); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 41d235ee579ae91bc55f0504ce243b36cf54f5cb..0c8f2a28f163ad6f58c9e7a5c431fbf394db0bdc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -655,6 +655,7 @@ struct request_queue { struct delayed_work requeue_work; struct mutex sysfs_lock; + struct mutex sysfs_dir_lock; /* * for reusing dead hctx instance in case of updating