blk-mq: not embed .mq_kobj and ctx->kobj into queue instance

commit 1db4909e76f64a85f4aaa187f0f683f5c85a471d upstream. Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime from block layer's view, actually they don't because userspace may grab one kobject anytime via sysfs. This patch fixes the issue by the following approach: 1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing all ctxs 2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release handler of .mq_kobj 3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that .mq_kobj is always released after all ctxs are freed. This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE is enabled. Reported-by: N Guenter Roeck <linux@roeck-us.net> Cc: "jianchao.wang" <jianchao.w.wang@oracle.com> Tested-by: N Guenter Roeck <linux@roeck-us.net> Reviewed-by: N Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: N Ming Lei <ming.lei@redhat.com> Signed-off-by: N Jens Axboe <axboe@kernel.dk> Signed-off-by: N Joseph Qi <joseph.qi@linux.alibaba.com> Reviewed-by: N Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>

blk-mq: not embed .mq_kobj and ctx->kobj into queue instance
commit 1db4909e76f64a85f4aaa187f0f683f5c85a471d upstream. Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime from block layer's view, actually they don't because userspace may grab one kobject anytime via sysfs. This patch fixes the issue by the following approach: 1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing all ctxs 2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release handler of .mq_kobj 3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that .mq_kobj is always released after all ctxs are freed. This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE is enabled. Reported-by: N Guenter Roeck <linux@roeck-us.net> Cc: "jianchao.wang" <jianchao.w.wang@oracle.com> Tested-by: N Guenter Roeck <linux@roeck-us.net> Reviewed-by: N Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: N Ming Lei <ming.lei@redhat.com> Signed-off-by: N Jens Axboe <axboe@kernel.dk> Signed-off-by: N Joseph Qi <joseph.qi@linux.alibaba.com> Reviewed-by: N Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
9ff28240 · Ming Lei · Shile Zhang · 4f3484ac · 9ff28240 · 9ff28240
隐藏空白更改
内联并排

Showing with 63 addition and 18 deletion

block/blk-mq-sysfs.c block/blk-mq-sysfs.c +24 -10

block/blk-mq.c block/blk-mq.c +32 -7

block/blk-mq.h block/blk-mq.h +6 -0

include/linux/blkdev.h include/linux/blkdev.h +1 -1

未找到文件。
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -16,6 +16,18 @@

 static void blk_mq_sysfs_release(struct kobject *kobj)
 {
+	struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj);
+
+	free_percpu(ctxs->queue_ctx);
+	kfree(ctxs);
+}
+
+static void blk_mq_ctx_sysfs_release(struct kobject *kobj)
+{
+	struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);
+
+	/* ctx->ctxs won't be released until all ctx are freed */
+	kobject_put(&ctx->ctxs->kobj);
 }

 static void blk_mq_hw_sysfs_release(struct kobject *kobj)
@@ -214,7 +226,7 @@ static struct kobj_type blk_mq_ktype = {
 static struct kobj_type blk_mq_ctx_ktype = {
 	.sysfs_ops	= &blk_mq_sysfs_ops,
 	.default_attrs	= default_ctx_attrs,
-	.release	= blk_mq_sysfs_release,
+	.release	= blk_mq_ctx_sysfs_release,
 };

 static struct kobj_type blk_mq_hw_ktype = {
@@ -246,7 +258,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 	if (!hctx->nr_ctx)
 		return 0;

-	ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
+	ret = kobject_add(&hctx->kobj, q->mq_kobj, "%u", hctx->queue_num);
 	if (ret)
 		return ret;

@@ -269,8 +281,8 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);

-	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
-	kobject_del(&q->mq_kobj);
+	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+	kobject_del(q->mq_kobj);
 	kobject_put(&dev->kobj);

 	q->mq_sysfs_init_done = false;
@@ -290,7 +302,7 @@ void blk_mq_sysfs_deinit(struct request_queue *q)
 		ctx = per_cpu_ptr(q->queue_ctx, cpu);
 		kobject_put(&ctx->kobj);
 	}
-	kobject_put(&q->mq_kobj);
+	kobject_put(q->mq_kobj);
 }

 void blk_mq_sysfs_init(struct request_queue *q)
@@ -298,10 +310,12 @@ void blk_mq_sysfs_init(struct request_queue *q)
 	struct blk_mq_ctx *ctx;
 	int cpu;

-	kobject_init(&q->mq_kobj, &blk_mq_ktype);
+	kobject_init(q->mq_kobj, &blk_mq_ktype);

 	for_each_possible_cpu(cpu) {
 		ctx = per_cpu_ptr(q->queue_ctx, cpu);
+
+		kobject_get(q->mq_kobj);
 		kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
 	}
 }
@@ -314,11 +328,11 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
 	WARN_ON_ONCE(!q->kobj.parent);
 	lockdep_assert_held(&q->sysfs_lock);

-	ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
+	ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 	if (ret < 0)
 		goto out;

-	kobject_uevent(&q->mq_kobj, KOBJ_ADD);
+	kobject_uevent(q->mq_kobj, KOBJ_ADD);

 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
@@ -335,8 +349,8 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
 	while (--i >= 0)
 		blk_mq_unregister_hctx(q->queue_hw_ctx[i]);

-	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
-	kobject_del(&q->mq_kobj);
+	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+	kobject_del(q->mq_kobj);
 	kobject_put(&dev->kobj);
 	return ret;
 }

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2487,6 +2487,34 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
 	mutex_unlock(&set->tag_list_lock);
 }

+/* All allocations will be freed in release handler of q->mq_kobj */
+static int blk_mq_alloc_ctxs(struct request_queue *q)
+{
+	struct blk_mq_ctxs *ctxs;
+	int cpu;
+
+	ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
+	if (!ctxs)
+		return -ENOMEM;
+
+	ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+	if (!ctxs->queue_ctx)
+		goto fail;
+
+	for_each_possible_cpu(cpu) {
+		struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
+		ctx->ctxs = ctxs;
+	}
+
+	q->mq_kobj = &ctxs->kobj;
+	q->queue_ctx = ctxs->queue_ctx;
+
+	return 0;
+ fail:
+	kfree(ctxs);
+	return -ENOMEM;
+}
+
 /*
 * It is the actual release handler for mq, but we do it from
 * request queue's release handler for avoiding use-after-free
@@ -2514,8 +2542,6 @@ void blk_mq_release(struct request_queue *q)
 	 * both share lifetime with request queue.
 	 */
 	blk_mq_sysfs_deinit(q);
-
-	free_percpu(q->queue_ctx);
 }

 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
@@ -2658,8 +2684,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (!q->poll_cb)
 		goto err_exit;

-	q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
-	if (!q->queue_ctx)
+	if (blk_mq_alloc_ctxs(q))
 		goto err_exit;

 	/* init q->mq_kobj and sw queues' kobjects */
@@ -2668,7 +2693,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	q->queue_hw_ctx = kcalloc_node(nr_cpu_ids, sizeof(*(q->queue_hw_ctx)),
 						GFP_KERNEL, set->numa_node);
 	if (!q->queue_hw_ctx)
-		goto err_percpu;
+		goto err_sys_init;

 	q->mq_map = set->mq_map;

@@ -2723,8 +2748,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,

 err_hctxs:
 	kfree(q->queue_hw_ctx);
-err_percpu:
-	free_percpu(q->queue_ctx);
+err_sys_init:
+	blk_mq_sysfs_deinit(q);
 err_exit:
 	q->mq_ops = NULL;
 	return ERR_PTR(-ENOMEM);

--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -7,6 +7,11 @@

 struct blk_mq_tag_set;

+struct blk_mq_ctxs {
+	struct kobject kobj;
+	struct blk_mq_ctx __percpu	*queue_ctx;
+};
+
 /**
 * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
 */
@@ -27,6 +32,7 @@ struct blk_mq_ctx {
 	unsigned long		____cacheline_aligned_in_smp rq_completed[2];

 	struct request_queue	*queue;
+	struct blk_mq_ctxs      *ctxs;
 	struct kobject		kobj;
 } ____cacheline_aligned_in_smp;


--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -549,7 +549,7 @@ struct request_queue {
 	/*
 	 * mq queue kobject
 	 */
-	struct kobject mq_kobj;
+	struct kobject *mq_kobj;

 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity integrity;