Commit 9ff28240 authored by Ming Lei, committed by Shile Zhang

blk-mq: not embed .mq_kobj and ctx->kobj into queue instance

commit 1db4909e76f64a85f4aaa187f0f683f5c85a471d upstream.

Even though .mq_kobj, ctx->kobj and q->kobj share the same lifetime from the
block layer's point of view, in reality they don't, because userspace may grab
a reference to any of these kobjects at any time via sysfs.

This patch fixes the issue with the following approach (a condensed sketch of
the resulting ownership pattern follows the commit metadata below):

1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing
all ctxs

2) free all allocated ctxs and the 'blk_mq_ctxs' instance in the release
handler of .mq_kobj

3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that
.mq_kobj is always released after all ctxs are freed.

This patch fixes a kernel panic during boot when DEBUG_KOBJECT_RELEASE is
enabled.
Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: "jianchao.wang" <jianchao.w.wang@oracle.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Parent 4f3484ac
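
For orientation before the hunks, here is a condensed sketch of the ownership pattern the patch establishes. The structures and function bodies are taken from the diff below; only the explanatory comments are added.

/*
 * The blk_mq_ctxs container owns both the mq kobject and the percpu ctx
 * array; q->mq_kobj now points into this container instead of being
 * embedded in struct request_queue.
 */
struct blk_mq_ctxs {
        struct kobject kobj;                    /* exposed as q->mq_kobj */
        struct blk_mq_ctx __percpu *queue_ctx;  /* embeds every ctx->kobj */
};

/* Runs only when the last reference to q->mq_kobj is dropped. */
static void blk_mq_sysfs_release(struct kobject *kobj)
{
        struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj);

        free_percpu(ctxs->queue_ctx);           /* frees all ctxs and their kobjects */
        kfree(ctxs);
}

/* Runs when one ctx->kobj dies, possibly long after queue teardown. */
static void blk_mq_ctx_sysfs_release(struct kobject *kobj)
{
        struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);

        /* drop the reference taken for this ctx in blk_mq_sysfs_init() */
        kobject_put(&ctx->ctxs->kobj);
}

void blk_mq_sysfs_init(struct request_queue *q)
{
        struct blk_mq_ctx *ctx;
        int cpu;

        kobject_init(q->mq_kobj, &blk_mq_ktype);

        for_each_possible_cpu(cpu) {
                ctx = per_cpu_ptr(q->queue_ctx, cpu);

                /* each ctx pins the container until its own kobject is released */
                kobject_get(q->mq_kobj);
                kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
        }
}

With this arrangement blk_mq_sysfs_deinit() can drop the queue's own references unconditionally: the percpu area that embeds every ctx->kobj is freed only after the last sysfs reference to any ctx goes away, which is exactly the case DEBUG_KOBJECT_RELEASE was exposing.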
block/blk-mq-sysfs.c
@@ -16,6 +16,18 @@
 static void blk_mq_sysfs_release(struct kobject *kobj)
 {
+        struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj);
+
+        free_percpu(ctxs->queue_ctx);
+        kfree(ctxs);
+}
+
+static void blk_mq_ctx_sysfs_release(struct kobject *kobj)
+{
+        struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);
+
+        /* ctx->ctxs won't be released until all ctxs are freed */
+        kobject_put(&ctx->ctxs->kobj);
 }
 
 static void blk_mq_hw_sysfs_release(struct kobject *kobj)
@@ -214,7 +226,7 @@ static struct kobj_type blk_mq_ktype = {
 static struct kobj_type blk_mq_ctx_ktype = {
         .sysfs_ops      = &blk_mq_sysfs_ops,
         .default_attrs  = default_ctx_attrs,
-        .release        = blk_mq_sysfs_release,
+        .release        = blk_mq_ctx_sysfs_release,
 };
 
 static struct kobj_type blk_mq_hw_ktype = {
@@ -246,7 +258,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
         if (!hctx->nr_ctx)
                 return 0;
 
-        ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
+        ret = kobject_add(&hctx->kobj, q->mq_kobj, "%u", hctx->queue_num);
         if (ret)
                 return ret;
@@ -269,8 +281,8 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
         queue_for_each_hw_ctx(q, hctx, i)
                 blk_mq_unregister_hctx(hctx);
 
-        kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
-        kobject_del(&q->mq_kobj);
+        kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+        kobject_del(q->mq_kobj);
         kobject_put(&dev->kobj);
 
         q->mq_sysfs_init_done = false;
@@ -290,7 +302,7 @@ void blk_mq_sysfs_deinit(struct request_queue *q)
                 ctx = per_cpu_ptr(q->queue_ctx, cpu);
                 kobject_put(&ctx->kobj);
         }
-        kobject_put(&q->mq_kobj);
+        kobject_put(q->mq_kobj);
 }
 
 void blk_mq_sysfs_init(struct request_queue *q)
@@ -298,10 +310,12 @@ void blk_mq_sysfs_init(struct request_queue *q)
         struct blk_mq_ctx *ctx;
         int cpu;
 
-        kobject_init(&q->mq_kobj, &blk_mq_ktype);
+        kobject_init(q->mq_kobj, &blk_mq_ktype);
 
         for_each_possible_cpu(cpu) {
                 ctx = per_cpu_ptr(q->queue_ctx, cpu);
+
+                kobject_get(q->mq_kobj);
                 kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
         }
 }
@@ -314,11 +328,11 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
         WARN_ON_ONCE(!q->kobj.parent);
         lockdep_assert_held(&q->sysfs_lock);
 
-        ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
+        ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
         if (ret < 0)
                 goto out;
 
-        kobject_uevent(&q->mq_kobj, KOBJ_ADD);
+        kobject_uevent(q->mq_kobj, KOBJ_ADD);
 
         queue_for_each_hw_ctx(q, hctx, i) {
                 ret = blk_mq_register_hctx(hctx);
@@ -335,8 +349,8 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
         while (--i >= 0)
                 blk_mq_unregister_hctx(q->queue_hw_ctx[i]);
 
-        kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
-        kobject_del(&q->mq_kobj);
+        kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+        kobject_del(q->mq_kobj);
         kobject_put(&dev->kobj);
         return ret;
 }
block/blk-mq.c
@@ -2487,6 +2487,34 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
         mutex_unlock(&set->tag_list_lock);
 }
 
+/* All allocations will be freed in release handler of q->mq_kobj */
+static int blk_mq_alloc_ctxs(struct request_queue *q)
+{
+        struct blk_mq_ctxs *ctxs;
+        int cpu;
+
+        ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
+        if (!ctxs)
+                return -ENOMEM;
+
+        ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+        if (!ctxs->queue_ctx)
+                goto fail;
+
+        for_each_possible_cpu(cpu) {
+                struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
+                ctx->ctxs = ctxs;
+        }
+
+        q->mq_kobj = &ctxs->kobj;
+        q->queue_ctx = ctxs->queue_ctx;
+
+        return 0;
+ fail:
+        kfree(ctxs);
+        return -ENOMEM;
+}
+
 /*
  * It is the actual release handler for mq, but we do it from
  * request queue's release handler for avoiding use-after-free
@@ -2514,8 +2542,6 @@ void blk_mq_release(struct request_queue *q)
          * both share lifetime with request queue.
          */
         blk_mq_sysfs_deinit(q);
-
-        free_percpu(q->queue_ctx);
 }
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
@@ -2658,8 +2684,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
         if (!q->poll_cb)
                 goto err_exit;
 
-        q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
-        if (!q->queue_ctx)
+        if (blk_mq_alloc_ctxs(q))
                 goto err_exit;
 
         /* init q->mq_kobj and sw queues' kobjects */
@@ -2668,7 +2693,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
         q->queue_hw_ctx = kcalloc_node(nr_cpu_ids, sizeof(*(q->queue_hw_ctx)),
                                                 GFP_KERNEL, set->numa_node);
         if (!q->queue_hw_ctx)
-                goto err_percpu;
+                goto err_sys_init;
 
         q->mq_map = set->mq_map;
@@ -2723,8 +2748,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 err_hctxs:
         kfree(q->queue_hw_ctx);
-err_percpu:
-        free_percpu(q->queue_ctx);
+err_sys_init:
+        blk_mq_sysfs_deinit(q);
 err_exit:
         q->mq_ops = NULL;
         return ERR_PTR(-ENOMEM);
block/blk-mq.h
@@ -7,6 +7,11 @@
 struct blk_mq_tag_set;
 
+struct blk_mq_ctxs {
+        struct kobject          kobj;
+        struct blk_mq_ctx __percpu      *queue_ctx;
+};
+
 /**
  * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
  */
@@ -27,6 +32,7 @@ struct blk_mq_ctx {
         unsigned long           ____cacheline_aligned_in_smp rq_completed[2];
 
         struct request_queue    *queue;
+        struct blk_mq_ctxs      *ctxs;
         struct kobject          kobj;
 } ____cacheline_aligned_in_smp;
include/linux/blkdev.h
@@ -549,7 +549,7 @@ struct request_queue {
         /*
          * mq queue kobject
          */
-        struct kobject mq_kobj;
+        struct kobject *mq_kobj;
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
         struct blk_integrity    integrity;