提交 0f44f194 编写于 作者: J Jianchao Wang 提交者: Shile Zhang

blk-mq: realloc hctx when hw queue is mapped to another node

commit 34d11ffac1f56c3895dad32153abd6814452dc77 upstream.

When the hw queues and mq_map are updated, a hctx could be mapped
to a different NUMA node. When that happens, we need to reallocate
the hctx. If the reallocation fails, keep using the previous hctx.
Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
上级 1d5199ea
...@@ -2548,6 +2548,39 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) ...@@ -2548,6 +2548,39 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
return hw_ctx_size; return hw_ctx_size;
} }
/*
 * Allocate a hardware context for hw queue @hctx_idx of @q, requesting
 * memory on NUMA node @node, and run blk_mq_init_hctx() on it.
 *
 * Returns the new hctx on success. Returns NULL if either allocation or
 * blk_mq_init_hctx() fails; in that case everything allocated here has
 * already been freed.
 */
static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
		struct blk_mq_tag_set *set, struct request_queue *q,
		int hctx_idx, int node)
{
	struct blk_mq_hw_ctx *new_hctx;

	new_hctx = kzalloc_node(blk_mq_hw_ctx_size(set),
			GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node);
	if (!new_hctx)
		goto fail;

	if (!zalloc_cpumask_var_node(&new_hctx->cpumask,
			GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node))
		goto free_hctx;

	/* Fields set before blk_mq_init_hctx() is called on the hctx. */
	atomic_set(&new_hctx->nr_active, 0);
	new_hctx->numa_node = node;
	new_hctx->queue_num = hctx_idx;

	/* A non-zero return from blk_mq_init_hctx() is treated as failure. */
	if (blk_mq_init_hctx(q, set, new_hctx, hctx_idx))
		goto free_cpumask;

	blk_mq_hctx_kobj_init(new_hctx);
	return new_hctx;

	/* Unwind in reverse order of acquisition. */
free_cpumask:
	free_cpumask_var(new_hctx->cpumask);
free_hctx:
	kfree(new_hctx);
fail:
	return NULL;
}
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q) struct request_queue *q)
{ {
...@@ -2558,37 +2591,34 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, ...@@ -2558,37 +2591,34 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_lock(&q->sysfs_lock); mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) { for (i = 0; i < set->nr_hw_queues; i++) {
int node; int node;
struct blk_mq_hw_ctx *hctx;
if (hctxs[i])
continue;
node = blk_mq_hw_queue_to_node(q->mq_map, i); node = blk_mq_hw_queue_to_node(q->mq_map, i);
hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set), /*
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, * If the hw queue has been mapped to another numa node,
node); * we need to realloc the hctx. If allocation fails, fallback
if (!hctxs[i]) * to use the previous one.
break; */
if (hctxs[i] && (hctxs[i]->numa_node == node))
if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, continue;
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
node)) {
kfree(hctxs[i]);
hctxs[i] = NULL;
break;
}
atomic_set(&hctxs[i]->nr_active, 0);
hctxs[i]->numa_node = node;
hctxs[i]->queue_num = i;
if (blk_mq_init_hctx(q, set, hctxs[i], i)) { hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
free_cpumask_var(hctxs[i]->cpumask); if (hctx) {
kfree(hctxs[i]); if (hctxs[i]) {
hctxs[i] = NULL; blk_mq_exit_hctx(q, set, hctxs[i], i);
break; kobject_put(&hctxs[i]->kobj);
}
hctxs[i] = hctx;
} else {
if (hctxs[i])
pr_warn("Allocate new hctx on node %d fails,\
fallback to previous one on node %d\n",
node, hctxs[i]->numa_node);
else
break;
} }
blk_mq_hctx_kobj_init(hctxs[i]);
} }
for (j = i; j < q->nr_hw_queues; j++) { for (j = i; j < q->nr_hw_queues; j++) {
struct blk_mq_hw_ctx *hctx = hctxs[j]; struct blk_mq_hw_ctx *hctx = hctxs[j];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册