提交 bc74bf5c 编写于 作者: S Sagi Grimberg 提交者: Yang Yingliang

nvme-rdma: fix possible use-after-free in connect error flow

mainline inclusion
from mainline-v5.3-rc5
commit d94211b8
category: bugfix
bugzilla: NA
CVE: NA
Link: https://gitee.com/openeuler/kernel/issues/I1WGZE

-------------------------------------------------

When start_queue fails, we need to make sure to drain the
queue cq before freeing the rdma resources because we might
still race with the completion path. Have start_queue() error
path safely stop the queue.

--
[30371.808111] nvme nvme1: Failed reconnect attempt 11
[30371.808113] nvme nvme1: Reconnecting in 10 seconds...
[...]
[30382.069315] nvme nvme1: creating 4 I/O queues.
[30382.257058] nvme nvme1: Connect Invalid SQE Parameter, qid 4
[30382.257061] nvme nvme1: failed to connect queue: 4 ret=386
[30382.305001] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
[30382.305022] IP: qedr_poll_cq+0x8a3/0x1170 [qedr]
[30382.305028] PGD 0 P4D 0
[30382.305037] Oops: 0000 [#1] SMP PTI
[...]
[30382.305153] Call Trace:
[30382.305166]  ? __switch_to_asm+0x34/0x70
[30382.305187]  __ib_process_cq+0x56/0xd0 [ib_core]
[30382.305201]  ib_poll_handler+0x26/0x70 [ib_core]
[30382.305213]  irq_poll_softirq+0x88/0x110
[30382.305223]  ? sort_range+0x20/0x20
[30382.305232]  __do_softirq+0xde/0x2c6
[30382.305241]  ? sort_range+0x20/0x20
[30382.305249]  run_ksoftirqd+0x1c/0x60
[30382.305258]  smpboot_thread_fn+0xef/0x160
[30382.305265]  kthread+0x113/0x130
[30382.305273]  ? kthread_create_worker_on_cpu+0x50/0x50
[30382.305281]  ret_from_fork+0x35/0x40
--
Reported-by: Nicolas Morey-Chaisemartin <NMoreyChaisemartin@suse.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chao Leng <lengchao@huawei.com>
Reviewed-by: Jike Cheng <chengjike.cheng@huawei.com>
Conflicts:
  drivers/nvme/host/rdma.c
[lrz: get queue pointer to apply patch]
Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Signed-off-by: Lijie <lijie34@huawei.com>
Reviewed-by: Tao Hou <houtao1@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 fe309f57
...@@ -547,13 +547,18 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, ...@@ -547,13 +547,18 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
return ret; return ret;
} }
/*
 * Disconnect the queue's RDMA CM id and then drain its QP.
 *
 * Unlike nvme_rdma_stop_queue(), this helper neither tests nor clears
 * NVME_RDMA_Q_LIVE, so it can also be called for a queue that was never
 * marked live (e.g. from the nvme_rdma_start_queue() connect-failure path).
 */
static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
rdma_disconnect(queue->cm_id);
ib_drain_qp(queue->qp);
}
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{ {
if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
return; return;
rdma_disconnect(queue->cm_id); __nvme_rdma_stop_queue(queue);
ib_drain_qp(queue->qp);
} }
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
...@@ -583,6 +588,7 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) ...@@ -583,6 +588,7 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
{ {
struct nvme_rdma_queue *queue = &ctrl->queues[idx];
int ret; int ret;
if (idx) if (idx)
...@@ -590,11 +596,13 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) ...@@ -590,11 +596,13 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
else else
ret = nvmf_connect_admin_queue(&ctrl->ctrl); ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (!ret) if (!ret) {
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags); set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
else } else {
__nvme_rdma_stop_queue(queue);
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"failed to connect queue: %d ret=%d\n", idx, ret); "failed to connect queue: %d ret=%d\n", idx, ret);
}
return ret; return ret;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册