提交 d783e0bd 编写于 作者: M Marta Rybczynska 提交者: Jens Axboe

nvme: avoid cqe corruption when update at the same time as read

Make sure the CQE phase (validity) is read before the rest of the
structure. The phase bit is the highest address and the CQE
read will happen on most platforms from lower to upper addresses
and will be done by multiple non-atomic loads. If the structure
is updated by PCI during the reads from the processor, the
processor may get a corrupted copy.

The addition of the new nvme_cqe_valid function that verifies
the validity bit also allows refactoring of the other CQE read
sequences.
Signed-off-by: NMarta Rybczynska <marta.rybczynska@kalray.eu>
Reviewed-by: NJohannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: NChristoph Hellwig <hch@lst.de>
Reviewed-by: NKeith Busch <keith.busch@intel.com>
Signed-off-by: NJens Axboe <axboe@fb.com>
上级 aaf25593
...@@ -723,6 +723,13 @@ static void nvme_complete_rq(struct request *req) ...@@ -723,6 +723,13 @@ static void nvme_complete_rq(struct request *req)
blk_mq_end_request(req, error); blk_mq_end_request(req, error);
} }
/* We read the CQE phase first to check if the rest of the entry is valid */
static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
u16 phase)
{
return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
}
static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
{ {
u16 head, phase; u16 head, phase;
...@@ -730,13 +737,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) ...@@ -730,13 +737,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
head = nvmeq->cq_head; head = nvmeq->cq_head;
phase = nvmeq->cq_phase; phase = nvmeq->cq_phase;
for (;;) { while (nvme_cqe_valid(nvmeq, head, phase)) {
struct nvme_completion cqe = nvmeq->cqes[head]; struct nvme_completion cqe = nvmeq->cqes[head];
u16 status = le16_to_cpu(cqe.status);
struct request *req; struct request *req;
if ((status & 1) != phase)
break;
if (++head == nvmeq->q_depth) { if (++head == nvmeq->q_depth) {
head = 0; head = 0;
phase = !phase; phase = !phase;
...@@ -767,7 +771,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) ...@@ -767,7 +771,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
memcpy(req->special, &cqe, sizeof(cqe)); memcpy(req->special, &cqe, sizeof(cqe));
blk_mq_complete_request(req, status >> 1); blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
} }
...@@ -808,18 +812,16 @@ static irqreturn_t nvme_irq(int irq, void *data) ...@@ -808,18 +812,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
static irqreturn_t nvme_irq_check(int irq, void *data) static irqreturn_t nvme_irq_check(int irq, void *data)
{ {
struct nvme_queue *nvmeq = data; struct nvme_queue *nvmeq = data;
struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) return IRQ_WAKE_THREAD;
return IRQ_NONE; return IRQ_NONE;
return IRQ_WAKE_THREAD;
} }
static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
{ {
struct nvme_queue *nvmeq = hctx->driver_data; struct nvme_queue *nvmeq = hctx->driver_data;
if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
nvmeq->cq_phase) {
spin_lock_irq(&nvmeq->q_lock); spin_lock_irq(&nvmeq->q_lock);
__nvme_process_cq(nvmeq, &tag); __nvme_process_cq(nvmeq, &tag);
spin_unlock_irq(&nvmeq->q_lock); spin_unlock_irq(&nvmeq->q_lock);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部