From a76a37777f2c936b1f046bfc0c5982c958b16bfe Mon Sep 17 00:00:00 2001
From: Zhou Wang <wangzhou1@hisilicon.com>
Date: Mon, 28 Sep 2020 16:32:02 +0800
Subject: [PATCH] iommu/arm-smmu-v3: Ensure queue is read after updating prod
 pointer

Reading the 'prod' MMIO register in order to determine whether or not
there is valid data beyond 'cons' for a given queue does not provide
sufficient dependency ordering, as the resulting access is address
dependent only on 'cons' and can therefore be speculated ahead of time,
potentially allowing stale data to be read by the CPU.

Use readl() instead of readl_relaxed() when updating the shadow copy of
the 'prod' pointer, so that all speculated memory reads from the
corresponding queue can occur only from valid slots.

Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
Link: https://lore.kernel.org/r/1601281922-117296-1-git-send-email-wangzhou1@hisilicon.com
[will: Use readl() instead of explicit barrier. Update 'cons' side to match.]
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/barrier.h            |  1 +
 arch/arm64/include/asm/io.h                 |  1 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 11 +++++++++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index fb4c27506ef4..c3009b0e5239 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -45,6 +45,7 @@
 #define rmb()		dsb(ld)
 #define wmb()		dsb(st)
 
+#define dma_mb()	dmb(osh)
 #define dma_rmb()	dmb(oshld)
 #define dma_wmb()	dmb(oshst)
 
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index ff50dd731852..fd172c41df90 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -110,6 +110,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 
 #define __io_par(v)		__iormb(v)
 #define __iowmb()		dma_wmb()
+#define __iomb()		dma_mb()
 
 /*
  * Relaxed I/O memory access primitives. These follow the Device memory
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 3063aeb38063..1ffec08d345a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -810,7 +810,7 @@ static void queue_sync_cons_out(struct arm_smmu_queue *q)
 	 * Ensure that all CPU accesses (reads and writes) to the queue
 	 * are complete before we update the cons pointer.
 	 */
-	mb();
+	__iomb();
 	writel_relaxed(q->llq.cons, q->cons_reg);
 }
 
@@ -822,8 +822,15 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q)
 
 static int queue_sync_prod_in(struct arm_smmu_queue *q)
 {
+	u32 prod;
 	int ret = 0;
-	u32 prod = readl_relaxed(q->prod_reg);
+
+	/*
+	 * We can't use the _relaxed() variant here, as we must prevent
+	 * speculative reads of the queue before we have determined that
+	 * prod has indeed moved.
+	 */
+	prod = readl(q->prod_reg);
 
 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
 		ret = -EOVERFLOW;
-- 
GitLab