From 1f4a7c0c507b97b8ef570c1ffa0c96b9c47a6b3f Mon Sep 17 00:00:00 2001
From: Lemover <1773908404@qq.com>
Date: Mon, 25 Jul 2022 14:30:36 +0800
Subject: [PATCH] l2tlb: add assert that does not allow duplicate mem access (#1669)

Add bypass checking to the refill logic to prevent duplicate mem accesses
caused by wrongly reported misses. The refill data is not actually
forwarded: the cache only checks whether the vpn of an in-flight refill
matches the request and, if so, re-accesses the page cache. Also add some
asserts to catch duplicate mem accesses. These asserts may be wrongly
triggered in some corner cases.

* l2tlb: add assert that does not allow duplicate mem access
* l2tlb: change dup mem access assert to dup mem resp assert
* l2tlb: when a refill arrives after a page cache access, re-access the page cache
* l2tlb: fix assert signal that was wrongly assigned
* l2tlb: store empty super pages in sp entries
* l2tlb: fix bug that lost reqs because bypassed reqs did not enqueue mq
* l2tlb: fix bug that lost reqs because bypassed reqs did not enqueue mq
* l2tlb: fix bug in cache resp ready logic
* l2tlb.cache: fix bug in vpn bypass match
* l2tlb.cache: fix bug of vs participating in hit check
---
 .../scala/xiangshan/cache/mmu/L2TLB.scala     | 28 +++++++++---
 .../scala/xiangshan/cache/mmu/MMUBundle.scala | 10 ++++-
 .../xiangshan/cache/mmu/PageTableCache.scala  | 43 +++++++++++++++----
 3 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/src/main/scala/xiangshan/cache/mmu/L2TLB.scala b/src/main/scala/xiangshan/cache/mmu/L2TLB.scala
index 44647da06..5819246a7 100644
--- a/src/main/scala/xiangshan/cache/mmu/L2TLB.scala
+++ b/src/main/scala/xiangshan/cache/mmu/L2TLB.scala
@@ -127,7 +127,7 @@ class L2TLBImp(outer: L2TLB)(implicit p: Parameters) extends PtwModule(outer) wi
   val LLPTWARB_CACHE=0
   val LLPTWARB_PTW=1
   val llptw_arb = Module(new Arbiter(new LLPTWInBundle, 2))
-  llptw_arb.io.in(LLPTWARB_CACHE).valid := cache.io.resp.valid && !cache.io.resp.bits.hit && cache.io.resp.bits.toFsm.l2Hit
+  llptw_arb.io.in(LLPTWARB_CACHE).valid := cache.io.resp.valid && !cache.io.resp.bits.hit && cache.io.resp.bits.toFsm.l2Hit && !cache.io.resp.bits.bypassed
   llptw_arb.io.in(LLPTWARB_CACHE).bits.req_info := cache.io.resp.bits.req_info
   llptw_arb.io.in(LLPTWARB_CACHE).bits.ppn := cache.io.resp.bits.toFsm.ppn
   llptw_arb.io.in(LLPTWARB_PTW) <> ptw.io.llptw
@@ -139,21 +139,24 @@ class L2TLBImp(outer: L2TLB)(implicit p: Parameters) extends PtwModule(outer) wi
   cache.io.req.bits.req_info.vpn := arb2.io.out.bits.vpn
   cache.io.req.bits.req_info.source := arb2.io.out.bits.source
   cache.io.req.bits.isFirst := arb2.io.chosen =/= InArbMissQueuePort.U
+  cache.io.req.bits.bypassed.map(_ := false.B)
   cache.io.sfence := sfence
   cache.io.csr := csr
   cache.io.resp.ready := Mux(cache.io.resp.bits.hit,
     outReady(cache.io.resp.bits.req_info.source, outArbCachePort),
-    Mux(cache.io.resp.bits.toFsm.l2Hit, llptw_arb.io.in(LLPTWARB_CACHE).ready,
-    missQueue.io.in.ready || ptw.io.req.ready))
+    Mux(cache.io.resp.bits.toFsm.l2Hit && !cache.io.resp.bits.bypassed, llptw_arb.io.in(LLPTWARB_CACHE).ready,
+    Mux(cache.io.resp.bits.bypassed, missQueue.io.in.ready, missQueue.io.in.ready || ptw.io.req.ready)))
 
   missQueue.io.in.valid := cache.io.resp.valid && !cache.io.resp.bits.hit &&
-    !cache.io.resp.bits.toFsm.l2Hit && !from_pre(cache.io.resp.bits.req_info.source) && !ptw.io.req.ready
+    (!cache.io.resp.bits.toFsm.l2Hit || cache.io.resp.bits.bypassed) &&
+    !from_pre(cache.io.resp.bits.req_info.source) &&
+    (cache.io.resp.bits.bypassed || !ptw.io.req.ready)
   missQueue.io.in.bits := cache.io.resp.bits.req_info
   missQueue.io.sfence := sfence
   missQueue.io.csr := csr
 
   // NOTE: missQueue req has higher priority
-  ptw.io.req.valid := cache.io.resp.valid && !cache.io.resp.bits.hit && !cache.io.resp.bits.toFsm.l2Hit
+  ptw.io.req.valid := cache.io.resp.valid && !cache.io.resp.bits.hit && !cache.io.resp.bits.toFsm.l2Hit && !cache.io.resp.bits.bypassed
   ptw.io.req.bits.req_info := cache.io.resp.bits.req_info
   ptw.io.req.bits.l1Hit := cache.io.resp.bits.toFsm.l1Hit
   ptw.io.req.bits.ppn := cache.io.resp.bits.toFsm.ppn
@@ -191,6 +194,21 @@ class L2TLBImp(outer: L2TLB)(implicit p: Parameters) extends PtwModule(outer) wi
   mem_arb.io.in(1) <> llptw_mem.req
   mem_arb.io.out.ready := mem.a.ready && !flush
 
+  // assert: must not send two mem accesses to the same addr
+  val last_resp_vpn = RegEnable(cache.io.refill.bits.req_info.vpn, cache.io.refill.valid)
+  val last_resp_level = RegEnable(cache.io.refill.bits.level, cache.io.refill.valid)
+  val last_resp_v = RegInit(false.B)
+  val last_has_invalid = !Cat(cache.io.refill.bits.ptes.asTypeOf(Vec(blockBits/XLEN, UInt(XLEN.W))).map(a => a(0))).andR
+  when (cache.io.refill.valid) { last_resp_v := !last_has_invalid }
+  when (flush) { last_resp_v := false.B }
+  XSError(last_resp_v && cache.io.refill.valid &&
+    (cache.io.refill.bits.req_info.vpn === last_resp_vpn) &&
+    (cache.io.refill.bits.level === last_resp_level),
+    "l2tlb should not access mem at the same addr twice")
+  // ATTENTION: this may wrongly assert when a ptes block at l2 has a valid last
+  // part but an invalid current part, so one more mem access happens.
+  // If this happens, remove the assert.
+
   val req_addr_low = Reg(Vec(MemReqWidth, UInt((log2Up(l2tlbParams.blockBytes)-log2Up(XLEN/8)).W)))
 
   when (llptw.io.in.fire()) {
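The assert block above is easier to follow outside the diff. What follows is a minimal, self-contained Chisel sketch of the same check, not the actual XiangShan code: the widths are assumptions (a 27-bit Sv39 vpn, eight 64-bit PTEs per refill block), the module name and ports are hypothetical, and a plain chisel3 assert stands in for XiangShan's XSError helper.

import chisel3._
import chisel3.util._

// Hypothetical stand-alone model of the duplicate-refill assert above.
class DupRefillCheck extends Module {
  val io = IO(new Bundle {
    val refillValid = Input(Bool())
    val refillVpn   = Input(UInt(27.W))        // Sv39 assumption
    val refillLevel = Input(UInt(2.W))
    val refillPtes  = Input(Vec(8, UInt(64.W))) // one refilled PTE block
    val flush       = Input(Bool())
    val dup         = Output(Bool())
  })

  // Remember the vpn/level of the last refill ...
  val lastVpn   = RegEnable(io.refillVpn, io.refillValid)
  val lastLevel = RegEnable(io.refillLevel, io.refillValid)
  val lastValid = RegInit(false.B)

  // ... but only arm the check when every PTE of that refill had its V bit
  // set: if some part was invalid, a second access to the same block can be
  // legitimate (the corner case the ATTENTION note warns about).
  val hasInvalid = !io.refillPtes.map(_(0)).reduce(_ && _)
  when (io.refillValid) { lastValid := !hasInvalid }
  when (io.flush)       { lastValid := false.B }

  io.dup := lastValid && io.refillValid &&
    io.refillVpn === lastVpn && io.refillLevel === lastLevel
  assert(!io.dup, "l2tlb should not access mem at the same addr twice")
}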
diff --git a/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala b/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
index 89aae7428..c8712aeba 100644
--- a/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
+++ b/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
@@ -578,6 +578,7 @@ class PtwEntry(tagLen: Int, hasPerm: Boolean = false, hasLevel: Boolean = false)
 
 class PtwEntries(num: Int, tagLen: Int, level: Int, hasPerm: Boolean)(implicit p: Parameters) extends PtwBundle {
   require(log2Up(num)==log2Down(num))
+  // NOTE: hasPerm means the entries are leaves (or not).
 
   val tag = UInt(tagLen.W)
   val asid = UInt(asidLen.W)
@@ -586,6 +587,13 @@ class PtwEntries(num: Int, tagLen: Int, level: Int, hasPerm: Boolean)(implicit p
   val perms = if (hasPerm) Some(Vec(num, new PtePermBundle)) else None
   val prefetch = Bool()
   // println(s"PtwEntries: tag:1*${tagLen} ppns:${num}*${ppnLen} vs:${num}*1")
+  // NOTE: vs is used differently at each level:
+  //  - for l3, which stores the leaves, vs means page fault or not;
+  //  - for l2, which should not store leaves, vs means valid or not, so it participates in the hit check.
+  //    Because l2 should not store leaves, it stores no perms; even if l2 hit a leaf,
+  //    the perms would still be unavailable and a page walk would still be needed. Complex but nothing helpful.
+  // TODO: divide vs into validVec and pfVec.
+  //  - for l2: an entry may be valid but pf, so no page walk is needed; return a random pte with pf set.
 
   def tagClip(vpn: UInt) = {
     require(vpn.getWidth == vpnLen)
@@ -598,7 +606,7 @@ class PtwEntries(num: Int, tagLen: Int, level: Int, hasPerm: Boolean)(implicit p
 
   def hit(vpn: UInt, asid: UInt, ignoreAsid: Boolean = false) = {
     val asid_hit = if (ignoreAsid) true.B else (this.asid === asid)
-    asid_hit && tag === tagClip(vpn) && vs(sectorIdxClip(vpn, level)) // TODO: optimize this. don't need to compare each with tag
+    asid_hit && tag === tagClip(vpn) && (if (hasPerm) true.B else vs(sectorIdxClip(vpn, level)))
   }
 
   def genEntries(vpn: UInt, asid: UInt, data: UInt, levelUInt: UInt, prefetch: Bool) = {
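The corrected hit check is the subtle part of this hunk: for leaf arrays (hasPerm) vs encodes page fault rather than validity and must not gate the hit, while for non-leaf l2 arrays vs is the per-sector valid bit and must gate it. Below is a simplified sketch of that distinction; the class, widths, and the flat sector-index clip are assumptions standing in for XiangShan's real tagClip/sectorIdxClip.

import chisel3._
import chisel3.util._

// Simplified sector-grouped PTW entry array (hypothetical parameters).
class SectoredEntries(num: Int, tagLen: Int, vpnLen: Int, hasPerm: Boolean) extends Bundle {
  val tag  = UInt(tagLen.W)
  val asid = UInt(16.W)
  val vs   = Vec(num, Bool()) // leaf arrays: pf-or-not; non-leaf arrays: valid-or-not

  def hit(vpn: UInt, asid: UInt): Bool = {
    val tagHit  = tag === vpn(vpnLen - 1, vpnLen - tagLen)
    val sector  = vpn(vpnLen - tagLen - 1, vpnLen - tagLen - log2Up(num))
    val asidHit = this.asid === asid
    // Leaf entries (hasPerm): vs means page fault, so a hit must not depend
    // on it; the pf bit is reported separately. Non-leaf entries: vs means
    // valid, so it must participate in the hit check.
    asidHit && tagHit && (if (hasPerm) true.B else vs(sector))
  }
}

With hasPerm = true this behaves like the l3 arrays, where a faulting leaf must still report a hit so the pf can be returned; with hasPerm = false it behaves like the l2 arrays, where an invalid sector must miss and fall through to a page walk.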
diff --git a/src/main/scala/xiangshan/cache/mmu/PageTableCache.scala b/src/main/scala/xiangshan/cache/mmu/PageTableCache.scala
index f7fcef48a..dbe0e1000 100644
--- a/src/main/scala/xiangshan/cache/mmu/PageTableCache.scala
+++ b/src/main/scala/xiangshan/cache/mmu/PageTableCache.scala
@@ -63,6 +63,7 @@ class PageCacheRespBundle(implicit p: Parameters) extends PtwBundle {
 class PtwCacheReq(implicit p: Parameters) extends PtwBundle {
   val req_info = new L2TlbInnerBundle()
   val isFirst = Bool()
+  val bypassed = Vec(3, Bool())
 }
 
 class PtwCacheIO()(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwConst {
@@ -72,6 +73,7 @@ class PtwCacheIO()(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwCo
     val isFirst = Bool()
     val hit = Bool()
     val prefetch = Bool() // is the entry fetched by prefetch
+    val bypassed = Bool()
     val toFsm = new Bundle {
       val l1Hit = Bool()
       val l2Hit = Bool()
@@ -113,9 +115,9 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
   val stageResp = Wire(Decoupled(new PtwCacheReq()))  // deq stage
   stageReq <> io.req
   PipelineConnect(stageReq, stageDelay(0), stageDelay(1).ready, flush, rwHarzad)
-  InsideStageConnect(stageDelay(0), stageDelay(1))
+  InsideStageConnect(stageDelay(0), stageDelay(1), stageReq.fire)
   PipelineConnect(stageDelay(1), stageCheck(0), stageCheck(1).ready, flush)
-  InsideStageConnect(stageCheck(0), stageCheck(1))
+  InsideStageConnect(stageCheck(0), stageCheck(1), stageDelay(1).fire)
   PipelineConnect(stageCheck(1), stageResp, io.resp.ready, flush)
   stageResp.ready := !stageResp.valid || io.resp.ready
 
@@ -193,14 +195,25 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
   val stageCheck_valid_1cycle = OneCycleValid(stageDelay(1).fire, flush)  // replace & perf counter
   val stageResp_valid_1cycle = OneCycleValid(stageCheck(1).fire, flush)   // ecc flush
 
+
+  def vpn_match(vpn1: UInt, vpn2: UInt, level: Int) = {
+    vpn1(vpnnLen*3-1, vpnnLen*(2-level)+3) === vpn2(vpnnLen*3-1, vpnnLen*(2-level)+3)
+  }
+  // NOTE: not actually bypassed; just check whether the refill vpn hits, then re-access the page cache
+  def refill_bypass(vpn: UInt, level: Int) = {
+    io.refill.valid && (level.U === io.refill.bits.level) && vpn_match(io.refill.bits.req_info.vpn, vpn, level)
+  }
+
   // l1
   val ptwl1replace = ReplacementPolicy.fromString(l2tlbParams.l1Replacer, l2tlbParams.l1Size)
   val (l1Hit, l1HitPPN, l1Pre) = {
     val hitVecT = l1.zipWithIndex.map { case (e, i) => e.hit(stageReq.bits.req_info.vpn, io.csr.satp.asid) && l1v(i) }
     val hitVec = hitVecT.map(RegEnable(_, stageReq.fire))
-    val hitPPN = ParallelPriorityMux(hitVec zip l1.map(_.ppn))
-    val hitPre = ParallelPriorityMux(hitVec zip l1.map(_.prefetch))
-    val hit = ParallelOR(hitVec)
+
+    // stageDelay, but check for l1
+    val hitPPN = DataHoldBypass(ParallelMux(hitVec zip l1.map(_.ppn)), stageDelay_valid_1cycle)
+    val hitPre = DataHoldBypass(ParallelMux(hitVec zip l1.map(_.prefetch)), stageDelay_valid_1cycle)
+    val hit = DataHoldBypass(ParallelOR(hitVec), stageDelay_valid_1cycle)
 
     when (hit && stageDelay_valid_1cycle) { ptwl1replace.access(OHToUInt(hitVec)) }
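The bit slicing in vpn_match is dense, so here is a sketch that spells it out under Sv39 assumptions (vpnnLen = 9, hence a 27-bit vpn, and 2^3 = 8 PTEs per refilled block); the object and method names are hypothetical.

import chisel3._

object RefillBypass {
  val vpnnLen = 9 // Sv39 assumption: three 9-bit vpn segments

  // Compare two vpns down to the granularity of one refilled PTE block at
  // `level`: keep the segments above the level being walked, then drop the
  // 3 low index bits that select a PTE inside the 8-entry block.
  def vpnMatch(vpn1: UInt, vpn2: UInt, level: Int): Bool =
    vpn1(vpnnLen * 3 - 1, vpnnLen * (2 - level) + 3) ===
      vpn2(vpnnLen * 3 - 1, vpnnLen * (2 - level) + 3)

  // "Bypass" is a misnomer the NOTE above also flags: no refill data is
  // forwarded. This only reports that an in-flight refill covers the vpn,
  // so the request should re-access the page cache instead of issuing a
  // second, duplicate memory access.
  def refillBypass(refillValid: Bool, refillLevel: UInt, refillVpn: UInt,
                   vpn: UInt, level: Int): Bool =
    refillValid && (level.U === refillLevel) && vpnMatch(refillVpn, vpn, level)
}

For level = 2 this compares vpn(26, 3), everything except the 3 in-block bits, which is exactly the span one l3 refill covers.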
@@ -272,6 +285,7 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
     // delay one cycle after sram read
     val data_resp = DataHoldBypass(l3.io.r.resp.data, stageDelay_valid_1cycle)
     val vVec_delay = DataHoldBypass(getl3vSet(stageDelay(0).bits.req_info.vpn), stageDelay_valid_1cycle)
+    val bypass_delay = DataHoldBypass(refill_bypass(stageDelay(0).bits.req_info.vpn, 2), stageDelay_valid_1cycle || io.refill.valid)
 
     // check hit and ecc
     val check_vpn = stageCheck(0).bits.req_info.vpn
@@ -305,6 +319,7 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
     }
     val l3HitPPN = l3HitData.ppns(genPtwL3SectorIdx(stageCheck(0).bits.req_info.vpn))
     val l3HitPerm = l3HitData.perms.getOrElse(0.U.asTypeOf(Vec(PtwL3SectorSize, new PtePermBundle)))(genPtwL3SectorIdx(stageCheck(0).bits.req_info.vpn))
+    val l3HitValid = l3HitData.vs(genPtwL3SectorIdx(stageCheck(0).bits.req_info.vpn))
 
   // super page
   val spreplace = ReplacementPolicy.fromString(l2tlbParams.spReplacer, l2tlbParams.spSize)
@@ -336,16 +351,24 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
   val check_res = Wire(new PageCacheRespBundle)
   check_res.l1.apply(l1Hit, l1Pre, l1HitPPN)
   check_res.l2.apply(l2Hit, l2Pre, l2HitPPN, ecc = l2eccError)
-  check_res.l3.apply(l3Hit, l3Pre, l3HitPPN, l3HitPerm, l3eccError)
+  check_res.l3.apply(l3Hit, l3Pre, l3HitPPN, l3HitPerm, l3eccError, valid = l3HitValid)
   check_res.sp.apply(spHit, spPre, spHitData.ppn, spHitPerm, false.B, spHitLevel, spValid)
 
-  // stage3, add stage 3 for ecc check...
   val resp_res = Reg(new PageCacheRespBundle)
   when (stageCheck(1).fire) { resp_res := check_res }
 
+  // stageResp bypass
+  val bypassed = Wire(Vec(3, Bool()))
+  bypassed.indices.foreach(i =>
+    bypassed(i) := stageResp.bits.bypassed(i) ||
+      ValidHoldBypass(refill_bypass(stageResp.bits.req_info.vpn, i),
+        OneCycleValid(stageCheck(1).fire, false.B) || io.refill.valid)
+  )
+
   io.resp.bits.req_info := stageResp.bits.req_info
   io.resp.bits.isFirst := stageResp.bits.isFirst
   io.resp.bits.hit := resp_res.l3.hit || resp_res.sp.hit
+  io.resp.bits.bypassed := bypassed(2) || (bypassed(1) && !resp_res.l2.hit) || (bypassed(0) && !resp_res.l1.hit)
   io.resp.bits.prefetch := resp_res.l3.pre && resp_res.l3.hit || resp_res.sp.pre && resp_res.sp.hit
   io.resp.bits.toFsm.l1Hit := resp_res.l1.hit
   io.resp.bits.toFsm.l2Hit := resp_res.l2.hit
@@ -359,6 +382,7 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
   io.resp.bits.toTlb.v := Mux(resp_res.sp.hit, resp_res.sp.v, resp_res.l3.v)
   io.resp.valid := stageResp.valid
   XSError(stageResp.valid && resp_res.l3.hit && resp_res.sp.hit, "normal page and super page both hit")
+  XSError(stageResp.valid && io.resp.bits.hit && bypassed(2), "page cache, bypassed but hit")
 
   // refill Perf
   val l1RefillPerf = Wire(Vec(l2tlbParams.l1Size, Bool()))
@@ -582,10 +606,13 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
     }
   }
 
-  def InsideStageConnect[T <:Data](in: DecoupledIO[T], out: DecoupledIO[T], block: Bool = false.B): Unit = {
+  def InsideStageConnect(in: DecoupledIO[PtwCacheReq], out: DecoupledIO[PtwCacheReq], InFire: Bool): Unit = {
     in.ready := !in.valid || out.ready
     out.valid := in.valid
     out.bits := in.bits
+    out.bits.bypassed.zip(in.bits.bypassed).zipWithIndex.map{ case (b, i) =>
+      b._1 := b._2 || DataHoldBypass(refill_bypass(in.bits.req_info.vpn, i), OneCycleValid(InFire, false.B) || io.refill.valid)
+    }
  }
 
   // Perf Count
-- 
GitLab
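Finally, the bypassed flag travels with the request: InsideStageConnect ORs in any refill match observed while the request sits in a stage, and io.resp.bits.bypassed folds the three levels together. Below is a reduced, self-contained model of the per-stage accumulation, with XiangShan's ValidHoldBypass/OneCycleValid replaced by an explicit sticky register; all names and widths are hypothetical.

import chisel3._
import chisel3.util._

// Reduced model of per-stage "bypassed" accumulation: once a concurrent
// refill matches the request's vpn at some level, the flag sticks until a
// new request enters the stage.
class StickyBypass extends Module {
  val io = IO(new Bundle {
    val inFire      = Input(Bool())          // a new request enters this stage
    val inBypassed  = Input(Vec(3, Bool()))  // flags accumulated upstream
    val vpn         = Input(UInt(27.W))      // Sv39 assumption
    val refillValid = Input(Bool())
    val refillLevel = Input(UInt(2.W))
    val refillVpn   = Input(UInt(27.W))
    val outBypassed = Output(Vec(3, Bool()))
  })

  def vpnMatch(a: UInt, b: UInt, level: Int): Bool = {
    val vpnnLen = 9 // Sv39: compare down to one 8-PTE block, as in vpn_match
    a(vpnnLen * 3 - 1, vpnnLen * (2 - level) + 3) ===
      b(vpnnLen * 3 - 1, vpnnLen * (2 - level) + 3)
  }

  for (i <- 0 until 3) {
    val matchNow = io.refillValid && io.refillLevel === i.U &&
      vpnMatch(io.refillVpn, io.vpn, i)
    val sticky = RegInit(false.B)
    when (io.inFire)  { sticky := false.B } // clear for the new occupant
    when (matchNow)   { sticky := true.B }  // later `when` wins on conflict
    io.outBypassed(i) := io.inBypassed(i) || sticky || matchNow
  }
}

At the response stage, a level's flag only matters if the walk actually has to pass through that level, which is why bypassed(1) is masked by resp_res.l2.hit and bypassed(0) by resp_res.l1.hit in io.resp.bits.bypassed.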