From 1f4a7c0c507b97b8ef570c1ffa0c96b9c47a6b3f Mon Sep 17 00:00:00 2001
From: Lemover <1773908404@qq.com>
Date: Mon, 25 Jul 2022 14:30:36 +0800
Subject: [PATCH] l2tlb: add assert that does not allow duplicate mem access (#1669)

Add bypass checking to the refill logic to prevent duplicate mem accesses
caused by wrongly reported misses. The refill data is not actually
forwarded: the cache only checks whether the vpn of an in-flight refill
matches the request and, if so, re-accesses the page cache. Also add some
asserts to catch duplicate mem accesses. These asserts may be wrongly
triggered in some corner cases.

* l2tlb: add assert that does not allow duplicate mem access
* l2tlb: change dup mem access assert to dup mem resp assert
* l2tlb: when a refill arrives after a page cache access, re-access the page cache
* l2tlb: fix assert signal that was wrongly assigned
* l2tlb: store empty super pages in sp entries
* l2tlb: fix bug that lost reqs because bypassed reqs did not enqueue mq
* l2tlb: fix bug that lost reqs because bypassed reqs did not enqueue mq
* l2tlb: fix bug in cache resp ready logic
* l2tlb.cache: fix bug in vpn bypass match
* l2tlb.cache: fix bug of vs participating in hit check
---
 .../scala/xiangshan/cache/mmu/L2TLB.scala     | 28 +++++++++---
 .../scala/xiangshan/cache/mmu/MMUBundle.scala | 10 ++++-
 .../xiangshan/cache/mmu/PageTableCache.scala  | 43 +++++++++++++++----
 3 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/src/main/scala/xiangshan/cache/mmu/L2TLB.scala b/src/main/scala/xiangshan/cache/mmu/L2TLB.scala
index 44647da06..5819246a7 100644
--- a/src/main/scala/xiangshan/cache/mmu/L2TLB.scala
+++ b/src/main/scala/xiangshan/cache/mmu/L2TLB.scala
@@ -127,7 +127,7 @@ class L2TLBImp(outer: L2TLB)(implicit p: Parameters) extends PtwModule(outer) wi
   val LLPTWARB_CACHE=0
   val LLPTWARB_PTW=1
   val llptw_arb = Module(new Arbiter(new LLPTWInBundle, 2))
-  llptw_arb.io.in(LLPTWARB_CACHE).valid := cache.io.resp.valid && !cache.io.resp.bits.hit && cache.io.resp.bits.toFsm.l2Hit
+  llptw_arb.io.in(LLPTWARB_CACHE).valid := cache.io.resp.valid && !cache.io.resp.bits.hit && cache.io.resp.bits.toFsm.l2Hit && !cache.io.resp.bits.bypassed
   llptw_arb.io.in(LLPTWARB_CACHE).bits.req_info := cache.io.resp.bits.req_info
   llptw_arb.io.in(LLPTWARB_CACHE).bits.ppn := cache.io.resp.bits.toFsm.ppn
   llptw_arb.io.in(LLPTWARB_PTW) <> ptw.io.llptw
@@ -139,21 +139,24 @@ class L2TLBImp(outer: L2TLB)(implicit p: Parameters) extends PtwModule(outer) wi
   cache.io.req.bits.req_info.vpn := arb2.io.out.bits.vpn
   cache.io.req.bits.req_info.source := arb2.io.out.bits.source
   cache.io.req.bits.isFirst := arb2.io.chosen =/= InArbMissQueuePort.U
+  cache.io.req.bits.bypassed.map(_ := false.B)
   cache.io.sfence := sfence
   cache.io.csr := csr
   cache.io.resp.ready := Mux(cache.io.resp.bits.hit,
     outReady(cache.io.resp.bits.req_info.source, outArbCachePort),
-    Mux(cache.io.resp.bits.toFsm.l2Hit, llptw_arb.io.in(LLPTWARB_CACHE).ready,
-    missQueue.io.in.ready || ptw.io.req.ready))
+    Mux(cache.io.resp.bits.toFsm.l2Hit && !cache.io.resp.bits.bypassed, llptw_arb.io.in(LLPTWARB_CACHE).ready,
+    Mux(cache.io.resp.bits.bypassed, missQueue.io.in.ready, missQueue.io.in.ready || ptw.io.req.ready)))
 
   missQueue.io.in.valid := cache.io.resp.valid && !cache.io.resp.bits.hit &&
-    !cache.io.resp.bits.toFsm.l2Hit && !from_pre(cache.io.resp.bits.req_info.source) && !ptw.io.req.ready
+    (!cache.io.resp.bits.toFsm.l2Hit || cache.io.resp.bits.bypassed) &&
+    !from_pre(cache.io.resp.bits.req_info.source) &&
+    (cache.io.resp.bits.bypassed || !ptw.io.req.ready)
   missQueue.io.in.bits := cache.io.resp.bits.req_info
   missQueue.io.sfence := sfence
   missQueue.io.csr := csr
 
   // NOTE: missQueue req has higher priority
-  ptw.io.req.valid := cache.io.resp.valid && !cache.io.resp.bits.hit && !cache.io.resp.bits.toFsm.l2Hit
+  ptw.io.req.valid := cache.io.resp.valid && !cache.io.resp.bits.hit && !cache.io.resp.bits.toFsm.l2Hit && !cache.io.resp.bits.bypassed
   ptw.io.req.bits.req_info := cache.io.resp.bits.req_info
   ptw.io.req.bits.l1Hit := cache.io.resp.bits.toFsm.l1Hit
   ptw.io.req.bits.ppn := cache.io.resp.bits.toFsm.ppn
@@ -191,6 +194,21 @@ class L2TLBImp(outer: L2TLB)(implicit p: Parameters) extends PtwModule(outer) wi
   mem_arb.io.in(1) <> llptw_mem.req
   mem_arb.io.out.ready := mem.a.ready && !flush
 
+  // assert: must not send two mem accesses to the same addr
+  val last_resp_vpn = RegEnable(cache.io.refill.bits.req_info.vpn, cache.io.refill.valid)
+  val last_resp_level = RegEnable(cache.io.refill.bits.level, cache.io.refill.valid)
+  val last_resp_v = RegInit(false.B)
+  val last_has_invalid = !Cat(cache.io.refill.bits.ptes.asTypeOf(Vec(blockBits/XLEN, UInt(XLEN.W))).map(a => a(0))).andR
+  when (cache.io.refill.valid) { last_resp_v := !last_has_invalid }
+  when (flush) { last_resp_v := false.B }
+  XSError(last_resp_v && cache.io.refill.valid &&
+    (cache.io.refill.bits.req_info.vpn === last_resp_vpn) &&
+    (cache.io.refill.bits.level === last_resp_level),
+    "l2tlb should not access mem at the same addr twice")
+  // ATTENTION: this may wrongly assert when a ptes block at l2 has a valid last
+  // part but an invalid current part, so one more mem access happens.
+  // If this happens, remove the assert.
+
   val req_addr_low = Reg(Vec(MemReqWidth, UInt((log2Up(l2tlbParams.blockBytes)-log2Up(XLEN/8)).W)))
 
   when (llptw.io.in.fire()) {
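The assert block above is easier to follow outside the diff. What follows is a minimal, self-contained Chisel sketch of the same check, not the actual XiangShan code: the widths are assumptions (a 27-bit Sv39 vpn, eight 64-bit PTEs per refill block), the module name and ports are hypothetical, and a plain chisel3 assert stands in for XiangShan's XSError helper.

import chisel3._
import chisel3.util._

// Hypothetical stand-alone model of the duplicate-refill assert above.
class DupRefillCheck extends Module {
  val io = IO(new Bundle {
    val refillValid = Input(Bool())
    val refillVpn   = Input(UInt(27.W))        // Sv39 assumption
    val refillLevel = Input(UInt(2.W))
    val refillPtes  = Input(Vec(8, UInt(64.W))) // one refilled PTE block
    val flush       = Input(Bool())
    val dup         = Output(Bool())
  })

  // Remember the vpn/level of the last refill ...
  val lastVpn   = RegEnable(io.refillVpn, io.refillValid)
  val lastLevel = RegEnable(io.refillLevel, io.refillValid)
  val lastValid = RegInit(false.B)

  // ... but only arm the check when every PTE of that refill had its V bit
  // set: if some part was invalid, a second access to the same block can be
  // legitimate (the corner case the ATTENTION note warns about).
  val hasInvalid = !io.refillPtes.map(_(0)).reduce(_ && _)
  when (io.refillValid) { lastValid := !hasInvalid }
  when (io.flush)       { lastValid := false.B }

  io.dup := lastValid && io.refillValid &&
    io.refillVpn === lastVpn && io.refillLevel === lastLevel
  assert(!io.dup, "l2tlb should not access mem at the same addr twice")
}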
diff --git a/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala b/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
index 89aae7428..c8712aeba 100644
--- a/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
+++ b/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
@@ -578,6 +578,7 @@ class PtwEntry(tagLen: Int, hasPerm: Boolean = false, hasLevel: Boolean = false)
 
 class PtwEntries(num: Int, tagLen: Int, level: Int, hasPerm: Boolean)(implicit p: Parameters) extends PtwBundle {
   require(log2Up(num)==log2Down(num))
+  // NOTE: hasPerm means the entries are leaves (or not).
 
   val tag = UInt(tagLen.W)
   val asid = UInt(asidLen.W)
@@ -586,6 +587,13 @@ class PtwEntries(num: Int, tagLen: Int, level: Int, hasPerm: Boolean)(implicit p
   val perms = if (hasPerm) Some(Vec(num, new PtePermBundle)) else None
   val prefetch = Bool()
   // println(s"PtwEntries: tag:1*${tagLen} ppns:${num}*${ppnLen} vs:${num}*1")
+  // NOTE: vs is used differently at each level:
+  //  - for l3, which stores the leaves, vs means page fault or not;
+  //  - for l2, which should not store leaves, vs means valid or not, so it participates in the hit check.
+  //    Because l2 should not store leaves, it stores no perms; even if l2 hit a leaf,
+  //    the perms would still be unavailable and a page walk would still be needed. Complex but nothing helpful.
+  // TODO: divide vs into validVec and pfVec.
+  //  - for l2: an entry may be valid but pf, so no page walk is needed; return a random pte with pf set.
 
   def tagClip(vpn: UInt) = {
     require(vpn.getWidth == vpnLen)
@@ -598,7 +606,7 @@ class PtwEntries(num: Int, tagLen: Int, level: Int, hasPerm: Boolean)(implicit p
 
   def hit(vpn: UInt, asid: UInt, ignoreAsid: Boolean = false) = {
     val asid_hit = if (ignoreAsid) true.B else (this.asid === asid)
-    asid_hit && tag === tagClip(vpn) && vs(sectorIdxClip(vpn, level)) // TODO: optimize this. don't need to compare each with tag
+    asid_hit && tag === tagClip(vpn) && (if (hasPerm) true.B else vs(sectorIdxClip(vpn, level)))
   }
 
   def genEntries(vpn: UInt, asid: UInt, data: UInt, levelUInt: UInt, prefetch: Bool) = {
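The corrected hit check is the subtle part of this hunk: for leaf arrays (hasPerm) vs encodes page fault rather than validity and must not gate the hit, while for non-leaf l2 arrays vs is the per-sector valid bit and must gate it. Below is a simplified sketch of that distinction; the class, widths, and the flat sector-index clip are assumptions standing in for XiangShan's real tagClip/sectorIdxClip.

import chisel3._
import chisel3.util._

// Simplified sector-grouped PTW entry array (hypothetical parameters).
class SectoredEntries(num: Int, tagLen: Int, vpnLen: Int, hasPerm: Boolean) extends Bundle {
  val tag  = UInt(tagLen.W)
  val asid = UInt(16.W)
  val vs   = Vec(num, Bool()) // leaf arrays: pf-or-not; non-leaf arrays: valid-or-not

  def hit(vpn: UInt, asid: UInt): Bool = {
    val tagHit  = tag === vpn(vpnLen - 1, vpnLen - tagLen)
    val sector  = vpn(vpnLen - tagLen - 1, vpnLen - tagLen - log2Up(num))
    val asidHit = this.asid === asid
    // Leaf entries (hasPerm): vs means page fault, so a hit must not depend
    // on it; the pf bit is reported separately. Non-leaf entries: vs means
    // valid, so it must participate in the hit check.
    asidHit && tagHit && (if (hasPerm) true.B else vs(sector))
  }
}

With hasPerm = true this behaves like the l3 arrays, where a faulting leaf must still report a hit so the pf can be returned; with hasPerm = false it behaves like the l2 arrays, where an invalid sector must miss and fall through to a page walk.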
diff --git a/src/main/scala/xiangshan/cache/mmu/PageTableCache.scala b/src/main/scala/xiangshan/cache/mmu/PageTableCache.scala
index f7fcef48a..dbe0e1000 100644
--- a/src/main/scala/xiangshan/cache/mmu/PageTableCache.scala
+++ b/src/main/scala/xiangshan/cache/mmu/PageTableCache.scala
@@ -63,6 +63,7 @@ class PageCacheRespBundle(implicit p: Parameters) extends PtwBundle {
 class PtwCacheReq(implicit p: Parameters) extends PtwBundle {
   val req_info = new L2TlbInnerBundle()
   val isFirst = Bool()
+  val bypassed = Vec(3, Bool())
 }
 
 class PtwCacheIO()(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwConst {
@@ -72,6 +73,7 @@ class PtwCacheIO()(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwCo
     val isFirst = Bool()
     val hit = Bool()
     val prefetch = Bool() // is the entry fetched by prefetch
+    val bypassed = Bool()
     val toFsm = new Bundle {
       val l1Hit = Bool()
       val l2Hit = Bool()
@@ -113,9 +115,9 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
   val stageResp = Wire(Decoupled(new PtwCacheReq()))  // deq stage
   stageReq <> io.req
   PipelineConnect(stageReq, stageDelay(0), stageDelay(1).ready, flush, rwHarzad)
-  InsideStageConnect(stageDelay(0), stageDelay(1))
+  InsideStageConnect(stageDelay(0), stageDelay(1), stageReq.fire)
   PipelineConnect(stageDelay(1), stageCheck(0), stageCheck(1).ready, flush)
-  InsideStageConnect(stageCheck(0), stageCheck(1))
+  InsideStageConnect(stageCheck(0), stageCheck(1), stageDelay(1).fire)
   PipelineConnect(stageCheck(1), stageResp, io.resp.ready, flush)
   stageResp.ready := !stageResp.valid || io.resp.ready
 
@@ -193,14 +195,25 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
   val stageCheck_valid_1cycle = OneCycleValid(stageDelay(1).fire, flush)  // replace & perf counter
   val stageResp_valid_1cycle = OneCycleValid(stageCheck(1).fire, flush)   // ecc flush
 
+
+  def vpn_match(vpn1: UInt, vpn2: UInt, level: Int) = {
+    vpn1(vpnnLen*3-1, vpnnLen*(2-level)+3) === vpn2(vpnnLen*3-1, vpnnLen*(2-level)+3)
+  }
+  // NOTE: not actually bypassed; just check whether the refill vpn hits, then re-access the page cache
+  def refill_bypass(vpn: UInt, level: Int) = {
+    io.refill.valid && (level.U === io.refill.bits.level) && vpn_match(io.refill.bits.req_info.vpn, vpn, level)
+  }
+
   // l1
   val ptwl1replace = ReplacementPolicy.fromString(l2tlbParams.l1Replacer, l2tlbParams.l1Size)
   val (l1Hit, l1HitPPN, l1Pre) = {
     val hitVecT = l1.zipWithIndex.map { case (e, i) => e.hit(stageReq.bits.req_info.vpn, io.csr.satp.asid) && l1v(i) }
     val hitVec = hitVecT.map(RegEnable(_, stageReq.fire))
-    val hitPPN = ParallelPriorityMux(hitVec zip l1.map(_.ppn))
-    val hitPre = ParallelPriorityMux(hitVec zip l1.map(_.prefetch))
-    val hit = ParallelOR(hitVec)
+
+    // stageDelay, but check for l1
+    val hitPPN = DataHoldBypass(ParallelMux(hitVec zip l1.map(_.ppn)), stageDelay_valid_1cycle)
+    val hitPre = DataHoldBypass(ParallelMux(hitVec zip l1.map(_.prefetch)), stageDelay_valid_1cycle)
+    val hit = DataHoldBypass(ParallelOR(hitVec), stageDelay_valid_1cycle)
 
     when (hit && stageDelay_valid_1cycle) { ptwl1replace.access(OHToUInt(hitVec)) }
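The bit slicing in vpn_match is dense, so here is a sketch that spells it out under Sv39 assumptions (vpnnLen = 9, hence a 27-bit vpn, and 2^3 = 8 PTEs per refilled block); the object and method names are hypothetical.

import chisel3._

object RefillBypass {
  val vpnnLen = 9 // Sv39 assumption: three 9-bit vpn segments

  // Compare two vpns down to the granularity of one refilled PTE block at
  // `level`: keep the segments above the level being walked, then drop the
  // 3 low index bits that select a PTE inside the 8-entry block.
  def vpnMatch(vpn1: UInt, vpn2: UInt, level: Int): Bool =
    vpn1(vpnnLen * 3 - 1, vpnnLen * (2 - level) + 3) ===
      vpn2(vpnnLen * 3 - 1, vpnnLen * (2 - level) + 3)

  // "Bypass" is a misnomer the NOTE above also flags: no refill data is
  // forwarded. This only reports that an in-flight refill covers the vpn,
  // so the request should re-access the page cache instead of issuing a
  // second, duplicate memory access.
  def refillBypass(refillValid: Bool, refillLevel: UInt, refillVpn: UInt,
                   vpn: UInt, level: Int): Bool =
    refillValid && (level.U === refillLevel) && vpnMatch(refillVpn, vpn, level)
}

For level = 2 this compares vpn(26, 3), everything except the 3 in-block bits, which is exactly the span one l3 refill covers.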
@@ -272,6 +285,7 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
     // delay one cycle after sram read
     val data_resp = DataHoldBypass(l3.io.r.resp.data, stageDelay_valid_1cycle)
     val vVec_delay = DataHoldBypass(getl3vSet(stageDelay(0).bits.req_info.vpn), stageDelay_valid_1cycle)
+    val bypass_delay = DataHoldBypass(refill_bypass(stageDelay(0).bits.req_info.vpn, 2), stageDelay_valid_1cycle || io.refill.valid)
 
     // check hit and ecc
     val check_vpn = stageCheck(0).bits.req_info.vpn
@@ -305,6 +319,7 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
     }
     val l3HitPPN = l3HitData.ppns(genPtwL3SectorIdx(stageCheck(0).bits.req_info.vpn))
     val l3HitPerm = l3HitData.perms.getOrElse(0.U.asTypeOf(Vec(PtwL3SectorSize, new PtePermBundle)))(genPtwL3SectorIdx(stageCheck(0).bits.req_info.vpn))
+    val l3HitValid = l3HitData.vs(genPtwL3SectorIdx(stageCheck(0).bits.req_info.vpn))
 
   // super page
   val spreplace = ReplacementPolicy.fromString(l2tlbParams.spReplacer, l2tlbParams.spSize)
@@ -336,16 +351,24 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
   val check_res = Wire(new PageCacheRespBundle)
   check_res.l1.apply(l1Hit, l1Pre, l1HitPPN)
   check_res.l2.apply(l2Hit, l2Pre, l2HitPPN, ecc = l2eccError)
-  check_res.l3.apply(l3Hit, l3Pre, l3HitPPN, l3HitPerm, l3eccError)
+  check_res.l3.apply(l3Hit, l3Pre, l3HitPPN, l3HitPerm, l3eccError, valid = l3HitValid)
   check_res.sp.apply(spHit, spPre, spHitData.ppn, spHitPerm, false.B, spHitLevel, spValid)
 
-  // stage3, add stage 3 for ecc check...
   val resp_res = Reg(new PageCacheRespBundle)
   when (stageCheck(1).fire) { resp_res := check_res }
 
+  // stageResp bypass
+  val bypassed = Wire(Vec(3, Bool()))
+  bypassed.indices.foreach(i =>
+    bypassed(i) := stageResp.bits.bypassed(i) ||
+      ValidHoldBypass(refill_bypass(stageResp.bits.req_info.vpn, i),
+        OneCycleValid(stageCheck(1).fire, false.B) || io.refill.valid)
+  )
+
   io.resp.bits.req_info := stageResp.bits.req_info
   io.resp.bits.isFirst := stageResp.bits.isFirst
   io.resp.bits.hit := resp_res.l3.hit || resp_res.sp.hit
+  io.resp.bits.bypassed := bypassed(2) || (bypassed(1) && !resp_res.l2.hit) || (bypassed(0) && !resp_res.l1.hit)
   io.resp.bits.prefetch := resp_res.l3.pre && resp_res.l3.hit || resp_res.sp.pre && resp_res.sp.hit
   io.resp.bits.toFsm.l1Hit := resp_res.l1.hit
   io.resp.bits.toFsm.l2Hit := resp_res.l2.hit
@@ -359,6 +382,7 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
   io.resp.bits.toTlb.v := Mux(resp_res.sp.hit, resp_res.sp.v, resp_res.l3.v)
   io.resp.valid := stageResp.valid
   XSError(stageResp.valid && resp_res.l3.hit && resp_res.sp.hit, "normal page and super page both hit")
+  XSError(stageResp.valid && io.resp.bits.hit && bypassed(2), "page cache, bypassed but hit")
 
   // refill Perf
   val l1RefillPerf = Wire(Vec(l2tlbParams.l1Size, Bool()))
@@ -582,10 +606,13 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with
     }
   }
 
-  def InsideStageConnect[T <:Data](in: DecoupledIO[T], out: DecoupledIO[T], block: Bool = false.B): Unit = {
+  def InsideStageConnect(in: DecoupledIO[PtwCacheReq], out: DecoupledIO[PtwCacheReq], InFire: Bool): Unit = {
     in.ready := !in.valid || out.ready
     out.valid := in.valid
     out.bits := in.bits
+    out.bits.bypassed.zip(in.bits.bypassed).zipWithIndex.map{ case (b, i) =>
+      b._1 := b._2 || DataHoldBypass(refill_bypass(in.bits.req_info.vpn, i), OneCycleValid(InFire, false.B) || io.refill.valid)
+    }
  }
 
   // Perf Count
-- 
GitLab
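Finally, the bypassed flag travels with the request: InsideStageConnect ORs in any refill match observed while the request sits in a stage, and io.resp.bits.bypassed folds the three levels together. Below is a reduced, self-contained model of the per-stage accumulation, with XiangShan's ValidHoldBypass/OneCycleValid replaced by an explicit sticky register; all names and widths are hypothetical.

import chisel3._
import chisel3.util._

// Reduced model of per-stage "bypassed" accumulation: once a concurrent
// refill matches the request's vpn at some level, the flag sticks until a
// new request enters the stage.
class StickyBypass extends Module {
  val io = IO(new Bundle {
    val inFire      = Input(Bool())          // a new request enters this stage
    val inBypassed  = Input(Vec(3, Bool()))  // flags accumulated upstream
    val vpn         = Input(UInt(27.W))      // Sv39 assumption
    val refillValid = Input(Bool())
    val refillLevel = Input(UInt(2.W))
    val refillVpn   = Input(UInt(27.W))
    val outBypassed = Output(Vec(3, Bool()))
  })

  def vpnMatch(a: UInt, b: UInt, level: Int): Bool = {
    val vpnnLen = 9 // Sv39: compare down to one 8-PTE block, as in vpn_match
    a(vpnnLen * 3 - 1, vpnnLen * (2 - level) + 3) ===
      b(vpnnLen * 3 - 1, vpnnLen * (2 - level) + 3)
  }

  for (i <- 0 until 3) {
    val matchNow = io.refillValid && io.refillLevel === i.U &&
      vpnMatch(io.refillVpn, io.vpn, i)
    val sticky = RegInit(false.B)
    when (io.inFire)  { sticky := false.B } // clear for the new occupant
    when (matchNow)   { sticky := true.B }  // later `when` wins on conflict
    io.outBypassed(i) := io.inBypassed(i) || sticky || matchNow
  }
}

At the response stage, a level's flag only matters if the walk actually has to pass through that level, which is why bypassed(1) is masked by resp_res.l2.hit and bypassed(0) by resp_res.l1.hit in io.resp.bits.bypassed.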