diff --git a/src/main/scala/xiangshan/Parameters.scala b/src/main/scala/xiangshan/Parameters.scala index b65bfb156488c2a7b87b32d705d12ca01ba8cb59..660b522e9c778eaca5a4ba4b96770d3ecbab6328 100644 --- a/src/main/scala/xiangshan/Parameters.scala +++ b/src/main/scala/xiangshan/Parameters.scala @@ -92,6 +92,7 @@ case class XSCoreParameters PtwSPEntrySize: Int = 16, PtwL1EntrySize: Int = 16, PtwL2EntrySize: Int = 2048, //(256 * 8) + PtwMissQueueSize: Int = 8, NumPerfCounters: Int = 16, icacheParameters: ICacheParameters = ICacheParameters( tagECC = Some("parity"), @@ -227,6 +228,7 @@ trait HasXSParameter { val PtwSPEntrySize = coreParams.PtwSPEntrySize val PtwL1EntrySize = coreParams.PtwL1EntrySize val PtwL2EntrySize = coreParams.PtwL2EntrySize + val PtwMissQueueSize = coreParams.PtwMissQueueSize val NumPerfCounters = coreParams.NumPerfCounters val instBytes = if (HasCExtension) 2 else 4 diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala index 77a2c2d149553280f941072a260ab5fee8c3bbe8..9ac6d7aeaddadf3402feac76bdfc81ee526e447d 100644 --- a/src/main/scala/xiangshan/XSCore.scala +++ b/src/main/scala/xiangshan/XSCore.scala @@ -7,7 +7,7 @@ import xiangshan.backend.fu.HasExceptionNO import xiangshan.backend.dispatch.DispatchParameters import xiangshan.frontend._ import xiangshan.mem._ -import xiangshan.cache.{DCacheParameters, ICacheParameters, L1plusCacheWrapper, L1plusCacheParameters, PTWWrapper, PTWRepeater} +import xiangshan.cache.{DCacheParameters, ICacheParameters, L1plusCacheWrapper, L1plusCacheParameters, PTWWrapper, PTWRepeater, PTWFilter} import xiangshan.cache.prefetch._ import chipsalliance.rocketchip.config import chipsalliance.rocketchip.config.Parameters @@ -199,13 +199,13 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.uop.ctrl.commitType) val itlbRepeater = Module(new PTWRepeater()) - val dtlbRepeater = Module(new PTWRepeater()) + val dtlbRepeater = Module(new PTWFilter(LoadPipelineWidth + StorePipelineWidth, PtwMissQueueSize)) itlbRepeater.io.tlb <> frontend.io.ptw dtlbRepeater.io.tlb <> memBlock.io.ptw itlbRepeater.io.sfence <> integerBlock.io.fenceio.sfence dtlbRepeater.io.sfence <> integerBlock.io.fenceio.sfence - ptw.io.tlb(0) <> dtlbRepeater.io.ptw - ptw.io.tlb(1) <> itlbRepeater.io.ptw + ptw.io.tlb(0) <> itlbRepeater.io.ptw + ptw.io.tlb(1) <> dtlbRepeater.io.ptw ptw.io.sfence <> integerBlock.io.fenceio.sfence ptw.io.csr <> integerBlock.io.csrio.tlb diff --git a/src/main/scala/xiangshan/backend/MemBlock.scala b/src/main/scala/xiangshan/backend/MemBlock.scala index d516bad4facc59a411adb84865e2c05a328c773c..30676b0fbba455ad615735dc7f66633f42e29b8f 100644 --- a/src/main/scala/xiangshan/backend/MemBlock.scala +++ b/src/main/scala/xiangshan/backend/MemBlock.scala @@ -72,7 +72,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) val ldFastWakeUpInt = Flipped(new WakeUpBundle(exuParameters.LduCnt, 0)) - val ptw = new TlbPtwIO + val ptw = new TlbPtwIO(LoadPipelineWidth + StorePipelineWidth) val sfence = Input(new SfenceBundle) val tlbCsr = Input(new TlbCsrBundle) val fenceToSbuffer = Flipped(new FenceToSbuffer) diff --git a/src/main/scala/xiangshan/cache/PTW.scala b/src/main/scala/xiangshan/cache/PTW.scala index 1c32de29599e7250dbc58631ca9c61094a366382..c09c4df5b10075ba15d36082a9d0fd49f6e5ec74 100644 --- a/src/main/scala/xiangshan/cache/PTW.scala +++ b/src/main/scala/xiangshan/cache/PTW.scala @@ -8,9 +8,16 @@ import utils._ import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp} import freechips.rocketchip.tilelink._ +/* PTW Graph + * not yet + */ + trait HasPtwConst extends HasTlbConst with MemoryOpConstants{ val PtwWidth = 2 val MemBandWidth = 256 // TODO: change to IO bandwidth param + val SramSinglePort = true // NOTE: ptwl2, ptwl3 sram single port or not + + val bPtwWidth = log2Up(PtwWidth) // ptwl1: fully-associated val PtwL1TagLen = vpnnLen @@ -48,6 +55,8 @@ trait HasPtwConst extends HasTlbConst with MemoryOpConstants{ val SPTagLen = vpnnLen * 2 val spReplacer = Some("plru") + val MSHRSize = PtwMissQueueSize + def genPtwL2Idx(vpn: UInt) = { (vpn(vpnLen - 1, vpnnLen))(PtwL2IdxLen - 1, 0) } @@ -114,7 +123,6 @@ class PteBundle(implicit p: Parameters) extends PtwBundle{ } def unaligned(level: UInt) = { - assert(level=/=3.U) isLeaf() && !(level === 2.U || level === 1.U && ppn(vpnnLen-1, 0) === 0.U || level === 0.U && ppn(vpnnLen*2-1, 0) === 0.U) @@ -151,9 +159,15 @@ class PtwEntry(tagLen: Int, hasPerm: Boolean = false, hasLevel: Boolean = false) val perm = if (hasPerm) Some(new PtePermBundle) else None val level = if (hasLevel) Some(UInt(log2Up(Level).W)) else None - def hit(vpn: UInt) = { + def hit(vpn: UInt, allType: Boolean = false) = { require(vpn.getWidth == vpnLen) - if (hasLevel) { + if (allType) { + require(hasLevel) + val hit0 = tag(tagLen - 1, vpnnLen*2) === vpn(tagLen - 1, vpnnLen*2) + val hit1 = tag(vpnnLen*2 - 1, vpnnLen) === vpn(vpnnLen*2 - 1, vpnnLen) + val hit2 = tag(vpnnLen - 1, 0) === vpn(vpnnLen - 1, 0) + Mux(level.getOrElse(0.U) === 2.U, hit2 && hit1 && hit0, Mux(level.getOrElse(0.U) === 1.U, hit1 && hit0, hit0)) + } else if (hasLevel) { val hit0 = tag(tagLen - 1, tagLen - vpnnLen) === vpn(vpnLen - 1, vpnLen - vpnnLen) val hit1 = tag(tagLen - vpnnLen - 1, tagLen - vpnnLen * 2) === vpn(vpnLen - vpnnLen - 1, vpnLen - vpnnLen * 2) Mux(level.getOrElse(0.U) === 0.U, hit0, hit0 && hit1) @@ -286,27 +300,417 @@ class PTW()(implicit p: Parameters) extends LazyModule { lazy val module = new PTWImp(this) } -class PTWImp(outer: PTW) extends PtwModule(outer) { +class PTWImp(outer: PTW)(implicit p: Parameters) extends PtwModule(outer) { val (mem, edge) = outer.node.out.head require(mem.d.bits.data.getWidth == l1BusDataWidth, "PTW: tilelink width does not match") val io = IO(new PtwIO) + val difftestIO = IO(new Bundle() { + val ptwResp = Output(Bool()) + val ptwAddr = Output(UInt(64.W)) + val ptwData = Output(Vec(4, UInt(64.W))) + }) + + /* Ptw processes multiple requests + * Divide Ptw procedure into two stages: cache access ; mem access if cache miss + * miss queue itlb dtlb + * | | | + * ------arbiter------ + * | + * l1 - l2 - l3 - sp + * | + * ------------------------------------------- + * miss | queue | hit + * [][][][][][] | + * | | + * state machine accessing mem | + * | | + * ---------------arbiter--------------------- + * | | + * itlb dtlb + */ + + difftestIO <> DontCare - val arb = Module(new Arbiter(new PtwReq, PtwWidth)) - arb.io.in <> VecInit(io.tlb.map(_.req)) - val arbChosen = RegEnable(arb.io.chosen, arb.io.out.fire()) - val req = RegEnable(arb.io.out.bits, arb.io.out.fire()) - val resp = VecInit(io.tlb.map(_.resp)) - val vpn = req.vpn val sfence = RegNext(io.sfence) val csr = io.csr val satp = csr.satp val priv = csr.priv - val valid = ValidHold(arb.io.out.fire(), resp(arbChosen).fire(), sfence.valid) - val validOneCycle = OneCycleValid(arb.io.out.fire(), sfence.valid) - arb.io.out.ready := !valid// || resp(arbChosen).fire() + val missQueue = Module(new PtwMissQueue) + val cache = Module(new PtwCache) + val fsm = Module(new PtwFsm) + val arb1 = Module(new Arbiter(new PtwReq, PtwWidth)) + val arb2 = Module(new Arbiter(new Bundle { + val vpn = UInt(vpnLen.W) + val source = UInt(bPtwWidth.W) + }, 2)) + val outArb = (0 until PtwWidth).map(i => Module(new Arbiter(new PtwResp, 2)).io) + + // NOTE: when cache out but miss and fsm doesnt accept, + val blockNewReq = false.B + arb1.io.in <> VecInit(io.tlb.map(_.req(0))) + arb1.io.out.ready := arb2.io.in(1).ready && !blockNewReq + + val blockMissQueue = !fsm.io.req.ready + block_decoupled(missQueue.io.out, arb2.io.in(0), blockMissQueue) + arb2.io.in(1).valid := arb1.io.out.valid && !blockNewReq + arb2.io.in(1).bits.vpn := arb1.io.out.bits.vpn + arb2.io.in(1).bits.source := arb1.io.chosen + arb2.io.out.ready := cache.io.req.ready + + cache.io.req.valid := arb2.io.out.valid + cache.io.req.bits.vpn := arb2.io.out.bits.vpn + cache.io.req.bits.source := arb2.io.out.bits.source + cache.io.req.bits.isReplay := arb2.io.chosen === 0.U + cache.io.refill.valid := mem.d.valid + cache.io.refill.bits.ptes := mem.d.bits.data + cache.io.refill.bits.vpn := fsm.io.refill.vpn + cache.io.refill.bits.level := fsm.io.refill.level + cache.io.refill.bits.memAddr := fsm.io.refill.memAddr + cache.io.sfence := sfence + cache.io.refuseRefill := fsm.io.sfenceLatch + cache.io.resp.ready := Mux(cache.io.resp.bits.hit, true.B, missQueue.io.in.ready || fsm.io.req.ready) + + missQueue.io.in.valid := cache.io.resp.valid && !cache.io.resp.bits.hit && !fsm.io.req.ready + missQueue.io.in.bits.vpn := cache.io.resp.bits.vpn + missQueue.io.in.bits.source := cache.io.resp.bits.source + missQueue.io.sfence := sfence + + // NOTE: missQueue req has higher priority + fsm.io.req.valid := cache.io.resp.valid && !cache.io.resp.bits.hit + fsm.io.req.bits.source := cache.io.resp.bits.source + fsm.io.req.bits.l1Hit := cache.io.resp.bits.toFsm.l1Hit + fsm.io.req.bits.l2Hit := cache.io.resp.bits.toFsm.l2Hit + fsm.io.req.bits.ppn := cache.io.resp.bits.toFsm.ppn + fsm.io.req.bits.vpn := cache.io.resp.bits.vpn + fsm.io.mem.req.ready := mem.a.ready + fsm.io.mem.resp.valid := mem.d.valid + fsm.io.mem.resp.bits.data := mem.d.bits.data + fsm.io.csr := csr + fsm.io.sfence := sfence + fsm.io.resp.ready := MuxLookup(fsm.io.resp.bits.source, false.B, + (0 until PtwWidth).map(i => i.U -> outArb(i).in(1).ready)) + + val memRead = edge.Get( + fromSource = 0.U/*id*/, + // toAddress = memAddr(log2Up(CacheLineSize / 2 / 8) - 1, 0), + toAddress = Cat(fsm.io.mem.req.bits.addr(PAddrBits - 1, log2Up(l1BusDataWidth/8)), 0.U(log2Up(l1BusDataWidth/8).W)), + lgSize = log2Up(l1BusDataWidth/8).U + )._2 + mem.a.bits := memRead + mem.a.valid := fsm.io.mem.req.valid + mem.d.ready := true.B + + for (i <- 0 until PtwWidth) { + outArb(i).in(0).valid := cache.io.resp.valid && cache.io.resp.bits.hit && cache.io.resp.bits.source===i.U + outArb(i).in(0).bits.entry := cache.io.resp.bits.toTlb + outArb(i).in(0).bits.pf := false.B + outArb(i).in(1).valid := fsm.io.resp.valid && fsm.io.resp.bits.source===i.U + outArb(i).in(1).bits := fsm.io.resp.bits.resp + } + + // io.tlb.map(_.resp) <> outArb.map(_.out) + io.tlb.map(_.resp).zip(outArb.map(_.out)).map{ + case (resp, out) => resp <> out + } + def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = { + sink.valid := source.valid && !block_signal + source.ready := sink.ready && !block_signal + sink.bits := source.bits + } + // debug info + for (i <- 0 until PtwWidth) { + XSDebug(p"[io.tlb(${i.U})] ${io.tlb(i)}\n") + } + XSDebug(p"[io.sfence] ${io.sfence}\n") + XSDebug(p"[io.csr] ${io.csr}\n") + + for (i <- 0 until PtwWidth) { + XSPerfAccumulate(s"req_count${i}", io.tlb(i).req(0).fire()) + XSPerfAccumulate(s"req_blocked_count_${i}", io.tlb(i).req(0).valid && !io.tlb(i).req(0).ready) + } + XSPerfAccumulate(s"req_blocked_by_mq", arb1.io.out.valid && missQueue.io.out.valid) + XSPerfAccumulate(s"replay_again", cache.io.resp.valid && !cache.io.resp.bits.hit && cache.io.resp.bits.isReplay && !fsm.io.req.ready) + XSPerfAccumulate(s"into_fsm_no_replay", cache.io.resp.valid && !cache.io.resp.bits.hit && !cache.io.resp.bits.isReplay && fsm.io.req.ready) +} + +/* Miss Queue dont care about duplicate req, which is done by PtwFilter + * PtwMissQueue is just a Queue inside Chisel with flush + */ +class PtwMissQueue(implicit p: Parameters) extends XSModule with HasPtwConst { + val io = IO(new Bundle { + val in = Flipped(Decoupled(new Bundle { + val vpn = UInt(vpnLen.W) + val source = UInt(bPtwWidth.W) + })) + val sfence = Input(new SfenceBundle) + val out = Decoupled(new Bundle { + val vpn = UInt(vpnLen.W) + val source = UInt(bPtwWidth.W) + }) + val empty = Output(Bool()) + }) + + val vpn = Reg(Vec(MSHRSize, UInt(vpnLen.W))) // request vpn + val source = Reg(Vec(MSHRSize, UInt(bPtwWidth.W))) // is itlb + val enqPtr = RegInit(0.U(log2Up(MSHRSize).W)) + val deqPtr = RegInit(0.U(log2Up(MSHRSize).W)) + + val mayFull = RegInit(false.B) + val full = mayFull && enqPtr === deqPtr + val empty = !mayFull && enqPtr === deqPtr + + val do_enq = io.in.fire() + val do_deq = io.out.fire() + + when (do_enq) { + enqPtr := enqPtr + 1.U + vpn(enqPtr) := io.in.bits.vpn + source(enqPtr) := io.in.bits.source + } + + when (do_deq) { + deqPtr := deqPtr + 1.U + } + + when (do_enq =/= do_deq) { + mayFull := do_enq + } + + when (io.sfence.valid) { + enqPtr := 0.U + deqPtr := 0.U + mayFull := false.B + } + + io.in.ready := !full + io.out.valid := !empty + io.out.bits.vpn := vpn(deqPtr) + io.out.bits.source := source(deqPtr) + io.empty := empty + + XSPerfAccumulate("mq_in_count", io.in.fire()) + XSPerfAccumulate("mq_in_block", io.in.valid && !io.in.ready) + val count = RegInit(0.U(log2Up(MSHRSize+1).W)) + when (do_enq =/= do_deq) { + count := Mux(do_enq, count + 1.U, count - 1.U) + } + when (io.sfence.valid) { + count := 0.U + } + for (i <- 0 until MSHRSize) { + XSPerfAccumulate(s"numExist${i}", count === 1.U) + } +} + +class PTWRepeater(implicit p: Parameters) extends XSModule with HasPtwConst { + val io = IO(new Bundle { + val tlb = Flipped(new TlbPtwIO) + val ptw = new TlbPtwIO + val sfence = Input(new SfenceBundle) + }) + + val (tlb, ptw, sfence) = (io.tlb, io.ptw, RegNext(io.sfence.valid)) + val req = RegEnable(tlb.req(0).bits, tlb.req(0).fire()) + val resp = RegEnable(ptw.resp.bits, ptw.resp.fire()) + val haveOne = BoolStopWatch(tlb.req(0).fire(), tlb.resp.fire() || sfence) + val sent = BoolStopWatch(ptw.req(0).fire(), tlb.req(0).fire() || sfence) + val recv = BoolStopWatch(ptw.resp.fire(), tlb.req(0).fire() || sfence) + + tlb.req(0).ready := !haveOne + ptw.req(0).valid := haveOne && !sent + ptw.req(0).bits := req + + tlb.resp.bits := resp + tlb.resp.valid := haveOne && recv + ptw.resp.ready := !recv + + XSDebug(haveOne, p"haveOne:${haveOne} sent:${sent} recv:${recv} sfence:${sfence} req:${req} resp:${resp}") + XSDebug(io.tlb.req(0).valid || io.tlb.resp.valid, p"tlb: ${tlb}\n") + XSDebug(io.ptw.req(0).valid || io.ptw.resp.valid, p"ptw: ${ptw}\n") + assert(!RegNext(recv && io.ptw.resp.valid, init = false.B), "re-receive ptw.resp") +} + +/* dtlb + * + */ +class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule with HasPtwConst { + val io = IO(new Bundle { + val tlb = Flipped(new TlbPtwIO(Width)) + val ptw = new TlbPtwIO + val sfence = Input(new SfenceBundle) + }) + + require(Size >= Width) + + val v = RegInit(VecInit(Seq.fill(Size)(false.B))) + val vpn = Reg(Vec(Size, UInt(vpnLen.W))) + val enqPtr = RegInit(0.U(log2Up(Size).W)) // Enq + val issPtr = RegInit(0.U(log2Up(Size).W)) // Iss to Ptw + val deqPtr = RegInit(0.U(log2Up(Size).W)) // Deq + val mayFullDeq = RegInit(false.B) + val mayFullIss = RegInit(false.B) + val counter = RegInit(0.U(log2Up(Size+1).W)) + + val sfence = RegNext(io.sfence) + val ptwResp = RegEnable(io.ptw.resp.bits, io.ptw.resp.fire()) + val ptwResp_valid = RegNext(io.ptw.resp.valid, init = false.B) + val reqs = filter(io.tlb.req) + + var enqPtr_next = WireInit(deqPtr) + val isFull = enqPtr === deqPtr && mayFullDeq + val isEmptyDeq = enqPtr === deqPtr && !mayFullDeq + val isEmptyIss = enqPtr === issPtr && !mayFullIss + val accumEnqNum = (0 until Width).map(i => PopCount(reqs.take(i).map(_.valid))) + val enqPtrVec = VecInit((0 until Width).map(i => enqPtr + accumEnqNum(i))) + val enqNum = PopCount(reqs.map(_.valid)) + val canEnqueue = counter +& enqNum <= Size.U + + io.tlb.req.map(_.ready := true.B) // NOTE: just drop un-fire reqs + io.tlb.resp.valid := ptwResp_valid + io.tlb.resp.bits := ptwResp + io.ptw.req(0).valid := v(issPtr) && !isEmptyIss && !(ptwResp_valid && ptwResp.entry.hit(io.ptw.req(0).bits.vpn)) + io.ptw.req(0).bits.vpn := vpn(issPtr) + io.ptw.resp.ready := true.B + + reqs.zipWithIndex.map{ + case (req, i) => + when (req.valid && canEnqueue) { + v(enqPtrVec(i)) := true.B + vpn(enqPtrVec(i)) := req.bits.vpn + } + } + + val do_enq = canEnqueue && Cat(reqs.map(_.valid)).orR + val do_deq = (!v(deqPtr) && !isEmptyDeq) + val do_iss = io.ptw.req(0).fire() || (!v(issPtr) && !isEmptyIss) + when (do_enq) { + enqPtr := enqPtr + enqNum + } + when (do_deq) { + deqPtr := deqPtr + 1.U + } + when (do_iss) { + issPtr := issPtr + 1.U + } + when (do_enq =/= do_deq) { + mayFullDeq := do_enq + } + when (do_enq =/= do_iss) { + mayFullIss := do_enq + } + + when (ptwResp_valid) { + vpn.zip(v).map{case (pi, vi) => + when (vi && ptwResp.entry.hit(pi, allType = true)) { vi := false.B } + } + } + + counter := counter - do_deq + Mux(do_enq, enqNum, 0.U) + assert(counter <= Size.U, "counter should be less than Size") + when (counter === 0.U) { + assert(!io.ptw.req(0).fire(), "when counter is 0, should not req") + assert(isEmptyDeq && isEmptyIss, "when counter is 0, should be empty") + } + when (counter === Size.U) { + assert(mayFullDeq, "when counter is Size, should be full") + } + + when (sfence.valid) { + v.map(_ := false.B) + deqPtr := 0.U + enqPtr := 0.U + issPtr := 0.U + ptwResp_valid := false.B + mayFullDeq := false.B + mayFullIss := false.B + counter := 0.U + } + + def canMerge(vpnReq: UInt, reqs: Seq[DecoupledIO[PtwReq]], index: Int) : Bool = { + Cat((vpn ++ reqs.take(index).map(_.bits.vpn)) + .zip(v ++ reqs.take(index).map(_.valid)) + .map{case (pi, vi) => vi && pi === vpnReq} + ).orR || (ptwResp_valid && ptwResp.entry.hit(vpnReq)) + } + + def filter(tlbReq: Vec[DecoupledIO[PtwReq]]) = { + val reqs = tlbReq.indices.map{ i => + val req = Wire(ValidIO(new PtwReq())) + req.bits := tlbReq(i).bits + req.valid := !canMerge(tlbReq(i).bits.vpn, tlbReq, i) && tlbReq(i).valid + req + } + reqs + } + + XSPerfAccumulate("req_count", PopCount(Cat(io.tlb.req.map(_.valid)))) + XSPerfAccumulate("req_count_filter", Mux(do_enq, accumEnqNum(Width - 1), 0.U)) + XSPerfAccumulate("resp_count", PopCount(Cat(io.tlb.resp.fire()))) + XSPerfAccumulate("inflight_cycle", !isEmptyDeq) + for (i <- 0 until Size + 1) { + XSPerfAccumulate(s"counter${i}", counter === i.U) + } +} + +/* ptw cache caches the page table of all the three layers + * ptw cache resp at next cycle + * the cache should not be blocked + * when miss queue if full, just block req outside + */ +class PtwCacheIO()(implicit p: Parameters) extends PtwBundle { + val req = Flipped(DecoupledIO(new Bundle { + val vpn = UInt(vpnLen.W) + val source = UInt(bPtwWidth.W) + val isReplay = Bool() + })) + val resp = DecoupledIO(new Bundle { + val source = UInt(bPtwWidth.W) + val vpn = UInt(vpnLen.W) + val isReplay = Bool() + val hit = Bool() + val toFsm = new Bundle { + val l1Hit = Bool() + val l2Hit = Bool() + val ppn = UInt(ppnLen.W) + } + val toTlb = new PtwEntry(tagLen = vpnLen, hasPerm = true, hasLevel = true) + }) + val refill = Flipped(ValidIO(new Bundle { + val ptes = UInt(MemBandWidth.W) + val vpn = UInt(vpnLen.W) + val level = UInt(log2Up(Level).W) + val memAddr = Input(UInt(PAddrBits.W)) + })) + val sfence = Input(new SfenceBundle) + val refuseRefill = Input(Bool()) +} + +class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst { + val io = IO(new PtwCacheIO) + + // TODO: four caches make the codes dirty, think about how to deal with it + + val sfence = io.sfence + val refuseRefill = io.refuseRefill + val refill = io.refill.bits + + val first_valid = io.req.valid + val first_fire = first_valid && io.req.ready + val first_req = io.req.bits + val second_ready = Wire(Bool()) + val second_valid = ValidHold(first_fire, io.resp.fire(), sfence.valid) + val second_req = RegEnable(first_req, first_fire) + // NOTE: if ptw cache resp may be blocked, hard to handle refill + // when miss queue is full, please to block itlb and dtlb input + + // when refill, refuce to accept new req + val rwHarzad = if (SramSinglePort) io.refill.valid else false.B + io.req.ready := !rwHarzad && (second_ready || io.req.bits.isReplay) + // NOTE: when write, don't ready, whe + // when replay, just come in, out make sure resp.fire() // l1: level 0 non-leaf pte val l1 = Reg(Vec(PtwL1EntrySize, new PtwEntry(tagLen = PtwL1TagLen))) @@ -318,7 +722,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { new PtwEntries(num = PtwL2SectorSize, tagLen = PtwL2TagLen, level = 1, hasPerm = false), set = PtwL2LineNum, way = PtwL2WayNum, - singlePort = true + singlePort = SramSinglePort )) val l2v = RegInit(0.U((PtwL2LineNum * PtwL2WayNum).W)) val l2g = Reg(UInt((PtwL2LineNum * PtwL2WayNum).W)) @@ -335,7 +739,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { new PtwEntries(num = PtwL3SectorSize, tagLen = PtwL3TagLen, level = 2, hasPerm = true), set = PtwL3LineNum, way = PtwL3WayNum, - singlePort = true + singlePort = SramSinglePort )) val l3v = RegInit(0.U((PtwL3LineNum * PtwL3WayNum).W)) val l3g = Reg(UInt((PtwL3LineNum * PtwL3WayNum).W)) @@ -352,25 +756,6 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { val spv = RegInit(0.U(PtwSPEntrySize.W)) val spg = Reg(UInt(PtwSPEntrySize.W)) - // mem alias - val memRespFire = mem.d.fire() - val memRdata = mem.d.bits.data - val memSelData = Wire(UInt(XLEN.W)) - val memPte = memSelData.asTypeOf(new PteBundle) - val memPteReg = RegEnable(memPte, memRespFire) - val memPtes =(0 until PtwL3SectorSize).map(i => memRdata((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle)) - val memValid = mem.d.valid - val memRespReady = mem.d.ready - val memReqReady = mem.a.ready - val memReqFire = mem.a.fire() - - // fsm - val s_idle :: s_read_ptw :: s_req :: s_resp :: Nil = Enum(4) - val state = RegInit(s_idle) - val level = RegInit(0.U(log2Up(Level).W)) - val levelNext = level + 1.U - val sfenceLatch = RegEnable(false.B, init = false.B, memValid) // NOTE: store sfence to disable mem.resp.fire(), but not stall other ptw req - // Access Perf val l1AccessPerf = Wire(Vec(PtwL1EntrySize, Bool())) val l2AccessPerf = Wire(Vec(PtwL2WayNum, Bool())) @@ -383,22 +768,20 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { // l1 val ptwl1replace = ReplacementPolicy.fromString(ptwl1Replacer, PtwL1EntrySize) - val l1HitReg = Reg(Bool()) - val l1HitPPNReg = Reg(UInt(ppnLen.W)) val (l1Hit, l1HitPPN) = { - val hitVecT = l1.zipWithIndex.map { case (e, i) => e.hit(vpn) && l1v(i) } - val hitVec = hitVecT.map(RegEnable(_, validOneCycle)) + val hitVecT = l1.zipWithIndex.map { case (e, i) => e.hit(first_req.vpn) && l1v(i) } + val hitVec = hitVecT.map(RegEnable(_, first_fire)) val hitPPN = ParallelPriorityMux(hitVec zip l1.map(_.ppn)) - val hit = ParallelOR(hitVec) && RegNext(validOneCycle) + val hit = ParallelOR(hitVec) && second_valid when (hit) { ptwl1replace.access(OHToUInt(hitVec)) } - l1AccessPerf.zip(hitVec).map{ case (l, h) => l := h && RegNext(validOneCycle) } + l1AccessPerf.zip(hitVec).map{ case (l, h) => l := h && RegNext(first_fire)} for (i <- 0 until PtwL1EntrySize) { - XSDebug(validOneCycle, p"[l1] l1(${i.U}) ${l1(i)} hit:${l1(i).hit(vpn)}\n") + XSDebug(first_fire, p"[l1] l1(${i.U}) ${l1(i)} hit:${l1(i).hit(first_req.vpn)}\n") } - XSDebug(validOneCycle, p"[l1] l1v:${Binary(l1v)} hitVecT:${Binary(VecInit(hitVecT).asUInt)}\n") - XSDebug(valid, p"[l1] l1Hit:${hit} l1HitPPN:0x${Hexadecimal(hitPPN)} l1HitReg:${l1HitReg} l1HitPPNReg:${Hexadecimal(l1HitPPNReg)} hitVec:${VecInit(hitVec).asUInt}\n") + XSDebug(first_fire, p"[l1] l1v:${Binary(l1v)} hitVecT:${Binary(VecInit(hitVecT).asUInt)}\n") + XSDebug(second_valid, p"[l1] l1Hit:${hit} l1HitPPN:0x${Hexadecimal(hitPPN)} hitVec:${VecInit(hitVec).asUInt}\n") VecInit(hitVecT).suggestName(s"l1_hitVecT") VecInit(hitVec).suggestName(s"l1_hitVec") @@ -408,18 +791,16 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { // l2 val ptwl2replace = ReplacementPolicy.fromString(ptwl2Replacer,PtwL2WayNum,PtwL2LineNum) - val l2HitReg = Reg(Bool()) - val l2HitPPNReg = Reg(UInt(ppnLen.W)) val (l2Hit, l2HitPPN) = { - val ridx = genPtwL2SetIdx(vpn) - val vidx = RegEnable(VecInit(getl2vSet(vpn).asBools), validOneCycle) - l2.io.r.req.valid := validOneCycle + val ridx = genPtwL2SetIdx(first_req.vpn) + val vidx = RegEnable(VecInit(getl2vSet(first_req.vpn).asBools), first_fire) + l2.io.r.req.valid := first_fire l2.io.r.req.bits.apply(setIdx = ridx) val ramDatas = l2.io.r.resp.data - // val hitVec = VecInit(ramDatas.map{wayData => wayData.hit(vpn) }) - val hitVec = VecInit(ramDatas.zip(vidx).map { case (wayData, v) => wayData.hit(vpn) && v }) + // val hitVec = VecInit(ramDatas.map{wayData => wayData.hit(first_req.vpn) }) + val hitVec = VecInit(ramDatas.zip(vidx).map { case (wayData, v) => wayData.hit(second_req.vpn) && v }) val hitWayData = ParallelPriorityMux(hitVec zip ramDatas) - val hit = ParallelOR(hitVec) && RegNext(validOneCycle) + val hit = ParallelOR(hitVec) && second_valid val hitWay = ParallelPriorityMux(hitVec zip (0 until PtwL2WayNum).map(_.U)) ridx.suggestName(s"l2_ridx") @@ -429,40 +810,39 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { hitWayData.suggestName(s"l2_hitWayData") hitWay.suggestName(s"l2_hitWay") - when (hit) { ptwl2replace.access(genPtwL2SetIdx(vpn), hitWay) } + when (hit) { ptwl2replace.access(genPtwL2SetIdx(second_req.vpn), hitWay) } - l2AccessPerf.zip(hitVec).map{ case (l, h) => l := h && RegNext(validOneCycle) } - XSDebug(validOneCycle, p"[l2] ridx:0x${Hexadecimal(ridx)}\n") + l2AccessPerf.zip(hitVec).map{ case (l, h) => l := h && RegNext(first_fire) } + XSDebug(first_fire, p"[l2] ridx:0x${Hexadecimal(ridx)}\n") for (i <- 0 until PtwL2WayNum) { - XSDebug(RegNext(validOneCycle), p"[l2] ramDatas(${i.U}) ${ramDatas(i)} l2v:${vidx(i)} hit:${ramDatas(i).hit(vpn)}\n") + XSDebug(RegNext(first_fire), p"[l2] ramDatas(${i.U}) ${ramDatas(i)} l2v:${vidx(i)} hit:${ramDatas(i).hit(second_req.vpn)}\n") } - XSDebug(valid, p"[l2] l2Hit:${hit} l2HitPPN:0x${Hexadecimal(hitWayData.ppns(genPtwL2SectorIdx(vpn)))} l2HitReg:${l2HitReg} l2HitPPNReg:0x${Hexadecimal(l2HitPPNReg)} hitVec:${Binary(hitVec.asUInt)} hitWay:${hitWay} vidx:${Binary(vidx.asUInt)}\n") + XSDebug(second_valid, p"[l2] l2Hit:${hit} l2HitPPN:0x${Hexadecimal(hitWayData.ppns(genPtwL2SectorIdx(second_req.vpn)))} hitVec:${Binary(hitVec.asUInt)} hitWay:${hitWay} vidx:${Binary(vidx.asUInt)}\n") - (hit, hitWayData.ppns(genPtwL2SectorIdx(vpn))) + (hit, hitWayData.ppns(genPtwL2SectorIdx(second_req.vpn))) } // l3 val ptwl3replace = ReplacementPolicy.fromString(ptwl3Replacer,PtwL3WayNum,PtwL3LineNum) - val l3HitReg = Reg(Bool()) val (l3Hit, l3HitData) = { - val ridx = genPtwL3SetIdx(vpn) - val vidx = RegEnable(VecInit(getl3vSet(vpn).asBools), validOneCycle) - l3.io.r.req.valid := validOneCycle + val ridx = genPtwL3SetIdx(first_req.vpn) + val vidx = RegEnable(VecInit(getl3vSet(first_req.vpn).asBools), first_fire) + l3.io.r.req.valid := first_fire l3.io.r.req.bits.apply(setIdx = ridx) val ramDatas = l3.io.r.resp.data - val hitVec = VecInit(ramDatas.zip(vidx).map{ case (wayData, v) => wayData.hit(vpn) && v }) + val hitVec = VecInit(ramDatas.zip(vidx).map{ case (wayData, v) => wayData.hit(second_req.vpn) && v }) val hitWayData = ParallelPriorityMux(hitVec zip ramDatas) - val hit = ParallelOR(hitVec) && RegNext(validOneCycle) + val hit = ParallelOR(hitVec) && second_valid val hitWay = ParallelPriorityMux(hitVec zip (0 until PtwL3WayNum).map(_.U)) - when (hit) { ptwl3replace.access(genPtwL3SetIdx(vpn), hitWay) } + when (hit) { ptwl3replace.access(genPtwL3SetIdx(second_req.vpn), hitWay) } - l3AccessPerf.zip(hitVec).map{ case (l, h) => l := h && RegNext(validOneCycle) } - XSDebug(validOneCycle, p"[l3] ridx:0x${Hexadecimal(ridx)}\n") + l3AccessPerf.zip(hitVec).map{ case (l, h) => l := h && RegNext(first_fire) } + XSDebug(first_fire, p"[l3] ridx:0x${Hexadecimal(ridx)}\n") for (i <- 0 until PtwL3WayNum) { - XSDebug(RegNext(validOneCycle), p"[l3] ramDatas(${i.U}) ${ramDatas(i)} l3v:${vidx(i)} hit:${ramDatas(i).hit(vpn)}\n") + XSDebug(RegNext(first_fire), p"[l3] ramDatas(${i.U}) ${ramDatas(i)} l3v:${vidx(i)} hit:${ramDatas(i).hit(second_req.vpn)}\n") } - XSDebug(valid, p"[l3] l3Hit:${hit} l3HitData:${hitWayData} l3HitReg:${l3HitReg} hitVec:${Binary(hitVec.asUInt)} hitWay:${hitWay} vidx:${Binary(vidx.asUInt)}\n") + XSDebug(second_valid, p"[l3] l3Hit:${hit} l3HitData:${hitWayData} hitVec:${Binary(hitVec.asUInt)} hitWay:${hitWay} vidx:${Binary(vidx.asUInt)}\n") ridx.suggestName(s"l3_ridx") vidx.suggestName(s"l3_vidx") @@ -472,25 +852,24 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { (hit, hitWayData) } - val l3HitPPN = l3HitData.ppns(genPtwL3SectorIdx(vpn)) - val l3HitPerm = l3HitData.perms.getOrElse(0.U.asTypeOf(Vec(PtwL3SectorSize, new PtePermBundle)))(genPtwL3SectorIdx(vpn)) + val l3HitPPN = l3HitData.ppns(genPtwL3SectorIdx(second_req.vpn)) + val l3HitPerm = l3HitData.perms.getOrElse(0.U.asTypeOf(Vec(PtwL3SectorSize, new PtePermBundle)))(genPtwL3SectorIdx(second_req.vpn)) // super page val spreplace = ReplacementPolicy.fromString(spReplacer, PtwSPEntrySize) - val spHitReg = Reg(Bool()) val (spHit, spHitData) = { - val hitVecT = sp.zipWithIndex.map { case (e, i) => e.hit(vpn) && spv(i) } - val hitVec = hitVecT.map(RegEnable(_, validOneCycle)) + val hitVecT = sp.zipWithIndex.map { case (e, i) => e.hit(first_req.vpn) && spv(i) } + val hitVec = hitVecT.map(RegEnable(_, first_fire)) val hitData = ParallelPriorityMux(hitVec zip sp) - val hit = ParallelOR(hitVec) && RegNext(validOneCycle) + val hit = ParallelOR(hitVec) && second_valid when (hit) { spreplace.access(OHToUInt(hitVec)) } - spAccessPerf.zip(hitVec).map{ case (s, h) => s := h && RegNext(validOneCycle) } + spAccessPerf.zip(hitVec).map{ case (s, h) => s := h && RegNext(first_fire) } for (i <- 0 until PtwSPEntrySize) { - XSDebug(validOneCycle, p"[sp] sp(${i.U}) ${sp(i)} hit:${sp(i).hit(vpn)} spv:${spv(i)}\n") + XSDebug(first_fire, p"[sp] sp(${i.U}) ${sp(i)} hit:${sp(i).hit(first_req.vpn)} spv:${spv(i)}\n") } - XSDebug(valid, p"[sp] spHit:${hit} spHitData:${hitData} hitVec:${Binary(VecInit(hitVec).asUInt)}\n") + XSDebug(second_valid, p"[sp] spHit:${hit} spHitData:${hitData} hitVec:${Binary(VecInit(hitVec).asUInt)}\n") VecInit(hitVecT).suggestName(s"sp_hitVecT") VecInit(hitVec).suggestName(s"sp_hitVec") @@ -500,93 +879,25 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { val spHitPerm = spHitData.perm.getOrElse(0.U.asTypeOf(new PtePermBundle)) val spHitLevel = spHitData.level.getOrElse(0.U) - // default values - // resp.map(_.valid := false.B) - // resp.map(_.bits := DontCare) - l2.io.w.req <> DontCare - l3.io.w.req <> DontCare - l2.io.w.req.valid := false.B - l3.io.w.req.valid := false.B - - // fsm - val pteHit = l3Hit || spHit - val notFound = WireInit(false.B) - notFound.suggestName("PtwNotFound") - switch (state) { - is (s_idle) { - when (valid) { - state := s_read_ptw - level := 0.U - } - } - - is (s_read_ptw) { - when (pteHit) { - state := s_idle - }.otherwise { - state := s_req - level := Mux(l2Hit, 2.U, Mux(l1Hit, 1.U, 0.U)) - } - l1HitReg := l1Hit - l2HitReg := l2Hit - l1HitPPNReg := l1HitPPN - l2HitPPNReg := l2HitPPN - l3HitReg := l3Hit - spHitReg := spHit - } - - is (s_req) { - when (memReqFire && !sfenceLatch) { - state := s_resp - } - } - - is (s_resp) { - when (memRespFire) { - when (memPte.isLeaf() || memPte.isPf(level)) { - state := s_idle - notFound := memPte.isPf(level) - }.otherwise { - when (level =/= 2.U) { - level := levelNext - state := s_req - }.otherwise { - state := s_idle - notFound := true.B - } - } - } - } - } - - // mem - val l1addr = MakeAddr(satp.ppn, getVpnn(vpn, 2)) - val l2addr = MakeAddr(Mux(l1HitReg, l1HitPPNReg, memPteReg.ppn), getVpnn(vpn, 1)) - val l3addr = MakeAddr(Mux(l2HitReg, l2HitPPNReg, memPteReg.ppn), getVpnn(vpn, 0)) - val memAddr = Mux(level === 0.U, l1addr, Mux(level === 1.U, l2addr, l3addr)) - val memAddrReg = RegEnable(memAddr, mem.a.fire()) - val pteRead = edge.Get( - fromSource = 0.U/*id*/, - // toAddress = memAddr(log2Up(CacheLineSize / 2 / 8) - 1, 0), - toAddress = Cat(memAddr(PAddrBits - 1, log2Up(l1BusDataWidth/8)), 0.U(log2Up(l1BusDataWidth/8).W)), - lgSize = log2Up(l1BusDataWidth/8).U - )._2 - mem.a.bits := pteRead - mem.a.valid := state === s_req && !sfenceLatch && !sfence.valid - mem.d.ready := state === s_resp || sfenceLatch - memSelData := memRdata.asTypeOf(Vec(MemBandWidth/XLEN, UInt(XLEN.W)))(memAddrReg(log2Up(l1BusDataWidth/8) - 1, log2Up(XLEN/8))) - - // resp - val ptwFinish = state === s_read_ptw && pteHit || - memRespFire && !sfenceLatch && (memPte.isLeaf() || memPte.isPf(level) || level === 2.U) - for (i <- 0 until PtwWidth) { - resp(i).valid := valid && ptwFinish && arbChosen === i.U - resp(i).bits.entry.tag := vpn - resp(i).bits.entry.ppn := Mux(memRespFire, memPte.ppn, Mux(l3Hit, l3HitPPN, spHitData.ppn)) - resp(i).bits.entry.perm.map(_ := Mux(memRespFire, memPte.getPerm(), Mux(l3Hit, l3HitPerm, spHitPerm))) - resp(i).bits.entry.level.map(_ := Mux(memRespFire, level, Mux(l3Hit, 2.U, spHitLevel))) - resp(i).bits.pf := level === 3.U || notFound - } + val resp = Wire(io.resp.bits.cloneType) + val resp_latch = RegEnable(resp, io.resp.valid && !io.resp.ready) + val resp_latch_valid = ValidHold(io.resp.valid && !io.resp.ready, io.resp.ready, sfence.valid) + second_ready := !(second_valid || resp_latch_valid) || io.resp.fire() + resp.source := second_req.source + resp.vpn := second_req.vpn + resp.isReplay := second_req.isReplay + resp.hit := l3Hit || spHit + resp.toFsm.l1Hit := l1Hit + resp.toFsm.l2Hit := l2Hit + resp.toFsm.ppn := Mux(l2Hit, l2HitPPN, l1HitPPN) + resp.toTlb.tag := second_req.vpn + resp.toTlb.ppn := Mux(l3Hit, l3HitPPN, spHitData.ppn) + resp.toTlb.perm.map(_ := Mux(l3Hit, l3HitPerm, spHitPerm)) + resp.toTlb.level.map(_ := Mux(l3Hit, 2.U, spHitLevel)) + + io.resp.valid := second_valid + io.resp.bits := Mux(resp_latch_valid, resp_latch, resp) + assert(!(l3Hit && spHit), "normal page and super page both hit") // refill Perf val l1RefillPerf = Wire(Vec(PtwL1EntrySize, Bool())) @@ -599,38 +910,49 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { spRefillPerf.map(_ := false.B) // refill - when (memRespFire && !memPte.isPf(level) && !sfenceLatch) { - when (level === 0.U && !memPte.isLeaf()) { + l2.io.w.req <> DontCare + l3.io.w.req <> DontCare + l2.io.w.req.valid := false.B + l3.io.w.req.valid := false.B + + val memRdata = refill.ptes + val memSelData = memRdata.asTypeOf(Vec(MemBandWidth/XLEN, UInt(XLEN.W)))(refill.memAddr(log2Up(l1BusDataWidth/8) - 1, log2Up(XLEN/8))) + val memPtes = (0 until PtwL3SectorSize).map(i => memRdata((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle)) + val memPte = memSelData.asTypeOf(new PteBundle) + + // TODO: handle sfenceLatch outsize + when (io.refill.valid && !memPte.isPf(refill.level) && !(sfence.valid || refuseRefill)) { + when (refill.level === 0.U && !memPte.isLeaf()) { // val refillIdx = LFSR64()(log2Up(PtwL1EntrySize)-1,0) // TODO: may be LRU val refillIdx = replaceWrapper(l1v, ptwl1replace.way) refillIdx.suggestName(s"PtwL1RefillIdx") val rfOH = UIntToOH(refillIdx) - l1(refillIdx).refill(vpn, memSelData) + l1(refillIdx).refill(refill.vpn, memSelData) ptwl1replace.access(refillIdx) l1v := l1v | rfOH l1g := (l1g & ~rfOH) | Mux(memPte.perm.g, rfOH, 0.U) for (i <- 0 until PtwL1EntrySize) { - l1RefillPerf(i) := i.U === refillIdx + l1RefillPerf(i) := i.U === refillIdx } - XSDebug(p"[l1 refill] refillIdx:${refillIdx} refillEntry:${l1(refillIdx).genPtwEntry(vpn, memSelData)}\n") + XSDebug(p"[l1 refill] refillIdx:${refillIdx} refillEntry:${l1(refillIdx).genPtwEntry(refill.vpn, memSelData)}\n") XSDebug(p"[l1 refill] l1v:${Binary(l1v)}->${Binary(l1v | rfOH)} l1g:${Binary(l1g)}->${Binary((l1g & ~rfOH) | Mux(memPte.perm.g, rfOH, 0.U))}\n") refillIdx.suggestName(s"l1_refillIdx") rfOH.suggestName(s"l1_rfOH") } - when (level === 1.U && !memPte.isLeaf()) { - val refillIdx = genPtwL2SetIdx(vpn) - val victimWay = replaceWrapper(RegEnable(VecInit(getl2vSet(vpn).asBools).asUInt, validOneCycle), ptwl2replace.way(refillIdx)) + when (refill.level === 1.U && !memPte.isLeaf()) { + val refillIdx = genPtwL2SetIdx(refill.vpn) + val victimWay = replaceWrapper(RegEnable(VecInit(getl2vSet(refill.vpn).asBools).asUInt, first_fire), ptwl2replace.way(refillIdx)) val victimWayOH = UIntToOH(victimWay) val rfvOH = UIntToOH(Cat(refillIdx, victimWay)) l2.io.w.apply( valid = true.B, setIdx = refillIdx, data = (new PtwEntries(num = PtwL2SectorSize, tagLen = PtwL2TagLen, level = 1, hasPerm = false)).genEntries( - vpn = vpn, data = memRdata, levelUInt = 1.U + vpn = refill.vpn, data = memRdata, levelUInt = 1.U ), waymask = victimWayOH ) @@ -645,7 +967,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { XSDebug(p"[l2 refill] refillIdx:0x${Hexadecimal(refillIdx)} victimWay:${victimWay} victimWayOH:${Binary(victimWayOH)} rfvOH(in UInt):${Cat(refillIdx, victimWay)}\n") XSDebug(p"[l2 refill] refilldata:0x${ (new PtwEntries(num = PtwL2SectorSize, tagLen = PtwL2TagLen, level = 1, hasPerm = false)).genEntries( - vpn = vpn, data = memRdata, levelUInt = 1.U) + vpn = refill.vpn, data = memRdata, levelUInt = 1.U) }\n") XSDebug(p"[l2 refill] l2v:${Binary(l2v)} -> ${Binary(l2v | rfvOH)}\n") XSDebug(p"[l2 refill] l2g:${Binary(l2g)} -> ${Binary(l2g & ~rfvOH | Mux(Cat(memPtes.map(_.perm.g)).andR, rfvOH, 0.U))}\n") @@ -656,16 +978,16 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { rfvOH.suggestName(s"l2_rfvOH") } - when (level === 2.U && memPte.isLeaf()) { - val refillIdx = genPtwL3SetIdx(vpn) - val victimWay = replaceWrapper(RegEnable(VecInit(getl3vSet(vpn).asBools).asUInt, validOneCycle), ptwl3replace.way(refillIdx)) + when (refill.level === 2.U && memPte.isLeaf()) { + val refillIdx = genPtwL3SetIdx(refill.vpn) + val victimWay = replaceWrapper(RegEnable(VecInit(getl3vSet(refill.vpn).asBools).asUInt, first_fire), ptwl3replace.way(refillIdx)) val victimWayOH = UIntToOH(victimWay) val rfvOH = UIntToOH(Cat(refillIdx, victimWay)) l3.io.w.apply( valid = true.B, setIdx = refillIdx, data = (new PtwEntries(num = PtwL3SectorSize, tagLen = PtwL3TagLen, level = 2, hasPerm = true)).genEntries( - vpn = vpn, data = memRdata, levelUInt = 2.U + vpn = refill.vpn, data = memRdata, levelUInt = 2.U ), waymask = victimWayOH ) @@ -680,7 +1002,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { XSDebug(p"[l3 refill] refillIdx:0x${Hexadecimal(refillIdx)} victimWay:${victimWay} victimWayOH:${Binary(victimWayOH)} rfvOH(in UInt):${Cat(refillIdx, victimWay)}\n") XSDebug(p"[l3 refill] refilldata:0x${ (new PtwEntries(num = PtwL3SectorSize, tagLen = PtwL3TagLen, level = 2, hasPerm = true)).genEntries( - vpn = vpn, data = memRdata, levelUInt = 2.U) + vpn = refill.vpn, data = memRdata, levelUInt = 2.U) }\n") XSDebug(p"[l3 refill] l3v:${Binary(l3v)} -> ${Binary(l3v | rfvOH)}\n") XSDebug(p"[l3 refill] l3g:${Binary(l3g)} -> ${Binary(l3g & ~rfvOH | Mux(Cat(memPtes.map(_.perm.g)).andR, rfvOH, 0.U))}\n") @@ -691,10 +1013,10 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { rfvOH.suggestName(s"l3_rfvOH") } - when ((level === 0.U || level === 1.U) && memPte.isLeaf()) { + when ((refill.level === 0.U || refill.level === 1.U) && memPte.isLeaf()) { val refillIdx = spreplace.way// LFSR64()(log2Up(PtwSPEntrySize)-1,0) // TODO: may be LRU val rfOH = UIntToOH(refillIdx) - sp(refillIdx).refill(vpn, memSelData, level) + sp(refillIdx).refill(refill.vpn, memSelData, refill.level) spreplace.access(refillIdx) spv := spv | rfOH spg := spg & ~rfOH | Mux(memPte.perm.g, rfOH, 0.U) @@ -703,21 +1025,16 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { spRefillPerf(i) := i.U === refillIdx } - XSDebug(p"[sp refill] refillIdx:${refillIdx} refillEntry:${sp(refillIdx).genPtwEntry(vpn, memSelData, level)}\n") + XSDebug(p"[sp refill] refillIdx:${refillIdx} refillEntry:${sp(refillIdx).genPtwEntry(refill.vpn, memSelData, refill.level)}\n") XSDebug(p"[sp refill] spv:${Binary(spv)}->${Binary(spv | rfOH)} spg:${Binary(spg)}->${Binary(spg & ~rfOH | Mux(memPte.perm.g, rfOH, 0.U))}\n") refillIdx.suggestName(s"sp_refillIdx") rfOH.suggestName(s"sp_rfOH") } } - + // sfence when (sfence.valid) { - state := s_idle - when (state === s_resp && !memRespFire) { - sfenceLatch := true.B - } - when (sfence.bits.rs1/*va*/) { when (sfence.bits.rs2) { // all va && all asid @@ -752,15 +1069,14 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { } // Perf Count - XSPerfAccumulate("access", validOneCycle) + XSPerfAccumulate("access", second_valid) XSPerfAccumulate("l1_hit", l1Hit) XSPerfAccumulate("l2_hit", l2Hit) XSPerfAccumulate("l3_hit", l3Hit) XSPerfAccumulate("sp_hit", spHit) - XSPerfAccumulate("pte_hit", pteHit) - XSPerfAccumulate("mem_count", memReqFire) - XSPerfAccumulate("mem_cycle", BoolStopWatch(memReqFire, memRespFire, true)) - XSPerfAccumulate("mem_blocked_cycle", mem.a.valid && !memReqReady) + XSPerfAccumulate("pte_hit", l3Hit || spHit) + XSPerfAccumulate("rwHarzad", io.req.valid && !io.req.ready) + XSPerfAccumulate("out_blocked", io.resp.valid && !io.resp.ready) l1AccessPerf.zipWithIndex.map{ case (l, i) => XSPerfAccumulate(s"L1AccessIndex${i}", l) } l2AccessPerf.zipWithIndex.map{ case (l, i) => XSPerfAccumulate(s"L2AccessIndex${i}", l) } l3AccessPerf.zipWithIndex.map{ case (l, i) => XSPerfAccumulate(s"L3AccessIndex${i}", l) } @@ -770,22 +1086,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { l3RefillPerf.zipWithIndex.map{ case (l, i) => XSPerfAccumulate(s"L3RefillIndex${i}", l) } spRefillPerf.zipWithIndex.map{ case (l, i) => XSPerfAccumulate(s"SPRefillIndex${i}", l) } - - // debug info - for (i <- 0 until PtwWidth) { - XSDebug(p"[io.tlb(${i.U})] ${io.tlb(i)}\n") - } - XSDebug(p"[io.sfence] ${io.sfence}\n") - XSDebug(p"[io.csr] ${io.csr}\n") - - XSDebug(p"req:${req} arb.io.out:(${arb.io.out.valid},${arb.io.out.ready}) arbChosen:${arbChosen} ptwFinish:${ptwFinish}\n") - - XSDebug(p"[mem][A] (${mem.a.valid},${mem.a.ready})\n") - XSDebug("[mem][A] memAddr:0x${Hexadecimal(memAddr)} l1addr:0x${Hexadecimal(l1addr)} l2addr:0x${Hexadecimal(l2addr)} l3addr:0x${Hexadecimal(l3addr)} memAddrReg:0x${Hexadecimal(memAddrReg)} memPteReg.ppn:0x${Hexadecimal(memPteReg.ppn)}") - XSDebug(p"[mem][D] (${mem.d.valid},${mem.d.ready}) memSelData:0x${Hexadecimal(memSelData)} memPte:${memPte} memPte.isLeaf:${memPte.isLeaf()} memPte.isPf(${level}):${memPte.isPf(level)}\n") - XSDebug(memRespFire, p"[mem][D] memPtes:${printVec(memPtes)}\n") - - XSDebug(p"[fsm] state:${state} level:${level} pteHit:${pteHit} sfenceLatch:${sfenceLatch} notFound:${notFound}\n") + // debug XSDebug(sfence.valid, p"[sfence] original v and g vector:\n") XSDebug(sfence.valid, p"[sfence] l1v:${Binary(l1v)}\n") XSDebug(sfence.valid, p"[sfence] l2v:${Binary(l2v)}\n") @@ -800,32 +1101,154 @@ class PTWImp(outer: PTW) extends PtwModule(outer) { XSDebug(RegNext(sfence.valid), p"[sfence] spv:${Binary(spv)}\n") } -class PTWRepeater(implicit p: Parameters) extends XSModule with HasXSParameter with HasPtwConst { - val io = IO(new Bundle { - val tlb = Flipped(new TlbPtwIO) - val ptw = new TlbPtwIO - val sfence = Input(new SfenceBundle) +/* ptw finite state machine, the actual page table walker + */ +class PtwFsmIO()(implicit p: Parameters) extends PtwBundle { + val req = Flipped(DecoupledIO(new Bundle { + val source = UInt(bPtwWidth.W) + val l1Hit = Bool() + val l2Hit = Bool() + val vpn = UInt(vpnLen.W) + val ppn = UInt(ppnLen.W) + })) + val resp = DecoupledIO(new Bundle { + val source = UInt(bPtwWidth.W) + val resp = new PtwResp }) - val (tlb, ptw, sfence) = (io.tlb, io.ptw, RegNext(io.sfence.valid)) - val req = RegEnable(tlb.req.bits, tlb.req.fire()) - val resp = RegEnable(ptw.resp.bits, ptw.resp.fire()) - val haveOne = BoolStopWatch(tlb.req.fire(), tlb.resp.fire() || sfence) - val sent = BoolStopWatch(ptw.req.fire(), tlb.req.fire() || sfence) - val recv = BoolStopWatch(ptw.resp.fire(), tlb.req.fire() || sfence) + val mem = new Bundle { + val req = DecoupledIO(new Bundle { + val addr = UInt(PAddrBits.W) + }) + val resp = Flipped(ValidIO(new Bundle { + val data = UInt(MemBandWidth.W) + })) + } - tlb.req.ready := !haveOne - ptw.req.valid := haveOne && !sent - ptw.req.bits := req + val csr = Input(new TlbCsrBundle) + val sfence = Input(new SfenceBundle) + val sfenceLatch = Output(Bool()) + val refill = Output(new Bundle { + val vpn = UInt(vpnLen.W) + val level = UInt(log2Up(Level).W) + val memAddr = UInt(PAddrBits.W) + }) +} - tlb.resp.bits := resp - tlb.resp.valid := haveOne && recv - ptw.resp.ready := !recv +class PtwFsm()(implicit p: Parameters) extends XSModule with HasPtwConst { + val io = IO(new PtwFsmIO) - XSDebug(haveOne, p"haveOne:${haveOne} sent:${sent} recv:${recv} sfence:${sfence} req:${req} resp:${resp}") - XSDebug(io.tlb.req.valid || io.tlb.resp.valid, p"tlb: ${tlb}\n") - XSDebug(io.ptw.req.valid || io.ptw.resp.valid, p"ptw: ${ptw}\n") - assert(!RegNext(recv && io.ptw.resp.valid), "re-receive ptw.resp") + val sfence = io.sfence + val mem = io.mem + val satp = io.csr.satp + + val s_idle :: s_mem_req :: s_mem_resp :: s_resp :: Nil = Enum(4) + val state = RegInit(s_idle) + val level = RegInit(0.U(log2Up(Level).W)) + val ppn = Reg(UInt(ppnLen.W)) + val vpn = Reg(UInt(vpnLen.W)) + val levelNext = level + 1.U + + val sfenceLatch = RegEnable(false.B, init = false.B, mem.resp.valid) // NOTE: store sfence to disable mem.resp.fire(), but not stall other ptw req + val memAddrReg = RegEnable(mem.req.bits.addr, mem.req.fire()) + val l1Hit = Reg(Bool()) + val l2Hit = Reg(Bool()) + + val memRdata = mem.resp.bits.data + val memSelData = memRdata.asTypeOf(Vec(MemBandWidth/XLEN, UInt(XLEN.W)))(memAddrReg(log2Up(l1BusDataWidth/8) - 1, log2Up(XLEN/8))) + val memPtes = (0 until PtwL3SectorSize).map(i => memRdata((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle)) + val memPte = memSelData.asTypeOf(new PteBundle) + val memPteReg = RegEnable(memPte, mem.resp.fire()) + + val notFound = WireInit(false.B) + switch (state) { + is (s_idle) { + when (io.req.fire()) { + val req = io.req.bits + state := s_mem_req + level := Mux(req.l2Hit, 2.U, Mux(req.l1Hit, 1.U, 0.U)) + ppn := Mux(req.l2Hit || req.l1Hit, io.req.bits.ppn, satp.ppn) + vpn := io.req.bits.vpn + l1Hit := req.l1Hit + l2Hit := req.l2Hit + } + } + + is (s_mem_req) { + when (mem.req.fire()) { + state := s_mem_resp + } + } + + is (s_mem_resp) { + when (mem.resp.fire()) { + when (memPte.isLeaf() || memPte.isPf(level)) { + state := s_resp + notFound := memPte.isPf(level) + }.otherwise { + when (level =/= 2.U) { + level := levelNext + state := s_mem_req + }.otherwise { + state := s_resp + notFound := true.B + } + } + } + } + + is (s_resp) { + when (io.resp.fire()) { + state := s_idle + } + } + } + + when (sfence.valid) { + state := s_idle + when (state === s_mem_resp && !mem.resp.fire() || state === s_mem_req && mem.req.fire()) { + sfenceLatch := true.B + } + } + + val finish = mem.resp.fire() && (memPte.isLeaf() || memPte.isPf(level) || level === 2.U) + val resp = Reg(io.resp.bits.cloneType) + when (finish && !sfenceLatch) { + resp.source := RegEnable(io.req.bits.source, io.req.fire()) + resp.resp.pf := level === 3.U || notFound + resp.resp.entry.tag := vpn + resp.resp.entry.ppn := memPte.ppn + resp.resp.entry.perm.map(_ := memPte.getPerm()) + resp.resp.entry.level.map(_ := level) + } + io.resp.valid := state === s_resp + io.resp.bits := resp + io.req.ready := state === s_idle + + val l1addr = MakeAddr(satp.ppn, getVpnn(vpn, 2)) + val l2addr = MakeAddr(Mux(l1Hit, ppn, memPteReg.ppn), getVpnn(vpn, 1)) + val l3addr = MakeAddr(Mux(l2Hit, ppn, memPteReg.ppn), getVpnn(vpn, 0)) + mem.req.valid := state === s_mem_req && !sfenceLatch + mem.req.bits.addr := Mux(level === 0.U, l1addr, Mux(level === 1.U, l2addr, l3addr)) + + io.refill.vpn := vpn + io.refill.level := level + io.refill.memAddr := memAddrReg + io.sfenceLatch := sfenceLatch + + XSDebug(p"[fsm] state:${state} level:${level} sfenceLatch:${sfenceLatch} notFound:${notFound}\n") + + // perf + XSPerfAccumulate("fsm_count", io.req.fire()) + for (i <- 0 until PtwWidth) { + XSPerfAccumulate(s"fsm_count_source${i}", io.req.fire() && io.req.bits.source === i.U) + } + XSPerfAccumulate("fsm_busy", state =/= s_idle) + XSPerfAccumulate("fsm_idle", state === s_idle) + XSPerfAccumulate("resp_blocked", io.resp.valid && !io.resp.ready) + XSPerfAccumulate("mem_count", mem.req.fire()) + XSPerfAccumulate("mem_cycle", BoolStopWatch(mem.req.fire, mem.resp.fire(), true)) + XSPerfAccumulate("mem_blocked", mem.req.valid && !mem.req.ready) } class PTEHelper() extends BlackBox { @@ -844,20 +1267,20 @@ class FakePTW()(implicit p: Parameters) extends XSModule with HasPtwConst { val io = IO(new PtwIO) for (i <- 0 until PtwWidth) { - io.tlb(i).req.ready := true.B + io.tlb(i).req(0).ready := true.B val helper = Module(new PTEHelper()) helper.io.clock := clock - helper.io.enable := io.tlb(i).req.valid + helper.io.enable := io.tlb(i).req(0).valid helper.io.satp := io.csr.satp.ppn - helper.io.vpn := io.tlb(i).req.bits.vpn + helper.io.vpn := io.tlb(i).req(0).bits.vpn val pte = helper.io.pte.asTypeOf(new PteBundle) val level = helper.io.level val pf = helper.io.pf - io.tlb(i).resp.valid := RegNext(io.tlb(i).req.valid) + io.tlb(i).resp.valid := RegNext(io.tlb(i).req(0).valid) assert(!io.tlb(i).resp.valid || io.tlb(i).resp.ready) - io.tlb(i).resp.bits.entry.tag := RegNext(io.tlb(i).req.bits.vpn) + io.tlb(i).resp.bits.entry.tag := RegNext(io.tlb(i).req(0).bits.vpn) io.tlb(i).resp.bits.entry.ppn := pte.ppn io.tlb(i).resp.bits.entry.perm.map(_ := pte.getPerm()) io.tlb(i).resp.bits.entry.level.map(_ := level) diff --git a/src/main/scala/xiangshan/cache/TLB.scala b/src/main/scala/xiangshan/cache/TLB.scala index 7605d74e31923344989f75202f1de9ff8cbaba96..d598c6919816dd0421c5bc5dbed48d632e93cfa0 100644 --- a/src/main/scala/xiangshan/cache/TLB.scala +++ b/src/main/scala/xiangshan/cache/TLB.scala @@ -267,18 +267,20 @@ class BlockTlbRequestIO()(implicit p: Parameters) extends TlbBundle { val resp = Flipped(DecoupledIO(new TlbResp)) } -class TlbPtwIO(implicit p: Parameters) extends TlbBundle { - val req = DecoupledIO(new PtwReq) +class TlbPtwIO(Width: Int = 1)(implicit p: Parameters) extends TlbBundle { + val req = Vec(Width, DecoupledIO(new PtwReq)) val resp = Flipped(DecoupledIO(new PtwResp)) + override def cloneType: this.type = (new TlbPtwIO(Width)).asInstanceOf[this.type] + override def toPrintable: Printable = { - p"req:${req.valid} ${req.ready} ${req.bits} | resp:${resp.valid} ${resp.ready} ${resp.bits}" + p"req(0):${req(0).valid} ${req(0).ready} ${req(0).bits} | resp:${resp.valid} ${resp.ready} ${resp.bits}" } } class TlbIO(Width: Int)(implicit p: Parameters) extends TlbBundle { val requestor = Vec(Width, Flipped(new TlbRequestIO)) - val ptw = new TlbPtwIO + val ptw = new TlbPtwIO(Width) val sfence = Input(new SfenceBundle) val csr = Input(new TlbCsrBundle) @@ -358,19 +360,25 @@ class TLB(Width: Int, isDtlb: Boolean)(implicit p: Parameters) extends TlbModule val refillIdx = sRefillIdx refillIdx.suggestName(s"SuperRefillIdx") - sv(refillIdx) := true.B - sMeta(refillIdx).apply( - vpn = resp.entry.tag, - level = resp.entry.level.getOrElse(0.U) - ) - sData(refillIdx).apply( - ppn = resp.entry.ppn, - level = resp.entry.level.getOrElse(0.U), - perm = VecInit(resp.entry.perm.getOrElse(0.U)).asUInt, - pf = resp.pf - ) - sReplace.access(sRefillIdx) - XSDebug(p"Refill superpage: idx:${refillIdx} entry:${resp.entry} pf:${resp.pf}\n") + val dup = Cat(sv.zip(sMeta).map{ case (v, m) => + v && m.hit(resp.entry.tag) + }).orR // NOTE: may have long latency, RegNext it + + when (!dup) { + sv(refillIdx) := true.B + sMeta(refillIdx).apply( + vpn = resp.entry.tag, + level = resp.entry.level.getOrElse(0.U) + ) + sData(refillIdx).apply( + ppn = resp.entry.ppn, + level = resp.entry.level.getOrElse(0.U), + perm = VecInit(resp.entry.perm.getOrElse(0.U)).asUInt, + pf = resp.pf + ) + sReplace.access(sRefillIdx) + XSDebug(p"Refill superpage: idx:${refillIdx} entry:${resp.entry} pf:${resp.pf}\n") + } } } @@ -476,30 +484,12 @@ class TLB(Width: Int, isDtlb: Boolean)(implicit p: Parameters) extends TlbModule val missVec = readResult.map(res => res._2) val hitVecVec = readResult.map(res => res._3) val validRegVec = readResult.map(res => res._4) - val hasMissReq = Cat(missVec).orR - // ptw - val waiting = RegInit(false.B) - when (ptw.req.fire()) { - waiting := true.B - } - when (sfence.valid || ptw.resp.valid) { - waiting := false.B - } - assert(!ptw.resp.valid || waiting) - - // ptw <> DontCare // TODO: need check it - ptw.req.valid := hasMissReq && !waiting && !RegNext(refill) - ptw.resp.ready := waiting - - // val ptwReqSeq = Wire(Seq.fill(Width)(new comBundle())) - val ptwReqSeq = Seq.fill(Width)(Wire(new comBundle())) for (i <- 0 until Width) { - ptwReqSeq(i).valid := ((if (isDtlb) RegNext(valid(i)) else valid(i)) && missVec(i)) - ptwReqSeq(i).roqIdx := (if (isDtlb) RegNext(req(i).bits.roqIdx) else req(i).bits.roqIdx) - ptwReqSeq(i).bits.vpn := (if (isDtlb) RegNext(reqAddr(i).vpn) else reqAddr(i).vpn) + io.ptw.req(i).valid := validRegVec(i) && missVec(i) && !RegNext(refill) + io.ptw.req(i).bits.vpn := RegNext(reqAddr(i).vpn) } - ptw.req.bits := Compare(ptwReqSeq).bits + io.ptw.resp.ready := true.B // val tooManyPf = PopCount(pf) > 5.U // when (tooManyPf) { // when too much pf, just clear @@ -540,18 +530,13 @@ class TLB(Width: Int, isDtlb: Boolean)(implicit p: Parameters) extends TlbModule // NOTE: ITLB is blocked, so every resp will be valid only when hit // every req will be ready only when hit XSPerfAccumulate("access", io.requestor(0).req.fire() && vmEnable) - XSPerfAccumulate("miss", ptw.req.fire()) + XSPerfAccumulate("miss", ptw.req(0).fire()) } - val reqCycleCnt = Reg(UInt(16.W)) - when (ptw.req.fire()) { - reqCycleCnt := 1.U - } - when (waiting) { - reqCycleCnt := reqCycleCnt + 1.U - } - XSPerfAccumulate("ptw_req_count", ptw.req.fire()) - XSPerfAccumulate("ptw_req_cycle", Mux(ptw.resp.fire(), reqCycleCnt, 0.U)) - XSPerfAccumulate("wait_blocked_count", waiting && hasMissReq) + //val reqCycleCnt = Reg(UInt(16.W)) + //reqCycleCnt := reqCycleCnt + BoolStopWatch(ptw.req(0).fire(), ptw.resp.fire || sfence.valid) + //XSPerfAccumulate("ptw_req_count", ptw.req.fire()) + //XSPerfAccumulate("ptw_req_cycle", Mux(ptw.resp.fire(), reqCycleCnt, 0.U)) + XSPerfAccumulate("ptw_resp_count", ptw.resp.fire()) XSPerfAccumulate("ptw_resp_pf_count", ptw.resp.fire() && ptw.resp.bits.pf) for (i <- 0 until TlbEntrySize) { val indexHitVec = hitVecVec.zip(validRegVec).map{ case (h, v) => h(i) && v } @@ -577,19 +562,13 @@ class TLB(Width: Int, isDtlb: Boolean)(implicit p: Parameters) extends TlbModule XSDebug(sfence.valid, p"Sfence: ${sfence}\n") XSDebug(ParallelOR(valid)|| ptw.resp.valid, p"CSR: ${csr}\n") XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)} v:${Hexadecimal(VecInit(v).asUInt)} pf:${Hexadecimal(pf.asUInt)}\n") - XSDebug(ptw.req.fire(), p"PTW req:${ptw.req.bits}\n") + for (i <- ptw.req.indices) { + XSDebug(ptw.req(i).fire(), p"PTW req:${ptw.req(i).bits}\n") + } XSDebug(ptw.resp.valid, p"PTW resp:${ptw.resp.bits} (v:${ptw.resp.valid}r:${ptw.resp.ready}) \n") // // NOTE: just for simple tlb debug, comment it after tlb's debug -// for (i <- 0 until Width) { -// if(isDtlb) { -// XSDebug(!(!vmEnable || RegNext(req(i).bits.vaddr)===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss || Cat(VecInit(resp(i).bits.excp.pf).asUInt).orR), p"Dtlb: vaddr:${Hexadecimal(RegNext(req(i).bits.vaddr))} paddr:${Hexadecimal(resp(i).bits.paddr)} should be equal\n") -// assert(!vmEnable || RegNext(req(i).bits.vaddr)===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss || Cat(VecInit(resp(i).bits.excp.pf).asUInt).orR) -// } else { -// XSDebug(!(!vmEnable || req(i).bits.vaddr===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss || Cat(VecInit(resp(i).bits.excp.pf).asUInt).orR), p"Itlb: vaddr:${Hexadecimal(RegNext(req(i).bits.vaddr))} paddr:${Hexadecimal(resp(i).bits.paddr)} should be equal\n") -// assert(!vmEnable || req(i).bits.vaddr===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss || Cat(VecInit(resp(i).bits.excp.pf).asUInt).orR) -// } -// } + // assert(!io.ptw.resp.valid || io.ptw.resp.bits.entry.tag === io.ptw.resp.bits.entry.ppn, "Simple tlb debug requires vpn === ppn") } object TLB {