diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala
index 19ad8350bda7b2f2656d6bf2dcacdee94e8b12a8..3f9ee8ce35d03229695d6bbe4e0e3ea0fbf90fab 100644
--- a/src/main/scala/xiangshan/XSCore.scala
+++ b/src/main/scala/xiangshan/XSCore.scala
@@ -23,13 +23,11 @@ import chisel3.util._
 import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
 import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
 import freechips.rocketchip.tile.HasFPUParameters
-import freechips.rocketchip.tilelink.TLBuffer
 import system.HasSoCParameter
 import utils._
 import utility._
 import xiangshan.backend._
 import xiangshan.backend.exu.{ExuConfig, Wb2Ctrl, WbArbiterWrapper}
-import xiangshan.cache.mmu._
 import xiangshan.frontend._
 import xiangshan.mem.L1PrefetchFuzzer
 
@@ -138,14 +136,8 @@ abstract class XSCoreBase()(implicit p: config.Parameters) extends LazyModule
   val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
   // outer facing nodes
   val frontend = LazyModule(new Frontend())
-  val ptw = LazyModule(new L2TLBWrapper())
-  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
   val csrOut = BundleBridgeSource(Some(() => new DistributedCSRIO()))
 
-  if (!coreParams.softPTW) {
-    ptw_to_l2_buffer.node := ptw.node
-  }
-
   val wbArbiter = LazyModule(new WbArbiterWrapper(exuConfigs, NRIntWritePorts, NRFpWritePorts))
   val intWbPorts = wbArbiter.intWbPorts
   val fpWbPorts = wbArbiter.fpWbPorts
@@ -257,8 +249,6 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
   val ctrlBlock = outer.ctrlBlock.module
   val wb2Ctrl = outer.wb2Ctrl.module
   val memBlock = outer.memBlock.module
-  val ptw = outer.ptw.module
-  val ptw_to_l2_buffer = if (!coreParams.softPTW) outer.ptw_to_l2_buffer.module else null
   val exuBlocks = outer.exuBlocks.map(_.module)
 
   frontend.io.hartId := io.hartId
@@ -372,11 +362,6 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
 
   ctrlBlock.perfinfo.perfEventsEu0 := exuBlocks(0).getPerf.dropRight(outer.exuBlocks(0).scheduler.numRs)
   ctrlBlock.perfinfo.perfEventsEu1 := exuBlocks(1).getPerf.dropRight(outer.exuBlocks(1).scheduler.numRs)
-  if (!coreParams.softPTW) {
-    memBlock.io.perfEventsPTW := ptw.getPerf
-  } else {
-    memBlock.io.perfEventsPTW := DontCare
-  }
   ctrlBlock.perfinfo.perfEventsRs := outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
 
   csrioIn.hartId <> io.hartId
@@ -416,6 +401,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
   fenceio.sfence <> memBlock.io.sfence
   fenceio.sbuffer <> memBlock.io.fenceToSbuffer
 
+  memBlock.io.itlb <> frontend.io.ptw
   memBlock.io.redirect <> ctrlBlock.io.redirect
   memBlock.io.rsfeedback <> exuBlocks(0).io.scheExtra.feedback.get
   memBlock.io.csrCtrl <> csrioIn.customCtrl
@@ -427,29 +413,13 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
   memBlock.io.l2_hint.valid := io.l2_hint.valid
   memBlock.io.l2_hint.bits.sourceId := io.l2_hint.bits.sourceId
 
-  val itlbRepeater1 = PTWFilter(itlbParams.fenceDelay,frontend.io.ptw, fenceio.sfence, csrioIn.tlb, l2tlbParams.ifilterSize)
-  val itlbRepeater2 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, itlbRepeater1.io.ptw, ptw.io.tlb(0), fenceio.sfence, csrioIn.tlb)
-  val dtlbRepeater1 = PTWFilter(ldtlbParams.fenceDelay, memBlock.io.ptw, fenceio.sfence, csrioIn.tlb, l2tlbParams.dfilterSize)
-  val dtlbRepeater2 = PTWRepeaterNB(passReady = false, ldtlbParams.fenceDelay, dtlbRepeater1.io.ptw, ptw.io.tlb(1), fenceio.sfence, csrioIn.tlb)
-  ptw.io.sfence <> fenceio.sfence
-  ptw.io.csr.tlb <> csrioIn.tlb
-  ptw.io.csr.distribute_csr <> csrioIn.customCtrl.distribute_csr
-
-  ExcitingUtils.addSource(dtlbRepeater1.io.rob_head_miss_in_tlb, s"miss_in_dtlb_${coreParams.HartId}", ExcitingUtils.Perf, true)
-
   // if l2 prefetcher use stream prefetch, it should be placed in XSCore
   io.l2_pf_enable := csrioIn.customCtrl.l2_pf_enable
 
   // Modules are reset one by one
   val resetTree = ResetGenNode(
     Seq(
-      ModuleNode(memBlock), ModuleNode(dtlbRepeater1),
-      ResetGenNode(Seq(
-        ModuleNode(itlbRepeater2),
-        ModuleNode(ptw),
-        ModuleNode(dtlbRepeater2),
-        ModuleNode(ptw_to_l2_buffer),
-      )),
+      ModuleNode(memBlock),
       ResetGenNode(Seq(
         ModuleNode(exuBlocks.head),
         ResetGenNode(
@@ -458,7 +428,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
         ResetGenNode(Seq(
           ModuleNode(ctrlBlock),
           ResetGenNode(Seq(
-            ModuleNode(frontend), ModuleNode(itlbRepeater1)
+            ModuleNode(frontend)
           ))
         ))
       ))
diff --git a/src/main/scala/xiangshan/XSTile.scala b/src/main/scala/xiangshan/XSTile.scala
index 49f9451d767e94bc9aaae1eaea03bb5324c81395..ee03ec468c2221f03bd0167f598db5f6c05d5833 100644
--- a/src/main/scala/xiangshan/XSTile.scala
+++ b/src/main/scala/xiangshan/XSTile.scala
@@ -127,7 +127,7 @@ class XSTile()(implicit p: Parameters) extends LazyModule
 
   misc.misc_l2_pmu := TLLogger(s"L2_L1I_${coreParams.HartId}", !debugOpts.FPGAPlatform && debugOpts.AlwaysBasicDB) := core.frontend.icache.clientNode
   if (!coreParams.softPTW) {
-    misc.misc_l2_pmu := TLLogger(s"L2_PTW_${coreParams.HartId}", !debugOpts.FPGAPlatform && debugOpts.AlwaysBasicDB) := core.ptw_to_l2_buffer.node
+    misc.misc_l2_pmu := TLLogger(s"L2_PTW_${coreParams.HartId}", !debugOpts.FPGAPlatform && debugOpts.AlwaysBasicDB) := core.memBlock.ptw_to_l2_buffer.node
   }
 
   l2cache match {
diff --git a/src/main/scala/xiangshan/backend/MemBlock.scala b/src/main/scala/xiangshan/backend/MemBlock.scala
index c28c727cfa3cf0ed1a648d4bced2816a5d567e14..0b6206b7555cb2d8cd86f96d5b36ef8ca720b403 100644
--- a/src/main/scala/xiangshan/backend/MemBlock.scala
+++ b/src/main/scala/xiangshan/backend/MemBlock.scala
@@ -21,6 +21,7 @@ import chisel3._
 import chisel3.util._
 import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
 import freechips.rocketchip.tile.HasFPUParameters
+import freechips.rocketchip.tilelink.TLBuffer
 import coupledL2.PrefetchRecv
 import utils._
 import utility._
@@ -29,7 +30,7 @@ import xiangshan.backend.exu.StdExeUnit
 import xiangshan.backend.fu._
 import xiangshan.backend.rob.{DebugLSIO, LsTopdownInfo, RobLsqIO}
 import xiangshan.cache._
-import xiangshan.cache.mmu.{VectorTlbPtwIO, TLBNonBlock, TlbReplace}
+import xiangshan.cache.mmu._
 import xiangshan.mem._
 import xiangshan.mem.mdp._
 import xiangshan.mem.prefetch.{BasePrefecher, SMSParams, SMSPrefetcher}
@@ -46,10 +47,17 @@ class MemBlock()(implicit p: Parameters) extends LazyModule
 
   val dcache = LazyModule(new DCacheWrapper())
   val uncache = LazyModule(new Uncache())
+  val ptw = LazyModule(new L2TLBWrapper())
+  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
   val pf_sender_opt = coreParams.prefetcher.map(_ =>
     BundleBridgeSource(() => new PrefetchRecv)
   )
 
+  // ptw_to_l2_buffer is null when softPTW is set, so only wire it up for the hardware PTW
+  if (!coreParams.softPTW) {
+    ptw_to_l2_buffer.node := ptw.node
+  }
+
   lazy val module = new MemBlockImp(this)
 
   override val writebackSourceParams: Seq[WritebackSourceParams] = {
@@ -70,7 +78,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
   val io = IO(new Bundle {
     val hartId = Input(UInt(8.W))
     val redirect = Flipped(ValidIO(new Redirect))
-    // in
+    // in from fetch
+    val itlb = Flipped(new TlbPtwIO())
+    // in from ooo
     val issue = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuInput)))
     val loadFastMatch = Vec(exuParameters.LduCnt, Input(UInt(exuParameters.LduCnt.W)))
     val loadFastImm = Vec(exuParameters.LduCnt, Input(UInt(12.W)))
@@ -91,7 +101,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
     // misc
     val stIn = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
     val memoryViolation = ValidIO(new Redirect)
-    val ptw = new VectorTlbPtwIO(exuParameters.LduCnt + exuParameters.StuCnt + 1) // load + store + hw prefetch
     val sfence = Input(new SfenceBundle)
     val tlbCsr = Input(new TlbCsrBundle)
     val fenceToSbuffer = Flipped(new FenceToSbuffer)
@@ -111,7 +120,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
       val lqFull = Output(Bool())
       val dcacheMSHRFull = Output(Bool())
     }
-    val perfEventsPTW = Input(Vec(19, new PerfEvent))
     val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
     val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
     val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
@@ -238,9 +246,24 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
   sbuffer.io.hartId := io.hartId
   atomicsUnit.io.hartId := io.hartId
 
-  // dtlb
+  // ptw
   val sfence = RegNext(RegNext(io.sfence))
   val tlbcsr = RegNext(RegNext(io.tlbCsr))
+  val ptw = outer.ptw.module
+  val ptw_to_l2_buffer = if (!coreParams.softPTW) outer.ptw_to_l2_buffer.module else null
+  ptw.io.sfence <> sfence
+  ptw.io.csr.tlb <> tlbcsr
+  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
+  ptw.io.tlb(0) <> io.itlb
+
+  val perfEventsPTW = Wire(Vec(19, new PerfEvent))
+  if (!coreParams.softPTW) {
+    perfEventsPTW := ptw.getPerf
+  } else {
+    perfEventsPTW := DontCare
+  }
+
+  // dtlb
   val dtlb_ld = VecInit(Seq.fill(1){
     val tlb_ld = Module(new TLBNonBlock(exuParameters.LduCnt, 2, ldtlbParams))
     tlb_ld.io // let the module have name in waveform
@@ -254,6 +277,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
     tlb_prefetch.io // let the module have name in waveform
   })
   val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
+  val ptwio = Wire(new VectorTlbPtwIO(exuParameters.LduCnt + exuParameters.StuCnt + 1)) // load + store + hw prefetch
   val dtlb_reqs = dtlb.map(_.requestor).flatten
   val dtlb_pmps = dtlb.map(_.pmp).flatten
   dtlb.map(_.sfence := sfence)
@@ -264,30 +288,31 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
     require(ldtlbParams.outReplace)
 
     val replace = Module(new TlbReplace(exuParameters.LduCnt + exuParameters.StuCnt + 1, ldtlbParams))
-    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace), io.ptw.resp.bits.data.entry.tag)
+    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace), ptwio.resp.bits.data.entry.tag)
   } else {
     if (ldtlbParams.outReplace) {
       val replace_ld = Module(new TlbReplace(exuParameters.LduCnt, ldtlbParams))
-      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), io.ptw.resp.bits.data.entry.tag)
+      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.entry.tag)
     }
     if (sttlbParams.outReplace) {
       val replace_st = Module(new TlbReplace(exuParameters.StuCnt, sttlbParams))
-      replace_st.io.apply_sep(dtlb_st.map(_.replace), io.ptw.resp.bits.data.entry.tag)
+      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.entry.tag)
     }
   }
 
-  val ptw_resp_next = RegEnable(io.ptw.resp.bits, io.ptw.resp.valid)
-  val ptw_resp_v = RegNext(io.ptw.resp.valid && !(sfence.valid && tlbcsr.satp.changed), init = false.B)
-  io.ptw.resp.ready := true.B
+  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
+  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed), init = false.B)
+  ptwio.resp.ready := true.B
 
   dtlb.flatMap(a => a.ptw.req)
     .zipWithIndex
     .foreach{ case (tlb, i) =>
-      tlb <> io.ptw.req(i)
+      tlb.ready := ptwio.req(i).ready
+      ptwio.req(i).bits := tlb.bits
       val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
         else if (i < exuParameters.LduCnt) Cat(ptw_resp_next.vector.take(exuParameters.LduCnt)).orR
         else Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt)).orR
-      io.ptw.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit &&
+      ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit &&
         ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, allType = true, ignoreAsid = true))
     }
   dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
@@ -299,14 +324,10 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
     dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt + exuParameters.StuCnt)).orR)
   }
 
-  for (i <- 0 until exuParameters.LduCnt) {
-    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
-  }
-  for (i <- 0 until exuParameters.StuCnt) {
-    io.debug_ls.debugLsInfo(i + exuParameters.LduCnt) := storeUnits(i).io.debug_ls
-  }
-
-  io.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo)
+  val dtlbRepeater1 = PTWFilter(ldtlbParams.fenceDelay, ptwio, sfence, tlbcsr, l2tlbParams.dfilterSize)
+  val dtlbRepeater2 = PTWRepeaterNB(passReady = false, ldtlbParams.fenceDelay, dtlbRepeater1.io.ptw, ptw.io.tlb(1), sfence, tlbcsr)
+  val itlbRepeater2 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.itlb, ptw.io.tlb(0), sfence, tlbcsr)
+  ExcitingUtils.addSource(dtlbRepeater1.io.rob_head_miss_in_tlb, s"miss_in_dtlb_${coreParams.HartId}", ExcitingUtils.Perf, true)
 
   // pmp
   val pmp = Module(new PMP())
@@ -319,11 +340,20 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
   }
   for (i <- 0 until 8) {
     val pmp_check_ptw = Module(new PMPCheckerv2(lgMaxSize = 3, sameCycle = false, leaveHitMux = true))
-    pmp_check_ptw.io.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, io.ptw.resp.valid,
-      Cat(io.ptw.resp.bits.data.entry.ppn, io.ptw.resp.bits.data.ppn_low(i), 0.U(12.W)).asUInt)
+    pmp_check_ptw.io.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, ptwio.resp.valid,
+      Cat(ptwio.resp.bits.data.entry.ppn, ptwio.resp.bits.data.ppn_low(i), 0.U(12.W)).asUInt)
     dtlb.map(_.ptw_replenish(i) := pmp_check_ptw.io.resp)
   }
 
+  for (i <- 0 until exuParameters.LduCnt) {
+    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
+  }
+  for (i <- 0 until exuParameters.StuCnt) {
+    io.debug_ls.debugLsInfo(i + exuParameters.LduCnt) := storeUnits(i).io.debug_ls
+  }
+
+  io.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo)
+
   val tdata = RegInit(VecInit(Seq.fill(6)(0.U.asTypeOf(new MatchTriggerIO))))
   val tEnable = RegInit(VecInit(Seq.fill(6)(false.B)))
   val en = csrCtrl.trigger_enable
@@ -745,7 +775,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
     ("stDeqCount", stDeqCount),
   )
   val allPerfEvents = memBlockPerfEvents ++ (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
-  val hpmEvents = allPerfEvents.map(_._2.asTypeOf(new PerfEvent)) ++ io.perfEventsPTW
+  val hpmEvents = allPerfEvents.map(_._2.asTypeOf(new PerfEvent)) ++ perfEventsPTW
   val perfEvents = HPerfMonitor(csrevents, hpmEvents).getPerfEvents
   generatePerfEvent()
 }
diff --git a/src/main/scala/xiangshan/frontend/Frontend.scala b/src/main/scala/xiangshan/frontend/Frontend.scala
index 35ec3c3c5d5cbc40500b7fe19eaa286d7e5bae1f..1fc96376f6dae685268dab6af3958dceee2300e0 100644
--- a/src/main/scala/xiangshan/frontend/Frontend.scala
+++ b/src/main/scala/xiangshan/frontend/Frontend.scala
@@ -44,7 +44,7 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
     val hartId = Input(UInt(8.W))
     val reset_vector = Input(UInt(PAddrBits.W))
     val fencei = Input(Bool())
-    val ptw = new VectorTlbPtwIO(coreParams.itlbPortNum)
+    val ptw = new TlbPtwIO()
     val backend = new FrontendToCtrlIO
     val sfence = Input(new SfenceBundle)
     val tlbCsr = Input(new TlbCsrBundle)
@@ -109,11 +109,15 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
     Seq(true, true) ++ Seq.fill(prefetchPipeNum)(false) ++ Seq(true), itlbParams))
   itlb.io.requestor.take(2 + prefetchPipeNum) zip icache.io.itlb foreach {case (a,b) => a <> b}
   itlb.io.requestor.last <> ifu.io.iTLBInter // mmio may need re-tlb, blocked
-  itlb.io.base_connect(io.sfence, tlbCsr)
-  io.ptw.connect(itlb.io.ptw)
+  itlb.io.base_connect(sfence, tlbCsr)
   itlb.io.ptw_replenish <> DontCare
   itlb.io.flushPipe.map(_ := needFlush)
 
+  val itlb_ptw = Wire(new VectorTlbPtwIO(coreParams.itlbPortNum))
+  itlb_ptw.connect(itlb.io.ptw)
+  val itlbRepeater1 = PTWFilter(itlbParams.fenceDelay, itlb_ptw, sfence, tlbCsr, l2tlbParams.ifilterSize)
+  io.ptw <> itlbRepeater1.io.ptw
+
   icache.io.prefetch <> ftq.io.toPrefetch
 
 