未验证 提交 1a718038 编写于 作者: H Haoyuan Feng 提交者: GitHub

PTW: Move PTW to MemBlock (#2211)

* PTW: Move PTW to MemBlock

Move itlbRepeater1 to Frontend and itlbRepeater2 to MemBlock, the dtlbRepeaters
to MemBlock, and the L2 TLB (PTW) and ptw_to_l2_buffer to MemBlock for better partitioning.

* MMU: Fix sfence delay to synchronize modules
上级 119ae8e0
......@@ -23,13 +23,11 @@ import chisel3.util._
import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
import freechips.rocketchip.tile.HasFPUParameters
import freechips.rocketchip.tilelink.TLBuffer
import system.HasSoCParameter
import utils._
import utility._
import xiangshan.backend._
import xiangshan.backend.exu.{ExuConfig, Wb2Ctrl, WbArbiterWrapper}
import xiangshan.cache.mmu._
import xiangshan.frontend._
import xiangshan.mem.L1PrefetchFuzzer
......@@ -138,14 +136,8 @@ abstract class XSCoreBase()(implicit p: config.Parameters) extends LazyModule
val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
// outer facing nodes
val frontend = LazyModule(new Frontend())
val ptw = LazyModule(new L2TLBWrapper())
val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
val csrOut = BundleBridgeSource(Some(() => new DistributedCSRIO()))
if (!coreParams.softPTW) {
ptw_to_l2_buffer.node := ptw.node
}
val wbArbiter = LazyModule(new WbArbiterWrapper(exuConfigs, NRIntWritePorts, NRFpWritePorts))
val intWbPorts = wbArbiter.intWbPorts
val fpWbPorts = wbArbiter.fpWbPorts
......@@ -257,8 +249,6 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
val ctrlBlock = outer.ctrlBlock.module
val wb2Ctrl = outer.wb2Ctrl.module
val memBlock = outer.memBlock.module
val ptw = outer.ptw.module
val ptw_to_l2_buffer = if (!coreParams.softPTW) outer.ptw_to_l2_buffer.module else null
val exuBlocks = outer.exuBlocks.map(_.module)
frontend.io.hartId := io.hartId
......@@ -372,11 +362,6 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
ctrlBlock.perfinfo.perfEventsEu0 := exuBlocks(0).getPerf.dropRight(outer.exuBlocks(0).scheduler.numRs)
ctrlBlock.perfinfo.perfEventsEu1 := exuBlocks(1).getPerf.dropRight(outer.exuBlocks(1).scheduler.numRs)
if (!coreParams.softPTW) {
memBlock.io.perfEventsPTW := ptw.getPerf
} else {
memBlock.io.perfEventsPTW := DontCare
}
ctrlBlock.perfinfo.perfEventsRs := outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
csrioIn.hartId <> io.hartId
......@@ -416,6 +401,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
fenceio.sfence <> memBlock.io.sfence
fenceio.sbuffer <> memBlock.io.fenceToSbuffer
memBlock.io.itlb <> frontend.io.ptw
memBlock.io.redirect <> ctrlBlock.io.redirect
memBlock.io.rsfeedback <> exuBlocks(0).io.scheExtra.feedback.get
memBlock.io.csrCtrl <> csrioIn.customCtrl
......@@ -427,29 +413,13 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
memBlock.io.l2_hint.valid := io.l2_hint.valid
memBlock.io.l2_hint.bits.sourceId := io.l2_hint.bits.sourceId
val itlbRepeater1 = PTWFilter(itlbParams.fenceDelay,frontend.io.ptw, fenceio.sfence, csrioIn.tlb, l2tlbParams.ifilterSize)
val itlbRepeater2 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, itlbRepeater1.io.ptw, ptw.io.tlb(0), fenceio.sfence, csrioIn.tlb)
val dtlbRepeater1 = PTWFilter(ldtlbParams.fenceDelay, memBlock.io.ptw, fenceio.sfence, csrioIn.tlb, l2tlbParams.dfilterSize)
val dtlbRepeater2 = PTWRepeaterNB(passReady = false, ldtlbParams.fenceDelay, dtlbRepeater1.io.ptw, ptw.io.tlb(1), fenceio.sfence, csrioIn.tlb)
ptw.io.sfence <> fenceio.sfence
ptw.io.csr.tlb <> csrioIn.tlb
ptw.io.csr.distribute_csr <> csrioIn.customCtrl.distribute_csr
ExcitingUtils.addSource(dtlbRepeater1.io.rob_head_miss_in_tlb, s"miss_in_dtlb_${coreParams.HartId}", ExcitingUtils.Perf, true)
// if l2 prefetcher use stream prefetch, it should be placed in XSCore
io.l2_pf_enable := csrioIn.customCtrl.l2_pf_enable
// Modules are reset one by one
val resetTree = ResetGenNode(
Seq(
ModuleNode(memBlock), ModuleNode(dtlbRepeater1),
ResetGenNode(Seq(
ModuleNode(itlbRepeater2),
ModuleNode(ptw),
ModuleNode(dtlbRepeater2),
ModuleNode(ptw_to_l2_buffer),
)),
ModuleNode(memBlock),
ResetGenNode(Seq(
ModuleNode(exuBlocks.head),
ResetGenNode(
......@@ -458,7 +428,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
ResetGenNode(Seq(
ModuleNode(ctrlBlock),
ResetGenNode(Seq(
ModuleNode(frontend), ModuleNode(itlbRepeater1)
ModuleNode(frontend)
))
))
))
......
......@@ -127,7 +127,7 @@ class XSTile()(implicit p: Parameters) extends LazyModule
misc.misc_l2_pmu := TLLogger(s"L2_L1I_${coreParams.HartId}", !debugOpts.FPGAPlatform && debugOpts.AlwaysBasicDB) := core.frontend.icache.clientNode
if (!coreParams.softPTW) {
misc.misc_l2_pmu := TLLogger(s"L2_PTW_${coreParams.HartId}", !debugOpts.FPGAPlatform && debugOpts.AlwaysBasicDB) := core.ptw_to_l2_buffer.node
misc.misc_l2_pmu := TLLogger(s"L2_PTW_${coreParams.HartId}", !debugOpts.FPGAPlatform && debugOpts.AlwaysBasicDB) := core.memBlock.ptw_to_l2_buffer.node
}
l2cache match {
......
......@@ -21,6 +21,7 @@ import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
import freechips.rocketchip.tile.HasFPUParameters
import freechips.rocketchip.tilelink.TLBuffer
import coupledL2.PrefetchRecv
import utils._
import utility._
......@@ -29,7 +30,7 @@ import xiangshan.backend.exu.StdExeUnit
import xiangshan.backend.fu._
import xiangshan.backend.rob.{DebugLSIO, LsTopdownInfo, RobLsqIO}
import xiangshan.cache._
import xiangshan.cache.mmu.{VectorTlbPtwIO, TLBNonBlock, TlbReplace}
import xiangshan.cache.mmu._
import xiangshan.mem._
import xiangshan.mem.mdp._
import xiangshan.mem.prefetch.{BasePrefecher, SMSParams, SMSPrefetcher}
......@@ -46,10 +47,14 @@ class MemBlock()(implicit p: Parameters) extends LazyModule
val dcache = LazyModule(new DCacheWrapper())
val uncache = LazyModule(new Uncache())
val ptw = LazyModule(new L2TLBWrapper())
val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
val pf_sender_opt = coreParams.prefetcher.map(_ =>
BundleBridgeSource(() => new PrefetchRecv)
)
// ptw_to_l2_buffer is null when coreParams.softPTW is set, so only connect
// the PTW's node to the buffer when the buffer was actually created.
if (!coreParams.softPTW) {
  ptw_to_l2_buffer.node := ptw.node
}
lazy val module = new MemBlockImp(this)
override val writebackSourceParams: Seq[WritebackSourceParams] = {
......@@ -70,7 +75,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val io = IO(new Bundle {
val hartId = Input(UInt(8.W))
val redirect = Flipped(ValidIO(new Redirect))
// in
// in from fetch
val itlb = Flipped(new TlbPtwIO())
// in from ooo
val issue = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuInput)))
val loadFastMatch = Vec(exuParameters.LduCnt, Input(UInt(exuParameters.LduCnt.W)))
val loadFastImm = Vec(exuParameters.LduCnt, Input(UInt(12.W)))
......@@ -91,7 +98,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// misc
val stIn = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
val memoryViolation = ValidIO(new Redirect)
val ptw = new VectorTlbPtwIO(exuParameters.LduCnt + exuParameters.StuCnt + 1) // load + store + hw prefetch
val sfence = Input(new SfenceBundle)
val tlbCsr = Input(new TlbCsrBundle)
val fenceToSbuffer = Flipped(new FenceToSbuffer)
......@@ -111,7 +117,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val lqFull = Output(Bool())
val dcacheMSHRFull = Output(Bool())
}
val perfEventsPTW = Input(Vec(19, new PerfEvent))
val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
......@@ -238,9 +243,24 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
sbuffer.io.hartId := io.hartId
atomicsUnit.io.hartId := io.hartId
// dtlb
// ptw
// sfence and tlbCsr each pass through two register stages before use in this block.
val sfence = RegNext(RegNext(io.sfence))
val tlbcsr = RegNext(RegNext(io.tlbCsr))
val ptw = outer.ptw.module
// outer.ptw_to_l2_buffer is null when coreParams.softPTW is set; taking `.module`
// unconditionally would dereference null, so guard it the same way it is created.
val ptw_to_l2_buffer = if (!coreParams.softPTW) outer.ptw_to_l2_buffer.module else null
ptw.io.sfence <> sfence
ptw.io.csr.tlb <> tlbcsr
ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
ptw.io.tlb(0) <> io.itlb

// PTW perf events are taken from the PTW module; with softPTW they are left as DontCare.
val perfEventsPTW = Wire(Vec(19, new PerfEvent))
if (!coreParams.softPTW) {
  perfEventsPTW := ptw.getPerf
} else {
  perfEventsPTW := DontCare
}
// dtlb
val dtlb_ld = VecInit(Seq.fill(1){
val tlb_ld = Module(new TLBNonBlock(exuParameters.LduCnt, 2, ldtlbParams))
tlb_ld.io // let the module have name in waveform
......@@ -254,6 +274,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
tlb_prefetch.io // let the module have name in waveform
})
val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
val ptwio = Wire(new VectorTlbPtwIO(exuParameters.LduCnt + exuParameters.StuCnt + 1)) // load + store + hw prefetch
val dtlb_reqs = dtlb.map(_.requestor).flatten
val dtlb_pmps = dtlb.map(_.pmp).flatten
dtlb.map(_.sfence := sfence)
......@@ -264,30 +285,31 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
require(ldtlbParams.outReplace)
val replace = Module(new TlbReplace(exuParameters.LduCnt + exuParameters.StuCnt + 1, ldtlbParams))
replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace), io.ptw.resp.bits.data.entry.tag)
replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace), ptwio.resp.bits.data.entry.tag)
} else {
if (ldtlbParams.outReplace) {
val replace_ld = Module(new TlbReplace(exuParameters.LduCnt, ldtlbParams))
replace_ld.io.apply_sep(dtlb_ld.map(_.replace), io.ptw.resp.bits.data.entry.tag)
replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.entry.tag)
}
if (sttlbParams.outReplace) {
val replace_st = Module(new TlbReplace(exuParameters.StuCnt, sttlbParams))
replace_st.io.apply_sep(dtlb_st.map(_.replace), io.ptw.resp.bits.data.entry.tag)
replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.entry.tag)
}
}
val ptw_resp_next = RegEnable(io.ptw.resp.bits, io.ptw.resp.valid)
val ptw_resp_v = RegNext(io.ptw.resp.valid && !(sfence.valid && tlbcsr.satp.changed), init = false.B)
io.ptw.resp.ready := true.B
val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed), init = false.B)
ptwio.resp.ready := true.B
dtlb.flatMap(a => a.ptw.req)
.zipWithIndex
.foreach{ case (tlb, i) =>
tlb <> io.ptw.req(i)
tlb.ready := ptwio.req(i).ready
ptwio.req(i).bits := tlb.bits
val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
else if (i < exuParameters.LduCnt) Cat(ptw_resp_next.vector.take(exuParameters.LduCnt)).orR
else Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt)).orR
io.ptw.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit &&
ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit &&
ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, allType = true, ignoreAsid = true))
}
dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
......@@ -299,14 +321,10 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt + exuParameters.StuCnt)).orR)
}
for (i <- 0 until exuParameters.LduCnt) {
io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
}
for (i <- 0 until exuParameters.StuCnt) {
io.debug_ls.debugLsInfo(i + exuParameters.LduCnt) := storeUnits(i).io.debug_ls
}
io.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo)
val dtlbRepeater1 = PTWFilter(ldtlbParams.fenceDelay, ptwio, sfence, tlbcsr, l2tlbParams.dfilterSize)
val dtlbRepeater2 = PTWRepeaterNB(passReady = false, ldtlbParams.fenceDelay, dtlbRepeater1.io.ptw, ptw.io.tlb(1), sfence, tlbcsr)
val itlbRepeater2 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.itlb, ptw.io.tlb(0), sfence, tlbcsr)
ExcitingUtils.addSource(dtlbRepeater1.io.rob_head_miss_in_tlb, s"miss_in_dtlb_${coreParams.HartId}", ExcitingUtils.Perf, true)
// pmp
val pmp = Module(new PMP())
......@@ -319,11 +337,20 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
}
for (i <- 0 until 8) {
val pmp_check_ptw = Module(new PMPCheckerv2(lgMaxSize = 3, sameCycle = false, leaveHitMux = true))
pmp_check_ptw.io.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, io.ptw.resp.valid,
Cat(io.ptw.resp.bits.data.entry.ppn, io.ptw.resp.bits.data.ppn_low(i), 0.U(12.W)).asUInt)
pmp_check_ptw.io.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, ptwio.resp.valid,
Cat(ptwio.resp.bits.data.entry.ppn, ptwio.resp.bits.data.ppn_low(i), 0.U(12.W)).asUInt)
dtlb.map(_.ptw_replenish(i) := pmp_check_ptw.io.resp)
}
for (i <- 0 until exuParameters.LduCnt) {
io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
}
for (i <- 0 until exuParameters.StuCnt) {
io.debug_ls.debugLsInfo(i + exuParameters.LduCnt) := storeUnits(i).io.debug_ls
}
io.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo)
val tdata = RegInit(VecInit(Seq.fill(6)(0.U.asTypeOf(new MatchTriggerIO))))
val tEnable = RegInit(VecInit(Seq.fill(6)(false.B)))
val en = csrCtrl.trigger_enable
......@@ -745,7 +772,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
("stDeqCount", stDeqCount),
)
val allPerfEvents = memBlockPerfEvents ++ (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
val hpmEvents = allPerfEvents.map(_._2.asTypeOf(new PerfEvent)) ++ io.perfEventsPTW
val hpmEvents = allPerfEvents.map(_._2.asTypeOf(new PerfEvent)) ++ perfEventsPTW
val perfEvents = HPerfMonitor(csrevents, hpmEvents).getPerfEvents
generatePerfEvent()
}
......@@ -44,7 +44,7 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
val hartId = Input(UInt(8.W))
val reset_vector = Input(UInt(PAddrBits.W))
val fencei = Input(Bool())
val ptw = new VectorTlbPtwIO(coreParams.itlbPortNum)
val ptw = new TlbPtwIO()
val backend = new FrontendToCtrlIO
val sfence = Input(new SfenceBundle)
val tlbCsr = Input(new TlbCsrBundle)
......@@ -109,11 +109,15 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
Seq(true, true) ++ Seq.fill(prefetchPipeNum)(false) ++ Seq(true), itlbParams))
itlb.io.requestor.take(2 + prefetchPipeNum) zip icache.io.itlb foreach {case (a,b) => a <> b}
itlb.io.requestor.last <> ifu.io.iTLBInter // mmio may need re-tlb, blocked
itlb.io.base_connect(io.sfence, tlbCsr)
io.ptw.connect(itlb.io.ptw)
itlb.io.base_connect(sfence, tlbCsr)
itlb.io.ptw_replenish <> DontCare
itlb.io.flushPipe.map(_ := needFlush)
val itlb_ptw = Wire(new VectorTlbPtwIO(coreParams.itlbPortNum))
itlb_ptw.connect(itlb.io.ptw)
val itlbRepeater1 = PTWFilter(itlbParams.fenceDelay, itlb_ptw, sfence, tlbCsr, l2tlbParams.ifilterSize)
io.ptw <> itlbRepeater1.io.ptw
icache.io.prefetch <> ftq.io.toPrefetch
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册