未验证 提交 6f688dac 编写于 作者: Y Yinan Xu 提交者: GitHub

core: delay csrCtrl for two cycles (#1336)

This commit adds DelayN(2) to some CSR-related signals, including
control bits to ITLB, DTLB, PTW, etc.

To avoid accessing the ITLB before control bits change, we also need
to delay the flush for two cycles. We assume branch misprediction or
memory violation does not cause csrCtrl to change.
上级 70899835
......@@ -34,8 +34,6 @@ import xiangshan.mem.mdp.{LFST, SSIT, WaitTable}
// IO bundle carrying control-flow information from the backend control block
// to the frontend FTQ (Fetch Target Queue).
// NOTE(review): this is the pre-change version shown by the diff; the commit
// removes stage3Redirect and robFlush, folding their roles into a delayed
// stage2Redirect (see the Ftq hunks below).
class CtrlToFtqIO(implicit p: Parameters) extends XSBundle {
// Per-slot ROB commit info (CommitWidth entries), presumably consumed by the
// FTQ to update predictor state on commit — confirm against Ftq's usage.
val rob_commits = Vec(CommitWidth, Valid(new RobCommitInfo))
// Redirect produced at stage 2 of the redirect pipeline.
val stage2Redirect = Valid(new Redirect)
// Redirect produced one cycle later, at stage 3.
// NOTE(review): `Valid(...)` and `ValidIO(...)` construct the same Chisel
// bundle type; the mixed spelling here is a style inconsistency only.
val stage3Redirect = ValidIO(new Redirect)
// Pipeline flush request originating from the ROB.
val robFlush = ValidIO(new Redirect)
}
class RedirectGenerator(implicit p: Parameters) extends XSModule
......@@ -114,7 +112,6 @@ class RedirectGenerator(implicit p: Parameters) extends XSModule
// stage1 -> stage2
io.stage2Redirect.valid := s1_redirect_valid_reg && !io.flush
io.stage2Redirect.bits := s1_redirect_bits_reg
io.stage2Redirect.bits.cfiUpdate := DontCare
val s1_isReplay = s1_redirect_onehot.last
val s1_isJump = s1_redirect_onehot.head
......@@ -129,6 +126,22 @@ class RedirectGenerator(implicit p: Parameters) extends XSModule
)
)
val stage2CfiUpdate = io.stage2Redirect.bits.cfiUpdate
stage2CfiUpdate.pc := real_pc
stage2CfiUpdate.pd := s1_pd
stage2CfiUpdate.predTaken := s1_redirect_bits_reg.cfiUpdate.predTaken
stage2CfiUpdate.target := target
stage2CfiUpdate.taken := s1_redirect_bits_reg.cfiUpdate.taken
stage2CfiUpdate.isMisPred := s1_redirect_bits_reg.cfiUpdate.isMisPred
val s2_target = RegEnable(target, enable = s1_redirect_valid_reg)
val s2_pc = RegEnable(real_pc, enable = s1_redirect_valid_reg)
val s2_redirect_bits_reg = RegEnable(s1_redirect_bits_reg, enable = s1_redirect_valid_reg)
val s2_redirect_valid_reg = RegNext(s1_redirect_valid_reg && !io.flush, init = false.B)
io.stage3Redirect.valid := s2_redirect_valid_reg
io.stage3Redirect.bits := s2_redirect_bits_reg
// get pc from ftq
// valid only if redirect is caused by load violation
// store_pc is used to update store set
......@@ -144,22 +157,6 @@ class RedirectGenerator(implicit p: Parameters) extends XSModule
// store pc is ready 1 cycle after s1_isReplay is judged
io.memPredUpdate.stpc := XORFold(store_pc(VAddrBits-1, 1), MemPredPCWidth)
val s2_target = RegEnable(target, enable = s1_redirect_valid_reg)
val s2_pd = RegEnable(s1_pd, enable = s1_redirect_valid_reg)
val s2_pc = RegEnable(real_pc, enable = s1_redirect_valid_reg)
val s2_redirect_bits_reg = RegEnable(s1_redirect_bits_reg, enable = s1_redirect_valid_reg)
val s2_redirect_valid_reg = RegNext(s1_redirect_valid_reg && !io.flush, init = false.B)
io.stage3Redirect.valid := s2_redirect_valid_reg
io.stage3Redirect.bits := s2_redirect_bits_reg
val stage3CfiUpdate = io.stage3Redirect.bits.cfiUpdate
stage3CfiUpdate.pc := s2_pc
stage3CfiUpdate.pd := s2_pd
stage3CfiUpdate.predTaken := s2_redirect_bits_reg.cfiUpdate.predTaken
stage3CfiUpdate.target := s2_target
stage3CfiUpdate.taken := s2_redirect_bits_reg.cfiUpdate.taken
stage3CfiUpdate.isMisPred := s2_redirect_bits_reg.cfiUpdate.isMisPred
// recover runahead checkpoint if redirect
if (!env.FPGAPlatform) {
val runahead_redirect = Module(new DifftestRunaheadRedirectEvent)
......@@ -304,15 +301,21 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
redirectGen.io.hartId := io.hartId
redirectGen.io.exuMispredict <> exuRedirect
redirectGen.io.loadReplay <> loadReplay
redirectGen.io.flush := RegNext(rob.io.flushOut.valid)
redirectGen.io.flush := flushRedirect.valid
for(i <- 0 until CommitWidth){
io.frontend.toFtq.rob_commits(i).valid := rob.io.commits.valid(i) && !rob.io.commits.isWalk
io.frontend.toFtq.rob_commits(i).bits := rob.io.commits.info(i)
val frontendFlush = DelayN(flushRedirect, 5)
val frontendStage2Redirect = Mux(frontendFlush.valid, frontendFlush, redirectGen.io.stage2Redirect)
for (i <- 0 until CommitWidth) {
io.frontend.toFtq.rob_commits(i).valid := RegNext(rob.io.commits.valid(i) && !rob.io.commits.isWalk)
io.frontend.toFtq.rob_commits(i).bits := RegNext(rob.io.commits.info(i))
}
io.frontend.toFtq.stage2Redirect := frontendStage2Redirect
val pendingRedirect = RegInit(false.B)
when (stage2Redirect.valid) {
pendingRedirect := true.B
}.elsewhen (RegNext(io.frontend.toFtq.stage2Redirect.valid)) {
pendingRedirect := false.B
}
io.frontend.toFtq.stage2Redirect <> stage2Redirect
io.frontend.toFtq.robFlush <> RegNext(rob.io.flushOut)
io.frontend.toFtq.stage3Redirect := stage3Redirect
decode.io.in <> io.frontend.cfVec
decode.io.csrCtrl := io.csrCtrl
......@@ -362,7 +365,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
// pipeline between decode and rename
for (i <- 0 until RenameWidth) {
PipelineConnect(decode.io.out(i), rename.io.in(i), rename.io.in(i).ready,
stage2Redirect.valid || stage3Redirect.valid)
stage2Redirect.valid || pendingRedirect)
}
rename.io.redirect <> stage2Redirect
......
......@@ -94,12 +94,14 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
}
val perfEventsPTW = Input(Vec(19, new PerfEvent))
})
override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.writeback))
val dcache = outer.dcache.module
val uncache = outer.uncache.module
dcache.io.csr.distribute_csr <> io.csrCtrl.distribute_csr
val csrCtrl = DelayN(io.csrCtrl, 2)
dcache.io.csr.distribute_csr <> csrCtrl.distribute_csr
io.csrUpdate := RegNext(dcache.io.csr.update)
io.error <> RegNext(RegNext(dcache.io.error))
......@@ -188,7 +190,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// pmp
val pmp = Module(new PMP())
pmp.io.distribute_csr <> io.csrCtrl.distribute_csr
pmp.io.distribute_csr <> csrCtrl.distribute_csr
val pmp_check = VecInit(Seq.fill(exuParameters.LduCnt + exuParameters.StuCnt)(Module(new PMPChecker(3)).io))
for ((p,d) <- pmp_check zip dtlb.map(_.pmp(0))) {
......@@ -197,10 +199,10 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
}
val tdata = Reg(Vec(6, new MatchTriggerIO))
val tEnable = RegInit(VecInit(Seq.fill(6)(false.B)))
val en = io.csrCtrl.trigger_enable
val en = csrCtrl.trigger_enable
tEnable := VecInit(en(2), en (3), en(7), en(4), en(5), en(9))
when(io.csrCtrl.mem_trigger.t.valid) {
tdata(io.csrCtrl.mem_trigger.t.bits.addr) := io.csrCtrl.mem_trigger.t.bits.tdata
when(csrCtrl.mem_trigger.t.valid) {
tdata(csrCtrl.mem_trigger.t.bits.addr) := csrCtrl.mem_trigger.t.bits.tdata
}
val lTriggerMapping = Map(0 -> 4, 1 -> 5, 2 -> 9)
val sTriggerMapping = Map(0 -> 2, 1 -> 3, 2 -> 7)
......@@ -224,7 +226,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
// ld-ld violation check
loadUnits(i).io.lsq.loadViolationQuery <> lsq.io.loadViolationQuery(i)
loadUnits(i).io.csrCtrl <> io.csrCtrl
loadUnits(i).io.csrCtrl <> csrCtrl
// dtlb
loadUnits(i).io.tlb <> dtlb_ld(i).requestor(0)
// pmp
......@@ -369,7 +371,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
lsq.io.sqempty <> sbuffer.io.sqempty
// Sbuffer
sbuffer.io.csrCtrl <> RegNext(io.csrCtrl)
sbuffer.io.csrCtrl <> csrCtrl
sbuffer.io.dcache <> dcache.io.lsu.store
// TODO: if dcache sbuffer resp needs to ne delayed
// sbuffer.io.dcache.pipe_resp.valid := RegNext(dcache.io.lsu.store.pipe_resp.valid)
......@@ -474,7 +476,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
XSPerfAccumulate("ls_rs_deq_count", rsDeqCount)
val pfevent = Module(new PFEvent)
pfevent.io.distribute_csr := io.csrCtrl.distribute_csr
pfevent.io.distribute_csr := csrCtrl.distribute_csr
val csrevents = pfevent.io.hpmevent.slice(16,24)
val memBlockPerfEvents = Seq(
......
......@@ -51,6 +51,8 @@ class ExeUnit(config: ExuConfig)(implicit p: Parameters) extends Exu(config) {
case c: CSR => c
}.get
csr.csrio <> csrio.get
csrio.get.tlb := DelayN(csr.csrio.tlb, 2)
csrio.get.customCtrl := DelayN(csr.csrio.customCtrl, 2)
disableSfence := csr.csrio.disableSfence
csr_frm := csr.csrio.fpu.frm
// setup skip for hpm CSR read
......
......@@ -74,8 +74,8 @@ class PTWImp(outer: PTW)(implicit p: Parameters) extends PtwModule(outer) with H
difftestIO <> DontCare
val sfence = RegNext(io.sfence)
val csr = RegNext(io.csr.tlb)
val sfence = DelayN(io.sfence, 2)
val csr = DelayN(io.csr.tlb, 2)
val satp = csr.satp
val priv = csr.priv
val flush = sfence.valid || csr.satp.changed
......
......@@ -55,7 +55,7 @@ class PTWRepeater(Width: Int = 1)(implicit p: Parameters) extends XSModule with
arb.io.in <> io.tlb.req
arb.io.out
}
val (tlb, ptw, flush) = (io.tlb, io.ptw, RegNext(io.sfence.valid || io.csr.satp.changed))
val (tlb, ptw, flush) = (io.tlb, io.ptw, DelayN(io.sfence.valid || io.csr.satp.changed, 2))
val req = RegEnable(req_in.bits, req_in.fire())
val resp = RegEnable(ptw.resp.bits, ptw.resp.fire())
val haveOne = BoolStopWatch(req_in.fire(), tlb.resp.fire() || flush)
......@@ -95,7 +95,7 @@ class PTWRepeaterNB(Width: Int = 1, passReady: Boolean = false)(implicit p: Para
arb.io.in <> io.tlb.req
arb.io.out
}
val (tlb, ptw, flush) = (io.tlb, io.ptw, RegNext(io.sfence.valid || io.csr.satp.changed))
val (tlb, ptw, flush) = (io.tlb, io.ptw, DelayN(io.sfence.valid || io.csr.satp.changed, 2))
/* sent: tlb -> repeater -> ptw
* recv: ptw -> repeater -> tlb
* different from PTWRepeater
......@@ -162,7 +162,7 @@ class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule
val mayFullIss = RegInit(false.B)
val counter = RegInit(0.U(log2Up(Size+1).W))
val flush = RegNext(io.sfence.valid || io.csr.satp.changed)
val flush = DelayN(io.sfence.valid || io.csr.satp.changed, 2)
val tlb_req = WireInit(io.tlb.req)
tlb_req.suggestName("tlb_req")
......
......@@ -65,18 +65,18 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
val ibuffer = Module(new Ibuffer)
val ftq = Module(new Ftq)
val tlbCsr = RegNext(io.tlbCsr)
val tlbCsr = DelayN(io.tlbCsr, 2)
val csrCtrl = DelayN(io.csrCtrl, 2)
// trigger
ifu.io.frontendTrigger := io.csrCtrl.frontend_trigger
val triggerEn = io.csrCtrl.trigger_enable
ifu.io.frontendTrigger := csrCtrl.frontend_trigger
val triggerEn = csrCtrl.trigger_enable
ifu.io.csrTriggerEnable := VecInit(triggerEn(0), triggerEn(1), triggerEn(6), triggerEn(8))
// pmp
val pmp = Module(new PMP())
val pmp_check = VecInit(Seq.fill(2)(Module(new PMPChecker(3, sameCycle = true)).io))
pmp.io.distribute_csr := io.csrCtrl.distribute_csr
pmp.io.distribute_csr := csrCtrl.distribute_csr
for (i <- pmp_check.indices) {
pmp_check(i).apply(tlbCsr.priv.imode, pmp.io.pmp, pmp.io.pma, icache.io.pmp(i).req)
icache.io.pmp(i).resp <> pmp_check(i).resp
......@@ -93,7 +93,7 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
icache.io.fencei := RegNext(io.fencei)
val needFlush = io.backend.toFtq.stage3Redirect.valid
val needFlush = RegNext(io.backend.toFtq.stage2Redirect.valid)
//IFU-Ftq
ifu.io.ftqInter.fromFtq <> ftq.io.toIfu
......@@ -112,7 +112,7 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
//icache.io.missQueue.flush := ifu.io.ftqInter.fromFtq.redirect.valid || (ifu.io.ftqInter.toFtq.pdWb.valid && ifu.io.ftqInter.toFtq.pdWb.bits.misOffset.valid)
icache.io.csr.distribute_csr <> io.csrCtrl.distribute_csr
icache.io.csr.distribute_csr <> csrCtrl.distribute_csr
io.csrUpdate := RegNext(icache.io.csr.update)
//IFU-Ibuffer
......
......@@ -458,11 +458,10 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
})
io.bpuInfo := DontCare
val robFlush = io.fromBackend.robFlush
val stage2Redirect = io.fromBackend.stage2Redirect
val stage3Redirect = io.fromBackend.stage3Redirect
val stage3Redirect = RegNext(io.fromBackend.stage2Redirect)
val stage2Flush = stage2Redirect.valid || robFlush.valid
val stage2Flush = stage2Redirect.valid
val backendFlush = stage2Flush || RegNext(stage2Flush)
val ifuFlush = Wire(Bool())
......@@ -470,8 +469,8 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
val allowBpuIn, allowToIfu = WireInit(false.B)
val flushToIfu = !allowToIfu
allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
allowBpuIn := !ifuFlush && !stage2Redirect.valid && !stage3Redirect.valid
allowToIfu := !ifuFlush && !stage2Redirect.valid && !stage3Redirect.valid
val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
val validEntries = distanceBetween(bpuPtr, commPtr)
......@@ -718,7 +717,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value
val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
val fromBackendRedirect = WireInit(stage3Redirect)
val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
......@@ -813,7 +812,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
// **************************** flush ptr and state queue ****************************
// ***********************************************************************************
val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect)
val redirectVec = VecInit(stage2Redirect, fromIfuRedirect)
// when redirect, we should reset ptrs and status queues
when(redirectVec.map(r => r.valid).reduce(_||_)){
......@@ -834,7 +833,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
}
// only the valid bit is actually needed
io.toIfu.redirect.bits := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
io.toIfu.redirect.bits := stage2Redirect.bits
io.toIfu.redirect.valid := stage2Flush
// commit
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册