提交 89369e3e 编写于 作者: Z zhanglinjuan

Merge branch 'master' into dev-dcache-rearrange

...@@ -182,7 +182,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter { ...@@ -182,7 +182,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
xs_core(i).module.io.externalInterrupt.msip := clint.module.io.msip(i) xs_core(i).module.io.externalInterrupt.msip := clint.module.io.msip(i)
// xs_core(i).module.io.externalInterrupt.meip := RegNext(RegNext(io.meip(i))) // xs_core(i).module.io.externalInterrupt.meip := RegNext(RegNext(io.meip(i)))
xs_core(i).module.io.externalInterrupt.meip := plic.module.io.extra.get.meip(i) xs_core(i).module.io.externalInterrupt.meip := plic.module.io.extra.get.meip(i)
l2prefetcher(i).module.io.enable := xs_core(i).module.io.l2_pf_enable l2prefetcher(i).module.io.enable := RegNext(xs_core(i).module.io.l2_pf_enable)
l2prefetcher(i).module.io.in <> l2cache(i).module.io l2prefetcher(i).module.io.in <> l2cache(i).module.io
} }
......
...@@ -72,7 +72,6 @@ abstract class SetAssocReplacementPolicy { ...@@ -72,7 +72,6 @@ abstract class SetAssocReplacementPolicy {
def access(set: UInt, touch_way: UInt): Unit def access(set: UInt, touch_way: UInt): Unit
def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit
def way(set: UInt): UInt def way(set: UInt): UInt
def miss(set: UInt): Unit
} }
...@@ -332,7 +331,7 @@ class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocRepl ...@@ -332,7 +331,7 @@ class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocRepl
} }
def way(set: UInt) = logic.get_replace_way(state_vec(set)) def way(set: UInt) = logic.get_replace_way(state_vec(set))
def miss(set: UInt) = {}
} }
class SetAssocRandom(n_sets : Int, n_ways: Int) extends SetAssocReplacementPolicy { class SetAssocRandom(n_sets : Int, n_ways: Int) extends SetAssocReplacementPolicy {
......
...@@ -52,18 +52,11 @@ object ValidUndirectioned { ...@@ -52,18 +52,11 @@ object ValidUndirectioned {
} }
class SCMeta(val useSC: Boolean) extends XSBundle with HasSCParameter { class SCMeta(val useSC: Boolean) extends XSBundle with HasSCParameter {
def maxVal = 8 * ((1 << TageCtrBits) - 1) + SCTableInfo.map { case (_, cb, _) => (1 << cb) - 1 }.reduce(_ + _)
def minVal = -(8 * (1 << TageCtrBits) + SCTableInfo.map { case (_, cb, _) => 1 << cb }.reduce(_ + _))
def sumCtrBits = max(log2Ceil(-minVal), log2Ceil(maxVal + 1)) + 1
val tageTaken = if (useSC) Bool() else UInt(0.W) val tageTaken = if (useSC) Bool() else UInt(0.W)
val scUsed = if (useSC) Bool() else UInt(0.W) val scUsed = if (useSC) Bool() else UInt(0.W)
val scPred = if (useSC) Bool() else UInt(0.W) val scPred = if (useSC) Bool() else UInt(0.W)
// Suppose ctrbits of all tables are identical // Suppose ctrbits of all tables are identical
val ctrs = if (useSC) Vec(SCNTables, SInt(SCCtrBits.W)) else Vec(SCNTables, SInt(0.W)) val ctrs = if (useSC) Vec(SCNTables, SInt(SCCtrBits.W)) else Vec(SCNTables, SInt(0.W))
val sumAbs = if (useSC) UInt(sumCtrBits.W) else UInt(0.W)
} }
class TageMeta extends XSBundle with HasTageParameter { class TageMeta extends XSBundle with HasTageParameter {
...@@ -401,6 +394,7 @@ class RoqCommitIO extends XSBundle { ...@@ -401,6 +394,7 @@ class RoqCommitIO extends XSBundle {
class TlbFeedback extends XSBundle { class TlbFeedback extends XSBundle {
val rsIdx = UInt(log2Up(IssQueSize).W) val rsIdx = UInt(log2Up(IssQueSize).W)
val hit = Bool() val hit = Bool()
val flushState = Bool()
} }
class RSFeedback extends TlbFeedback class RSFeedback extends TlbFeedback
...@@ -539,11 +533,14 @@ class CustomCSRCtrlIO extends XSBundle { ...@@ -539,11 +533,14 @@ class CustomCSRCtrlIO extends XSBundle {
// Prefetcher // Prefetcher
val l1plus_pf_enable = Output(Bool()) val l1plus_pf_enable = Output(Bool())
val l2_pf_enable = Output(Bool()) val l2_pf_enable = Output(Bool())
// Labeled XiangShan
val dsid = Output(UInt(8.W)) // TODO: DsidWidth as parameter val dsid = Output(UInt(8.W)) // TODO: DsidWidth as parameter
// Load violation predict // Load violation predictor
val lvpred_disable = Output(Bool()) val lvpred_disable = Output(Bool())
val no_spec_load = Output(Bool()) val no_spec_load = Output(Bool())
val waittable_timeout = Output(UInt(5.W)) val waittable_timeout = Output(UInt(5.W))
// Branch predicter // Branch predictor
val bp_ctrl = Output(new BPUCtrl) val bp_ctrl = Output(new BPUCtrl)
} // Memory Block
\ No newline at end of file val sbuffer_threshold = Output(UInt(4.W))
}
...@@ -50,7 +50,7 @@ case class XSCoreParameters ...@@ -50,7 +50,7 @@ case class XSCoreParameters
EnableRAS: Boolean = true, EnableRAS: Boolean = true,
EnableLB: Boolean = false, EnableLB: Boolean = false,
EnableLoop: Boolean = true, EnableLoop: Boolean = true,
EnableSC: Boolean = false, EnableSC: Boolean = true,
EnbaleTlbDebug: Boolean = false, EnbaleTlbDebug: Boolean = false,
EnableJal: Boolean = false, EnableJal: Boolean = false,
EnableUBTB: Boolean = true, EnableUBTB: Boolean = true,
...@@ -203,13 +203,14 @@ trait HasXSParameter { ...@@ -203,13 +203,14 @@ trait HasXSParameter {
val icacheParameters = ICacheParameters( val icacheParameters = ICacheParameters(
tagECC = Some("parity"), tagECC = Some("parity"),
dataECC = Some("parity"), dataECC = Some("parity"),
replacer = Some("setlru"), replacer = Some("setplru"),
nMissEntries = 2 nMissEntries = 2
) )
val l1plusCacheParameters = L1plusCacheParameters( val l1plusCacheParameters = L1plusCacheParameters(
tagECC = Some("secded"), tagECC = Some("secded"),
dataECC = Some("secded"), dataECC = Some("secded"),
replacer = Some("setplru"),
nMissEntries = 8 nMissEntries = 8
) )
...@@ -347,7 +348,8 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule ...@@ -347,7 +348,8 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule
fastWakeUpIn = intExuConfigs.filter(_.hasCertainLatency), fastWakeUpIn = intExuConfigs.filter(_.hasCertainLatency),
slowWakeUpIn = intExuConfigs.filter(_.hasUncertainlatency) ++ fpExuConfigs, slowWakeUpIn = intExuConfigs.filter(_.hasUncertainlatency) ++ fpExuConfigs,
fastWakeUpOut = Seq(), fastWakeUpOut = Seq(),
slowWakeUpOut = loadExuConfigs slowWakeUpOut = loadExuConfigs,
numIntWakeUpFp = intExuConfigs.count(_.writeFpRf)
)) ))
lazy val module = new XSCoreImp(this) lazy val module = new XSCoreImp(this)
...@@ -413,8 +415,8 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) ...@@ -413,8 +415,8 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ctrlBlock.io.toLsBlock <> memBlock.io.fromCtrlBlock ctrlBlock.io.toLsBlock <> memBlock.io.fromCtrlBlock
ctrlBlock.io.csrCtrl <> integerBlock.io.csrio.customCtrl ctrlBlock.io.csrCtrl <> integerBlock.io.csrio.customCtrl
val memBlockWakeUpInt = memBlock.io.wakeUpOutInt.slow.map(x => intOutValid(x)) val memBlockWakeUpInt = memBlock.io.wakeUpOutInt.slow.map(WireInit(_))
val memBlockWakeUpFp = memBlock.io.wakeUpOutFp.slow.map(x => fpOutValid(x)) val memBlockWakeUpFp = memBlock.io.wakeUpOutFp.slow.map(WireInit(_))
memBlock.io.wakeUpOutInt.slow.foreach(_.ready := true.B) memBlock.io.wakeUpOutInt.slow.foreach(_.ready := true.B)
memBlock.io.wakeUpOutFp.slow.foreach(_.ready := true.B) memBlock.io.wakeUpOutFp.slow.foreach(_.ready := true.B)
...@@ -422,13 +424,13 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) ...@@ -422,13 +424,13 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
val fpBlockWakeUpInt = fpExuConfigs val fpBlockWakeUpInt = fpExuConfigs
.zip(floatBlock.io.wakeUpOut.slow) .zip(floatBlock.io.wakeUpOut.slow)
.filter(_._1.writeIntRf) .filter(_._1.writeIntRf)
.map(_._2).map(x => intOutValid(x, connectReady = true)) .map(_._2)
intExuConfigs.zip(integerBlock.io.wakeUpOut.slow).filterNot(_._1.writeFpRf).map(_._2.ready := true.B) intExuConfigs.zip(integerBlock.io.wakeUpOut.slow).filterNot(_._1.writeFpRf).map(_._2.ready := true.B)
val intBlockWakeUpFp = intExuConfigs.filter(_.hasUncertainlatency) val intBlockWakeUpFp = intExuConfigs.filter(_.hasUncertainlatency)
.zip(integerBlock.io.wakeUpOut.slow) .zip(integerBlock.io.wakeUpOut.slow)
.filter(_._1.writeFpRf) .filter(_._1.writeFpRf)
.map(_._2).map(x => fpOutValid(x, connectReady = true)) .map(_._2)
integerBlock.io.wakeUpIn.slow <> fpBlockWakeUpInt ++ memBlockWakeUpInt integerBlock.io.wakeUpIn.slow <> fpBlockWakeUpInt ++ memBlockWakeUpInt
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock
...@@ -446,6 +448,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) ...@@ -446,6 +448,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
// Note: 'WireInit' is used to block 'ready's from memBlock, // Note: 'WireInit' is used to block 'ready's from memBlock,
// we don't need 'ready's from memBlock // we don't need 'ready's from memBlock
memBlock.io.wakeUpIn.slow <> wakeUpMem.flatMap(_.slow.map(x => WireInit(x))) memBlock.io.wakeUpIn.slow <> wakeUpMem.flatMap(_.slow.map(x => WireInit(x)))
memBlock.io.intWakeUpFp <> floatBlock.io.intWakeUpOut
integerBlock.io.csrio.hartId <> io.hartId integerBlock.io.csrio.hartId <> io.hartId
integerBlock.io.csrio.perf <> DontCare integerBlock.io.csrio.perf <> DontCare
...@@ -464,26 +467,27 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) ...@@ -464,26 +467,27 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
integerBlock.io.fenceio.sfence <> memBlock.io.sfence integerBlock.io.fenceio.sfence <> memBlock.io.sfence
integerBlock.io.fenceio.sbuffer <> memBlock.io.fenceToSbuffer integerBlock.io.fenceio.sbuffer <> memBlock.io.fenceToSbuffer
memBlock.io.tlbCsr <> RegNext(integerBlock.io.csrio.tlb) memBlock.io.csrCtrl <> integerBlock.io.csrio.customCtrl
memBlock.io.tlbCsr <> integerBlock.io.csrio.tlb
memBlock.io.lsqio.roq <> ctrlBlock.io.roqio.lsq memBlock.io.lsqio.roq <> ctrlBlock.io.roqio.lsq
memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.roqio.exception.bits.uop.lqIdx memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.roqio.exception.bits.uop.lqIdx
memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.roqio.exception.bits.uop.sqIdx memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.roqio.exception.bits.uop.sqIdx
memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.uop.ctrl.commitType) memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.uop.ctrl.commitType)
val itlbRepester = Module(new PTWRepeater()) val itlbRepeater = Module(new PTWRepeater())
val dtlbRepester = Module(new PTWRepeater()) val dtlbRepeater = Module(new PTWRepeater())
itlbRepester.io.tlb <> frontend.io.ptw itlbRepeater.io.tlb <> frontend.io.ptw
dtlbRepester.io.tlb <> memBlock.io.ptw dtlbRepeater.io.tlb <> memBlock.io.ptw
itlbRepester.io.sfence <> integerBlock.io.fenceio.sfence itlbRepeater.io.sfence <> integerBlock.io.fenceio.sfence
dtlbRepester.io.sfence <> integerBlock.io.fenceio.sfence dtlbRepeater.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.tlb(0) <> dtlbRepester.io.ptw ptw.io.tlb(0) <> dtlbRepeater.io.ptw
ptw.io.tlb(1) <> itlbRepester.io.ptw ptw.io.tlb(1) <> itlbRepeater.io.ptw
ptw.io.sfence <> integerBlock.io.fenceio.sfence ptw.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.csr <> integerBlock.io.csrio.tlb ptw.io.csr <> integerBlock.io.csrio.tlb
// if the l2 prefetcher uses stream prefetch, it should be placed in XSCore // if the l2 prefetcher uses stream prefetch, it should be placed in XSCore
assert(l2PrefetcherParameters._type == "bop") assert(l2PrefetcherParameters._type == "bop")
io.l2_pf_enable := RegNext(integerBlock.io.csrio.customCtrl.l2_pf_enable) io.l2_pf_enable := integerBlock.io.csrio.customCtrl.l2_pf_enable
if (!env.FPGAPlatform) { if (!env.FPGAPlatform) {
val id = hartIdCore() val id = hartIdCore()
......
...@@ -52,7 +52,7 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai ...@@ -52,7 +52,7 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
val stage2FtqRead = new FtqRead val stage2FtqRead = new FtqRead
val stage2Redirect = ValidIO(new Redirect) val stage2Redirect = ValidIO(new Redirect)
val stage3Redirect = ValidIO(new Redirect) val stage3Redirect = ValidIO(new Redirect)
val waitTableUpdate = Output(new WaitTableUpdateReq) val waitTableUpdate = Output(new WaitTableUpdateReq)
}) })
/* /*
LoadQueue Jump ALU0 ALU1 ALU2 ALU3 exception Stage1 LoadQueue Jump ALU0 ALU1 ALU2 ALU3 exception Stage1
...@@ -75,23 +75,14 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai ...@@ -75,23 +75,14 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
val valid = Bool() val valid = Bool()
val idx = UInt(log2Up(n).W) val idx = UInt(log2Up(n).W)
} }
def selectOldestRedirect(xs: Seq[Valid[Redirect]]): (Valid[Redirect], UInt) = { def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
val wrappers = for((r, i) <- xs.zipWithIndex) yield { val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.roqIdx, xs(i).bits.roqIdx)))
val wrap = Wire(new Wrapper(xs.size)) val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
wrap.redirect := r.bits (if (j < i) !xs(j).valid || compareVec(i)(j)
wrap.valid := r.valid else if (j == i) xs(i).valid
wrap.idx := i.U else !xs(j).valid || !compareVec(j)(i))
wrap )).andR))
} resultOnehot
val oldest = ParallelOperation[Wrapper](wrappers, (x, y) => {
Mux(x.valid,
Mux(y.valid, Mux(isAfter(x.redirect.roqIdx, y.redirect.roqIdx), y, x), x), y
)
})
val result = Wire(Valid(new Redirect))
result.valid := oldest.valid
result.bits := oldest.redirect
(result, oldest.idx)
} }
for((ptr, redirect) <- io.stage1FtqRead.map(_.ptr).zip( for((ptr, redirect) <- io.stage1FtqRead.map(_.ptr).zip(
...@@ -106,44 +97,30 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai ...@@ -106,44 +97,30 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
} }
val jumpOut = io.exuMispredict.head val jumpOut = io.exuMispredict.head
val aluOut = VecInit(io.exuMispredict.tail) val allRedirect = VecInit(io.exuMispredict.map(x => getRedirect(x)) :+ io.loadReplay)
val (oldestAluRedirect, oldestAluIdx) = selectOldestRedirect(aluOut.map(getRedirect)) val oldestOneHot = selectOldestRedirect(allRedirect)
val (oldestExuRedirect, jumpIsOlder) = selectOldestRedirect(Seq( val needFlushVec = VecInit(allRedirect.map(_.bits.roqIdx.needFlush(io.stage2Redirect, io.flush)))
oldestAluRedirect, getRedirect(jumpOut) val oldestValid = VecInit(oldestOneHot.zip(needFlushVec).map{ case (v, f) => v && !f }).asUInt.orR
)) val oldestExuOutput = Mux1H((0 until 5).map(oldestOneHot), io.exuMispredict)
val oldestExuOutput = Mux(jumpIsOlder.asBool(), jumpOut, aluOut(oldestAluIdx)) val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
val (oldestRedirect, _) = selectOldestRedirect(Seq(io.loadReplay, oldestExuRedirect))
val s1_isJump = RegNext(jumpIsOlder.asBool(), init = false.B)
val s1_jumpTarget = RegEnable(jumpOut.bits.redirect.cfiUpdate.target, jumpOut.valid) val s1_jumpTarget = RegEnable(jumpOut.bits.redirect.cfiUpdate.target, jumpOut.valid)
val s1_imm12_reg = RegEnable(oldestExuOutput.bits.uop.ctrl.imm(11, 0), oldestExuOutput.valid) val s1_imm12_reg = RegNext(oldestExuOutput.bits.uop.ctrl.imm(11, 0))
val s1_pd = RegEnable(oldestExuOutput.bits.uop.cf.pd, oldestExuOutput.valid) val s1_pd = RegNext(oldestExuOutput.bits.uop.cf.pd)
val s1_redirect_bits_reg = Reg(new Redirect) val s1_redirect_bits_reg = RegNext(oldestRedirect.bits)
val s1_redirect_valid_reg = RegInit(false.B) val s1_redirect_valid_reg = RegNext(oldestValid)
val s1_aluIdx = RegEnable(oldestAluIdx, oldestAluRedirect.valid) val s1_redirect_onehot = RegNext(oldestOneHot)
// stage1 -> stage2 // stage1 -> stage2
when(oldestRedirect.valid && !oldestRedirect.bits.roqIdx.needFlush(io.stage2Redirect, io.flush)){
s1_redirect_bits_reg := oldestRedirect.bits
s1_redirect_valid_reg := true.B
}.otherwise({
s1_redirect_valid_reg := false.B
})
io.stage2Redirect.valid := s1_redirect_valid_reg && !io.flush io.stage2Redirect.valid := s1_redirect_valid_reg && !io.flush
io.stage2Redirect.bits := s1_redirect_bits_reg io.stage2Redirect.bits := s1_redirect_bits_reg
io.stage2Redirect.bits.cfiUpdate := DontCare io.stage2Redirect.bits.cfiUpdate := DontCare
// at stage2, we read ftq to get pc // at stage2, we read ftq to get pc
io.stage2FtqRead.ptr := s1_redirect_bits_reg.ftqIdx io.stage2FtqRead.ptr := s1_redirect_bits_reg.ftqIdx
val isReplay = RedirectLevel.flushItself(s1_redirect_bits_reg.level) val s1_isReplay = s1_redirect_onehot(5)
val ftqRead = Mux(isReplay, val s1_isJump = s1_redirect_onehot(0)
io.stage1FtqRead.last.entry, val ftqRead = Mux1H(s1_redirect_onehot, io.stage1FtqRead).entry
Mux(
s1_isJump,
io.stage1FtqRead.head.entry,
VecInit(io.stage1FtqRead.tail.take(exuParameters.AluCnt).map(_.entry))(s1_aluIdx)
)
)
val cfiUpdate_pc = Cat( val cfiUpdate_pc = Cat(
ftqRead.ftqPC.head(VAddrBits - s1_redirect_bits_reg.ftqOffset.getWidth - instOffsetBits), ftqRead.ftqPC.head(VAddrBits - s1_redirect_bits_reg.ftqOffset.getWidth - instOffsetBits),
s1_redirect_bits_reg.ftqOffset, s1_redirect_bits_reg.ftqOffset,
...@@ -155,7 +132,7 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai ...@@ -155,7 +132,7 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
) )
val brTarget = real_pc + SignExt(ImmUnion.B.toImm32(s1_imm12_reg), XLEN) val brTarget = real_pc + SignExt(ImmUnion.B.toImm32(s1_imm12_reg), XLEN)
val snpc = real_pc + Mux(s1_pd.isRVC, 2.U, 4.U) val snpc = real_pc + Mux(s1_pd.isRVC, 2.U, 4.U)
val target = Mux(isReplay, val target = Mux(s1_isReplay,
real_pc, // replay from itself real_pc, // replay from itself
Mux(s1_redirect_bits_reg.cfiUpdate.taken, Mux(s1_redirect_bits_reg.cfiUpdate.taken,
Mux(s1_isJump, s1_jumpTarget, brTarget), Mux(s1_isJump, s1_jumpTarget, brTarget),
...@@ -164,12 +141,17 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai ...@@ -164,12 +141,17 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
) )
// update waittable if load violation redirect triggered // update waittable if load violation redirect triggered
io.waitTableUpdate.valid := RegNext(isReplay && s1_redirect_valid_reg, init = false.B) io.waitTableUpdate.valid := RegNext(s1_isReplay && s1_redirect_valid_reg, init = false.B)
io.waitTableUpdate.waddr := RegNext(XORFold(real_pc(VAddrBits-1, 1), WaitTableAddrWidth)) io.waitTableUpdate.waddr := RegNext(XORFold(real_pc(VAddrBits-1, 1), WaitTableAddrWidth))
io.waitTableUpdate.wdata := true.B io.waitTableUpdate.wdata := true.B
io.stage2FtqRead.ptr := s1_redirect_bits_reg.ftqIdx io.stage2FtqRead.ptr := s1_redirect_bits_reg.ftqIdx
val s2_br_mask = RegEnable(ftqRead.br_mask, enable = s1_redirect_valid_reg)
val s2_sawNotTakenBranch = RegEnable(VecInit((0 until PredictWidth).map{ i =>
if(i == 0) false.B else Cat(ftqRead.br_mask.take(i)).orR()
})(s1_redirect_bits_reg.ftqOffset), enable = s1_redirect_valid_reg)
val s2_hist = RegEnable(ftqRead.hist, enable = s1_redirect_valid_reg)
val s2_target = RegEnable(target, enable = s1_redirect_valid_reg) val s2_target = RegEnable(target, enable = s1_redirect_valid_reg)
val s2_pd = RegEnable(s1_pd, enable = s1_redirect_valid_reg) val s2_pd = RegEnable(s1_pd, enable = s1_redirect_valid_reg)
val s2_cfiUpdata_pc = RegEnable(cfiUpdate_pc, enable = s1_redirect_valid_reg) val s2_cfiUpdata_pc = RegEnable(cfiUpdate_pc, enable = s1_redirect_valid_reg)
...@@ -186,11 +168,9 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai ...@@ -186,11 +168,9 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
stage3CfiUpdate.rasEntry := s2_ftqRead.rasTop stage3CfiUpdate.rasEntry := s2_ftqRead.rasTop
stage3CfiUpdate.predHist := s2_ftqRead.predHist stage3CfiUpdate.predHist := s2_ftqRead.predHist
stage3CfiUpdate.specCnt := s2_ftqRead.specCnt stage3CfiUpdate.specCnt := s2_ftqRead.specCnt
stage3CfiUpdate.hist := s2_ftqRead.hist stage3CfiUpdate.hist := s2_hist
stage3CfiUpdate.predTaken := s2_redirect_bits_reg.cfiUpdate.predTaken stage3CfiUpdate.predTaken := s2_redirect_bits_reg.cfiUpdate.predTaken
stage3CfiUpdate.sawNotTakenBranch := VecInit((0 until PredictWidth).map{ i => stage3CfiUpdate.sawNotTakenBranch := s2_sawNotTakenBranch
if(i == 0) false.B else Cat(s2_ftqRead.br_mask.take(i)).orR()
})(s2_redirect_bits_reg.ftqOffset)
stage3CfiUpdate.target := s2_target stage3CfiUpdate.target := s2_target
stage3CfiUpdate.taken := s2_redirect_bits_reg.cfiUpdate.taken stage3CfiUpdate.taken := s2_redirect_bits_reg.cfiUpdate.taken
stage3CfiUpdate.isMisPred := s2_redirect_bits_reg.cfiUpdate.isMisPred stage3CfiUpdate.isMisPred := s2_redirect_bits_reg.cfiUpdate.isMisPred
......
...@@ -30,6 +30,7 @@ class FloatBlock ...@@ -30,6 +30,7 @@ class FloatBlock
val intWakeUpFp = Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput))) val intWakeUpFp = Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))
val memWakeUpFp = Vec(memSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput))) val memWakeUpFp = Vec(memSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size)) val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val intWakeUpOut = Vec(intSlowWakeUpIn.size, DecoupledIO(new ExuOutput))
// from csr // from csr
val frm = Input(UInt(3.W)) val frm = Input(UInt(3.W))
...@@ -39,24 +40,28 @@ class FloatBlock ...@@ -39,24 +40,28 @@ class FloatBlock
val flush = io.fromCtrlBlock.flush val flush = io.fromCtrlBlock.flush
val intWakeUpFpReg = Wire(Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))) val intWakeUpFpReg = Wire(Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput))))
intWakeUpFpReg.zip(io.intWakeUpFp).foreach{ for((w, r) <- io.intWakeUpFp.zip(intWakeUpFpReg)){
case (inReg, wakeUpIn) => val in = WireInit(w)
val in = WireInit(wakeUpIn) w.ready := in.ready
wakeUpIn.ready := in.ready in.valid := w.valid && !w.bits.uop.roqIdx.needFlush(redirect, flush)
in.valid := wakeUpIn.valid && !wakeUpIn.bits.uop.roqIdx.needFlush(redirect, flush) PipelineConnect(in, r, r.fire() || r.bits.uop.roqIdx.needFlush(redirect, flush), false.B)
PipelineConnect(in, inReg,
inReg.fire() || inReg.bits.uop.roqIdx.needFlush(redirect, flush), false.B
)
} }
val intRecoded = WireInit(intWakeUpFpReg) // to memBlock's store rs
for(((rec, reg), cfg) <- intRecoded.zip(intWakeUpFpReg).zip(intSlowWakeUpIn)){ io.intWakeUpOut <> intWakeUpFpReg.map(x => WireInit(x))
rec.bits.data := Mux(reg.bits.uop.ctrl.fpu.typeTagOut === S,
recode(reg.bits.data(31, 0), S), val intRecoded = intWakeUpFpReg.map(x => {
recode(reg.bits.data(63, 0), D) val rec = Wire(DecoupledIO(new ExuOutput))
rec.valid := x.valid && x.bits.uop.ctrl.fpWen
rec.bits := x.bits
rec.bits.data := Mux(x.bits.uop.ctrl.fpu.typeTagOut === S,
recode(x.bits.data(31, 0), S),
recode(x.bits.data(63, 0), D)
) )
rec.bits.redirectValid := false.B rec.bits.redirectValid := false.B
reg.ready := rec.ready || !rec.valid x.ready := rec.ready || !rec.valid
} rec
})
val memRecoded = WireInit(io.memWakeUpFp) val memRecoded = WireInit(io.memWakeUpFp)
for((rec, reg) <- memRecoded.zip(io.memWakeUpFp)){ for((rec, reg) <- memRecoded.zip(io.memWakeUpFp)){
rec.bits.data := fpRdataHelper(reg.bits.uop, reg.bits.data) rec.bits.data := fpRdataHelper(reg.bits.uop, reg.bits.data)
...@@ -166,7 +171,9 @@ class FloatBlock ...@@ -166,7 +171,9 @@ class FloatBlock
NRFpWritePorts, NRFpWritePorts,
isFp = true isFp = true
)) ))
fpWbArbiter.io.in.drop(exeUnits.length).zip(wakeUpInRecode).foreach(x => x._1 <> x._2) fpWbArbiter.io.in.drop(exeUnits.length).zip(wakeUpInRecode).foreach(
x => x._1 <> fpOutValid(x._2, connectReady = true)
)
for((exu, i) <- exeUnits.zipWithIndex){ for((exu, i) <- exeUnits.zipWithIndex){
val out, outReg = Wire(DecoupledIO(new ExuOutput)) val out, outReg = Wire(DecoupledIO(new ExuOutput))
......
...@@ -254,10 +254,13 @@ class IntegerBlock ...@@ -254,10 +254,13 @@ class IntegerBlock
)) ))
intWbArbiter.io.in <> exeUnits.map(e => { intWbArbiter.io.in <> exeUnits.map(e => {
val w = WireInit(e.io.out) val w = WireInit(e.io.out)
val fpWen = if(e.config.writeFpRf) e.io.out.bits.uop.ctrl.fpWen else false.B if(e.config.writeFpRf){
w.valid := e.io.out.valid && !fpWen w.valid := e.io.out.valid && !e.io.out.bits.uop.ctrl.fpWen && io.wakeUpOut.slow(0).ready
} else {
w.valid := e.io.out.valid
}
w w
}) ++ io.wakeUpIn.slow }) ++ io.wakeUpIn.slow.map(x => intOutValid(x, connectReady = true))
XSPerf("competition", intWbArbiter.io.in.map(i => !i.ready && i.valid).foldRight(0.U)(_+_)) XSPerf("competition", intWbArbiter.io.in.map(i => !i.ready && i.valid).foldRight(0.U)(_+_))
......
...@@ -33,7 +33,8 @@ class MemBlock( ...@@ -33,7 +33,8 @@ class MemBlock(
val fastWakeUpIn: Seq[ExuConfig], val fastWakeUpIn: Seq[ExuConfig],
val slowWakeUpIn: Seq[ExuConfig], val slowWakeUpIn: Seq[ExuConfig],
val fastWakeUpOut: Seq[ExuConfig], val fastWakeUpOut: Seq[ExuConfig],
val slowWakeUpOut: Seq[ExuConfig] val slowWakeUpOut: Seq[ExuConfig],
val numIntWakeUpFp: Int
)(implicit p: Parameters) extends LazyModule { )(implicit p: Parameters) extends LazyModule {
val dcache = LazyModule(new DCache()) val dcache = LazyModule(new DCache())
...@@ -55,6 +56,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -55,6 +56,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val slowWakeUpIn = outer.slowWakeUpIn val slowWakeUpIn = outer.slowWakeUpIn
val fastWakeUpOut = outer.fastWakeUpOut val fastWakeUpOut = outer.fastWakeUpOut
val slowWakeUpOut = outer.slowWakeUpOut val slowWakeUpOut = outer.slowWakeUpOut
val numIntWakeUpFp = outer.numIntWakeUpFp
val io = IO(new Bundle { val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToLsBlockIO) val fromCtrlBlock = Flipped(new CtrlToLsBlockIO)
...@@ -63,6 +65,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -63,6 +65,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val toCtrlBlock = new LsBlockToCtrlIO val toCtrlBlock = new LsBlockToCtrlIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size) val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val intWakeUpFp = Vec(numIntWakeUpFp, Flipped(DecoupledIO(new ExuOutput)))
val wakeUpOutInt = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size)) val wakeUpOutInt = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val wakeUpOutFp = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size)) val wakeUpOutFp = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
...@@ -75,6 +78,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -75,6 +78,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val exceptionAddr = new ExceptionAddrIO // to csr val exceptionAddr = new ExceptionAddrIO // to csr
val roq = Flipped(new RoqLsqIO) // roq to lsq val roq = Flipped(new RoqLsqIO) // roq to lsq
} }
val csrCtrl = Flipped(new CustomCSRCtrlIO)
}) })
val difftestIO = IO(new Bundle() { val difftestIO = IO(new Bundle() {
val fromSbuffer = new Bundle() { val fromSbuffer = new Bundle() {
...@@ -144,14 +149,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -144,14 +149,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
slowWakeUpIn.zip(io.wakeUpIn.slow) slowWakeUpIn.zip(io.wakeUpIn.slow)
.filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf)) .filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf))
.map{ .map{
case (Exu.jumpExeUnitCfg, value) if cfg == Exu.stExeUnitCfg => case (Exu.jumpExeUnitCfg, _) if cfg == Exu.stExeUnitCfg =>
val jumpOut = Wire(Flipped(DecoupledIO(new ExuOutput))) (Exu.jumpExeUnitCfg, io.intWakeUpFp.head)
jumpOut.bits := RegNext(value.bits)
jumpOut.valid := RegNext(
value.valid && !value.bits.uop.roqIdx.needFlush(redirect, io.fromCtrlBlock.flush)
)
jumpOut.ready := true.B
(Exu.jumpExeUnitCfg, jumpOut)
case (config, value) => (config, value) case (config, value) => (config, value)
} }
).map(a => (a._1, decoupledIOToValidIO(a._2))) ).map(a => (a._1, decoupledIOToValidIO(a._2)))
...@@ -189,7 +188,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -189,7 +188,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// exeUnits(i).io.fromInt <> rs.io.deq // exeUnits(i).io.fromInt <> rs.io.deq
rs.io.memfeedback := DontCare rs.io.memfeedback := DontCare
rs.suggestName(s"rsd_${cfg.name}") rs.suggestName(s"rs_${cfg.name}")
rs rs
}) })
...@@ -212,6 +211,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -212,6 +211,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io.wakeUpOutInt.slow <> exeWbReqs io.wakeUpOutInt.slow <> exeWbReqs
io.wakeUpOutFp.slow <> wakeUpFp io.wakeUpOutFp.slow <> wakeUpFp
io.wakeUpIn.slow.foreach(_.ready := true.B) io.wakeUpIn.slow.foreach(_.ready := true.B)
io.intWakeUpFp.foreach(_.ready := true.B)
val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true)) val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
val lsq = Module(new LsqWrappper) val lsq = Module(new LsqWrappper)
...@@ -222,7 +222,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -222,7 +222,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// dtlb // dtlb
io.ptw <> dtlb.io.ptw io.ptw <> dtlb.io.ptw
dtlb.io.sfence <> io.sfence dtlb.io.sfence <> io.sfence
dtlb.io.csr <> io.tlbCsr dtlb.io.csr <> RegNext(io.tlbCsr)
if (!env.FPGAPlatform) { if (!env.FPGAPlatform) {
difftestIO.fromSbuffer <> sbuffer.difftestIO difftestIO.fromSbuffer <> sbuffer.difftestIO
difftestIO.fromSQ <> lsq.difftestIO.fromSQ difftestIO.fromSQ <> lsq.difftestIO.fromSQ
...@@ -306,6 +306,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -306,6 +306,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
lsq.io.sqempty <> sbuffer.io.sqempty lsq.io.sqempty <> sbuffer.io.sqempty
// Sbuffer // Sbuffer
sbuffer.io.csrCtrl <> RegNext(io.csrCtrl)
sbuffer.io.dcache <> dcache.io.lsu.store sbuffer.io.dcache <> dcache.io.lsu.store
sbuffer.io.dcache.resp.valid := RegNext(dcache.io.lsu.store.resp.valid) sbuffer.io.dcache.resp.valid := RegNext(dcache.io.lsu.store.resp.valid)
sbuffer.io.dcache.resp.bits := RegNext(dcache.io.lsu.store.resp.bits) sbuffer.io.dcache.resp.bits := RegNext(dcache.io.lsu.store.resp.bits)
......
...@@ -62,16 +62,16 @@ class FtqNRSRAM[T <: Data](gen: T, numRead: Int) extends XSModule { ...@@ -62,16 +62,16 @@ class FtqNRSRAM[T <: Data](gen: T, numRead: Int) extends XSModule {
class Ftq_4R_SRAMEntry extends XSBundle { class Ftq_4R_SRAMEntry extends XSBundle {
val ftqPC = UInt(VAddrBits.W) val ftqPC = UInt(VAddrBits.W)
val lastPacketPC = ValidUndirectioned(UInt(VAddrBits.W)) val lastPacketPC = ValidUndirectioned(UInt(VAddrBits.W))
val hist = new GlobalHistory
val br_mask = Vec(PredictWidth, Bool())
} }
// redirect and commit need to read this info // redirect and commit need to read this info
class Ftq_2R_SRAMEntry extends XSBundle { class Ftq_2R_SRAMEntry extends XSBundle {
val rasSp = UInt(log2Ceil(RasSize).W) val rasSp = UInt(log2Ceil(RasSize).W)
val rasEntry = new RASEntry val rasEntry = new RASEntry
val hist = new GlobalHistory
val predHist = new GlobalHistory val predHist = new GlobalHistory
val specCnt = Vec(PredictWidth, UInt(10.W)) val specCnt = Vec(PredictWidth, UInt(10.W))
val br_mask = Vec(PredictWidth, Bool())
} }
class Ftq_1R_Commit_SRAMEntry extends XSBundle { class Ftq_1R_Commit_SRAMEntry extends XSBundle {
...@@ -127,15 +127,15 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper { ...@@ -127,15 +127,15 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
ftq_pc_mem.io.waddr(0) := tailPtr.value ftq_pc_mem.io.waddr(0) := tailPtr.value
ftq_pc_mem.io.wdata(0).ftqPC := io.enq.bits.ftqPC ftq_pc_mem.io.wdata(0).ftqPC := io.enq.bits.ftqPC
ftq_pc_mem.io.wdata(0).lastPacketPC := io.enq.bits.lastPacketPC ftq_pc_mem.io.wdata(0).lastPacketPC := io.enq.bits.lastPacketPC
ftq_pc_mem.io.wdata(0).hist := io.enq.bits.hist
ftq_pc_mem.io.wdata(0).br_mask := io.enq.bits.br_mask
val ftq_2r_sram = Module(new FtqNRSRAM(new Ftq_2R_SRAMEntry, 2)) val ftq_2r_sram = Module(new FtqNRSRAM(new Ftq_2R_SRAMEntry, 2))
ftq_2r_sram.io.wen := real_fire ftq_2r_sram.io.wen := real_fire
ftq_2r_sram.io.waddr := tailPtr.value ftq_2r_sram.io.waddr := tailPtr.value
ftq_2r_sram.io.wdata.rasSp := io.enq.bits.rasSp ftq_2r_sram.io.wdata.rasSp := io.enq.bits.rasSp
ftq_2r_sram.io.wdata.rasEntry := io.enq.bits.rasTop ftq_2r_sram.io.wdata.rasEntry := io.enq.bits.rasTop
ftq_2r_sram.io.wdata.hist := io.enq.bits.hist
ftq_2r_sram.io.wdata.predHist := io.enq.bits.predHist ftq_2r_sram.io.wdata.predHist := io.enq.bits.predHist
ftq_2r_sram.io.wdata.specCnt := io.enq.bits.specCnt ftq_2r_sram.io.wdata.specCnt := io.enq.bits.specCnt
ftq_2r_sram.io.wdata.br_mask := io.enq.bits.br_mask
val pred_target_sram = Module(new FtqNRSRAM(UInt(VAddrBits.W), 1)) val pred_target_sram = Module(new FtqNRSRAM(UInt(VAddrBits.W), 1))
pred_target_sram.io.wen := real_fire pred_target_sram.io.wen := real_fire
pred_target_sram.io.waddr := tailPtr.value pred_target_sram.io.waddr := tailPtr.value
...@@ -229,13 +229,13 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper { ...@@ -229,13 +229,13 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
// from 4r sram // from 4r sram
commitEntry.ftqPC := RegNext(ftq_pc_mem.io.rdata(0).ftqPC) commitEntry.ftqPC := RegNext(ftq_pc_mem.io.rdata(0).ftqPC)
commitEntry.lastPacketPC := RegNext(ftq_pc_mem.io.rdata(0).lastPacketPC) commitEntry.lastPacketPC := RegNext(ftq_pc_mem.io.rdata(0).lastPacketPC)
commitEntry.hist := RegNext(ftq_pc_mem.io.rdata(0).hist)
commitEntry.br_mask := RegNext(ftq_pc_mem.io.rdata(0).br_mask)
// from 2r sram // from 2r sram
commitEntry.rasSp := RegNext(ftq_2r_sram.io.rdata(0).rasSp) commitEntry.rasSp := RegNext(ftq_2r_sram.io.rdata(0).rasSp)
commitEntry.rasTop := RegNext(ftq_2r_sram.io.rdata(0).rasEntry) commitEntry.rasTop := RegNext(ftq_2r_sram.io.rdata(0).rasEntry)
commitEntry.hist := RegNext(ftq_2r_sram.io.rdata(0).hist)
commitEntry.predHist := RegNext(ftq_2r_sram.io.rdata(0).predHist) commitEntry.predHist := RegNext(ftq_2r_sram.io.rdata(0).predHist)
commitEntry.specCnt := RegNext(ftq_2r_sram.io.rdata(0).specCnt) commitEntry.specCnt := RegNext(ftq_2r_sram.io.rdata(0).specCnt)
commitEntry.br_mask := RegNext(ftq_2r_sram.io.rdata(0).br_mask)
// from 1r sram // from 1r sram
commitEntry.metas := RegNext(ftq_1r_sram.io.rdata(0).metas) commitEntry.metas := RegNext(ftq_1r_sram.io.rdata(0).metas)
commitEntry.rvc_mask := RegNext(ftq_1r_sram.io.rdata(0).rvc_mask) commitEntry.rvc_mask := RegNext(ftq_1r_sram.io.rdata(0).rvc_mask)
...@@ -258,6 +258,8 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper { ...@@ -258,6 +258,8 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
ftq_pc_mem.io.raddr(1 + i) := req.ptr.value ftq_pc_mem.io.raddr(1 + i) := req.ptr.value
req.entry.ftqPC := ftq_pc_mem.io.rdata(1 + i).ftqPC req.entry.ftqPC := ftq_pc_mem.io.rdata(1 + i).ftqPC
req.entry.lastPacketPC := ftq_pc_mem.io.rdata(1 + i).lastPacketPC req.entry.lastPacketPC := ftq_pc_mem.io.rdata(1 + i).lastPacketPC
req.entry.hist := ftq_pc_mem.io.rdata(1 + i).hist
req.entry.br_mask := ftq_pc_mem.io.rdata(1 + i).br_mask
if(i == 0){ // jump, read npc if(i == 0){ // jump, read npc
pred_target_sram.io.raddr(0) := req.ptr.value pred_target_sram.io.raddr(0) := req.ptr.value
pred_target_sram.io.ren(0) := true.B pred_target_sram.io.ren(0) := true.B
...@@ -269,10 +271,8 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper { ...@@ -269,10 +271,8 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
io.cfiRead.entry := DontCare io.cfiRead.entry := DontCare
io.cfiRead.entry.rasTop := ftq_2r_sram.io.rdata(1).rasEntry io.cfiRead.entry.rasTop := ftq_2r_sram.io.rdata(1).rasEntry
io.cfiRead.entry.rasSp := ftq_2r_sram.io.rdata(1).rasSp io.cfiRead.entry.rasSp := ftq_2r_sram.io.rdata(1).rasSp
io.cfiRead.entry.hist := ftq_2r_sram.io.rdata(1).hist
io.cfiRead.entry.predHist := ftq_2r_sram.io.rdata(1).predHist io.cfiRead.entry.predHist := ftq_2r_sram.io.rdata(1).predHist
io.cfiRead.entry.specCnt := ftq_2r_sram.io.rdata(1).specCnt io.cfiRead.entry.specCnt := ftq_2r_sram.io.rdata(1).specCnt
io.cfiRead.entry.br_mask := ftq_2r_sram.io.rdata(1).br_mask
// redirect, reset ptr // redirect, reset ptr
when(io.flush || io.redirect.valid){ when(io.flush || io.redirect.valid){
val idx = Mux(io.flush, io.flushIdx, io.redirect.bits.ftqIdx) val idx = Mux(io.flush, io.flushIdx, io.redirect.bits.ftqIdx)
......
...@@ -355,6 +355,11 @@ class CSR extends FunctionUnit with HasCSRConst ...@@ -355,6 +355,11 @@ class CSR extends FunctionUnit with HasCSRConst
csrio.customCtrl.no_spec_load := slvpredctl(1) csrio.customCtrl.no_spec_load := slvpredctl(1)
csrio.customCtrl.waittable_timeout := slvpredctl(8, 4) csrio.customCtrl.waittable_timeout := slvpredctl(8, 4)
// smblockctl: memory block configurations
// bits 0-3: store buffer flush threshold (default: 8 entries)
val smblockctl = RegInit(UInt(XLEN.W), "h7".U)
csrio.customCtrl.sbuffer_threshold := smblockctl(3, 0)
val tlbBundle = Wire(new TlbCsrBundle) val tlbBundle = Wire(new TlbCsrBundle)
tlbBundle.satp := satp.asTypeOf(new SatpStruct) tlbBundle.satp := satp.asTypeOf(new SatpStruct)
csrio.tlb := tlbBundle csrio.tlb := tlbBundle
...@@ -486,6 +491,7 @@ class CSR extends FunctionUnit with HasCSRConst ...@@ -486,6 +491,7 @@ class CSR extends FunctionUnit with HasCSRConst
MaskedRegMap(Spfctl, spfctl), MaskedRegMap(Spfctl, spfctl),
MaskedRegMap(Sdsid, sdsid), MaskedRegMap(Sdsid, sdsid),
MaskedRegMap(Slvpredctl, slvpredctl), MaskedRegMap(Slvpredctl, slvpredctl),
MaskedRegMap(Smblockctl, smblockctl),
//--- Machine Information Registers --- //--- Machine Information Registers ---
MaskedRegMap(Mvendorid, mvendorid, 0.U, MaskedRegMap.Unwritable), MaskedRegMap(Mvendorid, mvendorid, 0.U, MaskedRegMap.Unwritable),
......
...@@ -53,8 +53,8 @@ trait HasCSRConst { ...@@ -53,8 +53,8 @@ trait HasCSRConst {
// Supervisor Custom Read/Write // Supervisor Custom Read/Write
val Sbpctl = 0x5C0 val Sbpctl = 0x5C0
val Spfctl = 0x5C1 val Spfctl = 0x5C1
val Slvpredctl = 0x5C2 val Slvpredctl = 0x5C2
val Smblockctl = 0x5C3
val Sdsid = 0x9C0 val Sdsid = 0x9C0
......
...@@ -126,6 +126,10 @@ class ReservationStation ...@@ -126,6 +126,10 @@ class ReservationStation
val ctrl = Module(new ReservationStationCtrl(exuCfg, srcLen, fastPortsCfg, slowPortsCfg, fixedDelay, fastWakeup, feedback)) val ctrl = Module(new ReservationStationCtrl(exuCfg, srcLen, fastPortsCfg, slowPortsCfg, fixedDelay, fastWakeup, feedback))
val data = Module(new ReservationStationData(exuCfg, srcLen, fastPortsCfg, slowPortsCfg, fixedDelay, fastWakeup, feedback)) val data = Module(new ReservationStationData(exuCfg, srcLen, fastPortsCfg, slowPortsCfg, fixedDelay, fastWakeup, feedback))
select.suggestName(s"${this.name}_select")
ctrl.suggestName(s"${this.name}_ctrl")
data.suggestName(s"${this.name}_data")
select.io.redirect := io.redirect select.io.redirect := io.redirect
select.io.flush := io.flush select.io.flush := io.flush
io.numExist := select.io.numExist io.numExist := select.io.numExist
...@@ -136,6 +140,7 @@ class ReservationStation ...@@ -136,6 +140,7 @@ class ReservationStation
select.io.deq.ready := io.deq.ready select.io.deq.ready := io.deq.ready
if (feedback) { if (feedback) {
select.io.memfeedback := io.memfeedback select.io.memfeedback := io.memfeedback
select.io.flushState := io.memfeedback.bits.flushState
} }
ctrl.io.in.valid := select.io.enq.fire()// && !(io.redirect.valid || io.flush) // NOTE: same as select ctrl.io.in.valid := select.io.enq.fire()// && !(io.redirect.valid || io.flush) // NOTE: same as select
...@@ -207,7 +212,7 @@ class ReservationStationSelect ...@@ -207,7 +212,7 @@ class ReservationStationSelect
val fastPortsCnt = fastPortsCfg.size val fastPortsCnt = fastPortsCfg.size
val slowPortsCnt = slowPortsCfg.size val slowPortsCnt = slowPortsCfg.size
require(nonBlocked==fastWakeup) require(nonBlocked==fastWakeup)
val replayDelay = VecInit(Seq(5, 10, 40, 40).map(_.U(6.W))) val replayDelay = VecInit(Seq(5, 10, 25, 25).map(_.U(5.W)))
val io = IO(new Bundle { val io = IO(new Bundle {
val redirect = Flipped(ValidIO(new Redirect)) val redirect = Flipped(ValidIO(new Redirect))
...@@ -228,6 +233,8 @@ class ReservationStationSelect ...@@ -228,6 +233,8 @@ class ReservationStationSelect
def fire() = valid && ready def fire() = valid && ready
} }
val deq = DecoupledIO(UInt(iqIdxWidth.W)) val deq = DecoupledIO(UInt(iqIdxWidth.W))
val flushState = if (feedback) Input(Bool()) else null
}) })
def widthMap[T <: Data](f: Int => T) = VecInit((0 until iqSize).map(f)) def widthMap[T <: Data](f: Int => T) = VecInit((0 until iqSize).map(f))
...@@ -275,6 +282,7 @@ class ReservationStationSelect ...@@ -275,6 +282,7 @@ class ReservationStationSelect
assert(RegNext(!(haveReady && selectPtr >= tailPtr.asUInt)), "bubble should not have valid state like s_valid or s_wait") assert(RegNext(!(haveReady && selectPtr >= tailPtr.asUInt)), "bubble should not have valid state like s_valid or s_wait")
// sel bubble // sel bubble
val isFull = Wire(Bool())
val lastbubbleMask = Wire(UInt(iqSize.W)) val lastbubbleMask = Wire(UInt(iqSize.W))
val bubbleMask = WireInit(VecInit((0 until iqSize).map(i => emptyIdxQueue(i)))).asUInt & lastbubbleMask val bubbleMask = WireInit(VecInit((0 until iqSize).map(i => emptyIdxQueue(i)))).asUInt & lastbubbleMask
// val bubbleIndex = ParallelMux(bubbleMask zip indexQueue) // NOTE: the idx in the indexQueue // val bubbleIndex = ParallelMux(bubbleMask zip indexQueue) // NOTE: the idx in the indexQueue
...@@ -282,7 +290,9 @@ class ReservationStationSelect ...@@ -282,7 +290,9 @@ class ReservationStationSelect
val findBubble = Cat(bubbleMask).orR val findBubble = Cat(bubbleMask).orR
val haveBubble = findBubble && (bubblePtr < tailPtr.asUInt) val haveBubble = findBubble && (bubblePtr < tailPtr.asUInt)
val bubbleIndex = indexQueue(bubblePtr) val bubbleIndex = indexQueue(bubblePtr)
val bubbleValid = haveBubble && (if (feedback) true.B else !selectValid) val bubbleValid = haveBubble && (if (feedback) true.B
else if (nonBlocked) !selectValid
else Mux(isFull, true.B, !selectValid))
val bubbleReg = RegNext(bubbleValid) val bubbleReg = RegNext(bubbleValid)
val bubblePtrReg = RegNext(Mux(moveMask(bubblePtr), bubblePtr-1.U, bubblePtr)) val bubblePtrReg = RegNext(Mux(moveMask(bubblePtr), bubblePtr-1.U, bubblePtr))
lastbubbleMask := ~Mux(bubbleReg, UIntToOH(bubblePtrReg), 0.U) & lastbubbleMask := ~Mux(bubbleReg, UIntToOH(bubblePtrReg), 0.U) &
...@@ -292,8 +302,9 @@ class ReservationStationSelect ...@@ -292,8 +302,9 @@ class ReservationStationSelect
// deq // deq
val dequeue = if (feedback) bubbleReg val dequeue = if (feedback) bubbleReg
else bubbleReg || issueFire else bubbleReg || issueFire
val deqPtr = if (feedback) bubblePtrReg val deqPtr = if (feedback) bubblePtrReg
else Mux(selectReg, selectPtrReg, bubblePtrReg) else if (nonBlocked) Mux(selectReg, selectPtrReg, bubblePtrReg)
else Mux(bubbleReg, bubblePtrReg, selectPtrReg)
moveMask := { moveMask := {
(Fill(iqSize, 1.U(1.W)) << deqPtr)(iqSize-1, 0) (Fill(iqSize, 1.U(1.W)) << deqPtr)(iqSize-1, 0)
} & Fill(iqSize, dequeue) } & Fill(iqSize, dequeue)
...@@ -326,11 +337,15 @@ class ReservationStationSelect ...@@ -326,11 +337,15 @@ class ReservationStationSelect
// redirect and feedback && wakeup // redirect and feedback && wakeup
for (i <- 0 until iqSize) { for (i <- 0 until iqSize) {
// replay // replay
when (stateQueue(i) === s_replay) { if (feedback) {
countQueue(i) := countQueue(i) - 1.U when (stateQueue(i) === s_replay) {
when (countQueue(i) === 0.U) { countQueue(i) := countQueue(i) - 1.U
stateQueue(i) := s_valid when (countQueue(i) === 0.U && !io.flushState) {
cntCountQueue(i) := Mux(cntCountQueue(i)===3.U, cntCountQueue(i), cntCountQueue(i) + 1.U) cntCountQueue(i) := Mux(cntCountQueue(i)===3.U, cntCountQueue(i), cntCountQueue(i) + 1.U)
}
when (io.flushState || countQueue(i) === 0.U) {
stateQueue(i) := s_valid
}
} }
} }
...@@ -349,7 +364,7 @@ class ReservationStationSelect ...@@ -349,7 +364,7 @@ class ReservationStationSelect
} }
// enq // enq
val isFull = tailPtr.flag isFull := tailPtr.flag
// agreement with dispatch: don't fire when io.redirect.valid // agreement with dispatch: don't fire when io.redirect.valid
val enqueue = io.enq.fire() && !(io.redirect.valid || io.flush) val enqueue = io.enq.fire() && !(io.redirect.valid || io.flush)
val tailInc = tailPtr + 1.U val tailInc = tailPtr + 1.U
...@@ -372,7 +387,7 @@ class ReservationStationSelect ...@@ -372,7 +387,7 @@ class ReservationStationSelect
io.deq.valid := selectValid io.deq.valid := selectValid
io.deq.bits := selectIndex io.deq.bits := selectIndex
io.numExist := RegNext(Mux(nextTailPtr.flag, (iqSize-1).U, nextTailPtr.value)) io.numExist := RegNext(Mux(nextTailPtr.flag, if(isPow2(iqSize)) (iqSize-1).U else iqSize.U, nextTailPtr.value))
assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B))) assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
} }
...@@ -489,12 +504,13 @@ class ReservationStationCtrl ...@@ -489,12 +504,13 @@ class ReservationStationCtrl
when (!isAfter(sq, io.stIssuePtr)) { when (!isAfter(sq, io.stIssuePtr)) {
lw := true.B lw := true.B
} }
} }
when (enqEn) { when (enqEn) {
ldWait(enqPtr) := !enqUop.cf.loadWaitBit ldWait(enqPtr) := !enqUop.cf.loadWaitBit
sqIdx(enqPtr) := enqUop.sqIdx sqIdx(enqPtr) := enqUop.sqIdx
} }
ldWait.suggestName(s"${this.name}_ldWait") ldWait.suggestName(s"${this.name}_ldWait")
sqIdx.suggestName(s"${this.name}_sqIdx")
io.readyVec := srcQueue.map(Cat(_).andR).zip(ldWait).map{ case (s, l) => s&l } io.readyVec := srcQueue.map(Cat(_).andR).zip(ldWait).map{ case (s, l) => s&l }
} }
......
...@@ -46,6 +46,7 @@ trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with Has ...@@ -46,6 +46,7 @@ trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with Has
def insLen = if (HasCExtension) 16 else 32 def insLen = if (HasCExtension) 16 else 32
def RVCInsLen = 16 def RVCInsLen = 16
def groupPC(pc: UInt): UInt = Cat(pc(PAddrBits-1, groupAlign), 0.U(groupAlign.W)) def groupPC(pc: UInt): UInt = Cat(pc(PAddrBits-1, groupAlign), 0.U(groupAlign.W))
def plruAccessNum = 2 //hit and miss
// def encRowBits = cacheParams.dataCode.width(rowBits) // def encRowBits = cacheParams.dataCode.width(rowBits)
// def encTagBits = cacheParams.tagCode.width(tagBits) // def encTagBits = cacheParams.tagCode.width(tagBits)
...@@ -77,6 +78,7 @@ abstract class ICacheBundle extends XSBundle ...@@ -77,6 +78,7 @@ abstract class ICacheBundle extends XSBundle
abstract class ICacheModule extends XSModule abstract class ICacheModule extends XSModule
with HasICacheParameters with HasICacheParameters
with HasFrontEndExceptionNo with HasFrontEndExceptionNo
with HasIFUConst
abstract class ICacheArray extends XSModule abstract class ICacheArray extends XSModule
with HasICacheParameters with HasICacheParameters
...@@ -365,8 +367,13 @@ class ICache extends ICacheModule ...@@ -365,8 +367,13 @@ class ICache extends ICacheModule
val replacer = cacheParams.replacement val replacer = cacheParams.replacement
val victimWayMask = UIntToOH(replacer.way(s2_idx)) val victimWayMask = UIntToOH(replacer.way(s2_idx))
when(s2_hit) {replacer.access(s2_idx, OHToUInt(hitVec))} val (touch_sets, touch_ways) = ( Wire(Vec(plruAccessNum, UInt(log2Ceil(nSets).W))), Wire(Vec(plruAccessNum, Valid(UInt(log2Ceil(nWays).W)))) )
touch_sets(0) := s2_idx
touch_ways(0).valid := s2_hit
touch_ways(0).bits := OHToUInt(hitVec)
replacer.access(touch_sets, touch_ways)
//deal with icache exception //deal with icache exception
val icacheExceptionVec = Wire(Vec(8,Bool())) val icacheExceptionVec = Wire(Vec(8,Bool()))
...@@ -436,7 +443,7 @@ class ICache extends ICacheModule ...@@ -436,7 +443,7 @@ class ICache extends ICacheModule
/* icache miss /* icache miss
* send a miss req to ICache Miss Queue, excluding exception/flush/blocking * send a miss req to ICache Miss Queue, excluding exception/flush/blocking
* block the pipeline until refill finishes * block the pipeline until refill finishes
*/ */
val icacheMissQueue = Module(new IcacheMissQueue) val icacheMissQueue = Module(new IcacheMissQueue)
...@@ -471,6 +478,11 @@ class ICache extends ICacheModule ...@@ -471,6 +478,11 @@ class ICache extends ICacheModule
waymask=metaWriteReq.meta_write_waymask) waymask=metaWriteReq.meta_write_waymask)
val wayNum = OHToUInt(metaWriteReq.meta_write_waymask.asTypeOf(Vec(nWays,Bool()))) val wayNum = OHToUInt(metaWriteReq.meta_write_waymask.asTypeOf(Vec(nWays,Bool())))
touch_sets(1) := metaWriteReq.meta_write_idx
touch_ways(1).valid := icacheMissQueue.io.meta_write.valid
touch_ways(1).bits := wayNum
val validPtr = Cat(metaWriteReq.meta_write_idx,wayNum) val validPtr = Cat(metaWriteReq.meta_write_idx,wayNum)
when(icacheMissQueue.io.meta_write.valid && !cacheflushed){ when(icacheMissQueue.io.meta_write.valid && !cacheflushed){
validArray := validArray.bitSet(validPtr, true.B) validArray := validArray.bitSet(validPtr, true.B)
...@@ -519,8 +531,8 @@ class ICache extends ICacheModule ...@@ -519,8 +531,8 @@ class ICache extends ICacheModule
} }
} }
val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)})) val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
val insLenLog = log2Ceil(insLen) val insLenLog = log2Ceil(insLen/8)
val start = (pc >> insLenLog.U)(log2Ceil(mmioBeats * mmioBusBytes/instBytes) -1, 0) val start = Cat(0.U(2.W),(pc >> insLenLog.U)(log2Ceil(mmioBusBytes/instBytes) -1, 0)) //4bit
val outMask = mask >> start val outMask = mask >> start
(0 until PredictWidth ).foreach{ i => (0 until PredictWidth ).foreach{ i =>
cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U) cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U)
...@@ -528,7 +540,7 @@ class ICache extends ICacheModule ...@@ -528,7 +540,7 @@ class ICache extends ICacheModule
(cutPacket.asUInt, outMask.asUInt) (cutPacket.asUInt, outMask.asUInt)
} }
val mmioDataVec = io.mmio_grant.bits.data.asTypeOf(Vec(mmioBeats,UInt(mmioBusWidth.W))) val mmioDataVec = io.mmio_grant.bits.data.asTypeOf(Vec(mmioBeats,UInt(mmioBusWidth.W)))
val (mmio_packet,mmio_mask) = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask) val mmio_packet = io.mmio_grant.bits.data//cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)
XSDebug("mmio data %x\n", mmio_packet) XSDebug("mmio data %x\n", mmio_packet)
...@@ -541,7 +553,7 @@ class ICache extends ICacheModule ...@@ -541,7 +553,7 @@ class ICache extends ICacheModule
val refillData = Mux(useRefillReg,cutHelper(refillDataVecReg, s3_req_pc,s3_req_mask),cutHelper(refillDataVec, s3_req_pc,s3_req_mask)) val refillData = Mux(useRefillReg,cutHelper(refillDataVecReg, s3_req_pc,s3_req_mask),cutHelper(refillDataVec, s3_req_pc,s3_req_mask))
wayResp.pc := s3_req_pc wayResp.pc := s3_req_pc
wayResp.data := Mux(s3_valid && s3_hit, wayData, Mux(s3_mmio ,mmio_packet ,refillData)) wayResp.data := Mux(s3_valid && s3_hit, wayData, Mux(s3_mmio ,mmio_packet ,refillData))
wayResp.mask := Mux(s3_mmio,mmio_mask,s3_req_mask) wayResp.mask := s3_req_mask
wayResp.ipf := s3_exception_vec(pageFault) wayResp.ipf := s3_exception_vec(pageFault)
wayResp.acf := s3_exception_vec(accessFault) || s3_meta_wrong || s3_data_wrong wayResp.acf := s3_exception_vec(accessFault) || s3_meta_wrong || s3_data_wrong
//|| (icacheMissQueue.io.resp.valid && icacheMissQueue.io.resp.bits.eccWrong) //|| (icacheMissQueue.io.resp.valid && icacheMissQueue.io.resp.bits.eccWrong)
...@@ -564,7 +576,7 @@ class ICache extends ICacheModule ...@@ -564,7 +576,7 @@ class ICache extends ICacheModule
//icache response: to pre-decoder //icache response: to pre-decoder
io.resp.valid := s3_valid && (s3_hit || exception || icacheMissQueue.io.resp.valid || io.mmio_grant.valid) io.resp.valid := s3_valid && (s3_hit || exception || icacheMissQueue.io.resp.valid || io.mmio_grant.valid)
io.resp.bits.mask := Mux(s3_mmio,mmio_mask,s3_req_mask) io.resp.bits.mask := s3_req_mask
io.resp.bits.pc := s3_req_pc io.resp.bits.pc := s3_req_pc
io.resp.bits.data := DontCare io.resp.bits.data := DontCare
io.resp.bits.ipf := s3_tlb_resp.excp.pf.instr io.resp.bits.ipf := s3_tlb_resp.excp.pf.instr
...@@ -590,7 +602,7 @@ class ICache extends ICacheModule ...@@ -590,7 +602,7 @@ class ICache extends ICacheModule
io.prefetchTrainReq.bits.addr := groupPC(s3_tlb_resp.paddr) io.prefetchTrainReq.bits.addr := groupPC(s3_tlb_resp.paddr)
//To icache Uncache //To icache Uncache
io.mmio_acquire.valid := s3_mmio && s3_valid io.mmio_acquire.valid := s3_mmio && s3_valid && !s3_has_exception && !s3_flush && !blocking
io.mmio_acquire.bits.addr := mmioBusAligned(s3_tlb_resp.paddr) io.mmio_acquire.bits.addr := mmioBusAligned(s3_tlb_resp.paddr)
io.mmio_acquire.bits.id := cacheID.U io.mmio_acquire.bits.id := cacheID.U
......
...@@ -22,7 +22,7 @@ class InsUncacheResp extends ICacheBundle ...@@ -22,7 +22,7 @@ class InsUncacheResp extends ICacheBundle
} }
// One miss entry deals with one mmio request // One miss entry deals with one mmio request
class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters with HasIFUConst
{ {
val io = IO(new Bundle { val io = IO(new Bundle {
val id = Input(UInt(log2Up(cacheParams.nMMIOs).W)) val id = Input(UInt(log2Up(cacheParams.nMMIOs).W))
...@@ -86,7 +86,7 @@ class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters ...@@ -86,7 +86,7 @@ class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters
io.mmio_acquire.valid := true.B io.mmio_acquire.valid := true.B
io.mmio_acquire.bits := edge.Get( io.mmio_acquire.bits := edge.Get(
fromSource = io.id, fromSource = io.id,
toAddress = req.addr + (beatCounter.value << log2Ceil(mmioBusBytes).U), toAddress = packetAligned(req.addr) + (beatCounter.value << log2Ceil(mmioBusBytes).U),
lgSize = log2Ceil(mmioBusBytes).U lgSize = log2Ceil(mmioBusBytes).U
)._2 )._2
...@@ -101,15 +101,17 @@ class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters ...@@ -101,15 +101,17 @@ class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters
io.mmio_grant.ready := true.B io.mmio_grant.ready := true.B
when (io.mmio_grant.fire()) { when (io.mmio_grant.fire()) {
// val realAddr = packetAligned(req.addr) + (beatCounter.value << log2Ceil(mmioBusBytes).U)
// val start = realAddr(5,3)
respDataReg(beatCounter.value) := io.mmio_grant.bits.data respDataReg(beatCounter.value) := io.mmio_grant.bits.data
state := Mux(needFlush || io.flush, s_invalid,Mux(beatCounter.value === (mmioBeats - 1).U,s_send_resp,s_refill_req)) state :=Mux((beatCounter.value === (mmioBeats - 1).U) || needFlush || io.flush ,s_send_resp,s_refill_req)
beatCounter.inc() beatCounter.inc()
} }
} }
// -------------------------------------------- // --------------------------------------------
when (state === s_send_resp) { when (state === s_send_resp) {
io.resp.valid := true.B io.resp.valid := !needFlush
io.resp.bits.data := respDataReg.asUInt io.resp.bits.data := respDataReg.asUInt
io.resp.bits.id := req.id io.resp.bits.id := req.id
// meta data should go with the response // meta data should go with the response
...@@ -153,7 +155,7 @@ class icacheUncacheImp(outer: InstrUncache) ...@@ -153,7 +155,7 @@ class icacheUncacheImp(outer: InstrUncache)
val io = IO(new icacheUncacheIO) val io = IO(new icacheUncacheIO)
val (bus, edge) = outer.clientNode.out.head val (bus, edge) = outer.clientNode.out.head
require(bus.d.bits.data.getWidth == wordBits, "Uncache: tilelink width does not match") //require(bus.d.bits.data.getWidth == wordBits, "Uncache: tilelink width does not match")
val resp_arb = Module(new Arbiter(new InsUncacheResp, cacheParams.nMMIOs)) val resp_arb = Module(new Arbiter(new InsUncacheResp, cacheParams.nMMIOs))
......
...@@ -2,7 +2,7 @@ package xiangshan.cache ...@@ -2,7 +2,7 @@ package xiangshan.cache
import chisel3._ import chisel3._
import chisel3.util._ import chisel3.util._
import utils.{Code, RandomReplacement, HasTLDump, XSDebug, SRAMTemplate} import utils.{Code, ReplacementPolicy, HasTLDump, XSDebug, SRAMTemplate, XSPerf}
import xiangshan.{HasXSLog} import xiangshan.{HasXSLog}
import chipsalliance.rocketchip.config.Parameters import chipsalliance.rocketchip.config.Parameters
...@@ -26,6 +26,7 @@ case class L1plusCacheParameters ...@@ -26,6 +26,7 @@ case class L1plusCacheParameters
rowBits: Int = 64, rowBits: Int = 64,
tagECC: Option[String] = None, tagECC: Option[String] = None,
dataECC: Option[String] = None, dataECC: Option[String] = None,
replacer: Option[String] = Some("random"),
nMissEntries: Int = 1, nMissEntries: Int = 1,
blockBytes: Int = 64 blockBytes: Int = 64
) extends L1CacheParameters { ) extends L1CacheParameters {
...@@ -33,7 +34,7 @@ case class L1plusCacheParameters ...@@ -33,7 +34,7 @@ case class L1plusCacheParameters
def tagCode: Code = Code.fromString(tagECC) def tagCode: Code = Code.fromString(tagECC)
def dataCode: Code = Code.fromString(dataECC) def dataCode: Code = Code.fromString(dataECC)
def replacement = new RandomReplacement(nWays) def replacement = ReplacementPolicy.fromString(replacer,nWays,nSets)
} }
trait HasL1plusCacheParameters extends HasL1CacheParameters { trait HasL1plusCacheParameters extends HasL1CacheParameters {
...@@ -48,6 +49,7 @@ trait HasL1plusCacheParameters extends HasL1CacheParameters { ...@@ -48,6 +49,7 @@ trait HasL1plusCacheParameters extends HasL1CacheParameters {
def bankNum = 2 def bankNum = 2
def bankRows = blockRows / bankNum def bankRows = blockRows / bankNum
def blockEcodedBits = blockRows * encRowBits def blockEcodedBits = blockRows * encRowBits
def plruAccessNum = 2 //hit and miss
def missQueueEntryIdWidth = log2Up(cfg.nMissEntries) def missQueueEntryIdWidth = log2Up(cfg.nMissEntries)
// def icacheMissQueueEntryIdWidth = log2Up(icfg.nMissEntries) // def icacheMissQueueEntryIdWidth = log2Up(icfg.nMissEntries)
...@@ -91,6 +93,11 @@ object L1plusCacheMetadata { ...@@ -91,6 +93,11 @@ object L1plusCacheMetadata {
} }
} }
/* tagIdx is from the io.in.req (Wire)
* validIdx is from s1_addr (Register)
*/
class L1plusCacheMetaReadReq extends L1plusCacheBundle { class L1plusCacheMetaReadReq extends L1plusCacheBundle {
val tagIdx = UInt(idxBits.W) val tagIdx = UInt(idxBits.W)
val validIdx = UInt(idxBits.W) val validIdx = UInt(idxBits.W)
...@@ -383,6 +390,8 @@ class L1plusCacheImp(outer: L1plusCache) extends LazyModuleImp(outer) with HasL1 ...@@ -383,6 +390,8 @@ class L1plusCacheImp(outer: L1plusCache) extends LazyModuleImp(outer) with HasL1
pipe.io.data_resp <> dataArray.io.resp pipe.io.data_resp <> dataArray.io.resp
pipe.io.meta_read <> metaArray.io.read pipe.io.meta_read <> metaArray.io.read
pipe.io.meta_resp <> metaArray.io.resp pipe.io.meta_resp <> metaArray.io.resp
pipe.io.miss_meta_write.valid := missQueue.io.meta_write.valid
pipe.io.miss_meta_write.bits <> missQueue.io.meta_write.bits
missQueue.io.req <> pipe.io.miss_req missQueue.io.req <> pipe.io.miss_req
bus.a <> missQueue.io.mem_acquire bus.a <> missQueue.io.mem_acquire
...@@ -478,6 +487,7 @@ class L1plusCachePipe extends L1plusCacheModule ...@@ -478,6 +487,7 @@ class L1plusCachePipe extends L1plusCacheModule
val meta_read = DecoupledIO(new L1plusCacheMetaReadReq) val meta_read = DecoupledIO(new L1plusCacheMetaReadReq)
val meta_resp = Input(Vec(nWays, new L1plusCacheMetadata)) val meta_resp = Input(Vec(nWays, new L1plusCacheMetadata))
val miss_req = DecoupledIO(new L1plusCacheMissReq) val miss_req = DecoupledIO(new L1plusCacheMissReq)
val miss_meta_write = Flipped(ValidIO(new L1plusCacheMetaWriteReq))
val inflight_req_idxes = Output(Vec(2, Valid(UInt()))) val inflight_req_idxes = Output(Vec(2, Valid(UInt())))
val empty = Output(Bool()) val empty = Output(Bool())
}) })
...@@ -554,6 +564,16 @@ class L1plusCachePipe extends L1plusCacheModule ...@@ -554,6 +564,16 @@ class L1plusCachePipe extends L1plusCacheModule
val s2_hit = s2_tag_match_way.orR val s2_hit = s2_tag_match_way.orR
val s2_hit_way = OHToUInt(s2_tag_match_way, nWays) val s2_hit_way = OHToUInt(s2_tag_match_way, nWays)
//replacement marker
val replacer = cacheParams.replacement
val (touch_sets, touch_ways) = ( Wire(Vec(plruAccessNum, UInt(log2Ceil(nSets).W))), Wire(Vec(plruAccessNum, Valid(UInt(log2Ceil(nWays).W)))) )
touch_sets(0) := get_idx(s2_req.addr)
touch_ways(0).valid := s2_valid && s2_hit
touch_ways(0).bits := s2_hit_way
replacer.access(touch_sets, touch_ways)
val data_resp = io.data_resp val data_resp = io.data_resp
val s2_data = data_resp(s2_hit_way) val s2_data = data_resp(s2_hit_way)
...@@ -577,8 +597,7 @@ class L1plusCachePipe extends L1plusCacheModule ...@@ -577,8 +597,7 @@ class L1plusCachePipe extends L1plusCacheModule
io.resp.bits.id := s2_req.id io.resp.bits.id := s2_req.id
// replacement policy // replacement policy
val replacer = cacheParams.replacement val replaced_way_en = UIntToOH(replacer.way(get_idx(s2_req.addr)))
val replaced_way_en = UIntToOH(replacer.way)
io.miss_req.valid := s2_valid && !s2_hit io.miss_req.valid := s2_valid && !s2_hit
io.miss_req.bits.id := s2_req.id io.miss_req.bits.id := s2_req.id
...@@ -586,11 +605,12 @@ class L1plusCachePipe extends L1plusCacheModule ...@@ -586,11 +605,12 @@ class L1plusCachePipe extends L1plusCacheModule
io.miss_req.bits.addr := s2_req.addr io.miss_req.bits.addr := s2_req.addr
io.miss_req.bits.way_en := replaced_way_en io.miss_req.bits.way_en := replaced_way_en
s2_passdown := s2_valid && ((s2_hit && io.resp.ready) || (!s2_hit && io.miss_req.ready)) touch_sets(1) := io.miss_meta_write.bits.tagIdx
touch_ways(1).valid := io.miss_meta_write.valid
touch_ways(1).bits := OHToUInt(io.miss_meta_write.bits.way_en.asUInt)
when (io.miss_req.fire()) {
replacer.miss s2_passdown := s2_valid && ((s2_hit && io.resp.ready) || (!s2_hit && io.miss_req.ready))
}
val resp = io.resp val resp = io.resp
when (resp.valid) { when (resp.valid) {
...@@ -615,6 +635,10 @@ class L1plusCachePipe extends L1plusCacheModule ...@@ -615,6 +635,10 @@ class L1plusCachePipe extends L1plusCacheModule
) )
} }
} }
XSPerf("req", s0_valid)
XSPerf("miss", s2_valid && !s2_hit)
} }
class L1plusCacheMissReq extends L1plusCacheBundle class L1plusCacheMissReq extends L1plusCacheBundle
......
...@@ -249,8 +249,10 @@ class TlbResp extends TlbBundle { ...@@ -249,8 +249,10 @@ class TlbResp extends TlbBundle {
val instr = Bool() val instr = Bool()
} }
} }
val ptwBack = Bool() // when ptw back, wake up replay rs's state
override def toPrintable: Printable = { override def toPrintable: Printable = {
p"paddr:0x${Hexadecimal(paddr)} miss:${miss} excp.pf: ld:${excp.pf.ld} st:${excp.pf.st} instr:${excp.pf.instr}" p"paddr:0x${Hexadecimal(paddr)} miss:${miss} excp.pf: ld:${excp.pf.ld} st:${excp.pf.st} instr:${excp.pf.instr} ptwBack:${ptwBack}"
} }
} }
...@@ -439,6 +441,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ ...@@ -439,6 +441,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
resp(i).valid := validReg resp(i).valid := validReg
resp(i).bits.paddr := Mux(vmEnable, paddr, if (isDtlb) RegNext(vaddr) else vaddr) resp(i).bits.paddr := Mux(vmEnable, paddr, if (isDtlb) RegNext(vaddr) else vaddr)
resp(i).bits.miss := miss resp(i).bits.miss := miss
resp(i).bits.ptwBack := io.ptw.resp.fire()
val perm = hitPerm // NOTE: given the excp, the out module choose one to use? val perm = hitPerm // NOTE: given the excp, the out module choose one to use?
val update = false.B && hit && (!hitPerm.a || !hitPerm.d && TlbCmd.isWrite(cmdReg)) // update A/D through exception val update = false.B && hit && (!hitPerm.a || !hitPerm.d && TlbCmd.isWrite(cmdReg)) // update A/D through exception
......
...@@ -60,9 +60,9 @@ class BIM extends BasePredictor with BimParams { ...@@ -60,9 +60,9 @@ class BIM extends BasePredictor with BimParams {
val updateRow = bimAddr.getBankIdx(u.ftqPC) val updateRow = bimAddr.getBankIdx(u.ftqPC)
val wrbypass_ctrs = Reg(Vec(bypassEntries, Vec(BimBanks, UInt(2.W)))) val wrbypass_ctrs = RegInit(0.U.asTypeOf(Vec(bypassEntries, Vec(BimBanks, UInt(2.W)))))
val wrbypass_ctr_valids = Reg(Vec(bypassEntries, Vec(BimBanks, Bool()))) val wrbypass_ctr_valids = RegInit(0.U.asTypeOf(Vec(bypassEntries, Vec(BimBanks, Bool()))))
val wrbypass_rows = Reg(Vec(bypassEntries, UInt(log2Up(nRows).W))) val wrbypass_rows = RegInit(0.U.asTypeOf(Vec(bypassEntries, UInt(log2Up(nRows).W))))
val wrbypass_enq_idx = RegInit(0.U(log2Up(bypassEntries).W)) val wrbypass_enq_idx = RegInit(0.U(log2Up(bypassEntries).W))
val wrbypass_hits = VecInit((0 until bypassEntries).map( i => val wrbypass_hits = VecInit((0 until bypassEntries).map( i =>
...@@ -83,19 +83,26 @@ class BIM extends BasePredictor with BimParams { ...@@ -83,19 +83,26 @@ class BIM extends BasePredictor with BimParams {
when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))} when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))}
for (b <- 0 until BimBanks) { for (b <- 0 until BimBanks) {
when (needToUpdate(b)) { when (needToUpdate.reduce(_||_)) {
when (wrbypass_hit) { when (wrbypass_hit) {
wrbypass_ctrs(wrbypass_hit_idx)(b) := newCtrs(b) when (needToUpdate(b)) {
wrbypass_ctr_valids(wrbypass_hit_idx)(b) := true.B wrbypass_ctrs(wrbypass_hit_idx)(b) := newCtrs(b)
} .otherwise { wrbypass_ctr_valids(wrbypass_hit_idx)(b) := true.B
wrbypass_ctrs(wrbypass_enq_idx)(b) := newCtrs(b) }
(0 until BimBanks).foreach(b => wrbypass_ctr_valids(wrbypass_enq_idx)(b) := false.B) // reset valid bits }.otherwise {
wrbypass_ctr_valids(wrbypass_enq_idx)(b) := true.B wrbypass_ctr_valids(wrbypass_enq_idx)(b) := false.B
wrbypass_rows(wrbypass_enq_idx) := updateRow when (needToUpdate(b)) {
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Up(bypassEntries)-1,0) wrbypass_ctr_valids(wrbypass_enq_idx)(b) := true.B
wrbypass_ctrs(wrbypass_enq_idx)(b) := newCtrs(b)
}
} }
} }
} }
when (needToUpdate.reduce(_||_) && !wrbypass_hit) {
wrbypass_rows(wrbypass_enq_idx) := updateRow
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Up(bypassEntries)-1,0)
}
bim.io.w.apply( bim.io.w.apply(
valid = needToUpdate.asUInt.orR || doing_reset, valid = needToUpdate.asUInt.orR || doing_reset,
...@@ -104,16 +111,25 @@ class BIM extends BasePredictor with BimParams { ...@@ -104,16 +111,25 @@ class BIM extends BasePredictor with BimParams {
waymask = Mux(doing_reset, Fill(BimBanks, "b1".U).asUInt, needToUpdate.asUInt) waymask = Mux(doing_reset, Fill(BimBanks, "b1".U).asUInt, needToUpdate.asUInt)
) )
XSPerf("bim_wrbypass_hit", needToUpdate.reduce(_||_) && wrbypass_hit)
XSPerf("bim_wrbypass_enq", needToUpdate.reduce(_||_) && !wrbypass_hit)
if (BPUDebug && debug) { if (BPUDebug && debug) {
val u = io.update.bits
XSDebug(doing_reset, "Reseting...\n") XSDebug(doing_reset, "Reseting...\n")
XSDebug("[update] v=%d pc=%x valids=%b, tgt=%x\n", updateValid, u.ftqPC, u.valids.asUInt, u.target) XSDebug("[update] v=%d pc=%x valids=%b, tgt=%x\n", updateValid, u.ftqPC, u.valids.asUInt, u.target)
XSDebug("[update] brMask=%b, taken=%b isMisPred=%b\n", u.br_mask.asUInt, newTakens.asUInt, u.mispred.asUInt) XSDebug("[update] brMask=%b, taken=%b isMisPred=%b\n", u.br_mask.asUInt, newTakens.asUInt, u.mispred.asUInt)
for (i <- 0 until BimBanks) { for (i <- 0 until BimBanks) {
XSDebug(RegNext(io.pc.valid && io.inMask(i)), p"BimResp[$i]: ctr = ${io.resp.ctrs(i)}\n")
XSDebug(needToUpdate(i),
p"update bim bank $i: pc:${Hexadecimal(u.ftqPC)}, taken:${u.takens(i)}, " +
p"oldCtr:${oldCtrs(i)}, newCtr:${newCtrs(i)}\n")
XSDebug(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(i) && needToUpdate(i),
p"bank $i wrbypass hit wridx $wrbypass_hit_idx: row:$updateRow, " +
p"ctr:${oldCtrs(i)}, newCtr:${newCtrs(i)}\n")
XSDebug(true.B, p"bimCtr(${i.U})=${Binary(u.metas(i).bimCtr)} oldCtr=${Binary(oldCtrs(i))} newCtr=${Binary(newCtrs(i))}\n") XSDebug(true.B, p"bimCtr(${i.U})=${Binary(u.metas(i).bimCtr)} oldCtr=${Binary(oldCtrs(i))} newCtr=${Binary(newCtrs(i))}\n")
} }
XSDebug("needToUpdate=%b updateRow=%x\n", needToUpdate.asUInt, updateRow)
XSDebug("[wrbypass] hit=%d hits=%b\n", wrbypass_hit, wrbypass_hits.asUInt)
} }
} }
\ No newline at end of file
...@@ -185,7 +185,7 @@ class BTB extends BasePredictor with BTBParams{ ...@@ -185,7 +185,7 @@ class BTB extends BasePredictor with BTBParams{
val updateBank = u.cfiIndex.bits val updateBank = u.cfiIndex.bits
val updateRow = btbAddr.getBankIdx(cfi_pc) val updateRow = btbAddr.getBankIdx(cfi_pc)
val updateIsBr = u.br_mask(u.cfiIndex.bits) val updateIsBr = u.br_mask(u.cfiIndex.bits)
val updateTaken = u.cfiIndex.valid val updateTaken = u.cfiIndex.valid && u.valids(u.cfiIndex.bits)
// TODO: remove isRVC // TODO: remove isRVC
val metaWrite = BtbMetaEntry(btbAddr.getTag(cfi_pc), updateIsBr, u.cfiIsRVC) val metaWrite = BtbMetaEntry(btbAddr.getTag(cfi_pc), updateIsBr, u.cfiIsRVC)
val dataWrite = BtbDataEntry(new_lower, new_extended) val dataWrite = BtbDataEntry(new_lower, new_extended)
...@@ -200,6 +200,11 @@ class BTB extends BasePredictor with BTBParams{ ...@@ -200,6 +200,11 @@ class BTB extends BasePredictor with BTBParams{
data.io.w.apply(updateValid, dataWrite, updateRow, updateWayMask) data.io.w.apply(updateValid, dataWrite, updateRow, updateWayMask)
edata.io.w.apply(updateValid && new_extended, u.target, updateRow, "b1".U) edata.io.w.apply(updateValid && new_extended, u.target, updateRow, "b1".U)
val alloc_conflict =
VecInit((0 until BtbBanks).map(i =>
if2_metaRead(allocWays(i))(i).valid && !if2_bankHits(i) && if2_mask(i)))
XSPerf("btb_alloc_conflict", PopCount(alloc_conflict))
if (BPUDebug && debug) { if (BPUDebug && debug) {
val debug_verbose = true val debug_verbose = true
val validLatch = RegNext(io.pc.valid) val validLatch = RegNext(io.pc.valid)
......
...@@ -129,24 +129,21 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with ...@@ -129,24 +129,21 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
val if2_ready = WireInit(false.B) val if2_ready = WireInit(false.B)
val if2_valid = RegInit(init = false.B) val if2_valid = RegInit(init = false.B)
val if2_allReady = WireInit(if2_ready && icache.io.req.ready) val if2_allReady = WireInit(if2_ready && icache.io.req.ready)
val if1_fire = (if1_valid && if2_allReady) && (icache.io.tlb.resp.valid || !if2_valid) val if1_fire = if1_valid && if2_allReady
val if1_can_go = if1_fire
val if1_gh, if2_gh, if3_gh, if4_gh = Wire(new GlobalHistory) val if1_gh, if2_gh, if3_gh, if4_gh = Wire(new GlobalHistory)
val if2_predicted_gh, if3_predicted_gh, if4_predicted_gh = Wire(new GlobalHistory) val if2_predicted_gh, if3_predicted_gh, if4_predicted_gh = Wire(new GlobalHistory)
val final_gh = RegInit(0.U.asTypeOf(new GlobalHistory)) val final_gh = RegInit(0.U.asTypeOf(new GlobalHistory))
val final_gh_bypass = WireInit(0.U.asTypeOf(new GlobalHistory))
val flush_final_gh = WireInit(false.B)
//********************** IF2 ****************************// //********************** IF2 ****************************//
val if2_allValid = if2_valid && icache.io.tlb.resp.valid val if2_allValid = if2_valid && icache.io.tlb.resp.valid
val if3_ready = WireInit(false.B) val if3_ready = WireInit(false.B)
val if2_fire = (if2_valid && if3_ready) && icache.io.tlb.resp.valid val if2_fire = if2_allValid && if3_ready
val if2_pc = RegEnable(next = if1_npc, init = resetVector.U, enable = if1_can_go) val if2_pc = RegEnable(next = if1_npc, init = resetVector.U, enable = if1_fire)
val if2_snpc = snpc(if2_pc) val if2_snpc = snpc(if2_pc)
val if2_predHist = RegEnable(if1_gh.predHist, enable=if1_can_go) val if2_predHist = RegEnable(if1_gh.predHist, enable=if1_fire)
if2_ready := if3_ready || !if2_valid if2_ready := if3_ready && icache.io.tlb.resp.valid || !if2_valid
when (if1_can_go) { if2_valid := true.B } when (if1_fire) { if2_valid := true.B }
.elsewhen (if2_flush) { if2_valid := false.B } .elsewhen (if2_flush) { if2_valid := false.B }
.elsewhen (if2_fire) { if2_valid := false.B } .elsewhen (if2_fire) { if2_valid := false.B }
...@@ -387,8 +384,8 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with ...@@ -387,8 +384,8 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
when (if4_fire) { when (if4_fire) {
final_gh := if4_predicted_gh final_gh := if4_predicted_gh
} }
if4_gh := Mux(flush_final_gh, final_gh_bypass, final_gh) if4_gh := final_gh
if3_gh := Mux(if4_valid && !if4_flush, if4_predicted_gh, if4_gh) if3_gh := Mux(if4_valid, if4_predicted_gh, if4_gh)
if2_gh := Mux(if3_valid && !if3_flush, if3_predicted_gh, if3_gh) if2_gh := Mux(if3_valid && !if3_flush, if3_predicted_gh, if3_gh)
if1_gh := Mux(if2_valid && !if2_flush, if2_predicted_gh, if2_gh) if1_gh := Mux(if2_valid && !if2_flush, if2_predicted_gh, if2_gh)
...@@ -455,8 +452,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with ...@@ -455,8 +452,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
val taken = Mux(isMisPred, b.taken, b.predTaken) val taken = Mux(isMisPred, b.taken, b.predTaken)
val updatedGh = oldGh.update(sawNTBr, isBr && taken) val updatedGh = oldGh.update(sawNTBr, isBr && taken)
final_gh := updatedGh final_gh := updatedGh
final_gh_bypass := updatedGh if1_gh := updatedGh
flush_final_gh := true.B
} }
npcGen.register(io.redirect.valid, io.redirect.bits.cfiUpdate.target, Some("backend_redirect")) npcGen.register(io.redirect.valid, io.redirect.bits.cfiUpdate.target, Some("backend_redirect"))
...@@ -465,7 +461,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with ...@@ -465,7 +461,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
if1_npc := npcGen() if1_npc := npcGen()
icache.io.req.valid := if1_can_go icache.io.req.valid := if1_fire
icache.io.resp.ready := if4_ready icache.io.resp.ready := if4_ready
icache.io.req.bits.addr := if1_npc icache.io.req.bits.addr := if1_npc
icache.io.req.bits.mask := mask(if1_npc) icache.io.req.bits.mask := mask(if1_npc)
...@@ -487,7 +483,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with ...@@ -487,7 +483,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
bpu.io.commit <> io.commitUpdate bpu.io.commit <> io.commitUpdate
bpu.io.redirect <> io.redirect bpu.io.redirect <> io.redirect
bpu.io.inFire(0) := if1_can_go bpu.io.inFire(0) := if1_fire
bpu.io.inFire(1) := if2_fire bpu.io.inFire(1) := if2_fire
bpu.io.inFire(2) := if3_fire bpu.io.inFire(2) := if3_fire
bpu.io.inFire(3) := if4_fire bpu.io.inFire(3) := if4_fire
...@@ -550,6 +546,28 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with ...@@ -550,6 +546,28 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
} }
} }
// TODO: perfs
// frontend redirect from each stage
XSPerf("if2_redirect", if2_valid && if2_bp.taken && !if2_flush)
XSPerf("if2_redirect_fired", if2_fire && if2_bp.taken && !if2_flush)
XSPerf("if3_redirect", if3_valid && if3_redirect && !if3_flush)
XSPerf("if3_redirect_fired", if3_fire && if3_redirect && !if3_flush)
XSPerf("if4_redirect", if4_valid && if4_redirect && !if4_flush)
XSPerf("if4_redirect_fired", if4_fire && if4_redirect && !if4_flush)
XSPerf("if1_total_stall", !if2_allReady && if1_valid)
XSPerf("if1_stall_from_icache_req", !icache.io.req.ready && if1_valid)
XSPerf("if1_stall_from_if2", !if2_ready && if1_valid)
XSPerf("itlb_stall", if2_valid && if3_ready && !icache.io.tlb.resp.valid)
XSPerf("icache_resp_stall", if3_valid && if4_ready && !icache.io.resp.valid)
XSPerf("if4_stall", if4_valid && !if4_fire)
XSPerf("if4_stall_ibuffer", if4_valid && !io.fetchPacket.ready && ftqEnqBuf_ready)
XSPerf("if4_stall_ftq", if4_valid && io.fetchPacket.ready && !ftqEnqBuf_ready)
XSPerf("if3_prevHalfConsumed", if3_prevHalfConsumed)
XSPerf("if4_prevHalfConsumed", if4_prevHalfConsumed)
// debug info // debug info
if (IFUDebug) { if (IFUDebug) {
XSDebug(RegNext(reset.asBool) && !reset.asBool, "Reseting...\n") XSDebug(RegNext(reset.asBool) && !reset.asBool, "Reseting...\n")
...@@ -557,13 +575,12 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with ...@@ -557,13 +575,12 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
XSDebug(icache.io.flush(1).asBool, "Flush icache stage3...\n") XSDebug(icache.io.flush(1).asBool, "Flush icache stage3...\n")
XSDebug(io.redirect.valid, p"Redirect from backend! target=${Hexadecimal(io.redirect.bits.cfiUpdate.target)}\n") XSDebug(io.redirect.valid, p"Redirect from backend! target=${Hexadecimal(io.redirect.bits.cfiUpdate.target)}\n")
XSDebug("[IF1] v=%d fire=%d cango=%d flush=%d pc=%x mask=%b\n", if1_valid, if1_fire,if1_can_go, if1_flush, if1_npc, mask(if1_npc)) XSDebug("[IF1] v=%d fire=%d flush=%d pc=%x mask=%b\n", if1_valid, if1_fire, if1_flush, if1_npc, mask(if1_npc))
XSDebug("[IF2] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x snpc=%x\n", if2_valid, if2_ready, if2_fire, if2_redirect, if2_flush, if2_pc, if2_snpc) XSDebug("[IF2] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x snpc=%x\n", if2_valid, if2_ready, if2_fire, if2_redirect, if2_flush, if2_pc, if2_snpc)
XSDebug("[IF3] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if3_valid, if3_ready, if3_fire, if3_redirect, if3_flush, if3_pc, crossPageIPF, if3_bp.hasNotTakenBrs) XSDebug("[IF3] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if3_valid, if3_ready, if3_fire, if3_redirect, if3_flush, if3_pc, crossPageIPF, if3_bp.hasNotTakenBrs)
XSDebug("[IF4] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if4_valid, if4_ready, if4_fire, if4_redirect, if4_flush, if4_pc, if4_crossPageIPF, if4_bp.hasNotTakenBrs) XSDebug("[IF4] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if4_valid, if4_ready, if4_fire, if4_redirect, if4_flush, if4_pc, if4_crossPageIPF, if4_bp.hasNotTakenBrs)
XSDebug("[IF1][icacheReq] v=%d r=%d addr=%x\n", icache.io.req.valid, icache.io.req.ready, icache.io.req.bits.addr) XSDebug("[IF1][icacheReq] v=%d r=%d addr=%x\n", icache.io.req.valid, icache.io.req.ready, icache.io.req.bits.addr)
XSDebug("[IF1][ghr] hist=%b\n", if1_gh.asUInt) XSDebug("[IF1][ghr] hist=%b\n", if1_gh.asUInt)
XSDebug("[IF1][ghr] extHist=%b\n\n", if1_gh.asUInt)
XSDebug("[IF2][bp] taken=%d jmpIdx=%d hasNTBrs=%d target=%x saveHalfRVI=%d\n\n", if2_bp.taken, if2_bp.jmpIdx, if2_bp.hasNotTakenBrs, if2_bp.target, if2_bp.saveHalfRVI) XSDebug("[IF2][bp] taken=%d jmpIdx=%d hasNTBrs=%d target=%x saveHalfRVI=%d\n\n", if2_bp.taken, if2_bp.jmpIdx, if2_bp.hasNotTakenBrs, if2_bp.target, if2_bp.saveHalfRVI)
if2_gh.debug("if2") if2_gh.debug("if2")
......
...@@ -67,9 +67,12 @@ class Ibuffer extends XSModule with HasCircularQueuePtrHelper { ...@@ -67,9 +67,12 @@ class Ibuffer extends XSModule with HasCircularQueuePtrHelper {
val allowEnq = RegInit(true.B) val allowEnq = RegInit(true.B)
val numEnq = Mux(io.in.fire, PopCount(io.in.bits.mask), 0.U) val numEnq = Mux(io.in.fire, PopCount(io.in.bits.mask), 0.U)
val numTryDeq = Mux(validEntries >= DecodeWidth.U, DecodeWidth.U, validEntries)
val numDeq = PopCount(io.out.map(_.fire)) val numDeq = PopCount(io.out.map(_.fire))
allowEnq := (IBufSize - PredictWidth).U >= validEntries +& numEnq val numAfterEnq = validEntries +& numEnq
val nextValidEntries = Mux(io.out(0).ready, numAfterEnq - numTryDeq, numAfterEnq)
allowEnq := (IBufSize - PredictWidth).U >= nextValidEntries
// Enque // Enque
io.in.ready := allowEnq io.in.ready := allowEnq
......
...@@ -159,6 +159,10 @@ class LTBColumn extends LTBModule { ...@@ -159,6 +159,10 @@ class LTBColumn extends LTBModule {
val wen = WireInit(false.B) val wen = WireInit(false.B)
when(wen) {ltb.write(if4_rIdx, wEntry)} when(wen) {ltb.write(if4_rIdx, wEntry)}
val loop_entry_is_learned = WireInit(false.B)
val loop_learned_entry_conflict = WireInit(false.B)
val loop_conf_entry_evicted = WireInit(false.B)
when(redirectValid && redirect.mispred && !isReplay && !doingReset) { when(redirectValid && redirect.mispred && !isReplay && !doingReset) {
wen := true.B wen := true.B
when(tagMatch) { when(tagMatch) {
...@@ -170,6 +174,7 @@ class LTBColumn extends LTBModule { ...@@ -170,6 +174,7 @@ class LTBColumn extends LTBModule {
when(cntMatch) { when(cntMatch) {
XSDebug("[redirect] 1\n") XSDebug("[redirect] 1\n")
wEntry.conf := if4_rEntry.conf + 1.U wEntry.conf := if4_rEntry.conf + 1.U
loop_entry_is_learned := true.B
wEntry.specCnt := 0.U wEntry.specCnt := 0.U
}.otherwise { }.otherwise {
XSDebug("[redirect] 2\n") XSDebug("[redirect] 2\n")
...@@ -194,10 +199,12 @@ class LTBColumn extends LTBModule { ...@@ -194,10 +199,12 @@ class LTBColumn extends LTBModule {
when(if4_rEntry.isLearned) { when(if4_rEntry.isLearned) {
XSDebug("[redirect] 5\n") XSDebug("[redirect] 5\n")
// do nothing? or release this entry // do nothing? or release this entry
loop_learned_entry_conflict := true.B
}.elsewhen(if4_rEntry.isConf) { }.elsewhen(if4_rEntry.isConf) {
when(if4_rEntry.age === 0.U) { when(if4_rEntry.age === 0.U) {
XSDebug("[redirect] 6\n") XSDebug("[redirect] 6\n")
wEntry.tag := redirectTag wEntry.tag := redirectTag
loop_conf_entry_evicted := true.B
wEntry.conf := 1.U wEntry.conf := 1.U
wEntry.specCnt := 0.U wEntry.specCnt := 0.U
wEntry.tripCnt := redirect.specCnt wEntry.tripCnt := redirect.specCnt
...@@ -266,6 +273,11 @@ class LTBColumn extends LTBModule { ...@@ -266,6 +273,11 @@ class LTBColumn extends LTBModule {
} }
if (BPUDebug && debug) { if (BPUDebug && debug) {
// Perf counters
XSPerf("loop_entry_is_learned ", loop_entry_is_learned)
XSPerf("loop_learned_entry_conflict ", loop_learned_entry_conflict)
XSPerf("loop_conf_entry_evicted ", loop_conf_entry_evicted)
//debug info //debug info
XSDebug(doingReset, "Reseting...\n") XSDebug(doingReset, "Reseting...\n")
XSDebug(io.repair, "Repair...\n") XSDebug(io.repair, "Repair...\n")
...@@ -338,9 +350,8 @@ class LoopPredictor extends BasePredictor with LTBParams { ...@@ -338,9 +350,8 @@ class LoopPredictor extends BasePredictor with LTBParams {
val updateValid = io.update.valid val updateValid = io.update.valid
val update = io.update.bits val update = io.update.bits
val do_redirect = RegNext(io.redirect) val redirectValid = io.redirect.valid
val redirectValid = do_redirect.valid val redirect = io.redirect.bits.cfiUpdate
val redirect = do_redirect.bits.cfiUpdate
val redirectPC = redirect.pc val redirectPC = redirect.pc
val redirectBank = ltbAddr.getBank(redirectPC) val redirectBank = ltbAddr.getBank(redirectPC)
...@@ -363,7 +374,7 @@ class LoopPredictor extends BasePredictor with LTBParams { ...@@ -363,7 +374,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
ltbs(i).io.redirect.bits.specCnt := redirect.specCnt(i) ltbs(i).io.redirect.bits.specCnt := redirect.specCnt(i)
ltbs(i).io.redirect.bits.mispred := redirect.isMisPred ltbs(i).io.redirect.bits.mispred := redirect.isMisPred
ltbs(i).io.redirect.bits.taken := redirect.taken ltbs(i).io.redirect.bits.taken := redirect.taken
ltbs(i).io.redirect.bits.isReplay := do_redirect.bits.flushItself ltbs(i).io.redirect.bits.isReplay := io.redirect.bits.flushItself
ltbs(i).io.repair := redirectValid && redirectBank =/= i.U ltbs(i).io.repair := redirectValid && redirectBank =/= i.U
} }
...@@ -394,7 +405,7 @@ class LoopPredictor extends BasePredictor with LTBParams { ...@@ -394,7 +405,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
XSDebug("[IF4][req] inMask=%b\n", inMask) XSDebug("[IF4][req] inMask=%b\n", inMask)
XSDebug("[IF4][req] updatePC=%x, updateValid=%d, isBr=%b\n", update.ftqPC, updateValid, update.br_mask.asUInt) XSDebug("[IF4][req] updatePC=%x, updateValid=%d, isBr=%b\n", update.ftqPC, updateValid, update.br_mask.asUInt)
XSDebug("[IF4][req] redirectPC=%x redirectBank=%d, redirectValid=%d, isBr=%d, isReplay=%d\n", redirect.pc, redirectBank, redirectValid, redirect.pd.isBr, do_redirect.bits.flushItself) XSDebug("[IF4][req] redirectPC=%x redirectBank=%d, redirectValid=%d, isBr=%d, isReplay=%d\n", redirect.pc, redirectBank, redirectValid, redirect.pd.isBr, io.redirect.bits.flushItself)
XSDebug("[IF4][req] isMisPred=%d\n", redirect.isMisPred) XSDebug("[IF4][req] isMisPred=%d\n", redirect.isMisPred)
XSDebug(redirectValid, "[redirect SpecCnt] ") XSDebug(redirectValid, "[redirect SpecCnt] ")
......
...@@ -199,10 +199,10 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio ...@@ -199,10 +199,10 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
waymask = Mux(doing_clear_u_lo, Fill(TageBanks, "b1".U), io.update.uMask.asUInt) waymask = Mux(doing_clear_u_lo, Fill(TageBanks, "b1".U), io.update.uMask.asUInt)
) )
val wrbypass_tags = Reg(Vec(wrBypassEntries, UInt(tagLen.W))) val wrbypass_tags = RegInit(0.U.asTypeOf(Vec(wrBypassEntries, UInt(tagLen.W))))
val wrbypass_idxs = Reg(Vec(wrBypassEntries, UInt(log2Ceil(nRows).W))) val wrbypass_idxs = RegInit(0.U.asTypeOf(Vec(wrBypassEntries, UInt(log2Ceil(nRows).W))))
val wrbypass_ctrs = Reg(Vec(wrBypassEntries, Vec(TageBanks, UInt(TageCtrBits.W)))) val wrbypass_ctrs = RegInit(0.U.asTypeOf(Vec(wrBypassEntries, Vec(TageBanks, UInt(TageCtrBits.W)))))
val wrbypass_ctr_valids = Reg(Vec(wrBypassEntries, Vec(TageBanks, Bool()))) val wrbypass_ctr_valids = RegInit(0.U.asTypeOf(Vec(wrBypassEntries, Vec(TageBanks, Bool()))))
val wrbypass_enq_idx = RegInit(0.U(log2Ceil(wrBypassEntries).W)) val wrbypass_enq_idx = RegInit(0.U(log2Ceil(wrBypassEntries).W))
when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))} when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))}
...@@ -215,7 +215,7 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio ...@@ -215,7 +215,7 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val wrbypass_hit = wrbypass_hits.reduce(_||_) val wrbypass_hit = wrbypass_hits.reduce(_||_)
// val wrbypass_rhit = wrbypass_rhits.reduce(_||_) // val wrbypass_rhit = wrbypass_rhits.reduce(_||_)
val wrbypass_hit_idx = PriorityEncoder(wrbypass_hits) val wrbypass_hit_idx = ParallelPriorityEncoder(wrbypass_hits)
// val wrbypass_rhit_idx = PriorityEncoder(wrbypass_rhits) // val wrbypass_rhit_idx = PriorityEncoder(wrbypass_rhits)
// val wrbypass_rctr_hits = VecInit((0 until TageBanks).map( b => wrbypass_ctr_valids(wrbypass_rhit_idx)(b))) // val wrbypass_rctr_hits = VecInit((0 until TageBanks).map( b => wrbypass_ctr_valids(wrbypass_rhit_idx)(b)))
...@@ -248,21 +248,33 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio ...@@ -248,21 +248,33 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
update_hi_wdata(w) := io.update.u(w)(1) update_hi_wdata(w) := io.update.u(w)(1)
update_lo_wdata(w) := io.update.u(w)(0) update_lo_wdata(w) := io.update.u(w)(0)
}
when (io.update.mask.reduce(_||_)) { when (io.update.mask.reduce(_||_)) {
when (wrbypass_hits.reduce(_||_)) { when (wrbypass_hit) {
wrbypass_ctrs(wrbypass_hit_idx)(updateBank) := update_wdata(updateBank).ctr when (io.update.mask(w)) {
wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank) := true.B wrbypass_ctrs(wrbypass_hit_idx)(w) := update_wdata(w).ctr
} .otherwise { wrbypass_ctr_valids(wrbypass_hit_idx)(w) := true.B
wrbypass_ctrs(wrbypass_enq_idx)(updateBank) := update_wdata(updateBank).ctr }
(0 until TageBanks).foreach(b => wrbypass_ctr_valids(wrbypass_enq_idx)(b) := false.B) // reset valid bits } .otherwise {
wrbypass_ctr_valids(wrbypass_enq_idx)(updateBank) := true.B // reset valid bit first
wrbypass_tags(wrbypass_enq_idx) := update_tag wrbypass_ctr_valids(wrbypass_enq_idx)(w) := false.B
wrbypass_idxs(wrbypass_enq_idx) := update_idx when (io.update.mask(w)) {
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Ceil(wrBypassEntries)-1,0) wrbypass_ctr_valids(wrbypass_enq_idx)(w) := true.B
wrbypass_ctrs(wrbypass_enq_idx)(w) := update_wdata(w).ctr
}
}
} }
} }
when (io.update.mask.reduce(_||_) && !wrbypass_hit) {
wrbypass_tags(wrbypass_enq_idx) := update_tag
wrbypass_idxs(wrbypass_enq_idx) := update_idx
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Ceil(wrBypassEntries)-1,0)
}
XSPerf("tage_table_wrbypass_hit", io.update.mask.reduce(_||_) && wrbypass_hit)
XSPerf("tage_table_wrbypass_enq", io.update.mask.reduce(_||_) && !wrbypass_hit)
XSPerf("tage_table_hits", PopCount(VecInit(io.resp.map(_.valid))))
if (BPUDebug && debug) { if (BPUDebug && debug) {
val u = io.update val u = io.update
...@@ -270,28 +282,28 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio ...@@ -270,28 +282,28 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val ub = PriorityEncoder(u.uMask) val ub = PriorityEncoder(u.uMask)
val idx = if2_idx val idx = if2_idx
val tag = if2_tag val tag = if2_tag
XSDebug(io.req.valid, "tableReq: pc=0x%x, hist=%x, idx=%d, tag=%x, mask=%b, mask=%b\n", XSDebug(io.req.valid,
io.req.bits.pc, io.req.bits.hist, idx, tag, io.req.bits.mask, if2_mask) p"tableReq: pc=0x${Hexadecimal(io.req.bits.pc)}, " +
p"hist=${Hexadecimal(io.req.bits.hist)}, idx=$idx, " +
p"tag=$tag, mask=${Binary(if2_mask)}\n")
for (i <- 0 until TageBanks) { for (i <- 0 until TageBanks) {
XSDebug(RegNext(io.req.valid) && if3_req_rhits(i), "TageTableResp[%d]: idx=%d, hit:%d, ctr:%d, u:%d\n", XSDebug(RegNext(io.req.valid && io.req.bits.mask(i)) && if3_req_rhits(i),
i.U, if3_idx, if3_req_rhits(i), io.resp(i).bits.ctr, io.resp(i).bits.u) p"TageTableResp[$i]: idx=$if3_idx, hit:${if3_req_rhits(i)}, " +
p"ctr:${io.resp(i).bits.ctr}, u:${io.resp(i).bits.u}\n")
XSDebug(io.update.mask(i),
p"update Table bank $i: pc:${Hexadecimal(u.pc)}, hist:${Hexadecimal(u.hist)}, " +
p"taken:${u.taken(i)}, alloc:${u.alloc(i)}, oldCtr:${u.oldCtr(i)}\n")
XSDebug(io.update.mask(i),
p"update Table bank $i: writing tag:${update_tag}, " +
p"ctr: ${update_wdata(b).ctr} in idx $update_idx\n")
val hitCtr = wrbypass_ctrs(wrbypass_hit_idx)(i)
XSDebug(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(i) && io.update.mask(i),
p"bank $i wrbypass hit wridx:$wrbypass_hit_idx, idx:$update_idx, tag: $update_tag, " +
p"ctr:$hitCtr, newCtr:${update_wdata(i).ctr}")
} }
XSDebug(RegNext(io.req.valid), "TageTableResp: hits:%b, maskLatch is %b\n", if3_req_rhits.asUInt, if3_mask)
XSDebug(RegNext(io.req.valid) && !if3_req_rhits.reduce(_||_), "TageTableResp: no hits!\n") XSDebug(RegNext(io.req.valid) && !if3_req_rhits.reduce(_||_), "TageTableResp: no hits!\n")
XSDebug(io.update.mask.reduce(_||_), "update Table: pc:%x, hist:%x, bank:%d, taken:%d, alloc:%d, oldCtr:%d\n",
u.pc, u.hist, b, u.taken(b), u.alloc(b), u.oldCtr(b))
XSDebug(io.update.mask.reduce(_||_), "update Table: writing tag:%b, ctr%d in idx:%d\n",
update_wdata(b).tag, update_wdata(b).ctr, update_idx)
XSDebug(io.update.mask.reduce(_||_), "update u: pc:%x, hist:%x, bank:%d, writing in u:%b\n",
u.pc, u.hist, ub, io.update.u(ub))
val updateBank = PriorityEncoder(io.update.mask)
XSDebug(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank),
"wrbypass hits, wridx:%d, tag:%x, idx:%d, hitctr:%d, bank:%d\n",
wrbypass_hit_idx, update_tag, update_idx, wrbypass_ctrs(wrbypass_hit_idx)(updateBank), updateBank)
// when (wrbypass_rhit && wrbypass_ctr_valids(wrbypass_rhit_idx).reduce(_||_)) { // when (wrbypass_rhit && wrbypass_ctr_valids(wrbypass_rhit_idx).reduce(_||_)) {
// for (b <- 0 until TageBanks) { // for (b <- 0 until TageBanks) {
// XSDebug(wrbypass_ctr_valids(wrbypass_rhit_idx)(b), // XSDebug(wrbypass_ctr_valids(wrbypass_rhit_idx)(b),
...@@ -367,10 +379,12 @@ class Tage extends BaseTage { ...@@ -367,10 +379,12 @@ class Tage extends BaseTage {
val debug_hist_s3 = RegEnable(debug_hist_s2, enable=s3_fire) val debug_hist_s3 = RegEnable(debug_hist_s2, enable=s3_fire)
val u = io.update.bits val u = io.update.bits
val updateValids = u.valids.map(v => v && io.update.valid) val updateValids =
VecInit(u.valids zip u.br_mask map {
case (v, b) => v && b && io.update.valid
})
val updateHist = u.predHist.asUInt val updateHist = u.predHist.asUInt
val updateBrMask = u.br_mask
val updateMetas = VecInit(u.metas.map(_.tageMeta)) val updateMetas = VecInit(u.metas.map(_.tageMeta))
val updateMisPred = u.mispred val updateMisPred = u.mispred
...@@ -400,7 +414,7 @@ class Tage extends BaseTage { ...@@ -400,7 +414,7 @@ class Tage extends BaseTage {
val if4_providerCtrs = RegEnable(if3_providerCtrs, s3_fire) val if4_providerCtrs = RegEnable(if3_providerCtrs, s3_fire)
val updateTageMisPreds = VecInit((0 until PredictWidth).map(i => updateMetas(i).taken =/= u.takens(i) && updateBrMask(i))) val updateTageMisPreds = VecInit((0 until PredictWidth).map(i => updateMetas(i).taken =/= u.takens(i)))
// val updateBank = u.pc(log2Ceil(TageBanks)+instOffsetBits-1, instOffsetBits) // val updateBank = u.pc(log2Ceil(TageBanks)+instOffsetBits-1, instOffsetBits)
...@@ -454,10 +468,9 @@ class Tage extends BaseTage { ...@@ -454,10 +468,9 @@ class Tage extends BaseTage {
val updateValid = updateValids(w) val updateValid = updateValids(w)
val updateMeta = updateMetas(w) val updateMeta = updateMetas(w)
val updateIsBr = updateBrMask(w) val isUpdateTaken = updateValid && u.takens(w)
val isUpdateTaken = updateValid && u.takens(w) && updateIsBr
val updateMisPred = updateTageMisPreds(w) val updateMisPred = updateTageMisPreds(w)
when (updateValid && updateIsBr) { when (updateValid) {
when (updateMeta.provider.valid) { when (updateMeta.provider.valid) {
val provider = updateMeta.provider.bits val provider = updateMeta.provider.bits
...@@ -482,6 +495,7 @@ class Tage extends BaseTage { ...@@ -482,6 +495,7 @@ class Tage extends BaseTage {
updateUMask(allocate.bits)(w) := true.B updateUMask(allocate.bits)(w) := true.B
updateU(allocate.bits)(w) := 0.U updateU(allocate.bits)(w) := 0.U
}.otherwise { }.otherwise {
val provider = updateMeta.provider val provider = updateMeta.provider
val decrMask = Mux(provider.valid, ~LowerMask(UIntToOH(provider.bits), TageNTables), 0.U(TageNTables.W)) val decrMask = Mux(provider.valid, ~LowerMask(UIntToOH(provider.bits), TageNTables), 0.U(TageNTables.W))
for (i <- 0 until TageNTables) { for (i <- 0 until TageNTables) {
...@@ -510,6 +524,41 @@ class Tage extends BaseTage { ...@@ -510,6 +524,41 @@ class Tage extends BaseTage {
} }
// Perf-counter helpers: each one forwards to XSPerf with a suffix appended to `name`.
// NOTE(review): XSPerf is defined outside this view; presumably it registers a named
// performance counter driven by `cnt` -- confirm against its definition.

// Forwards `cnt` to XSPerf under the name "<name>_at_pred".
def pred_perf(name: String, cnt: UInt) = XSPerf(s"${name}_at_pred", cnt)
// Forwards `cnt` to XSPerf under the name "<name>_at_commit".
def commit_perf(name: String, cnt: UInt) = XSPerf(s"${name}_at_commit", cnt)
// Emits both counters for one event name: `pred_cnt` goes to the "_at_pred" counter and
// `commit_cnt` to the "_at_commit" counter. The result is whatever commit_perf returns.
def tage_perf(name: String, pred_cnt: UInt, commit_cnt: UInt) = {
pred_perf(name, pred_cnt)
commit_perf(name, commit_cnt)
}
// Per-table provider counters: for every TAGE table index i, count how many entries
// name table i as their provider.
for (i <- 0 until TageNTables) {
// Prediction side: one Bool per io.meta entry, true when its provider is valid and equals i.
val pred_i_provided =
VecInit(io.meta map (m => m.provider.valid && m.provider.bits === i.U))
// Commit side: same condition on updateMetas, additionally gated by the matching updateValids bit.
val commit_i_provided =
VecInit(updateMetas zip updateValids map {
case (m, v) => m.provider.valid && m.provider.bits === i.U && v
})
tage_perf(s"tage_table_${i}_provided",
PopCount(pred_i_provided),
PopCount(commit_i_provided))
}
// Entries with no valid provider table.
// NOTE(review): the counter name suggests these fall back to the base (bim) predictor -- confirm.
tage_perf("tage_use_bim",
PopCount(VecInit(io.meta map (!_.provider.valid))),
PopCount(VecInit(updateMetas zip updateValids map {
case (m, v) => !m.provider.valid && v}))
)
// True when the provider counter equals 3 or 4.
// NOTE(review): presumably the two weakest states of a 3-bit counter -- confirm the counter width.
def unconf(providerCtr: UInt) = providerCtr === 3.U || providerCtr === 4.U
// Entries whose provider is valid but whose counter is in an `unconf` state.
// NOTE(review): this counts weak-provider cases only; whether the alternate prediction is
// actually selected is decided elsewhere -- confirm the counter name matches that logic.
tage_perf("tage_use_altpred",
PopCount(VecInit(io.meta map (
m => m.provider.valid && unconf(m.providerCtr)))),
PopCount(VecInit(updateMetas zip updateValids map {
case (m, v) => m.provider.valid && unconf(m.providerCtr) && v
})))
// Entries with any valid provider; the commit side is again gated by updateValids.
tage_perf("tage_provided",
PopCount(io.meta.map(_.provider.valid)),
PopCount(VecInit(updateMetas zip updateValids map {
case (m, v) => m.provider.valid && v
})))
if (BPUDebug && debug) { if (BPUDebug && debug) {
for (b <- 0 until TageBanks) { for (b <- 0 until TageBanks) {
val m = updateMetas(b) val m = updateMetas(b)
......
...@@ -180,7 +180,7 @@ class MicroBTB extends BasePredictor ...@@ -180,7 +180,7 @@ class MicroBTB extends BasePredictor
val read_resps = VecInit(banks.map(b => b.read_resp)) val read_resps = VecInit(banks.map(b => b.read_resp))
for (b <- 0 until PredictWidth) { for (b <- 0 until PredictWidth) {
banks(b).read_pc.valid := io.pc.valid && io.inMask(b) banks(b).read_pc.valid := io.inMask(b)
banks(b).read_pc.bits := io.pc.bits banks(b).read_pc.bits := io.pc.bits
//only when hit and instruction valid and entry valid can output data //only when hit and instruction valid and entry valid can output data
......
...@@ -40,6 +40,7 @@ class LsPipelineBundle extends XSBundle { ...@@ -40,6 +40,7 @@ class LsPipelineBundle extends XSBundle {
val miss = Bool() val miss = Bool()
val tlbMiss = Bool() val tlbMiss = Bool()
val ptwBack = Bool()
val mmio = Bool() val mmio = Bool()
val rsIdx = UInt(log2Up(IssQueSize).W) val rsIdx = UInt(log2Up(IssQueSize).W)
......
...@@ -110,7 +110,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue ...@@ -110,7 +110,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
} }
// no inst will be committed 1 cycle before tval update // no inst will be committed 1 cycle before tval update
vaddrModule.io.raddr(0) := (cmtPtrExt(0) + commitCount).value vaddrModule.io.raddr(0) := (cmtPtrExt(0) + commitCount).value
/** /**
* Enqueue at dispatch * Enqueue at dispatch
...@@ -150,21 +150,12 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue ...@@ -150,21 +150,12 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val IssuePtrMoveStride = 4 val IssuePtrMoveStride = 4
require(IssuePtrMoveStride >= 2) require(IssuePtrMoveStride >= 2)
val issueLookup = Wire(Vec(IssuePtrMoveStride, Bool())) val issueLookupVec = (0 until IssuePtrMoveStride).map(issuePtrExt + _.U)
for (i <- 0 until IssuePtrMoveStride) { val issueLookup = issueLookupVec.map(ptr => allocated(ptr.value) && issued(ptr.value) && ptr =/= enqPtrExt(0))
val lookUpPtr = issuePtrExt.value + i.U val nextIssuePtr = issuePtrExt + PriorityEncoder(VecInit(issueLookup.map(!_) :+ true.B))
if(i == 0){ issuePtrExt := nextIssuePtr
issueLookup(i) := allocated(lookUpPtr) && issued(lookUpPtr)
}else{
issueLookup(i) := allocated(lookUpPtr) && issued(lookUpPtr) && issueLookup(i-1)
}
when(issueLookup(i)){ when (io.brqRedirect.valid || io.flush) {
issuePtrExt := issuePtrExt + (i+1).U
}
}
when(io.brqRedirect.valid || io.flush){
issuePtrExt := Mux( issuePtrExt := Mux(
isAfter(cmtPtrExt(0), deqPtrExt(0)), isAfter(cmtPtrExt(0), deqPtrExt(0)),
cmtPtrExt(0), cmtPtrExt(0),
......
...@@ -88,6 +88,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{ ...@@ -88,6 +88,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
io.tlbFeedback.valid := RegNext(RegNext(io.in.valid)) io.tlbFeedback.valid := RegNext(RegNext(io.in.valid))
io.tlbFeedback.bits.hit := true.B io.tlbFeedback.bits.hit := true.B
io.tlbFeedback.bits.rsIdx := RegEnable(io.rsIdx, io.in.valid) io.tlbFeedback.bits.rsIdx := RegEnable(io.rsIdx, io.in.valid)
io.tlbFeedback.bits.flushState := DontCare
// tlb translation, manipulating signals && deal with exception // tlb translation, manipulating signals && deal with exception
when (state === s_tlb) { when (state === s_tlb) {
......
...@@ -33,7 +33,7 @@ class LoadUnit_S0 extends XSModule { ...@@ -33,7 +33,7 @@ class LoadUnit_S0 extends XSModule {
// val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0)) // val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0))
val imm12 = WireInit(s0_uop.ctrl.imm(11,0)) val imm12 = WireInit(s0_uop.ctrl.imm(11,0))
val s0_vaddr_lo = io.in.bits.src1(11,0) + Cat(0.U(1.W), imm12) val s0_vaddr_lo = io.in.bits.src1(11,0) + Cat(0.U(1.W), imm12)
val s0_vaddr_hi = Mux(s0_vaddr_lo(12), val s0_vaddr_hi = Mux(s0_vaddr_lo(12),
Mux(imm12(11), io.in.bits.src1(VAddrBits-1, 12), io.in.bits.src1(VAddrBits-1, 12)+1.U), Mux(imm12(11), io.in.bits.src1(VAddrBits-1, 12), io.in.bits.src1(VAddrBits-1, 12)+1.U),
Mux(imm12(11), io.in.bits.src1(VAddrBits-1, 12)+SignExt(1.U, VAddrBits-12), io.in.bits.src1(VAddrBits-1, 12)), Mux(imm12(11), io.in.bits.src1(VAddrBits-1, 12)+SignExt(1.U, VAddrBits-12), io.in.bits.src1(VAddrBits-1, 12)),
) )
...@@ -131,6 +131,7 @@ class LoadUnit_S1 extends XSModule { ...@@ -131,6 +131,7 @@ class LoadUnit_S1 extends XSModule {
io.out.bits.tlbMiss := s1_tlb_miss io.out.bits.tlbMiss := s1_tlb_miss
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.out.bits.uop.cf.exceptionVec(loadAccessFault) := io.dtlbResp.bits.excp.af.ld io.out.bits.uop.cf.exceptionVec(loadAccessFault) := io.dtlbResp.bits.excp.af.ld
io.out.bits.ptwBack := io.dtlbResp.bits.ptwBack
io.out.bits.rsIdx := io.in.bits.rsIdx io.out.bits.rsIdx := io.in.bits.rsIdx
io.in.ready := !io.in.valid || io.out.ready io.in.ready := !io.in.valid || io.out.ready
...@@ -169,6 +170,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper { ...@@ -169,6 +170,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
io.tlbFeedback.valid := io.in.valid io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception) io.tlbFeedback.bits.hit := !s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception)
io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx
io.tlbFeedback.bits.flushState := io.in.bits.ptwBack
io.needReplayFromRS := s2_cache_replay io.needReplayFromRS := s2_cache_replay
// merge forward result // merge forward result
...@@ -225,7 +227,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper { ...@@ -225,7 +227,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
// Such inst will be writebacked from load queue. // Such inst will be writebacked from load queue.
io.dataForwarded := s2_cache_miss && fullForward && !s2_exception io.dataForwarded := s2_cache_miss && fullForward && !s2_exception
// io.out.bits.forwardX will be sent to lq // io.out.bits.forwardX will be sent to lq
io.out.bits.forwardMask := forwardMask io.out.bits.forwardMask := forwardMask
// data retrieved from dcache is also included in io.out.bits.forwardData // data retrieved from dcache is also included in io.out.bits.forwardData
io.out.bits.forwardData := rdataVec io.out.bits.forwardData := rdataVec
......
...@@ -78,6 +78,7 @@ class StoreUnit_S1 extends XSModule { ...@@ -78,6 +78,7 @@ class StoreUnit_S1 extends XSModule {
// Send TLB feedback to store issue queue // Send TLB feedback to store issue queue
io.tlbFeedback.valid := io.in.valid io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss io.tlbFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.flushState := io.dtlbResp.bits.ptwBack
io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx
XSDebug(io.tlbFeedback.valid, XSDebug(io.tlbFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n", "S1 Store: tlbHit: %d roqIdx: %d\n",
......
...@@ -14,15 +14,13 @@ class SbufferFlushBundle extends Bundle { ...@@ -14,15 +14,13 @@ class SbufferFlushBundle extends Bundle {
trait HasSbufferConst extends HasXSParameter { trait HasSbufferConst extends HasXSParameter {
// use 1h to speedup selection // use 1h to speedup selection
def s_invalid = (1<<0).U(4.W) def s_invalid = (1<<0).U(3.W)
def s_valid = (1<<1).U(4.W) def s_valid = (1<<1).U(3.W)
def s_prepare = (1<<2).U(4.W) def s_inflight = (1<<2).U(3.W)
def s_inflight = (1<<3).U(4.W)
def isInvalid(i: UInt): Bool = i(0).asBool def isInvalid(i: UInt): Bool = i(0).asBool
def isValid(i: UInt): Bool = i(1).asBool def isValid(i: UInt): Bool = i(1).asBool
def isPrepare(i: UInt): Bool = i(2).asBool def isInflight(i: UInt): Bool = i(2).asBool
def isInflight(i: UInt): Bool = i(3).asBool
val evictCycle = 1 << 20 val evictCycle = 1 << 20
require(isPow2(evictCycle)) require(isPow2(evictCycle))
...@@ -35,84 +33,40 @@ trait HasSbufferConst extends HasXSParameter { ...@@ -35,84 +33,40 @@ trait HasSbufferConst extends HasXSParameter {
val OffsetWidth: Int = log2Up(CacheLineBytes) val OffsetWidth: Int = log2Up(CacheLineBytes)
val WordsWidth: Int = log2Up(CacheLineWords) val WordsWidth: Int = log2Up(CacheLineWords)
val TagWidth: Int = PAddrBits - OffsetWidth val TagWidth: Int = PAddrBits - OffsetWidth
val WordOffsetWidth: Int = PAddrBits - WordsWidth
} }
class SbufferBundle extends XSBundle with HasSbufferConst class SbufferBundle extends XSBundle with HasSbufferConst
class SbufferLine extends SbufferBundle { class DataWriteReq extends SbufferBundle {
val tag = UInt(TagWidth.W) val idx = UInt(SbufferIndexWidth.W)
val data = UInt(CacheLineSize.W) val mask = UInt((DataBits/8).W)
val mask = UInt(CacheLineBytes.W) val data = UInt(DataBits.W)
val wordOffset = UInt(WordOffsetWidth.W)
override def toPrintable: Printable = {
p"tag:${Hexadecimal(tag)} data:${Hexadecimal(data)} mask:${Binary(mask)}\n"
}
}
class ChooseReplace(nWay: Int) extends XSModule {
val io = IO(new Bundle{
val mask = Vec(nWay, Input(Bool()))
val way = Output(UInt(nWay.W))
val flush = Input(Bool())
})
val wayReg = RegInit(0.U(log2Up(nWay).W))
val wayMask = ~((UIntToOH(wayReg)<<1.U)(nWay-1,0) - 1.U)
val stateMask = Cat(io.mask.reverse)
val loMask = (wayMask & stateMask)(nWay-1,0)
val nextWay = PriorityEncoder(Cat(stateMask, loMask))(log2Up(nWay)-1, 0)
XSDebug(p"nextWay[${nextWay}]\n")
wayReg := nextWay
io.way := wayReg
when(io.flush){
wayReg := 0.U
}
} }
class SbufferLru(nWay: Int) extends XSModule { class SbufferData extends XSModule with HasSbufferConst {
val io = IO(new Bundle{ val io = IO(new Bundle(){
val in = Vec(StorePipelineWidth, Input(UInt(nWay.W))) val writeReq = Vec(StorePipelineWidth, Flipped(ValidIO(new DataWriteReq)))
val mask = Vec(StoreBufferSize, Input(Bool())) val dataOut = Output(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
val way = Output(UInt(nWay.W))
val flush = Input(Bool())
}) })
val lruRect = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(nWay.W)))) val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
val count = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(log2Up(nWay+1).W))))
val idx = RegInit(VecInit(Seq.tabulate(StoreBufferSize)(i => i.U)))
//update
val updataMask = ParallelOR(io.in)
val updateValue = (~updataMask).asUInt()
for(i <- 0 until nWay){
val lruUpdate = Mux(updataMask(i), updateValue, lruRect(i) & updateValue)
lruRect(i) := lruUpdate
count(i) := PopCount(lruUpdate)
}
// get evictionIdx
val maskCount = Wire(Vec(StoreBufferSize, UInt((log2Up(1 + nWay) + log2Up(nWay)).W))) // (popcount, Idx)
val countZipIdx = maskCount.zip((0 until nWay).map(_.U))
for(i <- 0 until nWay){
val value = Mux(io.mask(i), count(i), nWay.U)
maskCount(i) := Cat(value, idx(i))
}
io.way := ParallelMin(maskCount)(log2Up(nWay)-1,0) val req = io.writeReq
// flush for(i <- 0 until StorePipelineWidth) {
when(io.flush){ when(req(i).valid){
for(i <- 0 until nWay){ for(j <- 0 until DataBytes){
lruRect(i) := 0.U when(req(i).bits.mask(j)){
count(i) := nWay.U data(req(i).bits.idx)(req(i).bits.wordOffset)(j) := req(i).bits.data(j*8+7, j*8)
}
}
} }
XSDebug("drain sbuffer finish, flush lru\n")
} }
}
io.dataOut := data
}
class NewSbuffer extends XSModule with HasSbufferConst { class NewSbuffer extends XSModule with HasSbufferConst {
val io = IO(new Bundle() { val io = IO(new Bundle() {
...@@ -121,6 +75,7 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -121,6 +75,7 @@ class NewSbuffer extends XSModule with HasSbufferConst {
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val sqempty = Input(Bool()) val sqempty = Input(Bool())
val flush = Flipped(new SbufferFlushBundle) val flush = Flipped(new SbufferFlushBundle)
val csrCtrl = Flipped(new CustomCSRCtrlIO)
}) })
val difftestIO = IO(new Bundle() { val difftestIO = IO(new Bundle() {
val sbufferResp = Output(Bool()) val sbufferResp = Output(Bool())
...@@ -130,10 +85,13 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -130,10 +85,13 @@ class NewSbuffer extends XSModule with HasSbufferConst {
}) })
difftestIO <> DontCare difftestIO <> DontCare
val buffer = Mem(StoreBufferSize, new SbufferLine) val dataModule = Module(new SbufferData)
dataModule.io.writeReq <> DontCare
val writeReq = dataModule.io.writeReq
val tag = Reg(Vec(StoreBufferSize, UInt(TagWidth.W))) val tag = Reg(Vec(StoreBufferSize, UInt(TagWidth.W)))
val mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool())))) val mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool()))))
val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) // TODO: will be replaced by SyncDataModuleTemplate val data = dataModule.io.dataOut
val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(s_invalid))) val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(s_invalid)))
val cohCount = Reg(Vec(StoreBufferSize, UInt(countBits.W))) val cohCount = Reg(Vec(StoreBufferSize, UInt(countBits.W)))
/* /*
...@@ -166,14 +124,24 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -166,14 +124,24 @@ class NewSbuffer extends XSModule with HasSbufferConst {
def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f
// sbuffer entry count // sbuffer entry count
val invalidCount = RegInit(StoreBufferSize.U((log2Up(StoreBufferSize) + 1).W))
val validCount = RegInit(0.U((log2Up(StoreBufferSize) + 1).W))
val full = invalidCount === 0.U // full = TODO: validCount(log2Up(StoreBufferSize))
val lru = Module(new ChooseReplace(StoreBufferSize)) val plru = new PseudoLRU(StoreBufferSize)
val evictionIdx = lru.io.way val accessIdx = Wire(Vec(StorePipelineWidth + 1, Valid(UInt(SbufferIndexWidth.W))))
val replaceIdx = plru.way
plru.access(accessIdx)
//-------------------------cohCount-----------------------------
// insert and merge: cohCount=0
// every cycle cohCount+=1
// if cohCount(countBits-1)==1, evict
val timeOutMask = VecInit(widthMap(i => cohCount(i)(countBits - 1)))
val (timeOutIdx, hasTimeOut) = PriorityEncoderWithFlag(timeOutMask)
val validMask = VecInit(stateVec.map(s => isValid(s)))
val drainIdx = PriorityEncoder(validMask)
lru.io.mask := stateVec.map(isValid(_)) val inflightMask = VecInit(stateVec.map(s => isInflight(s)))
val intags = io.in.map(in => getTag(in.bits.addr)) val intags = io.in.map(in => getTag(in.bits.addr))
val sameTag = intags(0) === intags(1) val sameTag = intags(0) === intags(1)
...@@ -181,7 +149,6 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -181,7 +149,6 @@ class NewSbuffer extends XSModule with HasSbufferConst {
val secondWord = getWord(io.in(1).bits.addr) val secondWord = getWord(io.in(1).bits.addr)
val sameWord = firstWord === secondWord val sameWord = firstWord === secondWord
// merge condition // merge condition
val mergeMask = Wire(Vec(StorePipelineWidth, Vec(StoreBufferSize, Bool()))) val mergeMask = Wire(Vec(StorePipelineWidth, Vec(StoreBufferSize, Bool())))
val mergeIdx = mergeMask.map(PriorityEncoder(_)) val mergeIdx = mergeMask.map(PriorityEncoder(_))
...@@ -189,16 +156,17 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -189,16 +156,17 @@ class NewSbuffer extends XSModule with HasSbufferConst {
for(i <- 0 until StorePipelineWidth){ for(i <- 0 until StorePipelineWidth){
mergeMask(i) := widthMap(j => mergeMask(i) := widthMap(j =>
intags(i) === tag(j) && isValid(stateVec(j)) intags(i) === tag(j) && validMask(j)
) )
} }
// insert confition // insert condition
// firstInsert: the first invalid entry // firstInsert: the first invalid entry
// if first entry canMerge or second entry has the same tag with the first entry , secondInsert equal the first invalid entry, otherwise, the second invalid entry // if first entry canMerge or second entry has the same tag with the first entry,
val invalidMask = stateVec.map(s => isInvalid(s)) // secondInsert equal the first invalid entry, otherwise, the second invalid entry
val evenInvalidMask = GetEvenBits(VecInit(invalidMask).asUInt) val invalidMask = VecInit(stateVec.map(s => isInvalid(s)))
val oddInvalidMask = GetOddBits(VecInit(invalidMask).asUInt) val evenInvalidMask = GetEvenBits(invalidMask.asUInt)
val oddInvalidMask = GetOddBits(invalidMask.asUInt)
val (evenRawInsertIdx, evenCanInsert) = PriorityEncoderWithFlag(evenInvalidMask) val (evenRawInsertIdx, evenCanInsert) = PriorityEncoderWithFlag(evenInvalidMask)
val (oddRawInsertIdx, oddCanInsert) = PriorityEncoderWithFlag(oddInvalidMask) val (oddRawInsertIdx, oddCanInsert) = PriorityEncoderWithFlag(oddInvalidMask)
...@@ -228,7 +196,6 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -228,7 +196,6 @@ class NewSbuffer extends XSModule with HasSbufferConst {
stateVec(insertIdx) := s_valid stateVec(insertIdx) := s_valid
cohCount(insertIdx) := 0.U cohCount(insertIdx) := 0.U
tag(insertIdx) := reqtag tag(insertIdx) := reqtag
when(flushMask){ when(flushMask){
for(j <- 0 until CacheLineWords){ for(j <- 0 until CacheLineWords){
for(i <- 0 until DataBytes){ for(i <- 0 until DataBytes){
...@@ -236,11 +203,10 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -236,11 +203,10 @@ class NewSbuffer extends XSModule with HasSbufferConst {
} }
} }
} }
for(i <- 0 until DataBytes){ for(i <- 0 until DataBytes){
when(req.mask(i)){ when(req.mask(i)){
mask(insertIdx)(wordOffset)(i) := true.B mask(insertIdx)(wordOffset)(i) := true.B
data(insertIdx)(wordOffset)(i) := req.data(i*8+7, i*8) // data(insertIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
} }
} }
} }
...@@ -250,32 +216,33 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -250,32 +216,33 @@ class NewSbuffer extends XSModule with HasSbufferConst {
for(i <- 0 until DataBytes){ for(i <- 0 until DataBytes){
when(req.mask(i)){ when(req.mask(i)){
mask(mergeIdx)(wordOffset)(i) := true.B mask(mergeIdx)(wordOffset)(i) := true.B
data(mergeIdx)(wordOffset)(i) := req.data(i*8+7, i*8) // data(mergeIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
} }
} }
} }
// first store for(((in, wordOffset), i) <- io.in.zip(Seq(firstWord, secondWord)).zipWithIndex){
when(io.in(0).fire()){ writeReq(i).valid := in.fire()
when(canMerge(0)){ writeReq(i).bits.wordOffset := wordOffset
mergeWordReq(io.in(0).bits, mergeIdx(0), firstWord) writeReq(i).bits.mask := in.bits.mask
XSDebug(p"merge req 0 to line [${mergeIdx(0)}]\n") writeReq(i).bits.data := in.bits.data
}.otherwise{ val insertIdx = if(i == 0) firstInsertIdx else secondInsertIdx
wordReqToBufLine(io.in(0).bits, intags(0), firstInsertIdx, firstWord, true.B) val flushMask = if(i == 0) true.B else !sameTag
XSDebug(p"insert req 0 to line[$firstInsertIdx]\n") accessIdx(i).valid := RegNext(in.fire())
accessIdx(i).bits := RegNext(Mux(canMerge(i), mergeIdx(i), insertIdx))
when(in.fire()){
when(canMerge(i)){
writeReq(i).bits.idx := mergeIdx(i)
mergeWordReq(in.bits, mergeIdx(i), wordOffset)
XSDebug(p"merge req $i to line [${mergeIdx(i)}]\n")
}.otherwise({
writeReq(i).bits.idx := insertIdx
wordReqToBufLine(in.bits, intags(i), insertIdx, wordOffset, flushMask)
XSDebug(p"insert req $i to line[$insertIdx]\n")
})
} }
} }
// second store
when(io.in(1).fire()){
when(canMerge(1)){
mergeWordReq(io.in(1).bits, mergeIdx(1), secondWord)
XSDebug(p"merge req 1 to line [${mergeIdx(1)}]\n")
}.otherwise{
wordReqToBufLine(io.in(1).bits, intags(1), secondInsertIdx, secondWord, !sameTag)
XSDebug(p"insert req 1 to line[$secondInsertIdx]\n")
}
}
for(i <- 0 until StoreBufferSize){ for(i <- 0 until StoreBufferSize){
XSDebug(stateVec(i)=/=s_invalid, XSDebug(stateVec(i)=/=s_invalid,
...@@ -295,16 +262,17 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -295,16 +262,17 @@ class NewSbuffer extends XSModule with HasSbufferConst {
) )
} }
// ---------------------- Send Dcache Req --------------------- // ---------------------- Send Dcache Req ---------------------
val do_eviction = Wire(Bool()) val empty = Cat(invalidMask).andR() && !Cat(io.in.map(_.valid)).orR()
val empty = Cat(stateVec.map(s => isInvalid(s))).andR() && !Cat(io.in.map(_.valid)).orR() val threshold = RegNext(io.csrCtrl.sbuffer_threshold +& 1.U)
val validCount = PopCount(validMask)
val do_eviction = RegNext(validCount >= threshold, init = false.B)
do_eviction := validCount >= 12.U XSDebug(p"validCount[$validCount]\n")
io.flush.empty := RegNext(empty && io.sqempty) io.flush.empty := RegNext(empty && io.sqempty)
lru.io.flush := sbuffer_state === x_drain_sbuffer && empty // lru.io.flush := sbuffer_state === x_drain_sbuffer && empty
switch(sbuffer_state){ switch(sbuffer_state){
is(x_idle){ is(x_idle){
when(io.flush.valid){ when(io.flush.valid){
...@@ -329,59 +297,56 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -329,59 +297,56 @@ class NewSbuffer extends XSModule with HasSbufferConst {
XSDebug(p"sbuffer state:${sbuffer_state} do eviction:${do_eviction} empty:${empty}\n") XSDebug(p"sbuffer state:${sbuffer_state} do eviction:${do_eviction} empty:${empty}\n")
def noSameBlockInflight(idx: UInt): Bool = { def noSameBlockInflight(idx: UInt): Bool = {
val atag = tag(idx) // stateVec(idx) itself must not be s_inflight
!Cat(widthMap(i => { !Cat(widthMap(i => inflightMask(i) && tag(idx) === tag(i))).orR()
// stateVec(idx) itself must not be s_inflight*
(isInflight(stateVec(i)) || isPrepare(stateVec(i))) &&
atag === tag(i)
})).orR()
} }
val need_drain = sbuffer_state === x_drain_sbuffer
val need_replace = do_eviction || (sbuffer_state === x_replace)
val evictionIdx = Mux(need_drain,
drainIdx,
Mux(hasTimeOut, timeOutIdx, replaceIdx)
)
/* /*
If there is an inflight dcache req which has the same tag as evictionIdx's tag, If there is an inflight dcache req which has the same tag as evictionIdx's tag,
current eviction should be blocked. current eviction should be blocked.
*/ */
// val evictionEntry = Wire(DecoupledIO(UInt(SbufferIndexWidth.W))) val prepareValid = (need_drain || hasTimeOut || need_replace) &&
// noSameBlockInflight(evictionIdx) && validMask(evictionIdx)
// evictionEntry.valid := val prepareValidReg = RegInit(false.B)
// do_eviction && sbuffer_state === x_replace || sbuffer_state === x_drain_sbuffer && val canSendDcacheReq = io.dcache.req.ready || !prepareValidReg
// stateVec(evictionIdx)===s_valid && val willSendDcacheReq = prepareValid && canSendDcacheReq
// noSameBlockInflight(evictionIdx)
//
// evictionEntry.bits := evictionIdx
val prepareValid = ((do_eviction && sbuffer_state === x_replace) || (sbuffer_state === x_drain_sbuffer)) &&
isValid(stateVec(evictionIdx)) &&
noSameBlockInflight(evictionIdx)
when(prepareValid){
stateVec(evictionIdx) := s_prepare
}
val prepareMask = stateVec.map(s => isPrepare(s))
val (prepareIdx, prepareEn) = PriorityEncoderWithFlag(prepareMask)
val dcacheReqValid = RegInit(false.B)
val dcacheCandidate = Reg(new DCacheLineReq)
when(io.dcache.req.fire()){ when(io.dcache.req.fire()){
dcacheReqValid := false.B prepareValidReg := false.B
} }
when(prepareEn && (!dcacheReqValid || io.dcache.req.fire())) { when(canSendDcacheReq){
dcacheCandidate.addr := getAddr(tag(prepareIdx)) prepareValidReg := prepareValid
dcacheCandidate.data := data(prepareIdx).asUInt
dcacheCandidate.mask := mask(prepareIdx).asUInt
dcacheCandidate.cmd := MemoryOpConstants.M_XWR
dcacheCandidate.id := prepareIdx
stateVec(prepareIdx) := s_inflight
dcacheReqValid := true.B
} }
when(willSendDcacheReq){
io.dcache.req.valid := dcacheReqValid stateVec(evictionIdx) := s_inflight
io.dcache.req.bits := dcacheCandidate XSDebug(p"$evictionIdx will be sent to Dcache\n")
// evictionEntry.ready := io.dcache.req.ready }
XSDebug(p"need drain:$need_drain hasTimeOut: $hasTimeOut need replace:$need_replace\n")
XSDebug(p"drainIdx:$drainIdx tIdx:$timeOutIdx replIdx:$replaceIdx " +
p"blocked:${!noSameBlockInflight(evictionIdx)} v:${validMask(evictionIdx)}\n")
XSDebug(p"prepareValid:$prepareValid evictIdx:$evictionIdx dcache ready:${io.dcache.req.ready}\n")
// Note: if other dcache req in the same block are inflight,
// the lru update may not be accurate
accessIdx(StorePipelineWidth).valid := invalidMask(replaceIdx) || (
need_replace && !need_drain && !hasTimeOut && canSendDcacheReq && validMask(replaceIdx))
accessIdx(StorePipelineWidth).bits := replaceIdx
val evictionIdxReg = RegEnable(evictionIdx, enable = willSendDcacheReq)
val evictionTag = RegEnable(tag(evictionIdx), enable = willSendDcacheReq)
io.dcache.req.valid := prepareValidReg
io.dcache.req.bits.addr := getAddr(evictionTag)
io.dcache.req.bits.data := data(evictionIdxReg).asUInt
io.dcache.req.bits.mask := mask(evictionIdxReg).asUInt
io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
io.dcache.req.bits.id := evictionIdxReg
XSDebug(io.dcache.req.fire(), XSDebug(io.dcache.req.fire(),
p"send buf [$prepareIdx] to Dcache, req fire\n" p"send buf [$evictionIdxReg] to Dcache, req fire\n"
) )
io.dcache.resp.ready := true.B // sbuffer always ready to recv dcache resp io.dcache.resp.ready := true.B // sbuffer always ready to recv dcache resp
...@@ -399,23 +364,8 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -399,23 +364,8 @@ class NewSbuffer extends XSModule with HasSbufferConst {
difftestIO.sbufferMask := WireInit(mask(respId).asUInt) difftestIO.sbufferMask := WireInit(mask(respId).asUInt)
} }
val needSpace = (io.in(0).fire && !canMerge(0)) +& (io.in(1).fire && !canMerge(1) && !sameTag)
invalidCount := invalidCount - needSpace + io.dcache.resp.fire()
validCount := validCount + needSpace - prepareValid
XSDebug(p"needSpace[$needSpace] invalidCount[$invalidCount] validCount[$validCount]\n")
//-------------------------cohCount-----------------------------
// insert and merge: cohCount=0
// every cycle cohCount+=1
// if cohCount(countBits-1)==1,evict
for(i <- 0 until StoreBufferSize){ for(i <- 0 until StoreBufferSize){
when(isValid(stateVec(i))){ when(validMask(i) && !timeOutMask(i)){
when(cohCount(i)(countBits-1)){
assert(stateVec(i) === s_valid)
stateVec(i) := s_prepare
}
cohCount(i) := cohCount(i)+1.U cohCount(i) := cohCount(i)+1.U
} }
} }
...@@ -423,11 +373,9 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -423,11 +373,9 @@ class NewSbuffer extends XSModule with HasSbufferConst {
// ---------------------- Load Data Forward --------------------- // ---------------------- Load Data Forward ---------------------
for ((forward, i) <- io.forward.zipWithIndex) { for ((forward, i) <- io.forward.zipWithIndex) {
val tag_matches = widthMap(i => tag(i) === getTag(forward.paddr)) val tag_matches = widthMap(w => tag(w) === getTag(forward.paddr))
val valid_tag_matches = widthMap(i => tag_matches(i) && isValid(stateVec(i))) val valid_tag_matches = widthMap(w => tag_matches(w) && validMask(w))
val inflight_tag_matches = widthMap(i => val inflight_tag_matches = widthMap(w => tag_matches(w) && inflightMask(w))
tag_matches(i) && (isInflight(stateVec(i)) || isPrepare(stateVec(i)))
)
val line_offset_mask = UIntToOH(getWordOffset(forward.paddr)) val line_offset_mask = UIntToOH(getWordOffset(forward.paddr))
val valid_tag_match_reg = valid_tag_matches.map(RegNext(_)) val valid_tag_match_reg = valid_tag_matches.map(RegNext(_))
...@@ -456,9 +404,3 @@ class NewSbuffer extends XSModule with HasSbufferConst { ...@@ -456,9 +404,3 @@ class NewSbuffer extends XSModule with HasSbufferConst {
} }
} }
} }
object NewSbuffer extends App {
override def main(args: Array[String]): Unit = {
chisel3.Driver.execute(args, ()=> new NewSbuffer)
}
}
...@@ -98,7 +98,9 @@ public: ...@@ -98,7 +98,9 @@ public:
uint64_t execute(uint64_t max_cycle, uint64_t max_instr); uint64_t execute(uint64_t max_cycle, uint64_t max_instr);
uint64_t get_cycles() const { return cycles; } uint64_t get_cycles() const { return cycles; }
EmuArgs get_args() const { return args; } EmuArgs get_args() const { return args; }
bool is_good_trap() { return trapCode == STATE_GOODTRAP; }; bool is_good_trap() {
return trapCode == STATE_GOODTRAP || trapCode == STATE_LIMIT_EXCEEDED;
};
int get_trapcode() { return trapCode; } int get_trapcode() { return trapCode; }
}; };
......
...@@ -30,7 +30,6 @@ int main(int argc, const char** argv) { ...@@ -30,7 +30,6 @@ int main(int argc, const char** argv) {
auto args = emu->get_args(); auto args = emu->get_args();
uint64_t cycles = emu->execute(args.max_cycles, args.max_instr); uint64_t cycles = emu->execute(args.max_cycles, args.max_instr);
bool is_good_trap = emu->is_good_trap(); bool is_good_trap = emu->is_good_trap();
int trapcode = emu->get_trapcode();
delete emu; delete emu;
extern uint32_t uptime(void); extern uint32_t uptime(void);
...@@ -40,6 +39,5 @@ int main(int argc, const char** argv) { ...@@ -40,6 +39,5 @@ int main(int argc, const char** argv) {
" (this will be different from cycleCnt if emu loads a snapshot)\n" ANSI_COLOR_RESET, args.seed, cycles); " (this will be different from cycleCnt if emu loads a snapshot)\n" ANSI_COLOR_RESET, args.seed, cycles);
eprintf(ANSI_COLOR_BLUE "Host time spent: %'dms\n" ANSI_COLOR_RESET, ms); eprintf(ANSI_COLOR_BLUE "Host time spent: %'dms\n" ANSI_COLOR_RESET, ms);
// return !is_good_trap; return !is_good_trap;
return trapcode;
} }
...@@ -350,13 +350,15 @@ void dramsim3_helper_rising(const axi_channel &axi) { ...@@ -350,13 +350,15 @@ void dramsim3_helper_rising(const axi_channel &axi) {
void *data_start = meta->data + meta->offset * meta->size / sizeof(uint64_t); void *data_start = meta->data + meta->offset * meta->size / sizeof(uint64_t);
axi_get_wdata(axi, data_start, meta->size); axi_get_wdata(axi, data_start, meta->size);
meta->offset++; meta->offset++;
// printf("accept a new write data\n");
}
if (wait_req_w) {
dramsim3_meta *meta = static_cast<dramsim3_meta *>(wait_req_w->meta);
// if this is the last beat // if this is the last beat
if (meta->offset == meta->len) { if (meta->offset == meta->len && dram->will_accept(wait_req_w->address, true)) {
assert(dram->will_accept(wait_req_w->address, true));
dram->add_request(wait_req_w); dram->add_request(wait_req_w);
wait_req_w = NULL; wait_req_w = NULL;
} }
// printf("accept a new write data\n");
} }
} }
...@@ -397,7 +399,11 @@ void dramsim3_helper_falling(axi_channel &axi) { ...@@ -397,7 +399,11 @@ void dramsim3_helper_falling(axi_channel &axi) {
// WDATA: check whether the write data can be accepted // WDATA: check whether the write data can be accepted
if (wait_req_w != NULL && dram->will_accept(wait_req_w->address, true)) { if (wait_req_w != NULL && dram->will_accept(wait_req_w->address, true)) {
axi_accept_wdata(axi); dramsim3_meta *meta = static_cast<dramsim3_meta *>(wait_req_w->meta);
// we have to check whether the last finished write request has been accepted by dram
if (meta->offset != meta->len) {
axi_accept_wdata(axi);
}
} }
// WRESP: if finished, we try the next write response // WRESP: if finished, we try the next write response
......
...@@ -8,25 +8,16 @@ import chisel3.util._ ...@@ -8,25 +8,16 @@ import chisel3.util._
import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.must.Matchers
import xiangshan._ import xiangshan._
import xiangshan.cache.{DCacheLineIO, DCacheWordReq}
import xiangshan.mem.{LoadForwardQueryIO, NewSbuffer} import xiangshan.mem.{LoadForwardQueryIO, NewSbuffer}
import xiangshan.testutils._ import xiangshan.testutils._
import scala.util.Random import scala.util.Random
class SbufferWapper extends XSModule { class SbufferWapper extends XSModule {
val io = IO(new Bundle() {
val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq)))
val dcache = new DCacheLineIO
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val flush = new Bundle {
val valid = Input(Bool())
val empty = Output(Bool())
} // sbuffer flush
})
val sbuffer = Module(new NewSbuffer) val sbuffer = Module(new NewSbuffer)
val io = IO(sbuffer.io.cloneType)
io <> sbuffer.io io <> sbuffer.io
AddSinks()
// fake dcache // fake dcache
sbuffer.io.dcache.req.ready := true.B sbuffer.io.dcache.req.ready := true.B
sbuffer.io.dcache.resp.valid := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.valid)))) sbuffer.io.dcache.resp.valid := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.valid))))
...@@ -40,124 +31,65 @@ class SbufferTest extends AnyFlatSpec ...@@ -40,124 +31,65 @@ class SbufferTest extends AnyFlatSpec
with ParallelTestExecution with ParallelTestExecution
with HasPartialDecoupledDriver { with HasPartialDecoupledDriver {
top.Parameters.set(top.Parameters.debugParameters) top.Parameters.set(top.Parameters.debugParameters)
def make_store_req(addr: UInt, data: UInt, mask: UInt, portIdx: Int)
(implicit c: SbufferWapper) = {
val port = c.io.in(portIdx)
port.enqueuePartial(chiselTypeOf(port.bits).Lit(
_.addr -> addr,
_.data -> data,
_.mask -> mask,
))
}
// it should "random req" in { def make_forward_req
// test(new SbufferWapper{AddSinks()}){ c => (addr: UInt, mask: UInt, ref_data: UInt, portIdx: Int)
// (implicit c: SbufferWapper) = {
// def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) ={ val port = c.io.forward(portIdx)
// (0 until StorePipelineWidth).map { i => port.paddr.poke(addr)
// c.io.in(i).valid.poke(true.B) port.mask.poke(mask)
// c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit( c.clock.step(1)
// _.mask -> mask(i), for(i <- 0 until 8){
// _.addr -> addr(i), port.forwardData(i).expect(ref_data(i * 8 + 7, i * 8))
// _.data -> data(i) }
// )) }
// }
// c.clock.step(1)
// for (in <- c.io.in){ in.valid.poke(false.B)}
// }
//
// def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
// (0 until LoadPipelineWidth).map{ i =>
// c.io.forward(i).paddr.poke(addr(i))
// c.io.forward(i).mask.poke(mask(i))
// if(c.io.in(i).ready.peek() == true.B) {
// (0 until 8).map { j =>
// c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
// }
// }
// }
// }
//
// val TEST_SIZE = 100
// for(i <- 0 until TEST_SIZE) {
// val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to block size
// val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
// val mask = Seq.fill(StorePipelineWidth)(0xff.U)
// store_enq(addr, data, mask)
// forward_req_and_resp(addr, data, mask)
// }
// }
// }
//
// it should "sequence req" in {
// test(new SbufferWapper{AddSinks()}){ c =>
//
// def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) = {
// (0 until StorePipelineWidth).map { i =>
// c.io.in(i).valid.poke(true.B)
// c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit(
// _.mask -> mask(i),
// _.addr -> addr(i),
// _.data -> data(i)
// ))
// }
// c.clock.step(1)
// for (in <- c.io.in){ in.valid.poke(false.B)}
// }
//
// def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
// (0 until LoadPipelineWidth).map{ i =>
// c.io.forward(i).paddr.poke(addr(i))
// c.io.forward(i).mask.poke(mask(i))
// if(c.io.in(i).ready.peek() == true.B) {
// (0 until 8).map { j =>
// c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
// }
// }
// }
// }
//
// val TEST_SIZE = 100
// val start_addr = Random.nextLong() & 0x7ffffffff8L
// for(i <- 0 until TEST_SIZE) {
// val addr = Seq(((i<<4) + start_addr).U,((i<<4)+8+start_addr).U)
// val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
// val mask = Seq.fill(StorePipelineWidth)(0xff.U)
// store_enq(addr, data, mask)
// forward_req_and_resp(addr, data, mask)
// }
// }
// }
it should "sbuffer coherence" in {
test(new SbufferWapper{AddSinks()}){ c => it should "allow multi-inflight dcache requests" in {
def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) ={ test(new SbufferWapper){ c =>
(0 until StorePipelineWidth).map { i => implicit val circuit = c
c.io.in(i).valid.poke(true.B) c.io.in.foreach(p => p.initSource().setSourceClock(c.clock))
c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit( val TEST_SIZE = 1000
_.mask -> mask(i), var addr = 0
_.addr -> addr(i), for(_ <- 0 until TEST_SIZE){
_.data -> data(i) val data = (Random.nextLong() & 0x7fffffffffffffffL).U
)) val mask = 0xff.U
} make_store_req(addr.U, data, mask, 0)
c.clock.step(1) addr += 512
for (in <- c.io.in){ in.valid.poke(false.B)}
}
def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
(0 until LoadPipelineWidth).map{ i =>
c.io.forward(i).paddr.poke(addr(i))
c.io.forward(i).mask.poke(mask(i))
if(c.io.in(i).ready.peek() == true.B) {
(0 until 8).map { j =>
c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
}
}
}
} }
}
}
it should "forward older store's data to younger load" in {
test(new SbufferWapper){ c =>
implicit val circuit = c
c.io.in.foreach(p => p.initSource().setSourceClock(c.clock))
val TEST_SIZE = 10 val TEST_SIZE = 10
for(i <- 0 until TEST_SIZE) { def testPort(i : Int) = {
val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to for(_ <- 0 until TEST_SIZE){
val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U) val addr = (Random.nextLong() & 0x7ffffffff8L).U
val mask = Seq.fill(StorePipelineWidth)(0xff.U) val data = (Random.nextLong() & 0x7fffffffffffffffL).U
store_enq(addr, data, mask) val mask = 0xff.U
forward_req_and_resp(addr, data, mask) make_store_req(addr, data, mask, i)
make_forward_req(addr, mask, data, i)
}
} }
fork(
c.clock.step(512 + 10) testPort(0)
).fork(
testPort(1)
).join()
} }
} }
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册