提交 89369e3e 编写于 作者: Z zhanglinjuan

Merge branch 'master' into dev-dcache-rearrange

......@@ -182,7 +182,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
xs_core(i).module.io.externalInterrupt.msip := clint.module.io.msip(i)
// xs_core(i).module.io.externalInterrupt.meip := RegNext(RegNext(io.meip(i)))
xs_core(i).module.io.externalInterrupt.meip := plic.module.io.extra.get.meip(i)
l2prefetcher(i).module.io.enable := xs_core(i).module.io.l2_pf_enable
l2prefetcher(i).module.io.enable := RegNext(xs_core(i).module.io.l2_pf_enable)
l2prefetcher(i).module.io.in <> l2cache(i).module.io
}
......
......@@ -72,7 +72,6 @@ abstract class SetAssocReplacementPolicy {
def access(set: UInt, touch_way: UInt): Unit
def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit
def way(set: UInt): UInt
def miss(set: UInt): Unit
}
......@@ -332,7 +331,7 @@ class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocRepl
}
def way(set: UInt) = logic.get_replace_way(state_vec(set))
def miss(set: UInt) = {}
}
class SetAssocRandom(n_sets : Int, n_ways: Int) extends SetAssocReplacementPolicy {
......
......@@ -52,18 +52,11 @@ object ValidUndirectioned {
}
/** Metadata bundle carried alongside an SC prediction.
 *
 * When `useSC` is false every field is declared zero-width (`UInt(0.W)` / `SInt(0.W)`),
 * so the bundle keeps its shape but occupies no bits.
 *
 * @param useSC elaboration-time switch selecting the full-width or the zero-width layout
 */
class SCMeta(val useSC: Boolean) extends XSBundle with HasSCParameter {
// Upper bound used for sizing: 8 * (largest TageCtrBits-bit unsigned value) plus, for every
// SCTableInfo entry, the largest value of a `cb`-bit unsigned counter (cb = 2nd tuple element).
def maxVal = 8 * ((1 << TageCtrBits) - 1) + SCTableInfo.map { case (_, cb, _) => (1 << cb) - 1 }.reduce(_ + _)
// Lower bound used for sizing: the negated sum of 8 * 2^TageCtrBits and 2^cb for every SCTableInfo entry.
def minVal = -(8 * (1 << TageCtrBits) + SCTableInfo.map { case (_, cb, _) => 1 << cb }.reduce(_ + _))
// Width of `sumAbs`: enough bits for the larger of |minVal| and maxVal + 1, plus one extra bit.
def sumCtrBits = max(log2Ceil(-minVal), log2Ceil(maxVal + 1)) + 1
val tageTaken = if (useSC) Bool() else UInt(0.W)
val scUsed = if (useSC) Bool() else UInt(0.W)
val scPred = if (useSC) Bool() else UInt(0.W)
// Assumes the counter width is identical for all tables: every entry uses SCCtrBits.
val ctrs = if (useSC) Vec(SCNTables, SInt(SCCtrBits.W)) else Vec(SCNTables, SInt(0.W))
val sumAbs = if (useSC) UInt(sumCtrBits.W) else UInt(0.W)
}
class TageMeta extends XSBundle with HasTageParameter {
......@@ -401,6 +394,7 @@ class RoqCommitIO extends XSBundle {
/** Feedback bundle identifying a reservation-station entry together with two status flags.
 *
 * NOTE(review): `RSFeedback` is declared as a plain subclass of this bundle, so any field
 * added here is also visible through `RSFeedback`.
 */
class TlbFeedback extends XSBundle {
// Entry index, wide enough to address IssQueSize entries.
val rsIdx = UInt(log2Up(IssQueSize).W)
// NOTE(review): presumably true when the TLB lookup hit — confirm against the producer.
val hit = Bool()
// NOTE(review): presumably the flag the reservation station reads as
// `memfeedback.bits.flushState` — confirm the type of that port.
val flushState = Bool()
}
class RSFeedback extends TlbFeedback
......@@ -539,11 +533,14 @@ class CustomCSRCtrlIO extends XSBundle {
// Prefetcher
val l1plus_pf_enable = Output(Bool())
val l2_pf_enable = Output(Bool())
// Labeled XiangShan
val dsid = Output(UInt(8.W)) // TODO: DsidWidth as parameter
// Load violation predict
// Load violation predictor
val lvpred_disable = Output(Bool())
val no_spec_load = Output(Bool())
val waittable_timeout = Output(UInt(5.W))
// Branch predicter
// Branch predictor
val bp_ctrl = Output(new BPUCtrl)
}
\ No newline at end of file
// Memory Block
val sbuffer_threshold = Output(UInt(4.W))
}
......@@ -50,7 +50,7 @@ case class XSCoreParameters
EnableRAS: Boolean = true,
EnableLB: Boolean = false,
EnableLoop: Boolean = true,
EnableSC: Boolean = false,
EnableSC: Boolean = true,
EnbaleTlbDebug: Boolean = false,
EnableJal: Boolean = false,
EnableUBTB: Boolean = true,
......@@ -203,13 +203,14 @@ trait HasXSParameter {
val icacheParameters = ICacheParameters(
tagECC = Some("parity"),
dataECC = Some("parity"),
replacer = Some("setlru"),
replacer = Some("setplru"),
nMissEntries = 2
)
val l1plusCacheParameters = L1plusCacheParameters(
tagECC = Some("secded"),
dataECC = Some("secded"),
replacer = Some("setplru"),
nMissEntries = 8
)
......@@ -347,7 +348,8 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule
fastWakeUpIn = intExuConfigs.filter(_.hasCertainLatency),
slowWakeUpIn = intExuConfigs.filter(_.hasUncertainlatency) ++ fpExuConfigs,
fastWakeUpOut = Seq(),
slowWakeUpOut = loadExuConfigs
slowWakeUpOut = loadExuConfigs,
numIntWakeUpFp = intExuConfigs.count(_.writeFpRf)
))
lazy val module = new XSCoreImp(this)
......@@ -413,8 +415,8 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ctrlBlock.io.toLsBlock <> memBlock.io.fromCtrlBlock
ctrlBlock.io.csrCtrl <> integerBlock.io.csrio.customCtrl
val memBlockWakeUpInt = memBlock.io.wakeUpOutInt.slow.map(x => intOutValid(x))
val memBlockWakeUpFp = memBlock.io.wakeUpOutFp.slow.map(x => fpOutValid(x))
val memBlockWakeUpInt = memBlock.io.wakeUpOutInt.slow.map(WireInit(_))
val memBlockWakeUpFp = memBlock.io.wakeUpOutFp.slow.map(WireInit(_))
memBlock.io.wakeUpOutInt.slow.foreach(_.ready := true.B)
memBlock.io.wakeUpOutFp.slow.foreach(_.ready := true.B)
......@@ -422,13 +424,13 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
val fpBlockWakeUpInt = fpExuConfigs
.zip(floatBlock.io.wakeUpOut.slow)
.filter(_._1.writeIntRf)
.map(_._2).map(x => intOutValid(x, connectReady = true))
.map(_._2)
intExuConfigs.zip(integerBlock.io.wakeUpOut.slow).filterNot(_._1.writeFpRf).map(_._2.ready := true.B)
val intBlockWakeUpFp = intExuConfigs.filter(_.hasUncertainlatency)
.zip(integerBlock.io.wakeUpOut.slow)
.filter(_._1.writeFpRf)
.map(_._2).map(x => fpOutValid(x, connectReady = true))
.map(_._2)
integerBlock.io.wakeUpIn.slow <> fpBlockWakeUpInt ++ memBlockWakeUpInt
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock
......@@ -446,6 +448,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
// Note: 'WireInit' is used to block 'ready's from memBlock,
// we don't need 'ready's from memBlock
memBlock.io.wakeUpIn.slow <> wakeUpMem.flatMap(_.slow.map(x => WireInit(x)))
memBlock.io.intWakeUpFp <> floatBlock.io.intWakeUpOut
integerBlock.io.csrio.hartId <> io.hartId
integerBlock.io.csrio.perf <> DontCare
......@@ -464,26 +467,27 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
integerBlock.io.fenceio.sfence <> memBlock.io.sfence
integerBlock.io.fenceio.sbuffer <> memBlock.io.fenceToSbuffer
memBlock.io.tlbCsr <> RegNext(integerBlock.io.csrio.tlb)
memBlock.io.csrCtrl <> integerBlock.io.csrio.customCtrl
memBlock.io.tlbCsr <> integerBlock.io.csrio.tlb
memBlock.io.lsqio.roq <> ctrlBlock.io.roqio.lsq
memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.roqio.exception.bits.uop.lqIdx
memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.roqio.exception.bits.uop.sqIdx
memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.uop.ctrl.commitType)
val itlbRepester = Module(new PTWRepeater())
val dtlbRepester = Module(new PTWRepeater())
itlbRepester.io.tlb <> frontend.io.ptw
dtlbRepester.io.tlb <> memBlock.io.ptw
itlbRepester.io.sfence <> integerBlock.io.fenceio.sfence
dtlbRepester.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.tlb(0) <> dtlbRepester.io.ptw
ptw.io.tlb(1) <> itlbRepester.io.ptw
val itlbRepeater = Module(new PTWRepeater())
val dtlbRepeater = Module(new PTWRepeater())
itlbRepeater.io.tlb <> frontend.io.ptw
dtlbRepeater.io.tlb <> memBlock.io.ptw
itlbRepeater.io.sfence <> integerBlock.io.fenceio.sfence
dtlbRepeater.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.tlb(0) <> dtlbRepeater.io.ptw
ptw.io.tlb(1) <> itlbRepeater.io.ptw
ptw.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.csr <> integerBlock.io.csrio.tlb
// if the l2 prefetcher uses stream prefetching, it should be placed in XSCore
assert(l2PrefetcherParameters._type == "bop")
io.l2_pf_enable := RegNext(integerBlock.io.csrio.customCtrl.l2_pf_enable)
io.l2_pf_enable := integerBlock.io.csrio.customCtrl.l2_pf_enable
if (!env.FPGAPlatform) {
val id = hartIdCore()
......
......@@ -52,7 +52,7 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
val stage2FtqRead = new FtqRead
val stage2Redirect = ValidIO(new Redirect)
val stage3Redirect = ValidIO(new Redirect)
val waitTableUpdate = Output(new WaitTableUpdateReq)
val waitTableUpdate = Output(new WaitTableUpdateReq)
})
/*
LoadQueue Jump ALU0 ALU1 ALU2 ALU3 exception Stage1
......@@ -75,23 +75,14 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
val valid = Bool()
val idx = UInt(log2Up(n).W)
}
/** Picks one redirect out of `xs` by pairwise comparison of `roqIdx`.
 *
 * Each input is wrapped together with its valid bit and its position in `xs`, then the
 * wrappers are reduced with `ParallelOperation`. The merge rule for a pair (x, y) is:
 *  - both valid: y when `isAfter(x.roqIdx, y.roqIdx)` holds, otherwise x;
 *  - only x valid: x;
 *  - x invalid: y (whether or not y is valid).
 *
 * @param xs candidate redirects
 * @return the selected redirect (valid only if the winner was valid) and the winner's index in `xs`
 */
def selectOldestRedirect(xs: Seq[Valid[Redirect]]): (Valid[Redirect], UInt) = {
// Attach the valid bit and the source index to every candidate so both survive the reduction.
val wrappers = for((r, i) <- xs.zipWithIndex) yield {
val wrap = Wire(new Wrapper(xs.size))
wrap.redirect := r.bits
wrap.valid := r.valid
wrap.idx := i.U
wrap
}
val oldest = ParallelOperation[Wrapper](wrappers, (x, y) => {
Mux(x.valid,
Mux(y.valid, Mux(isAfter(x.redirect.roqIdx, y.redirect.roqIdx), y, x), x), y
)
})
// Repack the winner as a Valid[Redirect]; the index is returned separately.
val result = Wire(Valid(new Redirect))
result.valid := oldest.valid
result.bits := oldest.redirect
(result, oldest.idx)
}
/** Computes a select vector with one bit per entry of `xs`, marking the entry to redirect from.
 *
 * Bit i is set when `xs(i)` is valid and, for every other entry j:
 *  - j < i: `xs(j)` is invalid, or `isAfter(xs(j).roqIdx, xs(i).roqIdx)` holds;
 *  - j > i: `xs(j)` is invalid, or `isAfter(xs(i).roqIdx, xs(j).roqIdx)` does NOT hold.
 *
 * The two cases are deliberately asymmetric: when `isAfter` is false in both directions for a
 * pair, only the lower-indexed entry of that pair can be selected. If no entry is valid, every
 * bit is false.
 *
 * @param xs candidate redirects
 * @return one Bool per entry of `xs`, in the same order
 */
def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
// Lower-triangular comparison matrix: compareVec(i)(j) is defined for j < i only and is
// true when xs(j) is after xs(i) according to isAfter.
val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.roqIdx, xs(i).bits.roqIdx)))
// For each i, AND together one condition per j (see the rules in the header comment).
val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
(if (j < i) !xs(j).valid || compareVec(i)(j)
else if (j == i) xs(i).valid
else !xs(j).valid || !compareVec(j)(i))
)).andR))
resultOnehot
}
for((ptr, redirect) <- io.stage1FtqRead.map(_.ptr).zip(
......@@ -106,44 +97,30 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
}
val jumpOut = io.exuMispredict.head
val aluOut = VecInit(io.exuMispredict.tail)
val (oldestAluRedirect, oldestAluIdx) = selectOldestRedirect(aluOut.map(getRedirect))
val (oldestExuRedirect, jumpIsOlder) = selectOldestRedirect(Seq(
oldestAluRedirect, getRedirect(jumpOut)
))
val oldestExuOutput = Mux(jumpIsOlder.asBool(), jumpOut, aluOut(oldestAluIdx))
val (oldestRedirect, _) = selectOldestRedirect(Seq(io.loadReplay, oldestExuRedirect))
val s1_isJump = RegNext(jumpIsOlder.asBool(), init = false.B)
val allRedirect = VecInit(io.exuMispredict.map(x => getRedirect(x)) :+ io.loadReplay)
val oldestOneHot = selectOldestRedirect(allRedirect)
val needFlushVec = VecInit(allRedirect.map(_.bits.roqIdx.needFlush(io.stage2Redirect, io.flush)))
val oldestValid = VecInit(oldestOneHot.zip(needFlushVec).map{ case (v, f) => v && !f }).asUInt.orR
val oldestExuOutput = Mux1H((0 until 5).map(oldestOneHot), io.exuMispredict)
val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
val s1_jumpTarget = RegEnable(jumpOut.bits.redirect.cfiUpdate.target, jumpOut.valid)
val s1_imm12_reg = RegEnable(oldestExuOutput.bits.uop.ctrl.imm(11, 0), oldestExuOutput.valid)
val s1_pd = RegEnable(oldestExuOutput.bits.uop.cf.pd, oldestExuOutput.valid)
val s1_redirect_bits_reg = Reg(new Redirect)
val s1_redirect_valid_reg = RegInit(false.B)
val s1_aluIdx = RegEnable(oldestAluIdx, oldestAluRedirect.valid)
val s1_imm12_reg = RegNext(oldestExuOutput.bits.uop.ctrl.imm(11, 0))
val s1_pd = RegNext(oldestExuOutput.bits.uop.cf.pd)
val s1_redirect_bits_reg = RegNext(oldestRedirect.bits)
val s1_redirect_valid_reg = RegNext(oldestValid)
val s1_redirect_onehot = RegNext(oldestOneHot)
// stage1 -> stage2
when(oldestRedirect.valid && !oldestRedirect.bits.roqIdx.needFlush(io.stage2Redirect, io.flush)){
s1_redirect_bits_reg := oldestRedirect.bits
s1_redirect_valid_reg := true.B
}.otherwise({
s1_redirect_valid_reg := false.B
})
io.stage2Redirect.valid := s1_redirect_valid_reg && !io.flush
io.stage2Redirect.bits := s1_redirect_bits_reg
io.stage2Redirect.bits.cfiUpdate := DontCare
// at stage2, we read ftq to get pc
io.stage2FtqRead.ptr := s1_redirect_bits_reg.ftqIdx
val isReplay = RedirectLevel.flushItself(s1_redirect_bits_reg.level)
val ftqRead = Mux(isReplay,
io.stage1FtqRead.last.entry,
Mux(
s1_isJump,
io.stage1FtqRead.head.entry,
VecInit(io.stage1FtqRead.tail.take(exuParameters.AluCnt).map(_.entry))(s1_aluIdx)
)
)
val s1_isReplay = s1_redirect_onehot(5)
val s1_isJump = s1_redirect_onehot(0)
val ftqRead = Mux1H(s1_redirect_onehot, io.stage1FtqRead).entry
val cfiUpdate_pc = Cat(
ftqRead.ftqPC.head(VAddrBits - s1_redirect_bits_reg.ftqOffset.getWidth - instOffsetBits),
s1_redirect_bits_reg.ftqOffset,
......@@ -155,7 +132,7 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
)
val brTarget = real_pc + SignExt(ImmUnion.B.toImm32(s1_imm12_reg), XLEN)
val snpc = real_pc + Mux(s1_pd.isRVC, 2.U, 4.U)
val target = Mux(isReplay,
val target = Mux(s1_isReplay,
real_pc, // replay from itself
Mux(s1_redirect_bits_reg.cfiUpdate.taken,
Mux(s1_isJump, s1_jumpTarget, brTarget),
......@@ -164,12 +141,17 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
)
// update waittable if load violation redirect triggered
io.waitTableUpdate.valid := RegNext(isReplay && s1_redirect_valid_reg, init = false.B)
io.waitTableUpdate.valid := RegNext(s1_isReplay && s1_redirect_valid_reg, init = false.B)
io.waitTableUpdate.waddr := RegNext(XORFold(real_pc(VAddrBits-1, 1), WaitTableAddrWidth))
io.waitTableUpdate.wdata := true.B
io.stage2FtqRead.ptr := s1_redirect_bits_reg.ftqIdx
val s2_br_mask = RegEnable(ftqRead.br_mask, enable = s1_redirect_valid_reg)
val s2_sawNotTakenBranch = RegEnable(VecInit((0 until PredictWidth).map{ i =>
if(i == 0) false.B else Cat(ftqRead.br_mask.take(i)).orR()
})(s1_redirect_bits_reg.ftqOffset), enable = s1_redirect_valid_reg)
val s2_hist = RegEnable(ftqRead.hist, enable = s1_redirect_valid_reg)
val s2_target = RegEnable(target, enable = s1_redirect_valid_reg)
val s2_pd = RegEnable(s1_pd, enable = s1_redirect_valid_reg)
val s2_cfiUpdata_pc = RegEnable(cfiUpdate_pc, enable = s1_redirect_valid_reg)
......@@ -186,11 +168,9 @@ class RedirectGenerator extends XSModule with HasCircularQueuePtrHelper with Wai
stage3CfiUpdate.rasEntry := s2_ftqRead.rasTop
stage3CfiUpdate.predHist := s2_ftqRead.predHist
stage3CfiUpdate.specCnt := s2_ftqRead.specCnt
stage3CfiUpdate.hist := s2_ftqRead.hist
stage3CfiUpdate.hist := s2_hist
stage3CfiUpdate.predTaken := s2_redirect_bits_reg.cfiUpdate.predTaken
stage3CfiUpdate.sawNotTakenBranch := VecInit((0 until PredictWidth).map{ i =>
if(i == 0) false.B else Cat(s2_ftqRead.br_mask.take(i)).orR()
})(s2_redirect_bits_reg.ftqOffset)
stage3CfiUpdate.sawNotTakenBranch := s2_sawNotTakenBranch
stage3CfiUpdate.target := s2_target
stage3CfiUpdate.taken := s2_redirect_bits_reg.cfiUpdate.taken
stage3CfiUpdate.isMisPred := s2_redirect_bits_reg.cfiUpdate.isMisPred
......
......@@ -30,6 +30,7 @@ class FloatBlock
val intWakeUpFp = Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))
val memWakeUpFp = Vec(memSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val intWakeUpOut = Vec(intSlowWakeUpIn.size, DecoupledIO(new ExuOutput))
// from csr
val frm = Input(UInt(3.W))
......@@ -39,24 +40,28 @@ class FloatBlock
val flush = io.fromCtrlBlock.flush
val intWakeUpFpReg = Wire(Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput))))
intWakeUpFpReg.zip(io.intWakeUpFp).foreach{
case (inReg, wakeUpIn) =>
val in = WireInit(wakeUpIn)
wakeUpIn.ready := in.ready
in.valid := wakeUpIn.valid && !wakeUpIn.bits.uop.roqIdx.needFlush(redirect, flush)
PipelineConnect(in, inReg,
inReg.fire() || inReg.bits.uop.roqIdx.needFlush(redirect, flush), false.B
)
for((w, r) <- io.intWakeUpFp.zip(intWakeUpFpReg)){
val in = WireInit(w)
w.ready := in.ready
in.valid := w.valid && !w.bits.uop.roqIdx.needFlush(redirect, flush)
PipelineConnect(in, r, r.fire() || r.bits.uop.roqIdx.needFlush(redirect, flush), false.B)
}
val intRecoded = WireInit(intWakeUpFpReg)
for(((rec, reg), cfg) <- intRecoded.zip(intWakeUpFpReg).zip(intSlowWakeUpIn)){
rec.bits.data := Mux(reg.bits.uop.ctrl.fpu.typeTagOut === S,
recode(reg.bits.data(31, 0), S),
recode(reg.bits.data(63, 0), D)
// to memBlock's store rs
io.intWakeUpOut <> intWakeUpFpReg.map(x => WireInit(x))
val intRecoded = intWakeUpFpReg.map(x => {
val rec = Wire(DecoupledIO(new ExuOutput))
rec.valid := x.valid && x.bits.uop.ctrl.fpWen
rec.bits := x.bits
rec.bits.data := Mux(x.bits.uop.ctrl.fpu.typeTagOut === S,
recode(x.bits.data(31, 0), S),
recode(x.bits.data(63, 0), D)
)
rec.bits.redirectValid := false.B
reg.ready := rec.ready || !rec.valid
}
x.ready := rec.ready || !rec.valid
rec
})
val memRecoded = WireInit(io.memWakeUpFp)
for((rec, reg) <- memRecoded.zip(io.memWakeUpFp)){
rec.bits.data := fpRdataHelper(reg.bits.uop, reg.bits.data)
......@@ -166,7 +171,9 @@ class FloatBlock
NRFpWritePorts,
isFp = true
))
fpWbArbiter.io.in.drop(exeUnits.length).zip(wakeUpInRecode).foreach(x => x._1 <> x._2)
fpWbArbiter.io.in.drop(exeUnits.length).zip(wakeUpInRecode).foreach(
x => x._1 <> fpOutValid(x._2, connectReady = true)
)
for((exu, i) <- exeUnits.zipWithIndex){
val out, outReg = Wire(DecoupledIO(new ExuOutput))
......
......@@ -254,10 +254,13 @@ class IntegerBlock
))
intWbArbiter.io.in <> exeUnits.map(e => {
val w = WireInit(e.io.out)
val fpWen = if(e.config.writeFpRf) e.io.out.bits.uop.ctrl.fpWen else false.B
w.valid := e.io.out.valid && !fpWen
if(e.config.writeFpRf){
w.valid := e.io.out.valid && !e.io.out.bits.uop.ctrl.fpWen && io.wakeUpOut.slow(0).ready
} else {
w.valid := e.io.out.valid
}
w
}) ++ io.wakeUpIn.slow
}) ++ io.wakeUpIn.slow.map(x => intOutValid(x, connectReady = true))
XSPerf("competition", intWbArbiter.io.in.map(i => !i.ready && i.valid).foldRight(0.U)(_+_))
......
......@@ -33,7 +33,8 @@ class MemBlock(
val fastWakeUpIn: Seq[ExuConfig],
val slowWakeUpIn: Seq[ExuConfig],
val fastWakeUpOut: Seq[ExuConfig],
val slowWakeUpOut: Seq[ExuConfig]
val slowWakeUpOut: Seq[ExuConfig],
val numIntWakeUpFp: Int
)(implicit p: Parameters) extends LazyModule {
val dcache = LazyModule(new DCache())
......@@ -55,6 +56,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val slowWakeUpIn = outer.slowWakeUpIn
val fastWakeUpOut = outer.fastWakeUpOut
val slowWakeUpOut = outer.slowWakeUpOut
val numIntWakeUpFp = outer.numIntWakeUpFp
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToLsBlockIO)
......@@ -63,6 +65,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val toCtrlBlock = new LsBlockToCtrlIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val intWakeUpFp = Vec(numIntWakeUpFp, Flipped(DecoupledIO(new ExuOutput)))
val wakeUpOutInt = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val wakeUpOutFp = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
......@@ -75,6 +78,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val exceptionAddr = new ExceptionAddrIO // to csr
val roq = Flipped(new RoqLsqIO) // roq to lsq
}
val csrCtrl = Flipped(new CustomCSRCtrlIO)
})
val difftestIO = IO(new Bundle() {
val fromSbuffer = new Bundle() {
......@@ -144,14 +149,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
slowWakeUpIn.zip(io.wakeUpIn.slow)
.filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf))
.map{
case (Exu.jumpExeUnitCfg, value) if cfg == Exu.stExeUnitCfg =>
val jumpOut = Wire(Flipped(DecoupledIO(new ExuOutput)))
jumpOut.bits := RegNext(value.bits)
jumpOut.valid := RegNext(
value.valid && !value.bits.uop.roqIdx.needFlush(redirect, io.fromCtrlBlock.flush)
)
jumpOut.ready := true.B
(Exu.jumpExeUnitCfg, jumpOut)
case (Exu.jumpExeUnitCfg, _) if cfg == Exu.stExeUnitCfg =>
(Exu.jumpExeUnitCfg, io.intWakeUpFp.head)
case (config, value) => (config, value)
}
).map(a => (a._1, decoupledIOToValidIO(a._2)))
......@@ -189,7 +188,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// exeUnits(i).io.fromInt <> rs.io.deq
rs.io.memfeedback := DontCare
rs.suggestName(s"rsd_${cfg.name}")
rs.suggestName(s"rs_${cfg.name}")
rs
})
......@@ -212,6 +211,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io.wakeUpOutInt.slow <> exeWbReqs
io.wakeUpOutFp.slow <> wakeUpFp
io.wakeUpIn.slow.foreach(_.ready := true.B)
io.intWakeUpFp.foreach(_.ready := true.B)
val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
val lsq = Module(new LsqWrappper)
......@@ -222,7 +222,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// dtlb
io.ptw <> dtlb.io.ptw
dtlb.io.sfence <> io.sfence
dtlb.io.csr <> io.tlbCsr
dtlb.io.csr <> RegNext(io.tlbCsr)
if (!env.FPGAPlatform) {
difftestIO.fromSbuffer <> sbuffer.difftestIO
difftestIO.fromSQ <> lsq.difftestIO.fromSQ
......@@ -306,6 +306,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
lsq.io.sqempty <> sbuffer.io.sqempty
// Sbuffer
sbuffer.io.csrCtrl <> RegNext(io.csrCtrl)
sbuffer.io.dcache <> dcache.io.lsu.store
sbuffer.io.dcache.resp.valid := RegNext(dcache.io.lsu.store.resp.valid)
sbuffer.io.dcache.resp.bits := RegNext(dcache.io.lsu.store.resp.bits)
......
......@@ -62,16 +62,16 @@ class FtqNRSRAM[T <: Data](gen: T, numRead: Int) extends XSModule {
class Ftq_4R_SRAMEntry extends XSBundle {
val ftqPC = UInt(VAddrBits.W)
val lastPacketPC = ValidUndirectioned(UInt(VAddrBits.W))
val hist = new GlobalHistory
val br_mask = Vec(PredictWidth, Bool())
}
// redirect and commit need read these infos
class Ftq_2R_SRAMEntry extends XSBundle {
val rasSp = UInt(log2Ceil(RasSize).W)
val rasEntry = new RASEntry
val hist = new GlobalHistory
val predHist = new GlobalHistory
val specCnt = Vec(PredictWidth, UInt(10.W))
val br_mask = Vec(PredictWidth, Bool())
}
class Ftq_1R_Commit_SRAMEntry extends XSBundle {
......@@ -127,15 +127,15 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
ftq_pc_mem.io.waddr(0) := tailPtr.value
ftq_pc_mem.io.wdata(0).ftqPC := io.enq.bits.ftqPC
ftq_pc_mem.io.wdata(0).lastPacketPC := io.enq.bits.lastPacketPC
ftq_pc_mem.io.wdata(0).hist := io.enq.bits.hist
ftq_pc_mem.io.wdata(0).br_mask := io.enq.bits.br_mask
val ftq_2r_sram = Module(new FtqNRSRAM(new Ftq_2R_SRAMEntry, 2))
ftq_2r_sram.io.wen := real_fire
ftq_2r_sram.io.waddr := tailPtr.value
ftq_2r_sram.io.wdata.rasSp := io.enq.bits.rasSp
ftq_2r_sram.io.wdata.rasEntry := io.enq.bits.rasTop
ftq_2r_sram.io.wdata.hist := io.enq.bits.hist
ftq_2r_sram.io.wdata.predHist := io.enq.bits.predHist
ftq_2r_sram.io.wdata.specCnt := io.enq.bits.specCnt
ftq_2r_sram.io.wdata.br_mask := io.enq.bits.br_mask
val pred_target_sram = Module(new FtqNRSRAM(UInt(VAddrBits.W), 1))
pred_target_sram.io.wen := real_fire
pred_target_sram.io.waddr := tailPtr.value
......@@ -229,13 +229,13 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
// from 4r sram
commitEntry.ftqPC := RegNext(ftq_pc_mem.io.rdata(0).ftqPC)
commitEntry.lastPacketPC := RegNext(ftq_pc_mem.io.rdata(0).lastPacketPC)
commitEntry.hist := RegNext(ftq_pc_mem.io.rdata(0).hist)
commitEntry.br_mask := RegNext(ftq_pc_mem.io.rdata(0).br_mask)
// from 2r sram
commitEntry.rasSp := RegNext(ftq_2r_sram.io.rdata(0).rasSp)
commitEntry.rasTop := RegNext(ftq_2r_sram.io.rdata(0).rasEntry)
commitEntry.hist := RegNext(ftq_2r_sram.io.rdata(0).hist)
commitEntry.predHist := RegNext(ftq_2r_sram.io.rdata(0).predHist)
commitEntry.specCnt := RegNext(ftq_2r_sram.io.rdata(0).specCnt)
commitEntry.br_mask := RegNext(ftq_2r_sram.io.rdata(0).br_mask)
// from 1r sram
commitEntry.metas := RegNext(ftq_1r_sram.io.rdata(0).metas)
commitEntry.rvc_mask := RegNext(ftq_1r_sram.io.rdata(0).rvc_mask)
......@@ -258,6 +258,8 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
ftq_pc_mem.io.raddr(1 + i) := req.ptr.value
req.entry.ftqPC := ftq_pc_mem.io.rdata(1 + i).ftqPC
req.entry.lastPacketPC := ftq_pc_mem.io.rdata(1 + i).lastPacketPC
req.entry.hist := ftq_pc_mem.io.rdata(1 + i).hist
req.entry.br_mask := ftq_pc_mem.io.rdata(1 + i).br_mask
if(i == 0){ // jump, read npc
pred_target_sram.io.raddr(0) := req.ptr.value
pred_target_sram.io.ren(0) := true.B
......@@ -269,10 +271,8 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
io.cfiRead.entry := DontCare
io.cfiRead.entry.rasTop := ftq_2r_sram.io.rdata(1).rasEntry
io.cfiRead.entry.rasSp := ftq_2r_sram.io.rdata(1).rasSp
io.cfiRead.entry.hist := ftq_2r_sram.io.rdata(1).hist
io.cfiRead.entry.predHist := ftq_2r_sram.io.rdata(1).predHist
io.cfiRead.entry.specCnt := ftq_2r_sram.io.rdata(1).specCnt
io.cfiRead.entry.br_mask := ftq_2r_sram.io.rdata(1).br_mask
// redirect, reset ptr
when(io.flush || io.redirect.valid){
val idx = Mux(io.flush, io.flushIdx, io.redirect.bits.ftqIdx)
......
......@@ -355,6 +355,11 @@ class CSR extends FunctionUnit with HasCSRConst
csrio.customCtrl.no_spec_load := slvpredctl(1)
csrio.customCtrl.waittable_timeout := slvpredctl(8, 4)
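// smblockctl: memory block configurations
// bits 0-3: store buffer flush threshold (default: 8 entries)
// NOTE(review): the reset value below is 0x7, not 8 — presumably the consumer adds one;
// confirm against the store buffer's use of sbuffer_threshold.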
val smblockctl = RegInit(UInt(XLEN.W), "h7".U)
csrio.customCtrl.sbuffer_threshold := smblockctl(3, 0)
val tlbBundle = Wire(new TlbCsrBundle)
tlbBundle.satp := satp.asTypeOf(new SatpStruct)
csrio.tlb := tlbBundle
......@@ -486,6 +491,7 @@ class CSR extends FunctionUnit with HasCSRConst
MaskedRegMap(Spfctl, spfctl),
MaskedRegMap(Sdsid, sdsid),
MaskedRegMap(Slvpredctl, slvpredctl),
MaskedRegMap(Smblockctl, smblockctl),
//--- Machine Information Registers ---
MaskedRegMap(Mvendorid, mvendorid, 0.U, MaskedRegMap.Unwritable),
......
......@@ -53,8 +53,8 @@ trait HasCSRConst {
// Supervisor Custom Read/Write
val Sbpctl = 0x5C0
val Spfctl = 0x5C1
val Slvpredctl = 0x5C2
val Smblockctl = 0x5C3
val Sdsid = 0x9C0
......
......@@ -126,6 +126,10 @@ class ReservationStation
val ctrl = Module(new ReservationStationCtrl(exuCfg, srcLen, fastPortsCfg, slowPortsCfg, fixedDelay, fastWakeup, feedback))
val data = Module(new ReservationStationData(exuCfg, srcLen, fastPortsCfg, slowPortsCfg, fixedDelay, fastWakeup, feedback))
select.suggestName(s"${this.name}_select")
ctrl.suggestName(s"${this.name}_ctrl")
data.suggestName(s"${this.name}_data")
select.io.redirect := io.redirect
select.io.flush := io.flush
io.numExist := select.io.numExist
......@@ -136,6 +140,7 @@ class ReservationStation
select.io.deq.ready := io.deq.ready
if (feedback) {
select.io.memfeedback := io.memfeedback
select.io.flushState := io.memfeedback.bits.flushState
}
ctrl.io.in.valid := select.io.enq.fire()// && !(io.redirect.valid || io.flush) // NOTE: same as select
......@@ -207,7 +212,7 @@ class ReservationStationSelect
val fastPortsCnt = fastPortsCfg.size
val slowPortsCnt = slowPortsCfg.size
require(nonBlocked==fastWakeup)
val replayDelay = VecInit(Seq(5, 10, 40, 40).map(_.U(6.W)))
val replayDelay = VecInit(Seq(5, 10, 25, 25).map(_.U(5.W)))
val io = IO(new Bundle {
val redirect = Flipped(ValidIO(new Redirect))
......@@ -228,6 +233,8 @@ class ReservationStationSelect
def fire() = valid && ready
}
val deq = DecoupledIO(UInt(iqIdxWidth.W))
val flushState = if (feedback) Input(Bool()) else null
})
def widthMap[T <: Data](f: Int => T) = VecInit((0 until iqSize).map(f))
......@@ -275,6 +282,7 @@ class ReservationStationSelect
assert(RegNext(!(haveReady && selectPtr >= tailPtr.asUInt)), "bubble should not have valid state like s_valid or s_wait")
// sel bubble
val isFull = Wire(Bool())
val lastbubbleMask = Wire(UInt(iqSize.W))
val bubbleMask = WireInit(VecInit((0 until iqSize).map(i => emptyIdxQueue(i)))).asUInt & lastbubbleMask
// val bubbleIndex = ParallelMux(bubbleMask zip indexQueue) // NOTE: the idx in the indexQueue
......@@ -282,7 +290,9 @@ class ReservationStationSelect
val findBubble = Cat(bubbleMask).orR
val haveBubble = findBubble && (bubblePtr < tailPtr.asUInt)
val bubbleIndex = indexQueue(bubblePtr)
val bubbleValid = haveBubble && (if (feedback) true.B else !selectValid)
val bubbleValid = haveBubble && (if (feedback) true.B
else if (nonBlocked) !selectValid
else Mux(isFull, true.B, !selectValid))
val bubbleReg = RegNext(bubbleValid)
val bubblePtrReg = RegNext(Mux(moveMask(bubblePtr), bubblePtr-1.U, bubblePtr))
lastbubbleMask := ~Mux(bubbleReg, UIntToOH(bubblePtrReg), 0.U) &
......@@ -292,8 +302,9 @@ class ReservationStationSelect
// deq
val dequeue = if (feedback) bubbleReg
else bubbleReg || issueFire
val deqPtr = if (feedback) bubblePtrReg
else Mux(selectReg, selectPtrReg, bubblePtrReg)
val deqPtr = if (feedback) bubblePtrReg
else if (nonBlocked) Mux(selectReg, selectPtrReg, bubblePtrReg)
else Mux(bubbleReg, bubblePtrReg, selectPtrReg)
moveMask := {
(Fill(iqSize, 1.U(1.W)) << deqPtr)(iqSize-1, 0)
} & Fill(iqSize, dequeue)
......@@ -326,11 +337,15 @@ class ReservationStationSelect
// redirect and feedback && wakeup
for (i <- 0 until iqSize) {
// replay
when (stateQueue(i) === s_replay) {
countQueue(i) := countQueue(i) - 1.U
when (countQueue(i) === 0.U) {
stateQueue(i) := s_valid
cntCountQueue(i) := Mux(cntCountQueue(i)===3.U, cntCountQueue(i), cntCountQueue(i) + 1.U)
if (feedback) {
when (stateQueue(i) === s_replay) {
countQueue(i) := countQueue(i) - 1.U
when (countQueue(i) === 0.U && !io.flushState) {
cntCountQueue(i) := Mux(cntCountQueue(i)===3.U, cntCountQueue(i), cntCountQueue(i) + 1.U)
}
when (io.flushState || countQueue(i) === 0.U) {
stateQueue(i) := s_valid
}
}
}
......@@ -349,7 +364,7 @@ class ReservationStationSelect
}
// enq
val isFull = tailPtr.flag
isFull := tailPtr.flag
// agreement with dispatch: don't fire when io.redirect.valid
val enqueue = io.enq.fire() && !(io.redirect.valid || io.flush)
val tailInc = tailPtr + 1.U
......@@ -372,7 +387,7 @@ class ReservationStationSelect
io.deq.valid := selectValid
io.deq.bits := selectIndex
io.numExist := RegNext(Mux(nextTailPtr.flag, (iqSize-1).U, nextTailPtr.value))
io.numExist := RegNext(Mux(nextTailPtr.flag, if(isPow2(iqSize)) (iqSize-1).U else iqSize.U, nextTailPtr.value))
assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
}
......@@ -489,12 +504,13 @@ class ReservationStationCtrl
when (!isAfter(sq, io.stIssuePtr)) {
lw := true.B
}
}
}
when (enqEn) {
ldWait(enqPtr) := !enqUop.cf.loadWaitBit
sqIdx(enqPtr) := enqUop.sqIdx
}
ldWait.suggestName(s"${this.name}_ldWait")
sqIdx.suggestName(s"${this.name}_sqIdx")
io.readyVec := srcQueue.map(Cat(_).andR).zip(ldWait).map{ case (s, l) => s&l }
}
......
......@@ -46,6 +46,7 @@ trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with Has
def insLen = if (HasCExtension) 16 else 32
def RVCInsLen = 16
def groupPC(pc: UInt): UInt = Cat(pc(PAddrBits-1, groupAlign), 0.U(groupAlign.W))
def plruAccessNum = 2 //hit and miss
// def encRowBits = cacheParams.dataCode.width(rowBits)
// def encTagBits = cacheParams.tagCode.width(tagBits)
......@@ -77,6 +78,7 @@ abstract class ICacheBundle extends XSBundle
abstract class ICacheModule extends XSModule
with HasICacheParameters
with HasFrontEndExceptionNo
with HasIFUConst
abstract class ICacheArray extends XSModule
with HasICacheParameters
......@@ -365,8 +367,13 @@ class ICache extends ICacheModule
val replacer = cacheParams.replacement
val victimWayMask = UIntToOH(replacer.way(s2_idx))
when(s2_hit) {replacer.access(s2_idx, OHToUInt(hitVec))}
val (touch_sets, touch_ways) = ( Wire(Vec(plruAccessNum, UInt(log2Ceil(nSets).W))), Wire(Vec(plruAccessNum, Valid(UInt(log2Ceil(nWays).W)))) )
touch_sets(0) := s2_idx
touch_ways(0).valid := s2_hit
touch_ways(0).bits := OHToUInt(hitVec)
replacer.access(touch_sets, touch_ways)
//deal with icache exception
val icacheExceptionVec = Wire(Vec(8,Bool()))
......@@ -436,7 +443,7 @@ class ICache extends ICacheModule
/* icache miss
* send a miss req to ICache Miss Queue, excluding exception/flush/blocking
* send a miss req to ICache Miss Queue, excluding exception/flush/blocking
* block the pipeline until refill finishes
*/
val icacheMissQueue = Module(new IcacheMissQueue)
......@@ -471,6 +478,11 @@ class ICache extends ICacheModule
waymask=metaWriteReq.meta_write_waymask)
val wayNum = OHToUInt(metaWriteReq.meta_write_waymask.asTypeOf(Vec(nWays,Bool())))
touch_sets(1) := metaWriteReq.meta_write_idx
touch_ways(1).valid := icacheMissQueue.io.meta_write.valid
touch_ways(1).bits := wayNum
val validPtr = Cat(metaWriteReq.meta_write_idx,wayNum)
when(icacheMissQueue.io.meta_write.valid && !cacheflushed){
validArray := validArray.bitSet(validPtr, true.B)
......@@ -519,8 +531,8 @@ class ICache extends ICacheModule
}
}
val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
val insLenLog = log2Ceil(insLen)
val start = (pc >> insLenLog.U)(log2Ceil(mmioBeats * mmioBusBytes/instBytes) -1, 0)
val insLenLog = log2Ceil(insLen/8)
val start = Cat(0.U(2.W),(pc >> insLenLog.U)(log2Ceil(mmioBusBytes/instBytes) -1, 0)) //4bit
val outMask = mask >> start
(0 until PredictWidth ).foreach{ i =>
cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U)
......@@ -528,7 +540,7 @@ class ICache extends ICacheModule
(cutPacket.asUInt, outMask.asUInt)
}
val mmioDataVec = io.mmio_grant.bits.data.asTypeOf(Vec(mmioBeats,UInt(mmioBusWidth.W)))
val (mmio_packet,mmio_mask) = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)
val mmio_packet = io.mmio_grant.bits.data//cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)
XSDebug("mmio data %x\n", mmio_packet)
......@@ -541,7 +553,7 @@ class ICache extends ICacheModule
val refillData = Mux(useRefillReg,cutHelper(refillDataVecReg, s3_req_pc,s3_req_mask),cutHelper(refillDataVec, s3_req_pc,s3_req_mask))
wayResp.pc := s3_req_pc
wayResp.data := Mux(s3_valid && s3_hit, wayData, Mux(s3_mmio ,mmio_packet ,refillData))
wayResp.mask := Mux(s3_mmio,mmio_mask,s3_req_mask)
wayResp.mask := s3_req_mask
wayResp.ipf := s3_exception_vec(pageFault)
wayResp.acf := s3_exception_vec(accessFault) || s3_meta_wrong || s3_data_wrong
//|| (icacheMissQueue.io.resp.valid && icacheMissQueue.io.resp.bits.eccWrong)
......@@ -564,7 +576,7 @@ class ICache extends ICacheModule
//icache response: to pre-decoder
io.resp.valid := s3_valid && (s3_hit || exception || icacheMissQueue.io.resp.valid || io.mmio_grant.valid)
io.resp.bits.mask := Mux(s3_mmio,mmio_mask,s3_req_mask)
io.resp.bits.mask := s3_req_mask
io.resp.bits.pc := s3_req_pc
io.resp.bits.data := DontCare
io.resp.bits.ipf := s3_tlb_resp.excp.pf.instr
......@@ -590,7 +602,7 @@ class ICache extends ICacheModule
io.prefetchTrainReq.bits.addr := groupPC(s3_tlb_resp.paddr)
//To icache Uncache
io.mmio_acquire.valid := s3_mmio && s3_valid
io.mmio_acquire.valid := s3_mmio && s3_valid && !s3_has_exception && !s3_flush && !blocking
io.mmio_acquire.bits.addr := mmioBusAligned(s3_tlb_resp.paddr)
io.mmio_acquire.bits.id := cacheID.U
......
......@@ -22,7 +22,7 @@ class InsUncacheResp extends ICacheBundle
}
// One miss entry deals with one mmio request
class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters
class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters with HasIFUConst
{
val io = IO(new Bundle {
val id = Input(UInt(log2Up(cacheParams.nMMIOs).W))
......@@ -86,7 +86,7 @@ class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters
io.mmio_acquire.valid := true.B
io.mmio_acquire.bits := edge.Get(
fromSource = io.id,
toAddress = req.addr + (beatCounter.value << log2Ceil(mmioBusBytes).U),
toAddress = packetAligned(req.addr) + (beatCounter.value << log2Ceil(mmioBusBytes).U),
lgSize = log2Ceil(mmioBusBytes).U
)._2
......@@ -101,15 +101,17 @@ class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters
io.mmio_grant.ready := true.B
when (io.mmio_grant.fire()) {
// val realAddr = packetAligned(req.addr) + (beatCounter.value << log2Ceil(mmioBusBytes).U)
// val start = realAddr(5,3)
respDataReg(beatCounter.value) := io.mmio_grant.bits.data
state := Mux(needFlush || io.flush, s_invalid,Mux(beatCounter.value === (mmioBeats - 1).U,s_send_resp,s_refill_req))
state :=Mux((beatCounter.value === (mmioBeats - 1).U) || needFlush || io.flush ,s_send_resp,s_refill_req)
beatCounter.inc()
}
}
// --------------------------------------------
when (state === s_send_resp) {
io.resp.valid := true.B
io.resp.valid := !needFlush
io.resp.bits.data := respDataReg.asUInt
io.resp.bits.id := req.id
// meta data should go with the response
......@@ -153,7 +155,7 @@ class icacheUncacheImp(outer: InstrUncache)
val io = IO(new icacheUncacheIO)
val (bus, edge) = outer.clientNode.out.head
require(bus.d.bits.data.getWidth == wordBits, "Uncache: tilelink width does not match")
//require(bus.d.bits.data.getWidth == wordBits, "Uncache: tilelink width does not match")
val resp_arb = Module(new Arbiter(new InsUncacheResp, cacheParams.nMMIOs))
......
......@@ -2,7 +2,7 @@ package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.{Code, RandomReplacement, HasTLDump, XSDebug, SRAMTemplate}
import utils.{Code, ReplacementPolicy, HasTLDump, XSDebug, SRAMTemplate, XSPerf}
import xiangshan.{HasXSLog}
import chipsalliance.rocketchip.config.Parameters
......@@ -26,6 +26,7 @@ case class L1plusCacheParameters
rowBits: Int = 64,
tagECC: Option[String] = None,
dataECC: Option[String] = None,
replacer: Option[String] = Some("random"),
nMissEntries: Int = 1,
blockBytes: Int = 64
) extends L1CacheParameters {
......@@ -33,7 +34,7 @@ case class L1plusCacheParameters
def tagCode: Code = Code.fromString(tagECC)
def dataCode: Code = Code.fromString(dataECC)
def replacement = new RandomReplacement(nWays)
def replacement = ReplacementPolicy.fromString(replacer,nWays,nSets)
}
trait HasL1plusCacheParameters extends HasL1CacheParameters {
......@@ -48,6 +49,7 @@ trait HasL1plusCacheParameters extends HasL1CacheParameters {
def bankNum = 2
def bankRows = blockRows / bankNum
def blockEcodedBits = blockRows * encRowBits
def plruAccessNum = 2 //hit and miss
def missQueueEntryIdWidth = log2Up(cfg.nMissEntries)
// def icacheMissQueueEntryIdWidth = log2Up(icfg.nMissEntries)
......@@ -91,6 +93,11 @@ object L1plusCacheMetadata {
}
}
/* tagIdx is from the io.in.req (Wire)
* validIdx is from s1_addr (Register)
*/
class L1plusCacheMetaReadReq extends L1plusCacheBundle {
val tagIdx = UInt(idxBits.W)
val validIdx = UInt(idxBits.W)
......@@ -383,6 +390,8 @@ class L1plusCacheImp(outer: L1plusCache) extends LazyModuleImp(outer) with HasL1
pipe.io.data_resp <> dataArray.io.resp
pipe.io.meta_read <> metaArray.io.read
pipe.io.meta_resp <> metaArray.io.resp
pipe.io.miss_meta_write.valid := missQueue.io.meta_write.valid
pipe.io.miss_meta_write.bits <> missQueue.io.meta_write.bits
missQueue.io.req <> pipe.io.miss_req
bus.a <> missQueue.io.mem_acquire
......@@ -478,6 +487,7 @@ class L1plusCachePipe extends L1plusCacheModule
val meta_read = DecoupledIO(new L1plusCacheMetaReadReq)
val meta_resp = Input(Vec(nWays, new L1plusCacheMetadata))
val miss_req = DecoupledIO(new L1plusCacheMissReq)
val miss_meta_write = Flipped(ValidIO(new L1plusCacheMetaWriteReq))
val inflight_req_idxes = Output(Vec(2, Valid(UInt())))
val empty = Output(Bool())
})
......@@ -554,6 +564,16 @@ class L1plusCachePipe extends L1plusCacheModule
val s2_hit = s2_tag_match_way.orR
val s2_hit_way = OHToUInt(s2_tag_match_way, nWays)
// Replacement-state bookkeeping for the stage-2 lookup.
//replacement marker
val replacer = cacheParams.replacement
// One (set, way) "touch" port per access source; both vectors have plruAccessNum entries.
val (touch_sets, touch_ways) = ( Wire(Vec(plruAccessNum, UInt(log2Ceil(nSets).W))), Wire(Vec(plruAccessNum, Valid(UInt(log2Ceil(nWays).W)))) )
// Port 0: reported when the stage-2 request is valid and hits; the touched way is the hit way.
// Only entry 0 is driven in this span.
touch_sets(0) := get_idx(s2_req.addr)
touch_ways(0).valid := s2_valid && s2_hit
touch_ways(0).bits := s2_hit_way
// Hand every touch port to the replacement policy in a single call.
replacer.access(touch_sets, touch_ways)
val data_resp = io.data_resp
val s2_data = data_resp(s2_hit_way)
......@@ -577,8 +597,7 @@ class L1plusCachePipe extends L1plusCacheModule
io.resp.bits.id := s2_req.id
// replacement policy
val replacer = cacheParams.replacement
val replaced_way_en = UIntToOH(replacer.way)
val replaced_way_en = UIntToOH(replacer.way(get_idx(s2_req.addr)))
io.miss_req.valid := s2_valid && !s2_hit
io.miss_req.bits.id := s2_req.id
......@@ -586,11 +605,12 @@ class L1plusCachePipe extends L1plusCacheModule
io.miss_req.bits.addr := s2_req.addr
io.miss_req.bits.way_en := replaced_way_en
s2_passdown := s2_valid && ((s2_hit && io.resp.ready) || (!s2_hit && io.miss_req.ready))
// Touch port 1: driven by the incoming meta-write request (io.miss_meta_write).
// The set comes from tagIdx; the way is the index decoded from way_en.
// NOTE(review): OHToUInt assumes way_en is one-hot -- confirm against the producer.
touch_sets(1) := io.miss_meta_write.bits.tagIdx
touch_ways(1).valid := io.miss_meta_write.valid
touch_ways(1).bits := OHToUInt(io.miss_meta_write.bits.way_en.asUInt)
when (io.miss_req.fire()) {
replacer.miss
}
s2_passdown := s2_valid && ((s2_hit && io.resp.ready) || (!s2_hit && io.miss_req.ready))
val resp = io.resp
when (resp.valid) {
......@@ -615,6 +635,10 @@ class L1plusCachePipe extends L1plusCacheModule
)
}
}
XSPerf("req", s0_valid)
XSPerf("miss", s2_valid && !s2_hit)
}
class L1plusCacheMissReq extends L1plusCacheBundle
......
......@@ -249,8 +249,10 @@ class TlbResp extends TlbBundle {
val instr = Bool()
}
}
val ptwBack = Bool() // when ptw back, wake up replay rs's state
override def toPrintable: Printable = {
p"paddr:0x${Hexadecimal(paddr)} miss:${miss} excp.pf: ld:${excp.pf.ld} st:${excp.pf.st} instr:${excp.pf.instr}"
p"paddr:0x${Hexadecimal(paddr)} miss:${miss} excp.pf: ld:${excp.pf.ld} st:${excp.pf.st} instr:${excp.pf.instr} ptwBack:${ptwBack}"
}
}
......@@ -439,6 +441,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
resp(i).valid := validReg
resp(i).bits.paddr := Mux(vmEnable, paddr, if (isDtlb) RegNext(vaddr) else vaddr)
resp(i).bits.miss := miss
resp(i).bits.ptwBack := io.ptw.resp.fire()
val perm = hitPerm // NOTE: given the excp, the out module choose one to use?
val update = false.B && hit && (!hitPerm.a || !hitPerm.d && TlbCmd.isWrite(cmdReg)) // update A/D through exception
......
......@@ -60,9 +60,9 @@ class BIM extends BasePredictor with BimParams {
val updateRow = bimAddr.getBankIdx(u.ftqPC)
val wrbypass_ctrs = Reg(Vec(bypassEntries, Vec(BimBanks, UInt(2.W))))
val wrbypass_ctr_valids = Reg(Vec(bypassEntries, Vec(BimBanks, Bool())))
val wrbypass_rows = Reg(Vec(bypassEntries, UInt(log2Up(nRows).W)))
val wrbypass_ctrs = RegInit(0.U.asTypeOf(Vec(bypassEntries, Vec(BimBanks, UInt(2.W)))))
val wrbypass_ctr_valids = RegInit(0.U.asTypeOf(Vec(bypassEntries, Vec(BimBanks, Bool()))))
val wrbypass_rows = RegInit(0.U.asTypeOf(Vec(bypassEntries, UInt(log2Up(nRows).W))))
val wrbypass_enq_idx = RegInit(0.U(log2Up(bypassEntries).W))
val wrbypass_hits = VecInit((0 until bypassEntries).map( i =>
......@@ -83,19 +83,26 @@ class BIM extends BasePredictor with BimParams {
when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))}
for (b <- 0 until BimBanks) {
when (needToUpdate(b)) {
when (needToUpdate.reduce(_||_)) {
when (wrbypass_hit) {
wrbypass_ctrs(wrbypass_hit_idx)(b) := newCtrs(b)
wrbypass_ctr_valids(wrbypass_hit_idx)(b) := true.B
} .otherwise {
wrbypass_ctrs(wrbypass_enq_idx)(b) := newCtrs(b)
(0 until BimBanks).foreach(b => wrbypass_ctr_valids(wrbypass_enq_idx)(b) := false.B) // reset valid bits
wrbypass_ctr_valids(wrbypass_enq_idx)(b) := true.B
wrbypass_rows(wrbypass_enq_idx) := updateRow
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Up(bypassEntries)-1,0)
when (needToUpdate(b)) {
wrbypass_ctrs(wrbypass_hit_idx)(b) := newCtrs(b)
wrbypass_ctr_valids(wrbypass_hit_idx)(b) := true.B
}
}.otherwise {
wrbypass_ctr_valids(wrbypass_enq_idx)(b) := false.B
when (needToUpdate(b)) {
wrbypass_ctr_valids(wrbypass_enq_idx)(b) := true.B
wrbypass_ctrs(wrbypass_enq_idx)(b) := newCtrs(b)
}
}
}
}
when (needToUpdate.reduce(_||_) && !wrbypass_hit) {
wrbypass_rows(wrbypass_enq_idx) := updateRow
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Up(bypassEntries)-1,0)
}
bim.io.w.apply(
valid = needToUpdate.asUInt.orR || doing_reset,
......@@ -104,16 +111,25 @@ class BIM extends BasePredictor with BimParams {
waymask = Mux(doing_reset, Fill(BimBanks, "b1".U).asUInt, needToUpdate.asUInt)
)
XSPerf("bim_wrbypass_hit", needToUpdate.reduce(_||_) && wrbypass_hit)
XSPerf("bim_wrbypass_enq", needToUpdate.reduce(_||_) && !wrbypass_hit)
if (BPUDebug && debug) {
val u = io.update.bits
XSDebug(doing_reset, "Reseting...\n")
XSDebug("[update] v=%d pc=%x valids=%b, tgt=%x\n", updateValid, u.ftqPC, u.valids.asUInt, u.target)
XSDebug("[update] brMask=%b, taken=%b isMisPred=%b\n", u.br_mask.asUInt, newTakens.asUInt, u.mispred.asUInt)
for (i <- 0 until BimBanks) {
XSDebug(RegNext(io.pc.valid && io.inMask(i)), p"BimResp[$i]: ctr = ${io.resp.ctrs(i)}\n")
XSDebug(needToUpdate(i),
p"update bim bank $i: pc:${Hexadecimal(u.ftqPC)}, taken:${u.takens(i)}, " +
p"oldCtr:${oldCtrs(i)}, newCtr:${newCtrs(i)}\n")
XSDebug(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(i) && needToUpdate(i),
p"bank $i wrbypass hit wridx $wrbypass_hit_idx: row:$updateRow, " +
p"ctr:${oldCtrs(i)}, newCtr:${newCtrs(i)}\n")
XSDebug(true.B, p"bimCtr(${i.U})=${Binary(u.metas(i).bimCtr)} oldCtr=${Binary(oldCtrs(i))} newCtr=${Binary(newCtrs(i))}\n")
}
XSDebug("needToUpdate=%b updateRow=%x\n", needToUpdate.asUInt, updateRow)
XSDebug("[wrbypass] hit=%d hits=%b\n", wrbypass_hit, wrbypass_hits.asUInt)
}
}
\ No newline at end of file
......@@ -185,7 +185,7 @@ class BTB extends BasePredictor with BTBParams{
val updateBank = u.cfiIndex.bits
val updateRow = btbAddr.getBankIdx(cfi_pc)
val updateIsBr = u.br_mask(u.cfiIndex.bits)
val updateTaken = u.cfiIndex.valid
val updateTaken = u.cfiIndex.valid && u.valids(u.cfiIndex.bits)
// TODO: remove isRVC
val metaWrite = BtbMetaEntry(btbAddr.getTag(cfi_pc), updateIsBr, u.cfiIsRVC)
val dataWrite = BtbDataEntry(new_lower, new_extended)
......@@ -200,6 +200,11 @@ class BTB extends BasePredictor with BTBParams{
data.io.w.apply(updateValid, dataWrite, updateRow, updateWayMask)
edata.io.w.apply(updateValid && new_extended, u.target, updateRow, "b1".U)
// Per-bank flag: the way selected for allocation (allocWays(i)) already holds a valid
// entry, while the bank did not hit and is selected by if2_mask.
val alloc_conflict =
VecInit((0 until BtbBanks).map(i =>
if2_metaRead(allocWays(i))(i).valid && !if2_bankHits(i) && if2_mask(i)))
// The number of flagged banks is passed to the perf counter.
XSPerf("btb_alloc_conflict", PopCount(alloc_conflict))
if (BPUDebug && debug) {
val debug_verbose = true
val validLatch = RegNext(io.pc.valid)
......
......@@ -129,24 +129,21 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
val if2_ready = WireInit(false.B)
val if2_valid = RegInit(init = false.B)
val if2_allReady = WireInit(if2_ready && icache.io.req.ready)
val if1_fire = (if1_valid && if2_allReady) && (icache.io.tlb.resp.valid || !if2_valid)
val if1_can_go = if1_fire
val if1_fire = if1_valid && if2_allReady
val if1_gh, if2_gh, if3_gh, if4_gh = Wire(new GlobalHistory)
val if2_predicted_gh, if3_predicted_gh, if4_predicted_gh = Wire(new GlobalHistory)
val final_gh = RegInit(0.U.asTypeOf(new GlobalHistory))
val final_gh_bypass = WireInit(0.U.asTypeOf(new GlobalHistory))
val flush_final_gh = WireInit(false.B)
//********************** IF2 ****************************//
val if2_allValid = if2_valid && icache.io.tlb.resp.valid
val if3_ready = WireInit(false.B)
val if2_fire = (if2_valid && if3_ready) && icache.io.tlb.resp.valid
val if2_pc = RegEnable(next = if1_npc, init = resetVector.U, enable = if1_can_go)
val if2_fire = if2_allValid && if3_ready
val if2_pc = RegEnable(next = if1_npc, init = resetVector.U, enable = if1_fire)
val if2_snpc = snpc(if2_pc)
val if2_predHist = RegEnable(if1_gh.predHist, enable=if1_can_go)
if2_ready := if3_ready || !if2_valid
when (if1_can_go) { if2_valid := true.B }
val if2_predHist = RegEnable(if1_gh.predHist, enable=if1_fire)
if2_ready := if3_ready && icache.io.tlb.resp.valid || !if2_valid
when (if1_fire) { if2_valid := true.B }
.elsewhen (if2_flush) { if2_valid := false.B }
.elsewhen (if2_fire) { if2_valid := false.B }
......@@ -387,8 +384,8 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
when (if4_fire) {
final_gh := if4_predicted_gh
}
if4_gh := Mux(flush_final_gh, final_gh_bypass, final_gh)
if3_gh := Mux(if4_valid && !if4_flush, if4_predicted_gh, if4_gh)
if4_gh := final_gh
if3_gh := Mux(if4_valid, if4_predicted_gh, if4_gh)
if2_gh := Mux(if3_valid && !if3_flush, if3_predicted_gh, if3_gh)
if1_gh := Mux(if2_valid && !if2_flush, if2_predicted_gh, if2_gh)
......@@ -455,8 +452,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
val taken = Mux(isMisPred, b.taken, b.predTaken)
val updatedGh = oldGh.update(sawNTBr, isBr && taken)
final_gh := updatedGh
final_gh_bypass := updatedGh
flush_final_gh := true.B
if1_gh := updatedGh
}
npcGen.register(io.redirect.valid, io.redirect.bits.cfiUpdate.target, Some("backend_redirect"))
......@@ -465,7 +461,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
if1_npc := npcGen()
icache.io.req.valid := if1_can_go
icache.io.req.valid := if1_fire
icache.io.resp.ready := if4_ready
icache.io.req.bits.addr := if1_npc
icache.io.req.bits.mask := mask(if1_npc)
......@@ -487,7 +483,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
bpu.io.commit <> io.commitUpdate
bpu.io.redirect <> io.redirect
bpu.io.inFire(0) := if1_can_go
bpu.io.inFire(0) := if1_fire
bpu.io.inFire(1) := if2_fire
bpu.io.inFire(2) := if3_fire
bpu.io.inFire(3) := if4_fire
......@@ -550,6 +546,28 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
}
}
// TODO: perfs
// frontend redirect from each stage
// Each stage has two counters: the plain one is gated by the stage's valid signal,
// the "_fired" one by the stage's fire signal. Both exclude flushed stages.
XSPerf("if2_redirect", if2_valid && if2_bp.taken && !if2_flush)
XSPerf("if2_redirect_fired", if2_fire && if2_bp.taken && !if2_flush)
XSPerf("if3_redirect", if3_valid && if3_redirect && !if3_flush)
XSPerf("if3_redirect_fired", if3_fire && if3_redirect && !if3_flush)
XSPerf("if4_redirect", if4_valid && if4_redirect && !if4_flush)
XSPerf("if4_redirect_fired", if4_fire && if4_redirect && !if4_flush)
// IF1 stall breakdown: IF1 is valid but cannot advance -- overall (!if2_allReady),
// because the icache request port is not ready, or because IF2 is not ready.
XSPerf("if1_total_stall", !if2_allReady && if1_valid)
XSPerf("if1_stall_from_icache_req", !icache.io.req.ready && if1_valid)
XSPerf("if1_stall_from_if2", !if2_ready && if1_valid)
// IF2 is valid and IF3 is ready, but the ITLB response is not valid.
XSPerf("itlb_stall", if2_valid && if3_ready && !icache.io.tlb.resp.valid)
// IF3 is valid and IF4 is ready, but the icache response is not valid.
XSPerf("icache_resp_stall", if3_valid && if4_ready && !icache.io.resp.valid)
// IF4 is valid but does not fire. The two counters below each require exactly one of
// io.fetchPacket / ftqEnqBuf to be not ready; a cycle where both are not ready is
// counted by neither of them.
XSPerf("if4_stall", if4_valid && !if4_fire)
XSPerf("if4_stall_ibuffer", if4_valid && !io.fetchPacket.ready && ftqEnqBuf_ready)
XSPerf("if4_stall_ftq", if4_valid && io.fetchPacket.ready && !ftqEnqBuf_ready)
XSPerf("if3_prevHalfConsumed", if3_prevHalfConsumed)
XSPerf("if4_prevHalfConsumed", if4_prevHalfConsumed)
// debug info
if (IFUDebug) {
XSDebug(RegNext(reset.asBool) && !reset.asBool, "Reseting...\n")
......@@ -557,13 +575,12 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
XSDebug(icache.io.flush(1).asBool, "Flush icache stage3...\n")
XSDebug(io.redirect.valid, p"Redirect from backend! target=${Hexadecimal(io.redirect.bits.cfiUpdate.target)}\n")
XSDebug("[IF1] v=%d fire=%d cango=%d flush=%d pc=%x mask=%b\n", if1_valid, if1_fire,if1_can_go, if1_flush, if1_npc, mask(if1_npc))
XSDebug("[IF1] v=%d fire=%d flush=%d pc=%x mask=%b\n", if1_valid, if1_fire, if1_flush, if1_npc, mask(if1_npc))
XSDebug("[IF2] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x snpc=%x\n", if2_valid, if2_ready, if2_fire, if2_redirect, if2_flush, if2_pc, if2_snpc)
XSDebug("[IF3] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if3_valid, if3_ready, if3_fire, if3_redirect, if3_flush, if3_pc, crossPageIPF, if3_bp.hasNotTakenBrs)
XSDebug("[IF4] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if4_valid, if4_ready, if4_fire, if4_redirect, if4_flush, if4_pc, if4_crossPageIPF, if4_bp.hasNotTakenBrs)
XSDebug("[IF1][icacheReq] v=%d r=%d addr=%x\n", icache.io.req.valid, icache.io.req.ready, icache.io.req.bits.addr)
XSDebug("[IF1][ghr] hist=%b\n", if1_gh.asUInt)
XSDebug("[IF1][ghr] extHist=%b\n\n", if1_gh.asUInt)
XSDebug("[IF2][bp] taken=%d jmpIdx=%d hasNTBrs=%d target=%x saveHalfRVI=%d\n\n", if2_bp.taken, if2_bp.jmpIdx, if2_bp.hasNotTakenBrs, if2_bp.target, if2_bp.saveHalfRVI)
if2_gh.debug("if2")
......
......@@ -67,9 +67,12 @@ class Ibuffer extends XSModule with HasCircularQueuePtrHelper {
val allowEnq = RegInit(true.B)
val numEnq = Mux(io.in.fire, PopCount(io.in.bits.mask), 0.U)
val numTryDeq = Mux(validEntries >= DecodeWidth.U, DecodeWidth.U, validEntries)
val numDeq = PopCount(io.out.map(_.fire))
allowEnq := (IBufSize - PredictWidth).U >= validEntries +& numEnq
// Occupancy after this cycle's enqueue; +& is the width-expanding add, so the carry is kept.
val numAfterEnq = validEntries +& numEnq
// Occupancy predicted for the next cycle: numTryDeq entries are removed when io.out(0)
// is ready, otherwise nothing is subtracted.
val nextValidEntries = Mux(io.out(0).ready, numAfterEnq - numTryDeq, numAfterEnq)
// allowEnq is a register, so this decides whether enqueue is accepted in the next cycle:
// it is set when nextValidEntries <= IBufSize - PredictWidth.
allowEnq := (IBufSize - PredictWidth).U >= nextValidEntries
// Enque
io.in.ready := allowEnq
......
......@@ -159,6 +159,10 @@ class LTBColumn extends LTBModule {
val wen = WireInit(false.B)
when(wen) {ltb.write(if4_rIdx, wEntry)}
val loop_entry_is_learned = WireInit(false.B)
val loop_learned_entry_conflict = WireInit(false.B)
val loop_conf_entry_evicted = WireInit(false.B)
when(redirectValid && redirect.mispred && !isReplay && !doingReset) {
wen := true.B
when(tagMatch) {
......@@ -170,6 +174,7 @@ class LTBColumn extends LTBModule {
when(cntMatch) {
XSDebug("[redirect] 1\n")
wEntry.conf := if4_rEntry.conf + 1.U
loop_entry_is_learned := true.B
wEntry.specCnt := 0.U
}.otherwise {
XSDebug("[redirect] 2\n")
......@@ -194,10 +199,12 @@ class LTBColumn extends LTBModule {
when(if4_rEntry.isLearned) {
XSDebug("[redirect] 5\n")
// do nothing? or release this entry
loop_learned_entry_conflict := true.B
}.elsewhen(if4_rEntry.isConf) {
when(if4_rEntry.age === 0.U) {
XSDebug("[redirect] 6\n")
wEntry.tag := redirectTag
loop_conf_entry_evicted := true.B
wEntry.conf := 1.U
wEntry.specCnt := 0.U
wEntry.tripCnt := redirect.specCnt
......@@ -266,6 +273,11 @@ class LTBColumn extends LTBModule {
}
if (BPUDebug && debug) {
// Perf counters
XSPerf("loop_entry_is_learned ", loop_entry_is_learned)
XSPerf("loop_learned_entry_conflict ", loop_learned_entry_conflict)
XSPerf("loop_conf_entry_evicted ", loop_conf_entry_evicted)
//debug info
XSDebug(doingReset, "Reseting...\n")
XSDebug(io.repair, "Repair...\n")
......@@ -338,9 +350,8 @@ class LoopPredictor extends BasePredictor with LTBParams {
val updateValid = io.update.valid
val update = io.update.bits
val do_redirect = RegNext(io.redirect)
val redirectValid = do_redirect.valid
val redirect = do_redirect.bits.cfiUpdate
val redirectValid = io.redirect.valid
val redirect = io.redirect.bits.cfiUpdate
val redirectPC = redirect.pc
val redirectBank = ltbAddr.getBank(redirectPC)
......@@ -363,7 +374,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
ltbs(i).io.redirect.bits.specCnt := redirect.specCnt(i)
ltbs(i).io.redirect.bits.mispred := redirect.isMisPred
ltbs(i).io.redirect.bits.taken := redirect.taken
ltbs(i).io.redirect.bits.isReplay := do_redirect.bits.flushItself
ltbs(i).io.redirect.bits.isReplay := io.redirect.bits.flushItself
ltbs(i).io.repair := redirectValid && redirectBank =/= i.U
}
......@@ -394,7 +405,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
XSDebug("[IF4][req] inMask=%b\n", inMask)
XSDebug("[IF4][req] updatePC=%x, updateValid=%d, isBr=%b\n", update.ftqPC, updateValid, update.br_mask.asUInt)
XSDebug("[IF4][req] redirectPC=%x redirectBank=%d, redirectValid=%d, isBr=%d, isReplay=%d\n", redirect.pc, redirectBank, redirectValid, redirect.pd.isBr, do_redirect.bits.flushItself)
XSDebug("[IF4][req] redirectPC=%x redirectBank=%d, redirectValid=%d, isBr=%d, isReplay=%d\n", redirect.pc, redirectBank, redirectValid, redirect.pd.isBr, io.redirect.bits.flushItself)
XSDebug("[IF4][req] isMisPred=%d\n", redirect.isMisPred)
XSDebug(redirectValid, "[redirect SpecCnt] ")
......
......@@ -199,10 +199,10 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
waymask = Mux(doing_clear_u_lo, Fill(TageBanks, "b1".U), io.update.uMask.asUInt)
)
val wrbypass_tags = Reg(Vec(wrBypassEntries, UInt(tagLen.W)))
val wrbypass_idxs = Reg(Vec(wrBypassEntries, UInt(log2Ceil(nRows).W)))
val wrbypass_ctrs = Reg(Vec(wrBypassEntries, Vec(TageBanks, UInt(TageCtrBits.W))))
val wrbypass_ctr_valids = Reg(Vec(wrBypassEntries, Vec(TageBanks, Bool())))
val wrbypass_tags = RegInit(0.U.asTypeOf(Vec(wrBypassEntries, UInt(tagLen.W))))
val wrbypass_idxs = RegInit(0.U.asTypeOf(Vec(wrBypassEntries, UInt(log2Ceil(nRows).W))))
val wrbypass_ctrs = RegInit(0.U.asTypeOf(Vec(wrBypassEntries, Vec(TageBanks, UInt(TageCtrBits.W)))))
val wrbypass_ctr_valids = RegInit(0.U.asTypeOf(Vec(wrBypassEntries, Vec(TageBanks, Bool()))))
val wrbypass_enq_idx = RegInit(0.U(log2Ceil(wrBypassEntries).W))
when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))}
......@@ -215,7 +215,7 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val wrbypass_hit = wrbypass_hits.reduce(_||_)
// val wrbypass_rhit = wrbypass_rhits.reduce(_||_)
val wrbypass_hit_idx = PriorityEncoder(wrbypass_hits)
val wrbypass_hit_idx = ParallelPriorityEncoder(wrbypass_hits)
// val wrbypass_rhit_idx = PriorityEncoder(wrbypass_rhits)
// val wrbypass_rctr_hits = VecInit((0 until TageBanks).map( b => wrbypass_ctr_valids(wrbypass_rhit_idx)(b)))
......@@ -248,21 +248,33 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
update_hi_wdata(w) := io.update.u(w)(1)
update_lo_wdata(w) := io.update.u(w)(0)
}
when (io.update.mask.reduce(_||_)) {
when (wrbypass_hits.reduce(_||_)) {
wrbypass_ctrs(wrbypass_hit_idx)(updateBank) := update_wdata(updateBank).ctr
wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank) := true.B
} .otherwise {
wrbypass_ctrs(wrbypass_enq_idx)(updateBank) := update_wdata(updateBank).ctr
(0 until TageBanks).foreach(b => wrbypass_ctr_valids(wrbypass_enq_idx)(b) := false.B) // reset valid bits
wrbypass_ctr_valids(wrbypass_enq_idx)(updateBank) := true.B
wrbypass_tags(wrbypass_enq_idx) := update_tag
wrbypass_idxs(wrbypass_enq_idx) := update_idx
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Ceil(wrBypassEntries)-1,0)
when (io.update.mask.reduce(_||_)) {
when (wrbypass_hit) {
when (io.update.mask(w)) {
wrbypass_ctrs(wrbypass_hit_idx)(w) := update_wdata(w).ctr
wrbypass_ctr_valids(wrbypass_hit_idx)(w) := true.B
}
} .otherwise {
// reset valid bit first
wrbypass_ctr_valids(wrbypass_enq_idx)(w) := false.B
when (io.update.mask(w)) {
wrbypass_ctr_valids(wrbypass_enq_idx)(w) := true.B
wrbypass_ctrs(wrbypass_enq_idx)(w) := update_wdata(w).ctr
}
}
}
}
when (io.update.mask.reduce(_||_) && !wrbypass_hit) {
wrbypass_tags(wrbypass_enq_idx) := update_tag
wrbypass_idxs(wrbypass_enq_idx) := update_idx
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Ceil(wrBypassEntries)-1,0)
}
XSPerf("tage_table_wrbypass_hit", io.update.mask.reduce(_||_) && wrbypass_hit)
XSPerf("tage_table_wrbypass_enq", io.update.mask.reduce(_||_) && !wrbypass_hit)
XSPerf("tage_table_hits", PopCount(VecInit(io.resp.map(_.valid))))
if (BPUDebug && debug) {
val u = io.update
......@@ -270,28 +282,28 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val ub = PriorityEncoder(u.uMask)
val idx = if2_idx
val tag = if2_tag
XSDebug(io.req.valid, "tableReq: pc=0x%x, hist=%x, idx=%d, tag=%x, mask=%b, mask=%b\n",
io.req.bits.pc, io.req.bits.hist, idx, tag, io.req.bits.mask, if2_mask)
XSDebug(io.req.valid,
p"tableReq: pc=0x${Hexadecimal(io.req.bits.pc)}, " +
p"hist=${Hexadecimal(io.req.bits.hist)}, idx=$idx, " +
p"tag=$tag, mask=${Binary(if2_mask)}\n")
for (i <- 0 until TageBanks) {
XSDebug(RegNext(io.req.valid) && if3_req_rhits(i), "TageTableResp[%d]: idx=%d, hit:%d, ctr:%d, u:%d\n",
i.U, if3_idx, if3_req_rhits(i), io.resp(i).bits.ctr, io.resp(i).bits.u)
XSDebug(RegNext(io.req.valid && io.req.bits.mask(i)) && if3_req_rhits(i),
p"TageTableResp[$i]: idx=$if3_idx, hit:${if3_req_rhits(i)}, " +
p"ctr:${io.resp(i).bits.ctr}, u:${io.resp(i).bits.u}\n")
// Per-bank update tracing; `i` is the bank index of the enclosing loop and every
// message below is gated by io.update.mask(i).
XSDebug(io.update.mask(i),
  p"update Table bank $i: pc:${Hexadecimal(u.pc)}, hist:${Hexadecimal(u.hist)}, " +
  p"taken:${u.taken(i)}, alloc:${u.alloc(i)}, oldCtr:${u.oldCtr(i)}\n")
// Report the counter written for *this* bank: index update_wdata with the loop
// variable `i` (matching the guard and the "bank $i" label), not with the
// outer `b`, which is defined outside this loop and need not equal `i`.
XSDebug(io.update.mask(i),
  p"update Table bank $i: writing tag:${update_tag}, " +
  p"ctr: ${update_wdata(i).ctr} in idx $update_idx\n")
// Counter currently held by the hit write-bypass entry for this bank.
val hitCtr = wrbypass_ctrs(wrbypass_hit_idx)(i)
// Terminate the message with a newline like every other debug line, so the
// next log entry does not get appended to this one.
XSDebug(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(i) && io.update.mask(i),
  p"bank $i wrbypass hit wridx:$wrbypass_hit_idx, idx:$update_idx, tag: $update_tag, " +
  p"ctr:$hitCtr, newCtr:${update_wdata(i).ctr}\n")
}
XSDebug(RegNext(io.req.valid), "TageTableResp: hits:%b, maskLatch is %b\n", if3_req_rhits.asUInt, if3_mask)
XSDebug(RegNext(io.req.valid) && !if3_req_rhits.reduce(_||_), "TageTableResp: no hits!\n")
XSDebug(io.update.mask.reduce(_||_), "update Table: pc:%x, hist:%x, bank:%d, taken:%d, alloc:%d, oldCtr:%d\n",
u.pc, u.hist, b, u.taken(b), u.alloc(b), u.oldCtr(b))
XSDebug(io.update.mask.reduce(_||_), "update Table: writing tag:%b, ctr%d in idx:%d\n",
update_wdata(b).tag, update_wdata(b).ctr, update_idx)
XSDebug(io.update.mask.reduce(_||_), "update u: pc:%x, hist:%x, bank:%d, writing in u:%b\n",
u.pc, u.hist, ub, io.update.u(ub))
val updateBank = PriorityEncoder(io.update.mask)
XSDebug(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank),
"wrbypass hits, wridx:%d, tag:%x, idx:%d, hitctr:%d, bank:%d\n",
wrbypass_hit_idx, update_tag, update_idx, wrbypass_ctrs(wrbypass_hit_idx)(updateBank), updateBank)
// when (wrbypass_rhit && wrbypass_ctr_valids(wrbypass_rhit_idx).reduce(_||_)) {
// for (b <- 0 until TageBanks) {
// XSDebug(wrbypass_ctr_valids(wrbypass_rhit_idx)(b),
......@@ -367,10 +379,12 @@ class Tage extends BaseTage {
val debug_hist_s3 = RegEnable(debug_hist_s2, enable=s3_fire)
val u = io.update.bits
val updateValids = u.valids.map(v => v && io.update.valid)
// Per-slot update enable: the slot is valid in u.valids, is flagged in u.br_mask,
// and the update itself (io.update.valid) is valid.
val updateValids =
VecInit(u.valids zip u.br_mask map {
case (v, b) => v && b && io.update.valid
})
val updateHist = u.predHist.asUInt
val updateBrMask = u.br_mask
val updateMetas = VecInit(u.metas.map(_.tageMeta))
val updateMisPred = u.mispred
......@@ -400,7 +414,7 @@ class Tage extends BaseTage {
val if4_providerCtrs = RegEnable(if3_providerCtrs, s3_fire)
val updateTageMisPreds = VecInit((0 until PredictWidth).map(i => updateMetas(i).taken =/= u.takens(i) && updateBrMask(i)))
val updateTageMisPreds = VecInit((0 until PredictWidth).map(i => updateMetas(i).taken =/= u.takens(i)))
// val updateBank = u.pc(log2Ceil(TageBanks)+instOffsetBits-1, instOffsetBits)
......@@ -454,10 +468,9 @@ class Tage extends BaseTage {
val updateValid = updateValids(w)
val updateMeta = updateMetas(w)
val updateIsBr = updateBrMask(w)
val isUpdateTaken = updateValid && u.takens(w) && updateIsBr
val isUpdateTaken = updateValid && u.takens(w)
val updateMisPred = updateTageMisPreds(w)
when (updateValid && updateIsBr) {
when (updateValid) {
when (updateMeta.provider.valid) {
val provider = updateMeta.provider.bits
......@@ -482,6 +495,7 @@ class Tage extends BaseTage {
updateUMask(allocate.bits)(w) := true.B
updateU(allocate.bits)(w) := 0.U
}.otherwise {
val provider = updateMeta.provider
val decrMask = Mux(provider.valid, ~LowerMask(UIntToOH(provider.bits), TageNTables), 0.U(TageNTables.W))
for (i <- 0 until TageNTables) {
......@@ -510,6 +524,41 @@ class Tage extends BaseTage {
}
// Forwards `cnt` to XSPerf under the counter name "<name>_at_pred".
def pred_perf(name: String, cnt: UInt) = {
  val counterName = name + "_at_pred"
  XSPerf(counterName, cnt)
}
// Forwards `cnt` to XSPerf under the counter name "<name>_at_commit".
def commit_perf(name: String, cnt: UInt) = {
  val counterName = name + "_at_commit"
  XSPerf(counterName, cnt)
}
// Emits a pair of counters for one event: "<name>_at_pred" fed by pred_cnt,
// then "<name>_at_commit" fed by commit_cnt. The result of the second XSPerf
// call is the result of this method.
def tage_perf(name: String, pred_cnt: UInt, commit_cnt: UInt) = {
  XSPerf(s"${name}_at_pred", pred_cnt)
  XSPerf(s"${name}_at_commit", commit_cnt)
}
// One pred/commit counter pair per TAGE table: how many slots name that table
// as their provider. The commit-side count additionally requires the slot's
// updateValids bit.
for (tableIdx <- 0 until TageNTables) {
  val providedAtPred = VecInit(io.meta.map { meta =>
    meta.provider.valid && meta.provider.bits === tableIdx.U
  })
  val providedAtCommit = VecInit(updateMetas.zip(updateValids).map {
    case (meta, valid) =>
      meta.provider.valid && meta.provider.bits === tableIdx.U && valid
  })
  tage_perf(
    s"tage_table_${tableIdx}_provided",
    PopCount(providedAtPred),
    PopCount(providedAtCommit))
}
// Slots without a valid TAGE provider, counted at prediction time and at
// commit time (the latter only for slots whose updateValids bit is set).
val noProviderAtPred = VecInit(io.meta.map(meta => !meta.provider.valid))
val noProviderAtCommit = VecInit(updateMetas.zip(updateValids).map {
  case (meta, valid) => !meta.provider.valid && valid
})
tage_perf("tage_use_bim", PopCount(noProviderAtPred), PopCount(noProviderAtCommit))
// True when the provider counter equals 3 or 4.
// NOTE(review): presumably the two weakest states of the TAGE counter --
// confirm against TageCtrBits.
def unconf(providerCtr: UInt) =
  Seq(3, 4).map(value => providerCtr === value.U).reduce(_ || _)
// Slots whose provider is valid and whose provider counter satisfies unconf(),
// at prediction time and at commit time (commit side gated by updateValids).
val weakProviderAtPred = VecInit(io.meta.map { meta =>
  meta.provider.valid && unconf(meta.providerCtr)
})
val weakProviderAtCommit = VecInit(updateMetas.zip(updateValids).map {
  case (meta, valid) => meta.provider.valid && unconf(meta.providerCtr) && valid
})
tage_perf("tage_use_altpred", PopCount(weakProviderAtPred), PopCount(weakProviderAtCommit))
// Slots with any valid provider, at prediction time and at commit time.
val anyProviderAtPred = io.meta.map(_.provider.valid)
val anyProviderAtCommit = VecInit(updateMetas.zip(updateValids).map {
  case (meta, valid) => meta.provider.valid && valid
})
tage_perf("tage_provided", PopCount(anyProviderAtPred), PopCount(anyProviderAtCommit))
if (BPUDebug && debug) {
for (b <- 0 until TageBanks) {
val m = updateMetas(b)
......
......@@ -180,7 +180,7 @@ class MicroBTB extends BasePredictor
val read_resps = VecInit(banks.map(b => b.read_resp))
for (b <- 0 until PredictWidth) {
banks(b).read_pc.valid := io.pc.valid && io.inMask(b)
banks(b).read_pc.valid := io.inMask(b)
banks(b).read_pc.bits := io.pc.bits
//only when hit and instruction valid and entry valid can output data
......
......@@ -40,6 +40,7 @@ class LsPipelineBundle extends XSBundle {
val miss = Bool()
val tlbMiss = Bool()
val ptwBack = Bool()
val mmio = Bool()
val rsIdx = UInt(log2Up(IssQueSize).W)
......
......@@ -110,7 +110,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
}
// no inst will be committed 1 cycle before tval update
vaddrModule.io.raddr(0) := (cmtPtrExt(0) + commitCount).value
vaddrModule.io.raddr(0) := (cmtPtrExt(0) + commitCount).value
/**
* Enqueue at dispatch
......@@ -150,21 +150,12 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val IssuePtrMoveStride = 4
require(IssuePtrMoveStride >= 2)
val issueLookup = Wire(Vec(IssuePtrMoveStride, Bool()))
for (i <- 0 until IssuePtrMoveStride) {
val lookUpPtr = issuePtrExt.value + i.U
if(i == 0){
issueLookup(i) := allocated(lookUpPtr) && issued(lookUpPtr)
}else{
issueLookup(i) := allocated(lookUpPtr) && issued(lookUpPtr) && issueLookup(i-1)
}
// Candidate pointers: issuePtrExt + 0 .. issuePtrExt + (IssuePtrMoveStride - 1).
val issueLookupVec = (0 until IssuePtrMoveStride).map(issuePtrExt + _.U)
// A candidate can be skipped when its entry is allocated and issued, and the
// candidate pointer is not equal to enqPtrExt(0).
val issueLookup = issueLookupVec.map(ptr => allocated(ptr.value) && issued(ptr.value) && ptr =/= enqPtrExt(0))
// Advance by the index of the first candidate that cannot be skipped. The appended
// true.B makes the encoder return IssuePtrMoveStride when every candidate can be skipped.
val nextIssuePtr = issuePtrExt + PriorityEncoder(VecInit(issueLookup.map(!_) :+ true.B))
// Default update; a later conditional assignment to issuePtrExt takes precedence.
issuePtrExt := nextIssuePtr
when(issueLookup(i)){
issuePtrExt := issuePtrExt + (i+1).U
}
}
when(io.brqRedirect.valid || io.flush){
when (io.brqRedirect.valid || io.flush) {
issuePtrExt := Mux(
isAfter(cmtPtrExt(0), deqPtrExt(0)),
cmtPtrExt(0),
......
......@@ -88,6 +88,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
io.tlbFeedback.valid := RegNext(RegNext(io.in.valid))
io.tlbFeedback.bits.hit := true.B
io.tlbFeedback.bits.rsIdx := RegEnable(io.rsIdx, io.in.valid)
io.tlbFeedback.bits.flushState := DontCare
// tlb translation, manipulating signals && deal with exception
when (state === s_tlb) {
......
......@@ -33,7 +33,7 @@ class LoadUnit_S0 extends XSModule {
// val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0))
val imm12 = WireInit(s0_uop.ctrl.imm(11,0))
val s0_vaddr_lo = io.in.bits.src1(11,0) + Cat(0.U(1.W), imm12)
val s0_vaddr_hi = Mux(s0_vaddr_lo(12),
val s0_vaddr_hi = Mux(s0_vaddr_lo(12),
Mux(imm12(11), io.in.bits.src1(VAddrBits-1, 12), io.in.bits.src1(VAddrBits-1, 12)+1.U),
Mux(imm12(11), io.in.bits.src1(VAddrBits-1, 12)+SignExt(1.U, VAddrBits-12), io.in.bits.src1(VAddrBits-1, 12)),
)
......@@ -131,6 +131,7 @@ class LoadUnit_S1 extends XSModule {
io.out.bits.tlbMiss := s1_tlb_miss
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.out.bits.uop.cf.exceptionVec(loadAccessFault) := io.dtlbResp.bits.excp.af.ld
io.out.bits.ptwBack := io.dtlbResp.bits.ptwBack
io.out.bits.rsIdx := io.in.bits.rsIdx
io.in.ready := !io.in.valid || io.out.ready
......@@ -169,6 +170,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception)
io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx
io.tlbFeedback.bits.flushState := io.in.bits.ptwBack
io.needReplayFromRS := s2_cache_replay
// merge forward result
......@@ -225,7 +227,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
// Such inst will be written back from load queue.
io.dataForwarded := s2_cache_miss && fullForward && !s2_exception
// io.out.bits.forwardX will be sent to lq
io.out.bits.forwardMask := forwardMask
io.out.bits.forwardMask := forwardMask
// data retrieved from dcache is also included in io.out.bits.forwardData
io.out.bits.forwardData := rdataVec
......
......@@ -78,6 +78,7 @@ class StoreUnit_S1 extends XSModule {
// Send TLB feedback to store issue queue
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.flushState := io.dtlbResp.bits.ptwBack
io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx
XSDebug(io.tlbFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n",
......
......@@ -14,15 +14,13 @@ class SbufferFlushBundle extends Bundle {
trait HasSbufferConst extends HasXSParameter {
// use one-hot encoding to speed up selection
def s_invalid = (1<<0).U(4.W)
def s_valid = (1<<1).U(4.W)
def s_prepare = (1<<2).U(4.W)
def s_inflight = (1<<3).U(4.W)
def s_invalid = (1<<0).U(3.W)
def s_valid = (1<<1).U(3.W)
def s_inflight = (1<<2).U(3.W)
def isInvalid(i: UInt): Bool = i(0).asBool
def isValid(i: UInt): Bool = i(1).asBool
def isPrepare(i: UInt): Bool = i(2).asBool
def isInflight(i: UInt): Bool = i(3).asBool
def isInflight(i: UInt): Bool = i(2).asBool
val evictCycle = 1 << 20
require(isPow2(evictCycle))
......@@ -35,84 +33,40 @@ trait HasSbufferConst extends HasXSParameter {
val OffsetWidth: Int = log2Up(CacheLineBytes)
val WordsWidth: Int = log2Up(CacheLineWords)
val TagWidth: Int = PAddrBits - OffsetWidth
val WordOffsetWidth: Int = PAddrBits - WordsWidth
}
class SbufferBundle extends XSBundle with HasSbufferConst
class SbufferLine extends SbufferBundle {
val tag = UInt(TagWidth.W)
val data = UInt(CacheLineSize.W)
val mask = UInt(CacheLineBytes.W)
override def toPrintable: Printable = {
p"tag:${Hexadecimal(tag)} data:${Hexadecimal(data)} mask:${Binary(mask)}\n"
}
}
class ChooseReplace(nWay: Int) extends XSModule {
val io = IO(new Bundle{
val mask = Vec(nWay, Input(Bool()))
val way = Output(UInt(nWay.W))
val flush = Input(Bool())
})
val wayReg = RegInit(0.U(log2Up(nWay).W))
val wayMask = ~((UIntToOH(wayReg)<<1.U)(nWay-1,0) - 1.U)
val stateMask = Cat(io.mask.reverse)
val loMask = (wayMask & stateMask)(nWay-1,0)
val nextWay = PriorityEncoder(Cat(stateMask, loMask))(log2Up(nWay)-1, 0)
XSDebug(p"nextWay[${nextWay}]\n")
wayReg := nextWay
io.way := wayReg
when(io.flush){
wayReg := 0.U
}
/**
 * Write request for a single word of one store-buffer line.
 *
 * The request names a buffer entry (`idx`) and a word inside that entry's line
 * (`wordOffset`), and carries the word payload (`data`) together with a
 * per-byte enable (`mask`).
 *
 * NOTE(review): WordOffsetWidth is declared as PAddrBits - WordsWidth, whereas
 * indexing a CacheLineWords-entry Vec needs only WordsWidth bits -- confirm
 * that the field width is intended.
 */
class DataWriteReq extends SbufferBundle {
// index of the targeted store-buffer entry
val idx = UInt(SbufferIndexWidth.W)
// byte enable: one bit per byte of `data`
val mask = UInt((DataBits/8).W)
// word payload to be written
val data = UInt(DataBits.W)
// selects the word inside the buffer line
val wordOffset = UInt(WordOffsetWidth.W)
}
class SbufferLru(nWay: Int) extends XSModule {
val io = IO(new Bundle{
val in = Vec(StorePipelineWidth, Input(UInt(nWay.W)))
val mask = Vec(StoreBufferSize, Input(Bool()))
val way = Output(UInt(nWay.W))
val flush = Input(Bool())
class SbufferData extends XSModule with HasSbufferConst {
val io = IO(new Bundle(){
val writeReq = Vec(StorePipelineWidth, Flipped(ValidIO(new DataWriteReq)))
val dataOut = Output(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
})
val lruRect = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(nWay.W))))
val count = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(log2Up(nWay+1).W))))
val idx = RegInit(VecInit(Seq.tabulate(StoreBufferSize)(i => i.U)))
//update
val updataMask = ParallelOR(io.in)
val updateValue = (~updataMask).asUInt()
for(i <- 0 until nWay){
val lruUpdate = Mux(updataMask(i), updateValue, lruRect(i) & updateValue)
lruRect(i) := lruUpdate
count(i) := PopCount(lruUpdate)
}
// get evictionIdx
val maskCount = Wire(Vec(StoreBufferSize, UInt((log2Up(1 + nWay) + log2Up(nWay)).W))) // (popcount, Idx)
val countZipIdx = maskCount.zip((0 until nWay).map(_.U))
for(i <- 0 until nWay){
val value = Mux(io.mask(i), count(i), nWay.U)
maskCount(i) := Cat(value, idx(i))
}
val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
io.way := ParallelMin(maskCount)(log2Up(nWay)-1,0)
val req = io.writeReq
// flush
when(io.flush){
for(i <- 0 until nWay){
lruRect(i) := 0.U
count(i) := nWay.U
for(i <- 0 until StorePipelineWidth) {
when(req(i).valid){
for(j <- 0 until DataBytes){
when(req(i).bits.mask(j)){
data(req(i).bits.idx)(req(i).bits.wordOffset)(j) := req(i).bits.data(j*8+7, j*8)
}
}
}
XSDebug("drain sbuffer finish, flush lru\n")
}
}
io.dataOut := data
}
class NewSbuffer extends XSModule with HasSbufferConst {
val io = IO(new Bundle() {
......@@ -121,6 +75,7 @@ class NewSbuffer extends XSModule with HasSbufferConst {
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val sqempty = Input(Bool())
val flush = Flipped(new SbufferFlushBundle)
val csrCtrl = Flipped(new CustomCSRCtrlIO)
})
val difftestIO = IO(new Bundle() {
val sbufferResp = Output(Bool())
......@@ -130,10 +85,13 @@ class NewSbuffer extends XSModule with HasSbufferConst {
})
difftestIO <> DontCare
val buffer = Mem(StoreBufferSize, new SbufferLine)
val dataModule = Module(new SbufferData)
dataModule.io.writeReq <> DontCare
val writeReq = dataModule.io.writeReq
val tag = Reg(Vec(StoreBufferSize, UInt(TagWidth.W)))
val mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool()))))
val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) // TODO: will be replaced by SyncDataModuleTemplate
val data = dataModule.io.dataOut
val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(s_invalid)))
val cohCount = Reg(Vec(StoreBufferSize, UInt(countBits.W)))
/*
......@@ -166,14 +124,24 @@ class NewSbuffer extends XSModule with HasSbufferConst {
def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f
// sbuffer entry count
val invalidCount = RegInit(StoreBufferSize.U((log2Up(StoreBufferSize) + 1).W))
val validCount = RegInit(0.U((log2Up(StoreBufferSize) + 1).W))
val full = invalidCount === 0.U // full = TODO: validCount(log2Up(StoreBufferSize))
val lru = Module(new ChooseReplace(StoreBufferSize))
val evictionIdx = lru.io.way
val plru = new PseudoLRU(StoreBufferSize)
val accessIdx = Wire(Vec(StorePipelineWidth + 1, Valid(UInt(SbufferIndexWidth.W))))
val replaceIdx = plru.way
plru.access(accessIdx)
//-------------------------cohCount-----------------------------
// insert and merge: cohCount=0
// every cycle cohCount+=1
// if cohCount(countBits-1)==1, evict
val timeOutMask = VecInit(widthMap(i => cohCount(i)(countBits - 1)))
val (timeOutIdx, hasTimeOut) = PriorityEncoderWithFlag(timeOutMask)
val validMask = VecInit(stateVec.map(s => isValid(s)))
val drainIdx = PriorityEncoder(validMask)
lru.io.mask := stateVec.map(isValid(_))
val inflightMask = VecInit(stateVec.map(s => isInflight(s)))
val intags = io.in.map(in => getTag(in.bits.addr))
val sameTag = intags(0) === intags(1)
......@@ -181,7 +149,6 @@ class NewSbuffer extends XSModule with HasSbufferConst {
val secondWord = getWord(io.in(1).bits.addr)
val sameWord = firstWord === secondWord
// merge condition
val mergeMask = Wire(Vec(StorePipelineWidth, Vec(StoreBufferSize, Bool())))
val mergeIdx = mergeMask.map(PriorityEncoder(_))
......@@ -189,16 +156,17 @@ class NewSbuffer extends XSModule with HasSbufferConst {
for(i <- 0 until StorePipelineWidth){
mergeMask(i) := widthMap(j =>
intags(i) === tag(j) && isValid(stateVec(j))
intags(i) === tag(j) && validMask(j)
)
}
// insert confition
// insert condition
// firstInsert: the first invalid entry
// if first entry canMerge or second entry has the same tag with the first entry , secondInsert equal the first invalid entry, otherwise, the second invalid entry
val invalidMask = stateVec.map(s => isInvalid(s))
val evenInvalidMask = GetEvenBits(VecInit(invalidMask).asUInt)
val oddInvalidMask = GetOddBits(VecInit(invalidMask).asUInt)
// if first entry canMerge or second entry has the same tag with the first entry,
// secondInsert equal the first invalid entry, otherwise, the second invalid entry
val invalidMask = VecInit(stateVec.map(s => isInvalid(s)))
val evenInvalidMask = GetEvenBits(invalidMask.asUInt)
val oddInvalidMask = GetOddBits(invalidMask.asUInt)
val (evenRawInsertIdx, evenCanInsert) = PriorityEncoderWithFlag(evenInvalidMask)
val (oddRawInsertIdx, oddCanInsert) = PriorityEncoderWithFlag(oddInvalidMask)
......@@ -228,7 +196,6 @@ class NewSbuffer extends XSModule with HasSbufferConst {
stateVec(insertIdx) := s_valid
cohCount(insertIdx) := 0.U
tag(insertIdx) := reqtag
when(flushMask){
for(j <- 0 until CacheLineWords){
for(i <- 0 until DataBytes){
......@@ -236,11 +203,10 @@ class NewSbuffer extends XSModule with HasSbufferConst {
}
}
}
for(i <- 0 until DataBytes){
when(req.mask(i)){
mask(insertIdx)(wordOffset)(i) := true.B
data(insertIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
// data(insertIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
}
}
}
......@@ -250,32 +216,33 @@ class NewSbuffer extends XSModule with HasSbufferConst {
for(i <- 0 until DataBytes){
when(req.mask(i)){
mask(mergeIdx)(wordOffset)(i) := true.B
data(mergeIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
// data(mergeIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
}
}
}
// first store
when(io.in(0).fire()){
when(canMerge(0)){
mergeWordReq(io.in(0).bits, mergeIdx(0), firstWord)
XSDebug(p"merge req 0 to line [${mergeIdx(0)}]\n")
}.otherwise{
wordReqToBufLine(io.in(0).bits, intags(0), firstInsertIdx, firstWord, true.B)
XSDebug(p"insert req 0 to line[$firstInsertIdx]\n")
for(((in, wordOffset), i) <- io.in.zip(Seq(firstWord, secondWord)).zipWithIndex){
writeReq(i).valid := in.fire()
writeReq(i).bits.wordOffset := wordOffset
writeReq(i).bits.mask := in.bits.mask
writeReq(i).bits.data := in.bits.data
val insertIdx = if(i == 0) firstInsertIdx else secondInsertIdx
val flushMask = if(i == 0) true.B else !sameTag
accessIdx(i).valid := RegNext(in.fire())
accessIdx(i).bits := RegNext(Mux(canMerge(i), mergeIdx(i), insertIdx))
when(in.fire()){
when(canMerge(i)){
writeReq(i).bits.idx := mergeIdx(i)
mergeWordReq(in.bits, mergeIdx(i), wordOffset)
XSDebug(p"merge req $i to line [${mergeIdx(i)}]\n")
}.otherwise({
writeReq(i).bits.idx := insertIdx
wordReqToBufLine(in.bits, intags(i), insertIdx, wordOffset, flushMask)
XSDebug(p"insert req $i to line[$insertIdx]\n")
})
}
}
// second store
when(io.in(1).fire()){
when(canMerge(1)){
mergeWordReq(io.in(1).bits, mergeIdx(1), secondWord)
XSDebug(p"merge req 1 to line [${mergeIdx(1)}]\n")
}.otherwise{
wordReqToBufLine(io.in(1).bits, intags(1), secondInsertIdx, secondWord, !sameTag)
XSDebug(p"insert req 1 to line[$secondInsertIdx]\n")
}
}
for(i <- 0 until StoreBufferSize){
XSDebug(stateVec(i)=/=s_invalid,
......@@ -295,16 +262,17 @@ class NewSbuffer extends XSModule with HasSbufferConst {
)
}
// ---------------------- Send Dcache Req ---------------------
val do_eviction = Wire(Bool())
val empty = Cat(stateVec.map(s => isInvalid(s))).andR() && !Cat(io.in.map(_.valid)).orR()
val empty = Cat(invalidMask).andR() && !Cat(io.in.map(_.valid)).orR()
val threshold = RegNext(io.csrCtrl.sbuffer_threshold +& 1.U)
val validCount = PopCount(validMask)
val do_eviction = RegNext(validCount >= threshold, init = false.B)
do_eviction := validCount >= 12.U
XSDebug(p"validCount[$validCount]\n")
io.flush.empty := RegNext(empty && io.sqempty)
lru.io.flush := sbuffer_state === x_drain_sbuffer && empty
// lru.io.flush := sbuffer_state === x_drain_sbuffer && empty
switch(sbuffer_state){
is(x_idle){
when(io.flush.valid){
......@@ -329,59 +297,56 @@ class NewSbuffer extends XSModule with HasSbufferConst {
XSDebug(p"sbuffer state:${sbuffer_state} do eviction:${do_eviction} empty:${empty}\n")
def noSameBlockInflight(idx: UInt): Bool = {
val atag = tag(idx)
!Cat(widthMap(i => {
// stateVec(idx) itself must not be s_inflight*
(isInflight(stateVec(i)) || isPrepare(stateVec(i))) &&
atag === tag(i)
})).orR()
// stateVec(idx) itself must not be s_inflight
!Cat(widthMap(i => inflightMask(i) && tag(idx) === tag(i))).orR()
}
val need_drain = sbuffer_state === x_drain_sbuffer
val need_replace = do_eviction || (sbuffer_state === x_replace)
val evictionIdx = Mux(need_drain,
drainIdx,
Mux(hasTimeOut, timeOutIdx, replaceIdx)
)
/*
If there is an inflight dcache req which has the same tag as evictionIdx's tag,
the current eviction should be blocked.
*/
// val evictionEntry = Wire(DecoupledIO(UInt(SbufferIndexWidth.W)))
//
// evictionEntry.valid :=
// do_eviction && sbuffer_state === x_replace || sbuffer_state === x_drain_sbuffer &&
// stateVec(evictionIdx)===s_valid &&
// noSameBlockInflight(evictionIdx)
//
// evictionEntry.bits := evictionIdx
val prepareValid = ((do_eviction && sbuffer_state === x_replace) || (sbuffer_state === x_drain_sbuffer)) &&
isValid(stateVec(evictionIdx)) &&
noSameBlockInflight(evictionIdx)
when(prepareValid){
stateVec(evictionIdx) := s_prepare
}
val prepareMask = stateVec.map(s => isPrepare(s))
val (prepareIdx, prepareEn) = PriorityEncoderWithFlag(prepareMask)
val dcacheReqValid = RegInit(false.B)
val dcacheCandidate = Reg(new DCacheLineReq)
val prepareValid = (need_drain || hasTimeOut || need_replace) &&
noSameBlockInflight(evictionIdx) && validMask(evictionIdx)
val prepareValidReg = RegInit(false.B)
val canSendDcacheReq = io.dcache.req.ready || !prepareValidReg
val willSendDcacheReq = prepareValid && canSendDcacheReq
when(io.dcache.req.fire()){
dcacheReqValid := false.B
prepareValidReg := false.B
}
when(prepareEn && (!dcacheReqValid || io.dcache.req.fire())) {
dcacheCandidate.addr := getAddr(tag(prepareIdx))
dcacheCandidate.data := data(prepareIdx).asUInt
dcacheCandidate.mask := mask(prepareIdx).asUInt
dcacheCandidate.cmd := MemoryOpConstants.M_XWR
dcacheCandidate.id := prepareIdx
stateVec(prepareIdx) := s_inflight
dcacheReqValid := true.B
when(canSendDcacheReq){
prepareValidReg := prepareValid
}
io.dcache.req.valid := dcacheReqValid
io.dcache.req.bits := dcacheCandidate
// evictionEntry.ready := io.dcache.req.ready
when(willSendDcacheReq){
stateVec(evictionIdx) := s_inflight
XSDebug(p"$evictionIdx will be sent to Dcache\n")
}
XSDebug(p"need drain:$need_drain hasTimeOut: $hasTimeOut need replace:$need_replace\n")
XSDebug(p"drainIdx:$drainIdx tIdx:$timeOutIdx replIdx:$replaceIdx " +
p"blocked:${!noSameBlockInflight(evictionIdx)} v:${validMask(evictionIdx)}\n")
XSDebug(p"prepareValid:$prepareValid evictIdx:$evictionIdx dcache ready:${io.dcache.req.ready}\n")
// Note: if other dcache reqs in the same block are inflight,
// the lru update may not be accurate
accessIdx(StorePipelineWidth).valid := invalidMask(replaceIdx) || (
need_replace && !need_drain && !hasTimeOut && canSendDcacheReq && validMask(replaceIdx))
accessIdx(StorePipelineWidth).bits := replaceIdx
val evictionIdxReg = RegEnable(evictionIdx, enable = willSendDcacheReq)
val evictionTag = RegEnable(tag(evictionIdx), enable = willSendDcacheReq)
io.dcache.req.valid := prepareValidReg
io.dcache.req.bits.addr := getAddr(evictionTag)
io.dcache.req.bits.data := data(evictionIdxReg).asUInt
io.dcache.req.bits.mask := mask(evictionIdxReg).asUInt
io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
io.dcache.req.bits.id := evictionIdxReg
XSDebug(io.dcache.req.fire(),
p"send buf [$prepareIdx] to Dcache, req fire\n"
p"send buf [$evictionIdxReg] to Dcache, req fire\n"
)
io.dcache.resp.ready := true.B // sbuffer always ready to recv dcache resp
......@@ -399,23 +364,8 @@ class NewSbuffer extends XSModule with HasSbufferConst {
difftestIO.sbufferMask := WireInit(mask(respId).asUInt)
}
val needSpace = (io.in(0).fire && !canMerge(0)) +& (io.in(1).fire && !canMerge(1) && !sameTag)
invalidCount := invalidCount - needSpace + io.dcache.resp.fire()
validCount := validCount + needSpace - prepareValid
XSDebug(p"needSpace[$needSpace] invalidCount[$invalidCount] validCount[$validCount]\n")
//-------------------------cohCount-----------------------------
// insert and merge: cohCount=0
// every cycle cohCount+=1
// if cohCount(countBits-1)==1,evict
for(i <- 0 until StoreBufferSize){
when(isValid(stateVec(i))){
when(cohCount(i)(countBits-1)){
assert(stateVec(i) === s_valid)
stateVec(i) := s_prepare
}
when(validMask(i) && !timeOutMask(i)){
cohCount(i) := cohCount(i)+1.U
}
}
......@@ -423,11 +373,9 @@ class NewSbuffer extends XSModule with HasSbufferConst {
// ---------------------- Load Data Forward ---------------------
for ((forward, i) <- io.forward.zipWithIndex) {
val tag_matches = widthMap(i => tag(i) === getTag(forward.paddr))
val valid_tag_matches = widthMap(i => tag_matches(i) && isValid(stateVec(i)))
val inflight_tag_matches = widthMap(i =>
tag_matches(i) && (isInflight(stateVec(i)) || isPrepare(stateVec(i)))
)
val tag_matches = widthMap(w => tag(w) === getTag(forward.paddr))
val valid_tag_matches = widthMap(w => tag_matches(w) && validMask(w))
val inflight_tag_matches = widthMap(w => tag_matches(w) && inflightMask(w))
val line_offset_mask = UIntToOH(getWordOffset(forward.paddr))
val valid_tag_match_reg = valid_tag_matches.map(RegNext(_))
......@@ -456,9 +404,3 @@ class NewSbuffer extends XSModule with HasSbufferConst {
}
}
}
object NewSbuffer extends App {
override def main(args: Array[String]): Unit = {
chisel3.Driver.execute(args, ()=> new NewSbuffer)
}
}
......@@ -98,7 +98,9 @@ public:
uint64_t execute(uint64_t max_cycle, uint64_t max_instr);
uint64_t get_cycles() const { return cycles; }
EmuArgs get_args() const { return args; }
bool is_good_trap() { return trapCode == STATE_GOODTRAP; };
bool is_good_trap() {
return trapCode == STATE_GOODTRAP || trapCode == STATE_LIMIT_EXCEEDED;
};
int get_trapcode() { return trapCode; }
};
......
......@@ -30,7 +30,6 @@ int main(int argc, const char** argv) {
auto args = emu->get_args();
uint64_t cycles = emu->execute(args.max_cycles, args.max_instr);
bool is_good_trap = emu->is_good_trap();
int trapcode = emu->get_trapcode();
delete emu;
extern uint32_t uptime(void);
......@@ -40,6 +39,5 @@ int main(int argc, const char** argv) {
" (this will be different from cycleCnt if emu loads a snapshot)\n" ANSI_COLOR_RESET, args.seed, cycles);
eprintf(ANSI_COLOR_BLUE "Host time spent: %'dms\n" ANSI_COLOR_RESET, ms);
// return !is_good_trap;
return trapcode;
return !is_good_trap;
}
......@@ -350,13 +350,15 @@ void dramsim3_helper_rising(const axi_channel &axi) {
void *data_start = meta->data + meta->offset * meta->size / sizeof(uint64_t);
axi_get_wdata(axi, data_start, meta->size);
meta->offset++;
// printf("accept a new write data\n");
}
if (wait_req_w) {
dramsim3_meta *meta = static_cast<dramsim3_meta *>(wait_req_w->meta);
// if this is the last beat
if (meta->offset == meta->len) {
assert(dram->will_accept(wait_req_w->address, true));
if (meta->offset == meta->len && dram->will_accept(wait_req_w->address, true)) {
dram->add_request(wait_req_w);
wait_req_w = NULL;
}
// printf("accept a new write data\n");
}
}
......@@ -397,7 +399,11 @@ void dramsim3_helper_falling(axi_channel &axi) {
// WDATA: check whether the write data can be accepted
if (wait_req_w != NULL && dram->will_accept(wait_req_w->address, true)) {
axi_accept_wdata(axi);
dramsim3_meta *meta = static_cast<dramsim3_meta *>(wait_req_w->meta);
// we have to check whether the last finished write request has been accepted by dram
if (meta->offset != meta->len) {
axi_accept_wdata(axi);
}
}
// WRESP: if finished, we try the next write response
......
......@@ -8,25 +8,16 @@ import chisel3.util._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.must.Matchers
import xiangshan._
import xiangshan.cache.{DCacheLineIO, DCacheWordReq}
import xiangshan.mem.{LoadForwardQueryIO, NewSbuffer}
import xiangshan.testutils._
import scala.util.Random
class SbufferWapper extends XSModule {
val io = IO(new Bundle() {
val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq)))
val dcache = new DCacheLineIO
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val flush = new Bundle {
val valid = Input(Bool())
val empty = Output(Bool())
} // sbuffer flush
})
val sbuffer = Module(new NewSbuffer)
val io = IO(sbuffer.io.cloneType)
io <> sbuffer.io
AddSinks()
// fake dcache
sbuffer.io.dcache.req.ready := true.B
sbuffer.io.dcache.resp.valid := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.valid))))
......@@ -40,124 +31,65 @@ class SbufferTest extends AnyFlatSpec
with ParallelTestExecution
with HasPartialDecoupledDriver {
top.Parameters.set(top.Parameters.debugParameters)
/**
 * Enqueues one store request on input port `portIdx` of the wrapped sbuffer.
 *
 * Builds a bundle literal of the port's payload type with only `addr`, `data`
 * and `mask` set, and hands it to `enqueuePartial`.
 *
 * @param addr    store address
 * @param data    store data
 * @param mask    store byte mask
 * @param portIdx index into `c.io.in`
 * @param c       device under test (implicit)
 */
def make_store_req(addr: UInt, data: UInt, mask: UInt, portIdx: Int)
(implicit c: SbufferWapper) = {
val port = c.io.in(portIdx)
// presumably enqueuePartial drives valid and waits for the port to accept the
// request, leaving unspecified fields untouched -- confirm against
// HasPartialDecoupledDriver
port.enqueuePartial(chiselTypeOf(port.bits).Lit(
_.addr -> addr,
_.data -> data,
_.mask -> mask,
))
}
// it should "random req" in {
// test(new SbufferWapper{AddSinks()}){ c =>
//
// def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) ={
// (0 until StorePipelineWidth).map { i =>
// c.io.in(i).valid.poke(true.B)
// c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit(
// _.mask -> mask(i),
// _.addr -> addr(i),
// _.data -> data(i)
// ))
// }
// c.clock.step(1)
// for (in <- c.io.in){ in.valid.poke(false.B)}
// }
//
// def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
// (0 until LoadPipelineWidth).map{ i =>
// c.io.forward(i).paddr.poke(addr(i))
// c.io.forward(i).mask.poke(mask(i))
// if(c.io.in(i).ready.peek() == true.B) {
// (0 until 8).map { j =>
// c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
// }
// }
// }
// }
//
// val TEST_SIZE = 100
// for(i <- 0 until TEST_SIZE) {
// val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to block size
// val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
// val mask = Seq.fill(StorePipelineWidth)(0xff.U)
// store_enq(addr, data, mask)
// forward_req_and_resp(addr, data, mask)
// }
// }
// }
//
// it should "sequence req" in {
// test(new SbufferWapper{AddSinks()}){ c =>
//
// def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) = {
// (0 until StorePipelineWidth).map { i =>
// c.io.in(i).valid.poke(true.B)
// c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit(
// _.mask -> mask(i),
// _.addr -> addr(i),
// _.data -> data(i)
// ))
// }
// c.clock.step(1)
// for (in <- c.io.in){ in.valid.poke(false.B)}
// }
//
// def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
// (0 until LoadPipelineWidth).map{ i =>
// c.io.forward(i).paddr.poke(addr(i))
// c.io.forward(i).mask.poke(mask(i))
// if(c.io.in(i).ready.peek() == true.B) {
// (0 until 8).map { j =>
// c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
// }
// }
// }
// }
//
// val TEST_SIZE = 100
// val start_addr = Random.nextLong() & 0x7ffffffff8L
// for(i <- 0 until TEST_SIZE) {
// val addr = Seq(((i<<4) + start_addr).U,((i<<4)+8+start_addr).U)
// val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
// val mask = Seq.fill(StorePipelineWidth)(0xff.U)
// store_enq(addr, data, mask)
// forward_req_and_resp(addr, data, mask)
// }
// }
// }
/**
 * Issues a load-forward query on forward port `portIdx` and checks the reply.
 *
 * Pokes `addr` and `mask` onto the port, advances the clock by one cycle, and
 * then expects each of the 8 `forwardData` bytes to equal the matching byte of
 * `ref_data` (byte i is bits i*8+7 .. i*8).
 *
 * @param addr     physical address to query
 * @param mask     byte mask of the query
 * @param ref_data expected forwarded data, compared byte by byte
 * @param portIdx  index into `c.io.forward`
 * @param c        device under test (implicit)
 */
def make_forward_req
(addr: UInt, mask: UInt, ref_data: UInt, portIdx: Int)
(implicit c: SbufferWapper) = {
val port = c.io.forward(portIdx)
port.paddr.poke(addr)
port.mask.poke(mask)
// the forward result is checked one cycle after the query is driven
c.clock.step(1)
for(i <- 0 until 8){
port.forwardData(i).expect(ref_data(i * 8 + 7, i * 8))
}
}
it should "sbuffer coherence" in {
test(new SbufferWapper{AddSinks()}){ c =>
def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) ={
(0 until StorePipelineWidth).map { i =>
c.io.in(i).valid.poke(true.B)
c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit(
_.mask -> mask(i),
_.addr -> addr(i),
_.data -> data(i)
))
}
c.clock.step(1)
for (in <- c.io.in){ in.valid.poke(false.B)}
}
def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
(0 until LoadPipelineWidth).map{ i =>
c.io.forward(i).paddr.poke(addr(i))
c.io.forward(i).mask.poke(mask(i))
if(c.io.in(i).ready.peek() == true.B) {
(0 until 8).map { j =>
c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
}
}
}
// Stress test: pushes TEST_SIZE stores through input port 0, each with a random
// non-negative 63-bit payload and all 8 mask bits set. The test contains no
// explicit expect(); presumably it passes as long as every enqueue completes,
// i.e. the sbuffer keeps accepting stores -- confirm.
it should "allow multi-inflight dcache requests" in {
test(new SbufferWapper){ c =>
implicit val circuit = c
// prepare every input port as a decoupled source clocked by the DUT clock
c.io.in.foreach(p => p.initSource().setSourceClock(c.clock))
val TEST_SIZE = 1000
var addr = 0
for(_ <- 0 until TEST_SIZE){
val data = (Random.nextLong() & 0x7fffffffffffffffL).U
val mask = 0xff.U
make_store_req(addr.U, data, mask, 0)
// consecutive stores are 512 bytes apart (presumably so that each one maps
// to a different cache line -- confirm against CacheLineBytes)
addr += 512
}
}
}
it should "forward older store's data to younger load" in {
test(new SbufferWapper){ c =>
implicit val circuit = c
c.io.in.foreach(p => p.initSource().setSourceClock(c.clock))
val TEST_SIZE = 10
for(i <- 0 until TEST_SIZE) {
val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to
val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
val mask = Seq.fill(StorePipelineWidth)(0xff.U)
store_enq(addr, data, mask)
forward_req_and_resp(addr, data, mask)
def testPort(i : Int) = {
for(_ <- 0 until TEST_SIZE){
val addr = (Random.nextLong() & 0x7ffffffff8L).U
val data = (Random.nextLong() & 0x7fffffffffffffffL).U
val mask = 0xff.U
make_store_req(addr, data, mask, i)
make_forward_req(addr, mask, data, i)
}
}
c.clock.step(512 + 10)
fork(
testPort(0)
).fork(
testPort(1)
).join()
}
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册