未验证 提交 8f6a1237 编写于 作者: S Steve Gou 提交者: GitHub

btb: use single port sram to meet timing constraints (#692)

* add perf counters for btb and ubtb
* update the btb only on a btb miss or a jalr misprediction, to reduce write stalls
上级 e834a6fe
......@@ -114,6 +114,7 @@ class PredictorAnswer extends XSBundle {
class BpuMeta extends XSBundle with HasBPUParameter {
val btbWriteWay = UInt(log2Up(BtbWays).W)
val btbHit = Bool()
val bimCtr = UInt(2.W)
val tageMeta = new TageMeta
// for global history
......@@ -124,6 +125,8 @@ class BpuMeta extends XSBundle with HasBPUParameter {
val predictor = if (BPUDebug) UInt(log2Up(4).W) else UInt(0.W) // Mark which component this prediction comes from {ubtb, btb, tage, loopPredictor}
val ubtbHit = if (BPUDebug) UInt(1.W) else UInt(0.W)
val ubtbAns = new PredictorAnswer
val btbAns = new PredictorAnswer
val tageAns = new PredictorAnswer
......@@ -194,7 +197,7 @@ class FtqEntry extends XSBundle {
val specCnt = Vec(PredictWidth, UInt(10.W))
val metas = Vec(PredictWidth, new BpuMeta)
val cfiIsCall, cfiIsRet, cfiIsRVC = Bool()
val cfiIsCall, cfiIsRet, cfiIsJalr, cfiIsRVC = Bool()
val rvc_mask = Vec(PredictWidth, Bool())
val br_mask = Vec(PredictWidth, Bool())
val cfiIndex = ValidUndirectioned(UInt(log2Up(PredictWidth).W))
......@@ -214,7 +217,7 @@ class FtqEntry extends XSBundle {
p"ftqPC: ${Hexadecimal(ftqPC)} lastPacketPC: ${Hexadecimal(lastPacketPC.bits)} hasLastPrev:$hasLastPrev " +
p"rasSp:$rasSp specCnt:$specCnt brmask:${Binary(Cat(br_mask))} rvcmask:${Binary(Cat(rvc_mask))} " +
p"valids:${Binary(valids.asUInt())} cfi valid: ${cfiIndex.valid} " +
p"cfi index: ${cfiIndex.bits} isCall:$cfiIsCall isRet:$cfiIsRet isRvc:$cfiIsRVC " +
p"cfi index: ${cfiIndex.bits} isCall:$cfiIsCall isRet:$cfiIsRet isJalr:$cfiIsJalr, isRvc:$cfiIsRVC " +
p"mispred:${Binary(Cat(mispred))} target:${Hexadecimal(target)}\n"
}
......
......@@ -150,7 +150,7 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
// multi-write
val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Up(PredictWidth).W))))
val cfiIsCall, cfiIsRet, cfiIsRVC = Reg(Vec(FtqSize, Bool()))
val cfiIsCall, cfiIsRet, cfiIsJalr, cfiIsRVC = Reg(Vec(FtqSize, Bool()))
val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
val s_invalid :: s_valid :: s_commited :: Nil = Enum(3)
......@@ -164,6 +164,7 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
cfiIndex_vec(enqIdx) := io.enq.bits.cfiIndex
cfiIsCall(enqIdx) := io.enq.bits.cfiIsCall
cfiIsRet(enqIdx) := io.enq.bits.cfiIsRet
cfiIsJalr(enqIdx) := io.enq.bits.cfiIsJalr
cfiIsRVC(enqIdx) := io.enq.bits.cfiIsRVC
mispredict_vec(enqIdx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
update_target(enqIdx) := io.enq.bits.target
......@@ -183,6 +184,7 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
cfiIndex_vec(wbIdx).bits := offset
cfiIsCall(wbIdx) := wb.bits.uop.cf.pd.isCall
cfiIsRet(wbIdx) := wb.bits.uop.cf.pd.isRet
cfiIsJalr(wbIdx) := wb.bits.uop.cf.pd.isJalr
cfiIsRVC(wbIdx) := wb.bits.uop.cf.pd.isRVC
}
when (offset === cfiIndex_vec(wbIdx).bits) {
......@@ -246,6 +248,7 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
commitEntry.cfiIndex := RegNext(RegNext(cfiIndex_vec(headPtr.value)))
commitEntry.cfiIsCall := RegNext(RegNext(cfiIsCall(headPtr.value)))
commitEntry.cfiIsRet := RegNext(RegNext(cfiIsRet(headPtr.value)))
commitEntry.cfiIsJalr := RegNext(RegNext(cfiIsJalr(headPtr.value)))
commitEntry.cfiIsRVC := RegNext(RegNext(cfiIsRVC(headPtr.value)))
commitEntry.target := RegNext(RegNext(update_target(headPtr.value)))
......
......@@ -131,8 +131,9 @@ abstract class BasePredictor extends XSModule
val inMask = Input(UInt(PredictWidth.W))
val update = Flipped(ValidIO(new FtqEntry))
}
val io = new DefaultBasePredictorIO
val in_ready = IO(Output(Bool()))
in_ready := true.B
val debug = true
}
......@@ -441,6 +442,8 @@ abstract class BaseBPU extends XSModule with BranchPredictorComponents
// from if1
val in = Input(new BPUReq)
val inFire = Input(Vec(4, Bool()))
// to if1
val in_ready = Output(Bool())
// to if2/if3/if4
val out = Vec(3, Output(new BranchPrediction))
// from if4
......@@ -454,6 +457,8 @@ abstract class BaseBPU extends XSModule with BranchPredictorComponents
p.fires <> io.inFire
p.ctrl <> io.ctrl
})
io.in_ready := preds.map(p => p.in_ready).reduce(_&&_)
val s1 = Module(new BPUStage1)
val s2 = Module(new BPUStage2)
......@@ -528,6 +533,9 @@ class BPU extends BaseBPU {
// Wrap ubtb response into resp_in and brInfo_in
s1_resp_in.ubtb <> ubtb.io.out
// Record, per fetch slot, whether the micro-BTB reported a hit for that slot.
(0 until PredictWidth).foreach { slot =>
  val slotMeta = s1_brInfo_in.metas(slot)
  slotMeta.ubtbHit := ubtb.io.out.hits(slot)
}
btb.io.pc.valid := s1_fire
btb.io.pc.bits := io.in.pc
......@@ -539,6 +547,7 @@ class BPU extends BaseBPU {
s1_resp_in.btb <> btb.io.resp
// Carry the BTB's per-slot metadata (write way and hit flag) along with the prediction.
(0 until PredictWidth).foreach { slot =>
  val slotMeta = s1_brInfo_in.metas(slot)
  slotMeta.btbWriteWay := btb.io.meta.writeWay(slot)
  slotMeta.btbHit := btb.io.meta.hits(slot)
}
bim.io.pc.valid := s1_fire
......
......@@ -60,6 +60,7 @@ class BTB extends BasePredictor with BTBParams{
}
class BTBMeta extends Meta {
val writeWay = Vec(PredictWidth, UInt(log2Up(BtbWays).W))
val hits = Vec(PredictWidth, Bool())
}
class BTBFromOthers extends FromOthers {}
......@@ -76,10 +77,10 @@ class BTB extends BasePredictor with BTBParams{
val if2_pc = RegEnable(if1_packetAlignedPC, io.pc.valid)
// layout: way 0 bank 0, way 0 bank 1, ..., way 0 bank BtbBanks-1, way 1 bank 0, ..., way 1 bank BtbBanks-1
val data = Module(new SRAMTemplate(new BtbDataEntry, set = nRows, way=BtbWays*BtbBanks, shouldReset = true, holdRead = true))
val meta = Module(new SRAMTemplate(new BtbMetaEntry, set = nRows, way=BtbWays*BtbBanks, shouldReset = true, holdRead = true))
val data = Module(new SRAMTemplate(new BtbDataEntry, set = nRows, way=BtbWays*BtbBanks, shouldReset = true, holdRead = true, singlePort = true))
val meta = Module(new SRAMTemplate(new BtbMetaEntry, set = nRows, way=BtbWays*BtbBanks, shouldReset = true, holdRead = true, singlePort = true))
val edata = Module(new SRAMTemplate(UInt(VAddrBits.W), set = extendedNRows, shouldReset = true, holdRead = true))
val edata = Module(new SRAMTemplate(UInt(VAddrBits.W), set = extendedNRows, shouldReset = true, holdRead = true, singlePort = true))
val if1_mask = io.inMask
val if2_mask = RegEnable(if1_mask, io.pc.valid)
......@@ -157,6 +158,7 @@ class BTB extends BasePredictor with BTBParams{
io.resp.isBrs(b) := meta_entry.isBr
io.resp.isRVC(b) := meta_entry.isRVC
io.meta.writeWay(b) := writeWay(b)
io.meta.hits(b) := if2_bankHits(b)
// io.meta.hitJal(b) := if2_bankHits(b) && meta_entry.btbType === BTBtype.J
}
......@@ -186,12 +188,15 @@ class BTB extends BasePredictor with BTBParams{
val updateRow = btbAddr.getBankIdx(cfi_pc)
val updateIsBr = u.br_mask(u.cfiIndex.bits)
val updateTaken = u.cfiIndex.valid && u.valids(u.cfiIndex.bits)
val updateIndirectMisPred = u.mispred(u.cfiIndex.bits) && u.cfiIsJalr
// TODO: remove isRVC
val metaWrite = BtbMetaEntry(btbAddr.getTag(cfi_pc), updateIsBr, u.cfiIsRVC)
val dataWrite = BtbDataEntry(new_lower, new_extended)
val updateValid = do_update.valid && updateTaken
val cfi_hit = u.metas(u.cfiIndex.bits).btbHit
// For branches and jals the prediction is already correct once the BTB hits, so we only write on a BTB miss or on a jalr misprediction.
val updateValid = do_update.valid && updateTaken && (!cfi_hit || updateIndirectMisPred)
in_ready := !updateValid
// Update btb
require(isPow2(BtbBanks))
// this is one hot, since each fetch bundle has at most 1 taken instruction
......@@ -200,10 +205,34 @@ class BTB extends BasePredictor with BTBParams{
data.io.w.apply(updateValid, dataWrite, updateRow, updateWayMask)
edata.io.w.apply(updateValid && new_extended, u.target, updateRow, "b1".U)
val alloc_conflict =
VecInit((0 until BtbBanks).map(i =>
if2_metaRead(allocWays(i))(i).valid && !if2_bankHits(i) && if2_mask(i)))
XSPerf("btb_alloc_conflict", PopCount(alloc_conflict))
// BTB performance counters; elaborated only when env.FPGAPlatform is false.
if (!env.FPGAPlatform) {
  // Per bank: the entry read at way allocWays(bank) is valid, the bank did not hit,
  // and the bank is enabled in the fetch mask.
  val alloc_conflict = VecInit(Seq.tabulate(BtbBanks) { bank =>
    if2_metaRead(allocWays(bank))(bank).valid && !if2_bankHits(bank) && if2_mask(bank)
  })
  XSPerf("btb_alloc_conflict", PopCount(alloc_conflict))

  // Update (write) requests, and those that coincide with a valid read request on io.pc.
  XSPerf("btb_update_req", updateValid)
  XSPerf("ebtb_update_req", updateValid && new_extended)
  XSPerf("btb_wr_conflict", updateValid && io.pc.valid)
  XSPerf("ebtb_wr_conflict", updateValid && new_extended && io.pc.valid)
  XSPerf("btb_update_indirect_mispred", updateValid && updateIndirectMisPred)

  /** Registers a counter named `str` that counts, per committed update with a taken CFI,
    * the slots that are taken and valid, whose recorded btbHit equals `hit_cond`,
    * and whose pre-decode info satisfies `cfi_cond`.
    */
  def btb_perf(hit_cond: Bool)(str: String, cfi_cond: PreDecodeInfoForDebug => UInt): Unit = {
    val slots = u.takens.zip(u.valids).zip(u.metas).zip(u.pd)
    val events = slots.map { case (((taken, valid), meta), pdInfo) =>
      val hitMatches = meta.btbHit.asBool === hit_cond
      taken && valid && hitMatches && cfi_cond(pdInfo).asBool && do_update.valid && updateTaken
    }
    XSPerf(str, PopCount(events))
  }
  def btb_miss_perf(str: String, cfi_cond: PreDecodeInfoForDebug => UInt): Unit =
    btb_perf(false.B)(str, cfi_cond)
  def btb_hit_perf(str: String, cfi_cond: PreDecodeInfoForDebug => UInt): Unit =
    btb_perf(true.B)(str, cfi_cond)

  // Committed slots whose prediction-time BTB lookup hit, broken down by CFI kind.
  btb_hit_perf("btb_commit_hits", pd => !pd.notCFI)
  btb_hit_perf("btb_commit_hit_brs", pd => pd.isBr)
  btb_hit_perf("btb_commit_hit_jals", pd => pd.isJal)
  btb_hit_perf("btb_commit_hit_jalrs", pd => pd.isJalr)
  btb_hit_perf("btb_commit_hit_rets", pd => pd.isRet)

  // Same breakdown for slots whose prediction-time BTB lookup missed.
  btb_miss_perf("btb_commit_misses", pd => !pd.notCFI)
  btb_miss_perf("btb_commit_miss_brs", pd => pd.isBr)
  btb_miss_perf("btb_commit_miss_jals", pd => pd.isJal)
  btb_miss_perf("btb_commit_miss_jalrs", pd => pd.isJalr)
  btb_miss_perf("btb_commit_miss_rets", pd => pd.isRet)
}
if (BPUDebug && debug) {
val debug_verbose = true
......
......@@ -128,7 +128,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
val if1_npc = WireInit(0.U(VAddrBits.W))
val if2_ready = WireInit(false.B)
val if2_valid = RegInit(init = false.B)
val if2_allReady = WireInit(if2_ready && icache.io.req.ready)
val if2_allReady = WireInit(if2_ready && icache.io.req.ready && bpu.io.in_ready)
val if1_fire = if1_valid && if2_allReady
val if1_gh, if2_gh, if3_gh, if4_gh = Wire(new GlobalHistory)
......@@ -428,8 +428,10 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
val cfiIsCall = if4_pd.pd(if4_jmpIdx).isCall
val cfiIsRet = if4_pd.pd(if4_jmpIdx).isRet
val cfiIsRVC = if4_pd.pd(if4_jmpIdx).isRVC
val cfiIsJalr = if4_pd.pd(if4_jmpIdx).isJalr
toFtqBuf.cfiIsCall := cfiIsCall
toFtqBuf.cfiIsRet := cfiIsRet
toFtqBuf.cfiIsJalr := cfiIsJalr
toFtqBuf.cfiIsRVC := cfiIsRVC
toFtqBuf.cfiIndex.valid := if4_taken
toFtqBuf.cfiIndex.bits := if4_jmpIdx
......@@ -558,6 +560,7 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
XSPerf("if1_total_stall", !if2_allReady && if1_valid)
XSPerf("if1_stall_from_icache_req", !icache.io.req.ready && if1_valid)
XSPerf("if1_stall_from_if2", !if2_ready && if1_valid)
XSPerf("if1_stall_from_bpu", !bpu.io.in_ready && if1_valid)
XSPerf("itlb_stall", if2_valid && if3_ready && !icache.io.tlb.resp.valid)
XSPerf("icache_resp_stall", if3_valid && if4_ready && !icache.io.resp.valid)
XSPerf("if4_stall", if4_valid && !if4_fire)
......@@ -618,8 +621,8 @@ class IFU extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with
)
}
val b = ftqEnqBuf
XSDebug("[FtqEnqBuf] v=%d r=%d pc=%x cfiIndex(%d)=%d cfiIsCall=%d cfiIsRet=%d cfiIsRVC=%d\n",
ftqEnqBuf_valid, ftqEnqBuf_ready, b.ftqPC, b.cfiIndex.valid, b.cfiIndex.bits, b.cfiIsCall, b.cfiIsRet, b.cfiIsRVC)
XSDebug("[FtqEnqBuf] v=%d r=%d pc=%x cfiIndex(%d)=%d cfiIsCall=%d cfiIsRet=%d cfiIsJalr=%d cfiIsRVC=%d\n",
ftqEnqBuf_valid, ftqEnqBuf_ready, b.ftqPC, b.cfiIndex.valid, b.cfiIndex.bits, b.cfiIsCall, b.cfiIsRet, b.cfiIsJalr, b.cfiIsRVC)
XSDebug("[FtqEnqBuf] valids=%b br_mask=%b rvc_mask=%b hist=%x predHist=%x rasSp=%d rasTopAddr=%x rasTopCtr=%d\n",
b.valids.asUInt, b.br_mask.asUInt, b.rvc_mask.asUInt, b.hist.asUInt, b.predHist.asUInt, b.rasSp, b.rasTop.retAddr, b.rasTop.ctr)
XSDebug("[ToFTQ] v=%d r=%d leftOne=%d ptr=%d\n", io.toFtq.valid, io.toFtq.ready, io.ftqLeftOne, io.ftqEnqPtr.value)
......
......@@ -228,6 +228,15 @@ class MicroBTB extends BasePredictor
banks(b).update_write_data.bits := update_write_datas(b)
banks(b).update_taken := update_takens(b)
}
// Micro-BTB commit-time hit/miss counters; elaborated only when env.FPGAPlatform is false.
if (!env.FPGAPlatform) {
  // Counts slots that are taken, valid and a CFI during a valid update, selecting
  // those whose recorded ubtbHit is set (wantHit = true) or clear (wantHit = false).
  def ubtbCommitCount(wantHit: Boolean): UInt = {
    val slots = u.takens.zip(u.valids).zip(u.metas).zip(u.pd)
    PopCount(slots.map { case (((taken, valid), meta), pdInfo) =>
      val hitTerm = if (wantHit) meta.ubtbHit.asBool else !meta.ubtbHit.asBool
      taken && valid && hitTerm && !pdInfo.notCFI && update_valid
    })
  }
  XSPerf("ubtb_commit_hits", ubtbCommitCount(wantHit = true))
  XSPerf("ubtb_commit_misses", ubtbCommitCount(wantHit = false))
}
if (BPUDebug && debug) {
val update_pcs = VecInit((0 until PredictWidth).map(i => update_packet_pc + (i << instOffsetBits).U))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册