Commit eeb5ff92 authored by Lingrui98

frontend: let br/jmp share the last slot of an ftb entry, ghist update timing optimization

Parent bf358e08
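This commit makes the last FTB slot shared between a conditional branch and a jal/jalr (shareTailSlot), and reworks the global-history update so the shift amount is derived directly from per-slot signals. A minimal software sketch of the new slot layout, assuming numBr == 2 with the tail slot shared (illustration only, not part of the patch):

// One dedicated branch slot (numBrSlot = numBr - 1) plus a tail slot that holds either the
// last branch (sharing = true, short lower bits) or a jal/jalr (sharing = false, long lower bits).
case class Slot(valid: Boolean, sharing: Boolean)
case class Entry(brSlots: Vector[Slot], tailSlot: Slot) {
  private val numBr = brSlots.length + 1
  // mirrors FTBEntry.getSlotForBr: the last logical branch maps onto the tail slot
  def slotForBr(idx: Int): Slot = if (idx == numBr - 1) tailSlot else brSlots(idx)
  // mirrors the new derived views: brValids and jmpValid are computed from the slots
  def brValids: Seq[Boolean] = brSlots.map(_.valid) :+ (tailSlot.valid && tailSlot.sharing)
  def jmpValid: Boolean      = tailSlot.valid && !tailSlot.sharing
}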
......@@ -32,6 +32,9 @@ trait HasBPUConst extends HasXSParameter with HasIFUConst {
val numBr = 2
val useBPD = true
val useLHist = true
val shareTailSlot = true
val numBrSlot = if (shareTailSlot) numBr-1 else numBr
val totalSlot = numBrSlot + 1
def BP_S1 = 1.U(2.W)
def BP_S2 = 2.U(2.W)
......@@ -360,13 +363,9 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst {
// History manage
// s1
val s1_shift = Mux(resp.s1.preds.hit,
Mux(resp.s1.real_br_taken_mask.asUInt === 0.U, PopCount(resp.s1.ftb_entry.brValids), PopCount(LowerMaskFromLowest(resp.s1.real_br_taken_mask.asUInt))),
0.U((log2Ceil(numBr)+1).W))
val s1_taken = Mux(resp.s1.preds.hit, resp.s1.real_br_taken_mask.asUInt =/= 0.U, false.B)
val s1_predicted_ghist = s1_ghist.update(s1_shift, s1_taken)
val s1_predicted_ghist = s1_ghist.update(resp.s1.preds.br_valids, resp.s1.real_br_taken_mask())
XSDebug(p"[hit] ${resp.s1.preds.hit} [s1_real_br_taken_mask] ${Binary(resp.s1.real_br_taken_mask.asUInt)} [s1_shift] ${s1_shift} [s1_taken] ${s1_taken}\n")
XSDebug(p"[hit] ${resp.s1.preds.hit} [s1_real_br_taken_mask] ${Binary(resp.s1.real_br_taken_mask.asUInt)}\n")
XSDebug(p"s1_predicted_ghist=${Binary(s1_predicted_ghist.predHist)}\n")
when(s1_valid) {
......@@ -376,17 +375,13 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst {
}
// s2
val s2_shift = Mux(resp.s2.preds.hit,
Mux(resp.s2.real_br_taken_mask.asUInt === 0.U, PopCount(resp.s2.ftb_entry.brValids), PopCount(LowerMaskFromLowest(resp.s2.real_br_taken_mask.asUInt))),
0.U((log2Ceil(numBr)+1).W))
val s2_taken = Mux(resp.s2.preds.hit, resp.s2.real_br_taken_mask.asUInt =/= 0.U, false.B)
val s2_predicted_ghist = s2_ghist.update(s2_shift, s2_taken)
val s2_predicted_ghist = s2_ghist.update(resp.s2.preds.br_valids, resp.s2.real_br_taken_mask())
val s2_correct_s1_ghist = s1_ghist =/= s2_predicted_ghist
val s2_correct_s0_ghist_reg = s0_ghist_reg =/= s2_predicted_ghist
val previous_s1_pred_taken = RegEnable(resp.s1.real_taken_mask.asUInt.orR, init=false.B, enable=s1_fire)
val s2_pred_taken = resp.s2.real_taken_mask.asUInt.orR
val previous_s1_pred_taken = RegEnable(resp.s1.real_slot_taken_mask.asUInt.orR, init=false.B, enable=s1_fire)
val s2_pred_taken = resp.s2.real_slot_taken_mask.asUInt.orR
when(s2_fire) {
when((s1_valid && (s1_pc =/= resp.s2.target || s2_correct_s1_ghist)) ||
......@@ -414,22 +409,18 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst {
XSPerfAccumulate("s2_redirect_target_diff_both_hit", s2_redirect_target && s2_saw_s1_hit && resp.s2.preds.hit)
XSPerfAccumulate("s2_redirect_br_direction_diff",
s2_redirect_target_both_hit &&
RegEnable(PriorityEncoder(resp.s1.preds.taken_mask), s1_fire) =/= PriorityEncoder(resp.s2.preds.taken_mask))
RegEnable(PriorityEncoder(resp.s1.preds.br_taken_mask), s1_fire) =/= PriorityEncoder(resp.s2.preds.br_taken_mask))
XSPerfAccumulate("s2_redirect_because_ghist_diff", s2_fire && s1_valid && s2_correct_s1_ghist)
// s3
val s3_shift = Mux(resp.s3.preds.hit,
Mux(resp.s3.real_br_taken_mask.asUInt === 0.U, PopCount(resp.s3.ftb_entry.brValids), PopCount(LowerMaskFromLowest(resp.s3.real_br_taken_mask.asUInt))),
0.U((log2Ceil(numBr)+1).W))
val s3_taken = Mux(resp.s3.preds.hit, resp.s3.real_br_taken_mask.asUInt =/= 0.U, false.B)
val s3_predicted_ghist = s3_ghist.update(s3_shift, s3_taken)
val s3_predicted_ghist = s3_ghist.update(resp.s3.preds.br_valids, resp.s3.real_br_taken_mask())
val s3_correct_s2_ghist = s2_ghist =/= s3_predicted_ghist
val s3_correct_s1_ghist = s1_ghist =/= s3_predicted_ghist
val s3_correct_s0_ghist_reg = s0_ghist_reg =/= s3_predicted_ghist
val previous_s2_pred_taken = RegEnable(resp.s2.real_taken_mask.asUInt.orR, init=false.B, enable=s2_fire)
val s3_pred_taken = resp.s3.real_taken_mask.asUInt.orR
val previous_s2_pred_taken = RegEnable(resp.s2.real_slot_taken_mask.asUInt.orR, init=false.B, enable=s2_fire)
val s3_pred_taken = resp.s3.real_slot_taken_mask.asUInt.orR
when(s3_fire) {
when((s2_valid && (s2_pc =/= resp.s3.target || s3_correct_s2_ghist)) ||
......
......@@ -55,10 +55,10 @@ class BIM(implicit p: Parameters) extends BasePredictor with BimParams with BPUU
val s1_latch_meta = s1_read.asUInt()
override val meta_size = s1_latch_meta.getWidth
io.out.resp.s1.preds.taken_mask := s1_latch_taken_mask
io.out.resp.s2.preds.taken_mask := RegEnable(s1_latch_taken_mask, 0.U.asTypeOf(Vec(numBr, Bool())), io.s1_fire)
io.out.resp.s1.preds.br_taken_mask := s1_latch_taken_mask
io.out.resp.s2.preds.br_taken_mask := RegEnable(s1_latch_taken_mask, 0.U.asTypeOf(Vec(numBr, Bool())), io.s1_fire)
io.out.resp.s3.preds.taken_mask := RegEnable(RegEnable(s1_latch_taken_mask, io.s1_fire), io.s2_fire)
io.out.resp.s3.preds.br_taken_mask := RegEnable(RegEnable(s1_latch_taken_mask, io.s1_fire), io.s2_fire)
io.out.s3_meta := RegEnable(RegEnable(s1_latch_meta, io.s1_fire), io.s2_fire)
// Update logic
......@@ -82,12 +82,12 @@ class BIM(implicit p: Parameters) extends BasePredictor with BimParams with BPUU
Mux(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(i),
wrbypass_ctrs(wrbypass_hit_idx)(i), update.meta(2*i+1, 2*i))))
val newTakens = update.preds.taken_mask
val newTakens = update.preds.br_taken_mask
val newCtrs = VecInit((0 until numBr).map(i =>
satUpdate(oldCtrs(i), 2, newTakens(i))
))
val update_mask = LowerMask(PriorityEncoderOH(update.preds.taken_mask.asUInt))
val update_mask = LowerMask(PriorityEncoderOH(update.preds.br_taken_mask.asUInt))
val need_to_update = VecInit((0 until numBr).map(i => u_valid && update.ftb_entry.brValids(i) && update_mask(i)))
when (reset.asBool) { wrbypass_ctr_valids.foreach(_ := VecInit(Seq.fill(numBr)(false.B)))}
......
......@@ -25,6 +25,7 @@ import utils._
import chisel3.experimental.chiselName
import scala.math.min
import os.copy
trait FTBParams extends HasXSParameter with HasBPUConst {
......@@ -33,6 +34,8 @@ trait FTBParams extends HasXSParameter with HasBPUConst {
val numSets = numEntries/numWays // 512
val tagSize = 20
val TAR_STAT_SZ = 2
def TAR_FIT = 0.U(TAR_STAT_SZ.W)
def TAR_OVF = 1.U(TAR_STAT_SZ.W)
......@@ -42,18 +45,74 @@ trait FTBParams extends HasXSParameter with HasBPUConst {
def JMP_OFFSET_LEN = 20
}
class FtbSlot(val offsetLen: Int, val subOffsetLen: Int = 0)(implicit p: Parameters) extends XSBundle with FTBParams {
require(subOffsetLen <= offsetLen)
val offset = UInt(log2Ceil(PredictWidth).W)
val lower = UInt(offsetLen.W)
val tarStat = UInt(TAR_STAT_SZ.W)
val sharing = Bool()
val valid = Bool()
def setLowerStatByTarget(pc: UInt, target: UInt, isShare: Boolean) = {
def getTargetStatByHigher(pc_higher: UInt, target_higher: UInt) =
Mux(target_higher > pc_higher, TAR_OVF,
Mux(target_higher < pc_higher, TAR_UDF, TAR_FIT))
def getLowerByTarget(target: UInt, offsetLen: Int) = target(offsetLen, 1)
val offLen = if (isShare) this.subOffsetLen else this.offsetLen
val pc_higher = pc(VAddrBits-1, offLen+1)
val target_higher = target(VAddrBits-1, offLen+1)
val stat = getTargetStatByHigher(pc_higher, target_higher)
val lower = ZeroExt(getLowerByTarget(target, offLen), this.offsetLen)
this.lower := lower
this.tarStat := stat
this.sharing := isShare.B
}
def getTarget(pc: UInt) = {
def getTarget(offLen: Int)(pc: UInt, lower: UInt, stat: UInt) = {
val higher = pc(VAddrBits-1, offLen+1)
val target =
Cat(
Mux(stat === TAR_OVF, higher+1.U,
Mux(stat === TAR_UDF, higher-1.U, higher)),
lower(offLen-1, 0), 0.U(1.W)
)
require(target.getWidth == VAddrBits)
require(offLen != 0)
target
}
if (subOffsetLen != 0)
Mux(sharing,
getTarget(subOffsetLen)(pc, lower, tarStat),
getTarget(offsetLen)(pc, lower, tarStat)
)
else
getTarget(offsetLen)(pc, lower, tarStat)
}
def fromAnotherSlot(that: FtbSlot) = {
require(
this.offsetLen > that.offsetLen && that.offsetLen == this.subOffsetLen ||
this.offsetLen == that.offsetLen
)
this.offset := that.offset
this.tarStat := that.tarStat
this.sharing := (this.offsetLen > that.offsetLen && that.offsetLen == this.subOffsetLen).B
this.valid := that.valid
this.lower := ZeroExt(that.lower, this.offsetLen)
}
}
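An FtbSlot stores only the low bits of a target plus a two-bit status. A hypothetical software model of the compression and of getTarget's reconstruction, with BigInt stand-ins for UInt and symbolic TAR_* values (not part of the patch):

sealed trait TarStat
case object TarFit extends TarStat   // target shares the pc's high bits
case object TarOvf extends TarStat   // target's high bits are the pc's high bits + 1
case object TarUdf extends TarStat   // target's high bits are the pc's high bits - 1

// setLowerStatByTarget: keep target bits [offsetLen:1] and record how the high part moved
def compress(pc: BigInt, target: BigInt, offsetLen: Int): (BigInt, TarStat) = {
  val pcHigh  = pc >> (offsetLen + 1)
  val tgtHigh = target >> (offsetLen + 1)
  val stat  = if (tgtHigh > pcHigh) TarOvf else if (tgtHigh < pcHigh) TarUdf else TarFit
  val lower = (target >> 1) & ((BigInt(1) << offsetLen) - 1)
  (lower, stat)
}

// getTarget: rebuild the full target from the pc's high bits, adjusted by the status.
// Exact only when the target's high part differs from the pc's by at most one.
def reconstruct(pc: BigInt, lower: BigInt, stat: TarStat, offsetLen: Int): BigInt = {
  val higher = pc >> (offsetLen + 1)
  val adjusted = stat match {
    case TarOvf => higher + 1
    case TarUdf => higher - 1
    case TarFit => higher
  }
  (adjusted << (offsetLen + 1)) | (lower << 1)
}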
class FTBEntry(implicit p: Parameters) extends XSBundle with FTBParams with BPUUtils {
val valid = Bool()
val brOffset = Vec(numBr, UInt(log2Up(FetchWidth*2).W))
val brLowers = Vec(numBr, UInt(BR_OFFSET_LEN.W))
val brTarStats = Vec(numBr, UInt(TAR_STAT_SZ.W))
val brValids = Vec(numBr, Bool())
val brSlots = Vec(numBrSlot, new FtbSlot(BR_OFFSET_LEN))
val jmpOffset = UInt(log2Ceil(PredictWidth).W)
val jmpLower = UInt(JMP_OFFSET_LEN.W)
val jmpTarStat = UInt(TAR_STAT_SZ.W)
val jmpValid = Bool()
// if shareTailSlot is set, this slot can hold a branch or a jal/jalr
// else this slot holds only jal/jalr
val tailSlot = new FtbSlot(JMP_OFFSET_LEN, BR_OFFSET_LEN)
// Partial Fall-Through Address
val pftAddr = UInt((log2Up(PredictWidth)+1).W)
......@@ -63,76 +122,104 @@ class FTBEntry(implicit p: Parameters) extends XSBundle with FTBParams with BPUU
val isRet = Bool()
val isJalr = Bool()
//
val oversize = Bool()
val last_is_rvc = Bool()
val always_taken = Vec(numBr, Bool())
def getTarget(offsetLen: Int)(pc: UInt, lower: UInt, stat: UInt) = {
val higher = pc(VAddrBits-1, offsetLen+1)
Cat(
Mux(stat === TAR_OVF, higher+1.U,
Mux(stat === TAR_UDF, higher-1.U, higher)),
lower, 0.U(1.W)
def getSlotForBr(idx: Int): FtbSlot = {
require(
idx < numBr-1 || idx == numBr-1 && !shareTailSlot ||
idx == numBr-1 && shareTailSlot
)
(idx, numBr, shareTailSlot) match {
case (i, n, true) if i == n-1 => this.tailSlot
case _ => this.brSlots(idx)
}
}
val getBrTarget = getTarget(BR_OFFSET_LEN)(_, _, _)
def getBrTargets(pc: UInt) = {
VecInit((brLowers zip brTarStats).map{
case (lower, stat) => getBrTarget(pc, lower, stat)
})
}
def getJmpTarget(pc: UInt) = getTarget(JMP_OFFSET_LEN)(pc, jmpLower, jmpTarStat)
def getLowerStatByTarget(offsetLen: Int)(pc: UInt, target: UInt) = {
val pc_higher = pc(VAddrBits-1, offsetLen+1)
val target_higher = target(VAddrBits-1, offsetLen+1)
val stat = WireInit(Mux(target_higher > pc_higher, TAR_OVF,
Mux(target_higher < pc_higher, TAR_UDF, TAR_FIT)))
val lower = WireInit(target(offsetLen, 1))
(lower, stat)
def allSlotsForBr = {
(0 until numBr).map(getSlotForBr(_))
}
def getBrLowerStatByTarget(pc: UInt, target: UInt) = getLowerStatByTarget(BR_OFFSET_LEN)(pc, target)
def getJmpLowerStatByTarget(pc: UInt, target: UInt) = getLowerStatByTarget(JMP_OFFSET_LEN)(pc, target)
def setByBrTarget(brIdx: Int, pc: UInt, target: UInt) = {
val (lower, stat) = getBrLowerStatByTarget(pc, target)
this.brLowers(brIdx) := lower
this.brTarStats(brIdx) := stat
val slot = getSlotForBr(brIdx)
slot.setLowerStatByTarget(pc, target, shareTailSlot && brIdx == numBr-1)
}
def setByJmpTarget(pc: UInt, target: UInt) = {
val (lower, stat) = getJmpLowerStatByTarget(pc, target)
this.jmpLower := lower
this.jmpTarStat := stat
this.tailSlot.setLowerStatByTarget(pc, target, false)
}
def getTargetVec(pc: UInt) = {
VecInit(getBrTargets(pc) :+ getJmpTarget(pc))
VecInit((brSlots :+ tailSlot).map(_.getTarget(pc)))
}
def getOffsetVec = VecInit(brOffset :+ jmpOffset)
def getOffsetVec = VecInit(brSlots.map(_.offset) :+ tailSlot.offset)
def isJal = !isJalr
def getFallThrough(pc: UInt) = getFallThroughAddr(pc, carry, pftAddr)
def hasBr(offset: UInt) = (brValids zip brOffset).map{
case (v, off) => v && off <= offset
}.reduce(_||_)
def hasBr(offset: UInt) =
brSlots.map{ s => s.valid && s.offset <= offset}.reduce(_||_) ||
(shareTailSlot.B && tailSlot.valid && tailSlot.offset <= offset && tailSlot.sharing)
def getBrMaskByOffset(offset: UInt) =
brSlots.map{ s => s.valid && s.offset <= offset } ++
(if (shareTailSlot) Seq(tailSlot.valid && tailSlot.offset <= offset && tailSlot.sharing) else Nil)
def getBrRecordedVec(offset: UInt) = {
VecInit(
brSlots.map(s => s.valid && s.offset === offset) ++
(if (shareTailSlot) Seq(tailSlot.valid && tailSlot.offset === offset && tailSlot.sharing) else Nil)
)
}
def brIsSaved(offset: UInt) = getBrRecordedVec(offset).reduce(_||_)
def getBrMaskByOffset(offset: UInt) = (brValids zip brOffset).map{
case (v, off) => v && off <= offset
def onNotHit(pc: UInt) = {
pftAddr := pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U
carry := pc(instOffsetBits + log2Ceil(PredictWidth)).asBool
oversize := false.B
}
def brIsSaved(offset: UInt) = (brValids zip brOffset).map{
case (v, off) => v && off === offset
}.reduce(_||_)
def brValids = {
VecInit(
brSlots.map(_.valid) ++
(if (shareTailSlot) Seq(tailSlot.valid && tailSlot.sharing) else Nil)
)
}
def noEmptySlotForNewBr = {
VecInit(
brSlots.map(_.valid) ++
(if (shareTailSlot) Seq(tailSlot.valid) else Nil)
).reduce(_&&_)
}
def newBrCanNotInsert(offset: UInt) = {
val lastSlotForBr = if (shareTailSlot) tailSlot else brSlots.last
lastSlotForBr.valid && lastSlotForBr.offset < offset
}
def jmpValid = {
tailSlot.valid && (!shareTailSlot.B || !tailSlot.sharing)
}
def brOffset = {
VecInit(
brSlots.map(_.offset) ++
(if (shareTailSlot) Seq(tailSlot.offset) else Nil)
)
}
def display(cond: Bool): Unit = {
XSDebug(cond, p"-----------FTB entry----------- \n")
XSDebug(cond, p"v=${valid}\n")
for(i <- 0 until numBr) {
XSDebug(cond, p"[br$i]: v=${brValids(i)}, offset=${brOffset(i)}, lower=${Hexadecimal(brLowers(i))}\n")
XSDebug(cond, p"[br$i]: v=${allSlotsForBr(i).valid}, offset=${allSlotsForBr(i).offset}," +
p"lower=${Hexadecimal(allSlotsForBr(i).lower)}\n")
}
XSDebug(cond, p"[jmp]: v=${jmpValid}, offset=${jmpOffset}, lower=${Hexadecimal(jmpLower)}\n")
XSDebug(cond, p"[tailSlot]: v=${tailSlot.valid}, offset=${tailSlot.offset}," +
p"lower=${Hexadecimal(tailSlot.lower)}, sharing=${tailSlot.sharing}}\n")
XSDebug(cond, p"pftAddr=${Hexadecimal(pftAddr)}, carry=$carry\n")
XSDebug(cond, p"isCall=$isCall, isRet=$isRet, isjalr=$isJalr\n")
XSDebug(cond, p"oversize=$oversize, last_is_rvc=$last_is_rvc\n")
......@@ -145,16 +232,8 @@ class FTBEntryWithTag(implicit p: Parameters) extends XSBundle with FTBParams wi
val entry = new FTBEntry
val tag = UInt(tagSize.W)
def display(cond: Bool): Unit = {
XSDebug(cond, p"-----------FTB entry----------- \n")
XSDebug(cond, p"v=${entry.valid}, tag=${Hexadecimal(tag)}\n")
for(i <- 0 until numBr) {
XSDebug(cond, p"[br$i]: v=${entry.brValids(i)}, offset=${entry.brOffset(i)}, lower=${Hexadecimal(entry.brLowers(i))}\n")
}
XSDebug(cond, p"[jmp]: v=${entry.jmpValid}, offset=${entry.jmpOffset}, lower=${Hexadecimal(entry.jmpLower)}\n")
XSDebug(cond, p"pftAddr=${Hexadecimal(entry.pftAddr)}, carry=${entry.carry}\n")
XSDebug(cond, p"isCall=${entry.isCall}, isRet=${entry.isRet}, isjalr=${entry.isJalr}\n")
XSDebug(cond, p"oversize=${entry.oversize}, last_is_rvc=${entry.last_is_rvc}\n")
XSDebug(cond, p"------------------------------- \n")
entry.display(cond)
XSDebug(cond, p"tag is ${Hexadecimal(tag)}\n------------------------------- \n")
}
}
......@@ -245,6 +324,9 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
val u_mask = io.update_write_mask
ftb.io.w.apply(u_valid, u_data, u_idx, u_mask)
// print hit entry info
PriorityMux(total_hits, ftb.io.r.resp.data).display(true.B)
} // FTBBank
val ftbBank = Module(new FTBBank(numSets, numWays))
......@@ -271,22 +353,17 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
io.out.resp.s2.ftb_entry := ftb_entry
io.out.resp.s2.preds.fromFtbEntry(ftb_entry, s2_pc)
io.out.s3_meta := RegEnable(RegEnable(FTBMeta(writeWay.asUInt(), s1_hit, GTimer()).asUInt(), io.s1_fire), io.s2_fire)
when(s2_hit) {
io.out.resp.s2.ftb_entry.pftAddr := ftb_entry.pftAddr
io.out.resp.s2.ftb_entry.carry := ftb_entry.carry
}.otherwise {
io.out.resp.s2.ftb_entry.pftAddr := s2_pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U
io.out.resp.s2.ftb_entry.carry := s2_pc(instOffsetBits + log2Ceil(PredictWidth)).asBool
io.out.resp.s2.ftb_entry.oversize := false.B
when(!s2_hit) {
io.out.resp.s2.ftb_entry.onNotHit(s2_pc)
}
// always taken logic
when (s2_hit) {
for (i <- 0 until numBr) {
when (ftb_entry.always_taken(i)) {
io.out.resp.s2.preds.taken_mask(i) := true.B
io.out.resp.s2.preds.br_taken_mask(i) := true.B
}
}
}
......@@ -321,12 +398,10 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
XSDebug("req_v=%b, req_pc=%x, ready=%b (resp at next cycle)\n", io.s0_fire, s0_pc, ftbBank.io.req_pc.ready)
XSDebug("s2_hit=%b, hit_way=%b\n", s2_hit, writeWay.asUInt)
XSDebug("s2_taken_mask=%b, s2_real_taken_mask=%b\n",
io.in.bits.resp_in(0).s2.preds.taken_mask.asUInt, io.out.resp.s2.real_taken_mask().asUInt)
XSDebug("s2_br_taken_mask=%b, s2_real_taken_mask=%b\n",
io.in.bits.resp_in(0).s2.preds.br_taken_mask.asUInt, io.out.resp.s2.real_slot_taken_mask().asUInt)
XSDebug("s2_target=%x\n", io.out.resp.s2.target)
ftb_entry.display(true.B)
XSDebug(u_valid, "Update from ftq\n")
XSDebug(u_valid, "update_pc=%x, tag=%x, update_write_way=%b, pred_cycle=%d\n",
update.pc, ftbAddr.getTag(update.pc), u_way_mask, u_meta.pred_cycle)
......@@ -344,8 +419,8 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
XSPerfAccumulate("ftb_read_hits", RegNext(io.s0_fire) && s1_hit)
XSPerfAccumulate("ftb_read_misses", RegNext(io.s0_fire) && !s1_hit)
XSPerfAccumulate("ftb_commit_hits", u_valid && update.preds.hit)
XSPerfAccumulate("ftb_commit_misses", u_valid && !update.preds.hit)
XSPerfAccumulate("ftb_commit_hits", io.update.valid && io.update.bits.preds.hit)
XSPerfAccumulate("ftb_commit_misses", io.update.valid && !io.update.bits.preds.hit)
XSPerfAccumulate("ftb_update_req", io.update.valid)
XSPerfAccumulate("ftb_update_ignored", io.update.valid && io.update.bits.old_entry)
......
......@@ -111,12 +111,29 @@ class GlobalHistory(implicit p: Parameters) extends XSBundle with HasBPUConst {
// g
// }
def update(shift: UInt, taken: Bool, hist: UInt = predHist): GlobalHistory = {
def update(shift: UInt, taken: Bool, hist: UInt = this.predHist): GlobalHistory = {
val g = Wire(new GlobalHistory)
g.predHist := (hist << shift) | taken
g
}
def update(br_valids: Vec[Bool], real_taken_mask: Vec[Bool]): GlobalHistory = {
require(br_valids.length == numBr)
require(real_taken_mask.length == numBr)
val last_valid_idx = PriorityMux(
br_valids.reverse :+ true.B,
(numBr to 0 by -1).map(_.U(log2Ceil(numBr+1).W))
)
val first_taken_idx = PriorityEncoder(false.B +: real_taken_mask)
val smaller = Mux(last_valid_idx < first_taken_idx,
last_valid_idx,
first_taken_idx
)
val shift = smaller
val taken = real_taken_mask.reduce(_||_)
update(shift, taken, this.predHist)
}
final def === (that: GlobalHistory): Bool = {
predHist === that.predHist
}
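The new update overload above computes the shift from br_valids and the real taken mask instead of taking a pre-computed shift, which appears to be the timing optimization referred to in the commit title. A hypothetical software model of the shift it derives (not part of the patch):

// The history shifts by one bit per recorded branch up to and including the first taken one,
// or by the number of recorded branches when none is taken; the inserted bit is the OR of
// the taken mask.
def ghistUpdate(brValids: Seq[Boolean], takenMask: Seq[Boolean]): (Int, Boolean) = {
  val numBr = brValids.length
  val lastValidIdx  = brValids.lastIndexOf(true) + 1   // 0 when no branch is valid
  val firstTakenIdx = takenMask.indexOf(true) match {  // numBr when none is taken
    case -1 => numBr
    case i  => i + 1
  }
  (math.min(lastValidIdx, firstTakenIdx), takenMask.contains(true))
}

// e.g. two recorded branches, second taken: ghistUpdate(Seq(true, true), Seq(false, true))  == (2, true)
// e.g. two recorded branches, none taken:   ghistUpdate(Seq(true, true), Seq(false, false)) == (2, false)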
......@@ -141,34 +158,59 @@ class TableAddr(val idxBits: Int, val banks: Int)(implicit p: Parameters) extend
def getBank(x: UInt) = if (banks > 1) getIdx(x)(log2Up(banks) - 1, 0) else 0.U
def getBankIdx(x: UInt) = if (banks > 1) getIdx(x)(idxBits - 1, log2Up(banks)) else getIdx(x)
}
@chiselName
class BranchPrediction(implicit p: Parameters) extends XSBundle with HasBPUConst {
val taken_mask = Vec(numBr, Bool())
val br_taken_mask = Vec(numBr, Bool())
val br_valids = Vec(numBr, Bool())
val br_targets = Vec(numBr, UInt(VAddrBits.W))
val slot_valids = Vec(totalSlot, Bool())
val jmp_valid = Bool()
val jmp_target = UInt(VAddrBits.W)
val targets = Vec(totalSlot, UInt(VAddrBits.W))
val is_jal = Bool()
val is_jalr = Bool()
val is_call = Bool()
val is_ret = Bool()
val is_br_sharing = Bool()
// val call_is_rvc = Bool()
val hit = Bool()
def taken = taken_mask.reduce(_||_) // || (is_jal || is_jalr)
def br_slot_valids = slot_valids.init
def tail_slot_valid = slot_valids.last
def br_valids = {
VecInit(
if (shareTailSlot)
br_slot_valids :+ (tail_slot_valid && is_br_sharing)
else
br_slot_valids
)
}
def taken_mask_on_slot = {
VecInit(
if (shareTailSlot)
(br_slot_valids zip br_taken_mask.init).map{ case (t, v) => t && v } :+ (
(br_taken_mask.last && tail_slot_valid && is_br_sharing) ||
tail_slot_valid && !is_br_sharing
)
else
(br_slot_valids zip br_taken_mask).map{ case (v, t) => v && t } :+
tail_slot_valid
)
}
def taken = br_taken_mask.reduce(_||_) || slot_valids.last // || (is_jal || is_jalr)
def fromFtbEntry(entry: FTBEntry, pc: UInt) = {
br_valids := entry.brValids
br_targets := entry.getBrTargets(pc)
jmp_valid := entry.jmpValid
jmp_target := entry.getJmpTarget(pc)
is_jal := entry.jmpValid && entry.isJal
is_jalr := entry.jmpValid && entry.isJalr
is_call := entry.jmpValid && entry.isCall
is_ret := entry.jmpValid && entry.isRet
slot_valids := entry.brSlots.map(_.valid) :+ entry.tailSlot.valid
targets := entry.getTargetVec(pc)
is_jal := entry.tailSlot.valid && entry.isJal
is_jalr := entry.tailSlot.valid && entry.isJalr
is_call := entry.tailSlot.valid && entry.isCall
is_ret := entry.tailSlot.valid && entry.isRet
is_br_sharing := entry.tailSlot.valid && entry.tailSlot.sharing
}
// override def toPrintable: Printable = {
// p"-----------BranchPrediction----------- " +
......@@ -179,7 +221,7 @@ class BranchPrediction(implicit p: Parameters) extends XSBundle with HasBPUConst
// }
def display(cond: Bool): Unit = {
XSDebug(cond, p"[taken_mask] ${Binary(taken_mask.asUInt)} [hit] $hit\n")
XSDebug(cond, p"[taken_mask] ${Binary(br_taken_mask.asUInt)} [hit] $hit\n")
}
}
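taken_mask_on_slot is the subtle part of the shared tail slot: the tail slot counts as taken either as the last conditional branch (when sharing) or, when it holds a jal/jalr, unconditionally. A hypothetical software model for the shareTailSlot case (not part of the patch):

def takenMaskOnSlot(slotValids: Seq[Boolean],   // length totalSlot
                    brTakenMask: Seq[Boolean],  // length numBr (== totalSlot when sharing)
                    isBrSharing: Boolean): Seq[Boolean] = {
  // a dedicated branch slot is taken iff it is valid and predicted taken
  val brSlotTaken = slotValids.init.zip(brTakenMask.init).map { case (v, t) => v && t }
  // the tail slot is taken as the last branch when sharing, or as a jal/jalr otherwise
  val tailTaken =
    (brTakenMask.last && slotValids.last && isBrSharing) ||
    (slotValids.last && !isBrSharing)
  brSlotTaken :+ tailTaken
}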
......@@ -203,15 +245,24 @@ class BranchPredictionBundle(implicit p: Parameters) extends XSBundle with HasBP
val ftb_entry = new FTBEntry() // TODO: Send this entry to ftq
def real_br_taken_mask(): Vec[Bool] = {
VecInit(preds.taken_mask.zip(preds.br_valids).map{ case(m, b) => m && b && preds.hit})
def real_slot_taken_mask(): Vec[Bool] = {
VecInit(preds.taken_mask_on_slot.map(_ && preds.hit))
}
def real_taken_mask(): Vec[Bool] = {
VecInit(real_br_taken_mask() :+ (preds.jmp_valid && preds.hit))
// len numBr
def real_br_taken_mask(): Vec[Bool] = {
if (shareTailSlot)
VecInit(
preds.taken_mask_on_slot.map(_ && preds.hit).init :+
(preds.br_taken_mask.last && preds.tail_slot_valid && preds.is_br_sharing && preds.hit)
)
else
VecInit(real_slot_taken_mask().init)
}
def hit_taken_on_jmp = !real_br_taken_mask().asUInt.orR && preds.hit && preds.jmp_valid
def hit_taken_on_jmp =
!real_slot_taken_mask().init.reduce(_||_) &&
real_slot_taken_mask().last && !preds.is_br_sharing
def hit_taken_on_call = hit_taken_on_jmp && preds.is_call
def hit_taken_on_ret = hit_taken_on_jmp && preds.is_ret
def hit_taken_on_jalr = hit_taken_on_jmp && preds.is_jalr
......@@ -219,17 +270,17 @@ class BranchPredictionBundle(implicit p: Parameters) extends XSBundle with HasBP
def fallThroughAddr = getFallThroughAddr(pc, ftb_entry.carry, ftb_entry.pftAddr)
def target(): UInt = {
val targetVec = ftb_entry.getTargetVec(pc) :+ fallThroughAddr :+ (pc + (FetchWidth*4).U)
val selVec = real_taken_mask() :+ (preds.hit && !real_taken_mask().asUInt.orR) :+ true.B
val targetVec = preds.targets :+ fallThroughAddr :+ (pc + (FetchWidth*4).U)
val selVec = real_slot_taken_mask() :+ (preds.hit && !real_slot_taken_mask().asUInt.orR) :+ true.B
PriorityMux(selVec zip targetVec)
}
def genCfiIndex = {
val cfiIndex = Wire(ValidUndirectioned(UInt(log2Ceil(PredictWidth).W)))
cfiIndex.valid := real_taken_mask.asUInt.orR
cfiIndex.valid := real_slot_taken_mask().asUInt.orR
// when no takens, set cfiIndex to PredictWidth-1
cfiIndex.bits :=
ParallelPriorityMux(real_taken_mask, ftb_entry.getOffsetVec) |
Fill(log2Ceil(PredictWidth), (!real_taken_mask.asUInt.orR).asUInt)
ParallelPriorityMux(real_slot_taken_mask(), ftb_entry.getOffsetVec) |
Fill(log2Ceil(PredictWidth), (!real_slot_taken_mask().asUInt.orR).asUInt)
cfiIndex
}
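genCfiIndex above picks the offset of the first taken slot without an extra mux: the priority-mux result is OR-ed with all ones when nothing is taken, which saturates the index to PredictWidth-1. A small software model of the resulting value (not part of the patch):

def cfiIndexBits(takenOnSlot: Seq[Boolean], offsets: Seq[Int], predictWidth: Int): Int = {
  val firstTaken = takenOnSlot.indexOf(true)
  // offset of the first taken slot, or PredictWidth-1 (all ones) when no slot is taken
  if (firstTaken >= 0) offsets(firstTaken) else predictWidth - 1
}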
......
......@@ -500,7 +500,9 @@ class ITTage(implicit p: Parameters) extends BaseITTage {
// Update logic
val u_valid = io.update.valid
val update = io.update.bits
val updateValids = VecInit(Seq.fill(ITTageBanks)(update.ftb_entry.isJalr && u_valid && !(update.real_br_taken_mask().reduce((a,b) => a || b))))
val updateValids = VecInit(Seq.fill(ITTageBanks)(
update.ftb_entry.isJalr && u_valid && update.ftb_entry.jmpValid &&
!(update.real_br_taken_mask().reduce(_||_))))
val updateHist = update.ghist
val updatePhist = update.phist
......@@ -558,7 +560,7 @@ class ITTage(implicit p: Parameters) extends BaseITTage {
when(io.s2_fire && io.in.bits.resp_in(0).s2.hit_taken_on_jalr && s2_tageTakens(w)) {
// FIXME: should use s1 globally
io.out.resp.s2.preds.jmp_target := s2_tageTargets(w)
io.out.resp.s2.preds.targets.last := s2_tageTargets(w)
}
resp_meta(w).provider.valid := s2_provideds(w)
......@@ -689,16 +691,18 @@ class ITTage(implicit p: Parameters) extends BaseITTage {
PopCount(VecInit(updateMetas zip updateValids map {
case (m, v) => m.provider.valid && v
})))
XSPerfAccumulate("ittage_updated", updateValids.asUInt)
if (debug) {
for (b <- 0 until ITTageBanks) {
val m = updateMetas(b)
// val bri = u.metas(b)
XSDebug(updateValids(b), "update(%d): pc=%x, cycle=%d, hist=%x, taken:%b, misPred:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc(%d):%d\n",
b.U, update.pc, 0.U, updateHist.predHist, update.preds.taken_mask(b), update.mispred_mask(b),
0.U, m.provider.valid, m.provider.bits, m.altDiffers, m.providerU, m.providerCtr, m.allocate.valid, m.allocate.bits
)
}
// for (b <- 0 until ITTageBanks) {
// val m = updateMetas(b)
// // val bri = u.metas(b)
// XSDebug(updateValids(b), "update(%d): pc=%x, cycle=%d, hist=%x, taken:%b, misPred:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc(%d):%d\n",
// b.U, update.pc, 0.U, updateHist.predHist, update.preds.taken_mask(b), update.mispred_mask(b),
// 0.U, m.provider.valid, m.provider.bits, m.altDiffers, m.providerU, m.providerCtr, m.allocate.valid, m.allocate.bits
// )
// }
val s2_resps = RegEnable(s1_resps, io.s1_fire)
XSDebug("req: v=%d, pc=0x%x, hist=%b\n", io.s0_fire, s0_pc, io.in.bits.ghist)
XSDebug("s1_fire:%d, resp: pc=%x, hist=%b\n", io.s1_fire, debug_pc_s1, debug_hist_s1)
......
......@@ -290,14 +290,24 @@ class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedire
// if not hit, establish a new entry
init_entry.valid := true.B
// tag is left for ftb to assign
init_entry.brValids(0) := cfi_is_br
init_entry.brOffset(0) := io.cfiIndex.bits
init_entry.setByBrTarget(0, io.start_addr, io.target)
init_entry.always_taken := WireInit(0.U.asTypeOf(Vec(numBr, Bool())))
init_entry.always_taken(0) := cfi_is_br // set to always taken on init
init_entry.jmpOffset := pd.jmpOffset
init_entry.jmpValid := new_jmp_is_jal || new_jmp_is_jalr
init_entry.setByJmpTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget))
// case br
val init_br_slot = init_entry.getSlotForBr(0)
when (cfi_is_br) {
init_br_slot.valid := true.B
init_br_slot.offset := io.cfiIndex.bits
init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
init_entry.always_taken(0) := true.B // set to always taken on init
}
// init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br
// case jmp
when (entry_has_jmp) {
init_entry.tailSlot.offset := pd.jmpOffset
init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
}
val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
init_entry.carry := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
......@@ -310,57 +320,72 @@ class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedire
// if hit, check whether a new cfi(only br is possible) is detected
val oe = io.old_entry
val br_recorded_vec = VecInit((oe.brValids zip oe.brOffset).map {
case (v, off) => v && (off === io.cfiIndex.bits)
})
val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
val br_recorded = br_recorded_vec.asUInt.orR
val is_new_br = cfi_is_br && !br_recorded
val br_full = oe.brValids.asUInt.andR // all slots have brs
val new_br_offset = io.cfiIndex.bits
// vec(i) means new br will be inserted BEFORE old br(i)
val allBrSlotsVec = oe.allSlotsForBr
val new_br_insert_onehot = VecInit((0 until numBr).map{
i => i match {
case 0 => !oe.brValids(0) || new_br_offset < oe.brOffset(0)
case idx => oe.brValids(idx-1) && new_br_offset > oe.brOffset(idx-1) &&
(!oe.brValids(idx) || new_br_offset < oe.brOffset(idx))
case 0 =>
!allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
case idx =>
allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
(!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
}
})
val old_entry_modified = WireInit(io.old_entry)
val (new_br_lower, new_br_tar_stat) = old_entry_modified.getBrLowerStatByTarget(io.start_addr, io.target)
for (i <- 0 until numBr) {
old_entry_modified.brOffset(i) := Mux(new_br_insert_onehot(i), new_br_offset,
Mux(oe.brOffset(i) < new_br_offset, oe.brOffset(i),
(if (i != 0) oe.brOffset(i-1) else oe.brOffset(i))))
old_entry_modified.brLowers(i) := Mux(new_br_insert_onehot(i), new_br_lower,
Mux(oe.brOffset(i) < new_br_offset, oe.brLowers(i),
(if (i != 0) oe.brLowers(i-1) else oe.brLowers(i))))
old_entry_modified.brTarStats(i) := Mux(new_br_insert_onehot(i), new_br_tar_stat,
Mux(oe.brOffset(i) < new_br_offset, oe.brTarStats(i),
(if (i != 0) oe.brTarStats(i-1) else oe.brTarStats(i))))
old_entry_modified.always_taken(i) := Mux(new_br_insert_onehot(i), true.B,
Mux(oe.brOffset(i) < new_br_offset, false.B,
(if (i != 0) oe.always_taken(i-1) else oe.always_taken(i))))
val slot = old_entry_modified.allSlotsForBr(i)
when (new_br_insert_onehot(i)) {
slot.valid := true.B
slot.offset := new_br_offset
slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
old_entry_modified.always_taken(i) := true.B
}.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
old_entry_modified.always_taken(i) := false.B
// all other fields remain unchanged
}.otherwise {
// case i == 0, remain unchanged
if (i != 0) {
val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
when (!noNeedToMoveFromFormerSlot) {
slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
old_entry_modified.always_taken(i) := oe.always_taken(i)
}
}
}
}
old_entry_modified.brValids := VecInit((oe.brValids zip new_br_insert_onehot).map{case (v1, v2) => v1 || v2})
// in this case, pft_addr should be the addrs of the last br in packet
val pft_need_to_change = is_new_br && br_full
// two circumstances:
// 1. oe: | br | j |, new br should be in front of j, thus addr of j should be new pft
// 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
// the previous last br or the new br
val may_have_to_replace = oe.noEmptySlotForNewBr
val pft_need_to_change = is_new_br && may_have_to_replace
// it should either be the given last br or the new br
when (pft_need_to_change) {
val new_pft_offset = Mux(new_br_insert_onehot.asUInt.orR, oe.brOffset.last, new_br_offset)
val new_pft_offset =
Mux(!oe.tailSlot.sharing || new_br_insert_onehot.asUInt.orR,
oe.tailSlot.offset,
new_br_offset
)
old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
old_entry_modified.oversize := false.B
old_entry_modified.jmpValid := false.B
old_entry_modified.isCall := false.B
old_entry_modified.isRet := false.B
old_entry_modified.isJalr := false.B
}
val old_entry_jmp_target_modified = WireInit(oe)
val old_target = oe.getJmpTarget(io.start_addr)
val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) // TODO: pass full jalr target
val old_target = oe.tailSlot.getTarget(io.start_addr)
val old_tail_is_jmp = !oe.tailSlot.sharing
val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
when (jalr_target_modified) {
old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
......@@ -399,7 +424,7 @@ class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedire
io.is_new_br := hit && is_new_br
io.is_jalr_target_modified := hit && jalr_target_modified
io.is_always_taken_modified := hit && always_taken_modified
io.is_br_full := hit && is_new_br && br_full
io.is_br_full := hit && is_new_br && may_have_to_replace
}
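The pft replacement in FTBEntryGen (pft_need_to_change) follows the two circumstances described in the comments above: when every slot already holds something and a new branch shows up, the entry is shortened so the new branch falls outside it. A worked software sketch of the new_pft_offset choice (not part of the patch):

// Use the old tail offset when the tail holds a jal/jalr, or when the new branch is inserted
// before an existing one (pushing the last branch out); otherwise the new branch is the cut.
def newPftOffset(tailIsSharing: Boolean, insertedBefore: Boolean,
                 oldTailOffset: Int, newBrOffset: Int): Int =
  if (!tailIsSharing || insertedBefore) oldTailOffset else newBrOffset

// entry | br@4 | jal@10 |, new br@6  -> cut at 10: the jal is evicted and becomes the fall-through
// entry | br@4 | br@10  |, new br@12 -> cut at 12: the new branch will start the next entry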
class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
......@@ -632,12 +657,19 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
when (RegNext(hit_pd_valid)) {
// check for false hit
val pred_ftb_entry = ftb_entry_mem.io.rdata.head
val brSlots = pred_ftb_entry.brSlots
val tailSlot = pred_ftb_entry.tailSlot
// we check cfis that bpu predicted
val br_false_hit = (pred_ftb_entry.brValids zip pred_ftb_entry.brOffset).map{
case (v, offset) => v && !(pd_reg(offset).valid && pd_reg(offset).isBr)
}.reduce(_||_)
// bpu predicted branches but denied by predecode
val br_false_hit =
brSlots.map{
s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
}.reduce(_||_) ||
(shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
!(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
val jmpOffset = pred_ftb_entry.jmpOffset
val jmpOffset = tailSlot.offset
val jmp_pd = pd_reg(jmpOffset)
val jal_false_hit = pred_ftb_entry.jmpValid &&
((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) ||
......@@ -690,10 +722,10 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
(backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
!(r_ftb_entry.brValids(numBr-1) && r_ftqOffset > r_ftb_entry.brOffset(numBr-1)))
!r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
!(r_ftb_entry.brValids(numBr-1) && r_ftqOffset > r_ftb_entry.brOffset(numBr-1)))
!r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
}.otherwise {
backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
......@@ -911,7 +943,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
update.mispred_mask := ftbEntryGen.mispred_mask
update.old_entry := ftbEntryGen.is_old_entry
update.preds.taken_mask := ftbEntryGen.taken_mask
update.preds.br_taken_mask := ftbEntryGen.taken_mask
// ******************************************************************************
// **************************** commit perf counters ****************************
......
......@@ -161,7 +161,7 @@ class RAS(implicit p: Parameters) extends BasePredictor {
spec_pop := io.s2_fire && io.in.bits.resp_in(0).s2.hit_taken_on_ret
when (spec_pop) {
io.out.resp.s2.preds.jmp_target := spec_top_addr
io.out.resp.s2.preds.targets.last := spec_top_addr
}
io.out.resp.s2.rasSp := spec_ras.sp
......
......@@ -310,7 +310,7 @@ trait HasSC extends HasSCParameter { this: Tage =>
s2_agree(w) := s2_tageTakens(w) === pred
s2_disagree(w) := s2_tageTakens(w) =/= pred
// fit to always-taken condition
io.out.resp.s2.preds.taken_mask(w) := pred
io.out.resp.s2.preds.br_taken_mask(w) := pred
XSDebug(p"pc(${Hexadecimal(debug_pc)}) SC(${w.U}) overriden pred to ${pred}\n")
}
}
......@@ -320,7 +320,7 @@ trait HasSC extends HasSCParameter { this: Tage =>
when (updateValids(w) && updateSCMeta.scUsed.asBool) {
val scPred = updateSCMeta.scPred
val tagePred = updateSCMeta.tageTaken
val taken = update.preds.taken_mask(w)
val taken = update.preds.br_taken_mask(w)
val scOldCtrs = updateSCMeta.ctrs
val pvdrCtr = updateTageMeta.providerCtr
val sum = ParallelSingedExpandingAdd(scOldCtrs.map(getCentered)) +& getPvdrCentered(pvdrCtr)
......
......@@ -207,7 +207,7 @@ class TageBTable
Mux(taken, old + 1.U, old - 1.U)))
}
val newTakens = update.preds.taken_mask
val newTakens = update.preds.br_taken_mask
val newCtrs = VecInit((0 until numBr).map(i =>
satUpdate(oldCtrs(i), 2, newTakens(i))
))
......@@ -570,7 +570,7 @@ class Tage(implicit p: Parameters) extends BaseTage {
val update = io.update.bits
val updateValids = VecInit((0 until TageBanks).map(w =>
update.ftb_entry.brValids(w) && u_valid && !update.ftb_entry.always_taken(w) &&
!(PriorityEncoder(update.preds.taken_mask) < w.U)))
!(PriorityEncoder(update.preds.br_taken_mask) < w.U)))
val updateHist = update.ghist
val updatePhist = update.phist
......@@ -665,7 +665,7 @@ class Tage(implicit p: Parameters) extends BaseTage {
// Update in loop
val updateValid = updateValids(w)
val updateMeta = updateMetas(w)
val isUpdateTaken = updateValid && update.preds.taken_mask(w)
val isUpdateTaken = updateValid && update.preds.br_taken_mask(w)
val updateMisPred = updateMisPreds(w)
val up_altpredhit = updateMeta.altpredhit
val up_prednum = updateMeta.prednum.bits
......@@ -737,7 +737,7 @@ class Tage(implicit p: Parameters) extends BaseTage {
}
for (i <- 0 until numBr) {
resp_s2.preds.taken_mask(i) := s2_tageTakens(i)
resp_s2.preds.br_taken_mask(i) := s2_tageTakens(i)
}
// io.out.resp.s3 := RegEnable(resp_s2, io.s2_fire)
......@@ -803,7 +803,7 @@ class Tage(implicit p: Parameters) extends BaseTage {
val m = updateMetas(b)
// val bri = u.metas(b)
XSDebug(updateValids(b), "update(%d): pc=%x, cycle=%d, hist=%x, taken:%b, misPred:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc(%d):%d\n",
b.U, update.pc, 0.U, updateHist.predHist, update.preds.taken_mask(b), update.mispred_mask(b),
b.U, update.pc, 0.U, updateHist.predHist, update.preds.br_taken_mask(b), update.mispred_mask(b),
0.U, m.provider.valid, m.provider.bits, m.altDiffers, m.providerU, m.providerCtr, m.allocate.valid, m.allocate.bits
)
}
......
......@@ -144,9 +144,7 @@ class MicroBTB(implicit p: Parameters) extends BasePredictor
io.out.resp.s1.preds.fromFtbEntry(read_entry, s1_pc)
when(!bank.read_hit) {
io.out.resp.s1.ftb_entry.pftAddr := s1_pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U
io.out.resp.s1.ftb_entry.carry := s1_pc(instOffsetBits + log2Ceil(PredictWidth)).asBool
io.out.resp.s1.ftb_entry.oversize := false.B
io.out.resp.s1.ftb_entry.onNotHit(s1_pc)
}
outMeta.hit := bank.read_hit
......@@ -157,7 +155,7 @@ class MicroBTB(implicit p: Parameters) extends BasePredictor
val u_valid = RegNext(io.update.valid)
val u_pc = update.pc
val u_taken = update.preds.taken
val u_taken_mask = update.preds.taken_mask
val u_br_taken_mask = update.preds.br_taken_mask
val u_meta = update.meta.asTypeOf(new MicroBTBOutMeta)
val u_tag = getTag(u_pc)
......@@ -168,12 +166,12 @@ class MicroBTB(implicit p: Parameters) extends BasePredictor
XSDebug("req_v=%b, req_pc=%x, hit=%b\n", io.s1_fire, s1_pc, bank.read_hit)
XSDebug("target=%x, real_taken_mask=%b, taken_mask=%b, brValids=%b, jmpValid=%b\n",
io.out.resp.s1.target, io.out.resp.s1.real_taken_mask.asUInt, io.out.resp.s1.preds.taken_mask.asUInt, read_entry.brValids.asUInt, read_entry.jmpValid.asUInt)
io.out.resp.s1.target, io.out.resp.s1.real_slot_taken_mask.asUInt, io.out.resp.s1.preds.br_taken_mask.asUInt, read_entry.brValids.asUInt, read_entry.jmpValid.asUInt)
XSDebug(u_valid, "[update]Update from ftq\n")
XSDebug(u_valid, "[update]update_pc=%x, tag=%x\n", u_pc, getTag(u_pc))
XSDebug(u_valid, "[update]taken_mask=%b, brValids=%b, jmpValid=%b\n",
u_taken_mask.asUInt, update.ftb_entry.brValids.asUInt, update.ftb_entry.jmpValid)
u_br_taken_mask.asUInt, update.ftb_entry.brValids.asUInt, update.ftb_entry.jmpValid)
XSPerfAccumulate("ubtb_read_hits", RegNext(io.s1_fire) && bank.read_hit)
XSPerfAccumulate("ubtb_read_misses", RegNext(io.s1_fire) && !bank.read_hit)
......