提交 b30c10d6 编写于 作者: L Lingrui98

bpu: timing optimizations

* use a parallel mux to select the provider and altprovider for TAGE and ITTAGE
* reduce logic in SC prediction
* calculate the higher bits of targets at stage 1 for the FTB
* reduce logic for RAS and ITTAGE prediction assignment
上级 c1b370c4
......@@ -100,3 +100,46 @@ object ParallelPriorityEncoder {
object ParallelSingedExpandingAdd {
  /** Reduces all signed inputs to one sum via `ParallelOperation`.
    *
    * Each pairwise step uses the width-expanding add `+&`, so no carry bit is
    * dropped while partial sums are combined.
    */
  def apply(in: Seq[SInt]): SInt = {
    val expandingAdd = (lhs: SInt, rhs: SInt) => lhs +& rhs
    ParallelOperation(in, expandingAdd)
  }
}
// Intermediate result of a "select the first two valid candidates" reduction.
// `first` / `second` carry the payloads of the first and second valid candidate
// (in reduction order); they are only meaningful when `hasOne` / `hasTwo` are set.
// `gen` supplies the payload type; `chiselTypeOf` is applied to it for both fields.
class SelectTwoInterRes[T <: Data](gen: T) extends Bundle {
// val valid = Bool()
// Asserted when at least one valid candidate has been seen; qualifies `first`.
val hasOne = Bool()
// Asserted when at least two valid candidates have been seen; qualifies `second`.
val hasTwo = Bool()
val first = chiselTypeOf(gen)
val second = chiselTypeOf(gen)
// Re-create the bundle from the same `gen` when it is cloned.
override def cloneType = new SelectTwoInterRes(gen).asInstanceOf[this.type]
}
object SelectTwoInterRes {
  /** Builds a wire of [[SelectTwoInterRes]] and drives every field from the arguments.
    *
    * @param hasOne whether `first` holds a valid candidate
    * @param hasTwo whether `second` holds a valid candidate
    * @param first  payload of the first candidate (also used as the payload type)
    * @param second payload of the second candidate
    */
  def apply[T <: Data](hasOne: Bool, hasTwo: Bool, first: T, second: T): SelectTwoInterRes[T] = {
    val bundle = Wire(new SelectTwoInterRes(first))
    bundle.first := first
    bundle.second := second
    bundle.hasOne := hasOne
    bundle.hasTwo := hasTwo
    bundle
  }

  /** Leaf constructor for a single candidate.
    *
    * `hasTwo` is tied low; `data` is placed in both payload fields, so `second`
    * is only a placeholder here.
    */
  def apply[T <: Data](valid: Bool, data: T): SelectTwoInterRes[T] =
    apply(valid, false.B, data, data)
}
object ParallelSelectTwo {
  /** Merges two partial results, giving `a` priority over `b`.
    *
    * The merged `first` is `a.first` when `a` has a candidate, else `b.first`.
    * The merged `second` depends on how many candidates `a` contributes:
    *   - none        -> `b.second`
    *   - exactly one -> `b.first`
    *   - two         -> `a.second`
    */
  def mergeSelectFirstTwo[T <: Data](a: SelectTwoInterRes[T], b: SelectTwoInterRes[T]): SelectTwoInterRes[T] = {
    val anyCandidate = a.hasOne || b.hasOne
    val twoCandidates = a.hasTwo || b.hasTwo || a.hasOne && b.hasOne

    val mergedFirst = Mux(a.hasOne, a.first, b.first)

    // Selectors for the second candidate, keyed on how many candidates `a` holds.
    val aHasNone = !a.hasOne
    val aHasExactlyOne = a.hasOne && !a.hasTwo
    val aHasTwo = a.hasTwo
    val mergedSecond = Mux1H(Seq(
      aHasNone -> b.second,
      aHasExactlyOne -> b.first,
      aHasTwo -> a.second
    ))

    SelectTwoInterRes(anyCandidate, twoCandidates, mergedFirst, mergedSecond)
  }

  /** Reduces all partial results with [[mergeSelectFirstTwo]] via `ParallelOperation`. */
  def apply[T <: Data](xs: Seq[SelectTwoInterRes[T]]): SelectTwoInterRes[T] = {
    ParallelOperation(xs, mergeSelectFirstTwo[T])
  }
}
......@@ -386,9 +386,9 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst {
s2_fire := s2_valid
when(s2_flush) { s2_valid := false.B }
.elsewhen(s1_fire && !s1_flush) { s2_valid := true.B }
.elsewhen(s2_fire) { s2_valid := false.B }
when(s2_flush) { s2_valid := false.B }
.elsewhen(s1_fire) { s2_valid := !s1_flush }
.elsewhen(s2_fire) { s2_valid := false.B }
predictors.io.s2_fire := s2_fire
......@@ -437,10 +437,19 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst {
}
// Two predictions need a redirect when any of the following differs between them:
// the hit flag, whether any slot is taken, the set of valid branches, or the
// position of the first taken branch.
def preds_needs_redirect(x: BranchPredictionBundle, y: BranchPredictionBundle) = {
  val hitDiffers = x.preds.hit =/= y.preds.hit
  val anyTakenDiffers =
    x.real_slot_taken_mask().asUInt.orR =/= y.real_slot_taken_mask().asUInt().orR
  val brValidsDiffer = x.preds.br_valids.asUInt =/= y.preds.br_valids.asUInt
  val firstTakenBrDiffers =
    PriorityEncoder(x.real_br_taken_mask()) =/= PriorityEncoder(y.real_br_taken_mask)
  hitDiffers || anyTakenDiffers || brValidsDiffer || firstTakenBrDiffers
}
// No redirect is needed when both predictions miss, or when both hit and agree
// on the last-branch position (one-hot) and on the per-slot taken mask.
def no_need_to_redirect(x: BranchPredictionBundle, y: BranchPredictionBundle) = {
  val bothMiss = !(x.preds.hit || y.preds.hit)
  val bothHit = x.preds.hit && y.preds.hit
  val sameLastBrPos = VecInit(x.lastBrPosOH).asUInt === VecInit(y.lastBrPosOH).asUInt
  val sameTakenMask = x.preds.taken_mask_on_slot.asUInt === y.preds.taken_mask_on_slot.asUInt
  bothMiss || (bothHit && sameLastBrPos && sameTakenMask)
}
// s2
val s2_possible_predicted_ghist_ptrs = (0 to numBr).map(s2_ghist_ptr - _.U)
val s2_predicted_ghist_ptr = Mux1H(resp.s2.lastBrPosOH, s2_possible_predicted_ghist_ptrs)
......@@ -458,6 +467,8 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst {
val previous_s1_pred = RegEnable(resp.s1, init=0.U.asTypeOf(resp.s1), s1_fire)
// val s2_redirect_s1_last_pred = !no_need_to_redirect(s1_last_pred, resp.s2)
// val s2_redirect_s0_last_pred = !no_need_to_redirect(s0_last_pred_reg, resp.s2)
val s2_redirect_s1_last_pred = preds_needs_redirect(s1_last_pred, resp.s2)
val s2_redirect_s0_last_pred = preds_needs_redirect(s0_last_pred_reg, resp.s2)
......
......@@ -45,8 +45,10 @@ trait FTBParams extends HasXSParameter with HasBPUConst {
def JMP_OFFSET_LEN = 20
}
class FtbSlot(val offsetLen: Int, val subOffsetLen: Int = 0)(implicit p: Parameters) extends XSBundle with FTBParams {
require(subOffsetLen <= offsetLen)
class FtbSlot(val offsetLen: Int, val subOffsetLen: Option[Int] = None)(implicit p: Parameters) extends XSBundle with FTBParams {
if (subOffsetLen.isDefined) {
require(subOffsetLen.get <= offsetLen)
}
val offset = UInt(log2Ceil(PredictWidth).W)
val lower = UInt(offsetLen.W)
val tarStat = UInt(TAR_STAT_SZ.W)
......@@ -58,7 +60,7 @@ class FtbSlot(val offsetLen: Int, val subOffsetLen: Int = 0)(implicit p: Paramet
Mux(target_higher > pc_higher, TAR_OVF,
Mux(target_higher < pc_higher, TAR_UDF, TAR_FIT))
def getLowerByTarget(target: UInt, offsetLen: Int) = target(offsetLen, 1)
val offLen = if (isShare) this.subOffsetLen else this.offsetLen
val offLen = if (isShare) this.subOffsetLen.get else this.offsetLen
val pc_higher = pc(VAddrBits-1, offLen+1)
val target_higher = target(VAddrBits-1, offLen+1)
val stat = getTargetStatByHigher(pc_higher, target_higher)
......@@ -68,35 +70,54 @@ class FtbSlot(val offsetLen: Int, val subOffsetLen: Int = 0)(implicit p: Paramet
this.sharing := isShare.B
}
def getTarget(pc: UInt) = {
def getTarget(offLen: Int)(pc: UInt, lower: UInt, stat: UInt) = {
val higher = pc(VAddrBits-1, offLen+1)
def getTarget(pc: UInt, last_stage: Option[Tuple2[UInt, Bool]] = None) = {
def getTarget(offLen: Int)(pc: UInt, lower: UInt, stat: UInt,
last_stage: Option[Tuple2[UInt, Bool]] = None) = {
val h = pc(VAddrBits-1, offLen+1)
val higher = Wire(UInt((VAddrBits-offLen-1).W))
val higher_plus_one = Wire(UInt((VAddrBits-offLen-1).W))
val higher_minus_one = Wire(UInt((VAddrBits-offLen-1).W))
if (last_stage.isDefined) {
val last_stage_pc = last_stage.get._1
val last_stage_pc_h = last_stage_pc(VAddrBits-1, offLen+1)
val stage_en = last_stage.get._2
higher := RegEnable(last_stage_pc_h, stage_en)
higher_plus_one := RegEnable(last_stage_pc_h+1.U, stage_en)
higher_minus_one := RegEnable(last_stage_pc_h-1.U, stage_en)
} else {
higher := h
higher_plus_one := h + 1.U
higher_minus_one := h - 1.U
}
val target =
Cat(
Mux(stat === TAR_OVF, higher+1.U,
Mux(stat === TAR_UDF, higher-1.U, higher)),
Mux1H(Seq(
(stat === TAR_OVF, higher_plus_one),
(stat === TAR_UDF, higher_minus_one),
(stat === TAR_FIT, higher),
)),
lower(offLen-1, 0), 0.U(1.W)
)
require(target.getWidth == VAddrBits)
require(offLen != 0)
target
}
if (subOffsetLen != 0)
if (subOffsetLen.isDefined)
Mux(sharing,
getTarget(subOffsetLen)(pc, lower, tarStat),
getTarget(offsetLen)(pc, lower, tarStat)
getTarget(subOffsetLen.get)(pc, lower, tarStat, last_stage),
getTarget(offsetLen)(pc, lower, tarStat, last_stage)
)
else
getTarget(offsetLen)(pc, lower, tarStat)
getTarget(offsetLen)(pc, lower, tarStat, last_stage)
}
/** Copies the contents of `that` slot into this slot.
  *
  * Accepted source slots:
  *   - a slot with the same `offsetLen` as this one, or
  *   - a narrower slot, provided that this slot either has no `subOffsetLen`
  *     configured or its `subOffsetLen` equals `that.offsetLen`.
  *
  * `sharing` is set only when the source is narrower and its width equals this
  * slot's configured `subOffsetLen`. `lower` is zero-extended to this slot's width.
  */
def fromAnotherSlot(that: FtbSlot) = {
  val fromNarrower = this.offsetLen > that.offsetLen
  require(
    fromNarrower && this.subOffsetLen.forall(_ == that.offsetLen) ||
    this.offsetLen == that.offsetLen
  )
  this.offset := that.offset
  this.tarStat := that.tarStat
  // `contains` yields false for None, so a slot without a sub-offset (which the
  // require above permits) is simply marked non-sharing instead of throwing on `.get`.
  this.sharing := (fromNarrower && this.subOffsetLen.contains(that.offsetLen)).B
  this.valid := that.valid
  this.lower := ZeroExt(that.lower, this.offsetLen)
}
......@@ -112,7 +133,7 @@ class FTBEntry(implicit p: Parameters) extends XSBundle with FTBParams with BPUU
// if shareTailSlot is set, this slot can hold a branch or a jal/jalr
// else this slot holds only jal/jalr
val tailSlot = new FtbSlot(JMP_OFFSET_LEN, BR_OFFSET_LEN)
val tailSlot = new FtbSlot(JMP_OFFSET_LEN, Some(BR_OFFSET_LEN))
// Partial Fall-Through Address
val pftAddr = UInt((log2Up(PredictWidth)+1).W)
......@@ -150,8 +171,8 @@ class FTBEntry(implicit p: Parameters) extends XSBundle with FTBParams with BPUU
this.tailSlot.setLowerStatByTarget(pc, target, false)
}
def getTargetVec(pc: UInt) = {
VecInit((brSlots :+ tailSlot).map(_.getTarget(pc)))
def getTargetVec(pc: UInt, last_stage: Option[Tuple2[UInt, Bool]] = None) = {
VecInit((brSlots :+ tailSlot).map(_.getTarget(pc, last_stage)))
}
def getOffsetVec = VecInit(brSlots.map(_.offset) :+ tailSlot.offset)
......@@ -204,7 +225,6 @@ class FTBEntry(implicit p: Parameters) extends XSBundle with FTBParams with BPUU
)
}
def display(cond: Bool): Unit = {
XSDebug(cond, p"-----------FTB entry----------- \n")
XSDebug(cond, p"v=${valid}\n")
......@@ -424,17 +444,13 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
io.out.resp.s2.preds.hit := s2_hit
io.out.resp.s2.pc := s2_pc
io.out.resp.s2.ftb_entry := ftb_entry
io.out.resp.s2.preds.fromFtbEntry(ftb_entry, s2_pc)
io.out.resp.s2.preds.fromFtbEntry(ftb_entry, s2_pc, Some((s1_pc, io.s1_fire)))
io.out.last_stage_meta := RegEnable(FTBMeta(writeWay.asUInt(), s1_hit, GTimer()).asUInt(), io.s1_fire)
// always taken logic
when (s2_hit) {
for (i <- 0 until numBr) {
when (ftb_entry.always_taken(i)) {
io.out.resp.s2.preds.br_taken_mask(i) := true.B
}
}
for (i <- 0 until numBr) {
io.out.resp.s2.preds.br_taken_mask(i) := io.in.bits.resp_in(0).s2.preds.br_taken_mask(i) || s2_hit && ftb_entry.always_taken(i)
}
// Update logic
......
......@@ -284,6 +284,7 @@ class BranchPrediction(implicit p: Parameters) extends XSBundle with HasBPUConst
val slot_valids = Vec(totalSlot, Bool())
val targets = Vec(totalSlot, UInt(VAddrBits.W))
val jalr_target = UInt(VAddrBits.W) // special path for indirect predictors
val offsets = Vec(totalSlot, UInt(log2Ceil(PredictWidth).W))
val fallThroughAddr = UInt(VAddrBits.W)
val oversize = Bool()
......@@ -313,8 +314,9 @@ class BranchPrediction(implicit p: Parameters) extends XSBundle with HasBPUConst
VecInit(
if (shareTailSlot)
(br_slot_valids zip br_taken_mask.init).map{ case (t, v) => t && v } :+ (
(br_taken_mask.last && tail_slot_valid && is_br_sharing) ||
tail_slot_valid && !is_br_sharing
tail_slot_valid && (
is_br_sharing && br_taken_mask.last || !is_br_sharing
)
)
else
(br_slot_valids zip br_taken_mask).map{ case (v, t) => v && t } :+
......@@ -324,9 +326,10 @@ class BranchPrediction(implicit p: Parameters) extends XSBundle with HasBPUConst
def taken = br_taken_mask.reduce(_||_) || slot_valids.last // || (is_jal || is_jalr)
def fromFtbEntry(entry: FTBEntry, pc: UInt) = {
def fromFtbEntry(entry: FTBEntry, pc: UInt, last_stage: Option[Tuple2[UInt, Bool]] = None) = {
slot_valids := entry.brSlots.map(_.valid) :+ entry.tailSlot.valid
targets := entry.getTargetVec(pc)
jalr_target := targets.last
offsets := entry.getOffsetVec
fallThroughAddr := entry.getFallThrough(pc)
oversize := entry.oversize
......@@ -340,6 +343,7 @@ class BranchPrediction(implicit p: Parameters) extends XSBundle with HasBPUConst
def fromMicroBTBEntry(entry: MicroBTBEntry) = {
slot_valids := entry.slot_valids
targets := entry.targets
jalr_target := DontCare
offsets := entry.offsets
fallThroughAddr := entry.fallThroughAddr
oversize := entry.oversize
......
......@@ -453,51 +453,59 @@ class ITTage(implicit p: Parameters) extends BaseITTage {
// val updateTageMisPreds = VecInit((0 until numBr).map(i => updateMetas(i).taken =/= u.takens(i)))
val updateMisPred = update.mispred_mask(numBr) // the last one indicates jmp results
// access tag tables and output meta info
val basePred = base_table_resp.bits.ctr =/= 0.U
val baseTarget = base_table_resp.bits.target
s1_tageTaken := basePred // TODO: reintroduce BIM
s1_tageTarget := baseTarget
s1_finalAltPred := basePred
val s1_finalAltTarget = WireInit(baseTarget)
var s1_temp_altPred = basePred
var s1_temp_altTarget = baseTarget
var s1_temp_provided = false.B
var s1_temp_provider = 0.U
var s1_temp_alt_provided = false.B
var s1_temp_alt_provider = 0.U
for (i <- 1 until ITTageNTables) { // skip base table
val hit = s1_resps(i).valid
val ctr = s1_resps(i).bits.ctr
val target = s1_resps(i).bits.target
when (hit) {
s1_tageTaken := Mux(ctr === 0.U, s1_temp_altPred, true.B) // Use altpred on weak taken
s1_tageTarget := Mux(ctr === 0.U, s1_temp_altTarget, target)
s1_finalAltPred := s1_temp_altPred
s1_finalAltTarget := s1_temp_altTarget
}
s1_temp_alt_provided = (s1_temp_provided && hit || s1_temp_alt_provided) // assign before s1_provided
s1_temp_provided = s1_temp_provided || hit // Once hit then provide
s1_temp_alt_provider = Mux(hit, s1_temp_provider, s1_temp_alt_provider) // assign before s1 provider
s1_temp_provider = Mux(hit, i.U, s1_temp_provider) // Use the last hit as provider
s1_temp_altPred = Mux(hit, true.B, s1_temp_altPred) // Save current pred as potential altpred
s1_temp_altTarget = Mux(hit, target, s1_temp_altTarget)
// A table response (ITTageResp) extended with the index of the table it came from.
class ITTageTableInfo(implicit p: Parameters) extends ITTageResp {
// Index of the source table; wide enough to address ITTageNTables tables.
val tableIdx = UInt(log2Ceil(ITTageNTables).W)
}
s1_provided := s1_temp_provided
s1_provider := s1_temp_provider
s1_altProvided := s1_temp_alt_provided
s1_altProvider := s1_temp_alt_provider
s1_providerU := s1_resps(s1_temp_provider).bits.u
s1_providerCtr := s1_resps(s1_temp_provider).bits.ctr
s1_altProviderCtr := s1_resps(s1_temp_alt_provider).bits.ctr
s1_providerTarget := s1_resps(s1_temp_provider).bits.target
s1_altProviderTarget := s1_finalAltTarget
val inputRes = VecInit(s1_resps.zipWithIndex.map{case (r, i) => {
val tableInfo = Wire(new ITTageTableInfo)
tableInfo.u := r.bits.u
tableInfo.ctr := r.bits.ctr
tableInfo.target := r.bits.target
tableInfo.tableIdx := i.U(log2Ceil(ITTageNTables).W)
SelectTwoInterRes(r.valid, tableInfo)
}}.init)
val selectedInfo = ParallelSelectTwo(inputRes.reverse)
val provided = selectedInfo.hasOne
val altProvided = selectedInfo.hasTwo
val providerInfo = selectedInfo.first
val altProviderInfo = selectedInfo.second
val providerNull = providerInfo.ctr === 0.U
val basePred = base_table_resp.bits.ctr =/= 0.U
val baseTarget = base_table_resp.bits.target
s1_tageTaken := Mux1H(Seq(
(provided && !providerNull, providerInfo.ctr(ITTageCtrBits-1)),
(altProvided && providerNull, altProviderInfo.ctr(ITTageCtrBits-1)),
(!provided, basePred)
)) // TODO: reintroduce BIM
s1_tageTarget := Mux1H(Seq(
(provided && !providerNull, providerInfo.target),
(altProvided && providerNull, altProviderInfo.target),
(!provided, baseTarget)
))
s1_finalAltPred := Mux(altProvided, altProviderInfo.ctr(ITTageCtrBits-1), basePred)
s1_provided := provided
s1_provider := providerInfo.tableIdx
s1_altProvided := altProvided
s1_altProvider := altProviderInfo.tableIdx
s1_providerU := providerInfo.u
s1_providerCtr := providerInfo.ctr
s1_altProviderCtr := altProviderInfo.ctr
s1_providerTarget := providerInfo.target
s1_altProviderTarget := altProviderInfo.target
XSDebug(io.s2_fire, p"hit_taken_jalr:")
when(io.s2_fire && io.in.bits.resp_in(0).s2.hit_taken_on_jalr && s2_tageTaken) {
when(s2_tageTaken) {
io.out.resp.s2.preds.jalr_target := s2_tageTarget
// FIXME: should use s1 globally
io.out.resp.s2.preds.targets.last := s2_tageTarget
}
// this is handled in RAS
// val is_jalr = io.in.bits.resp_in(0).s2.preds.is_jalr
// val last_target_in = io.in.bits.resp_in(0).s2.preds.targets.last
// val last_target_out = io.out.resp.s2.preds.targets.last
// last_target_out := Mux(is_jalr, jalr_target, last_target_in)
resp_meta.provider.valid := s2_provided
resp_meta.provider.bits := s2_provider
......
......@@ -160,9 +160,17 @@ class RAS(implicit p: Parameters) extends BasePredictor {
spec_push := io.s2_fire && io.in.bits.resp_in(0).s2.hit_taken_on_call
spec_pop := io.s2_fire && io.in.bits.resp_in(0).s2.hit_taken_on_ret
when (spec_pop) {
io.out.resp.s2.preds.targets.last := spec_top_addr
val jalr_target = io.out.resp.s2.preds.jalr_target
val last_target_in = io.in.bits.resp_in(0).s2.preds.targets.last
val last_target_out = io.out.resp.s2.preds.targets.last
val is_jalr = io.in.bits.resp_in(0).s2.preds.is_jalr
val is_ret = io.in.bits.resp_in(0).s2.preds.is_ret
// assert(is_jalr && is_ret || !is_ret)
when(is_ret) {
jalr_target := spec_top_addr
// FIXME: should use s1 globally
}
last_target_out := Mux(is_jalr, jalr_target, last_target_in)
io.out.resp.s2.rasSp := spec_ras.sp
io.out.resp.s2.rasTop := spec_ras.top
......
......@@ -198,12 +198,12 @@ trait HasSC extends HasSCParameter { this: Tage =>
val scThresholds = List.fill(TageBanks)(RegInit(SCThreshold(5)))
val useThresholds = VecInit(scThresholds map (_.thres))
def belowThreshold(sum: SInt, threshold: UInt) = {
def aboveThreshold(sum: SInt, threshold: UInt) = {
def sign(x: SInt) = x(x.getWidth-1)
def pos(x: SInt) = !sign(x)
def neg(x: SInt) = sign(x)
val signedThres = threshold.zext
sum <= signedThres && pos(sum) || sum >= -signedThres && neg(sum)
sum > signedThres && pos(sum) || sum < -signedThres && neg(sum)
}
val updateThresholds = VecInit(useThresholds map (t => (t << 3) +& 21.U))
......@@ -244,13 +244,13 @@ trait HasSC extends HasSCParameter { this: Tage =>
val providerCtr = s1_providerCtrs(w)
val s1_pvdrCtrCentered = getPvdrCentered(providerCtr)
val s1_totalSums = VecInit(s1_scTableSums.map(_ +& s1_pvdrCtrCentered))
val s1_sumBelowThresholds = VecInit((s1_totalSums zip useThresholds) map { case (sum, thres) => belowThreshold(sum, thres)})
val s1_sumAboveThresholds = VecInit((s1_totalSums zip useThresholds) map { case (sum, thres) => aboveThreshold(sum, thres)})
val s1_scPreds = VecInit(s1_totalSums.map (_ >= 0.S))
val s2_sumBelowThresholds = RegEnable(s1_sumBelowThresholds, io.s1_fire)
val s2_sumAboveThresholds = RegEnable(s1_sumAboveThresholds, io.s1_fire)
val s2_scPreds = RegEnable(s1_scPreds, io.s1_fire)
val s2_scCtrs = RegEnable(VecInit(s1_scResps(w).map(r => r.ctr(s1_tageTakens(w).asUInt))), io.s1_fire)
val s2_scResps = VecInit(RegEnable(s1_scResps(w), io.s1_fire).map(_.ctr))
val s2_scCtrs = VecInit(s2_scResps.map(_(s2_tageTakens(w).asUInt)))
val s2_chooseBit = s2_tageTakens(w)
scMeta.tageTaken := s2_tageTakens(w)
scMeta.scUsed := s2_provideds(w)
......@@ -259,20 +259,24 @@ trait HasSC extends HasSCParameter { this: Tage =>
when (s2_provideds(w)) {
s2_sc_used(w) := true.B
s2_unconf(w) := s2_sumBelowThresholds(s2_chooseBit)
s2_conf(w) := !s2_sumBelowThresholds(s2_chooseBit)
s2_unconf(w) := !s2_sumAboveThresholds(s2_chooseBit)
s2_conf(w) := s2_sumAboveThresholds(s2_chooseBit)
// Use prediction from Statistical Corrector
XSDebug(p"---------tage_bank_${w} provided so that sc used---------\n")
when (!s2_sumBelowThresholds(s2_chooseBit)) {
when (s2_sumAboveThresholds(s2_chooseBit)) {
val pred = s2_scPreds(s2_chooseBit)
val debug_pc = Cat(debug_pc_s2, w.U, 0.U(instOffsetBits.W))
s2_agree(w) := s2_tageTakens(w) === pred
s2_disagree(w) := s2_tageTakens(w) =/= pred
// fit to always-taken condition
io.out.resp.s2.preds.br_taken_mask(w) := pred
// io.out.resp.s2.preds.br_taken_mask(w) := pred
XSDebug(p"pc(${Hexadecimal(debug_pc)}) SC(${w.U}) overriden pred to ${pred}\n")
}
}
io.out.resp.s2.preds.br_taken_mask(w) :=
Mux(s2_provideds(w) && s2_sumAboveThresholds(s2_chooseBit),
s2_scPreds(s2_chooseBit), s2_tageTakens(w))
val updateSCMeta = updateSCMetas(w)
val updateTageMeta = updateMetas(w)
......@@ -284,14 +288,14 @@ trait HasSC extends HasSCParameter { this: Tage =>
val pvdrCtr = updateTageMeta.providerCtr
val sum = ParallelSingedExpandingAdd(scOldCtrs.map(getCentered)) +& getPvdrCentered(pvdrCtr)
val sumAbs = sum.abs.asUInt
val sumBelowThreshold = belowThreshold(sum, useThresholds(w))
val sumAboveThreshold = aboveThreshold(sum, useThresholds(w))
scUpdateTagePreds(w) := tagePred
scUpdateTakens(w) := taken
(scUpdateOldCtrs(w) zip scOldCtrs).foreach{case (t, c) => t := c}
update_sc_used(w) := true.B
update_unconf(w) := sumBelowThreshold
update_conf(w) := !sumBelowThreshold
update_unconf(w) := !sumAboveThreshold
update_conf(w) := sumAboveThreshold
update_agree(w) := scPred === tagePred
update_disagree(w) := scPred =/= tagePred
sc_corr_tage_misp(w) := scPred === taken && tagePred =/= taken && update_conf(w)
......@@ -305,7 +309,7 @@ trait HasSC extends HasSCParameter { this: Tage =>
}
val updateThres = updateThresholds(w)
when (scPred =/= taken || sumBelowThreshold) {
when (scPred =/= taken || !sumAboveThreshold) {
scUpdateMask(w).foreach(_ := true.B)
XSDebug(sum < 0.S,
p"scUpdate: bank(${w}), scPred(${scPred}), tagePred(${tagePred}), " +
......
......@@ -49,6 +49,11 @@ trait TageParams extends HasBPUConst with HasXSParameter {
}.reduce(_+_)
}.reduce(_+_)
// True when ctr equals 2^(width-1): only the top bit is set.
def posUnconf(ctr: UInt) = {
  val midpoint = 1 << (ctr.getWidth - 1)
  ctr === midpoint.U
}
// True when ctr equals 2^(width-1) - 1: every bit below the top bit is set.
def negUnconf(ctr: UInt) = {
  val belowMidpoint = (1 << (ctr.getWidth - 1)) - 1
  ctr === belowMidpoint.U
}
// True when ctr sits on either side of the midpoint.
def unconf(ctr: UInt) = {
  val onUpperSide = posUnconf(ctr)
  val onLowerSide = negUnconf(ctr)
  onUpperSide || onLowerSide
}
}
trait HasFoldedHistory {
......@@ -98,7 +103,7 @@ class TageUpdate(implicit p: Parameters) extends TageBundle {
val oldCtr = UInt(TageCtrBits.W)
// update u
val uMask = Bool()
val u = UInt(2.W)
val u = Bool()
val reset_u = Bool()
}
......@@ -112,9 +117,9 @@ class TageMeta(val bank: Int)(implicit p: Parameters)
val providerU = Bool()
val providerCtr = UInt(TageCtrBits.W)
val basecnt = UInt(2.W)
val predcnt = UInt(3.W)
val predcnt = UInt(TageCtrBits.W)
val altpredhit = Bool()
val altpredcnt = UInt(3.W)
val altpredcnt = UInt(TageCtrBits.W)
val allocate = ValidUndirectioned(UInt(log2Ceil(BankTageNTables(bank)).W))
val taken = Bool()
val scMeta = new SCMeta(EnableSC, BankSCNTables(bank))
......@@ -526,44 +531,44 @@ class Tage(implicit p: Parameters) extends BaseTage {
val updateMisPreds = update.mispred_mask
// A table response (TageResp) extended with the index of the table it came from,
// parameterised by the bank whose table count determines the index width.
class TageTableInfo(val bank: Int)(implicit p: Parameters) extends TageResp {
// Index of the source table; wide enough to address BankTageNTables(bank) tables.
val tableIdx = UInt(log2Ceil(BankTageNTables(bank)).W)
}
// access tag tables and output meta info
for (w <- 0 until TageBanks) {
val s1_tageTaken = WireInit(bt.io.s1_cnt(w)(1))
var s1_altPred = WireInit(bt.io.s1_cnt(w)(1))
val s1_finalAltPred = WireInit(bt.io.s1_cnt(w)(1))
var s1_provided = false.B
var s1_provider = 0.U
var s1_altprednum = 0.U
var s1_altpredhit = false.B
var s1_prednum = 0.U
var s1_basecnt = 0.U
for (i <- 0 until BankTageNTables(w)) {
val hit = s1_resps(w)(i).valid
val ctr = s1_resps(w)(i).bits.ctr
when (hit) {
s1_tageTaken := Mux(ctr === 3.U || ctr === 4.U, s1_altPred, ctr(2)) // Use altpred on weak taken
s1_finalAltPred := s1_altPred
}
s1_altpredhit = (s1_provided && hit) || s1_altpredhit // Once hit then provide
s1_provided = s1_provided || hit // Once hit then provide
s1_provider = Mux(hit, i.U, s1_provider) // Use the last hit as provider
s1_altPred = Mux(hit, ctr(2), s1_altPred) // Save current pred as potential altpred
s1_altprednum = Mux(hit,s1_prednum,s1_altprednum) // get altpredict table number
s1_prednum = Mux(hit,i.U,s1_prednum) // get predict table number
}
s1_provideds(w) := s1_provided
val inputRes = VecInit(s1_resps(w).zipWithIndex.map{case (r, i) => {
val tableInfo = Wire(new TageTableInfo(w))
tableInfo.u := r.bits.u
tableInfo.ctr := r.bits.ctr
tableInfo.tableIdx := i.U(log2Ceil(BankTageNTables(w)).W)
SelectTwoInterRes(r.valid, tableInfo)
}})
val selectedInfo = ParallelSelectTwo(inputRes.reverse)
val provided = selectedInfo.hasOne
val altProvided = selectedInfo.hasTwo
val providerInfo = selectedInfo.first
val altProviderInfo = selectedInfo.second
val providerUnconf = unconf(providerInfo.ctr)
s1_provideds(w) := provided
s1_basecnts(w) := bt.io.s1_cnt(w)
s1_providers(w) := s1_provider
s1_finalAltPreds(w) := s1_finalAltPred
s1_tageTakens(w) := s1_tageTaken
s1_providerUs(w) := s1_resps(w)(s1_provider).bits.u
s1_providerCtrs(w) := s1_resps(w)(s1_provider).bits.ctr
s1_prednums(w) := s1_prednum
s1_altprednums(w) := s1_altprednum
s1_predcnts(w) := s1_resps(w)(s1_prednum).bits.ctr
s1_altpredhits(w) := s1_altpredhit
s1_altpredcnts(w) := s1_resps(w)(s1_altprednum).bits.ctr
s1_providers(w) := providerInfo.tableIdx
s1_finalAltPreds(w) := Mux(altProvided, altProviderInfo.ctr(TageCtrBits-1), bt.io.s1_cnt(w)(1))
s1_tageTakens(w) := Mux1H(Seq(
(provided && !providerUnconf, providerInfo.ctr(TageCtrBits-1)),
(altProvided && providerUnconf, altProviderInfo.ctr(TageCtrBits-1)),
(!provided, bt.io.s1_cnt(w)(1))
))
s1_providerUs(w) := providerInfo.u
s1_providerCtrs(w) := providerInfo.ctr
s1_prednums(w) := providerInfo.tableIdx
s1_altprednums(w) := altProviderInfo.tableIdx
s1_predcnts(w) := providerInfo.ctr
s1_altpredhits(w) := altProvided
s1_altpredcnts(w) := altProviderInfo.ctr
resp_meta(w).provider.valid := s2_provideds(w)
resp_meta(w).provider.bits := s2_providers(w)
......@@ -586,8 +591,8 @@ class Tage(implicit p: Parameters) extends BaseTage {
val allocatableSlots =
RegEnable(
VecInit(s1_resps(w).map(r => !r.valid && !r.bits.u)).asUInt &
~(LowerMask(UIntToOH(s1_provider), BankTageNTables(w)) &
Fill(BankTageNTables(w), s1_provided.asUInt)),
~(LowerMask(UIntToOH(s1_providers(w)), BankTageNTables(w)) &
Fill(BankTageNTables(w), s1_provideds(w).asUInt)),
io.s1_fire
)
val allocLFSR = LFSR64()(BankTageNTables(w) - 1, 0)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册