diff --git a/debug/cputest.sh b/debug/cputest.sh index 7ea91d0d003568ef15b067b34736e5af56b7880b..5510d73acde11dc1fc7b94c3c5fdbb2104111d84 100755 --- a/debug/cputest.sh +++ b/debug/cputest.sh @@ -6,7 +6,7 @@ for test in $(ls $TEST_HOME/tests) do t=${test%.c} echo -n -e "\x1b[0m $t: " - make -C $TEST_HOME ARCH=riscv64-noop E=0 ALL=$t run 2>/dev/null | grep "HIT GOOD TRAP" + make -C $TEST_HOME ARCH=riscv64-noop E=0 ALL=$t run 2>/dev/null | grep -E "HIT GOOD TRAP|IPC" if [[ $? == 1 ]]; then echo -e "\x1b[31mfail" diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala index 0884847b06e03e80ddf382953205abe0f292b8ad..d84162fda6ff013dfca640d84cc74727065afc56 100644 --- a/src/main/scala/xiangshan/XSCore.scala +++ b/src/main/scala/xiangshan/XSCore.scala @@ -34,7 +34,7 @@ case class XSCoreParameters EnableBPD: Boolean = true, EnableRAS: Boolean = true, EnableLB: Boolean = true, - EnableLoop: Boolean = false, + EnableLoop: Boolean = true, HistoryLength: Int = 64, BtbSize: Int = 2048, JbtacSize: Int = 1024, diff --git a/src/main/scala/xiangshan/frontend/BPU.scala b/src/main/scala/xiangshan/frontend/BPU.scala index f243400c74501708153d9b0a27289dba15888711..64d5fe3d4e1549946cb3c4c4d83bfc353d25995e 100644 --- a/src/main/scala/xiangshan/frontend/BPU.scala +++ b/src/main/scala/xiangshan/frontend/BPU.scala @@ -79,6 +79,7 @@ abstract class BasePredictor extends XSModule with HasBPUParameter{ val hist = Input(UInt(HistoryLength.W)) val inMask = Input(UInt(PredictWidth.W)) val update = Flipped(ValidIO(new BranchUpdateInfoWithHist)) + val outFire = Input(Bool()) } val io = new DefaultBasePredictorIO @@ -608,7 +609,11 @@ class BPU extends BaseBPU { loop.io.pc.valid := s2.io.out.fire() loop.io.pc.bits := s2.io.out.bits.pc loop.io.inMask := s2.io.out.bits.mask - + loop.io.outFire := s3.io.pred.fire() + loop.io.respIn.taken := s3.io.pred.bits.taken + loop.io.respIn.jmpIdx := s3.io.pred.bits.jmpIdx + + s3.io.in.bits.resp.tage <> tage.io.resp 
s3.io.in.bits.resp.loop <> loop.io.resp for (i <- 0 until PredictWidth) { diff --git a/src/main/scala/xiangshan/frontend/Bim.scala b/src/main/scala/xiangshan/frontend/Bim.scala index 20481c6436691a8d37c9783a9bd9b2442d2f041c..5f0fb444088d6dcb7b45b2eeb323b3122afeabd2 100644 --- a/src/main/scala/xiangshan/frontend/Bim.scala +++ b/src/main/scala/xiangshan/frontend/Bim.scala @@ -1,140 +1,140 @@ -package xiangshan.frontend - -import chisel3._ -import chisel3.util._ -import xiangshan._ -import xiangshan.backend.ALUOpType -import utils._ -import xiangshan.backend.decode.XSTrap - -trait BimParams extends HasXSParameter { - val BimBanks = PredictWidth - val BimSize = 4096 - val nRows = BimSize / BimBanks - val bypassEntries = 4 -} - -class BIM extends BasePredictor with BimParams{ - class BIMResp extends Resp { - val ctrs = Vec(PredictWidth, UInt(2.W)) - } - class BIMMeta extends Meta { - val ctrs = Vec(PredictWidth, UInt(2.W)) - } - class BIMFromOthers extends FromOthers {} - - class BIMIO extends DefaultBasePredictorIO { - val resp = Output(new BIMResp) - val meta = Output(new BIMMeta) - } - - override val io = IO(new BIMIO) - // Update logic - // 1 calculate new 2-bit saturated counter value - def satUpdate(old: UInt, len: Int, taken: Bool): UInt = { - val oldSatTaken = old === ((1 << len)-1).U - val oldSatNotTaken = old === 0.U - Mux(oldSatTaken && taken, ((1 << len)-1).U, - Mux(oldSatNotTaken && !taken, 0.U, - Mux(taken, old + 1.U, old - 1.U))) - } - - val bimAddr = new TableAddr(log2Up(BimSize), BimBanks) - - val pcLatch = RegEnable(io.pc.bits, io.pc.valid) - - val bim = List.fill(BimBanks) { - Module(new SRAMTemplate(UInt(2.W), set = nRows, shouldReset = false, holdRead = true)) - } - - val doing_reset = RegInit(true.B) - val resetRow = RegInit(0.U(log2Ceil(nRows).W)) - resetRow := resetRow + doing_reset - when (resetRow === (nRows-1).U) { doing_reset := false.B } - - val baseBank = bimAddr.getBank(io.pc.bits) - - val realMask = circularShiftRight(io.inMask, BimBanks, 
baseBank) - - // those banks whose indexes are less than baseBank are in the next row - val isInNextRow = VecInit((0 until BtbBanks).map(_.U < baseBank)) - - val baseRow = bimAddr.getBankIdx(io.pc.bits) - - val realRow = VecInit((0 until BimBanks).map(b => Mux(isInNextRow(b.U), (baseRow+1.U)(log2Up(nRows)-1, 0), baseRow))) - - val realRowLatch = VecInit(realRow.map(RegEnable(_, enable=io.pc.valid))) - - for (b <- 0 until BimBanks) { - bim(b).reset := reset.asBool - bim(b).io.r.req.valid := realMask(b) && io.pc.valid - bim(b).io.r.req.bits.setIdx := realRow(b) - } - - val bimRead = VecInit(bim.map(_.io.r.resp.data(0))) - - val baseBankLatch = bimAddr.getBank(pcLatch) - - // e.g: baseBank == 5 => (5, 6,..., 15, 0, 1, 2, 3, 4) - val bankIdxInOrder = VecInit((0 until BimBanks).map(b => (baseBankLatch +& b.U)(log2Up(BimBanks)-1, 0))) - - for (b <- 0 until BimBanks) { - val ctr = bimRead(bankIdxInOrder(b)) - io.resp.ctrs(b) := ctr - io.meta.ctrs(b) := ctr - } - - val u = io.update.bits.ui - - val updateBank = bimAddr.getBank(u.pc) - val updateRow = bimAddr.getBankIdx(u.pc) - - - val wrbypass_ctrs = Reg(Vec(bypassEntries, Vec(BimBanks, UInt(2.W)))) - val wrbypass_ctr_valids = Reg(Vec(bypassEntries, Vec(BimBanks, Bool()))) - val wrbypass_rows = Reg(Vec(bypassEntries, UInt(log2Up(nRows).W))) - val wrbypass_enq_idx = RegInit(0.U(log2Up(bypassEntries).W)) - - val wrbypass_hits = VecInit((0 until bypassEntries).map( i => - !doing_reset && wrbypass_rows(i) === updateRow)) - val wrbypass_hit = wrbypass_hits.reduce(_||_) - val wrbypass_hit_idx = PriorityEncoder(wrbypass_hits) - - val oldCtr = Mux(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank), wrbypass_ctrs(wrbypass_hit_idx)(updateBank), u.brInfo.bimCtr) - val newTaken = u.taken - val newCtr = satUpdate(oldCtr, 2, newTaken) - // val oldSaturated = newCtr === oldCtr - - val needToUpdate = io.update.valid && u.pd.isBr - - when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))} - - when 
(needToUpdate) { - when (wrbypass_hit) { - wrbypass_ctrs(wrbypass_hit_idx)(updateBank) := newCtr - wrbypass_ctr_valids(wrbypass_enq_idx)(updateBank) := true.B - } .otherwise { - wrbypass_ctrs(wrbypass_hit_idx)(updateBank) := newCtr - (0 until BimBanks).foreach(b => wrbypass_ctr_valids(wrbypass_enq_idx)(b) := false.B) // reset valid bits - wrbypass_ctr_valids(wrbypass_enq_idx)(updateBank) := true.B - wrbypass_rows(wrbypass_enq_idx) := updateRow - wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Up(bypassEntries)-1,0) - } - } - - for (b <- 0 until BimBanks) { - bim(b).io.w.req.valid := needToUpdate && b.U === updateBank || doing_reset - bim(b).io.w.req.bits.setIdx := Mux(doing_reset, resetRow, updateRow) - bim(b).io.w.req.bits.data := Mux(doing_reset, 2.U(2.W), newCtr) - } - - if (BPUDebug && debug) { - XSDebug(doing_reset, "Reseting...\n") - XSDebug("[update] v=%d pc=%x pnpc=%x tgt=%x brTgt=%x\n", io.update.valid, u.pc, u.pnpc, u.target, u.brTarget) - XSDebug("[update] taken=%d isMisPred=%d", u.taken, u.isMisPred) - XSDebug(false, true.B, p"brTag=${u.brTag} pd.isBr=${u.pd.isBr} brInfo.bimCtr=${Binary(u.brInfo.bimCtr)}\n") - XSDebug("needToUpdate=%d updateBank=%x updateRow=%x newCtr=%b oldCtr=%b\n", needToUpdate, updateBank, updateRow, newCtr, oldCtr) - XSDebug("[wrbypass] hit=%d hits=%b\n", wrbypass_hit, wrbypass_hits.asUInt) - } - +package xiangshan.frontend + +import chisel3._ +import chisel3.util._ +import xiangshan._ +import xiangshan.backend.ALUOpType +import utils._ +import xiangshan.backend.decode.XSTrap + +trait BimParams extends HasXSParameter { + val BimBanks = PredictWidth + val BimSize = 4096 + val nRows = BimSize / BimBanks + val bypassEntries = 4 +} + +class BIM extends BasePredictor with BimParams{ + class BIMResp extends Resp { + val ctrs = Vec(PredictWidth, UInt(2.W)) + } + class BIMMeta extends Meta { + val ctrs = Vec(PredictWidth, UInt(2.W)) + } + class BIMFromOthers extends FromOthers {} + + class BIMIO extends DefaultBasePredictorIO { + val resp 
= Output(new BIMResp) + val meta = Output(new BIMMeta) + } + + override val io = IO(new BIMIO) + // Update logic + // 1 calculate new 2-bit saturated counter value + def satUpdate(old: UInt, len: Int, taken: Bool): UInt = { + val oldSatTaken = old === ((1 << len)-1).U + val oldSatNotTaken = old === 0.U + Mux(oldSatTaken && taken, ((1 << len)-1).U, + Mux(oldSatNotTaken && !taken, 0.U, + Mux(taken, old + 1.U, old - 1.U))) + } + + val bimAddr = new TableAddr(log2Up(BimSize), BimBanks) + + val pcLatch = RegEnable(io.pc.bits, io.pc.valid) + + val bim = List.fill(BimBanks) { + Module(new SRAMTemplate(UInt(2.W), set = nRows, shouldReset = false, holdRead = true)) + } + + val doing_reset = RegInit(true.B) + val resetRow = RegInit(0.U(log2Ceil(nRows).W)) + resetRow := resetRow + doing_reset + when (resetRow === (nRows-1).U) { doing_reset := false.B } + + val baseBank = bimAddr.getBank(io.pc.bits) + + val realMask = circularShiftRight(io.inMask, BimBanks, baseBank) + + // those banks whose indexes are less than baseBank are in the next row + val isInNextRow = VecInit((0 until BimBanks).map(_.U < baseBank)) + + val baseRow = bimAddr.getBankIdx(io.pc.bits) + + val realRow = VecInit((0 until BimBanks).map(b => Mux(isInNextRow(b.U), (baseRow+1.U)(log2Up(nRows)-1, 0), baseRow))) + + val realRowLatch = VecInit(realRow.map(RegEnable(_, enable=io.pc.valid))) + + for (b <- 0 until BimBanks) { + bim(b).reset := reset.asBool + bim(b).io.r.req.valid := realMask(b) && io.pc.valid + bim(b).io.r.req.bits.setIdx := realRow(b) + } + + val bimRead = VecInit(bim.map(_.io.r.resp.data(0))) + + val baseBankLatch = bimAddr.getBank(pcLatch) + + // e.g: baseBank == 5 => (5, 6,..., 15, 0, 1, 2, 3, 4) + val bankIdxInOrder = VecInit((0 until BimBanks).map(b => (baseBankLatch +& b.U)(log2Up(BimBanks)-1, 0))) + + for (b <- 0 until BimBanks) { + val ctr = bimRead(bankIdxInOrder(b)) + io.resp.ctrs(b) := ctr + io.meta.ctrs(b) := ctr + } + + val u = io.update.bits.ui + + val updateBank = 
bimAddr.getBank(u.pc) + val updateRow = bimAddr.getBankIdx(u.pc) + + + val wrbypass_ctrs = Reg(Vec(bypassEntries, Vec(BimBanks, UInt(2.W)))) + val wrbypass_ctr_valids = Reg(Vec(bypassEntries, Vec(BimBanks, Bool()))) + val wrbypass_rows = Reg(Vec(bypassEntries, UInt(log2Up(nRows).W))) + val wrbypass_enq_idx = RegInit(0.U(log2Up(bypassEntries).W)) + + val wrbypass_hits = VecInit((0 until bypassEntries).map( i => + !doing_reset && wrbypass_rows(i) === updateRow)) + val wrbypass_hit = wrbypass_hits.reduce(_||_) + val wrbypass_hit_idx = PriorityEncoder(wrbypass_hits) + + val oldCtr = Mux(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank), wrbypass_ctrs(wrbypass_hit_idx)(updateBank), u.brInfo.bimCtr) + val newTaken = u.taken + val newCtr = satUpdate(oldCtr, 2, newTaken) + // val oldSaturated = newCtr === oldCtr + + val needToUpdate = io.update.valid && u.pd.isBr + + when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))} + + when (needToUpdate) { + when (wrbypass_hit) { + wrbypass_ctrs(wrbypass_hit_idx)(updateBank) := newCtr + wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank) := true.B // hit: validate the bank in the entry that matched, not the enqueue slot + } .otherwise { + wrbypass_ctrs(wrbypass_enq_idx)(updateBank) := newCtr // miss: the counter goes into the slot being allocated (hit_idx is meaningless here) + (0 until BimBanks).foreach(b => wrbypass_ctr_valids(wrbypass_enq_idx)(b) := false.B) // reset valid bits + wrbypass_ctr_valids(wrbypass_enq_idx)(updateBank) := true.B + wrbypass_rows(wrbypass_enq_idx) := updateRow + wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Up(bypassEntries)-1,0) + } + } + + for (b <- 0 until BimBanks) { + bim(b).io.w.req.valid := needToUpdate && b.U === updateBank || doing_reset + bim(b).io.w.req.bits.setIdx := Mux(doing_reset, resetRow, updateRow) + bim(b).io.w.req.bits.data := Mux(doing_reset, 2.U(2.W), newCtr) + } + + if (BPUDebug && debug) { + XSDebug(doing_reset, "Reseting...\n") + XSDebug("[update] v=%d pc=%x pnpc=%x tgt=%x brTgt=%x\n", io.update.valid, u.pc, u.pnpc, u.target, u.brTarget) + XSDebug("[update] taken=%d isMisPred=%d", u.taken, 
u.isMisPred) + XSDebug(false, true.B, p"brTag=${u.brTag} pd.isBr=${u.pd.isBr} brInfo.bimCtr=${Binary(u.brInfo.bimCtr)}\n") + XSDebug("needToUpdate=%d updateBank=%x updateRow=%x newCtr=%b oldCtr=%b\n", needToUpdate, updateBank, updateRow, newCtr, oldCtr) + XSDebug("[wrbypass] hit=%d hits=%b\n", wrbypass_hit, wrbypass_hits.asUInt) + } + } \ No newline at end of file diff --git a/src/main/scala/xiangshan/frontend/LoopPredictor.scala b/src/main/scala/xiangshan/frontend/LoopPredictor.scala index 29ecee27df8e5a117f0a605165b164ea103d053c..27773df3d7f8e67eadcf8fa5237a65ffd3984ca8 100644 --- a/src/main/scala/xiangshan/frontend/LoopPredictor.scala +++ b/src/main/scala/xiangshan/frontend/LoopPredictor.scala @@ -67,7 +67,10 @@ class LTBColumnUpdate extends LTBBundle { class LTBColumn extends LTBModule { val io = IO(new Bundle() { // if3 send req - val req = Input(Valid(new LTBColumnReq)) + val req = Input(new LTBColumnReq) + val if3_fire = Input(Bool()) + val if4_fire = Input(Bool()) + val outMask = Input(Bool()) // send out resp to if4 val resp = Output(new LTBColumnResp) val update = Input(Valid(new LTBColumnUpdate)) @@ -86,14 +89,19 @@ class LTBColumn extends LTBModule { when (resetIdx === (nRows - 1).U) { doingReset := false.B } // during branch prediction - val if3_idx = io.req.bits.idx - val if3_tag = io.req.bits.tag - val if3_pc = io.req.bits.pc // only for debug - val if3_entry = WireInit(ltb(if3_idx)) + val if4_idx = io.req.idx + val if4_tag = io.req.tag + val if4_pc = io.req.pc // only for debug + val if4_entry = WireInit(ltb(if4_idx)) - io.resp.meta := RegEnable(if3_entry.specCnt + 1.U, io.req.valid) - // io.resp.exit := RegNext(if3_tag === if3_entry.tag && (if3_entry.specCnt + 1.U) === if3_entry.tripCnt/* && if3_entry.isConf*/ && io.req.valid) - io.resp.exit := RegEnable(if3_tag === if3_entry.tag && (if3_entry.specCnt + 1.U) === if3_entry.tripCnt && io.req.valid && !if3_entry.unusable, io.req.valid) + val valid = RegInit(false.B) + when (io.if4_fire) { valid := 
false.B } + when (io.if3_fire) { valid := true.B } + when (io.update.valid && io.update.bits.misPred) { valid := false.B } + + io.resp.meta := if4_entry.specCnt + 1.U + // io.resp.exit := if4_tag === if4_entry.tag && (if4_entry.specCnt + 1.U) === if4_entry.tripCnt && valid && !if4_entry.unusable + io.resp.exit := if4_tag === if4_entry.tag && (if4_entry.specCnt + 1.U) === if4_entry.tripCnt && valid && if4_entry.isConf // when resolving a branch val entry = ltb(updateIdx) @@ -109,8 +117,8 @@ class LTBColumn extends LTBModule { wEntry.conf := 0.U wEntry.age := 7.U wEntry.tripCnt := Fill(cntBits, 1.U(1.W)) - wEntry.specCnt := 1.U - wEntry.nSpecCnt := 1.U + wEntry.specCnt := Mux(io.update.bits.taken, 1.U, 0.U) + wEntry.nSpecCnt := Mux(io.update.bits.taken, 1.U, 0.U) wEntry.brTag := updateBrTag wEntry.unusable := false.B ltb(updateIdx) := wEntry @@ -119,10 +127,14 @@ class LTBColumn extends LTBModule { when (io.update.bits.taken) { wEntry.nSpecCnt := entry.nSpecCnt + 1.U wEntry.specCnt := Mux(io.update.bits.misPred/* && !entry.brTag.needBrFlush(updateBrTag)*/, entry.nSpecCnt + 1.U, entry.specCnt) + wEntry.conf := Mux(io.update.bits.misPred, 0.U, entry.conf) + // wEntry.tripCnt := Fill(cntBits, 1.U(1.W)) + wEntry.tripCnt := Mux(io.update.bits.misPred, Fill(cntBits, 1.U(1.W)), entry.tripCnt) // A not-taken loop-branch found in the LTB during branch resolution updates its trip count and conf. 
}.otherwise { // wEntry.conf := Mux(entry.nSpecCnt === entry.tripCnt, Mux(entry.isLearned, 7.U, entry.conf + 1.U), 0.U) - wEntry.conf := Mux(io.update.bits.misPred, 0.U, Mux(entry.isLearned, 7.U, entry.conf + 1.U)) + // wEntry.conf := Mux(io.update.bits.misPred, 0.U, Mux(entry.isLearned, 7.U, entry.conf + 1.U)) + wEntry.conf := Mux((entry.nSpecCnt + 1.U) === entry.tripCnt, Mux(entry.isLearned, 7.U, entry.conf + 1.U), 0.U) // wEntry.tripCnt := entry.nSpecCnt + 1.U wEntry.tripCnt := io.update.bits.meta wEntry.specCnt := Mux(io.update.bits.misPred, 0.U, entry.specCnt/* - entry.nSpecCnt - 1.U*/) @@ -134,14 +146,13 @@ class LTBColumn extends LTBModule { } } - // speculatively update specCnt - when (io.req.valid && if3_entry.tag === if3_tag) { - when ((if3_entry.specCnt + 1.U) === if3_entry.tripCnt/* && if3_entry.isConf*/) { - ltb(if3_idx).age := 7.U - ltb(if3_idx).specCnt := 0.U + when (io.if4_fire && if4_entry.tag === if4_tag && io.outMask) { + when ((if4_entry.specCnt + 1.U) === if4_entry.tripCnt) { + ltb(if4_idx).age := 7.U + ltb(if4_idx).specCnt := 0.U }.otherwise { - ltb(if3_idx).age := Mux(if3_entry.age === 7.U, 7.U, if3_entry.age + 1.U) - ltb(if3_idx).specCnt := if3_entry.specCnt + 1.U + ltb(if4_idx).age := Mux(if4_entry.age === 7.U, 7.U, if4_entry.age + 1.U) + ltb(if4_idx).specCnt := if4_entry.specCnt + 1.U } } @@ -157,33 +168,32 @@ class LTBColumn extends LTBModule { } } - // bypass for if3_entry.specCnt - when (io.update.valid && !doingReset && io.req.valid && updateIdx === if3_idx) { + // bypass for if4_entry.specCnt + when (io.update.valid && !doingReset && valid && updateIdx === if4_idx) { when (!tagMatch && io.update.bits.misPred || tagMatch) { - if3_entry.specCnt := wEntry.specCnt + if4_entry.specCnt := wEntry.specCnt } } - when (io.repair && !doingReset && io.req.valid) { - if3_entry.specCnt := if3_entry.nSpecCnt + when (io.repair && !doingReset && valid) { + if4_entry.specCnt := if4_entry.nSpecCnt } if (BPUDebug && debug) { //debug info 
XSDebug(doingReset, "Reseting...\n") - XSDebug("[IF3][req] v=%d pc=%x idx=%x tag=%x\n", io.req.valid, io.req.bits.pc, io.req.bits.idx, io.req.bits.tag) - XSDebug("[IF3][if3_entry] tag=%x conf=%d age=%d tripCnt=%d specCnt=%d nSpecCnt=%d", if3_entry.tag, if3_entry.conf, if3_entry.age, if3_entry.tripCnt, if3_entry.specCnt, if3_entry.nSpecCnt) - XSDebug(false, true.B, p" brTag=${if3_entry.brTag}\n") - // XSDebug("[IF4] idx=%x tag=%x specCnt=%d\n", if4_idx, if4_tag, if4_specCnt) - // XSDebug(RegNext(io.req.valid) && if4_entry.tag === if4_tag, "[IF4][speculative update] new specCnt=%d\n", - // Mux(if4_specCnt === if4_entry.tripCnt && if4_entry.isLearned, 0.U, if4_specCnt + 1.U)) - XSDebug(io.req.valid && if3_entry.tag === if3_tag, "[IF3][speculative update] new specCnt=%d\n", - Mux(if3_entry.specCnt === if3_entry.tripCnt && if3_entry.isConf, 0.U, if3_entry.specCnt + 1.U)) + XSDebug("if3_fire=%d if4_fire=%d valid=%d\n", io.if3_fire, io.if4_fire,valid) + XSDebug("[req] v=%d pc=%x idx=%x tag=%x\n", valid, io.req.pc, io.req.idx, io.req.tag) + XSDebug("[if4_entry] tag=%x conf=%d age=%d tripCnt=%d specCnt=%d nSpecCnt=%d", + if4_entry.tag, if4_entry.conf, if4_entry.age, if4_entry.tripCnt, if4_entry.specCnt, if4_entry.nSpecCnt) + XSDebug(false, true.B, p" brTag=${if4_entry.brTag} unusable=${if4_entry.unusable}\n") + XSDebug(io.if4_fire && if4_entry.tag === if4_tag && io.outMask, "[speculative update] new specCnt=%d\n", + Mux((if4_entry.specCnt + 1.U) === if4_entry.tripCnt, 0.U, if4_entry.specCnt + 1.U)) XSDebug("[update] v=%d misPred=%d pc=%x idx=%x tag=%x meta=%d taken=%d tagMatch=%d cntMatch=%d", io.update.valid, io.update.bits.misPred, io.update.bits.pc, updateIdx, updateTag, io.update.bits.meta, io.update.bits.taken, tagMatch, cntMatch) XSDebug(false, true.B, p" brTag=${updateBrTag}\n") XSDebug("[entry ] tag=%x conf=%d age=%d tripCnt=%d specCnt=%d nSpecCnt=%d", entry.tag, entry.conf, entry.age, entry.tripCnt, entry.specCnt, entry.nSpecCnt) - XSDebug(false, true.B, p" 
brTag=${entry.brTag}\n") + XSDebug(false, true.B, p" brTag=${entry.brTag} unusable=${entry.unusable}\n") XSDebug("[wEntry] tag=%x conf=%d age=%d tripCnt=%d specCnt=%d nSpecCnt=%d", wEntry.tag, wEntry.conf, wEntry.age, wEntry.tripCnt, wEntry.specCnt, wEntry.nSpecCnt) - XSDebug(false, true.B, p" brTag=${wEntry.brTag}\n") + XSDebug(false, true.B, p" brTag=${wEntry.brTag} unusable=${wEntry.unusable}\n") XSDebug(io.update.valid && io.update.bits.misPred || io.repair, "MisPred or repairing, all of the nSpecCnts copy their values into the specCnts\n") } @@ -196,8 +206,13 @@ class LoopPredictor extends BasePredictor with LTBParams { class LoopMeta extends Meta { val specCnts = Vec(PredictWidth, UInt(cntBits.W)) } + class LoopRespIn extends XSBundle { + val taken = Bool() + val jmpIdx = UInt(log2Up(PredictWidth).W) + } class LoopIO extends DefaultBasePredictorIO { + val respIn = Input(new LoopRespIn) val resp = Output(new LoopResp) val meta = Output(new LoopMeta) } @@ -208,32 +223,37 @@ class LoopPredictor extends BasePredictor with LTBParams { val ltbAddr = new TableAddr(idxLen + 4, PredictWidth) - val baseBank = ltbAddr.getBank(io.pc.bits) - val baseRow = ltbAddr.getBankIdx(io.pc.bits) - val baseTag = ltbAddr.getTag(io.pc.bits) + // Latch for 1 cycle + val pc = RegEnable(io.pc.bits, io.pc.valid) + val inMask = RegEnable(io.inMask, io.pc.valid) + val baseBank = ltbAddr.getBank(pc) + val baseRow = ltbAddr.getBankIdx(pc) + val baseTag = ltbAddr.getTag(pc) val nextRowStartsUp = baseRow.andR // TODO: use parallel andR val isInNextRow = VecInit((0 until PredictWidth).map(_.U < baseBank)) val tagIncremented = VecInit((0 until PredictWidth).map(i => isInNextRow(i.U) && nextRowStartsUp)) val realTags = VecInit((0 until PredictWidth).map(i => Mux(tagIncremented(i), baseTag + 1.U, baseTag)(tagLen - 1, 0))) val bankIdxInOrder = VecInit((0 until PredictWidth).map(i => (baseBank +& i.U)(log2Up(PredictWidth) - 1, 0))) - val realMask = circularShiftLeft(io.inMask, PredictWidth, baseBank) 
+ val realMask = circularShiftLeft(inMask, PredictWidth, baseBank) + val outMask = inMask & (Fill(PredictWidth, !io.respIn.taken) | (Fill(PredictWidth, 1.U(1.W)) >> (~io.respIn.jmpIdx))) for (i <- 0 until PredictWidth) { - ltbs(i).io.req.bits.pc := io.pc.bits + ltbs(i).io.req.pc := pc + ltbs(i).io.outMask := false.B for (j <- 0 until PredictWidth) { when (Mux(isInNextRow(i), baseBank + j.U === (PredictWidth + i).U, baseBank + j.U === i.U)) { - ltbs(i).io.req.bits.pc := io.pc.bits + (j.U << 1) + ltbs(i).io.req.pc := pc + (j.U << 1) + ltbs(i).io.outMask := outMask(j).asBool } } } for (i <- 0 until PredictWidth) { - ltbs(i).io.req.valid := io.pc.valid && !io.flush && realMask(i) - // ltbs(i).io.req.bits.pc := io.pc.bits + (bankIdxInOrder(i) << 1) // only for debug - ltbs(i).io.req.bits.idx := Mux(isInNextRow(i), baseRow + 1.U, baseRow) - ltbs(i).io.req.bits.tag := realTags(i) - // ltbs(i).io.if4_fire := io.if4_fire - // ltbs(i).io.update := io.update + ltbs(i).io.if3_fire := io.pc.valid + ltbs(i).io.if4_fire := io.outFire + ltbs(i).io.req.idx := Mux(isInNextRow(i), baseRow + 1.U, baseRow) + ltbs(i).io.req.tag := realTags(i) + // ltbs(i).io.outMask := outMask(i) ltbs(i).io.update.valid := i.U === ltbAddr.getBank(io.update.bits.ui.pc) && io.update.valid && io.update.bits.ui.pd.isBr ltbs(i).io.update.bits.misPred := io.update.bits.ui.isMisPred ltbs(i).io.update.bits.pc := io.update.bits.ui.pc @@ -243,30 +263,28 @@ class LoopPredictor extends BasePredictor with LTBParams { ltbs(i).io.repair := i.U =/= ltbAddr.getBank(io.update.bits.ui.pc) && io.update.valid && io.update.bits.ui.isMisPred } - val baseBankLatch = RegEnable(baseBank, io.pc.valid) - // val bankIdxInOrder = VecInit((0 until PredictWidth).map(i => (baseBankLatch +& i.U)(log2Up(PredictWidth) - 1, 0)))] - val bankIdxInOrderLatch = RegEnable(bankIdxInOrder, io.pc.valid) val ltbResps = VecInit((0 until PredictWidth).map(i => ltbs(i).io.resp)) - (0 until PredictWidth).foreach(i => io.resp.exit(i) := 
ltbResps(bankIdxInOrderLatch(i)).exit) - (0 until PredictWidth).foreach(i => io.meta.specCnts(i) := ltbResps(bankIdxInOrderLatch(i)).meta) + (0 until PredictWidth).foreach(i => io.resp.exit(i) := ltbResps(bankIdxInOrder(i)).exit) + (0 until PredictWidth).foreach(i => io.meta.specCnts(i) := ltbResps(bankIdxInOrder(i)).meta) if (BPUDebug && debug) { // debug info - XSDebug("[IF3][req] fire=%d flush=%d fetchpc=%x baseBank=%x baseRow=%x baseTag=%x\n", io.pc.valid, io.flush, io.pc.bits, baseBank, baseRow, baseTag) - XSDebug("[IF3][req] isInNextRow=%b tagInc=%b\n", isInNextRow.asUInt, tagIncremented.asUInt) + XSDebug("[IF3][req] fire=%d flush=%d fetchpc=%x\n", io.pc.valid, io.flush, io.pc.bits) + XSDebug("[IF4][req] fire=%d baseBank=%x baseRow=%x baseTag=%x\n", io.outFire, baseBank, baseRow, baseTag) + XSDebug("[IF4][req] isInNextRow=%b tagInc=%b\n", isInNextRow.asUInt, tagIncremented.asUInt) for (i <- 0 until PredictWidth) { - XSDebug("[IF3][req] bank %d: v=%d mask=%d pc=%x idx=%x tag=%x\n", i.U, ltbs(i).io.req.valid, realMask(i), ltbs(i).io.req.bits.pc, ltbs(i).io.req.bits.idx, ltbs(i).io.req.bits.tag) + XSDebug("[IF4][req] bank %d: realMask=%d pc=%x idx=%x tag=%x\n", i.U, realMask(i), ltbs(i).io.req.pc, ltbs(i).io.req.idx, ltbs(i).io.req.tag) } - XSDebug("[IF4] baseBankLatch=%x bankIdxInOrderLatch=", baseBankLatch) + XSDebug("[IF4] baseBank=%x bankIdxInOrder=", baseBank) for (i <- 0 until PredictWidth) { - XSDebug(false, true.B, "%x ", bankIdxInOrderLatch(i)) + XSDebug(false, true.B, "%x ", bankIdxInOrder(i)) } XSDebug(false, true.B, "\n") for (i <- 0 until PredictWidth) { - XSDebug(RegNext(io.pc.valid) && (i.U === 0.U || i.U === 8.U), "[IF4][resps]") - XSDebug(false, RegNext(io.pc.valid), " %d:%d %d", i.U, io.resp.exit(i), io.meta.specCnts(i)) - XSDebug(false, RegNext(io.pc.valid) && (i.U === 7.U || i.U === 15.U), "\n") + XSDebug(io.outFire && (i.U === 0.U || i.U === 8.U), "[IF4][resps]") + XSDebug(false, io.outFire, " %d:%d %d", i.U, io.resp.exit(i), 
io.meta.specCnts(i)) + XSDebug(false, io.outFire && (i.U === 7.U || i.U === 15.U), "\n") } } } \ No newline at end of file