未验证 提交 f313272f 编写于 作者: L ljw 提交者: GitHub

Merge pull request #151 from RISCVERS/dev-bpu-rebase-tage

Dev bpu rebase tage
......@@ -11,12 +11,12 @@ class FetchPacket extends XSBundle {
val instrs = Vec(FetchWidth, UInt(32.W))
val mask = UInt((FetchWidth*2).W)
val pc = UInt(VAddrBits.W) // the pc of first inst in the fetch group
val pnpc = Vec(FetchWidth, UInt(VAddrBits.W))
val hist = Vec(FetchWidth, UInt(HistoryLength.W))
val pnpc = Vec(FetchWidth*2, UInt(VAddrBits.W))
val hist = Vec(FetchWidth*2, UInt(HistoryLength.W))
// val btbVictimWay = UInt(log2Up(BtbWays).W)
val predCtr = Vec(FetchWidth, UInt(2.W))
val btbHitWay = Bool()
val tageMeta = Vec(FetchWidth, (new TageMeta))
val predCtr = Vec(FetchWidth*2, UInt(2.W))
val btbHit = Vec(FetchWidth*2, Bool())
val tageMeta = Vec(FetchWidth*2, (new TageMeta))
val rasSp = UInt(log2Up(RasSize).W)
val rasTopCtr = UInt(8.W)
}
......@@ -47,20 +47,20 @@ class BranchPrediction extends XSBundle {
val redirect = Bool()
// mask off all the instrs after the first redirect instr
val instrValid = Vec(FetchWidth, Bool())
val instrValid = Vec(FetchWidth*2, Bool())
// target of the first redirect instr in a fetch package
val target = UInt(VAddrBits.W)
val lateJump = Bool()
// save these info in brq!
// global history of each valid(or uncancelled) instruction, excluding branch's own prediction result
val hist = Vec(FetchWidth, UInt(HistoryLength.W))
val hist = Vec(FetchWidth*2, UInt(HistoryLength.W))
// victim way when updating btb
// val btbVictimWay = UInt(log2Up(BtbWays).W)
// 2-bit saturated counter
val predCtr = Vec(FetchWidth, UInt(2.W))
val btbHitWay = Bool()
val predCtr = Vec(FetchWidth*2, UInt(2.W))
val btbHit = Vec(FetchWidth*2, Bool())
// tage meta info
val tageMeta = Vec(FetchWidth, (new TageMeta))
val tageMeta = Vec(FetchWidth*2, (new TageMeta))
// ras checkpoint, only used in Stage3
val rasSp = UInt(log2Up(RasSize).W)
val rasTopCtr = UInt(8.W)
......@@ -68,9 +68,10 @@ class BranchPrediction extends XSBundle {
// Save predecode info in icache
// NOTE(review): this text is a rendered diff hunk with the +/- markers stripped, so
// mask/fuTypes/fuOpTypes are declared twice. The FetchWidth-sized declarations look like
// the pre-change lines and the FetchWidth*2-sized ones (plus isRVC) like their
// replacements -- confirm against the real file; only one set can compile.
class Predecode extends XSBundle {
// FetchWidth-sized form: one mask bit / one vector entry per FetchWidth slot
val mask = UInt(FetchWidth.W)
val fuTypes = Vec(FetchWidth, FuType())
val fuOpTypes = Vec(FetchWidth, FuOpType())
// FetchWidth*2-sized form: one mask bit / one vector entry per slot of a FetchWidth*2-wide group
val mask = UInt((FetchWidth*2).W)
// per-slot flag; IFU reads isRVC(i) to choose a 2-byte vs 4-byte step when computing pnpc(i)
val isRVC = Vec(FetchWidth*2, Bool())
val fuTypes = Vec(FetchWidth*2, FuType())
val fuOpTypes = Vec(FetchWidth*2, FuOpType())
}
// Dequeue DecodeWidth insts from Ibuffer
......@@ -82,7 +83,7 @@ class CtrlFlow extends XSBundle {
val hist = UInt(HistoryLength.W)
// val btbVictimWay = UInt(log2Up(BtbWays).W)
val btbPredCtr = UInt(2.W)
val btbHitWay = Bool()
val btbHit = Bool()
val tageMeta = new TageMeta
val rasSp = UInt(log2Up(RasSize).W)
val rasTopCtr = UInt(8.W)
......@@ -129,14 +130,15 @@ class Redirect extends XSBundle {
val brTarget = UInt(VAddrBits.W)
val brTag = new BrqPtr
val btbType = UInt(2.W)
val isRVC = Bool()
//val isCall = Bool()
val taken = Bool()
val hist = UInt(HistoryLength.W)
val tageMeta = new TageMeta
val fetchIdx = UInt(log2Up(FetchWidth).W)
val fetchIdx = UInt(log2Up(FetchWidth*2).W)
// val btbVictimWay = UInt(log2Up(BtbWays).W)
val btbPredCtr = UInt(2.W)
val btbHitWay = Bool()
val btbHit = Bool()
val rasSp = UInt(log2Up(RasSize).W)
val rasTopCtr = UInt(8.W)
val isException = Bool()
......
......@@ -29,6 +29,7 @@ trait HasXSParameter {
val PredictWidth = FetchWidth * 2
val EnableBPU = true
val EnableBPD = false // enable backing predictor(like Tage) in BPUStage3
val EnableRAS = false
val HistoryLength = 64
val BtbSize = 256
// val BtbWays = 4
......
......@@ -62,14 +62,15 @@ class AluExeUnit extends Exu(Exu.aluExeUnitCfg) {
io.out.bits.redirect.target := Mux(!taken && isBranch, pcLatchSlot, target)
io.out.bits.redirect.brTarget := target
io.out.bits.redirect.brTag := uop.brTag
io.out.bits.redirect.btbType := "b00".U
io.out.bits.redirect.btbType := "b00".U
io.out.bits.redirect.isRVC := isRVC
io.out.bits.redirect.taken := isBranch && taken
io.out.bits.redirect.hist := uop.cf.hist
io.out.bits.redirect.tageMeta := uop.cf.tageMeta
io.out.bits.redirect.fetchIdx := uop.cf.fetchOffset >> 2.U //TODO: consider RVC
io.out.bits.redirect.fetchIdx := uop.cf.fetchOffset >> 1.U //TODO: consider RVC
// io.out.bits.redirect.btbVictimWay := uop.cf.btbVictimWay
io.out.bits.redirect.btbPredCtr := uop.cf.btbPredCtr
io.out.bits.redirect.btbHitWay := uop.cf.btbHitWay
io.out.bits.redirect.btbHit := uop.cf.btbHit
io.out.bits.redirect.rasSp := uop.cf.rasSp
io.out.bits.redirect.rasTopCtr := uop.cf.rasTopCtr
io.out.bits.redirect.isException := DontCare // false.B
......
......@@ -47,12 +47,13 @@ class JmpExeUnit(implicit val p: XSConfig) extends Exu(Exu.jmpExeUnitCfg) {
csrExuOut.redirect.pc := uop.cf.pc
csrExuOut.redirect.brTarget := DontCare // DontCare
csrExuOut.redirect.btbType := LookupTree(uop.ctrl.fuOpType, RV32I_BRUInstr.bruFuncTobtbTypeTable)
csrExuOut.redirect.isRVC := uop.cf.isRVC
csrExuOut.redirect.taken := false.B
csrExuOut.redirect.hist := uop.cf.hist
csrExuOut.redirect.tageMeta := uop.cf.tageMeta
csrExuOut.redirect.fetchIdx := uop.cf.fetchOffset >> 2.U //TODO: consider RVC
csrExuOut.redirect.fetchIdx := uop.cf.fetchOffset >> 1.U //TODO: consider RVC
csrExuOut.redirect.btbPredCtr := uop.cf.btbPredCtr
csrExuOut.redirect.btbHitWay := uop.cf.btbHitWay
csrExuOut.redirect.btbHit := uop.cf.btbHit
csrExuOut.redirect.rasSp := uop.cf.rasSp
csrExuOut.redirect.rasTopCtr := uop.cf.rasTopCtr
......
......@@ -26,12 +26,13 @@ class Jump extends FunctionUnit(jmpCfg){
io.out.bits.redirect.brTarget := target // DontCare
io.out.bits.redirect.brTag := uop.brTag
io.out.bits.redirect.btbType := LookupTree(func, RV32I_BRUInstr.bruFuncTobtbTypeTable)
io.out.bits.redirect.isRVC := isRVC
io.out.bits.redirect.taken := true.B
io.out.bits.redirect.hist := uop.cf.hist
io.out.bits.redirect.tageMeta := uop.cf.tageMeta
io.out.bits.redirect.fetchIdx := uop.cf.fetchOffset >> 2.U //TODO: consider RVC
io.out.bits.redirect.fetchIdx := uop.cf.fetchOffset >> 1.U //TODO: consider RVC
io.out.bits.redirect.btbPredCtr := uop.cf.btbPredCtr
io.out.bits.redirect.btbHitWay := uop.cf.btbHitWay
io.out.bits.redirect.btbHit := uop.cf.btbHit
io.out.bits.redirect.rasSp := uop.cf.rasSp
io.out.bits.redirect.rasTopCtr := uop.cf.rasTopCtr
io.out.bits.redirect.isException := false.B
......
......@@ -48,7 +48,7 @@ trait HasPipelineReg { this: ArrayMultiplier =>
}
for(i <- 1 to latency){
when(flushVec(i) || rdyVec(i) && !validVec(i-1)){
when(flushVec(i-1) || rdyVec(i) && !validVec(i-1)){
validVec(i) := false.B
}.elsewhen(rdyVec(i-1) && validVec(i-1) && !flushVec(i-1)){
validVec(i) := validVec(i-1)
......
......@@ -34,11 +34,14 @@ class TempPreDecoder extends XSModule {
// Wire one temporary decoder per 32-bit fetch word and copy its decoded fuType/fuOpType
// into the Predecode output vectors.
// NOTE(review): rendered diff hunk with +/- markers stripped -- the `(i)`-indexed
// assignments look like the pre-change lines and the `(2*i)` / `(2*i+1)` ones like their
// replacements; confirm against the real file.
for (i <- 0 until FetchWidth) {
// default every decoder input to DontCare, then drive only the instruction word
tempPreDecoders(i).io.in <> DontCare
tempPreDecoders(i).io.in.instr <> io.in(i)
// one output entry per decoder
io.out.fuTypes(i) := tempPreDecoders(i).io.out.ctrl.fuType
io.out.fuOpTypes(i) := tempPreDecoders(i).io.out.ctrl.fuOpType
// two output entries per decoder: slots 2*i and 2*i+1 both receive decoder i's result
io.out.fuTypes(2*i) := tempPreDecoders(i).io.out.ctrl.fuType
io.out.fuTypes(2*i+1) := tempPreDecoders(i).io.out.ctrl.fuType
io.out.fuOpTypes(2*i) := tempPreDecoders(i).io.out.ctrl.fuOpType
io.out.fuOpTypes(2*i+1) := tempPreDecoders(i).io.out.ctrl.fuOpType
}
// mask and isRVC are not produced by this module; they are left as DontCare
io.out.mask := DontCare
io.out.isRVC := DontCare
}
......@@ -129,7 +132,7 @@ class FakeCache extends XSModule with HasICacheConst {
val s3_valid = RegEnable(next=s2_valid,init=false.B,enable=s2_fire)
val s3_ram_out = RegEnable(next=s2_ram_out,enable=s2_fire)
s3_ready := io.out.ready
s3_ready := (!s3_valid && io.out.ready) || io.out.fire()
val needflush = io.in.bits.flush
XSDebug("[ICache-Stage3] s3_valid:%d || s3_ready:%d ",s3_valid,s3_ready)
......
package xiangshan.frontend
package xiangshan.frontend
import chisel3._
import chisel3.util._
......@@ -27,7 +27,7 @@ class FakeBPU extends XSModule{
val redirectInfo = Input(new RedirectInfo)
val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }
val btbOut = ValidIO(new BranchPrediction)
val tageOut = ValidIO(new BranchPrediction)
val tageOut = Decoupled(new BranchPrediction)
val predecode = Flipped(ValidIO(new Predecode))
})
......@@ -54,26 +54,20 @@ class IFU extends XSModule with HasIFUConst
val if1_pc = RegInit(resetVector.U(VAddrBits.W))
//next
val if2_ready = WireInit(false.B)
val if2_snpc = snpc(if1_pc) //TODO: this is ugly
val if2_snpc = snpc(if1_pc) //TODO: calculate snpc according to mask of current fetch packet
val needflush = WireInit(false.B)
// when an RVI instruction is predicted as taken and it crosses over two fetch packets,
// IFU should not take this branch but fetch the latter half of the instruction sequentially,
// and take the jump target in the next fetch cycle
val if2_lateJumpLatch = WireInit(false.B)
val if2_lateJumpTarget = RegInit(0.U(VAddrBits.W))
val if4_lateJumpLatch = WireInit(false.B)
val if4_lateJumpTarget = RegInit(0.U(VAddrBits.W))
//pipe fire
val if1_fire = if1_valid && if2_ready
val if1_fire = if1_valid && if2_ready || needflush
val if1_pcUpdate = if1_fire || needflush
when(RegNext(reset.asBool) && !reset.asBool){
//when((GTimer() === 501.U)){ //TODO:this is ugly
XSDebug("RESET....\n")
if1_npc := resetVector.U(VAddrBits.W)
} .otherwise{
if1_npc := if2_snpc
}
when(if1_pcUpdate)
{
if1_pc := if1_npc
}
bpu.io.in.pc.valid := if1_fire
bpu.io.in.pc.bits := if1_npc
bpu.io.redirectInfo := io.redirectInfo
......@@ -90,8 +84,15 @@ class IFU extends XSModule with HasIFUConst
val if2_valid = RegEnable(next=if1_valid,init=false.B,enable=if1_fire)
val if2_pc = if1_pc
val if2_btb_taken = bpu.io.btbOut.valid && bpu.io.btbOut.bits.redirect
val if2_btb_insMask = bpu.io.btbOut.bits.instrValid
val if2_btb_target = bpu.io.btbOut.bits.target
val if2_btb_lateJump = WireInit(false.B)
val if2_btb_insMask = Mux(if2_btb_taken, bpu.io.btbOut.bits.instrValid.asUInt, Fill(FetchWidth*2, 1.U(1.W))) // TODO: FIX THIS
val if2_btb_target = Mux(if2_btb_lateJump, if2_snpc, bpu.io.btbOut.bits.target)
if2_lateJumpLatch := BoolStopWatch(if2_btb_lateJump, if1_fire, startHighPriority = true)
// since late jump target should be taken after the latter half of late jump instr is fetched, we need to latch this target
when (if2_btb_lateJump) {
if2_lateJumpTarget := bpu.io.btbOut.bits.target
}
//next
val if3_ready = WireInit(false.B)
......@@ -103,11 +104,29 @@ class IFU extends XSModule with HasIFUConst
io.icacheReq.valid := if2_valid
io.icacheReq.bits.addr := if2_pc
when(if2_valid && if2_btb_taken)
when(RegNext(reset.asBool) && !reset.asBool){
//when((GTimer() === 501.U)){ //TODO:this is ugly
XSDebug("RESET....\n")
if1_npc := resetVector.U(VAddrBits.W)
}.elsewhen (if2_fire) {
if1_npc := Mux(if4_lateJumpLatch, if4_lateJumpTarget, Mux(if2_lateJumpLatch, if2_lateJumpTarget, if2_snpc))
}.otherwise {
if1_npc := if1_pc
}
when(if1_pcUpdate)
{
if1_pc := if1_npc
}
// when if2 fires and redirects, update npc
when(if2_fire && if2_btb_taken)
{
if1_npc := if2_btb_target
}
bpu.io.in.pc.valid := if1_fire && !if2_btb_lateJump
XSDebug("[IF2]if2_valid:%d || if2_pc:0x%x || if3_ready:%d ",if2_valid,if2_pc,if3_ready)
XSDebug(false,if2_fire,"------IF2->fire!!!")
XSDebug(false,true.B,"\n")
......@@ -119,10 +138,11 @@ class IFU extends XSModule with HasIFUConst
//local
val if3_valid = RegEnable(next=if2_valid,init=false.B,enable=if2_fire)
val if3_pc = RegEnable(if2_pc,if2_fire)
val if3_npc = RegEnable(if1_npc,if2_fire)
val if3_btb_target = RegEnable(if2_btb_target,if2_fire)
val if3_btb_taken = RegEnable(if2_btb_taken,if2_fire)
val if3_btb_insMask = RegEnable(if2_btb_insMask, if2_fire)
val if3_npc = RegEnable(if1_npc, if2_fire)
val if3_btb_target = RegEnable(Mux(if2_lateJumpLatch, if2_lateJumpTarget, Mux(if2_btb_lateJump, bpu.io.btbOut.bits.target, if2_btb_target)), if2_fire)
val if3_btb_taken = RegEnable(Mux(if2_lateJumpLatch, true.B, if2_btb_taken), if2_fire)
val if3_btb_insMask = RegEnable(Mux(if2_lateJumpLatch, 1.U((FetchWidth*2).W), if2_btb_insMask), if2_fire)
val if3_btb_lateJump = RegEnable(if2_btb_lateJump, if2_fire)
//next
val if4_ready = WireInit(false.B)
......@@ -133,6 +153,8 @@ class IFU extends XSModule with HasIFUConst
XSDebug("[IF3]if3_valid:%d || if3_pc:0x%x if3_npc:0x%x || if4_ready:%d ",if3_valid,if3_pc,if3_npc,if4_ready)
XSDebug("[IF3]if3_btb_taken:%d if3_btb_insMask:%b if3_btb_lateJump:%d if3_btb_target:0x%x\n",
if3_btb_taken, if3_btb_insMask, if3_btb_lateJump, if3_btb_target)
XSDebug(false,if3_fire,"------IF3->fire!!!")
XSDebug(false,true.B,"\n")
......@@ -147,27 +169,31 @@ class IFU extends XSModule with HasIFUConst
val if4_btb_target = RegEnable(if3_btb_target,if3_fire)
val if4_btb_taken = RegEnable(if3_btb_taken,if3_fire)
val if4_btb_insMask = RegEnable(if3_btb_insMask, if3_fire)
val if4_tage_target = bpu.io.tageOut.bits.target
val if4_btb_lateJump = RegEnable(if3_btb_lateJump, if3_fire)
val if4_tage_taken = bpu.io.tageOut.valid && bpu.io.tageOut.bits.redirect
val if4_tage_lateJump = if4_tage_taken && bpu.io.tageOut.bits.lateJump && !io.redirectInfo.flush()
val if4_tage_insMask = bpu.io.tageOut.bits.instrValid
val if4_btb_missPre = WireInit(false.B)
val if4_snpc = if4_pc + (PopCount(if4_tage_insMask) << 1.U)
val if4_tage_target = Mux(if4_tage_lateJump, if4_snpc, bpu.io.tageOut.bits.target)
if2_btb_lateJump := if2_btb_taken && bpu.io.btbOut.bits.lateJump && !io.redirectInfo.flush() && !if4_tage_taken
if4_lateJumpLatch := BoolStopWatch(if4_tage_lateJump, if1_fire, startHighPriority = true)
when (if4_tage_lateJump) {
if4_lateJumpTarget := bpu.io.tageOut.bits.target
}
bpu.io.in.pc.valid := if1_fire && !if2_btb_lateJump && !if4_tage_lateJump
XSDebug("[IF4]if4_valid:%d || if4_pc:0x%x if4_npc:0x%x\n",if4_valid,if4_pc,if4_npc)
XSDebug("[IF4-TAGE-out]if4_tage_taken:%d || if4_btb_insMask:%b || if4_tage_target:0x%x \n",if4_tage_taken,if4_tage_insMask.asUInt,if4_tage_target)
XSDebug("[IF4] if4_btb_taken:%d if4_btb_lateJump:%d if4_btb_insMask:%b if4_btb_target:0x%x\n",if4_btb_taken, if4_btb_lateJump, if4_btb_insMask.asUInt, if4_btb_target)
XSDebug("[IF4-TAGE-out]if4_tage_taken:%d if4_tage_lateJump:%d if4_tage_insMask:%b if4_tage_target:0x%x\n",if4_tage_taken,if4_tage_lateJump,if4_tage_insMask.asUInt,if4_tage_target)
XSDebug("[IF4-ICACHE-RESP]icacheResp.valid:%d icacheResp.ready:%d\n",io.icacheResp.valid,io.icacheResp.ready)
when(io.icacheResp.fire() && if4_tage_taken &&if4_valid)
{
if1_npc := if4_tage_target
}
//redirect: tage result differs from btb
if4_btb_missPre := (if4_tage_taken ^ if4_btb_taken) || (if4_tage_taken && if4_btb_taken && (if4_tage_target =/= if4_btb_target))
if(EnableBPD){
when(!if4_tage_taken && if4_btb_taken && if4_valid){
if1_npc := if4_pc + (PopCount(io.fetchPacket.bits.mask) >> 2.U)
}
}
//redirect: miss predict
when(io.redirectInfo.flush()){
......@@ -177,9 +203,11 @@ class IFU extends XSModule with HasIFUConst
//flush pipeline
if(EnableBPD){needflush := (if4_valid && if4_btb_missPre) || io.redirectInfo.flush() }
else {needflush := io.redirectInfo.flush()}
// if(EnableBPD){needflush := (if4_valid && if4_tage_taken) || io.redirectInfo.flush() }
// else {needflush := io.redirectInfo.flush()}
needflush := (if4_valid && if4_tage_taken && io.icacheResp.fire()) || io.redirectInfo.flush()
when(needflush){
// if2_valid := false.B
if3_valid := false.B
if4_valid := false.B
}
......@@ -191,45 +219,62 @@ class IFU extends XSModule with HasIFUConst
if4_ready := io.fetchPacket.ready && (io.icacheResp.valid || !if4_valid) && (GTimer() > 500.U)
io.fetchPacket.valid := if4_valid && !io.redirectInfo.flush()
io.fetchPacket.bits.instrs := io.icacheResp.bits.icacheOut
/*
if(EnableBPU){
io.fetchPacket.bits.mask := Mux(if4_tage_taken,(Fill(FetchWidth*2, 1.U(1.W)) & Reverse(Cat(if4_tage_insMask.map(i => Fill(2, i.asUInt))).asUInt)),
Mux(if4_btb_taken, Fill(FetchWidth*2, 1.U(1.W)) & Reverse(Cat(if4_btb_insMask.map(i => Fill(2, i.asUInt))).asUInt),
Fill(FetchWidth*2, 1.U(1.W)))
io.fetchPacket.bits.mask := Mux(if4_tage_taken, Fill(FetchWidth*2, 1.U(1.W)) & if4_tage_insMask.asUInt,
Mux(if4_btb_taken, Fill(FetchWidth*2, 1.U(1.W)) & if4_btb_insMask.asUInt,
Fill(FetchWidth*2, 1.U(1.W)))
)
}
else{
io.fetchPacket.bits.mask := Fill(FetchWidth*2, 1.U(1.W)) //TODO : consider cross cacheline fetch
}
}
*/
io.fetchPacket.bits.mask := Mux(if4_lateJumpLatch, 1.U((FetchWidth*2).W),
Mux(if4_tage_taken, Fill(FetchWidth*2, 1.U(1.W)) & if4_tage_insMask.asUInt,
Fill(FetchWidth*2, 1.U(1.W)) & if4_btb_insMask.asUInt))
io.fetchPacket.bits.pc := if4_pc
XSDebug(io.fetchPacket.fire,"[IFU-Out-FetchPacket] starPC:0x%x GroupPC:0x%xn\n",if4_pc.asUInt,groupPC(if4_pc).asUInt)
XSDebug(io.fetchPacket.fire,"[IFU-Out-FetchPacket] instrmask %b\n",io.fetchPacket.bits.mask.asUInt)
for(i <- 0 until FetchWidth){
//io.fetchPacket.bits.pnpc(i) := if1_npc
when (if4_btb_taken && !if4_tage_taken && i.U === OHToUInt(HighestBit(if4_btb_insMask.asUInt, FetchWidth))) {
if(EnableBPD){io.fetchPacket.bits.pnpc(i) := if4_pc + ((i + 1).U << 2.U) } //tage not taken use snpc
else{io.fetchPacket.bits.pnpc(i) := if4_btb_target}//use fetch PC
}.elsewhen (if4_tage_taken && i.U === OHToUInt(HighestBit(if4_tage_insMask.asUInt, FetchWidth))) {
io.fetchPacket.bits.pnpc(i) := if1_npc
for(i <- 0 until (FetchWidth*2)) {
when (if4_btb_taken && !if4_tage_taken && i.U === OHToUInt(HighestBit(if4_btb_insMask.asUInt, FetchWidth*2))) {
io.fetchPacket.bits.pnpc(i) := if4_btb_target
if (i != 0) {
when (!io.icacheResp.bits.predecode.isRVC(i) && !if4_btb_lateJump) {
io.fetchPacket.bits.pnpc(i-1) := if4_btb_target
}
}
}.elsewhen (if4_tage_taken && i.U === OHToUInt(HighestBit(if4_tage_insMask.asUInt, FetchWidth*2))) {
io.fetchPacket.bits.pnpc(i) := Mux(if4_tage_lateJump, bpu.io.tageOut.bits.target, if4_tage_target)
if (i != 0) {
when (!io.icacheResp.bits.predecode.isRVC(i) && !if4_tage_lateJump) {
io.fetchPacket.bits.pnpc(i-1) := if4_tage_target
}
}
}.otherwise {
io.fetchPacket.bits.pnpc(i) := if4_pc + ((i + 1).U << 2.U) //use fetch PC
io.fetchPacket.bits.pnpc(i) := if4_pc + (i.U << 1.U) + Mux(io.icacheResp.bits.predecode.isRVC(i), 2.U, 4.U)
}
XSDebug(io.fetchPacket.fire,"[IFU-Out-FetchPacket] instruction %x pnpc:0x%x\n",io.fetchPacket.bits.instrs(i).asUInt,io.fetchPacket.bits.pnpc(i).asUInt)
}
XSDebug(io.fetchPacket.fire,"[IFU-Out-FetchPacket] instruction %x pnpc:0x%x\n",
Mux((i.U)(0), io.fetchPacket.bits.instrs(i>>1)(31,16), io.fetchPacket.bits.instrs(i>>1)(15,0)),
io.fetchPacket.bits.pnpc(i))
}
io.fetchPacket.bits.hist := bpu.io.tageOut.bits.hist
// io.fetchPacket.bits.btbVictimWay := bpu.io.tageOut.bits.btbVictimWay
io.fetchPacket.bits.predCtr := bpu.io.tageOut.bits.predCtr
io.fetchPacket.bits.btbHitWay := bpu.io.tageOut.bits.btbHitWay
io.fetchPacket.bits.btbHit := bpu.io.tageOut.bits.btbHit
io.fetchPacket.bits.tageMeta := bpu.io.tageOut.bits.tageMeta
io.fetchPacket.bits.rasSp := bpu.io.tageOut.bits.rasSp
io.fetchPacket.bits.rasTopCtr := bpu.io.tageOut.bits.rasTopCtr
bpu.io.tageOut.ready := io.fetchPacket.ready
//to BPU
bpu.io.predecode.valid := io.icacheResp.fire() && if4_valid
bpu.io.predecode.bits <> io.icacheResp.bits.predecode
bpu.io.predecode.bits.mask := Fill(FetchWidth, 1.U(1.W)) //TODO: consider RVC && consider cross cacheline fetch
//TODO: consider RVC && consider cross cacheline fetch
bpu.io.predecode.bits.mask := Fill(FetchWidth*2, 1.U(1.W))
bpu.io.predecode.bits.isRVC := 0.U.asTypeOf(Vec(FetchWidth*2, Bool()))
bpu.io.redirectInfo := io.redirectInfo
io.icacheResp.ready := io.fetchPacket.ready && (GTimer() > 500.U)
}
}
\ No newline at end of file
......@@ -20,7 +20,7 @@ class Ibuffer extends XSModule {
val fetchOffset = UInt((log2Up(FetchWidth * 4)).W)
val hist = UInt(HistoryLength.W)
val btbPredCtr = UInt(2.W)
val btbHitWay = Bool()
val btbHit = Bool()
val tageMeta = new TageMeta
val rasSp = UInt(log2Up(RasSize).W)
val rasTopCtr = UInt(8.W)
......@@ -61,17 +61,22 @@ class Ibuffer extends XSModule {
when(io.in.bits.mask(i)) {
ibuf(enq_idx).inst := Mux(i.U(0), io.in.bits.instrs(i>>1)(31,16), io.in.bits.instrs(i>>1)(15,0))
ibuf(enq_idx).pc := io.in.bits.pc + ((enq_idx - tail_ptr)<<1).asUInt
ibuf(enq_idx).pnpc := io.in.bits.pnpc(i>>1)
ibuf(enq_idx).pnpc := io.in.bits.pnpc(i)
ibuf(enq_idx).fetchOffset := ((enq_idx - tail_ptr) << 1).asUInt
ibuf(enq_idx).hist := io.in.bits.hist(i>>1)
ibuf(enq_idx).hist := io.in.bits.hist(i)
// ibuf(enq_idx).btbVictimWay := io.in.bits.btbVictimWay
ibuf(enq_idx).btbPredCtr := io.in.bits.predCtr(i>>1)
ibuf(enq_idx).btbHitWay := io.in.bits.btbHitWay
ibuf(enq_idx).tageMeta := io.in.bits.tageMeta(i>>1)
ibuf(enq_idx).btbPredCtr := io.in.bits.predCtr(i)
ibuf(enq_idx).btbHit := io.in.bits.btbHit(i)
ibuf(enq_idx).tageMeta := io.in.bits.tageMeta(i)
ibuf(enq_idx).rasSp := io.in.bits.rasSp
ibuf(enq_idx).rasTopCtr := io.in.bits.rasTopCtr
ibuf_valid(enq_idx) := true.B
XSDebug("Enq: i:%d idx:%d mask:%b instr:%x pc:%x fetchOffset=%d\n",
i.U, enq_idx, io.in.bits.mask(i), Mux(i.U(0), io.in.bits.instrs(i>>1)(31,16), io.in.bits.instrs(i>>1)(15,0)), io.in.bits.pc + ((enq_idx - tail_ptr)<<1).asUInt, ((enq_idx - tail_ptr) << 1).asUInt)
}
// XSDebug(!(i.U)(0), "Enq: i:%d Idx:%d mask:%b instr:%x pc:%x pnpc:%x\n",
// (i/2).U, enq_idx, io.in.bits.mask(i), io.in.bits.instrs(i/2), io.in.bits.pc + ((enq_idx - tail_ptr)<<1).asUInt, io.in.bits.pnpc(i/2))
enq_idx = enq_idx + io.in.bits.mask(i)
}
......@@ -94,7 +99,7 @@ class Ibuffer extends XSModule {
io.out(i).bits.hist := ibuf(deq_idx).hist
// io.out(i).bits.btbVictimWay := ibuf(deq_idx).btbVictimWay
io.out(i).bits.btbPredCtr := ibuf(deq_idx).btbPredCtr
io.out(i).bits.btbHitWay := ibuf(deq_idx).btbHitWay
io.out(i).bits.btbHit := ibuf(deq_idx).btbHit
io.out(i).bits.tageMeta := ibuf(deq_idx).tageMeta
io.out(i).bits.rasSp := ibuf(deq_idx).rasSp
io.out(i).bits.rasTopCtr := ibuf(deq_idx).rasTopCtr
......@@ -109,7 +114,7 @@ class Ibuffer extends XSModule {
io.out(i).bits.hist := ibuf(deq_idx).hist
// io.out(i).bits.btbVictimWay := ibuf(deq_idx).btbVictimWay
io.out(i).bits.btbPredCtr := ibuf(deq_idx).btbPredCtr
io.out(i).bits.btbHitWay := ibuf(deq_idx).btbHitWay
io.out(i).bits.btbHit := ibuf(deq_idx).btbHit
io.out(i).bits.tageMeta := ibuf(deq_idx).tageMeta
io.out(i).bits.rasSp := ibuf(deq_idx).rasSp
io.out(i).bits.rasTopCtr := ibuf(deq_idx).rasTopCtr
......@@ -125,7 +130,7 @@ class Ibuffer extends XSModule {
io.out(i).bits.hist := 0.U(HistoryLength.W)
// io.out(i).bits.btbVictimWay := 0.U(log2Up(BtbWays).W)
io.out(i).bits.btbPredCtr := 0.U(2.W)
io.out(i).bits.btbHitWay := false.B
io.out(i).bits.btbHit := false.B
io.out(i).bits.tageMeta := 0.U.asTypeOf(new TageMeta)
io.out(i).bits.rasSp := 0.U(log2Up(RasSize))
io.out(i).bits.rasTopCtr := 0.U(8.W)
......@@ -140,12 +145,13 @@ class Ibuffer extends XSModule {
io.out(i).bits.hist := ibuf(head_ptr + (i<<1).U).hist
// io.out(i).bits.btbVictimWay := ibuf(head_ptr + (i<<1).U).btbVictimWay
io.out(i).bits.btbPredCtr := ibuf(head_ptr + (i<<1).U).btbPredCtr
io.out(i).bits.btbHitWay := ibuf(head_ptr + (i<<1).U).btbHitWay
io.out(i).bits.btbHit := ibuf(head_ptr + (i<<1).U).btbHit
io.out(i).bits.tageMeta := ibuf(head_ptr + (i<<1).U).tageMeta
io.out(i).bits.rasSp := ibuf(head_ptr + (i<<1).U).rasSp
io.out(i).bits.rasTopCtr := ibuf(head_ptr + (i<<1).U).rasTopCtr
io.out(i).bits.isRVC := false.B
}
XSDebug(deqValid, p"Deq: i:${i.U} valid:${ibuf_valid(deq_idx)} idx=${Decimal(deq_idx)} ${Decimal(deq_idx + 1.U)} instr:${Hexadecimal(io.out(i).bits.instr)} PC=${Hexadecimal(io.out(i).bits.pc)} v=${io.out(i).valid} r=${io.out(i).ready}\n")
// When can't deque, deq_idx+0
// when RVC deque, deq_idx+1
......@@ -169,7 +175,7 @@ class Ibuffer extends XSModule {
io.out(i).bits.hist := 0.U(HistoryLength.W)
// io.out(i).bits.btbVictimWay := 0.U(log2Up(BtbWays).W)
io.out(i).bits.btbPredCtr := 0.U(2.W)
io.out(i).bits.btbHitWay := false.B
io.out(i).bits.btbHit := false.B
io.out(i).bits.tageMeta := 0.U.asTypeOf(new TageMeta)
io.out(i).bits.rasSp := 0.U(log2Up(RasSize))
io.out(i).bits.rasTopCtr := 0.U(8.W)
......@@ -192,16 +198,16 @@ class Ibuffer extends XSModule {
}
//Debug Info
XSDebug(enqValid, "Enque:\n")
for(i <- 0 until FetchWidth) {
XSDebug(enqValid, p"${Hexadecimal(io.in.bits.instrs(i))}\n")
}
// XSDebug(enqValid, "Enque:\n")
// for(i <- 0 until FetchWidth) {
// XSDebug(enqValid, p"${Binary(io.in.bits.instrs(i))}\n")
// }
XSInfo(io.flush, "Flush signal received, clear buffer\n")
XSDebug(deqValid, "Deque:\n")
for(i <- 0 until DecodeWidth) {
XSDebug(deqValid, p"${Hexadecimal(io.out(i).bits.instr)} PC=${Hexadecimal(io.out(i).bits.pc)} v=${io.out(i).valid} r=${io.out(i).ready}\n")
}
XSDebug(enqValid, p"last_head_ptr=$head_ptr last_tail_ptr=$tail_ptr\n")
// XSDebug(deqValid, "Deque:\n")
// for(i <- 0 until DecodeWidth) {
// XSDebug(deqValid, p"${Binary(io.out(i).bits.instr)} PC=${Hexadecimal(io.out(i).bits.pc)} v=${io.out(i).valid} r=${io.out(i).ready}\n")
// }
XSDebug(p"head_ptr=$head_ptr tail_ptr=$tail_ptr\n")
// XSInfo(full, "Queue is full\n")
}
......@@ -17,7 +17,7 @@ trait HasTageParameter {
( 128, 64, 9))
val TageNTables = TableInfo.size
val UBitPeriod = 2048
val BankWidth = 8 // FetchWidth
val BankWidth = 16 // FetchWidth
val TotalBits = TableInfo.map {
case (s, h, t) => {
......@@ -107,11 +107,11 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val tageEntrySz = 1 + tagLen + 3
val (hashed_idx, tag) = compute_tag_and_hash(io.req.bits.pc >> (2 + log2Ceil(FetchWidth)), io.req.bits.hist)
val (hashed_idx, tag) = compute_tag_and_hash(io.req.bits.pc, io.req.bits.hist)
val hi_us = List.fill(BankWidth)(Module(new SRAMTemplate(Bool(), set=nRows, shouldReset=true, holdRead=true, singlePort=false)))
val lo_us = List.fill(BankWidth)(Module(new SRAMTemplate(Bool(), set=nRows, shouldReset=true, holdRead=true, singlePort=false)))
val table = List.fill(BankWidth)(Module(new SRAMTemplate(new TageEntry, set=nRows, shouldReset=true, holdRead=true, singlePort=false)))
val hi_us = List.fill(BankWidth)(Module(new SRAMTemplate(Bool(), set=nRows, shouldReset=false, holdRead=true, singlePort=false)))
val lo_us = List.fill(BankWidth)(Module(new SRAMTemplate(Bool(), set=nRows, shouldReset=false, holdRead=true, singlePort=false)))
val table = List.fill(BankWidth)(Module(new SRAMTemplate(new TageEntry, set=nRows, shouldReset=false, holdRead=true, singlePort=false)))
val hi_us_r = Wire(Vec(BankWidth, Bool()))
val lo_us_r = Wire(Vec(BankWidth, Bool()))
......@@ -133,9 +133,9 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
lo_us_r(b) := lo_us(b).io.r.resp.data(0)
table_r(b) := table(b).io.r.resp.data(0)
io.resp(b).valid := table_r(b).valid && table_r(b).tag === tag // Missing reset logic
io.resp(b).bits.ctr := table_r(b).ctr
io.resp(b).bits.u := Cat(hi_us_r(b),lo_us_r(b))
// io.resp(b).valid := table_r(b).valid && table_r(b).tag === tag // Missing reset logic
// io.resp(b).bits.ctr := table_r(b).ctr
// io.resp(b).bits.u := Cat(hi_us_r(b),lo_us_r(b))
}
)
......@@ -220,6 +220,10 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
wrbypass_enq_idx := (wrbypass_enq_idx + 1.U)(log2Ceil(wrBypassEntries)-1,0)
}
}
XSDebug(io.req.valid, "tableReq: pc=0x%x, hist=%b, idx=%d, tag=%x\n", io.req.bits.pc, io.req.bits.hist, hashed_idx, tag)
for (i <- 0 until BankWidth) {
XSDebug(RegNext(io.req.valid), "TageTableResp[%d]: idx=%d, hit:%d, ctr:%d, u:%d\n", i.U, RegNext(hashed_idx), req_rhits(i), table_r(i).ctr, Cat(hi_us_r(i),lo_us_r(i)).asUInt)
}
}
......@@ -227,14 +231,14 @@ class FakeTAGE extends TageModule {
val io = IO(new Bundle() {
val req = Input(Valid(new TageReq))
val out = new Bundle {
val hits = Output(UInt(FetchWidth.W))
val takens = Output(Vec(FetchWidth, Bool()))
val hits = Output(UInt(BankWidth.W))
val takens = Output(Vec(BankWidth, Bool()))
}
val meta = Output(Vec(FetchWidth, (new TageMeta)))
val meta = Output(Vec(BankWidth, (new TageMeta)))
val redirectInfo = Input(new RedirectInfo)
})
io.out.hits := 0.U(FetchWidth.W)
io.out.hits := 0.U(BankWidth.W)
io.out.takens := DontCare
io.meta := DontCare
}
......@@ -244,10 +248,10 @@ class Tage extends TageModule {
val io = IO(new Bundle() {
val req = Input(Valid(new TageReq))
val out = new Bundle {
val hits = Output(UInt(FetchWidth.W))
val takens = Output(Vec(FetchWidth, Bool()))
val hits = Output(UInt(BankWidth.W))
val takens = Output(Vec(BankWidth, Bool()))
}
val meta = Output(Vec(FetchWidth, (new TageMeta)))
val meta = Output(Vec(BankWidth, (new TageMeta)))
val redirectInfo = Input(new RedirectInfo)
})
......@@ -262,7 +266,7 @@ class Tage extends TageModule {
val updateMeta = io.redirectInfo.redirect.tageMeta
//val updateMisPred = UIntToOH(io.redirectInfo.redirect.fetchIdx) &
// Fill(FetchWidth, (io.redirectInfo.misPred && io.redirectInfo.redirect.btbType === BTBtype.B).asUInt)
// Fill(BankWidth, (io.redirectInfo.misPred && io.redirectInfo.redirect.btbType === BTBtype.B).asUInt)
val updateMisPred = io.redirectInfo.misPred && io.redirectInfo.redirect.btbType === BTBtype.B
val updateMask = WireInit(0.U.asTypeOf(Vec(TageNTables, Vec(BankWidth, Bool()))))
......@@ -277,7 +281,7 @@ class Tage extends TageModule {
updateU := DontCare
// access tag tables and output meta info
val outHits = Wire(Vec(FetchWidth, Bool()))
val outHits = Wire(Vec(BankWidth, Bool()))
for (w <- 0 until BankWidth) {
var altPred = false.B
val finalAltPred = WireInit(false.B)
......@@ -368,10 +372,15 @@ class Tage extends TageModule {
tables(i).io.update.u(w) := updateU(i)(w)
}
// use fetch pc instead of instruction pc
tables(i).io.update.pc := io.redirectInfo.redirect.pc - (io.redirectInfo.redirect.fetchIdx << 2.U)
tables(i).io.update.pc := io.redirectInfo.redirect.pc - (io.redirectInfo.redirect.fetchIdx << 1.U)
tables(i).io.update.hist := io.redirectInfo.redirect.hist
}
io.out.hits := outHits.asUInt
val m = updateMeta
XSDebug(io.req.valid, "req: pc=0x%x, hist=%b\n", io.req.bits.pc, io.req.bits.hist)
XSDebug(io.redirectInfo.valid, "redirect: provider(%d):%d, altDiffers:%d, providerU:%d, providerCtr:%d, allocate(%d):%d\n", m.provider.valid, m.provider.bits, m.altDiffers, m.providerU, m.providerCtr, m.allocate.valid, m.allocate.bits)
XSDebug(RegNext(io.req.valid), "resp: pc=%x, outHits=%b, takens=%b\n", RegNext(io.req.bits.pc), io.out.hits, io.out.takens.asUInt)
}
\ No newline at end of file
......@@ -9,12 +9,9 @@ import chisel3.util.experimental.BoringUtils
import xiangshan.backend.decode.XSTrap
class BTBUpdateBundle extends XSBundle {
// val fetchPC = UInt(VAddrBits.W)
val pc = UInt(VAddrBits.W)
// val fetchIdx = UInt(log2Up(FetchWidth*2).W)
val hit = Bool()
val misPred = Bool()
// val writeWay = UInt(log2Up(BtbWays).W)
val oldCtr = UInt(2.W)
val taken = Bool()
val target = UInt(VAddrBits.W)
......@@ -23,15 +20,16 @@ class BTBUpdateBundle extends XSBundle {
}
// Prediction bundle produced by the BTB.
// NOTE(review): rendered diff hunk with +/- markers stripped -- takenIdx, notTakens,
// dEntries and hits are each declared twice, once sized by FetchWidth and once by
// PredictWidth (HasXSParameter defines PredictWidth = FetchWidth * 2). The FetchWidth
// versions look like the pre-change lines; confirm against the real file.
class BTBPred extends XSBundle {
val hit = Bool()
val taken = Bool()
// index field wide enough to address FetchWidth entries
val takenIdx = UInt(log2Up(FetchWidth).W)
// index field wide enough to address PredictWidth entries
val takenIdx = UInt(log2Up(PredictWidth).W)
val target = UInt(VAddrBits.W)
// val writeWay = UInt(log2Up(BtbWays).W)
// per-entry vectors, FetchWidth-sized form
val notTakens = Vec(FetchWidth, Bool())
val dEntries = Vec(FetchWidth, btbDataEntry())
val hits = Vec(FetchWidth, Bool())
// per-entry vectors, PredictWidth-sized form
val notTakens = Vec(PredictWidth, Bool())
val dEntries = Vec(PredictWidth, btbDataEntry())
val hits = Vec(PredictWidth, Bool())
// whether an RVI instruction crosses over two fetch packets
val isRVILateJump = Bool()
}
case class btbDataEntry() extends XSBundle {
......@@ -53,7 +51,7 @@ class BTB extends XSModule {
val in = new Bundle {
val pc = Flipped(Decoupled(UInt(VAddrBits.W)))
val pcLatch = Input(UInt(VAddrBits.W))
val mask = Input(UInt((PredictWidth).W))
val mask = Input(UInt(PredictWidth.W))
}
val redirectValid = Input(Bool())
val flush = Input(Bool())
......@@ -64,10 +62,7 @@ class BTB extends XSModule {
io.in.pc.ready := true.B
val fireLatch = RegNext(io.in.pc.fire())
val nextFire = Wire(Bool())
nextFire := fireLatch
val maskLatch = RegEnable(io.in.mask, io.in.pc.fire())
val btbAddr = new TableAddr(log2Up(BtbSize), BtbBanks)
......@@ -79,8 +74,6 @@ class BTB extends XSModule {
// BTB read requests
val baseBank = btbAddr.getBank(io.in.pc.bits)
// val baseTag = btbAddr.getTag(io.in.pc.bits)
// val isAligned = baseBank === 0.U
// circular shifting
def circularShiftLeft(source: UInt, len: Int, shamt: UInt): UInt = {
val res = Wire(UInt(len.W))
......@@ -110,8 +103,6 @@ class BTB extends XSModule {
}
// // latch pc for 1 cycle latency when reading SRAM
// val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid)
// Entries read from SRAM
val metaRead = Wire(Vec(BtbBanks, btbMetaEntry()))
val dataRead = Wire(Vec(BtbBanks, btbDataEntry()))
......@@ -145,15 +136,6 @@ class BTB extends XSModule {
// e.g: baseBank == 5 => (5, 6,..., 15, 0, 1, 2, 3, 4)
val bankIdxInOrder = VecInit((0 until BtbBanks).map(b => (baseBankLatch + b.U) % BtbBanks.U))
// Let predTakens(0) be in correspond with the first instruction in fetchPC
// val predUInt = predTakens.asUInt
// val realPreds = Mux(isAlignedLatch, predUInt, Cat(predUInt(BtbBanks-baseBankLatch-1, 0), predUInt(BtbBanks-1, BtbBanks-baseBankLatch))
// val realPredsVec = VecInit((0 until BtbBanks).map(realPreds(_).asBool))
// val ntbUInt = notTakenBranches.asUInt
// val realNtb = Mux(isAlignedLatch, ntbUInt, Cat(ntbUInt(BtbBanks-baseBankLatch-1, 0), ntbUInt(BtbBanks-1, BtbBanks-baseBankLatch))
// val realNtbVec = VecInit((0 until BtbBanks).map(realNtb(_).asBool))
val isTaken = predTakens.reduce(_||_)
// Priority mux which corresponds with inst orders
......@@ -168,7 +150,7 @@ class BTB extends XSModule {
/** Computes the next value of a `len`-bit saturating counter.
  *
  * @param old   current counter value
  * @param len   counter width in bits
  * @param taken true to count up, false to count down
  * @return `old` moved one step in the requested direction, held at the
  *         all-ones value when already there and counting up, and held at
  *         zero when already there and counting down
  */
def satUpdate(old: UInt, len: Int, taken: Bool): UInt = {
  val oldSatTaken = old === ((1 << len)-1).U
  val oldSatNotTaken = old === 0.U
  // A counter that is already at its maximum must stay there on another
  // taken outcome; it must not fall back to max-1.
  Mux(oldSatTaken && taken, ((1 << len)-1).U,
    Mux(oldSatNotTaken && !taken, 0.U,
      Mux(taken, old + 1.U, old - 1.U)))
}
......@@ -195,7 +177,7 @@ class BTB extends XSModule {
val notBrOrJ = u.btbType =/= BTBtype.B && u.btbType =/= BTBtype.J
// Do not update BTB on indirect or return, or correctly predicted J or saturated counters
val noNeedToUpdate = (!u.misPred && (isBr && updateOnSaturated || isJ)) || (u.misPred && notBrOrJ)
val noNeedToUpdate = (!u.misPred && (isBr && updateOnSaturated || isJ)) || notBrOrJ
// do not update on saturated ctrs
val btbWriteValid = io.redirectValid && !noNeedToUpdate
......@@ -209,25 +191,56 @@ class BTB extends XSModule {
btbData(b).io.w.req.bits.data := btbDataWrite
}
io.out.hit := bankHits.reduce(_||_)
// io.out.hit := bankHits.reduce(_||_)
io.out.taken := isTaken
io.out.takenIdx := takenIdx(log2Up(PredictWidth)-1, 1)
io.out.takenIdx := takenIdx
io.out.target := takenTarget
// io.out.writeWay := writeWay
io.out.notTakens := VecInit((0 until BtbBanks by 2).map(b => notTakenBranches(bankIdxInOrder(b))))
io.out.dEntries := VecInit((0 until BtbBanks by 2).map(b => dataRead(bankIdxInOrder(b))))
io.out.hits := VecInit((0 until BtbBanks by 2).map(b => bankHits(bankIdxInOrder(b))))
io.out.notTakens := VecInit((0 until BtbBanks).map(b => notTakenBranches(bankIdxInOrder(b))))
io.out.dEntries := VecInit((0 until BtbBanks).map(b => dataRead(bankIdxInOrder(b))))
io.out.hits := VecInit((0 until BtbBanks).map(b => bankHits(bankIdxInOrder(b))))
io.out.isRVILateJump := io.out.taken && takenIdx === OHToUInt(HighestBit(maskLatch, PredictWidth)) && !dataRead(bankIdxInOrder(takenIdx)).isRVC
// Read-after-write bypass: when a lookup and a BTB update address the same
// bank and row in the same cycle, skip the SRAM read for that bank and hand
// the entry being written to the read side one cycle later instead.
val rawBypassHit = Wire(Vec(BtbBanks, Bool()))
for (b <- 0 until BtbBanks) {
when (b.U === updateBankIdx && realRow(b) === updateRow) { // read and write to the same address
when (realMask(b) && io.in.pc.valid && btbWriteValid) { // both read and write valid
rawBypassHit(b) := true.B
// Cancel the read request on both arrays for this bank ...
btbMeta(b).io.r.req.valid := false.B
btbData(b).io.r.req.valid := false.B
// metaRead(b) := RegNext(btbMetaWrite)
// dataRead(b) := RegNext(btbDataWrite)
// ... but still flag the bank as having fired a read.
readFire(b) := true.B
XSDebug("raw bypass hits: bank=%d, row=%d, meta: %d %x, data: tgt=%x pred=%b btbType=%b isRVC=%d\n",
b.U, updateRow,
btbMetaWrite.valid, btbMetaWrite.tag,
btbDataWrite.target, btbDataWrite.pred, btbDataWrite.btbType, btbDataWrite.isRVC)
}.otherwise {
rawBypassHit(b) := false.B
}
}.otherwise {
rawBypassHit(b) := false.B
}
// One cycle after a bypass hit, override the read results with the registered
// copy of the entry that was written.
when (RegNext(rawBypassHit(b))) {
metaRead(b) := RegNext(btbMetaWrite)
dataRead(b) := RegNext(btbDataWrite)
}
}
XSDebug(io.in.pc.fire(), "read: pc=0x%x, baseBank=%d, realMask=%b\n", io.in.pc.bits, baseBank, realMask)
XSDebug(nextFire, "read_resp: pc=0x%x, readIdx=%d-------------------------------\n",
XSDebug(fireLatch, "read_resp: pc=0x%x, readIdx=%d-------------------------------\n",
io.in.pcLatch, btbAddr.getIdx(io.in.pcLatch))
for (i <- 0 until BtbBanks){
XSDebug(nextFire, "read_resp[b=%d][r=%d]: valid=%d, tag=0x%x, target=0x%x, type=%d, ctr=%d\n",
XSDebug(fireLatch, "read_resp[b=%d][r=%d]: valid=%d, tag=0x%x, target=0x%x, type=%d, ctr=%d\n",
i.U, realRowLatch(i), metaRead(i).valid, metaRead(i).tag, dataRead(i).target, dataRead(i).btbType, dataRead(i).pred)
}
XSDebug(nextFire, "bankIdxInOrder:")
for (i <- 0 until BtbBanks){ XSDebug(nextFire, "%d ", bankIdxInOrder(i))}
XSDebug(nextFire, "\n")
XSDebug("out: taken=%d takenIdx=%d tgt=%x notTakens=%b hits=%b isRVILateJump=%d\n",
io.out.taken, io.out.takenIdx, io.out.target, io.out.notTakens.asUInt, io.out.hits.asUInt, io.out.isRVILateJump)
XSDebug(fireLatch, "bankIdxInOrder:")
for (i <- 0 until BtbBanks){ XSDebug(fireLatch, "%d ", bankIdxInOrder(i))}
XSDebug(fireLatch, "\n")
XSDebug(io.redirectValid, "update_req: pc=0x%x, hit=%d, misPred=%d, oldCtr=%d, taken=%d, target=0x%x, btbType=%d\n",
u.pc, u.hit, u.misPred, u.oldCtr, u.taken, u.target, u.btbType)
XSDebug(io.redirectValid, "update: noNeedToUpdate=%d, writeValid=%d, bank=%d, row=%d, newCtr=%d\n",
......
......@@ -14,12 +14,15 @@ class JBTACUpdateBundle extends XSBundle {
val target = UInt(VAddrBits.W)
val btbType = UInt(2.W)
val misPred = Bool()
val isRVC = Bool()
}
// Prediction bundle driven on the JBTAC's output port.
class JBTACPred extends XSBundle {
// High when the entry read for this lookup is accepted as a match (driven from outHit).
val hit = Bool()
// Target address stored in the entry that was read.
val target = UInt(VAddrBits.W)
// Slot offset stored in the entry that was read.
val hitIdx = UInt(log2Up(PredictWidth).W)
// High when `hit` is set, `hitIdx` equals the highest set bit of the latched
// fetch mask, and the entry is not marked RVC.
val isRVILateJump = Bool()
// RVC flag stored in the entry that was read.
val isRVC = Bool()
}
class JBTAC extends XSModule {
......@@ -27,6 +30,7 @@ class JBTAC extends XSModule {
val in = new Bundle {
val pc = Flipped(Decoupled(UInt(VAddrBits.W)))
val pcLatch = Input(UInt(VAddrBits.W))
val mask = Input(UInt(PredictWidth.W))
val hist = Input(UInt(HistoryLength.W))
}
val redirectValid = Input(Bool())
......@@ -39,8 +43,6 @@ class JBTAC extends XSModule {
io.in.pc.ready := true.B
val fireLatch = RegNext(io.in.pc.fire())
val nextFire = Wire(Bool())
nextFire := fireLatch
// JBTAC, divided into 8 banks, makes prediction for indirect jump except ret.
val jbtacAddr = new TableAddr(log2Up(JbtacSize), JbtacBanks)
......@@ -50,6 +52,7 @@ class JBTAC extends XSModule {
val tag = UInt(jbtacAddr.tagBits.W + jbtacAddr.idxBits.W)
val target = UInt(VAddrBits.W)
val offset = UInt(log2Up(PredictWidth).W)
val isRVC = Bool()
}
val jbtac = List.fill(JbtacBanks)(Module(new SRAMTemplate(jbtacEntry(), set = JbtacSize / JbtacBanks, shouldReset = true, holdRead = true, singlePort = false)))
......@@ -76,16 +79,19 @@ class JBTAC extends XSModule {
val readBankLatch = jbtacAddr.getBank(histXORAddrLatch)
val readRowLatch = jbtacAddr.getBankIdx(histXORAddrLatch)
val readMaskLatch = RegEnable(io.in.mask, io.in.pc.fire())
val outHit = readEntries(readBankLatch).valid &&
readEntries(readBankLatch).tag === Cat(jbtacAddr.getTag(io.in.pcLatch), jbtacAddr.getIdx(io.in.pcLatch)) &&
!io.flush && readFire(readBankLatch)
!io.flush && RegNext(readFire(readBankLatch)) && readMaskLatch(readEntries(readBankLatch).offset).asBool
io.out.hit := outHit
io.out.hitIdx := readEntries(readBankLatch).offset(log2Up(PredictWidth)-1, 1)
io.out.hitIdx := readEntries(readBankLatch).offset
io.out.target := readEntries(readBankLatch).target
io.out.isRVILateJump := io.out.hit && io.out.hitIdx === OHToUInt(HighestBit(readMaskLatch, PredictWidth)) && !readEntries(readBankLatch).isRVC
io.out.isRVC := readEntries(readBankLatch).isRVC
// 2. update jbtac
// update jbtac
val writeEntry = Wire(jbtacEntry())
// val updateHistXORAddr = updatefetchPC ^ Cat(r.hist, 0.U(2.W))(VAddrBits - 1, 0)
val updateHistXORAddr = io.update.fetchPC ^ Cat(io.update.hist, 0.U(1.W))(VAddrBits - 1, 0)
......@@ -95,6 +101,7 @@ class JBTAC extends XSModule {
writeEntry.target := io.update.target
// writeEntry.offset := updateFetchIdx
writeEntry.offset := io.update.fetchIdx
writeEntry.isRVC := io.update.isRVC
val writeBank = jbtacAddr.getBank(updateHistXORAddr)
val writeRow = jbtacAddr.getBankIdx(updateHistXORAddr)
......@@ -111,10 +118,34 @@ class JBTAC extends XSModule {
}
}
XSDebug(io.in.pc.fire(), "[JBTAC]read: pc=0x%x, histXORAddr=0x%x, bank=%d, row=%d, hist=%b\n",
// Read-after-write bypass: when a lookup and a JBTAC update address the same
// bank and row in the same cycle, skip the SRAM read for that bank and hand
// the entry being written to the read side one cycle later instead.
val rawBypassHit = Wire(Vec(JbtacBanks, Bool()))
for (b <- 0 until JbtacBanks) {
  when (readBank === writeBank && readRow === writeRow && b.U === readBank) {
    when (io.in.pc.fire() && writeValid) {
      rawBypassHit(b) := true.B
      // Cancel the read request for this bank but still flag it as fired.
      jbtac(b).io.r.req.valid := false.B
      // readEntries(b) := RegNext(writeEntry)
      readFire(b) := true.B
      XSDebug("raw bypass hits: bank=%d, row=%d, tag=%x, tgt=%x, offset=%d, isRVC=%d\n",
        b.U, readRow, writeEntry.tag, writeEntry.target, writeEntry.offset, writeEntry.isRVC)
    }.otherwise {
      rawBypassHit(b) := false.B
    }
  }.otherwise {
    rawBypassHit(b) := false.B
  }
  // One cycle after a bypass hit, override the read result with the registered
  // copy of the entry that was written.
  when (RegNext(rawBypassHit(b))) { readEntries(b) := RegNext(writeEntry) }
}
XSDebug(io.in.pc.fire(), "read: pc=0x%x, histXORAddr=0x%x, bank=%d, row=%d, hist=%b\n",
io.in.pc.bits, histXORAddr, readBank, readRow, io.in.hist)
XSDebug(nextFire, "[JBTAC]read_resp: pc=0x%x, bank=%d, row=%d, target=0x%x, offset=%d, hit=%d\n",
XSDebug("out: hit=%d tgt=%x hitIdx=%d iRVILateJump=%d isRVC=%d\n",
io.out.hit, io.out.target, io.out.hitIdx, io.out.isRVILateJump, io.out.isRVC)
XSDebug(fireLatch, "read_resp: pc=0x%x, bank=%d, row=%d, target=0x%x, offset=%d, hit=%d\n",
io.in.pcLatch, readBankLatch, readRowLatch, readEntries(readBankLatch).target, readEntries(readBankLatch).offset, outHit)
XSDebug(io.redirectValid, "[JBTAC]update_req: fetchPC=0x%x, writeValid=%d, hist=%b, bank=%d, row=%d, target=0x%x, offset=%d, type=0x%d\n",
XSDebug(io.redirectValid, "update_req: fetchPC=0x%x, writeValid=%d, hist=%b, bank=%d, row=%d, target=0x%x, offset=%d, type=0x%d\n",
io.update.fetchPC, writeValid, io.update.hist, writeBank, writeRow, io.update.target, io.update.fetchIdx, io.update.btbType)
}
\ No newline at end of file
# Driver makefile: builds every C file under tests/ as its own application by
# generating a temporary per-test makefile and handing it to make.
include $(AM_HOME)/Makefile.check
.PHONY: all run clean latest $(ALL)
# One name per C file found under tests/, with directory and extension removed.
ALL = $(basename $(notdir $(shell find tests/. -name "*.c")))
# Process every test through its generated Makefile.<name>, then print the test names.
all: $(addprefix Makefile., $(ALL))
@echo "" $(ALL)
# `make <name>` processes a single test through Makefile.<name>.
$(ALL): %: Makefile.%
# Write a throw-away Makefile.<name> (NAME, SRCS, klib, include of Makefile.app),
# run make on it with the goals given on the command line, then delete it.
# The leading `-` keeps going if that step fails. `latest` is phony, so this
# rule is always considered out of date and runs every time.
Makefile.%: tests/%.c latest
@/bin/echo -e "NAME = $*\nSRCS = $<\nLIBS += klib\ninclude $${AM_HOME}/Makefile.app" > $@
-@make -s -f $@ ARCH=$(ARCH) $(MAKECMDGOALS)
-@rm -f Makefile.$*
# Cancel rules included by $(AM_HOME)/Makefile.check: `image` gets an empty recipe,
# and `default` plus any goal named on the command line simply depends on `all`.
image: ;
default $(MAKECMDGOALS): all ;
# Remove generated per-test makefiles and the build/ directory.
clean:
rm -rf Makefile.* build/
# Empty phony target used only as an always-out-of-date prerequisite.
latest:
\ No newline at end of file
// #include <am.h>
// #include <klib.h>
#define OutLoopNum 20
#define InnerLoopNum 20

/* Table written by double_loop(); column 0 of every row is never written. */
int array[OutLoopNum][InnerLoopNum];

/*
 * Nested-loop workload: for every row, store row + column into each cell
 * from column 1 up to the last column.
 */
void double_loop() {
    int row = 0;

    while (row < OutLoopNum) {
        int *cells = array[row];
        int col = 1;

        while (col < InnerLoopNum) {
            cells[col] = row + col;
            ++col;
        }
        ++row;
    }
}
/* Entry point of the nested-loop test: run the workload once, then exit with status 0. */
int main () {
    double_loop();

    return 0;
}
// #include <am.h>
// #include <klib.h>
#define DEPTH 100

/*
 * Recursive call-chain workload: descends one call per level until the
 * argument reaches 0, then unwinds. The result of the inner call is
 * discarded; every level returns its own argument.
 */
int nest(int depth) {
    if (depth != 0) {
        nest(depth - 1);
    }
    return depth;
}
/* Entry point of the recursion test: run one DEPTH-deep call chain, then exit with status 0. */
int main () {
    nest(DEPTH);

    return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册