Unverified commit d298fe6d, authored by Steve Gou and committed by GitHub

Merge pull request #1604 from OpenXiangShan/bpu-ftq-timing

timing optimizations for bpu and ftq
@@ -596,8 +596,8 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with H
   val redirect = do_redirect.bits
-  predictors.io.update := io.ftq_to_bpu.update
-  predictors.io.update.bits.ghist := getHist(io.ftq_to_bpu.update.bits.histPtr)
+  predictors.io.update := RegNext(io.ftq_to_bpu.update)
+  predictors.io.update.bits.ghist := RegNext(getHist(io.ftq_to_bpu.update.bits.histPtr))
   predictors.io.redirect := do_redirect

   // Redirect logic
...
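The whole BPU-side change is to register the FTQ-to-BPU update bundle for one cycle before the predictors consume it, trading a cycle of training latency for timing slack. A minimal sketch of the idea, with an illustrative payload type rather than the Predictor's real interface:

```scala
import chisel3._
import chisel3.util._

class UpdateInfo extends Bundle { // hypothetical payload, not XiangShan's bundle
  val pc    = UInt(39.W)
  val taken = Bool()
}

class RetimedUpdate extends Module {
  val io = IO(new Bundle {
    val in  = Flipped(Valid(new UpdateInfo))
    val out = Valid(new UpdateInfo)
  })
  // One register on the whole update path: consumers see the update a cycle
  // later (harmless for training), but the long wire from the FTQ no longer
  // shares a cycle with the predictors' update logic.
  io.out.valid := RegNext(io.in.valid, false.B)
  io.out.bits  := RegNext(io.in.bits)
}
```

Note that in the hunk above, ghist is still computed from the unregistered histPtr and only then registered, so the history read stays in the earlier cycle.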
@@ -295,9 +295,6 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
     val update_write_data = Flipped(Valid(new FTBEntryWithTag))
     val update_write_way = Input(UInt(log2Ceil(numWays).W))
     val update_write_alloc = Input(Bool())
-    val try_to_write_way = Flipped(Valid(UInt(log2Ceil(numWays).W)))
-    val try_to_write_pc = Input(UInt(VAddrBits.W))
   })

   // Extract holdRead logic to fix bug that update read override predict read result
@@ -362,75 +359,45 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
   replacer.access(touch_set, touch_way)

-  // def allocWay(valids: UInt, meta_tags: UInt, req_tag: UInt) = {
-  //   val randomAlloc = false
-  //   if (numWays > 1) {
-  //     val w = Wire(UInt(log2Up(numWays).W))
-  //     val valid = WireInit(valids.andR)
-  //     val tags = Cat(meta_tags, req_tag)
-  //     val l = log2Up(numWays)
-  //     val nChunks = (tags.getWidth + l - 1) / l
-  //     val chunks = (0 until nChunks).map( i =>
-  //       tags(min((i+1)*l, tags.getWidth)-1, i*l)
-  //     )
-  //     w := Mux(valid, if (randomAlloc) {LFSR64()(log2Up(numWays)-1,0)} else {chunks.reduce(_^_)}, PriorityEncoder(~valids))
-  //     w
-  //   } else {
-  //     val w = WireInit(0.U)
-  //     w
-  //   }
-  // }
-  // val allocWriteWay = allocWay(
-  //   VecInit(read_entries.map(_.valid)).asUInt,
-  //   VecInit(read_tags).asUInt,
-  //   req_tag
-  // )
-  def allocWay(valids: UInt, idx: UInt) = {
+  def allocWay(valids: UInt, idx: UInt): UInt = {
     if (numWays > 1) {
       val w = Wire(UInt(log2Up(numWays).W))
       val valid = WireInit(valids.andR)
       w := Mux(valid, replacer.way(idx), PriorityEncoder(~valids))
       w
-    }else {
-      val w = WireInit(0.U)
+    } else {
+      val w = WireInit(0.U(log2Up(numWays).W))
       w
     }
   }
   io.read_resp := Mux1H(total_hits, read_entries) // Mux1H
   io.read_hits.valid := hit
-  // io.read_hits.bits := Mux(hit, hit_way_1h, VecInit(UIntToOH(allocWriteWay).asBools()))
   io.read_hits.bits := hit_way

   io.update_hits.valid := u_hit
   io.update_hits.bits := u_hit_way

-  // XSDebug(!hit, "FTB not hit, alloc a way: %d\n", allocWriteWay)

   // Update logic
   val u_valid = io.update_write_data.valid
   val u_data = io.update_write_data.bits
   val u_idx = ftbAddr.getIdx(io.update_pc)
-  val allocWriteWay = allocWay(VecInit(ftb_r_entries.map(_.valid)).asUInt, u_idx)
-  val u_mask = UIntToOH(Mux(io.update_write_alloc, allocWriteWay, io.update_write_way))
+  val allocWriteWay = allocWay(RegNext(VecInit(ftb_r_entries.map(_.valid))).asUInt, u_idx)
+  val u_way = Mux(io.update_write_alloc, allocWriteWay, io.update_write_way)
+  val u_mask = UIntToOH(u_way)

   for (i <- 0 until numWays) {
-    XSPerfAccumulate(f"ftb_replace_way$i", u_valid && io.update_write_alloc && OHToUInt(u_mask) === i.U)
-    XSPerfAccumulate(f"ftb_replace_way${i}_has_empty", u_valid && io.update_write_alloc && !ftb_r_entries.map(_.valid).reduce(_&&_) && OHToUInt(u_mask) === i.U)
+    XSPerfAccumulate(f"ftb_replace_way$i", u_valid && io.update_write_alloc && u_way === i.U)
+    XSPerfAccumulate(f"ftb_replace_way${i}_has_empty", u_valid && io.update_write_alloc && !ftb_r_entries.map(_.valid).reduce(_&&_) && u_way === i.U)
     XSPerfAccumulate(f"ftb_hit_way$i", hit && !io.update_access && hit_way === i.U)
   }

   ftb.io.w.apply(u_valid, u_data, u_idx, u_mask)

   // for replacer
-  write_set := Mux(u_valid, u_idx, ftbAddr.getIdx(io.try_to_write_pc))
-  write_way.valid := u_valid || io.try_to_write_way.valid
-  write_way.bits := Mux(u_valid,
-    Mux(io.update_write_alloc, allocWriteWay, io.update_write_way),
-    io.try_to_write_way.bits
-  )
+  write_set := u_idx
+  write_way.valid := u_valid
+  write_way.bits := Mux(io.update_write_alloc, allocWriteWay, io.update_write_way)

   // print hit entry info
   Mux1H(total_hits, ftb.io.r.resp.data).display(true.B)
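The rewritten allocWay drops the old tag-hash victim selection in favor of the set's replacement policy: fill the first empty way if one exists, and only ask the replacer for a victim when the set is full. A self-contained sketch of that policy, assuming the replacer's victim way arrives as an input signal (names here are illustrative, not the FTB bank's interface):

```scala
import chisel3._
import chisel3.util._

class AllocWay(numWays: Int) extends Module {
  require(numWays > 1)
  val io = IO(new Bundle {
    val validVec    = Input(UInt(numWays.W))            // per-way valid bits of the set
    val replacerWay = Input(UInt(log2Ceil(numWays).W))  // victim proposed by the replacer
    val allocWay    = Output(UInt(log2Ceil(numWays).W))
  })
  val allValid = io.validVec.andR
  // Prefer an empty way; evict the replacer's victim only when the set is full.
  io.allocWay := Mux(allValid, io.replacerWay, PriorityEncoder(~io.validVec))
}
```

Note the timing detail in the hunk above: the valid vector fed to allocWay is now RegNext-ed, matching the one-cycle-later consumption of the update read result.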
@@ -476,58 +443,39 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
   }

   // Update logic
-  val update = RegNext(io.update.bits)
-  // val update_queue = Mem(64, new UpdateQueueEntry)
-  // val head, tail = RegInit(UpdateQueuePtr(false.B, 0.U))
-  // val u_queue = Module(new Queue(new UpdateQueueEntry, entries = 64, flow = true))
-  // assert(u_queue.io.count < 64.U)
+  val update = io.update.bits
   val u_meta = update.meta.asTypeOf(new FTBMeta)
-  val u_valid = RegNext(io.update.valid && !io.update.bits.old_entry)
-  // io.s1_ready := ftbBank.io.req_pc.ready && u_queue.io.count === 0.U && !u_valid
-  io.s1_ready := ftbBank.io.req_pc.ready && !(u_valid && !u_meta.hit)
-  // val update_now = u_queue.io.deq.fire && u_queue.io.deq.bits.hit
+  val u_valid = io.update.valid && !io.update.bits.old_entry
+  val delay2_pc = DelayN(update.pc, 2)
+  val delay2_entry = DelayN(update.ftb_entry, 2)
   val update_now = u_valid && u_meta.hit
+  val update_need_read = u_valid && !u_meta.hit
+  // stall one more cycle because we use a whole cycle to do update read tag hit
+  io.s1_ready := ftbBank.io.req_pc.ready && !(update_need_read) && !RegNext(update_need_read)

-  ftbBank.io.u_req_pc.valid := u_valid && !u_meta.hit
+  ftbBank.io.u_req_pc.valid := update_need_read
   ftbBank.io.u_req_pc.bits := update.pc

-  // assert(!(u_valid && RegNext(u_valid) && update.pc === RegNext(update.pc)))
-  // assert(!(u_valid && RegNext(u_valid)))
-  // val u_way = u_queue.io.deq.bits.hit_way

   val ftb_write = Wire(new FTBEntryWithTag)
-  // ftb_write.entry := Mux(update_now, u_queue.io.deq.bits.ftb_entry, RegNext(u_queue.io.deq.bits.ftb_entry))
-  // ftb_write.tag := ftbAddr.getTag(Mux(update_now, u_queue.io.deq.bits.pc, RegNext(u_queue.io.deq.bits.pc)))(tagSize-1, 0)
-  ftb_write.entry := Mux(update_now, update.ftb_entry, RegNext(update.ftb_entry))
-  ftb_write.tag := ftbAddr.getTag(Mux(update_now, update.pc, RegNext(update.pc)))(tagSize-1, 0)
+  ftb_write.entry := Mux(update_now, update.ftb_entry, delay2_entry)
+  ftb_write.tag := ftbAddr.getTag(Mux(update_now, update.pc, delay2_pc))(tagSize-1, 0)

-  // val write_valid = update_now || RegNext(u_queue.io.deq.fire && !u_queue.io.deq.bits.hit)
-  val write_valid = update_now || RegNext(u_valid && !u_meta.hit)
-  // u_queue.io.enq.valid := u_valid
-  // u_queue.io.enq.bits := UpdateQueueEntry(update.pc, update.ftb_entry, u_meta.hit, u_meta.writeWay)
-  // u_queue.io.deq.ready := RegNext(!u_queue.io.deq.fire || update_now)
+  val write_valid = update_now || DelayN(u_valid && !u_meta.hit, 2)

   ftbBank.io.update_write_data.valid := write_valid
   ftbBank.io.update_write_data.bits := ftb_write
-  ftbBank.io.update_pc := Mux(update_now, update.pc, RegNext(update.pc))
-  ftbBank.io.update_write_way := Mux(update_now, u_meta.writeWay, ftbBank.io.update_hits.bits)
-  // ftbBank.io.update_write_alloc := Mux(update_now, !u_queue.io.deq.bits.hit, !ftbBank.io.update_hits.valid)
-  ftbBank.io.update_write_alloc := Mux(update_now, false.B, !ftbBank.io.update_hits.valid)
+  ftbBank.io.update_pc := Mux(update_now, update.pc, delay2_pc)
+  ftbBank.io.update_write_way := Mux(update_now, u_meta.writeWay, RegNext(ftbBank.io.update_hits.bits)) // use it one cycle later
+  ftbBank.io.update_write_alloc := Mux(update_now, false.B, RegNext(!ftbBank.io.update_hits.valid)) // use it one cycle later
   ftbBank.io.update_access := u_valid && !u_meta.hit
   ftbBank.io.s1_fire := io.s1_fire

-  // for replacer
-  ftbBank.io.try_to_write_way.valid := RegNext(io.update.valid) && u_meta.hit
-  ftbBank.io.try_to_write_way.bits := u_meta.writeWay
-  ftbBank.io.try_to_write_pc := update.pc

   XSDebug("req_v=%b, req_pc=%x, ready=%b (resp at next cycle)\n", io.s0_fire, s0_pc, ftbBank.io.req_pc.ready)
   XSDebug("s2_hit=%b, hit_way=%b\n", s2_hit, writeWay.asUInt)
   XSDebug("s2_br_taken_mask=%b, s2_real_taken_mask=%b\n",
@@ -539,8 +487,8 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
   XSPerfAccumulate("ftb_read_hits", RegNext(io.s0_fire) && s1_hit)
   XSPerfAccumulate("ftb_read_misses", RegNext(io.s0_fire) && !s1_hit)

-  XSPerfAccumulate("ftb_commit_hits", RegNext(io.update.valid) && u_meta.hit)
-  XSPerfAccumulate("ftb_commit_misses", RegNext(io.update.valid) && !u_meta.hit)
+  XSPerfAccumulate("ftb_commit_hits", io.update.valid && u_meta.hit)
+  XSPerfAccumulate("ftb_commit_misses", io.update.valid && !u_meta.hit)

   XSPerfAccumulate("ftb_update_req", io.update.valid)
   XSPerfAccumulate("ftb_update_ignored", io.update.valid && io.update.bits.old_entry)
...
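The FTB update path now spends a whole cycle on the tag read and another on the hit check before writing, and s1_ready blocks for the extra in-flight cycle. A sketch of just that handshake, with DelayN written out as a chain of RegNexts in the spirit of XiangShan's utility helper of the same name (the other names are illustrative):

```scala
import chisel3._
import chisel3.util._

object DelayN { // stand-in for XiangShan's utility: n chained registers
  def apply[T <: Data](in: T, n: Int): T =
    (0 until n).foldLeft(in)((x, _) => RegNext(x))
}

class UpdateTiming extends Module {
  val io = IO(new Bundle {
    val uValid    = Input(Bool())  // a new, non-stale update arrives
    val uHit      = Input(Bool())  // meta already knows the way: no read needed
    val s1Ready   = Output(Bool())
    val writeNow  = Output(Bool()) // hit in meta: write this cycle
    val writeLate = Output(Bool()) // miss: read (cycle 1), tag hit (cycle 2), then write
  })
  val needRead = io.uValid && !io.uHit
  // Stall one more cycle: the whole second cycle is spent on the tag-hit check.
  io.s1Ready   := !needRead && !RegNext(needRead, false.B)
  io.writeNow  := io.uValid && io.uHit
  io.writeLate := DelayN(needRead, 2)
}
```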
@@ -453,6 +453,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
   allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid

   val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
+  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
   val validEntries = distanceBetween(bpuPtr, commPtr)

   // **********************************************************************
@@ -520,18 +521,27 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
   val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
   val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))

-  when (bpu_in_fire) {
-    entry_fetch_status(bpu_in_resp_idx) := f_to_send
-    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
-    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.cfiIndex
-    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
-    update_target(bpu_in_resp_idx) := bpu_in_resp.getTarget
-    pred_stage(bpu_in_resp_idx) := bpu_in_stage
+  // modify registers one cycle later to cut critical path
+  val last_cycle_bpu_in = RegNext(bpu_in_fire)
+  val last_cycle_bpu_in_idx = RegNext(bpu_in_resp_idx)
+  val last_cycle_update_target = RegNext(bpu_in_resp.getTarget)
+  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex)
+  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)
+  when (last_cycle_bpu_in) {
+    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
+    commitStateQueue(last_cycle_bpu_in_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
+    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
+    mispredict_vec(last_cycle_bpu_in_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
+    update_target(last_cycle_bpu_in_idx) := last_cycle_update_target
+    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage
   }

   bpuPtr := bpuPtr + enq_fire
-  ifuPtr := ifuPtr + (io.toIfu.req.fire && allowToIfu)
+  when (io.toIfu.req.fire && allowToIfu) {
+    ifuPtr := ifuPtrPlus1
+    ifuPtrPlus1 := ifuPtrPlus1 + 1.U
+  }

   // only use ftb result to assign hit status
   when (bpu_s2_resp.valid) {
@@ -546,6 +556,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
     // only when ifuPtr runs ahead of bpu s2 resp should we recover it
     when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
       ifuPtr := bpu_s2_resp.ftq_idx
+      ifuPtrPlus1 := bpu_s2_resp.ftq_idx + 1.U
     }
   }
@@ -556,6 +567,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
     // only when ifuPtr runs ahead of bpu s2 resp should we recover it
     when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
       ifuPtr := bpu_s3_resp.ftq_idx
+      ifuPtrPlus1 := bpu_s3_resp.ftq_idx + 1.U
     }
   }
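ifuPtrPlus1 is the classic duplicated-pointer trick: keep both ptr and ptr+1 in registers so the queue's read address never needs an adder on the critical path, and rewrite both copies together on every redirect, as the hunks above and below do. A reduced sketch with plain counters instead of FtqPtr (wrap-around here relies on a power-of-two depth; names are illustrative):

```scala
import chisel3._
import chisel3.util._

class DualPtr(entries: Int) extends Module {
  require(isPow2(entries) && entries >= 2) // wrap-around falls out of UInt overflow
  val io = IO(new Bundle {
    val advance  = Input(Bool())                          // one entry consumed this cycle
    val redirect = Flipped(Valid(UInt(log2Ceil(entries).W)))
    val ptr      = Output(UInt(log2Ceil(entries).W))
    val ptrPlus1 = Output(UInt(log2Ceil(entries).W))      // ready-made read address
  })
  val ptr      = RegInit(0.U(log2Ceil(entries).W))
  val ptrPlus1 = RegInit(1.U(log2Ceil(entries).W))
  when (io.advance) {
    ptr      := ptrPlus1       // no adder on the read-address path, just a copy
    ptrPlus1 := ptrPlus1 + 1.U // the +1 happens off the critical path
  }
  when (io.redirect.valid) {   // both copies must stay consistent on recovery
    ptr      := io.redirect.bits
    ptrPlus1 := io.redirect.bits + 1.U
  }
  io.ptr      := ptr
  io.ptrPlus1 := ptrPlus1
}
```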
@@ -566,32 +578,38 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
   // ****************************************************************
   val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), bpu_in_fire)
   val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
-  val last_cycle_bpu_in = RegNext(bpu_in_fire)
   val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

   // read pc and target
   ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
-  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value
+  ftq_pc_mem.io.raddr.init.last := ifuPtrPlus1.value

   io.toIfu.req.bits.ftqIdx := ifuPtr
-  io.toIfu.req.bits.nextStartAddr := update_target(ifuPtr.value)
-  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

   val toIfuPcBundle = Wire(new Ftq_RF_Components)
-  val entry_is_to_send = WireInit(false.B)
+  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
+  val entry_next_addr = WireInit(update_target(ifuPtr.value))
+  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))

   when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
     toIfuPcBundle := bpu_in_bypass_buf
     entry_is_to_send := true.B
+    entry_next_addr := last_cycle_update_target
+    entry_ftq_offset := last_cycle_cfiIndex
   }.elsewhen (last_cycle_to_ifu_fire) {
     toIfuPcBundle := ftq_pc_mem.io.rdata.init.last
-    entry_is_to_send := RegNext(entry_fetch_status((ifuPtr+1.U).value) === f_to_send)
+    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
+                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
   }.otherwise {
     toIfuPcBundle := ftq_pc_mem.io.rdata.init.init.last
     entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send)
   }

   io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
+  io.toIfu.req.bits.nextStartAddr := entry_next_addr
+  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
   io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)

   // when fall through is smaller in value than start address, there must be a false hit
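entry_next_addr and entry_ftq_offset extend the existing PC-bundle bypass: the IFU request is served from registered copies, with a forward from the buffered write when the entry was only produced by the BPU last cycle. A stripped-down sketch of that forwarding pattern (field names are illustrative):

```scala
import chisel3._
import chisel3.util._

class WriteBypass(w: Int, ptrW: Int) extends Module {
  val io = IO(new Bundle {
    val wen      = Input(Bool())       // producer wrote an entry this cycle
    val wptr     = Input(UInt(ptrW.W))
    val wdata    = Input(UInt(w.W))
    val rptr     = Input(UInt(ptrW.W)) // entry the consumer wants next
    val memRdata = Input(UInt(w.W))    // registered read result for rptr
    val rdata    = Output(UInt(w.W))
  })
  val buf     = RegEnable(io.wdata, io.wen) // hold the last written data
  val bufPtr  = RegNext(io.wptr)
  val lastWen = RegNext(io.wen, false.B)
  // If last cycle's write hits the read pointer, forward the buffered copy;
  // otherwise the registered memory read data is used as before.
  io.rdata := Mux(lastWen && bufPtr === io.rptr, buf, io.memRdata)
}
```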
@@ -868,6 +886,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
     bpuPtr := next
     ifuPtr := next
     ifuWbPtr := next
+    ifuPtrPlus1 := idx + 2.U
     when (notIfu) {
       commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
         when(i.U > offset || i.U === offset && flushItSelf){
@@ -907,7 +926,9 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
   io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

-  val may_have_stall_from_bpu = RegInit(false.B)
+  val may_have_stall_from_bpu = Wire(Bool())
+  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
+  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
   val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
     Cat(commitStateQueue(commPtr.value).map(s => {
       s === c_invalid || s === c_commited
@@ -948,7 +969,22 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
   val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken
   val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit

-  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu
+  switch (bpu_ftb_update_stall) {
+    is (0.U) {
+      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
+        bpu_ftb_update_stall := 2.U // 2-cycle stall
+      }
+    }
+    is (2.U) {
+      bpu_ftb_update_stall := 1.U
+    }
+    is (1.U) {
+      bpu_ftb_update_stall := 0.U
+    }
+    is (3.U) {
+      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
+    }
+  }

   io.toBpu.update := DontCare
   io.toBpu.update.valid := commit_valid && do_commit
@@ -1004,8 +1040,16 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
     // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
   }

-  io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && entry_fetch_status(prefetchPtr.value) === f_to_send
-  io.toPrefetch.req.bits.target := update_target(prefetchPtr.value)
+  val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
+  val prefetch_addr = WireInit(update_target(prefetchPtr.value))
+  when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
+    prefetch_is_to_send := true.B
+    prefetch_addr := last_cycle_update_target
+  }
+  io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
+  io.toPrefetch.req.bits.target := prefetch_addr

   when(redirectVec.map(r => r.valid).reduce(_||_)){
     val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
...
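Since a missed FTB update now occupies the bank for two extra cycles, the commit side holds off with a small counter instead of the old one-bit toggle; the prefetch request at the end of the hunk reuses the same last-cycle bypass as the IFU path sketched earlier. A sketch of just the stall state machine (interface names are illustrative):

```scala
import chisel3._
import chisel3.util._

class UpdateStall extends Module {
  val io = IO(new Bundle {
    val startStall = Input(Bool()) // committing a taken CFI that missed in the FTB
    val stalled    = Output(Bool())
  })
  val cnt = RegInit(0.U(2.W)) // 2-cycle stall, so three live states: 2 -> 1 -> 0
  switch (cnt) {
    is (0.U) { when (io.startStall) { cnt := 2.U } }
    is (2.U) { cnt := 1.U }
    is (1.U) { cnt := 0.U }
  }
  io.stalled := cnt =/= 0.U
}
```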