未验证 提交 d3975bec 编写于 作者: Y YikeZhou 提交者: GitHub

backend, rename: performance bug fixed in move elimination process (#934)

* Rename: add perf counter for move elimination
[NOTE] There are three reasons why a move elimination (ME) can be cancelled:
  1. the reference counter has reached its max value
  2. RAW dependency on a former instruction
  3. two move instructions with the same psrc in one cycle

* Rename: add debug log + fix perf bug for move elim cancellation

* AlternativeFreeList: parameterize width of counter

* Rename: [bug fix] RAW conflict in meEnable decision
(suppose the former inst is i and the latter inst is j; i does
not have to be a move instruction)
上级 ba8c0d5e
......@@ -76,23 +76,24 @@ class AlternativeFreeList(implicit p: Parameters) extends XSModule with HasCircu
})
// NOTE(review): this listing appears to be a rendered diff in which removed and added
// lines are interleaved; the declarations using a literal `2.W` look like the pre-change
// versions of the `COUNTER_WIDTH` declarations that follow them — confirm against the repo.

// Free-list size; currently just the number of physical registers.
val FL_SIZE = NRPhyRegs // TODO: calculate the max size of the free list from NRPhyRegs and the counter width
// Bit width shared by all reference counters declared below.
val COUNTER_WIDTH = 2.W // width of reference counters below
// Counters recording how many times each physical register is referenced
// (one entry per physical register, all initialised to zero).
val archRefCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(2.W))))
val specRefCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(2.W))))
val cmtCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(2.W))))
val archRefCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(COUNTER_WIDTH))))
val specRefCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(COUNTER_WIDTH))))
val cmtCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(COUNTER_WIDTH))))
// Per-register "next value" wires for archRefCounter; every element defaults to DontCare.
val archRefCounterNext = Wire(Vec(NRPhyRegs, UInt(2.W)))
val archRefCounterNext = Wire(Vec(NRPhyRegs, UInt(COUNTER_WIDTH)))
archRefCounterNext.foreach(_ := DontCare)
// Per-register control flags for archRefCounter, default false.
// Presumably the same roles as the spec flags below (update with xxxNext / reset to zero) — TODO confirm.
val updateArchRefCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B)))
val clearArchRefCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B)))
// Per-register "next value" wires for specRefCounter; every element defaults to DontCare.
val specRefCounterNext = Wire(Vec(NRPhyRegs, UInt(2.W)))
val specRefCounterNext = Wire(Vec(NRPhyRegs, UInt(COUNTER_WIDTH)))
specRefCounterNext.foreach(_ := DontCare)
val updateSpecRefCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B))) // update with xxxNext
val clearSpecRefCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B))) // reset to zero
// Per-register "next value" wires for cmtCounter; every element defaults to DontCare.
val cmtCounterNext = Wire(Vec(NRPhyRegs, UInt(2.W)))
val cmtCounterNext = Wire(Vec(NRPhyRegs, UInt(COUNTER_WIDTH)))
cmtCounterNext.foreach(_ := DontCare)
// Per-register control flags for cmtCounter, default false.
// Presumably the same roles as the spec flags above — TODO confirm.
val updateCmtCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B)))
val clearCmtCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B)))
......
......@@ -194,7 +194,7 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
} else {
// compare psrc0
psrc_cmp(i-1) := Cat((0 until i).map(j => {
uops(i).psrc(0) === uops(j).psrc(0)
uops(i).psrc(0) === uops(j).psrc(0) && io.in(i).bits.ctrl.isMove && io.in(j).bits.ctrl.isMove
}) /* reverse is not necessary here */)
// calculate meEnable
......@@ -394,4 +394,22 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
// Stall counters: each counts while there is valid input and io.out(0) is ready,
// split by which resource is unavailable — the fp free list cannot allocate, the int free
// list cannot increment, or the ROQ is walking. In each counter the other two conditions
// are required to be in their non-blocking state.
XSPerfAccumulate("stall_cycle_fp", hasValid && io.out(0).ready && !fpFreeList.io.req.canAlloc && intFreeList.io.inc.canInc && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_int", hasValid && io.out(0).ready && fpFreeList.io.req.canAlloc && !intFreeList.io.inc.canInc && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_walk", hasValid && io.out(0).ready && fpFreeList.io.req.canAlloc && intFreeList.io.inc.canInc && io.roqCommits.isWalk)
// Move-elimination statistics, summed over all RenameWidth slots that fire:
//   move_instr_count   - fired instructions flagged as isMove
//   move_elim_enabled  - fired instructions with meEnable set
//   move_elim_cancelled - fired moves whose meEnable is not set
XSPerfAccumulate("move_instr_count", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove)))
XSPerfAccumulate("move_elim_enabled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && meEnable(i))))
XSPerfAccumulate("move_elim_cancelled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))))
// Breakdown of cancelled moves by condition. The conditions are not mutually exclusive in
// this code, so one cancelled move may be counted by more than one of these counters.
// Slot 0 has no earlier slot in the group, so its bypass / psrc_cmp terms are constant false.
XSPerfAccumulate("move_elim_cancelled_psrc_bypass", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR })))
XSPerfAccumulate("move_elim_cancelled_cnt_limit", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax(io.out(i).bits.psrc(0)))))
XSPerfAccumulate("move_elim_cancelled_inc_more_than_one", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR })))
// to make sure meEnable functions as expected
// Per-slot debug logs mirroring the three breakdown counters above; each message carries
// the pc of the cancelled move.
for (i <- 0 until RenameWidth) {
// isMax holds for the move's psrc(0).
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax(io.out(i).bits.psrc(0)),
p"ME_CANCELLED: ref counter hits max value (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
// Some bit of lsrc1_bypass(i-1) is set (never for slot 0).
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR },
p"ME_CANCELLED: RAW dependency (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
// Some bit of psrc_cmp(i-1) is set (never for slot 0).
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR },
p"ME_CANCELLED: psrc duplicates with former instruction (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
}
// When at least one slot has a cancelled move, dump the fire flag and pc of every slot
// in the rename group for context.
XSDebug(VecInit(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))).asUInt().orR,
p"ME_CANCELLED: pc group [ " + (0 until RenameWidth).map(i => p"fire:${io.out(i).fire()},pc:0x${Hexadecimal(io.in(i).bits.cf.pc)} ").reduceLeft(_ + _) + p"]\n")
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册