未验证 提交 7fa2c198 编写于 作者: Y Yinan Xu 提交者: GitHub

renameTable: optimize read and write timing (#1101)

This commit optimizes RenameTable's timing.

Read addresses come directly from the instruction buffer and have the
best timing, so we read the table at the decode stage and bypass write
data from the current clock cycle to the read data of the next cycle.

For writes, we latch the write request and process it in the next cycle.
上级 3feeca58
......@@ -111,7 +111,6 @@ case class XSCoreParameters
LoadQueueSize: Int = 80,
StoreQueueSize: Int = 64,
RobSize: Int = 256,
EnableIntMoveElim: Boolean = true,
IntRefCounterWidth: Int = 2,
dpParams: DispatchParameters = DispatchParameters(
IntDqSize = 16,
......@@ -291,7 +290,6 @@ trait HasXSParameter {
val NRPhyRegs = coreParams.NRPhyRegs
val PhyRegIdxWidth = log2Up(NRPhyRegs)
val RobSize = coreParams.RobSize
val EnableIntMoveElim = coreParams.EnableIntMoveElim
val IntRefCounterWidth = coreParams.IntRefCounterWidth
val StdFreeListSize = NRPhyRegs - 32
// val MEFreeListSize = NRPhyRegs - { if (IntRefCounterWidth > 0 && IntRefCounterWidth < 5) (32 / Math.pow(2, IntRefCounterWidth)).toInt else 1 }
......
......@@ -23,7 +23,7 @@ import utils._
import xiangshan._
import xiangshan.backend.decode.{DecodeStage, ImmUnion}
import xiangshan.backend.dispatch.{Dispatch, DispatchQueue}
import xiangshan.backend.rename.Rename
import xiangshan.backend.rename.{Rename, RenameTableWrapper}
import xiangshan.backend.rob.{Rob, RobCSRIO, RobLsqIO}
import xiangshan.frontend.{FtqPtr, FtqRead}
import xiangshan.mem.LsqEnqIO
......@@ -212,6 +212,7 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
})
val decode = Module(new DecodeStage)
val rat = Module(new RenameTableWrapper)
val rename = Module(new Rename)
val dispatch = Module(new Dispatch)
val intDq = Module(new DispatchQueue(dpParams.IntDqSize, RenameWidth, dpParams.IntDqDeqWidth, "int"))
......@@ -285,12 +286,24 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
decode.io.memPredUpdate(1).valid := false.B
decode.io.csrCtrl := RegNext(io.csrCtrl)
val jumpInst = io.dispatch(0).bits
val jumpPcRead = io.frontend.fromFtq.getJumpPcRead
io.jumpPc := jumpPcRead(jumpInst.cf.ftqPtr, jumpInst.cf.ftqOffset)
val jumpTargetRead = io.frontend.fromFtq.target_read
io.jalr_target := jumpTargetRead(jumpInst.cf.ftqPtr, jumpInst.cf.ftqOffset)
rat.io.flush := flushReg
rat.io.robCommits := rob.io.commits
for ((r, i) <- rat.io.intReadPorts.zipWithIndex) {
val raddr = decode.io.out(i).bits.ctrl.lsrc.take(2) :+ decode.io.out(i).bits.ctrl.ldest
r.map(_.addr).zip(raddr).foreach(x => x._1 := x._2)
rename.io.intReadPorts(i) := r.map(_.data)
r.foreach(_.hold := !rename.io.in(i).ready)
}
rat.io.intRenamePorts := rename.io.intRenamePorts
for ((r, i) <- rat.io.fpReadPorts.zipWithIndex) {
val raddr = decode.io.out(i).bits.ctrl.lsrc.take(3) :+ decode.io.out(i).bits.ctrl.ldest
r.map(_.addr).zip(raddr).foreach(x => x._1 := x._2)
rename.io.fpReadPorts(i) := r.map(_.data)
r.foreach(_.hold := !rename.io.in(i).ready)
}
rat.io.fpRenamePorts := rename.io.fpRenamePorts
rat.io.debug_int_rat <> io.debug_int_rat
rat.io.debug_fp_rat <> io.debug_fp_rat
// pipeline between decode and rename
val redirectValid = stage2Redirect.valid || flushReg
......@@ -331,6 +344,12 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
io.dispatch <> intDq.io.deq ++ lsDq.io.deq ++ fpDq.io.deq
val jumpInst = io.dispatch(0).bits
val jumpPcRead = io.frontend.fromFtq.getJumpPcRead
io.jumpPc := jumpPcRead(jumpInst.cf.ftqPtr, jumpInst.cf.ftqOffset)
val jumpTargetRead = io.frontend.fromFtq.target_read
io.jalr_target := jumpTargetRead(jumpInst.cf.ftqPtr, jumpInst.cf.ftqOffset)
rob.io.redirect <> stage2Redirect
val exeWbResults = VecInit(io.writeback ++ io.stOut)
val timer = GTimer()
......@@ -342,8 +361,6 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
io.redirect <> stage2Redirect
io.flush <> flushReg
io.debug_int_rat <> rename.io.debug_int_rat
io.debug_fp_rat <> rename.io.debug_fp_rat
// rob to int block
io.robio.toCSR <> rob.io.csr
......
......@@ -169,11 +169,7 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasExceptionNO {
updatedUop(i).psrc(1) := updatedPsrc2(i)
updatedUop(i).psrc(2) := updatedPsrc3(i)
updatedUop(i).old_pdest := updatedOldPdest(i)
if (EnableIntMoveElim) {
updatedUop(i).debugInfo.eliminatedMove := io.fromRename(i).bits.eliminatedMove
} else {
updatedUop(i).debugInfo.eliminatedMove := DontCare
}
updatedUop(i).debugInfo.eliminatedMove := io.fromRename(i).bits.eliminatedMove
// update commitType
updatedUop(i).ctrl.commitType := updatedCommitType(i)
// update robIdx, lqIdx, sqIdx
......@@ -333,15 +329,9 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasExceptionNO {
// send uops to dispatch queues
// Note that if one of their previous instructions cannot enqueue, they should not enter dispatch queue.
// We use notBlockedByPrevious here.
if (EnableIntMoveElim) {
io.toIntDq.needAlloc(i) := io.fromRename(i).valid && isInt(i) && !io.fromRename(i).bits.eliminatedMove
io.toIntDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isInt(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRob.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept && !io.fromRename(i).bits.eliminatedMove
} else {
io.toIntDq.needAlloc(i) := io.fromRename(i).valid && isInt(i)
io.toIntDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isInt(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRob.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept
}
io.toIntDq.needAlloc(i) := io.fromRename(i).valid && isInt(i) && !io.fromRename(i).bits.eliminatedMove
io.toIntDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isInt(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRob.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept && !io.fromRename(i).bits.eliminatedMove
io.toIntDq.req(i).bits := updatedUop(i)
io.toFpDq.needAlloc(i) := io.fromRename(i).valid && isFp(i)
......@@ -373,11 +363,7 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasExceptionNO {
p"rob ${updatedUop(i).robIdx}, lq ${updatedUop(i).lqIdx}, sq ${updatedUop(i).sqIdx})\n"
)
if (EnableIntMoveElim) {
io.allocPregs(i).isInt := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.rfWen && (io.fromRename(i).bits.ctrl.ldest =/= 0.U) && !io.fromRename(i).bits.eliminatedMove
} else {
io.allocPregs(i).isInt := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.rfWen && (io.fromRename(i).bits.ctrl.ldest =/= 0.U)
}
io.allocPregs(i).isInt := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.rfWen && (io.fromRename(i).bits.ctrl.ldest =/= 0.U) && !io.fromRename(i).bits.eliminatedMove
io.allocPregs(i).isFp := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.fpWen
io.allocPregs(i).preg := io.fromRename(i).bits.pdest
}
......
......@@ -36,31 +36,23 @@ class Rename(implicit p: Parameters) extends XSModule {
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
val robCommits = Flipped(new RobCommitIO)
// from decode buffer
// from decode
val in = Vec(RenameWidth, Flipped(DecoupledIO(new CfCtrl)))
// to rename table
val intReadPorts = Vec(RenameWidth, Vec(3, Input(UInt(PhyRegIdxWidth.W))))
val fpReadPorts = Vec(RenameWidth, Vec(4, Input(UInt(PhyRegIdxWidth.W))))
val intRenamePorts = Vec(RenameWidth, Output(new RatWritePort))
val fpRenamePorts = Vec(RenameWidth, Output(new RatWritePort))
// to dispatch1
val out = Vec(RenameWidth, DecoupledIO(new MicroOp))
val renameBypass = Output(new RenameBypassInfo)
val dispatchInfo = Output(new PreDispatchInfo)
// for debug printing
val debug_int_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
val debug_fp_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
})
// create free list and rat
val intFreeList = Module(if (EnableIntMoveElim) new freelist.MEFreeList else new freelist.StdFreeList)
val intFreeList = Module(new freelist.MEFreeList)
val fpFreeList = Module(new freelist.StdFreeList)
val intRat = Module(new RenameTable(float = false))
val fpRat = Module(new RenameTable(float = true))
// connect flush and redirect ports for rat
Seq(intRat, fpRat) foreach { case rat =>
rat.io.redirect := io.redirect.valid
rat.io.flush := io.flush
rat.io.walkWen := io.robCommits.isWalk
}
// decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RobCommitInfo: from rob)
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
{if(fp) x.ctrl.fpWen else x.ctrl.rfWen && (x.ctrl.ldest =/= 0.U)}
......@@ -98,7 +90,6 @@ class Rename(implicit p: Parameters) extends XSModule {
/* default */ robIdxHead)))) // no instructions passed by this cycle: stick to old value
robIdxHead := robIdxHeadNext
/**
* Rename: allocate free physical register and update rename table
*/
......@@ -119,7 +110,7 @@ class Rename(implicit p: Parameters) extends XSModule {
val hasValid = Cat(io.in.map(_.valid)).orR
val isMove = io.in.map(_.bits.ctrl.isMove)
val isMax = if (EnableIntMoveElim) Some(intFreeList.asInstanceOf[freelist.MEFreeList].maxVec) else None
val isMax = intFreeList.maxVec
val meEnable = WireInit(VecInit(Seq.fill(RenameWidth)(false.B)))
val psrc_cmp = Wire(MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W))))
val intPsrc = Wire(Vec(RenameWidth, UInt()))
......@@ -151,72 +142,39 @@ class Rename(implicit p: Parameters) extends XSModule {
uops(i).robIdx := robIdxHead + PopCount(io.in.take(i).map(_.valid))
// read rename table
def readRat(lsrcList: List[UInt], ldest: UInt, fp: Boolean) = {
val rat = if(fp) fpRat else intRat
val srcCnt = lsrcList.size
val psrcVec = Wire(Vec(srcCnt, UInt(PhyRegIdxWidth.W)))
val old_pdest = Wire(UInt(PhyRegIdxWidth.W))
for(k <- 0 until srcCnt+1){
val rportIdx = i * (srcCnt+1) + k
if(k != srcCnt){
rat.io.readPorts(rportIdx).addr := lsrcList(k)
psrcVec(k) := rat.io.readPorts(rportIdx).rdata
} else {
rat.io.readPorts(rportIdx).addr := ldest
old_pdest := rat.io.readPorts(rportIdx).rdata
}
}
(psrcVec, old_pdest)
}
val lsrcList = List(uops(i).ctrl.lsrc(0), uops(i).ctrl.lsrc(1), uops(i).ctrl.lsrc(2))
val ldest = uops(i).ctrl.ldest
val (intPhySrcVec, intOldPdest) = readRat(lsrcList.take(2), ldest, fp = false)
val intPhySrcVec = io.intReadPorts(i).take(2)
val intOldPdest = io.intReadPorts(i).last
intPsrc(i) := intPhySrcVec(0)
val (fpPhySrcVec, fpOldPdest) = readRat(lsrcList, ldest, fp = true)
val fpPhySrcVec = io.fpReadPorts(i).take(3)
val fpOldPdest = io.fpReadPorts(i).last
uops(i).psrc(0) := Mux(uops(i).ctrl.srcType(0) === SrcType.reg, intPhySrcVec(0), fpPhySrcVec(0))
uops(i).psrc(1) := Mux(uops(i).ctrl.srcType(1) === SrcType.reg, intPhySrcVec(1), fpPhySrcVec(1))
uops(i).psrc(2) := fpPhySrcVec(2)
uops(i).old_pdest := Mux(uops(i).ctrl.rfWen, intOldPdest, fpOldPdest)
if (EnableIntMoveElim) {
if (i == 0) {
// calculate meEnable
meEnable(i) := isMove(i) && (!isMax.get(intPsrc(i)) || uops(i).ctrl.lsrc(0) === 0.U)
} else {
// compare psrc0
psrc_cmp(i-1) := Cat((0 until i).map(j => {
intPsrc(i) === intPsrc(j) && io.in(i).bits.ctrl.isMove && io.in(j).bits.ctrl.isMove
}) /* reverse is not necessary here */)
// calculate meEnable
meEnable(i) := isMove(i) && (!(io.renameBypass.lsrc1_bypass(i-1).orR | psrc_cmp(i-1).orR | isMax.get(intPsrc(i))) || uops(i).ctrl.lsrc(0) === 0.U)
}
uops(i).eliminatedMove := meEnable(i) || (uops(i).ctrl.isMove && uops(i).ctrl.ldest === 0.U)
// send psrc of eliminated move instructions to free list and label them as eliminated
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).valid := meEnable(i)
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).bits := intPsrc(i)
// when (meEnable(i)) {
// XSInfo(io.in(i).valid && io.out(i).valid, p"Move instruction ${Hexadecimal(io.in(i).bits.cf.pc)} eliminated successfully! psrc:${uops(i).psrc(0)}\n")
// } .otherwise {
// XSInfo(io.in(i).valid && io.out(i).valid && isMove(i), p"Move instruction ${Hexadecimal(io.in(i).bits.cf.pc)} failed to be eliminated! psrc:${uops(i).psrc(0)}\n")
// }
// update pdest
uops(i).pdest := Mux(meEnable(i), intPsrc(i), // move eliminated
Mux(needIntDest(i), intFreeList.allocatePhyReg(i), // normal int inst
Mux(uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen, 0.U // int inst with dst=r0
/* default */, fpFreeList.allocatePhyReg(i)))) // normal fp inst
if (i == 0) {
// calculate meEnable
meEnable(i) := isMove(i) && (!isMax(intPsrc(i)) || uops(i).ctrl.lsrc(0) === 0.U)
} else {
uops(i).eliminatedMove := DontCare
psrc_cmp.foreach(_ := DontCare)
// update pdest
uops(i).pdest := Mux(needIntDest(i), intFreeList.allocatePhyReg(i), // normal int inst
Mux(uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen, 0.U // int inst with dst=r0
/* default */, fpFreeList.allocatePhyReg(i))) // normal fp inst
// compare psrc0
psrc_cmp(i-1) := Cat((0 until i).map(j => {
intPsrc(i) === intPsrc(j) && io.in(i).bits.ctrl.isMove && io.in(j).bits.ctrl.isMove
}) /* reverse is not necessary here */)
// calculate meEnable
meEnable(i) := isMove(i) && (!(io.renameBypass.lsrc1_bypass(i-1).orR | psrc_cmp(i-1).orR | isMax(intPsrc(i))) || uops(i).ctrl.lsrc(0) === 0.U)
}
uops(i).eliminatedMove := meEnable(i) || (uops(i).ctrl.isMove && uops(i).ctrl.ldest === 0.U)
// send psrc of eliminated move instructions to free list and label them as eliminated
intFreeList.psrcOfMove(i).valid := meEnable(i)
intFreeList.psrcOfMove(i).bits := intPsrc(i)
// update pdest
uops(i).pdest := Mux(meEnable(i), intPsrc(i), // move eliminated
Mux(needIntDest(i), intFreeList.allocatePhyReg(i), // normal int inst
Mux(uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen, 0.U // int inst with dst=r0
/* default */, fpFreeList.allocatePhyReg(i)))) // normal fp inst
// Assign performance counters
uops(i).debugInfo.renameTime := GTimer()
......@@ -267,7 +225,7 @@ class Rename(implicit p: Parameters) extends XSModule {
*/
for (i <- 0 until CommitWidth) {
Seq((intRat, false), (fpRat, true)) foreach { case (rat, fp) =>
Seq((io.intRenamePorts, false), (io.fpRenamePorts, true)) foreach { case (rat, fp) =>
// is valid commit req and given instruction has destination register
val commitDestValid = io.robCommits.valid(i) && needDestRegCommit(fp, io.robCommits.info(i))
XSDebug(p"isFp[${fp}]index[$i]-commitDestValid:$commitDestValid,isWalk:${io.robCommits.isWalk}\n")
......@@ -278,50 +236,23 @@ class Rename(implicit p: Parameters) extends XSModule {
// walk back write - restore spec state : ldest => old_pdest
if (fp && i < RenameWidth) {
rat.io.specWritePorts(i).wen := (commitDestValid && io.robCommits.isWalk) || fpSpecWen(i)
rat.io.specWritePorts(i).addr := Mux(fpSpecWen(i), uops(i).ctrl.ldest, io.robCommits.info(i).ldest)
rat.io.specWritePorts(i).wdata := Mux(fpSpecWen(i), fpFreeList.allocatePhyReg(i), io.robCommits.info(i).old_pdest)
// When redirect happens (mis-prediction), don't update the rename table
rat(i).wen := fpSpecWen(i) && !io.flush && !io.redirect.valid
rat(i).addr := uops(i).ctrl.ldest
rat(i).data := fpFreeList.allocatePhyReg(i)
} else if (!fp && i < RenameWidth) {
rat.io.specWritePorts(i).wen := (commitDestValid && io.robCommits.isWalk) || intSpecWen(i)
rat.io.specWritePorts(i).addr := Mux(intSpecWen(i), uops(i).ctrl.ldest, io.robCommits.info(i).ldest)
if (EnableIntMoveElim) {
rat.io.specWritePorts(i).wdata :=
Mux(intSpecWen(i), Mux(meEnable(i), intPsrc(i), intFreeList.allocatePhyReg(i)), io.robCommits.info(i).old_pdest)
} else {
rat.io.specWritePorts(i).wdata :=
Mux(intSpecWen(i), intFreeList.allocatePhyReg(i), io.robCommits.info(i).old_pdest)
}
// when i >= RenameWidth, this write must happens during WALK process
} else if (i >= RenameWidth) {
rat.io.specWritePorts(i).wen := commitDestValid && io.robCommits.isWalk
rat.io.specWritePorts(i).addr := io.robCommits.info(i).ldest
rat.io.specWritePorts(i).wdata := io.robCommits.info(i).old_pdest
rat(i).wen := intSpecWen(i) && !io.flush && !io.redirect.valid
rat(i).addr := uops(i).ctrl.ldest
rat(i).data := Mux(meEnable(i), intPsrc(i), intFreeList.allocatePhyReg(i))
}
when (commitDestValid && io.robCommits.isWalk) {
XSInfo({if(fp) p"[fp" else p"[int"} + p" walk] " +
p"ldest:${rat.io.specWritePorts(i).addr} -> old_pdest:${rat.io.specWritePorts(i).wdata}\n")
}
// normal write - update arch state (serve as initialization)
rat.io.archWritePorts(i).wen := commitDestValid && !io.robCommits.isWalk
rat.io.archWritePorts(i).addr := io.robCommits.info(i).ldest
rat.io.archWritePorts(i).wdata := io.robCommits.info(i).pdest
XSInfo(rat.io.archWritePorts(i).wen,
{if(fp) p"[fp" else p"[int"} + p" arch rat update] ldest:${rat.io.archWritePorts(i).addr} ->" +
p" pdest:${rat.io.archWritePorts(i).wdata}\n"
)
/*
II. Free List Update
*/
if (fp) { // Float Point free list
fpFreeList.freeReq(i) := commitDestValid && !io.robCommits.isWalk
fpFreeList.freePhyReg(i) := io.robCommits.info(i).old_pdest
} else if (EnableIntMoveElim) { // Integer free list
} else { // Integer free list
// during walk process:
// 1. for normal inst, free pdest + revert rat from ldest->pdest to ldest->old_pdest
......@@ -334,11 +265,8 @@ class Rename(implicit p: Parameters) extends XSModule {
intFreeList.freeReq(i) := commitDestValid // walk or not walk
intFreeList.freePhyReg(i) := Mux(io.robCommits.isWalk, io.robCommits.info(i).pdest, io.robCommits.info(i).old_pdest)
intFreeList.asInstanceOf[freelist.MEFreeList].eliminatedMove(i) := io.robCommits.info(i).eliminatedMove
intFreeList.asInstanceOf[freelist.MEFreeList].multiRefPhyReg(i) := io.robCommits.info(i).pdest
} else {
intFreeList.freeReq(i) := commitDestValid && !io.robCommits.isWalk
intFreeList.freePhyReg(i) := io.robCommits.info(i).old_pdest
intFreeList.eliminatedMove(i) := io.robCommits.info(i).eliminatedMove
intFreeList.multiRefPhyReg(i) := io.robCommits.info(i).pdest
}
}
}
......@@ -370,18 +298,13 @@ class Rename(implicit p: Parameters) extends XSModule {
for (i <- 0 until CommitWidth) {
val info = io.robCommits.info(i)
XSDebug(io.robCommits.isWalk && io.robCommits.valid(i), p"[#$i walk info] pc:${Hexadecimal(info.pc)} " +
p"ldest:${info.ldest} rfWen:${info.rfWen} fpWen:${info.fpWen} " + { if (EnableIntMoveElim) p"eliminatedMove:${info.eliminatedMove} " else p"" } +
p"ldest:${info.ldest} rfWen:${info.rfWen} fpWen:${info.fpWen} " + p"eliminatedMove:${info.eliminatedMove} " +
p"pdest:${info.pdest} old_pdest:${info.old_pdest}\n")
}
XSDebug(p"inValidVec: ${Binary(Cat(io.in.map(_.valid)))}\n")
XSInfo(!canOut, p"stall at rename, hasValid:${hasValid}, fpCanAlloc:${fpFreeList.canAllocate}, intCanAlloc:${intFreeList.canAllocate} dispatch1ready:${io.out(0).ready}, isWalk:${io.robCommits.isWalk}\n")
intRat.io.debug_rdata <> io.debug_int_rat
fpRat.io.debug_rdata <> io.debug_fp_rat
XSDebug(p"Arch Int RAT:" + io.debug_int_rat.zipWithIndex.map{ case (r, i) => p"#$i:$r " }.reduceLeft(_ + _) + p"\n")
XSPerfAccumulate("in", Mux(RegNext(io.in(0).ready), PopCount(io.in.map(_.valid)), 0.U))
XSPerfAccumulate("utilization", PopCount(io.in.map(_.valid)))
XSPerfAccumulate("waitInstr", PopCount((0 until RenameWidth).map(i => io.in(i).valid && !io.in(i).ready)))
......@@ -393,25 +316,23 @@ class Rename(implicit p: Parameters) extends XSModule {
ExcitingUtils.addSource(io.robCommits.isWalk, "TMA_backendiswalk")
}
if (EnableIntMoveElim) {
XSPerfAccumulate("move_instr_count", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove)))
XSPerfAccumulate("move_elim_enabled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && meEnable(i))))
XSPerfAccumulate("move_elim_cancelled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))))
XSPerfAccumulate("move_elim_cancelled_psrc_bypass", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR })))
XSPerfAccumulate("move_elim_cancelled_cnt_limit", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax.get(io.out(i).bits.psrc(0)))))
XSPerfAccumulate("move_elim_cancelled_inc_more_than_one", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR })))
// to make sure meEnable functions as expected
for (i <- 0 until RenameWidth) {
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax.get(io.out(i).bits.psrc(0)),
p"ME_CANCELLED: ref counter hits max value (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR },
p"ME_CANCELLED: RAW dependency (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR },
p"ME_CANCELLED: psrc duplicates with former instruction (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
}
XSDebug(VecInit(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))).asUInt().orR,
p"ME_CANCELLED: pc group [ " + (0 until RenameWidth).map(i => p"fire:${io.out(i).fire()},pc:0x${Hexadecimal(io.in(i).bits.cf.pc)} ").reduceLeft(_ + _) + p"]\n")
XSInfo(meEnable.asUInt().orR(), p"meEnableVec:${Binary(meEnable.asUInt)}\n")
XSPerfAccumulate("move_instr_count", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove)))
XSPerfAccumulate("move_elim_enabled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && meEnable(i))))
XSPerfAccumulate("move_elim_cancelled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))))
XSPerfAccumulate("move_elim_cancelled_psrc_bypass", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR })))
XSPerfAccumulate("move_elim_cancelled_cnt_limit", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax(io.out(i).bits.psrc(0)))))
XSPerfAccumulate("move_elim_cancelled_inc_more_than_one", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR })))
// to make sure meEnable functions as expected
for (i <- 0 until RenameWidth) {
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax(io.out(i).bits.psrc(0)),
p"ME_CANCELLED: ref counter hits max value (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR },
p"ME_CANCELLED: RAW dependency (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR },
p"ME_CANCELLED: psrc duplicates with former instruction (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
}
XSDebug(VecInit(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))).asUInt().orR,
p"ME_CANCELLED: pc group [ " + (0 until RenameWidth).map(i => p"fire:${io.out(i).fire()},pc:0x${Hexadecimal(io.in(i).bits.cf.pc)} ").reduceLeft(_ + _) + p"]\n")
XSInfo(meEnable.asUInt().orR(), p"meEnableVec:${Binary(meEnable.asUInt)}\n")
}
......@@ -19,59 +19,132 @@ package xiangshan.backend.rename
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{ParallelPriorityMux, XSError}
import xiangshan._
class RatReadPort(implicit p: Parameters) extends XSBundle {
val hold = Input(Bool())
val addr = Input(UInt(5.W))
val rdata = Output(UInt(PhyRegIdxWidth.W))
val data = Output(UInt(PhyRegIdxWidth.W))
}
class RatWritePort(implicit p: Parameters) extends XSBundle {
val wen = Input(Bool())
val addr = Input(UInt(5.W))
val wdata = Input(UInt(PhyRegIdxWidth.W))
val wen = Bool()
val addr = UInt(5.W)
val data = UInt(PhyRegIdxWidth.W)
}
class RenameTable(float: Boolean)(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val redirect = Input(Bool())
val flush = Input(Bool())
val walkWen = Input(Bool())
val readPorts = Vec({if(float) 4 else 3} * RenameWidth, new RatReadPort)
val specWritePorts = Vec(CommitWidth, new RatWritePort)
val archWritePorts = Vec(CommitWidth, new RatWritePort)
val specWritePorts = Vec(CommitWidth, Input(new RatWritePort))
val archWritePorts = Vec(CommitWidth, Input(new RatWritePort))
val debug_rdata = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
})
// speculative rename table
val spec_table = RegInit(VecInit(Seq.tabulate(32)(i => i.U(PhyRegIdxWidth.W))))
val spec_table_next = WireInit(spec_table)
// arch state rename table
val arch_table = RegInit(VecInit(Seq.tabulate(32)(i => i.U(PhyRegIdxWidth.W))))
// When redirect happens (mis-prediction), don't update the rename table
// However, when mis-prediction and walk happens at the same time, rename table needs to be updated
for (w <- io.specWritePorts){
when (w.wen && (!(io.redirect || io.flush) || io.walkWen)) {
spec_table(w.addr) := w.wdata
}
}
// For better timing, we optimize reading and writing to RenameTable as follows:
// (1) Writing at T0 will be actually processed at T1.
// (2) Reading is synchronous now.
// (3) RAddr at T0 will be used to access the table and get data at T0.
// (4) WData at T0 is bypassed to RData at T1.
val t1_rdata = io.readPorts.map(p => RegNext(Mux(p.hold, p.data, spec_table_next(p.addr))))
val t1_raddr = io.readPorts.map(p => RegEnable(p.addr, !p.hold))
val t1_wSpec = RegNext(io.specWritePorts)
for((r, i) <- io.readPorts.zipWithIndex){
r.rdata := spec_table(r.addr)
// WRITE: when instruction commits or walking
val t1_flush = RegNext(io.flush)
val t1_wSpec_addr = t1_wSpec.map(w => Mux(w.wen, UIntToOH(w.addr), 0.U))
for ((next, i) <- spec_table_next.zipWithIndex) {
val matchVec = t1_wSpec_addr.map(w => w(i))
val wMatch = ParallelPriorityMux(matchVec.reverse, t1_wSpec.map(_.data).reverse)
// When there's a flush, we use arch_table to update spec_table.
next := Mux(t1_flush, arch_table(i), Mux(VecInit(matchVec).asUInt.orR, wMatch, spec_table(i)))
}
spec_table := spec_table_next
for(w <- io.archWritePorts){
when(w.wen){ arch_table(w.addr) := w.wdata }
// READ: decode-rename stage
for ((r, i) <- io.readPorts.zipWithIndex) {
// We use two comparisons here because r.hold has bad timing but addrs have better timing.
val t0_bypass = io.specWritePorts.map(w => w.wen && Mux(r.hold, w.addr === t1_raddr(i), w.addr === r.addr))
val t1_bypass = RegNext(VecInit(t0_bypass))
val bypass_data = ParallelPriorityMux(t1_bypass.reverse, t1_wSpec.map(_.data).reverse)
r.data := Mux(t1_bypass.asUInt.orR, bypass_data, t1_rdata(i))
}
when (io.flush) {
spec_table := arch_table
// spec table needs to be updated when flushPipe
for (w <- io.archWritePorts) {
when(w.wen){ spec_table(w.addr) := w.wdata }
for (w <- io.archWritePorts) {
when (w.wen) {
arch_table(w.addr) := w.data
}
}
io.debug_rdata := arch_table
}
/**
  * Instantiates the integer and floating-point [[RenameTable]]s and drives
  * their ports from the ROB commit bus and from the rename stage.
  *
  * For each table:
  *  - `flush`, the read ports and the debug read-out are forwarded between the
  *    wrapper IO and the table;
  *  - architectural write ports record `ldest -> pdest` for valid, non-walk
  *    commits that write a destination register;
  *  - speculative write ports record `ldest -> old_pdest` for valid walk
  *    entries, unless the rename port with the same index asserts `wen`, in
  *    which case the rename request is written instead.
  *
  * An integer commit counts as writing a destination when `rfWen` is set and
  * `ldest` is not 0; a floating-point commit when `fpWen` is set.
  */
class RenameTableWrapper(implicit p: Parameters) extends XSModule {
  // Rename port i shares speculative write port i with walk entry i. `zip`
  // stops at the shorter sequence, so with more rename ports than speculative
  // write ports the extra rename writes would be silently dropped.
  require(RenameWidth <= CommitWidth,
    s"RenameTableWrapper: RenameWidth ($RenameWidth) must not exceed CommitWidth ($CommitWidth)")

  val io = IO(new Bundle() {
    val flush = Input(Bool())
    val robCommits = Flipped(new RobCommitIO)
    val intReadPorts = Vec(RenameWidth, Vec(3, new RatReadPort))
    val intRenamePorts = Vec(RenameWidth, Input(new RatWritePort))
    val fpReadPorts = Vec(RenameWidth, Vec(4, new RatReadPort))
    val fpRenamePorts = Vec(RenameWidth, Input(new RatWritePort))
    // for debug printing
    val debug_int_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
    val debug_fp_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
  })

  val intRat = Module(new RenameTable(float = false))
  val fpRat = Module(new RenameTable(float = true))

  /** Wires one rename table to the wrapper IO.
    *
    * @param rat         table to drive
    * @param readPorts   flattened read ports exposed for this table
    * @param renamePorts rename-stage write requests for this table
    * @param debugRat    debug read-out exposed for this table
    * @param destValid   per commit slot: the entry writes a register of this table
    */
  private def connectRat(
    rat: RenameTable,
    readPorts: Seq[RatReadPort],
    renamePorts: Seq[RatWritePort],
    debugRat: Vec[UInt],
    destValid: Seq[Bool]
  ): Unit = {
    rat.io.flush := io.flush
    rat.io.debug_rdata <> debugRat
    rat.io.readPorts <> readPorts

    // Non-walk commit: ldest -> pdest in the architectural table.
    for ((arch, i) <- rat.io.archWritePorts.zipWithIndex) {
      arch.wen := !io.robCommits.isWalk && io.robCommits.valid(i) && destValid(i)
      arch.addr := io.robCommits.info(i).ldest
      arch.data := io.robCommits.info(i).pdest
    }

    // Walk: ldest -> old_pdest in the speculative table (default connection).
    for ((spec, i) <- rat.io.specWritePorts.zipWithIndex) {
      spec.wen := io.robCommits.isWalk && io.robCommits.valid(i) && destValid(i)
      spec.addr := io.robCommits.info(i).ldest
      spec.data := io.robCommits.info(i).old_pdest
    }

    // Rename request: must stay after the walk defaults so that this later,
    // conditional connection overrides them whenever `rename.wen` is set.
    for ((spec, rename) <- rat.io.specWritePorts.zip(renamePorts)) {
      when (rename.wen) {
        spec.wen := true.B
        spec.addr := rename.addr
        spec.data := rename.data
      }
    }
  }

  val intDestValid = io.robCommits.info.map(info => info.rfWen && info.ldest =/= 0.U)
  connectRat(intRat, io.intReadPorts.flatten, io.intRenamePorts, io.debug_int_rat, intDestValid)

  val fpDestValid = io.robCommits.info.map(_.fpWen)
  connectRat(fpRat, io.fpReadPorts.flatten, io.fpRenamePorts, io.debug_fp_rat, fpDestValid)
}
......@@ -698,12 +698,8 @@ class Rob(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
// enqueue logic set 6 writebacked to false
for (i <- 0 until RenameWidth) {
when (canEnqueue(i)) {
if (EnableIntMoveElim) {
eliminatedMove(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove
writebacked(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove && !io.enq.req(i).bits.cf.exceptionVec.asUInt().orR
} else {
writebacked(enqPtrVec(i).value) := false.B
}
eliminatedMove(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove
writebacked(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove && !io.enq.req(i).bits.cf.exceptionVec.asUInt.orR
val isStu = io.enq.req(i).bits.ctrl.fuType === FuType.stu
store_data_writebacked(enqPtrVec(i).value) := !isStu
}
......@@ -755,11 +751,7 @@ class Rob(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
wdata.fpWen := req.ctrl.fpWen
wdata.wflags := req.ctrl.fpu.wflags
wdata.commitType := req.ctrl.commitType
if (EnableIntMoveElim) {
wdata.eliminatedMove := req.eliminatedMove
} else {
wdata.eliminatedMove := DontCare
}
wdata.eliminatedMove := req.eliminatedMove
wdata.pdest := req.pdest
wdata.old_pdest := req.old_pdest
wdata.ftqIdx := req.cf.ftqPtr
......@@ -872,10 +864,8 @@ class Rob(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
XSPerfAccumulate("commitInstr", ifCommit(trueCommitCnt))
val commitIsMove = commitDebugUop.map(_.ctrl.isMove)
XSPerfAccumulate("commitInstrMove", ifCommit(PopCount(io.commits.valid.zip(commitIsMove).map{ case (v, m) => v && m })))
if (EnableIntMoveElim) {
val commitMoveElim = commitDebugUop.map(_.debugInfo.eliminatedMove)
XSPerfAccumulate("commitInstrMoveElim", ifCommit(PopCount(io.commits.valid zip commitMoveElim map { case (v, e) => v && e })))
}
val commitMoveElim = commitDebugUop.map(_.debugInfo.eliminatedMove)
XSPerfAccumulate("commitInstrMoveElim", ifCommit(PopCount(io.commits.valid zip commitMoveElim map { case (v, e) => v && e })))
XSPerfAccumulate("commitInstrFused", ifCommit(fuseCommitCnt))
val commitIsLoad = io.commits.info.map(_.commitType).map(_ === CommitType.LOAD)
val commitLoadValid = io.commits.valid.zip(commitIsLoad).map{ case (v, t) => v && t }
......@@ -969,13 +959,9 @@ class Rob(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
difftest.io.pc := RegNext(SignExt(uop.cf.pc, XLEN))
difftest.io.instr := RegNext(uop.cf.instr)
difftest.io.special := RegNext(uop.ctrl.isFused =/= 0.U)
if (EnableIntMoveElim) {
// when committing an eliminated move instruction,
// we must make sure that skip is properly set to false (output from EXU is random value)
difftest.io.skip := RegNext(Mux(uop.eliminatedMove, false.B, exuOut.isMMIO || exuOut.isPerfCnt))
} else {
difftest.io.skip := RegNext(exuOut.isMMIO || exuOut.isPerfCnt)
}
// when committing an eliminated move instruction,
// we must make sure that skip is properly set to false (output from EXU is random value)
difftest.io.skip := RegNext(Mux(uop.eliminatedMove, false.B, exuOut.isMMIO || exuOut.isPerfCnt))
difftest.io.isRVC := RegNext(uop.cf.pd.isRVC)
difftest.io.scFailed := RegNext(!uop.diffTestDebugLrScValid &&
uop.ctrl.fuType === FuType.mou &&
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册