提交 39d3280e 编写于 作者: Y YikeZhou

rename: [refactor] move free list into 'freelist' package

        "trait" was used to improve code style
parameters: use EnableIntMoveElim to control code generation
[WIP] EnableIntMoveElim=false hasn't been tested
上级 6e3cddfe
......@@ -230,7 +230,7 @@ trait HasXSParameter {
val EnableIntMoveElim = coreParams.EnableIntMoveElim
val IntRefCounterWidth = coreParams.IntRefCounterWidth
val StdFreeListSize = NRPhyRegs - 32
val AltFreeListSize = NRPhyRegs - { if (IntRefCounterWidth > 0 && IntRefCounterWidth < 5) (32 / Math.pow(2, IntRefCounterWidth)).toInt else 1 }
val MEFreeListSize = NRPhyRegs - { if (IntRefCounterWidth > 0 && IntRefCounterWidth < 5) (32 / Math.pow(2, IntRefCounterWidth)).toInt else 1 }
val LoadQueueSize = coreParams.LoadQueueSize
val StoreQueueSize = coreParams.StoreQueueSize
val dpParams = coreParams.dpParams
......
......@@ -25,7 +25,7 @@ import utils._
class AlternativeFreeList(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
class IntFreeListPtr extends CircularQueuePtr[IntFreeListPtr](AltFreeListSize)
class IntFreeListPtr extends CircularQueuePtr[IntFreeListPtr](MEFreeListSize)
object IntFreeListPtr {
def apply(f: Bool, v:UInt): IntFreeListPtr = {
......@@ -104,7 +104,7 @@ class AlternativeFreeList(implicit p: Parameters) extends XSModule with HasCircu
val freeRegCnt = Wire(UInt())
// free list as circular buffer
val freeList = RegInit(VecInit(Seq.tabulate(AltFreeListSize){
val freeList = RegInit(VecInit(Seq.tabulate(MEFreeListSize){
case n if (n >= 0 && n < NRPhyRegs - 32) => (n + 32).U
case _ => DontCare
}))
......@@ -337,8 +337,8 @@ class AlternativeFreeList(implicit p: Parameters) extends XSModule with HasCircu
XSError(specRefCounter(i) < cmtCounter(i), p"Commits Overflow of preg${i}")
}
XSDebug(Array.range(0, AltFreeListSize).map(x => x.toString()).mkString("Free List (idx): ", "\t", "\n"))
XSDebug(p"Free List (val): " + Array.range(0, AltFreeListSize).map(x => p"${freeList(x)}\t").reduceLeft(_ + _) + "\n")
XSDebug(Array.range(0, MEFreeListSize).map(x => x.toString()).mkString("Free List (idx): ", "\t", "\n"))
XSDebug(p"Free List (val): " + Array.range(0, MEFreeListSize).map(x => p"${freeList(x)}\t").reduceLeft(_ + _) + "\n")
XSDebug(p"head:$headPtr tail:$tailPtr headPtrNext:$headPtrNext tailPtrNext:$tailPtrNext freeRegCnt:$freeRegCnt\n")
......
......@@ -31,7 +31,7 @@ class RenameBypassInfo(implicit p: Parameters) extends XSBundle {
val ldest_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
}
class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
class Rename(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
......@@ -48,8 +48,8 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
})
// create free list and rat
val intFreeList = Module(new AlternativeFreeList)
val fpFreeList = Module(new FreeList)
val intFreeList = Module(if (EnableIntMoveElim) new freelist.MEFreeList else new freelist.StdFreeList)
val fpFreeList = Module(new freelist.StdFreeList)
val intRat = Module(new RenameTable(float = false))
val fpRat = Module(new RenameTable(float = true))
......@@ -61,18 +61,21 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
rat.io.walkWen := io.roqCommits.isWalk
}
// connect flush and redirect ports for __float point__ free list
fpFreeList.io.flush := io.flush
fpFreeList.io.redirect := io.redirect.valid
fpFreeList.io.walk.valid := io.roqCommits.isWalk
// connect flush and redirect ports for __integer__ free list *(walk) is handled by dec
intFreeList.io.flush := io.flush
intFreeList.io.redirect := io.redirect.valid
intFreeList.io.walk := io.roqCommits.isWalk
// connect [flush + redirect + walk] ports for __float point__ & __integer__ free list
// Each free list is paired with its register-file kind so that the walk step-back amount is
// counted over the matching kind of destination register (fp list <-> fp dests, int list <-> int dests).
Seq((fpFreeList, true), (intFreeList, false)).foreach { case (fl, isFp) =>
  fl.flush := io.flush
  fl.redirect := io.redirect.valid
  fl.walk := io.roqCommits.isWalk
  // when isWalk, use stepBack to restore head pointer of free list
  // (if ME enabled, stepBack of intFreeList should be useless thus optimized out)
  fl.stepBack := PopCount(io.roqCommits.valid.zip(io.roqCommits.info).map{case (v, i) => v && needDestRegCommit(isFp, i)})
  // walk has higher priority than allocation and thus we don't use isWalk here
  // only when both fp and int free list and dispatch1 has enough space can we do allocation:
  // gating on BOTH lists keeps one list from consuming registers while the other one stalls the
  // rename group (the group would be re-presented next cycle and allocate a second time).
  fl.doAllocate := fpFreeList.canAllocate && intFreeList.canAllocate && io.out(0).ready
}
// dispatch1 ready ++ float point free list ready ++ int free list ready ++ not walk
val canOut = io.out(0).ready && fpFreeList.io.req.canAlloc && intFreeList.io.inc.canInc && !io.roqCommits.isWalk
val canOut = io.out(0).ready && fpFreeList.canAllocate && intFreeList.canAllocate && !io.roqCommits.isWalk
// decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RoqCommitInfo: from roq)
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
......@@ -82,16 +85,6 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
{if(fp) x.fpWen else x.rfWen && (x.ldest =/= 0.U)}
}
// when roqCommits.isWalk, use walk.bits to restore head pointer of free list
fpFreeList.io.walk.bits := PopCount(io.roqCommits.valid.zip(io.roqCommits.info).map{case (v, i) => v && needDestRegCommit(true, i)})
// walk has higher priority than allocation and thus we don't use isWalk here
// only when both fp and int free list and dispatch1 has enough space can we do allocation
fpFreeList.io.req.doAlloc := intFreeList.io.inc.canInc && io.out(0).ready
intFreeList.io.inc.doInc := fpFreeList.io.req.canAlloc && io.out(0).ready
// speculatively assign the instruction with an roqIdx
val validCount = PopCount(io.in.map(_.valid)) // number of instructions waiting to enter roq (from decode)
......@@ -125,7 +118,7 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
val hasValid = Cat(io.in.map(_.valid)).orR
val isMove = io.in.map(_.bits.ctrl.isMove)
val isMax = intFreeList.io.maxVec
val isMax = if (EnableIntMoveElim) Some(intFreeList.asInstanceOf[freelist.MEFreeList].maxVec) else None
val meEnable = WireInit(VecInit(Seq.fill(RenameWidth)(false.B)))
val psrc_cmp = Wire(MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W))))
......@@ -142,8 +135,8 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
// alloc a new phy reg
needFpDest(i) := inValid && needDestReg(fp = true, io.in(i).bits)
needIntDest(i) := inValid && needDestReg(fp = false, io.in(i).bits)
fpFreeList.io.req.allocReqs(i) := needFpDest(i)
intFreeList.io.inc.req(i) := needIntDest(i)
fpFreeList.allocateReq(i) := needFpDest(i)
intFreeList.allocateReq(i) := needIntDest(i)
// no valid instruction from decode stage || all resources (dispatch1 + both free lists) ready
io.in(i).ready := !hasValid || canOut
......@@ -157,7 +150,7 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
uops(i).roqIdx := roqIdxHead + i.U
io.out(i).valid := io.in(i).valid && intFreeList.io.inc.canInc && fpFreeList.io.req.canAlloc && !io.roqCommits.isWalk
io.out(i).valid := io.in(i).valid && intFreeList.canAllocate && fpFreeList.canAllocate && !io.roqCommits.isWalk
io.out(i).bits := uops(i)
......@@ -190,7 +183,7 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
if (i == 0) {
// calculate meEnable
meEnable(i) := isMove(i) && !isMax(uops(i).psrc(0))
meEnable(i) := isMove(i) && !isMax.get(uops(i).psrc(0))
} else {
// compare psrc0
psrc_cmp(i-1) := Cat((0 until i).map(j => {
......@@ -198,37 +191,31 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
}) /* reverse is not necessary here */)
// calculate meEnable
meEnable(i) := isMove(i) && !(io.renameBypass.lsrc1_bypass(i-1).orR | psrc_cmp(i-1).orR | isMax(uops(i).psrc(0)))
meEnable(i) := isMove(i) && !(io.renameBypass.lsrc1_bypass(i-1).orR | psrc_cmp(i-1).orR | isMax.get(uops(i).psrc(0)))
}
uops(i).eliminatedMove := meEnable(i)
// send psrc of eliminated move instructions to free list and label them as eliminated
when (meEnable(i)) {
intFreeList.io.inc.psrcOfMove(i).valid := true.B
intFreeList.io.inc.psrcOfMove(i).bits := uops(i).psrc(0)
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).valid := true.B
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).bits := uops(i).psrc(0)
XSInfo(io.in(i).valid && io.out(i).valid, p"Move instruction ${Hexadecimal(io.in(i).bits.cf.pc)} eliminated successfully! psrc:${uops(i).psrc(0)}\n")
} .otherwise {
intFreeList.io.inc.psrcOfMove(i).valid := false.B
intFreeList.io.inc.psrcOfMove(i).bits := DontCare
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).valid := false.B
intFreeList.asInstanceOf[freelist.MEFreeList].psrcOfMove(i).bits := DontCare
XSInfo(io.in(i).valid && io.out(i).valid && isMove(i), p"Move instruction ${Hexadecimal(io.in(i).bits.cf.pc)} failed to be eliminated! psrc:${uops(i).psrc(0)}\n")
}
// update pdest
uops(i).pdest := Mux(meEnable(i), uops(i).psrc(0), // move eliminated
Mux(needIntDest(i), intFreeList.io.inc.pdests(i), // normal int inst
Mux(needIntDest(i), intFreeList.allocatePhyReg(i), // normal int inst
Mux(uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen, 0.U // int inst with dst=r0
/* default */, fpFreeList.io.req.pdests(i)))) // normal fp inst
/* default */, fpFreeList.allocatePhyReg(i)))) // normal fp inst
// write speculative rename table
intSpecWen(i) := intFreeList.io.inc.req(i) && intFreeList.io.inc.canInc && intFreeList.io.inc.doInc && !io.roqCommits.isWalk
// intRat.io.specWritePorts(i).wen := intSpecWen
// intRat.io.specWritePorts(i).addr := uops(i).ctrl.ldest
// intRat.io.specWritePorts(i).wdata := Mux(meEnable(i), uops(i).psrc(0), intFreeList.io.inc.pdests(i))
fpSpecWen(i) := fpFreeList.io.req.allocReqs(i) && fpFreeList.io.req.canAlloc && fpFreeList.io.req.doAlloc && !io.roqCommits.isWalk
// fpRat.io.specWritePorts(i).wen := fpSpecWen
// fpRat.io.specWritePorts(i).addr := uops(i).ctrl.ldest
// fpRat.io.specWritePorts(i).wdata := fpFreeList.io.req.pdests(i)
// we update rat later inside commit code
intSpecWen(i) := intFreeList.allocateReq(i) && intFreeList.canAllocate && intFreeList.doAllocate && !io.roqCommits.isWalk
fpSpecWen(i) := fpFreeList.allocateReq(i) && fpFreeList.canAllocate && fpFreeList.doAllocate && !io.roqCommits.isWalk
}
// We don't bypass the old_pdest from valid instructions with the same ldest currently in rename stage.
......@@ -290,16 +277,19 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
if (fp && i < RenameWidth) {
rat.io.specWritePorts(i).wen := (commitDestValid && io.roqCommits.isWalk) || fpSpecWen(i)
rat.io.specWritePorts(i).addr := Mux(fpSpecWen(i), uops(i).ctrl.ldest, io.roqCommits.info(i).ldest)
rat.io.specWritePorts(i).wdata := Mux(fpSpecWen(i), fpFreeList.io.req.pdests(i), io.roqCommits.info(i).old_pdest)
rat.io.specWritePorts(i).wdata := Mux(fpSpecWen(i), fpFreeList.allocatePhyReg(i), io.roqCommits.info(i).old_pdest)
} else if (!fp && i < RenameWidth) {
rat.io.specWritePorts(i).wen := (commitDestValid && io.roqCommits.isWalk) || intSpecWen(i)
rat.io.specWritePorts(i).addr := Mux(intSpecWen(i), uops(i).ctrl.ldest, io.roqCommits.info(i).ldest)
rat.io.specWritePorts(i).wdata := Mux(intSpecWen(i), Mux(meEnable(i), uops(i).psrc(0), intFreeList.io.inc.pdests(i)), io.roqCommits.info(i).old_pdest)
} else if (fp && i >= RenameWidth) {
rat.io.specWritePorts(i).wen := commitDestValid && io.roqCommits.isWalk
rat.io.specWritePorts(i).addr := io.roqCommits.info(i).ldest
rat.io.specWritePorts(i).wdata := io.roqCommits.info(i).old_pdest
} else if (!fp && i >= RenameWidth) {
if (EnableIntMoveElim) {
rat.io.specWritePorts(i).wdata :=
Mux(intSpecWen(i), Mux(meEnable(i), uops(i).psrc(0), intFreeList.allocatePhyReg(i)), io.roqCommits.info(i).old_pdest)
} else {
rat.io.specWritePorts(i).wdata :=
Mux(intSpecWen(i), intFreeList.allocatePhyReg(i), io.roqCommits.info(i).old_pdest)
}
// when i >= RenameWidth, this write must happens during WALK process
} else if (i >= RenameWidth) {
rat.io.specWritePorts(i).wen := commitDestValid && io.roqCommits.isWalk
rat.io.specWritePorts(i).addr := io.roqCommits.info(i).ldest
rat.io.specWritePorts(i).wdata := io.roqCommits.info(i).old_pdest
......@@ -326,9 +316,9 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
*/
if (fp) { // Float Point free list
fpFreeList.io.deallocReqs(i) := commitDestValid && !io.roqCommits.isWalk
fpFreeList.io.deallocPregs(i) := io.roqCommits.info(i).old_pdest
} else { // Integer free list
fpFreeList.freeReq(i) := commitDestValid && !io.roqCommits.isWalk
fpFreeList.freePhyReg(i) := io.roqCommits.info(i).old_pdest
} else if (EnableIntMoveElim) { // Integer free list
// during walk process:
// 1. for normal inst, free pdest + revert rat from ldest->pdest to ldest->old_pdest
......@@ -339,10 +329,13 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
// b. treat walk as normal commit except replace old_pdests with pdests and set io.walk to true
// c. ignore pdests port when walking
intFreeList.io.dec.req(i) := commitDestValid // walk or not walk
intFreeList.io.dec.old_pdests(i) := Mux(io.roqCommits.isWalk, io.roqCommits.info(i).pdest, io.roqCommits.info(i).old_pdest)
intFreeList.io.dec.eliminatedMove(i) := io.roqCommits.info(i).eliminatedMove
intFreeList.io.dec.pdests(i) := io.roqCommits.info(i).pdest
intFreeList.freeReq(i) := commitDestValid // walk or not walk
intFreeList.freePhyReg(i) := Mux(io.roqCommits.isWalk, io.roqCommits.info(i).pdest, io.roqCommits.info(i).old_pdest)
intFreeList.asInstanceOf[freelist.MEFreeList].eliminatedMove(i) := io.roqCommits.info(i).eliminatedMove
intFreeList.asInstanceOf[freelist.MEFreeList].multiRefPhyReg(i) := io.roqCommits.info(i).pdest
} else {
intFreeList.freeReq(i) := commitDestValid && !io.roqCommits.isWalk
intFreeList.freePhyReg(i) := io.roqCommits.info(i).old_pdest
}
}
}
......@@ -379,7 +372,7 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
}
XSDebug(p"inValidVec: ${Binary(Cat(io.in.map(_.valid)))}\n")
XSInfo(!canOut, p"stall at rename, hasValid:${hasValid}, fpCanAlloc:${fpFreeList.io.req.canAlloc}, intCanAlloc:${intFreeList.io.inc.canInc} dispatch1ready:${io.out(0).ready}, isWalk:${io.roqCommits.isWalk}\n")
XSInfo(!canOut, p"stall at rename, hasValid:${hasValid}, fpCanAlloc:${fpFreeList.canAllocate}, intCanAlloc:${intFreeList.canAllocate} dispatch1ready:${io.out(0).ready}, isWalk:${io.roqCommits.isWalk}\n")
XSInfo(meEnable.asUInt().orR(), p"meEnableVec:${Binary(meEnable.asUInt)}\n")
intRat.io.debug_rdata <> io.debug_int_rat
......@@ -390,20 +383,20 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
XSPerfAccumulate("in", Mux(RegNext(io.in(0).ready), PopCount(io.in.map(_.valid)), 0.U))
XSPerfAccumulate("utilization", PopCount(io.in.map(_.valid)))
XSPerfAccumulate("waitInstr", PopCount((0 until RenameWidth).map(i => io.in(i).valid && !io.in(i).ready)))
XSPerfAccumulate("stall_cycle_dispatch", hasValid && !io.out(0).ready && fpFreeList.io.req.canAlloc && intFreeList.io.inc.canInc && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_fp", hasValid && io.out(0).ready && !fpFreeList.io.req.canAlloc && intFreeList.io.inc.canInc && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_int", hasValid && io.out(0).ready && fpFreeList.io.req.canAlloc && !intFreeList.io.inc.canInc && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_walk", hasValid && io.out(0).ready && fpFreeList.io.req.canAlloc && intFreeList.io.inc.canInc && io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_dispatch", hasValid && !io.out(0).ready && fpFreeList.canAllocate && intFreeList.canAllocate && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_fp", hasValid && io.out(0).ready && !fpFreeList.canAllocate && intFreeList.canAllocate && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_int", hasValid && io.out(0).ready && fpFreeList.canAllocate && !intFreeList.canAllocate && !io.roqCommits.isWalk)
XSPerfAccumulate("stall_cycle_walk", hasValid && io.out(0).ready && fpFreeList.canAllocate && intFreeList.canAllocate && io.roqCommits.isWalk)
XSPerfAccumulate("move_instr_count", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove)))
XSPerfAccumulate("move_elim_enabled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && meEnable(i))))
XSPerfAccumulate("move_elim_cancelled", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i))))
XSPerfAccumulate("move_elim_cancelled_psrc_bypass", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR })))
XSPerfAccumulate("move_elim_cancelled_cnt_limit", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax(io.out(i).bits.psrc(0)))))
XSPerfAccumulate("move_elim_cancelled_cnt_limit", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax.get(io.out(i).bits.psrc(0)))))
XSPerfAccumulate("move_elim_cancelled_inc_more_than_one", PopCount(Seq.tabulate(RenameWidth)(i => io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else psrc_cmp(i-1).orR })))
// to make sure meEnable functions as expected
for (i <- 0 until RenameWidth) {
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax(io.out(i).bits.psrc(0)),
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && isMax.get(io.out(i).bits.psrc(0)),
p"ME_CANCELLED: ref counter hits max value (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
XSDebug(io.out(i).fire() && io.in(i).bits.ctrl.isMove && !meEnable(i) && { if (i == 0) false.B else io.renameBypass.lsrc1_bypass(i-1).orR },
p"ME_CANCELLED: RAW dependency (pc:0x${Hexadecimal(io.in(i).bits.cf.pc)})\n")
......
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.backend.rename.freelist
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
/**
 * Port set shared by the free-list implementations used by the rename stage.
 *
 * The trait only names the signals; direction and width are chosen by the implementing module
 * (MEFreeList, for example, declares each of them with `IO(...)`).
 * Rename drives `flush`, `redirect`, `walk`, `stepBack` and `doAllocate` uniformly for every free list
 * through this interface.
 */
trait FreeListBaseIO {
// control signals from CtrlBlock
// Rename connects these to io.flush, io.redirect.valid and io.roqCommits.isWalk respectively.
def flush: Bool
def redirect: Bool
def walk: Bool
// allocate physical registers (rename)
// allocateReq(i): rename slot i asks for a new physical register.
def allocateReq: Vec[Bool]
// allocatePhyReg(i): physical register handed to rename slot i.
def allocatePhyReg: Vec[UInt]
// canAllocate: the free list reports that it can serve the rename group.
def canAllocate: Bool
// doAllocate: rename confirms that the allocation is actually performed this cycle.
def doAllocate: Bool
// free old physical registers (commit)
// freeReq(i) / freePhyReg(i): commit slot i releases the given physical register.
def freeReq: Vec[Bool]
def freePhyReg: Vec[UInt]
// walk recovery
// Rename drives this with a PopCount over the valid commit entries that need a destination register.
def stepBack: UInt
}
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.backend.rename.freelist
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import chipsalliance.rocketchip.config
class MEFreeList(implicit val p: config.Parameters) extends MultiIOModule with MEFreeListIO with HasXSParameter with HasCircularQueuePtrHelper {
// ---- ports common to all free lists ----
// control inputs: flush / redirect / walk
val flush = IO(Input(Bool()))
val redirect = IO(Input(Bool()))
val walk = IO(Input(Bool()))
// allocation side: one request and one returned physical register per rename slot
val allocateReq = IO(Input(Vec(RenameWidth, Bool())))
val allocatePhyReg = IO(Output(Vec(RenameWidth, UInt(PhyRegIdxWidth.W))))
val canAllocate = IO(Output(Bool()))
val doAllocate = IO(Input(Bool()))
// release side: one request and one physical register index per commit slot
val freeReq = IO(Input(Vec(CommitWidth, Bool())))
val freePhyReg = IO(Input(Vec(CommitWidth, UInt(PhyRegIdxWidth.W))))
// wide enough to hold any value in 0..CommitWidth
// NOTE(review): not read in the part of this module visible here; the walk path recomputes the
// step from freeReq/eliminatedMove instead - confirm against the rest of the file.
val stepBack = IO(Input(UInt(log2Up(CommitWidth + 1).W)))
// additional ports designed for move elimination
// psrcOfMove(i): valid when rename slot i is an eliminated move; bits carry its source physical register
val psrcOfMove = IO(Vec(RenameWidth, Flipped(ValidIO(UInt(PhyRegIdxWidth.W)))))
// eliminatedMove(i): commit slot i belongs to an eliminated move
val eliminatedMove = IO(Vec(CommitWidth, Input(Bool())))
// multiRefPhyReg(i): physical register whose reference counters commit slot i updates
val multiRefPhyReg = IO(Vec(CommitWidth, Input(UInt(PhyRegIdxWidth.W))))
// maxVec(p): speculative reference counter of physical register p is saturated (all ones)
val maxVec = IO(Vec(NRPhyRegs, Output(Bool())))
// Circular-queue pointer type sized for the MEFreeListSize-entry free list.
class FreeListPtr extends CircularQueuePtr[FreeListPtr](MEFreeListSize)
object FreeListPtr {
/**
 * Creates a FreeListPtr wire with the given fields.
 *
 * @param f value connected to the pointer's `flag` field
 * @param v value connected to the pointer's `value` field
 * @return the newly created wire
 */
def apply(f: Bool, v: UInt): FreeListPtr = {
val ptr = Wire(new FreeListPtr)
ptr.flag := f
ptr.value := v
ptr
}
}
// recording referenced times of each physical registers
// Three IntRefCounterWidth-bit counters per physical register, all reset to 0:
//   archRefCounter - loaded into specRefCounter when `flush` is asserted
//   specRefCounter - speculative count; its all-ones state is exported through maxVec
//   cmtCounter     - compared against the speculative count to decide when a register is really free
val archRefCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(IntRefCounterWidth.W))))
val specRefCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(IntRefCounterWidth.W))))
val cmtCounter = RegInit(VecInit(Seq.fill(NRPhyRegs)(0.U(IntRefCounterWidth.W))))
// For every counter: xxxNext is the candidate new value (DontCare unless overridden below),
// updateXxx selects xxxNext, clearXxx forces the counter back to zero.
val archRefCounterNext = Wire(Vec(NRPhyRegs, UInt(IntRefCounterWidth.W)))
archRefCounterNext.foreach(_ := DontCare)
val updateArchRefCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B)))
val clearArchRefCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B)))
val specRefCounterNext = Wire(Vec(NRPhyRegs, UInt(IntRefCounterWidth.W)))
specRefCounterNext.foreach(_ := DontCare)
val updateSpecRefCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B))) // update with xxxNext
val clearSpecRefCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B))) // reset to zero
val cmtCounterNext = Wire(Vec(NRPhyRegs, UInt(IntRefCounterWidth.W)))
cmtCounterNext.foreach(_ := DontCare)
val updateCmtCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B)))
val clearCmtCounter = WireInit(VecInit(Seq.fill(NRPhyRegs)(false.B)))
// send max flag of spec ref counter to rename stage
// andR is true only when every bit of the counter is 1, i.e. the counter cannot be incremented again
maxVec zip specRefCounter foreach { case (max, cnt) =>
max := cnt.andR()
}
// number of free registers
// width is left to inference; the value is assigned further down from the next head/tail pointers
val freeRegCnt = Wire(UInt())
// free list as circular buffer
// Reset contents: the first NRPhyRegs-32 slots hold physical registers 32, 33, ...;
// the remaining slots (up to MEFreeListSize) are left as DontCare.
val freeList = RegInit(VecInit(Seq.tabulate(MEFreeListSize){
case n if (n >= 0 && n < NRPhyRegs - 32) => (n + 32).U
case _ => DontCare
}))
// head and tail pointer
// After reset, head is at slot 0 and tail is just past the last initialised slot.
val headPtr = RegInit(FreeListPtr(false.B, 0.U))
val tailPtr = RegInit(FreeListPtr(false.B, (NRPhyRegs-32).U))
/*
Decrements: from roq commits
*/
val freeVec = WireInit(VecInit(Seq.fill(CommitWidth)(false.B))) // if dec(i).bits is freed and ready for writing back to free list
val freeRegCandidates = (0 until CommitWidth).map(freePhyReg(_))
val updateCmtCounterVec = WireInit(VecInit(Seq.fill(CommitWidth)(false.B)))
val updateArchRefCounterVec = WireInit(VecInit(Seq.fill(CommitWidth)(false.B)))
val decreaseSpecRefCounterVec = WireInit(VecInit(Seq.fill(CommitWidth)(false.B))) // used when walking ME instructions
val decreaseSpecRefCounterValueVec = Wire(Vec(CommitWidth, UInt(log2Ceil(CommitWidth-1).W)))
val oldPdestIsUnique = WireInit(VecInit(Seq.fill(CommitWidth)(false.B)))
val oldPdestNotUniqueButLast = WireInit(VecInit(Seq.fill(CommitWidth)(false.B)))
val pdestIsUnique = WireInit(VecInit(Seq.fill(CommitWidth)(false.B)))
val pdestNotUniqueButLast = WireInit(VecInit(Seq.fill(CommitWidth)(false.B)))
// handle duplicate INC requirements on cmtCounter and archRefCounter
val old_pdests_cmp = Wire(MixedVec(List.tabulate(CommitWidth-1)(i => UInt((i+1).W))))
val pdests_cmp = Wire(MixedVec(List.tabulate(CommitWidth-1)(i => UInt((i+1).W))))
for (i <- 1 until CommitWidth) {
// compare pdest and old_pdest with former inputs
old_pdests_cmp(i - 1) := Cat((0 until i).map(j => {
freeReq(i) && freeReq(j) && freePhyReg(i) === freePhyReg(j)
}).reverse)
pdests_cmp(i - 1) := Cat((0 until i).map(j => {
freeReq(i) && freeReq(j) && eliminatedMove(i) && eliminatedMove(j) && multiRefPhyReg(i) === multiRefPhyReg(j)
}).reverse)
}
/**
 * Turns a triangular comparison mask over the commit ports into per-port duplicate information.
 *
 * @param m mask with CommitWidth-1 entries, entry i-1 being i bits wide. The callers in this module
 *          build it so that bit j of m(i-1) is set when port i targets the same register as the
 *          earlier port j.
 * @return a triple of CommitWidth-element vectors:
 *         - is_last(i): no entry m(j) with j >= i has bit i set (always true for the last port)
 *         - has_same_before(i): some bit of m(i-1) is set (always false for port 0)
 *         - times(i): number of bits set in m(i-1) (0 for port 0)
 */
def getCompareResult(m: MixedVec[UInt]): (Vec[Bool], Vec[Bool], Vec[UInt]) = {
// port i is "last" when none of the later ports' masks flags port i as a match
val is_last = WireInit(VecInit(Seq.tabulate(CommitWidth){
case last if (last == CommitWidth - 1) => true.B
case i => !(Cat((i until (CommitWidth - 1)).map(j => m(j)(i))).orR)
}))
// port i has a duplicate among the earlier ports when its own mask is non-zero
val has_same_before = WireInit(VecInit(Seq.tabulate(CommitWidth){
case 0 => false.B
case i => m(i - 1).orR()
}))
// how many earlier ports match port i
val times = WireInit(VecInit(Seq.tabulate(CommitWidth){
case 0 => 0.U(log2Ceil(CommitWidth-1).W)
case i => PopCount(m(i - 1))
}))
(is_last, has_same_before, times)
}
val (old_pdests_is_last, old_pdests_has_same_before, old_pdests_times) = getCompareResult(old_pdests_cmp)
val (pdests_is_last, pdests_has_same_before, pdests_times) = getCompareResult(pdests_cmp)
for (i <- 0 until CommitWidth) {
XSDebug(p"decReq:${freeReq(i)},dec_old_pdst:${freePhyReg(i)},dec_is_me:${eliminatedMove(i)},dec_pdest:${multiRefPhyReg(i)}(isWalk:${walk})\n")
val preg = freeRegCandidates(i) // physical register waiting for freeing
oldPdestIsUnique(i) := old_pdests_is_last(i) && !old_pdests_has_same_before(i)
oldPdestNotUniqueButLast(i) := old_pdests_is_last(i) && old_pdests_has_same_before(i)
XSDebug(freeReq(i), p"port[$i]:old_pdest:${freePhyReg(i)},isUnique:${oldPdestIsUnique(i)},notUniqueButLast:${oldPdestNotUniqueButLast(i)}\n")
pdestIsUnique(i) := pdests_is_last(i) && !pdests_has_same_before(i)
pdestNotUniqueButLast(i) := pdests_is_last(i) && pdests_has_same_before(i)
XSDebug(freeReq(i) && eliminatedMove(i), p"port[$i]:pdest:${multiRefPhyReg(i)},isUnique:${pdestIsUnique(i)},notUniqueButLast:${pdestNotUniqueButLast(i)}\n")
freeVec(i) := ((oldPdestIsUnique(i) && (cmtCounter(preg) === Mux(updateSpecRefCounter(preg), specRefCounterNext(preg), specRefCounter(preg))))
|| (oldPdestNotUniqueButLast(i) && (cmtCounter(preg) + old_pdests_times(i) === Mux(updateSpecRefCounter(preg), specRefCounterNext(preg), specRefCounter(preg))))) && freeReq(i) && !walk
updateCmtCounterVec(i) := freeReq(i) && (oldPdestIsUnique(i) || oldPdestNotUniqueButLast(i)) && !walk
XSDebug(p"port[$i]cmtCounterInfo:plus_1=${cmtCounter(preg) + 1.U},plus_1_plus_times=${cmtCounter(preg) + 1.U + old_pdests_times(i)}\n")
XSDebug(p"port[$i]cmtCounterCtl:plus_1=${(freeReq(i) && oldPdestIsUnique(i)).asBool()},plus_1_plus_times=${freeReq(i) && oldPdestNotUniqueButLast(i)},clear=${freeVec(i)}\n")
updateArchRefCounterVec(i) := freeReq(i) && eliminatedMove(i) && (pdestIsUnique(i) || pdestNotUniqueButLast(i)) && !walk
XSDebug((specRefCounter(preg) === 0.U) && freeVec(i), p"normal preg free, preg:${preg}\n")
XSDebug((cmtCounter(preg) === specRefCounter(preg) && (specRefCounter(preg) =/= 0.U)) && freeVec(i), p"multi referenced preg free, preg:${preg}\n")
decreaseSpecRefCounterVec(i) := freeReq(i) && eliminatedMove(i) && walk && (pdestIsUnique(i) || pdestNotUniqueButLast(i))
decreaseSpecRefCounterValueVec(i) := pdests_times(i) + 1.U
// write freed preg into free list at tail ptr
val offset = i match {
case 0 => 0.U
case n => PopCount(freeVec.take(n))
}
val ptr = tailPtr + offset
val idx = ptr.value
when (freeVec(i)) {
freeList(idx) := freeRegCandidates(i)
XSDebug(p"[$i] Free List enqueue: [ preg ${freeRegCandidates(i)} ]\n")
}
}
// set counters-update flag
for (preg <- 0 until NRPhyRegs) {
// set clear bit
freeVec.zipWithIndex.foreach { case (ready, idx) =>
when (ready && preg.U === freeRegCandidates(idx)) {
clearArchRefCounter(preg) := true.B
clearSpecRefCounter(preg) := true.B
clearCmtCounter(preg) := true.B
}
}
// set update bit
updateCmtCounterVec.zipWithIndex.foreach { case (ready, idx) =>
when (ready && preg.U === freeRegCandidates(idx)) {
updateCmtCounter(preg) := true.B
// cmt counter after incrementing/ stay not change
// free vec has higher priority than cmtCounterNext, so normal free wouldn't cause cmtCounter increasing
cmtCounterNext(preg) := Mux(freeReq(idx) && oldPdestIsUnique(idx), cmtCounter(preg) + 1.U,
Mux(freeReq(idx) && oldPdestNotUniqueButLast(idx), cmtCounter(preg) + 1.U + old_pdests_times(idx),
/* stay not change */ cmtCounter(preg)))
}
}
updateArchRefCounterVec.zipWithIndex.foreach { case (ready, idx) =>
when (ready && preg.U === multiRefPhyReg(idx)) {
updateArchRefCounter(preg) := true.B
// arch ref counter of pdest
archRefCounterNext(multiRefPhyReg(idx)) := Mux(/* if this is me inst */freeReq(idx) && eliminatedMove(idx) && pdestIsUnique(idx), archRefCounter(multiRefPhyReg(idx)) + 1.U,
Mux(freeReq(idx) && eliminatedMove(idx) && pdestNotUniqueButLast(idx), archRefCounter(multiRefPhyReg(idx)) + 1.U + pdests_times(idx), archRefCounter(multiRefPhyReg(idx))))
}
}
}
/*
Increments: from rename stage
*/
val needAllocatingVec = WireInit(VecInit(Seq.fill(RenameWidth)(false.B)))
XSDebug(needAllocatingVec.asUInt().orR(), p"needAllocatingVec:${Binary(needAllocatingVec.asUInt)}\n")
val increaseSpecRefCounterVec = WireInit(VecInit(Seq.fill(RenameWidth)(false.B)))
for (i <- 0 until RenameWidth) {
allocatePhyReg(i) := DontCare
// enqueue instr, isn't move elimination
needAllocatingVec(i) := allocateReq(i) && canAllocate && doAllocate && !flush && !psrcOfMove(i).valid && !redirect && !walk
// enqueue instr, is move elimination
when (allocateReq(i) && canAllocate && doAllocate && !flush && psrcOfMove(i).valid && !redirect && !walk) {
// specRefCounterNext(psrcOfMove(i).bits) := specRefCounter(psrcOfMove(i).bits) + 1.U
// updateSpecRefCounter(psrcOfMove(i).bits) := true.B
increaseSpecRefCounterVec(i) := true.B
}
val offset = i match {
case 0 => 0.U
case n => PopCount(needAllocatingVec.take(n))
}
val ptr = headPtr + offset
when (needAllocatingVec(i)) {
val pdest = freeList(ptr.value)
XSDebug(p"[$i] Allocate phy reg $pdest\n")
allocatePhyReg(i) := pdest
}
}
// Per physical register: work out whether its speculative reference counter changes this cycle
// and what the new value would be.
for (preg <- 0 until NRPhyRegs) {
// rename slots whose eliminated move reads this register
val increaseCmpVec = WireInit(VecInit(Seq.tabulate(RenameWidth)(i => increaseSpecRefCounterVec(i) && psrcOfMove(i).bits === preg.U)))
// commit slots that ask for a decrement of this register
val decreaseCmpVec = WireInit(VecInit(Seq.tabulate(CommitWidth)(i => decreaseSpecRefCounterVec(i) && freeRegCandidates(i) === preg.U)))
val doIncrease = increaseCmpVec.asUInt.orR
val doDecrease = decreaseCmpVec.asUInt.orR
updateSpecRefCounter(preg) := doIncrease || doDecrease
// The increment is at most 1 per cycle (doIncrease is an OR over the slots); the decrement amount
// comes from the matching commit slot.
// NOTE(review): OHToUInt expects decreaseCmpVec to be one-hot - presumably ensured by the
// unique / not-unique-but-last filtering applied to decreaseSpecRefCounterVec; confirm.
specRefCounterNext(preg) := specRefCounter(preg) + doIncrease.asUInt - Mux(doDecrease, decreaseSpecRefCounterValueVec(OHToUInt(decreaseCmpVec)), 0.U)
}
/*
Flush: directly flush reference counter according to arch-rat
- replace specRefCounter with archRefCounter; reset headPtr to [ tailPtr - (NRPhyRegs-32) - archRefCounter.reduce(_ + _) ]
*/
// update tail pointer
// Registers are only written back at the tail when not walking; the tail then advances by the
// number of registers freed this cycle.
val tailPtrNext = Mux(walk, tailPtr, tailPtr + PopCount(freeVec))
// update head pointer
// Priority: flush > walk > normal allocation.
//  - flush: rebuild head from tail, the NRPhyRegs-32 initially free registers and the total of all
//    architectural reference counters. The total is accumulated with the width-expanding `+&`:
//    plain `+` keeps the operand width, so a sum over NRPhyRegs counters would be truncated to
//    IntRefCounterWidth bits and the restored head pointer would be wrong as soon as the total
//    reached 2^IntRefCounterWidth.
//  - walk: step head back by the number of walked requests that are not eliminated moves.
//  - otherwise: advance head by the number of registers handed out this cycle.
val headPtrNext = Mux(flush, tailPtr - (NRPhyRegs-32).U - archRefCounter.reduceTree(_ +& _), // FIXME Maybe this is too complicated?
Mux(walk, headPtr - PopCount(freeReq.zip(eliminatedMove).map{ case (rq, em) => rq && !em }),
headPtr + PopCount(needAllocatingVec))) // when redirect is valid, needAllocatingVec is all-zero
// free-register count is taken from the NEXT pointers and registered, so canAllocate reflects the
// state the list will be in during the following cycle
freeRegCnt := distanceBetween(tailPtrNext, headPtrNext)
canAllocate := RegNext(freeRegCnt >= RenameWidth.U)
headPtr := headPtrNext
tailPtr := tailPtrNext
// Register update for the three per-physical-register counters.
// Priority for each counter: clear (-> 0) beats update (-> *Next), which beats hold.
for (i <- 0 until NRPhyRegs) {
// On flush the speculative counter is overwritten with the architectural counter,
// taking precedence over clear/update.
specRefCounter(i) := Mux(flush, archRefCounter(i),
Mux(clearSpecRefCounter(i), 0.U, Mux(updateSpecRefCounter(i), specRefCounterNext(i), specRefCounter(i))))
// archRefCounter and cmtCounter are not affected by flush in this block.
archRefCounter(i) := Mux(clearArchRefCounter(i), 0.U, Mux(updateArchRefCounter(i), archRefCounterNext(i), archRefCounter(i) ))
cmtCounter(i) := Mux(clearCmtCounter(i), 0.U, Mux(updateCmtCounter(i), cmtCounterNext(i), cmtCounter(i) ))
}
/*
Re-direct: restore by walking, handled by rename using `dec` port
*/
/*
Debug Info
*/
// Per-register debug output; only registers with at least one non-zero counter are printed.
for (i <- 0 until NRPhyRegs) {
// Current counter values.
XSDebug(specRefCounter(i) =/= 0.U || archRefCounter(i) =/= 0.U || cmtCounter(i) =/= 0.U,
p"preg[$i] specRefCounter:${specRefCounter(i)} archRefCounter:${archRefCounter(i)} cmtCounter:${cmtCounter(i)}\n")
// Computed next-cycle counter values (printed under the same condition as above).
XSDebug(specRefCounter(i) =/= 0.U || archRefCounter(i) =/= 0.U || cmtCounter(i) =/= 0.U,
p"preg[$i] specRefCounterNext:${specRefCounterNext(i)} archRefCounterNext:${archRefCounterNext(i)} cmtCounterNext:${cmtCounterNext(i)}\n")
// Invariant: specRefCounter(i) must always be >= cmtCounter(i); a violation is reported as an error.
XSError(specRefCounter(i) < cmtCounter(i), p"Commits Overflow of preg${i}")
}
// Dump the free list: first a header row of indices, then the value stored in each entry.
XSDebug(Array.range(0, MEFreeListSize).map(x => x.toString()).mkString("Free List (idx): ", "\t", "\n"))
XSDebug(p"Free List (val): " + Array.range(0, MEFreeListSize).map(x => p"${freeList(x)}\t").reduceLeft(_ + _) + "\n")
// Pointer state and control inputs for this cycle.
XSDebug(p"head:$headPtr tail:$tailPtr headPtrNext:$headPtrNext tailPtrNext:$tailPtrNext freeRegCnt:$freeRegCnt\n")
XSDebug(p"flush ${flush} redirect ${redirect} walk ${walk}\n")
// Printed when the number of requests differs from the number actually performed
// (free requests vs. freeVec, allocate requests vs. needAllocatingVec).
XSDebug(PopCount(freeReq) =/= PopCount(freeVec), p"WARNING: Please check DEC requirement\n")
XSDebug(PopCount(allocateReq) =/= PopCount(needAllocatingVec), p"WARNING: Please check INC requirement\n")
/*
Assertions
*/
// Scala-level (elaboration-time) switch: while this is false, none of the checks below
// are generated into the hardware.
val enableFreeListCheck = false
if (enableFreeListCheck) {
// ---- checks on the rename (allocate / increment) side ----
for (i <- 0 until RenameWidth) {
// Pairwise checks over every unordered pair (i, j) of rename slots.
for (j <- (i + 1) until RenameWidth) {
// Two slots that both allocate must not be handed the same physical register.
XSError(needAllocatingVec(i) && needAllocatingVec(j) && allocatePhyReg(i) === allocatePhyReg(j),
p"Duplicate INC requirements detected!" + allocatePhyReg.zipWithIndex.map{case (p, idx) => p" ($idx):$p"}.reduceLeft(_ + _) + "\n")
// Two move-eliminated slots must not reference the same source register in one cycle.
XSError(allocateReq(i) && allocateReq(j) && canAllocate && doAllocate && psrcOfMove(i).valid && psrcOfMove(j).valid && psrcOfMove(i).bits === psrcOfMove(j).bits,
p"Duplicate ME requirements detected! Cannot inc same specRefCount in 1 cycle!\n")
}
// The speculative reference counter must not already be saturated (all bits set, `.andR()`)
// for a register that is about to be allocated or referenced by an eliminated move.
XSError(needAllocatingVec(i) && !psrcOfMove(i).valid && specRefCounter(allocatePhyReg(i)).andR(), p"(norm) Exceeding specRefCounter Max Value: preg[${allocatePhyReg(i)}]\n")
XSError(allocateReq(i) && canAllocate && doAllocate && psrcOfMove(i).valid && specRefCounter(psrcOfMove(i).bits).andR(), p"(move) Exceeding specRefCounter Max Value: preg[${psrcOfMove(i).bits}]\n")
}
// ---- checks on the commit (free / decrement) side ----
for (i <- 0 until CommitWidth) {
// Duplicate inc/dec requests on one physical register within a cycle are only logged (XSInfo);
// actually freeing the same register twice in one cycle is reported as an error.
for (j <- (i + 1) until CommitWidth) {
XSInfo(freeReq(i) && freeReq(j) && freePhyReg(i) === freePhyReg(j),
p"Duplicate DEC requirements detected!" + freePhyReg.zipWithIndex.map{case (p, idx) => p" ($idx):$p"}.reduceLeft(_ + _) + "\n")
XSInfo(freeReq(i) && freeReq(j) && eliminatedMove(i) && eliminatedMove(j) && multiRefPhyReg(i) === multiRefPhyReg(j),
p"Duplicate INC requirements on archRefCount detected!" + multiRefPhyReg.zipWithIndex.map{case (p, idx) => p" ($idx):$p"}.reduceLeft(_ + _) + "\n")
XSError(freeVec(i) && freeVec(j) && freePhyReg(i) === freePhyReg(j), "Fatal Error: free 1 reg 2 times in 1 cycle!\n")
}
// Debug message when a move-eliminated rename slot and a free request name the same register
// in the same cycle (increment and decrement conflict).
for (j <- 0 until RenameWidth) {
XSDebug(allocateReq(j) && canAllocate && doAllocate && psrcOfMove(j).valid && !redirect && !walk &&
freeReq(i) && freePhyReg(i) === allocatePhyReg(j), p"INC and DEC Conflict Detected! inc($j): preg ${allocatePhyReg(j)}, dec($i): preg ${freePhyReg(i)}\n")
}
}
}
}
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.backend.rename.freelist
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
/**
  * Additional ports required by a free list that supports move elimination ("ME"),
  * on top of everything declared by [[FreeListBaseIO]].
  *
  * All members are abstract; the implementing module supplies the actual ports.
  */
trait MEFreeListIO extends FreeListBaseIO {
/** Physical source registers of move instructions that are ready for elimination (one `Valid` entry per slot). */
def psrcOfMove: Vec[Valid[UInt]]
/** Per-slot flag: the instruction in this slot qualifies for move elimination. */
def eliminatedMove: Vec[Bool]
/** For an eliminated move: the (new) destination physical register whose architectural reference count is increased. */
def multiRefPhyReg: Vec[UInt]
/**
  * "Max" flags derived from the speculative reference counters.
  * NOTE(review): presumably one bit per physical register marking a saturated counter - confirm against the implementation.
  */
def maxVec: Vec[Bool]
}
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.backend.rename.freelist
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import chipsalliance.rocketchip.config
/**
  * Free list of physical registers, kept as a circular buffer of `StdFreeListSize` entries.
  *
  * Registers are handed out at `headPtr` (rename stage) and returned at `tailPtr` (commit stage).
  * After reset the buffer holds the register indices 32 until 32 + StdFreeListSize.
  *
  * @param p implicit configuration parameters
  */
class StdFreeList(implicit val p: config.Parameters) extends MultiIOModule with FreeListBaseIO with HasXSParameter with HasCircularQueuePtrHelper {
// ---- control inputs (see the priority mux on headPtr below) ----
val flush = IO(Input(Bool()))
val redirect = IO(Input(Bool()))
val walk = IO(Input(Bool()))
// ---- allocation interface: one request / one result per rename slot ----
val allocateReq = IO(Input(Vec(RenameWidth, Bool())))
val allocatePhyReg = IO(Output(Vec(RenameWidth, UInt(PhyRegIdxWidth.W))))
// High when (as of the previous cycle) at least RenameWidth entries were free.
val canAllocate = IO(Output(Bool()))
// The head pointer only advances when both canAllocate and doAllocate are high.
val doAllocate = IO(Input(Bool()))
// ---- free interface: one request / one register index per commit slot ----
val freeReq = IO(Input(Vec(CommitWidth, Bool())))
val freePhyReg = IO(Input(Vec(CommitWidth, UInt(PhyRegIdxWidth.W))))
// Number of entries the head pointer moves back while `walk` is high (0 to CommitWidth).
val stepBack = IO(Input(UInt(log2Up(CommitWidth + 1).W)))
// Circular-queue pointer type sized to this free list.
class FreeListPtr extends CircularQueuePtr[FreeListPtr](StdFreeListSize)
object FreeListPtr {
// Builds a pointer wire from an explicit wrap flag `f` and index `v`.
def apply(f: Bool, v: UInt): FreeListPtr = {
val ptr = Wire(new FreeListPtr)
ptr.flag := f
ptr.value := v
ptr
}
}
// Storage: entry i is reset to register index i + 32.
val freeList = RegInit(VecInit(Seq.tabulate(StdFreeListSize)( i => (i + 32).U(PhyRegIdxWidth.W) )))
// Head and tail reset to the same index with opposite wrap flags.
// NOTE(review): presumably this encodes a completely full list in the CircularQueuePtr convention - confirm.
val headPtr = RegInit(FreeListPtr(false.B, 0.U))
val tailPtr = RegInit(FreeListPtr(true.B, 0.U))
//
// free committed instructions' `old_pdest` reg
//
for (i <- 0 until CommitWidth) {
// Slot i writes at tail + (number of asserted free requests in lower-numbered slots),
// so asserted requests are packed into consecutive entries.
val offset = if (i == 0) 0.U else PopCount(freeReq.take(i))
val ptr = tailPtr + offset
val idx = ptr.value
when (freeReq(i)) {
freeList(idx) := freePhyReg(i)
XSDebug(p"req#$i free physical reg: ${freePhyReg(i)}\n")
}
}
// The tail advances by the number of asserted free requests every cycle;
// it is not gated by flush, walk or redirect.
val tailPtrNext = tailPtr + PopCount(freeReq)
tailPtr := tailPtrNext
//
// allocate new physical registers for instructions at rename stage
//
val freeRegCnt = Wire(UInt()) // number of free registers in free list
// Registered comparison: canAllocate lags freeRegCnt by one cycle.
canAllocate := RegNext(freeRegCnt >= RenameWidth.U)
XSDebug(p"freeRegCnt: $freeRegCnt\n")
// Read the RenameWidth entries starting at the head pointer.
val allocatePtr = (0 until RenameWidth).map(i => headPtr + i.U)
val phyRegCandidates = VecInit(allocatePtr.map(ptr => freeList(ptr.value)))
for(i <- 0 until RenameWidth){
// Slot i takes the candidate indexed by the number of asserted requests in lower-numbered slots.
// The output is driven regardless of allocateReq(i) and canAllocate.
allocatePhyReg(i) := phyRegCandidates(/* if (i == 0) 0.U else */PopCount(allocateReq.take(i)))
XSDebug(p"req:${allocateReq(i)} canAllocate:${canAllocate} pdest:${allocatePhyReg(i)}\n")
}
val headPtrAllocate = headPtr + PopCount(allocateReq)
val headPtrNext = Mux(canAllocate && doAllocate, headPtrAllocate, headPtr)
// Counted against the current tail (not tailPtrNext), so registers freed in this cycle
// are not included in the count yet.
freeRegCnt := distanceBetween(tailPtr, headPtrNext)
// priority: (1) exception and flushPipe; (2) walking; (3) mis-prediction; (4) normal dequeue
// On flush the head becomes the next tail with the wrap flag inverted.
headPtr := Mux(flush,
FreeListPtr(!tailPtrNext.flag, tailPtrNext.value),
Mux(walk,
headPtr - stepBack,
Mux(redirect, headPtr, headPtrNext))
)
XSDebug(p"head:$headPtr tail:$tailPtr\n")
// Scala-level (elaboration-time) switch: while false, the pairwise duplicate-entry check
// below is not generated into the hardware.
val enableFreeListCheck = false
if (enableFreeListCheck) {
for (i <- 0 until StdFreeListSize) {
for (j <- i+1 until StdFreeListSize) {
XSError(freeList(i) === freeList(j), s"Found same entry in free list! (i=$i j=$j)\n")
}
}
}
// Performance counters.
XSPerfAccumulate("utilization", freeRegCnt)
XSPerfAccumulate("allocation_blocked", !canAllocate)
// Cycles in which the registered canAllocate is low although the current count would allow allocation.
XSPerfAccumulate("can_alloc_wrong", !canAllocate && freeRegCnt >= RenameWidth.U)
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册