未验证 提交 459d1cae 编写于 作者: Y Yinan Xu 提交者: GitHub

refCounter: optimize timing for freeRegs (#1255)

This commit changes how isFreed is calculated. Instead of reading
refCounter in the next cycle, we compute isFreed in the current cycle and RegNext it.
上级 2ace40b9
......@@ -362,8 +362,6 @@ trait HasXSParameter {
val PhyRegIdxWidth = log2Up(NRPhyRegs)
val RobSize = coreParams.RobSize
val IntRefCounterWidth = log2Ceil(RobSize)
val StdFreeListSize = NRPhyRegs - 32
val MEFreeListSize = NRPhyRegs
val LoadQueueSize = coreParams.LoadQueueSize
val StoreQueueSize = coreParams.StoreQueueSize
val dpParams = coreParams.dpParams
......
......@@ -45,9 +45,9 @@ class Rename(implicit p: Parameters) extends XSModule {
})
// create free list and rat
val intFreeList = Module(new MEFreeList(MEFreeListSize))
val intRefCounter = Module(new RefCounter(MEFreeListSize))
val fpFreeList = Module(new StdFreeList(StdFreeListSize))
val intFreeList = Module(new MEFreeList(NRPhyRegs))
val intRefCounter = Module(new RefCounter(NRPhyRegs))
val fpFreeList = Module(new StdFreeList(NRPhyRegs - 32))
// decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RobCommitInfo: from rob)
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
......
......@@ -33,22 +33,29 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule {
val allocate = RegNext(io.allocate)
val deallocate = RegNext(io.deallocate)
// recording referenced times of each physical registers
// refCounter: increase at rename; decrease at walk/commit
// Originally 0-31 registers have counters of ones.
val refCounter = RegInit(VecInit(Seq.fill(32)(1.U(IntRefCounterWidth.W)) ++ Seq.fill(NRPhyRegs - 32)(0.U(IntRefCounterWidth.W))))
val refCounter = RegInit(VecInit(Seq.fill(32)(1.U(IntRefCounterWidth.W)) ++ Seq.fill(size - 32)(0.U(IntRefCounterWidth.W))))
val refCounterInc = WireInit(refCounter)
val refCounterDec = WireInit(refCounter)
val refCounterNext = WireInit(refCounter)
// One-hot Encoding for allocation and de-allocation
val allocateOH = allocate.map(alloc => UIntToOH(alloc.bits))
val deallocateOH = deallocate.map(dealloc => UIntToOH(dealloc.bits))
/**
* Deallocation: when refCounter becomes zero, the register can be released to freelist
* De-allocation: when refCounter becomes zero, the register can be released to freelist
*/
for ((de, i) <- deallocate.zipWithIndex) {
val isNonZero = de.valid && refCounter(de.bits) =/= 0.U
val hasDuplicate = deallocate.take(i).map(de => de.valid && de.bits === deallocate(i).bits)
val blockedByDup = if (i == 0) false.B else VecInit(hasDuplicate).asUInt.orR
val isFreed = refCounter(RegNext(de.bits)) === 0.U
io.freeRegs(i).valid := RegNext(isNonZero && !blockedByDup) && isFreed
val isFreed = refCounter(de.bits) + refCounterInc(de.bits) === refCounterDec(de.bits)
io.freeRegs(i).valid := RegNext(isNonZero && !blockedByDup) && RegNext(isFreed)
val isFreed1 = refCounter(RegNext(de.bits)) === 0.U
XSError(RegNext(isFreed) =/= isFreed1, p"why isFreed ${RegNext(isFreed)} $isFreed1\n")
io.freeRegs(i).bits := RegNext(deallocate(i).bits)
}
......@@ -68,17 +75,23 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule {
* We don't count the number of references for physical register 0.
* It should never be released to freelist.
*/
for (i <- 1 until NRPhyRegs) {
val numAlloc = PopCount(allocate.map(alloc => alloc.valid && alloc.bits === i.U))
val numDealloc = PopCount(deallocate.map(dealloc => dealloc.valid && dealloc.bits === i.U))
refCounterNext(i) := refCounter(i) + numAlloc - numDealloc
XSError(RegNext(refCounter(i) + numAlloc < numDealloc), p"why $i?\n")
for (i <- 1 until size) {
refCounterInc(i) := PopCount(allocate.zip(allocateOH).map(alloc => alloc._1.valid && alloc._2(i)))
refCounterDec(i) := PopCount(deallocate.zip(deallocateOH).map(dealloc => dealloc._1.valid && dealloc._2(i)))
val numAlloc1 = PopCount(allocate.map(alloc => alloc.valid && alloc.bits === i.U))
val numDealloc1 = PopCount(deallocate.map(dealloc => dealloc.valid && dealloc.bits === i.U))
XSError(refCounterInc(i) =/= numAlloc1, p"why numAlloc ${refCounterInc(i)} $numAlloc1??")
XSError(refCounterDec(i) =/= numDealloc1, p"why numDealloc ${refCounterDec(i)} $numDealloc1??")
refCounterNext(i) := refCounter(i) + refCounterInc(i) - refCounterDec(i)
XSError(RegNext(refCounter(i) + refCounterInc(i) < refCounterDec(i)), p"why $i?\n")
refCounter(i) := refCounterNext(i)
}
for (i <- 0 until RobSize) {
val numCounters = PopCount(refCounter.map(_ === i.U))
XSPerfAccumulate(s"ref_counter_$i", numCounters)
}
for (i <- 0 until size) {
val isFreed = io.freeRegs.map(f => f.valid && f.bits === i.U)
XSPerfAccumulate(s"free_reg_$i", VecInit(isFreed).asUInt.orR)
}
......
......@@ -88,13 +88,13 @@ class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size)
val perfEvents = Output(new PerfEventsBundle(4))
})
val perfEvents = Seq(
("std_freelist_1/4_valid ", (freeRegCnt < (StdFreeListSize.U/4.U)) ),
("std_freelist_2/4_valid ", (freeRegCnt > (StdFreeListSize.U/4.U)) & (freeRegCnt <= (StdFreeListSize.U/2.U)) ),
("std_freelist_3/4_valid ", (freeRegCnt > (StdFreeListSize.U/2.U)) & (freeRegCnt <= (StdFreeListSize.U*3.U/4.U))),
("std_freelist_4/4_valid ", (freeRegCnt > (StdFreeListSize.U*3.U/4.U)) ),
("std_freelist_1_4_valid", (freeRegCnt < (size / 4).U) ),
("std_freelist_2_4_valid", (freeRegCnt > (size / 4).U) & (freeRegCnt <= (size / 2).U) ),
("std_freelist_3_4_valid", (freeRegCnt > (size / 2).U) & (freeRegCnt <= (size * 3 / 4).U)),
("std_freelist_4_4_valid", (freeRegCnt > (size * 3 / 4).U) )
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册