未验证 提交 fa7f2c26 编写于 作者: T Tang Haojin 提交者: GitHub

CtrlBlock: implement rename snapshot (#2191)

* CtrlBlock: new ME method for better timing and area

* ctrlblock: implement snapshot recovery

* rename: enlarge distance between snapshots

* snapshot: add rename snapshot switch

* CtrlBlock: add snapshotGen API

* snapshot: optimize timing

* snapshot: put snapshot logic in a module
上级 74dc6eb6
......@@ -234,6 +234,7 @@ class MicroOp(implicit p: Parameters) extends CfCtrl {
val lqIdx = new LqPtr
val sqIdx = new SqPtr
val eliminatedMove = Bool()
val snapshot = Bool()
val debugInfo = new PerfDebugInfo
def needRfRPort(index: Int, isFp: Boolean, ignoreState: Boolean = true) : Bool = {
val stateReady = srcState(index) === SrcState.rdy || ignoreState.B
......@@ -392,11 +393,19 @@ class RobCommitIO(implicit p: Parameters) extends XSBundle {
val walkValid = Vec(CommitWidth, Bool())
val info = Vec(CommitWidth, new RobCommitInfo)
val robIdx = Vec(CommitWidth, new RobPtr)
def hasWalkInstr: Bool = isWalk && walkValid.asUInt.orR
def hasCommitInstr: Bool = isCommit && commitValid.asUInt.orR
}
/**
 * Control signals for the rename-snapshot mechanism.
 *
 * In the code visible here, CtrlBlock drives one copy of this bundle into the
 * ROB, the rename table wrapper and Rename; Rename forwards it to the int/fp
 * free lists. Field order is part of the bit layout (the bundle is rebuilt
 * from `0.U.asTypeOf(...)` by its consumers), so keep it stable.
 */
class SnapshotPort(implicit p: Parameters) extends XSBundle {
// Request to record a new snapshot entry.
val snptEnq = Bool()
// Request to release the entry at the dequeue pointer (the oldest live snapshot).
val snptDeq = Bool()
// On a redirect: recover from a snapshot instead of the architectural state.
val useSnpt = Bool()
// Index of the snapshot entry to recover from; consumers only read it when useSnpt is set.
val snptSelect = UInt(log2Ceil(RenameSnapshotNum).W)
}
class RSFeedback(implicit p: Parameters) extends XSBundle {
val rsIdx = UInt(log2Up(IssQueSize).W)
val hit = Bool()
......
......@@ -127,6 +127,8 @@ case class XSCoreParameters
DecodeWidth: Int = 6,
RenameWidth: Int = 6,
CommitWidth: Int = 6,
EnableRenameSnapshot: Boolean = true,
RenameSnapshotNum: Int = 4,
FtqSize: Int = 64,
EnableLoadFastWakeUp: Boolean = true, // NOTE: not supported now, make it false
IssQueSize: Int = 16,
......@@ -396,6 +398,8 @@ trait HasXSParameter {
val DecodeWidth = coreParams.DecodeWidth
val RenameWidth = coreParams.RenameWidth
val CommitWidth = coreParams.CommitWidth
val EnableRenameSnapshot = coreParams.EnableRenameSnapshot
val RenameSnapshotNum = coreParams.RenameSnapshotNum
val FtqSize = coreParams.FtqSize
val IssQueSize = coreParams.IssQueSize
val EnableLoadFastWakeUp = coreParams.EnableLoadFastWakeUp
......
......@@ -40,6 +40,64 @@ class CtrlToFtqIO(implicit p: Parameters) extends XSBundle {
val redirect = Valid(new Redirect)
}
/**
 * Circular-queue pointer used to index the rename snapshot entries.
 *
 * The function handed to CircularQueuePtr reads `RenameSnapshotNum` from the
 * core parameters -- presumably this is the number of queue entries
 * (NOTE(review): confirm against CircularQueuePtr's definition).
 */
class SnapshotPtr(implicit p: Parameters) extends CircularQueuePtr[SnapshotPtr](
p => p(XSCoreParamsKey).RenameSnapshotNum
)
/**
 * Convenience factory around the [[SnapshotGenerator]] module.
 *
 * Instantiates one generator typed after `enqData`, wires up its inputs and
 * hands back the vector of stored snapshots. The generator's pointer and
 * valid outputs are not exposed through this helper.
 */
object SnapshotGenerator extends HasCircularQueuePtrHelper {
  /**
   * @param enqData value captured into a new snapshot entry when `enq` is accepted
   * @param enq     request to store `enqData`
   * @param deq     request to release the oldest snapshot entry
   * @param flush   request to drop every snapshot entry
   * @return the generator's `RenameSnapshotNum` snapshot registers
   */
  def apply[T <: Data](enqData: T, enq: Bool, deq: Bool, flush: Bool)(implicit p: Parameters): Vec[T] = {
    val snapshotGen = Module(new SnapshotGenerator(enqData))

    // Payload: the module takes it through a one-element Vec.
    snapshotGen.io.enqData.head := enqData

    // Single-bit controls; the sinks are distinct, so connection order is irrelevant.
    Seq(
      snapshotGen.io.flush -> flush,
      snapshotGen.io.deq -> deq,
      snapshotGen.io.enq -> enq
    ).foreach { case (sink, source) => sink := source }

    snapshotGen.io.snapshots
  }
}
/**
 * A small circular queue of `RenameSnapshotNum` snapshot registers.
 *
 * Each accepted `enq` stores `enqData.head` at the enqueue pointer, each `deq`
 * releases the entry at the dequeue pointer, and `flush` invalidates all
 * entries and resets both pointers. The stored values, both pointers and the
 * per-entry valid bits are all exported.
 *
 * @param dataType hardware value whose Chisel type (via `chiselTypeOf`) is used for the entries
 */
class SnapshotGenerator[T <: Data](dataType: T)(implicit p: Parameters) extends XSModule
with HasCircularQueuePtrHelper {
class SnapshotGeneratorIO extends Bundle {
// Request to store enqData.head as a new snapshot.
val enq = Input(Bool())
val enqData = Input(Vec(1, chiselTypeOf(dataType))) // make chisel happy
// Request to release the entry at the dequeue pointer.
val deq = Input(Bool())
// Drop all entries and reset both pointers.
val flush = Input(Bool())
// Raw contents of every entry, valid or not; qualify with `valids`.
val snapshots = Output(Vec(RenameSnapshotNum, chiselTypeOf(dataType)))
val enqPtr = Output(new SnapshotPtr)
val deqPtr = Output(new SnapshotPtr)
val valids = Output(Vec(RenameSnapshotNum, Bool()))
}
val io = IO(new SnapshotGeneratorIO)
// Entry storage has no reset value; only the valid bits and pointers below are reset.
val snapshots = Reg(Vec(RenameSnapshotNum, chiselTypeOf(dataType)))
val snptEnqPtr = RegInit(0.U.asTypeOf(new SnapshotPtr))
val snptDeqPtr = RegInit(0.U.asTypeOf(new SnapshotPtr))
val snptValids = RegInit(VecInit.fill(RenameSnapshotNum)(false.B))
io.snapshots := snapshots
io.enqPtr := snptEnqPtr
io.deqPtr := snptDeqPtr
io.valids := snptValids
// Enqueue: silently ignored while isFull(...) holds, even if a dequeue happens in the same cycle.
when(!isFull(snptEnqPtr, snptDeqPtr) && io.enq) {
snapshots(snptEnqPtr.value) := io.enqData.head
snptValids(snptEnqPtr.value) := true.B
snptEnqPtr := snptEnqPtr + 1.U
}
// Dequeue: invalidate the entry at the dequeue pointer and advance it.
// The caller must not dequeue from an empty queue; that case is only reported, not blocked.
when(io.deq) {
snptValids(snptDeqPtr.value) := false.B
snptDeqPtr := snptDeqPtr + 1.U
XSError(isEmpty(snptEnqPtr, snptDeqPtr), "snapshots should not be empty when dequeue!\n")
}
// Flush is written last so that, under Chisel's last-connect rule, it overrides any
// enqueue/dequeue update made in the same cycle. Entry data is left as is.
when(io.flush) {
snptValids := 0.U.asTypeOf(snptValids)
snptEnqPtr := 0.U.asTypeOf(new SnapshotPtr)
snptDeqPtr := 0.U.asTypeOf(new SnapshotPtr)
}
}
class RedirectGenerator(implicit p: Parameters) extends XSModule
with HasCircularQueuePtrHelper {
......@@ -397,6 +455,41 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
waittable.io.update <> RegNext(redirectGen.io.memPredUpdate)
waittable.io.csrCtrl := RegNext(io.csrCtrl)
// snapshot check
// Central snapshot bookkeeping: this generator stores the robIdx of the head rename
// output each time that uop carries the snapshot flag and fires.
val snpt = Module(new SnapshotGenerator(rename.io.out.head.bits.robIdx))
snpt.io.enq := rename.io.out.head.bits.snapshot && rename.io.out.head.fire
snpt.io.enqData.head := rename.io.out.head.bits.robIdx
// Release the oldest snapshot when it is valid and one of the instructions committing
// this cycle has the same robIdx as the one recorded in that snapshot.
snpt.io.deq := snpt.io.valids(snpt.io.deqPtr.value) && rob.io.commits.isCommit &&
Cat(rob.io.commits.commitValid.zip(rob.io.commits.robIdx).map(x => x._1 && x._2 === snpt.io.snapshots(snpt.io.deqPtr.value))).orR
// Every stage-2 redirect drops all recorded snapshots.
snpt.io.flush := stage2Redirect.valid
// A snapshot is usable for recovery when it is valid and the redirecting robIdx compares
// `>=` to the snapshot's robIdx (RobPtr comparison; presumably "not older than" -- confirm).
val useSnpt = VecInit.tabulate(RenameSnapshotNum)(idx =>
snpt.io.valids(idx) && stage2Redirect.bits.robIdx >= snpt.io.snapshots(idx)).reduceTree(_ || _)
// Candidates are listed as enqPtr-1, enqPtr-2, ..., so MuxCase picks the most recently
// enqueued usable snapshot; the default index 0 only applies when none is usable.
val snptSelect = MuxCase(0.U(log2Ceil(RenameSnapshotNum).W),
(1 to RenameSnapshotNum).map(i => (snpt.io.enqPtr - i.U).value).map(idx =>
(snpt.io.valids(idx) && stage2Redirect.bits.robIdx >= snpt.io.snapshots(idx), idx)
))
// The same deq/use/select decision is broadcast to rob, rat and rename.
// snptEnq is left as DontCare for rob and rename; only rat receives it from here.
rob.io.snpt.snptEnq := DontCare
rob.io.snpt.snptDeq := snpt.io.deq
rob.io.snpt.useSnpt := useSnpt
rob.io.snpt.snptSelect := snptSelect
rat.io.snpt.snptEnq := rename.io.out.head.bits.snapshot && rename.io.out.head.fire
rat.io.snpt.snptDeq := snpt.io.deq
rat.io.snpt.useSnpt := useSnpt
rat.io.snpt.snptSelect := snptSelect
rename.io.snpt.snptEnq := DontCare
rename.io.snpt.snptDeq := snpt.io.deq
rename.io.snpt.useSnpt := useSnpt
rename.io.snpt.snptSelect := snptSelect
// prevent rob from generating snapshot when full here
// renameOut is a copy of rename.io.out in which the head uop's snapshot flag is
// cleared while the snapshot queue above is full.
val renameOut = Wire(chiselTypeOf(rename.io.out))
renameOut <> rename.io.out
when(isFull(snpt.io.enqPtr, snpt.io.deqPtr)) {
renameOut.head.bits.snapshot := false.B
}
// LFST lookup and update
dispatch.io.lfst := DontCare
if (LFSTEnable) {
......@@ -472,7 +565,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
// pipeline between rename and dispatch
for (i <- 0 until RenameWidth) {
PipelineConnect(rename.io.out(i), dispatch.io.fromRename(i), dispatch.io.recv(i), stage2Redirect.valid)
PipelineConnect(renameOut(i), dispatch.io.fromRename(i), dispatch.io.recv(i), stage2Redirect.valid)
}
dispatch.io.hartId := io.hartId
......
......@@ -49,6 +49,8 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
val int_need_free = Vec(CommitWidth, Input(Bool()))
// to dispatch1
val out = Vec(RenameWidth, DecoupledIO(new MicroOp))
// for snapshots
val snpt = Input(new SnapshotPort)
// debug arch ports
val debug_int_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
val debug_fp_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
......@@ -115,6 +117,7 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
uop.debugInfo := DontCare
uop.lqIdx := DontCare
uop.sqIdx := DontCare
uop.snapshot := DontCare
})
require(RenameWidth >= CommitWidth)
......@@ -283,6 +286,21 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe
}
// True when at least one input uop fires this cycle and is either not marked `notCFI`
// by predecode or has a jump-unit fuType.
val hasCFI = VecInit(io.in.map(in => (!in.bits.cf.pd.notCFI || FuType.isJumpExu(in.bits.ctrl.fuType)) && in.fire)).asUInt.orR
// Down-counter that spaces snapshots apart: it starts at 4 * CommitWidth and a new
// snapshot is only allowed once it has reached zero (and the feature is enabled).
val snapshotCtr = RegInit((4 * CommitWidth).U)
val allowSnpt = if (EnableRenameSnapshot) !snapshotCtr.orR else false.B
// The snapshot request is always attached to the head output uop.
io.out.head.bits.snapshot := hasCFI && allowSnpt
when(io.out.head.fire && io.out.head.bits.snapshot) {
// Snapshot taken: reload the counter, already discounting the uops that fire this cycle.
snapshotCtr := (4 * CommitWidth).U - PopCount(io.out.map(_.fire))
}.elsewhen(io.out.head.fire) {
// No snapshot: count down by the number of fired uops, saturating at zero.
snapshotCtr := Mux(snapshotCtr < PopCount(io.out.map(_.fire)), 0.U, snapshotCtr - PopCount(io.out.map(_.fire)))
}
// Forward the external snapshot controls to both free lists, then override snptEnq
// with the locally generated request (the later connection wins).
intFreeList.io.snpt := io.snpt
fpFreeList.io.snpt := io.snpt
intFreeList.io.snpt.snptEnq := io.out.head.fire && io.out.head.bits.snapshot
fpFreeList.io.snpt.snptEnq := io.out.head.fire && io.out.head.bits.snapshot
/**
* Instructions commit: update freelist and rename table
*/
......
......@@ -19,9 +19,11 @@ package xiangshan.backend.rename
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utility.HasCircularQueuePtrHelper
import utility.ParallelPriorityMux
import utils.XSError
import xiangshan._
import xiangshan.backend.SnapshotGenerator
class RatReadPort(implicit p: Parameters) extends XSBundle {
val hold = Input(Bool())
......@@ -35,7 +37,7 @@ class RatWritePort(implicit p: Parameters) extends XSBundle {
val data = UInt(PhyRegIdxWidth.W)
}
class RenameTable(float: Boolean)(implicit p: Parameters) extends XSModule {
class RenameTable(float: Boolean)(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new Bundle {
val redirect = Input(Bool())
val readPorts = Vec({if(float) 4 else 3} * RenameWidth, new RatReadPort)
......@@ -43,6 +45,7 @@ class RenameTable(float: Boolean)(implicit p: Parameters) extends XSModule {
val archWritePorts = Vec(CommitWidth, Input(new RatWritePort))
val old_pdest = Vec(CommitWidth, Output(UInt(PhyRegIdxWidth.W)))
val need_free = Vec(CommitWidth, Output(Bool()))
val snpt = Input(new SnapshotPort)
val debug_rdata = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
})
......@@ -67,13 +70,21 @@ class RenameTable(float: Boolean)(implicit p: Parameters) extends XSModule {
val t1_raddr = io.readPorts.map(p => RegEnable(p.addr, !p.hold))
val t1_wSpec = RegNext(Mux(io.redirect, 0.U.asTypeOf(io.specWritePorts), io.specWritePorts))
val t1_snpt = RegNext(io.snpt, 0.U.asTypeOf(io.snpt))
val snapshots = SnapshotGenerator(spec_table, t1_snpt.snptEnq, t1_snpt.snptDeq, t1_redirect)
// WRITE: when instruction commits or walking
val t1_wSpec_addr = t1_wSpec.map(w => Mux(w.wen, UIntToOH(w.addr), 0.U))
for ((next, i) <- spec_table_next.zipWithIndex) {
val matchVec = t1_wSpec_addr.map(w => w(i))
val wMatch = ParallelPriorityMux(matchVec.reverse, t1_wSpec.map(_.data).reverse)
// When there's a flush, we use arch_table to update spec_table.
next := Mux(t1_redirect, arch_table(i), Mux(VecInit(matchVec).asUInt.orR, wMatch, spec_table(i)))
next := Mux(
t1_redirect,
Mux(t1_snpt.useSnpt, snapshots(t1_snpt.snptSelect)(i), arch_table(i)),
Mux(VecInit(matchVec).asUInt.orR, wMatch, spec_table(i))
)
}
spec_table := spec_table_next
......@@ -119,6 +130,7 @@ class RenameTableWrapper(implicit p: Parameters) extends XSModule {
val int_old_pdest = Vec(CommitWidth, Output(UInt(PhyRegIdxWidth.W)))
val fp_old_pdest = Vec(CommitWidth, Output(UInt(PhyRegIdxWidth.W)))
val int_need_free = Vec(CommitWidth, Output(Bool()))
val snpt = Input(new SnapshotPort)
// for debug printing
val debug_int_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
val debug_fp_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
......@@ -131,6 +143,8 @@ class RenameTableWrapper(implicit p: Parameters) extends XSModule {
intRat.io.readPorts <> io.intReadPorts.flatten
intRat.io.redirect := io.redirect
fpRat.io.redirect := io.redirect
intRat.io.snpt := io.snpt
fpRat.io.snpt := io.snpt
io.int_old_pdest := intRat.io.old_pdest
io.fp_old_pdest := fpRat.io.old_pdest
io.int_need_free := intRat.io.need_free
......
......@@ -20,6 +20,7 @@ import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.backend.SnapshotGenerator
import utils._
import utility._
......@@ -40,6 +41,8 @@ abstract class BaseFreeList(size: Int)(implicit p: Parameters) extends XSModule
val commit = Input(new RobCommitIO)
val snpt = Input(new SnapshotPort)
val debug_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
})
......@@ -53,4 +56,28 @@ abstract class BaseFreeList(size: Int)(implicit p: Parameters) extends XSModule
ptr
}
}
// One-cycle-delayed copies of the redirect flag and the snapshot controls.
val lastCycleRedirect = RegNext(io.redirect, false.B)
val lastCycleSnpt = RegNext(io.snpt, 0.U.asTypeOf(io.snpt))
// Head pointer is kept both in binary form and as a one-hot register; the XSError
// below reports any cycle in which the two disagree.
val headPtr = RegInit(FreeListPtr(false, 0))
val headPtrOH = RegInit(1.U(size.W))
val archHeadPtr = RegInit(FreeListPtr(false, 0))
XSError(headPtr.toOH =/= headPtrOH, p"wrong one-hot reg between $headPtr and $headPtrOH")
val headPtrOHShift = CircularShift(headPtrOH)
// may shift [0, RenameWidth] steps
val headPtrOHVec = VecInit.tabulate(RenameWidth + 1)(headPtrOHShift.left)
// Snapshots of headPtr. Note the generator is driven by the undelayed io.snpt / io.redirect,
// while the recovery selection below uses the delayed lastCycleSnpt.
val snapshots = SnapshotGenerator(headPtr, io.snpt.snptEnq, io.snpt.snptDeq, io.redirect)
// Head pointer to restart from after a redirect: the selected snapshot if one is usable,
// otherwise the architectural head pointer, advanced by the number of walk requests.
val redirectedHeadPtr = Mux(
lastCycleSnpt.useSnpt,
snapshots(lastCycleSnpt.snptSelect) + PopCount(io.walkReq),
archHeadPtr + PopCount(io.walkReq)
)
// One-hot form of the same value, computed alongside so it matches redirectedHeadPtr.
val redirectedHeadPtrOH = Mux(
lastCycleSnpt.useSnpt,
(snapshots(lastCycleSnpt.snptSelect) + PopCount(io.walkReq)).toOH,
(archHeadPtr + PopCount(io.walkReq)).toOH
)
}
......@@ -29,21 +29,12 @@ class MEFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) w
// originally {1, 2, ..., size - 1} are free. Register 0-31 are mapped to x0.
Seq.tabulate(size - 1)(i => (i + 1).U(PhyRegIdxWidth.W)) :+ 0.U(PhyRegIdxWidth.W)))
// head and tail pointer
val headPtr = RegInit(FreeListPtr(false, 0))
val headPtrOH = RegInit(1.U(size.W))
XSError(headPtr.toOH =/= headPtrOH, p"wrong one-hot reg between $headPtr and $headPtrOH")
val headPtrOHShift = CircularShift(headPtrOH)
// may shift [0, RenameWidth] steps
val headPtrOHVec = VecInit.tabulate(RenameWidth + 1)(headPtrOHShift.left)
val tailPtr = RegInit(FreeListPtr(false, size - 1))
val archHeadPtr = RegInit(FreeListPtr(false, 0))
val doWalkRename = io.walk && io.doAllocate && !io.redirect
val doNormalRename = io.canAllocate && io.doAllocate && !io.redirect
val doRename = doWalkRename || doNormalRename
val doCommit = io.commit.isCommit
val lastCycleRedirect = RegNext(io.redirect, false.B)
/**
* Allocation: from freelist (same as StdFreelist)
......@@ -64,8 +55,8 @@ class MEFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) w
// update head pointer
val numAllocate = Mux(io.walk, PopCount(io.walkReq), PopCount(io.allocateReq))
val headPtrNew = Mux(lastCycleRedirect, archHeadPtr + PopCount(io.walkReq), headPtr + numAllocate)
val headPtrOHNew = Mux(lastCycleRedirect, (archHeadPtr + PopCount(io.walkReq)).toOH, headPtrOHVec(numAllocate))
val headPtrNew = Mux(lastCycleRedirect, redirectedHeadPtr, headPtr + numAllocate)
val headPtrOHNew = Mux(lastCycleRedirect, redirectedHeadPtrOH, headPtrOHVec(numAllocate))
val headPtrNext = Mux(doRename, headPtrNew, headPtr)
val headPtrOHNext = Mux(doRename, headPtrOHNew, headPtrOH)
headPtr := headPtrNext
......
......@@ -27,18 +27,9 @@ import utility._
class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) with HasPerfEvents {
val freeList = RegInit(VecInit(Seq.tabulate(size)( i => (i + 32).U(PhyRegIdxWidth.W) )))
val headPtr = RegInit(FreeListPtr(false, 0))
val headPtrOH = RegInit(1.U(size.W))
val headPtrOHShift = CircularShift(headPtrOH)
// may shift [0, RenameWidth] steps
val headPtrOHVec = VecInit.tabulate(RenameWidth + 1)(headPtrOHShift.left)
XSError(headPtr.toOH =/= headPtrOH, p"wrong one-hot reg between $headPtr and $headPtrOH")
val lastTailPtr = RegInit(FreeListPtr(true, 0)) // tailPtr in the last cycle (need to add freeReqReg)
val tailPtr = Wire(new FreeListPtr) // this is the real tailPtr
val tailPtrOHReg = RegInit(0.U(size.W))
val archHeadPtr = RegInit(FreeListPtr(false, 0))
val lastCycleRedirect = RegNext(io.redirect, false.B)
//
// free committed instructions' `old_pdest` reg
......@@ -84,8 +75,8 @@ class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size)
val isNormalAlloc = io.canAllocate && io.doAllocate
val isAllocate = isWalkAlloc || isNormalAlloc
val numAllocate = Mux(io.walk, PopCount(io.walkReq), PopCount(io.allocateReq))
val headPtrAllocate = Mux(lastCycleRedirect, archHeadPtr + PopCount(io.walkReq), headPtr + numAllocate)
val headPtrOHAllocate = Mux(lastCycleRedirect, (archHeadPtr + PopCount(io.walkReq)).toOH, headPtrOHVec(numAllocate))
val headPtrAllocate = Mux(lastCycleRedirect, redirectedHeadPtr, headPtr + numAllocate)
val headPtrOHAllocate = Mux(lastCycleRedirect, redirectedHeadPtrOH, headPtrOHVec(numAllocate))
val headPtrNext = Mux(isAllocate, headPtrAllocate, headPtr)
freeRegCnt := Mux(isWalkAlloc && !lastCycleRedirect, distanceBetween(tailPtr, headPtr) - PopCount(io.walkReq),
Mux(isNormalAlloc, distanceBetween(tailPtr, headPtr) - PopCount(io.allocateReq),
......
......@@ -24,6 +24,7 @@ import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import utils._
import utility._
import xiangshan._
import xiangshan.backend.SnapshotGenerator
import xiangshan.backend.exu.ExuConfig
import xiangshan.frontend.FtqPtr
import xiangshan.mem.{LsqEnqIO, LqPtr}
......@@ -410,6 +411,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
val lsq = new RobLsqIO
val robDeqPtr = Output(new RobPtr)
val csr = new RobCSRIO
val snpt = Input(new SnapshotPort)
val robFull = Output(Bool())
val headNotReady = Output(Bool())
val cpu_halt = Output(Bool())
......@@ -478,6 +480,9 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
val isEmpty = enqPtr === deqPtr
val isReplaying = io.redirect.valid && RedirectLevel.flushItself(io.redirect.bits.level)
val snptEnq = io.enq.canAccept && io.enq.req.head.valid && io.enq.req.head.bits.snapshot
val snapshots = SnapshotGenerator(enqPtrVec, snptEnq, io.snpt.snptDeq, io.redirect.valid)
val debug_lsIssue = WireDefault(debug_lsIssued)
debug_lsIssue(deqPtr.value) := io.debugHeadLsIssue
......@@ -732,7 +737,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
misPredBlockCounter >> 1.U
)
val misPredBlock = misPredBlockCounter(0)
val blockCommit = misPredBlock || isReplaying || lastCycleFlush || hasWFI
val blockCommit = misPredBlock || isReplaying || lastCycleFlush || hasWFI || io.redirect.valid
io.commits.isWalk := state === s_walk
io.commits.isCommit := state === s_idle && !blockCommit
......@@ -749,7 +754,8 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
// when intrBitSetReg, allow only one instruction to commit at each clock cycle
val isBlocked = if (i != 0) Cat(commit_block.take(i)).orR || allowOnlyOneCommit else intrEnable || deqHasException || deqHasReplayInst
io.commits.commitValid(i) := commit_v(i) && commit_w(i) && !isBlocked
io.commits.info(i) := dispatchDataRead(i)
io.commits.info(i) := dispatchDataRead(i)
io.commits.robIdx(i) := deqPtrVec(i)
when (state === s_walk) {
io.commits.walkValid(i) := shouldWalkVec(i)
......@@ -831,7 +837,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
// (1) redirect occurs: update according to state
// (2) walk: move forwards
val walkPtrVec_next = Mux(io.redirect.valid,
deqPtrVec_next,
Mux(io.snpt.useSnpt, snapshots(io.snpt.snptSelect), deqPtrVec_next),
Mux(state === s_walk, VecInit(walkPtrVec.map(_ + CommitWidth.U)), walkPtrVec)
)
walkPtrVec := walkPtrVec_next
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册