未验证 提交 0dc4893d 编写于 作者: Y Yinan Xu 提交者: GitHub

core: optimize redirect timing (#1630)

This commit adds separate redirect registers in ExuBlock and MemBlock.
They lag the redirect in CtrlBlock by one cycle. This helps
reduce the fanout of the redirect registers.
上级 0febc381
......@@ -20,6 +20,20 @@ import chisel3._
import chisel3.util._
import scala.math.min
/**
  * Registers a `Valid` bundle by one cycle.
  *
  * The `valid` flag is registered unconditionally with `RegNext`, while the
  * `bits` payload goes through a `RegEnable` whose enable is the incoming
  * `valid`, so the payload register only updates on cycles where the input
  * is valid.
  */
object RegNextWithEnable {
  /**
    * @param data    the `Valid` signal to be registered
    * @param hasInit when true (the default) the `valid` register is given
    *                `false.B` as its init value; when false it is created
    *                without an init value
    * @return a wire of the same type as `data`, driven by the registers
    */
  def apply[T <: Data](data: Valid[T], hasInit: Boolean = true): Valid[T] = {
    val next = Wire(data.cloneType)
    // Elaboration-time choice: exactly one flavour of valid register is built.
    next.valid := (if (hasInit) RegNext(data.valid, false.B) else RegNext(data.valid))
    // The payload is captured only while the input is valid.
    next.bits := RegEnable(data.bits, data.valid)
    next
  }
}
class CircularShift(data: UInt) {
private def helper(step: Int, isLeft: Boolean): UInt = {
if (step == 0) {
......
......@@ -268,7 +268,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
val exuOutput = WireInit(writeback)
val timer = GTimer()
for ((wb_next, wb) <- exuOutput.zip(writeback)) {
wb_next.valid := RegNext(wb.valid && !wb.bits.uop.robIdx.needFlush(stage2Redirect))
wb_next.valid := RegNext(wb.valid && !wb.bits.uop.robIdx.needFlush(Seq(stage2Redirect, redirectForExu)))
wb_next.bits := RegNext(wb.bits)
wb_next.bits.uop.debugInfo.writebackTime := timer
}
......@@ -302,11 +302,12 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
flushRedirectReg.bits := RegEnable(flushRedirect.bits, flushRedirect.valid)
val stage2Redirect = Mux(flushRedirect.valid, flushRedirect, redirectGen.io.stage2Redirect)
// val stage3Redirect = Mux(flushRedirectReg.valid, flushRedirectReg, redirectGen.io.stage3Redirect)
// Redirect will be RegNext at ExuBlocks.
val redirectForExu = RegNextWithEnable(stage2Redirect)
val exuRedirect = io.exuRedirect.map(x => {
val valid = x.valid && x.bits.redirectValid
val killedByOlder = x.bits.uop.robIdx.needFlush(stage2Redirect)
val killedByOlder = x.bits.uop.robIdx.needFlush(Seq(stage2Redirect, redirectForExu))
val delayed = Wire(Valid(new ExuOutput))
delayed.valid := RegNext(valid && !killedByOlder, init = false.B)
delayed.bits := RegEnable(x.bits, x.valid)
......@@ -314,7 +315,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
})
val loadReplay = Wire(Valid(new Redirect))
loadReplay.valid := RegNext(io.memoryViolation.valid &&
!io.memoryViolation.bits.robIdx.needFlush(stage2Redirect),
!io.memoryViolation.bits.robIdx.needFlush(Seq(stage2Redirect, redirectForExu)),
init = false.B
)
loadReplay.bits := RegEnable(io.memoryViolation.bits, io.memoryViolation.valid)
......@@ -471,9 +472,9 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
dispatch.io.allocPregs <> io.allocPregs
dispatch.io.singleStep := RegNext(io.csrCtrl.singlestep)
intDq.io.redirect <> stage2Redirect
fpDq.io.redirect <> stage2Redirect
lsDq.io.redirect <> stage2Redirect
intDq.io.redirect <> redirectForExu
fpDq.io.redirect <> redirectForExu
lsDq.io.redirect <> redirectForExu
io.dispatch <> intDq.io.deq ++ lsDq.io.deq ++ fpDq.io.deq
......
......@@ -81,9 +81,11 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp
})
override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.fuWriteback))
val redirect = RegNextWithEnable(io.redirect)
// IO for the scheduler
scheduler.io.hartId := io.hartId
scheduler.io.redirect <> io.redirect
scheduler.io.redirect <> redirect
scheduler.io.allocPregs <> io.allocPregs
scheduler.io.in <> io.in
scheduler.io.fastUopOut <> io.fastUopOut
......@@ -101,7 +103,7 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp
}
// IO for the function units
fuBlock.io.redirect <> io.redirect
fuBlock.io.redirect <> redirect
fuBlock.io.writeback <> io.fuWriteback
fuBlock.io.extra <> io.fuExtra
......@@ -134,7 +136,7 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp
scheWb.valid := wb.valid
scheWb.bits := wb.bits
if (cfg.hasFastUopOut) {
val isFlushed = wb.bits.uop.robIdx.needFlush(io.redirect)
val isFlushed = wb.bits.uop.robIdx.needFlush(redirect)
scheWb.valid := RegNext(wb.valid && !isFlushed)
scheWb.bits.uop := RegNext(wb.bits.uop)
}
......@@ -164,7 +166,7 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp
wbOut.bits.uop := fastWakeup.bits
}
else {
val isFlushed = fastWakeup.bits.robIdx.needFlush(io.redirect)
val isFlushed = fastWakeup.bits.robIdx.needFlush(redirect)
wbOut.valid := RegNext(fastWakeup.valid && !isFlushed)
wbOut.bits.uop := RegNext(fastWakeup.bits)
}
......@@ -188,7 +190,7 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp
require(wakeupIdx.length == wbIdx.length)
for ((i, j) <- wakeupIdx.zip(wbIdx)) {
val scheWb = scheduler.io.writeback(j)
val isFlushed = scheduler.io.fastUopOut(i).bits.robIdx.needFlush(io.redirect)
val isFlushed = scheduler.io.fastUopOut(i).bits.robIdx.needFlush(redirect)
scheWb.valid := RegNext(scheduler.io.fastUopOut(i).valid && !isFlushed)
scheWb.bits.uop := RegNext(scheduler.io.fastUopOut(i).bits)
}
......
......@@ -101,6 +101,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.writeback))
val redirect = RegNextWithEnable(io.redirect)
val dcache = outer.dcache.module
val uncache = outer.uncache.module
......@@ -248,7 +250,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// LoadUnit
for (i <- 0 until exuParameters.LduCnt) {
loadUnits(i).io.redirect <> io.redirect
loadUnits(i).io.redirect <> redirect
loadUnits(i).io.feedbackSlow <> io.rsfeedback(i).feedbackSlow
loadUnits(i).io.feedbackFast <> io.rsfeedback(i).feedbackFast
loadUnits(i).io.rsIdx := io.rsfeedback(i).rsIdx
......@@ -333,12 +335,12 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
for (i <- 0 until exuParameters.StuCnt) {
val stu = storeUnits(i)
stdExeUnits(i).io.redirect <> io.redirect
stdExeUnits(i).io.redirect <> redirect
stdExeUnits(i).io.fromInt <> io.issue(i + exuParameters.LduCnt + exuParameters.StuCnt)
stdExeUnits(i).io.fromFp := DontCare
stdExeUnits(i).io.out := DontCare
stu.io.redirect <> io.redirect
stu.io.redirect <> redirect
stu.io.feedbackSlow <> io.rsfeedback(exuParameters.LduCnt + i).feedbackSlow
stu.io.rsIdx <> io.rsfeedback(exuParameters.LduCnt + i).rsIdx
// NOTE: just for dtlb's perf cnt
......@@ -437,7 +439,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// Lsq
lsq.io.rob <> io.lsqio.rob
lsq.io.enq <> io.enqLsq
lsq.io.brqRedirect <> io.redirect
lsq.io.brqRedirect <> redirect
io.memoryViolation <> lsq.io.rollback
lsq.io.uncache <> uncache.io.lsq
// delay dcache refill for 1 cycle for better timing
......@@ -499,7 +501,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
st_data_atomics(i) -> stData(i).bits))
atomicsUnit.io.rsIdx := Mux1H(Seq.tabulate(exuParameters.StuCnt)(i =>
st_atomics(i) -> io.rsfeedback(atomic_rs(i)).rsIdx))
atomicsUnit.io.redirect <> io.redirect
atomicsUnit.io.redirect <> redirect
// TODO: complete amo's pmp support
val amoTlb = dtlb_ld(0).requestor(0)
......@@ -533,7 +535,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// Exception address is used several cycles after flush.
// We delay it by 10 cycles to ensure its flush safety.
val atomicsException = RegInit(false.B)
when (DelayN(io.redirect.valid, 10) && atomicsException) {
when (DelayN(redirect.valid, 10) && atomicsException) {
atomicsException := false.B
}.elsewhen (atomicsUnit.io.exceptionAddr.valid) {
atomicsException := true.B
......
......@@ -68,7 +68,6 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
val isTrueEmpty = !VecInit(stateEntries.map(_ === s_valid)).asUInt.orR
val canEnqueue = allowEnqueue
val canActualEnqueue = canEnqueue && !io.redirect.valid
/**
* Part 1: update states and uops when enqueue, dequeue, commit, redirect/replay
......@@ -87,7 +86,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
val enqIndexOH = (0 until enqnum).map(i => tailPtrOHVec(PopCount(io.enq.needAlloc.take(i))))
for (i <- 0 until size) {
val validVec = io.enq.req.map(_.valid).zip(enqIndexOH).map{ case (v, oh) => v && oh(i) }
when (VecInit(validVec).asUInt.orR && canActualEnqueue) {
when (VecInit(validVec).asUInt.orR && canEnqueue) {
data(i) := Mux1H(validVec, io.enq.req.map(_.bits))
stateEntries(i) := s_valid
}
......
......@@ -75,7 +75,7 @@ class ExuWbArbiter(n: Int, hasFastUopOut: Boolean, fastVec: Seq[Boolean])(implic
val sel = VecInit(io.in.map(_.fire)).asUInt
io.out.bits := Mux1H(RegNext(sel), dataVec)
// uop comes at the same cycle with valid and only RegNext is needed.
io.out.bits.uop := RegNext(uop)
io.out.bits.uop := RegEnable(uop, ctrl_arb.io.out.valid)
}
}
......@@ -156,6 +156,8 @@ class WbArbiterImp(outer: WbArbiter)(implicit p: Parameters) extends LazyModuleI
val out = Vec(outer.numOutPorts, ValidIO(new ExuOutput))
})
val redirect = RegNextWithEnable(io.redirect)
val exclusiveIn = outer.exclusivePorts.map(io.in(_))
val sharedIn = outer.sharedPorts.map(io.in(_))
......@@ -168,12 +170,12 @@ class WbArbiterImp(outer: WbArbiter)(implicit p: Parameters) extends LazyModuleI
require(!hasFastUopOut || !outer.needRegNext(i))
if (hasFastUopOut) {
// When hasFastUopOut, only uop comes at the same cycle with valid.
out.valid := RegNext(in.valid && !in.bits.uop.robIdx.needFlush(io.redirect))
out.bits.uop := RegNext(in.bits.uop)
out.valid := RegNext(in.valid && !in.bits.uop.robIdx.needFlush(redirect))
out.bits.uop := RegEnable(in.bits.uop, in.valid)
}
if (outer.needRegNext(i)) {
out.valid := RegNext(in.valid && !in.bits.uop.robIdx.needFlush(io.redirect))
out.bits := RegNext(in.bits)
out.valid := RegNext(in.valid && !in.bits.uop.robIdx.needFlush(redirect))
out.bits := RegEnable(in.bits, in.valid)
}
in.ready := true.B
}
......@@ -190,7 +192,7 @@ class WbArbiterImp(outer: WbArbiter)(implicit p: Parameters) extends LazyModuleI
val flushFunc = (o: ExuOutput, r: Valid[Redirect]) => o.uop.robIdx.needFlush(r)
if (outer.cfgHasFast(i)) {
val ctrl_pipe = Wire(io.in(i).cloneType)
val buffer = PipelineConnect(io.in(i), ctrl_pipe, flushFunc, io.redirect, io.in(i).bits, 1)
val buffer = PipelineConnect(io.in(i), ctrl_pipe, flushFunc, redirect, io.in(i).bits, 1)
buffer.extra.in := io.in(i).bits
val buffer_out = Wire(io.in(i).cloneType)
ctrl_pipe.ready := buffer_out.ready
......@@ -200,7 +202,7 @@ class WbArbiterImp(outer: WbArbiter)(implicit p: Parameters) extends LazyModuleI
buffer_out
}
else {
PipelineNext(io.in(i), flushFunc, io.redirect)
PipelineNext(io.in(i), flushFunc, redirect)
}
}
else io.in(i)
......@@ -208,7 +210,7 @@ class WbArbiterImp(outer: WbArbiter)(implicit p: Parameters) extends LazyModuleI
val hasFastUopOut = outer.hasFastUopOut(portIndex)
val fastVec = outer.hasFastUopOutVec(portIndex)
val arb = Module(new ExuWbArbiter(shared.size, hasFastUopOut, fastVec))
arb.io.redirect <> io.redirect
arb.io.redirect <> redirect
arb.io.in <> shared
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
......@@ -347,8 +349,8 @@ class Wb2Ctrl(configs: Seq[ExuConfig])(implicit p: Parameters) extends LazyModul
module.io.in := sink._1.zip(sink._2).zip(sourceMod).flatMap(x => x._1._1.writebackSource1(x._2)(x._1._2))
}
lazy val module = new LazyModuleImp(this)
with HasWritebackSourceImp
lazy val module = new LazyModuleImp(this)
with HasWritebackSourceImp
with HasXSParameter
{
val io = IO(new Bundle {
......@@ -357,13 +359,14 @@ class Wb2Ctrl(configs: Seq[ExuConfig])(implicit p: Parameters) extends LazyModul
val out = Vec(configs.length, ValidIO(new ExuOutput))
val delayedLoadError = Vec(LoadPipelineWidth, Input(Bool())) // Dirty fix of data ecc error timing
})
val redirect = RegNextWithEnable(io.redirect)
for (((out, in), config) <- io.out.zip(io.in).zip(configs)) {
out.valid := in.fire
out.bits := in.bits
if (config.hasFastUopOut || config.hasLoadError) {
out.valid := RegNext(in.fire && !in.bits.uop.robIdx.needFlush(io.redirect))
out.bits.uop := RegNext(in.bits.uop)
out.valid := RegNext(in.fire && !in.bits.uop.robIdx.needFlush(redirect))
out.bits.uop := RegEnable(in.bits.uop, in.fire)
}
}
......@@ -374,7 +377,7 @@ class Wb2Ctrl(configs: Seq[ExuConfig])(implicit p: Parameters) extends LazyModul
){
// overwrite load exception writeback
out.bits.uop.cf.exceptionVec(loadAccessFault) := delayed_error ||
RegNext(in.bits.uop.cf.exceptionVec(loadAccessFault))
RegEnable(in.bits.uop.cf.exceptionVec(loadAccessFault), in.valid)
}
}
......
......@@ -35,6 +35,7 @@ class RobPtr(implicit p: Parameters) extends CircularQueuePtr[RobPtr](
redirect.valid && (flushItself || isAfter(this, redirect.bits.robIdx))
}
/**
  * Multi-redirect variant of `needFlush`: evaluates the single-redirect
  * `needFlush` for every element of `redirect` and ORs the results together.
  *
  * NOTE(review): callers are expected to pass at least one redirect;
  * `VecInit` presumably rejects an empty sequence — confirm.
  */
def needFlush(redirect: Seq[Valid[Redirect]]): Bool = {
  VecInit(redirect.map(r => needFlush(r))).asUInt.orR
}
}
object RobPtr {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册