未验证 提交 dd381594 编写于 作者: Y Yinan Xu 提交者: GitHub

backend,exu: connect writeback when possible (#977)

This commit optimizes ExuBlock timing by connecting writeback when
possible.

The timing priority order is RegNext(rs.fastUopOut) > fu.writeback >
arbiter.out(--> io.rfWriteback --> rs.writeback). The higher the
priority, the better the timing.

(1) When function units have exclusive writeback ports, their
wakeup ports for reservation stations can be connected directly from
function units' writeback ports. Special case: when the function unit
has fastUopOut, valid and uop should be RegNext.

(2) If the reservation station has fastUopOut for all instructions
in this exu, we should replace io.fuWriteback with RegNext(fastUopOut).
In this case, the corresponding execution units must have exclusive
writeback ports; otherwise the rs cannot ensure that the instruction
is able to write the regfile.

(3) If the reservation station has fastUopOut for all instructions in
this exu, we should replace io.rfWriteback (rs.writeback) with
RegNext(rs.wakeupOut).
上级 dd81f7f0
...@@ -35,7 +35,7 @@ class ExuBlock( ...@@ -35,7 +35,7 @@ class ExuBlock(
val scheduler = LazyModule(new Scheduler(configs, dpPorts, intRfWbPorts, fpRfWbPorts, outFastPorts, outFpRfReadPorts)) val scheduler = LazyModule(new Scheduler(configs, dpPorts, intRfWbPorts, fpRfWbPorts, outFastPorts, outFpRfReadPorts))
val allRfWbPorts = intRfWbPorts ++ fpRfWbPorts val allRfWbPorts = intRfWbPorts ++ fpRfWbPorts
val wbPosition = configs.map(cfg => allRfWbPorts.zipWithIndex.filter(_._1.contains(cfg._1)).map(_._2)) def getWbIndex(cfg: ExuConfig): Seq[Int] = allRfWbPorts.zipWithIndex.filter(_._1.contains(cfg)).map(_._2)
lazy val module = new ExuBlockImp(this) lazy val module = new ExuBlockImp(this)
} }
...@@ -62,6 +62,7 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp ...@@ -62,6 +62,7 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp
val fuExtra = fuBlock.io.extra.cloneType val fuExtra = fuBlock.io.extra.cloneType
}) })
// IO for the scheduler
scheduler.io.redirect <> io.redirect scheduler.io.redirect <> io.redirect
scheduler.io.flush <> io.flush scheduler.io.flush <> io.flush
scheduler.io.allocate <> io.allocate scheduler.io.allocate <> io.allocate
...@@ -70,29 +71,78 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp ...@@ -70,29 +71,78 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp
scheduler.io.fastUopIn <> io.fastUopIn scheduler.io.fastUopIn <> io.fastUopIn
scheduler.io.extra <> io.scheExtra scheduler.io.extra <> io.scheExtra
// the scheduler issues instructions to function units
scheduler.io.issue <> fuBlock.io.issue scheduler.io.issue <> fuBlock.io.issue
val flattenFuConfigs = fuConfigs.zip(outer.wbPosition).flatMap(c => Seq.fill(c._1._2)((c._1._1, c._2))) // IO for the function units
require(flattenFuConfigs.length == fuBlock.io.writeback.length)
val directConn = flattenFuConfigs.zip(fuBlock.io.writeback).filterNot(_._1._1.hasUncertainlatency)
if (directConn.length > 0) {
val directWbPorts = directConn.map(_._1._2).reduce(_ ++ _).toSet.toSeq
println(s"Ports $directWbPorts are directly connected from function units.")
require(directConn.length == directWbPorts.length)
val wbPortExuCfgs = directWbPorts.map(outer.allRfWbPorts(_))
wbPortExuCfgs.foreach(cfgs => require(cfgs.length == 1))
val schedWbPorts = directWbPorts.map(scheduler.io.writeback(_))
val outerWbPorts = directWbPorts.map(io.rfWriteback(_))
schedWbPorts.zip(directConn.map(_._2)).zip(outerWbPorts).map{ case ((s, f), o) =>
s := f
XSError((o.valid || f.valid) && o.bits.uop.roqIdx =/= f.bits.uop.roqIdx, "different instruction\n")
XSError((o.valid || f.valid) && o.bits.data =/= f.bits.data, "different data\n")
}
}
fuBlock.io.redirect <> io.redirect fuBlock.io.redirect <> io.redirect
fuBlock.io.flush <> io.flush fuBlock.io.flush <> io.flush
fuBlock.io.writeback <> io.fuWriteback fuBlock.io.writeback <> io.fuWriteback
fuBlock.io.extra <> io.fuExtra fuBlock.io.extra <> io.fuExtra
val flattenFuConfigs = fuConfigs.flatMap(c => Seq.fill(c._2)(c._1))
require(flattenFuConfigs.length == fuBlock.io.writeback.length)
// Timing priority: RegNext(rs.fastUopOut) > fu.writeback > arbiter.out(--> io.rfWriteback --> rs.writeback)
// Filter condition: allWakeupFromRS > hasExclusiveWbPort > None
// The higher the priority, the better the timing.
// (1) When function units have exclusive writeback ports, their wakeup ports for
// reservation stations can be connected directly from function units' writeback ports.
// Special case: when the function unit has fastUopOut, valid and uop should be RegNext.
val exclusiveFuWb = flattenFuConfigs.zip(fuBlock.io.writeback).filter(_._1.hasExclusiveWbPort)
val exclusiveRfWbIdx = fuConfigs.map(_._1).filter(_.hasExclusiveWbPort).flatMap(cfg => outer.getWbIndex(cfg))
require(exclusiveFuWb.length == exclusiveRfWbIdx.length, s"${exclusiveFuWb.length} != ${exclusiveRfWbIdx.length}")
for ((i, (cfg, wb)) <- exclusiveRfWbIdx.zip(exclusiveFuWb)) {
val scheWb = scheduler.io.writeback(i)
scheWb.valid := wb.valid
scheWb.bits := wb.bits
if (cfg.hasFastUopOut) {
val isFlushed = wb.bits.uop.roqIdx.needFlush(io.redirect, io.flush)
scheWb.valid := RegNext(wb.valid && !isFlushed)
scheWb.bits.uop := RegNext(wb.bits.uop)
}
println(s"scheduler.writeback($i) is connected from exu ${cfg.name}")
val outerWb = io.rfWriteback(i)
val hasWb = outerWb.valid || scheWb.valid
XSError(hasWb && outerWb.bits.uop.roqIdx =/= scheWb.bits.uop.roqIdx, "different instruction\n")
XSError(hasWb && outerWb.bits.data =/= scheWb.bits.data, "different data\n")
}
// (2) If the reservation station has fastUopOut for all instructions in this exu,
// we should replace io.fuWriteback with RegNext(fastUopOut).
// In this case, the corresponding execution units must have exclusive writeback ports;
// otherwise the rs cannot ensure that the instruction is able to write the regfile.
val allWakeupFromRs = flattenFuConfigs.zipWithIndex.filter(_._1.allWakeupFromRS)
for ((cfg, i) <- allWakeupFromRs) {
val wbOut = io.fuWriteback(i)
val fastWakeup = scheduler.io.fastUopOut(i)
val isFlushed = fastWakeup.bits.roqIdx.needFlush(io.redirect, io.flush)
wbOut.valid := RegNext(fastWakeup.valid && !isFlushed)
wbOut.bits.uop := RegNext(fastWakeup.bits)
println(s"writeback from exu $i is replaced by RegNext(rs.fastUopOut)")
XSError(wbOut.valid && !wbOut.ready, "fast uop wb should not be blocked\n")
require(cfg.hasExclusiveWbPort, "it's impossible to have allWakeupFromRs if it doesn't have exclusive rf ports")
val fuWb = fuBlock.io.writeback(i)
XSError((wbOut.valid || fuWb.valid) && wbOut.bits.uop.roqIdx =/= fuWb.bits.uop.roqIdx, "different instruction\n")
XSError((wbOut.valid || fuWb.valid) && wbOut.bits.data =/= fuWb.bits.data, "different data\n")
}
// (3) If the reservation station has fastUopOut for all instructions in this exu,
// we should replace io.rfWriteback (rs.writeback) with RegNext(rs.wakeupOut).
val allWakeFromRsCfgs = fuConfigs.map(_._1).filter(_.allWakeupFromRS)
for (cfg <- allWakeFromRsCfgs) {
val wakeupIdx = flattenFuConfigs.zipWithIndex.filter(_._1 == cfg).map(_._2)
val wbIdx = outer.getWbIndex(cfg)
require(wakeupIdx.length == wbIdx.length)
for ((i, j) <- wakeupIdx.zip(wbIdx)) {
val scheWb = scheduler.io.writeback(j)
val isFlushed = scheduler.io.fastUopOut(i).bits.roqIdx.needFlush(io.redirect, io.flush)
scheWb.valid := RegNext(scheduler.io.fastUopOut(i).valid && !isFlushed)
scheWb.bits.uop := RegNext(scheduler.io.fastUopOut(i).bits)
}
}
} }
...@@ -84,7 +84,9 @@ case class ExuConfig ...@@ -84,7 +84,9 @@ case class ExuConfig
val hasCertainLatency = if (name == "MulDivExeUnit") true else latency.latencyVal.nonEmpty val hasCertainLatency = if (name == "MulDivExeUnit") true else latency.latencyVal.nonEmpty
val hasUncertainlatency = if (name == "MulDivExeUnit") true else latency.latencyVal.isEmpty val hasUncertainlatency = if (name == "MulDivExeUnit") true else latency.latencyVal.isEmpty
val wakeupFromRS = hasCertainLatency && (wbIntPriority <= 1 || wbFpPriority <= 1) val wakeupFromRS = hasCertainLatency && (wbIntPriority <= 1 || wbFpPriority <= 1)
val allWakeupFromRS = !hasUncertainlatency && (wbIntPriority <= 1 || wbFpPriority <= 1)
val wakeupFromExu = !wakeupFromRS val wakeupFromExu = !wakeupFromRS
val hasExclusiveWbPort = (wbIntPriority == 0 && writeIntRf) || (wbFpPriority == 0 && writeFpRf)
def canAccept(fuType: UInt): Bool = { def canAccept(fuType: UInt): Bool = {
Cat(fuConfigs.map(_.fuType === fuType)).orR() Cat(fuConfigs.map(_.fuType === fuType)).orR()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册