未验证 提交 f973ab00 编写于 作者: Y Yinan Xu 提交者: GitHub

dispatch2Rs: load balance between two ports (#1110)

This commit adds load balance support for two dispatch ports, between 0
and 2, 1 and 3, etc.
上级 73be64b3
......@@ -344,7 +344,9 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
io.dispatch <> intDq.io.deq ++ lsDq.io.deq ++ fpDq.io.deq
val jumpInst = io.dispatch(0).bits
val pingpong = RegInit(false.B)
pingpong := !pingpong
val jumpInst = Mux(pingpong && (exuParameters.AluCnt > 2).B, io.dispatch(2).bits, io.dispatch(0).bits)
val jumpPcRead = io.frontend.fromFtq.getJumpPcRead
io.jumpPc := jumpPcRead(jumpInst.cf.ftqPtr, jumpInst.cf.ftqOffset)
val jumpTargetRead = io.frontend.fromFtq.target_read
......
......@@ -98,29 +98,66 @@ class Dispatch2RsLessExuImp(outer: Dispatch2Rs)(implicit p: Parameters) extends
io.readFpState.get.map(_.req).zip(req).foreach(x => x._1 := x._2)
}
val enableLoadBalance = outer.numOut > 2
val numPingPongBits = outer.numOut / 2
val pingpong = Seq.fill(numPingPongBits)(RegInit(false.B))
pingpong.foreach(p => p := !p)
val pairIndex = (0 until outer.numOut).map(i => (i + 2) % outer.numOut)
def needLoadBalance(index: Int): Bool = {
val bitIndex = Seq(index, pairIndex(index), numPingPongBits - 1).min
// When ping pong bit is set, use pairIndex
if (enableLoadBalance) pingpong(bitIndex) && (index != pairIndex(index)).B else false.B
}
// out is directly connected from in for better timing
// TODO: select critical instruction first
val maxExuConfig = outer.exuConfigCases.last._1
val numMaxExuConfig = outer.exuConfigCases.last._1.length
for ((config, i) <- outer.configs.zipWithIndex) {
io.out(i) <> io.in(i)
// When the corresponding execution units do not have full functionalities,
// we have to filter out the instructions that these execution units does not accept.
if (config.length < maxExuConfig.length) {
if (config.length < numMaxExuConfig) {
val thisCanAccept = config.map(_.canAccept(io.in(i).bits.ctrl.fuType)).reduce(_ || _)
io.out(i).valid := io.in(i).valid && thisCanAccept
io.in(i).ready := io.out(i).ready && thisCanAccept
}
}
// For load balance, the out port alternates between different in ports
// It must be another for loop because the former for loop does not have any condition
// and will override the assignments.
for ((config, i) <- outer.configs.zipWithIndex) {
when (needLoadBalance(i)) {
io.out(i) <> io.in(pairIndex(i))
if (config.length < numMaxExuConfig) {
val thisCanAccept = config.map(_.canAccept(io.in(pairIndex(i)).bits.ctrl.fuType)).reduce(_ || _)
io.out(i).valid := io.in(pairIndex(i)).valid && thisCanAccept
io.in(pairIndex(i)).ready := io.out(i).ready && thisCanAccept
}
println(s"Dispatch2Rs ports balance between $i and ${pairIndex(i)}")
}
}
// srcState is read from outside and connected directly
if (io.readIntState.isDefined) {
val intSrcStateVec = io.out.flatMap(_.bits.srcState.take(numIntSrc))
io.readIntState.get.map(_.resp).zip(intSrcStateVec).foreach(x => x._2 := x._1)
for (i <- 0 until outer.numOut) {
val pairState = io.readIntState.get.slice(numIntSrc * pairIndex(i), numIntSrc * pairIndex(i) + numIntSrc)
when (needLoadBalance(i)) {
pairState.map(_.resp).zip(io.out(i).bits.srcState.take(numIntSrc)).foreach(x => x._2 := x._1)
}
}
}
if (io.readFpState.isDefined) {
require(io.readIntState.isEmpty, "we do not implement int+fp in isLessExu")
val fpSrcStateVec = io.out.flatMap(_.bits.srcState.take(numFpSrc))
io.readFpState.get.map(_.resp).zip(fpSrcStateVec).foreach(x => x._2 := x._1)
for (i <- 0 until outer.numOut) {
val pairState = io.readFpState.get.slice(numFpSrc * pairIndex(i), numFpSrc * pairIndex(i) + numFpSrc)
when (needLoadBalance(i)) {
pairState.map(_.resp).zip(io.out(i).bits.srcState.take(numFpSrc)).foreach(x => x._2 := x._1)
}
}
}
// If io.out is wider than io.in, we need to set io.in.ready to false.B.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册