Commit 99b8dc2c authored by Yinan Xu

rename: don't bypass preg and leave it to dispatch1

Rename now provides vectors indicating whether there are matches between lsrc1/lsrc2/lsrc3/ldest
and previous instructions' ldest. Dispatch1 updates the uops' psrc1/psrc2/psrc3/old_pdest with the
previous instructions' pdest. This approach improves the timing of rename.
Parent 009bc171
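The mechanism, in brief: rename no longer rewrites psrc/old_pdest for intra-group dependences; it only reports, per slot, which older slots in the same rename group write the logical register that a source (or the destination) refers to, and dispatch1 substitutes the pdest of the youngest matching older slot. A minimal, self-contained Chisel sketch of that selection (module and signal names are illustrative, not part of the XiangShan code; assumes groupWidth >= 2):

import chisel3._
import chisel3.util._

// Illustrative only: pick the youngest matching older pdest, else the RAT value.
class BypassSelect(groupWidth: Int, pregBits: Int) extends Module {
  val io = IO(new Bundle {
    val psrcFromRat = Input(UInt(pregBits.W))                       // psrc read from the rename table
    val olderPdest  = Input(Vec(groupWidth - 1, UInt(pregBits.W)))  // pdest of older slots 0 .. groupWidth-2
    val matchVec    = Input(UInt((groupWidth - 1).W))               // bit j: source matches older slot j's ldest
    val psrc        = Output(UInt(pregBits.W))
  })
  // Fold from the oldest slot to the youngest: a later (younger) match
  // overwrites an earlier one, so the youngest matching pdest wins.
  io.psrc := io.olderPdest.zip(io.matchVec.asBools).foldLeft(io.psrcFromRat) {
    (prev, next) => Mux(next._2, next._1, prev)
  }
}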
......@@ -103,6 +103,7 @@ class CtrlBlock extends XSModule {
rename.io.redirect <> redirect
rename.io.roqCommits <> roq.io.commits
rename.io.out <> dispatch.io.fromRename
rename.io.renameBypass <> dispatch.io.renameBypass
dispatch.io.redirect <> redirect
dispatch.io.enqRoq <> roq.io.enq
......
......@@ -7,6 +7,7 @@ import utils._
import xiangshan.backend.regfile.RfReadPort
import chisel3.ExcitingUtils._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.rename.RenameBypassInfo
case class DispatchParameters
(
......@@ -28,6 +29,7 @@ class Dispatch extends XSModule {
val redirect = Flipped(ValidIO(new Redirect))
// from rename
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val renameBypass = Input(new RenameBypassInfo)
// enq Roq
val enqRoq = new Bundle {
val canAccept = Input(Bool())
......@@ -72,6 +74,7 @@ class Dispatch extends XSModule {
// dispatch 1: accept uops from rename and dispatch them to the three dispatch queues
dispatch1.io.redirect <> io.redirect
dispatch1.io.renameBypass := RegEnable(io.renameBypass, io.fromRename(0).valid && dispatch1.io.fromRename(0).ready)
dispatch1.io.enqRoq <> io.enqRoq
dispatch1.io.enqLsq <> io.enqLsq
dispatch1.io.toIntDqReady <> intDq.io.enqReady
......
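A note on the RegEnable above: io.renameBypass is captured with the same fire condition as the first uop's handshake, presumably because the uops pass through a pipeline register between Dispatch's input and dispatch1, so the bypass vectors must be delayed by one cycle to stay paired with the uops they describe. A reduced sketch of the pattern (bundle and signal names are placeholders, not the real interface):

import chisel3._
import chisel3.util.RegEnable

// Illustrative only: keep side-band information aligned with a registered handshake.
class AlignedSideband extends Module {
  val io = IO(new Bundle {
    val uopValid   = Input(Bool())     // stand-in for io.fromRename(0).valid
    val stageReady = Input(Bool())     // stand-in for dispatch1.io.fromRename(0).ready
    val sideIn     = Input(UInt(8.W))  // stand-in for RenameBypassInfo
    val sideOut    = Output(UInt(8.W))
  })
  // Capture only when the uops actually advance into the next stage,
  // so sideOut always describes the uops currently held there.
  io.sideOut := RegEnable(io.sideIn, io.uopValid && io.stageReady)
}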
......@@ -6,6 +6,7 @@ import chisel3.ExcitingUtils._
import xiangshan._
import utils.{XSDebug, XSError, XSInfo}
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.rename.RenameBypassInfo
// read rob and enqueue
class Dispatch1 extends XSModule {
......@@ -13,6 +14,7 @@ class Dispatch1 extends XSModule {
val redirect = Flipped(ValidIO(new Redirect))
// from rename
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val renameBypass = Input(new RenameBypassInfo)
val recv = Output(Vec(RenameWidth, Bool()))
// enq Roq
val enqRoq = new Bundle {
......@@ -38,6 +40,8 @@ class Dispatch1 extends XSModule {
val toLsDqReady = Input(Bool())
val toLsDq = Vec(dpParams.DqEnqWidth, ValidIO(new MicroOp))
})
/**
* Part 1: choose the target dispatch queue and the corresponding write ports
*/
......@@ -67,8 +71,58 @@ class Dispatch1 extends XSModule {
ExcitingUtils.addSource(!dispatchNotEmpty, "perfCntCondDp1Empty", Perf)
}
/**
* Part 2:
* Update commitType, psrc1, psrc2, psrc3, old_pdest for the uops
*/
val updatedUop = Wire(Vec(RenameWidth, new MicroOp))
val updatedCommitType = Wire(Vec(RenameWidth, CommitType()))
val updatedPsrc1 = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W)))
val updatedPsrc2 = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W)))
val updatedPsrc3 = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W)))
val updatedOldPdest = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W)))
for (i <- 0 until RenameWidth) {
updatedCommitType(i) := Cat(isLs(i), isStore(i) | isFp(i))
updatedPsrc1(i) := io.fromRename.take(i).map(_.bits.pdest)
.zip(if (i == 0) Seq() else io.renameBypass.lsrc1_bypass(i-1).asBools)
.foldLeft(io.fromRename(i).bits.psrc1) {
(z, next) => Mux(next._2, next._1, z)
}
updatedPsrc2(i) := io.fromRename.take(i).map(_.bits.pdest)
.zip(if (i == 0) Seq() else io.renameBypass.lsrc2_bypass(i-1).asBools)
.foldLeft(io.fromRename(i).bits.psrc2) {
(z, next) => Mux(next._2, next._1, z)
}
updatedPsrc3(i) := io.fromRename.take(i).map(_.bits.pdest)
.zip(if (i == 0) Seq() else io.renameBypass.lsrc3_bypass(i-1).asBools)
.foldLeft(io.fromRename(i).bits.psrc3) {
(z, next) => Mux(next._2, next._1, z)
}
updatedOldPdest(i) := io.fromRename.take(i).map(_.bits.pdest)
.zip(if (i == 0) Seq() else io.renameBypass.ldest_bypass(i-1).asBools)
.foldLeft(io.fromRename(i).bits.old_pdest) {
(z, next) => Mux(next._2, next._1, z)
}
updatedUop(i) := io.fromRename(i).bits
// update bypass psrc1/psrc2/psrc3/old_pdest
updatedUop(i).psrc1 := updatedPsrc1(i)
updatedUop(i).psrc2 := updatedPsrc2(i)
updatedUop(i).psrc3 := updatedPsrc3(i)
updatedUop(i).old_pdest := updatedOldPdest(i)
XSError(updatedUop(i).psrc1 =/= io.fromRename(i).bits.psrc1, "psrc1 bypass not working correctly\n")
XSError(updatedUop(i).psrc2 =/= io.fromRename(i).bits.psrc2, "psrc2 bypass not working correctly\n")
XSError(updatedUop(i).psrc3 =/= io.fromRename(i).bits.psrc3, "psrc3 bypass not working correctly\n")
XSError(updatedUop(i).old_pdest =/= io.fromRename(i).bits.old_pdest, "old_pdest bypass not working correctly\n")
// update commitType
updatedUop(i).ctrl.commitType := updatedCommitType(i)
}
/**
* Part 3:
* acquire ROQ (all), LSQ (load/store only) and dispatch queue slots
* only set valid when all of them provide enough entries
*/
......@@ -107,51 +161,53 @@ class Dispatch1 extends XSModule {
// (2) previous instructions are ready
val thisCanActualOut = (0 until RenameWidth).map(i => allResourceReady && thisCanOut(i) && notBlockedByPrevious(i))
val uopWithIndex = Wire(Vec(RenameWidth, new MicroOp))
// input for ROQ and LSQ
// note that LSQ needs roqIdx
for (i <- 0 until RenameWidth) {
// input for ROQ and LSQ
val commitType = Cat(isLs(i), isStore(i) | isFp(i))
io.enqRoq.extraWalk(i) := io.fromRename(i).valid && !thisCanActualOut(i)
io.enqRoq.req(i).valid := io.fromRename(i).valid && thisCanActualOut(i)
io.enqRoq.req(i).bits := io.fromRename(i).bits
io.enqRoq.req(i).bits.ctrl.commitType := commitType
io.enqRoq.req(i).bits := updatedUop(i)
val shouldEnqLsq = isLs(i) && io.fromRename(i).bits.ctrl.fuType =/= FuType.mou
io.enqLsq.req(i).valid := io.fromRename(i).valid && shouldEnqLsq && !redirectValid && thisCanActualOut(i)
io.enqLsq.req(i).bits := io.fromRename(i).bits
io.enqLsq.req(i).bits.ctrl.commitType := commitType
io.enqLsq.req(i).bits := updatedUop(i)
io.enqLsq.req(i).bits.roqIdx := io.enqRoq.resp(i)
// append ROQ and LSQ indexes to uop
uopWithIndex(i) := io.fromRename(i).bits
uopWithIndex(i).roqIdx := io.enqRoq.resp(i)
uopWithIndex(i).lqIdx := io.enqLsq.resp(i).lqIdx
uopWithIndex(i).sqIdx := io.enqLsq.resp(i).sqIdx
XSDebug(io.enqLsq.req(i).valid,
p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives lq ${io.enqLsq.resp(i).lqIdx} sq ${io.enqLsq.resp(i).sqIdx}\n")
XSDebug(io.enqRoq.req(i).valid, p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives nroq ${io.enqRoq.resp(i)}\n")
}
/**
* Part 4:
* append ROQ and LSQ indexes to uop, and send them to dispatch queue
*/
val updateUopWithIndex = Wire(Vec(RenameWidth, new MicroOp))
for (i <- 0 until RenameWidth) {
updateUopWithIndex(i) := updatedUop(i)
updateUopWithIndex(i).roqIdx := io.enqRoq.resp(i)
updateUopWithIndex(i).lqIdx := io.enqLsq.resp(i).lqIdx
updateUopWithIndex(i).sqIdx := io.enqLsq.resp(i).sqIdx
}
// send uops with correct indexes to dispatch queues
// Note that if one of their previous instructions cannot enqueue, they should not enter the dispatch queue.
// We use notBlockedByPrevious here since mapping(i).valid implies there's a valid instruction that can enqueue,
// thus we don't need to check thisCanOut.
for (i <- 0 until dpParams.DqEnqWidth) {
io.toIntDq(i).bits := uopWithIndex(intIndex.io.mapping(i).bits)
io.toIntDq(i).bits := updateUopWithIndex(intIndex.io.mapping(i).bits)
io.toIntDq(i).valid := intIndex.io.mapping(i).valid && allResourceReady &&
!thisIsBlocked(intIndex.io.mapping(i).bits) && notBlockedByPrevious(intIndex.io.mapping(i).bits)
// NOTE: floating point instructions are not noSpecExec currently
// remove the /**/ comment when fp instructions can be noSpecExec
io.toFpDq(i).bits := uopWithIndex(fpIndex.io.mapping(i).bits)
io.toFpDq(i).bits := updateUopWithIndex(fpIndex.io.mapping(i).bits)
io.toFpDq(i).valid := fpIndex.io.mapping(i).valid && allResourceReady &&
/*!thisIsBlocked(fpIndex.io.mapping(i).bits) && */notBlockedByPrevious(fpIndex.io.mapping(i).bits)
io.toLsDq(i).bits := uopWithIndex(lsIndex.io.mapping(i).bits)
io.toLsDq(i).bits := updateUopWithIndex(lsIndex.io.mapping(i).bits)
io.toLsDq(i).valid := lsIndex.io.mapping(i).valid && allResourceReady &&
!thisIsBlocked(lsIndex.io.mapping(i).bits) && notBlockedByPrevious(lsIndex.io.mapping(i).bits)
......@@ -170,7 +226,7 @@ class Dispatch1 extends XSModule {
XSInfo(io.recv(i),
p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)}, type(${isInt(i)}, ${isFp(i)}, ${isLs(i)}), " +
p"roq ${uopWithIndex(i).roqIdx}, lq ${uopWithIndex(i).lqIdx}, sq ${uopWithIndex(i).sqIdx}, " +
p"roq ${updateUopWithIndex(i).roqIdx}, lq ${updateUopWithIndex(i).lqIdx}, sq ${updateUopWithIndex(i).sqIdx}, " +
p"(${intIndex.io.reverseMapping(i).bits}, ${fpIndex.io.reverseMapping(i).bits}, ${lsIndex.io.reverseMapping(i).bits})\n"
)
......
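On the priority of the foldLeft in Part 2: the fold starts from the psrc delivered by rename and walks the older slots from oldest to youngest, so when several older instructions write the same logical register, the youngest one's pdest survives. A plain-Scala analogue with made-up values makes this explicit:

// Pure-Scala analogue of the Mux fold: later (younger) matches overwrite earlier ones.
val psrcFromRename = "p12"                                       // fallback read from the rename table
val olderSlots = Seq(("p5", true), ("p7", false), ("p9", true))  // (pdest, ldest matches this source)
val selected = olderSlots.foldLeft(psrcFromRename) {
  case (prev, (pdest, hit)) => if (hit) pdest else prev
}
// selected == "p9": slot 2 is the youngest matching older instruction, even though slot 0 also matched.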
......@@ -5,6 +5,13 @@ import chisel3.util._
import xiangshan._
import utils.XSInfo
class RenameBypassInfo extends XSBundle {
val lsrc1_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
val lsrc2_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
val lsrc3_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
val ldest_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
}
class Rename extends XSModule {
val io = IO(new Bundle() {
val redirect = Flipped(ValidIO(new Redirect))
......@@ -13,6 +20,7 @@ class Rename extends XSModule {
val in = Vec(RenameWidth, Flipped(DecoupledIO(new CfCtrl)))
// to dispatch1
val out = Vec(RenameWidth, DecoupledIO(new MicroOp))
val renameBypass = Output(new RenameBypassInfo)
})
def printRenameInfo(in: DecoupledIO[CfCtrl], out: DecoupledIO[MicroOp]) = {
......@@ -61,6 +69,8 @@ class Rename extends XSModule {
uop.sqIdx := DontCare
})
val needFpDest = Wire(Vec(RenameWidth, Bool()))
val needIntDest = Wire(Vec(RenameWidth, Bool()))
var lastReady = WireInit(io.out(0).ready)
// debug assert
val outRdy = Cat(io.out.map(_.ready))
......@@ -73,17 +83,17 @@ class Rename extends XSModule {
val inValid = io.in(i).valid
// alloc a new phy reg
val needFpDest = inValid && needDestReg(fp = true, io.in(i).bits)
val needIntDest = inValid && needDestReg(fp = false, io.in(i).bits)
fpFreeList.allocReqs(i) := needFpDest && lastReady
intFreeList.allocReqs(i) := needIntDest && lastReady
needFpDest(i) := inValid && needDestReg(fp = true, io.in(i).bits)
needIntDest(i) := inValid && needDestReg(fp = false, io.in(i).bits)
fpFreeList.allocReqs(i) := needFpDest(i) && lastReady
intFreeList.allocReqs(i) := needIntDest(i) && lastReady
val fpCanAlloc = fpFreeList.canAlloc(i)
val intCanAlloc = intFreeList.canAlloc(i)
val this_can_alloc = Mux(
needIntDest,
needIntDest(i),
intCanAlloc,
Mux(
needFpDest,
needFpDest(i),
fpCanAlloc,
true.B
)
......@@ -98,7 +108,7 @@ class Rename extends XSModule {
lastReady = io.in(i).ready
uops(i).pdest := Mux(needIntDest,
uops(i).pdest := Mux(needIntDest(i),
intFreeList.pdests(i),
Mux(
uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen,
......@@ -173,6 +183,28 @@ class Rename extends XSModule {
uops(i).old_pdest := Mux(uops(i).ctrl.rfWen, intOldPdest, fpOldPdest)
}
// We don't bypass the old_pdest from valid instructions with the same ldest currently in the rename stage.
// Instead, we only determine whether there are dependences between the valid instructions.
for (i <- 1 until RenameWidth) {
io.renameBypass.lsrc1_bypass(i-1) := Cat((0 until i).map(j => {
val fpMatch = needFpDest(j) && io.in(i).bits.ctrl.src1Type === SrcType.fp
val intMatch = needIntDest(j) && io.in(i).bits.ctrl.src1Type === SrcType.reg
(fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.lsrc1
}).reverse)
io.renameBypass.lsrc2_bypass(i-1) := Cat((0 until i).map(j => {
val fpMatch = needFpDest(j) && io.in(i).bits.ctrl.src2Type === SrcType.fp
val intMatch = needIntDest(j) && io.in(i).bits.ctrl.src2Type === SrcType.reg
(fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.lsrc2
}).reverse)
io.renameBypass.lsrc3_bypass(i-1) := Cat((0 until i).map(j => {
val fpMatch = needFpDest(j) && io.in(i).bits.ctrl.src3Type === SrcType.fp
val intMatch = needIntDest(j) && io.in(i).bits.ctrl.src3Type === SrcType.reg
(fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.lsrc3
}).reverse)
io.renameBypass.ldest_bypass(i-1) := Cat((0 until i).map(j => {
val fpMatch = needFpDest(j) && needFpDest(i)
val intMatch = needIntDest(j) && needIntDest(i)
(fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.ldest
}).reverse)
}
}
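A note on the bit ordering of RenameBypassInfo: the entry for slot i sits at index i-1 and is i bits wide, and Cat((0 until i).map(...).reverse) places the comparison against older slot j at bit j, which is exactly what .asBools indexes on the Dispatch1 side. A standalone sketch of that convention (not project code):

import chisel3._
import chisel3.util.Cat

// Cat of a reversed sequence puts element 0 in the LSB, so bit j of the packed
// vector corresponds to older slot j; asBools then recovers the same order.
class BitOrderCheck extends Module {
  val io = IO(new Bundle {
    val matches  = Input(Vec(3, Bool()))   // matches(j): comparison against older slot j
    val packed   = Output(UInt(3.W))
    val unpacked = Output(Vec(3, Bool()))
  })
  io.packed   := Cat(io.matches.reverse)       // same idiom as in Rename
  io.unpacked := VecInit(io.packed.asBools)    // unpacked(j) equals io.matches(j)
}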