From 99b8dc2c6b96e882df4a4e3816e96177ff4ebf3c Mon Sep 17 00:00:00 2001 From: Yinan Xu Date: Mon, 30 Nov 2020 20:00:09 +0800 Subject: [PATCH] rename: don't bypass preg and leave it to dispatch1 Rename now provides vectors indicating whether there are matches between lsrc1/lsrc2/lsrc3/ldest and previous instructions' ldest. Dispatch1 updates uops' psrc1/psrc2/psrc3/old_pdest with previous instructions' pdest. This method optimizes rename's timing. --- .../scala/xiangshan/backend/CtrlBlock.scala | 1 + .../xiangshan/backend/dispatch/Dispatch.scala | 3 + .../backend/dispatch/Dispatch1.scala | 94 +++++++++++++++---- .../xiangshan/backend/rename/Rename.scala | 50 ++++++++-- 4 files changed, 120 insertions(+), 28 deletions(-) diff --git a/src/main/scala/xiangshan/backend/CtrlBlock.scala b/src/main/scala/xiangshan/backend/CtrlBlock.scala index 5c68ee944..e4befbd76 100644 --- a/src/main/scala/xiangshan/backend/CtrlBlock.scala +++ b/src/main/scala/xiangshan/backend/CtrlBlock.scala @@ -103,6 +103,7 @@ class CtrlBlock extends XSModule { rename.io.redirect <> redirect rename.io.roqCommits <> roq.io.commits rename.io.out <> dispatch.io.fromRename + rename.io.renameBypass <> dispatch.io.renameBypass dispatch.io.redirect <> redirect dispatch.io.enqRoq <> roq.io.enq diff --git a/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala b/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala index 23b6b05f7..81ec93929 100644 --- a/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala +++ b/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala @@ -7,6 +7,7 @@ import utils._ import xiangshan.backend.regfile.RfReadPort import chisel3.ExcitingUtils._ import xiangshan.backend.roq.RoqPtr +import xiangshan.backend.rename.RenameBypassInfo case class DispatchParameters ( @@ -28,6 +29,7 @@ class Dispatch extends XSModule { val redirect = Flipped(ValidIO(new Redirect)) // from rename val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp))) + val renameBypass = Input(new
RenameBypassInfo) // enq Roq val enqRoq = new Bundle { val canAccept = Input(Bool()) @@ -72,6 +74,7 @@ class Dispatch extends XSModule { // dispatch 1: accept uops from rename and dispatch them to the three dispatch queues dispatch1.io.redirect <> io.redirect + dispatch1.io.renameBypass := RegEnable(io.renameBypass, io.fromRename(0).valid && dispatch1.io.fromRename(0).ready) dispatch1.io.enqRoq <> io.enqRoq dispatch1.io.enqLsq <> io.enqLsq dispatch1.io.toIntDqReady <> intDq.io.enqReady diff --git a/src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala b/src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala index 40dd8d921..f94b95886 100644 --- a/src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala +++ b/src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala @@ -6,6 +6,7 @@ import chisel3.ExcitingUtils._ import xiangshan._ import utils.{XSDebug, XSError, XSInfo} import xiangshan.backend.roq.RoqPtr +import xiangshan.backend.rename.RenameBypassInfo // read rob and enqueue class Dispatch1 extends XSModule { @@ -13,6 +14,7 @@ class Dispatch1 extends XSModule { val redirect = Flipped(ValidIO(new Redirect)) // from rename val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp))) + val renameBypass = Input(new RenameBypassInfo) val recv = Output(Vec(RenameWidth, Bool())) // enq Roq val enqRoq = new Bundle { @@ -38,6 +40,8 @@ class Dispatch1 extends XSModule { val toLsDqReady = Input(Bool()) val toLsDq = Vec(dpParams.DqEnqWidth, ValidIO(new MicroOp)) }) + + /** * Part 1: choose the target dispatch queue and the corresponding write ports */ @@ -67,8 +71,58 @@ class Dispatch1 extends XSModule { ExcitingUtils.addSource(!dispatchNotEmpty, "perfCntCondDp1Empty", Perf) } + /** * Part 2: + * Update commitType, psrc1, psrc2, psrc3, old_pdest for the uops + */ + val updatedUop = Wire(Vec(RenameWidth, new MicroOp)) + val updatedCommitType = Wire(Vec(RenameWidth, CommitType())) + val updatedPsrc1 = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W))) + val 
updatedPsrc2 = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W))) + val updatedPsrc3 = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W))) + val updatedOldPdest = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W))) + + for (i <- 0 until RenameWidth) { + updatedCommitType(i) := Cat(isLs(i), isStore(i) | isFp(i)) + updatedPsrc1(i) := io.fromRename.take(i).map(_.bits.pdest) + .zip(if (i == 0) Seq() else io.renameBypass.lsrc1_bypass(i-1).asBools) + .foldLeft(io.fromRename(i).bits.psrc1) { + (z, next) => Mux(next._2, next._1, z) + } + updatedPsrc2(i) := io.fromRename.take(i).map(_.bits.pdest) + .zip(if (i == 0) Seq() else io.renameBypass.lsrc2_bypass(i-1).asBools) + .foldLeft(io.fromRename(i).bits.psrc2) { + (z, next) => Mux(next._2, next._1, z) + } + updatedPsrc3(i) := io.fromRename.take(i).map(_.bits.pdest) + .zip(if (i == 0) Seq() else io.renameBypass.lsrc3_bypass(i-1).asBools) + .foldLeft(io.fromRename(i).bits.psrc3) { + (z, next) => Mux(next._2, next._1, z) + } + updatedOldPdest(i) := io.fromRename.take(i).map(_.bits.pdest) + .zip(if (i == 0) Seq() else io.renameBypass.ldest_bypass(i-1).asBools) + .foldLeft(io.fromRename(i).bits.old_pdest) { + (z, next) => Mux(next._2, next._1, z) + } + + updatedUop(i) := io.fromRename(i).bits + // update bypass psrc1/psrc2/psrc3/old_pdest + updatedUop(i).psrc1 := updatedPsrc1(i) + updatedUop(i).psrc2 := updatedPsrc2(i) + updatedUop(i).psrc3 := updatedPsrc3(i) + updatedUop(i).old_pdest := updatedOldPdest(i) + XSError(updatedUop(i).psrc1 =/= io.fromRename(i).bits.psrc1, "psrc1 bypass not working correctly\n") + XSError(updatedUop(i).psrc2 =/= io.fromRename(i).bits.psrc2, "psrc2 bypass not working correctly\n") + XSError(updatedUop(i).psrc3 =/= io.fromRename(i).bits.psrc3, "psrc3 bypass not working correctly\n") + XSError(updatedUop(i).old_pdest =/= io.fromRename(i).bits.old_pdest, "old_pdest bypass not working correctly\n") + // update commitType + updatedUop(i).ctrl.commitType := updatedCommitType(i) + } + + + /** + * Part 3: * acquire ROQ 
(all), LSQ (load/store only) and dispatch queue slots * only set valid when all of them provides enough entries */ @@ -107,51 +161,53 @@ class Dispatch1 extends XSModule { // (2) previous instructions are ready val thisCanActualOut = (0 until RenameWidth).map(i => allResourceReady && thisCanOut(i) && notBlockedByPrevious(i)) - val uopWithIndex = Wire(Vec(RenameWidth, new MicroOp)) - + // input for ROQ and LSQ + // note that LSQ needs roqIdx for (i <- 0 until RenameWidth) { - // input for ROQ and LSQ - val commitType = Cat(isLs(i), isStore(i) | isFp(i)) - io.enqRoq.extraWalk(i) := io.fromRename(i).valid && !thisCanActualOut(i) io.enqRoq.req(i).valid := io.fromRename(i).valid && thisCanActualOut(i) - io.enqRoq.req(i).bits := io.fromRename(i).bits - io.enqRoq.req(i).bits.ctrl.commitType := commitType + io.enqRoq.req(i).bits := updatedUop(i) val shouldEnqLsq = isLs(i) && io.fromRename(i).bits.ctrl.fuType =/= FuType.mou io.enqLsq.req(i).valid := io.fromRename(i).valid && shouldEnqLsq && !redirectValid && thisCanActualOut(i) - io.enqLsq.req(i).bits := io.fromRename(i).bits - io.enqLsq.req(i).bits.ctrl.commitType := commitType + io.enqLsq.req(i).bits := updatedUop(i) io.enqLsq.req(i).bits.roqIdx := io.enqRoq.resp(i) - // append ROQ and LSQ indexed to uop - uopWithIndex(i) := io.fromRename(i).bits - uopWithIndex(i).roqIdx := io.enqRoq.resp(i) - uopWithIndex(i).lqIdx := io.enqLsq.resp(i).lqIdx - uopWithIndex(i).sqIdx := io.enqLsq.resp(i).sqIdx - XSDebug(io.enqLsq.req(i).valid, p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives lq ${io.enqLsq.resp(i).lqIdx} sq ${io.enqLsq.resp(i).sqIdx}\n") XSDebug(io.enqRoq.req(i).valid, p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives nroq ${io.enqRoq.resp(i)}\n") } + + /** + * Part 4: + * append ROQ and LSQ indexed to uop, and send them to dispatch queue + */ + val updateUopWithIndex = Wire(Vec(RenameWidth, new MicroOp)) + for (i <- 0 until RenameWidth) { + updateUopWithIndex(i) := updatedUop(i) + 
updateUopWithIndex(i).roqIdx := io.enqRoq.resp(i) + updateUopWithIndex(i).lqIdx := io.enqLsq.resp(i).lqIdx + updateUopWithIndex(i).sqIdx := io.enqLsq.resp(i).sqIdx + } + // send uops with correct indexes to dispatch queues // Note that if one of their previous instructions cannot enqueue, they should not enter dispatch queue. // We use notBlockedByPrevious here since mapping(i).valid implies there's a valid instruction that can enqueue, // thus we don't need to check thisCanOut. for (i <- 0 until dpParams.DqEnqWidth) { - io.toIntDq(i).bits := uopWithIndex(intIndex.io.mapping(i).bits) + io.toIntDq(i).bits := updateUopWithIndex(intIndex.io.mapping(i).bits) io.toIntDq(i).valid := intIndex.io.mapping(i).valid && allResourceReady && !thisIsBlocked(intIndex.io.mapping(i).bits) && notBlockedByPrevious(intIndex.io.mapping(i).bits) // NOTE: floating point instructions are not noSpecExec currently // remove commit /**/ when fp instructions are possible to be noSpecExec - io.toFpDq(i).bits := uopWithIndex(fpIndex.io.mapping(i).bits) + io.toFpDq(i).bits := updateUopWithIndex(fpIndex.io.mapping(i).bits) io.toFpDq(i).valid := fpIndex.io.mapping(i).valid && allResourceReady && /*!thisIsBlocked(fpIndex.io.mapping(i).bits) && */notBlockedByPrevious(fpIndex.io.mapping(i).bits) - io.toLsDq(i).bits := uopWithIndex(lsIndex.io.mapping(i).bits) + io.toLsDq(i).bits := updateUopWithIndex(lsIndex.io.mapping(i).bits) io.toLsDq(i).valid := lsIndex.io.mapping(i).valid && allResourceReady && !thisIsBlocked(lsIndex.io.mapping(i).bits) && notBlockedByPrevious(lsIndex.io.mapping(i).bits) @@ -170,7 +226,7 @@ class Dispatch1 extends XSModule { XSInfo(io.recv(i), p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)}, type(${isInt(i)}, ${isFp(i)}, ${isLs(i)}), " + - p"roq ${uopWithIndex(i).roqIdx}, lq ${uopWithIndex(i).lqIdx}, sq ${uopWithIndex(i).sqIdx}, " + + p"roq ${updateUopWithIndex(i).roqIdx}, lq ${updateUopWithIndex(i).lqIdx}, sq ${updateUopWithIndex(i).sqIdx}, " + 
p"(${intIndex.io.reverseMapping(i).bits}, ${fpIndex.io.reverseMapping(i).bits}, ${lsIndex.io.reverseMapping(i).bits})\n" ) diff --git a/src/main/scala/xiangshan/backend/rename/Rename.scala b/src/main/scala/xiangshan/backend/rename/Rename.scala index 67cf93ced..806fd2f88 100644 --- a/src/main/scala/xiangshan/backend/rename/Rename.scala +++ b/src/main/scala/xiangshan/backend/rename/Rename.scala @@ -5,6 +5,13 @@ import chisel3.util._ import xiangshan._ import utils.XSInfo +class RenameBypassInfo extends XSBundle { + val lsrc1_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W))) + val lsrc2_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W))) + val lsrc3_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W))) + val ldest_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W))) +} + class Rename extends XSModule { val io = IO(new Bundle() { val redirect = Flipped(ValidIO(new Redirect)) @@ -13,6 +20,7 @@ class Rename extends XSModule { val in = Vec(RenameWidth, Flipped(DecoupledIO(new CfCtrl))) // to dispatch1 val out = Vec(RenameWidth, DecoupledIO(new MicroOp)) + val renameBypass = Output(new RenameBypassInfo) }) def printRenameInfo(in: DecoupledIO[CfCtrl], out: DecoupledIO[MicroOp]) = { @@ -61,6 +69,8 @@ class Rename extends XSModule { uop.sqIdx := DontCare }) + val needFpDest = Wire(Vec(RenameWidth, Bool())) + val needIntDest = Wire(Vec(RenameWidth, Bool())) var lastReady = WireInit(io.out(0).ready) // debug assert val outRdy = Cat(io.out.map(_.ready)) @@ -73,17 +83,17 @@ class Rename extends XSModule { val inValid = io.in(i).valid // alloc a new phy reg - val needFpDest = inValid && needDestReg(fp = true, io.in(i).bits) - val needIntDest = inValid && needDestReg(fp = false, io.in(i).bits) - fpFreeList.allocReqs(i) := needFpDest && lastReady - intFreeList.allocReqs(i) := needIntDest && lastReady + needFpDest(i) := inValid && needDestReg(fp = true, io.in(i).bits) + needIntDest(i) := inValid && needDestReg(fp = false, 
io.in(i).bits) + fpFreeList.allocReqs(i) := needFpDest(i) && lastReady + intFreeList.allocReqs(i) := needIntDest(i) && lastReady val fpCanAlloc = fpFreeList.canAlloc(i) val intCanAlloc = intFreeList.canAlloc(i) val this_can_alloc = Mux( - needIntDest, + needIntDest(i), intCanAlloc, Mux( - needFpDest, + needFpDest(i), fpCanAlloc, true.B ) @@ -98,7 +108,7 @@ class Rename extends XSModule { lastReady = io.in(i).ready - uops(i).pdest := Mux(needIntDest, + uops(i).pdest := Mux(needIntDest(i), intFreeList.pdests(i), Mux( uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen, @@ -173,6 +183,28 @@ class Rename extends XSModule { uops(i).old_pdest := Mux(uops(i).ctrl.rfWen, intOldPdest, fpOldPdest) } - - + // We don't bypass the old_pdest from valid instructions with the same ldest currently in rename stage. + // Instead, we determine whether there're some dependences between the valid instructions. + for (i <- 1 until RenameWidth) { + io.renameBypass.lsrc1_bypass(i-1) := Cat((0 until i).map(j => { + val fpMatch = needFpDest(j) && io.in(i).bits.ctrl.src1Type === SrcType.fp + val intMatch = needIntDest(j) && io.in(i).bits.ctrl.src1Type === SrcType.reg + (fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.lsrc1 + }).reverse) + io.renameBypass.lsrc2_bypass(i-1) := Cat((0 until i).map(j => { + val fpMatch = needFpDest(j) && io.in(i).bits.ctrl.src2Type === SrcType.fp + val intMatch = needIntDest(j) && io.in(i).bits.ctrl.src2Type === SrcType.reg + (fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.lsrc2 + }).reverse) + io.renameBypass.lsrc3_bypass(i-1) := Cat((0 until i).map(j => { + val fpMatch = needFpDest(j) && io.in(i).bits.ctrl.src3Type === SrcType.fp + val intMatch = needIntDest(j) && io.in(i).bits.ctrl.src3Type === SrcType.reg + (fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.lsrc3 + }).reverse) + io.renameBypass.ldest_bypass(i-1) := Cat((0 until i).map(j => { + val fpMatch = needFpDest(j) && needFpDest(i) + val 
intMatch = needIntDest(j) && needIntDest(i) + (fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.ldest + }).reverse) + } } -- GitLab