未验证 提交 66220144 编写于 作者: Y Yinan Xu 提交者: GitHub

backend: wrap all RS into a larger scheduler module (#880)

This commit adds a non-parameterized scheduler containing all reservation stations.
Now IntegerBlock, FloatBlock, MemBlock contain only function units.
The Scheduler connects dispatch with all function units.
Parameterization to be added later.
上级 0fb3674e
......@@ -19,7 +19,7 @@ import chisel3._
import chisel3.util._
import xiangshan.backend._
import xiangshan.backend.fu.HasExceptionNO
import xiangshan.backend.dispatch.DispatchParameters
import xiangshan.backend.exu.Wb
import xiangshan.frontend._
import xiangshan.mem._
import xiangshan.cache.{DCacheParameters, ICacheParameters, L1plusCacheWrapper, L1plusCacheParameters, PTWWrapper, PTWRepeater, PTWFilter}
......@@ -31,15 +31,6 @@ import freechips.rocketchip.tile.HasFPUParameters
import system.{HasSoCParameter, L1CacheErrorInfo}
import utils._
/** Zero-argument function object that hands out consecutive integers.
  *
  * The first call returns 0 and every later call returns one more than the
  * previous call. `x` always holds the value the next call will return.
  * The counter is a plain `var` with no synchronisation.
  * NOTE(review): presumably used to number cores/harts at elaboration time —
  * the call sites are not visible here, confirm against callers.
  */
object hartIdCore extends (() => Int) {
  // Value that the next call to `apply` will return.
  var x = 0

  /** Returns the current counter value, then advances the counter by one. */
  def apply(): Int = {
    val issued = x
    x += 1
    issued
  }
}
abstract class XSModule(implicit val p: Parameters) extends MultiIOModule
with HasXSParameter
with HasExceptionNO
......@@ -76,13 +67,7 @@ abstract class XSCoreBase()(implicit p: config.Parameters) extends LazyModule
val frontend = LazyModule(new Frontend())
val l1pluscache = LazyModule(new L1plusCacheWrapper())
val ptw = LazyModule(new PTWWrapper())
val memBlock = LazyModule(new MemBlock(
fastWakeUpIn = intExuConfigs.filter(_.hasCertainLatency),
slowWakeUpIn = intExuConfigs.filter(_.hasUncertainlatency) ++ fpExuConfigs,
fastWakeUpOut = Seq(),
slowWakeUpOut = loadExuConfigs,
numIntWakeUpFp = intExuConfigs.count(_.writeFpRf)
))
val memBlock = LazyModule(new MemBlock)
}
......@@ -112,25 +97,44 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
val intBlockSlowWakeUp = intExuConfigs.filter(_.hasUncertainlatency)
val ctrlBlock = Module(new CtrlBlock)
val integerBlock = Module(new IntegerBlock(
fastWakeUpIn = Seq(),
slowWakeUpIn = fpExuConfigs.filter(_.writeIntRf) ++ loadExuConfigs,
memFastWakeUpIn = loadExuConfigs,
fastWakeUpOut = intBlockFastWakeUp,
slowWakeUpOut = intBlockSlowWakeUp
))
val floatBlock = Module(new FloatBlock(
intSlowWakeUpIn = intExuConfigs.filter(_.writeFpRf),
memSlowWakeUpIn = loadExuConfigs,
fastWakeUpOut = Seq(),
slowWakeUpOut = fpExuConfigs
))
val scheduler = Module(new Scheduler)
val integerBlock = Module(new IntegerBlock)
val floatBlock = Module(new FloatBlock)
val frontend = outer.frontend.module
val memBlock = outer.memBlock.module
val l1pluscache = outer.l1pluscache.module
val ptw = outer.ptw.module
val intConfigs = intExuConfigs ++ fpExuConfigs.filter(_.writeIntRf) ++ loadExuConfigs
val intArbiter = Module(new Wb(intConfigs, NRIntWritePorts, isFp = false))
val intWriteback = integerBlock.io.writeback ++ floatBlock.io.writeback.drop(4) ++ memBlock.io.writeback.take(2)
// set default value for ready
integerBlock.io.writeback.map(_.ready := true.B)
floatBlock.io.writeback.map(_.ready := true.B)
memBlock.io.writeback.map(_.ready := true.B)
intArbiter.io.in.zip(intWriteback).foreach { case (arb, wb) =>
arb.valid := wb.valid && !wb.bits.uop.ctrl.fpWen
arb.bits := wb.bits
when (arb.valid) {
wb.ready := arb.ready
}
}
val fpArbiter = Module(new Wb(
fpExuConfigs ++ intExuConfigs.take(1) ++ loadExuConfigs,
NRFpWritePorts,
isFp = true
))
val fpWriteback = floatBlock.io.writeback ++ integerBlock.io.writeback.take(1) ++ memBlock.io.writeback.take(2)
fpArbiter.io.in.zip(fpWriteback).foreach{ case (arb, wb) =>
arb.valid := wb.valid && wb.bits.uop.ctrl.fpWen
arb.bits := wb.bits
when (arb.valid) {
wb.ready := arb.ready
}
}
io.l1plus_error <> l1pluscache.io.error
io.icache_error <> frontend.io.error
io.dcache_error <> memBlock.io.error
......@@ -145,52 +149,37 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
l1pluscache.io.flush := frontend.io.l1plusFlush
frontend.io.fencei := integerBlock.io.fenceio.fencei
ctrlBlock.io.fromIntBlock <> integerBlock.io.toCtrlBlock
ctrlBlock.io.fromFpBlock <> floatBlock.io.toCtrlBlock
ctrlBlock.io.fromLsBlock <> memBlock.io.toCtrlBlock
ctrlBlock.io.toIntBlock <> integerBlock.io.fromCtrlBlock
ctrlBlock.io.toFpBlock <> floatBlock.io.fromCtrlBlock
ctrlBlock.io.toLsBlock <> memBlock.io.fromCtrlBlock
ctrlBlock.io.csrCtrl <> integerBlock.io.csrio.customCtrl
val memBlockWakeUpInt = memBlock.io.wakeUpOutInt.slow.map(WireInit(_))
val memBlockWakeUpFp = memBlock.io.wakeUpOutFp.slow.map(WireInit(_))
memBlock.io.wakeUpOutInt.slow.foreach(_.ready := true.B)
memBlock.io.wakeUpOutFp.slow.foreach(_.ready := true.B)
fpExuConfigs.zip(floatBlock.io.wakeUpOut.slow).filterNot(_._1.writeIntRf).map(_._2.ready := true.B)
val fpBlockWakeUpInt = fpExuConfigs
.zip(floatBlock.io.wakeUpOut.slow)
.filter(_._1.writeIntRf)
.map(_._2)
intExuConfigs.zip(integerBlock.io.wakeUpOut.slow).filterNot(_._1.writeFpRf).map(_._2.ready := true.B)
val intBlockWakeUpFp = intExuConfigs.filter(_.hasUncertainlatency)
.zip(integerBlock.io.wakeUpOut.slow)
.filter(_._1.writeFpRf)
.map(_._2)
integerBlock.io.wakeUpIn.slow <> fpBlockWakeUpInt ++ memBlockWakeUpInt
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock
integerBlock.io.memFastWakeUp <> memBlock.io.ldFastWakeUpInt
floatBlock.io.intWakeUpFp <> intBlockWakeUpFp
floatBlock.io.memWakeUpFp <> memBlockWakeUpFp
floatBlock.io.toMemBlock <> memBlock.io.fromFpBlock
val wakeUpMem = Seq(
integerBlock.io.wakeUpOut,
floatBlock.io.wakeUpOut,
)
memBlock.io.wakeUpIn.fastUops <> wakeUpMem.flatMap(_.fastUops)
memBlock.io.wakeUpIn.fast <> wakeUpMem.flatMap(_.fast)
// Note: 'WireInit' is used to block 'ready's from memBlock,
// we don't need 'ready's from memBlock
memBlock.io.wakeUpIn.slow <> wakeUpMem.flatMap(_.slow.map(x => WireInit(x)))
memBlock.io.intWakeUpFp <> floatBlock.io.intWakeUpOut
memBlock.io.intWbOut := integerBlock.io.intWbOut
memBlock.io.fpWbOut := floatBlock.io.fpWbOut
ctrlBlock.io.exuRedirect <> integerBlock.io.exuRedirect
ctrlBlock.io.stIn <> memBlock.io.stIn
ctrlBlock.io.stOut <> memBlock.io.stOut
ctrlBlock.io.memoryViolation <> memBlock.io.memoryViolation
ctrlBlock.io.enqLsq <> memBlock.io.enqLsq
// TODO
ctrlBlock.io.writeback <> VecInit(intArbiter.io.out ++ fpArbiter.io.out)
scheduler.io.redirect <> ctrlBlock.io.redirect
scheduler.io.flush <> ctrlBlock.io.flush
scheduler.io.allocate <> ctrlBlock.io.enqIQ
scheduler.io.issue <> integerBlock.io.issue ++ floatBlock.io.issue ++ memBlock.io.issue
// TODO arbiter
scheduler.io.writeback <> VecInit(intArbiter.io.out ++ fpArbiter.io.out)
scheduler.io.replay <> memBlock.io.replay
scheduler.io.rsIdx <> memBlock.io.rsIdx
scheduler.io.isFirstIssue <> memBlock.io.isFirstIssue
scheduler.io.stData <> memBlock.io.stData
scheduler.io.otherFastWakeup <> memBlock.io.otherFastWakeup
scheduler.io.jumpPc <> ctrlBlock.io.jumpPc
scheduler.io.jalr_target <> ctrlBlock.io.jalr_target
scheduler.io.stIssuePtr <> memBlock.io.stIssuePtr
scheduler.io.debug_fp_rat <> ctrlBlock.io.debug_fp_rat
scheduler.io.debug_int_rat <> ctrlBlock.io.debug_int_rat
scheduler.io.readIntRf <> ctrlBlock.io.readIntRf
scheduler.io.readFpRf <> ctrlBlock.io.readFpRf
integerBlock.io.redirect <> ctrlBlock.io.redirect
integerBlock.io.flush <> ctrlBlock.io.flush
integerBlock.io.csrio.hartId <> io.hartId
integerBlock.io.csrio.perf <> DontCare
integerBlock.io.csrio.perf.retiredInstr <> ctrlBlock.io.roqio.toCSR.perfinfo.retiredInstr
......@@ -210,9 +199,14 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
integerBlock.io.csrio.memExceptionVAddr <> memBlock.io.lsqio.exceptionAddr.vaddr
integerBlock.io.csrio.externalInterrupt <> io.externalInterrupt
floatBlock.io.redirect <> ctrlBlock.io.redirect
floatBlock.io.flush <> ctrlBlock.io.flush
integerBlock.io.fenceio.sfence <> memBlock.io.sfence
integerBlock.io.fenceio.sbuffer <> memBlock.io.fenceToSbuffer
memBlock.io.redirect <> ctrlBlock.io.redirect
memBlock.io.flush <> ctrlBlock.io.flush
memBlock.io.csrCtrl <> integerBlock.io.csrio.customCtrl
memBlock.io.tlbCsr <> integerBlock.io.csrio.tlb
memBlock.io.lsqio.roq <> ctrlBlock.io.roqio.lsq
......
......@@ -28,36 +28,6 @@ import xiangshan.backend.ftq.{Ftq, FtqRead, HasFtqHelper}
import xiangshan.backend.roq.{Roq, RoqCSRIO, RoqLsqIO, RoqPtr}
import xiangshan.mem.LsqEnqIO
// Signals sent from the control block to the integer block.
// CtrlBlock declares this bundle as `toIntBlock`; IntegerBlock takes it
// Flipped as `fromCtrlBlock`.
class CtrlToIntBlockIO(implicit p: Parameters) extends XSBundle {
// One decoupled micro-op enqueue port per integer execution unit
// (CtrlBlock connects these to the leading part of dispatch.io.enqIQCtrl).
val enqIqCtrl = Vec(exuParameters.IntExuCnt, DecoupledIO(new MicroOp))
// Physical register indices for the integer regfile read ports
// (CtrlBlock connects this to dispatch.io.readIntRf; IntegerBlock uses
// entry i as the address of intRf read port i).
val readRf = Vec(NRIntReadPorts, Output(UInt(PhyRegIdxWidth.W)))
// PC computed in CtrlBlock by GetPcByFtq from the entry on FTQ read port 0.
val jumpPc = Output(UInt(VAddrBits.W))
// `target` field of the entry on FTQ read port 0.
val jalr_target = Output(UInt(VAddrBits.W))
// int block only uses port 0~7
// (CtrlBlock connects this to dispatch.io.readPortIndex.intIndex.)
val readPortIndex = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(8 / 2).W))) // TODO parameterize 8 here
// Driven in CtrlBlock from backendRedirect.
val redirect = ValidIO(new Redirect)
// Driven in CtrlBlock from flushReg.
val flush = Output(Bool())
// Driven in CtrlBlock from rename.io.debug_int_rat; IntegerBlock uses the
// entries as addresses for intRf's debug read ports.
val debug_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
}
// Signals sent from the control block to the floating-point block.
// CtrlBlock declares this bundle as `toFpBlock`; FloatBlock takes it
// Flipped as `fromCtrlBlock`.
class CtrlToFpBlockIO(implicit p: Parameters) extends XSBundle {
// One decoupled micro-op enqueue port per floating-point execution unit
// (part of what CtrlBlock connects to dispatch.io.enqIQCtrl).
val enqIqCtrl = Vec(exuParameters.FpExuCnt, DecoupledIO(new MicroOp))
// Physical register indices for the FP regfile read ports
// (CtrlBlock connects this to dispatch.io.readFpRf; FloatBlock uses
// entry i as the address of fpRf read port i).
val readRf = Vec(NRFpReadPorts, Output(UInt(PhyRegIdxWidth.W)))
// fp block uses port 0~11
// (CtrlBlock connects this to dispatch.io.readPortIndex.fpIndex.)
val readPortIndex = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil((NRFpReadPorts - exuParameters.StuCnt) / 3).W)))
// Driven in CtrlBlock from backendRedirect.
val redirect = ValidIO(new Redirect)
// Driven in CtrlBlock from flushReg.
val flush = Output(Bool())
// Driven in CtrlBlock from rename.io.debug_fp_rat; FloatBlock uses the
// entries as addresses for fpRf's debug read ports.
val debug_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
}
// Signals exchanged between the control block and the load/store (memory) block.
// CtrlBlock declares this bundle as `toLsBlock`.
class CtrlToLsBlockIO(implicit p: Parameters) extends XSBundle {
// One decoupled micro-op enqueue port per load/store execution unit
// (the trailing part of what CtrlBlock connects to dispatch.io.enqIQCtrl).
val enqIqCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
// Load/store queue enqueue interface, declared Flipped here;
// CtrlBlock connects it to dispatch.io.enqLsq.
val enqLsq = Flipped(new LsqEnqIO)
// Declared as Input, one entry per store pipeline.
// NOTE(review): the driver/consumer of this field is not visible in this view.
val memPredUpdate = Vec(StorePipelineWidth, Input(new MemPredUpdateReq))
// Driven in CtrlBlock from backendRedirect.
val redirect = ValidIO(new Redirect)
// Driven in CtrlBlock from flushReg.
val flush = Output(Bool())
}
class RedirectGenerator(implicit p: Parameters) extends XSModule
with HasCircularQueuePtrHelper with HasFtqHelper {
val numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt
......@@ -213,12 +183,15 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
with HasCircularQueuePtrHelper with HasFtqHelper {
val io = IO(new Bundle {
val frontend = Flipped(new FrontendToBackendIO)
val fromIntBlock = Flipped(new IntBlockToCtrlIO)
val fromFpBlock = Flipped(new FpBlockToCtrlIO)
val fromLsBlock = Flipped(new LsBlockToCtrlIO)
val toIntBlock = new CtrlToIntBlockIO
val toFpBlock = new CtrlToFpBlockIO
val toLsBlock = new CtrlToLsBlockIO
val enqIQ = Vec(12, DecoupledIO(new MicroOp))
// from int block
val exuRedirect = Vec(exuParameters.AluCnt + exuParameters.JmpCnt, Flipped(ValidIO(new ExuOutput)))
val stIn = Vec(exuParameters.StuCnt, Flipped(ValidIO(new ExuInput)))
val stOut = Vec(exuParameters.StuCnt, Flipped(ValidIO(new ExuOutput)))
val memoryViolation = Flipped(ValidIO(new Redirect))
val enqLsq = Flipped(new LsqEnqIO)
val jumpPc = Output(UInt(VAddrBits.W))
val jalr_target = Output(UInt(VAddrBits.W))
val roqio = new Bundle {
// to int block
val toCSR = new RoqCSRIO
......@@ -239,6 +212,14 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
val bpWrong = Output(UInt(XLEN.W))
}
})
val writeback = Vec(16, Flipped(ValidIO(new ExuOutput)))
// redirect out
val redirect = ValidIO(new Redirect)
val flush = Output(Bool())
val readIntRf = Vec(NRIntReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val readFpRf = Vec(NRFpReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val debug_int_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
val debug_fp_rat = Vec(32, Output(UInt(PhyRegIdxWidth.W)))
})
val ftq = Module(new Ftq)
......@@ -258,7 +239,7 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
val flush = roq.io.flushOut.valid
val flushReg = RegNext(flush)
val exuRedirect = io.fromIntBlock.exuRedirect.map(x => {
val exuRedirect = io.exuRedirect.map(x => {
val valid = x.valid && x.bits.redirectValid
val killedByOlder = x.bits.uop.roqIdx.needFlush(backendRedirect, flushReg)
val delayed = Wire(Valid(new ExuOutput))
......@@ -267,11 +248,11 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
delayed
})
val loadReplay = Wire(Valid(new Redirect))
loadReplay.valid := RegNext(io.fromLsBlock.replay.valid &&
!io.fromLsBlock.replay.bits.roqIdx.needFlush(backendRedirect, flushReg),
loadReplay.valid := RegNext(io.memoryViolation.valid &&
!io.memoryViolation.bits.roqIdx.needFlush(backendRedirect, flushReg),
init = false.B
)
loadReplay.bits := RegEnable(io.fromLsBlock.replay.bits, io.fromLsBlock.replay.valid)
loadReplay.bits := RegEnable(io.memoryViolation.bits, io.memoryViolation.valid)
VecInit(ftq.io.ftqRead.tail.dropRight(2)) <> redirectGen.io.stage1FtqRead
ftq.io.ftqRead.dropRight(1).last <> redirectGen.io.memPredFtqRead
ftq.io.cfiRead <> redirectGen.io.stage2FtqRead
......@@ -330,12 +311,12 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
val ftqOffsetReg = Reg(UInt(log2Up(PredictWidth).W))
ftqOffsetReg := jumpInst.cf.ftqOffset
ftq.io.ftqRead(0).ptr := jumpInst.cf.ftqPtr // jump
io.toIntBlock.jumpPc := GetPcByFtq(
io.jumpPc := GetPcByFtq(
ftq.io.ftqRead(0).entry.ftqPC, ftqOffsetReg,
ftq.io.ftqRead(0).entry.lastPacketPC.valid,
ftq.io.ftqRead(0).entry.lastPacketPC.bits
)
io.toIntBlock.jalr_target := ftq.io.ftqRead(0).entry.target
io.jalr_target := ftq.io.ftqRead(0).entry.target
// pipeline between decode and dispatch
for (i <- 0 until RenameWidth) {
......@@ -354,29 +335,27 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
dispatch.io.redirect <> backendRedirect
dispatch.io.flush := flushReg
dispatch.io.enqRoq <> roq.io.enq
dispatch.io.enqLsq <> io.toLsBlock.enqLsq
dispatch.io.readIntRf <> io.toIntBlock.readRf
dispatch.io.readFpRf <> io.toFpBlock.readRf
dispatch.io.enqLsq <> io.enqLsq
dispatch.io.allocPregs.zipWithIndex.foreach { case (preg, i) =>
intBusyTable.io.allocPregs(i).valid := preg.isInt
fpBusyTable.io.allocPregs(i).valid := preg.isFp
intBusyTable.io.allocPregs(i).bits := preg.preg
fpBusyTable.io.allocPregs(i).bits := preg.preg
}
dispatch.io.numExist <> io.fromIntBlock.numExist ++ io.fromFpBlock.numExist ++ io.fromLsBlock.numExist
dispatch.io.enqIQCtrl <> io.toIntBlock.enqIqCtrl ++ io.toFpBlock.enqIqCtrl ++ io.toLsBlock.enqIqCtrl
// dispatch.io.enqIQData <> io.toIntBlock.enqIqData ++ io.toFpBlock.enqIqData ++ io.toLsBlock.enqIqData
dispatch.io.enqIQCtrl := DontCare
io.enqIQ <> dispatch.io.enqIQCtrl.take(4) ++ dispatch.io.enqIQCtrl.slice(7, 11) ++ dispatch.io.enqIQCtrl.drop(13)
dispatch.io.csrCtrl <> io.csrCtrl
dispatch.io.storeIssue <> io.fromLsBlock.stIn
dispatch.io.storeIssue <> io.stIn
dispatch.io.readIntRf <> io.readIntRf
dispatch.io.readFpRf <> io.readFpRf
fpBusyTable.io.flush := flushReg
intBusyTable.io.flush := flushReg
for((wb, setPhyRegRdy) <- io.fromIntBlock.wbRegs.zip(intBusyTable.io.wbPregs)){
for((wb, setPhyRegRdy) <- io.writeback.take(8).zip(intBusyTable.io.wbPregs)){
setPhyRegRdy.valid := wb.valid && wb.bits.uop.ctrl.rfWen
setPhyRegRdy.bits := wb.bits.uop.pdest
}
for((wb, setPhyRegRdy) <- io.fromFpBlock.wbRegs.zip(fpBusyTable.io.wbPregs)){
for((wb, setPhyRegRdy) <- io.writeback.drop(8).zip(fpBusyTable.io.wbPregs)){
setPhyRegRdy.valid := wb.valid && wb.bits.uop.ctrl.fpWen
setPhyRegRdy.bits := wb.bits.uop.pdest
}
......@@ -384,24 +363,20 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
fpBusyTable.io.read <> dispatch.io.readFpState
roq.io.redirect <> backendRedirect
val exeWbResults = VecInit(io.fromIntBlock.wbRegs ++ io.fromFpBlock.wbRegs ++ io.fromLsBlock.stOut)
val exeWbResults = VecInit(io.writeback ++ io.stOut)
for((roq_wb, wb) <- roq.io.exeWbResults.zip(exeWbResults)) {
roq_wb.valid := RegNext(wb.valid && !wb.bits.uop.roqIdx.needFlush(backendRedirect, flushReg))
roq_wb.bits := RegNext(wb.bits)
}
// TODO: is 'backendRedirect' necessary?
io.toIntBlock.redirect <> backendRedirect
io.toIntBlock.flush <> flushReg
io.toIntBlock.debug_rat <> rename.io.debug_int_rat
io.toFpBlock.redirect <> backendRedirect
io.toFpBlock.flush <> flushReg
io.toFpBlock.debug_rat <> rename.io.debug_fp_rat
io.toLsBlock.redirect <> backendRedirect
io.toLsBlock.flush <> flushReg
dispatch.io.readPortIndex.intIndex <> io.toIntBlock.readPortIndex
dispatch.io.readPortIndex.fpIndex <> io.toFpBlock.readPortIndex
io.redirect <> backendRedirect
io.flush <> flushReg
io.debug_int_rat <> rename.io.debug_int_rat
io.debug_fp_rat <> rename.io.debug_fp_rat
// dispatch.io.readPortIndex.intIndex <> io.toIntBlock.readPortIndex
// dispatch.io.readPortIndex.fpIndex <> io.toFpBlock.readPortIndex
// roq to int block
io.roqio.toCSR <> roq.io.csr
......
......@@ -26,73 +26,48 @@ import xiangshan.backend.issue.ReservationStation
import xiangshan.mem.{HasFpLoadHelper, HasLoadHelper}
import difftest._
// Signals sent from the floating-point block back to the control block.
// FloatBlock declares this bundle as `toCtrlBlock`; CtrlBlock takes it
// Flipped as `fromFpBlock`.
class FpBlockToCtrlIO(implicit p: Parameters) extends XSBundle {
// Outputs of FloatBlock's FP write-back arbiter, one per FP write port.
// CtrlBlock uses them to drive fpBusyTable.io.wbPregs and includes them
// in the write-back results handed to the ROQ.
val wbRegs = Vec(NRFpWritePorts, ValidIO(new ExuOutput))
// Per-execution-unit `numExist` value of the matching reservation station;
// CtrlBlock forwards it to dispatch.io.numExist.
val numExist = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
}
class FloatBlock
(
intSlowWakeUpIn: Seq[ExuConfig],
memSlowWakeUpIn: Seq[ExuConfig],
fastWakeUpOut: Seq[ExuConfig],
slowWakeUpOut: Seq[ExuConfig],
)(implicit p: Parameters) extends XSModule with HasExeBlockHelper with HasFpLoadHelper {
class FloatBlock()(implicit p: Parameters) extends XSModule with HasExeBlockHelper with HasFpLoadHelper {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToFpBlockIO)
val toCtrlBlock = new FpBlockToCtrlIO
val toMemBlock = new FpBlockToMemBlockIO
val intWakeUpFp = Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))
val memWakeUpFp = Vec(memSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val intWakeUpOut = Vec(intSlowWakeUpIn.size, DecoupledIO(new ExuOutput))
val fpWbOut = Vec(8, ValidIO(new ExuOutput))
// from csr
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
// in
val issue = Vec(6, Flipped(DecoupledIO(new ExuInput)))
// out
val writeback = Vec(6, DecoupledIO(new ExuOutput))
// misc from csr
val frm = Input(UInt(3.W))
})
val redirect = io.fromCtrlBlock.redirect
val flush = io.fromCtrlBlock.flush
val intWakeUpFpReg = Wire(Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput))))
for((w, r) <- io.intWakeUpFp.zip(intWakeUpFpReg)){
val in = WireInit(w)
w.ready := in.ready
in.valid := w.valid && !w.bits.uop.roqIdx.needFlush(redirect, flush)
PipelineConnect(in, r, r.fire() || r.bits.uop.roqIdx.needFlush(redirect, flush), false.B)
}
// to memBlock's store rs
io.intWakeUpOut <> intWakeUpFpReg.map(x => WireInit(x))
val intRecoded = intWakeUpFpReg.map(x => {
val rec = Wire(DecoupledIO(new ExuOutput))
rec.valid := x.valid && x.bits.uop.ctrl.fpWen
rec.bits := x.bits
rec.bits.data := Mux(x.bits.uop.ctrl.fpu.typeTagOut === S,
recode(x.bits.data(31, 0), S),
recode(x.bits.data(63, 0), D)
)
rec.bits.redirectValid := false.B
x.ready := rec.ready || !rec.valid
rec
})
val memRecoded = WireInit(io.memWakeUpFp)
for((rec, reg) <- memRecoded.zip(io.memWakeUpFp)){
rec.bits.data := fpRdataHelper(reg.bits.uop, reg.bits.data)
rec.bits.redirectValid := false.B
reg.ready := true.B
}
val wakeUpInRecode = intRecoded ++ memRecoded
val fpRf = Module(new Regfile(
numReadPorts = NRFpReadPorts,
numWirtePorts = NRFpWritePorts,
hasZero = false,
len = XLEN + 1
))
// val intWakeUpFpReg = Wire(Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput))))
// for((w, r) <- io.intWakeUpFp.zip(intWakeUpFpReg)){
// val in = WireInit(w)
// w.ready := in.ready
// in.valid := w.valid && !w.bits.uop.roqIdx.needFlush(io.redirect, io.flush)
// PipelineConnect(in, r, r.fire() || r.bits.uop.roqIdx.needFlush(io.redirect, io.flush), false.B)
// }
// // to memBlock's store rs
// io.intWakeUpOut <> intWakeUpFpReg.map(x => WireInit(x))
//
// val intRecoded = intWakeUpFpReg.map(x => {
// val rec = Wire(DecoupledIO(new ExuOutput))
// rec.valid := x.valid && x.bits.uop.ctrl.fpWen
// rec.bits := x.bits
// rec.bits.data := Mux(x.bits.uop.ctrl.fpu.typeTagOut === S,
// recode(x.bits.data(31, 0), S),
// recode(x.bits.data(63, 0), D)
// )
// rec.bits.redirectValid := false.B
// x.ready := rec.ready || !rec.valid
// rec
// })
// val memRecoded = WireInit(io.memWakeUpFp)
// for((rec, reg) <- memRecoded.zip(io.memWakeUpFp)){
// rec.bits.data := fpRdataHelper(reg.bits.uop, reg.bits.data)
// rec.bits.redirectValid := false.B
// reg.ready := true.B
// }
// val wakeUpInRecode = intRecoded ++ memRecoded
val fmacExeUnits = Array.tabulate(exuParameters.FmacCnt)(_ => Module(new FmacExeUnit))
val fmiscExeUnits = Array.tabulate(exuParameters.FmiscCnt)(_ => Module(new FmiscExeUnit))
......@@ -101,155 +76,28 @@ class FloatBlock
fmiscExeUnits.foreach(_.frm := io.frm)
val exeUnits = fmacExeUnits ++ fmiscExeUnits
val fpWbArbiter = Module(new Wb(
exeUnits.map(_.config) ++ intSlowWakeUpIn ++ memSlowWakeUpIn,
NRFpWritePorts,
isFp = true
))
io.fpWbOut.zip(fpWbArbiter.io.out).map{ case (wakeup, wb) =>
wakeup.valid := RegNext(wb.valid && !wb.bits.uop.roqIdx.needFlush(redirect, flush))
wakeup.bits := RegNext(wb.bits)
wakeup.bits.data := ieee(RegNext(wb.bits.data))
}
def needWakeup(cfg: ExuConfig): Boolean =
(cfg.readIntRf && cfg.writeIntRf) || (cfg.readFpRf && cfg.writeFpRf)
def needData(a: ExuConfig, b: ExuConfig): Boolean =
(a.readIntRf && b.writeIntRf) || (a.readFpRf && b.writeFpRf)
// val readPortIndex = RegNext(io.fromCtrlBlock.readPortIndex)
val readPortIndex = Seq(0, 1, 2, 3, 2, 3)
val reservationStations = exeUnits.map(_.config).zipWithIndex.map({ case (cfg, i) =>
var certainLatency = -1
if (cfg.hasCertainLatency) {
certainLatency = cfg.latency.latencyVal.get
for ((exu, i) <- exeUnits.zipWithIndex) {
exeUnits(i).io.redirect <> io.redirect
exeUnits(i).io.flush <> io.flush
// in
exeUnits(i).io.fromFp <> io.issue(i)
for (j <- 0 until 3) {
// when one of the higher bits is zero, then it's not a legal single-precision number
val isLegalSingle = io.issue(i).bits.uop.ctrl.fpu.typeTagIn === S && io.issue(i).bits.src(j)(63, 32).andR
val single = recode(io.issue(i).bits.src(j)(31, 0), S)
val double = recode(io.issue(i).bits.src(j)(63, 0), D)
exeUnits(i).io.fromFp.bits.src(j) := Mux(isLegalSingle, single, double)
}
val readFpRf = cfg.readFpRf
val wakeUpInRecodeWithCfg = intSlowWakeUpIn.zip(intRecoded) ++ memSlowWakeUpIn.zip(memRecoded)
val inBlockFastPorts = exeUnits.filter(e => e.config.hasCertainLatency).map(a => (a.config, a.io.out.bits.data))
val fastPortsCnt = inBlockFastPorts.length
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency).map(a => (a.config, a.io.out))
val slowPorts = VecInit(fpWbArbiter.io.out.drop(4))
val slowPortsCnt = slowPorts.length
println(s"${i}: exu:${cfg.name} fastPortsCnt: ${fastPortsCnt} " +
s"slowPorts: ${slowPortsCnt} " +
s"delay:${certainLatency}"
)
val rs = Module(new ReservationStation(s"rs_${cfg.name}", cfg, IssQueSize, XLEN + 1,
inBlockFastPorts.map(_._1).length,
slowPorts.length,
fixedDelay = certainLatency,
fastWakeup = certainLatency >= 0,
feedback = false,1, 1
))
rs.io.redirect <> redirect // TODO: remove it
rs.io.flush <> flush // TODO: remove it
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl(i))
rs.io.srcRegValue := DontCare
val src1Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3).data))
val src2Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3 + 1).data))
val src3Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3 + 2).data))
rs.io.srcRegValue(0)(0) := src1Value(readPortIndex(i))
rs.io.srcRegValue(0)(1) := src2Value(readPortIndex(i))
if (cfg.fpSrcCnt > 2) rs.io.srcRegValue(0)(2) := src3Value(readPortIndex(i))
rs.io.fastDatas <> inBlockFastPorts.map(_._2)
rs.io.slowPorts <> slowPorts
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.flush <> flush
exeUnits(i).io.fromFp <> rs.io.deq(0)
// rs.io.memfeedback := DontCare
rs.suggestName(s"rs_${cfg.name}")
rs
})
for(rs <- reservationStations){
val inBlockUops = reservationStations.filter(x =>
x.exuCfg.hasCertainLatency && x.exuCfg.writeFpRf
).map(x => {
val raw = WireInit(x.io.fastUopOut(0))
raw.valid := x.io.fastUopOut(0).valid && raw.bits.ctrl.fpWen
raw
})
rs.io.fastUopsIn <> inBlockUops
}
// read fp rf from ctrl block
fpRf.io.readPorts.zipWithIndex.map{ case (r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until exuParameters.StuCnt).foreach(i =>
io.toMemBlock.readFpRf(i).data := RegNext(ieee(fpRf.io.readPorts(i + 12).data))
)
// write fp rf arbiter
fpWbArbiter.io.in.drop(exeUnits.length).zip(wakeUpInRecode).foreach(
x => x._1 <> fpOutValid(x._2, connectReady = true)
)
for((exu, i) <- exeUnits.zipWithIndex){
val out, outReg = Wire(DecoupledIO(new ExuOutput))
out.bits := exu.io.out.bits
out.valid := exu.io.out.valid && !out.bits.uop.roqIdx.needFlush(redirect, flush)
PipelineConnect(out, outReg,
outReg.fire() || outReg.bits.uop.roqIdx.needFlush(redirect, flush), false.B
// out
io.writeback(i).valid := exu.io.out.valid
io.writeback(i).bits := exu.io.out.bits
io.writeback(i).bits.data := Mux(exu.io.out.bits.uop.ctrl.fpWen,
ieee(exu.io.out.bits.data),
exu.io.out.bits.data
)
io.wakeUpOut.slow(i).valid := outReg.valid
io.wakeUpOut.slow(i).bits := outReg.bits
io.wakeUpOut.slow(i).bits.redirectValid := false.B
io.wakeUpOut.slow(i).bits.data := Mux(outReg.bits.uop.ctrl.fpWen,
ieee(outReg.bits.data),
outReg.bits.data
)
fpWbArbiter.io.in(i).valid := exu.io.out.valid && exu.io.out.bits.uop.ctrl.fpWen && outReg.ready
fpWbArbiter.io.in(i).bits := exu.io.out.bits
if(exu.config.writeIntRf){
outReg.ready := !outReg.valid || (
io.wakeUpOut.slow(i).ready && outReg.bits.uop.ctrl.rfWen
) || outReg.bits.uop.ctrl.fpWen
// don't consider flush in 'intFire'
val intFire = exu.io.out.valid && out.ready && out.bits.uop.ctrl.rfWen
exu.io.out.ready := intFire || fpWbArbiter.io.in(i).fire() || !exu.io.out.valid
} else {
outReg.ready := true.B
exu.io.out.ready := fpWbArbiter.io.in(i).fire() || !exu.io.out.valid
}
}
XSPerfAccumulate("competition", fpWbArbiter.io.in.map(i => !i.ready && i.valid).foldRight(0.U)(_+_))
// set busytable and update roq
io.toCtrlBlock.wbRegs <> fpWbArbiter.io.out
fpRf.io.writePorts.zip(fpWbArbiter.io.out).foreach{
case (rf, wb) =>
rf.wen := wb.valid
rf.addr := wb.bits.uop.pdest
rf.data := wb.bits.data
exu.io.out.ready := io.writeback(i).ready
}
fpRf.io.debug_rports := DontCare
if (!env.FPGAPlatform) {
for ((rport, rat) <- fpRf.io.debug_rports.zip(io.fromCtrlBlock.debug_rat)) {
rport.addr := rat
}
val difftest = Module(new DifftestArchFpRegState)
difftest.io.clock := clock
difftest.io.coreid := hardId.U
difftest.io.fpr := VecInit(fpRf.io.debug_rports.map(p => ieee(p.data)))
}
val rsDeqCount = PopCount(reservationStations.map(_.io.deq(0).valid))
XSPerfAccumulate("fp_rs_deq_count", rsDeqCount)
XSPerfHistogram("fp_rs_deq_count", rsDeqCount, true.B, 0, 6, 1)
}
......@@ -35,15 +35,6 @@ class WakeUpBundle(numFast: Int, numSlow: Int)(implicit p: Parameters) extends X
}
// Signals sent from the integer block back to the control block.
// IntegerBlock declares this bundle as `toCtrlBlock`; CtrlBlock takes it
// Flipped as `fromIntBlock`.
class IntBlockToCtrlIO(implicit p: Parameters) extends XSBundle {
// write back regfile signals after arbiter
// used to update busytable and roq state
// (IntegerBlock connects this to intWbArbiter.io.out.)
val wbRegs = Vec(NRIntWritePorts, ValidIO(new ExuOutput))
// write back to brq
// One entry per ALU plus jump unit; IntegerBlock pairs these with the
// outputs of the execution units whose config has `hasRedirect` set.
val exuRedirect = Vec(exuParameters.AluCnt + exuParameters.JmpCnt, ValidIO(new ExuOutput))
// Per-execution-unit `numExist` value taken from the reservation stations;
// CtrlBlock forwards it to dispatch.io.numExist.
val numExist = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
}
trait HasExeBlockHelper {
def fpUopValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
val uop = WireInit(x)
......@@ -93,24 +84,16 @@ trait HasExeBlockHelper {
}
}
class IntegerBlock
(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
memFastWakeUpIn: Seq[ExuConfig],
fastWakeUpOut: Seq[ExuConfig],
slowWakeUpOut: Seq[ExuConfig]
)(implicit p: Parameters) extends XSModule with HasExeBlockHelper {
class IntegerBlock()(implicit p: Parameters) extends XSModule with HasExeBlockHelper {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToIntBlockIO)
val toCtrlBlock = new IntBlockToCtrlIO
val toMemBlock = new IntBlockToMemBlockIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val memFastWakeUp = new WakeUpBundle(exuParameters.LduCnt, 0)
val intWbOut = Vec(4, ValidIO(new ExuOutput))
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
// in
val issue = Vec(7, Flipped(DecoupledIO(new ExuInput)))
// out
val exuRedirect = Vec(exuParameters.AluCnt + exuParameters.JmpCnt, ValidIO(new ExuOutput))
val writeback = Vec(7, DecoupledIO(new ExuOutput))
// misc
val csrio = new CSRFileIO
val fenceio = new Bundle {
val sfence = Output(new SfenceBundle) // to front,mem
......@@ -118,132 +101,22 @@ class IntegerBlock
val sbuffer = new FenceToSbuffer // to mem
}
})
val redirect = io.fromCtrlBlock.redirect
val flush = io.fromCtrlBlock.flush
val intRf = Module(new Regfile(
numReadPorts = NRIntReadPorts,
numWirtePorts = NRIntWritePorts,
hasZero = true,
len = XLEN
))
val jmpExeUnit = Module(new JumpExeUnit)
val mduExeUnits = Array.tabulate(exuParameters.MduCnt)(_ => Module(new MulDivExeUnit))
val aluExeUnits = Array.tabulate(exuParameters.AluCnt)(_ => Module(new AluExeUnit))
val exeUnits = jmpExeUnit +: (mduExeUnits ++ aluExeUnits)
val intWbArbiter = Module(new Wb(
(exeUnits.map(_.config) ++ fastWakeUpIn ++ slowWakeUpIn),
NRIntWritePorts,
isFp = false
))
io.intWbOut := VecInit(intWbArbiter.io.out.drop(4))
for (exe <- exeUnits) {
exe.io.redirect <> redirect
exe.io.flush <> flush
}
val jmp_rs = Module(new ReservationStation("rs_jmp", JumpExeUnitCfg, IssQueSize, XLEN, 6, 4, -1, false, false, 1, 1))
val mul_rs_0 = Module(new ReservationStation("rs_mul_0", MulDivExeUnitCfg, IssQueSize, XLEN, 6, 4, 2, false, false, 2, 1))
val mul_rs_1 = Module(new ReservationStation("rs_mul_1", MulDivExeUnitCfg, IssQueSize, XLEN, 6, 4, 2, false, false, 2, 1))
val alu_rs_0 = Module(new ReservationStation("rs_alu_0", AluExeUnitCfg, 4*IssQueSize, XLEN,
8, 4, 0, true, false, 4, 4
))
val aluFastData = VecInit(exeUnits.drop(3).map(_.io.out.bits.data))
val mulFastData = VecInit(exeUnits.drop(1).take(2).map(_.io.out.bits.data))
val memFastData = VecInit(io.memFastWakeUp.fast.map(_.bits.data))
val slowPorts = intWbArbiter.io.out.drop(4)
jmp_rs.io.numExist <> io.toCtrlBlock.numExist(0)
jmp_rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl.take(1)
jmp_rs.io.fromDispatch(0).valid := io.fromCtrlBlock.enqIqCtrl(0).valid && FuType.jmpCanAccept(io.fromCtrlBlock.enqIqCtrl(0).bits.ctrl.fuType)
jmp_rs.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.take(2).map(_.data))
jmp_rs.io.jumpPc := io.fromCtrlBlock.jumpPc
jmp_rs.io.jalr_target := io.fromCtrlBlock.jalr_target
jmp_rs.io.fastDatas <> mulFastData ++ aluFastData
jmp_rs.io.deq(0) <> jmpExeUnit.io.fromInt
mul_rs_0.io.numExist <> io.toCtrlBlock.numExist(1)
mul_rs_0.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl.take(2)
mul_rs_0.io.fromDispatch(0).valid := io.fromCtrlBlock.enqIqCtrl(0).valid && FuType.mduCanAccept(io.fromCtrlBlock.enqIqCtrl(0).bits.ctrl.fuType)
mul_rs_0.io.fromDispatch(1).valid := io.fromCtrlBlock.enqIqCtrl(1).valid && FuType.mduCanAccept(io.fromCtrlBlock.enqIqCtrl(1).bits.ctrl.fuType)
mul_rs_0.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.take(2).map(_.data))
mul_rs_0.io.srcRegValue(1) <> VecInit(intRf.io.readPorts.drop(2).take(2).map(_.data))
mul_rs_0.io.fastDatas <> mulFastData ++ aluFastData
mul_rs_0.io.deq(0) <> mduExeUnits(0).io.fromInt
mul_rs_1.io.numExist <> io.toCtrlBlock.numExist(2)
mul_rs_1.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl.drop(2).take(2))
mul_rs_1.io.fromDispatch(0).valid := io.fromCtrlBlock.enqIqCtrl(2).valid && FuType.mduCanAccept(io.fromCtrlBlock.enqIqCtrl(2).bits.ctrl.fuType)
mul_rs_1.io.fromDispatch(1).valid := io.fromCtrlBlock.enqIqCtrl(3).valid && FuType.mduCanAccept(io.fromCtrlBlock.enqIqCtrl(3).bits.ctrl.fuType)
mul_rs_1.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.drop(4).take(2).map(_.data))
mul_rs_1.io.srcRegValue(1) <> VecInit(intRf.io.readPorts.drop(6).take(2).map(_.data))
mul_rs_1.io.fastDatas <> mulFastData ++ aluFastData
mul_rs_1.io.deq(0) <> mduExeUnits(1).io.fromInt
io.toCtrlBlock.numExist(3) := alu_rs_0.io.numExist >> 2
io.toCtrlBlock.numExist(4) := alu_rs_0.io.numExist >> 2
io.toCtrlBlock.numExist(5) := alu_rs_0.io.numExist >> 2
io.toCtrlBlock.numExist(6) := alu_rs_0.io.numExist >> 2
alu_rs_0.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl.take(4))
for (i <- 0 until 4) {
alu_rs_0.io.fromDispatch(i).valid := io.fromCtrlBlock.enqIqCtrl(i).valid && FuType.aluCanAccept(io.fromCtrlBlock.enqIqCtrl(i).bits.ctrl.fuType)
}
alu_rs_0.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.take(2).map(_.data))
alu_rs_0.io.srcRegValue(1) <> VecInit(intRf.io.readPorts.drop(2).take(2).map(_.data))
alu_rs_0.io.srcRegValue(2) <> VecInit(intRf.io.readPorts.drop(4).take(2).map(_.data))
alu_rs_0.io.srcRegValue(3) <> VecInit(intRf.io.readPorts.drop(6).take(2).map(_.data))
alu_rs_0.io.fastDatas <> mulFastData ++ aluFastData ++ memFastData
alu_rs_0.io.deq(0) <> aluExeUnits(0).io.fromInt
alu_rs_0.io.deq(1) <> aluExeUnits(1).io.fromInt
alu_rs_0.io.deq(2) <> aluExeUnits(2).io.fromInt
alu_rs_0.io.deq(3) <> aluExeUnits(3).io.fromInt
io.fromCtrlBlock.enqIqCtrl(0).ready := jmp_rs.io.fromDispatch(0).fire() || mul_rs_0.io.fromDispatch(0).fire() || alu_rs_0.io.fromDispatch(0).fire()
io.fromCtrlBlock.enqIqCtrl(1).ready := mul_rs_0.io.fromDispatch(1).fire() || alu_rs_0.io.fromDispatch(1).fire()
io.fromCtrlBlock.enqIqCtrl(2).ready := mul_rs_1.io.fromDispatch(0).fire() || alu_rs_0.io.fromDispatch(2).fire()
io.fromCtrlBlock.enqIqCtrl(3).ready := mul_rs_1.io.fromDispatch(1).fire() || alu_rs_0.io.fromDispatch(3).fire()
io.fromCtrlBlock.enqIqCtrl(4).ready := false.B
io.fromCtrlBlock.enqIqCtrl(5).ready := false.B
io.fromCtrlBlock.enqIqCtrl(6).ready := false.B
val reservationStations = Seq(jmp_rs, mul_rs_0, mul_rs_1, alu_rs_0)
val aluFastUop = Wire(Vec(4, ValidIO(new MicroOp)))
val mulFastUop = Wire(Vec(2, ValidIO(new MicroOp)))
val memFastUop = io.memFastWakeUp.fastUops
aluFastUop(0) := alu_rs_0.io.fastUopOut(0)
aluFastUop(1) := alu_rs_0.io.fastUopOut(1)
aluFastUop(2) := alu_rs_0.io.fastUopOut(2)
aluFastUop(3) := alu_rs_0.io.fastUopOut(3)
mulFastUop(0) := mul_rs_0.io.fastUopOut(0)
mulFastUop(1) := mul_rs_1.io.fastUopOut(0)
io.writeback <> exeUnits.map(_.io.out)
for (rs <- reservationStations) {
rs.io.redirect <> redirect
rs.io.redirect <> redirect
rs.io.flush <> flush
rs.io.slowPorts := slowPorts
for ((exe, i) <- exeUnits.zipWithIndex) {
exe.io.redirect <> io.redirect
exe.io.flush <> io.flush
io.issue(i) <> exe.io.fromInt
}
jmp_rs.io.fastUopsIn := mulFastUop ++ aluFastUop
mul_rs_0.io.fastUopsIn := mulFastUop ++ aluFastUop
mul_rs_1.io.fastUopsIn := mulFastUop ++ aluFastUop
alu_rs_0.io.fastUopsIn := mulFastUop ++ aluFastUop ++ memFastUop
// alu_rs_1.io.fastUopsIn := mulFastUop ++ aluFastUop ++ memFastUop
io.wakeUpOut.fastUops := mulFastUop ++ aluFastUop
io.wakeUpOut.fast <> exeUnits.filter(
x => x.config.hasCertainLatency
).map(_.io.out).map(decoupledIOToValidIO)
io.wakeUpOut.slow <> exeUnits.filter(
x => x.config.hasUncertainlatency
).map(x => WireInit(x.io.out))
// send misprediction to brq
io.toCtrlBlock.exuRedirect.zip(
io.exuRedirect.zip(
exeUnits.filter(_.config.hasRedirect).map(_.io.out)
).foreach {
case (x, y) =>
......@@ -257,56 +130,4 @@ class IntegerBlock
io.csrio.customCtrl := RegNext(jmpExeUnit.csrio.customCtrl)
jmpExeUnit.fenceio <> io.fenceio
// read int rf from ctrl block
intRf.io.readPorts.zipWithIndex.map { case (r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until NRMemReadPorts).foreach(i => io.toMemBlock.readIntRf(i).data := intRf.io.readPorts(i + 8).data)
// write int rf arbiter
intWbArbiter.io.in <> exeUnits.map(e => {
val w = WireInit(e.io.out)
if(e.config.writeFpRf){
w.valid := e.io.out.valid && !e.io.out.bits.uop.ctrl.fpWen && io.wakeUpOut.slow(0).ready
} else {
w.valid := e.io.out.valid
}
w
}) ++ io.wakeUpIn.slow.map(x => intOutValid(x, connectReady = true))
XSPerfAccumulate("competition", intWbArbiter.io.in.map(i => !i.ready && i.valid).foldRight(0.U)(_+_))
exeUnits.zip(intWbArbiter.io.in).foreach{
case (exu, wInt) =>
if(exu.config.writeFpRf){
val wakeUpOut = io.wakeUpOut.slow(0) // jmpExeUnit
val writeFpReady = wakeUpOut.fire() && wakeUpOut.bits.uop.ctrl.fpWen
exu.io.out.ready := wInt.fire() || writeFpReady || !exu.io.out.valid
} else {
exu.io.out.ready := wInt.fire() || !exu.io.out.valid
}
}
// set busytable and update roq
io.toCtrlBlock.wbRegs <> intWbArbiter.io.out
intRf.io.writePorts.zip(intWbArbiter.io.out).foreach {
case (rf, wb) =>
rf.wen := wb.valid && wb.bits.uop.ctrl.rfWen
rf.addr := wb.bits.uop.pdest
rf.data := wb.bits.data
}
intRf.io.debug_rports := DontCare
if (!env.FPGAPlatform) {
for ((rport, rat) <- intRf.io.debug_rports.zip(io.fromCtrlBlock.debug_rat)) {
rport.addr := rat
}
val difftest = Module(new DifftestArchIntRegState)
difftest.io.clock := clock
difftest.io.coreid := hardId.U
difftest.io.gpr := VecInit(intRf.io.debug_rports.map(_.data))
}
val rsDeqCount = PopCount(reservationStations.map(_.io.deq(0).valid))
XSPerfAccumulate("int_rs_deq_count", rsDeqCount)
XSPerfHistogram("int_rs_deq_count", rsDeqCount, true.B, 0, 7, 1)
}
......@@ -22,37 +22,13 @@ import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tile.HasFPUParameters
import system.L1CacheErrorInfo
import xiangshan._
import xiangshan.backend.roq.{RoqLsqIO, RoqPtr}
import xiangshan.backend.exu._
import xiangshan.backend.roq.RoqLsqIO
import xiangshan.cache._
import xiangshan.mem._
import xiangshan.backend.fu.{FenceToSbuffer, HasExceptionNO}
import xiangshan.backend.issue.ReservationStation
import xiangshan.backend.regfile.RfReadPort
import utils._
// Signals driven by the memory block towards the control block.
class LsBlockToCtrlIO(implicit p: Parameters) extends XSBundle {
// one entry per store unit; driven from the valid/bits of the store reservation station's deq(0) port
val stIn = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
// one entry per store unit; driven from the store unit's stout (entry 0 is also used for the MMIO store writeback)
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput))
// one entry per load/store reservation station, connected to that station's numExist
// NOTE(review): presumably the station's current occupancy -- confirm against ReservationStation
val numExist = Vec(exuParameters.LsExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
// connected to the load/store queue's rollback output
val replay = ValidIO(new Redirect)
}
// Integer register file read ports exposed by the integer block to the memory block.
class IntBlockToMemBlockIO(implicit p: Parameters) extends XSBundle {
// NRMemReadPorts read ports, each XLEN bits wide; the integer block drives .data from intRf read ports 8 and up
val readIntRf = Vec(NRMemReadPorts, new RfReadPort(XLEN))
}
// Floating-point register file read ports exposed by the float block to the memory block.
class FpBlockToMemBlockIO(implicit p: Parameters) extends XSBundle {
// one (XLEN + 1)-bit read port per store unit; its .data feeds the store reservation station's fpRegValue
val readFpRf = Vec(exuParameters.StuCnt, new RfReadPort(XLEN + 1))
}
class MemBlock(
val fastWakeUpIn: Seq[ExuConfig],
val slowWakeUpIn: Seq[ExuConfig],
val fastWakeUpOut: Seq[ExuConfig],
val slowWakeUpOut: Seq[ExuConfig],
val numIntWakeUpFp: Int
)(implicit p: Parameters) extends LazyModule {
class MemBlock()(implicit p: Parameters) extends LazyModule {
val dcache = LazyModule(new DCacheWrapper())
val uncache = LazyModule(new Uncache())
......@@ -68,37 +44,33 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
with HasFpLoadHelper
{
val fastWakeUpIn = outer.fastWakeUpIn
val slowWakeUpIn = outer.slowWakeUpIn
val fastWakeUpOut = outer.fastWakeUpOut
val slowWakeUpOut = outer.slowWakeUpOut
val numIntWakeUpFp = outer.numIntWakeUpFp
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToLsBlockIO)
val fromIntBlock = Flipped(new IntBlockToMemBlockIO)
val fromFpBlock = Flipped(new FpBlockToMemBlockIO)
val toCtrlBlock = new LsBlockToCtrlIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val intWakeUpFp = Vec(numIntWakeUpFp, Flipped(DecoupledIO(new ExuOutput)))
val wakeUpOutInt = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val wakeUpOutFp = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val ldFastWakeUpInt = Flipped(new WakeUpBundle(exuParameters.LduCnt, 0))
val intWbOut = Vec(4, Flipped(ValidIO(new ExuOutput)))
val fpWbOut = Vec(8, Flipped(ValidIO(new ExuOutput)))
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
// in
val issue = Vec(4, Flipped(DecoupledIO(new ExuInput)))
val replay = Vec(4, ValidIO(new RSFeedback))
val rsIdx = Vec(4, Input(UInt(log2Up(IssQueSize).W)))
val isFirstIssue = Vec(4, Input(Bool()))
val stData = Vec(2, Flipped(ValidIO(new StoreDataBundle)))
val stIssuePtr = Output(new SqPtr())
// out
val writeback = Vec(4, DecoupledIO(new ExuOutput))
val otherFastWakeup = Vec(2, ValidIO(new MicroOp))
// misc
val stIn = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput))
val memoryViolation = ValidIO(new Redirect)
val ptw = new TlbPtwIO(LoadPipelineWidth + StorePipelineWidth)
val sfence = Input(new SfenceBundle)
val tlbCsr = Input(new TlbCsrBundle)
val fenceToSbuffer = Flipped(new FenceToSbuffer)
val enqLsq = new LsqEnqIO
val memPredUpdate = Vec(StorePipelineWidth, Input(new MemPredUpdateReq))
val lsqio = new Bundle {
val exceptionAddr = new ExceptionAddrIO // to csr
val roq = Flipped(new RoqLsqIO) // roq to lsq
}
val csrCtrl = Flipped(new CustomCSRCtrlIO)
val error = new L1CacheErrorInfo
val memInfo = new Bundle {
......@@ -113,8 +85,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io.error <> RegNext(RegNext(dcache.io.error))
val redirect = io.fromCtrlBlock.redirect
val loadUnits = Seq.fill(exuParameters.LduCnt)(Module(new LoadUnit))
val storeUnits = Seq.fill(exuParameters.StuCnt)(Module(new StoreUnit))
val exeUnits = loadUnits ++ storeUnits
......@@ -132,96 +102,17 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits.head.io.ldout.ready := ldOut0.ready
val exeWbReqs = ldOut0 +: loadUnits.tail.map(_.io.ldout)
// 'wakeUpFp' is 1 cycle later than 'exeWbReqs'
val wakeUpFp = Wire(Vec(exuParameters.LduCnt, Decoupled(new ExuOutput)))
val readPortIndex = Seq(0, 1, 2, 4)
io.fromIntBlock.readIntRf.foreach(_.addr := DontCare)
io.fromFpBlock.readFpRf.foreach(_.addr := DontCare)
val reservationStations = (loadExuConfigs ++ storeExuConfigs).zipWithIndex.map({ case (cfg, i) =>
var certainLatency = -1
if (cfg.hasCertainLatency) {
certainLatency = cfg.latency.latencyVal.get
}
val readIntRf = cfg.readIntRf
val readFpRf = cfg.readFpRf
// load has uncertain latency, so only use external wake up data
val fastDatas = fastWakeUpIn.zip(io.wakeUpIn.fast)
.filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf))
.map(a => (a._1, a._2.bits.data)) ++
(if (cfg == LdExeUnitCfg && EnableLoadFastWakeUp) loadExuConfigs.zip(loadUnits.map(_.io.ldout.bits.data)) else Seq())
val fastPortsCnt = fastDatas.length
val slowPorts = if (cfg == StExeUnitCfg) io.intWbOut ++ io.fpWbOut else io.intWbOut
val slowPortsCnt = slowPorts.length
// if tlb miss, replay
val feedback = true
println(s"${i}: exu:${cfg.name} fastPortsCnt: ${fastPortsCnt} slowPorts: ${slowPortsCnt} delay:${certainLatency} feedback:${feedback}")
val rs = Module(new ReservationStation(s"rs_${cfg.name}", cfg, IssQueSize, XLEN,
fastDatas.map(_._1).length,
slowPorts.length,
fixedDelay = certainLatency,
fastWakeup = certainLatency >= 0,
feedback = feedback, 1, 1)
)
rs.io.redirect <> redirect // TODO: remove it
rs.io.flush <> io.fromCtrlBlock.flush // TODO: remove it
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl(i))
rs.io.srcRegValue(0)(0) := io.fromIntBlock.readIntRf(readPortIndex(i)).data
if (i >= exuParameters.LduCnt) {
rs.io.srcRegValue(0)(1) := io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data
rs.io.fpRegValue := io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data
}
rs.io.fastDatas <> fastDatas.map(_._2)
rs.io.slowPorts <> slowPorts
// exeUnits(i).io.redirect <> redirect
// exeUnits(i).io.fromInt <> rs.io.deq
rs.io.memfeedback := DontCare
rs.suggestName(s"rs_${cfg.name}")
rs
})
for(rs <- reservationStations){
rs.io.fastUopsIn <> fastWakeUpIn.zip(io.wakeUpIn.fastUops)
.filter(x => (x._1.writeIntRf && rs.exuCfg.readIntRf) || (x._1.writeFpRf && rs.exuCfg.readFpRf))
.map(_._2) ++
(if (rs.exuCfg == LdExeUnitCfg && EnableLoadFastWakeUp) loadUnits.map(_.io.fastUop) else Seq())
}
wakeUpFp.zip(exeWbReqs).foreach{
case(w, e) =>
val r = RegNext(e.bits)
w.bits := r
w.valid := RegNext(e.valid && !e.bits.uop.roqIdx.needFlush(redirect, io.fromCtrlBlock.flush))
e.ready := true.B
assert(w.ready === true.B)
}
io.writeback <> exeWbReqs ++ VecInit(storeUnits.map(_.io.stout))
io.otherFastWakeup <> loadUnits.map(_.io.fastUop)
io.ldFastWakeUpInt.fastUops <> loadUnits.map(_.io.fastUop)
io.ldFastWakeUpInt.fast <> loadUnits.map(_.io.ldout).map(decoupledIOToValidIO)
io.wakeUpOutInt.slow <> exeWbReqs
io.wakeUpOutFp.slow <> wakeUpFp
io.wakeUpIn.slow.foreach(_.ready := true.B)
io.intWakeUpFp.foreach(_.ready := true.B)
// TODO: fast load wakeup
val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
val lsq = Module(new LsqWrappper)
val sbuffer = Module(new NewSbuffer)
// if you want to stress test dcache store, use FakeSbuffer
// val sbuffer = Module(new FakeSbuffer)
io.stIssuePtr := lsq.io.issuePtrExt
// dtlb
io.ptw <> dtlb.io.ptw
......@@ -230,14 +121,14 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// LoadUnit
for (i <- 0 until exuParameters.LduCnt) {
loadUnits(i).io.redirect <> io.fromCtrlBlock.redirect
loadUnits(i).io.flush <> io.fromCtrlBlock.flush
loadUnits(i).io.rsFeedback <> reservationStations(i).io.memfeedback
loadUnits(i).io.rsIdx := reservationStations(i).io.rsIdx // TODO: beautify it
loadUnits(i).io.isFirstIssue := reservationStations(i).io.isFirstIssue // NOTE: just for dtlb's perf cnt
loadUnits(i).io.redirect <> io.redirect
loadUnits(i).io.flush <> io.flush
loadUnits(i).io.rsFeedback <> io.replay(i)
loadUnits(i).io.rsIdx := io.rsIdx(i) // TODO: beautify it
loadUnits(i).io.isFirstIssue := io.isFirstIssue(i) // NOTE: just for dtlb's perf cnt
loadUnits(i).io.dtlb <> dtlb.io.requestor(i)
// get input from dispatch
loadUnits(i).io.ldin <> reservationStations(i).io.deq(0)
loadUnits(i).io.ldin <> io.issue(i)
// dcache access
loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
// forward
......@@ -245,7 +136,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
// Lsq to load unit's rs
reservationStations(i).io.stIssuePtr := lsq.io.issuePtrExt
// passdown to lsq
lsq.io.loadIn(i) <> loadUnits(i).io.lsq.loadIn
......@@ -254,57 +144,56 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// update waittable
// TODO: read pc
io.fromCtrlBlock.memPredUpdate(i) := DontCare
io.memPredUpdate(i) := DontCare
lsq.io.needReplayFromRS(i) <> loadUnits(i).io.lsq.needReplayFromRS
}
// StoreUnit
for (i <- 0 until exuParameters.StuCnt) {
val stu = storeUnits(i)
val rs = reservationStations(exuParameters.LduCnt + i)
val dtlbReq = dtlb.io.requestor(exuParameters.LduCnt + i)
stu.io.redirect <> io.fromCtrlBlock.redirect
stu.io.flush <> io.fromCtrlBlock.flush
stu.io.rsFeedback <> rs.io.memfeedback
stu.io.rsIdx <> rs.io.rsIdx
stu.io.isFirstIssue <> rs.io.isFirstIssue // NOTE: just for dtlb's perf cnt
stu.io.redirect <> io.redirect
stu.io.flush <> io.flush
stu.io.rsFeedback <> io.replay(exuParameters.LduCnt + i)
stu.io.rsIdx <> io.rsIdx(exuParameters.LduCnt + i)
// NOTE: just for dtlb's perf cnt
stu.io.isFirstIssue <> io.isFirstIssue(exuParameters.LduCnt + i)
stu.io.dtlb <> dtlbReq
stu.io.stin <> rs.io.deq(0)
stu.io.stin <> io.issue(exuParameters.LduCnt + i)
stu.io.lsq <> lsq.io.storeIn(i)
// Lsq to load unit's rs
rs.io.stIssuePtr := lsq.io.issuePtrExt
// rs.io.storeData <> lsq.io.storeDataIn(i)
lsq.io.storeDataIn(i) := rs.io.stData
lsq.io.storeDataIn(i) := io.stData(i)
// sync issue info to rs
lsq.io.storeIssue(i).valid := rs.io.deq(0).valid
lsq.io.storeIssue(i).bits := rs.io.deq(0).bits
lsq.io.storeIssue(i).valid := io.issue(exuParameters.LduCnt + i).valid
lsq.io.storeIssue(i).bits := io.issue(exuParameters.LduCnt + i).bits
// sync issue info to store set LFST
io.toCtrlBlock.stIn(i).valid := rs.io.deq(0).valid
io.toCtrlBlock.stIn(i).bits := rs.io.deq(0).bits
io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
io.toCtrlBlock.stOut(i).valid := stu.io.stout.valid
io.toCtrlBlock.stOut(i).bits := stu.io.stout.bits
io.stOut(i).valid := stu.io.stout.valid
io.stOut(i).bits := stu.io.stout.bits
stu.io.stout.ready := true.B
}
// mmio store writeback will use store writeback port 0
lsq.io.mmioStout.ready := false.B
when (lsq.io.mmioStout.valid && !storeUnits(0).io.stout.valid) {
io.toCtrlBlock.stOut(0).valid := true.B
io.toCtrlBlock.stOut(0).bits := lsq.io.mmioStout.bits
io.stOut(0).valid := true.B
io.stOut(0).bits := lsq.io.mmioStout.bits
lsq.io.mmioStout.ready := true.B
}
// Lsq
lsq.io.roq <> io.lsqio.roq
lsq.io.enq <> io.fromCtrlBlock.enqLsq
lsq.io.brqRedirect <> io.fromCtrlBlock.redirect
lsq.io.flush <> io.fromCtrlBlock.flush
io.toCtrlBlock.replay <> lsq.io.rollback
lsq.io.enq <> io.enqLsq
lsq.io.brqRedirect <> io.redirect
lsq.io.flush <> io.flush
io.memoryViolation <> lsq.io.rollback
lsq.io.uncache <> uncache.io.lsq
// delay dcache refill for 1 cycle for better timing
// TODO: remove RegNext after fixing refill paddr timing
......@@ -338,21 +227,21 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val atomic_rs0 = exuParameters.LduCnt + 0
val atomic_rs1 = exuParameters.LduCnt + 1
val st0_atomics = reservationStations(atomic_rs0).io.deq(0).valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.deq(0).bits.uop.ctrl.fuType)
val st1_atomics = reservationStations(atomic_rs1).io.deq(0).valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.deq(0).bits.uop.ctrl.fuType)
val st0_atomics = io.issue(atomic_rs0).valid && FuType.storeIsAMO(io.issue(atomic_rs0).bits.uop.ctrl.fuType)
val st1_atomics = io.issue(atomic_rs1).valid && FuType.storeIsAMO(io.issue(atomic_rs1).bits.uop.ctrl.fuType)
val st0_data_atomics = reservationStations(atomic_rs0).io.stData.valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.stData.bits.uop.ctrl.fuType)
val st1_data_atomics = reservationStations(atomic_rs1).io.stData.valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.stData.bits.uop.ctrl.fuType)
val st0_data_atomics = io.stData(0).valid && FuType.storeIsAMO(io.stData(0).bits.uop.ctrl.fuType)
val st1_data_atomics = io.stData(1).valid && FuType.storeIsAMO(io.stData(1).bits.uop.ctrl.fuType)
when (st0_atomics) {
reservationStations(atomic_rs0).io.deq(0).ready := atomicsUnit.io.in.ready
io.issue(atomic_rs0).ready := atomicsUnit.io.in.ready
storeUnits(0).io.stin.valid := false.B
state := s_atomics_0
assert(!st1_atomics)
}
when (st1_atomics) {
reservationStations(atomic_rs1).io.deq(0).ready := atomicsUnit.io.in.ready
io.issue(atomic_rs1).ready := atomicsUnit.io.in.ready
storeUnits(1).io.stin.valid := false.B
state := s_atomics_1
......@@ -364,12 +253,12 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
}
atomicsUnit.io.in.valid := st0_atomics || st1_atomics
atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq(0).bits, reservationStations(atomic_rs1).io.deq(0).bits)
atomicsUnit.io.in.bits := Mux(st0_atomics, io.issue(atomic_rs0).bits, io.issue(atomic_rs1).bits)
atomicsUnit.io.storeDataIn.valid := st0_data_atomics || st1_data_atomics
atomicsUnit.io.storeDataIn.bits := Mux(st0_data_atomics, reservationStations(atomic_rs0).io.stData.bits, reservationStations(atomic_rs1).io.stData.bits)
atomicsUnit.io.rsIdx := Mux(st0_atomics, reservationStations(atomic_rs0).io.rsIdx, reservationStations(atomic_rs1).io.rsIdx)
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
atomicsUnit.io.flush <> io.fromCtrlBlock.flush
atomicsUnit.io.storeDataIn.bits := Mux(st0_data_atomics, io.stData(0).bits, io.stData(1).bits)
atomicsUnit.io.rsIdx := Mux(st0_atomics, io.rsIdx(atomic_rs0), io.rsIdx(atomic_rs1))
atomicsUnit.io.redirect <> io.redirect
atomicsUnit.io.flush <> io.flush
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
......@@ -391,12 +280,12 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
}
when (state === s_atomics_0) {
atomicsUnit.io.rsFeedback <> reservationStations(atomic_rs0).io.memfeedback
atomicsUnit.io.rsFeedback <> io.replay(atomic_rs0)
assert(!storeUnits(0).io.rsFeedback.valid)
}
when (state === s_atomics_1) {
atomicsUnit.io.rsFeedback <> reservationStations(atomic_rs1).io.memfeedback
atomicsUnit.io.rsFeedback <> io.replay(atomic_rs1)
assert(!storeUnits(1).io.rsFeedback.valid)
}
......@@ -409,8 +298,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io.memInfo.lqFull := RegNext(lsq.io.lqFull)
io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)
val ldDeqCount = PopCount(reservationStations.take(2).map(_.io.deq(0).valid))
val stDeqCount = PopCount(reservationStations.drop(2).map(_.io.deq(0).valid))
val ldDeqCount = PopCount(io.issue.take(2).map(_.valid))
val stDeqCount = PopCount(io.issue.drop(2).map(_.valid))
val rsDeqCount = ldDeqCount + stDeqCount
XSPerfAccumulate("load_rs_deq_count", ldDeqCount)
XSPerfHistogram("load_rs_deq_count", ldDeqCount, true.B, 1, 2, 1)
......
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.backend
import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config.Parameters
import difftest.{DifftestArchFpRegState, DifftestArchIntRegState}
import xiangshan._
import utils._
import xiangshan.backend.issue.{RSParams, ReservationStation}
import xiangshan.backend.regfile.Regfile
import xiangshan.mem.{SqPtr, StoreDataBundle}
// TODO: parameters
/**
  * Non-parameterized scheduler that holds both register files and every
  * reservation station (RS) and connects the dispatch ports to the issue ports.
  *
  * Port layout, as wired in this module:
  *  - `io.allocate`: 0-3 feed the integer RSs (jump / mul / alu), 4-7 feed the
  *    floating-point RSs (fmac / fmisc), 8-9 feed the load RSs and 10-11 feed
  *    the store RSs.
  *  - `io.issue`: 0 jump, 1-2 mul, 3-6 alu, 7-10 fmac, 11-12 fmisc,
  *    13-14 load, 15-16 store.
  *  - `io.writeback`: 0-7 are written into the integer register file,
  *    8-15 into the floating-point register file.
  *  - `io.replay` / `io.rsIdx` / `io.isFirstIssue`: 0-1 load RSs, 2-3 store RSs.
  */
class Scheduler(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    // global control
    val redirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    // dispatch and issue ports
    val allocate = Vec(12, Flipped(DecoupledIO(new MicroOp)))
    // read regfile
    val readIntRf = Vec(NRIntReadPorts, Input(UInt(PhyRegIdxWidth.W)))
    val readFpRf = Vec(NRFpReadPorts, Input(UInt(PhyRegIdxWidth.W)))
    val issue = Vec(17, DecoupledIO(new ExuInput))
    val writeback = Vec(16, Flipped(ValidIO(new ExuOutput)))
    val replay = Vec(4, Flipped(ValidIO(new RSFeedback)))
    val rsIdx = Vec(4, Output(UInt(log2Up(IssQueSize).W)))
    val isFirstIssue = Vec(4, Output(Bool()))
    val stData = Vec(2, ValidIO(new StoreDataBundle))
    // 2LOAD, data is selected from writeback ports
    val otherFastWakeup = Vec(2, Flipped(ValidIO(new MicroOp)))
    // misc
    val jumpPc = Input(UInt(VAddrBits.W))
    val jalr_target = Input(UInt(VAddrBits.W))
    val stIssuePtr = Input(new SqPtr())
    // debug
    val debug_int_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
    val debug_fp_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
  })

  // write ports: 0-3 ALU, 4-5 MUL, 6-7 LOAD
  // NOTE(review): the fast-data slices below take MUL data from writeback(6, 8)
  // and memory data from writeback(4, 6), which disagrees with the port order
  // stated above -- confirm against the writeback wiring in the parent module.
  val intRf = Module(new Regfile(
    numReadPorts = NRIntReadPorts,
    numWirtePorts = NRIntWritePorts,
    hasZero = true,
    len = XLEN
  ))
  // write ports: 0-3 FMA 4-5 FMISC, 6-7 LOAD
  val fpRf = Module(new Regfile(
    numReadPorts = NRFpReadPorts,
    numWirtePorts = NRFpWritePorts,
    hasZero = false,
    len = XLEN
  ))
  // register file read addresses are supplied from outside the scheduler
  io.readIntRf <> intRf.io.readPorts.map(_.addr)
  io.readFpRf <> fpRf.io.readPorts.map(_.addr)

  // Reservation stations. RSParams is given positionally; see RSParams for the
  // meaning of each field.
  val jmpParam = RSParams(IssQueSize, 1, 1, 2, 64, PhyRegIdxWidth, 6, 14, 8, false, false, -1, false, false)
  val jmp_rs = Module(new ReservationStation(JumpExeUnitCfg, jmpParam))
  val mulParam = RSParams(IssQueSize, 2, 1, 2, 64, PhyRegIdxWidth, 6, 14, 8, false, false, 2, false, false)
  val mul_rs_0 = Module(new ReservationStation(MulDivExeUnitCfg, mulParam))
  val mul_rs_1 = Module(new ReservationStation(MulDivExeUnitCfg, mulParam))
  val aluParam = RSParams(4*IssQueSize, 4, 4, 2, 64, PhyRegIdxWidth, 8, 16, 8, false, false, 0, false, true)
  val alu_rs_0 = Module(new ReservationStation(AluExeUnitCfg, aluParam))
  val fmacParam = RSParams(4*IssQueSize, 4, 4, 3, 64, PhyRegIdxWidth, 4, 12, 8, false, false, 4, false, false)
  val fmac_rs0 = Module(new ReservationStation(FmacExeUnitCfg, fmacParam))
  val fiscParam = RSParams(IssQueSize, 2, 1, 2, 64, PhyRegIdxWidth, 4, 12, 8, false, false, -1, false, false)
  val fmisc_rs0 = Module(new ReservationStation(FmiscExeUnitCfg, fiscParam))
  val fmisc_rs1 = Module(new ReservationStation(FmiscExeUnitCfg, fiscParam))
  val loadParam = RSParams(IssQueSize, 1, 1, 1, 64, PhyRegIdxWidth, 8, 16, 8, true, false, -1, true, false)
  val load_rs0 = Module(new ReservationStation(LdExeUnitCfg, loadParam))
  val load_rs1 = Module(new ReservationStation(LdExeUnitCfg, loadParam))
  val storeParam = RSParams(IssQueSize, 1, 1, 2, 64, PhyRegIdxWidth, 6, 22, 16, true, false, -1, true, false)
  val store_rs0 = Module(new ReservationStation(StExeUnitCfg, storeParam))
  val store_rs1 = Module(new ReservationStation(StExeUnitCfg, storeParam))

  val intRs = Seq(jmp_rs, mul_rs_0, mul_rs_1, alu_rs_0)
  val fpRs = Seq(fmac_rs0, fmisc_rs0, fmisc_rs1)
  val lsRs = Seq(load_rs0, load_rs1, store_rs0, store_rs1)
  val reservationStations = intRs ++ fpRs ++ lsRs

  // every reservation station sees the same redirect and flush
  for (rs <- reservationStations) {
    rs.io.redirect <> io.redirect
    rs.io.flush <> io.flush
  }

  // data of the writeback ports that are used as fast wake-up sources
  val mulFastData = VecInit(io.writeback.slice(6, 8).map(_.bits.data))
  val aluFastData = VecInit(io.writeback.slice(0, 4).map(_.bits.data))
  val memFastData = VecInit(io.writeback.slice(4, 6).map(_.bits.data))
  val fmaFastData = VecInit(io.writeback.slice(8, 12).map(_.bits.data))

  // ---- integer reservation stations ----
  // Several stations share one allocate port: each `<>` below is followed by an
  // explicit `valid` that filters on the uop's function-unit type, and the
  // shared `ready` is re-driven after all integer stations are connected.
  jmp_rs.io.fromDispatch <> io.allocate.take(1)
  jmp_rs.io.fromDispatch(0).valid := io.allocate(0).valid && FuType.jmpCanAccept(io.allocate(0).bits.ctrl.fuType)
  jmp_rs.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.take(2).map(_.data))
  jmp_rs.io.jumpPc := io.jumpPc
  jmp_rs.io.jalr_target := io.jalr_target
  jmp_rs.io.fastDatas <> mulFastData ++ aluFastData
  jmp_rs.io.deq(0) <> io.issue(0)

  // mul_rs_0 takes allocate ports 0 and 2
  mul_rs_0.io.fromDispatch <> io.allocate.slice(0, 1) ++ io.allocate.slice(2, 3)
  mul_rs_0.io.fromDispatch(0).valid := io.allocate(0).valid && FuType.mduCanAccept(io.allocate(0).bits.ctrl.fuType)
  mul_rs_0.io.fromDispatch(1).valid := io.allocate(2).valid && FuType.mduCanAccept(io.allocate(2).bits.ctrl.fuType)
  mul_rs_0.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.slice(0, 2).map(_.data))
  mul_rs_0.io.srcRegValue(1) <> VecInit(intRf.io.readPorts.slice(4, 6).map(_.data))
  mul_rs_0.io.fastDatas <> mulFastData ++ aluFastData
  mul_rs_0.io.deq(0) <> io.issue(1)

  // mul_rs_1 takes allocate ports 1 and 3
  mul_rs_1.io.fromDispatch <> io.allocate.slice(1, 2) ++ io.allocate.slice(3, 4)
  mul_rs_1.io.fromDispatch(0).valid := io.allocate(1).valid && FuType.mduCanAccept(io.allocate(1).bits.ctrl.fuType)
  mul_rs_1.io.fromDispatch(1).valid := io.allocate(3).valid && FuType.mduCanAccept(io.allocate(3).bits.ctrl.fuType)
  mul_rs_1.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.slice(2, 4).map(_.data))
  mul_rs_1.io.srcRegValue(1) <> VecInit(intRf.io.readPorts.slice(6, 8).map(_.data))
  mul_rs_1.io.fastDatas <> mulFastData ++ aluFastData
  mul_rs_1.io.deq(0) <> io.issue(2)

  // the single ALU station takes all four integer allocate ports
  alu_rs_0.io.fromDispatch <> VecInit(io.allocate.take(4))
  for (i <- 0 until 4) {
    alu_rs_0.io.fromDispatch(i).valid := io.allocate(i).valid && FuType.aluCanAccept(io.allocate(i).bits.ctrl.fuType)
  }
  alu_rs_0.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.take(2).map(_.data))
  alu_rs_0.io.srcRegValue(1) <> VecInit(intRf.io.readPorts.slice(2, 4).map(_.data))
  alu_rs_0.io.srcRegValue(2) <> VecInit(intRf.io.readPorts.slice(4, 6).map(_.data))
  alu_rs_0.io.srcRegValue(3) <> VecInit(intRf.io.readPorts.slice(6, 8).map(_.data))
  alu_rs_0.io.fastDatas <> mulFastData ++ aluFastData ++ memFastData
  alu_rs_0.io.deq <> io.issue.slice(3, 7)

  // an integer allocate port is ready when any station sharing it accepts the uop
  io.allocate(0).ready := jmp_rs.io.fromDispatch(0).fire() || mul_rs_0.io.fromDispatch(0).fire() || alu_rs_0.io.fromDispatch(0).fire()
  io.allocate(1).ready := mul_rs_1.io.fromDispatch(0).fire() || alu_rs_0.io.fromDispatch(1).fire()
  io.allocate(2).ready := mul_rs_0.io.fromDispatch(1).fire() || alu_rs_0.io.fromDispatch(2).fire()
  io.allocate(3).ready := mul_rs_1.io.fromDispatch(1).fire() || alu_rs_0.io.fromDispatch(3).fire()

  // ---- floating-point reservation stations ----
  // the FMAC station takes allocate ports 4-7, three fp read ports per uop
  fmac_rs0.io.fromDispatch <> VecInit(io.allocate.slice(4, 8))
  for (i <- 0 until 4) {
    fmac_rs0.io.fromDispatch(i).valid := io.allocate(i + 4).valid && FuType.fmacCanAccept(io.allocate(i + 4).bits.ctrl.fuType)
    fmac_rs0.io.srcRegValue(i) <> VecInit(fpRf.io.readPorts.slice(3*i, 3*i+3).map(_.data))
  }
  fmac_rs0.io.fastDatas <> fmaFastData
  fmac_rs0.io.deq <> io.issue.slice(7, 11)

  // fmisc_rs0 takes allocate ports 4 and 6
  fmisc_rs0.io.fromDispatch <> VecInit(io.allocate.slice(4, 5) ++ io.allocate.slice(6, 7))
  for (i <- 0 until 2) {
    fmisc_rs0.io.fromDispatch(i).valid := io.allocate(i*2+4).valid && FuType.fmiscCanAccept(io.allocate(i*2+4).bits.ctrl.fuType)
  }
  fmisc_rs0.io.srcRegValue(0) <> VecInit(fpRf.io.readPorts.slice(0, 2).map(_.data))
  fmisc_rs0.io.srcRegValue(1) <> VecInit(fpRf.io.readPorts.slice(6, 8).map(_.data))
  fmisc_rs0.io.fastDatas <> fmaFastData
  fmisc_rs0.io.deq <> io.issue.slice(11, 12)

  // fmisc_rs1 takes allocate ports 5 and 7
  fmisc_rs1.io.fromDispatch <> VecInit(io.allocate.slice(5, 6) ++ io.allocate.slice(7, 8))
  for (i <- 0 until 2) {
    fmisc_rs1.io.fromDispatch(i).valid := io.allocate(i*2+5).valid && FuType.fmiscCanAccept(io.allocate(i*2+5).bits.ctrl.fuType)
  }
  fmisc_rs1.io.srcRegValue(0) <> VecInit(fpRf.io.readPorts.slice(3, 5).map(_.data))
  fmisc_rs1.io.srcRegValue(1) <> VecInit(fpRf.io.readPorts.slice(9, 11).map(_.data))
  fmisc_rs1.io.fastDatas <> fmaFastData
  fmisc_rs1.io.deq <> io.issue.slice(12, 13)

  // a floating-point allocate port is ready when any station sharing it accepts the uop
  io.allocate(4).ready := fmisc_rs0.io.fromDispatch(0).fire() || fmac_rs0.io.fromDispatch(0).fire()
  io.allocate(5).ready := fmisc_rs1.io.fromDispatch(0).fire() || fmac_rs0.io.fromDispatch(1).fire()
  io.allocate(6).ready := fmisc_rs0.io.fromDispatch(1).fire() || fmac_rs0.io.fromDispatch(2).fire()
  io.allocate(7).ready := fmisc_rs1.io.fromDispatch(1).fire() || fmac_rs0.io.fromDispatch(3).fire()

  // ---- load / store reservation stations ----
  // each of these stations owns its allocate port exclusively
  load_rs0.io.fromDispatch <> io.allocate.slice(8, 9)
  load_rs0.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.slice(8, 9).map(_.data))
  load_rs0.io.fastDatas <> mulFastData ++ aluFastData ++ memFastData
  load_rs0.io.deq <> io.issue.slice(13, 14)

  load_rs1.io.fromDispatch <> io.allocate.slice(9, 10)
  load_rs1.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.slice(9, 10).map(_.data))
  load_rs1.io.fastDatas <> mulFastData ++ aluFastData ++ memFastData
  load_rs1.io.deq <> io.issue.slice(14, 15)

  store_rs0.io.fromDispatch <> io.allocate.slice(10, 11)
  store_rs0.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.slice(10, 12).map(_.data))
  // if the uop's second source was marked as fp (sampled one cycle earlier),
  // take that operand from the fp register file instead of the integer one
  when (RegNext(store_rs0.io.fromDispatch(0).bits.ctrl.srcType(1) === SrcType.fp)) {
    store_rs0.io.srcRegValue(0)(1) := fpRf.io.readPorts(12).data
  }
  store_rs0.io.fastDatas <> mulFastData ++ aluFastData
  store_rs0.io.deq <> io.issue.slice(15, 16)

  store_rs1.io.fromDispatch <> io.allocate.slice(11, 12)
  store_rs1.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.slice(12, 14).map(_.data))
  // same fp-operand override as store_rs0, using fp read port 13
  when (RegNext(store_rs1.io.fromDispatch(0).bits.ctrl.srcType(1) === SrcType.fp)) {
    store_rs1.io.srcRegValue(0)(1) := fpRf.io.readPorts(13).data
  }
  store_rs1.io.fastDatas <> mulFastData ++ aluFastData
  store_rs1.io.deq <> io.issue.slice(16, 17)

  // ---- fast wake-up uops ----
  // the order of each concatenation matches the order of the corresponding fastDatas
  val aluFastUop = alu_rs_0.io.fastUopOut
  val mulFastUop = mul_rs_0.io.fastUopOut ++ mul_rs_1.io.fastUopOut
  val memFastUop = io.otherFastWakeup
  val fmacFastUop = fmac_rs0.io.fastUopOut
  jmp_rs.io.fastUopsIn := mulFastUop ++ aluFastUop
  mul_rs_0.io.fastUopsIn := mulFastUop ++ aluFastUop
  mul_rs_1.io.fastUopsIn := mulFastUop ++ aluFastUop
  alu_rs_0.io.fastUopsIn := mulFastUop ++ aluFastUop ++ memFastUop
  fmac_rs0.io.fastUopsIn := fmacFastUop
  fmisc_rs0.io.fastUopsIn := fmacFastUop
  fmisc_rs1.io.fastUopsIn := fmacFastUop
  load_rs0.io.fastUopsIn := mulFastUop ++ aluFastUop ++ memFastUop
  load_rs1.io.fastUopsIn := mulFastUop ++ aluFastUop ++ memFastUop
  store_rs0.io.fastUopsIn := mulFastUop ++ aluFastUop
  store_rs1.io.fastUopsIn := mulFastUop ++ aluFastUop

  // ---- slow wake-up (writeback) ports ----
  // integer and load stations listen to writeback 0-7, fp stations to 8-15,
  // store stations to all sixteen ports
  jmp_rs.io.slowPorts := io.writeback.take(8)
  mul_rs_0.io.slowPorts := io.writeback.take(8)
  mul_rs_1.io.slowPorts := io.writeback.take(8)
  alu_rs_0.io.slowPorts := io.writeback.take(8)
  fmac_rs0.io.slowPorts := io.writeback.drop(8)
  fmisc_rs0.io.slowPorts := io.writeback.drop(8)
  fmisc_rs1.io.slowPorts := io.writeback.drop(8)
  load_rs0.io.slowPorts := io.writeback.take(8)
  load_rs1.io.slowPorts := io.writeback.take(8)
  store_rs0.io.slowPorts := io.writeback
  store_rs1.io.slowPorts := io.writeback

  // load-store specific connections
  load_rs0.io.memfeedback <> io.replay(0)
  load_rs1.io.memfeedback <> io.replay(1)
  store_rs0.io.memfeedback <> io.replay(2)
  store_rs1.io.memfeedback <> io.replay(3)
  load_rs0.io.rsIdx <> io.rsIdx(0)
  load_rs1.io.rsIdx <> io.rsIdx(1)
  store_rs0.io.rsIdx <> io.rsIdx(2)
  store_rs1.io.rsIdx <> io.rsIdx(3)
  load_rs0.io.isFirstIssue <> io.isFirstIssue(0)
  load_rs1.io.isFirstIssue <> io.isFirstIssue(1)
  store_rs0.io.isFirstIssue <> io.isFirstIssue(2)
  store_rs1.io.isFirstIssue <> io.isFirstIssue(3)
  store_rs0.io.stData <> io.stData(0)
  store_rs1.io.stData <> io.stData(1)
  store_rs0.io.stIssuePtr <> io.stIssuePtr
  store_rs1.io.stIssuePtr <> io.stIssuePtr
  load_rs0.io.stIssuePtr <> io.stIssuePtr
  load_rs1.io.stIssuePtr <> io.stIssuePtr

  // regfile write ports
  intRf.io.writePorts.zip(io.writeback.take(8)).foreach {
    case (rf, wb) =>
      // integer writes are additionally gated by the uop's rfWen
      rf.wen := wb.valid && wb.bits.uop.ctrl.rfWen
      rf.addr := wb.bits.uop.pdest
      rf.data := wb.bits.data
  }
  fpRf.io.writePorts.zip(io.writeback.drop(8)).foreach{
    case (rf, wb) =>
      rf.wen := wb.valid
      rf.addr := wb.bits.uop.pdest
      rf.data := wb.bits.data
  }

  // debug read ports default to DontCare; when not building for FPGA they are
  // addressed by the debug rename tables and their data is passed to difftest
  intRf.io.debug_rports := DontCare
  fpRf.io.debug_rports := DontCare
  if (!env.FPGAPlatform) {
    for ((rport, rat) <- intRf.io.debug_rports.zip(io.debug_int_rat)) {
      rport.addr := rat
    }
    val difftest = Module(new DifftestArchIntRegState)
    difftest.io.clock := clock
    difftest.io.coreid := hardId.U
    difftest.io.gpr := VecInit(intRf.io.debug_rports.map(_.data))
  }
  if (!env.FPGAPlatform) {
    for ((rport, rat) <- fpRf.io.debug_rports.zip(io.debug_fp_rat)) {
      rport.addr := rat
    }
    val difftest = Module(new DifftestArchFpRegState)
    difftest.io.clock := clock
    difftest.io.coreid := hardId.U
    difftest.io.fpr := VecInit(fpRf.io.debug_rports.map(_.data))
  }
}
......@@ -57,7 +57,6 @@ class Dispatch(implicit p: Parameters) extends XSModule {
val readIntState= Vec(NRIntReadPorts, Flipped(new BusyTableReadIO))
val readFpState = Vec(NRFpReadPorts, Flipped(new BusyTableReadIO))
// to reservation stations
val numExist = Input(Vec(exuParameters.ExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.ExuCnt, DecoupledIO(new MicroOp))
// send reg file read port index to reservation stations
val readPortIndex = new Bundle {
......@@ -117,7 +116,6 @@ class Dispatch(implicit p: Parameters) extends XSModule {
intDispatch.io.fromDq <> intDq.io.deq
intDispatch.io.readRf.zipWithIndex.map({case (r, i) => r <> io.readIntRf(i)})
intDispatch.io.readState.zipWithIndex.map({case (r, i) => r <> io.readIntState(i)})
intDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(i)})
intDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(i)})
// intDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i)})
intDispatch.io.readPortIndex <> io.readPortIndex.intIndex
......@@ -127,7 +125,6 @@ class Dispatch(implicit p: Parameters) extends XSModule {
fpDispatch.io.fromDq <> fpDq.io.deq
fpDispatch.io.readRf.zipWithIndex.map({case (r, i) => r <> io.readFpRf(i)})
fpDispatch.io.readState.zipWithIndex.map({case (r, i) => r <> io.readFpState(i)})
fpDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(i + exuParameters.IntExuCnt)})
fpDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(i + exuParameters.IntExuCnt)})
// fpDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i + exuParameters.IntExuCnt)})
fpDispatch.io.readPortIndex <> io.readPortIndex.fpIndex
......@@ -139,7 +136,6 @@ class Dispatch(implicit p: Parameters) extends XSModule {
lsDispatch.io.readFpRf.zipWithIndex.map({case (r, i) => r <> io.readFpRf(i + 12)})
lsDispatch.io.readIntState.zipWithIndex.map({case (r, i) => r <> io.readIntState(i + 8)})
lsDispatch.io.readFpState.zipWithIndex.map({case (r, i) => r <> io.readFpState(i + 12)})
lsDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
lsDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
// lsDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
......
......@@ -27,25 +27,35 @@ class Dispatch2Fp(implicit p: Parameters) extends XSModule {
val fromDq = Flipped(Vec(dpParams.FpDqDeqWidth, DecoupledIO(new MicroOp)))
val readRf = Vec(NRFpReadPorts - exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val readState = Vec(NRFpReadPorts - exuParameters.StuCnt, Flipped(new BusyTableReadIO))
val numExist = Input(Vec(exuParameters.FpExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.FpExuCnt, DecoupledIO(new MicroOp))
val readPortIndex = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil((NRFpReadPorts - exuParameters.StuCnt) / 3).W)))
})
// Default every enqueue-port signal to DontCare; the connections below override
// the signals that are actually used.
io.enqIQCtrl <> DontCare
// Bulk-connect the dispatch-queue outputs to the first four enqueue ports.
// NOTE(review): assumes dpParams.FpDqDeqWidth is 4 — TODO confirm.
io.fromDq <> io.enqIQCtrl.take(4)
// Enqueue ports 4 and 5 never present a valid uop here.
io.enqIQCtrl(4).valid := false.B
io.enqIQCtrl(5).valid := false.B
// Enqueue port i drives regfile read ports 3*i .. 3*i+2 with its three psrc values.
for (i <- 0 until 4) {
io.readRf(3*i) := io.enqIQCtrl(i).bits.psrc(0)
io.readRf(3*i+1) := io.enqIQCtrl(i).bits.psrc(1)
io.readRf(3*i+2) := io.enqIQCtrl(i).bits.psrc(2)
}
/**
* Part 1: generate indexes for reservation stations
*/
// val fmacIndexGen = Module(new IndexMapping(dpParams.FpDqDeqWidth, exuParameters.FmacCnt, true))
val fmacCanAccept = VecInit(io.fromDq.map(deq => deq.valid && FuType.fmacCanAccept(deq.bits.ctrl.fuType)))
val (fmacPriority, fmacIndex) = PriorityGen((0 until exuParameters.FmacCnt).map(i => io.numExist(i)))
// val fmacCanAccept = VecInit(io.fromDq.map(deq => deq.valid && FuType.fmacCanAccept(deq.bits.ctrl.fuType)))
// val (fmacPriority, fmacIndex) = PriorityGen((0 until exuParameters.FmacCnt).map(i => 0.U))
// fmacIndexGen.io.validBits := fmacCanAccept
// fmacIndexGen.io.priority := fmacPriority
val fmiscIndexGen = Module(new IndexMapping(dpParams.FpDqDeqWidth, exuParameters.FmiscCnt, true))
val fmiscCanAccept = VecInit(io.fromDq.map(deq => deq.valid && FuType.fmiscCanAccept(deq.bits.ctrl.fuType)))
val (fmiscPriority, _) = PriorityGen((0 until exuParameters.FmiscCnt).map(i => io.numExist(i+exuParameters.FmacCnt)))
fmiscIndexGen.io.validBits := fmiscCanAccept
fmiscIndexGen.io.priority := fmiscPriority
// val fmiscIndexGen = Module(new IndexMapping(dpParams.FpDqDeqWidth, exuParameters.FmiscCnt, true))
// val fmiscCanAccept = VecInit(io.fromDq.map(deq => deq.valid && FuType.fmiscCanAccept(deq.bits.ctrl.fuType)))
// val (fmiscPriority, _) = PriorityGen((0 until exuParameters.FmiscCnt).map(i => 0.U))
// fmiscIndexGen.io.validBits := fmiscCanAccept
// fmiscIndexGen.io.priority := fmiscPriority
// val allIndexGen = Seq(fmacIndexGen, fmiscIndexGen)
// val validVec = allIndexGen.map(_.io.mapping.map(_.valid)).reduceLeft(_ ++ _)
......@@ -78,63 +88,66 @@ class Dispatch2Fp(implicit p: Parameters) extends XSModule {
io.readState(3*i ).req := io.fromDq(i).bits.psrc(0)
io.readState(3*i+1).req := io.fromDq(i).bits.psrc(1)
io.readState(3*i+2).req := io.fromDq(i).bits.psrc(2)
io.enqIQCtrl(i).bits.srcState(0) := io.readState(3*i).resp
io.enqIQCtrl(i).bits.srcState(1) := io.readState(3*i+1).resp
io.enqIQCtrl(i).bits.srcState(2) := io.readState(3*i+2).resp
}
io.readRf(0) := io.enqIQCtrl(0).bits.psrc(0)
io.readRf(1) := io.enqIQCtrl(0).bits.psrc(1)
io.readRf(2) := io.enqIQCtrl(0).bits.psrc(2)
io.readRf(3) := io.enqIQCtrl(1).bits.psrc(0)
io.readRf(4) := io.enqIQCtrl(1).bits.psrc(1)
io.readRf(5) := io.enqIQCtrl(1).bits.psrc(2)
io.readRf(6) := Mux(io.enqIQCtrl(2).valid, io.enqIQCtrl(2).bits.psrc(0), io.enqIQCtrl(4).bits.psrc(0))
io.readRf(7) := Mux(io.enqIQCtrl(2).valid, io.enqIQCtrl(2).bits.psrc(1), io.enqIQCtrl(4).bits.psrc(1))
io.readRf(8) := Mux(io.enqIQCtrl(2).valid, io.enqIQCtrl(2).bits.psrc(2), io.enqIQCtrl(4).bits.psrc(2))
io.readRf(9) := Mux(io.enqIQCtrl(3).valid, io.enqIQCtrl(3).bits.psrc(0), io.enqIQCtrl(5).bits.psrc(0))
io.readRf(10) := Mux(io.enqIQCtrl(3).valid, io.enqIQCtrl(3).bits.psrc(1), io.enqIQCtrl(5).bits.psrc(1))
io.readRf(11) := Mux(io.enqIQCtrl(3).valid, io.enqIQCtrl(3).bits.psrc(2), io.enqIQCtrl(5).bits.psrc(2))
// io.readRf(0) := io.enqIQCtrl(0).bits.psrc(0)
// io.readRf(1) := io.enqIQCtrl(0).bits.psrc(1)
// io.readRf(2) := io.enqIQCtrl(0).bits.psrc(2)
// io.readRf(3) := io.enqIQCtrl(1).bits.psrc(0)
// io.readRf(4) := io.enqIQCtrl(1).bits.psrc(1)
// io.readRf(5) := io.enqIQCtrl(1).bits.psrc(2)
// io.readRf(6) := Mux(io.enqIQCtrl(2).valid, io.enqIQCtrl(2).bits.psrc(0), io.enqIQCtrl(4).bits.psrc(0))
// io.readRf(7) := Mux(io.enqIQCtrl(2).valid, io.enqIQCtrl(2).bits.psrc(1), io.enqIQCtrl(4).bits.psrc(1))
// io.readRf(8) := Mux(io.enqIQCtrl(2).valid, io.enqIQCtrl(2).bits.psrc(2), io.enqIQCtrl(4).bits.psrc(2))
// io.readRf(9) := Mux(io.enqIQCtrl(3).valid, io.enqIQCtrl(3).bits.psrc(0), io.enqIQCtrl(5).bits.psrc(0))
// io.readRf(10) := Mux(io.enqIQCtrl(3).valid, io.enqIQCtrl(3).bits.psrc(1), io.enqIQCtrl(5).bits.psrc(1))
// io.readRf(11) := Mux(io.enqIQCtrl(3).valid, io.enqIQCtrl(3).bits.psrc(2), io.enqIQCtrl(5).bits.psrc(2))
/**
* Part 3: dispatch to reservation stations
*/
// val fmacReady = Cat(io.enqIQCtrl.take(exuParameters.FmacCnt).map(_.ready)).andR
val fmiscReady = Cat(io.enqIQCtrl.drop(exuParameters.FmacCnt).map(_.ready)).andR
for (i <- 0 until exuParameters.FpExuCnt) {
val enq = io.enqIQCtrl(i)
val deqIndex = if (i < exuParameters.FmacCnt) fmacPriority(i) else fmiscIndexGen.io.mapping(i-exuParameters.FmacCnt).bits
if (i < exuParameters.FmacCnt) {
enq.valid := fmacCanAccept(fmacPriority(i))//fmacIndexGen.io.mapping(i).valid && fmacReady
}
else {
enq.valid := fmiscIndexGen.io.mapping(i - exuParameters.FmacCnt).valid && fmiscReady && !io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid
}
enq.bits := io.fromDq(deqIndex).bits
val src1Ready = VecInit((0 until 4).map(i => io.readState(i * 3).resp))
val src2Ready = VecInit((0 until 4).map(i => io.readState(i * 3 + 1).resp))
val src3Ready = VecInit((0 until 4).map(i => io.readState(i * 3 + 2).resp))
enq.bits.srcState(0) := src1Ready(deqIndex)
enq.bits.srcState(1) := src2Ready(deqIndex)
enq.bits.srcState(2) := src3Ready(deqIndex)
XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
p"srcState(${enq.bits.srcState(0)} ${enq.bits.srcState(1)} ${enq.bits.srcState(2)}) " +
p"enters reservation station $i from ${deqIndex}\n")
}
// val fmiscReady = Cat(io.enqIQCtrl.drop(exuParameters.FmacCnt).map(_.ready)).andR
// for (i <- 0 until exuParameters.FpExuCnt) {
// val enq = io.enqIQCtrl(i)
// val deqIndex = if (i < exuParameters.FmacCnt) fmacPriority(i) else fmiscIndexGen.io.mapping(i-exuParameters.FmacCnt).bits
// if (i < exuParameters.FmacCnt) {
// enq.valid := fmacCanAccept(fmacPriority(i))//fmacIndexGen.io.mapping(i).valid && fmacReady
// }
// else {
// enq.valid := fmiscIndexGen.io.mapping(i - exuParameters.FmacCnt).valid && fmiscReady && !io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid
// }
// enq.bits := io.fromDq(deqIndex).bits
//
// val src1Ready = VecInit((0 until 4).map(i => io.readState(i * 3).resp))
// val src2Ready = VecInit((0 until 4).map(i => io.readState(i * 3 + 1).resp))
// val src3Ready = VecInit((0 until 4).map(i => io.readState(i * 3 + 2).resp))
// enq.bits.srcState(0) := src1Ready(deqIndex)
// enq.bits.srcState(1) := src2Ready(deqIndex)
// enq.bits.srcState(2) := src3Ready(deqIndex)
//
// XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
// p"srcState(${enq.bits.srcState(0)} ${enq.bits.srcState(1)} ${enq.bits.srcState(2)}) " +
// p"enters reservation station $i from ${deqIndex}\n")
// }
/**
* Part 4: response to dispatch queue
*/
val fmisc2CanOut = !(fmiscCanAccept(0) && fmiscCanAccept(1))
val fmisc3CanOut = !(fmiscCanAccept(0) && fmiscCanAccept(1) || fmiscCanAccept(0) && fmiscCanAccept(2) || fmiscCanAccept(1) && fmiscCanAccept(2))
val fmacReadyVec = VecInit(io.enqIQCtrl.take(4).map(_.ready))
for (i <- 0 until dpParams.FpDqDeqWidth) {
io.fromDq(i).ready := fmacCanAccept(i) && fmacReadyVec(fmacIndex(i)) ||
fmiscCanAccept(i) && (if (i <= 1) true.B else if (i == 2) fmisc2CanOut else fmisc3CanOut) && fmiscReady && !io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid
XSInfo(io.fromDq(i).fire(),
p"pc 0x${Hexadecimal(io.fromDq(i).bits.cf.pc)} leaves Fp dispatch queue $i with nroq ${io.fromDq(i).bits.roqIdx}\n")
XSDebug(io.fromDq(i).valid && !io.fromDq(i).ready,
p"pc 0x${Hexadecimal(io.fromDq(i).bits.cf.pc)} waits at Fp dispatch queue with index $i\n")
}
// val fmisc2CanOut = !(fmiscCanAccept(0) && fmiscCanAccept(1))
// val fmisc3CanOut = !(fmiscCanAccept(0) && fmiscCanAccept(1) || fmiscCanAccept(0) && fmiscCanAccept(2) || fmiscCanAccept(1) && fmiscCanAccept(2))
// val fmacReadyVec = VecInit(io.enqIQCtrl.take(4).map(_.ready))
// for (i <- 0 until dpParams.FpDqDeqWidth) {
// io.fromDq(i).ready := fmacCanAccept(i) && fmacReadyVec(fmacIndex(i)) ||
// fmiscCanAccept(i) && (if (i <= 1) true.B else if (i == 2) fmisc2CanOut else fmisc3CanOut) && fmiscReady && !io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid
//
// XSInfo(io.fromDq(i).fire(),
// p"pc 0x${Hexadecimal(io.fromDq(i).bits.cf.pc)} leaves Fp dispatch queue $i with nroq ${io.fromDq(i).bits.roqIdx}\n")
// XSDebug(io.fromDq(i).valid && !io.fromDq(i).ready,
// p"pc 0x${Hexadecimal(io.fromDq(i).bits.cf.pc)} waits at Fp dispatch queue with index $i\n")
// }
XSError(PopCount(io.fromDq.map(_.fire())) =/= PopCount(io.enqIQCtrl.map(_.fire())), "deq =/= enq\n")
/**
......@@ -162,29 +175,29 @@ class Dispatch2Fp(implicit p: Parameters) extends XSModule {
// p"(${readPortIndexReg(i)+2.U}, ${uopReg(i).psrc(2)}, ${Hexadecimal(io.enqIQData(i).src(2))})\n")
// }
XSPerfAccumulate("in", PopCount(io.fromDq.map(_.valid)))
XSPerfAccumulate("out", PopCount(io.enqIQCtrl.map(_.fire())))
XSPerfAccumulate("out_fmac0", io.enqIQCtrl(0).fire())
XSPerfAccumulate("out_fmac1", io.enqIQCtrl(1).fire())
XSPerfAccumulate("out_fmac2", io.enqIQCtrl(2).fire())
XSPerfAccumulate("out_fmac3", io.enqIQCtrl(3).fire())
XSPerfAccumulate("out_fmisc0", io.enqIQCtrl(4).fire())
XSPerfAccumulate("out_fmisc1", io.enqIQCtrl(5).fire())
val block_num = PopCount(io.fromDq.map(deq => deq.valid && !deq.ready))
XSPerfAccumulate("blocked", block_num)
XSPerfAccumulate("blocked_index", Mux(block_num =/= 0.U, PriorityEncoder(io.fromDq.map(deq => deq.valid && !deq.ready)), 0.U))
XSPerfAccumulate("misc_deq", PopCount(fmiscCanAccept))
XSPerfAccumulate("misc_deq_exceed_limit", Mux(PopCount(fmiscCanAccept) >= 2.U, PopCount(fmiscCanAccept) - 2.U, 0.U))
XSPerfAccumulate("mac0_blocked_by_mac0", io.enqIQCtrl(0).valid && !io.enqIQCtrl(0).ready)
XSPerfAccumulate("mac1_blocked_by_mac1", io.enqIQCtrl(1).valid && !io.enqIQCtrl(1).ready)
XSPerfAccumulate("mac2_blocked_by_mac2", io.enqIQCtrl(2).valid && !io.enqIQCtrl(2).ready)
XSPerfAccumulate("mac3_blocked_by_mac3", io.enqIQCtrl(3).valid && !io.enqIQCtrl(3).ready)
XSPerfAccumulate("misc0_blocked_by_mac", fmiscIndexGen.io.mapping(0).valid && fmiscReady && (io.enqIQCtrl(2).valid || io.enqIQCtrl(3).valid))
XSPerfAccumulate("misc0_blocked_by_mac2", fmiscIndexGen.io.mapping(0).valid && fmiscReady && io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid)
XSPerfAccumulate("misc0_blocked_by_mac3", fmiscIndexGen.io.mapping(0).valid && fmiscReady && !io.enqIQCtrl(2).valid && io.enqIQCtrl(3).valid)
XSPerfAccumulate("misc0_blocked_by_misc1", fmiscIndexGen.io.mapping(0).valid && io.enqIQCtrl(4).ready && !io.enqIQCtrl(5).ready && !io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid)
XSPerfAccumulate("misc1_blocked_by_mac", fmiscIndexGen.io.mapping(1).valid && fmiscReady && (io.enqIQCtrl(2).valid || io.enqIQCtrl(3).valid))
XSPerfAccumulate("misc1_blocked_by_mac2", fmiscIndexGen.io.mapping(1).valid && fmiscReady && io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid)
XSPerfAccumulate("misc1_blocked_by_mac3", fmiscIndexGen.io.mapping(1).valid && fmiscReady && !io.enqIQCtrl(2).valid && io.enqIQCtrl(3).valid)
XSPerfAccumulate("misc1_blocked_by_misc0", fmiscIndexGen.io.mapping(1).valid && !io.enqIQCtrl(4).ready && io.enqIQCtrl(5).ready && !io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid)
// XSPerfAccumulate("in", PopCount(io.fromDq.map(_.valid)))
// XSPerfAccumulate("out", PopCount(io.enqIQCtrl.map(_.fire())))
// XSPerfAccumulate("out_fmac0", io.enqIQCtrl(0).fire())
// XSPerfAccumulate("out_fmac1", io.enqIQCtrl(1).fire())
// XSPerfAccumulate("out_fmac2", io.enqIQCtrl(2).fire())
// XSPerfAccumulate("out_fmac3", io.enqIQCtrl(3).fire())
// XSPerfAccumulate("out_fmisc0", io.enqIQCtrl(4).fire())
// XSPerfAccumulate("out_fmisc1", io.enqIQCtrl(5).fire())
// val block_num = PopCount(io.fromDq.map(deq => deq.valid && !deq.ready))
// XSPerfAccumulate("blocked", block_num)
// XSPerfAccumulate("blocked_index", Mux(block_num =/= 0.U, PriorityEncoder(io.fromDq.map(deq => deq.valid && !deq.ready)), 0.U))
// XSPerfAccumulate("misc_deq", PopCount(fmiscCanAccept))
// XSPerfAccumulate("misc_deq_exceed_limit", Mux(PopCount(fmiscCanAccept) >= 2.U, PopCount(fmiscCanAccept) - 2.U, 0.U))
// XSPerfAccumulate("mac0_blocked_by_mac0", io.enqIQCtrl(0).valid && !io.enqIQCtrl(0).ready)
// XSPerfAccumulate("mac1_blocked_by_mac1", io.enqIQCtrl(1).valid && !io.enqIQCtrl(1).ready)
// XSPerfAccumulate("mac2_blocked_by_mac2", io.enqIQCtrl(2).valid && !io.enqIQCtrl(2).ready)
// XSPerfAccumulate("mac3_blocked_by_mac3", io.enqIQCtrl(3).valid && !io.enqIQCtrl(3).ready)
// XSPerfAccumulate("misc0_blocked_by_mac", fmiscIndexGen.io.mapping(0).valid && fmiscReady && (io.enqIQCtrl(2).valid || io.enqIQCtrl(3).valid))
// XSPerfAccumulate("misc0_blocked_by_mac2", fmiscIndexGen.io.mapping(0).valid && fmiscReady && io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid)
// XSPerfAccumulate("misc0_blocked_by_mac3", fmiscIndexGen.io.mapping(0).valid && fmiscReady && !io.enqIQCtrl(2).valid && io.enqIQCtrl(3).valid)
// XSPerfAccumulate("misc0_blocked_by_misc1", fmiscIndexGen.io.mapping(0).valid && io.enqIQCtrl(4).ready && !io.enqIQCtrl(5).ready && !io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid)
// XSPerfAccumulate("misc1_blocked_by_mac", fmiscIndexGen.io.mapping(1).valid && fmiscReady && (io.enqIQCtrl(2).valid || io.enqIQCtrl(3).valid))
// XSPerfAccumulate("misc1_blocked_by_mac2", fmiscIndexGen.io.mapping(1).valid && fmiscReady && io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid)
// XSPerfAccumulate("misc1_blocked_by_mac3", fmiscIndexGen.io.mapping(1).valid && fmiscReady && !io.enqIQCtrl(2).valid && io.enqIQCtrl(3).valid)
// XSPerfAccumulate("misc1_blocked_by_misc0", fmiscIndexGen.io.mapping(1).valid && !io.enqIQCtrl(4).ready && io.enqIQCtrl(5).ready && !io.enqIQCtrl(2).valid && !io.enqIQCtrl(3).valid)
}
......@@ -27,7 +27,6 @@ class Dispatch2Int(implicit p: Parameters) extends XSModule {
val fromDq = Flipped(Vec(dpParams.IntDqDeqWidth, DecoupledIO(new MicroOp)))
val readRf = Vec(NRIntReadPorts - NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val readState = Vec(NRIntReadPorts - NRMemReadPorts, Flipped(new BusyTableReadIO))
val numExist = Input(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.IntExuCnt, DecoupledIO(new MicroOp))
val readPortIndex = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(8 / 2).W)))
})
......
......@@ -30,7 +30,6 @@ class Dispatch2Ls(implicit p: Parameters) extends XSModule {
val readFpRf = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val readIntState = Vec(NRMemReadPorts, Flipped(new BusyTableReadIO))
val readFpState = Vec(exuParameters.StuCnt, Flipped(new BusyTableReadIO))
val numExist = Input(Vec(exuParameters.LsExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
})
......@@ -39,13 +38,13 @@ class Dispatch2Ls(implicit p: Parameters) extends XSModule {
*/
val loadIndexGen = Module(new IndexMapping(dpParams.LsDqDeqWidth, exuParameters.LduCnt, true))
val loadCanAccept = VecInit(io.fromDq.map(deq => deq.valid && FuType.loadCanAccept(deq.bits.ctrl.fuType)))
val (loadPriority, _) = PriorityGen((0 until exuParameters.LduCnt).map(i => io.numExist(i)))
val (loadPriority, _) = PriorityGen((0 until exuParameters.LduCnt).map(i => 0.U))
loadIndexGen.io.validBits := loadCanAccept
loadIndexGen.io.priority := loadPriority
val storeIndexGen = Module(new IndexMapping(dpParams.LsDqDeqWidth, exuParameters.StuCnt, true))
val storeCanAccept = VecInit(io.fromDq.map(deq => deq.valid && FuType.storeCanAccept(deq.bits.ctrl.fuType)))
val (storePriority, _) = PriorityGen((0 until exuParameters.StuCnt).map(i => io.numExist(i+exuParameters.LduCnt)))
val (storePriority, _) = PriorityGen((0 until exuParameters.StuCnt).map(i => 0.U))
storeIndexGen.io.validBits := storeCanAccept
storeIndexGen.io.priority := storePriority
......
......@@ -50,29 +50,29 @@ class DataArrayMultiWriteIO(numEntries: Int, numSrc: Int, dataBits: Int)(implici
new DataArrayMultiWriteIO(numEntries, numSrc, dataBits).asInstanceOf[this.type]
}
class DataArrayIO(config: RSConfig)(implicit p: Parameters) extends XSBundle {
val read = Vec(config.numDeq, new DataArrayReadIO(config.numEntries, config.numSrc, config.dataBits))
val write = Vec(config.numEnq, new DataArrayWriteIO(config.numEntries, config.numSrc, config.dataBits))
val multiWrite = Vec(config.numValueBroadCast, new DataArrayMultiWriteIO(config.numEntries, config.numSrc, config.dataBits))
val delayedWrite = if (config.delayedRf) Vec(config.numEnq, Flipped(ValidIO(UInt(config.dataBits.W)))) else null
class DataArrayIO(params: RSParams)(implicit p: Parameters) extends XSBundle {
val read = Vec(params.numDeq, new DataArrayReadIO(params.numEntries, params.numSrc, params.dataBits))
val write = Vec(params.numEnq, new DataArrayWriteIO(params.numEntries, params.numSrc, params.dataBits))
val multiWrite = Vec(params.numDataCapture, new DataArrayMultiWriteIO(params.numEntries, params.numSrc, params.dataBits))
val delayedWrite = if (params.delayedRf) Vec(params.numEnq, Flipped(ValidIO(UInt(params.dataBits.W)))) else null
override def cloneType: DataArrayIO.this.type =
new DataArrayIO(config).asInstanceOf[this.type]
new DataArrayIO(params).asInstanceOf[this.type]
}
class DataArray(config: RSConfig)(implicit p: Parameters) extends XSModule {
val io = IO(new DataArrayIO(config))
class DataArray(params: RSParams)(implicit p: Parameters) extends XSModule {
val io = IO(new DataArrayIO(params))
for (i <- 0 until config.numSrc) {
val delayedWen = if (i == 1 && config.delayedRf) io.delayedWrite.map(_.valid) else Seq()
val delayedWaddr = if (i == 1 && config.delayedRf) RegNext(VecInit(io.write.map(_.addr))) else Seq()
val delayedWdata = if (i == 1 && config.delayedRf) io.delayedWrite.map(_.bits) else Seq()
for (i <- 0 until params.numSrc) {
val delayedWen = if (i == 1 && params.delayedRf) io.delayedWrite.map(_.valid) else Seq()
val delayedWaddr = if (i == 1 && params.delayedRf) RegNext(VecInit(io.write.map(_.addr))) else Seq()
val delayedWdata = if (i == 1 && params.delayedRf) io.delayedWrite.map(_.bits) else Seq()
val wen = io.write.map(w => w.enable && w.mask(i)) ++ io.multiWrite.map(_.enable) ++ delayedWen
val waddr = io.write.map(_.addr) ++ io.multiWrite.map(_.addr(i)) ++ delayedWaddr
val wdata = io.write.map(_.data(i)) ++ io.multiWrite.map(_.data) ++ delayedWdata
val dataModule = Module(new AsyncRawDataModuleTemplate(UInt(config.dataBits.W), config.numEntries, io.read.length, wen.length))
val dataModule = Module(new AsyncRawDataModuleTemplate(UInt(params.dataBits.W), params.numEntries, io.read.length, wen.length))
dataModule.io.rvec := VecInit(io.read.map(_.addr))
io.read.map(_.data(i)).zip(dataModule.io.rdata).map{ case (d, r) => d := r }
dataModule.io.wen := wen
......@@ -112,7 +112,7 @@ class AluImmExtractor(implicit p: Parameters) extends ImmExtractor(2, 64) {
}
object ImmExtractor {
def apply(config: RSConfig, exuCfg: ExuConfig, uop: MicroOp, data_in: Vec[UInt], pc: UInt, target: UInt)(implicit p: Parameters): Vec[UInt] = {
def apply(config: RSParams, exuCfg: ExuConfig, uop: MicroOp, data_in: Vec[UInt], pc: UInt, target: UInt)(implicit p: Parameters): Vec[UInt] = {
val immExt = exuCfg match {
case JumpExeUnitCfg => {
val ext = Module(new JumpImmExtractor)
......
......@@ -21,30 +21,30 @@ import chisel3.util._
import xiangshan._
import utils._
class PayloadArrayReadIO[T <: Data](gen: T, config: RSConfig) extends Bundle {
val addr = Input(UInt(config.numEntries.W))
class PayloadArrayReadIO[T <: Data](gen: T, params: RSParams) extends Bundle {
val addr = Input(UInt(params.numEntries.W))
val data = Output(gen)
override def cloneType: PayloadArrayReadIO.this.type =
new PayloadArrayReadIO(gen, config).asInstanceOf[this.type]
new PayloadArrayReadIO(gen, params).asInstanceOf[this.type]
}
class PayloadArrayWriteIO[T <: Data](gen: T, config: RSConfig) extends Bundle {
class PayloadArrayWriteIO[T <: Data](gen: T, params: RSParams) extends Bundle {
val enable = Input(Bool())
val addr = Input(UInt(config.numEntries.W))
val addr = Input(UInt(params.numEntries.W))
val data = Input(gen)
override def cloneType: PayloadArrayWriteIO.this.type =
new PayloadArrayWriteIO(gen, config).asInstanceOf[this.type]
new PayloadArrayWriteIO(gen, params).asInstanceOf[this.type]
}
class PayloadArray[T <: Data](gen: T, config: RSConfig)(implicit p: Parameters) extends XSModule {
class PayloadArray[T <: Data](gen: T, params: RSParams)(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle {
val read = Vec(config.numDeq, new PayloadArrayReadIO(gen, config))
val write = Vec(config.numEnq, new PayloadArrayWriteIO(gen, config))
val read = Vec(params.numDeq, new PayloadArrayReadIO(gen, params))
val write = Vec(params.numEnq, new PayloadArrayWriteIO(gen, params))
})
val payload = Reg(Vec(config.numEntries, gen))
val payload = Reg(Vec(params.numEntries, gen))
// read ports
io.read.map(_.data).zip(io.read.map(_.addr)).map {
......@@ -53,7 +53,7 @@ class PayloadArray[T <: Data](gen: T, config: RSConfig)(implicit p: Parameters)
}
// write ports
for (i <- 0 until config.numEntries) {
for (i <- 0 until params.numEntries) {
val wenVec = VecInit(io.write.map(w => w.enable && w.addr(i)))
val wen = wenVec.asUInt.orR
val wdata = Mux1H(wenVec, io.write.map(_.data))
......
......@@ -26,8 +26,7 @@ import xiangshan.mem.{SqPtr, StoreDataBundle}
import scala.math.max
case class RSConfig (
name: String,
case class RSParams(
numEntries: Int,
numEnq: Int,
numDeq: Int,
......@@ -36,7 +35,7 @@ case class RSConfig (
dataIdBits: Int,
numFastWakeup: Int,
numWakeup: Int,
numValueBroadCast: Int,
numDataCapture: Int,
hasFeedback: Boolean = false,
delayedRf: Boolean = false,
fixedLatency: Int = -1,
......@@ -46,74 +45,42 @@ case class RSConfig (
class ReservationStation
(
myName : String,
val exuCfg: ExuConfig,
iqSize : Int,
srcLen: Int,
fastPortsCnt: Int,
slowPortsCnt: Int,
fixedDelay: Int,
fastWakeup: Boolean,
feedback: Boolean,
enqNum: Int,
deqNum: Int
params: RSParams
)(implicit p: Parameters) extends XSModule {
val iqIdxWidth = log2Up(iqSize+1)
val nonBlocked = if (exuCfg == MulDivExeUnitCfg) false else fixedDelay >= 0
val srcNum = if (exuCfg == JumpExeUnitCfg) 2 else max(exuCfg.intSrcCnt, exuCfg.fpSrcCnt)
// require(nonBlocked==fastWakeup)
val config = RSConfig(
name = myName,
numEntries = iqSize,
numEnq = enqNum,
numDeq = deqNum,
numSrc = srcNum,
dataBits = srcLen,
dataIdBits = PhyRegIdxWidth,
numFastWakeup = fastPortsCnt,
// for now alu and fmac are not in slowPorts
numWakeup = fastPortsCnt + (4 + slowPortsCnt),
numValueBroadCast = (4 + slowPortsCnt),
hasFeedback = feedback,
delayedRf = exuCfg == StExeUnitCfg,
fixedLatency = fixedDelay,
checkWaitBit = if (exuCfg == LdExeUnitCfg || exuCfg == StExeUnitCfg) true else false,
optBuf = if (exuCfg == AluExeUnitCfg) true else false
)
val io = IO(new Bundle {
val numExist = Output(UInt(iqIdxWidth.W))
val numExist = Output(UInt(log2Up(params.numEntries + 1).W))
// enq
val fromDispatch = Vec(config.numEnq, Flipped(DecoupledIO(new MicroOp)))
val srcRegValue = Vec(config.numEnq, Input(Vec(srcNum, UInt(srcLen.W))))
val fpRegValue = if (config.delayedRf) Input(UInt(srcLen.W)) else null
val fromDispatch = Vec(params.numEnq, Flipped(DecoupledIO(new MicroOp)))
val srcRegValue = Vec(params.numEnq, Input(Vec(params.numSrc, UInt(params.dataBits.W))))
val fpRegValue = if (params.delayedRf) Input(UInt(params.dataBits.W)) else null
// deq
val deq = Vec(config.numDeq, DecoupledIO(new ExuInput))
val deq = Vec(params.numDeq, DecoupledIO(new ExuInput))
val stData = if (exuCfg == StExeUnitCfg) ValidIO(new StoreDataBundle) else null
val stIssuePtr = if (config.checkWaitBit) Input(new SqPtr()) else null
val stIssuePtr = if (params.checkWaitBit) Input(new SqPtr()) else null
val jumpPc = if(exuCfg == JumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val jalr_target = if(exuCfg == JumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val fastUopOut = Vec(config.numDeq, ValidIO(new MicroOp))
val fastUopsIn = Vec(config.numFastWakeup, Flipped(ValidIO(new MicroOp)))
val fastDatas = Vec(config.numFastWakeup, Input(UInt(srcLen.W)))
val slowPorts = Vec(slowPortsCnt, Flipped(ValidIO(new ExuOutput)))
val fastUopOut = Vec(params.numDeq, ValidIO(new MicroOp))
val fastUopsIn = Vec(params.numFastWakeup, Flipped(ValidIO(new MicroOp)))
val fastDatas = Vec(params.numFastWakeup, Input(UInt(params.dataBits.W)))
val slowPorts = Vec(params.numDataCapture, Flipped(ValidIO(new ExuOutput)))
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
val memfeedback = if (config.hasFeedback) Flipped(ValidIO(new RSFeedback)) else null
val rsIdx = if (config.hasFeedback) Output(UInt(log2Up(iqSize).W)) else null
val isFirstIssue = if (config.hasFeedback) Output(Bool()) else null // NOTE: just use for tlb perf cnt
val memfeedback = if (params.hasFeedback) Flipped(ValidIO(new RSFeedback)) else null
val rsIdx = if (params.hasFeedback) Output(UInt(log2Up(params.numEntries).W)) else null
val isFirstIssue = if (params.hasFeedback) Output(Bool()) else null // NOTE: just use for tlb perf cnt
})
val statusArray = Module(new StatusArray(config))
val select = Module(new SelectPolicy(config))
val dataArray = Module(new DataArray(config))
val payloadArray = Module(new PayloadArray(new MicroOp, config))
val statusArray = Module(new StatusArray(params))
val select = Module(new SelectPolicy(params))
val dataArray = Module(new DataArray(params))
val payloadArray = Module(new PayloadArray(new MicroOp, params))
io.numExist := PopCount(statusArray.io.isValid)
statusArray.io.redirect := io.redirect
......@@ -124,9 +91,9 @@ class ReservationStation
*/
// enqueue from dispatch
select.io.validVec := statusArray.io.isValid
val doEnqueue = Wire(Vec(config.numEnq, Bool()))
val needFpSource = Wire(Vec(config.numEnq, Bool()))
for (i <- 0 until config.numEnq) {
val doEnqueue = Wire(Vec(params.numEnq, Bool()))
val needFpSource = Wire(Vec(params.numEnq, Bool()))
for (i <- 0 until params.numEnq) {
io.fromDispatch(i).ready := select.io.allocate(i).valid
// agreement with dispatch: don't enqueue when io.redirect.valid
doEnqueue(i) := io.fromDispatch(i).fire() && !io.redirect.valid && !io.flush
......@@ -135,12 +102,12 @@ class ReservationStation
statusArray.io.update(i).addr := select.io.allocate(i).bits
statusArray.io.update(i).data.valid := true.B
needFpSource(i) := io.fromDispatch(i).bits.needRfRPort(1, 1, false)
statusArray.io.update(i).data.scheduled := (if (config.delayedRf) needFpSource(i) else false.B)
statusArray.io.update(i).data.blocked := (if (config.checkWaitBit) io.fromDispatch(i).bits.cf.loadWaitBit else false.B)
statusArray.io.update(i).data.credit := (if (config.delayedRf) Mux(needFpSource(i), 2.U, 0.U) else 0.U)
statusArray.io.update(i).data.srcState := VecInit(io.fromDispatch(i).bits.srcIsReady.take(config.numSrc))
statusArray.io.update(i).data.psrc := VecInit(io.fromDispatch(i).bits.psrc.take(config.numSrc))
statusArray.io.update(i).data.srcType := VecInit(io.fromDispatch(i).bits.ctrl.srcType.take(config.numSrc))
statusArray.io.update(i).data.scheduled := (if (params.delayedRf) needFpSource(i) else false.B)
statusArray.io.update(i).data.blocked := (if (params.checkWaitBit) io.fromDispatch(i).bits.cf.loadWaitBit else false.B)
statusArray.io.update(i).data.credit := (if (params.delayedRf) Mux(needFpSource(i), 2.U, 0.U) else 0.U)
statusArray.io.update(i).data.srcState := VecInit(io.fromDispatch(i).bits.srcIsReady.take(params.numSrc))
statusArray.io.update(i).data.psrc := VecInit(io.fromDispatch(i).bits.psrc.take(params.numSrc))
statusArray.io.update(i).data.srcType := VecInit(io.fromDispatch(i).bits.ctrl.srcType.take(params.numSrc))
statusArray.io.update(i).data.roqIdx := io.fromDispatch(i).bits.roqIdx
statusArray.io.update(i).data.sqIdx := io.fromDispatch(i).bits.sqIdx
payloadArray.io.write(i).enable := doEnqueue(i)
......@@ -148,31 +115,15 @@ class ReservationStation
payloadArray.io.write(i).data := io.fromDispatch(i).bits
}
// when params.checkWaitBit is set, we need to block issue until the corresponding store issues
if (config.checkWaitBit) {
if (params.checkWaitBit) {
statusArray.io.stIssuePtr := io.stIssuePtr
}
// wakeup from other RS or function units
val fastNotInSlowWakeup = exuCfg match {
case LdExeUnitCfg => io.fastUopsIn.drop(2).take(4)
case StExeUnitCfg => io.fastUopsIn.drop(2)
case JumpExeUnitCfg => io.fastUopsIn.drop(2)
case MulDivExeUnitCfg => io.fastUopsIn.drop(2)
case AluExeUnitCfg => io.fastUopsIn.drop(2).take(4)
case _ => io.fastUopsIn
}
val fastNotInSlowData = exuCfg match {
case LdExeUnitCfg => io.fastDatas.drop(2).take(4)
case StExeUnitCfg => io.fastDatas.drop(2)
case JumpExeUnitCfg => io.fastDatas.drop(2)
case MulDivExeUnitCfg => io.fastDatas.drop(2)
case AluExeUnitCfg => io.fastDatas.drop(2).take(4)
case _ => io.fastDatas
}
val wakeupValid = io.fastUopsIn.map(_.valid) ++ RegNext(VecInit(fastNotInSlowWakeup.map(_.valid))) ++ io.slowPorts.map(_.valid)
val wakeupDest = io.fastUopsIn.map(_.bits) ++ RegNext(VecInit(fastNotInSlowWakeup.map(_.bits))) ++ io.slowPorts.map(_.bits.uop)
require(wakeupValid.size == config.numWakeup)
require(wakeupDest.size == config.numWakeup)
for (i <- 0 until config.numWakeup) {
val wakeupValid = io.fastUopsIn.map(_.valid) ++ io.slowPorts.map(_.valid)
val wakeupDest = io.fastUopsIn.map(_.bits) ++ io.slowPorts.map(_.bits.uop)
require(wakeupValid.size == params.numWakeup)
require(wakeupDest.size == params.numWakeup)
for (i <- 0 until params.numWakeup) {
statusArray.io.wakeup(i).valid := wakeupValid(i)
statusArray.io.wakeup(i).bits := wakeupDest(i)
}
......@@ -182,10 +133,10 @@ class ReservationStation
*/
// select the issue instructions
select.io.request := statusArray.io.canIssue
for (i <- 0 until config.numDeq) {
for (i <- 0 until params.numDeq) {
select.io.grant(i).ready := io.deq(i).ready
if (config.hasFeedback) {
require(config.numDeq == 1)
if (params.hasFeedback) {
require(params.numDeq == 1)
statusArray.io.issueGranted(0).valid := select.io.grant(0).fire
statusArray.io.issueGranted(0).bits := select.io.grant(0).bits
statusArray.io.deqResp(0).valid := io.memfeedback.valid
......@@ -200,8 +151,8 @@ class ReservationStation
statusArray.io.deqResp(i).bits.success := io.deq(i).ready
}
payloadArray.io.read(i).addr := select.io.grant(i).bits
if (fixedDelay >= 0) {
val wakeupQueue = Module(new WakeupQueue(fixedDelay))
if (params.fixedLatency >= 0) {
val wakeupQueue = Module(new WakeupQueue(params.fixedLatency))
val fuCheck = (if (exuCfg == MulDivExeUnitCfg) payloadArray.io.read(i).data.ctrl.fuType === FuType.mul else true.B)
wakeupQueue.io.in.valid := select.io.grant(i).fire && fuCheck
wakeupQueue.io.in.bits := payloadArray.io.read(i).data
......@@ -219,9 +170,9 @@ class ReservationStation
// for read-before-issue, it's done over the enqueue uop (and store the imm in dataArray to save space)
// lastAllocateUop: Vec(numEnq, new MicroOp)
val lastAllocateUop = RegNext(VecInit(io.fromDispatch.map(_.bits)))
val immBypassedData = Wire(Vec(config.numEnq, Vec(config.numSrc, UInt(config.dataBits.W))))
val immBypassedData = Wire(Vec(params.numEnq, Vec(params.numSrc, UInt(params.dataBits.W))))
for (((uop, data), bypass) <- lastAllocateUop.zip(io.srcRegValue).zip(immBypassedData)) {
bypass := ImmExtractor(config, exuCfg, uop, data, io.jumpPc, io.jalr_target)
bypass := ImmExtractor(params, exuCfg, uop, data, io.jumpPc, io.jalr_target)
}
/**
......@@ -230,30 +181,30 @@ class ReservationStation
* Note: this is only needed when read-before-issue
*/
// dispatch data: the next cycle after enqueue
for (i <- 0 until config.numEnq) {
for (i <- 0 until params.numEnq) {
dataArray.io.write(i).enable := RegNext(doEnqueue(i))
dataArray.io.write(i).mask := RegNext(statusArray.io.update(i).data.srcState)
dataArray.io.write(i).addr := RegNext(select.io.allocate(i).bits)
dataArray.io.write(i).data := immBypassedData(i)
if (config.delayedRf) {
if (params.delayedRf) {
dataArray.io.delayedWrite(i).valid := RegNext(RegNext(doEnqueue(i) && needFpSource(i)))
dataArray.io.delayedWrite(i).bits := io.fpRegValue
}
}
// data broadcast: from function units (only slow wakeup data is needed)
val broadcastValid = RegNext(VecInit(fastNotInSlowWakeup.map(_.valid))) ++ io.slowPorts.map(_.valid)
val broadcastValue = fastNotInSlowData ++ VecInit(io.slowPorts.map(_.bits.data))
require(broadcastValid.size == config.numValueBroadCast)
require(broadcastValue.size == config.numValueBroadCast)
val slowWakeupMatchVec = Wire(Vec(config.numEntries, Vec(config.numSrc, Vec(config.numValueBroadCast, Bool()))))
for (i <- 0 until config.numEntries) {
for (j <- 0 until config.numSrc) {
slowWakeupMatchVec(i)(j) := statusArray.io.wakeupMatch(i)(j).asBools.drop(config.numFastWakeup)
val broadcastValid = io.slowPorts.map(_.valid)
val broadcastValue = VecInit(io.slowPorts.map(_.bits.data))
require(broadcastValid.size == params.numDataCapture)
require(broadcastValue.size == params.numDataCapture)
val slowWakeupMatchVec = Wire(Vec(params.numEntries, Vec(params.numSrc, Vec(params.numDataCapture, Bool()))))
for (i <- 0 until params.numEntries) {
for (j <- 0 until params.numSrc) {
slowWakeupMatchVec(i)(j) := statusArray.io.wakeupMatch(i)(j).asBools.drop(params.numFastWakeup)
}
}
dataArray.io.multiWrite.zipWithIndex.map { case (w, i) =>
w.enable := broadcastValid(i)
for (j <- 0 until config.numSrc) {
for (j <- 0 until params.numSrc) {
w.addr(j) := VecInit(slowWakeupMatchVec.map(_(j)(i))).asUInt
}
w.data := broadcastValue(i)
......@@ -262,8 +213,8 @@ class ReservationStation
/**
* S1: read data from regfile
*/
val s1_out = Wire(Vec(config.numDeq, Decoupled(new ExuInput)))
for (i <- 0 until config.numDeq) {
val s1_out = Wire(Vec(params.numDeq, Decoupled(new ExuInput)))
for (i <- 0 until params.numDeq) {
dataArray.io.read(i).addr := select.io.grant(i).bits
// for read-before-issue, we need to bypass the enqueue data here
// for read-after-issue, we need to bypass the imm here
......@@ -288,7 +239,7 @@ class ReservationStation
s1_out(i).valid := select.io.grant(i).valid && !deqUop.roqIdx.needFlush(io.redirect, io.flush)
s1_out(i).bits := DontCare
for (j <- 0 until config.numSrc) {
for (j <- 0 until params.numSrc) {
s1_out(i).bits.src(j) := deqData(j)
}
s1_out(i).bits.uop := deqUop
......@@ -299,24 +250,24 @@ class ReservationStation
* S1: detect bypass from fast wakeup
*/
// control: check the fast wakeup match
val fastWakeupMatchVec = Wire(Vec(config.numEntries, Vec(config.numSrc, Vec(config.numFastWakeup, Bool()))))
for (i <- 0 until config.numEntries) {
for (j <- 0 until config.numSrc) {
fastWakeupMatchVec(i)(j) := statusArray.io.wakeupMatch(i)(j).asBools.take(config.numFastWakeup)
val fastWakeupMatchVec = Wire(Vec(params.numEntries, Vec(params.numSrc, Vec(params.numFastWakeup, Bool()))))
for (i <- 0 until params.numEntries) {
for (j <- 0 until params.numSrc) {
fastWakeupMatchVec(i)(j) := statusArray.io.wakeupMatch(i)(j).asBools.take(params.numFastWakeup)
}
}
val fastWakeupMatchRegVec = RegNext(fastWakeupMatchVec)
for (i <- 0 until config.numDeq) {
for (i <- 0 until params.numDeq) {
val targetFastWakeupMatch = Mux1H(select.io.grant(i).bits, fastWakeupMatchRegVec)
val wakeupBypassMask = Wire(Vec(config.numFastWakeup, Vec(config.numSrc, Bool())))
for (j <- 0 until config.numFastWakeup) {
val wakeupBypassMask = Wire(Vec(params.numFastWakeup, Vec(params.numSrc, Bool())))
for (j <- 0 until params.numFastWakeup) {
wakeupBypassMask(j) := VecInit(targetFastWakeupMatch.map(_(j)))
}
// data: send to bypass network
// TODO: these should be done outside RS
val bypassNetwork = Module(new BypassNetwork(config.numSrc, config.numFastWakeup, config.dataBits, config.optBuf))
val bypassNetwork = Module(new BypassNetwork(params.numSrc, params.numFastWakeup, params.dataBits, params.optBuf))
bypassNetwork.io.hold := !io.deq(i).ready
bypassNetwork.io.source := s1_out(i).bits.src.take(config.numSrc)
bypassNetwork.io.source := s1_out(i).bits.src.take(params.numSrc)
bypassNetwork.io.bypass.zip(wakeupBypassMask.zip(io.fastDatas)).map { case (by, (m, d)) =>
by.valid := m
by.data := d
......@@ -329,12 +280,12 @@ class ReservationStation
// TODO: these should be done outside RS
PipelineConnect(s1_out(i), io.deq(i), io.deq(i).ready || io.deq(i).bits.uop.roqIdx.needFlush(io.redirect, io.flush), false.B)
val pipeline_fire = s1_out(i).valid && io.deq(i).ready
if (config.hasFeedback) {
if (params.hasFeedback) {
io.rsIdx := RegEnable(OHToUInt(select.io.grant(i).bits), pipeline_fire)
io.isFirstIssue := false.B
}
for (j <- 0 until config.numSrc) {
for (j <- 0 until params.numSrc) {
io.deq(i).bits.src(j) := bypassNetwork.io.target(j)
}
......
......@@ -21,21 +21,21 @@ import chisel3.util._
import xiangshan._
import utils._
class SelectPolicy(config: RSConfig)(implicit p: Parameters) extends XSModule {
class SelectPolicy(params: RSParams)(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle {
// select for enqueue
val validVec = Input(UInt(config.numEntries.W))
val allocate = Vec(config.numEnq, DecoupledIO(UInt(config.numEntries.W)))
val validVec = Input(UInt(params.numEntries.W))
val allocate = Vec(params.numEnq, DecoupledIO(UInt(params.numEntries.W)))
// select for issue
val request = Input(UInt(config.numEntries.W))
val grant = Vec(config.numDeq, DecoupledIO(UInt(config.numEntries.W))) //TODO: optimize it
val request = Input(UInt(params.numEntries.W))
val grant = Vec(params.numDeq, DecoupledIO(UInt(params.numEntries.W))) //TODO: optimize it
})
val policy = if (config.numDeq > 2 && config.numEntries > 32) "oddeven" else if (config.numDeq > 2) "circ" else "naive"
val policy = if (params.numDeq > 2 && params.numEntries > 32) "oddeven" else if (params.numDeq > 2) "circ" else "naive"
val emptyVec = VecInit(io.validVec.asBools.map(v => !v))
val allocate = SelectOne(policy, emptyVec, config.numEnq)
for (i <- 0 until config.numEnq) {
val allocate = SelectOne(policy, emptyVec, params.numEnq)
for (i <- 0 until params.numEnq) {
val sel = allocate.getNthOH(i + 1)
io.allocate(i).valid := sel._1
io.allocate(i).bits := sel._2.asUInt
......@@ -47,8 +47,8 @@ class SelectPolicy(config: RSConfig)(implicit p: Parameters) extends XSModule {
// a better one: select from both directions
val request = io.request.asBools
val select = SelectOne(policy, request, config.numDeq)
for (i <- 0 until config.numDeq) {
val select = SelectOne(policy, request, params.numDeq)
for (i <- 0 until params.numDeq) {
val sel = select.getNthOH(i + 1)
io.grant(i).valid := sel._1
io.grant(i).bits := sel._2.asUInt
......
......@@ -23,61 +23,61 @@ import utils._
import xiangshan.backend.roq.RoqPtr
import xiangshan.mem.SqPtr
/** Update-port bundle of the status array.
  *
  * All three fields are declared as inputs: an enable bit, an entry-select vector
  * that is `numEntries` bits wide, and the [[StatusEntry]] contents carried by the update.
  *
  * Note: wherever two adjacent lines differ only in `config`/`RSConfig` versus
  * `params`/`RSParams`, they appear to be the same declaration before and after the
  * parameter-class rename.
  */
class StatusArrayUpdateIO(config: RSConfig)(implicit p: Parameters) extends Bundle {
class StatusArrayUpdateIO(params: RSParams)(implicit p: Parameters) extends Bundle {
// enable bit for this update port
val enable = Input(Bool())
// entry select, one bit per reservation-station entry; should be one-hot
val addr = Input(UInt(config.numEntries.W))
// status fields carried by this update
val data = Input(new StatusEntry(config))
val addr = Input(UInt(params.numEntries.W))
val data = Input(new StatusEntry(params))
// True when no bit of `addr` is set (population count equals zero).
// NOTE(review): the comment on `addr` says it should be one-hot, which would give a
// population count of 1, not 0 -- confirm that `=== 0.U` is the intended check.
def isLegal() = {
PopCount(addr.asBools) === 0.U
}
// Builds a fresh bundle from the same constructor parameter and casts it to this.type.
override def cloneType: StatusArrayUpdateIO.this.type =
new StatusArrayUpdateIO(config).asInstanceOf[this.type]
new StatusArrayUpdateIO(params).asInstanceOf[this.type]
}
/** Per-entry bookkeeping record kept by the status array.
  *
  * Holds the state flags of one reservation-station entry plus the identifiers
  * needed to match wakeups (`psrc`/`srcType`) and to order the entry (`roqIdx`, `sqIdx`).
  *
  * Note: wherever two adjacent lines differ only in `config`/`RSConfig` versus
  * `params`/`RSParams`, they appear to be the same declaration before and after the
  * parameter-class rename.
  */
class StatusEntry(config: RSConfig)(implicit p: Parameters) extends XSBundle {
class StatusEntry(params: RSParams)(implicit p: Parameters) extends XSBundle {
// states
// entry holds an instruction; the status array raises an error if an invalid entry is issued or dequeued
val valid = Bool()
// set once the entry has been issued; cleared again when its dequeue is not granted or when credit equals 1
val scheduled = Bool()
// when checkWaitBit is enabled, stays set while sqIdx is still after the store issue pointer
val blocked = Bool()
// 4-bit counter; the scheduled bit is cleared when it equals 1
val credit = UInt(4.W)
// one flag per source operand
val srcState = Vec(config.numSrc, Bool())
val srcState = Vec(params.numSrc, Bool())
// data
// per-source register index and source type, compared against wakeups
val psrc = Vec(config.numSrc, UInt(config.dataIdBits.W))
val srcType = Vec(config.numSrc, SrcType())
val psrc = Vec(params.numSrc, UInt(params.dataIdBits.W))
val srcType = Vec(params.numSrc, SrcType())
val roqIdx = new RoqPtr
// compared with the store issue pointer to release `blocked`
val sqIdx = new SqPtr
// Builds a fresh bundle from the same constructor parameter and casts it to this.type.
override def cloneType: StatusEntry.this.type =
new StatusEntry(config).asInstanceOf[this.type]
new StatusEntry(params).asInstanceOf[this.type]
// Debug formatting: prints valid, scheduled, srcState (as binary), psrc and roqIdx;
// blocked, credit, srcType and sqIdx are not included.
override def toPrintable: Printable = {
p"$valid, $scheduled, ${Binary(srcState.asUInt)}, $psrc, $roqIdx"
}
}
class StatusArray(config: RSConfig)(implicit p: Parameters) extends XSModule
class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule
with HasCircularQueuePtrHelper {
val io = IO(new Bundle {
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
// current status
val isValid = Output(UInt(config.numEntries.W))
val canIssue = Output(UInt(config.numEntries.W))
val isValid = Output(UInt(params.numEntries.W))
val canIssue = Output(UInt(params.numEntries.W))
// enqueue, dequeue, wakeup, flush
val update = Vec(config.numEnq, new StatusArrayUpdateIO(config))
val wakeup = Vec(config.numWakeup, Flipped(ValidIO(new MicroOp)))
val wakeupMatch = Vec(config.numEntries, Vec(config.numSrc, Output(UInt(config.numWakeup.W))))
val issueGranted = Vec(config.numDeq, Flipped(ValidIO(UInt(config.numEntries.W))))
val deqResp = Vec(config.numDeq, Flipped(ValidIO(new Bundle {
val rsMask = UInt(config.numEntries.W)
val update = Vec(params.numEnq, new StatusArrayUpdateIO(params))
val wakeup = Vec(params.numWakeup, Flipped(ValidIO(new MicroOp)))
val wakeupMatch = Vec(params.numEntries, Vec(params.numSrc, Output(UInt(params.numWakeup.W))))
val issueGranted = Vec(params.numDeq, Flipped(ValidIO(UInt(params.numEntries.W))))
val deqResp = Vec(params.numDeq, Flipped(ValidIO(new Bundle {
val rsMask = UInt(params.numEntries.W)
val success = Bool()
})))
val stIssuePtr = if (config.checkWaitBit) Input(new SqPtr()) else null
val stIssuePtr = if (params.checkWaitBit) Input(new SqPtr()) else null
})
val statusArray = Reg(Vec(config.numEntries, new StatusEntry(config)))
val statusArray = Reg(Vec(params.numEntries, new StatusEntry(params)))
val statusArrayNext = WireInit(statusArray)
statusArray := statusArrayNext
when (reset.asBool) {
......@@ -128,7 +128,7 @@ class StatusArray(config: RSConfig)(implicit p: Parameters) extends XSModule
val hasIssued = VecInit(io.issueGranted.map(iss => iss.valid && iss.bits(i))).asUInt.orR
val (deqResp, deqGrant) = deqRespSel(i)
XSError(deqResp && !status.valid, "should not deq an invalid entry\n")
if (config.hasFeedback) {
if (params.hasFeedback) {
XSError(deqResp && !status.scheduled, "should not deq an un-scheduled entry\n")
}
val wakeupEnVec = VecInit(status.psrc.zip(status.srcType).map{ case (p, t) => wakeupMatch(p, t) })
......@@ -138,7 +138,7 @@ class StatusArray(config: RSConfig)(implicit p: Parameters) extends XSModule
// (1) when deq is not granted, unset its scheduled bit; (2) set scheduled if issued
statusNext.scheduled := Mux(deqResp && !deqGrant || status.credit === 1.U, false.B, status.scheduled || hasIssued)
XSError(hasIssued && !status.valid, "should not issue an invalid entry\n")
if (config.checkWaitBit) {
if (params.checkWaitBit) {
statusNext.blocked := status.blocked && isAfter(status.sqIdx, io.stIssuePtr)
}
else {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册