提交 0412e00d 编写于 作者: L LinJiawei

[WIP] backend: connect ctrl block

上级 694b0180
......@@ -6,6 +6,7 @@ import top.Parameters
import xiangshan.backend._
import xiangshan.backend.dispatch.DispatchParameters
import xiangshan.backend.exu.ExuParameters
import xiangshan.backend.exu.Exu._
import xiangshan.frontend._
import xiangshan.mem._
import xiangshan.backend.fu.HasExceptionNO
......@@ -280,11 +281,43 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) with HasXSParameter
val externalInterrupt = new ExternalInterruptIO
})
// to fast wake up fp, mem rs
val intBlockFastWakeUpFp = intExuConfigs.count(cfg => cfg.hasCertainLatency && cfg.writeFpRf)
val intBlockSlowWakeUpFp = intExuConfigs.count(cfg => cfg.hasUncertainlatency && cfg.writeFpRf)
val intBlockFastWakeUpInt = intExuConfigs.count(cfg => cfg.hasCertainLatency && cfg.writeIntRf)
val intBlockSlowWakeUpInt = intExuConfigs.count(cfg => cfg.hasUncertainlatency && cfg.writeIntRf)
val fpBlockFastWakeUpFp = fpExuConfigs.count(cfg => cfg.hasCertainLatency && cfg.writeFpRf)
val fpBlockSlowWakeUpFp = fpExuConfigs.count(cfg => cfg.hasUncertainlatency && cfg.writeFpRf)
val fpBlockFastWakeUpInt = fpExuConfigs.count(cfg => cfg.hasCertainLatency && cfg.writeIntRf)
val fpBlockSlowWakeUpInt = fpExuConfigs.count(cfg => cfg.hasUncertainlatency && cfg.writeIntRf)
val frontend = Module(new Frontend)
val ctrlBlock = Module(new CtrlBlock)
val integerBlock = Module(new IntegerBlock)
val floatBlock = Module(new FloatBlock)
val memBlock = Module(new MemBlock)
val integerBlock = Module(new IntegerBlock(
fastWakeUpInCnt = fpBlockFastWakeUpInt,
slowWakeUpInCnt = fpBlockSlowWakeUpInt + exuParameters.LduCnt,
fastFpOutCnt = intBlockFastWakeUpFp,
slowFpOutCnt = intBlockSlowWakeUpFp,
fastIntOutCnt = intBlockFastWakeUpInt,
slowIntOutCnt = intBlockSlowWakeUpInt
))
val floatBlock = Module(new FloatBlock(
fastWakeUpInCnt = intBlockFastWakeUpFp,
slowWakeUpInCnt = intBlockSlowWakeUpFp + exuParameters.LduCnt,
fastFpOutCnt = fpBlockFastWakeUpFp,
slowFpOutCnt = fpBlockSlowWakeUpFp,
fastIntOutCnt = fpBlockFastWakeUpInt,
slowIntOutCnt = fpBlockSlowWakeUpInt
))
val memBlock = Module(new MemBlock(
fastWakeUpInCnt = intBlockFastWakeUpInt + intBlockFastWakeUpFp + fpBlockFastWakeUpInt + fpBlockFastWakeUpFp,
slowWakeUpInCnt = intBlockSlowWakeUpInt + intBlockSlowWakeUpFp + fpBlockSlowWakeUpInt + fpBlockSlowWakeUpFp,
fastFpOutCnt = 0,
slowFpOutCnt = exuParameters.LduCnt,
fastIntOutCnt = 0,
slowIntOutCnt = exuParameters.LduCnt
))
val dcache = outer.dcache.module
val uncache = outer.uncache.module
......@@ -292,6 +325,12 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) with HasXSParameter
val ptw = outer.ptw.module
val icache = Module(new ICache)
//TODO: remove following code
memBlock.io <> DontCare
integerBlock.io <> DontCare
floatBlock.io <> DontCare
frontend.io.backend <> ctrlBlock.io.frontend
frontend.io.icacheResp <> icache.io.resp
frontend.io.icacheToTlb <> icache.io.tlb
......@@ -310,6 +349,27 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) with HasXSParameter
ctrlBlock.io.toFpBlock <> floatBlock.io.fromCtrlBlock
ctrlBlock.io.toLsBlock <> memBlock.io.fromCtrlBlock
integerBlock.io.wakeUpIn.fast <> floatBlock.io.wakeUpIntOut.fast
integerBlock.io.wakeUpIn.slow <> floatBlock.io.wakeUpIntOut.slow ++ memBlock.io.wakeUpIntOut.slow
floatBlock.io.wakeUpIn.fast <> integerBlock.io.wakeUpFpOut.fast
floatBlock.io.wakeUpIn.slow <> integerBlock.io.wakeUpFpOut.slow ++ memBlock.io.wakeUpFpOut.slow
memBlock.io.wakeUpIn.fast <> integerBlock.io.wakeUpIntOut.fast ++
integerBlock.io.wakeUpFpOut.fast ++
floatBlock.io.wakeUpIntOut.fast ++
floatBlock.io.wakeUpFpOut.fast
memBlock.io.wakeUpIn.slow <> integerBlock.io.wakeUpIntOut.slow ++
integerBlock.io.wakeUpFpOut.slow ++
floatBlock.io.wakeUpIntOut.slow ++
floatBlock.io.wakeUpFpOut.slow
integerBlock.io.csrOnly.memExceptionVAddr := memBlock.io.csr.exceptionAddr.vaddr
integerBlock.io.csrOnly.externalInterrupt := io.externalInterrupt
integerBlock.io.csrOnly.isInterrupt := DontCare //TODO: fix it
io.externalInterrupt <> integerBlock.io.externalInterrupt
ptw.io.tlb(0) <> memBlock.io.ptw
......@@ -323,4 +383,13 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) with HasXSParameter
dcache.io.lsu.store <> memBlock.io.dcache.sbufferToDcache
uncache.io.lsroq <> memBlock.io.dcache.uncache
val debugIntReg, debugFpReg = WireInit(VecInit(Seq.fill(32)(0.U(XLEN.W))))
ExcitingUtils.addSink(debugIntReg, "DEBUG_INT_ARCH_REG", ExcitingUtils.Debug)
ExcitingUtils.addSink(debugFpReg, "DEBUG_FP_ARCH_REG", ExcitingUtils.Debug)
val debugArchReg = WireInit(VecInit(debugIntReg ++ debugFpReg))
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(debugArchReg, "difftestRegs", ExcitingUtils.Debug)
}
}
......@@ -264,29 +264,28 @@ class Backend extends XSModule
val wbIntArbiter = Module(new Wb(
wbIntExus.map(_.config.wbIntPriority) ++ ldConfigs.map(_.wbIntPriority),
NRIntWritePorts,
wen = (e: ExuOutput)=>e.uop.ctrl.rfWen
NRIntWritePorts
))
val wbFpArbiter = Module(new Wb(
wbFpExus.map(_.config.wbFpPriority) ++ ldConfigs.map(_.wbFpPriority),
NRFpWritePorts,
wen = (e: ExuOutput) => e.uop.ctrl.fpWen
NRFpWritePorts
))
wbIntArbiter.io.in <> wbIntExus.map(_.io.toInt) ++ ldIntOut
wbFpArbiter.io.in <> wbFpExus.map(_.io.toFp) ++ ldFpOut
def exuOutToRfWrite(x: Valid[ExuOutput]): RfWritePort = {
/** Convert one write-back arbiter output into a register-file write port.
  *
  * @param x  arbitrated write-back result (valid + ExuOutput payload)
  * @param fp true when the port feeds the floating-point regfile,
  *           false when it feeds the integer regfile
  * @return a write port whose enable is `x.valid` gated by the write-enable
  *         flag that matches the target regfile
  */
def exuOutToRfWrite(x: Valid[ExuOutput], fp: Boolean): RfWritePort = {
  val rfWrite = Wire(new RfWritePort)
  // Pick the enable that belongs to the target regfile: fpWen for the FP
  // regfile, rfWen for the integer regfile. The two branches were swapped,
  // which gated integer writes on fpWen and FP writes on rfWen.
  val wen = if (fp) x.bits.uop.ctrl.fpWen else x.bits.uop.ctrl.rfWen
  rfWrite.wen := x.valid && wen
  rfWrite.addr := x.bits.uop.pdest
  rfWrite.data := x.bits.data
  rfWrite
}
intRf.io.writePorts <> wbIntArbiter.io.out.map(exuOutToRfWrite)
fpRf.io.writePorts <> wbFpArbiter.io.out.map(exuOutToRfWrite)
intRf.io.writePorts <> wbIntArbiter.io.out.map(w => exuOutToRfWrite(w, fp = false))
fpRf.io.writePorts <> wbFpArbiter.io.out.map(w => exuOutToRfWrite(w, fp = true))
rename.io.wbIntResults <> wbIntArbiter.io.out
rename.io.wbFpResults <> wbFpArbiter.io.out
......
......@@ -35,7 +35,7 @@ class CtrlToLsBlockIO extends XSBundle {
// from roq: send commits info to lsq
val commits = Vec(CommitWidth, ValidIO(new RoqCommit))
// from roq: the newest roqDeqPtr
val roqDeqPtr = Input(new RoqPtr)
val roqDeqPtr = Output(new RoqPtr)
}
class CtrlBlock extends XSModule {
......@@ -58,10 +58,7 @@ class CtrlBlock extends XSModule {
// val fpBusyTable = Module(new BusyTable(NRFpReadPorts, NRFpWritePorts))
// val intBusyTable = Module(new BusyTable(NRIntReadPorts, NRIntWritePorts))
val fpWbSize = exuConfigs.count(_.writeFpRf)
val intWbSize = exuConfigs.count(_.writeIntRf)
// wb int exu + wb fp exu + ldu / stu + brq
val roqWbSize = intWbSize + fpWbSize + exuParameters.LduCnt + exuParameters.StuCnt + 1
val roqWbSize = NRIntWritePorts + NRFpWritePorts + exuParameters.StuCnt + 1
val roq = Module(new Roq(roqWbSize))
......@@ -87,6 +84,12 @@ class CtrlBlock extends XSModule {
decode.io.brTags <> brq.io.brTags
decode.io.out <> decBuf.io.in
brq.io.roqRedirect <> roq.io.redirect
brq.io.memRedirect <> io.fromLsBlock.replay
brq.io.bcommit <> roq.io.bcommit
brq.io.enqReqs <> decode.io.toBrq
brq.io.exuRedirect <> io.fromIntBlock.exuRedirect
decBuf.io.isWalking := roq.io.commits(0).valid && roq.io.commits(0).bits.isWalk
decBuf.io.redirect <> redirect
decBuf.io.out <> rename.io.in
......@@ -94,8 +97,8 @@ class CtrlBlock extends XSModule {
rename.io.redirect <> redirect
rename.io.roqCommits <> roq.io.commits
// they should be moved to busytables
rename.io.wbIntResults <> io.fromIntBlock.wbIntRegs ++ io.fromFpBlock.wbIntRegs ++ io.fromLsBlock.wbIntRegs
rename.io.wbFpResults <> io.fromIntBlock.wbFpRegs ++ io.fromFpBlock.wbFpRegs ++ io.fromLsBlock.wbFpRegs
rename.io.wbIntResults <> io.fromIntBlock.wbRegs
rename.io.wbFpResults <> io.fromFpBlock.wbRegs
rename.io.intRfReadAddr <> dispatch.io.readIntRf.map(_.addr)
rename.io.fpRfReadAddr <> dispatch.io.readFpRf.map(_.addr)
rename.io.intPregRdy <> dispatch.io.intPregRdy
......@@ -109,18 +112,40 @@ class CtrlBlock extends XSModule {
dispatch.io.toLsroq <> io.toLsBlock.lsqIdxReq
dispatch.io.lsIdxs <> io.fromLsBlock.lsqIdxResp
dispatch.io.dequeueRoqIndex.valid := roq.io.commitRoqIndex.valid || io.fromLsBlock.oldestStore.valid
dispatch.io.dequeueRoqIndex.bits := Mux(io.fromLsBlock.oldestStore.valid, io.fromLsBlock.oldestStore.bits, roq.io.commitRoqIndex.bits)
dispatch.io.dequeueRoqIndex.bits := Mux(io.fromLsBlock.oldestStore.valid,
io.fromLsBlock.oldestStore.bits,
roq.io.commitRoqIndex.bits
)
dispatch.io.readIntRf <> io.toIntBlock.readRf
dispatch.io.readFpRf <> io.toFpBlock.readRf
dispatch.io.numExist <> io.fromIntBlock.numExist ++ io.fromFpBlock.numExist ++ io.fromLsBlock.numExist
dispatch.io.enqIQCtrl <> io.toIntBlock.enqIqCtrl ++ io.toFpBlock.enqIqCtrl ++ io.toLsBlock.enqIqCtrl
dispatch.io.enqIQData <> io.toIntBlock.enqIqData ++ io.toFpBlock.enqIqData ++ io.toLsBlock.enqIqData
roq.io.memRedirect <> io.fromLsBlock.replay
roq.io.brqRedirect <> brq.io.redirect
roq.io.dp1Req <> dispatch.io.toRoq
roq.io.exeWbResults.take(roqWbSize-1).zip(
io.fromIntBlock.wbRegs ++ io.fromFpBlock.wbRegs ++ io.fromLsBlock.stOut
).foreach{
case(x, y) =>
x.bits := y.bits
x.valid := y.valid && !y.bits.redirectValid
}
roq.io.exeWbResults.last := brq.io.out
io.toIntBlock.redirect := redirect
io.toIntBlock.roqToCSR <> roq.io.csr
// val flush = redirect.valid && (redirect.bits.isException || redirect.bits.isFlushPipe)
// fpBusyTable.flush := flush
// intBusyTable.flush := flush
// busytable io
// maybe update busytable in dispatch1?
io.toFpBlock.redirect := redirect
io.toLsBlock.redirect := redirect
io.toLsBlock.roqDeqPtr := roq.io.roqDeqPtr
io.toLsBlock.commits := roq.io.commits
}
......@@ -9,65 +9,25 @@ import xiangshan.backend.issue.ReservationStationNew
class FpBlockToCtrlIO extends XSBundle {
// TODO: should not be FpExuCnt
val wbIntRegs = Vec(exuParameters.FpExuCnt, Flipped(ValidIO(new ExuOutput)))
val wbFpRegs = Vec(exuParameters.FpExuCnt, Flipped(ValidIO(new ExuOutput)))
val wbRegs = Vec(NRFpWritePorts, ValidIO(new ExuOutput))
val numExist = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
}
class FloatBlock extends XSModule {
class FloatBlock
(
fastWakeUpInCnt: Int,
slowWakeUpInCnt: Int,
fastFpOutCnt: Int,
slowFpOutCnt: Int,
fastIntOutCnt: Int,
slowIntOutCnt: Int
) extends XSModule with NeedImpl {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToFpBlockIO)
val toCtrlBlock = new FpBlockToCtrlIO
// TODO: randomly set 5
// writeback from other blocks
val writebackData = Vec(5, Input(UInt(XLEN.W)))
val extraListenPorts = Vec(5, Flipped(DecoupledIO(new ExuOutput)))
// output writeback (wakeup other blocks)
// val
})
// floating-point regfile
val regfile = Module(new Regfile(
numReadPorts = NRFpReadPorts,
numWirtePorts = NRFpWritePorts,
hasZero = false
))
val fmacExeUnits = Array.tabulate(exuParameters.FmacCnt)(_ => Module(new FmacExeUnit))
val fmiscExeUnits = Array.tabulate(exuParameters.FmiscCnt)(_ => Module(new FmiscExeUnit))
val exeUnits = fmacExeUnits ++ fmiscExeUnits
val exuConfigs = exeUnits.map(_.config)
// generate reservation stations
val exeWbReqs = exeUnits.map(_.io.out)
val writebackData = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasCertainLatency && x._1.writeIntRf).map(_._2.bits.data)
val extraListenPorts = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasUncertainlatency && x._1.writeIntRf).map(_._2)
val rsConfigs = Seq(5, 5, 5, 5, -1, -1)
val reservationStations = exuConfigs.zipWithIndex.map({ case (cfg, i) =>
val rs = Module(new ReservationStationNew(cfg, 5, 4, fixedDelay = rsConfigs(i), feedback = true))
rs.io.redirect <> io.fromCtrlBlock.redirect
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.enqData <> io.fromCtrlBlock.enqIqData(i)
rs.io.writeBackedData <> writebackData ++ io.writebackData
for((x, y) <- rs.io.extraListenPorts.zip(extraListenPorts ++ io.extraListenPorts)){
x.valid := y.fire()
x.bits := y.bits
}
exeUnits(i).io.in <> rs.io.deq
exeUnits(i).io.redirect <> io.fromCtrlBlock.redirect
rs.io.tlbFeedback := DontCare
rs.suggestName(s"rs_${cfg.name}")
rs
val wakeUpIn = new WakeUpBundle(fastWakeUpInCnt, slowWakeUpInCnt)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOutCnt, slowFpOutCnt))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOutCnt, slowIntOutCnt))
})
// connect writeback
// val wbArbiter =
}
......@@ -3,93 +3,51 @@ package xiangshan.backend
import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.backend.regfile.Regfile
import xiangshan.backend.exu._
import xiangshan.backend.issue.ReservationStationNew
// wbIntRegs,wbFpRegs are used for updating busytables
// Bundle of wake-up ports exchanged between execution blocks.
// numFast: number of entries in `fastUops` and `fast`.
// numSlow: number of entries in `slow`.
// Every field is declared Flipped; the blocks in this change instantiate the
// bundle as-is for `wakeUpIn` and wrap it in Flipped(...) for the
// `wakeUpFpOut` / `wakeUpIntOut` ports.
class WakeUpBundle(numFast: Int, numSlow: Int) extends XSBundle {
// Micro-op side of each fast wake-up (valid-only, no back-pressure).
val fastUops = Vec(numFast, Flipped(ValidIO(new MicroOp)))
// Result side of each fast wake-up.
val fast = Vec(numFast, Flipped(DecoupledIO(new ExuOutput))) // one cycle later than fastUops
// Slow wake-up results, carried on decoupled (ready/valid) ports.
val slow = Vec(numSlow, Flipped(DecoupledIO(new ExuOutput)))
// Re-create the bundle with the same port counts when it is cloned.
override def cloneType = (new WakeUpBundle(numFast, numSlow)).asInstanceOf[this.type]
}
class IntBlockToCtrlIO extends XSBundle {
// TODO: should not be IntExuCnt
val wbIntRegs = Vec(exuParameters.IntExuCnt, Flipped(ValidIO(new ExuOutput)))
val wbFpRegs = Vec(exuParameters.IntExuCnt, Flipped(ValidIO(new ExuOutput)))
// write back regfile signals after arbiter
// used to update busytable and roq state
val wbRegs = Vec(NRIntWritePorts, ValidIO(new ExuOutput))
// write back to brq
val exuRedirect = Vec(exuParameters.AluCnt+exuParameters.JmpCnt, ValidIO(new ExuOutput))
val numExist = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
val sfence = Output(new SfenceBundle)
val tlbCsrIO = Output(new TlbCsrBundle)
}
class IntegerBlock extends XSModule {
class IntegerBlock
(
fastWakeUpInCnt: Int,
slowWakeUpInCnt: Int,
fastFpOutCnt: Int,
slowFpOutCnt: Int,
fastIntOutCnt: Int,
slowIntOutCnt: Int
) extends XSModule with NeedImpl
{
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToIntBlockIO)
val toCtrlBlock = new IntBlockToCtrlIO
// TODO: randomly set 5
// writeback from other blocks
val writebackData = Vec(5, Input(UInt(XLEN.W)))
val extraListenPorts = Vec(5, Flipped(DecoupledIO(new ExuOutput)))
// output writeback (wakeup other blocks)
// val
val wakeUpIn = new WakeUpBundle(fastWakeUpInCnt, slowWakeUpInCnt)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOutCnt, slowFpOutCnt))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOutCnt, slowIntOutCnt))
val externalInterrupt = new ExternalInterruptIO
val sfence = Output(new SfenceBundle)
val fencei = Output(Bool())
val tlbCsrIO = Output(new TlbCsrBundle)
val csrOnly = new CSRSpecialIO
})
// integer regfile
val regfile = Module(new Regfile(
numReadPorts = NRIntReadPorts,
numWirtePorts = NRIntWritePorts,
hasZero = true,
XLEN
))
val jmpExeUnit = Module(new JumpExeUnit)
val mduExeUnits = Array.tabulate(exuParameters.MduCnt)(_ => Module(new MulDivExeUnit))
val aluExeUnits = Array.tabulate(exuParameters.AluCnt)(_ => Module(new AluExeUnit))
val exeUnits = jmpExeUnit +: (mduExeUnits ++ aluExeUnits)
val exuConfigs = exeUnits.map(_.config)
// generate reservation stations
val exeWbReqs = exeUnits.map(_.io.out)
val writebackData = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasCertainLatency && x._1.writeIntRf).map(_._2.bits.data)
val extraListenPorts = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasUncertainlatency && x._1.writeIntRf).map(_._2)
val rsConfigs = Seq(0, -1, -1, 0, 0, 0, 0)
val reservationStations = exuConfigs.zipWithIndex.map({ case (cfg, i) =>
val rs = Module(new ReservationStationNew(cfg, 5, 6, fixedDelay = rsConfigs(i), feedback = false))
rs.io.redirect <> io.fromCtrlBlock.redirect
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.enqData <> io.fromCtrlBlock.enqIqData(i)
rs.io.writeBackedData <> writebackData ++ io.writebackData
for((x, y) <- rs.io.extraListenPorts.zip(extraListenPorts ++ io.extraListenPorts)){
x.valid := y.fire()
x.bits := y.bits
}
exeUnits(i).io.in <> rs.io.deq
exeUnits(i).io.redirect <> io.fromCtrlBlock.redirect
rs.io.tlbFeedback := DontCare
rs.suggestName(s"rs_${cfg.name}")
rs
})
// IOs for special execution units
// CSR is in jmpExeUnit
io.fromCtrlBlock.roqToCSR.intrBitSet := jmpExeUnit.io.csrOnly.interrupt
io.fromCtrlBlock.roqToCSR.trapTarget := jmpExeUnit.io.csrOnly.trapTarget
jmpExeUnit.fflags := io.fromCtrlBlock.roqToCSR.fflags
jmpExeUnit.dirty_fs := io.fromCtrlBlock.roqToCSR.dirty_fs
jmpExeUnit.io.csrOnly.exception.valid := roq.io.redirect.valid && roq.io.redirect.bits.isException
jmpExeUnit.io.csrOnly.exception.bits := roq.io.exception
jmpExeUnit.io.csrOnly.externalInterrupt := io.externalInterrupt
jmpExeUnit.io.csrOnly.memExceptionVAddr := io.mem.exceptionAddr.vaddr
jmpExeUnit.fenceToSbuffer <> io.mem.fenceToSbuffer
// TODO: connect writeback
// val wbArbiter =
}
......@@ -12,9 +12,7 @@ import xiangshan.backend.issue.ReservationStationNew
import xiangshan.backend.fu.FunctionUnit.{lduCfg, mouCfg, stuCfg}
class LsBlockToCtrlIO extends XSBundle {
// TODO: should not be LsExuCnt
val wbIntRegs = Vec(exuParameters.LsExuCnt, Flipped(ValidIO(new ExuOutput)))
val wbFpRegs = Vec(exuParameters.LsExuCnt, Flipped(ValidIO(new ExuOutput)))
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput)) // write to roq
val numExist = Vec(exuParameters.LsExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
val lsqIdxResp = Vec(RenameWidth, Output(new LSIdx))
val oldestStore = Output(Valid(new RoqPtr))
......@@ -36,169 +34,179 @@ class MemBlockCSRIO extends XSBundle {
val tlbInfo = Input(new TlbCsrBundle)
}
class MemBlock extends XSModule {
class MemBlock
(
fastWakeUpInCnt: Int,
slowWakeUpInCnt: Int,
fastFpOutCnt: Int,
slowFpOutCnt: Int,
fastIntOutCnt: Int,
slowIntOutCnt: Int
) extends XSModule with NeedImpl {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToLsBlockIO)
val toCtrlBlock = new LsBlockToCtrlIO
val wakeUpIn = new WakeUpBundle(fastWakeUpInCnt, slowWakeUpInCnt)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOutCnt, slowFpOutCnt))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOutCnt, slowIntOutCnt))
val ptw = new TlbPtwIO
// TODO: dcache should be inside MemBlock
val dcache = new MemBlockToDcacheIO
val csr = new MemBlockCSRIO
// writeback from other blocks
val writebackData = Vec(5, Input(UInt(XLEN.W)))
val extraListenPorts = Vec(5, Flipped(DecoupledIO(new ExuOutput)))
// output writeback
})
val loadUnits = Array.tabulate(exuParameters.LduCnt)(_ => Module(new LoadUnit))
val storeUnits = Array.tabulate(exuParameters.StuCnt)(_ => Module(new StoreUnit))
val exeUnits = loadUnits ++ storeUnits
val ldExeUnitCfg = ExuConfig("LoadExu", Seq(lduCfg), wbIntPriority = 0, wbFpPriority = 0)
val stExeUnitCfg = ExuConfig("StoreExu", Seq(stuCfg, mouCfg), wbIntPriority = Int.MaxValue, wbFpPriority = Int.MaxValue)
val exuConfigs = Seq.fill(exuParameters.LduCnt)(ldExeUnitCfg) ++ Seq.fill(exuParameters.StuCnt)(stExeUnitCfg)
val atomicsUnit = Module(new AtomicsUnit)
val loadWritebackOverride = Mux(atomicsUnit.io.out.valid, atomicsUnit.io.out, loadUnits.head.io.ldout)
val exeWbReqs = loadWritebackOverride +: loadUnits.tail.map(_.io.ldout)
val writebackData = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasCertainLatency && x._1.writeIntRf).map(_._2.bits.data)
val extraListenPorts = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasUncertainlatency && x._1.writeIntRf).map(_._2)
val rsConfigs = Seq(5, 5, 9, 9)
val reservationStations = exuConfigs.zipWithIndex.map({ case (cfg, i) =>
val rs = Module(new ReservationStationNew(cfg, rsConfigs(i), 6, fixedDelay = -1, feedback = true))
rs.io.redirect <> io.fromCtrlBlock.redirect
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.enqData <> io.fromCtrlBlock.enqIqData(i)
rs.io.writeBackedData <> writebackData ++ io.writebackData
for((x, y) <- rs.io.extraListenPorts.zip(extraListenPorts ++ io.extraListenPorts)){
x.valid := y.fire()
x.bits := y.bits
}
rs.suggestName(s"rs_${cfg.name}")
rs
})
val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
val lsroq = Module(new LsqWrappper)
val sbuffer = Module(new NewSbuffer)
// if you want to stress test dcache store, use FakeSbuffer
// val sbuffer = Module(new FakeSbuffer)
// dtlb
io.ptw <> dtlb.io.ptw
dtlb.io.sfence <> io.csr.sfence
dtlb.io.csr <> io.csr.tlbInfo
// LoadUnit
for (i <- 0 until exuParameters.LduCnt) {
loadUnits(i).io.redirect <> io.fromCtrlBlock.redirect
loadUnits(i).io.tlbFeedback <> reservationStations(i).io.tlbFeedback
loadUnits(i).io.dtlb <> dtlb.io.requestor(i)
// get input from dispatch
loadUnits(i).io.ldin <> reservationStations(i).io.deq
// dcache access
loadUnits(i).io.dcache <> io.dcache.loadUnitToDcacheVec(i)
// forward
loadUnits(i).io.lsroq.forward <> lsroq.io.forward(i)
loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
// passdown to lsroq
lsroq.io.loadIn(i) <> loadUnits(i).io.lsroq.loadIn
lsroq.io.ldout(i) <> loadUnits(i).io.lsroq.ldout
}
// StoreUnit
for (i <- 0 until exuParameters.StuCnt) {
storeUnits(i).io.redirect <> io.fromCtrlBlock.redirect
storeUnits(i).io.tlbFeedback <> reservationStations(exuParameters.LduCnt + i).io.tlbFeedback
storeUnits(i).io.dtlb <> dtlb.io.requestor(exuParameters.LduCnt + i)
// get input from dispatch
storeUnits(i).io.stin <> reservationStations(exuParameters.LduCnt + i).io.deq
// passdown to lsroq
storeUnits(i).io.lsroq <> lsroq.io.storeIn(i)
}
// Lsroq
lsroq.io.commits <> io.fromCtrlBlock.commits
lsroq.io.dp1Req <> io.fromCtrlBlock.lsqIdxReq
lsroq.io.oldestStore <> io.toCtrlBlock.oldestStore
lsroq.io.lsIdxs <> io.toCtrlBlock.lsqIdxResp
lsroq.io.brqRedirect := io.fromCtrlBlock.redirect
lsroq.io.roqDeqPtr := io.fromCtrlBlock.roqDeqPtr
io.toCtrlBlock.replay <> lsroq.io.rollback
lsroq.io.dcache <> io.dcache.loadMiss
lsroq.io.uncache <> io.dcache.uncache
// LSROQ to store buffer
lsroq.io.sbuffer <> sbuffer.io.in
// Sbuffer
sbuffer.io.dcache <> io.dcache.sbufferToDcache
// flush sbuffer
val fenceFlush = io.csr.fenceToSbuffer.flushSb
val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid
io.csr.fenceToSbuffer.sbIsEmpty := sbuffer.io.flush.empty
// if both of them tries to flush sbuffer at the same time
// something must have gone wrong
assert(!(fenceFlush && atomicsFlush))
sbuffer.io.flush.valid := fenceFlush || atomicsFlush
// TODO: make 0/1 configurable
// AtomicsUnit
// AtomicsUnit will override other control signals,
// as atomics insts (LR/SC/AMO) will block the pipeline
val st0_atomics = reservationStations(2).io.deq.valid && reservationStations(2).io.deq.bits.uop.ctrl.fuType === FuType.mou
val st1_atomics = reservationStations(3).io.deq.valid && reservationStations(3).io.deq.bits.uop.ctrl.fuType === FuType.mou
// amo should always go through store issue queue 0
assert(!st1_atomics)
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
atomicsUnit.io.out.ready := false.B
// dispatch 0 takes priority
atomicsUnit.io.in.valid := st0_atomics
atomicsUnit.io.in.bits := reservationStations(2).io.deq.bits
when (st0_atomics) {
reservationStations(0).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(0).io.stin.valid := false.B
}
when(atomicsUnit.io.dtlb.req.valid) {
dtlb.io.requestor(0) <> atomicsUnit.io.dtlb // TODO: check it later
// take load unit 0's tlb port
// make sure not to disturb loadUnit
assert(!loadUnits(0).io.dtlb.req.valid)
loadUnits(0).io.dtlb.resp.valid := false.B
}
when(atomicsUnit.io.tlbFeedback.valid) {
assert(!storeUnits(0).io.tlbFeedback.valid)
atomicsUnit.io.tlbFeedback <> reservationStations(exuParameters.LduCnt + 0).io.tlbFeedback
}
atomicsUnit.io.dcache <> io.dcache.atomics
atomicsUnit.io.flush_sbuffer.empty := sbuffer.io.flush.empty
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
when(atomicsUnit.io.out.valid){
// take load unit 0's write back port
assert(!loadUnits(0).io.ldout.valid)
loadUnits(0).io.ldout.ready := false.B
}
lsroq.io.exceptionAddr.lsIdx := io.csr.exceptionAddr.lsIdx
lsroq.io.exceptionAddr.isStore := io.csr.exceptionAddr.isStore
io.csr.exceptionAddr.vaddr := Mux(atomicsUnit.io.exceptionAddr.valid, atomicsUnit.io.exceptionAddr.bits, lsroq.io.exceptionAddr.vaddr)
// val loadUnits = Array.tabulate(exuParameters.LduCnt)(_ => Module(new LoadUnit))
// val storeUnits = Array.tabulate(exuParameters.StuCnt)(_ => Module(new StoreUnit))
// val exeUnits = loadUnits ++ storeUnits
// val ldExeUnitCfg = ExuConfig("LoadExu", Seq(lduCfg), wbIntPriority = 0, wbFpPriority = 0)
// val stExeUnitCfg = ExuConfig("StoreExu", Seq(stuCfg, mouCfg), wbIntPriority = Int.MaxValue, wbFpPriority = Int.MaxValue)
// val exuConfigs = Seq.fill(exuParameters.LduCnt)(ldExeUnitCfg) ++ Seq.fill(exuParameters.StuCnt)(stExeUnitCfg)
//
// val atomicsUnit = Module(new AtomicsUnit)
//
// val loadWritebackOverride = Mux(atomicsUnit.io.out.valid, atomicsUnit.io.out, loadUnits.head.io.ldout)
// val exeWbReqs = loadWritebackOverride +: loadUnits.tail.map(_.io.ldout)
// val writebackData = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasCertainLatency && x._1.writeIntRf).map(_._2.bits.data)
// val extraListenPorts = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasUncertainlatency && x._1.writeIntRf).map(_._2)
//
// val rsConfigs = Seq(5, 5, 9, 9)
// val reservationStations = exuConfigs.zipWithIndex.map({ case (cfg, i) =>
// val rs = Module(new ReservationStationNew(cfg, rsConfigs(i), 6, fixedDelay = -1, feedback = true))
//
// rs.io.redirect <> io.fromCtrlBlock.redirect
// rs.io.numExist <> io.toCtrlBlock.numExist(i)
// rs.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
// rs.io.enqData <> io.fromCtrlBlock.enqIqData(i)
//
// rs.io.writeBackedData <> writebackData ++ io.writebackData
// for((x, y) <- rs.io.extraListenPorts.zip(extraListenPorts ++ io.extraListenPorts)){
// x.valid := y.fire()
// x.bits := y.bits
// }
//
// rs.suggestName(s"rs_${cfg.name}")
// rs
// })
//
//
// val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
// val lsroq = Module(new LsqWrappper)
// val sbuffer = Module(new NewSbuffer)
// // if you want to stress test dcache store, use FakeSbuffer
// // val sbuffer = Module(new FakeSbuffer)
//
// // dtlb
// io.ptw <> dtlb.io.ptw
// dtlb.io.sfence <> io.csr.sfence
// dtlb.io.csr <> io.csr.tlbInfo
//
// // LoadUnit
// for (i <- 0 until exuParameters.LduCnt) {
// loadUnits(i).io.redirect <> io.fromCtrlBlock.redirect
// loadUnits(i).io.tlbFeedback <> reservationStations(i).io.tlbFeedback
// loadUnits(i).io.dtlb <> dtlb.io.requestor(i)
// // get input from dispatch
// loadUnits(i).io.ldin <> reservationStations(i).io.deq
// // dcache access
// loadUnits(i).io.dcache <> io.dcache.loadUnitToDcacheVec(i)
// // forward
// loadUnits(i).io.lsroq.forward <> lsroq.io.forward(i)
// loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
//
// // passdown to lsroq
// lsroq.io.loadIn(i) <> loadUnits(i).io.lsroq.loadIn
// lsroq.io.ldout(i) <> loadUnits(i).io.lsroq.ldout
// }
//
// // StoreUnit
// for (i <- 0 until exuParameters.StuCnt) {
// storeUnits(i).io.redirect <> io.fromCtrlBlock.redirect
// storeUnits(i).io.tlbFeedback <> reservationStations(exuParameters.LduCnt + i).io.tlbFeedback
// storeUnits(i).io.dtlb <> dtlb.io.requestor(exuParameters.LduCnt + i)
// // get input from dispatch
// storeUnits(i).io.stin <> reservationStations(exuParameters.LduCnt + i).io.deq
// // passdown to lsroq
// storeUnits(i).io.lsroq <> lsroq.io.storeIn(i)
// }
//
// // Lsroq
// lsroq.io.commits <> io.fromCtrlBlock.commits
// lsroq.io.dp1Req <> io.fromCtrlBlock.lsqIdxReq
// lsroq.io.oldestStore <> io.toCtrlBlock.oldestStore
// lsroq.io.lsIdxs <> io.toCtrlBlock.lsqIdxResp
// lsroq.io.brqRedirect := io.fromCtrlBlock.redirect
// lsroq.io.roqDeqPtr := io.fromCtrlBlock.roqDeqPtr
//
// io.toCtrlBlock.replay <> lsroq.io.rollback
//
// lsroq.io.dcache <> io.dcache.loadMiss
// lsroq.io.uncache <> io.dcache.uncache
//
// // LSROQ to store buffer
// lsroq.io.sbuffer <> sbuffer.io.in
//
// // Sbuffer
// sbuffer.io.dcache <> io.dcache.sbufferToDcache
//
// // flush sbuffer
// val fenceFlush = io.csr.fenceToSbuffer.flushSb
// val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid
// io.csr.fenceToSbuffer.sbIsEmpty := sbuffer.io.flush.empty
// // if both of them tries to flush sbuffer at the same time
// // something must have gone wrong
// assert(!(fenceFlush && atomicsFlush))
// sbuffer.io.flush.valid := fenceFlush || atomicsFlush
//
// // TODO: make 0/1 configurable
// // AtomicsUnit
// // AtomicsUnit will override other control signals,
// // as atomics insts (LR/SC/AMO) will block the pipeline
// val st0_atomics = reservationStations(2).io.deq.valid && reservationStations(2).io.deq.bits.uop.ctrl.fuType === FuType.mou
// val st1_atomics = reservationStations(3).io.deq.valid && reservationStations(3).io.deq.bits.uop.ctrl.fuType === FuType.mou
// // amo should always go through store issue queue 0
// assert(!st1_atomics)
//
// atomicsUnit.io.dtlb.resp.valid := false.B
// atomicsUnit.io.dtlb.resp.bits := DontCare
// atomicsUnit.io.out.ready := false.B
//
// // dispatch 0 takes priority
// atomicsUnit.io.in.valid := st0_atomics
// atomicsUnit.io.in.bits := reservationStations(2).io.deq.bits
// when (st0_atomics) {
// reservationStations(0).io.deq.ready := atomicsUnit.io.in.ready
// storeUnits(0).io.stin.valid := false.B
// }
//
// when(atomicsUnit.io.dtlb.req.valid) {
// dtlb.io.requestor(0) <> atomicsUnit.io.dtlb // TODO: check it later
// // take load unit 0's tlb port
// // make sure not to disturb loadUnit
// assert(!loadUnits(0).io.dtlb.req.valid)
// loadUnits(0).io.dtlb.resp.valid := false.B
// }
//
// when(atomicsUnit.io.tlbFeedback.valid) {
// assert(!storeUnits(0).io.tlbFeedback.valid)
// atomicsUnit.io.tlbFeedback <> reservationStations(exuParameters.LduCnt + 0).io.tlbFeedback
// }
//
// atomicsUnit.io.dcache <> io.dcache.atomics
// atomicsUnit.io.flush_sbuffer.empty := sbuffer.io.flush.empty
//
// atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
//
// when(atomicsUnit.io.out.valid){
// // take load unit 0's write back port
// assert(!loadUnits(0).io.ldout.valid)
// loadUnits(0).io.ldout.ready := false.B
// }
//
// lsroq.io.exceptionAddr.lsIdx := io.csr.exceptionAddr.lsIdx
// lsroq.io.exceptionAddr.isStore := io.csr.exceptionAddr.isStore
// io.csr.exceptionAddr.vaddr := Mux(atomicsUnit.io.exceptionAddr.valid, atomicsUnit.io.exceptionAddr.bits, lsroq.io.exceptionAddr.vaddr)
}
......@@ -209,11 +209,16 @@ object Exu {
val ldExeUnitCfg = ExuConfig("LoadExu", Seq(lduCfg), wbIntPriority = 0, wbFpPriority = 0)
val stExeUnitCfg = ExuConfig("StoreExu", Seq(stuCfg, mouCfg), wbIntPriority = Int.MaxValue, wbFpPriority = Int.MaxValue)
val exuConfigs: Seq[ExuConfig] = jumpExeUnitCfg +: (
Seq.fill(exuParameters.AluCnt)(aluExeUnitCfg) ++
Seq.fill(exuParameters.MduCnt)(mulDivExeUnitCfg) ++
Seq.fill(exuParameters.FmacCnt)(fmacExeUnitCfg) ++
val intExuConfigs = jumpExeUnitCfg +: (
Seq.fill(exuParameters.AluCnt)(aluExeUnitCfg) ++
Seq.fill(exuParameters.MduCnt)(mulDivExeUnitCfg)
)
val fpExuConfigs =
Seq.fill(exuParameters.FmacCnt)(fmacExeUnitCfg) ++
Seq.fill(exuParameters.FmiscCnt)(fmiscExeUnitCfg)
)
val exuConfigs: Seq[ExuConfig] = intExuConfigs ++ fpExuConfigs
}
\ No newline at end of file
......@@ -4,12 +4,13 @@ import chisel3._
import chisel3.util._
import xiangshan.backend.exu.Exu.fmacExeUnitCfg
import xiangshan.backend.fu.fpu._
import xiangshan.backend.fu.fpu.fma.FMA
class FmacExeUnit extends Exu(fmacExeUnitCfg)
{
val frm = IO(Input(UInt(3.W)))
val fma = supportedFunctionUnits.head
val fma = supportedFunctionUnits.head.asInstanceOf[FMA]
val input = io.fromFp.bits
val fmaOut = fma.io.out.bits
......
......@@ -6,23 +6,23 @@ import xiangshan._
import utils._
class Wb(priorities: Seq[Int], numOut: Int, wen: ExuOutput => Bool) extends XSModule {
class Wb(priorities: Seq[Int], numOut: Int) extends XSModule {
val io = IO(new Bundle() {
val in = Vec(priorities.size, Flipped(DecoupledIO(new ExuOutput)))
val out = Vec(numOut, ValidIO(new ExuOutput))
})
def exuOutToRfReq(exuOut: DecoupledIO[ExuOutput]): DecoupledIO[ExuOutput] = {
val req = WireInit(exuOut)
req.valid := exuOut.valid && wen(exuOut.bits)
exuOut.ready := Mux(req.valid, req.ready, true.B)
req
}
// def exuOutToRfReq(exuOut: DecoupledIO[ExuOutput]): DecoupledIO[ExuOutput] = {
// val req = WireInit(exuOut)
// req.valid := exuOut.valid && wen(exuOut.bits)
// exuOut.ready := Mux(req.valid, req.ready, true.B)
// req
// }
val directConnect = io.in.zip(priorities).filter(x => x._2 == 0).map(_._1).map(exuOutToRfReq)
val mulReq = io.in.zip(priorities).filter(x => x._2 == 1).map(_._1).map(exuOutToRfReq)
val otherReq = io.in.zip(priorities).filter(x => x._2 > 1).map(_._1).map(exuOutToRfReq)
val directConnect = io.in.zip(priorities).filter(x => x._2 == 0).map(_._1)
val mulReq = io.in.zip(priorities).filter(x => x._2 == 1).map(_._1)
val otherReq = io.in.zip(priorities).filter(x => x._2 > 1).map(_._1)
val portUsed = directConnect.size + mulReq.size
require(portUsed <= numOut)
......
package xiangshan.backend.fu.fpu
import chisel3._
import chisel3.util._
import xiangshan.FuType
import xiangshan.backend.fu.{CertainLatency, FuConfig}
import xiangshan.backend.fu.fpu.util.ORTree
// Integer-to-floating-point conversion unit.
//
// The FuConfig passed to FPUPipelineModule declares a FuType.i2f unit that
// writes only the FP register file, never redirects, and has a fixed latency
// of 2 (CertainLatency(2)). The body is organised as two stages whose values
// are captured with S1Reg / S2Reg.
// NOTE(review): S1Reg, S2Reg, op, rm, isDouble and fflags are not defined in
// this class - presumably inherited from FPUPipelineModule; confirm there.
class IntToFloat extends FPUPipelineModule(
FuConfig(FuType.i2f, 1, 0, writeIntRf = false, writeFpRf = true, hasRedirect = false, CertainLatency(2))
) {
/** Stage 1: Count leading zeros and shift
  *
  * Derives the sign and magnitude of the integer operand, counts its leading
  * zeros, computes the unrounded biased exponent and left-shifts the
  * magnitude so that its leading one moves towards bit 63.
*/
// Integer operand to convert.
val a = io.in.bits.src(0)
// Bitwise complement of the operand, and its two's-complement negation.
val aNeg = (~a).asUInt()
val aComp = aNeg + 1.U
// Sign of the operand: forced to false when op(0) is set, otherwise taken
// from bit 63 when op(1) is set and from bit 31 when it is clear.
// NOTE(review): op(0) looks like an "unsigned" selector and op(1) like a
// "64-bit source" selector - confirm against the op encoding.
val aSign = Mux(op(0), false.B, Mux(op(1), a(63), a(31)))
// Leading-zero counts: PriorityEncoder over the MSB-first bit list of
//  - the negated value (aComp),
//  - the complemented value (aNeg),
//  - the operand itself (a).
// When op(1) is clear only the low 32 bits of each value are considered.
val leadingZerosComp = PriorityEncoder(Mux(op(1), aComp, aComp(31, 0)).asBools().reverse)
val leadingZerosNeg = PriorityEncoder(Mux(op(1), aNeg, aNeg(31, 0)).asBools().reverse)
val leadingZerosPos = PriorityEncoder(Mux(op(1), a, a(31,0)).asBools().reverse)
// Magnitude to normalise: the negated value when aSign is set, else the
// operand; narrowed to the low 32 bits when op(1) is clear.
val aVal = Mux(aSign, Mux(op(1), aComp, aComp(31, 0)), Mux(op(1), a, a(31, 0)))
// For a signed-negative operand the count is taken from the complemented
// value (aNeg), not from the magnitude (aComp); leadingZeroHasError below
// records when the two counts disagree so stage 2 can compensate.
val leadingZeros = Mux(aSign, leadingZerosNeg, leadingZerosPos)
// exp = xlen - 1 - leadingZeros + bias
// (xlen is 64 in both branches; only the bias depends on isDouble)
val expUnrounded = S1Reg(
Mux(isDouble,
(64 - 1 + Float64.expBiasInt).U - leadingZeros,
(64 - 1 + Float32.expBiasInt).U - leadingZeros
)
)
// Set when the operand is negative and the count taken from aNeg differs
// from the count of the real magnitude aComp.
val leadingZeroHasError = S1Reg(aSign && (leadingZerosComp=/=leadingZerosNeg))
// Stage-1 copies of the control and sign information used by stage 2.
val rmReg = S1Reg(rm)
// opReg is captured here but not read anywhere else in this class.
val opReg = S1Reg(op)
val isDoubleReg = S1Reg(isDouble)
val aIsZeroReg = S1Reg(a===0.U)
val aSignReg = S1Reg(aSign)
// Magnitude shifted left by the leading-zero count, truncated to 64 bits.
val aShifted = S1Reg((aVal << leadingZeros)(63, 0))
/** Stage 2: Rounding
  *
  * Slices mantissa / guard / round / sticky bits out of the shifted
  * magnitude, rounds them with RoundingUnit, adjusts the exponent and packs
  * the result.
*/
// Bits below the leading one. Normally bit 63 is dropped and bits 62..0 are
// kept; when the leading-zero count was flagged as wrong, bits 63..1 are
// used instead (the exponent is corrected further down).
val aShiftedFix = Mux(leadingZeroHasError, aShifted(63, 1), aShifted(62, 0))
// Top 52 bits (double) / top 23 bits (single) of the fraction.
val mantD = aShiftedFix(62, 62-51)
val mantS = aShiftedFix(62, 62-22)
// g / r: the two bits directly below the selected mantissa field;
// s: ORTree over every remaining lower bit.
// They drive the guard / round / sticky inputs of the rounding unit below.
val g = Mux(isDoubleReg, aShiftedFix(62-52), aShiftedFix(62-23))
val r = Mux(isDoubleReg, aShiftedFix(62-53), aShiftedFix(62-24))
val s = Mux(isDoubleReg, ORTree(aShiftedFix(62-54, 0)), ORTree(aShiftedFix(62-25, 0)))
// One rounding unit, parameterised with Float64.mantWidth, serves both
// output formats; the mantissa input is selected by isDoubleReg.
val roudingUnit = Module(new RoundingUnit(Float64.mantWidth))
roudingUnit.io.in.rm := rmReg
roudingUnit.io.in.mant := Mux(isDoubleReg, mantD, mantS)
roudingUnit.io.in.sign := aSignReg
roudingUnit.io.in.guard := g
roudingUnit.io.in.round := r
roudingUnit.io.in.sticky := s
val mantRounded = roudingUnit.io.out.mantRounded
// Exponent after rounding: add the rounding carry (mantCout for double,
// bit Float32.mantWidth of the rounded mantissa for single), then add one
// more when the leading-zero count had to be corrected.
val expRounded = Mux(isDoubleReg,
expUnrounded + roudingUnit.io.out.mantCout,
expUnrounded + mantRounded(Float32.mantWidth)
) + leadingZeroHasError
// Single-precision result: sign, low Float32.expWidth exponent bits,
// low Float32.mantWidth mantissa bits.
val resS = Cat(
aSignReg,
expRounded(Float32.expWidth-1, 0),
mantRounded(Float32.mantWidth-1, 0)
)
// Double-precision result: sign, full exponent, full rounded mantissa.
val resD = Cat(aSignReg, expRounded, mantRounded)
// A zero operand yields an all-zero result; otherwise the packed value for
// the selected format. The output data is captured with S2Reg.
io.out.bits.data := S2Reg(Mux(aIsZeroReg, 0.U, Mux(isDoubleReg, resD, resS)))
// Only the inexact flag is driven from the rounding unit; all other
// exception flags are tied to false in this unit.
fflags.inexact := S2Reg(roudingUnit.io.out.inexact)
fflags.underflow := false.B
fflags.overflow := false.B
fflags.infinite := false.B
fflags.invalid := false.B
}
//package xiangshan.backend.fu.fpu
//
//import chisel3._
//import chisel3.util._
//import xiangshan.FuType
//import xiangshan.backend.fu.{CertainLatency, FuConfig}
//import xiangshan.backend.fu.fpu.util.ORTree
//
//class IntToFloat extends FPUPipelineModule(
// FuConfig(FuType.i2f, 1, 0, writeIntRf = false, writeFpRf = true, hasRedirect = false, CertainLatency(2))
//) {
// /** Stage 1: Count leading zeros and shift
// */
//
// val a = io.in.bits.src(0)
// val aNeg = (~a).asUInt()
// val aComp = aNeg + 1.U
// val aSign = Mux(op(0), false.B, Mux(op(1), a(63), a(31)))
//
// val leadingZerosComp = PriorityEncoder(Mux(op(1), aComp, aComp(31, 0)).asBools().reverse)
// val leadingZerosNeg = PriorityEncoder(Mux(op(1), aNeg, aNeg(31, 0)).asBools().reverse)
// val leadingZerosPos = PriorityEncoder(Mux(op(1), a, a(31,0)).asBools().reverse)
//
// val aVal = Mux(aSign, Mux(op(1), aComp, aComp(31, 0)), Mux(op(1), a, a(31, 0)))
// val leadingZeros = Mux(aSign, leadingZerosNeg, leadingZerosPos)
//
// // exp = xlen - 1 - leadingZeros + bias
// val expUnrounded = S1Reg(
// Mux(isDouble,
// (64 - 1 + Float64.expBiasInt).U - leadingZeros,
// (64 - 1 + Float32.expBiasInt).U - leadingZeros
// )
// )
// val leadingZeroHasError = S1Reg(aSign && (leadingZerosComp=/=leadingZerosNeg))
// val rmReg = S1Reg(rm)
// val opReg = S1Reg(op)
// val isDoubleReg = S1Reg(isDouble)
// val aIsZeroReg = S1Reg(a===0.U)
// val aSignReg = S1Reg(aSign)
// val aShifted = S1Reg((aVal << leadingZeros)(63, 0))
//
// /** Stage 2: Rounding
// */
// val aShiftedFix = Mux(leadingZeroHasError, aShifted(63, 1), aShifted(62, 0))
// val mantD = aShiftedFix(62, 62-51)
// val mantS = aShiftedFix(62, 62-22)
//
// val g = Mux(isDoubleReg, aShiftedFix(62-52), aShiftedFix(62-23))
// val r = Mux(isDoubleReg, aShiftedFix(62-53), aShiftedFix(62-24))
// val s = Mux(isDoubleReg, ORTree(aShiftedFix(62-54, 0)), ORTree(aShiftedFix(62-25, 0)))
//
// val roudingUnit = Module(new RoundingUnit(Float64.mantWidth))
// roudingUnit.io.in.rm := rmReg
// roudingUnit.io.in.mant := Mux(isDoubleReg, mantD, mantS)
// roudingUnit.io.in.sign := aSignReg
// roudingUnit.io.in.guard := g
// roudingUnit.io.in.round := r
// roudingUnit.io.in.sticky := s
//
// val mantRounded = roudingUnit.io.out.mantRounded
// val expRounded = Mux(isDoubleReg,
// expUnrounded + roudingUnit.io.out.mantCout,
// expUnrounded + mantRounded(Float32.mantWidth)
// ) + leadingZeroHasError
//
// val resS = Cat(
// aSignReg,
// expRounded(Float32.expWidth-1, 0),
// mantRounded(Float32.mantWidth-1, 0)
// )
// val resD = Cat(aSignReg, expRounded, mantRounded)
//
// io.out.bits.data := S2Reg(Mux(aIsZeroReg, 0.U, Mux(isDoubleReg, resD, resS)))
// fflags.inexact := S2Reg(roudingUnit.io.out.inexact)
// fflags.underflow := false.B
// fflags.overflow := false.B
// fflags.infinite := false.B
// fflags.invalid := false.B
//}
......@@ -10,7 +10,7 @@ import xiangshan.backend.fu.fpu.util.{CSA3_2, FPUDebug, ORTree, ShiftLeftJam, Sh
class FMA extends FPUPipelineModule {
override val latency = FunctionUnit.fmaCfg.latency.latencyVal.get
override val latency = FunctionUnit.fmacCfg.latency.latencyVal.get
def UseRealArraryMult = false
......
......@@ -148,6 +148,7 @@ class XSSimTop()(implicit p: config.Parameters) extends LazyModule {
ExcitingUtils.addSource(timer, "logTimestamp")
// Check and dispaly all source and sink connections
ExcitingUtils.fixConnections()
ExcitingUtils.checkAndDisplay()
}
}
......
package xiangshan.backend.exu
import org.scalatest._
import chiseltest._
import chisel3._
import chisel3.experimental.BundleLiterals._
import chiseltest.experimental.TestOptionBuilder._
import chiseltest.internal.VerilatorBackendAnnotation
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.must.Matchers
import xiangshan._
import xiangshan.testutils._
import xiangshan.testutils.TestCaseGenerator._
import scala.util.Random
/** Unit tests for [[AluExeUnit]] driven through its decoupled in/out ports.
  *
  * Each test attaches the input port as a source and the output port as a
  * sink on the DUT clock, then runs the enqueue side and the expect-dequeue
  * side concurrently with `parallel`.
  */
class AluTest extends AnyFlatSpec
  with ChiselScalatestTester
  with Matchers
  with ParallelTestExecution
  with HasPartialDecoupledDriver
{

  // Smoke test: a single add of 0 + 0 must produce data == 0.
  it should "do simple test corrcetly" in {
    test(new AluExeUnit) { dut =>
      dut.io.in.initSource().setSourceClock(dut.clock)
      dut.io.out.initSink().setSinkClock(dut.clock)

      val zeroAdd = genAluAdd(dut.io.in.bits, 0, 0)
      val zeroResult = chiselTypeOf(dut.io.out.bits).Lit(_.data -> 0.U)

      parallel(
        dut.io.in.enqueuePartial(zeroAdd),
        dut.io.out.expectDequeuePartial(zeroResult)
      )
    }
  }

  // Randomized test: a batch of adds must come out in order with the
  // software-computed sums.
  it should "do random add correctly" in {
    test(new AluExeUnit) { dut =>
      dut.io.in.initSource().setSourceClock(dut.clock)
      dut.io.out.initSink().setSinkClock(dut.clock)

      val testSize = 10

      // Both operands stay below 0x3fffffff so their Int sum never goes
      // negative. Operands are drawn pairwise: first, then second.
      val operandPairs = Vector.fill(testSize) {
        val first = Random.nextInt(0x3fffffff)
        val second = Random.nextInt(0x3fffffff)
        (first, second)
      }

      val stimuli = operandPairs.map { case (first, second) =>
        genAluAdd(dut.io.in.bits, first, second)
      }
      val expected = operandPairs.map { case (first, second) =>
        chiselTypeOf(dut.io.out.bits).Lit(_.data -> (first + second).U)
      }

      parallel(
        dut.io.in.enqueuePartialSeq(stimuli),
        dut.io.out.expectDequeuePartialSeq(expected)
      )
    }
  }
}
package xiangshan.backend.exu
import org.scalatest._
import chiseltest._
import chisel3._
import chisel3.experimental.BundleLiterals._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.must.Matchers
import top.Parameters
import utils.XSLog
import xiangshan.testutils._
import xiangshan.testutils.TestCaseGenerator._
import scala.util.Random
/** Tests for the multiplier ([[MulExeUnit]]) and multiply/divide
  * ([[MulDivExeUnit]]) execution units.
  *
  * Every test elaborates the unit inside an anonymous subclass that calls
  * `AddSinks()`, attaches the decoupled input as a source and the output as a
  * sink on the DUT clock, and then runs a producer thread and a consumer
  * thread with `fork { ... }.fork { ... }.join()`.
  *
  * NOTE(review): `AddSinks()` is defined outside this file - presumably it
  * ties off cross-module sinks the units expect; confirm in xiangshan.testutils.
  */
class MduTest extends AnyFlatSpec
  with ChiselScalatestTester
  with Matchers
  with ParallelTestExecution
  with HasPartialDecoupledDriver
{

  // Runs once when the suite is constructed, before any test body executes.
  Parameters.set(Parameters.debugParameters)

  // Streams random-pc multiply uops through the multiplier and expects them
  // to be dequeued in the same order, identified by their pc.
  "MUL" should "random enq and deq correctly" in {
    test(new MulExeUnit{
      AddSinks()
    }){ c =>

      c.io.in.initSource().setSourceClock(c.clock)
      c.io.out.initSink().setSinkClock(c.clock)

      val testSize = 100
      val pcSeq = (0 until testSize).map(_ => Random.nextInt(0x7fffffff))

      fork{
        c.io.in.enqueuePartialSeq(pcSeq.map(pc => genMul(c.io.in.bits, pc)))
      }.fork{
        c.io.out.expectDequeuePartialSeq(pcSeq.map(
          pc => chiselTypeOf(c.io.out.bits).Lit(
            _.uop.cf.pc -> pc.U
          )
        ))
      }.join()

    }
  }

  // Enqueues a uop tagged brTag 12, raises a redirect for brTag 11, then
  // enqueues a uop tagged brTag 10. The consumer expects the first result it
  // dequeues to be the second uop (pc 777).
  "MUL" should "only flush instrs newer than the redirect instr" in {
    test(new MulExeUnit{
      AddSinks()
    }){ c =>

      c.io.in.initSource().setSourceClock(c.clock)
      c.io.out.initSink().setSinkClock(c.clock)

      fork{
        // First uop: pc 666, brTag (flag = true, value = 12).
        c.io.in.enqueuePartial(chiselTypeOf(c.io.in.bits).Lit(
          _.uop.cf.pc -> 666.U,
          _.uop.brTag.flag -> true.B,
          _.uop.brTag.value -> 12.U
        ))
        // Assert a non-exception redirect carrying brTag (true, 11).
        c.io.redirect.pokePartial(chiselTypeOf(c.io.redirect).Lit(
          _.valid -> true.B,
          _.bits.isException -> false.B,
          _.bits.brTag.flag -> true.B,
          _.bits.brTag.value -> 11.U
        ))
        // Second uop: pc 777, brTag (flag = true, value = 10), enqueued while
        // the redirect is still asserted.
        c.io.in.enqueuePartial(chiselTypeOf(c.io.in.bits).Lit(
          _.uop.cf.pc -> 777.U,
          _.uop.brTag.flag -> true.B,
          _.uop.brTag.value -> 10.U
        ))
        // Drop the redirect again.
        c.io.redirect.pokePartial(chiselTypeOf(c.io.redirect).Lit(_.valid -> false.B))
      }.fork{
        c.io.out.expectDequeuePartial(chiselTypeOf(c.io.out.bits).Lit(_.uop.cf.pc -> 777.U))
      }.join()
    }
  }

  // Enqueues a uop tagged brTag 15 and, one cycle later, pulses a redirect
  // carrying the same brTag for one cycle. The uop (pc 666) must still be
  // dequeued.
  "MUL" should "dont flush same br tag" in {
    test(new MulExeUnit{
      AddSinks()
    }){ c =>

      c.io.in.initSource().setSourceClock(c.clock)
      c.io.out.initSink().setSinkClock(c.clock)

      fork{
        // Uop under test: pc 666, brTag (flag = true, value = 15).
        c.io.in.enqueuePartial(chiselTypeOf(c.io.in.bits).Lit(
          _.uop.cf.pc -> 666.U,
          _.uop.brTag.flag -> true.B,
          _.uop.brTag.value -> 15.U
        ))
        c.clock.step(1)
        // One-cycle, non-exception redirect with the identical brTag.
        c.io.redirect.valid.poke(true.B)
        c.io.redirect.bits.pokePartial(chiselTypeOf(c.io.redirect.bits).Lit(
          _.isException -> false.B,
          _.brTag.flag -> true.B,
          _.brTag.value -> 15.U
        ))
        c.clock.step(1)
        c.io.redirect.valid.poke(false.B)
      }.fork{
        c.io.out.expectDequeuePartial(chiselTypeOf(c.io.out.bits).Lit(_.uop.cf.pc -> 666.U))
      }.join()
    }
  }

  // Streams random-pc divide uops through the combined multiply/divide unit
  // and expects them to be dequeued in the same order, identified by their pc.
  "MDU" should "random enq and deq correctly" in {
    test(new MulDivExeUnit{
      AddSinks()
    }){ c =>

      c.io.in.initSource().setSourceClock(c.clock)
      c.io.out.initSink().setSinkClock(c.clock)

      val testSize = 50
      val pcSeq = (0 until testSize).map(_ => Random.nextInt(0x7fffffff))

      fork{
        c.io.in.enqueuePartialSeq(pcSeq.map(pc => {
          genDiv(c.io.in.bits, pc)
        }))
      }.fork{
        c.io.out.expectDequeuePartialSeq(pcSeq.map(
          pc => chiselTypeOf(c.io.out.bits).Lit(
            _.uop.cf.pc -> pc.U
          )
        ))
      }.join()

    }
  }

}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册