提交 49681eda 编写于 作者: Y Yinan Xu

Merge remote-tracking branch 'origin/master' into dev-prefetch-switch

......@@ -173,6 +173,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
plic.module.io.extra.get.intrVec <> RegNext(RegNext(io.extIntrs))
for (i <- 0 until NumCores) {
xs_core(i).module.io.hartId := i.U
xs_core(i).module.io.externalInterrupt.mtip := clint.module.io.mtip(i)
xs_core(i).module.io.externalInterrupt.msip := clint.module.io.msip(i)
// xs_core(i).module.io.externalInterrupt.meip := RegNext(RegNext(io.meip(i)))
......
......@@ -4,6 +4,7 @@ import chisel3._
import top.Parameters
import xiangshan.HasXSParameter
import utils.XSLogLevel.XSLogLevel
import chisel3.ExcitingUtils.ConnectionType
object XSLogLevel extends Enumeration {
type XSLogLevel = Value
......@@ -103,26 +104,33 @@ object XSWarn extends LogHelper(XSLogLevel.WARN)
object XSError extends LogHelper(XSLogLevel.ERROR)
/** Performance-counter helper.
  *
  * Each call to `apply` creates a 64-bit register that accumulates `perfCnt`
  * and, when `EnablePerfDebug` is set in the environment parameters, emits
  * print statements for the counter named `perfName`.
  *
  * NOTE(review): this span shows two variants of `apply` interleaved (one
  * without and one with the `realtime` parameter, plus both the old and new
  * body lines) and its braces do not balance as written -- it looks like
  * leftover diff context. Confirm which variant is current before editing.
  */
object XSPerf {
def apply(perfName: String, perfCnt: UInt, acc: Boolean = false, intervalBits: Int = 15)(implicit name: String) = {
def apply(perfName: String, perfCnt: UInt, acc: Boolean = false, realtime: Boolean = false, intervalBits: Int = 15)(implicit name: String) = {
// 64-bit accumulator: next_counter is counter + perfCnt, and counter latches it.
val counter = RegInit(0.U(64.W))
val next_counter = WireInit(0.U(64.W))
// Defaults to 0; driven through ExcitingUtils below when perf debug is enabled.
val logTimestamp = WireInit(0.U(64.W))
val enableDebug = Parameters.get.envParameters.EnablePerfDebug
val env = Parameters.get.envParameters
next_counter := counter + perfCnt
counter := next_counter
if (enableDebug) {
if (env.EnablePerfDebug) {
ExcitingUtils.addSink(logTimestamp, "logTimestamp")
val printCond =
if(intervalBits == 0) true.B
else (logTimestamp(intervalBits - 1, 0) === 0.U)
when(printCond) { // TODO: Need print when program exit?
if(acc) {
// printCond: always true when intervalBits == 0, otherwise true whenever the
// low `intervalBits` bits of logTimestamp are all zero.
val printCond = if (intervalBits == 0) true.B else (logTimestamp(intervalBits - 1, 0) === 0.U)
// Interval printing is tied off to false.B unless `realtime` is requested.
val printEnable = if (realtime) printCond else false.B
// xstrap stays false.B unless the "XSTRAP" sink is registered, which only
// happens when neither FPGAPlatform nor DualCore is set.
val xstrap = WireInit(false.B)
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSink(xstrap, "XSTRAP", ConnectionType.Debug)
}
when (printEnable) { // interval print
if (acc) {
// acc: print the accumulated value; otherwise print this cycle's increment.
XSLog(XSLogLevel.PERF)(true, true.B, p"$perfName, $next_counter\n")
}else{
} else {
XSLog(XSLogLevel.PERF)(true, true.B, p"$perfName, $perfCnt\n")
}
}
when (xstrap) { // summary print
// dump acc counter by default
printf("%d <- " + perfName + "\n", next_counter)
}
}
}
}
......@@ -37,7 +37,7 @@ object MaskedRegMap { // TODO: add read mask
}
/** Builds a Bool that flags `addr` as illegal.
  *
  * The lookup is given `true.B` as its default and one `(address -> false.B)`
  * entry for every key of `mapping`; the tuple payloads of `mapping` are not
  * inspected here.
  *
  * @param mapping register map keyed by integer address
  * @param addr    address to check
  * @return the wire driven by the lookup result
  */
def isIllegalAddr(mapping: Map[Int, (UInt, UInt, UInt => UInt, UInt, UInt => UInt)], addr: UInt):Bool = {
  val illegalAddr = Wire(Bool())
  // Sort by address so the generated lookup does not depend on Map iteration
  // order. The former unsorted connection was overridden by this one
  // (last connection wins), so it has been dropped.
  val legalEntries = mapping.toSeq.sortBy(_._1).map { case (a, _) => (a.U, false.B) }
  illegalAddr := LookupTreeDefault(addr, true.B, legalEntries)
  illegalAddr
}
def generate(mapping: Map[Int, (UInt, UInt, UInt => UInt, UInt, UInt => UInt)], addr: UInt, rdata: UInt,
......
......@@ -10,7 +10,7 @@ import xiangshan.backend.exu.Exu._
import xiangshan.frontend._
import xiangshan.mem._
import xiangshan.backend.fu.HasExceptionNO
import xiangshan.cache.{DCache,InstrUncache, DCacheParameters, ICache, ICacheParameters, L1plusCache, L1plusCacheParameters, PTW, PTWRepeater, Uncache, MemoryOpConstants, MissReq}
import xiangshan.cache.{DCache, InstrUncache, DCacheParameters, ICache, ICacheParameters, L1plusCache, L1plusCacheParameters, PTW, PTWRepeater, Uncache, MemoryOpConstants, MissReq}
import xiangshan.cache.prefetch._
import chipsalliance.rocketchip.config
import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp}
......@@ -24,9 +24,10 @@ import utils._
/** Elaboration-time id generator.
  *
  * Every call returns the current value of `x` and then increments it, so
  * successive calls yield 0, 1, 2, ... `x` always holds the next id to hand out.
  */
object hartIdCore extends (() => Int) {
  var x = 0

  /** Returns the next id (starting at 0) and advances the counter. */
  def apply(): Int = {
    val allocated = x
    x += 1
    allocated
  }
}
......@@ -43,7 +44,7 @@ case class XSCoreParameters
VAddrBits: Int = 39,
PAddrBits: Int = 40,
HasFPU: Boolean = true,
FectchWidth: Int = 8,
FetchWidth: Int = 8,
EnableBPU: Boolean = true,
EnableBPD: Boolean = true,
EnableRAS: Boolean = true,
......@@ -106,7 +107,7 @@ case class XSCoreParameters
PtwL3EntrySize: Int = 4096, //(256 * 16) or 512
PtwSPEntrySize: Int = 16,
PtwL1EntrySize: Int = 16,
PtwL2EntrySize: Int = 2048,//(256 * 8)
PtwL2EntrySize: Int = 2048, //(256 * 8)
NumPerfCounters: Int = 16,
NrExtIntr: Int = 150
)
......@@ -119,7 +120,9 @@ trait HasXSParameter {
val XLEN = 64
val minFLen = 32
val fLen = 64
def xLen = 64
val HasMExtension = core.HasMExtension
val HasCExtension = core.HasCExtension
val HasDiv = core.HasDiv
......@@ -133,7 +136,7 @@ trait HasXSParameter {
val DataBits = XLEN
val DataBytes = DataBits / 8
val HasFPU = core.HasFPU
val FetchWidth = core.FectchWidth
val FetchWidth = core.FetchWidth
val PredictWidth = FetchWidth * (if (HasCExtension) 2 else 1)
val EnableBPU = core.EnableBPU
val EnableBPD = core.EnableBPD // enable backing predictor(like Tage) in BPUStage3
......@@ -173,7 +176,7 @@ trait HasXSParameter {
val exuParameters = core.exuParameters
val NRIntReadPorts = core.NRIntReadPorts
val NRIntWritePorts = core.NRIntWritePorts
val NRMemReadPorts = exuParameters.LduCnt + 2*exuParameters.StuCnt
val NRMemReadPorts = exuParameters.LduCnt + 2 * exuParameters.StuCnt
val NRFpReadPorts = core.NRFpReadPorts
val NRFpWritePorts = core.NRFpWritePorts
val LoadPipelineWidth = core.LoadPipelineWidth
......@@ -256,7 +259,7 @@ trait HasXSParameter {
// dcache prefetcher
val l2PrefetcherParameters = L2PrefetcherParameters(
enable = true,
_type = "bop",// "stream" or "bop"
_type = "bop", // "stream" or "bop"
streamParams = StreamPrefetchParameters(
streamCnt = 4,
streamSize = 4,
......@@ -277,7 +280,8 @@ trait HasXSParameter {
)
}
trait HasXSLog { this: RawModule =>
/** Mixin that publishes the module's own name as an implicit `String`
  * called `moduleName`, for code that asks for an implicit `String`.
  */
trait HasXSLog {
// Self-type: this trait can only be mixed into a RawModule; `this.name` below relies on it.
this: RawModule =>
implicit val moduleName: String = this.name
}
......@@ -285,13 +289,13 @@ abstract class XSModule extends MultiIOModule
with HasXSParameter
with HasExceptionNO
with HasXSLog
with HasFPUParameters
{
with HasFPUParameters {
def io: Record
}
// Remove this trait once the module logic has been implemented.
trait NeedImpl { this: RawModule =>
trait NeedImpl {
this: RawModule =>
override protected def IO[T <: Data](iodef: T): T = {
println(s"[Warn]: (${this.name}) please reomve 'NeedImpl' after implement this module")
val io = chisel3.experimental.IO(iodef)
......@@ -327,35 +331,19 @@ case class EnviromentParameters
// }
class XSCore()(implicit p: config.Parameters) extends LazyModule
with HasXSParameter
with HasExeBlockHelper
{
// to fast wake up fp, mem rs
val intBlockFastWakeUpFp = intExuConfigs.filter(fpFastFilter)
val intBlockSlowWakeUpFp = intExuConfigs.filter(fpSlowFilter)
val intBlockFastWakeUpInt = intExuConfigs.filter(intFastFilter)
val intBlockSlowWakeUpInt = intExuConfigs.filter(intSlowFilter)
val fpBlockFastWakeUpFp = fpExuConfigs.filter(fpFastFilter)
val fpBlockSlowWakeUpFp = fpExuConfigs.filter(fpSlowFilter)
val fpBlockFastWakeUpInt = fpExuConfigs.filter(intFastFilter)
val fpBlockSlowWakeUpInt = fpExuConfigs.filter(intSlowFilter)
with HasExeBlockHelper {
// outer facing nodes
val frontend = LazyModule(new Frontend())
val l1pluscache = LazyModule(new L1plusCache())
val ptw = LazyModule(new PTW())
val l2Prefetcher = LazyModule(new L2Prefetcher())
val memBlock = LazyModule(new MemBlock(
fastWakeUpIn = intBlockFastWakeUpInt ++ intBlockFastWakeUpFp ++ fpBlockFastWakeUpInt ++ fpBlockFastWakeUpFp,
slowWakeUpIn = intBlockSlowWakeUpInt ++ intBlockSlowWakeUpFp ++ fpBlockSlowWakeUpInt ++ fpBlockSlowWakeUpFp,
fastFpOut = Seq(),
slowFpOut = loadExuConfigs,
fastIntOut = Seq(),
slowIntOut = loadExuConfigs
fastWakeUpIn = intExuConfigs.filter(_.hasCertainLatency),
slowWakeUpIn = intExuConfigs.filter(_.hasUncertainlatency) ++ fpExuConfigs,
fastWakeUpOut = Seq(),
slowWakeUpOut = loadExuConfigs
))
lazy val module = new XSCoreImp(this)
......@@ -363,9 +351,9 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule
class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
with HasXSParameter
with HasExeBlockHelper
{
with HasExeBlockHelper {
val io = IO(new Bundle {
val hartId = Input(UInt(64.W))
val externalInterrupt = new ExternalInterruptIO
val l2ToPrefetcher = Flipped(new PrefetcherIO(PAddrBits))
})
......@@ -381,32 +369,21 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
AddressSpace.printMemmap()
// to fast wake up fp, mem rs
val intBlockFastWakeUpFp = intExuConfigs.filter(fpFastFilter)
val intBlockSlowWakeUpFp = intExuConfigs.filter(fpSlowFilter)
val intBlockFastWakeUpInt = intExuConfigs.filter(intFastFilter)
val intBlockSlowWakeUpInt = intExuConfigs.filter(intSlowFilter)
val fpBlockFastWakeUpFp = fpExuConfigs.filter(fpFastFilter)
val fpBlockSlowWakeUpFp = fpExuConfigs.filter(fpSlowFilter)
val fpBlockFastWakeUpInt = fpExuConfigs.filter(intFastFilter)
val fpBlockSlowWakeUpInt = fpExuConfigs.filter(intSlowFilter)
val intBlockFastWakeUp = intExuConfigs.filter(_.hasCertainLatency)
val intBlockSlowWakeUp = intExuConfigs.filter(_.hasUncertainlatency)
val ctrlBlock = Module(new CtrlBlock)
val integerBlock = Module(new IntegerBlock(
fastWakeUpIn = fpBlockFastWakeUpInt,
slowWakeUpIn = fpBlockSlowWakeUpInt ++ loadExuConfigs,
fastFpOut = intBlockFastWakeUpFp,
slowFpOut = intBlockSlowWakeUpFp,
fastIntOut = intBlockFastWakeUpInt,
slowIntOut = intBlockSlowWakeUpInt
fastWakeUpIn = Seq(),
slowWakeUpIn = fpExuConfigs.filter(_.writeIntRf) ++ loadExuConfigs,
fastWakeUpOut = intBlockFastWakeUp,
slowWakeUpOut = intBlockSlowWakeUp
))
val floatBlock = Module(new FloatBlock(
fastWakeUpIn = intBlockFastWakeUpFp,
slowWakeUpIn = intBlockSlowWakeUpFp ++ loadExuConfigs,
fastFpOut = fpBlockFastWakeUpFp,
slowFpOut = fpBlockSlowWakeUpFp,
fastIntOut = fpBlockFastWakeUpInt,
slowIntOut = fpBlockSlowWakeUpInt
fastWakeUpIn = Seq(),
slowWakeUpIn = intExuConfigs.filter(_.writeFpRf) ++ loadExuConfigs,
fastWakeUpOut = Seq(),
slowWakeUpOut = fpExuConfigs
))
val frontend = outer.frontend.module
......@@ -432,38 +409,39 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ctrlBlock.io.toFpBlock <> floatBlock.io.fromCtrlBlock
ctrlBlock.io.toLsBlock <> memBlock.io.fromCtrlBlock
integerBlock.io.wakeUpIn.fastUops <> floatBlock.io.wakeUpIntOut.fastUops
integerBlock.io.wakeUpIn.fast <> floatBlock.io.wakeUpIntOut.fast
integerBlock.io.wakeUpIn.slow <> floatBlock.io.wakeUpIntOut.slow ++ memBlock.io.wakeUpIntOut.slow
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock
val memBlockWakeUpInt = memBlock.io.wakeUpOut.slow.map(x => intOutValid(x))
val memBlockWakeUpFp = memBlock.io.wakeUpOut.slow.map(x => fpOutValid(x))
memBlock.io.wakeUpOut.slow.foreach(_.ready := true.B)
floatBlock.io.wakeUpIn.fastUops <> integerBlock.io.wakeUpFpOut.fastUops
floatBlock.io.wakeUpIn.fast <> integerBlock.io.wakeUpFpOut.fast
floatBlock.io.wakeUpIn.slow <> integerBlock.io.wakeUpFpOut.slow ++ memBlock.io.wakeUpFpOut.slow
floatBlock.io.toMemBlock <> memBlock.io.fromFpBlock
fpExuConfigs.zip(floatBlock.io.wakeUpOut.slow).filterNot(_._1.writeIntRf).map(_._2.ready := true.B)
val fpBlockWakeUpInt = fpExuConfigs
.zip(floatBlock.io.wakeUpOut.slow)
.filter(_._1.writeIntRf)
.map(_._2).map(x => intOutValid(x, connectReady = true))
intExuConfigs.zip(integerBlock.io.wakeUpOut.slow).filterNot(_._1.writeFpRf).map(_._2.ready := true.B)
val intBlockWakeUpFp = intExuConfigs.filter(_.hasUncertainlatency)
.zip(integerBlock.io.wakeUpOut.slow)
.filter(_._1.writeFpRf)
.map(_._2).map(x => fpOutValid(x, connectReady = true))
integerBlock.io.wakeUpIntOut.fast.map(_.ready := true.B)
integerBlock.io.wakeUpIntOut.slow.map(_.ready := true.B)
floatBlock.io.wakeUpFpOut.fast.map(_.ready := true.B)
floatBlock.io.wakeUpFpOut.slow.map(_.ready := true.B)
integerBlock.io.wakeUpIn.slow <> fpBlockWakeUpInt ++ memBlockWakeUpInt
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock
floatBlock.io.wakeUpIn.slow <> intBlockWakeUpFp ++ memBlockWakeUpFp
floatBlock.io.toMemBlock <> memBlock.io.fromFpBlock
val wakeUpMem = Seq(
integerBlock.io.wakeUpIntOut,
integerBlock.io.wakeUpFpOut,
floatBlock.io.wakeUpIntOut,
floatBlock.io.wakeUpFpOut
integerBlock.io.wakeUpOut,
floatBlock.io.wakeUpOut,
)
memBlock.io.wakeUpIn.fastUops <> wakeUpMem.flatMap(_.fastUops)
memBlock.io.wakeUpIn.fast <> wakeUpMem.flatMap(w => w.fast.map(f => {
val raw = WireInit(f)
raw
}))
memBlock.io.wakeUpIn.slow <> wakeUpMem.flatMap(w => w.slow.map(s => {
val raw = WireInit(s)
raw
}))
memBlock.io.wakeUpIn.fast <> wakeUpMem.flatMap(_.fast)
// Note: 'WireInit' is used to block 'ready's from memBlock,
// we don't need 'ready's from memBlock
memBlock.io.wakeUpIn.slow <> wakeUpMem.flatMap(_.slow.map(x => WireInit(x)))
integerBlock.io.csrio.hartId <> io.hartId
integerBlock.io.csrio.perf <> DontCare
integerBlock.io.csrio.perf.retiredInstr <> ctrlBlock.io.roqio.toCSR.perfinfo.retiredInstr
integerBlock.io.csrio.fpu.fflags <> ctrlBlock.io.roqio.toCSR.fflags
......@@ -495,7 +473,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ptw.io.tlb(0) <> dtlbRepester.io.ptw
ptw.io.tlb(1) <> itlbRepester.io.ptw
ptw.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.csr <> integerBlock.io.csrio.tlb
ptw.io.csr <> integerBlock.io.csrio.tlb
val l2PrefetcherIn = Wire(Decoupled(new MissReq))
if (l2PrefetcherParameters.enable && l2PrefetcherParameters._type == "bop") {
......
......@@ -292,6 +292,7 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
rename.io.roqCommits <> roq.io.commits
rename.io.out <> dispatch.io.fromRename
rename.io.renameBypass <> dispatch.io.renameBypass
rename.io.dispatchInfo <> dispatch.io.preDpInfo
dispatch.io.redirect <> backendRedirect
dispatch.io.flush := flushReg
......
......@@ -6,7 +6,8 @@ import xiangshan._
import utils._
import xiangshan.backend.regfile.Regfile
import xiangshan.backend.exu._
import xiangshan.backend.issue.{ReservationStation}
import xiangshan.backend.issue.ReservationStation
import xiangshan.mem.HasLoadHelper
class FpBlockToCtrlIO extends XSBundle {
......@@ -18,19 +19,16 @@ class FloatBlock
(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
fastFpOut: Seq[ExuConfig],
slowFpOut: Seq[ExuConfig],
fastIntOut: Seq[ExuConfig],
slowIntOut: Seq[ExuConfig]
) extends XSModule with HasExeBlockHelper {
fastWakeUpOut: Seq[ExuConfig],
slowWakeUpOut: Seq[ExuConfig],
) extends XSModule with HasExeBlockHelper with HasLoadHelper {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToFpBlockIO)
val toCtrlBlock = new FpBlockToCtrlIO
val toMemBlock = new FpBlockToMemBlockIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOut.size, slowFpOut.size))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOut.size, slowIntOut.size))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
// from csr
val frm = Input(UInt(3.W))
......@@ -39,6 +37,25 @@ class FloatBlock
val redirect = io.fromCtrlBlock.redirect
val flush = io.fromCtrlBlock.flush
require(fastWakeUpIn.isEmpty)
val wakeUpInReg = Wire(Flipped(new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)))
wakeUpInReg.slow.zip(io.wakeUpIn.slow).foreach{
case (inReg, in) =>
PipelineConnect(in, inReg, inReg.fire(), in.bits.uop.roqIdx.needFlush(redirect, flush))
}
val wakeUpInRecode = WireInit(wakeUpInReg)
for(((rec, reg), cfg) <- wakeUpInRecode.slow.zip(wakeUpInReg.slow).zip(slowWakeUpIn)){
rec.bits.data := {
if(cfg == Exu.ldExeUnitCfg) fpRdataHelper(reg.bits.uop, reg.bits.data)
else Mux(reg.bits.uop.ctrl.fpu.typeTagOut === S,
recode(reg.bits.data(31, 0), S),
recode(reg.bits.data(63, 0), D)
)
}
rec.bits.redirectValid := false.B
reg.ready := rec.ready
}
val fpRf = Module(new Regfile(
numReadPorts = NRFpReadPorts,
numWirtePorts = NRFpWritePorts,
......@@ -70,12 +87,11 @@ class FloatBlock
val readFpRf = cfg.readFpRf
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency && readFpRf).map(_.io.toFp.bits.data)
val writeBackData = inBlockWbData ++ io.wakeUpIn.fast.map(_.bits.data)
val fastPortsCnt = writeBackData.length
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency).map(_.io.out.bits.data)
val fastPortsCnt = inBlockWbData.length
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency && readFpRf).map(_.io.toFp)
val slowPorts = inBlockListenPorts ++ io.wakeUpIn.slow
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency).map(_.io.out)
val slowPorts = (inBlockListenPorts ++ wakeUpInRecode.slow).map(decoupledIOToValidIO)
val slowPortsCnt = slowPorts.length
println(s"${i}: exu:${cfg.name} fastPortsCnt: ${fastPortsCnt} " +
......@@ -99,11 +115,8 @@ class FloatBlock
rs.io.srcRegValue(1) := src2Value(readPortIndex(i))
if (cfg.fpSrcCnt > 2) rs.io.srcRegValue(2) := src3Value(readPortIndex(i))
rs.io.fastDatas <> writeBackData
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
rs.io.fastDatas <> inBlockWbData
rs.io.slowPorts <> slowPorts
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.flush <> flush
......@@ -123,44 +136,44 @@ class FloatBlock
raw.valid := x.io.fastUopOut.valid && raw.bits.ctrl.fpWen
raw
})
rs.io.fastUopsIn <> inBlockUops ++ io.wakeUpIn.fastUops
rs.io.fastUopsIn <> inBlockUops
}
io.wakeUpFpOut.fastUops <> reservedStations.filter(
rs => fpFastFilter(rs.exuCfg)
).map(_.io.fastUopOut).map(fpValid)
io.wakeUpFpOut.fast <> exeUnits.filter(
x => fpFastFilter(x.config)
).map(_.io.toFp)
io.wakeUpFpOut.slow <> exeUnits.filter(
x => fpSlowFilter(x.config)
).map(_.io.toFp)
io.wakeUpIntOut.fastUops <> reservedStations.filter(
rs => intFastFilter(rs.exuCfg)
).map(_.io.fastUopOut).map(intValid)
io.wakeUpIntOut.fast <> exeUnits.filter(
x => intFastFilter(x.config)
).map(_.io.toInt)
io.wakeUpIntOut.slow <> exeUnits.filter(
x => intSlowFilter(x.config)
).map(_.io.toInt)
val (recodeOut, ieeeOutReg) = exeUnits.map(e => {
val rec = WireInit(e.io.out)
val recReg = Wire(DecoupledIO(new ExuOutput))
PipelineConnect(
rec, recReg, recReg.fire(),
rec.bits.uop.roqIdx.needFlush(redirect, flush)
)
val ieeeReg = WireInit(recReg)
recReg.ready := ieeeReg.ready
ieeeReg.bits.data := Mux(recReg.bits.uop.ctrl.fpWen, ieee(recReg.bits.data), recReg.bits.data)
ieeeReg.bits.redirectValid := false.B
(rec, ieeeReg)
}).unzip
io.wakeUpOut.slow <> ieeeOutReg
// read fp rf from ctrl block
fpRf.io.readPorts.zipWithIndex.map{ case (r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until exuParameters.StuCnt).foreach(i => io.toMemBlock.readFpRf(i).data := fpRf.io.readPorts(i + 12).data)
(0 until exuParameters.StuCnt).foreach(i =>
io.toMemBlock.readFpRf(i).data := RegNext(ieee(fpRf.io.readPorts(i + 12).data))
)
// write fp rf arbiter
val fpWbArbiter = Module(new Wb(
(exeUnits.map(_.config) ++ fastWakeUpIn ++ slowWakeUpIn),
NRFpWritePorts,
isFp = true
))
fpWbArbiter.io.in <> exeUnits.map(_.io.toFp) ++ io.wakeUpIn.fast ++ io.wakeUpIn.slow
fpWbArbiter.io.in <> exeUnits.map(e =>
if(e.config.writeIntRf) WireInit(e.io.out) else e.io.out
) ++ wakeUpInRecode.slow
exeUnits.zip(recodeOut).zip(fpWbArbiter.io.in).filter(_._1._1.config.writeIntRf).foreach {
case ((exu, wInt), wFp) =>
exu.io.out.ready := wInt.fire() || wFp.fire()
}
// set busytable and update roq
io.toCtrlBlock.wbRegs <> fpWbArbiter.io.out
......
......@@ -11,7 +11,7 @@ import xiangshan.backend.regfile.Regfile
class WakeUpBundle(numFast: Int, numSlow: Int) extends XSBundle {
val fastUops = Vec(numFast, Flipped(ValidIO(new MicroOp)))
val fast = Vec(numFast, Flipped(DecoupledIO(new ExuOutput))) //one cycle later than fastUops
val fast = Vec(numFast, Flipped(ValidIO(new ExuOutput))) //one cycle later than fastUops
val slow = Vec(numSlow, Flipped(DecoupledIO(new ExuOutput)))
override def cloneType = (new WakeUpBundle(numFast, numSlow)).asInstanceOf[this.type]
......@@ -23,32 +23,56 @@ class IntBlockToCtrlIO extends XSBundle {
// used to update busytable and roq state
val wbRegs = Vec(NRIntWritePorts, ValidIO(new ExuOutput))
// write back to brq
val exuRedirect = Vec(exuParameters.AluCnt+exuParameters.JmpCnt, ValidIO(new ExuOutput))
val exuRedirect = Vec(exuParameters.AluCnt + exuParameters.JmpCnt, ValidIO(new ExuOutput))
val numExist = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
}
trait HasExeBlockHelper {
def fpFastFilter(cfg: ExuConfig): Boolean = {
cfg.hasCertainLatency && cfg.writeFpRf
/** Returns a copy of `x` whose `valid` is additionally qualified by the
  * uop's `ctrl.fpWen`; `bits` is passed through unchanged.
  */
def fpUopValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
  val gated = WireInit(x)
  val writesFp = x.bits.ctrl.fpWen
  gated.valid := x.valid && writesFp
  gated
}
def fpSlowFilter(cfg: ExuConfig): Boolean = {
cfg.hasUncertainlatency && cfg.writeFpRf
/** Returns a copy of the execution output `x` whose `valid` is additionally
  * qualified by `uop.ctrl.fpWen`; `bits` is passed through unchanged.
  */
def fpOutValid(x: ValidIO[ExuOutput]): ValidIO[ExuOutput] = {
  val gated = WireInit(x)
  val writesFp = x.bits.uop.ctrl.fpWen
  gated.valid := x.valid && writesFp
  gated
}
def intFastFilter(cfg: ExuConfig): Boolean = {
cfg.hasCertainLatency && cfg.writeIntRf
/** Decoupled variant: returns a copy of `x` whose `valid` is additionally
  * qualified by `uop.ctrl.fpWen`.
  *
  * @param connectReady when true, `x.ready` is driven from the returned
  *                     wire's `ready`; when false this helper does not
  *                     drive `x.ready` at all.
  */
def fpOutValid(x: DecoupledIO[ExuOutput], connectReady: Boolean = false): DecoupledIO[ExuOutput] = {
  val gated = WireInit(x)
  gated.valid := x.valid && x.bits.uop.ctrl.fpWen
  if (connectReady) {
    x.ready := gated.ready
  }
  gated
}
def intSlowFilter(cfg: ExuConfig): Boolean = {
cfg.hasUncertainlatency && cfg.writeIntRf
/** Returns a copy of `x` whose `valid` is additionally qualified by the
  * uop's `ctrl.rfWen`; `bits` is passed through unchanged.
  */
def intUopValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
  val gated = WireInit(x)
  val writesInt = x.bits.ctrl.rfWen
  gated.valid := x.valid && writesInt
  gated
}
def fpValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
val uop = WireInit(x)
uop.valid := x.valid && x.bits.ctrl.fpWen
uop
/** Returns a copy of the execution output `x` whose `valid` is additionally
  * qualified by `uop.ctrl.rfWen`; `bits` is passed through unchanged.
  */
def intOutValid(x: ValidIO[ExuOutput]): ValidIO[ExuOutput] = {
  val gated = WireInit(x)
  val writesInt = x.bits.uop.ctrl.rfWen
  gated.valid := x.valid && writesInt
  gated
}
def intValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
val uop = WireInit(x)
uop.valid := x.valid && x.bits.ctrl.rfWen
uop
/** Decoupled variant: returns a copy of `x` whose `valid` is additionally
  * qualified by `uop.ctrl.rfWen`.
  *
  * @param connectReady when true, `x.ready` is driven from the returned
  *                     wire's `ready`; when false this helper does not
  *                     drive `x.ready` at all.
  */
def intOutValid(x: DecoupledIO[ExuOutput], connectReady: Boolean = false): DecoupledIO[ExuOutput] = {
  val gated = WireInit(x)
  gated.valid := x.valid && x.bits.uop.ctrl.rfWen
  if (connectReady) {
    x.ready := gated.ready
  }
  gated
}
/** Creates a `Valid` wire carrying the `valid` and `bits` of `d`.
  * The `ready` signal of `d` is neither read nor driven here.
  */
def decoupledIOToValidIO[T <: Data](d: DecoupledIO[T]): Valid[T] = {
  val validOnly = Wire(Valid(d.bits.cloneType))
  validOnly.bits := d.bits
  validOnly.valid := d.valid
  validOnly
}
/** Creates a `DecoupledIO` wire carrying the `valid` and `bits` of `v`,
  * with its `ready` tied to `true.B`.
  */
def validIOToDecoupledIO[T <: Data](v: Valid[T]): DecoupledIO[T] = {
  val alwaysReady = Wire(DecoupledIO(v.bits.cloneType))
  alwaysReady.bits := v.bits
  alwaysReady.valid := v.valid
  // ready is constant high: the source Valid has no way to be back-pressured.
  alwaysReady.ready := true.B
  alwaysReady
}
}
......@@ -56,26 +80,22 @@ class IntegerBlock
(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
fastFpOut: Seq[ExuConfig],
slowFpOut: Seq[ExuConfig],
fastIntOut: Seq[ExuConfig],
slowIntOut: Seq[ExuConfig]
) extends XSModule with HasExeBlockHelper
{
fastWakeUpOut: Seq[ExuConfig],
slowWakeUpOut: Seq[ExuConfig]
) extends XSModule with HasExeBlockHelper {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToIntBlockIO)
val toCtrlBlock = new IntBlockToCtrlIO
val toMemBlock = new IntBlockToMemBlockIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOut.size, slowFpOut.size))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOut.size, slowIntOut.size))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val csrio = new CSRFileIO
val fenceio = new Bundle {
val sfence = Output(new SfenceBundle) // to front,mem
val fencei = Output(Bool()) // to icache
val sbuffer = new FenceToSbuffer // to mem
val fencei = Output(Bool()) // to icache
val sbuffer = new FenceToSbuffer // to mem
}
})
val difftestIO = IO(new Bundle() {
......@@ -136,12 +156,12 @@ class IntegerBlock
val readIntRf = cfg.readIntRf
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency && readIntRf).map(_.io.toInt.bits.data)
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency).map(_.io.out.bits.data)
val fastDatas = inBlockWbData ++ io.wakeUpIn.fast.map(_.bits.data)
val wakeupCnt = fastDatas.length
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency && readIntRf).map(_.io.toInt)
val slowPorts = inBlockListenPorts ++ io.wakeUpIn.slow
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency).map(_.io.out)
val slowPorts = (inBlockListenPorts ++ io.wakeUpIn.slow).map(decoupledIOToValidIO)
val extraListenPortsCnt = slowPorts.length
val feedback = (cfg == ldExeUnitCfg) || (cfg == stExeUnitCfg)
......@@ -166,10 +186,7 @@ class IntegerBlock
}
rs.io.fastDatas <> fastDatas
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
rs.io.slowPorts <> slowPorts
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.fromInt <> rs.io.deq
......@@ -181,7 +198,7 @@ class IntegerBlock
rs
})
for(rs <- reservationStations){
for (rs <- reservationStations) {
val inBlockUops = reservationStations.filter(x =>
x.exuCfg.hasCertainLatency && x.exuCfg.writeIntRf
).map(x => {
......@@ -192,34 +209,22 @@ class IntegerBlock
rs.io.fastUopsIn <> inBlockUops ++ io.wakeUpIn.fastUops
}
io.wakeUpFpOut.fastUops <> reservationStations.filter(
rs => fpFastFilter(rs.exuCfg)
).map(_.io.fastUopOut).map(fpValid)
io.wakeUpOut.fastUops <> reservationStations.filter(
rs => rs.exuCfg.hasCertainLatency
).map(_.io.fastUopOut).map(intUopValid)
io.wakeUpFpOut.fast <> exeUnits.filter(
x => fpFastFilter(x.config)
).map(_.io.toFp)
io.wakeUpOut.fast <> exeUnits.filter(
x => x.config.hasCertainLatency
).map(_.io.out).map(decoupledIOToValidIO)
io.wakeUpFpOut.slow <> exeUnits.filter(
x => fpSlowFilter(x.config)
).map(_.io.toFp)
io.wakeUpIntOut.fastUops <> reservationStations.filter(
rs => intFastFilter(rs.exuCfg)
).map(_.io.fastUopOut).map(intValid)
io.wakeUpIntOut.fast <> exeUnits.filter(
x => intFastFilter(x.config)
).map(_.io.toInt)
io.wakeUpIntOut.slow <> exeUnits.filter(
x => intSlowFilter(x.config)
).map(_.io.toInt)
io.wakeUpOut.slow <> exeUnits.filter(
x => x.config.hasUncertainlatency
).map(x => WireInit(x.io.out))
// send misprediction to brq
io.toCtrlBlock.exuRedirect.zip(
exeUnits.filter(_.config.hasRedirect).map(_.io.toInt)
).foreach{
exeUnits.filter(_.config.hasRedirect).map(_.io.out)
).foreach {
case (x, y) =>
x.valid := y.fire() && y.bits.redirectValid
x.bits := y.bits
......@@ -232,7 +237,7 @@ class IntegerBlock
}
// read int rf from ctrl block
intRf.io.readPorts.zipWithIndex.map{ case(r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
intRf.io.readPorts.zipWithIndex.map { case (r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until NRMemReadPorts).foreach(i => io.toMemBlock.readIntRf(i).data := intRf.io.readPorts(i + 8).data)
// write int rf arbiter
val intWbArbiter = Module(new Wb(
......@@ -240,12 +245,19 @@ class IntegerBlock
NRIntWritePorts,
isFp = false
))
intWbArbiter.io.in <> exeUnits.map(_.io.toInt) ++ io.wakeUpIn.fast ++ io.wakeUpIn.slow
intWbArbiter.io.in <> exeUnits.map(e => {
if(e.config.writeFpRf) WireInit(e.io.out) else e.io.out
}) ++ io.wakeUpIn.slow
exeUnits.zip(intWbArbiter.io.in).filter(_._1.config.writeFpRf).zip(io.wakeUpIn.slow).foreach{
case ((exu, wInt), wFp) =>
exu.io.out.ready := wFp.fire() || wInt.fire()
}
// set busytable and update roq
io.toCtrlBlock.wbRegs <> intWbArbiter.io.out
intRf.io.writePorts.zip(intWbArbiter.io.out).foreach{
intRf.io.writePorts.zip(intWbArbiter.io.out).foreach {
case (rf, wb) =>
rf.wen := wb.valid && wb.bits.uop.ctrl.rfWen
rf.addr := wb.bits.uop.pdest
......
......@@ -30,29 +30,19 @@ class FpBlockToMemBlockIO extends XSBundle {
}
class MemBlock(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
fastFpOut: Seq[ExuConfig],
slowFpOut: Seq[ExuConfig],
fastIntOut: Seq[ExuConfig],
slowIntOut: Seq[ExuConfig]
val fastWakeUpIn: Seq[ExuConfig],
val slowWakeUpIn: Seq[ExuConfig],
val fastWakeUpOut: Seq[ExuConfig],
val slowWakeUpOut: Seq[ExuConfig]
)(implicit p: Parameters) extends LazyModule {
val dcache = LazyModule(new DCache())
val uncache = LazyModule(new Uncache())
lazy val module = new MemBlockImp(fastWakeUpIn, slowWakeUpIn, fastFpOut, slowFpOut, fastIntOut, slowIntOut)(this)
lazy val module = new MemBlockImp(this)
}
class MemBlockImp
(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
fastFpOut: Seq[ExuConfig],
slowFpOut: Seq[ExuConfig],
fastIntOut: Seq[ExuConfig],
slowIntOut: Seq[ExuConfig]
) (outer: MemBlock) extends LazyModuleImp(outer)
class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
with HasXSParameter
with HasExceptionNO
with HasXSLog
......@@ -60,6 +50,11 @@ class MemBlockImp
with HasExeBlockHelper
{
val fastWakeUpIn = outer.fastWakeUpIn
val slowWakeUpIn = outer.slowWakeUpIn
val fastWakeUpOut = outer.fastWakeUpOut
val slowWakeUpOut = outer.slowWakeUpOut
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToLsBlockIO)
val fromIntBlock = Flipped(new IntBlockToMemBlockIO)
......@@ -67,8 +62,7 @@ class MemBlockImp
val toCtrlBlock = new LsBlockToCtrlIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOut.size, slowFpOut.size))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOut.size, slowIntOut.size))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val ptw = new TlbPtwIO
val sfence = Input(new SfenceBundle)
......@@ -124,8 +118,7 @@ class MemBlockImp
atomicsUnit.io.out.ready := ldOut0.ready
loadUnits.head.io.ldout.ready := ldOut0.ready
val intExeWbReqs = ldOut0 +: loadUnits.tail.map(_.io.ldout)
val fpExeWbReqs = loadUnits.map(_.io.fpout)
val exeWbReqs = ldOut0 +: loadUnits.tail.map(_.io.ldout)
val readPortIndex = Seq(0, 1, 2, 4)
io.fromIntBlock.readIntRf.foreach(_.addr := DontCare)
......@@ -145,11 +138,10 @@ class MemBlockImp
.map(_._2.bits.data)
val wakeupCnt = fastDatas.length
val inBlockListenPorts = intExeWbReqs ++ fpExeWbReqs
val slowPorts = inBlockListenPorts ++
val slowPorts = (exeWbReqs ++
slowWakeUpIn.zip(io.wakeUpIn.slow)
.filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf))
.map(_._2)
.map(_._2)).map(decoupledIOToValidIO)
val slowPortsCnt = slowPorts.length
......@@ -165,18 +157,14 @@ class MemBlockImp
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl(i)
val src2IsFp = RegNext(io.fromCtrlBlock.enqIqCtrl(i).bits.ctrl.src2Type === SrcType.fp)
rs.io.srcRegValue := DontCare
rs.io.srcRegValue(0) := io.fromIntBlock.readIntRf(readPortIndex(i)).data
if (i >= exuParameters.LduCnt) {
rs.io.srcRegValue(1) := Mux(src2IsFp, io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data, io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data)
rs.io.srcRegValue(1) := io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data
rs.io.fpRegValue := io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data
}
rs.io.fastDatas <> fastDatas
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
rs.io.slowPorts <> slowPorts
// exeUnits(i).io.redirect <> redirect
// exeUnits(i).io.fromInt <> rs.io.deq
......@@ -193,17 +181,9 @@ class MemBlockImp
.map(_._2)
}
// TODO: make this better
io.wakeUpIn.fast.foreach(_.ready := true.B)
io.wakeUpOut.slow <> exeWbReqs
io.wakeUpIn.slow.foreach(_.ready := true.B)
io.wakeUpFpOut.slow <> fpExeWbReqs
io.wakeUpIntOut.slow <> intExeWbReqs
// load always ready
fpExeWbReqs.foreach(_.ready := true.B)
intExeWbReqs.foreach(_.ready := true.B)
val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
val lsq = Module(new LsqWrappper)
val sbuffer = Module(new NewSbuffer)
......
......@@ -17,6 +17,7 @@ class FPDecoder extends XSModule{
def Y = BitPat("b1")
val s = BitPat(S)
val d = BitPat(D)
val i = BitPat(I)
val default = List(X,X,X,N,N,N,X,X,X)
......@@ -27,15 +28,15 @@ class FPDecoder extends XSModule{
FCVT_S_WU-> List(N,s,s,Y,Y,Y,N,N,Y),
FCVT_S_L -> List(N,s,s,Y,Y,Y,N,N,Y),
FCVT_S_LU-> List(N,s,s,Y,Y,Y,N,N,Y),
FMV_X_W -> List(N,d,X,N,N,N,N,N,N),
FCLASS_S -> List(N,s,X,N,N,N,N,N,N),
FCVT_W_S -> List(N,s,X,N,Y,N,N,N,Y),
FCVT_WU_S-> List(N,s,X,N,Y,N,N,N,Y),
FCVT_L_S -> List(N,s,X,N,Y,N,N,N,Y),
FCVT_LU_S-> List(N,s,X,N,Y,N,N,N,Y),
FEQ_S -> List(N,s,X,N,Y,N,N,N,N),
FLT_S -> List(N,s,X,N,Y,N,N,N,N),
FLE_S -> List(N,s,X,N,Y,N,N,N,N),
FMV_X_W -> List(N,d,i,N,N,N,N,N,N),
FCLASS_S -> List(N,s,i,N,N,N,N,N,N),
FCVT_W_S -> List(N,s,i,N,Y,N,N,N,Y),
FCVT_WU_S-> List(N,s,i,N,Y,N,N,N,Y),
FCVT_L_S -> List(N,s,i,N,Y,N,N,N,Y),
FCVT_LU_S-> List(N,s,i,N,Y,N,N,N,Y),
FEQ_S -> List(N,s,i,N,Y,N,N,N,N),
FLT_S -> List(N,s,i,N,Y,N,N,N,N),
FLE_S -> List(N,s,i,N,Y,N,N,N,N),
FSGNJ_S -> List(N,s,s,N,N,Y,N,N,N),
FSGNJN_S -> List(N,s,s,N,N,Y,N,N,N),
FSGNJX_S -> List(N,s,s,N,N,Y,N,N,N),
......@@ -60,17 +61,17 @@ class FPDecoder extends XSModule{
FCVT_D_WU-> List(N,d,d,Y,Y,Y,N,N,Y),
FCVT_D_L -> List(N,d,d,Y,Y,Y,N,N,Y),
FCVT_D_LU-> List(N,d,d,Y,Y,Y,N,N,Y),
FMV_X_D -> List(N,d,X,N,N,N,N,N,N),
FCLASS_D -> List(N,d,X,N,N,N,N,N,N),
FCVT_W_D -> List(N,d,X,N,Y,N,N,N,Y),
FCVT_WU_D-> List(N,d,X,N,Y,N,N,N,Y),
FCVT_L_D -> List(N,d,X,N,Y,N,N,N,Y),
FCVT_LU_D-> List(N,d,X,N,Y,N,N,N,Y),
FMV_X_D -> List(N,d,i,N,N,N,N,N,N),
FCLASS_D -> List(N,d,i,N,N,N,N,N,N),
FCVT_W_D -> List(N,d,i,N,Y,N,N,N,Y),
FCVT_WU_D-> List(N,d,i,N,Y,N,N,N,Y),
FCVT_L_D -> List(N,d,i,N,Y,N,N,N,Y),
FCVT_LU_D-> List(N,d,i,N,Y,N,N,N,Y),
FCVT_S_D -> List(N,d,s,N,Y,Y,N,N,Y),
FCVT_D_S -> List(N,s,d,N,Y,Y,N,N,Y),
FEQ_D -> List(N,d,X,N,Y,N,N,N,N),
FLT_D -> List(N,d,X,N,Y,N,N,N,N),
FLE_D -> List(N,d,X,N,Y,N,N,N,N),
FEQ_D -> List(N,d,i,N,Y,N,N,N,N),
FLT_D -> List(N,d,i,N,Y,N,N,N,N),
FLE_D -> List(N,d,i,N,Y,N,N,N,N),
FSGNJ_D -> List(N,d,d,N,N,Y,N,N,N),
FSGNJN_D -> List(N,d,d,N,N,Y,N,N,N),
FSGNJX_D -> List(N,d,d,N,N,Y,N,N,N),
......
......@@ -28,6 +28,7 @@ class Dispatch extends XSModule {
// from rename
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val renameBypass = Input(new RenameBypassInfo)
val preDpInfo = Input(new PreDispatchInfo)
// to busytable: set pdest to busy (not ready) when they are dispatched
val allocPregs = Vec(RenameWidth, Output(new ReplayPregReq))
// enq Roq
......@@ -52,9 +53,9 @@ class Dispatch extends XSModule {
})
val dispatch1 = Module(new Dispatch1)
val intDq = Module(new DispatchQueue(dpParams.IntDqSize, RenameWidth, dpParams.IntDqDeqWidth))
val fpDq = Module(new DispatchQueue(dpParams.FpDqSize, RenameWidth, dpParams.FpDqDeqWidth))
val lsDq = Module(new DispatchQueue(dpParams.LsDqSize, RenameWidth, dpParams.LsDqDeqWidth))
val intDq = Module(new DispatchQueue(dpParams.IntDqSize, RenameWidth, dpParams.IntDqDeqWidth, "int"))
val fpDq = Module(new DispatchQueue(dpParams.FpDqSize, RenameWidth, dpParams.FpDqDeqWidth, "fp"))
val lsDq = Module(new DispatchQueue(dpParams.LsDqSize, RenameWidth, dpParams.LsDqDeqWidth, "ls"))
// pipeline between rename and dispatch
// accepts all at once
......@@ -66,6 +67,7 @@ class Dispatch extends XSModule {
// dispatch 1: accept uops from rename and dispatch them to the three dispatch queues
// dispatch1.io.redirect <> io.redirect
dispatch1.io.renameBypass := RegEnable(io.renameBypass, io.fromRename(0).valid && dispatch1.io.fromRename(0).ready)
dispatch1.io.preDpInfo := RegEnable(io.preDpInfo, io.fromRename(0).valid && dispatch1.io.fromRename(0).ready)
dispatch1.io.enqRoq <> io.enqRoq
dispatch1.io.enqLsq <> io.enqLsq
dispatch1.io.toIntDq <> intDq.io.enq
......
......@@ -10,12 +10,18 @@ import xiangshan.backend.rename.RenameBypassInfo
import xiangshan.mem.LsqEnqIO
import xiangshan.backend.fu.HasExceptionNO
/** Side-band information handed to Dispatch1 alongside the renamed uops
  * (carried on the `preDpInfo` input ports).
  *
  * lsqNeedAlloc holds one 2-bit value per rename slot. Dispatch1 drives
  * io.enqLsq.needAlloc(i) with it when rename slot i is valid, and with 0 otherwise.
  * NOTE(review): the meaning of the 2-bit encoding is not visible in this file
  * section; confirm against the LSQ enqueue interface.
  */
class PreDispatchInfo extends XSBundle {
// one entry per rename slot (RenameWidth entries), 2 bits each
val lsqNeedAlloc = Vec(RenameWidth, UInt(2.W))
}
// read rob and enqueue
class Dispatch1 extends XSModule with HasExceptionNO {
val io = IO(new Bundle() {
// from rename
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val renameBypass = Input(new RenameBypassInfo)
val preDpInfo = Input(new PreDispatchInfo)
val recv = Output(Vec(RenameWidth, Bool()))
// enq Roq
val enqRoq = Flipped(new RoqEnqIO)
......@@ -147,7 +153,7 @@ class Dispatch1 extends XSModule with HasExceptionNO {
io.enqRoq.req(i).bits := updatedUop(i)
XSDebug(io.enqRoq.req(i).valid, p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives nroq ${io.enqRoq.resp(i)}\n")
io.enqLsq.needAlloc(i) := io.fromRename(i).valid && isLs(i)
io.enqLsq.needAlloc(i) := Mux(io.fromRename(i).valid, io.preDpInfo.lsqNeedAlloc(i), 0.U)
io.enqLsq.req(i).valid := io.fromRename(i).valid && isLs(i) && thisCanActualOut(i) && io.enqRoq.canAccept && io.toIntDq.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept
io.enqLsq.req(i).bits := updatedUop(i)
io.enqLsq.req(i).bits.roqIdx := io.enqRoq.resp(i)
......@@ -200,6 +206,6 @@ class Dispatch1 extends XSModule with HasExceptionNO {
PopCount(io.toLsDq.req.map(_.valid && io.toLsDq.canAccept))
XSError(enqFireCnt > renameFireCnt, "enqFireCnt should not be greater than renameFireCnt\n")
XSPerf("utilization", PopCount(io.fromRename.map(_.valid)))
XSPerf("waitInstr", PopCount((0 until RenameWidth).map(i => io.fromRename(i).valid && !io.recv(i))))
XSPerf("dp1_in", PopCount(io.fromRename.map(_.valid)))
XSPerf("dp1_waitInstr", PopCount((0 until RenameWidth).map(i => io.fromRename(i).valid && !io.recv(i))))
}
......@@ -148,6 +148,6 @@ class Dispatch2Fp extends XSModule {
// p"(${readPortIndexReg(i)+2.U}, ${uopReg(i).psrc3}, ${Hexadecimal(io.enqIQData(i).src3)})\n")
// }
XSPerf("utilization", PopCount(io.fromDq.map(_.valid)))
XSPerf("dp2fp_in", PopCount(io.fromDq.map(_.valid)))
}
......@@ -153,6 +153,6 @@ class Dispatch2Int extends XSModule {
// p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
XSPerf("utilization", PopCount(io.fromDq.map(_.valid)))
XSPerf("dp2int_in", PopCount(io.fromDq.map(_.valid)))
}
......@@ -146,7 +146,7 @@ class Dispatch2Ls extends XSModule {
// p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
XSPerf("utilization", PopCount(io.fromDq.map(_.valid)))
XSPerf("waitInstr", PopCount(io.fromDq.map(r => r.valid && !r.ready)))
XSPerf("dp2ls_in", PopCount(io.fromDq.map(_.valid)))
XSPerf("dp2ls_waitInstr", PopCount(io.fromDq.map(r => r.valid && !r.ready)))
}
......@@ -23,7 +23,7 @@ class DispatchQueueIO(enqnum: Int, deqnum: Int) extends XSBundle {
}
// dispatch queue: accepts at most enqnum uops from dispatch1 and dispatches deqnum uops at every clock cycle
class DispatchQueue(size: Int, enqnum: Int, deqnum: Int) extends XSModule with HasCircularQueuePtrHelper {
class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new DispatchQueueIO(enqnum, deqnum))
val s_invalid :: s_valid:: Nil = Enum(2)
......@@ -203,5 +203,5 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int) extends XSModule with H
XSDebug(false, true.B, "\n")
// XSError(isAfter(headPtr(0), tailPtr(0)), p"assert greaterOrEqualThan(tailPtr: ${tailPtr(0)}, headPtr: ${headPtr(0)}) failed\n")
XSPerf("utilization", PopCount(stateEntries.map(_ =/= s_invalid)))
XSPerf("dq_"+name+"_utilization", PopCount(stateEntries.map(_ =/= s_invalid)))
}
......@@ -13,11 +13,11 @@ class AluExeUnit extends Exu(aluExeUnitCfg)
case a: Alu => a
}.get
io.toInt.bits.redirectValid := alu.redirectOutValid
io.toInt.bits.redirect := alu.redirectOut
io.out.bits.redirectValid := alu.redirectOutValid
io.out.bits.redirect := alu.redirectOut
XSDebug(io.fromInt.valid || io.redirect.valid,
p"fromInt(${io.fromInt.valid} ${io.fromInt.ready}) toInt(${io.toInt.valid} ${io.toInt.ready})" +
p"fromInt(${io.fromInt.valid} ${io.fromInt.ready}) toInt(${io.out.valid} ${io.out.ready})" +
p"Redirect:(${io.redirect.valid}) roqIdx:${io.redirect.bits.roqIdx}\n",
)
XSDebug(io.fromInt.valid,
......@@ -25,7 +25,7 @@ class AluExeUnit extends Exu(aluExeUnitCfg)
p"src3:${Hexadecimal(io.fromInt.bits.src3)} func:${Binary(io.fromInt.bits.uop.ctrl.fuOpType)} " +
p"pc:${Hexadecimal(io.fromInt.bits.uop.cf.pc)} roqIdx:${io.fromInt.bits.uop.roqIdx}\n"
)
XSDebug(io.toInt.valid,
p"res:${Hexadecimal(io.toInt.bits.data)}\n"
XSDebug(io.out.valid,
p"res:${Hexadecimal(io.out.bits.data)}\n"
)
}
\ No newline at end of file
......@@ -83,8 +83,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
val fromFp = if (config.readFpRf) Flipped(DecoupledIO(new ExuInput)) else null
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
val toInt = if (config.writeIntRf) DecoupledIO(new ExuOutput) else null
val toFp = if (config.writeFpRf) DecoupledIO(new ExuOutput) else null
val out = DecoupledIO(new ExuOutput)
})
for ((fuCfg, (fu, sel)) <- config.fuConfigs.zip(supportedFunctionUnits.zip(fuSel))) {
......@@ -147,15 +146,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
}
}
val intArb = if (config.writeIntRf) writebackArb(
supportedFunctionUnits.zip(config.fuConfigs).filter(x => !x._2.writeFpRf).map(_._1.io.out),
io.toInt
) else null
val fpArb = if (config.writeFpRf) writebackArb(
supportedFunctionUnits.zip(config.fuConfigs).filter(x => x._2.writeFpRf).map(_._1.io.out),
io.toFp
) else null
val arb = writebackArb(supportedFunctionUnits.map(_.io.out), io.out)
val readIntFu = config.fuConfigs
.zip(supportedFunctionUnits.zip(fuSel))
......@@ -179,7 +170,6 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
}
}
if (config.readIntRf) {
io.fromInt.ready := inReady(readIntFu)
}
......@@ -198,12 +188,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
out.redirectValid := false.B
}
if (config.writeFpRf) {
assignDontCares(io.toFp.bits)
}
if (config.writeIntRf) {
assignDontCares(io.toInt.bits)
}
assignDontCares(io.out.bits)
}
object Exu {
......@@ -233,6 +218,4 @@ object Exu {
Seq.fill(exuParameters.FmiscCnt)(fmiscExeUnitCfg)
val exuConfigs: Seq[ExuConfig] = intExuConfigs ++ fpExuConfigs
}
\ No newline at end of file
......@@ -20,8 +20,8 @@ class FmacExeUnit extends Exu(fmacExeUnitCfg)
fma.io.redirectIn := io.redirect
fma.io.flushIn := io.flush
fma.io.out.ready := io.toFp.ready
fma.io.out.ready := io.out.ready
io.toFp.bits.data := box(fma.io.out.bits.data, fma.io.out.bits.uop.ctrl.fpu.typeTagOut)
io.toFp.bits.fflags := fma.fflags
io.out.bits.data := box(fma.io.out.bits.data, fma.io.out.bits.uop.ctrl.fpu.typeTagOut)
io.out.bits.fflags := fma.fflags
}
......@@ -10,12 +10,7 @@ class FmiscExeUnit extends Exu(fmiscExeUnitCfg) {
val frm = IO(Input(UInt(3.W)))
val f2i :: f2f :: fdivSqrt :: Nil = supportedFunctionUnits.map(fu => fu.asInstanceOf[FPUSubModule])
val toFpUnits = Seq(f2f, fdivSqrt)
val toIntUnits = Seq(f2i)
assert(toFpUnits.size == 1 || fpArb.io.in.length == toFpUnits.size)
assert(toIntUnits.size == 1 || intArb.io.in.length == toIntUnits.size)
val fus = supportedFunctionUnits.map(fu => fu.asInstanceOf[FPUSubModule])
val input = io.fromFp
val isRVF = input.bits.uop.ctrl.isRVF
......@@ -28,15 +23,10 @@ class FmiscExeUnit extends Exu(fmiscExeUnitCfg) {
module.asInstanceOf[FPUSubModule].rm := Mux(instr_rm =/= 7.U, instr_rm, frm)
}
io.toFp.bits.fflags := MuxCase(
0.U,
toFpUnits.map(x => x.io.out.fire() -> x.fflags)
)
val fpOutCtrl = io.toFp.bits.uop.ctrl.fpu
io.toFp.bits.data := box(fpArb.io.out.bits.data, fpOutCtrl.typeTagOut)
io.toInt.bits.fflags := MuxCase(
io.out.bits.fflags := MuxCase(
0.U,
toIntUnits.map(x => x.io.out.fire() -> x.fflags)
fus.map(x => x.io.out.fire() -> x.fflags)
)
val fpOutCtrl = io.out.bits.uop.ctrl.fpu
io.out.bits.data := box(arb.io.out.bits.data, fpOutCtrl.typeTagOut)
}
......@@ -73,6 +73,6 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
val isDouble = !uop.ctrl.isRVF
io.toInt.bits.redirectValid := jmp.redirectOutValid
io.toInt.bits.redirect := jmp.redirectOut
io.out.bits.redirectValid := jmp.redirectOutValid
io.out.bits.redirect := jmp.redirectOut
}
......@@ -67,13 +67,13 @@ class MulDivExeUnit extends Exu(mulDivExeUnitCfg) {
XSDebug(io.fromInt.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d)\n",
io.fromInt.valid, io.fromInt.ready,
io.toInt.valid, io.toInt.ready,
io.out.valid, io.out.ready,
io.redirect.valid,
io.redirect.bits.level
)
XSDebug(io.fromInt.valid, "src1:%x src2:%x pc:%x\n", src1, src2, io.fromInt.bits.uop.cf.pc)
XSDebug(io.toInt.valid, "Out(%d %d) res:%x pc:%x\n",
io.toInt.valid, io.toInt.ready, io.toInt.bits.data, io.toInt.bits.uop.cf.pc
XSDebug(io.out.valid, "Out(%d %d) res:%x pc:%x\n",
io.out.valid, io.out.ready, io.out.bits.data, io.out.bits.uop.cf.pc
)
}
......@@ -3,8 +3,45 @@ package xiangshan.backend.exu
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
/** Write-back arbiter that selects one of `n` execution-unit outputs.
  *
  * The payload is arbitrated in two halves by two separate Arbiter instances:
  * one for the control fields (ExuCtrl) and one for the data word. Both arbiters
  * see the same valid inputs and the same output ready, so they are expected to
  * pick the same input every cycle; the assertions below check that.
  *
  * NOTE(review): `Arbiter` is presumably chisel3.util.Arbiter (fixed priority,
  * lowest index wins); confirm that `utils._` does not shadow it.
  *
  * @param n number of input ports to arbitrate between
  */
class ExuWbArbiter(n: Int) extends XSModule {
val io = IO(new Bundle() {
// candidate write-back requests, one per source
val in = Vec(n, Flipped(DecoupledIO(new ExuOutput)))
// the selected request
val out = DecoupledIO(new ExuOutput)
})
// Control half of the payload. The field names must match fields of ExuOutput,
// because the connections below are made by name through `elements`.
class ExuCtrl extends Bundle{
val uop = new MicroOp
val fflags = UInt(5.W)
val redirectValid = Bool()
val redirect = new Redirect
val debug = new DebugBundle
}
val ctrl_arb = Module(new Arbiter(new ExuCtrl, n))
// assumes ExuOutput.data is (XLEN+1) bits wide -- TODO confirm
val data_arb = Module(new Arbiter(UInt((XLEN+1).W), n))
// both arbiters share the downstream ready
ctrl_arb.io.out.ready := io.out.ready
data_arb.io.out.ready := io.out.ready
for(((in, ctrl), data) <- io.in.zip(ctrl_arb.io.in).zip(data_arb.io.in)){
ctrl.valid := in.valid
// copy every ExuCtrl field from the same-named field of the incoming ExuOutput;
// a name missing from ExuOutput fails the map lookup at elaboration time
for((name, d) <- ctrl.bits.elements) {
d := in.bits.elements(name)
}
data.valid := in.valid
data.bits := in.bits.data
// back-pressure is taken from the control arbiter only ...
in.ready := ctrl.ready
// ... which is valid as long as the data arbiter agrees
assert(ctrl.ready === data.ready)
}
// the two arbiters must have selected the same input
assert(ctrl_arb.io.chosen === data_arb.io.chosen)
// reassemble the output: data from data_arb, every other field from ctrl_arb
io.out.bits.data := data_arb.io.out.bits
for((name, d) <- ctrl_arb.io.out.bits.elements){
io.out.bits.elements(name) := d
}
io.out.valid := ctrl_arb.io.out.valid
assert(ctrl_arb.io.out.valid === data_arb.io.out.valid)
}
class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
......@@ -15,14 +52,6 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
val out = Vec(numOut, ValidIO(new ExuOutput))
})
// def exuOutToRfReq(exuOut: DecoupledIO[ExuOutput]): DecoupledIO[ExuOutput] = {
// val req = WireInit(exuOut)
// req.valid := exuOut.valid && wen(exuOut.bits)
// exuOut.ready := Mux(req.valid, req.ready, true.B)
// req
// }
val directConnect = io.in.zip(priorities).filter(x => x._2 == 0).map(_._1)
val mulReq = io.in.zip(priorities).filter(x => x._2 == 1).map(_._1)
val otherReq = io.in.zip(priorities).filter(x => x._2 > 1).map(_._1)
......@@ -32,9 +61,11 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
io.out.take(directConnect.size).zip(directConnect).foreach{
case (o, i) =>
o.bits := i.bits
o.valid := i.valid
i.ready := true.B
val arb = Module(new ExuWbArbiter(1))
arb.io.in.head <> i
o.bits := arb.io.out.bits
o.valid := arb.io.out.valid
arb.io.out.ready := true.B
}
def splitN[T](in: Seq[T], n: Int): Seq[Option[Seq[T]]] = {
......@@ -59,17 +90,11 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
for(i <- mulReq.indices) {
val out = io.out(directConnect.size + i)
val other = arbReq(i).getOrElse(Seq())
if(other.isEmpty){
out.valid := mulReq(i).valid
out.bits := mulReq(i).bits
mulReq(i).ready := true.B
} else {
val arb = Module(new Arbiter(new ExuOutput, 1+other.size))
arb.io.in <> mulReq(i) +: other
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
arb.io.out.ready := true.B
}
val arb = Module(new ExuWbArbiter(1+other.size))
arb.io.in <> mulReq(i) +: other
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
arb.io.out.ready := true.B
}
if(portUsed < numOut){
......
......@@ -221,28 +221,28 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
}
})
// from 4r sram
commitEntry.ftqPC := ftq_4r_sram.io.rdata(0).ftqPC
commitEntry.lastPacketPC := ftq_4r_sram.io.rdata(0).lastPacketPC
commitEntry.ftqPC := RegNext(ftq_4r_sram.io.rdata(0).ftqPC)
commitEntry.lastPacketPC := RegNext(ftq_4r_sram.io.rdata(0).lastPacketPC)
// from 2r sram
commitEntry.rasSp := ftq_2r_sram.io.rdata(0).rasSp
commitEntry.rasTop := ftq_2r_sram.io.rdata(0).rasEntry
commitEntry.hist := ftq_2r_sram.io.rdata(0).hist
commitEntry.predHist := ftq_2r_sram.io.rdata(0).predHist
commitEntry.specCnt := ftq_2r_sram.io.rdata(0).specCnt
commitEntry.br_mask := ftq_2r_sram.io.rdata(0).br_mask
commitEntry.rasSp := RegNext(ftq_2r_sram.io.rdata(0).rasSp)
commitEntry.rasTop := RegNext(ftq_2r_sram.io.rdata(0).rasEntry)
commitEntry.hist := RegNext(ftq_2r_sram.io.rdata(0).hist)
commitEntry.predHist := RegNext(ftq_2r_sram.io.rdata(0).predHist)
commitEntry.specCnt := RegNext(ftq_2r_sram.io.rdata(0).specCnt)
commitEntry.br_mask := RegNext(ftq_2r_sram.io.rdata(0).br_mask)
// from 1r sram
commitEntry.metas := ftq_1r_sram.io.rdata(0).metas
commitEntry.rvc_mask := ftq_1r_sram.io.rdata(0).rvc_mask
commitEntry.metas := RegNext(ftq_1r_sram.io.rdata(0).metas)
commitEntry.rvc_mask := RegNext(ftq_1r_sram.io.rdata(0).rvc_mask)
// from regs
commitEntry.valids := RegNext(commit_valids)
commitEntry.mispred := RegNext(mispredict_vec(headPtr.value))
commitEntry.cfiIndex := RegNext(cfiIndex_vec(headPtr.value))
commitEntry.cfiIsCall := RegNext(cfiIsCall(headPtr.value))
commitEntry.cfiIsRet := RegNext(cfiIsRet(headPtr.value))
commitEntry.cfiIsRVC := RegNext(cfiIsRVC(headPtr.value))
commitEntry.target := RegNext(update_target(headPtr.value))
io.commit_ftqEntry.valid := RegNext(Cat(commit_valids).orR()) //TODO: do we need this?
commitEntry.valids := RegNext(RegNext(commit_valids))
commitEntry.mispred := RegNext(RegNext(mispredict_vec(headPtr.value)))
commitEntry.cfiIndex := RegNext(RegNext(cfiIndex_vec(headPtr.value)))
commitEntry.cfiIsCall := RegNext(RegNext(cfiIsCall(headPtr.value)))
commitEntry.cfiIsRet := RegNext(RegNext(cfiIsRet(headPtr.value)))
commitEntry.cfiIsRVC := RegNext(RegNext(cfiIsRVC(headPtr.value)))
commitEntry.target := RegNext(RegNext(update_target(headPtr.value)))
io.commit_ftqEntry.valid := RegNext(RegNext(Cat(commit_valids).orR())) //TODO: do we need this?
io.commit_ftqEntry.bits := commitEntry
// read logic
......@@ -286,10 +286,10 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
}
}
XSPerf("ftqEntries", validEntries)
XSPerf("ftqStallAcc", io.enq.valid && !io.enq.ready, acc = true)
XSPerf("mispredictRedirectAcc", io.redirect.valid && RedirectLevel.flushAfter === io.redirect.bits.level, acc = true)
XSPerf("replayRedirectAcc", io.redirect.valid && RedirectLevel.flushItself(io.redirect.bits.level), acc = true)
XSPerf("ftq_entries", validEntries)
XSPerf("ftq_stall", io.enq.valid && !io.enq.ready, acc = true)
XSPerf("ftq_mispredictRedirect", io.redirect.valid && RedirectLevel.flushAfter === io.redirect.bits.level, acc = true)
XSPerf("ftq_replayRedirect", io.redirect.valid && RedirectLevel.flushItself(io.redirect.bits.level), acc = true)
XSDebug(io.commit_ftqEntry.valid, p"ftq commit: ${io.commit_ftqEntry.bits}")
XSDebug(io.enq.fire(), p"ftq enq: ${io.enq.bits}")
......
......@@ -35,21 +35,12 @@ class RightShiftModule extends XSModule {
val io = IO(new Bundle() {
val shamt = Input(UInt(6.W))
val srlSrc, sraSrc = Input(UInt(XLEN.W))
val srl, sra = Output(UInt(XLEN.W))
val srl_l, srl_w, sra_l, sra_w = Output(UInt(XLEN.W))
})
io.srl := io.srlSrc >> io.shamt
io.sra := (io.sraSrc.asSInt() >> io.shamt).asUInt()
}
class ShiftModule extends XSModule {
val io = IO(new Bundle() {
val shamt = Input(UInt(6.W))
val shsrc1 = Input(UInt(XLEN.W))
val sll, srl, sra = Output(UInt(XLEN.W))
})
io.sll := (io.shsrc1 << io.shamt)(XLEN-1, 0)
io.srl := io.shsrc1 >> io.shamt
io.sra := (io.shsrc1.asSInt >> io.shamt).asUInt
io.srl_l := io.srlSrc >> io.shamt
io.srl_w := io.srlSrc(31, 0) >> io.shamt
io.sra_l := (io.sraSrc.asSInt() >> io.shamt).asUInt()
io.sra_w := (Cat(Fill(32, io.sraSrc(31)), io.sraSrc(31, 0)).asSInt() >> io.shamt).asUInt()
}
class MiscResultSelect extends XSModule {
......@@ -87,17 +78,15 @@ class AluResSel extends XSModule {
io.aluRes := Cat(h32, res(31, 0))
}
class Alu extends FunctionUnit with HasRedirectOut {
val (src1, src2, func, pc, uop) = (
io.in.bits.src(0),
io.in.bits.src(1),
io.in.bits.uop.ctrl.fuOpType,
SignExt(io.in.bits.uop.cf.pc, AddrBits),
io.in.bits.uop
)
val valid = io.in.valid
class AluDataModule extends XSModule {
val io = IO(new Bundle() {
val src1, src2 = Input(UInt(XLEN.W))
val func = Input(FuOpType())
val pred_taken, isBranch = Input(Bool())
val result = Output(UInt(XLEN.W))
val taken, mispredict = Output(Bool())
})
val (src1, src2, func) = (io.src1, io.src2, io.func)
val isAdderSub = (func =/= ALUOpType.add) && (func =/= ALUOpType.addw)
val addModule = Module(new AddModule)
......@@ -121,18 +110,12 @@ class Alu extends FunctionUnit with HasRedirectOut {
val rightShiftModule = Module(new RightShiftModule)
rightShiftModule.io.shamt := shamt
rightShiftModule.io.srlSrc := Cat(
Mux(isW, 0.U(32.W), src1(63, 32)),
src1(31, 0)
)
rightShiftModule.io.sraSrc := Cat(
Mux(isW, Fill(32, src1(31)), src1(63, 32)),
src1(31, 0)
)
rightShiftModule.io.srlSrc := src1
rightShiftModule.io.sraSrc := src1
val sll = leftShiftModule.io.sll
val srl = rightShiftModule.io.srl
val sra = rightShiftModule.io.sra
val srl = Mux(isW, rightShiftModule.io.srl_w, rightShiftModule.io.srl_l)
val sra = Mux(isW, rightShiftModule.io.sra_w, rightShiftModule.io.sra_l)
val miscResSel = Module(new MiscResultSelect)
miscResSel.io.func := func(3, 0)
......@@ -160,9 +143,32 @@ class Alu extends FunctionUnit with HasRedirectOut {
ALUOpType.getBranchType(ALUOpType.blt) -> slt,
ALUOpType.getBranchType(ALUOpType.bltu) -> sltu
)
val taken = LookupTree(ALUOpType.getBranchType(func), branchOpTable) ^ ALUOpType.isBranchInvert(func)
io.result := aluRes
io.taken := taken
io.mispredict := (io.pred_taken ^ taken) && io.isBranch
}
class Alu extends FunctionUnit with HasRedirectOut {
val (src1, src2, func, pc, uop) = (
io.in.bits.src(0),
io.in.bits.src(1),
io.in.bits.uop.ctrl.fuOpType,
SignExt(io.in.bits.uop.cf.pc, AddrBits),
io.in.bits.uop
)
val valid = io.in.valid
val isBranch = ALUOpType.isBranch(func)
val taken = LookupTree(ALUOpType.getBranchType(func), branchOpTable) ^ ALUOpType.isBranchInvert(func)
val dataModule = Module(new AluDataModule)
dataModule.io.src1 := src1
dataModule.io.src2 := src2
dataModule.io.func := func
dataModule.io.pred_taken := uop.cf.pred_taken
dataModule.io.isBranch := isBranch
redirectOutValid := io.out.valid && isBranch
redirectOut := DontCare
......@@ -170,12 +176,12 @@ class Alu extends FunctionUnit with HasRedirectOut {
redirectOut.roqIdx := uop.roqIdx
redirectOut.ftqIdx := uop.cf.ftqPtr
redirectOut.ftqOffset := uop.cf.ftqOffset
redirectOut.cfiUpdate.isMisPred := (uop.cf.pred_taken ^ taken) && isBranch
redirectOut.cfiUpdate.taken := taken
redirectOut.cfiUpdate.isMisPred := dataModule.io.mispredict
redirectOut.cfiUpdate.taken := dataModule.io.taken
redirectOut.cfiUpdate.predTaken := uop.cf.pred_taken
io.in.ready := io.out.ready
io.out.valid := valid
io.out.bits.uop <> io.in.bits.uop
io.out.bits.data := aluRes
io.out.bits.data := dataModule.io.result
}
......@@ -8,14 +8,6 @@ import xiangshan._
import xiangshan.backend._
import xiangshan.backend.fu.util._
object hartId extends (() => Int) {
var x = 0
def apply(): Int = {
x = x + 1
x-1
}
}
trait HasExceptionNO {
def instrAddrMisaligned = 0
def instrAccessFault = 1
......@@ -129,6 +121,7 @@ class CustomCSRCtrlIO extends XSBundle {
}
class CSRFileIO extends XSBundle {
val hartId = Input(UInt(64.W))
// output (for func === CSROpType.jmp)
val perf = new PerfCounterIO
val isPerfCnt = Output(Bool())
......@@ -265,8 +258,7 @@ class CSR extends FunctionUnit with HasCSRConst
val mvendorid = RegInit(UInt(XLEN.W), 0.U) // this is a non-commercial implementation
val marchid = RegInit(UInt(XLEN.W), 0.U) // return 0 to indicate the field is not implemented
val mimpid = RegInit(UInt(XLEN.W), 0.U) // provides a unique encoding of the version of the processor implementation
val mhartNo = hartId()
val mhartid = RegInit(UInt(XLEN.W), mhartNo.asUInt) // the hardware thread running the code
val mhartid = RegInit(UInt(XLEN.W), csrio.hartId) // the hardware thread running the code
val mstatus = RegInit(UInt(XLEN.W), 0.U)
// mstatus Value Table
......@@ -813,113 +805,9 @@ class CSR extends FunctionUnit with HasCSRConst
XSDebug(raiseExceptionIntr && delegS, "sepc is writen!!! pc:%x\n", cfIn.pc)
/**
* Emu Performance counters
*/
val emuPerfCntList = Map(
// "Mcycle" -> (0x1000, "perfCntCondMcycle" ),
// "Minstret" -> (0x1002, "perfCntCondMinstret" ),
"BpInstr" -> (0x1003, "perfCntCondBpInstr" ),
"BpRight" -> (0x1004, "perfCntCondBpRight" ),
"BpWrong" -> (0x1005, "perfCntCondBpWrong" ),
"BpBRight" -> (0x1006, "perfCntCondBpBRight"),
"BpBWrong" -> (0x1007, "perfCntCondBpBWrong"),
"BpJRight" -> (0x1008, "perfCntCondBpJRight"),
"BpJWrong" -> (0x1009, "perfCntCondBpJWrong"),
"BpIRight" -> (0x100a, "perfCntCondBpIRight"),
"BpIWrong" -> (0x100b, "perfCntCondBpIWrong"),
"BpRRight" -> (0x100c, "perfCntCondBpRRight"),
"BpRWrong" -> (0x100d, "perfCntCondBpRWrong"),
"RoqWalk" -> (0x100f, "perfCntCondRoqWalk" ),
"DTlbReqCnt0" -> (0x1015, "perfCntDtlbReqCnt0" ),
"DTlbReqCnt1" -> (0x1016, "perfCntDtlbReqCnt1" ),
"DTlbReqCnt2" -> (0x1017, "perfCntDtlbReqCnt2" ),
"DTlbReqCnt3" -> (0x1018, "perfCntDtlbReqCnt3" ),
"DTlbMissCnt0"-> (0x1019, "perfCntDtlbMissCnt0" ),
"DTlbMissCnt1"-> (0x1020, "perfCntDtlbMissCnt1" ),
"DTlbMissCnt2"-> (0x1021, "perfCntDtlbMissCnt2" ),
"DTlbMissCnt3"-> (0x1022, "perfCntDtlbMissCnt3" ),
"ITlbReqCnt0" -> (0x1023, "perfCntItlbReqCnt0" ),
"ITlbMissCnt0"-> (0x1024, "perfCntItlbMissCnt0" ),
"PtwReqCnt" -> (0x1025, "perfCntPtwReqCnt" ),
"PtwCycleCnt" -> (0x1026, "perfCntPtwCycleCnt" ),
"PtwL2TlbHit" -> (0x1027, "perfCntPtwL2TlbHit" ),
"ICacheReq" -> (0x1028, "perfCntIcacheReqCnt" ),
"ICacheMiss" -> (0x1029, "perfCntIcacheMissCnt"),
"ICacheMMIO" -> (0x102a, "perfCntIcacheMMIOCnt"),
// "FetchFromLoopBuffer" -> (0x102b, "CntFetchFromLoopBuffer"),
// "ExitLoop1" -> (0x102c, "CntExitLoop1"),
// "ExitLoop2" -> (0x102d, "CntExitLoop2"),
// "ExitLoop3" -> (0x102e, "CntExitLoop3")
"ubtbRight" -> (0x1030, "perfCntubtbRight"),
"ubtbWrong" -> (0x1031, "perfCntubtbWrong"),
"btbRight" -> (0x1032, "perfCntbtbRight"),
"btbWrong" -> (0x1033, "perfCntbtbWrong"),
"tageRight" -> (0x1034, "perfCnttageRight"),
"tageWrong" -> (0x1035, "perfCnttageWrong"),
"rasRight" -> (0x1036, "perfCntrasRight"),
"rasWrong" -> (0x1037, "perfCntrasWrong"),
"loopRight" -> (0x1038, "perfCntloopRight"),
"loopWrong" -> (0x1039, "perfCntloopWrong"),
"s1Right" -> (0x103a, "perfCntS1Right"),
"s1Wrong" -> (0x103b, "perfCntS1Wrong"),
"s2Right" -> (0x103c, "perfCntS2Right"),
"s2Wrong" -> (0x103d, "perfCntS2Wrong"),
"s3Right" -> (0x103e, "perfCntS3Right"),
"s3Wrong" -> (0x103f, "perfCntS3Wrong"),
"loopExit" -> (0x1040, "perfCntLoopExit"),
"takenButWrong" -> (0x1041, "perfCntTakenButWrong"),
// "L2cacheHit" -> (0x1023, "perfCntCondL2cacheHit")
) ++ (
(0 until dcacheParameters.nMissEntries).map(i =>
("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0x1042 + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until icacheParameters.nMissEntries).map(i =>
("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until l1plusPrefetcherParameters.nEntries).map(i =>
("L1+PrefetchPenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until l2PrefetcherParameters.nEntries).map(i =>
("L2PrefetchPenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10)))
).toMap
)
emuPerfCntList.foreach {
case (_, (address, boringId)) =>
if (hasEmuPerfCnt) {
ExcitingUtils.addSink(emuPerfCntCond(address & 0x7f), boringId, ConnectionType.Perf)
}
// if (!hasEmuPerfCnt) {
// // do not enable perfcnts except for Mcycle and Minstret
// if (address != emuPerfCntList("Mcycle")._1 && address != emuPerfCntList("Minstret")._1) {
// perfCntCond(address & 0x7f) := false.B
// }
// }
}
val xstrap = WireInit(false.B)
if (!env.FPGAPlatform && EnableBPU && !env.DualCore) {
ExcitingUtils.addSink(xstrap, "XSTRAP", ConnectionType.Debug)
}
def readWithScala(addr: Int): UInt = mapping(addr)._1
val difftestIntrNO = Mux(raiseIntr, causeNO, 0.U)
if (!env.FPGAPlatform) {
// display all perfcnt when nooptrap is executed
when (xstrap) {
printf("======== PerfCnt =========\n")
emuPerfCntList.toSeq.sortBy(_._2._1).foreach { case (str, (address, _)) =>
printf("%d <- " + str + "\n", readWithScala(address))
}
}
}
if (!env.FPGAPlatform) {
difftestIO.intrNO := RegNext(difftestIntrNO)
......
......@@ -14,6 +14,34 @@ trait HasRedirectOut { this: RawModule =>
val redirectOut = IO(Output(new Redirect))
}
/** Datapath for jump-type operations; contains no registers.
  *
  * Selects an immediate format from `func` (I-type for jalr, U-type for auipc,
  * J-type otherwise), sign-extends it to XLEN and adds it to `src1` to form
  * `target`. `result` is `target` for auipc; for every other op it is the
  * sequential next pc (pc + 2 when `isRVC` is set, pc + 4 otherwise).
  *
  * NOTE(review): `ImmUnion.<fmt>.toImm32` presumably expands the compact
  * immediate `immMin` into the 32-bit immediate of that format; confirm
  * against ImmUnion.
  */
class JumpDataModule extends XSModule {
val io = IO(new Bundle() {
// base operand for the target addition
val src1 = Input(UInt(XLEN.W))
val pc = Input(UInt(XLEN.W)) // sign-ext to XLEN
// compact immediate; decoded below according to the op type
val immMin = Input(UInt(ImmUnion.maxLen.W))
val func = Input(FuOpType())
val isRVC = Input(Bool())
// result: value for the output data; target: src1 + offset
val result, target = Output(UInt(XLEN.W))
val isAuipc = Output(Bool())
})
val (src1, pc, immMin, func, isRVC) = (io.src1, io.pc, io.immMin, io.func, io.isRVC)
val isJalr = JumpOpType.jumpOpisJalr(func)
val isAuipc = JumpOpType.jumpOpisAuipc(func)
// the three select conditions are mutually exclusive as long as isJalr and isAuipc are not both set
val offset = SignExt(ParallelMux(Seq(
isJalr -> ImmUnion.I.toImm32(immMin),
isAuipc -> ImmUnion.U.toImm32(immMin),
!(isJalr || isAuipc) -> ImmUnion.J.toImm32(immMin)
)), XLEN)
// sequential next pc: compressed instructions are 2 bytes, others 4
val snpc = Mux(isRVC, pc + 2.U, pc + 4.U)
val target = src1 + offset // NOTE: src1 is (pc/rf(rs1)); the offset is the immediate selected above (this module has no src2 port)
io.target := target
// auipc returns the computed address; every other op returns the sequential next pc
io.result := Mux(JumpOpType.jumpOpisAuipc(func), target, snpc)
io.isAuipc := isAuipc
}
class Jump extends FunctionUnit with HasRedirectOut {
val (src1, jalr_target, pc, immMin, func, uop) = (
......@@ -25,41 +53,33 @@ class Jump extends FunctionUnit with HasRedirectOut {
io.in.bits.uop
)
val isJalr = JumpOpType.jumpOpisJalr(func)
val isAuipc = JumpOpType.jumpOpisAuipc(func)
val offset = SignExt(ParallelMux(Seq(
isJalr -> ImmUnion.I.toImm32(immMin),
isAuipc -> ImmUnion.U.toImm32(immMin),
!(isJalr || isAuipc) -> ImmUnion.J.toImm32(immMin)
)), XLEN)
val redirectHit = uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
val valid = io.in.valid
val isRVC = uop.cf.pd.isRVC
val snpc = Mux(isRVC, pc + 2.U, pc + 4.U)
val target = src1 + offset // NOTE: src1 is (pc/rf(rs1)), src2 is (offset)
redirectOutValid := valid && !isAuipc
val jumpDataModule = Module(new JumpDataModule)
jumpDataModule.io.src1 := src1
jumpDataModule.io.pc := pc
jumpDataModule.io.immMin := immMin
jumpDataModule.io.func := func
jumpDataModule.io.isRVC := isRVC
redirectOutValid := valid && !jumpDataModule.io.isAuipc
redirectOut := DontCare
redirectOut.cfiUpdate.target := target
redirectOut.cfiUpdate.target := jumpDataModule.io.target
redirectOut.level := RedirectLevel.flushAfter
redirectOut.roqIdx := uop.roqIdx
redirectOut.ftqIdx := uop.cf.ftqPtr
redirectOut.ftqOffset := uop.cf.ftqOffset
redirectOut.cfiUpdate.predTaken := true.B
redirectOut.cfiUpdate.taken := true.B
redirectOut.cfiUpdate.target := target
redirectOut.cfiUpdate.isMisPred := target =/= jalr_target || !uop.cf.pred_taken
// Output
val res = Mux(JumpOpType.jumpOpisAuipc(func), target, snpc)
redirectOut.cfiUpdate.target := jumpDataModule.io.target
redirectOut.cfiUpdate.isMisPred := jumpDataModule.io.target =/= jalr_target || !uop.cf.pred_taken
io.in.ready := io.out.ready
io.out.valid := valid
io.out.bits.uop <> io.in.bits.uop
io.out.bits.data := res
io.out.bits.data := jumpDataModule.io.result
// NOTE: the debug info is for one-cycle exec, if FMV needs multi-cycle, may needs change it
XSDebug(io.in.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d %d)\n",
......@@ -71,5 +91,4 @@ class Jump extends FunctionUnit with HasRedirectOut {
io.redirectIn.bits.level,
redirectHit
)
XSDebug(io.in.valid, "src1:%x offset:%x func:%b type:JUMP pc:%x res:%x\n", src1, offset, func, pc, res)
}
......@@ -42,14 +42,15 @@ class NaiveMultiplier(len: Int, val latency: Int)
XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")
}
class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len) with HasPipelineReg {
override def latency = doReg.size
class ArrayMulDataModule(len: Int, doReg: Seq[Int]) extends XSModule {
val io = IO(new Bundle() {
val a, b = Input(UInt(len.W))
val regEnables = Input(Vec(doReg.size, Bool()))
val result = Output(UInt((2 * len).W))
})
val (a, b) = (io.a, io.b)
val doRegSorted = doReg.sortWith(_ < _)
val (a, b) = (io.in.bits.src(0), io.in.bits.src(1))
val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len+1).W))
b_sext := SignExt(b, len+1)
bx2 := b_sext << 1
......@@ -149,7 +150,7 @@ class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len)
val needReg = doRegSorted.contains(depth)
val toNextLayer = if(needReg)
columns_next.map(_.map(PipelineReg(doRegSorted.indexOf(depth) + 1)(_)))
columns_next.map(_.map(x => RegEnable(x, io.regEnables(doRegSorted.indexOf(depth)))))
else
columns_next
......@@ -158,7 +159,18 @@ class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len)
}
val (sum, carry) = addAll(cols = columns, depth = 0)
val result = sum + carry
io.result := sum + carry
}
class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len) with HasPipelineReg {
override def latency = doReg.size
val mulDataModule = Module(new ArrayMulDataModule(len, doReg))
mulDataModule.io.a := io.in.bits.src(0)
mulDataModule.io.b := io.in.bits.src(1)
mulDataModule.io.regEnables := VecInit((1 to doReg.size) map (i => regEnable(i)))
val result = mulDataModule.io.result
var ctrlVec = Seq(ctrl)
for(i <- 1 to latency){
......
......@@ -4,120 +4,134 @@ import chisel3._
import chisel3.stage.{ChiselGeneratorAnnotation, ChiselStage}
import chisel3.util._
import utils.SignExt
import xiangshan.XSModule
import xiangshan.backend.fu.util.CSA3_2
/** A Radix-4 SRT Integer Divider
*
* 2 ~ (5 + (len+3)/2) cycles are needed for each division.
*/
class SRT4Divider(len: Int) extends AbstractDivider(len) {
class SRT4DividerDataModule(len: Int) extends Module {
val io = IO(new Bundle() {
val src1, src2 = Input(UInt(len.W))
val valid, sign, kill_w, kill_r, isHi, isW = Input(Bool())
val in_ready = Output(Bool())
val out_valid = Output(Bool())
val out_data = Output(UInt(len.W))
val out_ready = Input(Bool())
})
val (a, b, sign, valid, kill_w, kill_r, isHi, isW) =
(io.src1, io.src2, io.sign, io.valid, io.kill_w, io.kill_r, io.isHi, io.isW)
val in_fire = valid && io.in_ready
val out_fire = io.out_ready && io.out_valid
val s_idle :: s_lzd :: s_normlize :: s_recurrence :: s_recovery_1 :: s_recovery_2 :: s_finish :: Nil = Enum(7)
val state = RegInit(s_idle)
val newReq = (state === s_idle) && io.in.fire()
val cnt_next = Wire(UInt(log2Up((len+3)/2).W))
val cnt = RegEnable(cnt_next, state===s_normlize || state===s_recurrence)
val cnt_next = Wire(UInt(log2Up((len + 3) / 2).W))
val cnt = RegEnable(cnt_next, state === s_normlize || state === s_recurrence)
val rec_enough = cnt_next === 0.U
val newReq = in_fire
def abs(a: UInt, sign: Bool): (Bool, UInt) = {
val s = a(len - 1) && sign
(s, Mux(s, -a, a))
}
val (a, b) = (io.in.bits.src(0), io.in.bits.src(1))
val uop = io.in.bits.uop
val (aSign, aVal) = abs(a, sign)
val (bSign, bVal) = abs(b, sign)
val aSignReg = RegEnable(aSign, newReq)
val qSignReg = RegEnable(aSign ^ bSign, newReq)
val uopReg = RegEnable(uop, newReq)
val ctrlReg = RegEnable(ctrl, newReq)
val divZero = b === 0.U
val divZeroReg = RegEnable(divZero, newReq)
val kill = state=/=s_idle && uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)
switch(state){
is(s_idle){
when (io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)) {
switch(state) {
is(s_idle) {
when(in_fire && !kill_w) {
state := Mux(divZero, s_finish, s_lzd)
}
}
is(s_lzd){ // leading zero detection
is(s_lzd) { // leading zero detection
state := s_normlize
}
is(s_normlize){ // shift a/b
is(s_normlize) { // shift a/b
state := s_recurrence
}
is(s_recurrence){ // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
when(rec_enough){ state := s_recovery_1 }
is(s_recurrence) { // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
when(rec_enough) {
state := s_recovery_1
}
}
is(s_recovery_1){ // if rem < 0, rem = rem + d
is(s_recovery_1) { // if rem < 0, rem = rem + d
state := s_recovery_2
}
is(s_recovery_2){ // recovery shift
is(s_recovery_2) { // recovery shift
state := s_finish
}
is(s_finish){
when(io.out.fire()){ state := s_idle }
is(s_finish) {
when(out_fire) {
state := s_idle
}
}
}
when(kill){
when(kill_r) {
state := s_idle
}
/** Calculate abs(a)/abs(b) by recurrence
*
* ws, wc: partial remainder in carry-save form,
* in recurrence steps, ws/wc = 4ws[j]/4wc[j];
* in recovery step, ws/wc = ws[j]/wc[j];
* in final step, ws = abs(a)/abs(b).
* in recurrence steps, ws/wc = 4ws[j]/4wc[j];
* in recovery step, ws/wc = ws[j]/wc[j];
* in final step, ws = abs(a)/abs(b).
*
* d: normalized divisor(1/2<=d<1)
*
* wLen = 3 integer bits + (len+1) frac bits
*/
def wLen = 3 + len + 1
val ws, wc = Reg(UInt(wLen.W))
val ws_next, wc_next = Wire(UInt(wLen.W))
val d = Reg(UInt(wLen.W))
val aLeadingZeros = RegEnable(
next = PriorityEncoder(ws(len-1, 0).asBools().reverse),
enable = state===s_lzd
next = PriorityEncoder(ws(len - 1, 0).asBools().reverse),
enable = state === s_lzd
)
val bLeadingZeros = RegEnable(
next = PriorityEncoder(d(len-1, 0).asBools().reverse),
enable = state===s_lzd
next = PriorityEncoder(d(len - 1, 0).asBools().reverse),
enable = state === s_lzd
)
val diff = Cat(0.U(1.W), bLeadingZeros).asSInt() - Cat(0.U(1.W), aLeadingZeros).asSInt()
val isNegDiff = diff(diff.getWidth - 1)
val quotientBits = Mux(isNegDiff, 0.U, diff.asUInt())
val qBitsIsOdd = quotientBits(0)
val recoveryShift = RegEnable(len.U - bLeadingZeros, state===s_normlize)
val recoveryShift = RegEnable(len.U - bLeadingZeros, state === s_normlize)
val a_shifted, b_shifted = Wire(UInt(len.W))
a_shifted := Mux(isNegDiff,
ws(len-1, 0) << bLeadingZeros,
ws(len-1, 0) << aLeadingZeros
ws(len - 1, 0) << bLeadingZeros,
ws(len - 1, 0) << aLeadingZeros
)
b_shifted := d(len-1, 0) << bLeadingZeros
b_shifted := d(len - 1, 0) << bLeadingZeros
val rem_temp = ws + wc
val rem_fixed = Mux(rem_temp(wLen-1), rem_temp + d, rem_temp)
val rem_abs = (ws << recoveryShift)(2*len, len+1)
val rem_fixed = Mux(rem_temp(wLen - 1), rem_temp + d, rem_temp)
val rem_abs = (ws << recoveryShift) (2 * len, len + 1)
when(newReq){
when(newReq) {
ws := Cat(0.U(4.W), Mux(divZero, a, aVal))
wc := 0.U
d := Cat(0.U(4.W), bVal)
}.elsewhen(state === s_normlize){
}.elsewhen(state === s_normlize) {
d := Cat(0.U(3.W), b_shifted, 0.U(1.W))
ws := Mux(qBitsIsOdd, a_shifted, a_shifted << 1)
}.elsewhen(state === s_recurrence){
}.elsewhen(state === s_recurrence) {
ws := Mux(rec_enough, ws_next, ws_next << 2)
wc := Mux(rec_enough, wc_next, wc_next << 2)
}.elsewhen(state === s_recovery_1){
}.elsewhen(state === s_recovery_1) {
ws := rem_fixed
}.elsewhen(state === s_recovery_2){
}.elsewhen(state === s_recovery_2) {
ws := rem_abs
}
......@@ -140,8 +154,8 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_dx2 -> 2.U(2.W)
))
val w_truncated = (ws(wLen-1, wLen-1-6) + wc(wLen-1, wLen-1-6)).asSInt()
val d_truncated = d(len-1, len-3)
val w_truncated = (ws(wLen - 1, wLen - 1 - 6) + wc(wLen - 1, wLen - 1 - 6)).asSInt()
val d_truncated = d(len - 1, len - 3)
val qSelTable = Array(
Array(12, 4, -4, -13),
......@@ -156,9 +170,9 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
// ge(x): w_truncated >= x
var ge = Map[Int, Bool]()
for(row <- qSelTable){
for(k <- row){
if(!ge.contains(k)) ge = ge + (k -> (w_truncated >= k.S(7.W)))
for (row <- qSelTable) {
for (k <- row) {
if (!ge.contains(k)) ge = ge + (k -> (w_truncated >= k.S(7.W)))
}
}
q_sel := MuxLookup(d_truncated, sel_0,
......@@ -169,7 +183,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
ge(x(2)) -> sel_0,
ge(x(3)) -> sel_neg_d
))
).zipWithIndex.map({case(v, i) => i.U -> v})
).zipWithIndex.map({ case (v, i) => i.U -> v })
)
/** Calculate (ws[j+1],wc[j+1]) by a [3-2]carry-save adder
......@@ -178,7 +192,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
*/
val csa = Module(new CSA3_2(wLen))
csa.io.in(0) := ws
csa.io.in(1) := Cat(wc(wLen-1, 2), wc_adj)
csa.io.in(1) := Cat(wc(wLen - 1, 2), wc_adj)
csa.io.in(2) := MuxLookup(q_sel, 0.U, Seq(
sel_d -> neg_d,
sel_dx2 -> neg_dx2,
......@@ -190,10 +204,10 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
// On the fly quotient conversion
val q, qm = Reg(UInt(len.W))
when(newReq){
when(newReq) {
q := 0.U
qm := 0.U
}.elsewhen(state === s_recurrence){
}.elsewhen(state === s_recurrence) {
val qMap = Seq(
sel_0 -> (q, 0),
sel_d -> (q, 1),
......@@ -202,7 +216,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_neg_dx2 -> (qm, 2)
)
q := MuxLookup(q_sel, 0.U,
qMap.map(m => m._1 -> Cat(m._2._1(len-3, 0), m._2._2.U(2.W)))
qMap.map(m => m._1 -> Cat(m._2._1(len - 3, 0), m._2._2.U(2.W)))
)
val qmMap = Seq(
sel_0 -> (qm, 3),
......@@ -212,27 +226,53 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_neg_dx2 -> (qm, 1)
)
qm := MuxLookup(q_sel, 0.U,
qmMap.map(m => m._1 -> Cat(m._2._1(len-3, 0), m._2._2.U(2.W)))
qmMap.map(m => m._1 -> Cat(m._2._1(len - 3, 0), m._2._2.U(2.W)))
)
}.elsewhen(state === s_recovery_1){
q := Mux(rem_temp(wLen-1), qm, q)
}.elsewhen(state === s_recovery_1) {
q := Mux(rem_temp(wLen - 1), qm, q)
}
val remainder = Mux(aSignReg, -ws(len-1, 0), ws(len-1, 0))
val remainder = Mux(aSignReg, -ws(len - 1, 0), ws(len - 1, 0))
val quotient = Mux(qSignReg, -q, q)
val res = Mux(ctrlReg.isHi,
Mux(divZeroReg, ws(len-1, 0), remainder),
val res = Mux(isHi,
Mux(divZeroReg, ws(len - 1, 0), remainder),
Mux(divZeroReg, Fill(len, 1.U(1.W)), quotient)
)
io.in.ready := state===s_idle
io.out.valid := state===s_finish
io.out.bits.data := Mux(ctrlReg.isW,
io.out_data := Mux(isW,
SignExt(res(31, 0), len),
res
)
io.out.bits.uop := uopReg
io.in_ready := state === s_idle
io.out_valid := state === s_finish
}
/** Function-unit wrapper around [[SRT4DividerDataModule]].
 *
 * The wrapper keeps the uop-related state (latched uop and control signals,
 * flush/redirect checks) and forwards only plain data and handshake signals
 * to the data module.
 *
 * @param len operand width in bits, passed to both AbstractDivider and the data module
 */
class SRT4Divider(len: Int) extends AbstractDivider(len) {
// A new request is accepted when the input handshake fires.
val newReq = io.in.fire()
val uop = io.in.bits.uop
// Latch the uop and control signals of the accepted request.
// `ctrl` and `sign` are not defined in this class; presumably inherited from AbstractDivider.
val uopReg = RegEnable(uop, newReq)
val ctrlReg = RegEnable(ctrl, newReq)
val divDataModule = Module(new SRT4DividerDataModule(len))
// kill_w: flush check on the incoming (not yet latched) uop.
val kill_w = uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
// kill_r: flush check on the latched uop, only asserted while the data module
// is not accepting input (in_ready low).
val kill_r = !divDataModule.io.in_ready && uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)
divDataModule.io.src1 := io.in.bits.src(0)
divDataModule.io.src2 := io.in.bits.src(1)
divDataModule.io.valid := io.in.valid
divDataModule.io.sign := sign
divDataModule.io.kill_w := kill_w
divDataModule.io.kill_r := kill_r
// isHi / isW come from the latched control, i.e. the values captured at newReq.
divDataModule.io.isHi := ctrlReg.isHi
divDataModule.io.isW := ctrlReg.isW
divDataModule.io.out_ready := io.out.ready
// Handshake and result are taken directly from the data module;
// the outgoing uop is the one latched on request acceptance.
io.in.ready := divDataModule.io.in_ready
io.out.valid := divDataModule.io.out_valid
io.out.bits.data := divDataModule.io.out_data
io.out.bits.uop := uopReg
}
......@@ -5,7 +5,15 @@ import chisel3.util._
import freechips.rocketchip.tile.FType
import hardfloat.{DivSqrtRecFNToRaw_small, RoundAnyRawFNToRecFN}
class FDivSqrt extends FPUSubModule {
class FDivSqrtDataModule extends FPUDataModule {
val in_valid, out_ready = IO(Input(Bool()))
val in_ready, out_valid = IO(Output(Bool()))
val kill_w = IO(Input(Bool()))
val kill_r = IO(Input(Bool()))
val in_fire = in_valid && in_ready
val out_fire = out_valid && out_ready
val killReg = RegInit(false.B)
val s_idle :: s_div :: s_finish :: Nil = Enum(3)
val state = RegInit(s_idle)
......@@ -13,48 +21,42 @@ class FDivSqrt extends FPUSubModule {
val divSqrt = Module(new DivSqrtRecFNToRaw_small(FType.D.exp, FType.D.sig, 0))
val divSqrtRawValid = divSqrt.io.rawOutValid_sqrt || divSqrt.io.rawOutValid_div
val fpCtrl = io.in.bits.uop.ctrl.fpu
val fpCtrl = io.in.fpCtrl
val tag = fpCtrl.typeTagIn
val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
val single = RegEnable(tag === S, io.in.fire())
val rmReg = RegEnable(rm, io.in.fire())
val kill = uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)
val killReg = RegInit(false.B)
val single = RegEnable(tag === S, in_fire)
val rmReg = RegEnable(rm, in_fire)
switch(state){
is(s_idle){
when(io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)){ state := s_div }
when(in_fire && !kill_w){ state := s_div }
}
is(s_div){
when(divSqrtRawValid){
when(kill || killReg){
when(kill_r || killReg){
state := s_idle
killReg := false.B
}.otherwise({
state := s_finish
})
}.elsewhen(kill){
}.elsewhen(kill_r){
killReg := true.B
}
}
is(s_finish){
when(io.out.fire() || kill){
when(out_fire || kill_r){
state := s_idle
}
}
}
val src1 = unbox(io.in.bits.src(0), tag, None)
val src2 = unbox(io.in.bits.src(1), tag, None)
divSqrt.io.inValid := io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
val src1 = unbox(io.in.src(0), tag, None)
val src2 = unbox(io.in.src(1), tag, None)
divSqrt.io.inValid := in_fire && !kill_w
divSqrt.io.sqrtOp := fpCtrl.sqrt
divSqrt.io.a := src1
divSqrt.io.b := src2
divSqrt.io.roundingMode := rm
val round32 = Module(new RoundAnyRawFNToRecFN(
FType.D.exp, FType.D.sig+2, FType.S.exp, FType.S.sig, 0
))
......@@ -73,9 +75,25 @@ class FDivSqrt extends FPUSubModule {
val data = Mux(single, round32.io.out, round64.io.out)
val flags = Mux(single, round32.io.exceptionFlags, round64.io.exceptionFlags)
io.in.ready := state===s_idle
io.out.valid := state===s_finish && !killReg
io.out.bits.uop := uopReg
io.out.bits.data := RegNext(data, divSqrtRawValid)
in_ready := state===s_idle
out_valid := state===s_finish && !killReg
io.out.data := RegNext(data, divSqrtRawValid)
fflags := RegNext(flags, divSqrtRawValid)
}
/** Function-unit wrapper around [[FDivSqrtDataModule]].
 *
 * Holds the uop of the accepted request and derives the kill signals from
 * redirect/flush; operands, FPU control, rounding mode, result data and
 * fflags are wired by `connectDataModule`.
 */
class FDivSqrt extends FPUSubModule {
// uop captured when the input handshake fires; returned with the result.
val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
// Flush check on the latched uop.
val kill_r = uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)
override val dataModule = Module(new FDivSqrtDataModule)
connectDataModule
dataModule.in_valid := io.in.valid
dataModule.out_ready := io.out.ready
// kill_w: flush check on the incoming (not yet latched) uop.
dataModule.kill_w := io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
dataModule.kill_r := kill_r
io.in.ready := dataModule.in_ready
io.out.valid := dataModule.out_valid
io.out.bits.uop := uopReg
}
package xiangshan.backend.fu.fpu
import chisel3._
import chisel3.util.RegEnable
import freechips.rocketchip.tile.FType
import hardfloat.{MulAddRecFN_pipeline_stage1, MulAddRecFN_pipeline_stage2, MulAddRecFN_pipeline_stage3, MulAddRecFN_pipeline_stage4, RoundAnyRawFNToRecFN}
import xiangshan.backend.fu.FunctionUnit
class FMA extends FPUPipelineModule {
override def latency: Int = FunctionUnit.fmacCfg.latency.latencyVal.get
class FMADataModule(latency: Int) extends FPUDataModule {
val fpCtrl = io.in.bits.uop.ctrl.fpu
val regEnables = IO(Input(Vec(latency, Bool())))
val typeTagOut = IO(Input(UInt(2.W)))
val fpCtrl = io.in.fpCtrl
val typeTagIn = fpCtrl.typeTagIn
val src1 = unbox(io.in.bits.src(0), typeTagIn, None)
val src2 = unbox(io.in.bits.src(1), typeTagIn, None)
val src3 = unbox(io.in.bits.src(2), typeTagIn, None)
val src1 = unbox(io.in.src(0), typeTagIn, None)
val src2 = unbox(io.in.src(1), typeTagIn, None)
val src3 = unbox(io.in.src(2), typeTagIn, None)
val (in1, in2, in3) = (
WireInit(src1), WireInit(src2), WireInit(Mux(fpCtrl.isAddSub, src2, src3))
)
......@@ -34,7 +37,7 @@ class FMA extends FPUPipelineModule {
))
mul.io.a := stage1.io.mulAddA
mul.io.b := stage1.io.mulAddB
mul.io.reg_en := regEnable(1)
mul.io.reg_en := regEnables(0)
stage2.io.mulSum := mul.io.sum
stage2.io.mulCarry := mul.io.carry
......@@ -54,10 +57,10 @@ class FMA extends FPUPipelineModule {
stage1.io.in.bits.roundingMode := rm
stage1.io.in.bits.detectTininess := hardfloat.consts.tininess_afterRounding
stage2.io.fromStage1.bits <> S1Reg(stage1.io.toStage2.bits)
stage3.io.fromStage2.bits <> S2Reg(stage2.io.toStage3.bits)
stage4.io.fromStage3.bits <> S3Reg(stage3.io.toStage4.bits)
val stage4toStage5 = S4Reg(stage4.io.toStage5.bits)
stage2.io.fromStage1.bits <> RegEnable(stage1.io.toStage2.bits, regEnables(0))
stage3.io.fromStage2.bits <> RegEnable(stage2.io.toStage3.bits, regEnables(1))
stage4.io.fromStage3.bits <> RegEnable(stage3.io.toStage4.bits, regEnables(2))
val stage4toStage5 = RegEnable(stage4.io.toStage5.bits, regEnables(3))
val rounders = Seq(FType.S, FType.D).map(t => {
val rounder = Module(new RoundAnyRawFNToRecFN(FType.D.exp, FType.D.sig+2, t.exp, t.sig, 0))
......@@ -69,8 +72,8 @@ class FMA extends FPUPipelineModule {
rounder
})
val singleOut = io.out.bits.uop.ctrl.fpu.typeTagOut === S
io.out.bits.data := Mux(singleOut,
val singleOut = typeTagOut === S
io.out.data := Mux(singleOut,
sanitizeNaN(rounders(0).io.out, FType.S),
sanitizeNaN(rounders(1).io.out, FType.D)
)
......@@ -79,3 +82,12 @@ class FMA extends FPUPipelineModule {
rounders(1).io.exceptionFlags
)
}
/** Pipelined fused multiply-add unit: wrapper around [[FMADataModule]].
 *
 * The data path lives in the data module; this class supplies the pipeline
 * register enables and the output type tag.
 */
class FMA extends FPUPipelineModule {
// Pipeline depth is read from the fmac function-unit configuration.
override def latency: Int = FunctionUnit.fmacCfg.latency.latencyVal.get
override val dataModule = Module(new FMADataModule(latency))
connectDataModule
// regEnable(1..latency) is mapped onto regEnables(0..latency-1).
// `regEnable` is not defined here; presumably provided by the pipeline base class.
dataModule.regEnables <> VecInit((1 to latency) map (i => regEnable(i)))
// The single/double output selection uses the type tag of the outgoing uop.
dataModule.typeTagOut := io.out.bits.uop.ctrl.fpu.typeTagOut
}
......@@ -8,18 +8,18 @@ import chisel3.util._
import hardfloat.CompareRecFN
import xiangshan.backend.fu.FunctionUnit
class FPToFP extends FPUPipelineModule{
class FPToFPDataModule(latency: Int) extends FPUDataModule {
override def latency: Int = FunctionUnit.f2iCfg.latency.latencyVal.get
val regEnables = IO(Input(Vec(latency, Bool())))
val ctrlIn = io.in.bits.uop.ctrl.fpu
val ctrl = S1Reg(ctrlIn)
val ctrlIn = io.in.fpCtrl
val ctrl = RegEnable(ctrlIn, regEnables(0))
val inTag = ctrl.typeTagIn
val outTag = ctrl.typeTagOut
val wflags = ctrl.wflags
val src1 = S1Reg(unbox(io.in.bits.src(0), ctrlIn.typeTagIn, None))
val src2 = S1Reg(unbox(io.in.bits.src(1), ctrlIn.typeTagIn, None))
val rmReg = S1Reg(rm)
val src1 = RegEnable(unbox(io.in.src(0), ctrlIn.typeTagIn, None), regEnables(0))
val src2 = RegEnable(unbox(io.in.src(1), ctrlIn.typeTagIn, None), regEnables(0))
val rmReg = RegEnable(rm, regEnables(0))
val signNum = Mux(rmReg(1), src1 ^ src2, Mux(rmReg(0), ~src2, src2))
val fsgnj = Cat(signNum(fLen), src1(fLen-1, 0))
......@@ -79,6 +79,15 @@ class FPToFP extends FPUPipelineModule{
}
}
io.out.bits.data := S2Reg(mux.data)
fflags := S2Reg(mux.exc)
io.out.data := RegEnable(mux.data, regEnables(1))
fflags := RegEnable(mux.exc, regEnables(1))
}
/** Pipelined FP-to-FP unit: wrapper around [[FPToFPDataModule]].
 *
 * Wires the common FPU data-module ports via `connectDataModule` and drives
 * the data module's pipeline register enables.
 */
class FPToFP extends FPUPipelineModule{
// NOTE(review): latency is taken from f2iCfg (the same config FPToInt uses) — confirm this is intended.
override def latency: Int = FunctionUnit.f2iCfg.latency.latencyVal.get
override val dataModule = Module(new FPToFPDataModule(latency))
connectDataModule
// regEnable(1..latency) is mapped onto regEnables(0..latency-1).
dataModule.regEnables <> VecInit((1 to latency) map (i => regEnable(i)))
}
......@@ -10,19 +10,18 @@ import hardfloat.RecFNToIN
import utils.SignExt
import xiangshan.backend.fu.FunctionUnit
class FPToInt extends FPUPipelineModule {
override def latency = FunctionUnit.f2iCfg.latency.latencyVal.get
val (src1, src2) = (io.in.bits.src(0), io.in.bits.src(1))
class FPToIntDataModule(latency: Int) extends FPUDataModule {
val regEnables = IO(Input(Vec(latency, Bool())))
val (src1, src2) = (io.in.src(0), io.in.src(1))
val ctrl = io.in.bits.uop.ctrl.fpu
val ctrl = io.in.fpCtrl
// stage 1: unbox inputs
val src1_d = S1Reg(unbox(src1, ctrl.typeTagIn, None))
val src2_d = S1Reg(unbox(src2, ctrl.typeTagIn, None))
val ctrl_reg = S1Reg(ctrl)
val rm_reg = S1Reg(rm)
val src1_d = RegEnable(unbox(src1, ctrl.typeTagIn, None), regEnables(0))
val src2_d = RegEnable(unbox(src2, ctrl.typeTagIn, None), regEnables(0))
val ctrl_reg = RegEnable(ctrl, regEnables(0))
val rm_reg = RegEnable(rm, regEnables(0))
// stage2
......@@ -79,13 +78,22 @@ class FPToInt extends FPUPipelineModule {
Mux(rm_reg(0), classify_out, move_out)
)
val doubleOut = Mux(ctrl_reg.fcvt, ctrl_reg.typ(1), ctrl_reg.fmt(0))
val intValue = S2Reg(Mux(doubleOut,
val intValue = RegEnable(Mux(doubleOut,
SignExt(intData, XLEN),
SignExt(intData(31, 0), XLEN)
))
), regEnables(1))
val exc = S2Reg(Mux(ctrl_reg.fcvt, conv_exc, dcmp_exc))
val exc = RegEnable(Mux(ctrl_reg.fcvt, conv_exc, dcmp_exc), regEnables(1))
io.out.bits.data := intValue
io.out.data := intValue
fflags := exc
}
/** Pipelined FP-to-int unit: wrapper around [[FPToIntDataModule]].
 *
 * Wires the common FPU data-module ports via `connectDataModule` and drives
 * the data module's pipeline register enables.
 */
class FPToInt extends FPUPipelineModule {
// Pipeline depth is read from the f2i function-unit configuration.
override def latency = FunctionUnit.f2iCfg.latency.latencyVal.get
override val dataModule = Module(new FPToIntDataModule(latency))
connectDataModule
// regEnable(1..latency) is mapped onto regEnables(0..latency-1).
dataModule.regEnables <> VecInit((1 to latency) map (i => regEnable(i)))
}
......@@ -2,6 +2,7 @@ package xiangshan.backend.fu.fpu
import chisel3._
import chisel3.util._
import xiangshan.{FPUCtrlSignals, XSModule}
import xiangshan.backend.fu.{FuConfig, FunctionUnit, HasPipelineReg}
trait HasUIntToSIntHelper {
......@@ -10,11 +11,36 @@ trait HasUIntToSIntHelper {
}
}
/** Base class for FPU data-path modules.
 *
 * Declares the IO shared by all FPU data modules. It carries only operands,
 * FPU control signals and the rounding mode in, and result data plus
 * exception flags out; no uop or handshake signals are part of this bundle.
 */
abstract class FPUDataModule extends XSModule {
val io = IO(new Bundle() {
val in = Input(new Bundle() {
// Up to three 65-bit source operands.
val src = Vec(3, UInt(65.W))
val fpCtrl = new FPUCtrlSignals
// 3-bit rounding mode.
val rm = UInt(3.W)
})
val out = Output(new Bundle() {
// 65-bit result.
val data = UInt(65.W)
// 5-bit exception flags.
val fflags = UInt(5.W)
})
})
// Short aliases so subclasses can refer to `rm` / `fflags` directly.
val rm = io.in.rm
val fflags = io.out.fflags
}
/** Base class for FPU function units (65-bit data path).
 *
 * Adds the rounding-mode input and the fflags output to the FunctionUnit IO.
 * Concrete units provide a [[FPUDataModule]] and call `connectDataModule`
 * to hook it up.
 */
abstract class FPUSubModule extends FunctionUnit(len = 65)
with HasUIntToSIntHelper
{
// Rounding mode input and exception-flag output of the function unit.
val rm = IO(Input(UInt(3.W)))
val fflags = IO(Output(UInt(5.W)))
// The data-path module; must be supplied by the concrete subclass.
val dataModule: FPUDataModule
/** Wires the common data-module ports: sources, FPU control and rounding mode
 * go in; result data and fflags come out.
 */
def connectDataModule = {
dataModule.io.in.src <> io.in.bits.src
dataModule.io.in.fpCtrl <> io.in.bits.uop.ctrl.fpu
dataModule.io.in.rm <> rm
io.out.bits.data := dataModule.io.out.data
fflags := dataModule.io.out.fflags
}
}
abstract class FPUPipelineModule
......
......@@ -8,41 +8,50 @@ import chisel3.util._
import hardfloat.INToRecFN
import utils.{SignExt, ZeroExt}
class IntToFP extends FPUSubModule {
class IntToFPDataModule extends FPUDataModule {
val in_valid, out_ready = IO(Input(Bool()))
val in_ready, out_valid = IO(Output(Bool()))
val kill_w, kill_r = IO(Input(Bool()))
val s_idle :: s_cvt :: s_finish :: Nil = Enum(3)
val s_idle :: s_cvt :: s_ieee :: s_finish :: Nil = Enum(4)
val state = RegInit(s_idle)
io.in.ready := state === s_idle
io.out.valid := state === s_finish
val src1 = RegEnable(io.in.bits.src(0)(XLEN-1, 0), io.in.fire())
val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
val rmReg = RegEnable(rm, io.in.fire())
val in_fire = in_valid && in_ready
val out_fire = out_valid && out_ready
in_ready := state === s_idle
out_valid := state === s_finish
val src1 = RegEnable(io.in.src(0)(XLEN-1, 0), in_fire)
val rmReg = RegEnable(rm, in_fire)
val ctrl = RegEnable(io.in.fpCtrl, in_fire)
switch(state){
is(s_idle){
when(io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)){
when(in_fire && !kill_w){
state := s_cvt
}
}
is(s_cvt){
state := s_ieee
}
is(s_ieee){
state := s_finish
}
is(s_finish){
when(io.out.fire()){
when(out_fire){
state := s_idle
}
}
}
when(state =/= s_idle && uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)){
when(state =/= s_idle && kill_r){
state := s_idle
}
/*
s_cvt
*/
val ctrl = uopReg.ctrl.fpu
val tag = ctrl.typeTagIn
val typ = ctrl.typ
val wflags = ctrl.wflags
......@@ -73,9 +82,26 @@ class IntToFP extends FPUSubModule {
mux.exc := VecInit(exc)(tag)
}
val muxReg = RegEnable(mux, enable = state === s_cvt)
val muxReg = Reg(mux.cloneType)
when(state === s_cvt){
muxReg := mux
}.elsewhen(state === s_ieee){
muxReg.data := ieee(box(muxReg.data, ctrl.typeTagOut))
}
fflags := muxReg.exc
io.out.data := muxReg.data
}
/** Function-unit wrapper around [[IntToFPDataModule]].
 *
 * Holds the uop of the accepted request and derives the kill signals from
 * redirect/flush. Operands, FPU control, rounding mode, result data and
 * fflags are wired by `connectDataModule`.
 *
 * `io.out.bits.data` is intentionally driven only by `connectDataModule`
 * (from `dataModule.io.out.data`). The data module already applies `box`
 * with `ctrl.typeTagOut` to its result register in its `s_ieee` state, and
 * `muxReg` lives inside the data module, so the wrapper must not re-drive
 * the data output from it.
 */
class IntToFP extends FPUSubModule {
  override val dataModule = Module(new IntToFPDataModule)
  dataModule.in_valid := io.in.valid
  dataModule.out_ready := io.out.ready
  connectDataModule
  // uop captured when the input handshake fires; returned with the result.
  val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
  // kill_w: flush check on the incoming uop; kill_r: flush check on the latched uop.
  dataModule.kill_w := io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
  dataModule.kill_r := uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)
  io.in.ready := dataModule.in_ready
  io.out.valid := dataModule.out_valid
  io.out.bits.uop := uopReg
}
......@@ -100,6 +100,7 @@ class ReservationStation
val fromDispatch = Flipped(DecoupledIO(new MicroOp))
val deq = DecoupledIO(new ExuInput)
val srcRegValue = Input(Vec(srcNum, UInt(srcLen.W)))
val fpRegValue = if (exuCfg == Exu.stExeUnitCfg) Input(UInt(srcLen.W)) else null
val jumpPc = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val jalr_target = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
......@@ -130,7 +131,7 @@ class ReservationStation
select.io.memfeedback := io.memfeedback
}
ctrl.io.in.valid := select.io.enq.fire() && !(io.redirect.valid || io.flush) // NOTE: same as select
ctrl.io.in.valid := select.io.enq.fire()// && !(io.redirect.valid || io.flush) // NOTE: same as select
ctrl.io.flush := io.flush
ctrl.io.in.bits.addr := select.io.enq.bits
ctrl.io.in.bits.uop := io.fromDispatch.bits
......@@ -155,6 +156,9 @@ class ReservationStation
data.io.jumpPc := io.jumpPc
data.io.jalr_target := io.jalr_target
}
if (exuCfg == Exu.stExeUnitCfg) {
data.io.fpRegValue := io.fpRegValue
}
data.io.sel := select.io.deq.bits
data.io.listen.wen := ctrl.io.listen
for (i <- 0 until fastPortsCnt) {
......@@ -345,7 +349,8 @@ class ReservationStationSelect
val enqueue = io.enq.fire() && !(io.redirect.valid || io.flush)
val tailInc = tailPtr + 1.U
val tailDec = tailPtr - 1.U
tailPtr := Mux(dequeue === enqueue, tailPtr, Mux(dequeue, tailDec, tailInc))
val nextTailPtr = Mux(dequeue === enqueue, tailPtr, Mux(dequeue, tailDec, tailInc))
tailPtr := nextTailPtr
val enqPtr = Mux(tailPtr.flag, deqPtr, tailPtr.value)
val enqIdx = indexQueue(enqPtr)
......@@ -362,7 +367,7 @@ class ReservationStationSelect
io.deq.valid := selectValid
io.deq.bits := selectIndex
io.numExist := Mux(tailPtr.flag, (iqSize-1).U, tailPtr.value)
io.numExist := RegNext(Mux(nextTailPtr.flag, (iqSize-1).U, nextTailPtr.value))
assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
}
......@@ -450,6 +455,15 @@ class ReservationStationCtrl
when (enqEn) {
srcQueue(enqPtr).zip(enqSrcReady).map{ case (s, e) => s := e }
}
// NOTE: mark src2 ready one cycle late, because the fp source value arrives one cycle later than usual
if (exuCfg == Exu.stExeUnitCfg) {
when (enqEn) {
when (enqUop.ctrl.src2Type === SrcType.fp) { srcQueue(enqPtr)(1) := false.B }
}
when (enqEnReg && RegNext(enqUop.ctrl.src2Type === SrcType.fp && enqSrcReady(1))) {
srcQueue(enqPtrReg)(1) := true.B
}
}
for (i <- 0 until iqSize) {
for (j <- 0 until srcNum) {
when (srcUpdate(i)(j)) { srcQueue(i)(j) := true.B }
......@@ -591,18 +605,18 @@ class ReservationStationCtrl
}
}
class RSDataSingleSrc(srcLen: Int, numEntries: Int, numListen: Int) extends XSModule {
class RSDataSingleSrc(srcLen: Int, numEntries: Int, numListen: Int, writePort: Int = 1) extends XSModule {
val io = IO(new Bundle {
val r = new Bundle {
// val valid = Bool() // NOTE: if read valid is necessary, but now it is not completed
val addr = Input(UInt(log2Up(numEntries).W))
val rdata = Output(UInt(srcLen.W))
}
val w = Input(new Bundle {
val w = Input(Vec(writePort, new Bundle {
val wen = Bool()
val addr = UInt(log2Up(numEntries).W)
val wdata = Input(UInt(srcLen.W))
})
val wdata = UInt(srcLen.W)
}))
val listen = Input(new Bundle {
val wdata = Vec(numListen, UInt(srcLen.W))
val wen = Vec(numEntries, Vec(numListen, Bool()))
......@@ -611,9 +625,14 @@ class RSDataSingleSrc(srcLen: Int, numEntries: Int, numListen: Int) extends XSMo
val value = Reg(Vec(numEntries, UInt(srcLen.W)))
val wMask = Mux(io.w.wen, UIntToOH(io.w.addr)(numEntries-1, 0), 0.U(numEntries.W))
val data = io.listen.wdata :+ io.w.wdata
val wen = io.listen.wen.zip(wMask.asBools).map{ case (w, m) => w :+ m }
val wMaskT = io.w.map(w => Mux(w.wen, UIntToOH(w.addr)(numEntries-1, 0), 0.U(numEntries.W)))
val wMask = (0 until numEntries).map(i =>
(0 until writePort).map(j =>
wMaskT(j)(i)
))
val wData = io.w.map(w => w.wdata)
val data = io.listen.wdata ++ io.w.map(_.wdata)
val wen = io.listen.wen.zip(wMask).map{ case (w, m) => w ++ m }
for (i <- 0 until numEntries) {
when (Cat(wen(i)).orR) {
value(i) := ParallelMux(wen(i) zip data)
......@@ -640,8 +659,10 @@ class ReservationStationData
val srcNum = if (exuCfg == Exu.jumpExeUnitCfg) 2 else max(exuCfg.intSrcCnt, exuCfg.fpSrcCnt)
require(nonBlocked==fastWakeup)
val io = IO(new XSBundle {
val srcRegValue = Vec(srcNum, Input(UInt(srcLen.W)))
val fpRegValue = if (exuCfg == Exu.stExeUnitCfg) Input(UInt(srcLen.W)) else null
val jumpPc = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val jalr_target = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val in = Input(new Bundle {
......@@ -665,25 +686,35 @@ class ReservationStationData
// Data : single read, multi write
// ------------------------
val data = (0 until srcNum).map{i =>
val d = Module(new RSDataSingleSrc(srcLen, iqSize, fastPortsCnt + slowPortsCnt))
d.suggestName(s"${this.name}_data${i}")
d.io
val data = if (exuCfg == Exu.stExeUnitCfg) {
val srcBase = Module(new RSDataSingleSrc(srcLen, iqSize, fastPortsCnt + slowPortsCnt, 1))
val srcData = Module(new RSDataSingleSrc(srcLen, iqSize, fastPortsCnt + slowPortsCnt, 2))
srcBase.suggestName(s"${this.name}_data0")
srcData.suggestName(s"${this.name}_data1")
Seq(srcBase.io, srcData.io)
} else {
(0 until srcNum).map{i =>
val d = Module(new RSDataSingleSrc(srcLen, iqSize, fastPortsCnt + slowPortsCnt, 1))
d.suggestName(s"${this.name}_data${i}")
d.io
}
}
(0 until srcNum).foreach{ i =>
data(i).listen.wen := io.listen.wen(i)
data(i).listen.wdata := io.listen.wdata
}
data.map(_.w.addr := RegEnable(io.in.addr, io.in.valid))
data.zip(io.in.enqSrcReady).map{ case (src, ready) => src.w.wen := RegNext(ready && io.in.valid) }
val addrReg = RegEnable(io.in.addr, io.in.valid)
val enqSrcReadyReg = io.in.enqSrcReady.map(r => RegNext(r && io.in.valid))
data.map(_.w(0).addr := addrReg)
data.zip(enqSrcReadyReg).map{ case (src, ready) => src.w(0).wen := ready }
val pcMem = if(exuCfg == Exu.jumpExeUnitCfg)
Some(Module(new SyncDataModuleTemplate(UInt(VAddrBits.W), iqSize, numRead = 1, numWrite = 1))) else None
if(pcMem.nonEmpty){
pcMem.get.io.wen(0) := RegNext(io.in.valid)
pcMem.get.io.waddr(0) := RegNext(io.in.addr)
pcMem.get.io.waddr(0) := addrReg
pcMem.get.io.wdata(0) := io.jumpPc
}
......@@ -694,15 +725,15 @@ class ReservationStationData
io.srcRegValue(0)
)
// data.io.w.bits.data(0) := src1Mux
data(0).w.wdata := src1Mux
data(1).w.wdata := io.jalr_target
data(0).w(0).wdata := src1Mux
data(1).w(0).wdata := io.jalr_target
case Exu.aluExeUnitCfg =>
val src1Mux = Mux(enqUopReg.ctrl.src1Type === SrcType.pc,
SignExt(enqUopReg.cf.pc, XLEN),
io.srcRegValue(0)
)
data(0).w.wdata := src1Mux
data(0).w(0).wdata := src1Mux
// alu only need U type and I type imm
val imm32 = Mux(enqUopReg.ctrl.selImm === SelImm.IMM_U,
ImmUnion.U.toImm32(enqUopReg.ctrl.imm),
......@@ -712,9 +743,17 @@ class ReservationStationData
val src2Mux = Mux(enqUopReg.ctrl.src2Type === SrcType.imm,
imm64, io.srcRegValue(1)
)
data(1).w.wdata := src2Mux
data(1).w(0).wdata := src2Mux
case Exu.stExeUnitCfg =>
(0 until srcNum).foreach(i => data(i).w(0).wdata := io.srcRegValue(i) )
data(1).w(1).wdata := io.fpRegValue
data(1).w(1).addr := RegNext(addrReg)
data(1).w(1).wen := RegNext(enqSrcReadyReg(1) && enqUopReg.ctrl.src2Type === SrcType.fp)
data(1).w(0).wen := enqSrcReadyReg(1) && enqUopReg.ctrl.src2Type =/= SrcType.fp
case _ =>
(0 until srcNum).foreach(i => data(i).w.wdata := io.srcRegValue(i) )
(0 until srcNum).foreach(i => data(i).w(0).wdata := io.srcRegValue(i) )
}
// deq
data.map(_.r.addr := io.sel)
......
......@@ -5,6 +5,7 @@ import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.dispatch.PreDispatchInfo
class RenameBypassInfo extends XSBundle {
val lsrc1_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
......@@ -23,6 +24,7 @@ class Rename extends XSModule with HasCircularQueuePtrHelper {
// to dispatch1
val out = Vec(RenameWidth, DecoupledIO(new MicroOp))
val renameBypass = Output(new RenameBypassInfo)
val dispatchInfo = Output(new PreDispatchInfo)
})
def printRenameInfo(in: DecoupledIO[CfCtrl], out: DecoupledIO[MicroOp]) = {
......@@ -202,6 +204,12 @@ class Rename extends XSModule with HasCircularQueuePtrHelper {
}).reverse)
}
val isLs = VecInit(uops.map(uop => FuType.isLoadStore(uop.ctrl.fuType)))
val isStore = VecInit(uops.map(uop => FuType.isStoreExu(uop.ctrl.fuType)))
val isAMO = VecInit(uops.map(uop => FuType.isAMO(uop.ctrl.fuType)))
io.dispatchInfo.lsqNeedAlloc := VecInit((0 until RenameWidth).map(i =>
Mux(isLs(i), Mux(isStore(i) && !isAMO(i), 2.U, 1.U), 0.U)))
/**
* Instructions commit: update freelist and rename table
*/
......
......@@ -792,22 +792,22 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
if(i % 4 == 3) XSDebug(false, true.B, "\n")
}
XSPerf("utilization", PopCount((0 until RoqSize).map(valid(_))))
XSPerf("commitInstr", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid)))
XSPerf("commitInstrLoad", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{ case (v, t) => v && t === CommitType.LOAD})))
XSPerf("commitInstrStore", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{ case (v, t) => v && t === CommitType.STORE})))
XSPerf("writeback", PopCount((0 until RoqSize).map(i => valid(i) && writebacked(i))))
XSPerf("roq_utilization", PopCount((0 until RoqSize).map(valid(_))))
XSPerf("roq_commitInstr", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid)))
XSPerf("roq_commitInstrLoad", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{ case (v, t) => v && t === CommitType.LOAD})))
XSPerf("roq_commitInstrStore", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{ case (v, t) => v && t === CommitType.STORE})))
XSPerf("roq_writeback", PopCount((0 until RoqSize).map(i => valid(i) && writebacked(i))))
// XSPerf("enqInstr", PopCount(io.dp1Req.map(_.fire())))
// XSPerf("d2rVnR", PopCount(io.dp1Req.map(p => p.valid && !p.ready)))
XSPerf("walkInstrAcc", Mux(io.commits.isWalk, PopCount(io.commits.valid), 0.U), acc = true)
XSPerf("walkCycleAcc", state === s_walk || state === s_extrawalk, acc = true)
XSPerf("roq_walkInstrAcc", Mux(io.commits.isWalk, PopCount(io.commits.valid), 0.U), acc = true)
XSPerf("roq_walkCycleAcc", state === s_walk || state === s_extrawalk, acc = true)
val deqNotWritebacked = valid(deqPtr.value) && !writebacked(deqPtr.value)
val deqUopCommitType = io.commits.info(0).commitType
XSPerf("waitNormalCycleAcc", deqNotWritebacked && deqUopCommitType === CommitType.NORMAL, acc = true)
XSPerf("waitBranchCycleAcc", deqNotWritebacked && deqUopCommitType === CommitType.BRANCH, acc = true)
XSPerf("waitLoadCycleAcc", deqNotWritebacked && deqUopCommitType === CommitType.LOAD, acc = true)
XSPerf("waitStoreCycleAcc", deqNotWritebacked && deqUopCommitType === CommitType.STORE, acc = true)
XSPerf("roqHeadPC", io.commits.info(0).pc)
XSPerf("roq_waitNormalCycleAcc", deqNotWritebacked && deqUopCommitType === CommitType.NORMAL, acc = true)
XSPerf("roq_waitBranchCycleAcc", deqNotWritebacked && deqUopCommitType === CommitType.BRANCH, acc = true)
XSPerf("roq_waitLoadCycleAcc", deqNotWritebacked && deqUopCommitType === CommitType.LOAD, acc = true)
XSPerf("roq_waitStoreCycleAcc", deqNotWritebacked && deqUopCommitType === CommitType.STORE, acc = true)
XSPerf("roq_roqHeadPC", io.commits.info(0).pc)
val instrCnt = RegInit(0.U(64.W))
val retireCounter = Mux(state === s_idle, commitCnt, 0.U)
......@@ -866,7 +866,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val trapCode = PriorityMux(wdata.zip(trapVec).map(x => x._2 -> x._1))
val trapPC = SignExt(PriorityMux(wpc.zip(trapVec).map(x => x._2 ->x._1)), XLEN)
if (!env.FPGAPlatform && EnableBPU && !env.DualCore) {
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSource(hitTrap, "XSTRAP", ConnectionType.Debug)
}
......
......@@ -272,7 +272,17 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
mainPipeReqArb.io.in(AtomicsMainPipeReqPort) <> atomicsReplayUnit.io.pipe_req
mainPipeReqArb.io.in(ProbeMainPipeReqPort) <> probeQueue.io.pipe_req
mainPipe.io.req <> mainPipeReqArb.io.out
// add a stage to break the Arbiter bits.addr to ready path
// One-entry register slice between mainPipeReqArb and mainPipe:
//   mainPipeReq_valid - the slice currently holds a request
//   mainPipeReq_req   - the held request payload
val mainPipeReq_valid = RegInit(false.B)
// the held request is accepted by mainPipe this cycle
val mainPipeReq_fire = mainPipeReq_valid && mainPipe.io.req.ready
// capture the arbiter's winning request only on the cycle it fires
val mainPipeReq_req = RegEnable(mainPipeReqArb.io.out.bits, mainPipeReqArb.io.out.fire())
// the arbiter may advance only when mainPipe is ready, so a held request is never overwritten while mainPipe stalls
mainPipeReqArb.io.out.ready := mainPipe.io.req.ready
// mainPipe sees the registered valid/bits instead of the arbiter's combinational outputs
mainPipe.io.req.valid := mainPipeReq_valid
mainPipe.io.req.bits := mainPipeReq_req
// a newly accepted request (re)fills the slice ...
when (mainPipeReqArb.io.out.fire()) { mainPipeReq_valid := true.B }
// ... and the slice empties only when the held request leaves and nothing replaces it in the same cycle
when (!mainPipeReqArb.io.out.fire() && mainPipeReq_fire) { mainPipeReq_valid := false.B }
missQueue.io.pipe_resp <> mainPipe.io.miss_resp
storeReplayUnit.io.pipe_resp <> mainPipe.io.store_resp
......
......@@ -629,9 +629,7 @@ class ICache extends ICacheModule
dump_pipe_info()
// Performance Counter
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSource( s3_valid && !blocking, "perfCntIcacheReqCnt", Perf)
ExcitingUtils.addSource( s3_miss && blocking && io.resp.fire(), "perfCntIcacheMissCnt", Perf)
ExcitingUtils.addSource( s3_mmio && blocking && io.resp.fire(), "perfCntIcacheMMIOCnt", Perf)
}
XSPerf("icache_req", s3_valid && !blocking)
XSPerf("icache_miss", s3_miss && blocking && io.resp.fire())
XSPerf("icache_mmio", s3_mmio && blocking && io.resp.fire())
}
\ No newline at end of file
......@@ -229,16 +229,13 @@ class IcacheMissQueue extends ICacheMissQueueModule
entry.io.mem_grant <> io.mem_grant
}
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSource(
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire() || entry.io.flush,
startHighPriority = true),
"perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
Perf
)
}
XSPerf(
"ICacheMissQueue_PenaltyEntry" + Integer.toString(i, 10),
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire() || entry.io.flush,
startHighPriority = true)
)
entry
}
......
......@@ -5,7 +5,7 @@ import chisel3.util._
import chisel3.ExcitingUtils._
import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleA, TLBundleD, TLBundleE, TLPermissions, TLArbiter, ClientMetadata}
import utils.{HasTLDump, XSDebug, BoolStopWatch, OneHot}
import utils.{HasTLDump, XSDebug, BoolStopWatch, OneHot, XSPerf}
class MissReq extends DCacheBundle
{
......@@ -413,16 +413,13 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
}
/*
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSource(
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire(),
startHighPriority = true),
"perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
Perf
)
}
XSPerf(
"perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire(),
startHighPriority = true)
)
*/
entry
......@@ -492,7 +489,5 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
io.mem_finish.bits.dump
}
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSource(io.req.fire(), "perfCntDCacheMiss", Perf)
}
XSPerf("dcache_miss", io.req.fire())
}
......@@ -137,8 +137,9 @@ class StoreReplayQueue extends DCacheModule
val alloc_idx = PriorityEncoder(primary_ready)
val req = io.lsu.req
val block_conflict = Wire(Bool())
req.ready := allocate && !block_conflict
// do not use block conflict in req allocate path
// comparing against all entries incurs too much latency
req.ready := allocate
val entries = (0 until cfg.nStoreReplayEntries) map { i =>
val entry = Module(new StoreReplayEntry)
......@@ -146,7 +147,7 @@ class StoreReplayQueue extends DCacheModule
entry.io.id := i.U
// entry req
entry.io.lsu.req.valid := (i.U === alloc_idx) && allocate && req.valid && !block_conflict
entry.io.lsu.req.valid := (i.U === alloc_idx) && allocate && req.valid
primary_ready(i) := entry.io.lsu.req.ready
entry.io.lsu.req.bits := req.bits
......@@ -165,7 +166,7 @@ class StoreReplayQueue extends DCacheModule
io.lsu.resp <> resp_arb.io.out
io.pipe_req <> pipe_req_arb.io.out
block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.lsu.req.bits.addr)).asUInt.orR
val block_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.lsu.req.bits.addr)).asUInt.orR
// sanity check
when (io.lsu.req.valid) {
......
......@@ -112,19 +112,22 @@ class CAMTemplate[T <: Data](val gen: T, val set: Int, val readWidth: Int) exten
val io = IO(new Bundle {
val r = new Bundle {
val req = Input(Vec(readWidth, gen))
val resp = Output(Vec(readWidth, UInt(set.W)))
val resp = Output(Vec(readWidth, Vec(set, Bool())))
}
val w = Flipped(ValidIO(new Bundle {
val index = UInt(log2Up(set).W)
val data = gen
}))
val w = Input(new Bundle {
val valid = Bool()
val bits = new Bundle {
val index = UInt(log2Up(set).W)
val data = gen
}
})
})
val wordType = UInt(gen.getWidth.W)
val array = Reg(Vec(set, wordType))
io.r.resp.zipWithIndex.map{ case (a,i) =>
a := VecInit(array.map(io.r.req(i).asUInt === _)).asUInt
a := array.map(io.r.req(i).asUInt === _)
}
when (io.w.valid) {
......@@ -132,78 +135,73 @@ class CAMTemplate[T <: Data](val gen: T, val set: Int, val readWidth: Int) exten
}
}
class TlbEntryData extends TlbBundle {
val ppn = UInt(ppnLen.W)
val perm = new TlbPermBundle
// TODO: change perm to cover every kind of pf check
class TlbSPMeta extends TlbBundle {
val tag = UInt(vpnLen.W) // tag is vpn
val level = UInt(1.W) // 1 for 2MB, 0 for 1GB
override def toPrintable: Printable = {
p"ppn:0x${Hexadecimal(ppn)} perm:${perm}"
def hit(vpn: UInt): Bool = {
val a = tag(vpnnLen*3-1, vpnnLen*2) === vpn(vpnnLen*3-1, vpnnLen*2)
val b = tag(vpnnLen*2-1, vpnnLen*1) === vpn(vpnnLen*2-1, vpnnLen*1)
XSDebug(Mux(level.asBool, a&b, a), p"Hit superpage: hit:${Mux(level.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${level} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n")
Mux(level.asBool, a&b, a)
}
}
class TlbEntry(superpage: Boolean = false) extends TlbBundle {
val tag = UInt(vpnLen.W) // tag is vpn
val level = if(superpage) Some(UInt(1.W)) else None // /*2 for 4KB,*/ 1 for 2MB, 0 for 1GB
val data = new TlbEntryData
def apply(vpn: UInt, level: UInt) = {
this.tag := vpn
this.level := level(0)
def hit(vpn: UInt): Bool = {
if (superpage) {
val insideLevel = level.getOrElse(0.U)
val a = tag(vpnnLen*3-1, vpnnLen*2) === vpn(vpnnLen*3-1, vpnnLen*2)
val b = tag(vpnnLen*2-1, vpnnLen*1) === vpn(vpnnLen*2-1, vpnnLen*1)
XSDebug(Mux(insideLevel.asBool, a&b, a), p"Hit superpage: hit:${Mux(insideLevel.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${insideLevel} data:${data} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n")
Mux(insideLevel.asBool, a&b, a)
} else {
XSDebug(tag === vpn, p"Hit normalpage: hit:${tag === vpn} tag:${Hexadecimal(tag)} data:${data} vpn:${Hexadecimal(vpn)}\n")
tag === vpn
}
this
}
def ppn(vpn: UInt): UInt = {
}
class TlbData(superpage: Boolean = false) extends TlbBundle {
val level = if(superpage) Some(UInt(1.W)) else None // /*2 for 4KB,*/ 1 for 2MB, 0 for 1GB
val ppn = UInt(ppnLen.W)
val perm = new TlbPermBundle
def genPPN(vpn: UInt): UInt = {
if (superpage) {
val insideLevel = level.getOrElse(0.U)
Mux(insideLevel.asBool, Cat(data.ppn(data.ppn.getWidth-1, vpnnLen*1), vpn(vpnnLen*1-1, 0)),
Cat(data.ppn(data.ppn.getWidth-1, vpnnLen*2), vpn(vpnnLen*2-1, 0)))
Mux(insideLevel.asBool, Cat(ppn(ppn.getWidth-1, vpnnLen*1), vpn(vpnnLen*1-1, 0)),
Cat(ppn(ppn.getWidth-1, vpnnLen*2), vpn(vpnnLen*2-1, 0)))
} else {
data.ppn
ppn
}
}
def apply(vpn: UInt, ppn: UInt, level: UInt, perm: UInt, pf: Bool) = {
this.tag := vpn
def apply(ppn: UInt, level: UInt, perm: UInt, pf: Bool) = {
this.level.map(_ := level(0))
this.data.ppn := ppn
this.ppn := ppn
// refill pagetable perm
val ptePerm = perm.asTypeOf(new PtePermBundle)
this.data.perm.pf:= pf
this.data.perm.d := ptePerm.d
this.data.perm.a := ptePerm.a
this.data.perm.g := ptePerm.g
this.data.perm.u := ptePerm.u
this.data.perm.x := ptePerm.x
this.data.perm.w := ptePerm.w
this.data.perm.r := ptePerm.r
this.perm.pf:= pf
this.perm.d := ptePerm.d
this.perm.a := ptePerm.a
this.perm.g := ptePerm.g
this.perm.u := ptePerm.u
this.perm.x := ptePerm.x
this.perm.w := ptePerm.w
this.perm.r := ptePerm.r
// get pma perm
val (pmaMode, accessWidth) = AddressSpace.memmapAddrMatch(Cat(ppn, 0.U(12.W)))
this.data.perm.pr := PMAMode.read(pmaMode)
this.data.perm.pw := PMAMode.write(pmaMode)
this.data.perm.pe := PMAMode.execute(pmaMode)
this.data.perm.pa := PMAMode.atomic(pmaMode)
this.data.perm.pi := PMAMode.icache(pmaMode)
this.data.perm.pd := PMAMode.dcache(pmaMode)
this.perm.pr := PMAMode.read(pmaMode)
this.perm.pw := PMAMode.write(pmaMode)
this.perm.pe := PMAMode.execute(pmaMode)
this.perm.pa := PMAMode.atomic(pmaMode)
this.perm.pi := PMAMode.icache(pmaMode)
this.perm.pd := PMAMode.dcache(pmaMode)
this
}
override def toPrintable: Printable = {
val insideLevel = level.getOrElse(0.U)
p"vpn:0x${Hexadecimal(tag)} level:${insideLevel} data:${data}"
p"level:${insideLevel} ppn:${Hexadecimal(ppn)} perm:${perm}"
}
override def cloneType: this.type = (new TlbEntry(superpage)).asInstanceOf[this.type]
override def cloneType: this.type = (new TlbData(superpage)).asInstanceOf[this.type]
}
object TlbCmd {
......@@ -311,13 +309,15 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
// Normal page && Super page
val nv = RegInit(VecInit(Seq.fill(TlbEntrySize)(false.B)))
val nentry = Reg(Vec(TlbEntrySize, new TlbEntry(false)))
val nMeta = Module(new CAMTemplate(UInt(vpnLen.W), TlbEntrySize, Width + 1)).io
val nData = Reg(Vec(TlbEntrySize, new TlbData(false)))
val sv = RegInit(VecInit(Seq.fill(TlbSPEntrySize)(false.B)))
val sentry = Reg(Vec(TlbSPEntrySize, new TlbEntry(true)))
val sMeta = Reg(Vec(TlbSPEntrySize, new TlbSPMeta))
val sData = Reg(Vec(TlbSPEntrySize, new TlbData(true)))
val v = nv ++ sv
val entry = nentry ++ sentry
val g = VecInit(entry.map(_.data.perm.g))
val pf = VecInit(entry.zip(v).map{ case(e, vi) => e.data.perm.pf & vi })
val data = nData ++ sData
val g = VecInit(data.map(_.perm.g))
val pf = VecInit(data.zip(v).map{ case(e, vi) => e.perm.pf & vi })
/**
* PTW refill
......@@ -331,14 +331,19 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val nRefillIdx = replaceWrapper(nv, nReplace.way)
val sRefillIdx = replaceWrapper(sv, sReplace.way)
nMeta.w := DontCare
nMeta.w.valid := false.B
when (refill) {
val resp = ptw.resp.bits
when (resp.entry.level.getOrElse(0.U) === 2.U) {
val refillIdx = nRefillIdx
refillIdx.suggestName(s"NormalRefillIdx")
nv(refillIdx) := true.B
nentry(refillIdx).apply(
vpn = resp.entry.tag,
nMeta.w.bits.index := nRefillIdx
nMeta.w.bits.data := resp.entry.tag
nMeta.w.valid := true.B
nData(refillIdx).apply(
ppn = resp.entry.ppn,
level = resp.entry.level.getOrElse(0.U),
perm = VecInit(resp.entry.perm.getOrElse(0.U)).asUInt,
......@@ -348,9 +353,13 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
}.otherwise {
val refillIdx = sRefillIdx
refillIdx.suggestName(s"SuperRefillIdx")
sv(refillIdx) := true.B
sentry(refillIdx).apply(
vpn = resp.entry.tag,
sMeta(refillIdx).apply(
vpn = resp.entry.tag,
level = resp.entry.level.getOrElse(0.U)
)
sData(refillIdx).apply(
ppn = resp.entry.ppn,
level = resp.entry.level.getOrElse(0.U),
perm = VecInit(resp.entry.perm.getOrElse(0.U)).asUInt,
......@@ -363,14 +372,21 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
/**
* L1 TLB read
*/
val sfenceVpn = sfence.bits.addr.asTypeOf(vaBundle).vpn
for (i <- 0 until Width) {
nMeta.r.req(i) := io.requestor(i).req.bits.vaddr.asTypeOf(vaBundle).vpn
}
nMeta.r.req(Width) := sfenceVpn
val nRefillMask = Mux(refill, UIntToOH(nRefillIdx)(TlbEntrySize-1, 0), 0.U).asBools
val sRefillMask = Mux(refill, UIntToOH(sRefillIdx)(TlbSPEntrySize-1, 0), 0.U).asBools
def TLBNormalRead(i: Int) = {
val entryHitVec = (
if (isDtlb)
VecInit(entry.zip(nRefillMask ++ sRefillMask).map{ case (e,m) => ~m && e.hit(reqAddr(i).vpn)})
VecInit(nMeta.r.resp(i).zip(nRefillMask).map{ case (e, m) => ~m && e } ++
sMeta.zip(sRefillMask).map{ case (e,m) => ~m && e.hit(reqAddr(i).vpn) })
else
VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/)))
VecInit(nMeta.r.resp(i) ++ sMeta.map(_.hit(reqAddr(i).vpn/*, satp.asid*/)))
)
val reqAddrReg = if (isDtlb) RegNext(reqAddr(i)) else reqAddr(i)
......@@ -384,8 +400,8 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val pfArray = ParallelOR(pfHitVec).asBool && validReg && vmEnable
val hit = ParallelOR(hitVec).asBool && validReg && vmEnable && ~pfArray
val miss = !hit && validReg && vmEnable && ~pfArray
val hitppn = ParallelMux(hitVec zip entry.map(_.ppn(reqAddrReg.vpn)))
val hitPerm = ParallelMux(hitVec zip entry.map(_.data.perm))
val hitppn = ParallelMux(hitVec zip data.map(_.genPPN(reqAddrReg.vpn)))
val hitPerm = ParallelMux(hitVec zip data.map(_.perm))
hitVec.suggestName(s"hitVec_${i}")
pfHitVec.suggestName(s"pfHitVec_${i}")
......@@ -488,6 +504,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
// }
// sfence (flush)
val sfenceHit = nMeta.r.resp(Width) ++ sMeta.map(_.hit(sfenceVpn))
when (sfence.valid) {
when (sfence.bits.rs1) { // virtual address *.rs1 <- (rs1===0.U)
when (sfence.bits.rs2) { // asid, but i do not want to support asid, *.rs2 <- (rs2===0.U)
......@@ -498,31 +515,26 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
v.zipWithIndex.map{ case (a,i) => a := a & g(i) }
}
}.otherwise {
val sfenceVpn = sfence.bits.addr.asTypeOf(vaBundle).vpn
when (sfence.bits.rs2) {
// specific addr but all asid
v.zipWithIndex.map{ case (a,i) => a := a & !entry(i).hit(sfenceVpn) }
v.zipWithIndex.map{ case (a,i) => a := a & !sfenceHit(i) }
}.otherwise {
// specific addr and specific asid
v.zipWithIndex.map{ case (a,i) => a := a & !(entry(i).hit(sfenceVpn) && !g(i))}
// Invalidate only entries that match the address AND are not global; global mappings must survive.
// `&` and `&&` share the same precedence in Scala, so the parentheses are required:
// without them the expression parses as (a & !hit) && !g and clears every global entry.
v.zipWithIndex.map{ case (a,i) => a := a & !(sfenceHit(i) && !g(i)) }
}
}
}
if (!env.FPGAPlatform && !env.DualCore && isDtlb) {
ExcitingUtils.addSource(valid(0) && vmEnable, "perfCntDtlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(1) && vmEnable, "perfCntDtlbReqCnt1", Perf)
ExcitingUtils.addSource(valid(2) && vmEnable, "perfCntDtlbReqCnt2", Perf)
ExcitingUtils.addSource(valid(3) && vmEnable, "perfCntDtlbReqCnt3", Perf)
ExcitingUtils.addSource(valid(0) && vmEnable && missVec(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1) && vmEnable && missVec(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2) && vmEnable && missVec(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3) && vmEnable && missVec(3), "perfCntDtlbMissCnt3", Perf)
}
if (!env.FPGAPlatform && !env.DualCore && !isDtlb) {
ExcitingUtils.addSource(valid(0) && vmEnable, "perfCntItlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(0) && vmEnable && missVec(0), "perfCntItlbMissCnt0", Perf)
if (isDtlb) {
for (i <- 0 until Width) {
XSPerf("dtlb_access" + Integer.toString(i, 10), valid(i) && vmEnable)
}
for (i <- 0 until Width) {
XSPerf("dtlb_miss" + Integer.toString(i, 10), valid(i) && vmEnable && missVec(i))
}
} else {
XSPerf("itlb_access", valid(0) && vmEnable)
XSPerf("itlb_miss", valid(0) && vmEnable && missVec(0))
}
// Log
......
......@@ -52,21 +52,18 @@ class L1plusPrefetcher extends PrefetchModule {
XSDebug(p"io.mem_acquire: v=${io.mem_acquire.valid} r=${io.mem_acquire.ready} ${io.mem_acquire.bits}\n")
XSDebug(p"io.mem_grant: v=${io.mem_grant.valid} r=${io.mem_grant.ready} ${io.mem_grant.bits}\n")
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSource(io.mem_acquire.fire(), "perfCntL1plusPrefetchReqCnt", Perf)
def idWidth: Int = log2Up(l1plusPrefetcherParameters.nEntries)
(0 until l1plusPrefetcherParameters.nEntries).foreach(i =>
ExcitingUtils.addSource(
BoolStopWatch(
start = io.mem_acquire.fire() && io.mem_acquire.bits.id(idWidth - 1, 0) === i.U,
stop = io.mem_grant.fire() && io.mem_grant.bits.id(idWidth - 1, 0) === i.U,
startHighPriority = true
),
"perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10),
Perf
XSPerf("L1+Prefetch_reqCnt", io.mem_acquire.fire())
def idWidth: Int = log2Up(l1plusPrefetcherParameters.nEntries)
(0 until l1plusPrefetcherParameters.nEntries).foreach(i =>
XSPerf(
"L1+Prefetch_penaltyEntry" + Integer.toString(i, 10),
BoolStopWatch(
start = io.mem_acquire.fire() && io.mem_acquire.bits.id(idWidth - 1, 0) === i.U,
stop = io.mem_grant.fire() && io.mem_grant.bits.id(idWidth - 1, 0) === i.U,
startHighPriority = true
)
)
}
)
} else {
io.in.ready := true.B
......
......@@ -128,19 +128,16 @@ class L2PrefetcherImp(outer: L2Prefetcher) extends LazyModuleImp(outer) with Has
bus.e.valid := false.B
bus.e.bits := DontCare
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSource(bus.a.fire(), "perfCntL2PrefetchReqCnt", Perf)
(0 until l2PrefetcherParameters.nEntries).foreach(i =>
ExcitingUtils.addSource(
BoolStopWatch(
start = bus.a.fire() && bus.a.bits.source(l2PrefetcherParameters.totalWidth - 1, 0) === i.U,
stop = bus.d.fire() && bus.d.bits.source(l2PrefetcherParameters.totalWidth - 1, 0) === i.U,
startHighPriority = true
),
"perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10),
Perf
XSPerf("L2Prefetch_reqCnt", bus.a.fire())
(0 until l2PrefetcherParameters.nEntries).foreach(i =>
XSPerf(
"L2Prefetch_penaltyEntry" + Integer.toString(i, 10),
BoolStopWatch(
start = bus.a.fire() && bus.a.bits.source(l2PrefetcherParameters.totalWidth - 1, 0) === i.U,
stop = bus.d.fire() && bus.d.bits.source(l2PrefetcherParameters.totalWidth - 1, 0) === i.U,
startHighPriority = true
)
)
}
)
}
......@@ -53,7 +53,8 @@ class BIM extends BasePredictor with BimParams {
io.resp.ctrs := if2_bimRead
io.meta.ctrs := if2_bimRead
val u = io.update.bits
val updateValid = RegNext(io.update.valid)
val u = RegNext(io.update.bits)
val updateRow = bimAddr.getBankIdx(u.ftqPC)
......@@ -76,7 +77,7 @@ class BIM extends BasePredictor with BimParams {
val newCtrs = VecInit((0 until BimBanks).map(b => satUpdate(oldCtrs(b), 2, newTakens(b))))
// val oldSaturated = newCtr === oldCtr
val needToUpdate = VecInit((0 until PredictWidth).map(i => io.update.valid && u.br_mask(i) && u.valids(i)))
val needToUpdate = VecInit((0 until PredictWidth).map(i => updateValid && u.br_mask(i) && u.valids(i)))
when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))}
......@@ -104,7 +105,7 @@ class BIM extends BasePredictor with BimParams {
if (BPUDebug && debug) {
XSDebug(doing_reset, "Reseting...\n")
XSDebug("[update] v=%d pc=%x valids=%b, tgt=%x\n", io.update.valid, u.ftqPC, u.valids.asUInt, u.target)
XSDebug("[update] v=%d pc=%x valids=%b, tgt=%x\n", updateValid, u.ftqPC, u.valids.asUInt, u.target)
XSDebug("[update] brMask=%b, taken=%b isMisPred=%b\n", u.br_mask.asUInt, newTakens.asUInt, u.mispred.asUInt)
for (i <- 0 until BimBanks) {
......
......@@ -167,7 +167,9 @@ class BTB extends BasePredictor with BTBParams{
when (pd.isBr) { t := BTBtype.B}
t
}
val u = io.update.bits
val do_update = RegNext(io.update)
val u = do_update.bits
val cfi_pc = packetAligned(u.ftqPC) + (u.cfiIndex.bits << instOffsetBits)
val new_target = u.target
......@@ -188,7 +190,7 @@ class BTB extends BasePredictor with BTBParams{
val dataWrite = BtbDataEntry(new_lower, new_extended)
val updateValid = io.update.valid && updateTaken
val updateValid = do_update.valid && updateTaken
// Update btb
require(isPow2(BtbBanks))
// this is one hot, since each fetch bundle has at most 1 taken instruction
......
......@@ -182,5 +182,5 @@ class Ibuffer extends XSModule with HasCircularQueuePtrHelper {
// )
// }
XSPerf("utilization", validEntries)
XSPerf("ibuf_utilization", validEntries)
}
......@@ -334,8 +334,9 @@ class LoopPredictor extends BasePredictor with LTBParams {
val updateValid = io.update.valid
val update = io.update.bits
val redirectValid = io.redirect.valid
val redirect = io.redirect.bits.cfiUpdate
val do_redirect = RegNext(io.redirect)
val redirectValid = do_redirect.valid
val redirect = do_redirect.bits.cfiUpdate
val redirectPC = redirect.pc
val redirectBank = ltbAddr.getBank(redirectPC)
......@@ -358,7 +359,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
ltbs(i).io.redirect.bits.specCnt := redirect.specCnt(i)
ltbs(i).io.redirect.bits.mispred := redirect.isMisPred
ltbs(i).io.redirect.bits.taken := redirect.taken
ltbs(i).io.redirect.bits.isReplay := io.redirect.bits.flushItself
ltbs(i).io.redirect.bits.isReplay := do_redirect.bits.flushItself
ltbs(i).io.repair := redirectValid && redirectBank =/= i.U
}
......@@ -379,9 +380,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
io.meta.specCnts(i) := ltbResps(i).specCnt
}
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(io.resp.exit.reduce(_||_), "perfCntLoopExit", Perf)
}
XSPerf("LoopExit", io.resp.exit.reduce(_||_))
if (BPUDebug && debug) {
// debug info
......@@ -391,7 +390,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
XSDebug("[IF4][req] inMask=%b\n", inMask)
XSDebug("[IF4][req] updatePC=%x, updateValid=%d, isBr=%b\n", update.ftqPC, updateValid, update.br_mask.asUInt)
XSDebug("[IF4][req] redirectPC=%x redirectBank=%d, redirectValid=%d, isBr=%d, isReplay=%d\n", redirect.pc, redirectBank, redirectValid, redirect.pd.isBr, io.redirect.bits.flushItself)
XSDebug("[IF4][req] redirectPC=%x redirectBank=%d, redirectValid=%d, isBr=%d, isReplay=%d\n", redirect.pc, redirectBank, redirectValid, redirect.pd.isBr, do_redirect.bits.flushItself)
XSDebug("[IF4][req] isMisPred=%d\n", redirect.isMisPred)
XSDebug(redirectValid, "[redirect SpecCnt] ")
......
......@@ -179,11 +179,12 @@ class RAS extends BasePredictor
spec_push := !spec_is_full && io.callIdx.valid && io.pc.valid
spec_pop := !spec_is_empty && io.is_ret && io.pc.valid
val copy_valid = io.redirect.valid
val recover_cfi = io.redirect.bits.cfiUpdate
val redirect = RegNext(io.redirect)
val copy_valid = redirect.valid
val recover_cfi = redirect.bits.cfiUpdate
val retMissPred = copy_valid && io.redirect.bits.level === 0.U && recover_cfi.pd.isRet
val callMissPred = copy_valid && io.redirect.bits.level === 0.U && recover_cfi.pd.isCall
val retMissPred = copy_valid && redirect.bits.level === 0.U && recover_cfi.pd.isRet
val callMissPred = copy_valid && redirect.bits.level === 0.U && recover_cfi.pd.isCall
// when we mispredict a call, we must redo a push operation
// similarly, when we mispredict a return, we should redo a pop
spec_ras.recover_valid := copy_valid
......@@ -215,7 +216,7 @@ class RAS extends BasePredictor
XSDebug(spec_push, "(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",
spec_new_addr,spec_debug.push_entry.ctr,spec_debug.alloc_new,spec_debug.sp.asUInt)
XSDebug(spec_pop, "(spec_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
val redirectUpdate = io.redirect.bits.cfiUpdate
val redirectUpdate = redirect.bits.cfiUpdate
XSDebug("copyValid:%d recover(SP:%d retAddr:%x ctr:%d) \n",
copy_valid,redirectUpdate.rasSp,redirectUpdate.rasEntry.retAddr,redirectUpdate.rasEntry.ctr)
}
......
......@@ -12,8 +12,6 @@ trait MicroBTBPatameter{
val nWays = 16
val lowerBitsSize = 20
val tagSize = 20
val extended_stat = false
}
@chiselName
......@@ -62,231 +60,170 @@ class MicroBTB extends BasePredictor
val tag = UInt(tagSize.W)
}
class MicroBTBEntry extends XSBundle
class MicroBTBData extends XSBundle
{
val lower = UInt(lowerBitsSize.W)
}
class MetaOutput extends XSBundle {
val is_Br = Bool()
class ReadResp extends XSBundle
{
val valid = Bool()
val taken = Bool()
val target = UInt(VAddrBits.W)
val is_RVC = Bool()
val pred = UInt(2.W)
val is_Br = Bool()
}
@chiselName
class UBTBMetaBank(nWays: Int) extends XSModule {
class UBTBBank(val nWays: Int) extends XSModule with HasIFUConst {
val io = IO(new Bundle {
val wen = Input(Bool())
val wWay = Input(UInt(log2Up(nWays).W))
val wdata = Input(new MicroBTBMeta)
val rtag = Input(UInt(tagSize.W))
val rdata = Output(new MetaOutput)
val hit_and_taken = Output(Bool())
val hit_ohs = Output(Vec(nWays, Bool()))
val hit_way = Output(UInt(log2Up(nWays).W))
val allocatable_way = Valid(UInt(log2Up(nWays).W))
val rWay = Input(UInt(log2Up(nWays).W))
val rpred = Output(UInt(2.W))
})
val mem = Mem(nWays, new MicroBTBMeta)
val rentries = VecInit((0 until nWays) map (i => mem(i)))
val hit_ohs = VecInit(rentries map (e => e.valid && e.tag === io.rtag))
io.hit_and_taken := VecInit(rentries map (e => e.valid && e.tag === io.rtag && e.pred(1))).asUInt.orR
val hit_way = OHToUInt(hit_ohs)
//val hit_entry = rentries(hit_way)
val hit_entry = ParallelMux(hit_ohs zip rentries)
val read_pc = Flipped(Valid(UInt(VAddrBits.W)))
val read_resp = Output(new ReadResp)
val read_hit = Output(Bool())
val to_write_way = Output(UInt(log2Ceil(nWays).W))
io.hit_ohs := hit_ohs
io.hit_way := hit_way
io.rdata.is_Br := hit_entry.is_Br
io.rdata.is_RVC := hit_entry.is_RVC
io.rdata.pred := hit_entry.pred
val entry_emptys = VecInit(rentries.map(e => !e.valid))
val allocatable = ParallelOR(entry_emptys)
io.allocatable_way.bits := PriorityEncoder(entry_emptys)
io.allocatable_way.valid := allocatable
io.rpred := rentries(io.rWay).pred
when (io.wen) {
mem.write(io.wWay, io.wdata)
}
}
val update_way = Input(UInt(log2Ceil(nWays).W))
val update_read_pred = Output(UInt(2.W))
@chiselName
class UBTBDataBank(nWays: Int) extends XSModule {
val io = IO(new Bundle {
val wen = Input(Bool())
val wWay = Input(UInt(log2Up(nWays).W))
val wdata = Input(new MicroBTBEntry)
val rOHs = Input(Vec(nWays, Bool()))
val rdata = Output(new MicroBTBEntry)
val update_write_meta = Flipped(Valid(new MicroBTBMeta))
val update_write_data = Flipped(Valid(new MicroBTBData))
})
val mem = Mem(nWays, new MicroBTBEntry)
val rentries = VecInit((0 until nWays) map (i => mem(i)))
// io.rdata := rentries(io.rWay)
io.rdata := ParallelMux(io.rOHs zip rentries)
when (io.wen) {
mem.write(io.wWay, io.wdata)
val meta = Module(new AsyncDataModuleTemplate(new MicroBTBMeta, nWays, nWays, 1))
val data = Module(new AsyncDataModuleTemplate(new MicroBTBData, nWays, nWays, 1))
for (w <- 0 until nWays) {
meta.io.raddr(w) := w.U
data.io.raddr(w) := w.U
}
}
meta.io.waddr(0) := io.update_way
meta.io.wen(0) := io.update_write_meta.valid
meta.io.wdata(0) := io.update_write_meta.bits
data.io.waddr(0) := io.update_way
data.io.wen(0) := io.update_write_data.valid
data.io.wdata(0) := io.update_write_data.bits
val rmetas = meta.io.rdata
val rdatas = data.io.rdata
val packetAlignedPC = packetAligned(io.read_pc.bits)
val read_tag = getTag(io.read_pc.bits)
val hits = VecInit(rmetas.map(m => m.valid && m.tag === read_tag))
val takens = VecInit(rmetas.map(m => m.pred(1)))
val hit_oh = hits.asUInt
val hit_and_taken = VecInit((hits zip takens) map {case (h, t) => h && t}).asUInt.orR
val hit_meta = ParallelMux(hits zip rmetas)
val hit_data = ParallelMux(hits zip rdatas)
val target = Cat(io.read_pc.bits(VAddrBits-1, lowerBitsSize+instOffsetBits), hit_data.lower, 0.U(instOffsetBits.W))
val emptys = rmetas.map(m => !m.valid)
val allocatable = VecInit(emptys).asUInt.orR
val empty_way = ParallelPriorityEncoder(emptys)
val hit_way = OHToUInt(hit_oh)
val random_way = LFSR64()(log2Ceil(nWays)-1,0)
io.to_write_way := Mux(hit_oh.orR, hit_way, Mux(allocatable, empty_way, random_way))
val ren = io.read_pc.valid
io.read_resp.valid := ren
io.read_resp.is_RVC := ren && hit_meta.is_RVC
io.read_resp.is_Br := ren && hit_meta.is_Br
io.read_resp.taken := ren && hit_and_taken
io.read_resp.target := target
io.read_hit := ren && hit_oh.orR
io.update_read_pred := rmetas(io.update_way).pred
}
val ubtbBanks = Seq.fill(PredictWidth)(Module(new UBTBBank(nWays)))
val banks = VecInit(ubtbBanks.map(_.io))
val read_resps = VecInit(banks.map(b => b.read_resp))
val metaBanks = Seq.fill(PredictWidth)(Module(new UBTBMetaBank(nWays)))
val dataBanks = Seq.fill(PredictWidth)(Module(new UBTBDataBank(nWays)))
val metas = VecInit(metaBanks.map(_.io))
val datas = VecInit(dataBanks.map(_.io))
for (b <- 0 until PredictWidth) {
banks(b).read_pc.valid := io.pc.valid && io.inMask(b)
banks(b).read_pc.bits := io.pc.bits
out_ubtb_br_info.writeWay(b) := banks(b).to_write_way
out_ubtb_br_info.hits(b) := banks(b).read_hit
val uBTBMeta = VecInit(metas.map(m => m.rdata))
val uBTB = VecInit(datas.map(d => d.rdata))
//only when hit and instruction valid and entry valid can output data
io.out.targets(b) := read_resps(b).target
io.out.hits(b) := banks(b).read_hit
io.out.takens(b) := read_resps(b).taken
io.out.is_RVC(b) := read_resps(b).is_RVC
io.out.brMask(b) := read_resps(b).is_Br
}
val do_reset = RegInit(true.B)
val reset_way = RegInit(0.U(log2Ceil(nWays).W))
when (do_reset) { reset_way := reset_way + 1.U }
when (reset_way === (nWays-1).U) { do_reset := false.B }
//uBTB read
//tag is packet aligned
val packetAlignedPC = packetAligned(io.pc.bits)
val read_valid = io.pc.valid
val read_req_tag = getTag(packetAlignedPC)
class ReadRespEntry extends XSBundle
{
val is_RVC = Bool()
val target = UInt(VAddrBits.W)
val valid = Bool()
val taken = Bool()
val is_Br = Bool()
}
val read_resp = Wire(Vec(PredictWidth,new ReadRespEntry))
(0 until PredictWidth).map{ b => metas(b).rtag := read_req_tag }
val read_hit_ohs = (0 until PredictWidth).map{ b => metas(b).hit_ohs }
val read_hit_vec = VecInit(read_hit_ohs.map{oh => ParallelOR(oh).asBool})
val read_hit_ways = (0 until PredictWidth).map{ b => metas(b).hit_way }
(0 until PredictWidth).map(b => datas(b).rOHs := read_hit_ohs(b))
val uBTBMeta_resp = VecInit((0 until PredictWidth).map(b => metas(b).rdata))
val btb_resp = VecInit((0 until PredictWidth).map(b => datas(b).rdata))
for(i <- 0 until PredictWidth){
// do not need to decide whether to produce results\
read_resp(i).valid := io.inMask(i)
read_resp(i).taken := read_resp(i).valid && metas(i).hit_and_taken
read_resp(i).is_Br := read_resp(i).valid && uBTBMeta_resp(i).is_Br
read_resp(i).target := Cat(io.pc.bits(VAddrBits-1, lowerBitsSize+instOffsetBits), btb_resp(i).asUInt, 0.U(instOffsetBits.W))
read_resp(i).is_RVC := read_resp(i).valid && uBTBMeta_resp(i).is_RVC
out_ubtb_br_info.hits(i) := read_hit_vec(i)
}
//TODO: way alloc algorithm
/** Chooses a way to allocate for a new entry.
  *
  * If at least one way is invalid, the lowest-indexed invalid way is returned.
  * If every way is valid, the way index is a hash: the concatenation of all stored
  * tags and the request tag is cut into log2Ceil(nWays)-bit chunks that are XOR-ed.
  *
  * @param valids    one valid bit per way
  * @param meta_tags the stored tags, already concatenated into a single UInt
  * @param req_tag   tag of the incoming request
  * @return          the selected way index
  *
  * NOTE(review): the result wire is log2Up(BtbWays) bits wide while the hash chunks
  * are log2Ceil(nWays) bits wide -- confirm BtbWays vs nWays is intended here.
  * NOTE(review): this helper is not referenced in the visible part of the file
  * (alloc_ways below uses allocatable_way / LFSR64 instead) -- possibly dead code.
  */
def alloc_way(valids:UInt ,meta_tags:UInt,req_tag:UInt) = {
val way = Wire(UInt(log2Up(BtbWays).W))
// True only when every way already holds a valid entry.
val all_valid = valids.andR.asBool
val tags = Cat(meta_tags,req_tag)
// Chunk width in bits; nChunks is the ceiling of tags.getWidth / l.
val l = log2Ceil(nWays)
val nChunks = (tags.getWidth + l - 1) / l
val chunks = (0 until nChunks) map { i =>
// The last chunk may be narrower than l bits, hence the min() on the upper bound.
tags(min((i+1)*l, tags.getWidth)-1, i*l)
}
// Full set: XOR-fold hash of the tags; otherwise: first invalid way.
way := Mux(all_valid,chunks.reduce(_^_),PriorityEncoder(~valids))
way
}
val alloc_ways = (0 until PredictWidth).map{ b =>
Mux(metas(b).allocatable_way.valid, metas(b).allocatable_way.bits, LFSR64()(log2Ceil(nWays)-1,0))}
(0 until PredictWidth).map(i => out_ubtb_br_info.writeWay(i) := Mux(read_hit_vec(i).asBool,read_hit_ways(i),alloc_ways(i)))
//response
//only when hit and instruction valid and entry valid can output data
for(i <- 0 until PredictWidth)
{
io.out.targets(i) := read_resp(i).target
io.out.hits(i) := read_resp(i).valid && read_hit_vec(i)
io.out.takens(i) := read_resp(i).taken
io.out.is_RVC(i) := read_resp(i).is_RVC
io.out.brMask(i) := read_resp(i).is_Br
}
//uBTB update
//backend should send fetch pc to update
val u = io.update.bits
val u = RegNext(io.update.bits)
val update_valid = RegNext(io.update.valid)
val update_packet_pc = packetAligned(u.ftqPC)
val update_pcs = VecInit((0 until PredictWidth).map(i => update_packet_pc + (i << instOffsetBits).U))
val update_write_ways = VecInit(u.metas.map(_.ubtbWriteWay))
val update_hits = u.metas.map(_.ubtbHits)
val update_takens = u.takens
val update_bank = u.cfiIndex.bits
val update_tag = getTag(update_packet_pc)
val update_target = u.target
val update_target_lower = update_target(lowerBitsSize-1+instOffsetBits, instOffsetBits)
val update_target_lower = u.target(lowerBitsSize-1+instOffsetBits, instOffsetBits)
// only when taken should we update target
val entry_write_valid = io.update.valid && u.valids(u.cfiIndex.bits) && u.takens(u.cfiIndex.bits)
val data_write_valids =
VecInit((0 until PredictWidth).map(i =>
update_valid && u.valids(i) && u.takens(i)))
val meta_write_valids =
VecInit((0 until PredictWidth).map(i => io.update.valid && u.valids(i) && (u.br_mask(i) || u.takens(i))))
VecInit((0 until PredictWidth).map(i =>
update_valid && u.valids(i) && (u.br_mask(i) || u.takens(i))))
val new_preds =
VecInit((0 until PredictWidth).map(i =>
Mux(!update_hits(i), Mux(update_takens(i),3.U,0.U),
satUpdate(banks(i).update_read_pred,2,update_takens(i)))))
for (b <- 0 until PredictWidth) {
datas(b).wen := do_reset || (entry_write_valid && b.U === update_bank)
datas(b).wWay := Mux(do_reset, reset_way, update_write_ways(u.cfiIndex.bits))
datas(b).wdata := Mux(do_reset, 0.U.asTypeOf(new MicroBTBEntry), update_target_lower.asTypeOf(new MicroBTBEntry))
}
val new_preds = VecInit((0 until PredictWidth).map(i =>
Mux(!update_hits(i),
Mux(update_takens(i),3.U,0.U),
satUpdate( metas(i).rpred,2,update_takens(i)))))
//write the uBTBMeta
(0 until PredictWidth).map(i => metas(i).rWay := update_write_ways(i))
val update_write_metas = Wire(Vec(PredictWidth, new MicroBTBMeta))
val update_write_datas = Wire(Vec(PredictWidth, new MicroBTBData))
for (i <- 0 until PredictWidth) {
update_write_metas(i).is_Br := u.br_mask(i)
update_write_metas(i).is_RVC := u.rvc_mask(i)
update_write_metas(i).valid := true.B
update_write_metas(i).tag := update_tag
update_write_metas(i).pred := new_preds(i)
}
update_write_datas(i).lower := update_target_lower
}
for (b <- 0 until PredictWidth) {
metas(b).wen := do_reset || meta_write_valids(b)
metas(b).wWay := Mux(do_reset, reset_way, update_write_ways(b))
metas(b).wdata := Mux(do_reset, 0.U.asTypeOf(new MicroBTBMeta), update_write_metas(b))
banks(b).update_way := update_write_ways(b)
banks(b).update_write_meta.valid := do_reset || meta_write_valids(b)
banks(b).update_write_meta.bits :=
Mux(do_reset, 0.U.asTypeOf(new MicroBTBMeta), update_write_metas(b))
banks(b).update_write_data.valid := do_reset || data_write_valids(b)
banks(b).update_write_data.bits :=
Mux(do_reset, 0.U.asTypeOf(new MicroBTBData), update_write_datas(b))
}
if (BPUDebug && debug) {
val update_pcs = VecInit((0 until PredictWidth).map(i => update_packet_pc + (i << instOffsetBits).U))
val update_bank = u.cfiIndex.bits
val read_valid = io.pc.valid
val read_req_tag = getTag(io.pc.bits)
val read_hit_vec = VecInit(banks.map(b => b.read_hit))
val read_hit_ways = VecInit(banks.map(b => b.to_write_way))
XSDebug(read_valid,"uBTB read req: pc:0x%x, tag:%x \n",io.pc.bits,read_req_tag)
XSDebug(read_valid,"uBTB read resp: read_hit_vec:%b, \n",read_hit_vec.asUInt)
for(i <- 0 until PredictWidth) {
XSDebug(read_valid,"bank(%d) hit:%d way:%d valid:%d is_RVC:%d taken:%d isBr:%d target:0x%x alloc_way:%d\n",
i.U, read_hit_vec(i), read_hit_ways(i), read_resp(i).valid, read_resp(i).is_RVC,
read_resp(i).taken, read_resp(i).is_Br, read_resp(i).target, out_ubtb_br_info.writeWay(i))
XSDebug(entry_write_valid && (i.U === update_bank),
i.U, read_hit_vec(i), read_hit_ways(i), read_resps(i).valid, read_resps(i).is_RVC,
read_resps(i).taken, read_resps(i).is_Br, read_resps(i).target, out_ubtb_br_info.writeWay(i))
XSDebug(data_write_valids(i),
"uBTB update data(%d): update | pc:0x%x | update hits:%b | update_write_way:%d | update_lower 0x%x\n ",
i.U, update_pcs(i), update_hits(i), update_write_ways(i), update_target_lower(lowerBitsSize-1,0))
XSDebug(meta_write_valids(i), "uBTB update meta(%d): update_taken:%d | old_pred:%b | new_pred:%b | br:%d | rvc:%d | update_tag:%x\n",
i.U, update_takens(i), metas(i).rpred, new_preds(i), u.br_mask(i), u.rvc_mask(i), update_tag)
i.U, update_takens(i), banks(i).update_read_pred, new_preds(i), u.br_mask(i), u.rvc_mask(i), update_tag)
}
}
if (extended_stat) {
val high_identical = update_target(VAddrBits-1, lowerBitsSize) =/= update_packet_pc(VAddrBits-1, lowerBitsSize)
XSDebug(io.update.valid, "extended_stat: identical %d\n", high_identical)
}
//bypass:read-after-write
// for( b <- 0 until PredictWidth) {
// when(update_bank === b.U && meta_write_valid && read_valid
// && Mux(b.U < update_base_bank,update_tag===read_req_tag+1.U ,update_tag===read_req_tag)) //read and write is the same fetch-packet
// {
// io.out.targets(b) := u.target
// io.out.takens(b) := u.taken
// io.out.is_RVC(b) := u.pd.isRVC
// io.out.notTakens(b) := (u.pd.brType === BrType.branch) && (!io.out.takens(b))
// XSDebug("uBTB bypass hit! : hitpc:0x%x | hitbanck:%d | out_target:0x%x\n",io.pc.bits+(b<<1).asUInt(),b.U, io.out.targets(b))
// }
// }
}
\ No newline at end of file
......@@ -29,7 +29,7 @@ class InflightBlockInfo extends XSBundle {
/** Enqueue interface of the load/store queue wrapper, one lane per rename slot.
  *
  * NOTE(review): two `needAlloc` declarations appear below (Bool and UInt(2.W));
  * this looks like the before/after pair of a diff -- only one can be kept.
  */
class LsqEnqIO extends XSBundle {
// Driven by the queue side (Output).
val canAccept = Output(Bool())
val needAlloc = Vec(RenameWidth, Input(Bool()))
// 2-bit form: in the wrapper code visible in this file, bit 0 is forwarded to the
// load queue and bit 1 to the store queue.
val needAlloc = Vec(RenameWidth, Input(UInt(2.W)))
// Incoming micro-ops with a valid bit, one per rename slot.
val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
// Indices returned for each slot (the wrapper fills lqIdx and sqIdx).
val resp = Vec(RenameWidth, Output(new LSIdx))
}
......@@ -75,15 +75,13 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
loadQueue.io.enq.sqCanAccept := storeQueue.io.enq.canAccept
storeQueue.io.enq.lqCanAccept := loadQueue.io.enq.canAccept
for (i <- 0 until RenameWidth) {
val isStore = CommitType.lsInstIsStore(io.enq.req(i).bits.ctrl.commitType)
loadQueue.io.enq.needAlloc(i) := io.enq.needAlloc(i) && !isStore
loadQueue.io.enq.req(i).valid := !isStore && io.enq.req(i).valid
loadQueue.io.enq.needAlloc(i) := io.enq.needAlloc(i)(0)
loadQueue.io.enq.req(i).valid := io.enq.needAlloc(i)(0) && io.enq.req(i).valid
loadQueue.io.enq.req(i).bits := io.enq.req(i).bits
storeQueue.io.enq.needAlloc(i) := io.enq.needAlloc(i) && isStore
storeQueue.io.enq.req(i).valid := isStore && io.enq.req(i).valid
storeQueue.io.enq.req(i).bits := io.enq.req(i).bits
storeQueue.io.enq.needAlloc(i) := io.enq.needAlloc(i)(1)
storeQueue.io.enq.req(i).valid := io.enq.needAlloc(i)(1) && io.enq.req(i).valid
storeQueue.io.enq.req(i).bits := io.enq.req(i).bits
io.enq.resp(i).lqIdx := loadQueue.io.enq.resp(i)
io.enq.resp(i).sqIdx := storeQueue.io.enq.resp(i)
......
......@@ -30,7 +30,7 @@ trait HasLoadHelper { this: XSModule =>
LookupTree(uop.ctrl.fuOpType, List(
LSUOpType.lb -> SignExt(rdata(7, 0) , XLEN),
LSUOpType.lh -> SignExt(rdata(15, 0), XLEN),
LSUOpType.lw -> Mux(fpWen, rdata, SignExt(rdata(31, 0), XLEN)),
LSUOpType.lw -> Mux(fpWen, Cat(Fill(32, 1.U(1.W)), rdata(31, 0)), SignExt(rdata(31, 0), XLEN)),
LSUOpType.ld -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
LSUOpType.lbu -> ZeroExt(rdata(7, 0) , XLEN),
LSUOpType.lhu -> ZeroExt(rdata(15, 0), XLEN),
......@@ -604,7 +604,8 @@ class LoadQueue extends XSModule
}
// Read vaddr for mem exception
vaddrModule.io.raddr(0) := deqPtr + io.roq.lcommit
// Note that both io.roq.lcommit and RegNext(io.roq.lcommit) should be taken into consideration
vaddrModule.io.raddr(0) := (deqPtrExt + commitCount + io.roq.lcommit).value
io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)
// misprediction recovery / exception redirect
......
......@@ -104,7 +104,9 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
dataModule.io.raddr(i) := deqPtrExtNext(i).value
paddrModule.io.raddr(i) := deqPtrExtNext(i).value
}
vaddrModule.io.raddr(0) := cmtPtr + io.roq.scommit
// Note that both io.roq.scommit and RegNext(io.roq.scommit) should be taken into consideration
vaddrModule.io.raddr(0) := (cmtPtrExt(0) + commitCount + io.roq.scommit).value
/**
* Enqueue at dispatch
......
......@@ -241,7 +241,6 @@ class LoadUnit extends XSModule with HasLoadHelper {
val io = IO(new Bundle() {
val ldin = Flipped(Decoupled(new ExuInput))
val ldout = Decoupled(new ExuOutput)
val fpout = Decoupled(new ExuOutput)
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
val tlbFeedback = ValidIO(new TlbFeedback)
......@@ -304,53 +303,27 @@ class LoadUnit extends XSModule with HasLoadHelper {
// write to rob and writeback bus
val s2_wb_valid = load_s2.io.out.valid && !load_s2.io.out.bits.miss
val refillFpLoad = io.lsq.ldout.bits.uop.ctrl.fpWen
// Int load, if hit, will be writebacked at s2
val intHitLoadOut = Wire(Valid(new ExuOutput))
intHitLoadOut.valid := s2_wb_valid && !load_s2.io.out.bits.uop.ctrl.fpWen
intHitLoadOut.bits.uop := load_s2.io.out.bits.uop
intHitLoadOut.bits.data := load_s2.io.out.bits.data
intHitLoadOut.bits.redirectValid := false.B
intHitLoadOut.bits.redirect := DontCare
intHitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio
intHitLoadOut.bits.debug.isPerfCnt := false.B
intHitLoadOut.bits.debug.paddr := load_s2.io.out.bits.paddr
intHitLoadOut.bits.fflags := DontCare
val hitLoadOut = Wire(Valid(new ExuOutput))
hitLoadOut.valid := s2_wb_valid
hitLoadOut.bits.uop := load_s2.io.out.bits.uop
hitLoadOut.bits.data := load_s2.io.out.bits.data
hitLoadOut.bits.redirectValid := false.B
hitLoadOut.bits.redirect := DontCare
hitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio
hitLoadOut.bits.debug.isPerfCnt := false.B
hitLoadOut.bits.debug.paddr := load_s2.io.out.bits.paddr
hitLoadOut.bits.fflags := DontCare
load_s2.io.out.ready := true.B
io.ldout.bits := Mux(intHitLoadOut.valid, intHitLoadOut.bits, io.lsq.ldout.bits)
io.ldout.valid := intHitLoadOut.valid || io.lsq.ldout.valid && !refillFpLoad
io.ldout.bits := Mux(hitLoadOut.valid, hitLoadOut.bits, io.lsq.ldout.bits)
io.ldout.valid := hitLoadOut.valid || io.lsq.ldout.valid
// Fp load, if hit, will be stored to reg at s2, then it will be recoded at s3, writebacked at s4
val fpHitLoadOut = Wire(Valid(new ExuOutput))
fpHitLoadOut.valid := s2_wb_valid && load_s2.io.out.bits.uop.ctrl.fpWen
fpHitLoadOut.bits := intHitLoadOut.bits
val fpLoadUnRecodedReg = Reg(Valid(new ExuOutput))
fpLoadUnRecodedReg.valid := fpHitLoadOut.valid || io.lsq.ldout.valid && refillFpLoad
when(fpHitLoadOut.valid || io.lsq.ldout.valid && refillFpLoad){
fpLoadUnRecodedReg.bits := Mux(fpHitLoadOut.valid, fpHitLoadOut.bits, io.lsq.ldout.bits)
}
val fpLoadRecodedReg = Reg(Valid(new ExuOutput))
when(fpLoadUnRecodedReg.valid){
fpLoadRecodedReg := fpLoadUnRecodedReg
fpLoadRecodedReg.bits.data := fpRdataHelper(fpLoadUnRecodedReg.bits.uop, fpLoadUnRecodedReg.bits.data) // recode
}
fpLoadRecodedReg.valid := fpLoadUnRecodedReg.valid
io.fpout.bits := fpLoadRecodedReg.bits
io.fpout.valid := fpLoadRecodedReg.valid
io.lsq.ldout.ready := Mux(refillFpLoad, !fpHitLoadOut.valid, !intHitLoadOut.valid)
io.lsq.ldout.ready := !hitLoadOut.valid
when(io.ldout.fire()){
XSDebug("ldout %x\n", io.ldout.bits.uop.cf.pc)
}
when(io.fpout.fire()){
XSDebug("fpout %x\n", io.fpout.bits.uop.cf.pc)
}
}
......@@ -37,9 +37,6 @@ class StoreUnit_S0 extends XSModule {
io.out.bits.vaddr := saddr
io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0))
when(io.in.bits.uop.ctrl.src2Type === SrcType.fp){
io.out.bits.data := io.in.bits.src2
} // do not touch fp store raw data
io.out.bits.uop := io.in.bits.uop
io.out.bits.miss := DontCare
io.out.bits.rsIdx := io.rsIdx
......@@ -64,7 +61,6 @@ class StoreUnit_S1 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
// val fp_out = Decoupled(new LsPipelineBundle)
val lsq = ValidIO(new LsPipelineBundle)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
......@@ -92,7 +88,7 @@ class StoreUnit_S1 extends XSModule {
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
io.lsq.valid := io.in.valid && !s1_tlb_miss// TODO: && ! FP
io.lsq.valid := io.in.valid && !s1_tlb_miss
io.lsq.bits := io.in.bits
io.lsq.bits.paddr := s1_paddr
io.lsq.bits.miss := false.B
......@@ -103,12 +99,6 @@ class StoreUnit_S1 extends XSModule {
// mmio inst with exception will be writebacked immediately
io.out.valid := io.in.valid && (!io.out.bits.mmio || s1_exception) && !s1_tlb_miss
io.out.bits := io.lsq.bits
// encode data for fp store
when(io.in.bits.uop.ctrl.src2Type === SrcType.fp){
io.lsq.bits.data := genWdata(ieee(io.in.bits.data), io.in.bits.uop.ctrl.fuOpType(1,0))
}
}
class StoreUnit_S2 extends XSModule {
......
......@@ -484,5 +484,5 @@ class Sbuffer extends XSModule with HasSBufferConst {
XSDebug(line.valid, "[#%d line] Tag: %x, data: %x, mask: %x\n", i.U, line.tag, line.data.asUInt(), line.mask.asUInt())
}}
XSPerf("waitResp", waitingCacheLine.valid)
XSPerf("sbuf_waitResp", waitingCacheLine.valid)
}
......@@ -574,6 +574,22 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) {
}
}
// first instruction commit
// Watchdog for the very first commit of each core: if a core has never committed
// (hascommit[i] is false) and `lastcommit[i] - max_cycle` exceeds firstCommit_limit,
// report the stall, dump the difftest state for that core and abort the run.
// NOTE(review): presumably `lastcommit[i] - max_cycle` is the number of cycles
// elapsed since the last commit (max_cycle counting down) -- confirm against caller.
for (int i = 0; i < NumCore; i++) {
  if (lastcommit[i] - max_cycle > firstCommit_limit && !hascommit[i]) {
    // Argument order must follow the format string: cycle limit first, core id second.
    // (The original passed `i, firstCommit_limit`, printing the two values swapped.)
    // NOTE(review): %d assumes firstCommit_limit is int-sized -- confirm its declaration.
    eprintf("No instruction commits for %d cycles of core %d. Please check the first instruction.\n", firstCommit_limit, i);
    eprintf("Note: The first instruction may lie in 0x10000000 which may executes and commits after 500 cycles.\n");
    eprintf(" Or the first instruction may lie in 0x80000000 which may executes and commits after 2000 cycles.\n");
    // Select the privilege-mode signal that belongs to core i.
#ifdef DUALCORE
    int priviledgeMode = (i == 0) ? dut_ptr->io_difftest_priviledgeMode : dut_ptr->io_difftest2_priviledgeMode;
#else
    int priviledgeMode = dut_ptr->io_difftest_priviledgeMode;
#endif
    difftest_display(priviledgeMode, i);
    trapCode = STATE_ABORT;
  }
}
for (int i = 0; i < NumCore; i++) {
#ifdef DUALCORE
int first_instr_commit = (i == 0) ? dut_ptr->io_difftest_commit && dut_ptr->io_difftest_thisPC == 0x80000000u :
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册