未验证 提交 2bbbb077 编写于 作者: Y Yinan Xu 提交者: GitHub

Merge branch 'master' into L1DTiming

......@@ -173,6 +173,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
plic.module.io.extra.get.intrVec <> RegNext(RegNext(io.extIntrs))
for (i <- 0 until NumCores) {
xs_core(i).module.io.hartId := i.U
xs_core(i).module.io.externalInterrupt.mtip := clint.module.io.mtip(i)
xs_core(i).module.io.externalInterrupt.msip := clint.module.io.msip(i)
// xs_core(i).module.io.externalInterrupt.meip := RegNext(RegNext(io.meip(i)))
......
......@@ -37,7 +37,7 @@ object MaskedRegMap { // TODO: add read mask
}
/**
 * Builds a Bool that flags `addr` as illegal unless it matches a key of `mapping`.
 *
 * The result comes from `LookupTreeDefault` with `true.B` as the default and one
 * `(address -> false.B)` entry per mapped register.
 *
 * @param mapping register map keyed by address; only the keys are read here
 * @param addr    address to look up
 * @return the `illegalAddr` wire driven by the lookup
 */
def isIllegalAddr(mapping: Map[Int, (UInt, UInt, UInt => UInt, UInt, UInt => UInt)], addr: UInt): Bool = {
  val illegalAddr = Wire(Bool())
  // Emit the entries in ascending address order instead of Map iteration order, so the
  // lookup is always built from the same, address-sorted sequence. This is the only
  // driver of the wire; a second, unsorted driver would simply be overridden.
  val legalEntries = mapping.keys.toSeq.sorted.map(a => (a.U, false.B))
  illegalAddr := LookupTreeDefault(addr, true.B, legalEntries)
  illegalAddr
}
def generate(mapping: Map[Int, (UInt, UInt, UInt => UInt, UInt, UInt => UInt)], addr: UInt, rdata: UInt,
......
......@@ -10,7 +10,7 @@ import xiangshan.backend.exu.Exu._
import xiangshan.frontend._
import xiangshan.mem._
import xiangshan.backend.fu.HasExceptionNO
import xiangshan.cache.{DCache,InstrUncache, DCacheParameters, ICache, ICacheParameters, L1plusCache, L1plusCacheParameters, PTW, PTWRepeater, Uncache, MemoryOpConstants, MissReq}
import xiangshan.cache.{DCache, InstrUncache, DCacheParameters, ICache, ICacheParameters, L1plusCache, L1plusCacheParameters, PTW, PTWRepeater, Uncache, MemoryOpConstants, MissReq}
import xiangshan.cache.prefetch._
import chipsalliance.rocketchip.config
import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp}
......@@ -24,9 +24,10 @@ import utils._
/**
 * Sequential id generator: every call returns the current counter value and then
 * advances the counter by one, so successive calls yield 0, 1, 2, ...
 */
object hartIdCore extends (() => Int) {
  // Next id to hand out. Left as a public var because it is visible to callers.
  var x = 0

  /** Returns the id for this call and bumps the counter for the next one. */
  def apply(): Int = {
    val id = x
    x += 1
    id
  }
}
......@@ -106,7 +107,7 @@ case class XSCoreParameters
PtwL3EntrySize: Int = 4096, //(256 * 16) or 512
PtwSPEntrySize: Int = 16,
PtwL1EntrySize: Int = 16,
PtwL2EntrySize: Int = 2048,//(256 * 8)
PtwL2EntrySize: Int = 2048, //(256 * 8)
NumPerfCounters: Int = 16,
NrExtIntr: Int = 150
)
......@@ -119,7 +120,9 @@ trait HasXSParameter {
val XLEN = 64
val minFLen = 32
val fLen = 64
def xLen = 64
val HasMExtension = core.HasMExtension
val HasCExtension = core.HasCExtension
val HasDiv = core.HasDiv
......@@ -173,7 +176,7 @@ trait HasXSParameter {
val exuParameters = core.exuParameters
val NRIntReadPorts = core.NRIntReadPorts
val NRIntWritePorts = core.NRIntWritePorts
val NRMemReadPorts = exuParameters.LduCnt + 2*exuParameters.StuCnt
val NRMemReadPorts = exuParameters.LduCnt + 2 * exuParameters.StuCnt
val NRFpReadPorts = core.NRFpReadPorts
val NRFpWritePorts = core.NRFpWritePorts
val LoadPipelineWidth = core.LoadPipelineWidth
......@@ -256,7 +259,7 @@ trait HasXSParameter {
// dcache prefetcher
val l2PrefetcherParameters = L2PrefetcherParameters(
enable = true,
_type = "bop",// "stream" or "bop"
_type = "bop", // "stream" or "bop"
streamParams = StreamPrefetchParameters(
streamCnt = 4,
streamSize = 4,
......@@ -277,7 +280,8 @@ trait HasXSParameter {
)
}
/**
 * Mixin for `RawModule`s that publishes the module's own name as an implicit
 * `moduleName` string.
 * NOTE(review): presumably picked up implicitly by the logging helpers - confirm.
 */
trait HasXSLog {
  // Self-type: this trait can only be mixed into a RawModule, which provides `name`
  this: RawModule =>
  implicit val moduleName: String = this.name
}
......@@ -285,13 +289,13 @@ abstract class XSModule extends MultiIOModule
with HasXSParameter
with HasExceptionNO
with HasXSLog
with HasFPUParameters
{
with HasFPUParameters {
def io: Record
}
//remove this trait after impl module logic
trait NeedImpl { this: RawModule =>
trait NeedImpl {
this: RawModule =>
override protected def IO[T <: Data](iodef: T): T = {
println(s"[Warn]: (${this.name}) please reomve 'NeedImpl' after implement this module")
val io = chisel3.experimental.IO(iodef)
......@@ -327,35 +331,19 @@ case class EnviromentParameters
// }
class XSCore()(implicit p: config.Parameters) extends LazyModule
with HasXSParameter
with HasExeBlockHelper
{
// to fast wake up fp, mem rs
val intBlockFastWakeUpFp = intExuConfigs.filter(fpFastFilter)
val intBlockSlowWakeUpFp = intExuConfigs.filter(fpSlowFilter)
val intBlockFastWakeUpInt = intExuConfigs.filter(intFastFilter)
val intBlockSlowWakeUpInt = intExuConfigs.filter(intSlowFilter)
val fpBlockFastWakeUpFp = fpExuConfigs.filter(fpFastFilter)
val fpBlockSlowWakeUpFp = fpExuConfigs.filter(fpSlowFilter)
val fpBlockFastWakeUpInt = fpExuConfigs.filter(intFastFilter)
val fpBlockSlowWakeUpInt = fpExuConfigs.filter(intSlowFilter)
with HasExeBlockHelper {
// outer facing nodes
val frontend = LazyModule(new Frontend())
val l1pluscache = LazyModule(new L1plusCache())
val ptw = LazyModule(new PTW())
val l2Prefetcher = LazyModule(new L2Prefetcher())
val memBlock = LazyModule(new MemBlock(
fastWakeUpIn = intBlockFastWakeUpInt ++ intBlockFastWakeUpFp ++ fpBlockFastWakeUpInt ++ fpBlockFastWakeUpFp,
slowWakeUpIn = intBlockSlowWakeUpInt ++ intBlockSlowWakeUpFp ++ fpBlockSlowWakeUpInt ++ fpBlockSlowWakeUpFp,
fastFpOut = Seq(),
slowFpOut = loadExuConfigs,
fastIntOut = Seq(),
slowIntOut = loadExuConfigs
fastWakeUpIn = intExuConfigs.filter(_.hasCertainLatency),
slowWakeUpIn = intExuConfigs.filter(_.hasUncertainlatency) ++ fpExuConfigs,
fastWakeUpOut = Seq(),
slowWakeUpOut = loadExuConfigs
))
lazy val module = new XSCoreImp(this)
......@@ -363,9 +351,9 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule
class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
with HasXSParameter
with HasExeBlockHelper
{
with HasExeBlockHelper {
val io = IO(new Bundle {
val hartId = Input(UInt(64.W))
val externalInterrupt = new ExternalInterruptIO
val l2ToPrefetcher = Flipped(new PrefetcherIO(PAddrBits))
})
......@@ -381,32 +369,21 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
AddressSpace.printMemmap()
// to fast wake up fp, mem rs
val intBlockFastWakeUpFp = intExuConfigs.filter(fpFastFilter)
val intBlockSlowWakeUpFp = intExuConfigs.filter(fpSlowFilter)
val intBlockFastWakeUpInt = intExuConfigs.filter(intFastFilter)
val intBlockSlowWakeUpInt = intExuConfigs.filter(intSlowFilter)
val fpBlockFastWakeUpFp = fpExuConfigs.filter(fpFastFilter)
val fpBlockSlowWakeUpFp = fpExuConfigs.filter(fpSlowFilter)
val fpBlockFastWakeUpInt = fpExuConfigs.filter(intFastFilter)
val fpBlockSlowWakeUpInt = fpExuConfigs.filter(intSlowFilter)
val intBlockFastWakeUp = intExuConfigs.filter(_.hasCertainLatency)
val intBlockSlowWakeUp = intExuConfigs.filter(_.hasUncertainlatency)
val ctrlBlock = Module(new CtrlBlock)
val integerBlock = Module(new IntegerBlock(
fastWakeUpIn = fpBlockFastWakeUpInt,
slowWakeUpIn = fpBlockSlowWakeUpInt ++ loadExuConfigs,
fastFpOut = intBlockFastWakeUpFp,
slowFpOut = intBlockSlowWakeUpFp,
fastIntOut = intBlockFastWakeUpInt,
slowIntOut = intBlockSlowWakeUpInt
fastWakeUpIn = Seq(),
slowWakeUpIn = fpExuConfigs.filter(_.writeIntRf) ++ loadExuConfigs,
fastWakeUpOut = intBlockFastWakeUp,
slowWakeUpOut = intBlockSlowWakeUp
))
val floatBlock = Module(new FloatBlock(
fastWakeUpIn = intBlockFastWakeUpFp,
slowWakeUpIn = intBlockSlowWakeUpFp ++ loadExuConfigs,
fastFpOut = fpBlockFastWakeUpFp,
slowFpOut = fpBlockSlowWakeUpFp,
fastIntOut = fpBlockFastWakeUpInt,
slowIntOut = fpBlockSlowWakeUpInt
fastWakeUpIn = Seq(),
slowWakeUpIn = intExuConfigs.filter(_.writeFpRf) ++ loadExuConfigs,
fastWakeUpOut = Seq(),
slowWakeUpOut = fpExuConfigs
))
val frontend = outer.frontend.module
......@@ -431,38 +408,39 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ctrlBlock.io.toFpBlock <> floatBlock.io.fromCtrlBlock
ctrlBlock.io.toLsBlock <> memBlock.io.fromCtrlBlock
integerBlock.io.wakeUpIn.fastUops <> floatBlock.io.wakeUpIntOut.fastUops
integerBlock.io.wakeUpIn.fast <> floatBlock.io.wakeUpIntOut.fast
integerBlock.io.wakeUpIn.slow <> floatBlock.io.wakeUpIntOut.slow ++ memBlock.io.wakeUpIntOut.slow
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock
val memBlockWakeUpInt = memBlock.io.wakeUpOut.slow.map(x => intOutValid(x))
val memBlockWakeUpFp = memBlock.io.wakeUpOut.slow.map(x => fpOutValid(x))
memBlock.io.wakeUpOut.slow.foreach(_.ready := true.B)
floatBlock.io.wakeUpIn.fastUops <> integerBlock.io.wakeUpFpOut.fastUops
floatBlock.io.wakeUpIn.fast <> integerBlock.io.wakeUpFpOut.fast
floatBlock.io.wakeUpIn.slow <> integerBlock.io.wakeUpFpOut.slow ++ memBlock.io.wakeUpFpOut.slow
floatBlock.io.toMemBlock <> memBlock.io.fromFpBlock
fpExuConfigs.zip(floatBlock.io.wakeUpOut.slow).filterNot(_._1.writeIntRf).map(_._2.ready := true.B)
val fpBlockWakeUpInt = fpExuConfigs
.zip(floatBlock.io.wakeUpOut.slow)
.filter(_._1.writeIntRf)
.map(_._2).map(x => intOutValid(x, connectReady = true))
// The integer block only emits slow wake-up ports for units with uncertain latency
// (its slowWakeUpOut is intExuConfigs.filter(_.hasUncertainlatency)), so the config list
// must be filtered the same way before it is zipped with those ports. Zipping the
// unfiltered list would pair ports with the wrong unit's config.
// Ports of units that cannot write the FP regfile are not forwarded to the float block,
// so their ready is tied high here.
intExuConfigs.filter(_.hasUncertainlatency)
  .zip(integerBlock.io.wakeUpOut.slow)
  .filterNot(_._1.writeFpRf)
  .foreach(_._2.ready := true.B)
// Ports of units that can write the FP regfile are forwarded with valid gated by fpWen;
// connectReady = true lets the returned wire's ready drive the port's ready.
val intBlockWakeUpFp = intExuConfigs.filter(_.hasUncertainlatency)
  .zip(integerBlock.io.wakeUpOut.slow)
  .filter(_._1.writeFpRf)
  .map(_._2).map(x => fpOutValid(x, connectReady = true))
integerBlock.io.wakeUpIntOut.fast.map(_.ready := true.B)
integerBlock.io.wakeUpIntOut.slow.map(_.ready := true.B)
floatBlock.io.wakeUpFpOut.fast.map(_.ready := true.B)
floatBlock.io.wakeUpFpOut.slow.map(_.ready := true.B)
integerBlock.io.wakeUpIn.slow <> fpBlockWakeUpInt ++ memBlockWakeUpInt
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock
floatBlock.io.wakeUpIn.slow <> intBlockWakeUpFp ++ memBlockWakeUpFp
floatBlock.io.toMemBlock <> memBlock.io.fromFpBlock
val wakeUpMem = Seq(
integerBlock.io.wakeUpIntOut,
integerBlock.io.wakeUpFpOut,
floatBlock.io.wakeUpIntOut,
floatBlock.io.wakeUpFpOut
integerBlock.io.wakeUpOut,
floatBlock.io.wakeUpOut,
)
memBlock.io.wakeUpIn.fastUops <> wakeUpMem.flatMap(_.fastUops)
memBlock.io.wakeUpIn.fast <> wakeUpMem.flatMap(w => w.fast.map(f => {
val raw = WireInit(f)
raw
}))
memBlock.io.wakeUpIn.slow <> wakeUpMem.flatMap(w => w.slow.map(s => {
val raw = WireInit(s)
raw
}))
memBlock.io.wakeUpIn.fast <> wakeUpMem.flatMap(_.fast)
// Note: 'WireInit' is used to block 'ready's from memBlock,
// we don't need 'ready's from memBlock
memBlock.io.wakeUpIn.slow <> wakeUpMem.flatMap(_.slow.map(x => WireInit(x)))
integerBlock.io.csrio.hartId <> io.hartId
integerBlock.io.csrio.fflags <> ctrlBlock.io.roqio.toCSR.fflags
integerBlock.io.csrio.dirty_fs <> ctrlBlock.io.roqio.toCSR.dirty_fs
integerBlock.io.csrio.exception <> ctrlBlock.io.roqio.exception
......@@ -492,7 +470,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ptw.io.tlb(0) <> dtlbRepester.io.ptw
ptw.io.tlb(1) <> itlbRepester.io.ptw
ptw.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.csr <> integerBlock.io.csrio.tlb
ptw.io.csr <> integerBlock.io.csrio.tlb
val l2PrefetcherIn = Wire(Decoupled(new MissReq))
if (l2PrefetcherParameters.enable && l2PrefetcherParameters._type == "bop") {
......
......@@ -6,7 +6,8 @@ import xiangshan._
import utils._
import xiangshan.backend.regfile.Regfile
import xiangshan.backend.exu._
import xiangshan.backend.issue.{ReservationStation}
import xiangshan.backend.issue.ReservationStation
import xiangshan.mem.HasLoadHelper
class FpBlockToCtrlIO extends XSBundle {
......@@ -18,19 +19,16 @@ class FloatBlock
(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
fastFpOut: Seq[ExuConfig],
slowFpOut: Seq[ExuConfig],
fastIntOut: Seq[ExuConfig],
slowIntOut: Seq[ExuConfig]
) extends XSModule with HasExeBlockHelper {
fastWakeUpOut: Seq[ExuConfig],
slowWakeUpOut: Seq[ExuConfig],
) extends XSModule with HasExeBlockHelper with HasLoadHelper {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToFpBlockIO)
val toCtrlBlock = new FpBlockToCtrlIO
val toMemBlock = new FpBlockToMemBlockIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOut.size, slowFpOut.size))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOut.size, slowIntOut.size))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
// from csr
val frm = Input(UInt(3.W))
......@@ -39,6 +37,25 @@ class FloatBlock
val redirect = io.fromCtrlBlock.redirect
val flush = io.fromCtrlBlock.flush
require(fastWakeUpIn.isEmpty)
val wakeUpInReg = Wire(Flipped(new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)))
wakeUpInReg.slow.zip(io.wakeUpIn.slow).foreach{
case (inReg, in) =>
PipelineConnect(in, inReg, inReg.fire(), in.bits.uop.roqIdx.needFlush(redirect, flush))
}
val wakeUpInRecode = WireInit(wakeUpInReg)
for(((rec, reg), cfg) <- wakeUpInRecode.slow.zip(wakeUpInReg.slow).zip(slowWakeUpIn)){
rec.bits.data := {
if(cfg == Exu.ldExeUnitCfg) fpRdataHelper(reg.bits.uop, reg.bits.data)
else Mux(reg.bits.uop.ctrl.fpu.typeTagOut === S,
recode(reg.bits.data(31, 0), S),
recode(reg.bits.data(63, 0), D)
)
}
rec.bits.redirectValid := false.B
reg.ready := rec.ready
}
val fpRf = Module(new Regfile(
numReadPorts = NRFpReadPorts,
numWirtePorts = NRFpWritePorts,
......@@ -70,12 +87,11 @@ class FloatBlock
val readFpRf = cfg.readFpRf
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency && readFpRf).map(_.io.toFp.bits.data)
val writeBackData = inBlockWbData ++ io.wakeUpIn.fast.map(_.bits.data)
val fastPortsCnt = writeBackData.length
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency).map(_.io.out.bits.data)
val fastPortsCnt = inBlockWbData.length
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency && readFpRf).map(_.io.toFp)
val slowPorts = inBlockListenPorts ++ io.wakeUpIn.slow
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency).map(_.io.out)
val slowPorts = (inBlockListenPorts ++ wakeUpInRecode.slow).map(decoupledIOToValidIO)
val slowPortsCnt = slowPorts.length
println(s"${i}: exu:${cfg.name} fastPortsCnt: ${fastPortsCnt} " +
......@@ -99,11 +115,8 @@ class FloatBlock
rs.io.srcRegValue(1) := src2Value(readPortIndex(i))
if (cfg.fpSrcCnt > 2) rs.io.srcRegValue(2) := src3Value(readPortIndex(i))
rs.io.fastDatas <> writeBackData
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
rs.io.fastDatas <> inBlockWbData
rs.io.slowPorts <> slowPorts
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.flush <> flush
......@@ -123,44 +136,44 @@ class FloatBlock
raw.valid := x.io.fastUopOut.valid && raw.bits.ctrl.fpWen
raw
})
rs.io.fastUopsIn <> inBlockUops ++ io.wakeUpIn.fastUops
rs.io.fastUopsIn <> inBlockUops
}
io.wakeUpFpOut.fastUops <> reservedStations.filter(
rs => fpFastFilter(rs.exuCfg)
).map(_.io.fastUopOut).map(fpValid)
io.wakeUpFpOut.fast <> exeUnits.filter(
x => fpFastFilter(x.config)
).map(_.io.toFp)
io.wakeUpFpOut.slow <> exeUnits.filter(
x => fpSlowFilter(x.config)
).map(_.io.toFp)
io.wakeUpIntOut.fastUops <> reservedStations.filter(
rs => intFastFilter(rs.exuCfg)
).map(_.io.fastUopOut).map(intValid)
io.wakeUpIntOut.fast <> exeUnits.filter(
x => intFastFilter(x.config)
).map(_.io.toInt)
io.wakeUpIntOut.slow <> exeUnits.filter(
x => intSlowFilter(x.config)
).map(_.io.toInt)
val (recodeOut, ieeeOutReg) = exeUnits.map(e => {
val rec = WireInit(e.io.out)
val recReg = Wire(DecoupledIO(new ExuOutput))
PipelineConnect(
rec, recReg, recReg.fire(),
rec.bits.uop.roqIdx.needFlush(redirect, flush)
)
val ieeeReg = WireInit(recReg)
recReg.ready := ieeeReg.ready
ieeeReg.bits.data := Mux(recReg.bits.uop.ctrl.fpWen, ieee(recReg.bits.data), recReg.bits.data)
ieeeReg.bits.redirectValid := false.B
(rec, ieeeReg)
}).unzip
io.wakeUpOut.slow <> ieeeOutReg
// read fp rf from ctrl block
fpRf.io.readPorts.zipWithIndex.map{ case (r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until exuParameters.StuCnt).foreach(i => io.toMemBlock.readFpRf(i).data := fpRf.io.readPorts(i + 12).data)
(0 until exuParameters.StuCnt).foreach(i =>
io.toMemBlock.readFpRf(i).data := RegNext(ieee(fpRf.io.readPorts(i + 12).data))
)
// write fp rf arbiter
val fpWbArbiter = Module(new Wb(
(exeUnits.map(_.config) ++ fastWakeUpIn ++ slowWakeUpIn),
NRFpWritePorts,
isFp = true
))
fpWbArbiter.io.in <> exeUnits.map(_.io.toFp) ++ io.wakeUpIn.fast ++ io.wakeUpIn.slow
fpWbArbiter.io.in <> exeUnits.map(e =>
if(e.config.writeIntRf) WireInit(e.io.out) else e.io.out
) ++ wakeUpInRecode.slow
exeUnits.zip(recodeOut).zip(fpWbArbiter.io.in).filter(_._1._1.config.writeIntRf).foreach {
case ((exu, wInt), wFp) =>
exu.io.out.ready := wInt.fire() || wFp.fire()
}
// set busytable and update roq
io.toCtrlBlock.wbRegs <> fpWbArbiter.io.out
......
......@@ -11,7 +11,7 @@ import xiangshan.backend.regfile.Regfile
class WakeUpBundle(numFast: Int, numSlow: Int) extends XSBundle {
val fastUops = Vec(numFast, Flipped(ValidIO(new MicroOp)))
val fast = Vec(numFast, Flipped(DecoupledIO(new ExuOutput))) //one cycle later than fastUops
val fast = Vec(numFast, Flipped(ValidIO(new ExuOutput))) //one cycle later than fastUops
val slow = Vec(numSlow, Flipped(DecoupledIO(new ExuOutput)))
override def cloneType = (new WakeUpBundle(numFast, numSlow)).asInstanceOf[this.type]
......@@ -23,32 +23,56 @@ class IntBlockToCtrlIO extends XSBundle {
// used to update busytable and roq state
val wbRegs = Vec(NRIntWritePorts, ValidIO(new ExuOutput))
// write back to brq
val exuRedirect = Vec(exuParameters.AluCnt+exuParameters.JmpCnt, ValidIO(new ExuOutput))
val exuRedirect = Vec(exuParameters.AluCnt + exuParameters.JmpCnt, ValidIO(new ExuOutput))
val numExist = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
}
trait HasExeBlockHelper {
def fpFastFilter(cfg: ExuConfig): Boolean = {
cfg.hasCertainLatency && cfg.writeFpRf
/**
 * Returns a wire initialised from `x` whose `valid` is additionally qualified by the
 * uop's `fpWen` control bit; the payload is passed through unchanged.
 */
def fpUopValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
  val writesFp = x.bits.ctrl.fpWen
  val gated = WireInit(x)
  // Only the valid bit is overridden after the initial copy
  gated.valid := x.valid && writesFp
  gated
}
def fpSlowFilter(cfg: ExuConfig): Boolean = {
cfg.hasUncertainlatency && cfg.writeFpRf
/**
 * Returns a wire initialised from `x` whose `valid` is additionally qualified by
 * `uop.ctrl.fpWen` of the carried result; the payload is passed through unchanged.
 */
def fpOutValid(x: ValidIO[ExuOutput]): ValidIO[ExuOutput] = {
  val writesFp = x.bits.uop.ctrl.fpWen
  val gated = WireInit(x)
  // Only the valid bit is overridden after the initial copy
  gated.valid := x.valid && writesFp
  gated
}
def intFastFilter(cfg: ExuConfig): Boolean = {
cfg.hasCertainLatency && cfg.writeIntRf
/**
 * Decoupled variant: returns a wire initialised from `x` whose `valid` is additionally
 * qualified by `uop.ctrl.fpWen`.
 *
 * @param x            source port
 * @param connectReady elaboration-time switch; when true, `x.ready` is driven from the
 *                     returned wire's `ready`, otherwise `x.ready` is left untouched here
 */
def fpOutValid(x: DecoupledIO[ExuOutput], connectReady: Boolean = false): DecoupledIO[ExuOutput] = {
  val gated = WireInit(x)
  gated.valid := x.valid && x.bits.uop.ctrl.fpWen
  // Back-pressure is only forwarded to the source when the caller asks for it
  if (connectReady) {
    x.ready := gated.ready
  }
  gated
}
def intSlowFilter(cfg: ExuConfig): Boolean = {
cfg.hasUncertainlatency && cfg.writeIntRf
/**
 * Returns a wire initialised from `x` whose `valid` is additionally qualified by the
 * uop's `rfWen` control bit; the payload is passed through unchanged.
 */
def intUopValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
  val writesInt = x.bits.ctrl.rfWen
  val gated = WireInit(x)
  // Only the valid bit is overridden after the initial copy
  gated.valid := x.valid && writesInt
  gated
}
def fpValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
val uop = WireInit(x)
uop.valid := x.valid && x.bits.ctrl.fpWen
uop
/**
 * Returns a wire initialised from `x` whose `valid` is additionally qualified by
 * `uop.ctrl.rfWen` of the carried result; the payload is passed through unchanged.
 */
def intOutValid(x: ValidIO[ExuOutput]): ValidIO[ExuOutput] = {
  val writesInt = x.bits.uop.ctrl.rfWen
  val gated = WireInit(x)
  // Only the valid bit is overridden after the initial copy
  gated.valid := x.valid && writesInt
  gated
}
def intValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
val uop = WireInit(x)
uop.valid := x.valid && x.bits.ctrl.rfWen
uop
/**
 * Decoupled variant: returns a wire initialised from `x` whose `valid` is additionally
 * qualified by `uop.ctrl.rfWen`.
 *
 * @param x            source port
 * @param connectReady elaboration-time switch; when true, `x.ready` is driven from the
 *                     returned wire's `ready`, otherwise `x.ready` is left untouched here
 */
def intOutValid(x: DecoupledIO[ExuOutput], connectReady: Boolean = false): DecoupledIO[ExuOutput] = {
  val gated = WireInit(x)
  gated.valid := x.valid && x.bits.uop.ctrl.rfWen
  // Back-pressure is only forwarded to the source when the caller asks for it
  if (connectReady) {
    x.ready := gated.ready
  }
  gated
}
/**
 * Views a decoupled port as a valid-only one: the returned wire copies `valid` and
 * `bits` from `d`. `d.ready` is neither read nor driven here.
 */
def decoupledIOToValidIO[T <: Data](d: DecoupledIO[T]): Valid[T] = {
  val validOnly = Wire(Valid(d.bits.cloneType))
  validOnly.bits := d.bits
  validOnly.valid := d.valid
  validOnly
}
/**
 * Wraps a valid-only signal in a decoupled wire: `valid` and `bits` are copied from `v`
 * and the wire's `ready` is driven to `true.B` inside this helper.
 */
def validIOToDecoupledIO[T <: Data](v: Valid[T]): DecoupledIO[T] = {
  val decoupled = Wire(DecoupledIO(v.bits.cloneType))
  decoupled.bits := v.bits
  decoupled.valid := v.valid
  // The source has no back-pressure, so ready is tied high here
  decoupled.ready := true.B
  decoupled
}
}
......@@ -56,22 +80,19 @@ class IntegerBlock
(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
fastFpOut: Seq[ExuConfig],
slowFpOut: Seq[ExuConfig],
fastIntOut: Seq[ExuConfig],
slowIntOut: Seq[ExuConfig]
) extends XSModule with HasExeBlockHelper
{
fastWakeUpOut: Seq[ExuConfig],
slowWakeUpOut: Seq[ExuConfig]
) extends XSModule with HasExeBlockHelper {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToIntBlockIO)
val toCtrlBlock = new IntBlockToCtrlIO
val toMemBlock = new IntBlockToMemBlockIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOut.size, slowFpOut.size))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOut.size, slowIntOut.size))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val csrio = new Bundle {
val hartId = Input(UInt(64.W))
val fflags = Flipped(Valid(UInt(5.W))) // from roq
val dirty_fs = Input(Bool()) // from roq
val frm = Output(UInt(3.W)) // to float
......@@ -80,7 +101,7 @@ class IntegerBlock
val isXRet = Output(Bool())
val interrupt = Output(Bool()) // to roq
val memExceptionVAddr = Input(UInt(VAddrBits.W)) // from lsq
val externalInterrupt = new ExternalInterruptIO // from outside
val externalInterrupt = new ExternalInterruptIO // from outside
val tlb = Output(new TlbCsrBundle) // from tlb
val perfinfo = new Bundle {
val retiredInstr = Input(UInt(3.W))
......@@ -88,8 +109,8 @@ class IntegerBlock
}
val fenceio = new Bundle {
val sfence = Output(new SfenceBundle) // to front,mem
val fencei = Output(Bool()) // to icache
val sbuffer = new FenceToSbuffer // to mem
val fencei = Output(Bool()) // to icache
val sbuffer = new FenceToSbuffer // to mem
}
})
val difftestIO = IO(new Bundle() {
......@@ -150,12 +171,12 @@ class IntegerBlock
val readIntRf = cfg.readIntRf
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency && readIntRf).map(_.io.toInt.bits.data)
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency).map(_.io.out.bits.data)
val fastDatas = inBlockWbData ++ io.wakeUpIn.fast.map(_.bits.data)
val wakeupCnt = fastDatas.length
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency && readIntRf).map(_.io.toInt)
val slowPorts = inBlockListenPorts ++ io.wakeUpIn.slow
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency).map(_.io.out)
val slowPorts = (inBlockListenPorts ++ io.wakeUpIn.slow).map(decoupledIOToValidIO)
val extraListenPortsCnt = slowPorts.length
val feedback = (cfg == ldExeUnitCfg) || (cfg == stExeUnitCfg)
......@@ -180,10 +201,7 @@ class IntegerBlock
}
rs.io.fastDatas <> fastDatas
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
rs.io.slowPorts <> slowPorts
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.fromInt <> rs.io.deq
......@@ -195,7 +213,7 @@ class IntegerBlock
rs
})
for(rs <- reservationStations){
for (rs <- reservationStations) {
val inBlockUops = reservationStations.filter(x =>
x.exuCfg.hasCertainLatency && x.exuCfg.writeIntRf
).map(x => {
......@@ -206,34 +224,22 @@ class IntegerBlock
rs.io.fastUopsIn <> inBlockUops ++ io.wakeUpIn.fastUops
}
io.wakeUpFpOut.fastUops <> reservationStations.filter(
rs => fpFastFilter(rs.exuCfg)
).map(_.io.fastUopOut).map(fpValid)
io.wakeUpOut.fastUops <> reservationStations.filter(
rs => rs.exuCfg.hasCertainLatency
).map(_.io.fastUopOut).map(intUopValid)
io.wakeUpFpOut.fast <> exeUnits.filter(
x => fpFastFilter(x.config)
).map(_.io.toFp)
io.wakeUpOut.fast <> exeUnits.filter(
x => x.config.hasCertainLatency
).map(_.io.out).map(decoupledIOToValidIO)
io.wakeUpFpOut.slow <> exeUnits.filter(
x => fpSlowFilter(x.config)
).map(_.io.toFp)
io.wakeUpIntOut.fastUops <> reservationStations.filter(
rs => intFastFilter(rs.exuCfg)
).map(_.io.fastUopOut).map(intValid)
io.wakeUpIntOut.fast <> exeUnits.filter(
x => intFastFilter(x.config)
).map(_.io.toInt)
io.wakeUpIntOut.slow <> exeUnits.filter(
x => intSlowFilter(x.config)
).map(_.io.toInt)
io.wakeUpOut.slow <> exeUnits.filter(
x => x.config.hasUncertainlatency
).map(x => WireInit(x.io.out))
// send misprediction to brq
io.toCtrlBlock.exuRedirect.zip(
exeUnits.filter(_.config.hasRedirect).map(_.io.toInt)
).foreach{
exeUnits.filter(_.config.hasRedirect).map(_.io.out)
).foreach {
case (x, y) =>
x.valid := y.fire() && y.bits.redirectValid
x.bits := y.bits
......@@ -246,7 +252,7 @@ class IntegerBlock
}
// read int rf from ctrl block
intRf.io.readPorts.zipWithIndex.map{ case(r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
intRf.io.readPorts.zipWithIndex.map { case (r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until NRMemReadPorts).foreach(i => io.toMemBlock.readIntRf(i).data := intRf.io.readPorts(i + 8).data)
// write int rf arbiter
val intWbArbiter = Module(new Wb(
......@@ -254,12 +260,19 @@ class IntegerBlock
NRIntWritePorts,
isFp = false
))
intWbArbiter.io.in <> exeUnits.map(_.io.toInt) ++ io.wakeUpIn.fast ++ io.wakeUpIn.slow
intWbArbiter.io.in <> exeUnits.map(e => {
if(e.config.writeFpRf) WireInit(e.io.out) else e.io.out
}) ++ io.wakeUpIn.slow
exeUnits.zip(intWbArbiter.io.in).filter(_._1.config.writeFpRf).zip(io.wakeUpIn.slow).foreach{
case ((exu, wInt), wFp) =>
exu.io.out.ready := wFp.fire() || wInt.fire()
}
// set busytable and update roq
io.toCtrlBlock.wbRegs <> intWbArbiter.io.out
intRf.io.writePorts.zip(intWbArbiter.io.out).foreach{
intRf.io.writePorts.zip(intWbArbiter.io.out).foreach {
case (rf, wb) =>
rf.wen := wb.valid && wb.bits.uop.ctrl.rfWen
rf.addr := wb.bits.uop.pdest
......
......@@ -30,29 +30,19 @@ class FpBlockToMemBlockIO extends XSBundle {
}
class MemBlock(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
fastFpOut: Seq[ExuConfig],
slowFpOut: Seq[ExuConfig],
fastIntOut: Seq[ExuConfig],
slowIntOut: Seq[ExuConfig]
val fastWakeUpIn: Seq[ExuConfig],
val slowWakeUpIn: Seq[ExuConfig],
val fastWakeUpOut: Seq[ExuConfig],
val slowWakeUpOut: Seq[ExuConfig]
)(implicit p: Parameters) extends LazyModule {
val dcache = LazyModule(new DCache())
val uncache = LazyModule(new Uncache())
lazy val module = new MemBlockImp(fastWakeUpIn, slowWakeUpIn, fastFpOut, slowFpOut, fastIntOut, slowIntOut)(this)
lazy val module = new MemBlockImp(this)
}
class MemBlockImp
(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
fastFpOut: Seq[ExuConfig],
slowFpOut: Seq[ExuConfig],
fastIntOut: Seq[ExuConfig],
slowIntOut: Seq[ExuConfig]
) (outer: MemBlock) extends LazyModuleImp(outer)
class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
with HasXSParameter
with HasExceptionNO
with HasXSLog
......@@ -60,6 +50,11 @@ class MemBlockImp
with HasExeBlockHelper
{
val fastWakeUpIn = outer.fastWakeUpIn
val slowWakeUpIn = outer.slowWakeUpIn
val fastWakeUpOut = outer.fastWakeUpOut
val slowWakeUpOut = outer.slowWakeUpOut
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToLsBlockIO)
val fromIntBlock = Flipped(new IntBlockToMemBlockIO)
......@@ -67,8 +62,7 @@ class MemBlockImp
val toCtrlBlock = new LsBlockToCtrlIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOut.size, slowFpOut.size))
val wakeUpIntOut = Flipped(new WakeUpBundle(fastIntOut.size, slowIntOut.size))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val ptw = new TlbPtwIO
val sfence = Input(new SfenceBundle)
......@@ -124,8 +118,7 @@ class MemBlockImp
atomicsUnit.io.out.ready := ldOut0.ready
loadUnits.head.io.ldout.ready := ldOut0.ready
val intExeWbReqs = ldOut0 +: loadUnits.tail.map(_.io.ldout)
val fpExeWbReqs = loadUnits.map(_.io.fpout)
val exeWbReqs = ldOut0 +: loadUnits.tail.map(_.io.ldout)
val readPortIndex = Seq(0, 1, 2, 4)
io.fromIntBlock.readIntRf.foreach(_.addr := DontCare)
......@@ -145,11 +138,10 @@ class MemBlockImp
.map(_._2.bits.data)
val wakeupCnt = fastDatas.length
val inBlockListenPorts = intExeWbReqs ++ fpExeWbReqs
val slowPorts = inBlockListenPorts ++
val slowPorts = (exeWbReqs ++
slowWakeUpIn.zip(io.wakeUpIn.slow)
.filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf))
.map(_._2)
.map(_._2)).map(decoupledIOToValidIO)
val slowPortsCnt = slowPorts.length
......@@ -165,18 +157,14 @@ class MemBlockImp
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl(i)
val src2IsFp = RegNext(io.fromCtrlBlock.enqIqCtrl(i).bits.ctrl.src2Type === SrcType.fp)
rs.io.srcRegValue := DontCare
rs.io.srcRegValue(0) := io.fromIntBlock.readIntRf(readPortIndex(i)).data
if (i >= exuParameters.LduCnt) {
rs.io.srcRegValue(1) := Mux(src2IsFp, io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data, io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data)
rs.io.srcRegValue(1) := io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data
rs.io.fpRegValue := io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data
}
rs.io.fastDatas <> fastDatas
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
rs.io.slowPorts <> slowPorts
// exeUnits(i).io.redirect <> redirect
// exeUnits(i).io.fromInt <> rs.io.deq
......@@ -193,17 +181,9 @@ class MemBlockImp
.map(_._2)
}
// TODO: make this better
io.wakeUpIn.fast.foreach(_.ready := true.B)
io.wakeUpOut.slow <> exeWbReqs
io.wakeUpIn.slow.foreach(_.ready := true.B)
io.wakeUpFpOut.slow <> fpExeWbReqs
io.wakeUpIntOut.slow <> intExeWbReqs
// load always ready
fpExeWbReqs.foreach(_.ready := true.B)
intExeWbReqs.foreach(_.ready := true.B)
val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
val lsq = Module(new LsqWrappper)
val sbuffer = Module(new NewSbuffer)
......
......@@ -17,6 +17,7 @@ class FPDecoder extends XSModule{
def Y = BitPat("b1")
val s = BitPat(S)
val d = BitPat(D)
val i = BitPat(I)
val default = List(X,X,X,N,N,N,X,X,X)
......@@ -27,15 +28,15 @@ class FPDecoder extends XSModule{
FCVT_S_WU-> List(N,s,s,Y,Y,Y,N,N,Y),
FCVT_S_L -> List(N,s,s,Y,Y,Y,N,N,Y),
FCVT_S_LU-> List(N,s,s,Y,Y,Y,N,N,Y),
FMV_X_W -> List(N,d,X,N,N,N,N,N,N),
FCLASS_S -> List(N,s,X,N,N,N,N,N,N),
FCVT_W_S -> List(N,s,X,N,Y,N,N,N,Y),
FCVT_WU_S-> List(N,s,X,N,Y,N,N,N,Y),
FCVT_L_S -> List(N,s,X,N,Y,N,N,N,Y),
FCVT_LU_S-> List(N,s,X,N,Y,N,N,N,Y),
FEQ_S -> List(N,s,X,N,Y,N,N,N,N),
FLT_S -> List(N,s,X,N,Y,N,N,N,N),
FLE_S -> List(N,s,X,N,Y,N,N,N,N),
FMV_X_W -> List(N,d,i,N,N,N,N,N,N),
FCLASS_S -> List(N,s,i,N,N,N,N,N,N),
FCVT_W_S -> List(N,s,i,N,Y,N,N,N,Y),
FCVT_WU_S-> List(N,s,i,N,Y,N,N,N,Y),
FCVT_L_S -> List(N,s,i,N,Y,N,N,N,Y),
FCVT_LU_S-> List(N,s,i,N,Y,N,N,N,Y),
FEQ_S -> List(N,s,i,N,Y,N,N,N,N),
FLT_S -> List(N,s,i,N,Y,N,N,N,N),
FLE_S -> List(N,s,i,N,Y,N,N,N,N),
FSGNJ_S -> List(N,s,s,N,N,Y,N,N,N),
FSGNJN_S -> List(N,s,s,N,N,Y,N,N,N),
FSGNJX_S -> List(N,s,s,N,N,Y,N,N,N),
......@@ -60,17 +61,17 @@ class FPDecoder extends XSModule{
FCVT_D_WU-> List(N,d,d,Y,Y,Y,N,N,Y),
FCVT_D_L -> List(N,d,d,Y,Y,Y,N,N,Y),
FCVT_D_LU-> List(N,d,d,Y,Y,Y,N,N,Y),
FMV_X_D -> List(N,d,X,N,N,N,N,N,N),
FCLASS_D -> List(N,d,X,N,N,N,N,N,N),
FCVT_W_D -> List(N,d,X,N,Y,N,N,N,Y),
FCVT_WU_D-> List(N,d,X,N,Y,N,N,N,Y),
FCVT_L_D -> List(N,d,X,N,Y,N,N,N,Y),
FCVT_LU_D-> List(N,d,X,N,Y,N,N,N,Y),
FMV_X_D -> List(N,d,i,N,N,N,N,N,N),
FCLASS_D -> List(N,d,i,N,N,N,N,N,N),
FCVT_W_D -> List(N,d,i,N,Y,N,N,N,Y),
FCVT_WU_D-> List(N,d,i,N,Y,N,N,N,Y),
FCVT_L_D -> List(N,d,i,N,Y,N,N,N,Y),
FCVT_LU_D-> List(N,d,i,N,Y,N,N,N,Y),
FCVT_S_D -> List(N,d,s,N,Y,Y,N,N,Y),
FCVT_D_S -> List(N,s,d,N,Y,Y,N,N,Y),
FEQ_D -> List(N,d,X,N,Y,N,N,N,N),
FLT_D -> List(N,d,X,N,Y,N,N,N,N),
FLE_D -> List(N,d,X,N,Y,N,N,N,N),
FEQ_D -> List(N,d,i,N,Y,N,N,N,N),
FLT_D -> List(N,d,i,N,Y,N,N,N,N),
FLE_D -> List(N,d,i,N,Y,N,N,N,N),
FSGNJ_D -> List(N,d,d,N,N,Y,N,N,N),
FSGNJN_D -> List(N,d,d,N,N,Y,N,N,N),
FSGNJX_D -> List(N,d,d,N,N,Y,N,N,N),
......
......@@ -13,11 +13,11 @@ class AluExeUnit extends Exu(aluExeUnitCfg)
case a: Alu => a
}.get
io.toInt.bits.redirectValid := alu.redirectOutValid
io.toInt.bits.redirect := alu.redirectOut
io.out.bits.redirectValid := alu.redirectOutValid
io.out.bits.redirect := alu.redirectOut
XSDebug(io.fromInt.valid || io.redirect.valid,
p"fromInt(${io.fromInt.valid} ${io.fromInt.ready}) toInt(${io.toInt.valid} ${io.toInt.ready})" +
p"fromInt(${io.fromInt.valid} ${io.fromInt.ready}) toInt(${io.out.valid} ${io.out.ready})" +
p"Redirect:(${io.redirect.valid}) roqIdx:${io.redirect.bits.roqIdx}\n",
)
XSDebug(io.fromInt.valid,
......@@ -25,7 +25,7 @@ class AluExeUnit extends Exu(aluExeUnitCfg)
p"src3:${Hexadecimal(io.fromInt.bits.src3)} func:${Binary(io.fromInt.bits.uop.ctrl.fuOpType)} " +
p"pc:${Hexadecimal(io.fromInt.bits.uop.cf.pc)} roqIdx:${io.fromInt.bits.uop.roqIdx}\n"
)
XSDebug(io.toInt.valid,
p"res:${Hexadecimal(io.toInt.bits.data)}\n"
XSDebug(io.out.valid,
p"res:${Hexadecimal(io.out.bits.data)}\n"
)
}
\ No newline at end of file
......@@ -83,8 +83,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
val fromFp = if (config.readFpRf) Flipped(DecoupledIO(new ExuInput)) else null
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
val toInt = if (config.writeIntRf) DecoupledIO(new ExuOutput) else null
val toFp = if (config.writeFpRf) DecoupledIO(new ExuOutput) else null
val out = DecoupledIO(new ExuOutput)
})
for ((fuCfg, (fu, sel)) <- config.fuConfigs.zip(supportedFunctionUnits.zip(fuSel))) {
......@@ -147,15 +146,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
}
}
val intArb = if (config.writeIntRf) writebackArb(
supportedFunctionUnits.zip(config.fuConfigs).filter(x => !x._2.writeFpRf).map(_._1.io.out),
io.toInt
) else null
val fpArb = if (config.writeFpRf) writebackArb(
supportedFunctionUnits.zip(config.fuConfigs).filter(x => x._2.writeFpRf).map(_._1.io.out),
io.toFp
) else null
val arb = writebackArb(supportedFunctionUnits.map(_.io.out), io.out)
val readIntFu = config.fuConfigs
.zip(supportedFunctionUnits.zip(fuSel))
......@@ -179,7 +170,6 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
}
}
if (config.readIntRf) {
io.fromInt.ready := inReady(readIntFu)
}
......@@ -198,12 +188,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
out.redirectValid := false.B
}
if (config.writeFpRf) {
assignDontCares(io.toFp.bits)
}
if (config.writeIntRf) {
assignDontCares(io.toInt.bits)
}
assignDontCares(io.out.bits)
}
object Exu {
......@@ -233,6 +218,4 @@ object Exu {
Seq.fill(exuParameters.FmiscCnt)(fmiscExeUnitCfg)
val exuConfigs: Seq[ExuConfig] = intExuConfigs ++ fpExuConfigs
}
\ No newline at end of file
......@@ -20,8 +20,8 @@ class FmacExeUnit extends Exu(fmacExeUnitCfg)
fma.io.redirectIn := io.redirect
fma.io.flushIn := io.flush
fma.io.out.ready := io.toFp.ready
fma.io.out.ready := io.out.ready
io.toFp.bits.data := box(fma.io.out.bits.data, fma.io.out.bits.uop.ctrl.fpu.typeTagOut)
io.toFp.bits.fflags := fma.fflags
io.out.bits.data := box(fma.io.out.bits.data, fma.io.out.bits.uop.ctrl.fpu.typeTagOut)
io.out.bits.fflags := fma.fflags
}
......@@ -10,12 +10,7 @@ class FmiscExeUnit extends Exu(fmiscExeUnitCfg) {
val frm = IO(Input(UInt(3.W)))
val f2i :: f2f :: fdivSqrt :: Nil = supportedFunctionUnits.map(fu => fu.asInstanceOf[FPUSubModule])
val toFpUnits = Seq(f2f, fdivSqrt)
val toIntUnits = Seq(f2i)
assert(toFpUnits.size == 1 || fpArb.io.in.length == toFpUnits.size)
assert(toIntUnits.size == 1 || intArb.io.in.length == toIntUnits.size)
val fus = supportedFunctionUnits.map(fu => fu.asInstanceOf[FPUSubModule])
val input = io.fromFp
val isRVF = input.bits.uop.ctrl.isRVF
......@@ -28,15 +23,10 @@ class FmiscExeUnit extends Exu(fmiscExeUnitCfg) {
module.asInstanceOf[FPUSubModule].rm := Mux(instr_rm =/= 7.U, instr_rm, frm)
}
io.toFp.bits.fflags := MuxCase(
0.U,
toFpUnits.map(x => x.io.out.fire() -> x.fflags)
)
val fpOutCtrl = io.toFp.bits.uop.ctrl.fpu
io.toFp.bits.data := box(fpArb.io.out.bits.data, fpOutCtrl.typeTagOut)
io.toInt.bits.fflags := MuxCase(
io.out.bits.fflags := MuxCase(
0.U,
toIntUnits.map(x => x.io.out.fire() -> x.fflags)
fus.map(x => x.io.out.fire() -> x.fflags)
)
val fpOutCtrl = io.out.bits.uop.ctrl.fpu
io.out.bits.data := box(arb.io.out.bits.data, fpOutCtrl.typeTagOut)
}
......@@ -11,6 +11,7 @@ import xiangshan.backend.fu.{CSR, Fence, FenceToSbuffer, FunctionUnit, Jump}
class JumpExeUnit extends Exu(jumpExeUnitCfg)
{
val csrio = IO(new Bundle {
val hartId = Input(UInt(64.W))
val fflags = Flipped(ValidIO(UInt(5.W)))
val dirty_fs = Input(Bool())
val frm = Output(UInt(3.W))
......@@ -69,6 +70,7 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
case i: IntToFP => i
}.get
csr.csrio.hartId <> csrio.hartId
csr.csrio.perf <> DontCare
csr.csrio.perf.retiredInstr <> csrio.perfinfo.retiredInstr
csr.csrio.fpu.fflags <> csrio.fflags
......@@ -99,6 +101,6 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
val isDouble = !uop.ctrl.isRVF
io.toInt.bits.redirectValid := jmp.redirectOutValid
io.toInt.bits.redirect := jmp.redirectOut
io.out.bits.redirectValid := jmp.redirectOutValid
io.out.bits.redirect := jmp.redirectOut
}
......@@ -67,13 +67,13 @@ class MulDivExeUnit extends Exu(mulDivExeUnitCfg) {
XSDebug(io.fromInt.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d)\n",
io.fromInt.valid, io.fromInt.ready,
io.toInt.valid, io.toInt.ready,
io.out.valid, io.out.ready,
io.redirect.valid,
io.redirect.bits.level
)
XSDebug(io.fromInt.valid, "src1:%x src2:%x pc:%x\n", src1, src2, io.fromInt.bits.uop.cf.pc)
XSDebug(io.toInt.valid, "Out(%d %d) res:%x pc:%x\n",
io.toInt.valid, io.toInt.ready, io.toInt.bits.data, io.toInt.bits.uop.cf.pc
XSDebug(io.out.valid, "Out(%d %d) res:%x pc:%x\n",
io.out.valid, io.out.ready, io.out.bits.data, io.out.bits.uop.cf.pc
)
}
......@@ -3,8 +3,45 @@ package xiangshan.backend.exu
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
/**
  * Arbitrates `n` execution-unit write-back requests onto a single output port.
  *
  * The `ExuOutput` payload is split in two: the `data` field travels through
  * `data_arb`, every other field (collected in `ExuCtrl`) travels through
  * `ctrl_arb`. Both arbiters are driven with the same valid/ready signals, and
  * the assertions below check that they always agree on ready, chosen and valid.
  *
  * NOTE(review): the reason for the split is not visible here; presumably it
  * keeps the wide data path apart from the control path for timing — confirm.
  *
  * @param n number of input requests to arbitrate between
  */
class ExuWbArbiter(n: Int) extends XSModule {
  val io = IO(new Bundle() {
    val in = Vec(n, Flipped(DecoupledIO(new ExuOutput)))
    val out = DecoupledIO(new ExuOutput)
  })

  // Control half of the payload: the fields that are copied by name below.
  class ExuCtrl extends Bundle{
    val uop = new MicroOp
    val fflags = UInt(5.W)
    val redirectValid = Bool()
    val redirect = new Redirect
    val debug = new DebugBundle
  }

  val ctrl_arb = Module(new Arbiter(new ExuCtrl, n))
  val data_arb = Module(new Arbiter(UInt((XLEN+1).W), n))

  // Both arbiters see the same downstream back-pressure.
  Seq(ctrl_arb.io.out.ready, data_arb.io.out.ready).foreach(_ := io.out.ready)

  // Input side: fan each request out to the matching port of both arbiters.
  for (i <- 0 until n) {
    val req = io.in(i)
    val ctrlIn = ctrl_arb.io.in(i)
    val dataIn = data_arb.io.in(i)

    ctrlIn.valid := req.valid
    // Copy every ExuCtrl field from the equally named ExuOutput field.
    ctrlIn.bits.elements.foreach { case (field, sink) =>
      sink := req.bits.elements(field)
    }

    dataIn.valid := req.valid
    dataIn.bits := req.bits.data

    // Ready is taken from the control arbiter; the data arbiter must match.
    req.ready := ctrlIn.ready
    assert(ctrlIn.ready === dataIn.ready)
  }
  assert(ctrl_arb.io.chosen === data_arb.io.chosen)

  // Output side: stitch the two halves back into one ExuOutput.
  io.out.bits.data := data_arb.io.out.bits
  ctrl_arb.io.out.bits.elements.foreach { case (field, source) =>
    io.out.bits.elements(field) := source
  }
  io.out.valid := ctrl_arb.io.out.valid
  assert(ctrl_arb.io.out.valid === data_arb.io.out.valid)
}
class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
......@@ -15,14 +52,6 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
val out = Vec(numOut, ValidIO(new ExuOutput))
})
// def exuOutToRfReq(exuOut: DecoupledIO[ExuOutput]): DecoupledIO[ExuOutput] = {
// val req = WireInit(exuOut)
// req.valid := exuOut.valid && wen(exuOut.bits)
// exuOut.ready := Mux(req.valid, req.ready, true.B)
// req
// }
val directConnect = io.in.zip(priorities).filter(x => x._2 == 0).map(_._1)
val mulReq = io.in.zip(priorities).filter(x => x._2 == 1).map(_._1)
val otherReq = io.in.zip(priorities).filter(x => x._2 > 1).map(_._1)
......@@ -32,9 +61,11 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
io.out.take(directConnect.size).zip(directConnect).foreach{
case (o, i) =>
o.bits := i.bits
o.valid := i.valid
i.ready := true.B
val arb = Module(new ExuWbArbiter(1))
arb.io.in.head <> i
o.bits := arb.io.out.bits
o.valid := arb.io.out.valid
arb.io.out.ready := true.B
}
def splitN[T](in: Seq[T], n: Int): Seq[Option[Seq[T]]] = {
......@@ -59,17 +90,11 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
for(i <- mulReq.indices) {
val out = io.out(directConnect.size + i)
val other = arbReq(i).getOrElse(Seq())
if(other.isEmpty){
out.valid := mulReq(i).valid
out.bits := mulReq(i).bits
mulReq(i).ready := true.B
} else {
val arb = Module(new Arbiter(new ExuOutput, 1+other.size))
arb.io.in <> mulReq(i) +: other
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
arb.io.out.ready := true.B
}
val arb = Module(new ExuWbArbiter(1+other.size))
arb.io.in <> mulReq(i) +: other
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
arb.io.out.ready := true.B
}
if(portUsed < numOut){
......
......@@ -35,21 +35,12 @@ class RightShiftModule extends XSModule {
val io = IO(new Bundle() {
val shamt = Input(UInt(6.W))
val srlSrc, sraSrc = Input(UInt(XLEN.W))
val srl, sra = Output(UInt(XLEN.W))
val srl_l, srl_w, sra_l, sra_w = Output(UInt(XLEN.W))
})
io.srl := io.srlSrc >> io.shamt
io.sra := (io.sraSrc.asSInt() >> io.shamt).asUInt()
}
class ShiftModule extends XSModule {
val io = IO(new Bundle() {
val shamt = Input(UInt(6.W))
val shsrc1 = Input(UInt(XLEN.W))
val sll, srl, sra = Output(UInt(XLEN.W))
})
io.sll := (io.shsrc1 << io.shamt)(XLEN-1, 0)
io.srl := io.shsrc1 >> io.shamt
io.sra := (io.shsrc1.asSInt >> io.shamt).asUInt
io.srl_l := io.srlSrc >> io.shamt
io.srl_w := io.srlSrc(31, 0) >> io.shamt
io.sra_l := (io.sraSrc.asSInt() >> io.shamt).asUInt()
io.sra_w := (Cat(Fill(32, io.sraSrc(31)), io.sraSrc(31, 0)).asSInt() >> io.shamt).asUInt()
}
class MiscResultSelect extends XSModule {
......@@ -87,17 +78,15 @@ class AluResSel extends XSModule {
io.aluRes := Cat(h32, res(31, 0))
}
class Alu extends FunctionUnit with HasRedirectOut {
val (src1, src2, func, pc, uop) = (
io.in.bits.src(0),
io.in.bits.src(1),
io.in.bits.uop.ctrl.fuOpType,
SignExt(io.in.bits.uop.cf.pc, AddrBits),
io.in.bits.uop
)
val valid = io.in.valid
class AluDataModule extends XSModule {
val io = IO(new Bundle() {
val src1, src2 = Input(UInt(XLEN.W))
val func = Input(FuOpType())
val pred_taken, isBranch = Input(Bool())
val result = Output(UInt(XLEN.W))
val taken, mispredict = Output(Bool())
})
val (src1, src2, func) = (io.src1, io.src2, io.func)
val isAdderSub = (func =/= ALUOpType.add) && (func =/= ALUOpType.addw)
val addModule = Module(new AddModule)
......@@ -121,18 +110,12 @@ class Alu extends FunctionUnit with HasRedirectOut {
val rightShiftModule = Module(new RightShiftModule)
rightShiftModule.io.shamt := shamt
rightShiftModule.io.srlSrc := Cat(
Mux(isW, 0.U(32.W), src1(63, 32)),
src1(31, 0)
)
rightShiftModule.io.sraSrc := Cat(
Mux(isW, Fill(32, src1(31)), src1(63, 32)),
src1(31, 0)
)
rightShiftModule.io.srlSrc := src1
rightShiftModule.io.sraSrc := src1
val sll = leftShiftModule.io.sll
val srl = rightShiftModule.io.srl
val sra = rightShiftModule.io.sra
val srl = Mux(isW, rightShiftModule.io.srl_w, rightShiftModule.io.srl_l)
val sra = Mux(isW, rightShiftModule.io.sra_w, rightShiftModule.io.sra_l)
val miscResSel = Module(new MiscResultSelect)
miscResSel.io.func := func(3, 0)
......@@ -160,9 +143,32 @@ class Alu extends FunctionUnit with HasRedirectOut {
ALUOpType.getBranchType(ALUOpType.blt) -> slt,
ALUOpType.getBranchType(ALUOpType.bltu) -> sltu
)
val taken = LookupTree(ALUOpType.getBranchType(func), branchOpTable) ^ ALUOpType.isBranchInvert(func)
io.result := aluRes
io.taken := taken
io.mispredict := (io.pred_taken ^ taken) && io.isBranch
}
class Alu extends FunctionUnit with HasRedirectOut {
val (src1, src2, func, pc, uop) = (
io.in.bits.src(0),
io.in.bits.src(1),
io.in.bits.uop.ctrl.fuOpType,
SignExt(io.in.bits.uop.cf.pc, AddrBits),
io.in.bits.uop
)
val valid = io.in.valid
val isBranch = ALUOpType.isBranch(func)
val taken = LookupTree(ALUOpType.getBranchType(func), branchOpTable) ^ ALUOpType.isBranchInvert(func)
val dataModule = Module(new AluDataModule)
dataModule.io.src1 := src1
dataModule.io.src2 := src2
dataModule.io.func := func
dataModule.io.pred_taken := uop.cf.pred_taken
dataModule.io.isBranch := isBranch
redirectOutValid := io.out.valid && isBranch
redirectOut := DontCare
......@@ -170,12 +176,12 @@ class Alu extends FunctionUnit with HasRedirectOut {
redirectOut.roqIdx := uop.roqIdx
redirectOut.ftqIdx := uop.cf.ftqPtr
redirectOut.ftqOffset := uop.cf.ftqOffset
redirectOut.cfiUpdate.isMisPred := (uop.cf.pred_taken ^ taken) && isBranch
redirectOut.cfiUpdate.taken := taken
redirectOut.cfiUpdate.isMisPred := dataModule.io.mispredict
redirectOut.cfiUpdate.taken := dataModule.io.taken
redirectOut.cfiUpdate.predTaken := uop.cf.pred_taken
io.in.ready := io.out.ready
io.out.valid := valid
io.out.bits.uop <> io.in.bits.uop
io.out.bits.data := aluRes
io.out.bits.data := dataModule.io.result
}
......@@ -8,14 +8,6 @@ import xiangshan._
import xiangshan.backend._
import xiangshan.backend.fu.util._
/**
  * Call-counting generator of consecutive integers.
  *
  * Each call to `hartId()` returns the current value of `x` and then
  * advances `x` by one, so successive calls yield 0, 1, 2, ...
  */
object hartId extends (() => Int) {
  // Next value to hand out; mutated on every call to apply().
  var x = 0

  /** Returns the current counter value and increments the counter. */
  def apply(): Int = {
    val current = x
    x += 1
    current
  }
}
trait HasExceptionNO {
def instrAddrMisaligned = 0
def instrAccessFault = 1
......@@ -125,6 +117,7 @@ class PerfCounterIO extends XSBundle {
class CSR extends FunctionUnit with HasCSRConst
{
val csrio = IO(new Bundle {
val hartId = Input(UInt(64.W))
// output (for func === CSROpType.jmp)
val perf = new PerfCounterIO
val isPerfCnt = Output(Bool())
......@@ -255,8 +248,7 @@ class CSR extends FunctionUnit with HasCSRConst
val mvendorid = RegInit(UInt(XLEN.W), 0.U) // this is a non-commercial implementation
val marchid = RegInit(UInt(XLEN.W), 0.U) // return 0 to indicate the field is not implemented
val mimpid = RegInit(UInt(XLEN.W), 0.U) // provides a unique encoding of the version of the processor implementation
val mhartNo = hartId()
val mhartid = RegInit(UInt(XLEN.W), mhartNo.asUInt) // the hardware thread running the code
val mhartid = RegInit(UInt(XLEN.W), csrio.hartId) // the hardware thread running the code
val mstatus = RegInit(UInt(XLEN.W), 0.U)
// mstatus Value Table
......
......@@ -14,6 +14,34 @@ trait HasRedirectOut { this: RawModule =>
val redirectOut = IO(Output(new Redirect))
}
/**
  * Datapath of the jump unit (contains no registers).
  *
  * Decodes `func` into jalr / auipc / other, picks the matching immediate
  * format out of the packed `immMin`, and produces:
  *  - `target`:  `src1 + offset`
  *  - `result`:  `target` for auipc, otherwise the sequential next pc
  *               (`pc + 2` for compressed instructions, `pc + 4` otherwise)
  *  - `isAuipc`: whether `func` decodes as auipc
  */
class JumpDataModule extends XSModule {
  val io = IO(new Bundle() {
    val src1 = Input(UInt(XLEN.W))
    val pc = Input(UInt(XLEN.W)) // sign-ext to XLEN
    val immMin = Input(UInt(ImmUnion.maxLen.W))
    val func = Input(FuOpType())
    val isRVC = Input(Bool())
    val result, target = Output(UInt(XLEN.W))
    val isAuipc = Output(Bool())
  })
  val (src1, pc, immMin, func, isRVC) = (io.src1, io.pc, io.immMin, io.func, io.isRVC)

  // Opcode class decode.
  val isJalr = JumpOpType.jumpOpisJalr(func)
  val isAuipc = JumpOpType.jumpOpisAuipc(func)

  // Immediate selection: I-type for jalr, U-type for auipc, J-type for the rest.
  val offset = SignExt(ParallelMux(Seq(
    isJalr -> ImmUnion.I.toImm32(immMin),
    isAuipc -> ImmUnion.U.toImm32(immMin),
    (!isJalr && !isAuipc) -> ImmUnion.J.toImm32(immMin)
  )), XLEN)

  // Sequential next pc: +2 for a compressed instruction, +4 otherwise.
  val snpc = Mux(isRVC, pc + 2.U, pc + 4.U)
  // NOTE: src1 is (pc/rf(rs1)), src2 is (offset)
  val target = src1 + offset

  io.isAuipc := isAuipc
  io.target := target
  io.result := Mux(isAuipc, target, snpc)
}
class Jump extends FunctionUnit with HasRedirectOut {
val (src1, jalr_target, pc, immMin, func, uop) = (
......@@ -25,41 +53,33 @@ class Jump extends FunctionUnit with HasRedirectOut {
io.in.bits.uop
)
val isJalr = JumpOpType.jumpOpisJalr(func)
val isAuipc = JumpOpType.jumpOpisAuipc(func)
val offset = SignExt(ParallelMux(Seq(
isJalr -> ImmUnion.I.toImm32(immMin),
isAuipc -> ImmUnion.U.toImm32(immMin),
!(isJalr || isAuipc) -> ImmUnion.J.toImm32(immMin)
)), XLEN)
val redirectHit = uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
val valid = io.in.valid
val isRVC = uop.cf.pd.isRVC
val snpc = Mux(isRVC, pc + 2.U, pc + 4.U)
val target = src1 + offset // NOTE: src1 is (pc/rf(rs1)), src2 is (offset)
redirectOutValid := valid && !isAuipc
val jumpDataModule = Module(new JumpDataModule)
jumpDataModule.io.src1 := src1
jumpDataModule.io.pc := pc
jumpDataModule.io.immMin := immMin
jumpDataModule.io.func := func
jumpDataModule.io.isRVC := isRVC
redirectOutValid := valid && !jumpDataModule.io.isAuipc
redirectOut := DontCare
redirectOut.cfiUpdate.target := target
redirectOut.cfiUpdate.target := jumpDataModule.io.target
redirectOut.level := RedirectLevel.flushAfter
redirectOut.roqIdx := uop.roqIdx
redirectOut.ftqIdx := uop.cf.ftqPtr
redirectOut.ftqOffset := uop.cf.ftqOffset
redirectOut.cfiUpdate.predTaken := true.B
redirectOut.cfiUpdate.taken := true.B
redirectOut.cfiUpdate.target := target
redirectOut.cfiUpdate.isMisPred := target =/= jalr_target || !uop.cf.pred_taken
// Output
val res = Mux(JumpOpType.jumpOpisAuipc(func), target, snpc)
redirectOut.cfiUpdate.target := jumpDataModule.io.target
redirectOut.cfiUpdate.isMisPred := jumpDataModule.io.target =/= jalr_target || !uop.cf.pred_taken
io.in.ready := io.out.ready
io.out.valid := valid
io.out.bits.uop <> io.in.bits.uop
io.out.bits.data := res
io.out.bits.data := jumpDataModule.io.result
// NOTE: the debug info assumes one-cycle execution; if FMV ever becomes multi-cycle, it may need to change
XSDebug(io.in.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d %d)\n",
......@@ -71,5 +91,4 @@ class Jump extends FunctionUnit with HasRedirectOut {
io.redirectIn.bits.level,
redirectHit
)
XSDebug(io.in.valid, "src1:%x offset:%x func:%b type:JUMP pc:%x res:%x\n", src1, offset, func, pc, res)
}
......@@ -42,14 +42,15 @@ class NaiveMultiplier(len: Int, val latency: Int)
XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")
}
class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len) with HasPipelineReg {
override def latency = doReg.size
class ArrayMulDataModule(len: Int, doReg: Seq[Int]) extends XSModule {
val io = IO(new Bundle() {
val a, b = Input(UInt(len.W))
val regEnables = Input(Vec(doReg.size, Bool()))
val result = Output(UInt((2 * len).W))
})
val (a, b) = (io.a, io.b)
val doRegSorted = doReg.sortWith(_ < _)
val (a, b) = (io.in.bits.src(0), io.in.bits.src(1))
val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len+1).W))
b_sext := SignExt(b, len+1)
bx2 := b_sext << 1
......@@ -149,7 +150,7 @@ class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len)
val needReg = doRegSorted.contains(depth)
val toNextLayer = if(needReg)
columns_next.map(_.map(PipelineReg(doRegSorted.indexOf(depth) + 1)(_)))
columns_next.map(_.map(x => RegEnable(x, io.regEnables(doRegSorted.indexOf(depth)))))
else
columns_next
......@@ -158,7 +159,18 @@ class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len)
}
val (sum, carry) = addAll(cols = columns, depth = 0)
val result = sum + carry
io.result := sum + carry
}
class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len) with HasPipelineReg {
override def latency = doReg.size
val mulDataModule = Module(new ArrayMulDataModule(len, doReg))
mulDataModule.io.a := io.in.bits.src(0)
mulDataModule.io.b := io.in.bits.src(1)
mulDataModule.io.regEnables := VecInit((1 to doReg.size) map (i => regEnable(i)))
val result = mulDataModule.io.result
var ctrlVec = Seq(ctrl)
for(i <- 1 to latency){
......
......@@ -4,120 +4,134 @@ import chisel3._
import chisel3.stage.{ChiselGeneratorAnnotation, ChiselStage}
import chisel3.util._
import utils.SignExt
import xiangshan.XSModule
import xiangshan.backend.fu.util.CSA3_2
/** A Radix-4 SRT Integer Divider
*
* 2 ~ (5 + (len+3)/2) cycles are needed for each division.
*/
class SRT4Divider(len: Int) extends AbstractDivider(len) {
class SRT4DividerDataModule(len: Int) extends Module {
val io = IO(new Bundle() {
val src1, src2 = Input(UInt(len.W))
val valid, sign, kill_w, kill_r, isHi, isW = Input(Bool())
val in_ready = Output(Bool())
val out_valid = Output(Bool())
val out_data = Output(UInt(len.W))
val out_ready = Input(Bool())
})
val (a, b, sign, valid, kill_w, kill_r, isHi, isW) =
(io.src1, io.src2, io.sign, io.valid, io.kill_w, io.kill_r, io.isHi, io.isW)
val in_fire = valid && io.in_ready
val out_fire = io.out_ready && io.out_valid
val s_idle :: s_lzd :: s_normlize :: s_recurrence :: s_recovery_1 :: s_recovery_2 :: s_finish :: Nil = Enum(7)
val state = RegInit(s_idle)
val newReq = (state === s_idle) && io.in.fire()
val cnt_next = Wire(UInt(log2Up((len+3)/2).W))
val cnt = RegEnable(cnt_next, state===s_normlize || state===s_recurrence)
val cnt_next = Wire(UInt(log2Up((len + 3) / 2).W))
val cnt = RegEnable(cnt_next, state === s_normlize || state === s_recurrence)
val rec_enough = cnt_next === 0.U
val newReq = in_fire
def abs(a: UInt, sign: Bool): (Bool, UInt) = {
val s = a(len - 1) && sign
(s, Mux(s, -a, a))
}
val (a, b) = (io.in.bits.src(0), io.in.bits.src(1))
val uop = io.in.bits.uop
val (aSign, aVal) = abs(a, sign)
val (bSign, bVal) = abs(b, sign)
val aSignReg = RegEnable(aSign, newReq)
val qSignReg = RegEnable(aSign ^ bSign, newReq)
val uopReg = RegEnable(uop, newReq)
val ctrlReg = RegEnable(ctrl, newReq)
val divZero = b === 0.U
val divZeroReg = RegEnable(divZero, newReq)
val kill = state=/=s_idle && uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)
switch(state){
is(s_idle){
when (io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)) {
switch(state) {
is(s_idle) {
when(in_fire && !kill_w) {
state := Mux(divZero, s_finish, s_lzd)
}
}
is(s_lzd){ // leading zero detection
is(s_lzd) { // leading zero detection
state := s_normlize
}
is(s_normlize){ // shift a/b
is(s_normlize) { // shift a/b
state := s_recurrence
}
is(s_recurrence){ // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
when(rec_enough){ state := s_recovery_1 }
is(s_recurrence) { // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
when(rec_enough) {
state := s_recovery_1
}
}
is(s_recovery_1){ // if rem < 0, rem = rem + d
is(s_recovery_1) { // if rem < 0, rem = rem + d
state := s_recovery_2
}
is(s_recovery_2){ // recovery shift
is(s_recovery_2) { // recovery shift
state := s_finish
}
is(s_finish){
when(io.out.fire()){ state := s_idle }
is(s_finish) {
when(out_fire) {
state := s_idle
}
}
}
when(kill){
when(kill_r) {
state := s_idle
}
/** Calculate abs(a)/abs(b) by recurrence
*
* ws, wc: partial remainder in carry-save form,
* in recurrence steps, ws/wc = 4ws[j]/4wc[j];
* in recovery step, ws/wc = ws[j]/wc[j];
* in final step, ws = abs(a)/abs(b).
* in recurrence steps, ws/wc = 4ws[j]/4wc[j];
* in recovery step, ws/wc = ws[j]/wc[j];
* in final step, ws = abs(a)/abs(b).
*
* d: normalized divisor (1/2 <= d < 1)
*
* wLen = 3 integer bits + (len+1) frac bits
*/
def wLen = 3 + len + 1
val ws, wc = Reg(UInt(wLen.W))
val ws_next, wc_next = Wire(UInt(wLen.W))
val d = Reg(UInt(wLen.W))
val aLeadingZeros = RegEnable(
next = PriorityEncoder(ws(len-1, 0).asBools().reverse),
enable = state===s_lzd
next = PriorityEncoder(ws(len - 1, 0).asBools().reverse),
enable = state === s_lzd
)
val bLeadingZeros = RegEnable(
next = PriorityEncoder(d(len-1, 0).asBools().reverse),
enable = state===s_lzd
next = PriorityEncoder(d(len - 1, 0).asBools().reverse),
enable = state === s_lzd
)
val diff = Cat(0.U(1.W), bLeadingZeros).asSInt() - Cat(0.U(1.W), aLeadingZeros).asSInt()
val isNegDiff = diff(diff.getWidth - 1)
val quotientBits = Mux(isNegDiff, 0.U, diff.asUInt())
val qBitsIsOdd = quotientBits(0)
val recoveryShift = RegEnable(len.U - bLeadingZeros, state===s_normlize)
val recoveryShift = RegEnable(len.U - bLeadingZeros, state === s_normlize)
val a_shifted, b_shifted = Wire(UInt(len.W))
a_shifted := Mux(isNegDiff,
ws(len-1, 0) << bLeadingZeros,
ws(len-1, 0) << aLeadingZeros
ws(len - 1, 0) << bLeadingZeros,
ws(len - 1, 0) << aLeadingZeros
)
b_shifted := d(len-1, 0) << bLeadingZeros
b_shifted := d(len - 1, 0) << bLeadingZeros
val rem_temp = ws + wc
val rem_fixed = Mux(rem_temp(wLen-1), rem_temp + d, rem_temp)
val rem_abs = (ws << recoveryShift)(2*len, len+1)
val rem_fixed = Mux(rem_temp(wLen - 1), rem_temp + d, rem_temp)
val rem_abs = (ws << recoveryShift) (2 * len, len + 1)
when(newReq){
when(newReq) {
ws := Cat(0.U(4.W), Mux(divZero, a, aVal))
wc := 0.U
d := Cat(0.U(4.W), bVal)
}.elsewhen(state === s_normlize){
}.elsewhen(state === s_normlize) {
d := Cat(0.U(3.W), b_shifted, 0.U(1.W))
ws := Mux(qBitsIsOdd, a_shifted, a_shifted << 1)
}.elsewhen(state === s_recurrence){
}.elsewhen(state === s_recurrence) {
ws := Mux(rec_enough, ws_next, ws_next << 2)
wc := Mux(rec_enough, wc_next, wc_next << 2)
}.elsewhen(state === s_recovery_1){
}.elsewhen(state === s_recovery_1) {
ws := rem_fixed
}.elsewhen(state === s_recovery_2){
}.elsewhen(state === s_recovery_2) {
ws := rem_abs
}
......@@ -140,8 +154,8 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_dx2 -> 2.U(2.W)
))
val w_truncated = (ws(wLen-1, wLen-1-6) + wc(wLen-1, wLen-1-6)).asSInt()
val d_truncated = d(len-1, len-3)
val w_truncated = (ws(wLen - 1, wLen - 1 - 6) + wc(wLen - 1, wLen - 1 - 6)).asSInt()
val d_truncated = d(len - 1, len - 3)
val qSelTable = Array(
Array(12, 4, -4, -13),
......@@ -156,9 +170,9 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
// ge(x): w_truncated >= x
var ge = Map[Int, Bool]()
for(row <- qSelTable){
for(k <- row){
if(!ge.contains(k)) ge = ge + (k -> (w_truncated >= k.S(7.W)))
for (row <- qSelTable) {
for (k <- row) {
if (!ge.contains(k)) ge = ge + (k -> (w_truncated >= k.S(7.W)))
}
}
q_sel := MuxLookup(d_truncated, sel_0,
......@@ -169,7 +183,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
ge(x(2)) -> sel_0,
ge(x(3)) -> sel_neg_d
))
).zipWithIndex.map({case(v, i) => i.U -> v})
).zipWithIndex.map({ case (v, i) => i.U -> v })
)
/** Calculate (ws[j+1],wc[j+1]) by a [3-2]carry-save adder
......@@ -178,7 +192,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
*/
val csa = Module(new CSA3_2(wLen))
csa.io.in(0) := ws
csa.io.in(1) := Cat(wc(wLen-1, 2), wc_adj)
csa.io.in(1) := Cat(wc(wLen - 1, 2), wc_adj)
csa.io.in(2) := MuxLookup(q_sel, 0.U, Seq(
sel_d -> neg_d,
sel_dx2 -> neg_dx2,
......@@ -190,10 +204,10 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
// On the fly quotient conversion
val q, qm = Reg(UInt(len.W))
when(newReq){
when(newReq) {
q := 0.U
qm := 0.U
}.elsewhen(state === s_recurrence){
}.elsewhen(state === s_recurrence) {
val qMap = Seq(
sel_0 -> (q, 0),
sel_d -> (q, 1),
......@@ -202,7 +216,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_neg_dx2 -> (qm, 2)
)
q := MuxLookup(q_sel, 0.U,
qMap.map(m => m._1 -> Cat(m._2._1(len-3, 0), m._2._2.U(2.W)))
qMap.map(m => m._1 -> Cat(m._2._1(len - 3, 0), m._2._2.U(2.W)))
)
val qmMap = Seq(
sel_0 -> (qm, 3),
......@@ -212,27 +226,53 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_neg_dx2 -> (qm, 1)
)
qm := MuxLookup(q_sel, 0.U,
qmMap.map(m => m._1 -> Cat(m._2._1(len-3, 0), m._2._2.U(2.W)))
qmMap.map(m => m._1 -> Cat(m._2._1(len - 3, 0), m._2._2.U(2.W)))
)
}.elsewhen(state === s_recovery_1){
q := Mux(rem_temp(wLen-1), qm, q)
}.elsewhen(state === s_recovery_1) {
q := Mux(rem_temp(wLen - 1), qm, q)
}
val remainder = Mux(aSignReg, -ws(len-1, 0), ws(len-1, 0))
val remainder = Mux(aSignReg, -ws(len - 1, 0), ws(len - 1, 0))
val quotient = Mux(qSignReg, -q, q)
val res = Mux(ctrlReg.isHi,
Mux(divZeroReg, ws(len-1, 0), remainder),
val res = Mux(isHi,
Mux(divZeroReg, ws(len - 1, 0), remainder),
Mux(divZeroReg, Fill(len, 1.U(1.W)), quotient)
)
io.in.ready := state===s_idle
io.out.valid := state===s_finish
io.out.bits.data := Mux(ctrlReg.isW,
io.out_data := Mux(isW,
SignExt(res(31, 0), len),
res
)
io.out.bits.uop := uopReg
io.in_ready := state === s_idle
io.out_valid := state === s_finish
}
/**
  * Function-unit shell around [[SRT4DividerDataModule]].
  *
  * This wrapper keeps the micro-op bookkeeping (`uopReg`, `ctrlReg`) and the
  * flush checks, while the divider state machine and arithmetic live in the
  * data module. Handshake signals are passed straight through.
  *
  * @param len operand width in bits, forwarded to the data module
  */
class SRT4Divider(len: Int) extends AbstractDivider(len) {
  val divDataModule = Module(new SRT4DividerDataModule(len))

  // Latch the micro-op and its control bits whenever a request is accepted.
  val newReq = io.in.fire()
  val uop = io.in.bits.uop
  val uopReg = RegEnable(uop, newReq)
  val ctrlReg = RegEnable(ctrl, newReq)

  // kill_w: the request arriving right now is being flushed.
  // kill_r: the latched request is being flushed while the data module is busy
  //         (in_ready low).
  val kill_w = uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
  val kill_r = uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn) && !divDataModule.io.in_ready

  // Request side.
  divDataModule.io.valid := io.in.valid
  divDataModule.io.src1 := io.in.bits.src(0)
  divDataModule.io.src2 := io.in.bits.src(1)
  divDataModule.io.sign := sign
  io.in.ready := divDataModule.io.in_ready

  // Flush controls.
  divDataModule.io.kill_w := kill_w
  divDataModule.io.kill_r := kill_r

  // Result selection uses the latched control bits, not those of the incoming uop.
  divDataModule.io.isHi := ctrlReg.isHi
  divDataModule.io.isW := ctrlReg.isW

  // Response side.
  divDataModule.io.out_ready := io.out.ready
  io.out.valid := divDataModule.io.out_valid
  io.out.bits.data := divDataModule.io.out_data
  io.out.bits.uop := uopReg
}
......@@ -5,7 +5,15 @@ import chisel3.util._
import freechips.rocketchip.tile.FType
import hardfloat.{DivSqrtRecFNToRaw_small, RoundAnyRawFNToRecFN}
class FDivSqrt extends FPUSubModule {
class FDivSqrtDataModule extends FPUDataModule {
val in_valid, out_ready = IO(Input(Bool()))
val in_ready, out_valid = IO(Output(Bool()))
val kill_w = IO(Input(Bool()))
val kill_r = IO(Input(Bool()))
val in_fire = in_valid && in_ready
val out_fire = out_valid && out_ready
val killReg = RegInit(false.B)
val s_idle :: s_div :: s_finish :: Nil = Enum(3)
val state = RegInit(s_idle)
......@@ -13,48 +21,42 @@ class FDivSqrt extends FPUSubModule {
val divSqrt = Module(new DivSqrtRecFNToRaw_small(FType.D.exp, FType.D.sig, 0))
val divSqrtRawValid = divSqrt.io.rawOutValid_sqrt || divSqrt.io.rawOutValid_div
val fpCtrl = io.in.bits.uop.ctrl.fpu
val fpCtrl = io.in.fpCtrl
val tag = fpCtrl.typeTagIn
val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
val single = RegEnable(tag === S, io.in.fire())
val rmReg = RegEnable(rm, io.in.fire())
val kill = uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)
val killReg = RegInit(false.B)
val single = RegEnable(tag === S, in_fire)
val rmReg = RegEnable(rm, in_fire)
switch(state){
is(s_idle){
when(io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)){ state := s_div }
when(in_fire && !kill_w){ state := s_div }
}
is(s_div){
when(divSqrtRawValid){
when(kill || killReg){
when(kill_r || killReg){
state := s_idle
killReg := false.B
}.otherwise({
state := s_finish
})
}.elsewhen(kill){
}.elsewhen(kill_r){
killReg := true.B
}
}
is(s_finish){
when(io.out.fire() || kill){
when(out_fire || kill_r){
state := s_idle
}
}
}
val src1 = unbox(io.in.bits.src(0), tag, None)
val src2 = unbox(io.in.bits.src(1), tag, None)
divSqrt.io.inValid := io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
val src1 = unbox(io.in.src(0), tag, None)
val src2 = unbox(io.in.src(1), tag, None)
divSqrt.io.inValid := in_fire && !kill_w
divSqrt.io.sqrtOp := fpCtrl.sqrt
divSqrt.io.a := src1
divSqrt.io.b := src2
divSqrt.io.roundingMode := rm
val round32 = Module(new RoundAnyRawFNToRecFN(
FType.D.exp, FType.D.sig+2, FType.S.exp, FType.S.sig, 0
))
......@@ -73,9 +75,25 @@ class FDivSqrt extends FPUSubModule {
val data = Mux(single, round32.io.out, round64.io.out)
val flags = Mux(single, round32.io.exceptionFlags, round64.io.exceptionFlags)
io.in.ready := state===s_idle
io.out.valid := state===s_finish && !killReg
io.out.bits.uop := uopReg
io.out.bits.data := RegNext(data, divSqrtRawValid)
in_ready := state===s_idle
out_valid := state===s_finish && !killReg
io.out.data := RegNext(data, divSqrtRawValid)
fflags := RegNext(flags, divSqrtRawValid)
}
// Function-unit shell around FDivSqrtDataModule.
// Keeps the micro-op of the accepted request and performs the flush checks here;
// the handshake (in_valid/in_ready, out_valid/out_ready) is passed through to the data module.
class FDivSqrt extends FPUSubModule {
// Micro-op latched when a request is accepted; it is returned on io.out.bits.uop.
val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
// Flush check against the latched (in-flight) micro-op.
val kill_r = uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)
override val dataModule = Module(new FDivSqrtDataModule)
// connectDataModule is defined outside this view; presumably it wires the common
// src/fpCtrl/rm/data/fflags ports of FPUDataModule — confirm before reordering.
connectDataModule
dataModule.in_valid := io.in.valid
dataModule.out_ready := io.out.ready
// kill_w: flush check against the micro-op arriving this cycle.
dataModule.kill_w := io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
dataModule.kill_r := kill_r
io.in.ready := dataModule.in_ready
io.out.valid := dataModule.out_valid
io.out.bits.uop := uopReg
}
package xiangshan.backend.fu.fpu
import chisel3._
import chisel3.util.RegEnable
import freechips.rocketchip.tile.FType
import hardfloat.{MulAddRecFN_pipeline_stage1, MulAddRecFN_pipeline_stage2, MulAddRecFN_pipeline_stage3, MulAddRecFN_pipeline_stage4, RoundAnyRawFNToRecFN}
import xiangshan.backend.fu.FunctionUnit
class FMA extends FPUPipelineModule {
override def latency: Int = FunctionUnit.fmacCfg.latency.latencyVal.get
class FMADataModule(latency: Int) extends FPUDataModule {
val fpCtrl = io.in.bits.uop.ctrl.fpu
val regEnables = IO(Input(Vec(latency, Bool())))
val typeTagOut = IO(Input(UInt(2.W)))
val fpCtrl = io.in.fpCtrl
val typeTagIn = fpCtrl.typeTagIn
val src1 = unbox(io.in.bits.src(0), typeTagIn, None)
val src2 = unbox(io.in.bits.src(1), typeTagIn, None)
val src3 = unbox(io.in.bits.src(2), typeTagIn, None)
val src1 = unbox(io.in.src(0), typeTagIn, None)
val src2 = unbox(io.in.src(1), typeTagIn, None)
val src3 = unbox(io.in.src(2), typeTagIn, None)
val (in1, in2, in3) = (
WireInit(src1), WireInit(src2), WireInit(Mux(fpCtrl.isAddSub, src2, src3))
)
......@@ -34,7 +37,7 @@ class FMA extends FPUPipelineModule {
))
mul.io.a := stage1.io.mulAddA
mul.io.b := stage1.io.mulAddB
mul.io.reg_en := regEnable(1)
mul.io.reg_en := regEnables(0)
stage2.io.mulSum := mul.io.sum
stage2.io.mulCarry := mul.io.carry
......@@ -54,10 +57,10 @@ class FMA extends FPUPipelineModule {
stage1.io.in.bits.roundingMode := rm
stage1.io.in.bits.detectTininess := hardfloat.consts.tininess_afterRounding
stage2.io.fromStage1.bits <> S1Reg(stage1.io.toStage2.bits)
stage3.io.fromStage2.bits <> S2Reg(stage2.io.toStage3.bits)
stage4.io.fromStage3.bits <> S3Reg(stage3.io.toStage4.bits)
val stage4toStage5 = S4Reg(stage4.io.toStage5.bits)
stage2.io.fromStage1.bits <> RegEnable(stage1.io.toStage2.bits, regEnables(0))
stage3.io.fromStage2.bits <> RegEnable(stage2.io.toStage3.bits, regEnables(1))
stage4.io.fromStage3.bits <> RegEnable(stage3.io.toStage4.bits, regEnables(2))
val stage4toStage5 = RegEnable(stage4.io.toStage5.bits, regEnables(3))
val rounders = Seq(FType.S, FType.D).map(t => {
val rounder = Module(new RoundAnyRawFNToRecFN(FType.D.exp, FType.D.sig+2, t.exp, t.sig, 0))
......@@ -69,8 +72,8 @@ class FMA extends FPUPipelineModule {
rounder
})
val singleOut = io.out.bits.uop.ctrl.fpu.typeTagOut === S
io.out.bits.data := Mux(singleOut,
val singleOut = typeTagOut === S
io.out.data := Mux(singleOut,
sanitizeNaN(rounders(0).io.out, FType.S),
sanitizeNaN(rounders(1).io.out, FType.D)
)
......@@ -79,3 +82,12 @@ class FMA extends FPUPipelineModule {
rounders(1).io.exceptionFlags
)
}
// Pipelined function-unit shell around FMADataModule.
// The pipeline control (regEnable) stays here; the arithmetic lives in the data module.
class FMA extends FPUPipelineModule {
// Latency comes from the fmac function-unit config; `.get` fails at elaboration if latencyVal is empty.
override def latency: Int = FunctionUnit.fmacCfg.latency.latencyVal.get
override val dataModule = Module(new FMADataModule(latency))
// connectDataModule is defined outside this view; presumably it wires the common
// FPUDataModule ports — confirm before reordering.
connectDataModule
// regEnable(i) is queried for i = 1..latency, while the Vec is 0-indexed: regEnables(i-1) = regEnable(i).
// FMADataModule indexes regEnables(0) through regEnables(3), so latency is expected to be at least 4.
dataModule.regEnables <> VecInit((1 to latency) map (i => regEnable(i)))
// The data module selects single vs. double rounding from the output micro-op's type tag.
dataModule.typeTagOut := io.out.bits.uop.ctrl.fpu.typeTagOut
}
......@@ -8,18 +8,18 @@ import chisel3.util._
import hardfloat.CompareRecFN
import xiangshan.backend.fu.FunctionUnit
class FPToFP extends FPUPipelineModule{
class FPToFPDataModule(latency: Int) extends FPUDataModule {
override def latency: Int = FunctionUnit.f2iCfg.latency.latencyVal.get
val regEnables = IO(Input(Vec(latency, Bool())))
val ctrlIn = io.in.bits.uop.ctrl.fpu
val ctrl = S1Reg(ctrlIn)
val ctrlIn = io.in.fpCtrl
val ctrl = RegEnable(ctrlIn, regEnables(0))
val inTag = ctrl.typeTagIn
val outTag = ctrl.typeTagOut
val wflags = ctrl.wflags
val src1 = S1Reg(unbox(io.in.bits.src(0), ctrlIn.typeTagIn, None))
val src2 = S1Reg(unbox(io.in.bits.src(1), ctrlIn.typeTagIn, None))
val rmReg = S1Reg(rm)
val src1 = RegEnable(unbox(io.in.src(0), ctrlIn.typeTagIn, None), regEnables(0))
val src2 = RegEnable(unbox(io.in.src(1), ctrlIn.typeTagIn, None), regEnables(0))
val rmReg = RegEnable(rm, regEnables(0))
val signNum = Mux(rmReg(1), src1 ^ src2, Mux(rmReg(0), ~src2, src2))
val fsgnj = Cat(signNum(fLen), src1(fLen-1, 0))
......@@ -79,6 +79,15 @@ class FPToFP extends FPUPipelineModule{
}
}
io.out.bits.data := S2Reg(mux.data)
fflags := S2Reg(mux.exc)
io.out.data := RegEnable(mux.data, regEnables(1))
fflags := RegEnable(mux.exc, regEnables(1))
}
// Pipelined function-unit shell around FPToFPDataModule.
class FPToFP extends FPUPipelineModule{
// NOTE(review): latency is read from f2iCfg rather than an FP-to-FP specific config — confirm this is intentional.
// `.get` fails at elaboration if latencyVal is empty.
override def latency: Int = FunctionUnit.f2iCfg.latency.latencyVal.get
override val dataModule = Module(new FPToFPDataModule(latency))
// connectDataModule is defined outside this view; presumably it wires the common
// FPUDataModule ports — confirm before reordering.
connectDataModule
// regEnable(i) is queried for i = 1..latency, while the Vec is 0-indexed: regEnables(i-1) = regEnable(i).
// FPToFPDataModule indexes regEnables(0) and regEnables(1), so latency is expected to be at least 2.
dataModule.regEnables <> VecInit((1 to latency) map (i => regEnable(i)))
}
......@@ -10,19 +10,18 @@ import hardfloat.RecFNToIN
import utils.SignExt
import xiangshan.backend.fu.FunctionUnit
class FPToInt extends FPUPipelineModule {
override def latency = FunctionUnit.f2iCfg.latency.latencyVal.get
val (src1, src2) = (io.in.bits.src(0), io.in.bits.src(1))
class FPToIntDataModule(latency: Int) extends FPUDataModule {
val regEnables = IO(Input(Vec(latency, Bool())))
val (src1, src2) = (io.in.src(0), io.in.src(1))
val ctrl = io.in.bits.uop.ctrl.fpu
val ctrl = io.in.fpCtrl
// stage 1: unbox inputs
val src1_d = S1Reg(unbox(src1, ctrl.typeTagIn, None))
val src2_d = S1Reg(unbox(src2, ctrl.typeTagIn, None))
val ctrl_reg = S1Reg(ctrl)
val rm_reg = S1Reg(rm)
val src1_d = RegEnable(unbox(src1, ctrl.typeTagIn, None), regEnables(0))
val src2_d = RegEnable(unbox(src2, ctrl.typeTagIn, None), regEnables(0))
val ctrl_reg = RegEnable(ctrl, regEnables(0))
val rm_reg = RegEnable(rm, regEnables(0))
// stage2
......@@ -79,13 +78,22 @@ class FPToInt extends FPUPipelineModule {
Mux(rm_reg(0), classify_out, move_out)
)
val doubleOut = Mux(ctrl_reg.fcvt, ctrl_reg.typ(1), ctrl_reg.fmt(0))
val intValue = S2Reg(Mux(doubleOut,
val intValue = RegEnable(Mux(doubleOut,
SignExt(intData, XLEN),
SignExt(intData(31, 0), XLEN)
))
), regEnables(1))
val exc = S2Reg(Mux(ctrl_reg.fcvt, conv_exc, dcmp_exc))
val exc = RegEnable(Mux(ctrl_reg.fcvt, conv_exc, dcmp_exc), regEnables(1))
io.out.bits.data := intValue
io.out.data := intValue
fflags := exc
}
/**
 * Pipelined FP-to-integer unit.
 *
 * A thin wrapper that instantiates [[FPToIntDataModule]] as `dataModule` and
 * drives its stage enables from the enclosing pipeline.
 */
class FPToInt extends FPUPipelineModule {
  // Pipeline depth is read from the f2i function-unit configuration.
  override def latency: Int = FunctionUnit.f2iCfg.latency.latencyVal.get

  override val dataModule = Module(new FPToIntDataModule(latency))

  // Hook the operand / control / result ports of the data module up to this unit's IO.
  connectDataModule

  // regEnable is queried for stages 1..latency; the data module's Vec is indexed from 0.
  dataModule.regEnables <> VecInit((1 to latency).map(regEnable(_)))
}
......@@ -2,6 +2,7 @@ package xiangshan.backend.fu.fpu
import chisel3._
import chisel3.util._
import xiangshan.{FPUCtrlSignals, XSModule}
import xiangshan.backend.fu.{FuConfig, FunctionUnit, HasPipelineReg}
trait HasUIntToSIntHelper {
......@@ -10,11 +11,36 @@ trait HasUIntToSIntHelper {
}
}
/**
 * Common base for the FPU datapath modules.
 *
 * Declares one shared IO bundle (operands, FPU control signals and `rm` in;
 * data and `fflags` out) so that a wrapper can connect any concrete data
 * module the same way.
 */
abstract class FPUDataModule extends XSModule {
val io = IO(new Bundle() {
// Inputs: three 65-bit source operands, the FPU control signals, and a 3-bit rm value
// (presumably the rounding mode -- confirm against the decoder).
val in = Input(new Bundle() {
val src = Vec(3, UInt(65.W))
val fpCtrl = new FPUCtrlSignals
val rm = UInt(3.W)
})
// Outputs: a 65-bit result and 5 bits of fflags (presumably the FP exception flags -- confirm).
val out = Output(new Bundle() {
val data = UInt(65.W)
val fflags = UInt(5.W)
})
})
// Short aliases so subclasses can read `rm` and drive `fflags` without spelling out the io path.
val rm = io.in.rm
val fflags = io.out.fflags
}
/**
 * Base class for FPU function units that delegate their datapath to an
 * [[FPUDataModule]].
 *
 * Subclasses must provide `dataModule`; `connectDataModule` then wires this
 * unit's operand/control inputs into it and its results back out.
 */
abstract class FPUSubModule extends FunctionUnit(len = 65)
with HasUIntToSIntHelper
{
// Extra ports next to the FunctionUnit io: a 3-bit rm input and a 5-bit fflags output.
val rm = IO(Input(UInt(3.W)))
val fflags = IO(Output(UInt(5.W)))
// Abstract: each concrete unit instantiates its own data module.
val dataModule: FPUDataModule
// Connects the data module's inputs (src, fpCtrl, rm) from this unit and drives
// io.out.bits.data and fflags from the data module's outputs.
// Handshake signals and the output uop are not touched here.
def connectDataModule = {
dataModule.io.in.src <> io.in.bits.src
dataModule.io.in.fpCtrl <> io.in.bits.uop.ctrl.fpu
dataModule.io.in.rm <> rm
io.out.bits.data := dataModule.io.out.data
fflags := dataModule.io.out.fflags
}
}
abstract class FPUPipelineModule
......
......@@ -8,41 +8,50 @@ import chisel3.util._
import hardfloat.INToRecFN
import utils.{SignExt, ZeroExt}
class IntToFP extends FPUSubModule {
class IntToFPDataModule extends FPUDataModule {
val in_valid, out_ready = IO(Input(Bool()))
val in_ready, out_valid = IO(Output(Bool()))
val kill_w, kill_r = IO(Input(Bool()))
val s_idle :: s_cvt :: s_finish :: Nil = Enum(3)
val s_idle :: s_cvt :: s_ieee :: s_finish :: Nil = Enum(4)
val state = RegInit(s_idle)
io.in.ready := state === s_idle
io.out.valid := state === s_finish
val src1 = RegEnable(io.in.bits.src(0)(XLEN-1, 0), io.in.fire())
val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
val rmReg = RegEnable(rm, io.in.fire())
val in_fire = in_valid && in_ready
val out_fire = out_valid && out_ready
in_ready := state === s_idle
out_valid := state === s_finish
val src1 = RegEnable(io.in.src(0)(XLEN-1, 0), in_fire)
val rmReg = RegEnable(rm, in_fire)
val ctrl = RegEnable(io.in.fpCtrl, in_fire)
switch(state){
is(s_idle){
when(io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)){
when(in_fire && !kill_w){
state := s_cvt
}
}
is(s_cvt){
state := s_ieee
}
is(s_ieee){
state := s_finish
}
is(s_finish){
when(io.out.fire()){
when(out_fire){
state := s_idle
}
}
}
when(state =/= s_idle && uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)){
when(state =/= s_idle && kill_r){
state := s_idle
}
/*
s_cvt
*/
val ctrl = uopReg.ctrl.fpu
val tag = ctrl.typeTagIn
val typ = ctrl.typ
val wflags = ctrl.wflags
......@@ -73,9 +82,26 @@ class IntToFP extends FPUSubModule {
mux.exc := VecInit(exc)(tag)
}
val muxReg = RegEnable(mux, enable = state === s_cvt)
val muxReg = Reg(mux.cloneType)
when(state === s_cvt){
muxReg := mux
}.elsewhen(state === s_ieee){
muxReg.data := ieee(box(muxReg.data, ctrl.typeTagOut))
}
fflags := muxReg.exc
io.out.data := muxReg.data
}
/**
 * Integer-to-FP conversion unit.
 *
 * Handshake and flush shell around [[IntToFPDataModule]]: the data module owns
 * the conversion state machine and the result register, while this wrapper
 * forwards valid/ready, tracks the in-flight uop and derives the kill signals.
 */
class IntToFP extends FPUSubModule {
  override val dataModule = Module(new IntToFPDataModule)

  // Handshake into the data module.
  dataModule.in_valid := io.in.valid
  dataModule.out_ready := io.out.ready

  // Operands, control, rm in; result data and fflags out.
  connectDataModule

  // Keep the accepted uop: it is checked against later flushes and returned with the result.
  val uopReg = RegEnable(io.in.bits.uop, io.in.fire())

  // kill_w: the incoming uop is being flushed, so the data module must not start on it.
  // kill_r: the in-flight uop is being flushed, so the data module returns to idle.
  dataModule.kill_w := io.in.bits.uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
  dataModule.kill_r := uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)

  io.in.ready := dataModule.in_ready
  io.out.valid := dataModule.out_valid
  io.out.bits.uop := uopReg
  // io.out.bits.data is already driven by connectDataModule from dataModule.io.out.data.
  // It must not be re-assigned here: muxReg and ctrl are internal to the data module,
  // and a later assignment would override the data module's result.
}
......@@ -100,6 +100,7 @@ class ReservationStation
val fromDispatch = Flipped(DecoupledIO(new MicroOp))
val deq = DecoupledIO(new ExuInput)
val srcRegValue = Input(Vec(srcNum, UInt(srcLen.W)))
val fpRegValue = if (exuCfg == Exu.stExeUnitCfg) Input(UInt(srcLen.W)) else null
val jumpPc = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val jalr_target = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
......@@ -130,7 +131,7 @@ class ReservationStation
select.io.memfeedback := io.memfeedback
}
ctrl.io.in.valid := select.io.enq.fire() && !(io.redirect.valid || io.flush) // NOTE: same as select
ctrl.io.in.valid := select.io.enq.fire()// && !(io.redirect.valid || io.flush) // NOTE: same as select
ctrl.io.flush := io.flush
ctrl.io.in.bits.addr := select.io.enq.bits
ctrl.io.in.bits.uop := io.fromDispatch.bits
......@@ -155,6 +156,9 @@ class ReservationStation
data.io.jumpPc := io.jumpPc
data.io.jalr_target := io.jalr_target
}
if (exuCfg == Exu.stExeUnitCfg) {
data.io.fpRegValue := io.fpRegValue
}
data.io.sel := select.io.deq.bits
data.io.listen.wen := ctrl.io.listen
for (i <- 0 until fastPortsCnt) {
......@@ -345,7 +349,8 @@ class ReservationStationSelect
val enqueue = io.enq.fire() && !(io.redirect.valid || io.flush)
val tailInc = tailPtr + 1.U
val tailDec = tailPtr - 1.U
tailPtr := Mux(dequeue === enqueue, tailPtr, Mux(dequeue, tailDec, tailInc))
val nextTailPtr = Mux(dequeue === enqueue, tailPtr, Mux(dequeue, tailDec, tailInc))
tailPtr := nextTailPtr
val enqPtr = Mux(tailPtr.flag, deqPtr, tailPtr.value)
val enqIdx = indexQueue(enqPtr)
......@@ -362,7 +367,7 @@ class ReservationStationSelect
io.deq.valid := selectValid
io.deq.bits := selectIndex
io.numExist := Mux(tailPtr.flag, (iqSize-1).U, tailPtr.value)
io.numExist := RegNext(Mux(nextTailPtr.flag, (iqSize-1).U, nextTailPtr.value))
assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
}
......@@ -450,6 +455,15 @@ class ReservationStationCtrl
when (enqEn) {
srcQueue(enqPtr).zip(enqSrcReady).map{ case (s, e) => s := e }
}
// NOTE: delay the ready bit by one cycle, because the fp src arrives one cycle later than usual
if (exuCfg == Exu.stExeUnitCfg) {
when (enqEn) {
when (enqUop.ctrl.src2Type === SrcType.fp) { srcQueue(enqPtr)(1) := false.B }
}
when (enqEnReg && RegNext(enqUop.ctrl.src2Type === SrcType.fp && enqSrcReady(1))) {
srcQueue(enqPtrReg)(1) := true.B
}
}
for (i <- 0 until iqSize) {
for (j <- 0 until srcNum) {
when (srcUpdate(i)(j)) { srcQueue(i)(j) := true.B }
......@@ -591,18 +605,18 @@ class ReservationStationCtrl
}
}
class RSDataSingleSrc(srcLen: Int, numEntries: Int, numListen: Int) extends XSModule {
class RSDataSingleSrc(srcLen: Int, numEntries: Int, numListen: Int, writePort: Int = 1) extends XSModule {
val io = IO(new Bundle {
val r = new Bundle {
// val valid = Bool() // NOTE: if read valid is necessary, but now it is not completed
val addr = Input(UInt(log2Up(numEntries).W))
val rdata = Output(UInt(srcLen.W))
}
val w = Input(new Bundle {
val w = Input(Vec(writePort, new Bundle {
val wen = Bool()
val addr = UInt(log2Up(numEntries).W)
val wdata = Input(UInt(srcLen.W))
})
val wdata = UInt(srcLen.W)
}))
val listen = Input(new Bundle {
val wdata = Vec(numListen, UInt(srcLen.W))
val wen = Vec(numEntries, Vec(numListen, Bool()))
......@@ -611,9 +625,14 @@ class RSDataSingleSrc(srcLen: Int, numEntries: Int, numListen: Int) extends XSMo
val value = Reg(Vec(numEntries, UInt(srcLen.W)))
val wMask = Mux(io.w.wen, UIntToOH(io.w.addr)(numEntries-1, 0), 0.U(numEntries.W))
val data = io.listen.wdata :+ io.w.wdata
val wen = io.listen.wen.zip(wMask.asBools).map{ case (w, m) => w :+ m }
val wMaskT = io.w.map(w => Mux(w.wen, UIntToOH(w.addr)(numEntries-1, 0), 0.U(numEntries.W)))
val wMask = (0 until numEntries).map(i =>
(0 until writePort).map(j =>
wMaskT(j)(i)
))
val wData = io.w.map(w => w.wdata)
val data = io.listen.wdata ++ io.w.map(_.wdata)
val wen = io.listen.wen.zip(wMask).map{ case (w, m) => w ++ m }
for (i <- 0 until numEntries) {
when (Cat(wen(i)).orR) {
value(i) := ParallelMux(wen(i) zip data)
......@@ -640,8 +659,10 @@ class ReservationStationData
val srcNum = if (exuCfg == Exu.jumpExeUnitCfg) 2 else max(exuCfg.intSrcCnt, exuCfg.fpSrcCnt)
require(nonBlocked==fastWakeup)
val io = IO(new XSBundle {
val srcRegValue = Vec(srcNum, Input(UInt(srcLen.W)))
val fpRegValue = if (exuCfg == Exu.stExeUnitCfg) Input(UInt(srcLen.W)) else null
val jumpPc = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val jalr_target = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val in = Input(new Bundle {
......@@ -665,25 +686,35 @@ class ReservationStationData
// Data : single read, multi write
// ------------------------
val data = (0 until srcNum).map{i =>
val d = Module(new RSDataSingleSrc(srcLen, iqSize, fastPortsCnt + slowPortsCnt))
d.suggestName(s"${this.name}_data${i}")
d.io
val data = if (exuCfg == Exu.stExeUnitCfg) {
val srcBase = Module(new RSDataSingleSrc(srcLen, iqSize, fastPortsCnt + slowPortsCnt, 1))
val srcData = Module(new RSDataSingleSrc(srcLen, iqSize, fastPortsCnt + slowPortsCnt, 2))
srcBase.suggestName(s"${this.name}_data0")
srcData.suggestName(s"${this.name}_data1")
Seq(srcBase.io, srcData.io)
} else {
(0 until srcNum).map{i =>
val d = Module(new RSDataSingleSrc(srcLen, iqSize, fastPortsCnt + slowPortsCnt, 1))
d.suggestName(s"${this.name}_data${i}")
d.io
}
}
(0 until srcNum).foreach{ i =>
data(i).listen.wen := io.listen.wen(i)
data(i).listen.wdata := io.listen.wdata
}
data.map(_.w.addr := RegEnable(io.in.addr, io.in.valid))
data.zip(io.in.enqSrcReady).map{ case (src, ready) => src.w.wen := RegNext(ready && io.in.valid) }
val addrReg = RegEnable(io.in.addr, io.in.valid)
val enqSrcReadyReg = io.in.enqSrcReady.map(r => RegNext(r && io.in.valid))
data.map(_.w(0).addr := addrReg)
data.zip(enqSrcReadyReg).map{ case (src, ready) => src.w(0).wen := ready }
val pcMem = if(exuCfg == Exu.jumpExeUnitCfg)
Some(Module(new SyncDataModuleTemplate(UInt(VAddrBits.W), iqSize, numRead = 1, numWrite = 1))) else None
if(pcMem.nonEmpty){
pcMem.get.io.wen(0) := RegNext(io.in.valid)
pcMem.get.io.waddr(0) := RegNext(io.in.addr)
pcMem.get.io.waddr(0) := addrReg
pcMem.get.io.wdata(0) := io.jumpPc
}
......@@ -694,15 +725,15 @@ class ReservationStationData
io.srcRegValue(0)
)
// data.io.w.bits.data(0) := src1Mux
data(0).w.wdata := src1Mux
data(1).w.wdata := io.jalr_target
data(0).w(0).wdata := src1Mux
data(1).w(0).wdata := io.jalr_target
case Exu.aluExeUnitCfg =>
val src1Mux = Mux(enqUopReg.ctrl.src1Type === SrcType.pc,
SignExt(enqUopReg.cf.pc, XLEN),
io.srcRegValue(0)
)
data(0).w.wdata := src1Mux
data(0).w(0).wdata := src1Mux
// alu only need U type and I type imm
val imm32 = Mux(enqUopReg.ctrl.selImm === SelImm.IMM_U,
ImmUnion.U.toImm32(enqUopReg.ctrl.imm),
......@@ -712,9 +743,17 @@ class ReservationStationData
val src2Mux = Mux(enqUopReg.ctrl.src2Type === SrcType.imm,
imm64, io.srcRegValue(1)
)
data(1).w.wdata := src2Mux
data(1).w(0).wdata := src2Mux
case Exu.stExeUnitCfg =>
(0 until srcNum).foreach(i => data(i).w(0).wdata := io.srcRegValue(i) )
data(1).w(1).wdata := io.fpRegValue
data(1).w(1).addr := RegNext(addrReg)
data(1).w(1).wen := RegNext(enqSrcReadyReg(1) && enqUopReg.ctrl.src2Type === SrcType.fp)
data(1).w(0).wen := enqSrcReadyReg(1) && enqUopReg.ctrl.src2Type =/= SrcType.fp
case _ =>
(0 until srcNum).foreach(i => data(i).w.wdata := io.srcRegValue(i) )
(0 until srcNum).foreach(i => data(i).w(0).wdata := io.srcRegValue(i) )
}
// deq
data.map(_.r.addr := io.sel)
......
......@@ -30,7 +30,7 @@ trait HasLoadHelper { this: XSModule =>
LookupTree(uop.ctrl.fuOpType, List(
LSUOpType.lb -> SignExt(rdata(7, 0) , XLEN),
LSUOpType.lh -> SignExt(rdata(15, 0), XLEN),
LSUOpType.lw -> Mux(fpWen, rdata, SignExt(rdata(31, 0), XLEN)),
LSUOpType.lw -> Mux(fpWen, Cat(Fill(32, 1.U(1.W)), rdata(31, 0)), SignExt(rdata(31, 0), XLEN)),
LSUOpType.ld -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
LSUOpType.lbu -> ZeroExt(rdata(7, 0) , XLEN),
LSUOpType.lhu -> ZeroExt(rdata(15, 0), XLEN),
......
......@@ -241,7 +241,6 @@ class LoadUnit extends XSModule with HasLoadHelper {
val io = IO(new Bundle() {
val ldin = Flipped(Decoupled(new ExuInput))
val ldout = Decoupled(new ExuOutput)
val fpout = Decoupled(new ExuOutput)
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
val tlbFeedback = ValidIO(new TlbFeedback)
......@@ -304,53 +303,27 @@ class LoadUnit extends XSModule with HasLoadHelper {
// write to rob and writeback bus
val s2_wb_valid = load_s2.io.out.valid && !load_s2.io.out.bits.miss
val refillFpLoad = io.lsq.ldout.bits.uop.ctrl.fpWen
// Int load, if hit, will be writebacked at s2
val intHitLoadOut = Wire(Valid(new ExuOutput))
intHitLoadOut.valid := s2_wb_valid && !load_s2.io.out.bits.uop.ctrl.fpWen
intHitLoadOut.bits.uop := load_s2.io.out.bits.uop
intHitLoadOut.bits.data := load_s2.io.out.bits.data
intHitLoadOut.bits.redirectValid := false.B
intHitLoadOut.bits.redirect := DontCare
intHitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio
intHitLoadOut.bits.debug.isPerfCnt := false.B
intHitLoadOut.bits.debug.paddr := load_s2.io.out.bits.paddr
intHitLoadOut.bits.fflags := DontCare
val hitLoadOut = Wire(Valid(new ExuOutput))
hitLoadOut.valid := s2_wb_valid
hitLoadOut.bits.uop := load_s2.io.out.bits.uop
hitLoadOut.bits.data := load_s2.io.out.bits.data
hitLoadOut.bits.redirectValid := false.B
hitLoadOut.bits.redirect := DontCare
hitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio
hitLoadOut.bits.debug.isPerfCnt := false.B
hitLoadOut.bits.debug.paddr := load_s2.io.out.bits.paddr
hitLoadOut.bits.fflags := DontCare
load_s2.io.out.ready := true.B
io.ldout.bits := Mux(intHitLoadOut.valid, intHitLoadOut.bits, io.lsq.ldout.bits)
io.ldout.valid := intHitLoadOut.valid || io.lsq.ldout.valid && !refillFpLoad
io.ldout.bits := Mux(hitLoadOut.valid, hitLoadOut.bits, io.lsq.ldout.bits)
io.ldout.valid := hitLoadOut.valid || io.lsq.ldout.valid
// Fp load, if hit, will be stored to reg at s2, then it will be recoded at s3, writebacked at s4
val fpHitLoadOut = Wire(Valid(new ExuOutput))
fpHitLoadOut.valid := s2_wb_valid && load_s2.io.out.bits.uop.ctrl.fpWen
fpHitLoadOut.bits := intHitLoadOut.bits
val fpLoadUnRecodedReg = Reg(Valid(new ExuOutput))
fpLoadUnRecodedReg.valid := fpHitLoadOut.valid || io.lsq.ldout.valid && refillFpLoad
when(fpHitLoadOut.valid || io.lsq.ldout.valid && refillFpLoad){
fpLoadUnRecodedReg.bits := Mux(fpHitLoadOut.valid, fpHitLoadOut.bits, io.lsq.ldout.bits)
}
val fpLoadRecodedReg = Reg(Valid(new ExuOutput))
when(fpLoadUnRecodedReg.valid){
fpLoadRecodedReg := fpLoadUnRecodedReg
fpLoadRecodedReg.bits.data := fpRdataHelper(fpLoadUnRecodedReg.bits.uop, fpLoadUnRecodedReg.bits.data) // recode
}
fpLoadRecodedReg.valid := fpLoadUnRecodedReg.valid
io.fpout.bits := fpLoadRecodedReg.bits
io.fpout.valid := fpLoadRecodedReg.valid
io.lsq.ldout.ready := Mux(refillFpLoad, !fpHitLoadOut.valid, !intHitLoadOut.valid)
io.lsq.ldout.ready := !hitLoadOut.valid
when(io.ldout.fire()){
XSDebug("ldout %x\n", io.ldout.bits.uop.cf.pc)
}
when(io.fpout.fire()){
XSDebug("fpout %x\n", io.fpout.bits.uop.cf.pc)
}
}
......@@ -37,9 +37,6 @@ class StoreUnit_S0 extends XSModule {
io.out.bits.vaddr := saddr
io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0))
when(io.in.bits.uop.ctrl.src2Type === SrcType.fp){
io.out.bits.data := io.in.bits.src2
} // not not touch fp store raw data
io.out.bits.uop := io.in.bits.uop
io.out.bits.miss := DontCare
io.out.bits.rsIdx := io.rsIdx
......@@ -64,7 +61,6 @@ class StoreUnit_S1 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
// val fp_out = Decoupled(new LsPipelineBundle)
val lsq = ValidIO(new LsPipelineBundle)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
......@@ -92,7 +88,7 @@ class StoreUnit_S1 extends XSModule {
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
io.lsq.valid := io.in.valid && !s1_tlb_miss// TODO: && ! FP
io.lsq.valid := io.in.valid && !s1_tlb_miss
io.lsq.bits := io.in.bits
io.lsq.bits.paddr := s1_paddr
io.lsq.bits.miss := false.B
......@@ -103,12 +99,6 @@ class StoreUnit_S1 extends XSModule {
// an mmio instruction with an exception is written back immediately
io.out.valid := io.in.valid && (!io.out.bits.mmio || s1_exception) && !s1_tlb_miss
io.out.bits := io.lsq.bits
// encode data for fp store
when(io.in.bits.uop.ctrl.src2Type === SrcType.fp){
io.lsq.bits.data := genWdata(ieee(io.in.bits.data), io.in.bits.uop.ctrl.fuOpType(1,0))
}
}
class StoreUnit_S2 extends XSModule {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册