Commit 8b4ffe05 authored by LinJiawei

Merge remote-tracking branch 'origin/rs-new' into xs-fpu

......@@ -56,7 +56,11 @@ EMU_CXXFLAGS = -std=c++11 -static -Wall -I$(EMU_CSRC_DIR)
EMU_CXXFLAGS += -DVERILATOR -Wno-maybe-uninitialized
EMU_LDFLAGS = -lpthread -lSDL2 -ldl
EMU_THREADS = 1
VTHREAD_FLAGS = --threads $(EMU_THREADS) --threads-dpi none
ifeq ($(EMU_THREADS), 1)
VTHREAD_FLAGS = --threads 1
else
VTHREAD_FLAGS = --threads $(EMU_THREADS) --threads-dpi none
endif
# --trace
VERILATOR_FLAGS = --top-module $(SIM_TOP) \
......@@ -67,7 +71,6 @@ VERILATOR_FLAGS = --top-module $(SIM_TOP) \
$(VTHREAD_FLAGS) \
--trace \
--assert \
--trace \
--savable \
--stats-vars \
--output-split 5000 \
......
This diff is collapsed.
......@@ -5,7 +5,7 @@ import firrtl.AnnotationSeq
import firrtl.annotations.NoTargetAnnotation
import firrtl.options.{HasShellOptions, Shell, ShellOption}
import firrtl.stage.{FirrtlCli, RunFirrtlTransformAnnotation}
import xstransforms.ShowPrintTransform
// import xstransforms.ShowPrintTransform
import xstransforms.PrintModuleName
case class DisablePrintfAnnotation(m: String) extends NoTargetAnnotation
......@@ -78,7 +78,7 @@ object XiangShanStage {
(new XiangShanStage).execute(
args,
annotations ++ Seq(
RunFirrtlTransformAnnotation(new ShowPrintTransform),
// RunFirrtlTransformAnnotation(new ShowPrintTransform),
RunFirrtlTransformAnnotation(new PrintModuleName)
)
)
......
......@@ -49,8 +49,8 @@ case class XSCoreParameters
CommitWidth: Int = 6,
BrqSize: Int = 12,
IssQueSize: Int = 8,
NRPhyRegs: Int = 72,
NRIntReadPorts: Int = 8,
NRPhyRegs: Int = 128,
NRIntReadPorts: Int = 14,
NRIntWritePorts: Int = 8,
NRFpReadPorts: Int = 14,
NRFpWritePorts: Int = 8,
......
......@@ -10,7 +10,7 @@ import xiangshan.backend.brq.Brq
import xiangshan.backend.dispatch.Dispatch
import xiangshan.backend.exu._
import xiangshan.backend.fu.FunctionUnit
import xiangshan.backend.issue.{IssueQueue, ReservationStation}
import xiangshan.backend.issue.{IssueQueue, ReservationStationNew}
import xiangshan.backend.regfile.{Regfile, RfWritePort}
import xiangshan.backend.roq.Roq
import xiangshan.mem._
......@@ -61,12 +61,6 @@ class Backend extends XSModule
numWirtePorts = NRFpWritePorts,
hasZero = false
))
val memRf = Module(new Regfile(
numReadPorts = 2*exuParameters.StuCnt + exuParameters.LduCnt,
numWirtePorts = NRIntWritePorts,
hasZero = true,
isMemRf = true
))
// backend redirect, flush pipeline
val redirect = Mux(
......@@ -96,88 +90,62 @@ class Backend extends XSModule
def needData(a: ExuConfig, b: ExuConfig): Boolean =
(a.readIntRf && b.writeIntRf) || (a.readFpRf && b.writeFpRf)
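needData above decides, purely from which register files an exu reads and writes, whether one reservation station must listen to another exu's writeback. A stand-alone Scala sketch of that predicate, using a hypothetical Cfg stand-in for ExuConfig:

```scala
// Stand-alone model of needData: the reservation station of exu `a` listens to exu `b`
// iff they share a register file. Cfg is a hypothetical stand-in for ExuConfig.
case class Cfg(name: String,
               readIntRf: Boolean, readFpRf: Boolean,
               writeIntRf: Boolean, writeFpRf: Boolean)

def needData(a: Cfg, b: Cfg): Boolean =
  (a.readIntRf && b.writeIntRf) || (a.readFpRf && b.writeFpRf)

// val alu  = Cfg("alu",  readIntRf = true,  readFpRf = false, writeIntRf = true,  writeFpRf = false)
// val fmac = Cfg("fmac", readIntRf = false, readFpRf = true,  writeIntRf = false, writeFpRf = true)
// needData(alu, alu)  == true   // an int RS listens to int writebacks
// needData(alu, fmac) == false  // ...but not to fp-only writebacks
```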
val reservedStations = exeUnits.
zipWithIndex.
map({ case (exu, i) =>
val cfg = exu.config
val wakeUpDateVec = exuConfigs.zip(exeWbReqs).filter(x => needData(cfg, x._1)).map(_._2)
val bypassCnt = exuConfigs.count(c => c.enableBypass && needData(cfg, c))
println(s"exu:${cfg.name} wakeupCnt:${wakeUpDateVec.length} bypassCnt:$bypassCnt")
val rs = Module(new ReservationStation(
cfg, wakeUpDateVec.length, bypassCnt, cfg.enableBypass, false
))
rs.io.redirect <> redirect
rs.io.numExist <> dispatch.io.numExist(i)
rs.io.enqCtrl <> dispatch.io.enqIQCtrl(i)
rs.io.enqData <> dispatch.io.enqIQData(i)
for(
(wakeUpPort, exuOut) <-
rs.io.wakeUpPorts.zip(wakeUpDateVec)
){
wakeUpPort.bits := exuOut.bits
wakeUpPort.valid := exuOut.valid
}
exu.io.in <> rs.io.deq
exu.io.redirect <> redirect
rs
})
for( rs <- reservedStations){
val bypassDataVec = exuConfigs.zip(exeWbReqs).
filter(x => x._1.enableBypass && needData(rs.exuCfg, x._1)).map(_._2)
rs.io.bypassUops <> reservedStations.
filter(x => x.enableBypass && needData(rs.exuCfg, x.exuCfg)).
map(_.io.selectedUop)
for(i <- bypassDataVec.indices){
rs.io.bypassData(i).valid := bypassDataVec(i).valid
rs.io.bypassData(i).bits := bypassDataVec(i).bits
val reservedStations = exuConfigs.zipWithIndex.map({ case (cfg, i) =>
// NOTE: an exu could have both certain and uncertain latency,
// but could not have multiple different certain latencies
var certainLatency = -1
if(cfg.hasCertainLatency) { certainLatency = cfg.latency.latencyVal.get }
val writeBackedData = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasCertainLatency && needData(cfg, x._1)).map(_._2.bits.data)
val wakeupCnt = writeBackedData.length
val extraListenPorts = exuConfigs
.zip(exeWbReqs)
.filter(x => x._1.hasUncertainlatency && needData(cfg, x._1))
.map(_._2)
val extraListenPortsCnt = extraListenPorts.length
val feedback = (cfg == Exu.ldExeUnitCfg) || (cfg == Exu.stExeUnitCfg)
println(s"${i}: exu:${cfg.name} wakeupCnt: ${wakeupCnt} extraListenPorts: ${extraListenPortsCnt} delay:${certainLatency} feedback:${feedback}")
val rs = Module(new ReservationStationNew(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = feedback))
rs.io.redirect <> redirect
rs.io.numExist <> dispatch.io.numExist(i)
rs.io.enqCtrl <> dispatch.io.enqIQCtrl(i)
rs.io.enqData <> dispatch.io.enqIQData(i)
rs.io.writeBackedData <> writeBackedData
for((x, y) <- rs.io.extraListenPorts.zip(extraListenPorts)){
x.valid := y.fire()
x.bits := y.bits
}
cfg match {
case Exu.ldExeUnitCfg =>
case Exu.stExeUnitCfg =>
case otherCfg =>
exeUnits(i).io.in <> rs.io.deq
exeUnits(i).io.redirect <> redirect
rs.io.tlbFeedback := DontCare
}
}
val issueQueues = exuConfigs.
zipWithIndex.
takeRight(exuParameters.LduCnt + exuParameters.StuCnt).
map({case (cfg, i) =>
val wakeUpDateVec = exuConfigs.zip(exeWbReqs).filter(x => needData(cfg, x._1)).map(_._2)
val bypassUopVec = reservedStations.
filter(r => r.exuCfg.enableBypass && needData(cfg, r.exuCfg)).map(_.io.selectedUop)
val bypassDataVec = exuConfigs.zip(exeWbReqs).
filter(x => x._1.enableBypass && needData(cfg, x._1)).map(_._2)
val iq = Module(new IssueQueue(
cfg, wakeUpDateVec.length, bypassUopVec.length
))
println(s"exu:${cfg.name} wakeupCnt:${wakeUpDateVec.length} bypassCnt:${bypassUopVec.length}")
iq.io.redirect <> redirect
iq.io.tlbFeedback := io.mem.tlbFeedback(i - exuParameters.ExuCnt + exuParameters.LduCnt + exuParameters.StuCnt)
iq.io.enq <> dispatch.io.enqIQCtrl(i)
dispatch.io.numExist(i) := iq.io.numExist
for(
(wakeUpPort, exuOut) <-
iq.io.wakeUpPorts.zip(wakeUpDateVec)
){
wakeUpPort.bits := exuOut.bits
wakeUpPort.valid := exuOut.fire() // data after arbit
}
iq.io.bypassUops <> bypassUopVec
for(i <- bypassDataVec.indices){
iq.io.bypassData(i).valid := bypassDataVec(i).valid
iq.io.bypassData(i).bits := bypassDataVec(i).bits
}
iq
})
rs
})
for(rs <- reservedStations){
rs.io.broadcastedUops <> reservedStations.
filter(x => x.exuCfg.hasCertainLatency && needData(rs.exuCfg, x.exuCfg)).
map(_.io.selectedUop)
}
io.mem.commits <> roq.io.commits
io.mem.roqDeqPtr := roq.io.roqDeqPtr
io.mem.ldin <> issueQueues.filter(_.exuCfg == Exu.ldExeUnitCfg).map(_.io.deq)
io.mem.stin <> issueQueues.filter(_.exuCfg == Exu.stExeUnitCfg).map(_.io.deq)
io.mem.ldin <> reservedStations.filter(_.exuCfg == Exu.ldExeUnitCfg).map(_.io.deq)
io.mem.stin <> reservedStations.filter(_.exuCfg == Exu.stExeUnitCfg).map(_.io.deq)
jmpExeUnit.io.csrOnly.exception.valid := roq.io.redirect.valid && roq.io.redirect.bits.isException
jmpExeUnit.io.csrOnly.exception.bits := roq.io.exception
jmpExeUnit.fflags := roq.io.fflags
......@@ -192,6 +160,10 @@ class Backend extends XSModule
io.mem.exceptionAddr.lsIdx.sqIdx := roq.io.exception.sqIdx
io.mem.exceptionAddr.isStore := CommitType.lsInstIsStore(roq.io.exception.ctrl.commitType)
io.mem.tlbFeedback <> reservedStations.filter(
x => x.exuCfg == Exu.ldExeUnitCfg || x.exuCfg == Exu.stExeUnitCfg
).map(_.io.tlbFeedback)
io.frontend.outOfOrderBrInfo <> brq.io.outOfOrderBrInfo
io.frontend.inOrderBrInfo <> brq.io.inOrderBrInfo
io.frontend.sfence <> jmpExeUnit.sfence
......@@ -217,9 +189,9 @@ class Backend extends XSModule
rename.io.redirect <> redirect
rename.io.roqCommits <> roq.io.commits
rename.io.in <> decBuf.io.out
rename.io.intRfReadAddr <> dispatch.io.readIntRf.map(_.addr) ++ dispatch.io.intMemRegAddr
rename.io.intRfReadAddr <> dispatch.io.readIntRf.map(_.addr) ++ dispatch.io.memIntRf.map(_.addr)
rename.io.intPregRdy <> dispatch.io.intPregRdy ++ dispatch.io.intMemRegRdy
rename.io.fpRfReadAddr <> dispatch.io.readFpRf.map(_.addr) ++ dispatch.io.fpMemRegAddr
rename.io.fpRfReadAddr <> dispatch.io.readFpRf.map(_.addr) ++ dispatch.io.memFpRf.map(_.addr)
rename.io.fpPregRdy <> dispatch.io.fpPregRdy ++ dispatch.io.fpMemRegRdy
rename.io.replayPregReq <> dispatch.io.replayPregReq
dispatch.io.redirect <> redirect
......@@ -238,9 +210,8 @@ class Backend extends XSModule
dispatch.io.dequeueRoqIndex.bits := Mux(io.mem.oldestStore.valid, io.mem.oldestStore.bits, roq.io.commitRoqIndex.bits)
intRf.io.readPorts <> dispatch.io.readIntRf
fpRf.io.readPorts <> dispatch.io.readFpRf ++ issueQueues.flatMap(_.io.readFpRf)
memRf.io.readPorts <> issueQueues.flatMap(_.io.readIntRf)
intRf.io.readPorts <> dispatch.io.readIntRf ++ dispatch.io.memIntRf
fpRf.io.readPorts <> dispatch.io.readFpRf ++ dispatch.io.memFpRf
io.mem.redirect <> redirect
......@@ -257,9 +228,7 @@ class Backend extends XSModule
rfWrite.data := x.bits.data
rfWrite
}
val intRfWrite = wbIntResults.map(exuOutToRfWrite)
intRf.io.writePorts <> intRfWrite
memRf.io.writePorts <> intRfWrite
intRf.io.writePorts <> wbIntResults.map(exuOutToRfWrite)
fpRf.io.writePorts <> wbFpResults.map(exuOutToRfWrite)
rename.io.wbIntResults <> wbIntResults
......
......@@ -38,14 +38,14 @@ class Dispatch extends XSModule {
val lsIdxs = Input(Vec(RenameWidth, new LSIdx))
val dequeueRoqIndex = Input(Valid(new RoqPtr))
// read regfile
val readIntRf = Vec(NRIntReadPorts, Flipped(new RfReadPort))
val readIntRf = Vec(NRIntReadPorts - NRMemReadPorts, Flipped(new RfReadPort))
val readFpRf = Vec(NRFpReadPorts - exuParameters.StuCnt, Flipped(new RfReadPort))
// read reg status (busy/ready)
val intPregRdy = Vec(NRIntReadPorts, Input(Bool()))
val intPregRdy = Vec(NRIntReadPorts - NRMemReadPorts, Input(Bool()))
val fpPregRdy = Vec(NRFpReadPorts - exuParameters.StuCnt, Input(Bool()))
// load + store reg status (busy/ready)
val intMemRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val fpMemRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val memIntRf = Vec(NRMemReadPorts, Flipped(new RfReadPort))
val memFpRf = Vec(exuParameters.StuCnt, Flipped(new RfReadPort))
val intMemRegRdy = Vec(NRMemReadPorts, Input(Bool()))
val fpMemRegRdy = Vec(exuParameters.StuCnt, Input(Bool()))
// replay: set preg status to not ready
......@@ -53,7 +53,7 @@ class Dispatch extends XSModule {
// to reservation stations
val numExist = Input(Vec(exuParameters.ExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.ExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.ExuCnt - exuParameters.LsExuCnt, Output(new ExuInput))
val enqIQData = Vec(exuParameters.ExuCnt, Output(new ExuInput))
})
val dispatch1 = Module(new Dispatch1)
......@@ -128,14 +128,15 @@ class Dispatch extends XSModule {
fpDq.io.deq <> DontCare
io.readFpRf <> DontCare
}
// Load/store dispatch queue to load/store issue queues
val lsDispatch = Module(new Dispatch2Ls)
lsDispatch.io.fromDq <> lsDq.io.deq
lsDispatch.io.intRegAddr <> io.intMemRegAddr
lsDispatch.io.fpRegAddr <> io.fpMemRegAddr
lsDispatch.io.readIntRf <> io.memIntRf
lsDispatch.io.readFpRf <> io.memFpRf
lsDispatch.io.intRegRdy <> io.intMemRegRdy
lsDispatch.io.fpRegRdy <> io.fpMemRegRdy
lsDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
lsDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
lsDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
}
......@@ -10,8 +10,8 @@ import xiangshan.backend.exu._
class Dispatch2Int extends XSModule {
val io = IO(new Bundle() {
val fromDq = Flipped(Vec(dpParams.IntDqDeqWidth, DecoupledIO(new MicroOp)))
val readRf = Vec(NRIntReadPorts, Flipped(new RfReadPort))
val regRdy = Vec(NRIntReadPorts, Input(Bool()))
val readRf = Vec(NRIntReadPorts - NRMemReadPorts, Flipped(new RfReadPort))
val regRdy = Vec(NRIntReadPorts - NRMemReadPorts, Input(Bool()))
val numExist = Input(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.IntExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.IntExuCnt, Output(new ExuInput))
......
......@@ -10,12 +10,15 @@ import xiangshan.backend.exu._
class Dispatch2Ls extends XSModule {
val io = IO(new Bundle() {
val fromDq = Flipped(Vec(dpParams.LsDqDeqWidth, DecoupledIO(new MicroOp)))
val intRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val fpRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val readIntRf = Vec(NRMemReadPorts, Flipped(new RfReadPort))
val readFpRf = Vec(exuParameters.StuCnt, Flipped(new RfReadPort))
// val intRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
// val fpRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val intRegRdy = Vec(NRMemReadPorts, Input(Bool()))
val fpRegRdy = Vec(exuParameters.StuCnt, Input(Bool()))
val numExist = Input(Vec(exuParameters.LsExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.LsExuCnt, Output(new ExuInput))
})
/**
......@@ -70,12 +73,12 @@ class Dispatch2Ls extends XSModule {
val readPort = Seq(0, 1, 2, 4)
for (i <- 0 until exuParameters.LsExuCnt) {
if (i < exuParameters.LduCnt) {
io.intRegAddr(readPort(i)) := io.fromDq(indexVec(i)).bits.psrc1
io.readIntRf(readPort(i)).addr := io.fromDq(indexVec(i)).bits.psrc1
}
else {
io.fpRegAddr(i - exuParameters.LduCnt) := io.fromDq(indexVec(i)).bits.psrc2
io.intRegAddr(readPort(i) ) := io.fromDq(indexVec(i)).bits.psrc1
io.intRegAddr(readPort(i)+1) := io.fromDq(indexVec(i)).bits.psrc2
io.readFpRf(i - exuParameters.LduCnt).addr := io.fromDq(indexVec(i)).bits.psrc2
io.readIntRf(readPort(i) ).addr := io.fromDq(indexVec(i)).bits.psrc1
io.readIntRf(readPort(i)+1).addr := io.fromDq(indexVec(i)).bits.psrc2
}
}
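With readPort = Seq(0, 1, 2, 4) above, each load unit uses one int read port and each store unit a pair of adjacent ports for its address and integer data (fp store data comes from the dedicated fp ports instead). A small stand-alone check of that mapping, assuming two load units and two store units (hypothetical counts used only for illustration):

```scala
// Software check of the int read-port mapping in Dispatch2Ls,
// assuming LduCnt = 2 and StuCnt = 2 for illustration.
val readPort = Seq(0, 1, 2, 4)
val lduCnt   = 2
val lsExuCnt = 4

val portsPerIssueSlot: Seq[Seq[Int]] =
  (0 until lsExuCnt).map { i =>
    if (i < lduCnt) Seq(readPort(i))                  // load: one int source (address)
    else            Seq(readPort(i), readPort(i) + 1) // store: address + int data
  }
// portsPerIssueSlot == Seq(Seq(0), Seq(1), Seq(2, 3), Seq(4, 5))
```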
......@@ -111,4 +114,31 @@ class Dispatch2Ls extends XSModule {
XSDebug(io.fromDq(i).valid && !io.fromDq(i).ready,
p"pc 0x${Hexadecimal(io.fromDq(i).bits.cf.pc)} waits at Ls dispatch queue with index $i\n")
}
/**
* Part 5: the second stage of dispatch 2 (send data to reservation station)
*/
val uopReg = Reg(Vec(exuParameters.LsExuCnt, new MicroOp))
val dataValidRegDebug = Reg(Vec(exuParameters.LsExuCnt, Bool()))
for (i <- 0 until exuParameters.LsExuCnt) {
uopReg(i) := io.enqIQCtrl(i).bits
dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
io.enqIQData(i) := DontCare
// assert(uopReg(i).ctrl.src1Type =/= SrcType.pc)
io.enqIQData(i).src1 := io.readIntRf(readPort(i)).data
if (i >= exuParameters.LduCnt) {
io.enqIQData(i).src2 := Mux(
uopReg(i).ctrl.src2Type === SrcType.imm,
uopReg(i).ctrl.imm,
Mux(uopReg(i).ctrl.src2Type === SrcType.fp,
io.readFpRf(i - exuParameters.LduCnt).data,
io.readIntRf(readPort(i) + 1).data))
}
XSDebug(dataValidRegDebug(i),
p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
p"(${readPort(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
}
}
......@@ -91,8 +91,6 @@ class AluExeUnit extends Exu(Exu.aluExeUnitCfg) {
io.redirect.bits.brTag.flag,
io.redirect.bits.brTag.value
)
XSDebug(io.in.valid, "src1:%x src2:%x offset:%x func:%b pc:%x\n",
src1, src2, offset, func, pc)
XSDebug(io.out.valid, "res:%x aluRes:%x isRVC:%d isBranch:%d target:%x taken:%d\n",
io.out.bits.data, aluRes, isRVC, isBranch, target, taken)
XSDebug(io.in.valid, p"src1:${Hexadecimal(src1)} src2:${Hexadecimal(src2)} offset:${Hexadecimal(offset)} func:${Binary(func)} pc:${Hexadecimal(pc)} roqIdx:${uop.roqIdx}\n")
XSDebug(io.out.valid, p"res:${Hexadecimal(io.out.bits.data)} aluRes:${Hexadecimal(aluRes)} isRVC:${isRVC} isBranch:${isBranch} target:${Hexadecimal(target)} taken:${taken}\n")
}
\ No newline at end of file
......@@ -4,7 +4,7 @@ import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.FuType._
import xiangshan.backend.fu.FuConfig
import xiangshan.backend.fu.{CertainLatency, FuConfig, HasFuLatency, NexusLatency, UncertainLatency}
import utils.ParallelOR
import xiangshan.backend.fu.FunctionUnit._
......@@ -44,6 +44,27 @@ case class ExuConfig
val writeFpRf = supportedFuncUnits.map(_.writeFpRf).reduce(_||_)
val hasRedirect = supportedFuncUnits.map(_.hasRedirect).reduce(_||_)
val latency: HasFuLatency = {
val lats = supportedFuncUnits.map(_.latency)
val latencyValue = lats.collectFirst{
case x if x.latencyVal.nonEmpty =>
x.latencyVal.get
}
val hasUncertain = lats.exists(x => x.latencyVal.isEmpty)
if(latencyValue.nonEmpty){
if(hasUncertain) NexusLatency(latencyValue.get) else CertainLatency(latencyValue.get)
} else UncertainLatency()
}
val hasCertainLatency = latency.latencyVal.nonEmpty
val hasUncertainlatency = latency match {
case _: UncertainLatency =>
true
case _: NexusLatency =>
true
case _ =>
false
}
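The latency merge above keeps the first known latency value among the supported function units; if another unit's latency is unknown, the result is demoted to NexusLatency, and if nothing is known it becomes UncertainLatency. A minimal plain-Scala sketch of that rule, using stand-in classes rather than the Chisel source:

```scala
// Plain-Scala sketch of the latency-merging rule in ExuConfig (stand-alone model).
// The class names mirror HasFuLatency and its subclasses.
sealed trait Lat { def latencyVal: Option[Int] }
case class Certain(v: Int) extends Lat { val latencyVal = Some(v) }
case class Nexus(v: Int)   extends Lat { val latencyVal = Some(v) }
case class Uncertain()     extends Lat { val latencyVal = None }

def merge(lats: Seq[Lat]): Lat = {
  val known        = lats.collectFirst { case x if x.latencyVal.nonEmpty => x.latencyVal.get }
  val hasUncertain = lats.exists(_.latencyVal.isEmpty)
  known match {
    case Some(v) => if (hasUncertain) Nexus(v) else Certain(v)
    case None    => Uncertain()
  }
}

// e.g. an exu hosting a 3-cycle multiplier and a variable-latency divider:
// merge(Seq(Certain(3), Uncertain())) == Nexus(3)
// merge(Seq(Certain(5)))              == Certain(5)
// merge(Seq(Uncertain()))             == Uncertain()
```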
def canAccept(fuType: UInt): Bool = {
ParallelOR(supportedFuncUnits.map(_.fuType === fuType))
}
......
......@@ -40,8 +40,6 @@ class JmpExeUnit extends Exu(Exu.jmpExeUnitCfg) {
jmp.io <> DontCare
jmp.io.in.valid := io.in.valid && isJmp
jmp.io.out.ready := io.out.ready
//<<<<<<< HEAD
//
jmp.io.in.bits.connectToExuInput(io.in.bits)
jmp.io.redirectIn := io.redirect
......
......@@ -61,9 +61,7 @@ class MulExeUnit extends Exu(Exu.mulExeUnitCfg){
io.redirect.bits.isFlushPipe,
io.redirect.bits.brTag.value
)
XSDebug(io.in.valid, "src1:%x src2:%x pc:%x\n", src1, src2, io.in.bits.uop.cf.pc)
XSDebug(io.out.valid, "Out(%d %d) res:%x pc:%x\n",
io.out.valid, io.out.ready, io.out.bits.data, io.out.bits.uop.cf.pc
)
XSDebug(io.in.valid, p"src1:${Hexadecimal(src1)} src2:${Hexadecimal(src2)} pc:${Hexadecimal(io.in.bits.uop.cf.pc)} roqIdx:${io.in.bits.uop.roqIdx}\n")
XSDebug(io.out.valid, p"Out(${io.out.valid} ${io.out.ready}) res:${Hexadecimal(io.out.bits.data)} pc:${io.out.bits.uop.cf.pc} roqIdx:${io.out.bits.uop.roqIdx}\n")
XSDebug(io.redirect.valid, p"redirect: ${io.redirect.bits.brTag}\n")
}
......@@ -9,6 +9,24 @@ import xiangshan._
A Exu can have one or more function units
*/
trait HasFuLatency {
val latencyVal: Option[Int]
}
case class CertainLatency(value: Int) extends HasFuLatency{
override val latencyVal: Option[Int] = Some(value)
}
case class UncertainLatency() extends HasFuLatency {
override val latencyVal: Option[Int] = None
}
case class NexusLatency(value: Int) extends HasFuLatency {
override val latencyVal: Option[Int] = Some(value)
}
case class FuConfig
(
fuType: UInt,
......@@ -16,7 +34,8 @@ case class FuConfig
numFpSrc: Int,
writeIntRf: Boolean,
writeFpRf: Boolean,
hasRedirect: Boolean
hasRedirect: Boolean,
latency: HasFuLatency = CertainLatency(0)
) {
def srcCnt: Int = math.max(numIntSrc, numFpSrc)
}
......@@ -124,30 +143,45 @@ object FunctionUnit {
FuConfig(FuType.alu, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = true)
val mulCfg =
FuConfig(FuType.mul, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false)
FuConfig(FuType.mul, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false,
UncertainLatency()// CertainLatency(3)
)
val divCfg =
FuConfig(FuType.div, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false)
FuConfig(FuType.div, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false,
UncertainLatency()
)
val fenceCfg =
FuConfig(FuType.fence, 2, 0, writeIntRf = false, writeFpRf = false, hasRedirect = false/*NOTE: need redirect but when commit*/)
val lduCfg =
FuConfig(FuType.ldu, 1, 0, writeIntRf = true, writeFpRf = true, hasRedirect = false)
FuConfig(FuType.ldu, 1, 0, writeIntRf = true, writeFpRf = true, hasRedirect = false,
UncertainLatency()
)
val stuCfg =
FuConfig(FuType.stu, 2, 1, writeIntRf = false, writeFpRf = false, hasRedirect = false)
FuConfig(FuType.stu, 2, 1, writeIntRf = false, writeFpRf = false, hasRedirect = false,
UncertainLatency()
)
// use ldu's write back port, so set writeIntRf to false
val mouCfg =
FuConfig(FuType.mou, 2, 0, writeIntRf = false, writeFpRf = false, hasRedirect = false)
val mouCfg =
FuConfig(FuType.mou, 2, 0, writeIntRf = false, writeFpRf = false, hasRedirect = false,
UncertainLatency()
)
val fmacCfg =
FuConfig(FuType.fmac, 0, 3, writeIntRf = false, writeFpRf = true, hasRedirect = false)
FuConfig(FuType.fmac, 0, 3, writeIntRf = false, writeFpRf = true, hasRedirect = false,
CertainLatency(5)
)
val fmiscCfg =
FuConfig(FuType.fmisc, 0, 2, writeIntRf = true, writeFpRf = true, hasRedirect = false)
FuConfig(FuType.fmisc, 0, 2, writeIntRf = false, writeFpRf = true, hasRedirect = false,
CertainLatency(2)
)
val fDivSqrtCfg =
FuConfig(FuType.fDivSqrt, 0, 2, writeIntRf = false, writeFpRf = true, hasRedirect = false)
FuConfig(FuType.fDivSqrt, 0, 2, writeIntRf = false, writeFpRf = true, hasRedirect = false,
UncertainLatency()
)
}
......@@ -61,6 +61,9 @@ class ReservationStation
// to Dispatch
val numExist = Output(UInt(iqIdxWidth.W))
// tlb hit, inst can deq, only used in ld/st reservation stations
val tlbFeedback = Flipped(ValidIO(new TlbFeedback)) // TODO
})
val srcAllNum = 3
......
package xiangshan.backend.issue
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.exu.{Exu, ExuConfig}
import java.rmi.registry.Registry
import java.{util => ju}
class SrcBundle extends XSBundle {
val src = UInt(PhyRegIdxWidth.W)
val state = SrcState()
val srctype = SrcType()
def hit(uop: MicroOp) : Bool = {
(src === uop.pdest) && (state === SrcState.busy) &&
((srctype === SrcType.reg && uop.ctrl.rfWen && src=/=0.U) ||
(srctype === SrcType.fp && uop.ctrl.fpWen)) // TODO: check if zero map to zero when rename
}
override def toPrintable: Printable = {
p"src:${src} state:${state} type:${srctype}"
}
}
object SrcBundle {
def apply(src: UInt, state: UInt/*SrcState*/, srctype: UInt/*SrcType*/): SrcBundle = {
val b = Wire(new SrcBundle)
b.src := src
b.state := state
b.srctype := srctype
b
}
def stateCheck(src: SrcBundle): UInt /*SrcState*/ = {
Mux( (src.srctype=/=SrcType.reg && src.srctype=/=SrcType.fp) ||
(src.srctype===SrcType.reg && src.src===0.U), SrcState.rdy, src.state)
}
def check(src: UInt, state: UInt, srctype: UInt): SrcBundle = {
val b = Wire(new SrcBundle)
b.src := src
b.state := stateCheck(SrcBundle(src, state, srctype))
b.srctype := srctype
b
}
}
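SrcBundle.stateCheck marks a source as ready regardless of its recorded state when it is not read from a register file at all, or when it is integer register x0. A stand-alone software model of that check (the numeric encodings below are hypothetical, not the real SrcType/SrcState values):

```scala
// Software model of SrcBundle.stateCheck: sources that are neither int nor fp
// registers (e.g. immediates/pc), or that read integer register x0, are always ready.
object SrcTypeModel  { val reg = 0; val fp = 1; val imm = 2; val pc = 3 }
object SrcStateModel { val busy = 0; val rdy = 1 }

case class SrcModel(src: Int, state: Int, srctype: Int)

def stateCheck(s: SrcModel): Int = {
  val notRegFile = s.srctype != SrcTypeModel.reg && s.srctype != SrcTypeModel.fp
  val intZero    = s.srctype == SrcTypeModel.reg && s.src == 0
  if (notRegFile || intZero) SrcStateModel.rdy else s.state
}

// stateCheck(SrcModel(0,  SrcStateModel.busy, SrcTypeModel.reg)) == SrcStateModel.rdy  // x0 always ready
// stateCheck(SrcModel(12, SrcStateModel.busy, SrcTypeModel.fp))  == SrcStateModel.busy
```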
class BypassQueue(number: Int) extends XSModule {
val io = IO(new Bundle {
val in = Flipped(ValidIO(new MicroOp))
val out = ValidIO(new MicroOp)
val redirect = Flipped(ValidIO(new Redirect))
})
if (number < 0) {
io.out.valid := false.B
io.out.bits := DontCare
} else if(number == 0) {
io.in <> io.out
io.out.valid := io.in.valid && !io.out.bits.roqIdx.needFlush(io.redirect)
} else {
val queue = Seq.fill(number)(RegInit(0.U.asTypeOf(new Bundle{
val valid = Bool()
val bits = new MicroOp
})))
queue(0).valid := io.in.valid
queue(0).bits := io.in.bits
(0 until (number-1)).map{i =>
queue(i+1) := queue(i)
queue(i+1).valid := queue(i).valid && !queue(i).bits.roqIdx.needFlush(io.redirect)
}
io.out.valid := queue(number-1).valid && !queue(number-1).bits.roqIdx.needFlush(io.redirect)
io.out.bits := queue(number-1).bits
for (i <- 0 until number) {
XSDebug(queue(i).valid, p"BPQue(${i.U}): pc:${Hexadecimal(queue(i).bits.cf.pc)} roqIdx:${queue(i).bits.roqIdx} pdest:${queue(i).bits.pdest} rfWen:${queue(i).bits.ctrl.rfWen} fpWen${queue(i).bits.ctrl.fpWen}\n")
}
}
}
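BypassQueue is a redirect-aware shift register of `number` stages: a uop selected in the reservation station reappears at the output fixedDelay cycles later, one cycle before the fixed-latency result writes back, so consumers can wake up just in time. A plain-Scala timing model of that behaviour (a sketch only, ignoring the redirect path):

```scala
// Software model of the BypassQueue timing: a valid uop pushed at cycle t
// appears at the output at cycle t + number (number == fixedDelay of the producer RS).
class DelayLine[T](number: Int) {
  private var q: Vector[Option[T]] = Vector.fill(math.max(number, 0))(None)
  def step(in: Option[T]): Option[T] = {
    if (number < 0) None                     // negative delay: never broadcast
    else if (number == 0) in                 // zero delay: combinational pass-through
    else { val out = q.last; q = in +: q.init; out }
  }
}

// val bp = new DelayLine[String](3)    // models fixedDelay = 3
// bp.step(Some("uop0"))                // cycle t:   output None
// bp.step(None); bp.step(None)         // t+1, t+2:  still None
// bp.step(None)                        // t+3:       Some("uop0"), one cycle before writeback
```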
class ReservationStationNew
(
val exuCfg: ExuConfig,
wakeupCnt: Int,
extraListenPortsCnt: Int,
srcNum: Int = 3,
fixedDelay: Int,
feedback: Boolean,
replayDelay: Int = 16
) extends XSModule {
val iqSize = IssQueSize
val iqIdxWidth = log2Up(iqSize)
val io = IO(new XSBundle {
// flush Issue Queue
val redirect = Flipped(ValidIO(new Redirect))
// enq Ctrl sigs at dispatch-2
val enqCtrl = Flipped(DecoupledIO(new MicroOp))
// enq Data at next cycle (regfile has 1 cycle latency)
val enqData = Input(new ExuInput)
// broadcast selected uop to other issue queues
val selectedUop = ValidIO(new MicroOp)
// send to exu
val deq = DecoupledIO(new ExuInput)
// recv broadcasted uops from any related issue queue;
// to simplify the wake-up logic, the uops broadcasted by this queue itself
// are also in 'broadcastedUops'
val broadcastedUops = Vec(wakeupCnt, Flipped(ValidIO(new MicroOp)))
// listen to write back data bus
val writeBackedData = Vec(wakeupCnt, Input(UInt(XLEN.W)))
// for some function units with uncertain latency,
// we can only wake up dependent uops when those function units write back
val extraListenPorts = Vec(extraListenPortsCnt, Flipped(ValidIO(new ExuOutput)))
// to Dispatch
val numExist = Output(UInt(iqIdxWidth.W))
// TODO: support replay for future use if exu is ldu/stu
val tlbFeedback = Flipped(ValidIO(new TlbFeedback)) // TODO: change its name
})
// io <> DontCare
// GOAL:
// 1. divide control part and data part
// 2. store control signals in the sending RS and send them out after a particular number of cycles
// 3. one RS has only one particular delay
// 4. remove the issue stage
// 5. supporting replay would add one or two more cycles of latency for the state machine change,
//    so replay is not supported in the current edition.
// there are three logical parts:
// control part: psrc(5.W)*3 srcState(1.W)*3 fuOpType/Latency(3.W) roqIdx
// data part: data(64.W)*3
// other part: lsroqIdx and many other signals in uop; they may be moved to the control part (close to dispatch)
// control part:
val s_idle :: s_valid :: s_wait :: s_replay :: Nil = Enum(4)
val needFeedback = if (feedback) true.B else false.B
val stateQueue = RegInit(VecInit(Seq.fill(iqSize)(s_idle)))
val validQueue = stateQueue.map(_ === s_valid)
val emptyQueue = stateQueue.map(_ === s_idle)
val srcQueue = Reg(Vec(iqSize, Vec(srcNum, new SrcBundle)))
val cntQueue = Reg(Vec(iqSize, UInt(log2Up(replayDelay).W)))
// data part:
val data = Reg(Vec(iqSize, Vec(3, UInt(XLEN.W))))
// other part:
val uop = Reg(Vec(iqSize, new MicroOp))
// rs queue part:
val tailPtr = RegInit(0.U((iqIdxWidth+1).W))
val idxQueue = RegInit(VecInit((0 until iqSize).map(_.U(iqIdxWidth.W))))
val readyQueue = VecInit(srcQueue.map(a => ParallelAND(a.map(_.state === SrcState.rdy)).asBool).
zip(validQueue).map{ case (a,b) => a&b })
// select a ready entry
// without replay, select is effectively the same stage as deq (attached)
// with replay, select and deq form a two-stage pipeline
val moveMask = WireInit(0.U(iqSize.W))
val selectedIdxRegOH = Wire(UInt(iqSize.W))
val selectMask = WireInit(VecInit(
(0 until iqSize).map(i =>
readyQueue(i) && !(selectedIdxRegOH(i) && io.deq.fire())
// TODO: add redirect here, may cause long latency , change it
)
))
val haveBubble = Wire(Bool())
val (selectedIdxWire, selected) = PriorityEncoderWithFlag(selectMask)
val redSel = uop(idxQueue(selectedIdxWire)).roqIdx.needFlush(io.redirect)
val selValid = !redSel && selected && !haveBubble
val selReg = RegNext(selValid)
val selectedIdxReg = RegNext(selectedIdxWire - moveMask(selectedIdxWire))
selectedIdxRegOH := UIntToOH(selectedIdxReg)
// sel bubble
// TODO:
val bubIdxRegOH = Wire(UInt(iqSize.W))
val bubMask = WireInit(VecInit(
(0 until iqSize).map(i => emptyQueue(i) && !bubIdxRegOH(i))
))
val (firstBubble, findBubble) = PriorityEncoderWithFlag(bubMask)
haveBubble := findBubble && (firstBubble < tailPtr)
val bubValid = haveBubble
val bubReg = RegNext(bubValid)
val bubIdxReg = RegNext(firstBubble - moveMask(firstBubble))
bubIdxRegOH := UIntToOH(bubIdxReg)
// deq
// TODO: divide needFeedback and not needFeedback
val deqValid = bubReg/*fire an bubble*/ || (selReg && io.deq.ready && !needFeedback/*fire an rdy*/)
val deqIdx = Mux(bubReg, bubIdxReg, selectedIdxReg) // TODO: may have one more cycle delay than fire slot
moveMask := {
(Fill(iqSize, 1.U(1.W)) << deqIdx)(iqSize-1, 0)
} & Fill(iqSize, deqValid)
for(i <- 0 until iqSize-1){
when(moveMask(i)){
idxQueue(i) := idxQueue(i+1)
srcQueue(i).zip(srcQueue(i+1)).map{case (a,b) => a := b}
stateQueue(i) := stateQueue(i+1)
}
}
when(deqValid){
idxQueue.last := idxQueue(deqIdx)
stateQueue.last := s_idle
}
when (selReg && io.deq.ready && needFeedback) {
stateQueue(selectedIdxReg) := s_wait
}
// redirect
val redHitVec = (0 until iqSize).map(i => uop(idxQueue(i)).roqIdx.needFlush(io.redirect))
val fbMatchVec = (0 until iqSize).map(i =>
uop(idxQueue(i)).roqIdx.asUInt === io.tlbFeedback.bits.roqIdx.asUInt && io.tlbFeedback.valid && (stateQueue(i) === s_wait || stateQueue(i)===s_valid))
// TODO: feedback at the same cycle now, may change later
//redHitVec.zip(validQueue).map{ case (r,v) => when (r) { v := false.B } }
for (i <- 0 until iqSize) {
val cnt = cntQueue(idxQueue(i))
if (i != 0) { // TODO: combine the two case
val nextIdx = i.U - moveMask(i-1)
when (stateQueue(i)===s_replay) {
when (cnt===0.U) { stateQueue(nextIdx) := s_valid }
.otherwise { cnt := cnt - 1.U }
}
when (fbMatchVec(i)) {
stateQueue(nextIdx) := Mux(io.tlbFeedback.bits.hit, s_idle, s_replay)
cnt := Mux(io.tlbFeedback.bits.hit, cnt, (replayDelay-1).U)
}
when (redHitVec(i)) { stateQueue(nextIdx) := s_idle }
} else { when (!moveMask(i)) {
val nextIdx = i
when (stateQueue(i)===s_replay) {
when (cnt===0.U) { stateQueue(nextIdx) := s_valid }
.otherwise { cnt := cnt - 1.U }
}
when (fbMatchVec(i)) {
stateQueue(nextIdx) := Mux(io.tlbFeedback.bits.hit, s_idle, s_replay)
cnt := Mux(io.tlbFeedback.bits.hit, cnt, (replayDelay-1).U)
}
when (redHitVec(i)) { stateQueue(nextIdx) := s_idle }
}}
}
// bypass send
// store selected uops and send them out one cycle before the result comes back
def bpSelCheck(uop: MicroOp): Bool = { // TODO: wanna a map from FunctionUnit.scala
val fuType = uop.ctrl.fuType
(fuType === FuType.alu) ||
(fuType === FuType.mul) ||
(fuType === FuType.jmp) ||
(fuType === FuType.i2f) ||
(fuType === FuType.csr) ||
(fuType === FuType.fence) ||
(fuType === FuType.fmac)
}
val bpQueue = Module(new BypassQueue(fixedDelay))
bpQueue.io.in.valid := selValid // FIXME: error when function is blocked => fu should not be blocked
bpQueue.io.in.bits := uop(idxQueue(selectedIdxWire))
bpQueue.io.redirect := io.redirect
io.selectedUop.valid := bpQueue.io.out.valid && bpSelCheck(bpQueue.io.out.bits)
io.selectedUop.bits := bpQueue.io.out.bits
if(fixedDelay > 0) {
XSDebug(io.selectedUop.valid, p"SelBypass: pc:0x${Hexadecimal(io.selectedUop.bits.cf.pc)} roqIdx:${io.selectedUop.bits.roqIdx} pdest:${io.selectedUop.bits.pdest} rfWen:${io.selectedUop.bits.ctrl.rfWen} fpWen:${io.selectedUop.bits.ctrl.fpWen}\n" )
}
// output
io.deq.valid := selReg && !uop(idxQueue(selectedIdxReg)).roqIdx.needFlush(io.redirect)// TODO: read it and add assert for rdyQueue
io.deq.bits.uop := uop(idxQueue(selectedIdxReg))
io.deq.bits.src1 := data(idxQueue(selectedIdxReg))(0)
if(srcNum > 1) { io.deq.bits.src2 := data(idxQueue(selectedIdxReg))(1) }
if(srcNum > 2) { io.deq.bits.src3 := data(idxQueue(selectedIdxReg))(2) } // TODO: beautify it
// enq
val tailAfterRealDeq = tailPtr - (io.deq.fire() && !needFeedback|| bubReg)
val isFull = tailAfterRealDeq.head(1).asBool() // tailPtr===qsize.U
tailPtr := tailAfterRealDeq + io.enqCtrl.fire()
io.enqCtrl.ready := !isFull && !io.redirect.valid // TODO: check this redirect && need more optimization
val enqUop = io.enqCtrl.bits
val srcTypeSeq = Seq(enqUop.ctrl.src1Type, enqUop.ctrl.src2Type, enqUop.ctrl.src3Type)
val srcSeq = Seq(enqUop.psrc1, enqUop.psrc2, enqUop.psrc3)
val srcStateSeq = Seq(enqUop.src1State, enqUop.src2State, enqUop.src3State)
val srcDataSeq = Seq(io.enqData.src1, io.enqData.src2, io.enqData.src3)
val enqPtr = Mux(tailPtr.head(1).asBool, selectedIdxReg, tailPtr.tail(1))
val enqIdx_data = idxQueue(enqPtr)
val enqIdx_ctrl = tailAfterRealDeq.tail(1)
val enqIdxNext = RegNext(enqIdx_data)
val enqBpVec = (0 until srcNum).map(i => bypass(SrcBundle(srcSeq(i), srcStateSeq(i), srcTypeSeq(i)), true.B))
when (io.enqCtrl.fire()) {
uop(enqIdx_data) := enqUop
stateQueue(enqIdx_ctrl) := s_valid
srcQueue(enqIdx_ctrl).zipWithIndex.map{ case (s,i) =>
s := SrcBundle.check(srcSeq(i), Mux(enqBpVec(i)._1, SrcState.rdy, srcStateSeq(i)), srcTypeSeq(i)) }
XSDebug(p"EnqCtrlFire: roqIdx:${enqUop.roqIdx} pc:0x${Hexadecimal(enqUop.cf.pc)} src1:${srcSeq(0)} state:${srcStateSeq(0)} type:${srcTypeSeq(0)} src2:${srcSeq(1)} state:${srcStateSeq(1)} type:${srcTypeSeq(1)} src3:${srcSeq(2)} state:${srcStateSeq(2)} type:${srcTypeSeq(2)} enqBpHit:${enqBpVec(0)._1}${enqBpVec(1)._1}${enqBpVec(2)._1}\n")
}
when (RegNext(io.enqCtrl.fire())) {
for(i <- data(0).indices) { data(enqIdxNext)(i) := Mux(enqBpVec(i)._2, enqBpVec(i)._3, srcDataSeq(i)) }
XSDebug(p"EnqDataFire: idx:${enqIdxNext} src1:0x${Hexadecimal(srcDataSeq(0))} src2:0x${Hexadecimal(srcDataSeq(1))} src3:0x${Hexadecimal(srcDataSeq(2))} enqBpHit:(${enqBpVec(0)._2}|0x${Hexadecimal(enqBpVec(0)._3)})(${enqBpVec(1)._2}|0x${Hexadecimal(enqBpVec(1)._3)})(${enqBpVec(2)._2}|0x${Hexadecimal(enqBpVec(2)._3)}\n")
}
// wakeup and bypass
def wakeup(src: SrcBundle, valid: Bool) : (Bool, UInt) = {
val hitVec = io.extraListenPorts.map(port => src.hit(port.bits.uop) && port.valid)
assert(RegNext(PopCount(hitVec)===0.U || PopCount(hitVec)===1.U))
val hit = ParallelOR(hitVec) && valid
(hit, ParallelMux(hitVec zip io.extraListenPorts.map(_.bits.data)))
}
def bypass(src: SrcBundle, valid: Bool) : (Bool, Bool, UInt) = {
val hitVec = io.broadcastedUops.map(port => src.hit(port.bits) && port.valid)
assert(RegNext(PopCount(hitVec)===0.U || PopCount(hitVec)===1.U))
val hit = ParallelOR(hitVec) && valid
(hit, RegNext(hit), ParallelMux(hitVec.map(RegNext(_)) zip io.writeBackedData))
}
for (i <- 0 until iqSize) {
for (j <- 0 until srcNum) {
val (wuHit, wuData) = wakeup(srcQueue(i)(j), validQueue(i))
val (bpHit, bpHitReg, bpData) = bypass(srcQueue(i)(j), validQueue(i))
when (wuHit || bpHit) { srcQueue(i.U - moveMask(i))(j).state := SrcState.rdy }
when (wuHit) { data(idxQueue(i))(j) := wuData }
when (bpHitReg) { data(RegNext(idxQueue(i)))(j) := bpData }
XSDebug(wuHit, p"WUHit: (${i.U})(${j.U}) Data:0x${Hexadecimal(wuData)} idx:${idxQueue(i)}\n")
XSDebug(bpHit, p"BPHit: (${i.U})(${j.U}) Ctrl idx:${idxQueue(i)}\n")
XSDebug(bpHitReg, p"BPHit: (${i.U})(${j.U}) Data:0x${Hexadecimal(bpData)} idx:${idxQueue(i)}\n")
}
}
// other io
io.numExist := tailPtr
// assert
assert(tailPtr <= iqSize.U)
// log
// TODO: add log
val print = io.enqCtrl.valid || io.deq.valid || ParallelOR(validQueue) || tailPtr=/=0.U || true.B
XSDebug(print, p"In(${io.enqCtrl.valid} ${io.enqCtrl.ready}) Out(${io.deq.valid} ${io.deq.ready}) tailPtr:${tailPtr} tailPtr.tail:${tailPtr.tail(1)} tailADeq:${tailAfterRealDeq} isFull:${isFull} validQue:b${Binary(VecInit(validQueue).asUInt)} readyQueue:${Binary(readyQueue.asUInt)} needFeedback:${needFeedback}\n")
XSDebug(io.redirect.valid && print, p"Redirect: roqIdx:${io.redirect.bits.roqIdx} isException:${io.redirect.bits.isException} isMisPred:${io.redirect.bits.isMisPred} isReplay:${io.redirect.bits.isReplay} isFlushPipe:${io.redirect.bits.isFlushPipe} RedHitVec:b${Binary(VecInit(redHitVec).asUInt)}\n")
XSDebug(io.tlbFeedback.valid && print, p"TlbFeedback: roqIdx:${io.tlbFeedback.bits.roqIdx} hit:${io.tlbFeedback.bits.hit} fbMatchVec:${Binary(VecInit(fbMatchVec).asUInt)}\n")
XSDebug(print, p"SelMask:b${Binary(selectMask.asUInt)} MoveMask:b${Binary(moveMask.asUInt)} rdyQue:b${Binary(readyQueue.asUInt)} selIdxWire:${selectedIdxWire} sel:${selected} redSel:${redSel} selValid:${selValid} selIdxReg:${selectedIdxReg} selReg:${selReg} haveBubble:${haveBubble} deqValid:${deqValid} firstBubble:${firstBubble} findBubble:${findBubble} bubReg:${bubReg} bubIdxReg:${bubIdxReg} selRegOH:b${Binary(selectedIdxRegOH)}\n")
XSDebug(io.selectedUop.valid, p"Select: roqIdx:${io.selectedUop.bits.roqIdx} pc:0x${Hexadecimal(io.selectedUop.bits.cf.pc)} fuType:b${Binary(io.selectedUop.bits.ctrl.fuType)} FuOpType:b${Binary(io.selectedUop.bits.ctrl.fuOpType)}}\n")
XSDebug(io.deq.fire, p"Deq: SelIdxReg:${selectedIdxReg} pc:0x${Hexadecimal(io.deq.bits.uop.cf.pc)} Idx:${idxQueue(selectedIdxReg)} roqIdx:${io.deq.bits.uop.roqIdx} src1:0x${Hexadecimal(io.deq.bits.src1)} src2:0x${Hexadecimal(io.deq.bits.src2)} src3:0x${Hexadecimal(io.deq.bits.src3)}\n")
val broadcastedUops = io.broadcastedUops
val extraListenPorts = io.extraListenPorts
for (i <- broadcastedUops.indices) {
XSDebug(broadcastedUops(i).valid && print, p"BpUops(${i.U}): pc:0x${Hexadecimal(broadcastedUops(i).bits.cf.pc)} roqIdx:${broadcastedUops(i).bits.roqIdx} idxQueue:${selectedIdxWire} pdest:${broadcastedUops(i).bits.pdest} rfWen:${broadcastedUops(i).bits.ctrl.rfWen} fpWen:${broadcastedUops(i).bits.ctrl.fpWen} data(last):0x${Hexadecimal(io.writeBackedData(i))}\n")
XSDebug(RegNext(broadcastedUops(i).valid && print), p"BpUopData(${i.U}): data(last):0x${Hexadecimal(io.writeBackedData(i))}\n")
}
for (i <- extraListenPorts.indices) {
XSDebug(extraListenPorts(i).valid && print, p"WakeUp(${i.U}): pc:0x${Hexadecimal(extraListenPorts(i).bits.uop.cf.pc)} roqIdx:${extraListenPorts(i).bits.uop.roqIdx} pdest:${extraListenPorts(i).bits.uop.pdest} rfWen:${extraListenPorts(i).bits.uop.ctrl.rfWen} fpWen:${extraListenPorts(i).bits.uop.ctrl.fpWen} data:0x${Hexadecimal(extraListenPorts(i).bits.data)}\n")
}
XSDebug(print, " :IQ|s|r|cnt| src1 |src2 | src3|pdest(rf|fp)| roqIdx|pc\n")
for(i <- 0 until iqSize) {
XSDebug(print, p"${i.U}: ${idxQueue(i)}|${stateQueue(i)}|${readyQueue(i)}| ${cntQueue(idxQueue(i))}|${srcQueue(i)(0)} 0x${Hexadecimal(data(idxQueue(i))(0))}|${srcQueue(i)(1)} 0x${Hexadecimal(data(idxQueue(i))(1))}|${srcQueue(i)(2)} 0x${Hexadecimal(data(idxQueue(i))(2))}|${uop(idxQueue(i)).pdest}(${uop(idxQueue(i)).ctrl.rfWen}|${uop(idxQueue(i)).ctrl.fpWen})|${uop(idxQueue(i)).roqIdx}|${Hexadecimal(uop(idxQueue(i)).cf.pc)}\n")
}
}
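For the load/store reservation stations (feedback = true) each entry follows a small state machine: s_valid on enqueue, s_wait once issued, then back to s_idle on a TLB hit or into s_replay for replayDelay cycles on a miss; a redirect drops it to s_idle. A stand-alone behavioural sketch of that per-entry policy (it models only the states, not the queue compaction above, and simplifies the priority between issue and feedback):

```scala
// Behavioural model of one RS entry's state machine (feedback variant).
// States mirror s_idle :: s_valid :: s_wait :: s_replay in ReservationStationNew.
sealed trait EntryState
case object Idle   extends EntryState
case object Valid  extends EntryState
case object Wait   extends EntryState
case object Replay extends EntryState

case class Entry(state: EntryState, cnt: Int)

def step(e: Entry,
         enqueue: Boolean,              // dispatched into this slot
         issued: Boolean,               // selected and sent to the ld/st unit
         tlbFeedback: Option[Boolean],  // Some(hit) when feedback arrives
         flushed: Boolean,              // killed by a redirect
         replayDelay: Int = 16): Entry = {
  if (flushed) Entry(Idle, 0)
  else (e.state, tlbFeedback) match {
    case (Valid, _) if issued        => Entry(Wait, e.cnt)
    case (Wait | Valid, Some(true))  => Entry(Idle, 0)                  // TLB hit: done
    case (Wait | Valid, Some(false)) => Entry(Replay, replayDelay - 1)  // TLB miss: wait and retry
    case (Replay, _) if e.cnt == 0   => Entry(Valid, 0)                 // retry after the delay
    case (Replay, _)                 => Entry(e.state, e.cnt - 1)
    case (Idle, _) if enqueue        => Entry(Valid, 0)
    case _                           => e
  }
}
```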
......@@ -20,8 +20,7 @@ class Regfile
(
numReadPorts: Int,
numWirtePorts: Int,
hasZero: Boolean,
isMemRf: Boolean = false
hasZero: Boolean
) extends XSModule {
val io = IO(new Bundle() {
val readPorts = Vec(numReadPorts, new RfReadPort)
......@@ -29,19 +28,6 @@ class Regfile
})
val mem = Mem(NRPhyRegs, UInt(XLEN.W))
val debugRegSync = WireInit(0.U(XLEN.W))
val debugCnt = RegInit(0.U((PhyRegIdxWidth+1).W))
when(!debugCnt.head(1).asBool()){
debugCnt := debugCnt + 1.U
if(isMemRf){
BoringUtils.addSink(debugRegSync, "DEBUG_REG_SYNC")
mem(debugCnt) := debugRegSync
} else if (hasZero) {
debugRegSync := mem(debugCnt)
BoringUtils.addSource(debugRegSync, "DEBUG_REG_SYNC")
}
}
for(r <- io.readPorts){
val addr_reg = RegNext(r.addr)
......@@ -54,11 +40,9 @@ class Regfile
}
}
if(!isMemRf){
val debugArchRat = WireInit(VecInit(Seq.fill(32)(0.U(PhyRegIdxWidth.W))))
BoringUtils.addSink(debugArchRat, if(hasZero) "DEBUG_INI_ARCH_RAT" else "DEBUG_FP_ARCH_RAT")
val debugArchRat = WireInit(VecInit(Seq.fill(32)(0.U(PhyRegIdxWidth.W))))
BoringUtils.addSink(debugArchRat, if(hasZero) "DEBUG_INI_ARCH_RAT" else "DEBUG_FP_ARCH_RAT")
val debugArchReg = WireInit(VecInit(debugArchRat.zipWithIndex.map(x => if(hasZero && x._2==0) 0.U else mem(x._1))))
BoringUtils.addSource(debugArchReg, if(hasZero) "DEBUG_INT_ARCH_REG" else "DEBUG_FP_ARCH_REG")
}
val debugArchReg = WireInit(VecInit(debugArchRat.zipWithIndex.map(x => if(hasZero && x._2==0) 0.U else mem(x._1))))
BoringUtils.addSource(debugArchReg, if(hasZero) "DEBUG_INT_ARCH_REG" else "DEBUG_FP_ARCH_REG")
}
......@@ -11,9 +11,9 @@ class Rename extends XSModule {
val roqCommits = Vec(CommitWidth, Flipped(ValidIO(new RoqCommit)))
val wbIntResults = Vec(NRIntWritePorts, Flipped(ValidIO(new ExuOutput)))
val wbFpResults = Vec(NRFpWritePorts, Flipped(ValidIO(new ExuOutput)))
val intRfReadAddr = Vec(NRIntReadPorts + NRMemReadPorts, Input(UInt(PhyRegIdxWidth.W)))
val intRfReadAddr = Vec(NRIntReadPorts, Input(UInt(PhyRegIdxWidth.W)))
val fpRfReadAddr = Vec(NRFpReadPorts, Input(UInt(PhyRegIdxWidth.W)))
val intPregRdy = Vec(NRIntReadPorts + NRMemReadPorts, Output(Bool()))
val intPregRdy = Vec(NRIntReadPorts, Output(Bool()))
val fpPregRdy = Vec(NRFpReadPorts, Output(Bool()))
// set preg to busy when replay
val replayPregReq = Vec(ReplayWidth, Input(new ReplayPregReq))
......@@ -44,7 +44,7 @@ class Rename extends XSModule {
val fpRat = Module(new RenameTable(float = true)).io
val intRat = Module(new RenameTable(float = false)).io
val fpBusyTable = Module(new BusyTable(NRFpReadPorts, NRFpWritePorts)).io
val intBusyTable = Module(new BusyTable(NRIntReadPorts+NRMemReadPorts, NRIntWritePorts)).io
val intBusyTable = Module(new BusyTable(NRIntReadPorts, NRIntWritePorts)).io
fpFreeList.redirect := io.redirect
intFreeList.redirect := io.redirect
......
......@@ -68,7 +68,7 @@ class AtomicsPipe extends DCacheModule
// ---------------------------------------
// stage 2
val s2_req = RegNext(s1_req)
val s2_valid = RegNext(s1_valid && !io.lsu.s1_kill, init = false.B)
val s2_valid = RegNext(s1_valid, init = false.B)
dump_pipeline_reqs("AtomicsPipe s2", s2_valid, s2_req)
......
......@@ -34,7 +34,6 @@ class AtomicsMissQueue extends DCacheModule
io.replay.req.valid := false.B
io.replay.req.bits := DontCare
io.replay.resp.ready := false.B
io.replay.s1_kill := false.B
io.miss_req.valid := false.B
io.miss_req.bits := DontCare
......
......@@ -25,7 +25,19 @@ class DCacheMeta extends DCacheBundle {
val replay = Bool() // whether it's a replayed request?
}
// ordinary load and special memory operations(lr/sc, atomics)
// for load from load unit
// cycle 0: vaddr
// cycle 1: paddr
class DCacheLoadReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(VAddrBits.W)
val data = UInt(DataBits.W)
val mask = UInt((DataBits/8).W)
val meta = new DCacheMeta
}
// special memory operations(lr/sc, atomics)
class DCacheWordReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
......@@ -45,6 +57,16 @@ class DCacheLineReq extends DCacheBundle
val meta = new DCacheMeta
}
class DCacheLoadResp extends DCacheBundle
{
val data = UInt(DataBits.W)
val meta = new DCacheMeta
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val nack = Bool()
}
class DCacheWordResp extends DCacheBundle
{
val data = UInt(DataBits.W)
......@@ -65,12 +87,19 @@ class DCacheLineResp extends DCacheBundle
val nack = Bool()
}
class DCacheWordIO extends DCacheBundle
class DCacheLoadIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq )
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
// kill previous cycle's req
val s1_kill = Output(Bool())
val s1_kill = Output(Bool())
val s1_paddr = Output(UInt(PAddrBits.W))
}
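DCacheLoadIO splits a load access over two cycles: the request carries only the virtual address, and the translated physical address (plus an optional kill) arrives on s1_paddr/s1_kill one cycle later, so the cache can start its SRAM reads before translation completes. A plain-Scala timing model of that handshake (a sketch, not the Chisel interface):

```scala
// Behavioural model of the two-cycle load request on DCacheLoadIO: the request
// carries the vaddr in cycle 0; the translated paddr (or a kill) arrives in cycle 1.
final class LoadReqModel {
  private var pending: Option[Long] = None          // vaddr accepted last cycle
  def cycle0(vaddr: Long): Unit = { pending = Some(vaddr) }
  def cycle1(paddr: Long, kill: Boolean): Option[Long] = {
    // from stage 1 onwards the tag compare uses the physical address
    val res = if (kill) None else pending.map(_ => paddr)
    pending = None
    res
  }
}

// val m = new LoadReqModel
// m.cycle0(0x2000L)                      // cycle 0: vaddr indexes the cache SRAMs
// m.cycle1(0x80002000L, kill = false)    // cycle 1: Some(0x80002000L), paddr used for tag match
```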
class DCacheWordIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
}
class DCacheLineIO extends DCacheBundle
......@@ -80,7 +109,7 @@ class DCacheLineIO extends DCacheBundle
}
class DCacheToLsuIO extends DCacheBundle {
val load = Vec(LoadPipelineWidth, Flipped(new DCacheWordIO)) // for speculative load
val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
val lsroq = Flipped(new DCacheLineIO) // lsroq load/store
val store = Flipped(new DCacheLineIO) // for sbuffer
val atomics = Flipped(new DCacheWordIO) // atomics reqs
......@@ -229,6 +258,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
for (w <- 0 until LoadPipelineWidth) {
val load_w_nack = nack_load(io.lsu.load(w).req.bits.addr)
ldu(w).io.lsu.req <> io.lsu.load(w).req
ldu(w).io.lsu.s1_paddr <> io.lsu.load(w).s1_paddr
ldu(w).io.nack := load_w_nack
XSDebug(load_w_nack, s"LoadUnit $w nacked\n")
......@@ -289,8 +319,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
"MMIO requests should not go to cache")
assert(!(atomicsReq.fire() && atomicsReq.bits.meta.tlb_miss),
"TLB missed requests should not go to cache")
assert(!io.lsu.atomics.s1_kill, "Lsroq should never use s1 kill on atomics")
//----------------------------------------
// miss queue
......
......@@ -8,7 +8,7 @@ import utils.XSDebug
class LoadPipe extends DCacheModule
{
val io = IO(new DCacheBundle{
val lsu = Flipped(new DCacheWordIO)
val lsu = Flipped(new DCacheLoadIO)
val data_read = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
val meta_read = DecoupledIO(new L1MetaReadReq)
......@@ -56,7 +56,8 @@ class LoadPipe extends DCacheModule
// stage 1
val s1_req = RegNext(s0_req)
val s1_valid = RegNext(s0_valid, init = false.B)
val s1_addr = s1_req.addr
// in stage 1, load unit gets the physical address
val s1_addr = io.lsu.s1_paddr
val s1_nack = RegNext(io.nack)
dump_pipeline_reqs("LoadPipe s1", s1_valid, s1_req)
......@@ -76,6 +77,7 @@ class LoadPipe extends DCacheModule
dump_pipeline_reqs("LoadPipe s2", s2_valid, s2_req)
val s2_addr = RegNext(s1_addr)
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegNext(meta_resp(w).coh)))
......@@ -96,12 +98,12 @@ class LoadPipe extends DCacheModule
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) {
s2_data(w) := data_resp(w)(get_row(s2_req.addr))
s2_data(w) := data_resp(w)(get_row(s2_addr))
}
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// the index of word in a row, in case rowBits != wordBits
val s2_word_idx = if (rowWords == 1) 0.U else s2_req.addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
val s2_word_idx = if (rowWords == 1) 0.U else s2_addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
val s2_nack_hit = RegNext(s1_nack)
// Can't allocate MSHR for same set currently being written back
......
......@@ -33,12 +33,13 @@ object genWdata {
class LsPipelineBundle extends XSBundle {
val vaddr = UInt(VAddrBits.W)
val paddr = UInt(PAddrBits.W)
val func = UInt(6.W)
val func = UInt(6.W) //fixme???
val mask = UInt(8.W)
val data = UInt(XLEN.W)
val uop = new MicroOp
val miss = Bool()
val tlbMiss = Bool()
val mmio = Bool()
val rollback = Bool()
......@@ -70,7 +71,7 @@ class MemToBackendIO extends XSBundle {
// replay all instructions from dispatch
val replayAll = ValidIO(new Redirect)
// replay mem instructions from Load Queue/Store Queue
val tlbFeedback = Vec(exuParameters.LduCnt + exuParameters.LduCnt, ValidIO(new TlbFeedback))
val tlbFeedback = Vec(exuParameters.LduCnt + exuParameters.StuCnt, ValidIO(new TlbFeedback))
val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit)))
val dp1Req = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val lsIdxs = Output(Vec(RenameWidth, new LSIdx))
......@@ -88,7 +89,7 @@ class MemToBackendIO extends XSBundle {
class Memend extends XSModule {
val io = IO(new Bundle{
val backend = new MemToBackendIO
val loadUnitToDcacheVec = Vec(exuParameters.LduCnt, new DCacheWordIO)
val loadUnitToDcacheVec = Vec(exuParameters.LduCnt, new DCacheLoadIO)
val loadMiss = new DCacheLineIO
val atomics = new DCacheWordIO
val sbufferToDcache = new DCacheLineIO
......
......@@ -131,7 +131,6 @@ class LsqWrappper extends XSModule with HasDCacheParameters with NeedImpl {
}.otherwise{
io.uncache.resp <> storeQueue.io.uncache.resp
}
io.uncache.s1_kill := false.B
assert(!(loadQueue.io.uncache.req.valid && storeQueue.io.uncache.req.valid))
assert(!(loadQueue.io.uncache.resp.valid && storeQueue.io.uncache.resp.valid))
......
......@@ -32,7 +32,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val brqRedirect = Input(Valid(new Redirect))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback store
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit)))
val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
......@@ -121,7 +121,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
)
}.otherwise {
XSInfo(io.loadIn(i).valid, "load hit write to cbd idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
io.loadIn(i).bits.uop.lqIdx.asUInt,
io.loadIn(i).bits.uop.cf.pc,
io.loadIn(i).bits.vaddr,
......@@ -253,10 +253,13 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
allocated(i) && valid(i) && !writebacked(i)
})).asUInt() // use uint instead vec to reduce verilog lines
val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LoadQueueSize).W)))
val loadWbSelV= Wire(Vec(StorePipelineWidth, Bool()))
val lselvec0 = PriorityEncoderOH(loadWbSelVec)
val lselvec1 = PriorityEncoderOH(loadWbSelVec & (~lselvec0).asUInt)
loadWbSel(0) := OHToUInt(lselvec0)
loadWbSelV(0):= lselvec0.orR
loadWbSel(1) := OHToUInt(lselvec1)
loadWbSelV(1) := lselvec1.orR
(0 until StorePipelineWidth).map(i => {
// data select
val rdata = data(loadWbSel(i)).data
......@@ -290,11 +293,12 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
io.ldout(i).bits.redirect := DontCare
io.ldout(i).bits.brUpdate := DontCare
io.ldout(i).bits.debug.isMMIO := data(loadWbSel(i)).mmio
io.ldout(i).valid := loadWbSelVec(loadWbSel(i))
io.ldout(i).bits.fflags := DontCare
io.ldout(i).valid := loadWbSelVec(loadWbSel(i)) && loadWbSelV(i)
when(io.ldout(i).fire()) {
writebacked(loadWbSel(i)) := true.B
XSInfo(io.loadIn(i).valid, "load miss write to cbd idx %d pc 0x%x paddr %x data %x mmio %x\n",
XSInfo("load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
io.ldout(i).bits.uop.roqIdx.asUInt,
io.ldout(i).bits.uop.lqIdx.asUInt,
io.ldout(i).bits.uop.cf.pc,
data(loadWbSel(i)).paddr,
......@@ -383,6 +387,8 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val xorMask = lqIdxMask ^ headMask
val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === ringBufferHeadExtended.flag
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
// check if a load already in the lq needs to be rolled back
val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
......@@ -407,18 +413,19 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val wbViolationUop = getOldestInTwo(wbViolationVec, io.loadIn.map(_.bits.uop))
XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")
// check if rollback is needed for load in l4
val l4ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => {
// check if rollback is needed for load in l1
val l1ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => {
io.forward(j).valid && // L4 valid\
isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.forward(j).mask).orR
}))
val l4Violation = l4ViolationVec.asUInt().orR()
val l4ViolationUop = getOldestInTwo(l4ViolationVec, io.forward.map(_.uop))
val l1Violation = l1ViolationVec.asUInt().orR()
val l1ViolationUop = getOldestInTwo(l1ViolationVec, io.forward.map(_.uop))
XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")
val rollbackValidVec = Seq(lqViolation, wbViolation, l4Violation)
val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l4ViolationUop)
val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)
rollback(i).valid := Cat(rollbackValidVec).orR
val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
val oneAfterZero = mask(1)(0)
......@@ -432,6 +439,11 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
rollback(i).bits.isException := false.B
rollback(i).bits.isFlushPipe := false.B
XSDebug(
l1Violation,
"need rollback (l4 load) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
)
XSDebug(
lqViolation,
"need rollback (ld wb before store) pc %x roqidx %d target %x\n",
......@@ -442,11 +454,6 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
"need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
)
XSDebug(
l4Violation,
"need rollback (l4 load) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l4ViolationUop.roqIdx.asUInt
)
}.otherwise {
rollback(i).valid := false.B
}
......@@ -491,7 +498,6 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
io.uncache.req.bits.meta.replay := false.B
io.uncache.resp.ready := true.B
io.uncache.s1_kill := false.B
when(io.uncache.req.fire()){
pending(ringBufferTail) := false.B
......@@ -559,7 +565,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
for (i <- 0 until LoadQueueSize) {
if (i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x ", uop(i).cf.pc)
XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, data(i).paddr)
PrintFlag(allocated(i), "a")
PrintFlag(allocated(i) && valid(i), "v")
PrintFlag(allocated(i) && writebacked(i), "w")
......
......@@ -273,6 +273,8 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.sbuffer(i).bits.meta.mmio := mmio
io.sbuffer(i).bits.meta.mask := data(ptr).mask
XSDebug(io.sbuffer(i).fire(), "[SBUFFER STORE REQ] pa %x data %x\n", data(ptr).paddr, data(ptr).data)
// update sq meta if the store inst is sent to the sbuffer
when(storeCommitValid(i) && (mmio || io.sbuffer(i).ready)) {
allocated(ptr) := false.B
......@@ -304,7 +306,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.uncache.req.bits.meta.replay := false.B
io.uncache.resp.ready := true.B
io.uncache.s1_kill := false.B
when(io.uncache.req.fire()){
pending(ringBufferTail) := false.B
......@@ -363,7 +364,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
for (i <- 0 until StoreQueueSize) {
if (i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x ", uop(i).cf.pc)
XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, data(i).paddr)
PrintFlag(allocated(i), "a")
PrintFlag(allocated(i) && valid(i), "v")
PrintFlag(allocated(i) && writebacked(i), "w")
......
......@@ -590,18 +590,19 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe
val wbViolationUop = getOldestInTwo(wbViolationVec, io.loadIn.map(_.bits.uop))
XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")
// check if rollback is needed for load in l4
val l4ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => {
// check if rollback is needed for load in l1
val l1ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => {
io.forward(j).valid && // L4 valid\
isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.forward(j).mask).orR
}))
val l4Violation = l4ViolationVec.asUInt().orR()
val l4ViolationUop = getOldestInTwo(l4ViolationVec, io.forward.map(_.uop))
val l1Violation = l1ViolationVec.asUInt().orR()
val l1ViolationUop = getOldestInTwo(l1ViolationVec, io.forward.map(_.uop))
XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")
val rollbackValidVec = Seq(lsroqViolation, wbViolation, l4Violation)
val rollbackUopVec = Seq(lsroqViolationUop, wbViolationUop, l4ViolationUop)
val rollbackValidVec = Seq(lsroqViolation, wbViolation, l1Violation)
val rollbackUopVec = Seq(lsroqViolationUop, wbViolationUop, l1ViolationUop)
rollback(i).valid := Cat(rollbackValidVec).orR
val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
val oneAfterZero = mask(1)(0)
......@@ -615,6 +616,12 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe
rollback(i).bits.isException := false.B
rollback(i).bits.isFlushPipe := false.B
XSDebug(
l1Violation,
"need rollback (l4 load) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
)
XSDebug(
lsroqViolation,
"need rollback (ld wb before store) pc %x roqidx %d target %x\n",
......@@ -625,11 +632,6 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe
"need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
)
XSDebug(
l4Violation,
"need rollback (l4 load) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l4ViolationUop.roqIdx.asUInt
)
}.otherwise {
rollback(i).valid := false.B
}
......@@ -674,7 +676,6 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe
io.uncache.req.bits.meta.replay := false.B
io.uncache.resp.ready := true.B
io.uncache.s1_kill := false.B
when(io.uncache.req.fire()){
pending(ringBufferTail) := false.B
......
......@@ -42,7 +42,6 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
io.dcache.req.valid := false.B
io.dcache.req.bits := DontCare
io.dcache.s1_kill := false.B
io.dcache.resp.ready := false.B
io.dtlb.req.valid := false.B
......
......@@ -368,6 +368,9 @@ class NewSbuffer extends XSModule with HasSbufferCst {
XSDebug(valid_tag_match,
p"valid tag match: forward [$i] <> buf[$valid_forward_idx]\n"
)
XSDebug(inflight_tag_match || valid_tag_match,
p"[$i] forward paddr:${Hexadecimal(forward.paddr)}\n"
)
}
}
......
......@@ -8,6 +8,8 @@ std::function<double()> get_sc_time_stamp = []() -> double { return 0; };
double sc_time_stamp() { return get_sc_time_stamp(); }
int main(int argc, const char** argv) {
printf("Emu compiled at %s, %s\n", __DATE__, __TIME__);
setbuf(stderr, mybuf);
auto emu = new Emulator(argc, argv);
......