提交 7962cc88 编写于 作者: W William Wang

Merge remote-tracking branch 'origin/opt-load-to-use' into dev-memend

......@@ -50,7 +50,7 @@ case class XSCoreParameters
BrqSize: Int = 16,
IssQueSize: Int = 8,
NRPhyRegs: Int = 128,
NRIntReadPorts: Int = 8,
NRIntReadPorts: Int = 14,
NRIntWritePorts: Int = 8,
NRFpReadPorts: Int = 14,
NRFpWritePorts: Int = 8,
......
......@@ -57,12 +57,6 @@ class Backend extends XSModule
numWirtePorts = NRFpWritePorts,
hasZero = false
))
val memRf = Module(new Regfile(
numReadPorts = 2*exuParameters.StuCnt + exuParameters.LduCnt,
numWirtePorts = NRIntWritePorts,
hasZero = true,
isMemRf = true
))
// backend redirect, flush pipeline
val redirect = Mux(
......@@ -92,36 +86,39 @@ class Backend extends XSModule
// An exu with config `a` needs wakeup data from an exu with config `b`
// when they share a register file: `a` reads what `b` writes.
def needData(a: ExuConfig, b: ExuConfig): Boolean = {
  val intDependency = a.readIntRf && b.writeIntRf
  val fpDependency = a.readFpRf && b.writeFpRf
  intDependency || fpDependency
}
val reservedStations = exeUnits.
zipWithIndex.
map({ case (exu, i) =>
val cfg = exu.config
val wakeUpDateVec = exuConfigs.zip(exeWbReqs).filter(x => needData(cfg, x._1)).map(_._2)
val bypassCnt = exuConfigs.count(c => c.enableBypass && needData(cfg, c))
val reservedStations = exuConfigs.zipWithIndex.map({ case (cfg, i) =>
val wakeUpDateVec = exuConfigs.zip(exeWbReqs).filter(x => needData(cfg, x._1)).map(_._2)
val bypassCnt = exuConfigs.count(c => c.enableBypass && needData(cfg, c))
println(s"exu:${cfg.name} wakeupCnt:${wakeUpDateVec.length} bypassCnt:$bypassCnt")
println(s"exu:${cfg.name} wakeupCnt:${wakeUpDateVec.length} bypassCnt:$bypassCnt")
val rs = Module(new ReservationStation(
cfg, wakeUpDateVec.length, bypassCnt, cfg.enableBypass, false
))
rs.io.redirect <> redirect
rs.io.numExist <> dispatch.io.numExist(i)
rs.io.enqCtrl <> dispatch.io.enqIQCtrl(i)
rs.io.enqData <> dispatch.io.enqIQData(i)
for(
(wakeUpPort, exuOut) <-
val rs = Module(new ReservationStation(
cfg, wakeUpDateVec.length, bypassCnt, cfg.enableBypass, fifo = false
))
rs.io.redirect <> redirect
rs.io.numExist <> dispatch.io.numExist(i)
rs.io.enqCtrl <> dispatch.io.enqIQCtrl(i)
rs.io.enqData <> dispatch.io.enqIQData(i)
for(
(wakeUpPort, exuOut) <-
rs.io.wakeUpPorts.zip(wakeUpDateVec)
){
wakeUpPort.bits := exuOut.bits
wakeUpPort.valid := exuOut.valid
}
){
wakeUpPort.bits := exuOut.bits
wakeUpPort.valid := exuOut.valid
}
exu.io.in <> rs.io.deq
exu.io.redirect <> redirect
rs
})
cfg match {
case Exu.ldExeUnitCfg =>
case Exu.stExeUnitCfg =>
case otherCfg =>
exeUnits(i).io.in <> rs.io.deq
exeUnits(i).io.redirect <> redirect
}
rs
})
for( rs <- reservedStations){
rs.io.bypassUops <> reservedStations.
......@@ -137,43 +134,10 @@ class Backend extends XSModule
}
}
val issueQueues = exuConfigs.
zipWithIndex.
takeRight(exuParameters.LduCnt + exuParameters.StuCnt).
map({case (cfg, i) =>
val wakeUpDateVec = exuConfigs.zip(exeWbReqs).filter(x => needData(cfg, x._1)).map(_._2)
val bypassUopVec = reservedStations.
filter(r => r.exuCfg.enableBypass && needData(cfg, r.exuCfg)).map(_.io.selectedUop)
val bypassDataVec = exuConfigs.zip(exeWbReqs).
filter(x => x._1.enableBypass && needData(cfg, x._1)).map(_._2)
val iq = Module(new IssueQueue(
cfg, wakeUpDateVec.length, bypassUopVec.length
))
println(s"exu:${cfg.name} wakeupCnt:${wakeUpDateVec.length} bypassCnt:${bypassUopVec.length}")
iq.io.redirect <> redirect
iq.io.tlbFeedback := io.mem.tlbFeedback(i - exuParameters.ExuCnt + exuParameters.LduCnt + exuParameters.StuCnt)
iq.io.enq <> dispatch.io.enqIQCtrl(i)
dispatch.io.numExist(i) := iq.io.numExist
for(
(wakeUpPort, exuOut) <-
iq.io.wakeUpPorts.zip(wakeUpDateVec)
){
wakeUpPort.bits := exuOut.bits
wakeUpPort.valid := exuOut.fire() // data after arbit
}
iq.io.bypassUops <> bypassUopVec
for(i <- bypassDataVec.indices){
iq.io.bypassData(i).valid := bypassDataVec(i).valid
iq.io.bypassData(i).bits := bypassDataVec(i).bits
}
iq
})
io.mem.commits <> roq.io.commits
io.mem.roqDeqPtr := roq.io.roqDeqPtr
io.mem.ldin <> issueQueues.filter(_.exuCfg == Exu.ldExeUnitCfg).map(_.io.deq)
io.mem.stin <> issueQueues.filter(_.exuCfg == Exu.stExeUnitCfg).map(_.io.deq)
io.mem.ldin <> reservedStations.filter(_.exuCfg == Exu.ldExeUnitCfg).map(_.io.deq)
io.mem.stin <> reservedStations.filter(_.exuCfg == Exu.stExeUnitCfg).map(_.io.deq)
jmpExeUnit.io.exception.valid := roq.io.redirect.valid && roq.io.redirect.bits.isException
jmpExeUnit.io.exception.bits := roq.io.exception
......@@ -197,9 +161,9 @@ class Backend extends XSModule
rename.io.redirect <> redirect
rename.io.roqCommits <> roq.io.commits
rename.io.in <> decBuf.io.out
rename.io.intRfReadAddr <> dispatch.io.readIntRf.map(_.addr) ++ dispatch.io.intMemRegAddr
rename.io.intRfReadAddr <> dispatch.io.readIntRf.map(_.addr) ++ dispatch.io.memIntRf.map(_.addr)
rename.io.intPregRdy <> dispatch.io.intPregRdy ++ dispatch.io.intMemRegRdy
rename.io.fpRfReadAddr <> dispatch.io.readFpRf.map(_.addr) ++ dispatch.io.fpMemRegAddr
rename.io.fpRfReadAddr <> dispatch.io.readFpRf.map(_.addr) ++ dispatch.io.memFpRf.map(_.addr)
rename.io.fpPregRdy <> dispatch.io.fpPregRdy ++ dispatch.io.fpMemRegRdy
rename.io.replayPregReq <> dispatch.io.replayPregReq
dispatch.io.redirect <> redirect
......@@ -213,9 +177,8 @@ class Backend extends XSModule
dispatch.io.commits <> roq.io.commits
dispatch.io.lsIdxs <> io.mem.lsIdxs
intRf.io.readPorts <> dispatch.io.readIntRf
fpRf.io.readPorts <> dispatch.io.readFpRf ++ issueQueues.flatMap(_.io.readFpRf)
memRf.io.readPorts <> issueQueues.flatMap(_.io.readIntRf)
intRf.io.readPorts <> dispatch.io.readIntRf ++ dispatch.io.memIntRf
fpRf.io.readPorts <> dispatch.io.readFpRf ++ dispatch.io.memFpRf
io.mem.redirect <> redirect
......@@ -232,9 +195,7 @@ class Backend extends XSModule
rfWrite.data := x.bits.data
rfWrite
}
val intRfWrite = wbIntResults.map(exuOutToRfWrite)
intRf.io.writePorts <> intRfWrite
memRf.io.writePorts <> intRfWrite
intRf.io.writePorts <> wbIntResults.map(exuOutToRfWrite)
fpRf.io.writePorts <> wbFpResults.map(exuOutToRfWrite)
rename.io.wbIntResults <> wbIntResults
......
......@@ -38,13 +38,13 @@ class Dispatch extends XSModule {
val commits = Input(Vec(CommitWidth, Valid(new RoqCommit)))
// read regfile
val readIntRf = Vec(NRIntReadPorts, Flipped(new RfReadPort))
val readFpRf = Vec(NRFpReadPorts - exuParameters.StuCnt, Flipped(new RfReadPort))
val readFpRf = Vec(NRFpReadPorts, Flipped(new RfReadPort))
// read reg status (busy/ready)
val intPregRdy = Vec(NRIntReadPorts, Input(Bool()))
val fpPregRdy = Vec(NRFpReadPorts - exuParameters.StuCnt, Input(Bool()))
val fpPregRdy = Vec(NRFpReadPorts, Input(Bool()))
// load + store reg status (busy/ready)
val intMemRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val fpMemRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val memIntRf = Vec(NRMemReadPorts, Flipped(new RfReadPort))
val memFpRf = Vec(exuParameters.StuCnt, Flipped(new RfReadPort))
val intMemRegRdy = Vec(NRMemReadPorts, Input(Bool()))
val fpMemRegRdy = Vec(exuParameters.StuCnt, Input(Bool()))
// replay: set preg status to not ready
......@@ -52,7 +52,7 @@ class Dispatch extends XSModule {
// to reservation stations
val numExist = Input(Vec(exuParameters.ExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.ExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.ExuCnt - exuParameters.LsExuCnt, Output(new ExuInput))
val enqIQData = Vec(exuParameters.ExuCnt, Output(new ExuInput))
})
val dispatch1 = Module(new Dispatch1)
......@@ -140,8 +140,8 @@ class Dispatch extends XSModule {
// Load/store dispatch queue to load/store issue queues
val lsDispatch = Module(new Dispatch2Ls)
lsDispatch.io.fromDq <> lsDq.io.deq
lsDispatch.io.intRegAddr <> io.intMemRegAddr
lsDispatch.io.fpRegAddr <> io.fpMemRegAddr
lsDispatch.io.readIntRf <> io.memIntRf
lsDispatch.io.readFpRf <> io.memFpRf
lsDispatch.io.intRegRdy <> io.intMemRegRdy
lsDispatch.io.fpRegRdy <> io.fpMemRegRdy
lsDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
......
......@@ -10,12 +10,15 @@ import xiangshan.backend.exu._
class Dispatch2Ls extends XSModule {
val io = IO(new Bundle() {
val fromDq = Flipped(Vec(dpParams.LsDqDeqWidth, DecoupledIO(new MicroOp)))
val intRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val fpRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val readIntRf = Vec(NRIntReadPorts, Flipped(new RfReadPort))
val readFpRf = Vec(NRIntReadPorts, Flipped(new RfReadPort))
// val intRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
// val fpRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val intRegRdy = Vec(NRMemReadPorts, Input(Bool()))
val fpRegRdy = Vec(exuParameters.StuCnt, Input(Bool()))
val numExist = Input(Vec(exuParameters.LsExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.LsExuCnt, Output(new ExuInput))
})
/**
......@@ -70,12 +73,12 @@ class Dispatch2Ls extends XSModule {
val readPort = Seq(0, 1, 2, 4)
for (i <- 0 until exuParameters.LsExuCnt) {
if (i < exuParameters.LduCnt) {
io.intRegAddr(readPort(i)) := io.fromDq(indexVec(i)).bits.psrc1
io.readIntRf(readPort(i)).addr := io.fromDq(indexVec(i)).bits.psrc1
}
else {
io.fpRegAddr(i - exuParameters.LduCnt) := io.fromDq(indexVec(i)).bits.psrc2
io.intRegAddr(readPort(i) ) := io.fromDq(indexVec(i)).bits.psrc1
io.intRegAddr(readPort(i)+1) := io.fromDq(indexVec(i)).bits.psrc2
io.readFpRf(i - exuParameters.LduCnt).addr := io.fromDq(indexVec(i)).bits.psrc2
io.readIntRf(readPort(i) ).addr := io.fromDq(indexVec(i)).bits.psrc1
io.readIntRf(readPort(i)+1).addr := io.fromDq(indexVec(i)).bits.psrc2
}
}
......@@ -111,4 +114,31 @@ class Dispatch2Ls extends XSModule {
XSDebug(io.fromDq(i).valid && !io.fromDq(i).ready,
p"pc 0x${Hexadecimal(io.fromDq(i).bits.cf.pc)} waits at Ls dispatch queue with index $i\n")
}
/**
* Part 5: the second stage of dispatch 2 (send data to reservation station)
*/
val uopReg = Reg(Vec(exuParameters.LsExuCnt, new MicroOp))
val dataValidRegDebug = Reg(Vec(exuParameters.LsExuCnt, Bool()))
for (i <- 0 until exuParameters.IntExuCnt) {
uopReg(i) := io.enqIQCtrl(i).bits
dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
io.enqIQData(i) := DontCare
// assert(uopReg(i).ctrl.src1Type =/= SrcType.pc)
io.enqIQData(i).src1 := io.readIntRf(readPort(i)).data
if (i >= exuParameters.LduCnt) {
io.enqIQData(i).src2 := Mux(
uopReg(i).ctrl.src2Type === SrcType.imm,
uopReg(i).ctrl.imm,
Mux(uopReg(i).ctrl.src2Type === SrcType.fp,
io.readFpRf(i - exuParameters.LduCnt).data,
io.readIntRf(readPort(i) + 1).data))
}
XSDebug(dataValidRegDebug(i),
p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
p"(${readPort(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
}
}
......@@ -94,7 +94,7 @@ class IssueQueue
// True when the written-back uop `wbUop` produces operand `src`:
// the uop is valid, writes the register file matching `srcType`
// (int rfWen vs fp fpWen), and its pdest equals `src`.
// Int register x0 is hard-wired to zero, so it never "hits"
// (src =/= 0.U guard, mirroring the same guard in the RS cdb/bypass hit vectors).
def writeBackHit(src: UInt, srcType: UInt, wbUop: (Bool, MicroOp)): Bool = {
val (v, uop) = wbUop
val isSameType =
(SrcType.isReg(srcType) && uop.ctrl.rfWen && src =/= 0.U) || (SrcType.isFp(srcType) && uop.ctrl.fpWen)
v && isSameType && (src===uop.pdest)
}
......
......@@ -62,6 +62,9 @@ class ReservationStation
// to Dispatch
val numExist = Output(UInt(iqIdxWidth.W))
// tlb hit, inst can deq, only used in ld/st reservation stations
val tlbFeedback = Flipped(ValidIO(new TlbFeedback))
})
val srcAllNum = 3
......@@ -284,7 +287,7 @@ class ReservationStation
for(i <- idQue.indices) { // Should be IssQue.indices but Mem() does not support
for(j <- 0 until srcListenNum) {
val hitVec = cdbValid.indices.map(k => psrc(i)(j) === cdbPdest(k) && cdbValid(k) && (srcType(i)(j)===SrcType.reg && cdbrfWen(k) || srcType(i)(j)===SrcType.fp && cdbfpWen(k)))
val hitVec = cdbValid.indices.map(k => psrc(i)(j) === cdbPdest(k) && cdbValid(k) && (srcType(i)(j)===SrcType.reg && cdbrfWen(k) && cdbPdest(k) =/= 0.U || srcType(i)(j)===SrcType.fp && cdbfpWen(k)))
val hit = ParallelOR(hitVec).asBool
val data = ParallelMux(hitVec zip cdbData)
when (validQue(i) && !srcRdyVec(i)(j) && hit) {
......@@ -306,7 +309,7 @@ class ReservationStation
for (i <- idQue.indices) { // Should be IssQue.indices but Mem() does not support
for (j <- 0 until srcListenNum) {
val hitVec = bpValid.indices.map(k => psrc(i)(j) === bpPdest(k) && bpValid(k) && (srcType(i)(j)===SrcType.reg && bprfWen(k) || srcType(i)(j)===SrcType.fp && bpfpWen(k)))
val hitVec = bpValid.indices.map(k => psrc(i)(j) === bpPdest(k) && bpValid(k) && (srcType(i)(j)===SrcType.reg && bprfWen(k) && bpPdest(k) =/= 0.U || srcType(i)(j)===SrcType.fp && bpfpWen(k)))
val hitVecNext = hitVec.map(RegNext(_))
val hit = ParallelOR(hitVec).asBool
when (validQue(i) && !srcRdyVec(i)(j) && hit) {
......@@ -333,7 +336,7 @@ class ReservationStation
val enqPsrc = List(enqCtrl.bits.psrc1, enqCtrl.bits.psrc2, enqCtrl.bits.psrc3)
val enqSrcType = List(enqCtrl.bits.ctrl.src1Type, enqCtrl.bits.ctrl.src2Type, enqCtrl.bits.ctrl.src3Type)
for (i <- 0 until srcListenNum) {
val hitVec = bpValid.indices.map(j => enqPsrc(i)===bpPdest(j) && bpValid(j) && (enqSrcType(i)===SrcType.reg && bprfWen(j) || enqSrcType(i)===SrcType.fp && bpfpWen(j)))
val hitVec = bpValid.indices.map(j => enqPsrc(i)===bpPdest(j) && bpValid(j) && (enqSrcType(i)===SrcType.reg && bprfWen(j) && bpPdest(j) =/= 0.U || enqSrcType(i)===SrcType.fp && bpfpWen(j)))
val hitVecNext = hitVec.map(RegNext(_))
val hit = ParallelOR(hitVec).asBool
when (enqFire && hit && !enqSrcRdy(i)) {
......
......@@ -20,8 +20,7 @@ class Regfile
(
numReadPorts: Int,
numWirtePorts: Int,
hasZero: Boolean,
isMemRf: Boolean = false
hasZero: Boolean
) extends XSModule {
val io = IO(new Bundle() {
val readPorts = Vec(numReadPorts, new RfReadPort)
......@@ -29,19 +28,6 @@ class Regfile
})
val mem = Mem(NRPhyRegs, UInt(XLEN.W))
val debugRegSync = WireInit(0.U(XLEN.W))
val debugCnt = RegInit(0.U((PhyRegIdxWidth+1).W))
when(!debugCnt.head(1).asBool()){
debugCnt := debugCnt + 1.U
if(isMemRf){
BoringUtils.addSink(debugRegSync, "DEBUG_REG_SYNC")
mem(debugCnt) := debugRegSync
} else if (hasZero) {
debugRegSync := mem(debugCnt)
BoringUtils.addSource(debugRegSync, "DEBUG_REG_SYNC")
}
}
for(r <- io.readPorts){
val addr_reg = RegNext(r.addr)
......@@ -54,11 +40,9 @@ class Regfile
}
}
if(!isMemRf){
val debugArchRat = WireInit(VecInit(Seq.fill(32)(0.U(PhyRegIdxWidth.W))))
BoringUtils.addSink(debugArchRat, if(hasZero) "DEBUG_INI_ARCH_RAT" else "DEBUG_FP_ARCH_RAT")
val debugArchRat = WireInit(VecInit(Seq.fill(32)(0.U(PhyRegIdxWidth.W))))
BoringUtils.addSink(debugArchRat, if(hasZero) "DEBUG_INI_ARCH_RAT" else "DEBUG_FP_ARCH_RAT")
val debugArchReg = WireInit(VecInit(debugArchRat.zipWithIndex.map(x => if(hasZero && x._2==0) 0.U else mem(x._1))))
BoringUtils.addSource(debugArchReg, if(hasZero) "DEBUG_INT_ARCH_REG" else "DEBUG_FP_ARCH_REG")
}
val debugArchReg = WireInit(VecInit(debugArchRat.zipWithIndex.map(x => if(hasZero && x._2==0) 0.U else mem(x._1))))
BoringUtils.addSource(debugArchReg, if(hasZero) "DEBUG_INT_ARCH_REG" else "DEBUG_FP_ARCH_REG")
}
......@@ -123,7 +123,8 @@ class Roq extends XSModule {
val deqUop = microOp(deqPtr)
val deqPtrWritebacked = writebacked(deqPtr) && valid(deqPtr)
val intrEnable = intrBitSet && !isEmpty && !hasNoSpec // TODO: wanna check why has hasCsr(hasNoSpec)
val intrEnable = intrBitSet && !isEmpty && !hasNoSpec &&
deqUop.ctrl.commitType =/= CommitType.STORE && deqUop.ctrl.commitType =/= CommitType.LOAD// TODO: wanna check why has hasCsr(hasNoSpec)
val exceptionEnable = deqPtrWritebacked && Cat(deqUop.cf.exceptionVec).orR()
val isFlushPipe = deqPtrWritebacked && deqUop.ctrl.flushPipe
io.redirect := DontCare
......
......@@ -25,7 +25,19 @@ class DCacheMeta extends DCacheBundle {
val replay = Bool() // whether it's a replayed request?
}
// ordinary load and special memory operations(lr/sc, atomics)
// for load from load unit
// cycle 0: vaddr
// cycle 1: paddr
// Request from the load unit into the DCache load pipeline.
// Only the virtual address is known here (cycle 0); the physical
// address follows one cycle later on DCacheLoadIO.s1_paddr.
class DCacheLoadReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W) // memory op encoding — presumably rocket-style M_* command; confirm against M_SZ definition
val addr = UInt(VAddrBits.W) // virtual address (cycle-0 portion of the request)
val data = UInt(DataBits.W)
val mask = UInt((DataBits/8).W) // per-byte enable over `data`
val meta = new DCacheMeta
}
// special memory operations(lr/sc, atomics)
class DCacheWordReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
......@@ -45,6 +57,16 @@ class DCacheLineReq extends DCacheBundle
val meta = new DCacheMeta
}
// Response from the DCache load pipeline back to the load unit.
class DCacheLoadResp extends DCacheBundle
{
val data = UInt(DataBits.W) // loaded data (valid when neither miss nor nack)
val meta = new DCacheMeta // request metadata echoed back to the requester
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val nack = Bool()
}
class DCacheWordResp extends DCacheBundle
{
val data = UInt(DataBits.W)
......@@ -65,12 +87,19 @@ class DCacheLineResp extends DCacheBundle
val nack = Bool()
}
class DCacheWordIO extends DCacheBundle
// Load-unit <-> DCache interface for speculative loads.
// The request carries the vaddr; the translated paddr is supplied
// in the following cycle via s1_paddr (see LoadPipe stage 1).
// Fix: the reconstructed source declared `req` and `s1_kill` twice
// (old + new diff lines); duplicate val definitions do not compile,
// so exactly one declaration of each is kept.
class DCacheLoadIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
// kill previous cycle's req
val s1_kill = Output(Bool())
// physical address for the request issued last cycle
val s1_paddr = Output(UInt(PAddrBits.W))
}
// Word-granularity DCache request/response channel
// (used e.g. by the atomics port in DCacheToLsuIO).
class DCacheWordIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
}
class DCacheLineIO extends DCacheBundle
......@@ -80,7 +109,7 @@ class DCacheLineIO extends DCacheBundle
}
class DCacheToLsuIO extends DCacheBundle {
val load = Vec(LoadPipelineWidth, Flipped(new DCacheWordIO)) // for speculative load
val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
val lsroq = Flipped(new DCacheLineIO) // lsroq load/store
val store = Flipped(new DCacheLineIO) // for sbuffer
val atomics = Flipped(new DCacheWordIO) // atomics reqs
......@@ -393,6 +422,11 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
}
// sync with prober
missQueue.io.probe_wb_req.valid := prober.io.wb_req.fire()
missQueue.io.probe_wb_req.bits := prober.io.wb_req.bits
missQueue.io.probe_active := prober.io.probe_active
//----------------------------------------
// prober
prober.io.block := block_probe(prober.io.inflight_req_block_addr.bits)
......@@ -410,9 +444,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
prober.io.wb_resp := wb.io.resp
wb.io.mem_grant := bus.d.fire() && bus.d.bits.source === cfg.nMissEntries.U
missQueue.io.probe_wb_req.valid := prober.io.wb_req.fire()
missQueue.io.probe_wb_req.bits := prober.io.wb_req.bits
TLArbiter.lowestFromSeq(edge, bus.c, Seq(prober.io.rep, wb.io.release))
// synchronization stuff
......
......@@ -224,7 +224,7 @@ class ICacheImp(outer: ICache) extends ICacheModule(outer)
val metas = metaArray.io.r.resp.asTypeOf(Vec(nWays,new ICacheMetaBundle))
val datas =dataArray.map(b => RegEnable(next=b.io.r.resp.asTypeOf(Vec(nWays,new ICacheDataBundle)), enable=s2_fire))
val validMeta = Cat((0 until nWays).map{w => validArray(Cat(s2_idx, w.U))}.reverse).asUInt
val validMeta = Cat((0 until nWays).map{w => validArray(Cat(s2_idx, w.U(2.W)))}.reverse).asUInt
// hit check and generate victim cacheline mask
val hitVec = VecInit((0 until nWays).map{w => metas(w).tag === s2_tag && validMeta(w) === 1.U})
......@@ -254,14 +254,11 @@ class ICacheImp(outer: ICache) extends ICacheModule(outer)
val s3_hit = RegEnable(next=s2_hit,init=false.B,enable=s2_fire)
val s3_wayMask = RegEnable(next=waymask,init=0.U,enable=s2_fire)
val s3_miss = s3_valid && !s3_hit
val s3_mmio = s3_valid && AddressSpace.isMMIO(s3_tlb_resp.paddr)
when(io.flush(1)) { s3_valid := false.B }
.elsewhen(s2_fire) { s3_valid := s2_valid }
.elsewhen(io.resp.fire()) { s3_valid := false.B }
val refillDataReg = Reg(Vec(refillCycles,UInt(beatBits.W)))
assert(!(s3_hit && s3_mmio), "MMIO address should not hit in ICache!")
// icache hit
// simply cut the hit cacheline
val dataHitWay = s3_data.map(b => Mux1H(s3_wayMask,b).asUInt)
......@@ -269,15 +266,10 @@ class ICacheImp(outer: ICache) extends ICacheModule(outer)
outPacket := cutHelper(VecInit(dataHitWay),s3_req_pc(5,1).asUInt,s3_req_mask.asUInt)
//icache miss
val s_idle :: s_mmioReq :: s_mmioResp :: s_memReadReq :: s_memReadResp :: s_wait_resp :: Nil = Enum(6)
val s_idle :: s_memReadReq :: s_memReadResp :: s_wait_resp :: Nil = Enum(4)
val state = RegInit(s_idle)
val readBeatCnt = Counter(refillCycles)
//uncache request
val mmioBeatCnt = Counter(blockWords)
val mmioAddrReg = RegInit(0.U(PAddrBits.W))
val mmioReg = Reg(Vec(blockWords/2, UInt(blockWords.W)))
//pipeline flush register
val needFlush = RegInit(false.B)
when(io.flush(1) && (state =/= s_idle) && (state =/= s_wait_resp)){ needFlush := true.B }
......@@ -295,35 +287,14 @@ class ICacheImp(outer: ICache) extends ICacheModule(outer)
// state change to wait for a cacheline refill
val countFull = readBeatCnt.value === (refillCycles - 1).U
val mmioCntFull = mmioBeatCnt.value === (blockWords - 1).U
switch(state){
is(s_idle){
when(s3_mmio && io.flush === 0.U){
state := s_mmioReq
mmioBeatCnt.value := 0.U
mmioAddrReg := s3_tlb_resp.paddr
} .elsewhen(s3_miss && io.flush === 0.U){
when(s3_miss && io.flush === 0.U){
state := s_memReadReq
readBeatCnt.value := 0.U
}
}
//mmio request
is(s_mmioReq){
when(bus.a.fire()){
state := s_mmioResp
mmioAddrReg := mmioAddrReg + 8.U //consider MMIO response 64 bits valid data
}
}
is(s_mmioResp){
when (edge.hasData(bus.d.bits) && bus.d.fire()) {
mmioBeatCnt.inc()
assert(refill_done, "MMIO response should be one beat only!")
mmioReg(mmioBeatCnt.value) := bus.d.bits.data(wordBits-1,0)
state := Mux(mmioCntFull,s_wait_resp,s_mmioReq)
}
}
// memory request
is(s_memReadReq){
......@@ -353,9 +324,9 @@ class ICacheImp(outer: ICache) extends ICacheModule(outer)
//refill write
val metaWrite = Wire(new ICacheMetaBundle)
val refillFinalOneBeat = (state === s_memReadResp) && bus.d.fire() && refill_done
val wayNum = OHToUInt(waymask)
val wayNum = OHToUInt(s3_wayMask.asTypeOf(Vec(nWays,Bool())))
val validPtr = Cat(get_idx(s3_req_pc),wayNum)
metaWrite.tag := get_tag(s3_req_pc)
metaWrite.tag := s3_tag
metaArray.io.w.req.valid := refillFinalOneBeat
metaArray.io.w.req.bits.apply(data=metaWrite, setIdx=get_idx(s3_req_pc), waymask=s3_wayMask)
......@@ -445,16 +416,12 @@ class ICacheImp(outer: ICache) extends ICacheModule(outer)
bus.b.ready := true.B
bus.c.valid := false.B
bus.e.valid := false.B
bus.a.valid := (state === s_memReadReq) || (state === s_mmioReq)
bus.a.valid := (state === s_memReadReq)
val memTileReq = edge.Get(
fromSource = cacheID.U,
toAddress = groupPC(s3_tlb_resp.paddr),
lgSize = (log2Up(cacheParams.blockBytes)).U )._2
val mmioTileReq = edge.Get(
fromSource = cacheID.U,
toAddress = mmioAddrReg,
lgSize = (log2Up(wordBits)).U )._2
bus.a.bits := Mux((state === s_mmioReq),mmioTileReq, memTileReq)
bus.a.bits := memTileReq
bus.d.ready := true.B
XSDebug("[flush] flush_0:%d flush_1:%d\n",io.flush(0),io.flush(1))
......
......@@ -8,7 +8,7 @@ import utils.XSDebug
class LoadPipe extends DCacheModule
{
val io = IO(new DCacheBundle{
val lsu = Flipped(new DCacheWordIO)
val lsu = Flipped(new DCacheLoadIO)
val data_read = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
val meta_read = DecoupledIO(new L1MetaReadReq)
......@@ -56,7 +56,8 @@ class LoadPipe extends DCacheModule
// stage 1
val s1_req = RegNext(s0_req)
val s1_valid = RegNext(s0_valid, init = false.B)
val s1_addr = s1_req.addr
// in stage 1, load unit gets the physical address
val s1_addr = io.lsu.s1_paddr
val s1_nack = RegNext(io.nack)
dump_pipeline_reqs("LoadPipe s1", s1_valid, s1_req)
......@@ -76,6 +77,7 @@ class LoadPipe extends DCacheModule
dump_pipeline_reqs("LoadPipe s2", s2_valid, s2_req)
val s2_addr = RegNext(s1_addr)
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegNext(meta_resp(w).coh)))
......@@ -96,12 +98,12 @@ class LoadPipe extends DCacheModule
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) {
s2_data(w) := data_resp(w)(get_row(s2_req.addr))
s2_data(w) := data_resp(w)(get_row(s2_addr))
}
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// the index of word in a row, in case rowBits != wordBits
val s2_word_idx = if (rowWords == 1) 0.U else s2_req.addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
val s2_word_idx = if (rowWords == 1) 0.U else s2_addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
val s2_nack_hit = RegNext(s1_nack)
// Can't allocate MSHR for same set currently being written back
......
......@@ -60,6 +60,8 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
// watch prober's write back requests
val probe_wb_req = Flipped(ValidIO(new WritebackReq(edge.bundle.sourceBits)))
val probe_active = Flipped(ValidIO(UInt()))
})
// MSHR:
......@@ -70,7 +72,7 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
// 5. wait for client's finish
// 6. update meta data
// 7. done
val s_invalid :: s_meta_read_req :: s_meta_read_resp :: s_decide_next_state :: s_wb_req :: s_wb_resp :: s_refill_req :: s_refill_resp :: s_data_write_req :: s_mem_finish :: s_send_resp :: s_client_finish :: s_meta_write_req :: Nil = Enum(13)
val s_invalid :: s_meta_read_req :: s_meta_read_resp :: s_decide_next_state :: s_refill_req :: s_refill_resp :: s_mem_finish :: s_wait_probe_exit :: s_send_resp :: s_wb_req :: s_wb_resp :: s_data_write_req :: s_meta_write_req :: s_client_finish :: Nil = Enum(14)
val state = RegInit(s_invalid)
......@@ -332,7 +334,14 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
when (io.mem_finish.fire()) {
grantack.valid := false.B
state := s_wait_probe_exit
}
}
when (state === s_wait_probe_exit) {
// we only wait for probe, when prober is manipulating our set
val should_wait_for_probe_exit = io.probe_active.valid && io.probe_active.bits === req_idx
when (!should_wait_for_probe_exit) {
// no data
when (early_response) {
// load miss respond right after finishing tilelink transactions
......@@ -359,10 +368,12 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
}
// during refill, probe may step in, it may release our blocks
// if it releases the block we are trying to acquire, we don't care, since we will get it back eventually
// but we need to know whether it releases the block we are trying to evict
val prober_writeback_our_block = (state === s_refill_req || state === s_refill_resp) &&
val prober_writeback_our_block = (state === s_refill_req || state === s_refill_resp ||
state === s_mem_finish || state === s_wait_probe_exit || state === s_send_resp || state === s_wb_req) &&
io.probe_wb_req.valid && !io.probe_wb_req.bits.voluntary &&
io.probe_wb_req.bits.tag === req_old_meta.tag &&
io.probe_wb_req.bits.idx === req_idx &&
......@@ -475,6 +486,7 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
val wb_resp = Input(Bool())
val probe_wb_req = Flipped(ValidIO(new WritebackReq(edge.bundle.sourceBits)))
val probe_active = Flipped(ValidIO(UInt()))
val inflight_req_idxes = Output(Vec(cfg.nMissEntries, Valid(UInt())))
val inflight_req_block_addrs = Output(Vec(cfg.nMissEntries, Valid(UInt())))
......@@ -527,6 +539,7 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
wb_req_arb.io.in(i) <> entry.io.wb_req
entry.io.wb_resp := io.wb_resp
entry.io.probe_wb_req <> io.probe_wb_req
entry.io.probe_active <> io.probe_active
entry.io.mem_grant.valid := false.B
entry.io.mem_grant.bits := DontCare
......
......@@ -19,6 +19,7 @@ class ProbeUnit(edge: TLEdgeOut) extends DCacheModule with HasTLDump {
val block = Input(Bool())
val inflight_req_idx = Output(Valid(UInt()))
val inflight_req_block_addr = Output(Valid(UInt()))
val probe_active = Output(Valid(UInt()))
})
val s_invalid :: s_wait_sync :: s_meta_read_req :: s_meta_read_resp :: s_decide_next_state :: s_release :: s_wb_req :: s_wb_resp :: s_meta_write_req :: Nil = Enum(9)
......@@ -54,6 +55,11 @@ class ProbeUnit(edge: TLEdgeOut) extends DCacheModule with HasTLDump {
io.inflight_req_block_addr.valid := state =/= s_invalid
io.inflight_req_block_addr.bits := req_block_addr
// active means nobody is blocking it anymore
// it will run free
io.probe_active.valid := state =/= s_invalid && state =/= s_wait_sync
io.probe_active.bits := req_idx
XSDebug("state: %d\n", state)
when (state === s_invalid) {
......
......@@ -30,6 +30,7 @@ class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
val req = Reg(new WritebackReq(edge.bundle.sourceBits))
val s_invalid :: s_data_read_req :: s_data_read_resp :: s_active :: s_grant :: s_resp :: Nil = Enum(6)
val state = RegInit(s_invalid)
val should_writeback_data = Reg(Bool())
val data_req_cnt = RegInit(0.U(log2Up(refillCycles+1).W))
......@@ -58,11 +59,19 @@ class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
when (io.req.fire()) {
// for report types: TtoT, BtoB, NtoN, we do nothing
import freechips.rocketchip.tilelink.TLPermissions._
def is_dirty(x: UInt) = x <= TtoN
def do_nothing(x: UInt) = x > BtoN
when (do_nothing(io.req.bits.param)) {
should_writeback_data := false.B
state := s_resp
} .otherwise {
state := s_data_read_req
when (is_dirty(io.req.bits.param)) {
state := s_data_read_req
should_writeback_data := true.B
} .otherwise {
state := s_active
should_writeback_data := false.B
}
data_req_cnt := 0.U
req := io.req.bits
acked := false.B
......@@ -115,6 +124,13 @@ class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
val id = cfg.nMissEntries
val probeResponse = edge.ProbeAck(
fromSource = id.U,
toAddress = r_address,
lgSize = log2Ceil(cfg.blockBytes).U,
reportPermissions = req.param
)
val probeResponseData = edge.ProbeAck(
fromSource = id.U,
toAddress = r_address,
lgSize = log2Ceil(cfg.blockBytes).U,
......@@ -123,6 +139,13 @@ class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
)
val voluntaryRelease = edge.Release(
fromSource = id.U,
toAddress = r_address,
lgSize = log2Ceil(cfg.blockBytes).U,
shrinkPermissions = req.param
)._2
val voluntaryReleaseData = edge.Release(
fromSource = id.U,
toAddress = r_address,
lgSize = log2Ceil(cfg.blockBytes).U,
......@@ -132,7 +155,9 @@ class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
when (state === s_active) {
io.release.valid := data_req_cnt < refillCycles.U
io.release.bits := Mux(req.voluntary, voluntaryRelease, probeResponse)
io.release.bits := Mux(req.voluntary,
Mux(should_writeback_data, voluntaryReleaseData, voluntaryRelease),
Mux(should_writeback_data, probeResponseData, probeResponse))
when (io.mem_grant) {
acked := true.B
......@@ -141,7 +166,9 @@ class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
when (io.release.fire()) {
data_req_cnt := data_req_cnt + 1.U
when (data_req_cnt === (refillCycles-1).U) {
val last_beat = Mux(should_writeback_data, data_req_cnt === (refillCycles-1).U, true.B)
when (last_beat) {
state := Mux(req.voluntary, s_grant, s_resp)
}
}
......
......@@ -169,8 +169,8 @@ class IFU extends XSModule with HasIFUConst
// the previous half of RVI instruction waits until it meets its last half
val if3_hasPrevHalfInstr = prevHalfInstr.valid && (prevHalfInstr.pc + 2.U) === if3_pc
// set to invalid once consumed
val prevHalfConsumed = if3_hasPrevHalfInstr && if3_fire
// set to invalid once consumed or redirect from backend
val prevHalfConsumed = if3_hasPrevHalfInstr && if3_fire || if4_flush
when (prevHalfConsumed) {
if3_prevHalfInstr.valid := false.B
}
......
......@@ -282,12 +282,12 @@ class Lsroq extends XSModule with HasDCacheParameters {
val loadWbSelVec = VecInit((0 until LsroqSize).map(i => {
allocated(i) && valid(i) && !writebacked(i) && !store(i)
})).asUInt() // use uint instead vec to reduce verilog lines
val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LsroqSize).W)))
val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LsroqSize).W)))
val lselvec0 = PriorityEncoderOH(loadWbSelVec)
val lselvec1 = PriorityEncoderOH(loadWbSelVec & (~lselvec0).asUInt)
loadWbSel(0) := OHToUInt(lselvec0)
loadWbSel(1) := OHToUInt(lselvec1)
(0 until StorePipelineWidth).map(i => {
(0 until LoadPipelineWidth).map(i => {
// data select
val rdata = data(loadWbSel(i)).data
val func = uop(loadWbSel(i)).ctrl.fuOpType
......
......@@ -176,7 +176,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
resp_data := LookupTree(in.uop.ctrl.fuOpType, List(
LSUOpType.lr_w -> SignExt(rdataSel(31, 0), XLEN),
LSUOpType.sc_w -> SignExt(rdataSel(31, 0), XLEN),
LSUOpType.sc_w -> rdata,
LSUOpType.amoswap_w -> SignExt(rdataSel(31, 0), XLEN),
LSUOpType.amoadd_w -> SignExt(rdataSel(31, 0), XLEN),
LSUOpType.amoxor_w -> SignExt(rdataSel(31, 0), XLEN),
......@@ -188,7 +188,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
LSUOpType.amomaxu_w -> SignExt(rdataSel(31, 0), XLEN),
LSUOpType.lr_d -> SignExt(rdataSel(63, 0), XLEN),
LSUOpType.sc_d -> SignExt(rdataSel(63, 0), XLEN),
LSUOpType.sc_d -> rdata,
LSUOpType.amoswap_d -> SignExt(rdataSel(63, 0), XLEN),
LSUOpType.amoadd_d -> SignExt(rdataSel(63, 0), XLEN),
LSUOpType.amoxor_d -> SignExt(rdataSel(63, 0), XLEN),
......
......@@ -18,6 +18,8 @@ void (*ref_difftest_getregs)(void *c) = NULL;
void (*ref_difftest_setregs)(const void *c) = NULL;
void (*ref_difftest_get_mastatus)(void *s) = NULL;
void (*ref_difftest_set_mastatus)(const void *s) = NULL;
void (*ref_difftest_get_csr)(void *c) = NULL;
void (*ref_difftest_set_csr)(const void *c) = NULL;
vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para) = NULL;
static void (*ref_difftest_exec)(uint64_t n) = NULL;
static void (*ref_difftest_raise_intr)(uint64_t NO) = NULL;
......@@ -66,6 +68,12 @@ void init_difftest() {
ref_difftest_set_mastatus = (void (*)(const void *))dlsym(handle, "difftest_set_mastatus");
assert(ref_difftest_set_mastatus);
ref_difftest_get_csr = (void (*)(void *))dlsym(handle, "difftest_get_csr");
assert(ref_difftest_get_csr);
ref_difftest_set_csr = (void (*)(const void *))dlsym(handle, "difftest_set_csr");
assert(ref_difftest_set_csr);
ref_disambiguate_exec = (vaddr_t (*)(void *))dlsym(handle, "disambiguate_exec");
assert(ref_disambiguate_exec);
......@@ -158,6 +166,7 @@ int difftest_step(DiffState *s) {
if(s->sync.scFailed){
struct SyncState sync;
sync.lrscValid = 0;
sync.lrscAddr = 0;
ref_difftest_set_mastatus((uint64_t*)&sync); // sync lr/sc microarchitectural regs
}
......
......@@ -44,6 +44,7 @@ struct SyncChannel {
// Microarchitectural LR/SC reservation state exchanged with the reference
// model via ref_difftest_get_mastatus / ref_difftest_set_mastatus
// (e.g. cleared when a store-conditional fails, and saved/restored in
// Emulator::snapshot_save / snapshot_load).
struct SyncState {
uint64_t lrscValid;  // whether an LR reservation is currently held (0 = invalid)
uint64_t lrscAddr;   // address of the held reservation; zeroed together with lrscValid on sc failure
};
struct DiffState {
......@@ -78,10 +79,10 @@ extern void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n);
extern void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n);
extern void (*ref_difftest_getregs)(void *c);
extern void (*ref_difftest_setregs)(const void *c);
extern void (*ref_difftest_getregs)(void *c);
extern void (*ref_difftest_setregs)(const void *c);
extern void (*ref_difftest_get_mastatus)(void *s);
extern void (*ref_difftest_set_mastatus)(const void *s);
extern void (*ref_difftest_get_csr)(void *c);
extern void (*ref_difftest_set_csr)(const void *c);
extern vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para);
void init_difftest();
......
#include "emu.h"
#include "sdcard.h"
#include "difftest.h"
#include <getopt.h>
......@@ -373,6 +374,17 @@ void Emulator::snapshot_save(const char *filename) {
ref_difftest_get_mastatus(&sync_mastate);
stream.unbuf_write(&sync_mastate, sizeof(struct SyncState));
uint64_t csr_buf[4096];
ref_difftest_get_csr(csr_buf);
stream.unbuf_write(&csr_buf, sizeof(csr_buf));
long sdcard_offset;
if(fp)
sdcard_offset = ftell(fp);
else
sdcard_offset = 0;
stream.unbuf_write(&sdcard_offset, sizeof(sdcard_offset));
// actually write to file in snapshot_finalize()
}
......@@ -402,4 +414,13 @@ void Emulator::snapshot_load(const char *filename) {
struct SyncState sync_mastate;
stream.read(&sync_mastate, sizeof(struct SyncState));
ref_difftest_set_mastatus(&sync_mastate);
uint64_t csr_buf[4096];
stream.read(&csr_buf, sizeof(csr_buf));
ref_difftest_set_csr(csr_buf);
long sdcard_offset = 0;
stream.read(&sdcard_offset, sizeof(sdcard_offset));
if(fp)
fseek(fp, sdcard_offset, SEEK_SET);
}
#include "common.h"
#include "sdcard.h"
extern "C" {
FILE *fp = NULL;
static FILE *fp = NULL;
extern "C" {
void sd_setaddr(uint32_t addr) {
fseek(fp, addr, SEEK_SET);
......
#ifndef __SDCARD_H
#define __SDCARD_H
#include "common.h"
// Handle of the SD-card backing image file; consumers seek/tell on it
// (sd_setaddr uses fseek, snapshot_save/snapshot_load use ftell/fseek).
// NOTE(review): this diff changes the definition in sdcard.cpp from
// `FILE *fp = NULL;` to `static FILE *fp = NULL;`, which gives fp internal
// linkage while this header still declares `extern FILE *fp;` and emu.cpp
// includes it and reads fp — that combination should fail to link (or read a
// different object). Confirm whether the extern declaration should be removed
// or the static qualifier dropped.
extern FILE *fp;
#endif // __SDCARD_H
......@@ -40,9 +40,9 @@ uint8_t uart_getc() {
eprintf(ANSI_COLOR_RED "now = %ds\n" ANSI_COLOR_RESET, now / 1000);
lasttime = now;
}
if (now > 4 * 3600 * 1000) { // 4 hours
ch = uart_dequeue();
}
// if (now > 4 * 3600 * 1000) { // 4 hours
// ch = uart_dequeue();
// }
return ch;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册