Commit 8b2adfb7 authored by Lingrui98

Merge remote-tracking branch 'origin/master' into ftq

Subproject commit ca387163b32f20406d443bdab34bc034d5281b51
Subproject commit 54a97b8b9325921ea7cdaa45db7519d9a3666da5
......@@ -100,6 +100,12 @@ SUITE = cache.L2CacheTest
unit-test:
cd .. && mill XiangShan.test.testOnly -o -s $(SUITE)
tlc-test:
cd .. && mill XiangShan.test.testOnly -o -s cache.TLCTest.TLCCacheTest
l1-test:
cd .. && mill XiangShan.test.testOnly -o -s cache.L1DTest.L1DCacheTest
unit-test-all:
cd .. && mill XiangShan.test.test -P$(P)
......
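For example, "make tlc-test" runs only the TileLink consistency test and "make l1-test" only the L1D cache test, while "make unit-test-all" runs the whole suite; the -P$(P) flag appears to set the ScalaTest parallelism level, with P supplied on the make command line (e.g. make unit-test-all P=4).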
......@@ -58,6 +58,8 @@ object OneHot {
def OH1ToUInt(x: UInt): UInt = OHToUInt(OH1ToOH(x))
def UIntToOH1(x: UInt, width: Int): UInt = ~((-1).S(width.W).asUInt << x)(width-1, 0)
def UIntToOH1(x: UInt): UInt = UIntToOH1(x, (1 << x.getWidth) - 1)
def checkOneHot(in: Bits): Unit = assert(PopCount(in) <= 1.U)
def checkOneHot(in: Iterable[Bool]): Unit = assert(PopCount(in) <= 1.U)
}
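A minimal usage sketch for the new checkOneHot helpers (hypothetical module, not part of this patch):
import chisel3._
import utils.OneHot

class GrantChecker extends Module {
  val io = IO(new Bundle { val grants = Input(Vec(4, Bool())) })
  // Simulation-time check: at most one grant may be asserted per cycle.
  OneHot.checkOneHot(io.grants)
}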
object LowerMask {
......
......@@ -26,7 +26,7 @@ class DebugIdentityNode()(implicit p: Parameters) extends LazyModule {
val channels = Seq(t.a, t.b, t.c, t.d, t.e)
channels.foreach(c =>
when(fire(c)){
XSDebug(" ")
XSDebug(" isFire:%d ",c.fire())
c.bits.dump
}
)
......
......@@ -20,8 +20,7 @@ object XSLogLevel extends Enumeration {
object XSLog {
val MagicStr = "9527"
def apply(debugLevel: XSLogLevel)
(prefix: Boolean, cond: Bool, pable: Printable)
(implicit name: String): Any =
(prefix: Boolean, cond: Bool, pable: Printable): Any =
{
val logEnable = WireInit(false.B)
val logTimestamp = WireInit(0.U(64.W))
......@@ -53,15 +52,15 @@ object XSLog {
sealed abstract class LogHelper(val logLevel: XSLogLevel) extends HasXSParameter {
def apply(cond: Bool, fmt: String, data: Bits*)(implicit name: String): Any =
def apply(cond: Bool, fmt: String, data: Bits*): Any =
apply(cond, Printable.pack(fmt, data:_*))
def apply(cond: Bool, pable: Printable)(implicit name: String): Any = apply(true, cond, pable)
def apply(fmt: String, data: Bits*)(implicit name: String): Any =
def apply(cond: Bool, pable: Printable): Any = apply(true, cond, pable)
def apply(fmt: String, data: Bits*): Any =
apply(Printable.pack(fmt, data:_*))
def apply(pable: Printable)(implicit name: String): Any = apply(true.B, pable)
def apply(prefix: Boolean, cond: Bool, fmt: String, data: Bits*)(implicit name: String): Any =
def apply(pable: Printable): Any = apply(true.B, pable)
def apply(prefix: Boolean, cond: Bool, fmt: String, data: Bits*): Any =
apply(prefix, cond, Printable.pack(fmt, data:_*))
def apply(prefix: Boolean, cond: Bool, pable: Printable)(implicit name: String): Any =
def apply(prefix: Boolean, cond: Bool, pable: Printable): Any =
XSLog(logLevel)(prefix, cond, pable)
// trigger log or not
......@@ -70,7 +69,7 @@ sealed abstract class LogHelper(val logLevel: XSLogLevel) extends HasXSParameter
XSLog.displayLog
}
def printPrefix()(implicit name: String): Unit = {
def printPrefix(): Unit = {
val commonInfo = p"[$logLevel][time=${GTimer()}] ${XSLog.MagicStr}: "
when (trigger) {
printf(commonInfo)
......@@ -78,7 +77,7 @@ sealed abstract class LogHelper(val logLevel: XSLogLevel) extends HasXSParameter
}
// dump with a certain prefix
def exec(dump: () => Unit)(implicit name: String): Unit = {
def exec(dump: () => Unit): Unit = {
when (trigger) {
printPrefix
dump
......@@ -86,7 +85,7 @@ sealed abstract class LogHelper(val logLevel: XSLogLevel) extends HasXSParameter
}
// dump under a certain condition and with a certain prefix
def exec(cond: Bool, dump: () => Unit)(implicit name: String): Unit = {
def exec(cond: Bool, dump: () => Unit): Unit = {
when (trigger && cond) {
printPrefix
dump
......
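With the implicit name parameter removed, a log call site needs nothing extra in scope; a minimal sketch (signal names assumed, not from this patch):
import chisel3._
import utils.XSDebug

class LogExample extends Module {
  val io = IO(new Bundle {
    val valid = Input(Bool())
    val addr  = Input(UInt(32.W))
  })
  // Before this patch, an implicit name: String had to be in scope here.
  XSDebug(io.valid, "received req addr: %x\n", io.addr)
}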
......@@ -388,6 +388,8 @@ class TlbFeedback extends XSBundle {
val hit = Bool()
}
class RSFeedback extends TlbFeedback
class FrontendToBackendIO extends XSBundle {
// to the backend
val cfVec = Vec(DecodeWidth, DecoupledIO(new CtrlFlow))
......
......@@ -208,8 +208,9 @@ trait HasXSParameter {
tagECC = Some("secded"),
dataECC = Some("secded"),
nMissEntries = 16,
nLoadMissEntries = 8,
nStoreMissEntries = 8
nProbeEntries = 16,
nReleaseEntries = 16,
nStoreReplayEntries = 16
)
val LRSCCycles = 100
......
......@@ -6,7 +6,7 @@ import xiangshan._
import utils._
import xiangshan.backend.regfile.Regfile
import xiangshan.backend.exu._
import xiangshan.backend.issue.{ReservationStationCtrl, ReservationStationData}
import xiangshan.backend.issue.{ReservationStation}
class FpBlockToCtrlIO extends XSBundle {
......@@ -71,68 +71,63 @@ class FloatBlock
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency && readFpRf).map(_.io.toFp.bits.data)
val writeBackData = inBlockWbData ++ io.wakeUpIn.fast.map(_.bits.data)
val wakeupCnt = writeBackData.length
val fastPortsCnt = writeBackData.length
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency && readFpRf).map(_.io.toFp)
val extraListenPorts = inBlockListenPorts ++ io.wakeUpIn.slow
val extraListenPortsCnt = extraListenPorts.length
val slowPorts = inBlockListenPorts ++ io.wakeUpIn.slow
val slowPortsCnt = slowPorts.length
println(s"${i}: exu:${cfg.name} wakeupCnt: ${wakeupCnt} " +
s"extraListenPorts: ${extraListenPortsCnt} " +
println(s"${i}: exu:${cfg.name} fastPortsCnt: ${fastPortsCnt} " +
s"slowPorts: ${slowPortsCnt} " +
s"delay:${certainLatency}"
)
val rsCtrl = Module(new ReservationStationCtrl(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = false))
val rsData = Module(new ReservationStationData(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = false))
val rs = Module(new ReservationStation(cfg, fastPortsCnt, slowPortsCnt, fixedDelay = certainLatency, fastWakeup = certainLatency >= 0, feedback = false))
rsCtrl.io.data <> rsData.io.ctrl
rsCtrl.io.redirect <> redirect // TODO: remove it
rsCtrl.io.flush <> flush // TODO: remove it
rsCtrl.io.numExist <> io.toCtrlBlock.numExist(i)
rsCtrl.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.redirect <> redirect // TODO: remove it
rs.io.flush <> flush // TODO: remove it
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl(i)
rsData.io.srcRegValue := DontCare
rs.io.srcRegValue := DontCare
val src1Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3).data))
val src2Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3 + 1).data))
val src3Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3 + 2).data))
rsData.io.srcRegValue(0) := src1Value(readPortIndex(i))
rsData.io.srcRegValue(1) := src2Value(readPortIndex(i))
if (cfg.fpSrcCnt > 2) rsData.io.srcRegValue(2) := src3Value(readPortIndex(i))
rsData.io.redirect <> redirect
rsData.io.flush <> flush
rsData.io.writeBackedData <> writeBackData
for ((x, y) <- rsData.io.extraListenPorts.zip(extraListenPorts)) {
rs.io.srcRegValue(0) := src1Value(readPortIndex(i))
rs.io.srcRegValue(1) := src2Value(readPortIndex(i))
if (cfg.fpSrcCnt > 2) rs.io.srcRegValue(2) := src3Value(readPortIndex(i))
rs.io.fastDatas <> writeBackData
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.flush <> flush
exeUnits(i).io.fromFp <> rsData.io.deq
rsData.io.feedback := DontCare
exeUnits(i).io.fromFp <> rs.io.deq
rs.io.feedback := DontCare
rsCtrl.suggestName(s"rsc_${cfg.name}")
rsData.suggestName(s"rsd_${cfg.name}")
rs.suggestName(s"rs_${cfg.name}")
rsData
rs
})
for(rs <- reservedStations){
val inBlockUops = reservedStations.filter(x =>
x.exuCfg.hasCertainLatency && x.exuCfg.writeFpRf
).map(x => {
val raw = WireInit(x.io.selectedUop)
raw.valid := x.io.selectedUop.valid && raw.bits.ctrl.fpWen
val raw = WireInit(x.io.fastUopOut)
raw.valid := x.io.fastUopOut.valid && raw.bits.ctrl.fpWen
raw
})
rs.io.broadcastedUops <> inBlockUops ++ io.wakeUpIn.fastUops
rs.io.fastUopsIn <> inBlockUops ++ io.wakeUpIn.fastUops
}
io.wakeUpFpOut.fastUops <> reservedStations.filter(
rs => fpFastFilter(rs.exuCfg)
).map(_.io.selectedUop).map(fpValid)
).map(_.io.fastUopOut).map(fpValid)
io.wakeUpFpOut.fast <> exeUnits.filter(
x => fpFastFilter(x.config)
......@@ -144,7 +139,7 @@ class FloatBlock
io.wakeUpIntOut.fastUops <> reservedStations.filter(
rs => intFastFilter(rs.exuCfg)
).map(_.io.selectedUop).map(intValid)
).map(_.io.fastUopOut).map(intValid)
io.wakeUpIntOut.fast <> exeUnits.filter(
x => intFastFilter(x.config)
......
......@@ -6,7 +6,7 @@ import xiangshan._
import xiangshan.backend.exu.Exu.{ldExeUnitCfg, stExeUnitCfg}
import xiangshan.backend.exu._
import xiangshan.backend.fu.FenceToSbuffer
import xiangshan.backend.issue.{ReservationStationCtrl, ReservationStationData}
import xiangshan.backend.issue.{ReservationStation}
import xiangshan.backend.regfile.Regfile
import xiangshan.backend.roq.RoqExceptionInfo
......@@ -151,72 +151,64 @@ class IntegerBlock
val readIntRf = cfg.readIntRf
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency && readIntRf).map(_.io.toInt.bits.data)
val writeBackData = inBlockWbData ++ io.wakeUpIn.fast.map(_.bits.data)
val wakeupCnt = writeBackData.length
val fastDatas = inBlockWbData ++ io.wakeUpIn.fast.map(_.bits.data)
val wakeupCnt = fastDatas.length
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency && readIntRf).map(_.io.toInt)
val extraListenPorts = inBlockListenPorts ++ io.wakeUpIn.slow
val extraListenPortsCnt = extraListenPorts.length
val slowPorts = inBlockListenPorts ++ io.wakeUpIn.slow
val extraListenPortsCnt = slowPorts.length
val feedback = (cfg == ldExeUnitCfg) || (cfg == stExeUnitCfg)
println(s"${i}: exu:${cfg.name} wakeupCnt: ${wakeupCnt} extraListenPorts: ${extraListenPortsCnt} delay:${certainLatency} feedback:${feedback}")
println(s"${i}: exu:${cfg.name} wakeupCnt: ${wakeupCnt} slowPorts: ${extraListenPortsCnt} delay:${certainLatency} feedback:${feedback}")
// val rs = Module(new ReservationStationNew(
// cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = feedback
// ))
val rsCtrl = Module(new ReservationStationCtrl(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = feedback))
val rsData = Module(new ReservationStationData(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = feedback))
val rs = Module(new ReservationStation(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, fastWakeup = certainLatency >= 0, feedback = feedback))
rsCtrl.io.data <> rsData.io.ctrl
rsCtrl.io.redirect <> redirect // TODO: remove it
rsCtrl.io.flush <> flush // TODO: remove it
rsCtrl.io.numExist <> io.toCtrlBlock.numExist(i)
rsCtrl.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.redirect <> redirect
rs.io.flush <> flush // TODO: remove it
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl(i)
rsData.io.srcRegValue := DontCare
rs.io.srcRegValue := DontCare
val src1Value = VecInit((0 until 4).map(i => intRf.io.readPorts(i * 2).data))
val src2Value = VecInit((0 until 4).map(i => intRf.io.readPorts(i * 2 + 1).data))
rsData.io.srcRegValue(0) := src1Value(readPortIndex(i))
if (cfg.intSrcCnt > 1) rsData.io.srcRegValue(1) := src2Value(readPortIndex(i))
rs.io.srcRegValue(0) := src1Value(readPortIndex(i))
if (cfg.intSrcCnt > 1) rs.io.srcRegValue(1) := src2Value(readPortIndex(i))
if (cfg == Exu.jumpExeUnitCfg) {
rsData.io.jumpPc := io.fromCtrlBlock.jumpPc
rsData.io.jalr_target := io.fromCtrlBlock.jalr_target
rs.io.jumpPc := io.fromCtrlBlock.jumpPc
rs.io.jalr_target := io.fromCtrlBlock.jalr_target
}
rsData.io.redirect <> redirect
rsData.io.flush <> flush
rsData.io.writeBackedData <> writeBackData
for ((x, y) <- rsData.io.extraListenPorts.zip(extraListenPorts)) {
rs.io.fastDatas <> fastDatas
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.fromInt <> rs.io.deq
exeUnits(i).io.flush <> flush
exeUnits(i).io.fromInt <> rsData.io.deq
rsData.io.feedback := DontCare
rs.io.feedback := DontCare
rsCtrl.suggestName(s"rsc_${cfg.name}")
rsData.suggestName(s"rsd_${cfg.name}")
rs.suggestName(s"rs_${cfg.name}")
rsData
rs
})
for(rs <- reservationStations){
val inBlockUops = reservationStations.filter(x =>
x.exuCfg.hasCertainLatency && x.exuCfg.writeIntRf
).map(x => {
val raw = WireInit(x.io.selectedUop)
raw.valid := x.io.selectedUop.valid && raw.bits.ctrl.rfWen
val raw = WireInit(x.io.fastUopOut)
raw.valid := x.io.fastUopOut.valid && raw.bits.ctrl.rfWen
raw
})
rs.io.broadcastedUops <> inBlockUops ++ io.wakeUpIn.fastUops
rs.io.fastUopsIn <> inBlockUops ++ io.wakeUpIn.fastUops
}
io.wakeUpFpOut.fastUops <> reservationStations.filter(
rs => fpFastFilter(rs.exuCfg)
).map(_.io.selectedUop).map(fpValid)
).map(_.io.fastUopOut).map(fpValid)
io.wakeUpFpOut.fast <> exeUnits.filter(
x => fpFastFilter(x.config)
......@@ -228,7 +220,7 @@ class IntegerBlock
io.wakeUpIntOut.fastUops <> reservationStations.filter(
rs => intFastFilter(rs.exuCfg)
).map(_.io.selectedUop).map(intValid)
).map(_.io.fastUopOut).map(intValid)
io.wakeUpIntOut.fast <> exeUnits.filter(
x => intFastFilter(x.config)
......
......@@ -12,7 +12,7 @@ import xiangshan.backend.exu._
import xiangshan.cache._
import xiangshan.mem._
import xiangshan.backend.fu.{HasExceptionNO, FenceToSbuffer}
import xiangshan.backend.issue.{ReservationStationCtrl, ReservationStationData}
import xiangshan.backend.issue.{ReservationStation}
import xiangshan.backend.regfile.RfReadPort
class LsBlockToCtrlIO extends XSBundle {
......@@ -132,60 +132,55 @@ class MemBlockImp
val readFpRf = cfg.readFpRf
// load has uncertain latency, so only use external wake up data
val writeBackData = fastWakeUpIn.zip(io.wakeUpIn.fast)
val fastDatas = fastWakeUpIn.zip(io.wakeUpIn.fast)
.filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf))
.map(_._2.bits.data)
val wakeupCnt = writeBackData.length
val wakeupCnt = fastDatas.length
val inBlockListenPorts = intExeWbReqs ++ fpExeWbReqs
val extraListenPorts = inBlockListenPorts ++
val slowPorts = inBlockListenPorts ++
slowWakeUpIn.zip(io.wakeUpIn.slow)
.filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf))
.map(_._2)
val extraListenPortsCnt = extraListenPorts.length
val slowPortsCnt = slowPorts.length
// if tlb miss, replay
val feedback = true
println(s"${i}: exu:${cfg.name} wakeupCnt: ${wakeupCnt} extraListenPorts: ${extraListenPortsCnt} delay:${certainLatency} feedback:${feedback}")
println(s"${i}: exu:${cfg.name} wakeupCnt: ${wakeupCnt} slowPorts: ${slowPortsCnt} delay:${certainLatency} feedback:${feedback}")
val rsCtrl = Module(new ReservationStationCtrl(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = feedback))
val rsData = Module(new ReservationStationData(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = feedback))
val rs = Module(new ReservationStation(cfg, wakeupCnt, slowPortsCnt, fixedDelay = certainLatency, fastWakeup = certainLatency >= 0, feedback = feedback))
rsCtrl.io.data <> rsData.io.ctrl
rsCtrl.io.redirect <> redirect // TODO: remove it
rsCtrl.io.flush <> io.fromCtrlBlock.flush // TODO: remove it
rsCtrl.io.numExist <> io.toCtrlBlock.numExist(i)
rsCtrl.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.redirect <> redirect // TODO: remove it
rs.io.flush <> io.fromCtrlBlock.flush // TODO: remove it
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl(i)
val src2IsFp = RegNext(io.fromCtrlBlock.enqIqCtrl(i).bits.ctrl.src2Type === SrcType.fp)
rsData.io.srcRegValue := DontCare
rsData.io.srcRegValue(0) := io.fromIntBlock.readIntRf(readPortIndex(i)).data
rs.io.srcRegValue := DontCare
rs.io.srcRegValue(0) := io.fromIntBlock.readIntRf(readPortIndex(i)).data
if (i >= exuParameters.LduCnt) {
rsData.io.srcRegValue(1) := Mux(src2IsFp, io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data, io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data)
rs.io.srcRegValue(1) := Mux(src2IsFp, io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data, io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data)
}
rsData.io.redirect <> redirect
rsData.io.flush <> io.fromCtrlBlock.flush
rsData.io.writeBackedData <> writeBackData
for ((x, y) <- rsData.io.extraListenPorts.zip(extraListenPorts)) {
rs.io.fastDatas <> fastDatas
for ((x, y) <- rs.io.slowPorts.zip(slowPorts)) {
x.valid := y.fire()
x.bits := y.bits
}
// exeUnits(i).io.redirect <> redirect
// exeUnits(i).io.fromInt <> rsData.io.deq
rsData.io.feedback := DontCare
// exeUnits(i).io.fromInt <> rs.io.deq
rs.io.feedback := DontCare
rsCtrl.suggestName(s"rsc_${cfg.name}")
rsData.suggestName(s"rsd_${cfg.name}")
rs.suggestName(s"rsd_${cfg.name}")
rsData
rs
})
for(rs <- reservationStations){
rs.io.broadcastedUops <> fastWakeUpIn.zip(io.wakeUpIn.fastUops)
rs.io.fastUopsIn <> fastWakeUpIn.zip(io.wakeUpIn.fastUops)
.filter(x => (x._1.writeIntRf && rs.exuCfg.readIntRf) || (x._1.writeFpRf && rs.exuCfg.readFpRf))
.map(_._2)
}
......
......@@ -172,7 +172,7 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
when(wb.bits.redirectValid) {
mispredict_vec(wbIdx)(offset) := cfiUpdate.isMisPred
when(cfiUpdate.taken && offset < cfiIndex_vec(wbIdx).bits) {
cfiIndex_vec(wbIdx).valid := true.B
cfiIndex_vec(wbIdx).bits := offset
cfiIsCall(wbIdx) := wb.bits.uop.cf.pd.isCall
cfiIsRet(wbIdx) := wb.bits.uop.cf.pd.isRet
......
......@@ -18,6 +18,7 @@ class IntToFP extends FPUSubModule {
val src1 = RegEnable(io.in.bits.src(0)(XLEN-1, 0), io.in.fire())
val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
val rmReg = RegEnable(rm, io.in.fire())
switch(state){
is(s_idle){
......@@ -63,7 +64,7 @@ class IntToFP extends FPUSubModule {
val i2f = Module(new INToRecFN(XLEN, t.exp, t.sig))
i2f.io.signedIn := ~typ(0)
i2f.io.in := intValue
i2f.io.roundingMode := rm
i2f.io.roundingMode := rmReg
i2f.io.detectTininess := hardfloat.consts.tininess_afterRounding
(sanitizeNaN(i2f.io.out, t), i2f.io.exceptionFlags)
}
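// Note on the fix above: src1, uopReg and rmReg are all captured with
// RegEnable at io.in.fire(), so the conversion, which happens a cycle after
// the operands were latched, must use rmReg rather than the live rm input.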
......
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
class AtomicsReplayEntry extends DCacheModule
{
val io = IO(new Bundle {
val lsu = Flipped(new DCacheWordIO)
val pipe_req = Decoupled(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
val block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_pipe_req :: s_pipe_resp :: s_resp :: Nil = Enum(4)
val state = RegInit(s_invalid)
val req = Reg(new DCacheWordReq)
// assign default values to output signals
io.lsu.req.ready := state === s_invalid
io.lsu.resp.valid := false.B
io.lsu.resp.bits := DontCare
io.pipe_req.valid := false.B
io.pipe_req.bits := DontCare
io.block_addr.valid := state =/= s_invalid
io.block_addr.bits := req.addr
when (state =/= s_invalid) {
XSDebug("AtomicsReplayEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
}
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.lsu.req.fire()) {
req := io.lsu.req.bits
state := s_pipe_req
}
}
// --------------------------------------------
// replay
when (state === s_pipe_req) {
io.pipe_req.valid := true.B
val pipe_req = io.pipe_req.bits
pipe_req := DontCare
pipe_req.miss := false.B
pipe_req.probe := false.B
pipe_req.source := AMO_SOURCE.U
pipe_req.cmd := req.cmd
pipe_req.addr := get_block_addr(req.addr)
pipe_req.word_idx := get_word(req.addr)
pipe_req.amo_data := req.data
pipe_req.amo_mask := req.mask
when (io.pipe_req.fire()) {
state := s_pipe_resp
}
}
val resp_data = Reg(UInt())
val resp_id = Reg(UInt())
when (state === s_pipe_resp) {
// when not miss
// everything is OK, simply send response back to sbuffer
// when miss and not replay
// wait for missQueue to handle the miss and replay our request
// when miss and replay
// req missed and failed to enter missQueue, manually replay it later
// TODO: add assertions:
// 1. add a replay delay counter?
// 2. when req gets into MissQueue, it should not miss any more
when (io.pipe_resp.fire()) {
when (io.pipe_resp.bits.miss) {
when (io.pipe_resp.bits.replay) {
state := s_pipe_req
}
} .otherwise {
resp_data := io.pipe_resp.bits.data
resp_id := io.pipe_resp.bits.id
state := s_resp
}
}
}
// --------------------------------------------
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := DontCare
io.lsu.resp.bits.data := resp_data
io.lsu.resp.bits.id := resp_id
when (io.lsu.resp.fire()) {
state := s_invalid
}
}
// debug output
when (io.lsu.req.fire()) {
io.lsu.req.bits.dump()
}
when (io.lsu.resp.fire()) {
io.lsu.resp.bits.dump()
}
when (io.pipe_req.fire()) {
io.pipe_req.bits.dump()
}
when (io.pipe_resp.fire()) {
io.pipe_resp.bits.dump()
}
}
......@@ -20,12 +20,10 @@ case class DCacheParameters
tagECC: Option[String] = None,
dataECC: Option[String] = None,
nMissEntries: Int = 1,
nLoadMissEntries: Int = 1,
nStoreMissEntries: Int = 1,
nMiscMissEntries: Int = 1,
nProbeEntries: Int = 1,
nReleaseEntries: Int = 1,
nStoreReplayEntries: Int = 1,
nMMIOEntries: Int = 1,
nSDQ: Int = 17,
nRPQ: Int = 16,
nMMIOs: Int = 1,
blockBytes: Int = 64
) extends L1CacheParameters {
......@@ -48,23 +46,12 @@ trait HasDCacheParameters extends HasL1CacheParameters {
def nIOMSHRs = cacheParams.nMMIOs
def maxUncachedInFlight = cacheParams.nMMIOs
def missQueueEntryIdWidth = log2Up(cfg.nMissEntries)
def loadMissQueueEntryIdWidth = log2Up(cfg.nLoadMissEntries)
def storeMissQueueEntryIdWidth = log2Up(cfg.nStoreMissEntries)
def miscMissQueueEntryIdWidth = log2Up(cfg.nMiscMissEntries)
def clientMissQueueEntryIdWidth = max(
max(loadMissQueueEntryIdWidth,
storeMissQueueEntryIdWidth),
miscMissQueueEntryIdWidth)
// clients: ldu 0, ldu1, stu, atomics
def nClientMissQueues = 4
def clientIdWidth = log2Up(nClientMissQueues)
def missQueueClientIdWidth = clientIdWidth + clientMissQueueEntryIdWidth
def clientIdMSB = missQueueClientIdWidth - 1
def clientIdLSB = clientMissQueueEntryIdWidth
def entryIdMSB = clientMissQueueEntryIdWidth - 1
def entryIdLSB = 0
def nSourceType = 3
def sourceTypeWidth = log2Up(nSourceType)
def LOAD_SOURCE = 0
def STORE_SOURCE = 1
def AMO_SOURCE = 2
// each source uses an id to distinguish its multiple reqs
def reqIdWidth = 64
require(isPow2(nSets), s"nSets($nSets) must be pow2")
......@@ -73,6 +60,7 @@ trait HasDCacheParameters extends HasL1CacheParameters {
require(full_divide(beatBits, rowBits), s"beatBits($beatBits) must be multiple of rowBits($rowBits)")
// this is a VIPT L1 cache
require(pgIdxBits >= untagBits, s"page aliasing problem: pgIdxBits($pgIdxBits) < untagBits($untagBits)")
require(rowWords == 1, "Our DCache Implementation assumes rowWords == 1")
}
abstract class DCacheModule extends L1CacheModule
......@@ -218,7 +206,7 @@ class DuplicatedDataArray extends AbstractDataArray
val ren = io.read(j).valid && io.read(j).bits.way_en(w) && io.read(j).bits.rmask(r)
array.io.r.req.valid := ren
array.io.r.req.bits.apply(setIdx=raddr)
resp(k) := RegNext(array.io.r.resp.data(0))
resp(k) := array.io.r.resp.data(0)
}
}
}
......
package xiangshan.cache
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLClientNode, TLClientParameters,
TLMasterParameters, TLMasterPortParameters, TLArbiter, TLMessages}
// memory request in word granularity (load, mmio, lr/sc, atomics)
class DCacheWordReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val data = UInt(DataBits.W)
val mask = UInt((DataBits/8).W)
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("DCacheWordReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
cmd, addr, data, mask, id)
}
}
// memory request in cache line granularity (store)
class DCacheLineReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
val mask = UInt(cfg.blockBytes.W)
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("DCacheLineReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
cmd, addr, data, mask, id)
}
}
class DCacheWordResp extends DCacheBundle
{
val data = UInt(DataBits.W)
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val replay = Bool()
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("DCacheWordResp: data: %x id: %d miss: %b replay: %b\n",
data, id, miss, replay)
}
}
class DCacheLineResp extends DCacheBundle
{
val data = UInt((cfg.blockBytes * 8).W)
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val replay = Bool()
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("DCacheLineResp: data: %x id: %d miss: %b replay: %b\n",
data, id, miss, replay)
}
}
class Refill extends DCacheBundle
{
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
def dump() = {
XSDebug("Refill: addr: %x data: %x\n", addr, data)
}
}
class DCacheWordIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
}
// used by load unit
class DCacheLoadIO extends DCacheWordIO
{
// kill previous cycle's req
val s1_kill = Output(Bool())
// cycle 0: virtual address: req.addr
// cycle 1: physical address: s1_paddr
val s1_paddr = Output(UInt(PAddrBits.W))
val s1_data = Input(Vec(nWays, UInt(DataBits.W)))
val s2_hit_way = Input(UInt(nWays.W))
}
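// Timing sketch implied by the fields above:
//   cycle 0: req carries the virtual address (req.addr)
//   cycle 1: s1_paddr supplies the physical address; s1_kill may cancel the
//            access; s1_data returns one decoded word per way
//   cycle 2: s2_hit_way reports which way (if any) hit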
class DCacheLineIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheLineReq )
val resp = Flipped(DecoupledIO(new DCacheLineResp))
}
class DCacheToLsuIO extends DCacheBundle {
val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
val lsq = ValidIO(new Refill) // refill to load queue, wake up load misses
val store = Flipped(new DCacheLineIO) // for sbuffer
val atomics = Flipped(new DCacheWordIO) // atomics reqs
}
class DCacheIO extends DCacheBundle {
val lsu = new DCacheToLsuIO
val prefetch = DecoupledIO(new MissReq)
}
class DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParameters {
val clientParameters = TLMasterPortParameters.v1(
Seq(TLMasterParameters.v1(
name = "dcache",
sourceId = IdRange(0, cfg.nMissEntries+1),
supportsProbe = TransferSizes(cfg.blockBytes)
))
)
val clientNode = TLClientNode(Seq(clientParameters))
lazy val module = new DCacheImp(this)
}
class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters with HasXSLog {
val io = IO(new DCacheIO)
val (bus, edge) = outer.clientNode.out.head
require(bus.d.bits.data.getWidth == l1BusDataWidth, "DCache: tilelink width does not match")
//----------------------------------------
// core data structures
val dataArray = Module(new DuplicatedDataArray)
val metaArray = Module(new DuplicatedMetaArray)
/*
dataArray.dump()
metaArray.dump()
*/
//----------------------------------------
// core modules
val ldu = Seq.fill(LoadPipelineWidth) { Module(new LoadPipe) }
val storeReplayUnit = Module(new StoreReplayQueue)
val atomicsReplayUnit = Module(new AtomicsReplayEntry)
val mainPipe = Module(new MainPipe)
val missQueue = Module(new MissQueue(edge))
val probeQueue = Module(new ProbeQueue(edge))
val wb = Module(new WritebackQueue(edge))
//----------------------------------------
// meta array
val MetaWritePortCount = 1
val MainPipeMetaWritePort = 0
metaArray.io.write <> mainPipe.io.meta_write
// MainPipe contends with Load 0 for MetaRead
// give priority to MainPipe
val MetaReadPortCount = 2
val MainPipeMetaReadPort = 0
val LoadPipeMetaReadPort = 1
val metaReadArb = Module(new Arbiter(new L1MetaReadReq, MetaReadPortCount))
metaReadArb.io.in(LoadPipeMetaReadPort) <> ldu(0).io.meta_read
metaReadArb.io.in(MainPipeMetaReadPort) <> mainPipe.io.meta_read
metaArray.io.read(0) <> metaReadArb.io.out
ldu(0).io.meta_resp <> metaArray.io.resp(0)
mainPipe.io.meta_resp <> metaArray.io.resp(0)
for (w <- 1 until LoadPipelineWidth) {
metaArray.io.read(w) <> ldu(w).io.meta_read
ldu(w).io.meta_resp <> metaArray.io.resp(w)
}
//----------------------------------------
// data array
val DataWritePortCount = 1
val MainPipeDataWritePort = 0
dataArray.io.write <> mainPipe.io.data_write
// give priority to MainPipe
val DataReadPortCount = 2
val MainPipeDataReadPort = 0
val LoadPipeDataReadPort = 1
val dataReadArb = Module(new Arbiter(new L1DataReadReq, DataReadPortCount))
dataReadArb.io.in(LoadPipeDataReadPort) <> ldu(0).io.data_read
dataReadArb.io.in(MainPipeDataReadPort) <> mainPipe.io.data_read
dataArray.io.read(0) <> dataReadArb.io.out
dataArray.io.resp(0) <> ldu(0).io.data_resp
dataArray.io.resp(0) <> mainPipe.io.data_resp
for (w <- 1 until LoadPipelineWidth) {
dataArray.io.read(w) <> ldu(w).io.data_read
dataArray.io.resp(w) <> ldu(w).io.data_resp
}
//----------------------------------------
// load pipe
// the s1 kill signal
// only lsu uses this, replay never kills
for (w <- 0 until LoadPipelineWidth) {
ldu(w).io.lsu <> io.lsu.load(w)
// replay and nack not needed anymore
// TODO: remove replay and nack
ldu(w).io.nack := false.B
}
//----------------------------------------
// store pipe and store miss queue
storeReplayUnit.io.lsu <> io.lsu.store
//----------------------------------------
// atomics
// atomics not finished yet
io.lsu.atomics <> atomicsReplayUnit.io.lsu
//----------------------------------------
// miss queue
val MissReqPortCount = LoadPipelineWidth + 1
val MainPipeMissReqPort = 0
// Request
val missReqArb = Module(new RRArbiter(new MissReq, MissReqPortCount))
missReqArb.io.in(MainPipeMissReqPort) <> mainPipe.io.miss_req
for (w <- 0 until LoadPipelineWidth) { missReqArb.io.in(w + 1) <> ldu(w).io.miss_req }
wb.io.miss_req.valid := missReqArb.io.out.valid
wb.io.miss_req.bits := missReqArb.io.out.bits.addr
block_decoupled(missReqArb.io.out, missQueue.io.req, wb.io.block_miss_req)
// refill to load queue
io.lsu.lsq <> missQueue.io.refill
// tilelink stuff
bus.a <> missQueue.io.mem_acquire
bus.e <> missQueue.io.mem_finish
//----------------------------------------
// probe
probeQueue.io.mem_probe <> bus.b
//----------------------------------------
// mainPipe
val MainPipeReqPortCount = 4
val MissMainPipeReqPort = 0
val StoreMainPipeReqPort = 1
val AtomicsMainPipeReqPort = 2
val ProbeMainPipeReqPort = 3
val mainPipeReqArb = Module(new RRArbiter(new MainPipeReq, MainPipeReqPortCount))
mainPipeReqArb.io.in(MissMainPipeReqPort) <> missQueue.io.pipe_req
mainPipeReqArb.io.in(StoreMainPipeReqPort) <> storeReplayUnit.io.pipe_req
mainPipeReqArb.io.in(AtomicsMainPipeReqPort) <> atomicsReplayUnit.io.pipe_req
mainPipeReqArb.io.in(ProbeMainPipeReqPort) <> probeQueue.io.pipe_req
mainPipe.io.req <> mainPipeReqArb.io.out
missQueue.io.pipe_resp <> mainPipe.io.miss_resp
storeReplayUnit.io.pipe_resp <> mainPipe.io.store_resp
atomicsReplayUnit.io.pipe_resp <> mainPipe.io.amo_resp
probeQueue.io.lrsc_locked_block <> mainPipe.io.lrsc_locked_block
//----------------------------------------
// wb
// add a queue between MainPipe and WritebackUnit to reduce MainPipe stalls due to WritebackUnit busy
wb.io.req <> mainPipe.io.wb_req
bus.c <> wb.io.mem_release
// connect bus d
missQueue.io.mem_grant.valid := false.B
missQueue.io.mem_grant.bits := DontCare
wb.io.mem_grant.valid := false.B
wb.io.mem_grant.bits := DontCare
// in L1DCache, we only expect Grant[Data] and ReleaseAck
bus.d.ready := false.B
when (bus.d.bits.opcode === TLMessages.Grant || bus.d.bits.opcode === TLMessages.GrantData) {
missQueue.io.mem_grant <> bus.d
} .elsewhen (bus.d.bits.opcode === TLMessages.ReleaseAck) {
wb.io.mem_grant <> bus.d
} .otherwise {
assert (!bus.d.fire())
}
// dcache should only deal with DRAM addresses
when (bus.a.fire()) {
assert(bus.a.bits.address >= 0x80000000L.U)
}
when (bus.b.fire()) {
assert(bus.b.bits.address >= 0x80000000L.U)
}
when (bus.c.fire()) {
assert(bus.c.bits.address >= 0x80000000L.U)
}
io.prefetch.valid := missQueue.io.req.fire()
io.prefetch.bits := missQueue.io.req.bits
def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
sink.valid := source.valid && !block_signal
source.ready := sink.ready && !block_signal
sink.bits := source.bits
}
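// block_decoupled gates a Decoupled handshake with an external condition:
// neither side sees the other as ready while block_signal is high. It is
// used above to hold off miss requests while the writeback queue still owns
// the block: block_decoupled(missReqArb.io.out, missQueue.io.req, wb.io.block_miss_req)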
}
......@@ -2,6 +2,7 @@ package xiangshan.cache
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.ClientMetadata
import utils.XSDebug
......@@ -24,9 +25,6 @@ class LoadPipe extends DCacheModule
})
// LSU requests
// replayed req should never be nacked
assert(!(io.lsu.req.valid && io.lsu.req.bits.meta.replay && io.nack))
// if you got nacked, you can directly pass down
val not_nacked_ready = io.meta_read.ready && io.data_read.ready
val nacked_ready = true.B
......@@ -50,6 +48,7 @@ class LoadPipe extends DCacheModule
data_read.rmask := UIntToOH(get_row(io.lsu.req.bits.addr))
// Pipeline
// --------------------------------------------------------------------------------
// stage 0
val s0_valid = io.lsu.req.fire()
val s0_req = io.lsu.req.bits
......@@ -58,6 +57,8 @@ class LoadPipe extends DCacheModule
dump_pipeline_reqs("LoadPipe s0", s0_valid, s0_req)
// --------------------------------------------------------------------------------
// stage 1
val s1_req = RegNext(s0_req)
val s1_valid = RegNext(s0_valid, init = false.B)
......@@ -73,80 +74,68 @@ class LoadPipe extends DCacheModule
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_hit_meta = Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w)))
val s1_hit_state = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
when (io.miss_req.fire()) {
replacer.miss
val s1_fake_meta = Wire(new L1Metadata)
s1_fake_meta.tag := get_tag(s1_addr)
s1_fake_meta.coh := ClientMetadata.onReset
// when there is no tag match, we give it a fake meta
// this simplifies our logic in the s2 stage
val s1_hit_meta = Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w))), s1_fake_meta)
val s1_hit_coh = s1_hit_meta.coh
// select the row we are interested in
val s1_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) { s1_data(w) := data_resp(w)(get_row(s1_addr)) }
// select the word
// the index of word in a row, in case rowBits != wordBits
val s1_word_idx = if (rowWords == 1) 0.U else s1_addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
// load data gen
val s1_data_words = Wire(Vec(nWays, Vec(rowWords, UInt(encWordBits.W))))
for (w <- 0 until nWays) {
for (r <- 0 until rowWords) {
s1_data_words(w)(r) := s1_data(w)(encWordBits * (r + 1) - 1, encWordBits * r)
}
}
assert(!(s1_valid && s1_req.meta.replay && io.lsu.s1_kill),
"lsq tried to kill an replayed request!")
val s1_words = (0 until nWays) map (i => s1_data_words(i)(s1_word_idx))
val s1_decoded = (0 until nWays) map (i => cacheParams.dataCode.decode(s1_words(i)))
val s1_word_decoded = VecInit((0 until nWays) map (i => s1_decoded(i).corrected))
(0 until nWays) map (i => assert (!(s1_valid && s1_tag_match && (i.U === OHToUInt(s1_tag_match_way)) && s1_decoded(i).uncorrectable)))
io.lsu.s1_data := s1_word_decoded
// --------------------------------------------------------------------------------
// stage 2
val s2_req = RegNext(s1_req)
val s2_valid = RegNext(s1_valid && !io.lsu.s1_kill, init = false.B)
val s2_addr = RegNext(s1_addr)
dump_pipeline_reqs("LoadPipe s2", s2_valid, s2_req)
val s2_addr = RegNext(s1_addr)
// hit, miss, nack, permission checking
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = RegNext(s1_tag_match)
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_state = RegNext(s1_hit_state)
val s2_has_permission = s2_hit_state.onAccess(s2_req.cmd)._1
val s2_new_hit_state = s2_hit_state.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_way_en = RegNext(s1_repl_way_en)
val s2_old_meta = Mux(s2_tag_match, s2_hit_meta, s2_repl_meta)
val s2_way_en = Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
//
// If state changes on hit,
// we should treat it as not hit, and let mshr deal with it,
// since we can not write meta data on the main pipeline.
// It's possible that we had permission but state changes on hit:
// eg: write to exclusive but clean block
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state
// nacked or not
val s2_nack = Wire(Bool())
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) {
s2_data(w) := data_resp(w)(get_row(s2_addr))
}
val s2_hit_coh = RegNext(s1_hit_coh)
val s2_has_permission = s2_hit_coh.onAccess(s2_req.cmd)._1
val s2_new_hit_coh = s2_hit_coh.onAccess(s2_req.cmd)._3
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// the index of word in a row, in case rowBits != wordBits
val s2_word_idx = if (rowWords == 1) 0.U else s2_addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
// load data gen
val s2_data_words = Wire(Vec(rowWords, UInt(encWordBits.W)))
for (w <- 0 until rowWords) {
s2_data_words(w) := s2_data_muxed(encWordBits * (w + 1) - 1, encWordBits * w)
}
val s2_data_word = s2_data_words(s2_word_idx)
val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
val s2_data_word_decoded = s2_decoded.corrected
// this assertion is commented out
// when TLB misses, s2_hit may still be true
// which may cause unnecessary assertion failures
// assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_coh === s2_new_hit_coh
// generate data
val s2_data = RegNext(s1_word_decoded)
// select the way out
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// when req got nacked, upper levels should replay this request
// the same set is busy
// nacked or not
val s2_nack_hit = RegNext(s1_nack)
// cannot allocate an mshr for this load miss
val s2_nack_no_mshr = io.miss_req.valid && !io.miss_req.ready
......@@ -154,7 +143,7 @@ class LoadPipe extends DCacheModule
// For now, we use DuplicatedDataArray, so no bank conflicts
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_no_mshr || s2_nack_data
val s2_nack = s2_nack_hit || s2_nack_no_mshr || s2_nack_data
// only dump these signals when they are actually valid
dump_pipeline_valids("LoadPipe s2", "s2_hit", s2_valid && s2_hit)
......@@ -163,19 +152,18 @@ class LoadPipe extends DCacheModule
dump_pipeline_valids("LoadPipe s2", "s2_nack_no_mshr", s2_valid && s2_nack_no_mshr)
// send load miss to miss queue
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_addr)
io.miss_req.bits.tag_match := s2_tag_match
io.miss_req.bits.way_en := s2_way_en
io.miss_req.bits.old_meta := s2_old_meta
io.miss_req.bits.client_id := 0.U
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits := DontCare
io.miss_req.bits.source := LOAD_SOURCE.U
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_addr)
io.miss_req.bits.coh := s2_hit_coh
// send back response
val resp = Wire(ValidIO(new DCacheWordResp))
resp.valid := s2_valid
resp.bits.data := s2_data_word_decoded
resp.bits.meta := s2_req.meta
resp.bits := DontCare
resp.bits.data := s2_data_muxed
// on miss or nack, upper level should replay request
// but if we successfully sent the request to miss queue
// upper level does not need to replay request
......@@ -188,17 +176,18 @@ class LoadPipe extends DCacheModule
assert(!(resp.valid && !io.lsu.resp.ready))
when (resp.valid) {
XSDebug(s"LoadPipe resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
resp.bits.dump()
}
io.lsu.s2_hit_way := s2_tag_match_way
// -------
// Debug logging functions
def dump_pipeline_reqs(pipeline_stage_name: String, valid: Bool,
req: DCacheWordReq ) = {
when (valid) {
XSDebug(s"$pipeline_stage_name cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
req.cmd, req.addr, req.data, req.mask, req.meta.id, req.meta.replay)
XSDebug(s"$pipeline_stage_name: ")
req.dump()
}
}
......
This diff is collapsed.
package xiangshan.cache
import chisel3._
import chisel3.util._
import chisel3.ExcitingUtils._
import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleA, TLBundleD, TLBundleE, TLPermissions, TLArbiter, ClientMetadata}
import utils.{HasTLDump, XSDebug, BoolStopWatch, OneHot}
class MissReq extends DCacheBundle
{
val source = UInt(sourceTypeWidth.W)
val cmd = UInt(M_SZ.W)
// must be aligned to block
val addr = UInt(PAddrBits.W)
// store
val store_data = UInt((cfg.blockBytes * 8).W)
val store_mask = UInt(cfg.blockBytes.W)
// which word does amo work on?
val word_idx = UInt(log2Up(blockWords).W)
val amo_data = UInt(DataBits.W)
val amo_mask = UInt((DataBits/8).W)
// coherence state
val coh = new ClientMetadata
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("MissReq source: %d cmd: %d addr: %x store_data: %x store_mask: %x word_idx: %d amo_data: %x amo_mask: %x coh: %d id: %d\n",
source, cmd, addr, store_data, store_mask, word_idx, amo_data, amo_mask, coh.state, id)
}
}
// One miss entry deals with one missed block
class MissEntry(edge: TLEdgeOut) extends DCacheModule
{
val io = IO(new Bundle {
// MSHR ID
val id = Input(UInt())
// client requests
val req_valid = Input(Bool())
// this entry is free and can be allocated to new reqs
val primary_ready = Output(Bool())
// this entry is busy, but it can merge the new req
val secondary_ready = Output(Bool())
// this entry is busy and it can not merge the new req
val secondary_reject = Output(Bool())
val req = Input((new MissReq))
val refill = ValidIO(new Refill)
// bus
val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))
val pipe_req = DecoupledIO(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
})
// MSHR:
// 1. receive req
// 2. send acquire req
// 3. receive grant resp
// 4. let main pipe do refill and replace
// 5. wait for resp
// 6. send finish to end the tilelink transaction
// We only send finish after data is written into cache.
// This prevents L2 from probing the block down.
// See Tilelink spec 1.8.1 page 69
// A slave should not issue a Probe if there is a pending GrantAck on the block. Once the Probe is
// issued, the slave should not issue further Probes on that block until it receives a ProbeAck.
val s_invalid :: s_refill_req :: s_refill_resp :: s_main_pipe_req :: s_main_pipe_resp :: s_mem_finish :: Nil = Enum(6)
val state = RegInit(s_invalid)
// --------------------------------------------
// internal registers
val req = Reg(new MissReq)
// param of grant
val grant_param = Reg(UInt(TLPermissions.bdWidth.W))
// recording the source/sink info from Grant
// so that we can use it in the GrantAck
val grantack = Reg(Valid(new TLBundleE(edge.bundle)))
// should we refill the data to load queue to wake up any missed load?
val should_refill_data = Reg(Bool())
// --------------------------------------------
// merge reqs
// see whether we can merge requests
// do not count the s_invalid state in
// since we can not merge requests in that state
val acquire_not_sent = state === s_refill_req && !io.mem_acquire.ready
val data_not_refilled = state === s_refill_req || state === s_refill_resp
def can_merge(new_req: MissReq): Bool = {
// caution: do not merge with AMO
// we can not do amoalu calculation in MissQueue
// so, we do not know the result after AMO calculation
// so do not merge with AMO
// before read acquire is fired, we can merge read or write
val before_read_sent = acquire_not_sent && req.source === LOAD_SOURCE.U && (new_req.source === LOAD_SOURCE.U || new_req.source === STORE_SOURCE.U)
// before read/write refills data to LoadQueue, we can merge any read
val before_data_refill = data_not_refilled && (req.source === LOAD_SOURCE.U || req.source === STORE_SOURCE.U) && new_req.source === LOAD_SOURCE.U
before_read_sent || before_data_refill
}
def should_merge(new_req: MissReq): Bool = {
val block_match = req.addr === new_req.addr
block_match && can_merge(new_req)
}
def should_reject(new_req: MissReq): Bool = {
val block_match = req.addr === new_req.addr
// do not reject any req when we are in s_invalid
block_match && !can_merge(new_req) && state =/= s_invalid
}
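// Worked example: suppose this entry holds a LOAD miss on block A whose
// Acquire has not fired yet (acquire_not_sent). A new LOAD or STORE to A
// merges (secondary_ready); an AMO to A is rejected (secondary_reject);
// a req to any other block simply does not match this entry.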
io.primary_ready := state === s_invalid
io.secondary_ready := should_merge(io.req)
io.secondary_reject := should_reject(io.req)
// should not allocate, merge or reject at the same time
// one at a time
OneHot.checkOneHot(Seq(io.primary_ready, io.secondary_ready, io.secondary_reject))
// --------------------------------------------
// assign default values to output signals
io.refill.valid := false.B
io.refill.bits := DontCare
io.mem_acquire.valid := false.B
io.mem_acquire.bits := DontCare
io.mem_grant.ready := false.B
io.mem_finish.valid := false.B
io.mem_finish.bits := DontCare
io.pipe_req.valid := false.B
io.pipe_req.bits := DontCare
when (state =/= s_invalid) {
XSDebug("entry: %d state: %d\n", io.id, state)
req.dump()
}
// --------------------------------------------
// State Machine
// --------------------------------------------
// receive requests
// primary request: allocate for a new request
when (io.req_valid && io.primary_ready) {
assert (state === s_invalid)
// re init some fields
req := io.req
grantack.valid := false.B
// only miss req from load needs a refill to LoadQueue
should_refill_data := io.req.source === LOAD_SOURCE.U
state := s_refill_req
}
// secondary request: merge with existing request
when (io.req_valid && io.secondary_ready) {
// The merged req should never have higher permissions,
// which would mean the cache silently upgraded the permission of our block
// without merging with this miss queue request!
// Either our req came in with stale meta, or the req that upgraded the permission did not merge with this req.
// Both cases are bugs of DCache.
//
// DCache can silently drop permissions (e.g., probed or evicted);
// it should never silently upgrade permissions.
//
// TODO: please check Tilelink Metadata.scala
// and make sure that lower permissions are encoded as smaller numbers
assert (io.req.coh.state <= req.coh.state)
// use the most up-to-date meta
req.coh := io.req.coh
// when merging with a store
// we should record its info in our req
// or we will not be able to replay the store
when (io.req.source === STORE_SOURCE.U) {
req := io.req
}
should_refill_data := io.req.source === LOAD_SOURCE.U
}
// --------------------------------------------
// refill
// for full overwrite, we can use AcquirePerm to save memory bandwidth
val full_overwrite = req.source === STORE_SOURCE.U && req.store_mask.andR
when (state === s_refill_req) {
val grow_param = req.coh.onAccess(req.cmd)._2
val acquireBlock = edge.AcquireBlock(
fromSource = io.id,
toAddress = req.addr,
lgSize = (log2Up(cfg.blockBytes)).U,
growPermissions = grow_param)._2
val acquirePerm = edge.AcquirePerm(
fromSource = io.id,
toAddress = req.addr,
lgSize = (log2Up(cfg.blockBytes)).U,
growPermissions = grow_param)._2
io.mem_acquire.valid := true.B
io.mem_acquire.bits := Mux(full_overwrite, acquirePerm, acquireBlock)
when (io.mem_acquire.fire()) {
state := s_refill_resp
}
}
val (_, _, refill_done, refill_count) = edge.count(io.mem_grant)
// raw data
val refill_data = Reg(Vec(blockRows, UInt(rowBits.W)))
val new_data = Wire(Vec(blockRows, UInt(rowBits.W)))
val new_mask = Wire(Vec(blockRows, UInt(rowBytes.W)))
for (i <- 0 until blockRows) {
new_data(i) := req.store_data(rowBits * (i + 1) - 1, rowBits * i)
// we only need to merge data for Store
new_mask(i) := Mux(req.source === STORE_SOURCE.U,
req.store_mask(rowBytes * (i + 1) - 1, rowBytes * i), 0.U(rowBytes.W))
}
def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
val full_wmask = FillInterleaved(8, wmask)
((~full_wmask & old_data) | (full_wmask & new_data))
}
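// Example: with rowBits = 64 and wmask = 0x01, FillInterleaved(8, wmask)
// expands to 0x00000000000000ff, so byte 0 comes from new_data and bytes
// 7..1 are kept from old_data.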
when (state === s_refill_resp) {
io.mem_grant.ready := true.B
when (io.mem_grant.fire()) {
when (edge.hasData(io.mem_grant.bits)) {
// GrantData
for (i <- 0 until beatRows) {
val idx = (refill_count << log2Floor(beatRows)) + i.U
refill_data(idx) := mergePutData(io.mem_grant.bits.data(rowBits * (i + 1) - 1, rowBits * i), new_data(idx), new_mask(idx))
}
} .otherwise {
// Grant
// since we do not sync between MissQueue and WritebackQueue
// for an AcquireBlock BtoT, we can not protect our block from being replaced by another miss and written back by WritebackQueue
// so for AcquireBlock BtoT, we need L2 to give us GrantData, not Grant,
// so that whether our block is replaced or not, we can always refill the block with valid data
// So, if we enter here,
// we must be an AcquirePerm, not an AcquireBlock!!!
assert (full_overwrite)
// when we only acquire perm, not data
// use Store's data
for (i <- 0 until blockRows) {
refill_data(i) := new_data(i)
}
}
}
when (refill_done) {
grantack.valid := edge.isRequest(io.mem_grant.bits)
grantack.bits := edge.GrantAck(io.mem_grant.bits)
grant_param := io.mem_grant.bits.param
state := s_main_pipe_req
}
}
// put should_refill_data out of RegNext
// so that when load misses are merged at refill_done
// we can still refill data back
io.refill.valid := RegNext(state === s_refill_resp && refill_done) && should_refill_data
io.refill.bits.addr := req.addr
io.refill.bits.data := refill_data.asUInt
when (state === s_main_pipe_req) {
io.pipe_req.valid := true.B
val pipe_req = io.pipe_req.bits
pipe_req.miss := true.B
pipe_req.miss_id := io.id
pipe_req.miss_param := grant_param
pipe_req.probe := false.B
pipe_req.probe_param := DontCare
pipe_req.source := req.source
pipe_req.cmd := req.cmd
pipe_req.addr := req.addr
pipe_req.store_data := refill_data.asUInt
// full overwrite
pipe_req.store_mask := Fill(cfg.blockBytes, "b1".U)
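// refill_data already has the store's bytes merged in (see mergePutData
// above), so the main pipe can write back the whole block with a full mask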
pipe_req.word_idx := req.word_idx
pipe_req.amo_data := req.amo_data
pipe_req.amo_mask := req.amo_mask
pipe_req.id := req.id
when (io.pipe_req.fire()) {
state := s_main_pipe_resp
}
}
when (state === s_main_pipe_resp) {
when (io.pipe_resp.fire()) {
state := s_mem_finish
}
}
when (state === s_mem_finish) {
io.mem_finish.valid := grantack.valid
io.mem_finish.bits := grantack.bits
when (io.mem_finish.fire()) {
grantack.valid := false.B
state := s_invalid
}
}
}
class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
{
val io = IO(new Bundle {
val req = Flipped(DecoupledIO(new MissReq))
val refill = ValidIO(new Refill)
val mem_acquire = Decoupled(new TLBundleA(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val mem_finish = Decoupled(new TLBundleE(edge.bundle))
val pipe_req = DecoupledIO(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
})
val pipe_req_arb = Module(new RRArbiter(new MainPipeReq, cfg.nMissEntries))
val refill_arb = Module(new RRArbiter(new Refill, cfg.nMissEntries))
// dispatch req to MSHR
val primary_ready = Wire(Vec(cfg.nMissEntries, Bool()))
val secondary_ready = Wire(Vec(cfg.nMissEntries, Bool()))
val secondary_reject = Wire(Vec(cfg.nMissEntries, Bool()))
// try merging with existing reqs
val merge = secondary_ready.asUInt.orR
val merge_idx = PriorityEncoder(secondary_ready)
// some entry reports that the request can not be merged
val reject = secondary_reject.asUInt.orR
// allocate a new entry for this req
val allocate = !reject && !merge && primary_ready.asUInt.orR
val alloc_idx = PriorityEncoder(primary_ready)
// will this req be accepted
val accept = (merge || allocate) && !reject
// if it's accepted, which entry will it enter
val entry_idx = Mux(allocate, alloc_idx, merge_idx)
// for one block, there should be only one MSHR
// one block should not stay in multiple MSHRs
// if a req can not merge with existing reqs
// block it!
OneHot.checkOneHot(secondary_ready)
OneHot.checkOneHot(secondary_reject)
// should not merge and reject at the same time
OneHot.checkOneHot(Seq(merge, reject))
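// Dispatch summary: a req that can merge never allocates a new entry, and
// any reject blocks the req entirely, so a given block never occupies two
// MSHRs at once.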
io.req.ready := accept
io.mem_grant.ready := false.B
val entries = (0 until cfg.nMissEntries) map { i =>
val entry = Module(new MissEntry(edge))
entry.io.id := i.U(log2Up(cfg.nMissEntries).W)
// entry req
entry.io.req_valid := (i.U === entry_idx) && accept && io.req.valid
primary_ready(i) := entry.io.primary_ready
secondary_ready(i) := entry.io.secondary_ready
secondary_reject(i) := entry.io.secondary_reject
entry.io.req := io.req.bits
// entry refill
refill_arb.io.in(i).valid := entry.io.refill.valid
refill_arb.io.in(i).bits := entry.io.refill.bits
// pipe_req
pipe_req_arb.io.in(i) <> entry.io.pipe_req
// pipe_resp
entry.io.pipe_resp.valid := false.B
entry.io.pipe_resp.bits := DontCare
when (io.pipe_resp.bits.id === i.U) {
entry.io.pipe_resp <> io.pipe_resp
}
entry.io.mem_grant.valid := false.B
entry.io.mem_grant.bits := DontCare
when (io.mem_grant.bits.source === i.U) {
entry.io.mem_grant <> io.mem_grant
}
/*
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire(),
startHighPriority = true),
"perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
Perf
)
}
*/
entry
}
io.refill.valid := refill_arb.io.out.valid
io.refill.bits := refill_arb.io.out.bits
refill_arb.io.out.ready := true.B
// one refill at a time
OneHot.checkOneHot(refill_arb.io.in.map(r => r.valid))
TLArbiter.robin(edge, io.mem_acquire, entries.map(_.io.mem_acquire):_*)
TLArbiter.robin(edge, io.mem_finish, entries.map(_.io.mem_finish):_*)
io.pipe_req <> pipe_req_arb.io.out
// print all input/output requests for debug purpose
when (io.req.fire()) {
io.req.bits.dump()
// sanity check
val source = io.req.bits.source
val cmd = io.req.bits.cmd
when (source === LOAD_SOURCE.U) {
assert (cmd === M_XRD)
}
when (source === STORE_SOURCE.U) {
assert (cmd === M_XWR)
}
when (source === AMO_SOURCE.U) {
assert (
cmd === M_XA_SWAP ||
cmd === M_XLR ||
cmd === M_XSC ||
cmd === M_XA_ADD ||
cmd === M_XA_XOR ||
cmd === M_XA_OR ||
cmd === M_XA_AND ||
cmd === M_XA_MIN ||
cmd === M_XA_MAX ||
cmd === M_XA_MINU ||
cmd === M_XA_MAXU)
}
// req addr must be aligned to block boundary
assert (io.req.bits.addr(blockOffBits - 1, 0) === 0.U)
}
when (io.refill.fire()) {
io.refill.bits.dump()
}
when (io.mem_acquire.fire()) {
XSDebug("mem_acquire ")
io.mem_acquire.bits.dump
}
when (io.mem_grant.fire()) {
XSDebug("mem_grant ")
io.mem_grant.bits.dump
}
when (io.mem_finish.fire()) {
XSDebug("mem_finish ")
io.mem_finish.bits.dump
}
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(io.req.fire(), "perfCntDCacheMiss", Perf)
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleB, TLMessages, TLPermissions}
import utils.{HasTLDump, XSDebug}
class ProbeReq extends DCacheBundle
{
val source = UInt()
val opcode = UInt()
val addr = UInt(PAddrBits.W)
val param = UInt(TLPermissions.bdWidth.W)
def dump() = {
XSDebug("ProbeReq source: %d opcode: %d addr: %x param: %d\n",
source, opcode, addr, param)
}
}
class ProbeEntry extends DCacheModule {
val io = IO(new Bundle {
val req = Flipped(Decoupled(new ProbeReq))
val pipe_req = DecoupledIO(new MainPipeReq)
val lrsc_locked_block = Input(Valid(UInt()))
// the block we are probing
val block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_pipe_req :: Nil = Enum(2)
val state = RegInit(s_invalid)
val req = Reg(new ProbeReq)
// assign default values to signals
io.req.ready := false.B
io.pipe_req.valid := false.B
io.pipe_req.bits := DontCare
io.block_addr.valid := state =/= s_invalid
io.block_addr.bits := req.addr
when (state =/= s_invalid) {
XSDebug("state: %d\n", state)
}
when (state =/= s_invalid) {
XSDebug("ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
}
when (state === s_invalid) {
io.req.ready := true.B
when (io.req.fire()) {
req := io.req.bits
state := s_pipe_req
}
}
when (state === s_pipe_req) {
val lrsc_blocked = io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr
io.pipe_req.valid := !lrsc_blocked
val pipe_req = io.pipe_req.bits
pipe_req := DontCare
pipe_req.miss := false.B
pipe_req.probe := true.B
pipe_req.probe_param := req.param
pipe_req.addr := req.addr
when (io.pipe_req.fire()) {
state := s_invalid
}
}
}
class ProbeQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
{
val io = IO(new Bundle {
val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
val pipe_req = DecoupledIO(new MainPipeReq)
val lrsc_locked_block = Input(Valid(UInt()))
})
val pipe_req_arb = Module(new RRArbiter(new MainPipeReq, cfg.nProbeEntries))
// allocate a free entry for incoming request
val primary_ready = Wire(Vec(cfg.nProbeEntries, Bool()))
val allocate = primary_ready.asUInt.orR
val alloc_idx = PriorityEncoder(primary_ready)
// translate to inner req
val req = Wire(new ProbeReq)
req.source := io.mem_probe.bits.source
req.opcode := io.mem_probe.bits.opcode
req.addr := io.mem_probe.bits.address
req.param := io.mem_probe.bits.param
io.mem_probe.ready := allocate
val entries = (0 until cfg.nProbeEntries) map { i =>
val entry = Module(new ProbeEntry)
// entry req
entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
primary_ready(i) := entry.io.req.ready
entry.io.req.bits := req
// pipe_req
pipe_req_arb.io.in(i) <> entry.io.pipe_req
entry.io.lrsc_locked_block := io.lrsc_locked_block
entry
}
io.pipe_req <> pipe_req_arb.io.out
// print all input/output requests for debug purposes
when (io.mem_probe.valid) {
// before a probe finishes, L2 should not issue further probes for this block
val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.mem_probe.bits.address)).asUInt.orR
assert (!probe_conflict)
// for now, we can only deal with ProbeBlock
assert (io.mem_probe.bits.opcode === TLMessages.Probe)
}
// debug output
when (io.mem_probe.fire()) {
XSDebug("mem_probe: ")
io.mem_probe.bits.dump
}
when (io.pipe_req.fire()) {
io.pipe_req.bits.dump()
}
when (io.lrsc_locked_block.valid) {
XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
}
}
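// A hedged sketch (illustrative, not part of the sources) of the free-entry
// allocation scheme ProbeQueue uses and the queues below repeat: each entry
// reports whether it is free, PriorityEncoder picks the lowest free entry,
// and only that entry sees req.valid. The 8-bit payload is an assumption.
import chisel3._
import chisel3.util._

class AllocSketch(n: Int) extends Module {
  val io = IO(new Bundle {
    val busy   = Input(Vec(n, Bool()))         // per-entry busy flags
    val req    = Flipped(Decoupled(UInt(8.W))) // incoming request
    val chosen = Output(Valid(UInt(log2Ceil(n).W)))
  })
  val primary_ready = VecInit(io.busy.map(!_))
  val allocate  = primary_ready.asUInt.orR       // at least one entry is free
  val alloc_idx = PriorityEncoder(primary_ready) // lowest free entry wins
  io.req.ready    := allocate
  io.chosen.valid := io.req.fire()               // entry alloc_idx takes the request
  io.chosen.bits  := alloc_idx
}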
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import bus.tilelink._
class StoreReplayEntry extends DCacheModule
{
val io = IO(new Bundle {
val id = Input(UInt())
val lsu = Flipped(new DCacheLineIO)
val pipe_req = Decoupled(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
val block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_pipe_req :: s_pipe_resp :: s_wait :: s_resp :: Nil = Enum(5)
val state = RegInit(s_invalid)
val req = Reg(new DCacheLineReq)
// assign default values to output signals
io.lsu.req.ready := state === s_invalid
io.lsu.resp.valid := false.B
io.lsu.resp.bits := DontCare
io.pipe_req.valid := false.B
io.pipe_req.bits := DontCare
io.block_addr.valid := state =/= s_invalid
io.block_addr.bits := req.addr
when (state =/= s_invalid) {
XSDebug("StoreReplayEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
}
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.lsu.req.fire()) {
req := io.lsu.req.bits
state := s_pipe_req
}
}
// --------------------------------------------
// replay
when (state === s_pipe_req) {
io.pipe_req.valid := true.B
val pipe_req = io.pipe_req.bits
pipe_req := DontCare
pipe_req.miss := false.B
pipe_req.probe := false.B
pipe_req.source := STORE_SOURCE.U
pipe_req.cmd := req.cmd
pipe_req.addr := req.addr
pipe_req.store_data := req.data
pipe_req.store_mask := req.mask
pipe_req.id := io.id
when (io.pipe_req.fire()) {
state := s_pipe_resp
}
}
val ReplayDelayCycles = 16
val delay_counter = Counter(ReplayDelayCycles)
when (state === s_pipe_resp) {
// when not miss
//   everything is OK, simply send response back to sbuffer
// when miss and not replay
//   wait for missQueue to handle the miss and replay our request
// when miss and replay
//   req missed and failed to enter missQueue, manually replay it later
when (io.pipe_resp.valid) {
when (io.pipe_resp.bits.miss) {
when (io.pipe_resp.bits.replay) {
delay_counter.value := 0.U
state := s_wait
}
} .otherwise {
state := s_resp
}
}
}
when (state === s_wait) {
delay_counter.inc()
when (delay_counter.value === (ReplayDelayCycles - 1).U) {
state := s_pipe_req
}
}
// --------------------------------------------
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := DontCare
io.lsu.resp.bits.id := req.id
when (io.lsu.resp.fire()) {
state := s_invalid
}
}
// debug output
when (io.lsu.req.fire()) {
XSDebug(s"StoreReplayEntryTransaction req %d\n", io.id)
}
when (io.lsu.resp.fire()) {
XSDebug(s"StoreReplayEntryTransaction resp %d\n", io.id)
}
}
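// A small sketch (assumed interface, not the real module) of the fixed-delay
// replay backoff StoreReplayEntry implements above: a request that missed and
// could not enter the miss queue waits ReplayDelayCycles before re-issuing.
import chisel3._
import chisel3.util._

class ReplayBackoffSketch(delayCycles: Int = 16) extends Module {
  val io = IO(new Bundle {
    val start  = Input(Bool())  // pulse: request must be replayed later
    val replay = Output(Bool()) // pulse: delay elapsed, replay now
  })
  val waiting = RegInit(false.B)
  val counter = Counter(delayCycles)
  when (waiting) { counter.inc() }
  when (io.start) {
    waiting := true.B
    counter.value := 0.U        // restart the delay window
  }
  io.replay := waiting && counter.value === (delayCycles - 1).U
  when (io.replay) { waiting := false.B }
}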
class StoreReplayQueue extends DCacheModule
{
val io = IO(new Bundle {
val lsu = Flipped(new DCacheLineIO)
val pipe_req = Decoupled(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
})
val pipe_req_arb = Module(new RRArbiter(new MainPipeReq, cfg.nStoreReplayEntries))
val resp_arb = Module(new RRArbiter(new DCacheLineResp, cfg.nStoreReplayEntries))
// allocate a free entry for incoming request
val primary_ready = Wire(Vec(cfg.nStoreReplayEntries, Bool()))
val allocate = primary_ready.asUInt.orR
val alloc_idx = PriorityEncoder(primary_ready)
val req = io.lsu.req
val block_conflict = Wire(Bool())
req.ready := allocate && !block_conflict
val entries = (0 until cfg.nStoreReplayEntries) map { i =>
val entry = Module(new StoreReplayEntry)
entry.io.id := i.U
// entry req
entry.io.lsu.req.valid := (i.U === alloc_idx) && allocate && req.valid && !block_conflict
primary_ready(i) := entry.io.lsu.req.ready
entry.io.lsu.req.bits := req.bits
// lsu req and resp
resp_arb.io.in(i) <> entry.io.lsu.resp
// replay req and resp
pipe_req_arb.io.in(i) <> entry.io.pipe_req
entry.io.pipe_resp.valid := (i.U === io.pipe_resp.bits.id) && io.pipe_resp.valid
entry.io.pipe_resp.bits := io.pipe_resp.bits
entry
}
io.lsu.resp <> resp_arb.io.out
io.pipe_req <> pipe_req_arb.io.out
block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.lsu.req.bits.addr)).asUInt.orR
// sanity check
when (io.lsu.req.valid) {
assert(io.lsu.req.bits.cmd === M_XWR)
assert (!block_conflict)
}
// debug output
when (io.lsu.req.fire()) {
io.lsu.req.bits.dump()
}
when (io.lsu.resp.fire()) {
io.lsu.resp.bits.dump()
}
when (io.pipe_req.fire()) {
io.pipe_req.bits.dump()
}
when (io.pipe_resp.valid) {
io.pipe_resp.bits.dump()
}
}
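// A sketch (assumed names and widths) of the block_conflict check above: a
// new request is rejected while any entry already works on the same block
// address, so two entries never race on one cache line.
import chisel3._
import chisel3.util._

class BlockConflictSketch(n: Int, addrW: Int = 32) extends Module {
  val io = IO(new Bundle {
    val entry_valid = Input(Vec(n, Bool()))        // entry holds a request
    val entry_addr  = Input(Vec(n, UInt(addrW.W))) // block address it works on
    val req_addr    = Input(UInt(addrW.W))         // incoming block address
    val conflict    = Output(Bool())
  })
  io.conflict := VecInit((0 until n).map(i =>
    io.entry_valid(i) && io.entry_addr(i) === io.req_addr)).asUInt.orR
}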
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.{XSDebug, HasTLDump}
import freechips.rocketchip.tilelink.{TLBundleC, TLBundleD, TLEdgeOut, TLPermissions, TLArbiter}
class WritebackReq extends DCacheBundle {
val addr = UInt(PAddrBits.W)
val param = UInt(TLPermissions.cWidth.W)
val voluntary = Bool()
val hasData = Bool()
val data = UInt((cfg.blockBytes * 8).W)
def dump() = {
XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
addr, param, voluntary, hasData, data)
}
}
class WritebackEntry(edge: TLEdgeOut) extends DCacheModule with HasTLDump
{
val io = IO(new Bundle {
val id = Input(UInt())
val req = Flipped(DecoupledIO(new WritebackReq))
val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_release_req :: s_release_resp :: Nil = Enum(3)
val state = RegInit(s_invalid)
// internal regs
// remaining beats
val remain = RegInit(0.U(refillCycles.W))
val remain_set = WireInit(0.U(refillCycles.W))
val remain_clr = WireInit(0.U(refillCycles.W))
remain := (remain | remain_set) & ~remain_clr
val busy = remain.orR
val req = Reg(new WritebackReq)
// assign default values to output signals
io.req.ready := false.B
io.mem_release.valid := false.B
io.mem_release.bits := DontCare
io.mem_grant.ready := false.B
io.block_addr.valid := state =/= s_invalid
io.block_addr.bits := req.addr
when (state =/= s_invalid) {
XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
}
// --------------------------------------------------------------------------------
// s_invalid: receive requests
// new req entering
io.req.ready := state === s_invalid
when (io.req.fire()) {
assert (remain === 0.U)
remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
req := io.req.bits
state := s_release_req
}
// --------------------------------------------------------------------------------
// while there are beats remaining to be sent, we keep sending
// which beat to send in this cycle?
val beat = PriorityEncoder(remain)
val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
for (i <- 0 until refillCycles) {
beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
}
val probeResponse = edge.ProbeAck(
fromSource = io.id,
toAddress = req.addr,
lgSize = log2Ceil(cfg.blockBytes).U,
reportPermissions = req.param
)
val probeResponseData = edge.ProbeAck(
fromSource = io.id,
toAddress = req.addr,
lgSize = log2Ceil(cfg.blockBytes).U,
reportPermissions = req.param,
data = beat_data(beat)
)
val voluntaryRelease = edge.Release(
fromSource = io.id,
toAddress = req.addr,
lgSize = log2Ceil(cfg.blockBytes).U,
shrinkPermissions = req.param
)._2
val voluntaryReleaseData = edge.Release(
fromSource = io.id,
toAddress = req.addr,
lgSize = log2Ceil(cfg.blockBytes).U,
shrinkPermissions = req.param,
data = beat_data(beat)
)._2
io.mem_release.valid := busy
io.mem_release.bits := Mux(req.voluntary,
Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
Mux(req.hasData, probeResponseData, probeResponse))
when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }
val (_, _, release_done, _) = edge.count(io.mem_release)
when (state === s_release_req && release_done) {
state := Mux(req.voluntary, s_release_resp, s_invalid)
}
// --------------------------------------------------------------------------------
// receive ReleaseAck for Releases
when (state === s_release_resp) {
io.mem_grant.ready := true.B
when (io.mem_grant.fire()) {
state := s_invalid
}
}
}
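// A self-contained sketch (illustrative only) of the beat bookkeeping in
// WritebackEntry: `remain` holds one bit per outstanding beat, a set/clr
// pair updates it, PriorityEncoder picks the next beat to send, and
// PriorityEncoderOH clears exactly that bit when the beat fires.
import chisel3._
import chisel3.util._

class BeatTrackerSketch(beats: Int) extends Module {
  val io = IO(new Bundle {
    val start = Input(Bool())  // load a full set of beats
    val fire  = Input(Bool())  // one beat accepted downstream
    val beat  = Output(UInt(log2Ceil(beats).W))
    val busy  = Output(Bool())
  })
  val remain = RegInit(0.U(beats.W))
  val remain_set = Mux(io.start, ~0.U(beats.W), 0.U)
  val remain_clr = Mux(io.fire, PriorityEncoderOH(remain), 0.U)
  remain := (remain | remain_set) & ~remain_clr
  io.beat := PriorityEncoder(remain)
  io.busy := remain.orR
}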
class WritebackQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
{
val io = IO(new Bundle {
val req = Flipped(DecoupledIO(new WritebackReq))
val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val miss_req = Flipped(Valid(UInt()))
val block_miss_req = Output(Bool())
})
// allocate a free entry for incoming request
val primary_ready = Wire(Vec(cfg.nReleaseEntries, Bool()))
val allocate = primary_ready.asUInt.orR
val alloc_idx = PriorityEncoder(primary_ready)
val req = io.req
val block_conflict = Wire(Bool())
req.ready := allocate && !block_conflict
// assign default values to output signals
io.mem_release.valid := false.B
io.mem_release.bits := DontCare
io.mem_grant.ready := false.B
val entries = (0 until cfg.nReleaseEntries) map { i =>
val entry = Module(new WritebackEntry(edge))
entry.io.id := i.U
// entry req
entry.io.req.valid := (i.U === alloc_idx) && allocate && req.valid && !block_conflict
primary_ready(i) := entry.io.req.ready
entry.io.req.bits := req.bits
entry.io.mem_grant.valid := (i.U === io.mem_grant.bits.source) && io.mem_grant.valid
entry.io.mem_grant.bits := io.mem_grant.bits
when (i.U === io.mem_grant.bits.source) {
io.mem_grant.ready := entry.io.mem_grant.ready
}
entry
}
block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.req.bits.addr)).asUInt.orR
val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
io.block_miss_req := io.miss_req.valid && miss_req_conflict
TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)
// sanity check
// print all input/output requests for debug purposes
// print req
when (io.req.fire()) {
io.req.bits.dump()
}
when (io.mem_release.fire()) {
io.mem_release.bits.dump
}
when (io.mem_grant.fire()) {
io.mem_grant.bits.dump
}
when (io.miss_req.valid) {
XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
}
when (io.block_miss_req) {
XSDebug("block_miss_req\n")
}
}
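// A sketch (assumed bundle layout) of the grant routing WritebackQueue does
// above: the D-channel source field selects which entry consumes the
// ReleaseAck, and only the selected entry's ready is reflected upstream.
import chisel3._
import chisel3.util._

class GrantRouteSketch(n: Int, w: Int = 8) extends Module {
  val io = IO(new Bundle {
    val grant = Flipped(Decoupled(new Bundle {
      val source = UInt(log2Ceil(n).W) // which entry this grant belongs to
      val data   = UInt(w.W)
    }))
    val entry = Vec(n, Decoupled(UInt(w.W)))
  })
  io.grant.ready := false.B
  for (i <- 0 until n) {
    io.entry(i).valid := io.grant.valid && io.grant.bits.source === i.U
    io.entry(i).bits  := io.grant.bits.data
    when (io.grant.bits.source === i.U) {
      io.grant.ready := io.entry(i).ready
    }
  }
}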
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.{XSDebug}
// this is a traditional cache pipeline:
// it handles atomics: amo/lr/sc
class AtomicsPipe extends DCacheModule
{
val io = IO(new DCacheBundle{
val lsu = Flipped(new DCacheWordIO)
val data_read = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
val data_write = DecoupledIO(new L1DataWriteReq)
val meta_read = DecoupledIO(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
val inflight_req_idxes = Output(Vec(3, Valid(UInt())))
val inflight_req_block_addrs = Output(Vec(3, Valid(UInt())))
val block_probe_addr = Output(Valid(UInt()))
val wb_invalidate_lrsc = Input(Valid(UInt()))
// send miss request to miss queue
val miss_req = DecoupledIO(new MissReq)
})
// LSU requests
io.lsu.req.ready := io.meta_read.ready && io.data_read.ready
io.meta_read.valid := io.lsu.req.valid
io.data_read.valid := io.lsu.req.valid
val meta_read = io.meta_read.bits
val data_read = io.data_read.bits
// Tag read for new requests
meta_read.idx := get_idx(io.lsu.req.bits.addr)
meta_read.way_en := ~0.U(nWays.W)
meta_read.tag := DontCare
// Data read for new requests
data_read.addr := io.lsu.req.bits.addr
data_read.way_en := ~0.U(nWays.W)
// only need to read the specific row
data_read.rmask := UIntToOH(get_row(io.lsu.req.bits.addr))
// Pipeline
// ---------------------------------------
// stage 0
val s0_valid = io.lsu.req.fire()
val s0_req = io.lsu.req.bits
dump_pipeline_reqs("AtomicsPipe s0", s0_valid, s0_req)
// ---------------------------------------
// stage 1
val s1_req = RegNext(s0_req)
val s1_valid = RegNext(s0_valid, init = false.B)
val s1_addr = s1_req.addr
val s1_nack = false.B
dump_pipeline_reqs("AtomicsPipe s1", s1_valid, s1_req)
// tag check
val meta_resp = io.meta_resp
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_hit_meta = Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w)))
val s1_hit_state = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
when (io.miss_req.fire()) {
replacer.miss
}
// ---------------------------------------
// stage 2
val s2_req = RegNext(s1_req)
val s2_valid = RegNext(s1_valid, init = false.B)
dump_pipeline_reqs("AtomicsPipe s2", s2_valid, s2_req)
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegNext(meta_resp(w).coh)))
val s2_has_permission = s2_hit_state.onAccess(s2_req.cmd)._1
val s2_new_hit_state = s2_hit_state.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_way_en = RegNext(s1_repl_way_en)
val s2_old_meta = Mux(s2_tag_match, s2_hit_meta, s2_repl_meta)
val s2_way_en = Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en)
// we not only need permission,
// we also require that the coherence state does not change on a hit;
// thus we require new_hit_state === old_hit_state
//
// If the state changes on a hit,
// we should treat it as a miss and let the mshr deal with it,
// since we cannot write meta data on the main pipeline.
// It is possible to have permission while the state still changes on a hit,
// e.g. a write to an exclusive but clean block
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state
val s2_nack = Wire(Bool())
// when req got nacked, upper levels should replay this request
// the same set is busy
val s2_nack_hit = RegNext(s1_nack)
// cannot allocate mshr for store miss
val s2_nack_no_mshr = io.miss_req.valid && !io.miss_req.ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_no_mshr || s2_nack_data
// lr/sc
val debug_sc_fail_addr = RegInit(0.U)
val debug_sc_fail_cnt = RegInit(0.U(8.W))
val lrsc_count = RegInit(0.U(log2Ceil(lrscCycles).W))
val lrsc_valid = lrsc_count > lrscBackoff.U
val lrsc_addr = Reg(UInt())
val s2_lr = s2_req.cmd === M_XLR && !s2_nack
val s2_sc = s2_req.cmd === M_XSC && !s2_nack
val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === get_block_addr(s2_req.addr)
val s2_sc_fail = s2_sc && !s2_lrsc_addr_match
val s2_sc_resp = Mux(s2_sc_fail, 1.U, 0.U)
// we have permission on this block
// but we can not finish in this pass
// we need to go to miss queue to update meta and set dirty first
val s2_set_dirty = s2_tag_match && s2_has_permission && s2_hit_state =/= s2_new_hit_state
// this sc should succeed, but we need to set dirty first
// do not treat it as an sc failure or reset the lr/sc counter
val sc_set_dirty = s2_set_dirty && !s2_nack && s2_sc && s2_lrsc_addr_match
when (s2_valid && !sc_set_dirty) {
when (s2_hit && !s2_nack && s2_lr) {
lrsc_count := (lrscCycles - 1).U
lrsc_addr := get_block_addr(s2_req.addr)
} .otherwise {
lrsc_count := 0.U
}
} .elsewhen (lrsc_count > 0.U) {
lrsc_count := lrsc_count - 1.U
}
io.block_probe_addr.valid := lrsc_valid
io.block_probe_addr.bits := lrsc_addr
// when we release this block,
// we invalidate this reservation set
when (io.wb_invalidate_lrsc.valid) {
when (io.wb_invalidate_lrsc.bits === lrsc_addr) {
lrsc_count := 0.U
}
// when we release this block, there should be no matching lrsc inflight
assert (!(s2_valid && (s2_lr || s2_sc) && io.wb_invalidate_lrsc.bits === get_block_addr(s2_req.addr)))
}
when (s2_valid) {
when (s2_req.addr === debug_sc_fail_addr) {
when (s2_sc_fail) {
debug_sc_fail_cnt := debug_sc_fail_cnt + 1.U
} .elsewhen (s2_sc) {
debug_sc_fail_cnt := 0.U
}
} .otherwise {
when (s2_sc_fail) {
debug_sc_fail_addr := s2_req.addr
debug_sc_fail_cnt := 1.U
}
}
}
assert(debug_sc_fail_cnt < 100.U, "L1DCache failed too many SCs in a row")
// only dump these signals when they are actually valid
dump_pipeline_valids("AtomicsPipe s2", "s2_hit", s2_valid && s2_hit)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack", s2_valid && s2_nack)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_hit", s2_valid && s2_nack_hit)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_no_mshr", s2_valid && s2_nack_no_mshr)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_data", s2_valid && s2_nack_data)
when (s2_valid) {
XSDebug("lrsc_count: %d lrsc_valid: %b lrsc_addr: %x\n",
lrsc_count, lrsc_valid, lrsc_addr)
XSDebug("s2_lr: %b s2_sc: %b s2_lrsc_addr_match: %b s2_sc_fail: %b s2_sc_resp: %x\n",
s2_lr, s2_sc, s2_lrsc_addr_match, s2_sc_fail, s2_sc_resp)
XSDebug("debug_sc_fail_addr: %x debug_sc_fail_cnt: %d\n",
debug_sc_fail_addr, debug_sc_fail_cnt)
}
// load data gen
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) {
s2_data(w) := data_resp(w)(get_row(s2_req.addr))
}
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// the index of word in a row, in case rowBits != wordBits
val s2_word_idx = if (rowWords == 1) 0.U else s2_req.addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
val s2_data_words = Wire(Vec(rowWords, UInt(encWordBits.W)))
for (w <- 0 until rowWords) {
s2_data_words(w) := s2_data_muxed(encWordBits * (w + 1) - 1, encWordBits * w)
}
val s2_data_word = s2_data_words(s2_word_idx)
val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
val s2_data_word_decoded = s2_decoded.corrected
assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// send load miss to miss queue
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_req.addr)
io.miss_req.bits.tag_match := s2_tag_match
io.miss_req.bits.way_en := s2_way_en
io.miss_req.bits.old_meta := s2_old_meta
io.miss_req.bits.client_id := s2_req.meta.id
val resp = Wire(ValidIO(new DCacheWordResp))
resp.valid := s2_valid
resp.bits.data := Mux(s2_sc, s2_sc_resp, s2_data_word)
resp.bits.meta := s2_req.meta
// reuse this field to pass lr/sc valid to commit
// nemu uses this to see whether the lr/sc counter is still valid
resp.bits.meta.id := lrsc_valid
resp.bits.miss := !s2_hit || s2_nack
resp.bits.replay := resp.bits.miss && (!io.miss_req.fire() || s2_nack)
io.lsu.resp.valid := resp.valid
io.lsu.resp.bits := resp.bits
assert(!(resp.valid && !io.lsu.resp.ready))
when (resp.valid) {
XSDebug(s"AtomicsPipe resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
}
// ---------------------------------------
// s3: do data write
// Store/amo hits
val amoalu = Module(new AMOALU(wordBits))
amoalu.io.mask := s2_req.mask
amoalu.io.cmd := s2_req.cmd
amoalu.io.lhs := s2_data_word_decoded
amoalu.io.rhs := s2_req.data
val s3_req = RegNext(s2_req)
val s3_valid = RegNext(s2_valid && s2_hit && isWrite(s2_req.cmd) && !s2_nack && !s2_sc_fail)
val s3_tag_match_way = RegNext(s2_tag_match_way)
val wdata_encoded = cacheParams.dataCode.encode(amoalu.io.out)
val s3_wdata = Reg(UInt())
s3_wdata := wdata_encoded
// write dcache if hit
// only need to write the specific row
val wmask = WireInit(VecInit((0 until blockRows) map (i => 0.U(rowWords.W))))
val wdata = WireInit(VecInit((0 until blockRows) map (i => Cat(
(0 until rowWords) map { w => s3_wdata }))))
wmask(get_row(s3_req.addr)) := ~0.U(rowWords.W)
val data_write = io.data_write.bits
io.data_write.valid := s3_valid
data_write.rmask := DontCare
data_write.way_en := s3_tag_match_way
data_write.addr := s3_req.addr
data_write.wmask := wmask
data_write.data := wdata
assert(!(io.data_write.valid && !io.data_write.ready))
dump_pipeline_reqs("AtomicsPipe s3", s3_valid, s3_req)
// -------
// wire out signals for synchronization
io.inflight_req_idxes(0).valid := io.lsu.req.valid
io.inflight_req_idxes(1).valid := s1_valid
io.inflight_req_idxes(2).valid := s2_valid
io.inflight_req_idxes(0).bits := get_idx(s0_req.addr)
io.inflight_req_idxes(1).bits := get_idx(s1_req.addr)
io.inflight_req_idxes(2).bits := get_idx(s2_req.addr)
io.inflight_req_block_addrs(0).valid := io.lsu.req.valid
io.inflight_req_block_addrs(1).valid := s1_valid
io.inflight_req_block_addrs(2).valid := s2_valid
io.inflight_req_block_addrs(0).bits := get_block_addr(s0_req.addr)
io.inflight_req_block_addrs(1).bits := get_block_addr(s1_req.addr)
io.inflight_req_block_addrs(2).bits := get_block_addr(s2_req.addr)
// -------
// Debug logging functions
def dump_pipeline_reqs(pipeline_stage_name: String, valid: Bool,
req: DCacheWordReq ) = {
when (valid) {
XSDebug(s"$pipeline_stage_name cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
req.cmd, req.addr, req.data, req.mask, req.meta.id, req.meta.replay)
}
}
def dump_pipeline_valids(pipeline_stage_name: String, signal_name: String, valid: Bool) = {
when (valid) {
XSDebug(s"$pipeline_stage_name $signal_name\n")
}
}
}
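// A hedged sketch of the LR/SC reservation tracking AtomicsPipe implements:
// LR arms a down-counter and records the block address; SC succeeds only
// while the counter is above the backoff threshold and the address matches.
// The constants and the 32-bit address are illustrative assumptions.
import chisel3._
import chisel3.util._

class LrScSketch(lrscCycles: Int = 64, lrscBackoff: Int = 8, addrW: Int = 32) extends Module {
  val io = IO(new Bundle {
    val lr      = Input(Bool())        // load-reserved fires this cycle
    val sc      = Input(Bool())        // store-conditional fires this cycle
    val addr    = Input(UInt(addrW.W)) // block address of the access
    val sc_fail = Output(Bool())
  })
  val count   = RegInit(0.U(log2Ceil(lrscCycles).W))
  val resAddr = Reg(UInt(addrW.W))
  val valid   = count > lrscBackoff.U  // reservation still live
  when (io.lr) {
    count   := (lrscCycles - 1).U
    resAddr := io.addr
  } .elsewhen (count > 0.U) {
    count := count - 1.U
  }
  io.sc_fail := io.sc && !(valid && resAddr === io.addr)
}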
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
// wraps around AtomicsPipe
// when a request misses, send a miss req to missQueue and replay the request
class AtomicsMissQueue extends DCacheModule
{
val io = IO(new DCacheBundle {
val lsu = Flipped(new DCacheWordIO)
val replay = new DCacheWordIO
val miss_resp = Flipped(ValidIO(new MissResp))
val miss_finish = DecoupledIO(new MissFinish)
})
val s_invalid :: s_replay_req :: s_replay_resp :: s_resp :: s_miss_resp :: s_miss_finish :: Nil = Enum(6)
val state = RegInit(s_invalid)
val id = 0.U
val req = Reg(new DCacheWordReq)
val resp = Reg(new DCacheWordResp)
val req_block_addr = get_block_addr(req.addr)
val reg_miss_resp = Reg(new MissResp)
// assign default values to output signals
io.lsu.req.ready := state === s_invalid
io.lsu.resp.valid := false.B
io.lsu.resp.bits := DontCare
io.replay.req.valid := false.B
io.replay.req.bits := DontCare
io.replay.resp.ready := false.B
io.miss_finish.valid := false.B
io.miss_finish.bits := DontCare
when (state =/= s_invalid) {
XSDebug("state: %d\n", state)
}
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.lsu.req.fire()) {
assert(!io.lsu.req.bits.meta.replay)
req := io.lsu.req.bits
state := s_replay_req
}
}
// --------------------------------------------
// replay
when (state === s_replay_req) {
io.replay.req.valid := true.B
io.replay.req.bits := req
when (io.replay.req.fire()) {
state := s_replay_resp
}
}
when (state === s_replay_resp) {
io.replay.resp.ready := true.B
when (io.replay.resp.fire()) {
// req missed
when (io.replay.resp.bits.miss) {
// replayed reqs should not miss
assert(!req.meta.replay)
// the req missed and did not enter mshr
// so replay it until it hits or enters mshr
when (io.replay.resp.bits.replay) {
state := s_replay_req
} .otherwise {
// the req missed and enters mshr
// wait for miss response
state := s_miss_resp
}
} .otherwise {
// req hits, everything OK
resp := io.replay.resp.bits
when (!req.meta.replay) {
state := s_resp
} .otherwise {
// if it's a replayed request
// we need to tell mshr, we are done
state := s_miss_finish
}
}
}
}
when (state === s_miss_resp) {
when (io.miss_resp.valid) {
reg_miss_resp := io.miss_resp.bits
// mark req as replayed req
req.meta.replay := true.B
state := s_replay_req
}
}
when (state === s_miss_finish) {
io.miss_finish.valid := true.B
io.miss_finish.bits.client_id := id
io.miss_finish.bits.entry_id := reg_miss_resp.entry_id
when (io.miss_finish.fire()) {
state := s_resp
}
}
// --------------------------------------------
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := resp
when (io.lsu.resp.fire()) {
state := s_invalid
}
}
// debug output
when (io.lsu.req.fire()) {
XSDebug(s"io.lsu.req cmd: %x addr: %x data: %x mask: %x id: %d replayed_req: %b\n",
io.lsu.req.bits.cmd, io.lsu.req.bits.addr, io.lsu.req.bits.data, io.lsu.req.bits.mask, io.lsu.req.bits.meta.id, io.lsu.req.bits.meta.replay)
}
val replay = io.replay.req
when (replay.fire()) {
XSDebug(s"replay cmd: %x addr: %x data: %x mask: %x id: %d replayed_req: %b\n",
replay.bits.cmd, replay.bits.addr, replay.bits.data, replay.bits.mask, replay.bits.meta.id, replay.bits.meta.replay)
}
when (io.lsu.resp.fire()) {
XSDebug(s"io.lsu.resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
io.lsu.resp.bits.data, io.lsu.resp.bits.meta.id, io.lsu.resp.bits.meta.replay, io.lsu.resp.bits.miss, io.lsu.resp.bits.replay)
}
val miss_resp = io.miss_resp
XSDebug(miss_resp.valid, "miss_resp client_id: %d entry_id: %d\n",
miss_resp.bits.client_id, miss_resp.bits.entry_id)
val miss_finish = io.miss_finish
XSDebug(miss_finish.fire(), "miss_finish client_id: %d entry_id: %d\n",
miss_finish.bits.client_id, miss_finish.bits.entry_id)
when (io.lsu.req.fire()) {
XSDebug(s"AtomicsMissEntryTransaction req 0\n")
}
when (io.lsu.resp.fire()) {
XSDebug(s"AtomicsMissEntryTransaction resp 0\n")
}
}
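// A speculative sketch (abstract event inputs, not the real interfaces) of
// the client <-> MissQueue protocol AtomicsMissQueue follows above: replay
// until the request hits or enters the mshr, wait for MissResp, replay the
// request marked as replayed, then send MissFinish to free the mshr entry.
import chisel3._
import chisel3.util._

class MissClientFsmSketch extends Module {
  val io = IO(new Bundle {
    val miss         = Input(Bool())  // replay pipeline reported a miss
    val entered_mshr = Input(Bool())  // the miss entered the miss queue
    val miss_resp    = Input(Bool())  // MissResp arrived
    val finish_fire  = Input(Bool())  // MissFinish accepted
    val state_out    = Output(UInt(2.W))
  })
  val s_replay :: s_miss_resp :: s_miss_finish :: s_done :: Nil = Enum(4)
  val state    = RegInit(s_replay)
  val replayed = RegInit(false.B)
  switch (state) {
    is (s_replay) {
      when (io.miss && io.entered_mshr) { state := s_miss_resp }
        .elsewhen (!io.miss && replayed) { state := s_miss_finish }
        .elsewhen (!io.miss)             { state := s_done }
      // miss without entering the mshr: stay here and replay again
    }
    is (s_miss_resp)   { when (io.miss_resp)   { replayed := true.B; state := s_replay } }
    is (s_miss_finish) { when (io.finish_fire) { state := s_done } }
  }
  io.state_out := state
}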
package xiangshan.cache
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLClientNode, TLClientParameters, TLMasterParameters, TLMasterPortParameters, TLArbiter}
// Meta data for dcache requests
// anything that should go with reqs and resps goes here
class DCacheMeta extends DCacheBundle {
val id = UInt(reqIdWidth.W)
val vaddr = UInt(VAddrBits.W) // maybe we should use VAddrBits?
val paddr = UInt(PAddrBits.W)
val uop = new MicroOp //FIXME: opt data width
val mmio = Bool()
val tlb_miss = Bool()
// dcache request id
// master uses id to correlate resps to reqs
// different master can allocate and free ids independently
// as long as they do not share resp
val mask = UInt((DataBits/8).W)
val replay = Bool() // whether it's a replayed request?
}
// memory request in word granularity (load, mmio, lr/sc, atomics)
class DCacheWordReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val data = UInt(DataBits.W)
val mask = UInt((DataBits/8).W)
val meta = new DCacheMeta
}
// memory request in cache-line granularity (store)
class DCacheLineReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
val mask = UInt(cfg.blockBytes.W)
val meta = new DCacheMeta
}
class DCacheWordResp extends DCacheBundle
{
val data = UInt(DataBits.W)
val meta = new DCacheMeta
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val replay = Bool()
}
class DCacheLineResp extends DCacheBundle
{
val data = UInt((cfg.blockBytes * 8).W)
val meta = new DCacheMeta
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val replay = Bool()
}
class Refill extends DCacheBundle
{
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
}
class DCacheWordIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
}
// used by load unit
class DCacheLoadIO extends DCacheWordIO
{
// kill previous cycle's req
val s1_kill = Output(Bool())
// cycle 0: virtual address: req.addr
// cycle 1: physical address: s1_paddr
val s1_paddr = Output(UInt(PAddrBits.W))
}
class DCacheLineIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheLineReq )
val resp = Flipped(DecoupledIO(new DCacheLineResp))
}
class DCacheToLsuIO extends DCacheBundle {
val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
val lsq = ValidIO(new Refill) // refill to load queue, wake up load misses
val store = Flipped(new DCacheLineIO) // for sbuffer
val atomics = Flipped(new DCacheWordIO) // atomics reqs
}
class DCacheIO extends DCacheBundle {
val lsu = new DCacheToLsuIO
val prefetch = DecoupledIO(new MissReq)
}
class DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParameters {
val clientParameters = TLMasterPortParameters.v1(
Seq(TLMasterParameters.v1(
name = "dcache",
sourceId = IdRange(0, cfg.nMissEntries+1),
supportsProbe = TransferSizes(cfg.blockBytes)
))
)
val clientNode = TLClientNode(Seq(clientParameters))
lazy val module = new DCacheImp(this)
}
class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters with HasXSLog {
val io = IO(new DCacheIO)
val (bus, edge) = outer.clientNode.out.head
require(bus.d.bits.data.getWidth == l1BusDataWidth, "DCache: tilelink width does not match")
//----------------------------------------
// core data structures
val dataArray = Module(new DuplicatedDataArray)
val metaArray = Module(new DuplicatedMetaArray)
/*
dataArray.dump()
metaArray.dump()
*/
//----------------------------------------
// core modules
val ldu = Seq.fill(LoadPipelineWidth) { Module(new LoadPipe) }
val stu = Module(new StorePipe)
val atomics = Module(new AtomicsPipe)
val storeMissQueue = Module(new StoreMissQueue)
val atomicsMissQueue = Module(new AtomicsMissQueue)
val missQueue = Module(new MissQueue(edge))
val wb = Module(new WritebackUnit(edge))
val prober = Module(new ProbeUnit(edge))
//----------------------------------------
// meta array
val MetaWritePortCount = 2
val MissQueueMetaWritePort = 0
val ProberMetaWritePort = 1
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, MetaWritePortCount))
metaWriteArb.io.in(MissQueueMetaWritePort) <> missQueue.io.meta_write
metaWriteArb.io.in(ProberMetaWritePort) <> prober.io.meta_write
metaArray.io.write <> metaWriteArb.io.out
// To simplify port arbitration,
// the Prober, StorePipe, LoadPipe 0 and AtomicsPipe share meta read port 0
// if contention gets severe, consider load balancing across the two ports
val MetaReadPortCount = 4
val ProberMetaReadPort = 0
val StorePipeMetaReadPort = 1
val LoadPipeMetaReadPort = 2
val AtomicsPipeMetaReadPort = 3
val metaReadArb = Module(new Arbiter(new L1MetaReadReq, MetaReadPortCount))
metaReadArb.io.in(ProberMetaReadPort) <> prober.io.meta_read
metaReadArb.io.in(StorePipeMetaReadPort) <> stu.io.meta_read
metaReadArb.io.in(LoadPipeMetaReadPort) <> ldu(0).io.meta_read
metaReadArb.io.in(AtomicsPipeMetaReadPort) <> atomics.io.meta_read
metaArray.io.read(0) <> metaReadArb.io.out
prober.io.meta_resp <> metaArray.io.resp(0)
stu.io.meta_resp <> metaArray.io.resp(0)
ldu(0).io.meta_resp <> metaArray.io.resp(0)
atomics.io.meta_resp <> metaArray.io.resp(0)
for (w <- 1 until LoadPipelineWidth) {
metaArray.io.read(w) <> ldu(w).io.meta_read
ldu(w).io.meta_resp <> metaArray.io.resp(w)
}
//----------------------------------------
// data array
val DataWritePortCount = 3
val StorePipeDataWritePort = 0
val AtomicsPipeDataWritePort = 1
val MissQueueDataWritePort = 2
val dataWriteArb = Module(new Arbiter(new L1DataWriteReq, DataWritePortCount))
dataWriteArb.io.in(StorePipeDataWritePort) <> stu.io.data_write
dataWriteArb.io.in(MissQueueDataWritePort) <> missQueue.io.data_write
dataWriteArb.io.in(AtomicsPipeDataWritePort) <> atomics.io.data_write
dataArray.io.write <> dataWriteArb.io.out
// To simplify port arbitration,
// the WritebackUnit, StorePipe, LoadPipe 0 and AtomicsPipe share data read port 0
val DataReadPortCount = 4
val WritebackDataReadPort = 0
val StorePipeDataReadPort = 1
val LoadPipeDataReadPort = 2
val AtomicsPipeDataReadPort = 3
val dataReadArb = Module(new Arbiter(new L1DataReadReq, DataReadPortCount))
dataReadArb.io.in(WritebackDataReadPort) <> wb.io.data_req
dataReadArb.io.in(StorePipeDataReadPort) <> stu.io.data_read
dataReadArb.io.in(LoadPipeDataReadPort) <> ldu(0).io.data_read
dataReadArb.io.in(AtomicsPipeDataReadPort) <> atomics.io.data_read
dataArray.io.read(0) <> dataReadArb.io.out
dataArray.io.resp(0) <> wb.io.data_resp
dataArray.io.resp(0) <> stu.io.data_resp
dataArray.io.resp(0) <> atomics.io.data_resp
dataArray.io.resp(0) <> ldu(0).io.data_resp
for (w <- 1 until LoadPipelineWidth) {
dataArray.io.read(w) <> ldu(w).io.data_read
dataArray.io.resp(w) <> ldu(w).io.data_resp
}
//----------------------------------------
// load pipe and load miss queue
// the s1 kill signal
// only lsu uses this, replay never kills
for (w <- 0 until LoadPipelineWidth) {
val load_w_nack = nack_load(io.lsu.load(w).req.bits.addr)
ldu(w).io.lsu.req <> io.lsu.load(w).req
ldu(w).io.lsu.s1_paddr <> io.lsu.load(w).s1_paddr
ldu(w).io.nack := load_w_nack
XSDebug(load_w_nack, s"LoadUnit $w nacked\n")
ldu(w).io.lsu.resp <> io.lsu.load(w).resp
ldu(w).io.lsu.s1_kill <> io.lsu.load(w).s1_kill
assert(!(io.lsu.load(w).req.fire() && io.lsu.load(w).req.bits.meta.replay), "LSU should not replay requests")
}
for (w <- 0 until LoadPipelineWidth) {
assert(!(io.lsu.load(w).req.fire() && io.lsu.load(w).req.bits.meta.mmio), "MMIO requests should not go to cache")
assert(!(io.lsu.load(w).req.fire() && io.lsu.load(w).req.bits.meta.tlb_miss), "TLB missed requests should not go to cache")
}
//----------------------------------------
// store pipe and store miss queue
storeMissQueue.io.lsu <> io.lsu.store
/*
assert(!(storeMissQueue.io.replay.req.fire() && !storeMissQueue.io.replay.req.bits.meta.replay),
"StoreMissQueue should replay requests")
*/
assert(!(io.lsu.store.req.fire() && io.lsu.store.req.bits.meta.replay),
"Sbuffer should not should replay requests")
assert(!(io.lsu.store.req.fire() && io.lsu.store.req.bits.meta.mmio),
"MMIO requests should not go to cache")
assert(!(io.lsu.store.req.fire() && io.lsu.store.req.bits.meta.tlb_miss),
"TLB missed requests should not go to cache")
val store_block = block_store(storeMissQueue.io.replay.req.bits.addr)
block_decoupled(storeMissQueue.io.replay.req, stu.io.lsu.req, store_block && !storeMissQueue.io.replay.req.bits.meta.replay)
storeMissQueue.io.replay.resp <> stu.io.lsu.resp
XSDebug(store_block, "StorePipe blocked\n")
//----------------------------------------
// atomics pipe
atomics.io.wb_invalidate_lrsc := wb.io.inflight_addr
atomicsMissQueue.io.lsu <> io.lsu.atomics
atomicsMissQueue.io.replay <> atomics.io.lsu
val atomics_block = block_atomics(atomicsMissQueue.io.replay.req.bits.addr)
block_decoupled(atomicsMissQueue.io.replay.req, atomics.io.lsu.req, atomics_block && !atomicsMissQueue.io.replay.req.bits.meta.replay)
XSDebug(atomics_block, "AtomicsPipe blocked\n")
// when atomics are in flight, there should be no load or store in flight
// so atomics and store should not show up at the same time
val atomics_inflight = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid)).reduce(_||_)
val store_inflight = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid)).reduce(_||_)
assert(!(atomics_inflight && store_inflight))
// some other stuff
val atomicsReq = io.lsu.atomics.req
assert(!(atomicsReq.fire() && atomicsReq.bits.meta.replay),
"Atomics does not support request replay")
assert(!(atomicsReq.fire() && atomicsReq.bits.meta.mmio),
"MMIO requests should not go to cache")
assert(!(atomicsReq.fire() && atomicsReq.bits.meta.tlb_miss),
"TLB missed requests should not go to cache")
//----------------------------------------
// miss queue
require(LoadPipelineWidth == 2, "We hard code the number of load misses")
val loadMissQueueClientId_0 = 0.U(clientIdWidth.W)
val loadMissQueueClientId_1 = 1.U(clientIdWidth.W)
val storeMissQueueClientId = 2.U(clientIdWidth.W)
val atomicsMissQueueClientId = 3.U(clientIdWidth.W)
// Request
val missReqArb = Module(new Arbiter(new MissReq, nClientMissQueues))
val missReq = missQueue.io.req
val loadMissReq_0 = ldu(0).io.miss_req
val loadMissReq_1 = ldu(1).io.miss_req
val storeMissReq = stu.io.miss_req
val atomicsMissReq = atomics.io.miss_req
missReqArb.io.in(0) <> loadMissReq_0
missReqArb.io.in(0).bits.client_id := Cat(loadMissQueueClientId_0,
loadMissReq_0.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(1) <> loadMissReq_1
missReqArb.io.in(1).bits.client_id := Cat(loadMissQueueClientId_1,
loadMissReq_1.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(2).valid := storeMissReq.valid
storeMissReq.ready := missReqArb.io.in(2).ready
missReqArb.io.in(2).bits := storeMissReq.bits
missReqArb.io.in(2).bits.client_id := Cat(storeMissQueueClientId,
storeMissReq.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(3).valid := atomicsMissReq.valid
atomicsMissReq.ready := missReqArb.io.in(3).ready
missReqArb.io.in(3).bits := atomicsMissReq.bits
missReqArb.io.in(3).bits.client_id := Cat(atomicsMissQueueClientId,
atomicsMissReq.bits.client_id(entryIdMSB, entryIdLSB))
val miss_block = block_miss(missReqArb.io.out.bits.addr)
block_decoupled(missReqArb.io.out, missReq, miss_block)
XSDebug(miss_block, "MissQueue blocked\n")
// Response
// store and atomics wait for miss queue responses
val missResp = missQueue.io.resp
val storeMissResp = storeMissQueue.io.miss_resp
val atomicsMissResp = atomicsMissQueue.io.miss_resp
val clientId = missResp.bits.client_id(clientIdMSB, clientIdLSB)
val isStoreMissResp = clientId === storeMissQueueClientId
storeMissResp.valid := missResp.valid && isStoreMissResp
storeMissResp.bits := missResp.bits
storeMissResp.bits.client_id := missResp.bits.client_id(entryIdMSB, entryIdLSB)
val isAtomicsMissResp = clientId === atomicsMissQueueClientId
atomicsMissResp.valid := missResp.valid && isAtomicsMissResp
atomicsMissResp.bits := missResp.bits
atomicsMissResp.bits.client_id := missResp.bits.client_id(entryIdMSB, entryIdLSB)
// Finish
val missFinish = missQueue.io.finish
val storeMissFinish = storeMissQueue.io.miss_finish
val atomicsMissFinish = atomicsMissQueue.io.miss_finish
val missFinishArb = Module(new Arbiter(new MissFinish, 2))
missFinishArb.io.in(0).valid := storeMissFinish.valid
storeMissFinish.ready := missFinishArb.io.in(0).ready
missFinishArb.io.in(0).bits.entry_id := storeMissFinish.bits.entry_id
missFinishArb.io.in(0).bits.client_id := Cat(storeMissQueueClientId,
storeMissFinish.bits.client_id(entryIdMSB, entryIdLSB))
missFinishArb.io.in(1).valid := atomicsMissFinish.valid
atomicsMissFinish.ready := missFinishArb.io.in(1).ready
missFinishArb.io.in(1).bits.entry_id := atomicsMissFinish.bits.entry_id
missFinishArb.io.in(1).bits.client_id := Cat(atomicsMissQueueClientId,
atomicsMissFinish.bits.client_id(entryIdMSB, entryIdLSB))
missFinish <> missFinishArb.io.out
// refill to load queue
io.lsu.lsq <> missQueue.io.refill
// tilelink stuff
bus.a <> missQueue.io.mem_acquire
bus.e <> missQueue.io.mem_finish
when (bus.d.bits.source === cfg.nMissEntries.U) {
// This should be ReleaseAck
bus.d.ready := true.B
missQueue.io.mem_grant.valid := false.B
missQueue.io.mem_grant.bits := DontCare
} .otherwise {
// This should be GrantData
missQueue.io.mem_grant <> bus.d
}
// sync with prober
missQueue.io.probe_wb_req.valid := prober.io.wb_req.fire()
missQueue.io.probe_wb_req.bits := prober.io.wb_req.bits
missQueue.io.probe_active := prober.io.inflight_req_idx
//----------------------------------------
// prober
prober.io.req.valid := bus.b.valid && !block_probe(get_block_addr(bus.b.bits.address))
bus.b.ready := prober.io.req.ready && !block_probe(get_block_addr(bus.b.bits.address))
prober.io.req.bits := bus.b.bits
//----------------------------------------
// wb
// 0 goes to prober, 1 goes to missQueue evictions
val wbArb = Module(new Arbiter(new WritebackReq(edge.bundle.sourceBits), 2))
wbArb.io.in(0) <> prober.io.wb_req
wbArb.io.in(1) <> missQueue.io.wb_req
wb.io.req <> wbArb.io.out
missQueue.io.wb_resp := wb.io.resp
prober.io.wb_resp := wb.io.resp
wb.io.mem_grant := bus.d.fire() && bus.d.bits.source === cfg.nMissEntries.U
TLArbiter.lowestFromSeq(edge, bus.c, Seq(prober.io.rep, wb.io.release))
// dcache should only deal with DRAM addresses
when (bus.a.fire()) {
assert(bus.a.bits.address >= 0x80000000L.U)
}
when (bus.b.fire()) {
assert(bus.b.bits.address >= 0x80000000L.U)
}
when (bus.c.fire()) {
assert(bus.c.bits.address >= 0x80000000L.U)
}
io.prefetch.valid := missQueue.io.req.fire()
io.prefetch.bits := missQueue.io.req.bits
// synchronization stuff
def nack_load(addr: UInt) = {
val store_addr_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val store_addr_match = store_addr_matches.reduce(_||_)
val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val atomics_addr_match = atomics_addr_matches.reduce(_||_)
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
store_addr_match || atomics_addr_match || prober_idx_match || miss_idx_match
}
def block_store(addr: UInt) = {
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
prober_idx_match || miss_idx_match
}
def block_atomics(addr: UInt) = {
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
prober_idx_match || miss_idx_match
}
def block_miss(addr: UInt) = {
val prober_idx_match = prober.io.inflight_req_idx.valid && prober.io.inflight_req_idx.bits === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
prober_idx_match || miss_idx_match
}
def block_probe(addr: UInt) = {
val store_idx_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && get_idx(entry.bits) === get_idx(addr)))
val store_idx_match = store_idx_matches.reduce(_||_)
val atomics_idx_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && get_idx(entry.bits) === get_idx(addr)))
val atomics_idx_match = atomics_idx_matches.reduce(_||_)
val lrsc_addr_match = atomics.io.block_probe_addr.valid && atomics.io.block_probe_addr.bits === get_block_addr(addr)
val miss_idx_matches = VecInit(missQueue.io.block_probe_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
// the missed req
val miss_req_idx_match = missReq.fire() && get_idx(missReq.bits.addr) === get_idx(addr)
store_idx_match || atomics_idx_match || lrsc_addr_match || miss_idx_match || miss_req_idx_match
}
def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
sink.valid := source.valid && !block_signal
source.ready := sink.ready && !block_signal
sink.bits := source.bits
}
}
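// A usage sketch for the block_decoupled helper defined above: it forwards
// a Decoupled channel but deasserts both valid and ready while the block
// condition holds, so neither side ever observes a fire. Widths are assumed.
import chisel3._
import chisel3.util._

class BlockDecoupledSketch extends Module {
  val io = IO(new Bundle {
    val in    = Flipped(Decoupled(UInt(8.W)))
    val out   = Decoupled(UInt(8.W))
    val block = Input(Bool())
  })
  def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
    sink.valid   := source.valid && !block_signal
    source.ready := sink.ready && !block_signal
    sink.bits    := source.bits
  }
  block_decoupled(io.in, io.out, io.block)
}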
......@@ -140,10 +140,10 @@ class TlbEntry(superpage: Boolean = false) extends TlbBundle {
val insideLevel = level.getOrElse(0.U)
val a = tag(vpnnLen*3-1, vpnnLen*2) === vpn(vpnnLen*3-1, vpnnLen*2)
val b = tag(vpnnLen*2-1, vpnnLen*1) === vpn(vpnnLen*2-1, vpnnLen*1)
XSDebug(Mux(insideLevel.asBool, a&b, a), p"Hit superpage: hit:${Mux(insideLevel.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${insideLevel} data:${data} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n")("TlbEntrySuperpage")
XSDebug(Mux(insideLevel.asBool, a&b, a), p"Hit superpage: hit:${Mux(insideLevel.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${insideLevel} data:${data} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n")
Mux(insideLevel.asBool, a&b, a)
} else {
XSDebug(tag === vpn, p"Hit normalpage: hit:${tag === vpn} tag:${Hexadecimal(tag)} data:${data} vpn:${Hexadecimal(vpn)}\n")("TlbEntryNormalpage")
XSDebug(tag === vpn, p"Hit normalpage: hit:${tag === vpn} tag:${Hexadecimal(tag)} data:${data} vpn:${Hexadecimal(vpn)}\n")
tag === vpn
}
}
......
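// A sketch of the superpage hit check in the TlbEntry diff above: a 3-level
// Sv39-style VPN is compared piecewise; an entry deeper in the page table
// must match the top two VPN slices, a level-0 (1 GiB) entry only the top
// slice. vpnnLen = 9 is the usual Sv39 value and an assumption here.
import chisel3._
import chisel3.util._

class SuperpageHitSketch(vpnnLen: Int = 9) extends Module {
  val io = IO(new Bundle {
    val tag         = Input(UInt((vpnnLen * 3).W))
    val vpn         = Input(UInt((vpnnLen * 3).W))
    val insideLevel = Input(Bool()) // true: 2 MiB entry, false: 1 GiB entry
    val hit         = Output(Bool())
  })
  val a = io.tag(vpnnLen * 3 - 1, vpnnLen * 2) === io.vpn(vpnnLen * 3 - 1, vpnnLen * 2)
  val b = io.tag(vpnnLen * 2 - 1, vpnnLen)     === io.vpn(vpnnLen * 2 - 1, vpnnLen)
  io.hit := Mux(io.insideLevel, a && b, a)
}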
package xiangshan.cache
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink._
import utils.{HasTLDump, XSDebug}
class ProbeUnit(edge: TLEdgeOut) extends DCacheModule with HasTLDump {
val io = IO(new Bundle {
val req = Flipped(Decoupled(new TLBundleB(edge.bundle)))
val rep = Decoupled(new TLBundleC(edge.bundle))
val meta_read = Decoupled(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
val meta_write = Decoupled(new L1MetaWriteReq)
val wb_req = Decoupled(new WritebackReq(edge.bundle.sourceBits))
val wb_resp = Input(Bool())
val inflight_req_idx = Output(Valid(UInt()))
val inflight_req_block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_meta_read_req :: s_meta_read_resp :: s_decide_next_state :: s_release :: s_wb_req :: s_wb_resp :: s_meta_write_req :: Nil = Enum(8)
val state = RegInit(s_invalid)
val req = Reg(new TLBundleB(edge.bundle))
val req_idx = get_idx(req.address)
val req_tag = get_tag(req.address)
val req_block_addr = get_block_addr(req.address)
val req_way_en = Reg(UInt())
val tag_matches = req_way_en.orR
val old_coh = Reg(new ClientMetadata)
val miss_coh = ClientMetadata.onReset
val reply_coh = Mux(tag_matches, old_coh, miss_coh)
val (is_dirty, report_param, new_coh) = reply_coh.onProbe(req.param)
// assign default values to signals
io.req.ready := false.B
io.rep.valid := false.B
io.rep.bits := DontCare
io.meta_read.valid := false.B
io.meta_read.bits := DontCare
io.meta_write.valid := false.B
io.meta_write.bits := DontCare
io.wb_req.valid := false.B
io.wb_req.bits := DontCare
io.inflight_req_idx.valid := state =/= s_invalid
io.inflight_req_idx.bits := req_idx
io.inflight_req_block_addr.valid := state =/= s_invalid
io.inflight_req_block_addr.bits := req_block_addr
when (state =/= s_invalid) {
XSDebug("state: %d\n", state)
}
when (state === s_invalid) {
io.req.ready := true.B
when (io.req.fire()) {
req := io.req.bits
state := s_meta_read_req
}
}
when (state === s_meta_read_req) {
io.meta_read.valid := true.B
val meta_read = io.meta_read.bits
meta_read.idx := req_idx
meta_read.way_en := ~0.U(nWays.W)
meta_read.tag := DontCare
when (io.meta_read.fire()) {
state := s_meta_read_resp
}
}
when (state === s_meta_read_resp) {
// tag check
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val tag_eq_way = wayMap((w: Int) => io.meta_resp(w).tag === (req_tag)).asUInt
val tag_match_way = wayMap((w: Int) => tag_eq_way(w) && io.meta_resp(w).coh.isValid()).asUInt
val hit_state = Mux1H(tag_match_way, wayMap((w: Int) => io.meta_resp(w).coh))
old_coh := hit_state
req_way_en := tag_match_way
state := s_decide_next_state
}
when (state === s_decide_next_state) {
// decide next state
state := Mux(tag_matches && is_dirty, s_wb_req, s_release)
}
// no need to write back, just release
when (state === s_release) {
io.rep.valid := true.B
io.rep.bits := edge.ProbeAck(req, report_param)
when (io.rep.fire()) {
state := Mux(tag_matches, s_meta_write_req, s_invalid)
}
}
when (state === s_wb_req) {
io.wb_req.valid := true.B
io.wb_req.bits.tag := req_tag
io.wb_req.bits.idx := req_idx
io.wb_req.bits.param := report_param
io.wb_req.bits.way_en := req_way_en
io.wb_req.bits.source := req.source
io.wb_req.bits.voluntary := false.B
when (io.wb_req.fire()) {
state := s_wb_resp
}
}
when (state === s_wb_resp) {
when (io.wb_resp) {
state := s_meta_write_req
}
}
when (state === s_meta_write_req) {
io.meta_write.valid := true.B
io.meta_write.bits.idx := req_idx
io.meta_write.bits.data.coh := new_coh
io.meta_write.bits.data.tag := req_tag
io.meta_write.bits.way_en := req_way_en
when (io.meta_write.fire()) {
state := s_invalid
}
}
// print wb_req
XSDebug(io.wb_req.fire(), "wb_req idx %x tag: %x source: %d param: %x way_en: %x voluntary: %b\n",
io.wb_req.bits.idx, io.wb_req.bits.tag,
io.wb_req.bits.source, io.wb_req.bits.param,
io.wb_req.bits.way_en, io.wb_req.bits.voluntary)
// print tilelink messages
when (io.req.fire()) {
XSDebug("mem_probe ")
io.req.bits.dump
}
when (io.rep.fire()) {
XSDebug("mem_release ")
io.rep.bits.dump
}
}
This diff is collapsed.
......@@ -106,10 +106,10 @@ class MMIOEntry(edge: TLEdgeOut) extends DCacheModule
// --------------------------------------------
when (state === s_send_resp) {
io.resp.valid := true.B
io.resp.bits.data := resp_data
// meta data should go with the response
io.resp.bits.meta := req.meta
io.resp.bits.miss := false.B
io.resp.bits.id := req.id
io.resp.bits.miss := false.B
io.resp.bits.replay := false.B
when (io.resp.fire()) {
......
This diff is collapsed.
......@@ -17,8 +17,8 @@ class ExceptionAddrIO extends XSBundle {
}
class FwdEntry extends XSBundle {
val mask = Vec(8, Bool())
val data = Vec(8, UInt(8.W))
val valid = Bool()
val data = UInt(8.W)
}
// inflight miss block reqs
......
......@@ -535,14 +535,7 @@ class LoadQueue extends XSModule
io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask
io.uncache.req.bits.meta.id := DontCare
io.uncache.req.bits.meta.vaddr := DontCare
io.uncache.req.bits.meta.paddr := dataModule.io.uncache.rdata.paddr
io.uncache.req.bits.meta.uop := uop(deqPtr)
io.uncache.req.bits.meta.mmio := true.B
io.uncache.req.bits.meta.tlb_miss := false.B
io.uncache.req.bits.meta.mask := dataModule.io.uncache.rdata.mask
io.uncache.req.bits.meta.replay := false.B
io.uncache.req.bits.id := DontCare
io.uncache.resp.ready := true.B
......
......@@ -57,7 +57,7 @@ class FakeSbuffer extends XSModule {
dcache_req.bits.addr := block_addr(req.addr)
dcache_req.bits.data := wdataVec.asUInt
dcache_req.bits.mask := wmaskVec.asUInt
dcache_req.bits.meta := DontCare
dcache_req.bits.id := DontCare
when (dcache_req.fire()) {
state := s_resp
......
......@@ -368,7 +368,7 @@ class Sbuffer extends XSModule with HasSBufferConst {
io.dcache.req.bits.data := dcacheData
io.dcache.req.bits.mask := dcacheMask
io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
io.dcache.req.bits.meta := DontCare // NOT USED
io.dcache.req.bits.id := DontCare // NOT USED
io.dcache.resp.ready := false.B
wb_arb.io.out.ready := false.B
......
This diff is collapsed.
......@@ -30,7 +30,7 @@ class SbufferWapper extends XSModule {
// fake dcache
sbuffer.io.dcache.req.ready := true.B
sbuffer.io.dcache.resp.valid := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.valid))))
sbuffer.io.dcache.resp.bits.meta.id := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.bits.meta.id))))
sbuffer.io.dcache.resp.bits.id := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.bits.id))))
}
class SbufferTest extends AnyFlatSpec
......