提交 3726264a 编写于 作者: Fa_wang

Merge remote-tracking branch 'origin/master' into opt-sbuffer-timing

......@@ -345,10 +345,12 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
integerBlock.io.wakeUpIn.fastUops <> floatBlock.io.wakeUpIntOut.fastUops
integerBlock.io.wakeUpIn.fast <> floatBlock.io.wakeUpIntOut.fast
integerBlock.io.wakeUpIn.slow <> floatBlock.io.wakeUpIntOut.slow ++ memBlock.io.wakeUpIntOut.slow
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock
floatBlock.io.wakeUpIn.fastUops <> integerBlock.io.wakeUpFpOut.fastUops
floatBlock.io.wakeUpIn.fast <> integerBlock.io.wakeUpFpOut.fast
floatBlock.io.wakeUpIn.slow <> integerBlock.io.wakeUpFpOut.slow ++ memBlock.io.wakeUpFpOut.slow
floatBlock.io.toMemBlock <> memBlock.io.fromFpBlock
integerBlock.io.wakeUpIntOut.fast.map(_.ready := true.B)
......@@ -395,13 +397,10 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ptw.io.tlb(0) <> memBlock.io.ptw
ptw.io.tlb(1) <> frontend.io.ptw
ptw.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.csr <> integerBlock.io.csrio.tlb
ptw.io.csr <> integerBlock.io.csrio.tlb
dcache.io.lsu.load <> memBlock.io.dcache.loadUnitToDcacheVec
dcache.io.lsu.lsq <> memBlock.io.dcache.loadMiss
dcache.io.lsu.atomics <> memBlock.io.dcache.atomics
dcache.io.lsu.store <> memBlock.io.dcache.sbufferToDcache
uncache.io.lsq <> memBlock.io.dcache.uncache
dcache.io.lsu <> memBlock.io.dcache
uncache.io.lsq <> memBlock.io.uncache
if (!env.FPGAPlatform) {
val debugIntReg, debugFpReg = WireInit(VecInit(Seq.fill(32)(0.U(XLEN.W))))
......
......@@ -16,21 +16,22 @@ import xiangshan.mem.LsqEnqIO
// Interface from the control (dispatch) block to the integer block.
class CtrlToIntBlockIO extends XSBundle {
// per-EXU issue-queue enqueue control (decoupled micro-ops)
val enqIqCtrl = Vec(exuParameters.IntExuCnt, DecoupledIO(new MicroOp))
// per-EXU enqueue data payload for the issue queues
val enqIqData = Vec(exuParameters.IntExuCnt, Output(new ExuInput))
// integer regfile read ports; Flipped because the ctrl side drives the addresses
val readRf = Vec(NRIntReadPorts, Flipped(new RfReadPort(XLEN)))
// int block only uses port 0~7
// index of the read-port pair assigned to each int EXU (8 ports / 2 ports per EXU)
val readPortIndex = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(8 / 2).W))) // TODO parameterize 8 here
// redirect (branch misprediction / exception) broadcast to the block
val redirect = ValidIO(new Redirect)
}
// Interface from the control (dispatch) block to the floating-point block.
class CtrlToFpBlockIO extends XSBundle {
// per-EXU issue-queue enqueue control (decoupled micro-ops)
val enqIqCtrl = Vec(exuParameters.FpExuCnt, DecoupledIO(new MicroOp))
// per-EXU enqueue data payload for the issue queues
val enqIqData = Vec(exuParameters.FpExuCnt, Output(new ExuInput))
// fp regfile read ports (XLEN + 1 bits: fp recoded format); ctrl side drives addresses
val readRf = Vec(NRFpReadPorts, Flipped(new RfReadPort(XLEN + 1)))
// fp block uses port 0~11
// index of the read-port triple assigned to each fp EXU (3 read ports per EXU)
val readPortIndex = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil((NRFpReadPorts - exuParameters.StuCnt) / 3).W)))
// redirect (branch misprediction / exception) broadcast to the block
val redirect = ValidIO(new Redirect)
}
// Interface from the control (dispatch) block to the load/store (memory) block.
class CtrlToLsBlockIO extends XSBundle {
// per-EXU issue-queue enqueue control (decoupled micro-ops)
val enqIqCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
// per-EXU enqueue data payload for the issue queues
val enqIqData = Vec(exuParameters.LsExuCnt, Output(new ExuInput))
// load/store queue enqueue; Flipped because the LSQ side owns the interface direction
val enqLsq = Flipped(new LsqEnqIO)
// redirect (branch misprediction / exception) broadcast to the block
val redirect = ValidIO(new Redirect)
}
......@@ -113,7 +114,7 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
}
dispatch.io.numExist <> io.fromIntBlock.numExist ++ io.fromFpBlock.numExist ++ io.fromLsBlock.numExist
dispatch.io.enqIQCtrl <> io.toIntBlock.enqIqCtrl ++ io.toFpBlock.enqIqCtrl ++ io.toLsBlock.enqIqCtrl
dispatch.io.enqIQData <> io.toIntBlock.enqIqData ++ io.toFpBlock.enqIqData ++ io.toLsBlock.enqIqData
// dispatch.io.enqIQData <> io.toIntBlock.enqIqData ++ io.toFpBlock.enqIqData ++ io.toLsBlock.enqIqData
val flush = redirectValid && RedirectLevel.isUnconditional(redirect.level)
......@@ -150,6 +151,9 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
io.toLsBlock.redirect.valid := redirectValid
io.toLsBlock.redirect.bits := redirect
dispatch.io.readPortIndex.intIndex <> io.toIntBlock.readPortIndex
dispatch.io.readPortIndex.fpIndex <> io.toFpBlock.readPortIndex
// roq to int block
io.roqio.toCSR <> roq.io.csr
io.roqio.exception.valid := roq.io.redirectOut.valid && roq.io.redirectOut.bits.isException()
......
......@@ -3,6 +3,7 @@ package xiangshan.backend
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.regfile.Regfile
import xiangshan.backend.exu._
import xiangshan.backend.issue.{ReservationStationCtrl, ReservationStationData}
......@@ -25,6 +26,7 @@ class FloatBlock
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToFpBlockIO)
val toCtrlBlock = new FpBlockToCtrlIO
val toMemBlock = new FpBlockToMemBlockIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOut.size, slowFpOut.size))
......@@ -57,6 +59,7 @@ class FloatBlock
def needData(a: ExuConfig, b: ExuConfig): Boolean =
(a.readIntRf && b.writeIntRf) || (a.readFpRf && b.writeFpRf)
val readPortIndex = RegNext(io.fromCtrlBlock.readPortIndex)
val reservedStations = exeUnits.map(_.config).zipWithIndex.map({ case (cfg, i) =>
var certainLatency = -1
if (cfg.hasCertainLatency) {
......@@ -85,7 +88,15 @@ class FloatBlock
rsCtrl.io.redirect <> redirect // TODO: remove it
rsCtrl.io.numExist <> io.toCtrlBlock.numExist(i)
rsCtrl.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rsData.io.enqData <> io.fromCtrlBlock.enqIqData(i)
rsData.io.srcRegValue := DontCare
val src1Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3).data))
val src2Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3 + 1).data))
val src3Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3 + 2).data))
rsData.io.srcRegValue(0) := src1Value(readPortIndex(i))
rsData.io.srcRegValue(1) := src2Value(readPortIndex(i))
rsData.io.srcRegValue(2) := src3Value(readPortIndex(i))
rsData.io.redirect <> redirect
rsData.io.writeBackedData <> writeBackData
......@@ -142,6 +153,7 @@ class FloatBlock
// read fp rf from ctrl block
fpRf.io.readPorts <> io.fromCtrlBlock.readRf
(0 until exuParameters.StuCnt).foreach(i => io.toMemBlock.readFpRf(i).data := fpRf.io.readPorts(i + 12).data)
// write fp rf arbiter
val fpWbArbiter = Module(new Wb(
(exeUnits.map(_.config) ++ fastWakeUpIn ++ slowWakeUpIn).map(_.wbFpPriority),
......
......@@ -3,8 +3,8 @@ package xiangshan.backend
import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.backend.exu.Exu.{jumpExeUnitCfg, ldExeUnitCfg, stExeUnitCfg}
import xiangshan.backend.exu.{AluExeUnit, ExuConfig, JumpExeUnit, MulDivExeUnit, Wb}
import xiangshan.backend.exu.Exu.{ldExeUnitCfg, stExeUnitCfg}
import xiangshan.backend.exu._
import xiangshan.backend.fu.FenceToSbuffer
import xiangshan.backend.issue.{ReservationStationCtrl, ReservationStationData}
import xiangshan.backend.regfile.Regfile
......@@ -65,6 +65,7 @@ class IntegerBlock
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToIntBlockIO)
val toCtrlBlock = new IntBlockToCtrlIO
val toMemBlock = new IntBlockToMemBlockIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpFpOut = Flipped(new WakeUpBundle(fastFpOut.size, slowFpOut.size))
......@@ -110,6 +111,7 @@ class IntegerBlock
def needData(a: ExuConfig, b: ExuConfig): Boolean =
(a.readIntRf && b.writeIntRf) || (a.readFpRf && b.writeFpRf)
val readPortIndex = RegNext(io.fromCtrlBlock.readPortIndex)
val reservationStations = exeUnits.map(_.config).zipWithIndex.map({ case (cfg, i) =>
var certainLatency = -1
if (cfg.hasCertainLatency) {
......@@ -140,7 +142,12 @@ class IntegerBlock
rsCtrl.io.redirect <> redirect // TODO: remove it
rsCtrl.io.numExist <> io.toCtrlBlock.numExist(i)
rsCtrl.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rsData.io.enqData <> io.fromCtrlBlock.enqIqData(i)
rsData.io.srcRegValue := DontCare
val src1Value = VecInit((0 until 4).map(i => intRf.io.readPorts(i * 2).data))
val src2Value = VecInit((0 until 4).map(i => intRf.io.readPorts(i * 2 + 1).data))
rsData.io.srcRegValue(0) := src1Value(readPortIndex(i))
rsData.io.srcRegValue(1) := src2Value(readPortIndex(i))
rsData.io.redirect <> redirect
rsData.io.writeBackedData <> writeBackData
......@@ -208,6 +215,7 @@ class IntegerBlock
// read int rf from ctrl block
intRf.io.readPorts <> io.fromCtrlBlock.readRf
(0 until NRMemReadPorts).foreach(i => io.toMemBlock.readIntRf(i).data := intRf.io.readPorts(i + 8).data)
// write int rf arbiter
val intWbArbiter = Module(new Wb(
(exeUnits.map(_.config) ++ fastWakeUpIn ++ slowWakeUpIn).map(_.wbIntPriority),
......
......@@ -10,7 +10,7 @@ import xiangshan.cache._
import xiangshan.mem._
import xiangshan.backend.fu.FenceToSbuffer
import xiangshan.backend.issue.{ReservationStationCtrl, ReservationStationData}
import xiangshan.backend.fu.FunctionUnit.{lduCfg, mouCfg, stuCfg}
import xiangshan.backend.regfile.RfReadPort
class LsBlockToCtrlIO extends XSBundle {
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput)) // write to roq
......@@ -18,12 +18,12 @@ class LsBlockToCtrlIO extends XSBundle {
val replay = ValidIO(new Redirect)
}
class MemBlockToDcacheIO extends XSBundle {
val loadUnitToDcacheVec = Vec(exuParameters.LduCnt, new DCacheLoadIO)
val loadMiss = new DCacheLineIO
val atomics = new DCacheWordIO
val sbufferToDcache = new DCacheLineIO
val uncache = new DCacheWordIO
// Integer block -> memory block: integer regfile read ports used by load/store units.
class IntBlockToMemBlockIO extends XSBundle {
// NRMemReadPorts read ports into the integer regfile (XLEN-bit data)
val readIntRf = Vec(NRMemReadPorts, new RfReadPort(XLEN))
}
// Fp block -> memory block: fp regfile read ports for store units (fp store data).
class FpBlockToMemBlockIO extends XSBundle {
// one fp read port per store unit; XLEN + 1 bits (fp recoded format)
val readFpRf = Vec(exuParameters.StuCnt, new RfReadPort(XLEN + 1))
}
class MemBlock
......@@ -38,6 +38,8 @@ class MemBlock
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToLsBlockIO)
val fromIntBlock = Flipped(new IntBlockToMemBlockIO)
val fromFpBlock = Flipped(new FpBlockToMemBlockIO)
val toCtrlBlock = new LsBlockToCtrlIO
val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
......@@ -46,7 +48,8 @@ class MemBlock
val ptw = new TlbPtwIO
// TODO: dcache should be inside MemBlock
val dcache = new MemBlockToDcacheIO
val dcache = Flipped(new DCacheToLsuIO)
val uncache = new DCacheWordIO
val sfence = Input(new SfenceBundle)
val tlbCsr = Input(new TlbCsrBundle)
val fenceToSbuffer = Flipped(new FenceToSbuffer)
......@@ -76,6 +79,9 @@ class MemBlock
val intExeWbReqs = ldOut0 +: loadUnits.tail.map(_.io.ldout)
val fpExeWbReqs = loadUnits.map(_.io.fpout)
val readPortIndex = Seq(0, 1, 2, 4)
io.fromIntBlock.readIntRf.foreach(_.addr := DontCare)
io.fromFpBlock.readFpRf.foreach(_.addr := DontCare)
val reservationStations = (loadExuConfigs ++ storeExuConfigs).zipWithIndex.map({ case (cfg, i) =>
var certainLatency = -1
if (cfg.hasCertainLatency) {
......@@ -111,7 +117,13 @@ class MemBlock
rsCtrl.io.redirect <> redirect // TODO: remove it
rsCtrl.io.numExist <> io.toCtrlBlock.numExist(i)
rsCtrl.io.enqCtrl <> io.fromCtrlBlock.enqIqCtrl(i)
rsData.io.enqData <> io.fromCtrlBlock.enqIqData(i)
val src2IsFp = RegNext(io.fromCtrlBlock.enqIqCtrl(i).bits.ctrl.src2Type === SrcType.fp)
rsData.io.srcRegValue := DontCare
rsData.io.srcRegValue(0) := io.fromIntBlock.readIntRf(readPortIndex(i)).data
if (i >= exuParameters.LduCnt) {
rsData.io.srcRegValue(1) := Mux(src2IsFp, io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data, io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data)
}
rsData.io.redirect <> redirect
rsData.io.writeBackedData <> writeBackData
......@@ -166,7 +178,7 @@ class MemBlock
// get input form dispatch
loadUnits(i).io.ldin <> reservationStations(i).io.deq
// dcache access
loadUnits(i).io.dcache <> io.dcache.loadUnitToDcacheVec(i)
loadUnits(i).io.dcache <> io.dcache.load(i)
// forward
loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
......@@ -210,14 +222,14 @@ class MemBlock
lsq.io.brqRedirect := io.fromCtrlBlock.redirect
lsq.io.roqDeqPtr := io.lsqio.roqDeqPtr
io.toCtrlBlock.replay <> lsq.io.rollback
lsq.io.dcache <> io.dcache.loadMiss
lsq.io.uncache <> io.dcache.uncache
lsq.io.dcache <> io.dcache.lsq
lsq.io.uncache <> io.uncache
// LSQ to store buffer
lsq.io.sbuffer <> sbuffer.io.in
// Sbuffer
sbuffer.io.dcache <> io.dcache.sbufferToDcache
sbuffer.io.dcache <> io.dcache.store
// flush sbuffer
val fenceFlush = io.fenceToSbuffer.flushSb
......
......@@ -42,7 +42,12 @@ class Dispatch extends XSModule {
// to reservation stations
val numExist = Input(Vec(exuParameters.ExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.ExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.ExuCnt, Output(new ExuInput))
// send reg file read port index to reservation stations
val readPortIndex = new Bundle {
val intIndex = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(8 / 2).W)))
val fpIndex = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil((NRFpReadPorts - exuParameters.StuCnt) / 3).W)))
// ls: hardwired to (0, 1, 2, 4)
}
})
val dispatch1 = Module(new Dispatch1)
......@@ -80,7 +85,8 @@ class Dispatch extends XSModule {
intDispatch.io.regRdy.zipWithIndex.map({case (r, i) => r <> io.intPregRdy(i)})
intDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(i)})
intDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(i)})
intDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i)})
// intDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i)})
intDispatch.io.readPortIndex <> io.readPortIndex.intIndex
// Fp dispatch queue to Fp reservation stations
val fpDispatch = Module(new Dispatch2Fp)
......@@ -89,7 +95,8 @@ class Dispatch extends XSModule {
fpDispatch.io.regRdy.zipWithIndex.map({case (r, i) => r <> io.fpPregRdy(i)})
fpDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(i + exuParameters.IntExuCnt)})
fpDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(i + exuParameters.IntExuCnt)})
fpDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i + exuParameters.IntExuCnt)})
// fpDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i + exuParameters.IntExuCnt)})
fpDispatch.io.readPortIndex <> io.readPortIndex.fpIndex
// Load/store dispatch queue to load/store issue queues
val lsDispatch = Module(new Dispatch2Ls)
......@@ -100,5 +107,5 @@ class Dispatch extends XSModule {
lsDispatch.io.fpRegRdy.zipWithIndex.map({case (r, i) => r <> io.fpPregRdy(i + 12)})
lsDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
lsDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
lsDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
// lsDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
}
......@@ -14,7 +14,7 @@ class Dispatch2Fp extends XSModule {
val regRdy = Vec(NRFpReadPorts - exuParameters.StuCnt, Input(Bool()))
val numExist = Input(Vec(exuParameters.FpExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.FpExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.FpExuCnt, Output(new ExuInput))
val readPortIndex = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil((NRFpReadPorts - exuParameters.StuCnt) / 3).W)))
})
/**
......@@ -55,9 +55,9 @@ class Dispatch2Fp extends XSModule {
io.readRf(3*i+1).addr := io.fromDq(index(fpReadPortSrc(i))).bits.psrc2
io.readRf(3*i+2).addr := io.fromDq(index(fpReadPortSrc(i))).bits.psrc3
}
val readPortIndex = Wire(Vec(exuParameters.FpExuCnt, UInt(log2Ceil(NRFpReadPorts - exuParameters.StuCnt).W)))
fpStaticIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := (3*i).U})
fpDynamicIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := 3.U * fpDynamicExuSrc(i)})
val readPortIndex = Wire(Vec(exuParameters.FpExuCnt, UInt(2.W)))
fpStaticIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := i.U})
fpDynamicIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := fpDynamicExuSrc(i)})
/**
* Part 3: dispatch to reservation stations
......@@ -73,9 +73,13 @@ class Dispatch2Fp extends XSModule {
enq.valid := fmiscIndexGen.io.mapping(i - exuParameters.FmacCnt).valid && fmiscReady
}
enq.bits := io.fromDq(indexVec(i)).bits
enq.bits.src1State := io.regRdy(readPortIndex(i))
enq.bits.src2State := io.regRdy(readPortIndex(i) + 1.U)
enq.bits.src3State := io.regRdy(readPortIndex(i) + 2.U)
val src1Ready = VecInit((0 until 4).map(i => io.regRdy(i * 3)))
val src2Ready = VecInit((0 until 4).map(i => io.regRdy(i * 3 + 1)))
val src3Ready = VecInit((0 until 4).map(i => io.regRdy(i * 3 + 2)))
enq.bits.src1State := src1Ready(readPortIndex(i))
enq.bits.src2State := src2Ready(readPortIndex(i))
enq.bits.src3State := src3Ready(readPortIndex(i))
XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
p"srcState(${enq.bits.src1State} ${enq.bits.src2State} ${enq.bits.src3State}) " +
......@@ -99,25 +103,26 @@ class Dispatch2Fp extends XSModule {
XSError(PopCount(io.fromDq.map(_.fire())) =/= PopCount(io.enqIQCtrl.map(_.fire())), "deq =/= enq\n")
/**
* Part 5: the second stage of dispatch 2 (send data to reservation station)
* Part 5: send read port index of register file to reservation station
*/
val readPortIndexReg = Reg(Vec(exuParameters.FpExuCnt, UInt(log2Ceil(NRFpReadPorts - exuParameters.StuCnt).W)))
val uopReg = Reg(Vec(exuParameters.FpExuCnt, new MicroOp))
val dataValidRegDebug = Reg(Vec(exuParameters.FpExuCnt, Bool()))
for (i <- 0 until exuParameters.FpExuCnt) {
readPortIndexReg(i) := readPortIndex(i)
uopReg(i) := io.enqIQCtrl(i).bits
dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
io.enqIQData(i) := DontCare
io.enqIQData(i).src1 := io.readRf(readPortIndexReg(i)).data
io.enqIQData(i).src2 := io.readRf(readPortIndexReg(i) + 1.U).data
io.enqIQData(i).src3 := io.readRf(readPortIndexReg(i) + 2.U).data
XSDebug(dataValidRegDebug(i),
p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
p"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)}), " +
p"(${readPortIndexReg(i)+2.U}, ${uopReg(i).psrc3}, ${Hexadecimal(io.enqIQData(i).src3)})\n")
}
io.readPortIndex := readPortIndex
// val readPortIndexReg = Reg(Vec(exuParameters.FpExuCnt, UInt(log2Ceil(NRFpReadPorts - exuParameters.StuCnt).W)))
// val uopReg = Reg(Vec(exuParameters.FpExuCnt, new MicroOp))
// val dataValidRegDebug = Reg(Vec(exuParameters.FpExuCnt, Bool()))
// for (i <- 0 until exuParameters.FpExuCnt) {
// readPortIndexReg(i) := readPortIndex(i)
// uopReg(i) := io.enqIQCtrl(i).bits
// dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
//
// io.enqIQData(i) := DontCare
// io.enqIQData(i).src1 := io.readRf(readPortIndexReg(i)).data
// io.enqIQData(i).src2 := io.readRf(readPortIndexReg(i) + 1.U).data
// io.enqIQData(i).src3 := io.readRf(readPortIndexReg(i) + 2.U).data
//
// XSDebug(dataValidRegDebug(i),
// p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
// p"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
// p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)}), " +
// p"(${readPortIndexReg(i)+2.U}, ${uopReg(i).psrc3}, ${Hexadecimal(io.enqIQData(i).src3)})\n")
// }
}
......@@ -6,7 +6,6 @@ import xiangshan._
import utils._
import xiangshan.backend.exu.Exu._
import xiangshan.backend.regfile.RfReadPort
import xiangshan.backend.exu._
class Dispatch2Int extends XSModule {
val io = IO(new Bundle() {
......@@ -15,7 +14,7 @@ class Dispatch2Int extends XSModule {
val regRdy = Vec(NRIntReadPorts - NRMemReadPorts, Input(Bool()))
val numExist = Input(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.IntExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.IntExuCnt, Output(new ExuInput))
val readPortIndex = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(8 / 2).W)))
})
/**
......@@ -59,9 +58,9 @@ class Dispatch2Int extends XSModule {
io.readRf(2*i ).addr := io.fromDq(index(intReadPortSrc(i))).bits.psrc1
io.readRf(2*i+1).addr := io.fromDq(index(intReadPortSrc(i))).bits.psrc2
}
val readPortIndex = Wire(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(NRIntReadPorts).W)))
intStaticIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := (2*i).U})
intDynamicIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := 2.U * intDynamicExuSrc(i)})
val readPortIndex = Wire(Vec(exuParameters.IntExuCnt, UInt(2.W)))
intStaticIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := i.U})
intDynamicIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := intDynamicExuSrc(i)})
/**
* Part 3: dispatch to reservation stations
......@@ -81,8 +80,11 @@ class Dispatch2Int extends XSModule {
enq.valid := mduIndexGen.io.mapping(i - (exuParameters.JmpCnt + exuParameters.AluCnt)).valid && mduReady
}
enq.bits := io.fromDq(indexVec(i)).bits
enq.bits.src1State := io.regRdy(readPortIndex(i))
enq.bits.src2State := io.regRdy(readPortIndex(i) + 1.U)
val src1Ready = VecInit((0 until 4).map(i => io.regRdy(i * 2)))
val src2Ready = VecInit((0 until 4).map(i => io.regRdy(i * 2 + 1)))
enq.bits.src1State := src1Ready(readPortIndex(i))
enq.bits.src2State := src2Ready(readPortIndex(i))
XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
p"srcState(${enq.bits.src1State} ${enq.bits.src2State}) " +
......@@ -107,25 +109,26 @@ class Dispatch2Int extends XSModule {
XSError(PopCount(io.fromDq.map(_.fire())) =/= PopCount(io.enqIQCtrl.map(_.fire())), "deq =/= enq\n")
/**
* Part 5: the second stage of dispatch 2 (send data to reservation station)
* Part 5: send read port index of register file to reservation station
*/
val readPortIndexReg = Reg(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(NRIntReadPorts).W)))
val uopReg = Reg(Vec(exuParameters.IntExuCnt, new MicroOp))
val dataValidRegDebug = Reg(Vec(exuParameters.IntExuCnt, Bool()))
for (i <- 0 until exuParameters.IntExuCnt) {
readPortIndexReg(i) := readPortIndex(i)
uopReg(i) := io.enqIQCtrl(i).bits
dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
io.enqIQData(i) := DontCare
io.enqIQData(i).src1 := Mux(uopReg(i).ctrl.src1Type === SrcType.pc,
SignExt(uopReg(i).cf.pc, XLEN), io.readRf(readPortIndexReg(i)).data)
io.enqIQData(i).src2 := Mux(uopReg(i).ctrl.src2Type === SrcType.imm,
uopReg(i).ctrl.imm, io.readRf(readPortIndexReg(i) + 1.U).data)
XSDebug(dataValidRegDebug(i),
p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
p"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
}
io.readPortIndex := readPortIndex
// val readPortIndexReg = Reg(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(NRIntReadPorts).W)))
// val uopReg = Reg(Vec(exuParameters.IntExuCnt, new MicroOp))
// val dataValidRegDebug = Reg(Vec(exuParameters.IntExuCnt, Bool()))
// for (i <- 0 until exuParameters.IntExuCnt) {
// readPortIndexReg(i) := readPortIndex(i)
// uopReg(i) := io.enqIQCtrl(i).bits
// dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
//
// io.enqIQData(i) := DontCare
// io.enqIQData(i).src1 := Mux(uopReg(i).ctrl.src1Type === SrcType.pc,
// SignExt(uopReg(i).cf.pc, XLEN), io.readRf(readPortIndexReg(i)).data)
// io.enqIQData(i).src2 := Mux(uopReg(i).ctrl.src2Type === SrcType.imm,
// uopReg(i).ctrl.imm, io.readRf(readPortIndexReg(i) + 1.U).data)
//
// XSDebug(dataValidRegDebug(i),
// p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
// p"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
// p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
}
......@@ -18,7 +18,6 @@ class Dispatch2Ls extends XSModule {
val fpRegRdy = Vec(exuParameters.StuCnt, Input(Bool()))
val numExist = Input(Vec(exuParameters.LsExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
val enqIQData = Vec(exuParameters.LsExuCnt, Output(new ExuInput))
})
/**
......@@ -111,27 +110,27 @@ class Dispatch2Ls extends XSModule {
/**
* Part 5: the second stage of dispatch 2 (send data to reservation station)
*/
val uopReg = Reg(Vec(exuParameters.LsExuCnt, new MicroOp))
val dataValidRegDebug = Reg(Vec(exuParameters.LsExuCnt, Bool()))
for (i <- 0 until exuParameters.LsExuCnt) {
uopReg(i) := io.enqIQCtrl(i).bits
dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
io.enqIQData(i) := DontCare
// assert(uopReg(i).ctrl.src1Type =/= SrcType.pc)
io.enqIQData(i).src1 := io.readIntRf(readPort(i)).data
if (i >= exuParameters.LduCnt) {
io.enqIQData(i).src2 := Mux(
uopReg(i).ctrl.src2Type === SrcType.imm,
uopReg(i).ctrl.imm,
Mux(uopReg(i).ctrl.src2Type === SrcType.fp,
io.readFpRf(i - exuParameters.LduCnt).data,
io.readIntRf(readPort(i) + 1).data))
}
XSDebug(dataValidRegDebug(i),
p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
p"(${readPort(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
}
// val uopReg = Reg(Vec(exuParameters.LsExuCnt, new MicroOp))
// val dataValidRegDebug = Reg(Vec(exuParameters.LsExuCnt, Bool()))
// for (i <- 0 until exuParameters.LsExuCnt) {
// uopReg(i) := io.enqIQCtrl(i).bits
// dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
//
// io.enqIQData(i) := DontCare
// // assert(uopReg(i).ctrl.src1Type =/= SrcType.pc)
// io.enqIQData(i).src1 := io.readIntRf(readPort(i)).data
// if (i >= exuParameters.LduCnt) {
// io.enqIQData(i).src2 := Mux(
// uopReg(i).ctrl.src2Type === SrcType.imm,
// uopReg(i).ctrl.imm,
// Mux(uopReg(i).ctrl.src2Type === SrcType.fp,
// io.readFpRf(i - exuParameters.LduCnt).data,
// io.readIntRf(readPort(i) + 1).data))
// }
//
// XSDebug(dataValidRegDebug(i),
// p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
// p"(${readPort(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
// p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
}
......@@ -5,6 +5,7 @@ import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.exu.{Exu, ExuConfig}
import xiangshan.backend.regfile.RfReadPort
class BypassQueue(number: Int) extends XSModule {
val io = IO(new Bundle {
......@@ -315,6 +316,24 @@ class ReservationStationData
srcNum: Int = 3
) extends XSModule {
// Tags which dispatch path (integer / fp / load-store) feeds a reservation station.
// NOTE(review): scala.Enumeration is discouraged in favor of a sealed ADT, but
// changing it here would change the DispatchType.Value type used by callers.
object DispatchType extends Enumeration {
val Disp2Int, Disp2Fp, Disp2Ls = Value
}
// Maps an execution-unit configuration to the dispatch path that feeds it.
// The match is deliberately non-exhaustive: an unknown config raises MatchError,
// surfacing a wiring mistake at elaboration time.
def dispatchType(exuConfig: ExuConfig): DispatchType.Value = exuConfig match {
  case Exu.aluExeUnitCfg
     | Exu.jumpExeUnitCfg
     | Exu.mulDivExeUnitCfg => DispatchType.Disp2Int
  case Exu.fmacExeUnitCfg
     | Exu.fmiscExeUnitCfg  => DispatchType.Disp2Fp
  case Exu.ldExeUnitCfg
     | Exu.stExeUnitCfg     => DispatchType.Disp2Ls
}
val iqSize = IssQueSize
val iqIdxWidth = log2Up(iqSize)
val fastWakeup = fixedDelay >= 0 // NOTE: if do not enable fastWakeup(bypass), set fixedDelay to -1
......@@ -324,15 +343,14 @@ class ReservationStationData
// flush
val redirect = Flipped(ValidIO(new Redirect))
// enq Data at next cycle (regfile has 1 cycle latency)
val enqData = Input(new ExuInput)
// send to exu
val deq = DecoupledIO(new ExuInput)
// listen to RSCtrl
val ctrl = Flipped(new RSCtrlDataIO)
// read src op value
val srcRegValue = Vec(srcNum, Input(UInt((XLEN + 1).W)))
// broadcast selected uop to other issue queues
val selectedUop = ValidIO(new MicroOp)
......@@ -372,12 +390,47 @@ class ReservationStationData
p" src2:${enqUop.psrc2}|${enqUop.src2State}|${enqUop.ctrl.src2Type} src3:${enqUop.psrc3}|" +
p"${enqUop.src3State}|${enqUop.ctrl.src3Type} pc:0x${Hexadecimal(enqUop.cf.pc)} roqIdx:${enqUop.roqIdx}\n")
}
when (enqEnReg) { // TODO: turn to srcNum, not the 3
data(enqPtrReg)(0) := io.enqData.src1
data(enqPtrReg)(1) := io.enqData.src2
data(enqPtrReg)(2) := io.enqData.src3
XSDebug(p"enqData: enqPtrReg:${enqPtrReg} src1:${Hexadecimal(io.enqData.src1)}" +
p" src2:${Hexadecimal(io.enqData.src2)} src3:${Hexadecimal(io.enqData.src2)}\n")
exuCfg match {
case Exu.aluExeUnitCfg =>
// src1: pc or reg
data(enqPtrReg)(0) := Mux(uop(enqPtrReg).ctrl.src1Type === SrcType.pc, SignExt(uop(enqPtrReg).cf.pc, XLEN), io.srcRegValue(0))
// src2: imm or reg
data(enqPtrReg)(1) := Mux(uop(enqPtrReg).ctrl.src2Type === SrcType.imm, uop(enqPtrReg).ctrl.imm, io.srcRegValue(1))
case Exu.jumpExeUnitCfg =>
// src1: pc or reg
data(enqPtrReg)(0) := Mux(uop(enqPtrReg).ctrl.src1Type === SrcType.pc, SignExt(uop(enqPtrReg).cf.pc, XLEN), io.srcRegValue(0))
// src2: imm
data(enqPtrReg)(1) := uop(enqPtrReg).ctrl.imm
case Exu.mulDivExeUnitCfg =>
// src1: reg
data(enqPtrReg)(0) := io.srcRegValue(0)
// src2: reg
data(enqPtrReg)(1) := io.srcRegValue(1)
case Exu.fmacExeUnitCfg =>
(0 until exuCfg.fpSrcCnt).foreach(i => data(enqPtrReg)(i) := io.srcRegValue(i))
case Exu.fmiscExeUnitCfg =>
(0 until exuCfg.fpSrcCnt).foreach(i => data(enqPtrReg)(i) := io.srcRegValue(i))
case Exu.ldExeUnitCfg =>
data(enqPtrReg)(0) := io.srcRegValue(0)
data(enqPtrReg)(1) := Mux(uop(enqPtrReg).ctrl.src2Type === SrcType.imm, uop(enqPtrReg).ctrl.imm, io.srcRegValue(1))
case Exu.stExeUnitCfg =>
data(enqPtrReg)(0) := io.srcRegValue(0)
data(enqPtrReg)(1) := Mux(uop(enqPtrReg).ctrl.src2Type === SrcType.imm, uop(enqPtrReg).ctrl.imm, io.srcRegValue(1))
// default
case _ =>
XSDebug(false.B, "Unhandled exu-config")
}
XSDebug(p"${exuCfg.name}: enqPtrReg:${enqPtrReg} pc: ${Hexadecimal(uop(enqPtrReg).cf.pc)}\n")
XSDebug(p"[srcRegValue] src1: ${Hexadecimal(io.srcRegValue(0))} src2: ${Hexadecimal(io.srcRegValue(1))} src3: ${Hexadecimal(io.srcRegValue(2))}\n")
}
def wbHit(uop: MicroOp, src: UInt, srctype: UInt): Bool = {
......
......@@ -6,13 +6,13 @@ import xiangshan._
// Read port of the rename alias table (RAT): architectural register index in,
// mapped physical register index out.
// Fix: the block contained both the stale rdata declaration (UInt(XLEN.W)) and its
// replacement — a duplicate member definition that does not compile; keep only the
// PhyRegIdxWidth-wide version, which matches the physical-register index it carries.
class RatReadPort extends XSBundle {
// 5-bit architectural register address
val addr = Input(UInt(5.W))
// physical register index the architectural register currently maps to
val rdata = Output(UInt(PhyRegIdxWidth.W))
}
// Write port of the rename alias table (RAT): updates the architectural-to-physical
// register mapping.
// Fix: the block contained both the stale wdata declaration (UInt(XLEN.W)) and its
// replacement — a duplicate member definition that does not compile; keep only the
// PhyRegIdxWidth-wide version, which matches the physical-register index it carries.
class RatWritePort extends XSBundle {
// write enable
val wen = Input(Bool())
// 5-bit architectural register address
val addr = Input(UInt(5.W))
// new physical register index to map the architectural register to
val wdata = Input(UInt(PhyRegIdxWidth.W))
}
class RenameTable(float: Boolean) extends XSModule {
......
......@@ -20,6 +20,9 @@ class AtomicsPipe extends DCacheModule
val inflight_req_block_addrs = Output(Vec(3, Valid(UInt())))
val block_probe_addr = Output(Valid(UInt()))
val wb_invalidate_lrsc = Input(Valid(UInt()))
// send miss request to miss queue
val miss_req = DecoupledIO(new MissReq)
})
// LSU requests
......@@ -63,6 +66,17 @@ class AtomicsPipe extends DCacheModule
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_hit_meta = Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w)))
val s1_hit_state = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
when (io.miss_req.fire()) {
replacer.miss
}
// ---------------------------------------
......@@ -74,10 +88,18 @@ class AtomicsPipe extends DCacheModule
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegNext(meta_resp(w).coh)))
val s2_has_permission = s2_hit_state.onAccess(s2_req.cmd)._1
val s2_new_hit_state = s2_hit_state.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_way_en = RegNext(s1_repl_way_en)
val s2_old_meta = Mux(s2_tag_match, s2_hit_meta, s2_repl_meta)
val s2_way_en = Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
......@@ -89,24 +111,19 @@ class AtomicsPipe extends DCacheModule
// eg: write to exclusive but clean block
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state
val s2_nack = Wire(Bool())
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) {
s2_data(w) := data_resp(w)(get_row(s2_req.addr))
}
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// the index of word in a row, in case rowBits != wordBits
val s2_word_idx = if (rowWords == 1) 0.U else s2_req.addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
// when req got nacked, upper levels should replay this request
val s2_nack_hit = RegNext(s1_nack)
// Can't allocate MSHR for same set currently being written back
// the same set is busy
val s2_nack_set_busy = s2_valid && false.B
val s2_nack_hit = RegNext(s1_nack)
// can not allocate mshr for store miss
val s2_nack_no_mshr = io.miss_req.valid && !io.miss_req.ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_set_busy || s2_nack_data
s2_nack := s2_nack_hit || s2_nack_no_mshr || s2_nack_data
// lr/sc
val debug_sc_fail_addr = RegInit(0.U)
......@@ -174,7 +191,8 @@ class AtomicsPipe extends DCacheModule
dump_pipeline_valids("AtomicsPipe s2", "s2_hit", s2_valid && s2_hit)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack", s2_valid && s2_nack)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_hit", s2_valid && s2_nack_hit)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_set_busy", s2_valid && s2_nack_set_busy)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_no_mshr", s2_valid && s2_nack_no_mshr)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_data", s2_valid && s2_nack_data)
when (s2_valid) {
XSDebug("lrsc_count: %d lrsc_valid: %b lrsc_addr: %x\n",
lrsc_count, lrsc_valid, lrsc_addr)
......@@ -185,6 +203,15 @@ class AtomicsPipe extends DCacheModule
}
// load data gen
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) {
s2_data(w) := data_resp(w)(get_row(s2_req.addr))
}
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// the index of word in a row, in case rowBits != wordBits
val s2_word_idx = if (rowWords == 1) 0.U else s2_req.addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
val s2_data_words = Wire(Vec(rowWords, UInt(encWordBits.W)))
for (w <- 0 until rowWords) {
s2_data_words(w) := s2_data_muxed(encWordBits * (w + 1) - 1, encWordBits * w)
......@@ -195,6 +222,14 @@ class AtomicsPipe extends DCacheModule
assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// send atomics miss to miss queue
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_req.addr)
io.miss_req.bits.tag_match := s2_tag_match
io.miss_req.bits.way_en := s2_way_en
io.miss_req.bits.old_meta := s2_old_meta
io.miss_req.bits.client_id := s2_req.meta.id
val resp = Wire(ValidIO(new DCacheWordResp))
resp.valid := s2_valid
......@@ -203,16 +238,16 @@ class AtomicsPipe extends DCacheModule
// reuse this field to pass lr sc valid to commit
// nemu use this to see whether lr sc counter is still valid
resp.bits.meta.id := lrsc_valid
resp.bits.miss := !s2_hit
resp.bits.nack := s2_nack
resp.bits.miss := !s2_hit || s2_nack
resp.bits.replay := resp.bits.miss && (!io.miss_req.fire() || s2_nack)
io.lsu.resp.valid := resp.valid
io.lsu.resp.bits := resp.bits
assert(!(resp.valid && !io.lsu.resp.ready))
when (resp.valid) {
XSDebug(s"AtomicsPipe resp: data: %x id: %d replay: %b miss: %b nack: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.nack)
XSDebug(s"AtomicsPipe resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
}
......
......@@ -12,12 +12,11 @@ class AtomicsMissQueue extends DCacheModule
val io = IO(new DCacheBundle {
val lsu = Flipped(new DCacheWordIO)
val replay = new DCacheWordIO
val miss_req = DecoupledIO(new MissReq)
val miss_resp = Flipped(ValidIO(new MissResp))
val miss_finish = DecoupledIO(new MissFinish)
})
val s_invalid :: s_replay_req :: s_replay_resp :: s_resp :: s_miss_req :: s_miss_resp :: s_miss_finish :: Nil = Enum(7)
val s_invalid :: s_replay_req :: s_replay_resp :: s_resp :: s_miss_resp :: s_miss_finish :: Nil = Enum(6)
val state = RegInit(s_invalid)
val id = 0.U
......@@ -35,12 +34,9 @@ class AtomicsMissQueue extends DCacheModule
io.replay.req.bits := DontCare
io.replay.resp.ready := false.B
io.miss_req.valid := false.B
io.miss_req.bits := DontCare
io.miss_finish.valid := false.B
io.miss_finish.bits := DontCare
when (state =/= s_invalid) {
XSDebug("state: %d\n", state)
}
......@@ -68,34 +64,30 @@ class AtomicsMissQueue extends DCacheModule
when (state === s_replay_resp) {
io.replay.resp.ready := true.B
when (io.replay.resp.fire()) {
// req missed
when (io.replay.resp.bits.miss) {
// replayed reqs should not miss
assert(!req.meta.replay)
when (!req.meta.replay) {
state := s_miss_req
// the req missed and did not enter mshr
// so replay it until it hits or enters mshr
when (io.replay.resp.bits.replay) {
state := s_replay_req
} .otherwise {
// the req missed and enters mshr
// wait for miss response
state := s_miss_resp
}
} .otherwise {
// req hits, everything OK
resp := io.replay.resp.bits
when (!req.meta.replay) {
state := s_resp
} .otherwise {
// if it's a replayed request
// we need to tell mshr, we are done
state := s_miss_finish
}
}
assert(!io.replay.resp.bits.nack)
}
}
// --------------------------------------------
when (state === s_miss_req) {
io.miss_req.valid := true.B
io.miss_req.bits.cmd := req.cmd
io.miss_req.bits.addr := req_block_addr
io.miss_req.bits.client_id := id
when (io.miss_req.fire()) {
state := s_miss_resp
}
}
......@@ -129,25 +121,21 @@ class AtomicsMissQueue extends DCacheModule
// debug output
when (io.lsu.req.fire()) {
XSDebug(s"io.lsu.req cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
XSDebug(s"io.lsu.req cmd: %x addr: %x data: %x mask: %x id: %d replayed_req: %b\n",
io.lsu.req.bits.cmd, io.lsu.req.bits.addr, io.lsu.req.bits.data, io.lsu.req.bits.mask, io.lsu.req.bits.meta.id, io.lsu.req.bits.meta.replay)
}
val replay = io.replay.req
when (replay.fire()) {
XSDebug(s"replay cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
XSDebug(s"replay cmd: %x addr: %x data: %x mask: %x id: %d replayed_req: %b\n",
replay.bits.cmd, replay.bits.addr, replay.bits.data, replay.bits.mask, replay.bits.meta.id, replay.bits.meta.replay)
}
when (io.lsu.resp.fire()) {
XSDebug(s"io.lsu.resp: data: %x id: %d replay: %b miss: %b nack: %b\n",
io.lsu.resp.bits.data, io.lsu.resp.bits.meta.id, io.lsu.resp.bits.meta.replay, io.lsu.resp.bits.miss, io.lsu.resp.bits.nack)
XSDebug(s"io.lsu.resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
io.lsu.resp.bits.data, io.lsu.resp.bits.meta.id, io.lsu.resp.bits.meta.replay, io.lsu.resp.bits.miss, io.lsu.resp.bits.replay)
}
val miss_req = io.miss_req
XSDebug(miss_req.fire(), "miss_req cmd: %x addr: %x client_id: %d\n",
miss_req.bits.cmd, miss_req.bits.addr, miss_req.bits.client_id)
val miss_resp = io.miss_resp
XSDebug(miss_resp.fire(), "miss_resp client_id: %d entry_id: %d\n",
miss_resp.bits.client_id, miss_resp.bits.entry_id)
......
......@@ -57,7 +57,8 @@ trait HasDCacheParameters extends HasL1CacheParameters {
storeMissQueueEntryIdWidth),
miscMissQueueEntryIdWidth)
def nClientMissQueues = 3
// clients: ldu 0, ldu1, stu, atomics
def nClientMissQueues = 4
def clientIdWidth = log2Up(nClientMissQueues)
def missQueueClientIdWidth = clientIdWidth + clientMissQueueEntryIdWidth
def clientIdMSB = missQueueClientIdWidth - 1
......
......@@ -25,19 +25,7 @@ class DCacheMeta extends DCacheBundle {
val replay = Bool() // whether it's a replayed request?
}
// for load from load unit
// cycle 0: vaddr
// cycle 1: paddr
class DCacheLoadReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(VAddrBits.W)
val data = UInt(DataBits.W)
val mask = UInt((DataBits/8).W)
val meta = new DCacheMeta
}
// special memory operations(lr/sc, atomics)
// memory request in word granularity(load, mmio, lr/sc, atomics)
class DCacheWordReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
......@@ -47,7 +35,7 @@ class DCacheWordReq extends DCacheBundle
val meta = new DCacheMeta
}
// ordinary store
// memory request in cacheline granularity(store)
class DCacheLineReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
......@@ -57,16 +45,6 @@ class DCacheLineReq extends DCacheBundle
val meta = new DCacheMeta
}
class DCacheLoadResp extends DCacheBundle
{
val data = UInt(DataBits.W)
val meta = new DCacheMeta
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val nack = Bool()
}
class DCacheWordResp extends DCacheBundle
{
val data = UInt(DataBits.W)
......@@ -74,7 +52,7 @@ class DCacheWordResp extends DCacheBundle
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val nack = Bool()
val replay = Bool()
}
class DCacheLineResp extends DCacheBundle
......@@ -84,16 +62,13 @@ class DCacheLineResp extends DCacheBundle
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val nack = Bool()
val replay = Bool()
}
class DCacheLoadIO extends DCacheBundle
class Refill extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
// kill previous cycle's req
val s1_kill = Output(Bool())
val s1_paddr = Output(UInt(PAddrBits.W))
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
}
class DCacheWordIO extends DCacheBundle
......@@ -102,6 +77,16 @@ class DCacheWordIO extends DCacheBundle
val resp = Flipped(DecoupledIO(new DCacheWordResp))
}
// used by load unit
class DCacheLoadIO extends DCacheWordIO
{
// kill previous cycle's req
val s1_kill = Output(Bool())
// cycle 0: virtual address: req.addr
// cycle 1: physical address: s1_paddr
val s1_paddr = Output(UInt(PAddrBits.W))
}
class DCacheLineIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheLineReq )
......@@ -110,7 +95,7 @@ class DCacheLineIO extends DCacheBundle
class DCacheToLsuIO extends DCacheBundle {
val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
val lsq = Flipped(new DCacheLineIO) // lsq load/store
val lsq = ValidIO(new Refill) // refill to load queue, wake up load misses
val store = Flipped(new DCacheLineIO) // for sbuffer
val atomics = Flipped(new DCacheWordIO) // atomics reqs
}
......@@ -156,7 +141,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val ldu = Seq.fill(LoadPipelineWidth) { Module(new LoadPipe) }
val stu = Module(new StorePipe)
val atomics = Module(new AtomicsPipe)
val loadMissQueue = Module(new LoadMissQueue)
val storeMissQueue = Module(new StoreMissQueue)
val atomicsMissQueue = Module(new AtomicsMissQueue)
val missQueue = Module(new MissQueue(edge))
......@@ -179,16 +163,14 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
// To simplify port arbitration
// MissQueue, Prober and StorePipe all use port 0
// if contention got severe, considering load balancing on two ports?
val MetaReadPortCount = 5
val MissQueueMetaReadPort = 0
val ProberMetaReadPort = 1
val StorePipeMetaReadPort = 2
val LoadPipeMetaReadPort = 3
val AtomicsPipeMetaReadPort = 4
val MetaReadPortCount = 4
val ProberMetaReadPort = 0
val StorePipeMetaReadPort = 1
val LoadPipeMetaReadPort = 2
val AtomicsPipeMetaReadPort = 3
val metaReadArb = Module(new Arbiter(new L1MetaReadReq, MetaReadPortCount))
metaReadArb.io.in(MissQueueMetaReadPort) <> missQueue.io.meta_read
metaReadArb.io.in(ProberMetaReadPort) <> prober.io.meta_read
metaReadArb.io.in(StorePipeMetaReadPort) <> stu.io.meta_read
metaReadArb.io.in(LoadPipeMetaReadPort) <> ldu(0).io.meta_read
......@@ -196,7 +178,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
metaArray.io.read(0) <> metaReadArb.io.out
missQueue.io.meta_resp <> metaArray.io.resp(0)
prober.io.meta_resp <> metaArray.io.resp(0)
stu.io.meta_resp <> metaArray.io.resp(0)
ldu(0).io.meta_resp <> metaArray.io.resp(0)
......@@ -217,19 +198,18 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val dataWriteArb = Module(new Arbiter(new L1DataWriteReq, DataWritePortCount))
dataWriteArb.io.in(StorePipeDataWritePort) <> stu.io.data_write
dataWriteArb.io.in(MissQueueDataWritePort) <> missQueue.io.refill
dataWriteArb.io.in(MissQueueDataWritePort) <> missQueue.io.data_write
dataWriteArb.io.in(AtomicsPipeDataWritePort) <> atomics.io.data_write
dataArray.io.write <> dataWriteArb.io.out
// To simplify port arbitration
// WritebackUnit and StorePipe use port 0
val DataReadPortCount = 5
val DataReadPortCount = 4
val WritebackDataReadPort = 0
val StorePipeDataReadPort = 1
val LoadPipeDataReadPort = 2
val AtomicsPipeDataReadPort = 3
val LoadMissDataReadPort = 4
val dataReadArb = Module(new Arbiter(new L1DataReadReq, DataReadPortCount))
......@@ -237,14 +217,12 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
dataReadArb.io.in(StorePipeDataReadPort) <> stu.io.data_read
dataReadArb.io.in(LoadPipeDataReadPort) <> ldu(0).io.data_read
dataReadArb.io.in(AtomicsPipeDataReadPort) <> atomics.io.data_read
dataReadArb.io.in(LoadMissDataReadPort) <> loadMissQueue.io.data_req
dataArray.io.read(0) <> dataReadArb.io.out
dataArray.io.resp(0) <> wb.io.data_resp
dataArray.io.resp(0) <> stu.io.data_resp
dataArray.io.resp(0) <> atomics.io.data_resp
dataArray.io.resp(0) <> ldu(0).io.data_resp
dataArray.io.resp(0) <> loadMissQueue.io.data_resp
for (w <- 1 until LoadPipelineWidth) {
dataArray.io.read(w) <> ldu(w).io.data_read
......@@ -272,9 +250,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
assert(!(io.lsu.load(w).req.fire() && io.lsu.load(w).req.bits.meta.tlb_miss), "TLB missed requests should not go to cache")
}
// load miss queue
loadMissQueue.io.lsu <> io.lsu.lsq
//----------------------------------------
// store pipe and store miss queue
storeMissQueue.io.lsu <> io.lsu.store
......@@ -322,34 +297,39 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
//----------------------------------------
// miss queue
val loadMissQueueClientId = 0.U(clientIdWidth.W)
val storeMissQueueClientId = 1.U(clientIdWidth.W)
val atomicsMissQueueClientId = 2.U(clientIdWidth.W)
require(LoadPipelineWidth == 2, "We hard code the number of load misses")
val loadMissQueueClientId_0 = 0.U(clientIdWidth.W)
val loadMissQueueClientId_1 = 1.U(clientIdWidth.W)
val storeMissQueueClientId = 2.U(clientIdWidth.W)
val atomicsMissQueueClientId = 3.U(clientIdWidth.W)
// Request
val missReqArb = Module(new Arbiter(new MissReq, 3))
val missReqArb = Module(new Arbiter(new MissReq, nClientMissQueues))
val missReq = missQueue.io.req
val loadMissReq = loadMissQueue.io.miss_req
val storeMissReq = storeMissQueue.io.miss_req
val atomicsMissReq = atomicsMissQueue.io.miss_req
missReqArb.io.in(0).valid := loadMissReq.valid
loadMissReq.ready := missReqArb.io.in(0).ready
missReqArb.io.in(0).bits := loadMissReq.bits
missReqArb.io.in(0).bits.client_id := Cat(loadMissQueueClientId,
loadMissReq.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(1).valid := storeMissReq.valid
storeMissReq.ready := missReqArb.io.in(1).ready
missReqArb.io.in(1).bits := storeMissReq.bits
missReqArb.io.in(1).bits.client_id := Cat(storeMissQueueClientId,
val loadMissReq_0 = ldu(0).io.miss_req
val loadMissReq_1 = ldu(1).io.miss_req
val storeMissReq = stu.io.miss_req
val atomicsMissReq = atomics.io.miss_req
missReqArb.io.in(0) <> loadMissReq_0
missReqArb.io.in(0).bits.client_id := Cat(loadMissQueueClientId_0,
loadMissReq_0.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(1) <> loadMissReq_1
missReqArb.io.in(1).bits.client_id := Cat(loadMissQueueClientId_1,
  loadMissReq_1.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(2).valid := storeMissReq.valid
storeMissReq.ready := missReqArb.io.in(2).ready
missReqArb.io.in(2).bits := storeMissReq.bits
missReqArb.io.in(2).bits.client_id := Cat(storeMissQueueClientId,
storeMissReq.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(2).valid := atomicsMissReq.valid
atomicsMissReq.ready := missReqArb.io.in(2).ready
missReqArb.io.in(2).bits := atomicsMissReq.bits
missReqArb.io.in(2).bits.client_id := Cat(atomicsMissQueueClientId,
missReqArb.io.in(3).valid := atomicsMissReq.valid
atomicsMissReq.ready := missReqArb.io.in(3).ready
missReqArb.io.in(3).bits := atomicsMissReq.bits
missReqArb.io.in(3).bits.client_id := Cat(atomicsMissQueueClientId,
atomicsMissReq.bits.client_id(entryIdMSB, entryIdLSB))
val miss_block = block_miss(missReqArb.io.out.bits.addr)
......@@ -357,18 +337,13 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
XSDebug(miss_block, "MissQueue blocked\n")
// Response
// store and atomics wait for miss queue responses
val missResp = missQueue.io.resp
val loadMissResp = loadMissQueue.io.miss_resp
val storeMissResp = storeMissQueue.io.miss_resp
val atomicsMissResp = atomicsMissQueue.io.miss_resp
val atomicsMissResp = atomicsMissQueue.io.miss_resp
val clientId = missResp.bits.client_id(clientIdMSB, clientIdLSB)
val isLoadMissResp = clientId === loadMissQueueClientId
loadMissResp.valid := missResp.valid && isLoadMissResp
loadMissResp.bits := missResp.bits
loadMissResp.bits.client_id := missResp.bits.client_id(entryIdMSB, entryIdLSB)
val isStoreMissResp = clientId === storeMissQueueClientId
storeMissResp.valid := missResp.valid && isStoreMissResp
storeMissResp.bits := missResp.bits
......@@ -381,31 +356,27 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
// Finish
val missFinish = missQueue.io.finish
val loadMissFinish = loadMissQueue.io.miss_finish
val storeMissFinish = storeMissQueue.io.miss_finish
val atomicsMissFinish = atomicsMissQueue.io.miss_finish
val missFinishArb = Module(new Arbiter(new MissFinish, 3))
missFinishArb.io.in(0).valid := loadMissFinish.valid
loadMissFinish.ready := missFinishArb.io.in(0).ready
missFinishArb.io.in(0).bits.entry_id := loadMissFinish.bits.entry_id
missFinishArb.io.in(0).bits.client_id := Cat(loadMissQueueClientId,
loadMissFinish.bits.client_id(entryIdMSB, entryIdLSB))
missFinishArb.io.in(1).valid := storeMissFinish.valid
storeMissFinish.ready := missFinishArb.io.in(1).ready
missFinishArb.io.in(1).bits.entry_id := storeMissFinish.bits.entry_id
missFinishArb.io.in(1).bits.client_id := Cat(storeMissQueueClientId,
storeMissFinish.bits.client_id(entryIdMSB, entryIdLSB))
missFinishArb.io.in(2).valid := atomicsMissFinish.valid
atomicsMissFinish.ready := missFinishArb.io.in(2).ready
missFinishArb.io.in(2).bits.entry_id := atomicsMissFinish.bits.entry_id
missFinishArb.io.in(2).bits.client_id := Cat(atomicsMissQueueClientId,
val atomicsMissFinish = atomicsMissQueue.io.miss_finish
val missFinishArb = Module(new Arbiter(new MissFinish, 2))
missFinishArb.io.in(0).valid := storeMissFinish.valid
storeMissFinish.ready := missFinishArb.io.in(0).ready
missFinishArb.io.in(0).bits.entry_id := storeMissFinish.bits.entry_id
missFinishArb.io.in(0).bits.client_id := Cat(storeMissQueueClientId,
storeMissFinish.bits.client_id(entryIdMSB, entryIdLSB))
missFinishArb.io.in(1).valid := atomicsMissFinish.valid
atomicsMissFinish.ready := missFinishArb.io.in(1).ready
missFinishArb.io.in(1).bits.entry_id := atomicsMissFinish.bits.entry_id
missFinishArb.io.in(1).bits.client_id := Cat(atomicsMissQueueClientId,
atomicsMissFinish.bits.client_id(entryIdMSB, entryIdLSB))
missFinish <> missFinishArb.io.out
// refill to load queue
io.lsu.lsq <> missQueue.io.refill
// tilelink stuff
bus.a <> missQueue.io.mem_acquire
bus.e <> missQueue.io.mem_finish
......@@ -464,10 +435,12 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val atomics_addr_match = atomics_addr_matches.reduce(_||_)
val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
store_addr_match || atomics_addr_match || miss_idx_match
store_addr_match || atomics_addr_match || prober_addr_match || miss_idx_match
}
def block_store(addr: UInt) = {
......@@ -487,18 +460,12 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
}
def block_miss(addr: UInt) = {
val store_idx_matches = VecInit(stu.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val store_idx_match = store_idx_matches.reduce(_||_)
val atomics_idx_matches = VecInit(atomics.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val atomics_idx_match = atomics_idx_matches.reduce(_||_)
val prober_idx_match = prober.io.inflight_req_idx.valid && prober.io.inflight_req_idx.bits === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
store_idx_match || atomics_idx_match || prober_idx_match || miss_idx_match
prober_idx_match || miss_idx_match
}
def block_probe(addr: UInt) = {
......
......@@ -8,14 +8,19 @@ import utils.XSDebug
class LoadPipe extends DCacheModule
{
val io = IO(new DCacheBundle{
// incoming requests
val lsu = Flipped(new DCacheLoadIO)
// req got nacked in stage 0?
val nack = Input(Bool())
// meta and data array read port
val data_read = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
val meta_read = DecoupledIO(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
// req got nacked in stage 0?
val nack = Input(Bool())
// send miss request to miss queue
val miss_req = DecoupledIO(new MissReq)
})
// LSU requests
......@@ -67,6 +72,17 @@ class LoadPipe extends DCacheModule
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_hit_meta = Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w)))
val s1_hit_state = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
when (io.miss_req.fire()) {
replacer.miss
}
assert(!(s1_valid && s1_req.meta.replay && io.lsu.s1_kill),
"lsq tried to kill an replayed request!")
......@@ -79,11 +95,20 @@ class LoadPipe extends DCacheModule
val s2_addr = RegNext(s1_addr)
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegNext(meta_resp(w).coh)))
val s2_tag_match = RegNext(s1_tag_match)
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_state = RegNext(s1_hit_state)
val s2_has_permission = s2_hit_state.onAccess(s2_req.cmd)._1
val s2_new_hit_state = s2_hit_state.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_way_en = RegNext(s1_repl_way_en)
val s2_old_meta = Mux(s2_tag_match, s2_hit_meta, s2_repl_meta)
val s2_way_en = Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
......@@ -94,6 +119,7 @@ class LoadPipe extends DCacheModule
// It's possible that we had permission but state changes on hit:
// eg: write to exclusive but clean block
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state
// nacked or not
val s2_nack = Wire(Bool())
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
......@@ -104,22 +130,6 @@ class LoadPipe extends DCacheModule
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// the index of word in a row, in case rowBits != wordBits
val s2_word_idx = if (rowWords == 1) 0.U else s2_addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
val s2_nack_hit = RegNext(s1_nack)
// Can't allocate MSHR for same set currently being written back
// the same set is busy
val s2_nack_set_busy = s2_valid && false.B
// Bank conflict on data arrays
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_set_busy || s2_nack_data
// only dump these signals when they are actually valid
dump_pipeline_valids("LoadPipe s2", "s2_hit", s2_valid && s2_hit)
dump_pipeline_valids("LoadPipe s2", "s2_nack", s2_valid && s2_nack)
dump_pipeline_valids("LoadPipe s2", "s2_nack_hit", s2_valid && s2_nack_hit)
dump_pipeline_valids("LoadPipe s2", "s2_nack_set_busy", s2_valid && s2_nack_set_busy)
// load data gen
val s2_data_words = Wire(Vec(rowWords, UInt(encWordBits.W)))
for (w <- 0 until rowWords) {
......@@ -128,23 +138,58 @@ class LoadPipe extends DCacheModule
val s2_data_word = s2_data_words(s2_word_idx)
val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
val s2_data_word_decoded = s2_decoded.corrected
// annotate out this assertion
// when TLB misses, s2_hit may still be true
// which may cause unnecessary assertion
// assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// when req got nacked, upper levels should replay this request
// the same set is busy
val s2_nack_hit = RegNext(s1_nack)
// can not allocate mshr for load miss
val s2_nack_no_mshr = io.miss_req.valid && !io.miss_req.ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_no_mshr || s2_nack_data
// only dump these signals when they are actually valid
dump_pipeline_valids("LoadPipe s2", "s2_hit", s2_valid && s2_hit)
dump_pipeline_valids("LoadPipe s2", "s2_nack", s2_valid && s2_nack)
dump_pipeline_valids("LoadPipe s2", "s2_nack_hit", s2_valid && s2_nack_hit)
dump_pipeline_valids("LoadPipe s2", "s2_nack_no_mshr", s2_valid && s2_nack_no_mshr)
// send load miss to miss queue
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_addr)
io.miss_req.bits.tag_match := s2_tag_match
io.miss_req.bits.way_en := s2_way_en
io.miss_req.bits.old_meta := s2_old_meta
io.miss_req.bits.client_id := 0.U
// send back response
val resp = Wire(ValidIO(new DCacheWordResp))
resp.valid := s2_valid
resp.bits.data := s2_data_word_decoded
resp.bits.meta := s2_req.meta
resp.bits.miss := !s2_hit
resp.bits.nack := s2_nack
// on miss or nack, upper level should replay request
// but if we successfully sent the request to miss queue
// upper level does not need to replay request
// they can sit in load queue and wait for refill
resp.bits.miss := !s2_hit || s2_nack
resp.bits.replay := resp.bits.miss && (!io.miss_req.fire() || s2_nack)
io.lsu.resp.valid := resp.valid
io.lsu.resp.bits := resp.bits
assert(!(resp.valid && !io.lsu.resp.ready))
when (resp.valid) {
XSDebug(s"LoadPipe resp: data: %x id: %d replay: %b miss: %b nack: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.nack)
XSDebug(s"LoadPipe resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
}
// -------
......
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import bus.tilelink._
// A single transaction slot of the LoadMissQueue.
// State machine:
//   s_invalid        -> accept one missed cacheline load from the LSU
//   s_miss_req       -> forward it to the global MissQueue, tagged with io.id
//   s_miss_resp      -> wait for the MissQueue response; if it carries refill
//                       data (has_data), respond directly, otherwise the line
//                       is already in the data array and must be read back
//   s_data_read_req  -> issue the data array read for the refilled line
//   s_data_read_resp -> wait the fixed dataArrayLatency, ECC-decode the line
//   s_resp           -> return the cacheline to the LSU
//   s_miss_finish    -> notify the MissQueue that this transaction is done
// io.idx / io.tag expose the in-flight set index and tag so the enclosing
// queue can detect same-set / same-line conflicts for incoming requests.
class LoadMissEntry extends DCacheModule
{
  val io = IO(new Bundle {
    // static entry id; used as the client_id of miss_req / miss_finish
    val id = Input(UInt())

    // cacheline load request/response channel from the load queue
    val lsu = Flipped(new DCacheLineIO)
    val miss_req = DecoupledIO(new MissReq)
    val miss_resp = Flipped(ValidIO(new MissResp))
    val miss_finish = DecoupledIO(new MissFinish)

    // data array read port, used when the refill already landed in dcache
    val data_req = DecoupledIO(new L1DataReadReq)
    val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))

    // set index / tag of the in-flight request (valid while entry is busy)
    val idx = Output(Valid(UInt()))
    val tag = Output(Valid(UInt()))
  })

  val s_invalid :: s_miss_req :: s_miss_resp :: s_miss_finish :: s_data_read_req :: s_data_read_resp :: s_resp :: Nil = Enum(7)

  val state = RegInit(s_invalid)

  // the request being serviced and the response being assembled
  val req = Reg(new DCacheLineReq)
  val resp = Reg(new DCacheLineResp)

  val req_idx = get_idx(req.addr)
  val req_tag = get_tag(req.addr)
  val req_block_addr = get_block_addr(req.addr)
  // latched MissQueue response (way_en / entry_id needed in later states)
  val reg_miss_resp = Reg(new MissResp)

  // assign default values to output signals
  // (later `when` blocks override them via Chisel last-connect semantics)
  io.lsu.req.ready := state === s_invalid
  io.lsu.resp.valid := false.B
  io.lsu.resp.bits := DontCare

  io.miss_req.valid := false.B
  io.miss_req.bits := DontCare
  io.miss_finish.valid := false.B
  io.miss_finish.bits := DontCare

  io.data_req.valid := false.B
  io.data_req.bits := DontCare

  // busy entries publish idx/tag so the queue can block conflicting requests
  io.idx.valid := state =/= s_invalid
  io.tag.valid := state =/= s_invalid
  io.idx.bits := req_idx
  io.tag.bits := req_tag

  when (state =/= s_invalid) {
    XSDebug("entry: %d state: %d\n", io.id, state)
  }

  // --------------------------------------------
  // s_invalid: receive requests
  when (state === s_invalid) {
    when (io.lsu.req.fire()) {
      // only plain read misses are handled here, and never replayed ones
      assert(io.lsu.req.bits.cmd === M_XRD)
      assert(!io.lsu.req.bits.meta.replay)
      req := io.lsu.req.bits
      resp.meta := io.lsu.req.bits.meta
      resp.miss := false.B
      resp.nack := false.B

      state := s_miss_req
    }
  }

  // --------------------------------------------
  // s_miss_req: hand the miss over to the global MissQueue
  when (state === s_miss_req) {
    io.miss_req.valid := true.B
    io.miss_req.bits.cmd := req.cmd
    io.miss_req.bits.addr := req_block_addr
    io.miss_req.bits.client_id := io.id

    when (io.miss_req.fire()) {
      state := s_miss_resp
    }
  }

  when (state === s_miss_resp) {
    when (io.miss_resp.fire()) {
      reg_miss_resp := io.miss_resp.bits
      // provisionally take the data carried by the response
      resp.data := io.miss_resp.bits.data
      when (io.miss_resp.bits.has_data) {
        state := s_resp
      } .otherwise {
        // miss queue says that data is already in dcache
        // so we need to read it
        state := s_data_read_req
      }
    }
  }

  // data array reads have a fixed pipeline latency (dataArrayLatency cycles);
  // count cycles after the read request fires
  val dataArrayLatency = 2
  val data_array_ctr = Reg(UInt(log2Up(dataArrayLatency).W))

  when (state === s_data_read_req) {
    // Data read for new requests
    io.data_req.valid := true.B
    io.data_req.bits.addr := req_block_addr
    // read the way the MissQueue refilled into; request all rows of the block
    io.data_req.bits.way_en := reg_miss_resp.way_en
    io.data_req.bits.rmask := ~0.U(blockRows.W)

    when (io.data_req.fire()) {
      state := s_data_read_resp
      data_array_ctr := 0.U
    }
  }

  when (state === s_data_read_resp) {
    data_array_ctr := data_array_ctr + 1.U
    when (data_array_ctr === (dataArrayLatency - 1).U) {
      val way_idx = OHToUInt(reg_miss_resp.way_en)
      // assemble the cacheline row by row (MSB-first via Cat over .reverse)
      resp.data := Cat((0 until blockRows).reverse map { i =>
        val row = io.data_resp(way_idx)(i)
        // decode each word in this row
        val row_decoded = Cat((0 until rowWords).reverse map { w =>
          val data_word = row(encWordBits * (w + 1) - 1, encWordBits * w)
          val decoded = cacheParams.dataCode.decode(data_word)
          val data_word_decoded = decoded.corrected
          // refilled data is expected to be ECC-clean
          assert(!decoded.uncorrectable)
          data_word_decoded
        })
        row_decoded
      })
      state := s_resp
    }
  }

  // --------------------------------------------
  // s_resp: return the assembled cacheline to the LSU
  when (state === s_resp) {
    io.lsu.resp.valid := true.B
    io.lsu.resp.bits := resp
    when (io.lsu.resp.fire()) {
      state := s_miss_finish
    }
  }

  // s_miss_finish: release the MissQueue entry, then free this slot
  when (state === s_miss_finish) {
    io.miss_finish.valid := true.B
    io.miss_finish.bits.client_id := io.id
    io.miss_finish.bits.entry_id := reg_miss_resp.entry_id
    when (io.miss_finish.fire()) {
      state := s_invalid
    }
  }

  // debug output
  when (io.lsu.req.fire()) {
    XSDebug(s"LoadMissEntryTransaction req %d\n", io.id)
  }

  when (io.lsu.resp.fire()) {
    XSDebug(s"LoadMissEntryTransaction resp %d\n", io.id)
  }
}
// LoadMissQueue: pool of nLoadMissEntries LoadMissEntry trackers for
// outstanding load misses. Incoming LSU requests are allocated to a free
// entry; the entries' miss/data/finish request channels are merged through
// arbiters, and miss responses are demultiplexed back by client_id.
class LoadMissQueue extends DCacheModule
{
val io = IO(new Bundle {
val lsu = Flipped(new DCacheLineIO)
val miss_req = DecoupledIO(new MissReq)
val miss_resp = Flipped(ValidIO(new MissResp))
val miss_finish = DecoupledIO(new MissFinish)
val data_req = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
})
// One arbiter per shared outbound channel, fanning in from all entries.
val miss_req_arb = Module(new Arbiter(new MissReq, cfg.nLoadMissEntries))
val miss_finish_arb = Module(new Arbiter(new MissFinish, cfg.nLoadMissEntries))
val data_req_arb = Module(new Arbiter(new L1DataReadReq, cfg.nLoadMissEntries))
val resp_arb = Module(new Arbiter(new DCacheLineResp, cfg.nLoadMissEntries))
// Per-entry set/tag match against the incoming request's address, used to
// block a new request whose set is already being handled by some entry.
val idx_matches = Wire(Vec(cfg.nLoadMissEntries, Bool()))
val tag_matches = Wire(Vec(cfg.nLoadMissEntries, Bool()))
// NOTE(review): tag_match is computed but not referenced anywhere in this
// class — possibly left over from an earlier merge-on-tag-hit scheme.
val tag_match = Mux1H(idx_matches, tag_matches)
val idx_match = idx_matches.reduce(_||_)
val req = io.lsu.req
val entry_alloc_idx = Wire(UInt())
// pri_rdy: ready of the entry selected for allocation; a request is only
// accepted (pri_val) when no existing entry is working on the same set.
val pri_rdy = WireInit(false.B)
val pri_val = req.valid && !idx_match
// NOTE(review): entry_id_MSB/entry_id_LSB appear unused in this class;
// they look like slice bounds for extracting an entry id out of a request
// id — confirm against the rest of the file before removing.
val entry_id_MSB = reqIdWidth - 1
val entry_id_LSB = reqIdWidth - loadMissQueueEntryIdWidth
val entries = (0 until cfg.nLoadMissEntries) map { i =>
val entry = Module(new LoadMissEntry)
entry.io.id := i.U(loadMissQueueEntryIdWidth.W)
idx_matches(i) := entry.io.idx.valid && entry.io.idx.bits === get_idx(req.bits.addr)
tag_matches(i) := entry.io.tag.valid && entry.io.tag.bits === get_tag(req.bits.addr)
// lsu req and resp
val entry_lsu = entry.io.lsu
// Only the entry chosen by entry_alloc_idx sees the request as valid.
entry_lsu.req.valid := (i.U === entry_alloc_idx) && pri_val
when (i.U === entry_alloc_idx) {
pri_rdy := entry_lsu.req.ready
}
entry_lsu.req.bits := req.bits
resp_arb.io.in(i) <> entry_lsu.resp
miss_req_arb.io.in(i) <> entry.io.miss_req
data_req_arb.io.in(i) <> entry.io.data_req
// Miss responses are broadcast on bits but steered by client_id so only
// the owning entry sees valid.
entry.io.miss_resp.valid := (i.U === io.miss_resp.bits.client_id) && io.miss_resp.valid
entry.io.miss_resp.bits := io.miss_resp.bits
entry.io.data_resp := io.data_resp
miss_finish_arb.io.in(i) <> entry.io.miss_finish
entry
}
// Allocate the lowest-numbered free entry (an entry is free when its lsu
// request port is ready).
entry_alloc_idx := PriorityEncoder(entries.map(m=>m.io.lsu.req.ready))
// whenever index matches, do not let it in
req.ready := pri_rdy && !idx_match
io.lsu.resp <> resp_arb.io.out
io.miss_req <> miss_req_arb.io.out
io.data_req <> data_req_arb.io.out
io.miss_finish <> miss_finish_arb.io.out
// debug output
when (req.fire()) {
XSDebug(s"req cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask, req.bits.meta.id, req.bits.meta.replay)
}
val resp = io.lsu.resp
when (resp.fire()) {
XSDebug(s"resp: data: %x id: %d replay: %b miss: %b nack: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.nack)
}
val miss_req = io.miss_req
XSDebug(miss_req.fire(), "miss_req cmd: %x addr: %x client_id: %d\n",
miss_req.bits.cmd, miss_req.bits.addr, miss_req.bits.client_id)
// NOTE(review): io.miss_resp is a ValidIO, yet .fire() is called on it here —
// confirm a project-local implicit/extension provides fire() for Valid.
val miss_resp = io.miss_resp
XSDebug(miss_resp.fire(), "miss_resp client_id: %d entry_id: %d has_data: %b data: %x\n",
miss_resp.bits.client_id, miss_resp.bits.entry_id, miss_resp.bits.has_data, miss_resp.bits.data)
val miss_finish = io.miss_finish
XSDebug(miss_finish.fire(), "miss_finish client_id: %d entry_id: %d\n",
miss_finish.bits.client_id, miss_finish.bits.entry_id)
}
......@@ -10,15 +10,15 @@ class MissReq extends DCacheBundle
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val client_id = UInt(missQueueClientIdWidth.W)
val tag_match = Bool()
val way_en = Bits(nWays.W)
val old_meta = new L1Metadata
}
class MissResp extends DCacheBundle
{
val client_id = UInt(missQueueClientIdWidth.W)
val entry_id = UInt(missQueueEntryIdWidth.W)
val way_en = Bits(nWays.W)
val has_data = Bool()
val data = UInt(blockBits.W)
}
class MissFinish extends DCacheBundle
......@@ -39,52 +39,56 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
val req = Flipped(DecoupledIO(new MissReq))
val resp = DecoupledIO(new MissResp)
val finish = Flipped(DecoupledIO(new MissFinish))
// refill to load queue to wake up missed requests
val refill = ValidIO(new Refill)
val block_idx = Output(Valid(UInt()))
val block_addr = Output(Valid(UInt()))
val block_probe_idx = Output(Valid(UInt()))
val block_probe_addr = Output(Valid(UInt()))
// bus
val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))
val meta_read = DecoupledIO(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
val meta_write = DecoupledIO(new L1MetaWriteReq)
val refill = DecoupledIO(new L1DataWriteReq)
// write back
val wb_req = DecoupledIO(new WritebackReq(edge.bundle.sourceBits))
val wb_resp = Input(Bool())
// write meta and data
val meta_write = DecoupledIO(new L1MetaWriteReq)
val data_write = DecoupledIO(new L1DataWriteReq)
// for synchronization
val block_idx = Output(Valid(UInt()))
val block_addr = Output(Valid(UInt()))
val block_probe_idx = Output(Valid(UInt()))
val block_probe_addr = Output(Valid(UInt()))
// watch prober's write back requests
val probe_wb_req = Flipped(ValidIO(new WritebackReq(edge.bundle.sourceBits)))
val probe_active = Flipped(ValidIO(UInt()))
})
// MSHR:
// 1. get req
// 2. read meta data and make replacement decisions
// 3. do writeback/refill when necessary
// 4. send response back to client
// 5. wait for client's finish
// 6. update meta data
// 2. refill when necessary
// 3. writeback when necessary
// 4. update meta data
// 5. send response back to client
// 6. wait for client's finish
// 7. done
val s_invalid :: s_meta_read_req :: s_meta_read_resp :: s_decide_next_state :: s_refill_req :: s_refill_resp :: s_mem_finish :: s_wait_probe_exit :: s_send_resp :: s_wb_req :: s_wb_resp :: s_data_write_req :: s_meta_write_req :: s_client_finish :: Nil = Enum(14)
val s_invalid :: s_refill_req :: s_refill_resp :: s_mem_finish :: s_wait_probe_exit :: s_wb_req :: s_wb_resp :: s_data_write_req :: s_meta_write_req :: s_send_resp :: s_client_finish :: Nil = Enum(11)
val state = RegInit(s_invalid)
val req = Reg(new MissReq)
val req_reg = Reg(new MissReq)
val req = Mux(io.req.fire(), io.req.bits, req_reg)
val req_idx = get_idx(req.addr)
val req_tag = get_tag(req.addr)
val req_block_addr = get_block_addr(req.addr)
// meta read results
val req_tag_match = Reg(Bool())
val req_old_meta = Reg(new L1Metadata)
val req_way_en = Reg(UInt(nWays.W))
val req_tag_match = req.tag_match
val req_old_meta = req.old_meta
val req_way_en = req.way_en
// what permission to release for the old block?
val (_, shrink_param, coh_on_clear) = req_old_meta.coh.onCacheControl(M_FLUSH)
......@@ -101,24 +105,14 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
val should_refill_data = Reg(Bool())
val needs_writeback = Reg(Bool())
// for read, to shorten latency
// we send back response as soon as possible
// for read, we do not need to replay requests
// just refill data to load queue, and then, we can exit
// no need to walk through send_resp and client_finish state
//
// for store and amo
// we send back response when we have finished everything
// inform clients to replay requests
val early_response = Reg(Bool())
io.block_idx.valid := state =/= s_invalid
io.block_addr.valid := state =/= s_invalid
io.block_idx.bits := req_idx
io.block_addr.bits := req_block_addr
// to preserve forward progress, we allow probe when we are dealing with acquire/grant
io.block_probe_idx.valid := state =/= s_invalid && state =/= s_refill_req && state =/= s_refill_resp
io.block_probe_addr.valid := state =/= s_invalid && state =/= s_refill_req && state =/= s_refill_resp
io.block_probe_idx.bits := req_idx
io.block_probe_addr.bits := req_block_addr
val no_replay = Reg(Bool())
// assign default values to output signals
io.req.ready := false.B
......@@ -126,25 +120,35 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io.resp.bits := DontCare
io.finish.ready := false.B
io.refill.valid := false.B
io.refill.bits := DontCare
io.mem_acquire.valid := false.B
io.mem_acquire.bits := DontCare
io.mem_grant.ready := false.B
io.mem_finish.valid := false.B
io.mem_finish.bits := DontCare
io.meta_read.valid := false.B
io.meta_read.bits := DontCare
io.wb_req.valid := false.B
io.wb_req.bits := DontCare
io.meta_write.valid := false.B
io.meta_write.bits := DontCare
io.refill.valid := false.B
io.refill.bits := DontCare
io.data_write.valid := false.B
io.data_write.bits := DontCare
io.wb_req.valid := false.B
io.wb_req.bits := DontCare
io.block_idx.valid := state =/= s_invalid
io.block_addr.valid := state =/= s_invalid
// break combinational loop
io.block_idx.bits := get_idx(req_reg.addr)
io.block_addr.bits := get_block_addr(req_reg.addr)
// to preserve forward progress, we allow probe when we are dealing with acquire/grant
io.block_probe_idx.valid := state =/= s_invalid && state =/= s_refill_req && state =/= s_refill_resp
io.block_probe_addr.valid := state =/= s_invalid && state =/= s_refill_req && state =/= s_refill_resp
io.block_probe_idx.bits := get_idx(req_reg.addr)
io.block_probe_addr.bits := get_block_addr(req_reg.addr)
when (state =/= s_invalid) {
XSDebug("entry: %d state: %d\n", io.id, state)
......@@ -154,89 +158,25 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io.id, io.block_probe_idx.valid, io.block_probe_idx.bits, io.block_probe_addr.valid, io.block_probe_addr.bits)
}
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
io.req.ready := true.B
when (io.req.fire()) {
grantack.valid := false.B
refill_ctr := 0.U
should_refill_data := false.B
needs_writeback := false.B
early_response := false.B
req := io.req.bits
state := s_meta_read_req
}
}
// --------------------------------------------
// s_meta_read_req: read meta data
when (state === s_meta_read_req) {
io.meta_read.valid := true.B
val meta_read = io.meta_read.bits
meta_read.idx := req_idx
meta_read.way_en := ~0.U(nWays.W)
meta_read.tag := DontCare
when (io.meta_read.fire()) {
state := s_meta_read_resp
}
}
// s_meta_read_resp: handle meta read response
// check hit, miss
when (state === s_meta_read_resp) {
// tag check
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val tag_eq_way = wayMap((w: Int) => io.meta_resp(w).tag === (req_tag)).asUInt
val tag_match_way = wayMap((w: Int) => tag_eq_way(w) && io.meta_resp(w).coh.isValid()).asUInt
val tag_match = tag_match_way.orR
val hit_meta = Mux1H(tag_match_way, wayMap((w: Int) => io.meta_resp(w)))
val hit_state = hit_meta.coh
val has_permission = hit_state.onAccess(req.cmd)._1
val new_hit_state = hit_state.onAccess(req.cmd)._3
val hit = tag_match && has_permission && hit_state === new_hit_state
// replacement policy
val replacer = cacheParams.replacement
val replaced_way_en = UIntToOH(replacer.way)
val repl_meta = Mux1H(replaced_way_en, wayMap((w: Int) => io.meta_resp(w)))
req_tag_match := tag_match
req_old_meta := Mux(tag_match, hit_meta, repl_meta)
req_way_en := Mux(tag_match, tag_match_way, replaced_way_en)
replacer.miss
state := s_decide_next_state
}
// decision making
def decide_next_state(): UInt = {
val new_state = WireInit(s_invalid)
val old_coh = req_old_meta.coh
val needs_wb = old_coh.onCacheControl(M_FLUSH)._1 // does the line we are evicting need to be written back
early_response := req.cmd === M_XRD
no_replay := req.cmd === M_XRD
when (req_tag_match) {
val (is_hit, _, coh_on_hit) = old_coh.onAccess(req.cmd)
when (is_hit) { // set dirty bit
// we do not need to assert write any more
// read may go here as well
// eg: when several load miss on the same block
when (req.cmd === M_XRD) {
// normal read
// read hit, no need to update meta
new_coh := old_coh
new_state := s_send_resp
} .otherwise {
assert(isWrite(req.cmd))
new_coh := coh_on_hit
new_state := s_meta_write_req
}
// read should never go here
// we get here only when we need to set dirty bit
assert(isWrite(req.cmd))
// go update meta
new_coh := coh_on_hit
new_state := s_meta_write_req
} .otherwise { // upgrade permissions
new_coh := old_coh
new_state := s_refill_req
......@@ -251,30 +191,17 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
new_state
}
// this state is unnecessary, we can make decisions in s_meta_read_resp
when (state === s_decide_next_state) {
state := decide_next_state()
}
// --------------------------------------------
// write back
when (state === s_wb_req) {
io.wb_req.valid := true.B
io.wb_req.bits.tag := req_old_meta.tag
io.wb_req.bits.idx := req_idx
io.wb_req.bits.param := shrink_param
io.wb_req.bits.way_en := req_way_en
io.wb_req.bits.source := io.id
io.wb_req.bits.voluntary := true.B
when (io.wb_req.fire()) {
state := s_wb_resp
}
}
when (state === s_invalid) {
io.req.ready := true.B
when (state === s_wb_resp) {
when (io.wb_resp) {
state := s_data_write_req
when (io.req.fire()) {
grantack.valid := false.B
refill_ctr := 0.U
should_refill_data := false.B
needs_writeback := false.B
no_replay := false.B
req_reg := io.req.bits
state := decide_next_state()
}
}
......@@ -294,8 +221,9 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
}
// ecc-encoded data
val refill_data = Reg(Vec(blockRows, UInt(encRowBits.W)))
// not encoded data
// raw data
val refill_data_raw = Reg(Vec(blockRows, UInt(rowBits.W)))
when (state === s_refill_resp) {
io.mem_grant.ready := true.B
......@@ -334,6 +262,12 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
}
// refill data to load queue
io.refill.valid := RegNext(state === s_refill_resp && refill_done &&
should_refill_data && no_replay)
io.refill.bits.addr := req_block_addr
io.refill.bits.data := refill_data_raw.asUInt
when (state === s_mem_finish) {
io.mem_finish.valid := grantack.valid
io.mem_finish.bits := grantack.bits
......@@ -344,31 +278,23 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
}
// --------------------------------------------
// sync with probe
when (state === s_wait_probe_exit) {
// we only wait for probe, when prober is manipulating our set
val should_wait_for_probe_exit = io.probe_active.valid && io.probe_active.bits === req_idx
when (!should_wait_for_probe_exit) {
// no data
when (early_response) {
// load miss respond right after finishing tilelink transactions
assert(should_refill_data)
state := s_send_resp
when (needs_writeback) {
// write back data
state := s_wb_req
} .otherwise {
// if we do not do early respond
// we must be a write
when (needs_writeback) {
// write back data
assert(should_refill_data)
state := s_wb_req
} .otherwise {
// no need to write back
when (should_refill_data) {
// fill data into dcache
state := s_data_write_req
} otherwise {
// just got permission, no need to fill data into dcache
state := s_meta_write_req
}
// no need to write back
when (should_refill_data) {
// fill data into dcache
state := s_data_write_req
} otherwise {
// permission update only
state := s_meta_write_req
}
}
}
......@@ -397,20 +323,42 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
when (prober_writeback_our_block) {
req_old_meta.coh := onShrink(io.probe_wb_req.bits.param)
req_reg.old_meta.coh := onShrink(io.probe_wb_req.bits.param)
}
// --------------------------------------------
// write back
when (state === s_wb_req) {
io.wb_req.valid := true.B
io.wb_req.bits.tag := req_old_meta.tag
io.wb_req.bits.idx := req_idx
io.wb_req.bits.param := shrink_param
io.wb_req.bits.way_en := req_way_en
io.wb_req.bits.source := io.id
io.wb_req.bits.voluntary := true.B
when (io.wb_req.fire()) {
state := s_wb_resp
}
}
when (state === s_wb_resp) {
when (io.wb_resp) {
state := s_data_write_req
}
}
// --------------------------------------------
// data write
when (state === s_data_write_req) {
io.refill.valid := true.B
io.refill.bits.addr := req_block_addr
io.refill.bits.way_en := req_way_en
io.refill.bits.wmask := VecInit((0 until blockRows) map (i => ~0.U(rowWords.W)))
io.refill.bits.rmask := DontCare
io.refill.bits.data := refill_data
when (io.refill.fire()) {
io.data_write.valid := true.B
io.data_write.bits.addr := req_block_addr
io.data_write.bits.way_en := req_way_en
io.data_write.bits.wmask := VecInit((0 until blockRows) map (i => ~0.U(rowWords.W)))
io.data_write.bits.rmask := DontCare
io.data_write.bits.data := refill_data
when (io.data_write.fire()) {
state := s_meta_write_req
}
}
......@@ -425,8 +373,9 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io.meta_write.bits.way_en := req_way_en
when (io.meta_write.fire()) {
when (early_response) {
state := s_client_finish
when (no_replay) {
// no need to replay, exit now
state := s_invalid
} .otherwise {
state := s_send_resp
}
......@@ -438,9 +387,6 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io.resp.valid := true.B
io.resp.bits.client_id := req.client_id
io.resp.bits.entry_id := io.id
io.resp.bits.way_en := req_way_en
io.resp.bits.has_data := should_refill_data
io.resp.bits.data := refill_data_raw.asUInt
when (io.resp.fire()) {
// additional assertion
......@@ -448,18 +394,7 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
assert(is_hit, "We still don't have permissions for this block")
assert(new_coh === coh_on_hit, "Incorrect coherence meta data")
// read miss
when (early_response && should_refill_data) {
when (needs_writeback) {
// write back data later
state := s_wb_req
} .otherwise {
// for read, we will write data later
state := s_data_write_req
}
} .otherwise {
state := s_client_finish
}
state := s_client_finish
}
}
......@@ -478,19 +413,18 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
val req = Flipped(DecoupledIO(new MissReq))
val resp = ValidIO(new MissResp)
val finish = Flipped(DecoupledIO(new MissFinish))
val refill = ValidIO(new Refill)
val mem_acquire = Decoupled(new TLBundleA(edge.bundle))
val mem_grant = Flipped(Decoupled(new TLBundleD(edge.bundle)))
val mem_finish = Decoupled(new TLBundleE(edge.bundle))
val meta_read = Decoupled(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
val meta_write = Decoupled(new L1MetaWriteReq)
val refill = Decoupled(new L1DataWriteReq)
val wb_req = Decoupled(new WritebackReq(edge.bundle.sourceBits))
val wb_resp = Input(Bool())
val meta_write = Decoupled(new L1MetaWriteReq)
val data_write = Decoupled(new L1DataWriteReq)
val probe_wb_req = Flipped(ValidIO(new WritebackReq(edge.bundle.sourceBits)))
val probe_active = Flipped(ValidIO(UInt()))
......@@ -502,9 +436,9 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
})
val resp_arb = Module(new Arbiter(new MissResp, cfg.nMissEntries))
val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, cfg.nMissEntries))
val refill_arb = Module(new Arbiter(new Refill, cfg.nMissEntries))
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, cfg.nMissEntries))
val refill_arb = Module(new Arbiter(new L1DataWriteReq, cfg.nMissEntries))
val data_write_arb = Module(new Arbiter(new L1DataWriteReq, cfg.nMissEntries))
val wb_req_arb = Module(new Arbiter(new WritebackReq(edge.bundle.sourceBits), cfg.nMissEntries))
// assign default values to output signals
......@@ -528,6 +462,8 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
// entry resp
resp_arb.io.in(i) <> entry.io.resp
refill_arb.io.in(i).valid := entry.io.refill.valid
refill_arb.io.in(i).bits := entry.io.refill.bits
// entry finish
entry.io.finish.valid := (i.U === io.finish.bits.entry_id) && io.finish.valid
......@@ -536,11 +472,8 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
io.finish.ready := entry.io.finish.ready
}
meta_read_arb.io.in(i) <> entry.io.meta_read
entry.io.meta_resp := io.meta_resp
meta_write_arb.io.in(i) <> entry.io.meta_write
refill_arb.io.in(i) <> entry.io.refill
data_write_arb.io.in(i) <> entry.io.data_write
wb_req_arb.io.in(i) <> entry.io.wb_req
entry.io.wb_resp := io.wb_resp
......@@ -568,9 +501,16 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
io.resp.bits := resp_arb.io.out.bits
resp_arb.io.out.ready := true.B
io.meta_read <> meta_read_arb.io.out
io.refill.valid := refill_arb.io.out.valid
io.refill.bits := refill_arb.io.out.bits
refill_arb.io.out.ready := true.B
// one refill at a time
val refill_vec = refill_arb.io.in.map(c => c.valid)
assert(PopCount(refill_vec) === 0.U || PopCount(refill_vec) === 1.U)
io.meta_write <> meta_write_arb.io.out
io.refill <> refill_arb.io.out
io.data_write <> data_write_arb.io.out
io.wb_req <> wb_req_arb.io.out
TLArbiter.lowestFromSeq(edge, io.mem_acquire, entries.map(_.io.mem_acquire))
......@@ -592,8 +532,8 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
XSDebug(finish.fire(), "finish client_id: %d entry_id: %d\n",
finish.bits.client_id, finish.bits.entry_id)
// print refill
XSDebug(io.refill.fire(), "refill addr %x\n", io.refill.bits.addr)
// print data_write
XSDebug(io.data_write.fire(), "refill addr %x\n", io.data_write.bits.addr)
// print meta_write
XSDebug(io.meta_write.fire(), "meta_write idx %x way_en: %x old_tag: %x new_coh: %d new_tag: %x\n",
......
......@@ -14,7 +14,6 @@ class StoreMissEntry extends DCacheModule
val lsu = Flipped(new DCacheLineIO)
val replay = new DCacheLineIO
val miss_req = DecoupledIO(new MissReq)
val miss_resp = Flipped(ValidIO(new MissResp))
val miss_finish = DecoupledIO(new MissFinish)
......@@ -22,7 +21,7 @@ class StoreMissEntry extends DCacheModule
val tag = Output(Valid(UInt()))
})
val s_invalid :: s_replay_req :: s_replay_resp :: s_resp :: s_miss_req :: s_miss_resp :: s_miss_finish :: Nil = Enum(7)
val s_invalid :: s_replay_req :: s_replay_resp :: s_resp :: s_miss_resp :: s_miss_finish :: Nil = Enum(6)
val state = RegInit(s_invalid)
val req = Reg(new DCacheLineReq )
......@@ -42,10 +41,8 @@ class StoreMissEntry extends DCacheModule
io.replay.req.bits := DontCare
io.replay.resp.ready := false.B
io.miss_req.valid := false.B
io.miss_req.bits := DontCare
io.miss_finish.valid := false.B
io.miss_finish.bits := DontCare
io.miss_finish.valid := false.B
io.miss_finish.bits := DontCare
io.idx.valid := state =/= s_invalid
io.tag.valid := state =/= s_invalid
......@@ -73,6 +70,9 @@ class StoreMissEntry extends DCacheModule
when (state === s_replay_req) {
io.replay.req.valid := true.B
io.replay.req.bits := req
// use our own storeMissEntryId
// miss resp are routed by this id
io.replay.req.bits.meta.id := io.id
when (io.replay.req.fire()) {
state := s_replay_resp
}
......@@ -81,34 +81,30 @@ class StoreMissEntry extends DCacheModule
when (state === s_replay_resp) {
io.replay.resp.ready := true.B
when (io.replay.resp.fire()) {
// req missed
when (io.replay.resp.bits.miss) {
// replayed reqs should not miss
assert(!req.meta.replay)
when (!req.meta.replay) {
state := s_miss_req
// the req missed and did not enter mshr
// so replay it until it hits or enters mshr
when (io.replay.resp.bits.replay) {
state := s_replay_req
} .otherwise {
// the req missed and enters mshr
// wait for miss response
state := s_miss_resp
}
} .otherwise {
// req hits, everything OK
resp := io.replay.resp.bits
when (!req.meta.replay) {
state := s_resp
} .otherwise {
// if it's a replayed request
// we need to tell mshr, we are done
state := s_miss_finish
}
}
assert(!io.replay.resp.bits.nack)
}
}
// --------------------------------------------
when (state === s_miss_req) {
io.miss_req.valid := true.B
io.miss_req.bits.cmd := req.cmd
io.miss_req.bits.addr := req_block_addr
io.miss_req.bits.client_id := io.id
when (io.miss_req.fire()) {
state := s_miss_resp
}
}
......@@ -134,6 +130,8 @@ class StoreMissEntry extends DCacheModule
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := resp
// response to sbuffer should carry the original request id
io.lsu.resp.bits.meta.id := req.meta.id
when (io.lsu.resp.fire()) {
state := s_invalid
......@@ -157,12 +155,10 @@ class StoreMissQueue extends DCacheModule
val lsu = Flipped(new DCacheLineIO)
val replay = new DCacheLineIO
val miss_req = DecoupledIO(new MissReq)
val miss_resp = Flipped(ValidIO(new MissResp))
val miss_finish = DecoupledIO(new MissFinish)
})
val miss_req_arb = Module(new Arbiter(new MissReq, cfg.nStoreMissEntries))
val miss_finish_arb = Module(new Arbiter(new MissFinish, cfg.nStoreMissEntries))
val replay_arb = Module(new Arbiter(new DCacheLineReq, cfg.nStoreMissEntries))
val resp_arb = Module(new Arbiter(new DCacheLineResp, cfg.nStoreMissEntries))
......@@ -222,7 +218,6 @@ class StoreMissQueue extends DCacheModule
io.replay.resp.ready := entry_replay.resp.ready
}
miss_req_arb.io.in(i) <> entry.io.miss_req
entry.io.miss_resp.valid := (i.U === io.miss_resp.bits.client_id) && io.miss_resp.valid
entry.io.miss_resp.bits := io.miss_resp.bits
......@@ -236,7 +231,6 @@ class StoreMissQueue extends DCacheModule
req.ready := pri_rdy && !idx_match
io.lsu.resp <> resp_arb.io.out
io.replay.req <> replay_arb.io.out
io.miss_req <> miss_req_arb.io.out
io.miss_finish <> miss_finish_arb.io.out
// debug output
......@@ -253,14 +247,10 @@ class StoreMissQueue extends DCacheModule
val resp = io.lsu.resp
when (resp.fire()) {
XSDebug(s"resp: data: %x id: %d replay: %b miss: %b nack: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.nack)
XSDebug(s"resp: data: %x id: %d replay: %b miss: %b replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
}
val miss_req = io.miss_req
XSDebug(miss_req.fire(), "miss_req cmd: %x addr: %x client_id: %d\n",
miss_req.bits.cmd, miss_req.bits.addr, miss_req.bits.client_id)
val miss_resp = io.miss_resp
XSDebug(miss_resp.fire(), "miss_resp client_id: %d entry_id: %d\n",
miss_resp.bits.client_id, miss_resp.bits.entry_id)
......
......@@ -16,6 +16,9 @@ class StorePipe extends DCacheModule
val meta_resp = Input(Vec(nWays, new L1Metadata))
val inflight_req_idxes = Output(Vec(3, Valid(UInt())))
val inflight_req_block_addrs = Output(Vec(3, Valid(UInt())))
// send miss request to miss queue
val miss_req = DecoupledIO(new MissReq)
})
......@@ -58,6 +61,17 @@ class StorePipe extends DCacheModule
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_hit_meta = Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w)))
val s1_hit_state = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
when (io.miss_req.fire()) {
replacer.miss
}
// stage 2
......@@ -67,12 +81,19 @@ class StorePipe extends DCacheModule
dump_pipeline_reqs("StorePipe s2", s2_valid, s2_req)
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_way = OHToUInt(s2_tag_match_way, nWays)
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegNext(meta_resp(w).coh)))
val s2_tag_match = RegNext(s1_tag_match)
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_state = RegNext(s1_hit_state)
val s2_has_permission = s2_hit_state.onAccess(s2_req.cmd)._1
val s2_new_hit_state = s2_hit_state.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_way_en = RegNext(s1_repl_way_en)
val s2_old_meta = Mux(s2_tag_match, s2_hit_meta, s2_repl_meta)
val s2_way_en = Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
......@@ -85,16 +106,24 @@ class StorePipe extends DCacheModule
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state
val s2_nack = Wire(Bool())
// when req got nacked, upper levels should replay this request
// the same set is busy
val s2_nack_hit = RegNext(s1_nack)
val s2_nack_set_busy = s2_valid && false.B
// can no allocate mshr for store miss
val s2_nack_no_mshr = io.miss_req.valid && !io.miss_req.ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_set_busy
s2_nack := s2_nack_hit || s2_nack_no_mshr || s2_nack_data
val s2_info = p"tag match: $s2_tag_match hasPerm: $s2_has_permission" +
p" hit state: $s2_hit_state new state: $s2_new_hit_state s2_nack: $s2_nack\n"
// deal with data
val data_resp = io.data_resp
val s2_data = data_resp(s2_hit_way)
val s2_data = Mux1H(s2_tag_match_way, data_resp)
val s2_data_decoded = (0 until blockRows) map { r =>
(0 until rowWords) map { w =>
val data = s2_data(r)(encWordBits * (w + 1) - 1, encWordBits * w)
......@@ -139,22 +168,33 @@ class StorePipe extends DCacheModule
dump_pipeline_valids("StorePipe s2", "s2_hit", s2_valid && s2_hit)
dump_pipeline_valids("StorePipe s2", "s2_nack", s2_valid && s2_nack)
dump_pipeline_valids("StorePipe s2", "s2_nack_hit", s2_valid && s2_nack_hit)
dump_pipeline_valids("StorePipe s2", "s2_nack_set_busy", s2_valid && s2_nack_set_busy)
dump_pipeline_valids("StorePipe s2", "s2_nack_no_mshr", s2_valid && s2_nack_no_mshr)
dump_pipeline_valids("StorePipe s2", "s2_nack_data", s2_valid && s2_nack_data)
// send load miss to miss queue
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_req.addr)
io.miss_req.bits.tag_match := s2_tag_match
io.miss_req.bits.way_en := s2_way_en
io.miss_req.bits.old_meta := s2_old_meta
io.miss_req.bits.client_id := s2_req.meta.id
val resp = Wire(Valid(new DCacheLineResp))
resp.valid := s2_valid
resp.bits.data := DontCare
resp.bits.meta := s2_req.meta
resp.bits.miss := !s2_hit
resp.bits.nack := s2_nack
resp.bits.miss := !s2_hit || s2_nack
resp.bits.replay := resp.bits.miss && (!io.miss_req.fire() || s2_nack)
io.lsu.resp.valid := resp.valid
io.lsu.resp.bits := resp.bits
assert(!(resp.valid && !io.lsu.resp.ready))
when (resp.valid) {
XSDebug(s"StorePipe resp: data: %x id: %d replay: %b miss: %b nack: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.nack)
XSDebug(s"StorePipe resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
}
io.inflight_req_idxes(0).valid := io.lsu.req.valid
......
......@@ -110,7 +110,7 @@ class MMIOEntry(edge: TLEdgeOut) extends DCacheModule
// meta data should go with the response
io.resp.bits.meta := req.meta
io.resp.bits.miss := false.B
io.resp.bits.nack := false.B
io.resp.bits.replay := false.B
when (io.resp.fire()) {
state := s_invalid
......
......@@ -47,7 +47,7 @@ class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParam
}
val refill = new Bundle() {
val wen = Input(Vec(size, Bool()))
val dcache = Input(new DCacheLineResp)
val data = Input(UInt((cfg.blockBytes * 8).W))
}
val needForward = Input(Vec(nchannel, Vec(2, UInt(size.W))))
val forward = Vec(nchannel, Flipped(new LoadForwardQueryIO))
......@@ -106,9 +106,7 @@ class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParam
}
// split dcache result into words
val words = VecInit((0 until blockWords) map { i =>
io.refill.dcache.data(DataBits * (i + 1) - 1, DataBits * i)
})
val words = VecInit((0 until blockWords) map { i => io.refill.data(DataBits * (i + 1) - 1, DataBits * i)})
(0 until size).map(i => {
......@@ -248,7 +246,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(new RoqCommitIO)
val rollback = Output(Valid(new Redirect))
val dcache = new DCacheLineIO
val dcache = Flipped(ValidIO(new Refill))
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
val exceptionAddr = new ExceptionAddrIO
......
......@@ -63,12 +63,12 @@ class LoadQueue extends XSModule
val enq = new LqEnqIO
val brqRedirect = Input(Valid(new Redirect))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(new RoqCommitIO)
val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
val dcache = new DCacheLineIO
val dcache = Flipped(ValidIO(new Refill))
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
val exceptionAddr = new ExceptionAddrIO
......@@ -83,7 +83,7 @@ class LoadQueue extends XSModule
val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been writebacked to CDB
val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
// val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq
val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst
......@@ -124,7 +124,7 @@ class LoadQueue extends XSModule
writebacked(index) := false.B
commited(index) := false.B
miss(index) := false.B
listening(index) := false.B
// listening(index) := false.B
pending(index) := false.B
}
io.enq.resp(i) := lqIdx
......@@ -194,7 +194,7 @@ class LoadQueue extends XSModule
val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
listening(loadWbIndex) := dcacheMissed
// listening(loadWbIndex) := dcacheMissed
pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
}
}
......@@ -207,83 +207,78 @@ class LoadQueue extends XSModule
* (3) dcache response: datavalid
* (4) writeback to ROB: writeback
*/
val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)
val missRefillSelVec = VecInit(
(0 until LoadQueueSize).map{ i =>
val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
allocated(i) && miss(i) && !inflight
})
val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
io.dcache.req.valid := missRefillSelVec.asUInt.orR
io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
io.dcache.req.bits.addr := missRefillBlockAddr
io.dcache.req.bits.data := DontCare
io.dcache.req.bits.mask := DontCare
io.dcache.req.bits.meta.id := DontCare
io.dcache.req.bits.meta.vaddr := DontCare // dataModule.io.rdata(missRefillSel).vaddr
io.dcache.req.bits.meta.paddr := missRefillBlockAddr
io.dcache.req.bits.meta.uop := uop(missRefillSel)
io.dcache.req.bits.meta.mmio := false.B // mmio(missRefillSel)
io.dcache.req.bits.meta.tlb_miss := false.B
io.dcache.req.bits.meta.mask := DontCare
io.dcache.req.bits.meta.replay := false.B
io.dcache.resp.ready := true.B
assert(!(debug_mmio(missRefillSel) && io.dcache.req.valid))
when(io.dcache.req.fire()) {
miss(missRefillSel) := false.B
listening(missRefillSel) := true.B
// val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
// val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
// val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)
// val missRefillSelVec = VecInit(
// (0 until LoadQueueSize).map{ i =>
// val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
// allocated(i) && miss(i) && !inflight
// })
// val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
// val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
// io.dcache.req.valid := missRefillSelVec.asUInt.orR
// io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
// io.dcache.req.bits.addr := missRefillBlockAddr
// io.dcache.req.bits.data := DontCare
// io.dcache.req.bits.mask := DontCare
// io.dcache.req.bits.meta.id := DontCare
// io.dcache.req.bits.meta.vaddr := DontCare // dataModule.io.rdata(missRefillSel).vaddr
// io.dcache.req.bits.meta.paddr := missRefillBlockAddr
// io.dcache.req.bits.meta.uop := uop(missRefillSel)
// io.dcache.req.bits.meta.mmio := false.B // dataModule.io.rdata(missRefillSel).mmio
// io.dcache.req.bits.meta.tlb_miss := false.B
// io.dcache.req.bits.meta.mask := DontCare
// io.dcache.req.bits.meta.replay := false.B
// assert(!(dataModule.io.rdata(missRefillSel).mmio && io.dcache.req.valid))
// when(io.dcache.req.fire()) {
// miss(missRefillSel) := false.B
// listening(missRefillSel) := true.B
// mark this block as inflight
inflightReqs(reqBlockIndex).valid := true.B
inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
assert(!inflightReqs(reqBlockIndex).valid)
}
when(io.dcache.resp.fire()) {
val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
assert(inflight)
for (i <- 0 until cfg.nLoadMissEntries) {
when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
inflightReqs(i).valid := false.B
}
}
}
when(io.dcache.req.fire()){
XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
)
}
when(io.dcache.resp.fire()){
XSDebug("miss resp: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x data %x\n",
io.dcache.resp.bits.meta.uop.cf.pc, io.dcache.resp.bits.meta.uop.roqIdx.asUInt, io.dcache.resp.bits.meta.uop.lqIdx.asUInt,
io.dcache.resp.bits.meta.paddr, io.dcache.resp.bits.data
)
// inflightReqs(reqBlockIndex).valid := true.B
// inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
// assert(!inflightReqs(reqBlockIndex).valid)
// }
// when(io.dcache.resp.fire()) {
// val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
// assert(inflight)
// for (i <- 0 until cfg.nLoadMissEntries) {
// when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
// inflightReqs(i).valid := false.B
// }
// }
// }
// when(io.dcache.req.fire()){
// XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
// io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
// io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
// )
// }
when(io.dcache.valid) {
XSDebug("miss resp: paddr:0x%x data %x\n", io.dcache.bits.addr, io.dcache.bits.data)
}
// Refill 64 bit in a cycle
// Refill data comes back from io.dcache.resp
dataModule.io.refill.dcache := io.dcache.resp.bits
dataModule.io.refill.data := io.dcache.bits.data
(0 until LoadQueueSize).map(i => {
val blockMatch = get_block_addr(dataModule.io.rdata(i).paddr) === io.dcache.resp.bits.meta.paddr
val blockMatch = get_block_addr(dataModule.io.rdata(i).paddr) === get_block_addr(io.dcache.bits.addr)
dataModule.io.refill.wen(i) := false.B
when(allocated(i) && listening(i) && blockMatch && io.dcache.resp.fire()) {
when(allocated(i) && miss(i) && blockMatch && io.dcache.valid) {
dataModule.io.refill.wen(i) := true.B
datavalid(i) := true.B
listening(i) := false.B
miss(i) := false.B
}
})
......@@ -417,7 +412,7 @@ class LoadQueue extends XSModule
val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || miss(j))
// TODO: update refilled data
val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
Cat(violationVec).orR() && entryNeedCheck
......@@ -562,7 +557,7 @@ class LoadQueue extends XSModule
dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
dataModule.io.uncache.wen := true.B
XSDebug("uncache resp: data %x\n", io.dcache.resp.bits.data)
XSDebug("uncache resp: data %x\n", io.dcache.bits.data)
}
// Read vaddr for mem exception
......@@ -628,7 +623,7 @@ class LoadQueue extends XSModule
PrintFlag(allocated(i) && writebacked(i), "w")
PrintFlag(allocated(i) && commited(i), "c")
PrintFlag(allocated(i) && miss(i), "m")
PrintFlag(allocated(i) && listening(i), "l")
// PrintFlag(allocated(i) && listening(i), "l")
PrintFlag(allocated(i) && pending(i), "p")
XSDebug(false, true.B, " ")
if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
......
......@@ -21,7 +21,7 @@ class LoadUnit_S0 extends XSModule {
val in = Flipped(Decoupled(new ExuInput))
val out = Decoupled(new LsPipelineBundle)
val dtlbReq = DecoupledIO(new TlbReq)
val dcacheReq = DecoupledIO(new DCacheLoadReq)
val dcacheReq = DecoupledIO(new DCacheWordReq)
})
val s0_uop = io.in.bits.uop
......@@ -82,27 +82,26 @@ class LoadUnit_S1 extends XSModule {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val dcachePAddr = Output(UInt(PAddrBits.W))
val dcacheKill = Output(Bool())
val sbuffer = new LoadForwardQueryIO
val lsq = new LoadForwardQueryIO
})
val s1_uop = io.in.bits.uop
val s1_paddr = io.dtlbResp.bits.paddr
val s1_exception = io.out.bits.uop.cf.exceptionVec.asUInt.orR
val s1_tlb_miss = io.dtlbResp.bits.miss
val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr) && !io.out.bits.uop.cf.exceptionVec.asUInt.orR
val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr)
val s1_mask = io.in.bits.mask
io.out.bits := io.in.bits // forwardXX field will be updated in s1
io.dtlbResp.ready := true.B
// feedback tlb result to RS
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.roqIdx := s1_uop.roqIdx
// TOOD: PMA check
io.dcachePAddr := s1_paddr
io.dcacheKill := s1_tlb_miss || s1_exception || s1_mmio
// load forward query datapath
io.sbuffer.valid := io.in.valid
......@@ -119,9 +118,9 @@ class LoadUnit_S1 extends XSModule {
io.lsq.mask := s1_mask
io.lsq.pc := s1_uop.cf.pc // FIXME: remove it
io.out.valid := io.in.valid && !s1_tlb_miss
io.out.valid := io.in.valid// && !s1_tlb_miss
io.out.bits.paddr := s1_paddr
io.out.bits.mmio := s1_mmio
io.out.bits.mmio := s1_mmio && !s1_exception
io.out.bits.tlbMiss := s1_tlb_miss
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
......@@ -136,6 +135,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
val tlbFeedback = ValidIO(new TlbFeedback)
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val lsq = new LoadForwardQueryIO
val sbuffer = new LoadForwardQueryIO
......@@ -144,12 +144,20 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
val s2_uop = io.in.bits.uop
val s2_mask = io.in.bits.mask
val s2_paddr = io.in.bits.paddr
val s2_tlb_miss = io.in.bits.tlbMiss
val s2_mmio = io.in.bits.mmio
val s2_exception = io.in.bits.uop.cf.exceptionVec.asUInt.orR
val s2_cache_miss = io.dcacheResp.bits.miss
val s2_cache_nack = io.dcacheResp.bits.nack
val s2_cache_replay = io.dcacheResp.bits.replay
io.dcacheResp.ready := true.B
assert(!(io.in.valid && !io.dcacheResp.valid), "DCache response got lost")
val dcacheShouldResp = !(s2_tlb_miss || s2_exception || s2_mmio)
assert(!(io.in.valid && dcacheShouldResp && !io.dcacheResp.valid), "DCache response got lost")
// feedback tlb result to RS
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s2_tlb_miss && (!s2_cache_replay || s2_mmio)
io.tlbFeedback.bits.roqIdx := s2_uop.roqIdx
val forwardMask = io.out.bits.forwardMask
val forwardData = io.out.bits.forwardData
......@@ -178,13 +186,13 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
// TODO: ECC check
io.out.valid := io.in.valid
io.out.valid := io.in.valid && !s2_tlb_miss && (!s2_cache_replay || s2_mmio)
// Inst will be canceled in store queue / lsq,
// so we do not need to care about flush in load / store unit's out.valid
io.out.bits := io.in.bits
io.out.bits.data := rdataPartialLoad
io.out.bits.miss := (s2_cache_miss || s2_cache_nack) && !fullForward
io.out.bits.mmio := io.in.bits.mmio
io.out.bits.miss := s2_cache_miss && !fullForward
io.out.bits.mmio := s2_mmio
io.in.ready := io.out.ready || !io.in.valid
......@@ -234,14 +242,14 @@ class LoadUnit extends XSModule with HasLoadHelper {
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, load_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect))
load_s1.io.dtlbResp <> io.dtlb.resp
load_s1.io.tlbFeedback <> io.tlbFeedback
io.dcache.s1_paddr <> load_s1.io.dcachePAddr
io.dcache.s1_kill := DontCare // FIXME
io.dcache.s1_kill <> load_s1.io.dcacheKill
load_s1.io.sbuffer <> io.sbuffer
load_s1.io.lsq <> io.lsq.forward
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, load_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect))
load_s2.io.tlbFeedback <> io.tlbFeedback
load_s2.io.dcacheResp <> io.dcache.resp
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
......@@ -302,4 +310,4 @@ class LoadUnit extends XSModule with HasLoadHelper {
when(io.fpout.fire()){
XSDebug("fpout %x\n", io.fpout.bits.uop.cf.pc)
}
}
\ No newline at end of file
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册