提交 096ea47e 编写于 作者: Z zhanglinjuan

fix l2 cache bug

上级 5704b623
......@@ -18,6 +18,7 @@ object SimpleBusCmd {
def writeBurst = "b0011".U // write | refill
def writeLast = "b0111".U // write | refill
def probe = "b1000".U // read | do nothing
def prefetch = "b0100".U // read | refill
// resp
def readLast = "b0110".U
......@@ -55,6 +56,7 @@ class SimpleBusReqBundle(val userBits: Int = 0) extends SimpleBusBundle {
// Compares this request's cmd against SimpleBusCmd.write.
def isWriteSingle() = cmd === SimpleBusCmd.write
// Compares this request's cmd against SimpleBusCmd.writeLast.
def isWriteLast() = cmd === SimpleBusCmd.writeLast
// Compares this request's cmd against SimpleBusCmd.probe.
def isProbe() = cmd === SimpleBusCmd.probe
// Compares this request's cmd against SimpleBusCmd.prefetch.
def isPrefetch() = cmd === SimpleBusCmd.prefetch
}
class SimpleBusRespBundle(val userBits: Int = 0) extends SimpleBusBundle {
......@@ -67,6 +69,7 @@ class SimpleBusRespBundle(val userBits: Int = 0) extends SimpleBusBundle {
// Compares this response's cmd against SimpleBusCmd.readLast.
def isReadLast() = cmd === SimpleBusCmd.readLast
// Compares this response's cmd against SimpleBusCmd.probeHit.
def isProbeHit() = cmd === SimpleBusCmd.probeHit
// Compares this response's cmd against SimpleBusCmd.probeMiss.
def isProbeMiss() = cmd === SimpleBusCmd.probeMiss
// Compares this response's cmd against SimpleBusCmd.prefetch, i.e. the
// response carries the same command code that prefetch requests use.
def isPrefetch() = cmd === SimpleBusCmd.prefetch
}
// Uncache
......
......@@ -168,6 +168,7 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
val metaWriteBus = CacheMetaArrayWriteBus()
val mem = new SimpleBusUC
val mmio = new SimpleBusUC
val dataReadRespToL1 = Output(Bool())
})
val req = io.in.bits.req
......@@ -181,10 +182,15 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
val dataRead = Mux1H(io.in.bits.waymask, io.in.bits.datas).data
val wordMask = Mux(!ro.B && req.isWrite(), MaskExpand(req.wmask), 0.U(DataBits.W))
val writeL2BeatCnt = Counter(LineBeats)
when(io.out.fire() && (req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast())) {
writeL2BeatCnt.inc()
}
val hitWrite = hit && req.isWrite()
val dataHitWriteBus = Wire(CacheDataArrayWriteBus()).apply(
data = Wire(new DataBundle).apply(MaskData(dataRead, req.wdata, wordMask)),
valid = hitWrite, setIdx = Cat(addr.index, addr.wordIndex), waymask = io.in.bits.waymask)
valid = hitWrite, setIdx = Cat(addr.index, Mux(req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast(), writeL2BeatCnt.value, addr.wordIndex)), waymask = io.in.bits.waymask)
val metaHitWriteBus = Wire(CacheMetaArrayWriteBus()).apply(
valid = hitWrite && !meta.dirty, setIdx = addr.index, waymask = io.in.bits.waymask,
......@@ -199,11 +205,35 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
val readBeatCnt = Counter(LineBeats)
val writeBeatCnt = Counter(LineBeats)
val readL2BeatCnt = Counter(LineBeats)
val s3_idle :: s3_dataReadWait :: s3_dataReadFinish :: Nil = Enum(3)
val state3 = RegInit(s3_idle)
// L2Cache needs to respond with the whole cache line when receiving a readBurst req
val dataRespToL1ReadBus = Wire(CacheDataArrayReadBus()).apply(
valid = (state === s_idle) && hit, setIdx = )
valid = (state === s_idle) && ((state3 === s3_dataReadWait) || state3 === s3_idle && hit && req.cmd === SimpleBusCmd.readBurst), setIdx = Cat(addr.index, Mux(state3 === s3_idle, Mux(addr.wordIndex === (LineBeats - 1).U, 0.U, (addr.wordIndex + 1.U)), readL2BeatCnt.value)))
switch (state3) {
is(s3_idle) {
when(state === s_idle && hit && req.cmd === SimpleBusCmd.readBurst) {
state3 := s3_dataReadWait
readL2BeatCnt.value := Mux(addr.wordIndex === (LineBeats - 1).U, 1.U, Mux(addr.wordIndex === (LineBeats - 2).U, 0.U, (addr.wordIndex + 2.U)))
}
}
is(s3_dataReadWait) {
when(readL2BeatCnt.value === Mux(addr.wordIndex === 0.U, (LineBeats - 1).U, (addr.wordIndex - 1.U))) {
state3 := s3_dataReadFinish
}.otherwise {
readL2BeatCnt.inc()
}
}
is(s3_dataReadFinish) {
when(io.out.fire()) {
state3 := s3_idle
}
}
}
val s2_idle :: s2_dataReadWait :: s2_memWriteReq :: Nil = Enum(3)
val state2 = RegInit(s2_idle)
......@@ -211,8 +241,21 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
// no dataWrite now, and it is always ready if there are no probe requests
val dataWriteBackReadBus = Wire(CacheDataArrayReadBus()).apply(
valid = (state === s_memWriteReq) && (state2 === s2_idle), setIdx = Cat(addr.index, writeBeatCnt.value))
io.dataReadBus <> dataWriteBackReadBus
val dataWay = RegEnable(dataWriteBackReadBus.resp.data, state2 === s2_dataReadWait)
/*
when(req.cmd === SimpleBusCmd.readBurst && hit) {
io.dataReadBus <> dataRespToL1ReadBus
}.otherwise {
io.dataReadBus <> dataWriteBackReadBus
}
*/
val dataReadArb = Module(new Arbiter(CacheDataArrayReadBus().req.bits, 2))
dataReadArb.io.in(0) <> dataRespToL1ReadBus.req
dataReadArb.io.in(1) <> dataWriteBackReadBus.req
io.dataReadBus.req <> dataReadArb.io.out
dataRespToL1ReadBus.resp := io.dataReadBus.resp
dataWriteBackReadBus.resp := io.dataReadBus.resp
val dataWay = RegEnable(dataWriteBackReadBus.resp.data, state2 === s2_dataReadWait)
switch (state2) {
is (s2_idle) { when (state === s_memWriteReq) { state2 := s2_dataReadWait } }
......@@ -268,6 +311,7 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
when (io.mem.resp.fire()) {
afterFirstRead := true.B
readBeatCnt.inc()
when (req.cmd === SimpleBusCmd.writeBurst) { writeL2BeatCnt.value := 0.U }
when (io.mem.resp.bits.isReadLast()) { state := s_wait_resp }
}
}
......@@ -302,15 +346,30 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
metaWriteArb.io.in(1) <> metaRefillWriteBus.req
io.metaWriteBus.req <> metaWriteArb.io.out
val respToL1 = state3 === s3_idle && hit && req.cmd === SimpleBusCmd.readBurst || state3 === s3_dataReadWait || state3 === s3_dataReadFinish
io.dataReadRespToL1 := respToL1
if(cacheLevel == 2) {
when((state === s_memReadResp) && io.mem.resp.fire() && req.cmd === SimpleBusCmd.readBurst) {
io.out.bits.rdata := dataRefill
io.out.bits.cmd := Mux(io.mem.resp.bits.isReadLast(), SimpleBusCmd.readLast, SimpleBusCmd.readBurst)
}.elsewhen (req.bits.isWriteLast) {
}.elsewhen (req.isWriteLast() || req.cmd === SimpleBusCmd.writeBurst) {
io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand)
io.out.bits.cmd := DontCare
}.elsewhen (state === s_idle && (state3 === s3_dataReadWait || state3 === s3_dataReadFinish)) {
io.out.bits.rdata := Mux1H(io.in.bits.waymask, dataRespToL1ReadBus.resp.data).data
io.out.bits.cmd := Mux(state3 === s3_dataReadFinish, SimpleBusCmd.readLast, SimpleBusCmd.readBurst)
}.otherwise {
io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand)
io.out.bits.cmd := req.cmd
}
when(io.in.valid && req.isPrefetch()) {
io.out.valid := io.in.valid && Mux(hit, true.B, Mux(req.isWrite() || mmio, state === s_wait_resp, afterFirstRead && !alreadyOutFire))
}.otherwise {
io.out.valid := io.in.valid && (Mux(req.isWrite() && (hit || !hit && state === s_wait_resp), true.B, (state === s_memReadResp && io.mem.resp.fire() && req.cmd === SimpleBusCmd.readBurst)) || state3 === s3_dataReadFinish)
}
io.out.valid := io.in.valid && Mux(hit && req.cmd === SimpleBusCmd.writeLast || !hit && req.isWrite() && state === s_wait_resp, true.B, (state === s_memReadResp && io.mem.resp.fire()))
} else {
io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand)
io.out.bits.cmd := DontCare
......@@ -323,7 +382,7 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
// request really ends.
io.isFinish := Mux(hit || req.isWrite(), io.out.fire(), (state === s_wait_resp) && (io.out.fire() || alreadyOutFire))
io.in.ready := io.out.ready && (state === s_idle) && !miss
io.in.ready := io.out.ready && (state === s_idle && !respToL1) && !miss
assert(!(metaHitWriteBus.req.valid && metaRefillWriteBus.req.valid))
assert(!(dataHitWriteBus.req.valid && dataRefillWriteBus.req.valid))
......@@ -331,6 +390,15 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
printf("%d: [" + cacheName + " stage3]: in.ready = %d, in.valid = %d, state = %d, addr = %x\n",
GTimer(), io.in.ready, io.in.valid, state, req.addr)
}
Debug(debug) {
printf("%d: [" + cacheName + " stage3]: in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, hit = %d, req.cmd = %d, state = %d, addr = %x, mem.req.fire() = %d, mem.req.bits.cmd = %d, mem.resp.fire() = %d, mem.resp.bits.cmd = %d\n",
GTimer(), io.in.ready, io.in.valid, io.out.valid, io.out.ready, hit, req.cmd, state, req.addr, io.mem.req.fire(), io.mem.req.bits.cmd, io.mem.resp.fire(), io.mem.resp.bits.cmd)
}
Debug(debug) {
printf("%d: [" + cacheName + " stage3]: in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, hit = %d, req.cmd = %d, state = %d, state3 = %d, addr = %x\n",
GTimer(), io.in.ready, io.in.valid, io.out.valid, io.out.ready, hit, req.cmd, state, state3, req.addr)
}
}
// probe
......@@ -414,7 +482,12 @@ class L2Cache(implicit val cacheConfig: CacheConfig) extends CacheModule {
PipelineConnect(s1.io.out, s2.io.in, s2.io.out.fire(), io.flush(0))
PipelineConnect(s2.io.out, s3.io.in, s3.io.isFinish, io.flush(1))
io.in.resp <> s3.io.out
s3.io.flush := io.flush(1)
when(s3.io.out.bits.isPrefetch()) {
io.in.resp.valid := false.B
}.otherwise {
io.in.resp.valid := s3.io.out.valid || s3.io.dataReadRespToL1
}
s3.io.flush := io.flush(1)
io.out <> s3.io.mem
io.mmio <> s3.io.mmio
......
......@@ -42,6 +42,7 @@ class NOOP(implicit val p: NOOPConfig) extends NOOPModule {
val imem = new SimpleBusC
val dmem = new SimpleBusC
val mmio = new SimpleBusUC
// val prefetchReq = Decoupled(new SimpleBusReqBundle)
})
val ifu = Module(new IFU)
......@@ -84,5 +85,7 @@ class NOOP(implicit val p: NOOPConfig) extends NOOPModule {
io.imem <> Cache(ifu.io.imem, mmioXbar.io.in(0), Fill(2, ifu.io.flushVec(0) | ifu.io.bpFlush))(
CacheConfig(ro = true, name = "icache", userBits = AddrBits*2))
io.dmem <> Cache(exu.io.dmem, mmioXbar.io.in(1), "b00".U, enable = HasDcache)(CacheConfig(ro = false, name = "dcache"))
// io.prefetchReq.bits := exu.io.dmem.req.bits
// io.prefetchReq.valid := exu.io.dmem.req.valid
io.mmio <> mmioXbar.io.out
}
package system
import noop.{NOOP, NOOPConfig, HasNOOPParameter, Cache, L2Cache, CacheConfig}
import bus.axi4.{AXI4, AXI4Lite}
import bus.simplebus._
import chisel3._
import chisel3.util._
import chisel3.util.experimental.BoringUtils
// Parameter mixin for the prefetcher. It extends HasNOOPParameter and adds a
// single flag on top of it.
trait HasPrefetcherParameter extends HasNOOPParameter {
// Prefetch support mirrors HasDcache, which is defined outside this file
// (presumably in HasNOOPParameter -- TODO confirm).
// NOTE(review): nothing in the code visible here reads this flag.
val supportPrefetch = HasDcache
}
/** Stateless request rewriter that turns each incoming bus request into a
  * prefetch request for a later address.
  *
  * All fields of the incoming request are copied to the output; `cmd` is then
  * overridden with `SimpleBusCmd.prefetch` and `addr` with the incoming
  * address plus 64. Nothing is registered, so `out.valid` follows `in.valid`
  * within the same cycle.
  *
  * Two alternatives were disabled in the original source and are not
  * generated: one latched the request with `RegEnable` on `in.fire()` and
  * derived the output from the latched copy, and both that variant and a
  * second one computed the stride as `1 << OffsetBits` (built with `Cat`)
  * instead of using the literal 64.
  */
class Prefetcher extends Module with HasPrefetcherParameter {
  val io = IO(new Bundle {
    val in = Flipped(Decoupled(new SimpleBusReqBundle))
    val out = Decoupled(new SimpleBusReqBundle)
  })

  // ---- Handshake ----------------------------------------------------------
  // The output is valid exactly when the input is. The input side is ready
  // whenever it is idle (no valid request) or the output transfer completes
  // in this cycle.
  private val outAccepted = io.out.fire()
  io.out.valid := io.in.valid
  io.in.ready := !io.in.valid || outAccepted

  // ---- Payload ------------------------------------------------------------
  // The stride is a 32-bit literal 64; presumably one cache line in bytes --
  // TODO confirm against the cache's OffsetBits.
  private val prefetchAddr = io.in.bits.addr + 64.U(32.W)

  // The bulk copy must come first: the field assignments below rely on
  // last-connect semantics to override `cmd` and `addr`.
  io.out.bits := io.in.bits
  io.out.bits.cmd := SimpleBusCmd.prefetch
  io.out.bits.addr := prefetchAddr
}
......@@ -5,6 +5,7 @@ import bus.axi4.{AXI4, AXI4Lite}
import bus.simplebus._
import chisel3._
import chisel3.util._
import chisel3.util.experimental.BoringUtils
class NOOPSoC(implicit val p: NOOPConfig) extends Module {
......@@ -19,24 +20,45 @@ class NOOPSoC(implicit val p: NOOPConfig) extends Module {
val cohMg = Module(new CoherenceInterconnect)
cohMg.io.in(0) <> noop.io.imem
cohMg.io.in(1) <> noop.io.dmem
// io.mem <> cohMg.io.out.toAXI4()
/*
// add L2 Cache and Dcache Prefetcher
val prefetcher = Module(new Prefetcher)
prefetcher.io.in <> noop.io.prefetchReq
val l2cacheIn = Wire(new SimpleBusUC)
val l2cacheInReqArb = Module(new Arbiter(noop.io.prefetchReq, 2))
l2cacheInReqArb.io.in(0) <> cohMg.io.out.req
l2cacheInReqArb.io.in(1) <> prefetcher.io.out
l2cacheIn.req <> l2cacheInReqArb.io.out
cohMg.io.out.resp <> l2cacheIn.resp
val mmioXbar = Module(new SimpleBusCrossbarNto1(2))
val l2cacheOut = Wire(new SimpleBusUC)
l2cacheOut <> Cache(in = cohMg.io.out, mmio = mmioXbar.io.in(0), flush = "b00".U, enable = true)(CacheConfig(ro = false, name = "l2cache", cacheLevel = 2))
l2cacheOut <> Cache(in = l2cacheIn, mmio = mmioXbar.io.in(0), flush = "b00".U, enable = true)(CacheConfig(ro = false, name = "l2cache", cacheLevel = 2))
io.mem <> l2cacheOut.toAXI4()
/*
val l2cache = Module(new L2Cache)
l2cache.io.in <> cohMg.io.out
mmioXbar.io.in(0) <> l2cache.io.mmio
l2cache.io.flush := "b00".U
io.mem <> l2cache.io.out.toAXI4()
mmioXbar.io.in(1) <> noop.io.mmio
if (p.FPGAPlatform) io.mmio <> mmioXbar.io.out.toAXI4Lite()
else io.mmio <> mmioXbar.io.out
*/
// add L2 Cache
val mmioXbar = Module(new SimpleBusCrossbarNto1(2))
val l2cacheOut = Wire(new SimpleBusUC)
l2cacheOut <> Cache(in = cohMg.io.out, mmio = mmioXbar.io.in(0), flush = "b00".U, enable = true)(CacheConfig(ro = false, name = "l2cache", cacheLevel = 2))
io.mem <> l2cacheOut.toAXI4()
mmioXbar.io.in(1) <> noop.io.mmio
if (p.FPGAPlatform) io.mmio <> mmioXbar.io.out.toAXI4Lite()
else io.mmio <> mmioXbar.io.out
/*
// no L2 Cache
io.mem <> cohMg.io.out.toAXI4()
if (p.FPGAPlatform) io.mmio <> noop.io.mmio.toAXI4Lite()
else io.mmio <> noop.io.mmio
*/
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册