dcache.scala 10.3 KB
Newer Older
Y
Yinan Xu 已提交
1
package xiangshan.cache
2 3 4

import chisel3._
import chisel3.util._
L
linjiawei 已提交
5
import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
6
import utils.{Code, RandomReplacement, XSDebug, SRAMTemplate}
7

8
import scala.math.max
A
Allen 已提交
9

10

11 12 13
// DCache specific parameters
// L1 DCache is 64set, 8way-associative, with 64byte block, a total of 32KB
// It's a virtually indexed, physically tagged cache.
/** Static configuration of the L1 data cache.
  *
  * @param nSets             number of sets
  * @param nWays             associativity (ways per set)
  * @param rowBits           width of one data row in bits
  * @param nTLBEntries       TLB entry count (not referenced in this file — presumably consumed elsewhere)
  * @param tagECC            optional name of the code protecting tags, resolved by `Code.fromString`
  * @param dataECC           optional name of the code protecting data words, resolved by `Code.fromString`
  * @param nMissEntries      entry count used to size the main miss queue entry id
  * @param nLoadMissEntries  entry count used to size the load miss queue entry id
  * @param nStoreMissEntries entry count used to size the store miss queue entry id
  * @param nMiscMissEntries  entry count used to size the misc miss queue entry id
  * @param nMMIOEntries      MMIO entry count (not referenced in this file)
  * @param nSDQ              not referenced in this file — TODO confirm meaning against its consumer
  * @param nRPQ              not referenced in this file — TODO confirm meaning against its consumer
  * @param nMMIOs            used for `nIOMSHRs` and `maxUncachedInFlight` in `HasDCacheParameters`
  * @param blockBytes        cache block size in bytes
  */
case class DCacheParameters
(
    nSets: Int = 64,
    nWays: Int = 8,
    rowBits: Int = 64,
    nTLBEntries: Int = 32,
    tagECC: Option[String] = None,
    dataECC: Option[String] = None,
    nMissEntries: Int = 1,
    nLoadMissEntries: Int = 1,
    nStoreMissEntries: Int = 1,
    nMiscMissEntries: Int = 1,
    nMMIOEntries: Int = 1,
    nSDQ: Int = 17,
    nRPQ: Int = 16,
    nMMIOs: Int = 1,
    blockBytes: Int = 64
) extends L1CacheParameters {

  /** Code applied to tag/metadata entries, selected by `tagECC`. */
  def tagCode: Code = Code.fromString(tagECC)

  /** Code applied to data words, selected by `dataECC`. */
  def dataCode: Code = Code.fromString(dataECC)

  /** Builds a fresh random replacement policy over `nWays` ways on every call. */
  def replacement = new RandomReplacement(nWays)
}

/** Mix-in exposing the DCache configuration and the widths derived from it.
  *
  * Besides the derived values, mixing this trait in runs the `require`
  * checks at the bottom at construction (elaboration) time.
  */
trait HasDCacheParameters extends HasL1CacheParameters {
  val cacheParams = dcacheParameters
  // Short alias for cacheParams.
  val cfg = cacheParams

  // Width of a data word / data row after applying the data code.
  def encWordBits = cacheParams.dataCode.width(wordBits)
  def encRowBits = encWordBits*rowWords
  def lrscCycles = LRSCCycles // ISA requires 16-insn LRSC sequences to succeed
  def lrscBackoff = 3 // disallow LRSC reacquisition briefly
  def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant
  def nIOMSHRs = cacheParams.nMMIOs
  def maxUncachedInFlight = cacheParams.nMMIOs

  // Entry id widths of the individual miss queues.
  def missQueueEntryIdWidth = log2Up(cfg.nMissEntries)
  def loadMissQueueEntryIdWidth = log2Up(cfg.nLoadMissEntries)
  def storeMissQueueEntryIdWidth = log2Up(cfg.nStoreMissEntries)
  def miscMissQueueEntryIdWidth = log2Up(cfg.nMiscMissEntries)
  // Widest entry id among the load, store and misc client miss queues.
  def clientMissQueueEntryIdWidth = max(
    max(loadMissQueueEntryIdWidth,
      storeMissQueueEntryIdWidth),
      miscMissQueueEntryIdWidth)

  def nClientMissQueues = 3
  def clientIdWidth = log2Up(nClientMissQueues)
  // A miss queue client id is the concatenation {client id, entry id}:
  // the entry id occupies the low bits, the client id sits above it.
  def missQueueClientIdWidth = clientIdWidth + clientMissQueueEntryIdWidth
  def clientIdMSB = missQueueClientIdWidth - 1
  def clientIdLSB = clientMissQueueEntryIdWidth
  def entryIdMSB = clientMissQueueEntryIdWidth - 1
  def entryIdLSB = 0
  def reqIdWidth = 64

  require(isPow2(nSets), s"nSets($nSets) must be pow2")
  require(isPow2(nWays), s"nWays($nWays) must be pow2")
  require(full_divide(rowBits, wordBits), s"rowBits($rowBits) must be multiple of wordBits($wordBits)")
  require(full_divide(beatBits, rowBits), s"beatBits($beatBits) must be multiple of rowBits($rowBits)")
  // this is a VIPT L1 cache
  // The untagged (index + offset) bits must fit inside the page offset.
  require(pgIdxBits >= untagBits, s"page aliasing problem: pgIdxBits($pgIdxBits) < untagBits($untagBits)")
}

A
Allen 已提交
77
/** Base class for DCache hardware modules: an `L1CacheModule` with the DCache parameters mixed in. */
abstract class DCacheModule extends L1CacheModule
  with HasDCacheParameters

A
Allen 已提交
80
/** Base class for DCache bundles: an `L1CacheBundle` with the DCache parameters mixed in. */
abstract class DCacheBundle extends L1CacheBundle
  with HasDCacheParameters

// basic building blocks for L1 DCache
/** Per-line metadata: a coherence state plus the line's tag.
  *
  * Field order is significant: `L1MetadataArray` flattens this bundle with
  * `asUInt` and rebuilds it with `asTypeOf`.
  */
class L1Metadata extends DCacheBundle {
  // Coherence state of the line.
  val coh = new ClientMetadata
  // Tag of the line, `tagBits` wide.
  val tag = UInt(tagBits.W)
}

/** Factory for `L1Metadata` hardware values. */
object L1Metadata {
  /** Creates a `Wire` of `L1Metadata` driven by the given fields.
    *
    * @param tag value assigned to the `tag` field
    * @param coh value assigned to the `coh` field
    * @return the newly created wire
    */
  def apply(tag: Bits, coh: ClientMetadata) = {
    val meta = Wire(new L1Metadata)
    meta.tag := tag
    meta.coh := coh
    meta
  }
}

/** Read request to the metadata array. */
class L1MetaReadReq extends DCacheBundle {
  // Set index to access.
  val idx    = UInt(idxBits.W)
  // One bit per way.
  val way_en = UInt(nWays.W)
  // Tag carried with the request; in this file it is only printed by the dump helpers.
  val tag    = UInt(tagBits.W)
}

/** Write request to the metadata array: the read request fields plus the metadata to store. */
class L1MetaWriteReq extends L1MetaReadReq {
  // New metadata written to the ways selected by `way_en` of set `idx`.
  val data = new L1Metadata
}

/** Read request to the data array. */
class L1DataReadReq extends DCacheBundle {
  // you can choose which bank to read to save power
  // One bit per row of the block; a row is only read when its bit is set.
  val rmask  = Bits(blockRows.W)
  // One bit per way.
  val way_en = Bits(nWays.W)
  // Untagged address bits (`untagBits` wide).
  val addr   = Bits(untagBits.W)
}

A
Allen 已提交
115
// Now, we can write a cache-block in a single cycle
/** Write request to the data array, carrying every row of the block at once.
  *
  * Inherits `rmask`, `way_en` and `addr` from `L1DataReadReq`.
  */
class L1DataWriteReq extends L1DataReadReq {
  // Per row, one write-enable bit per word of that row.
  val wmask  = Vec(blockRows, Bits(rowWords.W))
  // Per row, the encoded row data (`encRowBits` wide).
  val data   = Vec(blockRows, Bits(encRowBits.W))
}
120

121 122
/** Common IO and debug helpers shared by data array implementations.
  *
  * The IO has one read port per load pipeline, a single write port, a
  * response vector indexed as `resp(pipeline)(way)(row)` and one nack flag
  * per read port. Subclasses provide the storage and drive the outputs.
  */
abstract class AbstractDataArray extends DCacheModule {
  val io = IO(new DCacheBundle {
    val read  = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1DataReadReq)))
    val write = Flipped(DecoupledIO(new L1DataWriteReq))
    val resp  = Output(Vec(LoadPipelineWidth, Vec(nWays, Vec(blockRows, Bits(encRowBits.W)))))
    val nacks = Output(Vec(LoadPipelineWidth, Bool()))
  })

  /** Builds a `Vec` with one element per load pipeline, element `i` being `f(i)`. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /** Prints every read port whose `valid` is asserted. */
  def dumpRead() = {
    (0 until LoadPipelineWidth) map { w =>
      when (io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
  }

  /** Prints the write request, row by row, while `io.write.valid` is asserted. */
  def dumpWrite() = {
    when (io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      (0 until blockRows) map { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  /** Prints the response of every pipeline, way and row (not gated by any `when` here). */
  def dumpResp() = {
    (0 until LoadPipelineWidth) map { w =>
      XSDebug(s"DataArray ReadResp channel: $w\n")
      (0 until nWays) map { i =>
        (0 until blockRows) map { r =>
          XSDebug(s"way: $i cycle: $r data: %x\n", io.resp(w)(i)(r))
        }
      }
    }
  }

  /** Prints every read port that is currently nacked. */
  def dumpNack() = {
    (0 until LoadPipelineWidth) map { w =>
      when (io.nacks(w)) {
        XSDebug(s"DataArray NACK channel: $w\n")
      }
    }
  }

  /** Emits all debug printouts of this array. */
  def dump() = {
    // Explicit `()`: these are side-effecting empty-paren methods.
    dumpRead()
    dumpWrite()
    dumpNack()
    dumpResp()
  }
}

/** Data array that keeps a separate copy of the storage for each read port.
  *
  * For every (read port, way, row, word) combination a dedicated
  * `SRAMTemplate` with `nSets` entries is instantiated. All copies observe
  * the same write port, while each copy is read only by its own read port.
  */
class DuplicatedDataArray extends AbstractDataArray
{
  val singlePort = true
  // write is always ready
  io.write.ready := true.B
  // Drop the block offset bits to obtain the set index of the write.
  val waddr = (io.write.bits.addr >> blockOffBits).asUInt()
  for (j <- 0 until LoadPipelineWidth) {
    // Set index of the read on port j.
    val raddr = (io.read(j).bits.addr >> blockOffBits).asUInt()

    // for single port SRAM, do not allow read and write in the same cycle
    // for dual port SRAM, raddr === waddr is undefined behavior
    val rwhazard = if(singlePort) io.write.valid else io.write.valid && waddr === raddr
    io.read(j).ready := !rwhazard

    for (w <- 0 until nWays) {
      for (r <- 0 until blockRows) {
        // One wire per word of the row; word 0 ends up in the least significant bits.
        val resp = Seq.fill(rowWords)(Wire(Bits(encWordBits.W)))
        io.resp(j)(w)(r) := Cat((0 until rowWords).reverse map (k => resp(k)))

        for (k <- 0 until rowWords) {
          val array = Module(new SRAMTemplate(Bits(encWordBits.W), set=nSets, way=1,
            shouldReset=false, holdRead=false, singlePort=singlePort))
          // data write
          // Written only when this way is selected and word k of row r is enabled.
          val wen = io.write.valid && io.write.bits.way_en(w) && io.write.bits.wmask(r)(k)
          array.io.w.req.valid := wen
          array.io.w.req.bits.apply(
            setIdx=waddr,
            data=io.write.bits.data(r)(encWordBits*(k+1)-1,encWordBits*k),
            waymask=1.U)

          // data read
          // Read only when this way is selected and row r is requested by rmask.
          val ren = io.read(j).valid && io.read(j).bits.way_en(w) && io.read(j).bits.rmask(r)
          array.io.r.req.valid := ren
          array.io.r.req.bits.apply(setIdx=raddr)
          // The SRAM read data is registered once more before being returned.
          resp(k) := RegNext(array.io.r.resp.data(0))
        }
      }
    }
    // This implementation never nacks a read.
    io.nacks(j) := false.B
  }
}

221
/** Single-ported metadata (tag + coherence) array covering all ways of every set.
  *
  * After reset the array walks through all `nSets` sets and writes the value
  * produced by `onReset` into every way. While that initialisation is in
  * progress external writes are not accepted, and reads are blocked
  * whenever any write (initialisation or external) is being performed.
  *
  * @param onReset produces the metadata value stored in every entry during initialisation
  */
class L1MetadataArray(onReset: () => L1Metadata) extends DCacheModule {
  val rstVal = onReset()
  val io = IO(new Bundle {
    val read = Flipped(Decoupled(new L1MetaReadReq))
    val write = Flipped(Decoupled(new L1MetaWriteReq))
    val resp = Output(Vec(nWays, new L1Metadata))
  })
  // Counts the sets initialised so far; initialisation is active while it is below nSets.
  val rst_cnt = RegInit(0.U(log2Up(nSets+1).W))
  val rst = rst_cnt < nSets.U
  // During initialisation the counter supplies the set index and rstVal the data.
  val waddr = Mux(rst, rst_cnt, io.write.bits.idx)
  val wdata = Mux(rst, rstVal, io.write.bits.data).asUInt
  // All ways are selected during initialisation or when the cache is direct-mapped.
  val wmask = Mux(rst || (nWays == 1).B, (-1).asSInt, io.write.bits.way_en.asSInt).asBools
  // NOTE(review): rmask is not referenced anywhere else in this class.
  val rmask = Mux(rst || (nWays == 1).B, (-1).asSInt, io.read.bits.way_en.asSInt).asBools
  when (rst) { rst_cnt := rst_cnt + 1.U }

  val metaBits = rstVal.getWidth
  val encMetaBits = cacheParams.tagCode.width(metaBits)

  val tag_array = Module(new SRAMTemplate(UInt(encMetaBits.W), set=nSets, way=nWays,
    shouldReset=false, holdRead=false, singlePort=true))

  // tag write
  val wen = rst || io.write.valid
  tag_array.io.w.req.valid := wen
  tag_array.io.w.req.bits.apply(
    setIdx=waddr,
    data=cacheParams.tagCode.encode(wdata),
    waymask=VecInit(wmask).asUInt)

  // tag read
  tag_array.io.r.req.valid := io.read.fire()
  tag_array.io.r.req.bits.apply(setIdx=io.read.bits.idx)
  // Decode every way and reinterpret the corrected bits as L1Metadata.
  io.resp := tag_array.io.r.resp.data.map(rdata =>
      cacheParams.tagCode.decode(rdata).corrected.asTypeOf(rstVal))

  // Reads give way to any write; external writes wait for initialisation to finish.
  io.read.ready := !wen
  io.write.ready := !rst

  /** Prints the read request when it fires. */
  def dumpRead() = {
    when (io.read.fire()) {
      XSDebug("MetaArray Read: idx: %d way_en: %x tag: %x\n",
        io.read.bits.idx, io.read.bits.way_en, io.read.bits.tag)
    }
  }

  /** Prints the write request when it fires. */
  def dumpWrite() = {
    when (io.write.fire()) {
      XSDebug("MetaArray Write: idx: %d way_en: %x tag: %x new_tag: %x new_coh: %x\n",
        io.write.bits.idx, io.write.bits.way_en, io.write.bits.tag, io.write.bits.data.tag, io.write.bits.data.coh.state)
    }
  }

  /** Prints the response of every way (not gated by any `when` here). */
  def dumpResp() = {
    (0 until nWays) map { i =>
      XSDebug(s"MetaArray Resp: way: $i tag: %x coh: %x\n",
        io.resp(i).tag, io.resp(i).coh.state)
    }
  }

  /** Emits all debug printouts of this array. */
  def dump() = {
    // Explicit `()`: these are side-effecting empty-paren methods.
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}

/** Metadata array replicated once per load pipeline.
  *
  * Every copy is an `L1MetadataArray` connected to the same write port, and
  * each copy serves the read port and response of its own pipeline.
  */
class DuplicatedMetaArray extends DCacheModule {
  val io = IO(new DCacheBundle {
    val read  = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1MetaReadReq)))
    val write = Flipped(DecoupledIO(new L1MetaWriteReq))
    val resp  = Output(Vec(LoadPipelineWidth, Vec(nWays, new L1Metadata)))
  })

  /** Metadata stored in every entry during initialisation: tag 0 with the reset coherence state. */
  def onReset = L1Metadata(0.U, ClientMetadata.onReset)
  val meta = Seq.fill(LoadPipelineWidth) { Module(new L1MetadataArray(onReset _)) }

  for (w <- 0 until LoadPipelineWidth) {
    // NOTE(review): every copy is bulk-connected to the same io.write, so
    // io.write.ready is connected once per copy; presumably all copies report
    // the same readiness — confirm.
    meta(w).io.write <> io.write
    meta(w).io.read  <> io.read(w)
    io.resp(w) <> meta(w).io.resp
  }

  /** Prints every read port whose request fires. */
  def dumpRead() = {
    (0 until LoadPipelineWidth) map { w =>
      when (io.read(w).fire()) {
        XSDebug(s"MetaArray Read channel: $w idx: %d way_en: %x tag: %x\n",
          io.read(w).bits.idx, io.read(w).bits.way_en, io.read(w).bits.tag)
      }
    }
  }

  /** Prints the write request when it fires. */
  def dumpWrite() = {
    when (io.write.fire()) {
      XSDebug("MetaArray Write: idx: %d way_en: %x tag: %x new_tag: %x new_coh: %x\n",
        io.write.bits.idx, io.write.bits.way_en, io.write.bits.tag, io.write.bits.data.tag, io.write.bits.data.coh.state)
    }
  }

  /** Prints the response of every pipeline and way (not gated by any `when` here). */
  def dumpResp() = {
    (0 until LoadPipelineWidth) map { w =>
      (0 until nWays) map { i =>
        XSDebug(s"MetaArray Resp: channel: $w way: $i tag: %x coh: %x\n",
          io.resp(w)(i).tag, io.resp(w)(i).coh.state)
      }
    }
  }

  /** Emits all debug printouts of this array. */
  def dump() = {
    // Explicit `()`: these are side-effecting empty-paren methods.
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}