MainPipe.scala 23.2 KB
Newer Older
L
Lemover 已提交
1 2
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
Y
Yinan Xu 已提交
3
* Copyright (c) 2020-2021 Peng Cheng Laboratory
L
Lemover 已提交
4 5 6 7 8 9 10 11 12 13 14 15 16
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

A
Allen 已提交
17 18
package xiangshan.cache

19
import chipsalliance.rocketchip.config.Parameters
A
Allen 已提交
20
import chisel3._
Z
zhanglinjuan 已提交
21
import chisel3.internal.firrtl.Port
A
Allen 已提交
22
import chisel3.util._
23 24 25
import freechips.rocketchip.tilelink.ClientStates._
import freechips.rocketchip.tilelink.MemoryOpCategories._
import freechips.rocketchip.tilelink.TLPermissions._
A
Allen 已提交
26
import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions}
Z
zhanglinjuan 已提交
27
import utils._
A
Allen 已提交
28

Z
zhanglinjuan 已提交
29 30
/** Request bundle consumed by the DCache main pipe.
  *
  * One bundle type carries every kind of request the pipe arbitrates between
  * (atomics, stores and probes); the `miss` / `probe` flags and `source` tell them apart.
  */
class MainPipeReq(implicit p: Parameters) extends DCacheBundle {
  val miss = Bool() // only amo miss will refill in main pipe
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
  val miss_param = UInt(TLPermissions.bdWidth.W)
  val miss_dirty = Bool()

  val probe = Bool()
  val probe_param = UInt(TLPermissions.bdWidth.W)
  val probe_need_data = Bool()

  // request info
  // reqs from Store, AMO use this
  // probe does not use this
  val source = UInt(sourceTypeWidth.W)
  val cmd = UInt(M_SZ.W)
  // if dcache size > 32KB, vaddr is also needed for store
  // vaddr is used to get extra index bits
  val vaddr  = UInt(VAddrBits.W)
  // must be aligned to block
  val addr   = UInt(PAddrBits.W)

  // store
  val store_data = UInt((cfg.blockBytes * 8).W)
  val store_mask = UInt(cfg.blockBytes.W)

  // which word does amo work on?
  val word_idx = UInt(log2Up(cfg.blockBytes * 8 / DataBits).W)
  val amo_data   = UInt(DataBits.W)
  val amo_mask   = UInt((DataBits / 8).W)

  val id = UInt(reqIdWidth.W)

  // Request-kind predicates, decoded from `source`.
  def isLoad: Bool = source === LOAD_SOURCE.U
  def isStore: Bool = source === STORE_SOURCE.U
  def isAMO: Bool = source === AMO_SOURCE.U

  /** Builds a new `MainPipeReq` wire from a store-buffer line request.
    *
    * The method does not read any field of `this`; it creates a fresh wire.
    * Fields that are not assigned explicitly below are left as `DontCare`.
    *
    * @param store the line-granularity store request to convert
    * @return a wire marked as a non-miss, non-probe request with `STORE_SOURCE`
    */
  def convertStoreReq(store: DCacheLineReq): MainPipeReq = {
    val req = Wire(new MainPipeReq)
    // Default everything first; the explicit connections below override it.
    req := DontCare
    req.miss := false.B
    req.miss_dirty := false.B
    req.probe := false.B
    req.probe_need_data := false.B
    req.source := STORE_SOURCE.U
    req.cmd := store.cmd
    req.addr := store.addr
    req.vaddr := store.vaddr
    req.store_data := store.data
    req.store_mask := store.mask
    req.id := store.id
    req
  }
}

83
class MainPipe(implicit p: Parameters) extends DCacheModule {
Z
zhanglinjuan 已提交
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
  // Width of the plain Meta bundle, and the width of one meta-array entry as delivered
  // on io.meta_resp (tagCode-encoded MetaAndTag width minus the tag bits).
  val metaBits = (new Meta).getWidth
  val encMetaBits = cacheParams.tagCode.width((new MetaAndTag).getWidth) - tagBits

  val io = IO(new Bundle() {
    // probe queue
    val probe_req = Flipped(DecoupledIO(new MainPipeReq))
    // store miss go to miss queue
    val miss = DecoupledIO(new MissReq)
    // store buffer
    val store_req = Flipped(DecoupledIO(new DCacheLineReq))
    val store_replay_resp = ValidIO(new DCacheLineResp)
    val store_hit_resp = ValidIO(new DCacheLineResp)
    val release_update = ValidIO(new ReleaseUpdate)
    // atomics
    val atomic_req = Flipped(DecoupledIO(new MainPipeReq))
    val atomic_resp = ValidIO(new AtomicsResp)
    // write-back queue
    val wb = DecoupledIO(new WritebackReq)

    // data array: line read request, per-bank read result, banked write
    val data_read = DecoupledIO(new L1BankedDataReadLineReq)
    val data_resp = Input(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val data_write = DecoupledIO(new L1BankedDataWriteReq)

    // meta array: one encoded entry per way comes back on meta_resp
    val meta_read = DecoupledIO(new MetaReadReq)
    val meta_resp = Input(Vec(nWays, UInt(encMetaBits.W)))
    val meta_write = DecoupledIO(new MetaWriteReq)

    // tag array: one tag per way comes back on tag_resp
    val tag_read = DecoupledIO(new TagReadReq)
    val tag_resp = Input(Vec(nWays, UInt(tagBits.W)))
    val tag_write = DecoupledIO(new TagWriteReq)

    // update state vec in replacement algo
    val replace_access = ValidIO(new ReplacementAccessBundle)
    // find the way to be replaced
    val replace_way = new ReplacementWayReqIO

    // per-stage occupancy (set index, and way enable for s1..s3) exported to the outside
    val status = new Bundle() {
      val s0_set = ValidIO(UInt(idxBits.W))
      val s1, s2, s3 = ValidIO(new Bundle() {
        val set = UInt(idxBits.W)
        val way_en = UInt(nWays.W)
      })
    }

    // lrsc locked block should block probe
    val lrsc_locked_block = Output(Valid(UInt(PAddrBits.W)))
    // when asserted, the LR/SC reservation counter is cleared
    val invalid_resv_set = Input(Bool())
  })

Z
zhanglinjuan 已提交
133 134 135
  // meta array is made of regs, so meta write or read should always be ready
  assert(RegNext(io.meta_read.ready))
  assert(RegNext(io.meta_write.ready))

  // Per-stage "same set as the s0 request" flags; each one is driven next to the
  // stage it belongs to. While any of them is high the s0 request is held back.
  // NOTE: `s2_s0_set_conlict` is a misspelling of "conflict"; the name is kept here
  // because the s2 stage further down in this module assigns it under that name.
  val s1_s0_set_conflict, s2_s0_set_conlict, s3_s0_set_conflict = Wire(Bool())
  val set_conflict = s1_s0_set_conflict || s2_s0_set_conlict || s3_s0_set_conflict
  // Back-pressure signals: stage N can accept a new request from stage N-1.
  val s1_ready, s2_ready, s3_ready = Wire(Bool())

Z
zhanglinjuan 已提交
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
  // convert store req to main pipe req, and select a req from atomic, store and probe
  val store_req = Wire(DecoupledIO(new MainPipeReq))
  store_req.bits := (new MainPipeReq).convertStoreReq(io.store_req.bits)
  store_req.valid := io.store_req.valid
  io.store_req.ready := store_req.ready
  // chisel3.util.Arbiter favours the lowest-numbered input: atomic > store > probe
  val req_arb = Module(new Arbiter(new MainPipeReq, 3))
  req_arb.io.in(0) <> io.atomic_req
  req_arb.io.in(1) <> store_req
  req_arb.io.in(2) <> io.probe_req

  // s0: read meta and tag
  val req = Wire(DecoupledIO(new MainPipeReq))
  req <> req_arb.io.out
  val s0_req = req.bits
  val s0_idx = get_idx(s0_req.vaddr)
  // s0 may advance only when both arrays accept the read, s1 has room, and no
  // later stage currently holds a request to the same set
  val s0_can_go = io.meta_read.ready && io.tag_read.ready && s1_ready && !set_conflict
  val s0_fire = req.valid && s0_can_go
158

159 160 161
  // Per-bank view of the store mask: bit i of bank_write is set when any mask bit of
  // bank i is set, bit i of bank_full_write when all of them are.
  val bank_write = VecInit((0 until DCacheBanks).map(i => get_mask_of_bank(i, s0_req.store_mask).orR)).asUInt
  val bank_full_write = VecInit((0 until DCacheBanks).map(i => get_mask_of_bank(i, s0_req.store_mask).andR)).asUInt
  val banks_full_overwrite = bank_full_write.andR

  // A store only needs to read the banks it writes partially.
  val banked_store_rmask = bank_write & ~bank_full_write
  val banked_full_rmask = ~0.U(DCacheBanks.W)
  val banked_none_rmask = 0.U(DCacheBanks.W)

  // Which kind of request requires a data-array read in s1.
  val store_need_data = !s0_req.probe && s0_req.isStore && banked_store_rmask.orR
  val probe_need_data = s0_req.probe
  val amo_need_data = !s0_req.probe && s0_req.isAMO
  val miss_need_data = s0_req.miss

  val banked_need_data = store_need_data || probe_need_data || amo_need_data || miss_need_data

  // Read mask: partial banks for a store, every bank for probe/amo/miss, nothing otherwise.
  val s0_banked_rmask = Mux(store_need_data, banked_store_rmask,
    Mux(probe_need_data || amo_need_data || miss_need_data,
      banked_full_rmask,
      banked_none_rmask
    ))

  // generate wmask here and use it in stage 2
  val banked_store_wmask = bank_write
  val banked_full_wmask = ~0.U(DCacheBanks.W)
  val banked_none_wmask = 0.U(DCacheBanks.W)
184

Z
zhanglinjuan 已提交
185
  // s1: read data
  val s1_valid = RegInit(false.B)
  // Everything below is captured from s0 in the cycle s0 fires.
  val s1_need_data = RegEnable(banked_need_data, s0_fire)
  val s1_req = RegEnable(s0_req, s0_fire)
  val s1_banked_rmask = RegEnable(s0_banked_rmask, s0_fire)
  val s1_banked_store_wmask = RegEnable(banked_store_wmask, s0_fire)
  // s1 leaves when s2 has room and, if a data read is needed, the data array accepts it
  val s1_can_go = s2_ready && (io.data_read.ready || !s1_need_data)
  val s1_fire = s1_valid && s1_can_go
  val s1_idx = get_idx(s1_req.vaddr)
  // A new arrival takes priority over the departure, so back-to-back requests keep s1 valid.
  when (s0_fire) {
    s1_valid := true.B
  }.elsewhen (s1_fire) {
    s1_valid := false.B
  }
  s1_ready := !s1_valid || s1_can_go
  s1_s0_set_conflict := s1_valid && s0_idx === s1_idx
201

Z
zhanglinjuan 已提交
202 203 204 205
  /** Extracts the low `metaBits` bits (the plain meta field) from an encoded meta entry.
    *
    * @param encMeta encoded entry; its width must equal `encMetaBits` (checked at elaboration)
    * @return bits `metaBits - 1` down to 0 of `encMeta`
    */
  def getMeta(encMeta: UInt): UInt = {
    val encodedWidth = encMeta.getWidth
    require(encodedWidth == encMetaBits)
    val metaMsb = metaBits - 1
    encMeta(metaMsb, 0)
  }
Y
Yinan Xu 已提交
206

Z
zhanglinjuan 已提交
207 208 209 210 211
  // Tag / meta read results as seen by s1. The array outputs are sampled in the cycle
  // after s0 fires; in every other cycle the previously selected value is fed back
  // through a register so the result stays stable while s1 is stalled.
  val tag_resp = Wire(Vec(nWays, UInt(tagBits.W)))
  val ecc_meta_resp = Wire(Vec(nWays, UInt(encMetaBits.W)))
  tag_resp := Mux(RegNext(s0_fire), io.tag_resp, RegNext(tag_resp))
  ecc_meta_resp := Mux(RegNext(s0_fire), io.meta_resp, RegNext(ecc_meta_resp))
  // Strip each encoded entry down to its low metaBits bits.
  val meta_resp = ecc_meta_resp.map(getMeta(_))
212 213

  // Builds a Vec with one element per cache way by applying `f` to each way index 0 until nWays.
  def wayMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(nWays)(f))
Z
zhanglinjuan 已提交
214 215
  // A way matches when its tag equals the request tag and its coherence state is valid.
  val s1_tag_eq_way = wayMap((w: Int) => tag_resp(w) === get_tag(s1_req.addr)).asUInt
  val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && Meta(meta_resp(w)).coh.isValid()).asUInt
  val s1_tag_match = s1_tag_match_way.orR

  // On a match use the matching way's tag/coh; otherwise the request tag and an all-zero coh.
  val s1_hit_tag = Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap(w => tag_resp(w))), get_tag(s1_req.addr))
  val s1_hit_coh = ClientMetadata(Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap(w => meta_resp(w))), 0.U))

  // replacement policy
  // The victim way is sampled in the cycle after s0 fires and then held, in the same
  // way as the tag/meta responses.
  val s1_repl_way_en = WireInit(0.U(nWays.W))
  s1_repl_way_en := Mux(RegNext(s0_fire), UIntToOH(io.replace_way.way), RegNext(s1_repl_way_en))
  val s1_repl_tag = Mux1H(s1_repl_way_en, wayMap(w => tag_resp(w)))
  val s1_repl_coh = Mux1H(s1_repl_way_en, wayMap(w => meta_resp(w))).asTypeOf(new ClientMetadata)

  // A miss refill or a (non-probe) store that finds no matching way works on the victim way.
  val s1_need_replacement = (s1_req.miss || s1_req.isStore && !s1_req.probe) && !s1_tag_match
  val s1_way_en = Mux(s1_need_replacement, s1_repl_way_en, s1_tag_match_way)
  val s1_tag = Mux(s1_need_replacement, s1_repl_tag, s1_hit_tag)
  val s1_coh = Mux(s1_need_replacement, s1_repl_coh, s1_hit_coh)
231

Z
zhanglinjuan 已提交
232 233 234
  // s2: select data, return resp if this is a store miss
  val s2_valid = RegInit(false.B)
  // State captured from s1 in the cycle s1 fires.
  val s2_req = RegEnable(s1_req, s1_fire)
  val s2_tag_match = RegEnable(s1_tag_match, s1_fire)
  val s2_hit_coh = RegEnable(s1_hit_coh, s1_fire)
  // Permission check for the request's command, and the coherence state after the access.
  val (s2_has_permission, _, s2_new_hit_coh) = s2_hit_coh.onAccess(s2_req.cmd)
  val s2_repl_way_en = RegEnable(s1_repl_way_en, s1_fire)
  val s2_repl_tag = RegEnable(s1_repl_tag, s1_fire)
  val s2_repl_coh = RegEnable(s1_repl_coh, s1_fire)
  val s2_need_replacement = RegEnable(s1_need_replacement, s1_fire)
  val s2_idx = get_idx(s2_req.vaddr)
  val s2_way_en = RegEnable(s1_way_en, s1_fire)
  val s2_tag = RegEnable(s1_tag, s1_fire)
  val s2_coh = RegEnable(s1_coh, s1_fire)
  val s2_banked_store_wmask = RegEnable(s1_banked_store_wmask, s1_fire)

  // A hit needs both a matching valid way and sufficient permission.
  val s2_hit = s2_tag_match && s2_has_permission
  val s2_amo_hit = s2_hit && !s2_req.probe && !s2_req.miss && s2_req.isAMO
  val s2_store_hit = s2_hit && !s2_req.probe && !s2_req.miss && s2_req.isStore

  s2_s0_set_conlict := s2_valid && s0_idx === s2_idx

  // For a store req, it either hits and goes to s3, or miss and enter miss queue immediately
  val s2_can_go_to_s3 = (s2_req.probe || s2_req.miss || (s2_req.isStore || s2_req.isAMO) && s2_hit) && s3_ready
  // Note: this does not depend on io.miss.ready; when the miss queue is not ready the
  // request still leaves s2 and `replay` below drives the replay responses.
  val s2_can_go_to_mq = !s2_req.probe && !s2_req.miss && (s2_req.isStore || s2_req.isAMO) && !s2_hit
  assert(RegNext(!(s2_valid && s2_can_go_to_s3 && s2_can_go_to_mq)))
  val s2_can_go = s2_can_go_to_s3 || s2_can_go_to_mq
  val s2_fire = s2_valid && s2_can_go
  val s2_fire_to_s3 = s2_valid && s2_can_go_to_s3
  when (s1_fire) {
    s2_valid := true.B
  }.elsewhen (s2_fire) {
    s2_valid := false.B
  }
  s2_ready := !s2_valid || s2_can_go
  val replay = !io.miss.ready
268

Z
zhanglinjuan 已提交
269 270
  // Data-array read result as seen by s2: sampled in the cycle after s1 fires and
  // otherwise fed back through a register so it stays stable while s2 is stalled.
  val data_resp = Wire(io.data_resp.cloneType)
  data_resp := Mux(RegNext(s1_fire), io.data_resp, RegNext(data_resp))
  // One merged row per bank; driven by the per-bank merge loop of the s2 stage.
  val s2_store_data_merged = Wire(Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W)))
272 273 274 275 276

  /** Byte-granular merge of `new_data` into `old_data`.
    *
    * @param old_data data to keep where the mask bit is 0
    * @param new_data data to take where the mask bit is 1
    * @param wmask    byte mask; each bit is expanded to 8 bits before selecting
    * @return the merged value
    */
  def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val bitEnable = FillInterleaved(8, wmask)
    val keptBits = ~bitEnable & old_data
    val writtenBits = bitEnable & new_data
    keptBits | writtenBits
  }
Y
Yinan Xu 已提交
277

278
  // Raw per-bank data read from the array. The ECC decode result is computed but not
  // yet acted upon (see TODO).
  val s2_data = WireInit(VecInit((0 until DCacheBanks).map(i => {
    val decoded = cacheParams.dataCode.decode(data_resp(i).asECCData())
    // assert(!RegNext(s2_valid && s2_hit && decoded.uncorrectable))
    // TODO: trigger ecc error
    data_resp(i).raw_data
  })))

  // Merge the store data into the data read from the array, bank by bank.
  for (i <- 0 until DCacheBanks) {
    val old_data = s2_data(i)
    val new_data = get_data_of_bank(i, s2_req.store_data)
    // for amo hit, we should use read out SRAM data
    // do not merge with store data
    val wmask = Mux(s2_amo_hit, 0.U(wordBytes.W), get_mask_of_bank(i, s2_req.store_mask))
    s2_store_data_merged(i) := mergePutData(old_data, new_data, wmask)
  }

  // The row selected by word_idx; becomes the AMO operand in s3.
  val s2_data_word = s2_store_data_merged(s2_req.word_idx)
295

Z
zhanglinjuan 已提交
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
  // s3: write data, meta and tag
  val s3_valid = RegInit(false.B)
  // All s3 state is captured from s2 only when the request actually moves on to s3
  // (requests diverted to the miss queue never update these registers).
  val s3_req = RegEnable(s2_req, s2_fire_to_s3)
  val s3_idx = get_idx(s3_req.vaddr)
  val s3_tag = RegEnable(s2_tag, s2_fire_to_s3)
  val s3_tag_match = RegEnable(s2_tag_match, s2_fire_to_s3)
  val s3_coh = RegEnable(s2_coh, s2_fire_to_s3)
  val s3_hit = RegEnable(s2_hit, s2_fire_to_s3)
  val s3_amo_hit = RegEnable(s2_amo_hit, s2_fire_to_s3)
  val s3_store_hit = RegEnable(s2_store_hit, s2_fire_to_s3)
  val s3_hit_coh = RegEnable(s2_hit_coh, s2_fire_to_s3)
  val s3_new_hit_coh = RegEnable(s2_new_hit_coh, s2_fire_to_s3)
  val s3_way_en = RegEnable(s2_way_en, s2_fire_to_s3)
  val s3_banked_store_wmask = RegEnable(s2_banked_store_wmask, s2_fire_to_s3)
  val s3_store_data_merged = RegEnable(s2_store_data_merged, s2_fire_to_s3)
  val s3_data_word = RegEnable(s2_data_word, s2_fire_to_s3)
  val s3_data = RegEnable(s2_data, s2_fire_to_s3)
  // Probe outcome for the selected way's coherence state: whether it holds dirty data,
  // the shrink parameter to report, and the state after the probe.
  val (probe_has_dirty_data, probe_shrink_param, probe_new_coh) = s3_coh.onProbe(s3_req.probe_param)
  val s3_need_replacement = RegEnable(s2_need_replacement, s2_fire_to_s3)
315

Z
zhanglinjuan 已提交
316 317 318 319 320
  // Does this request need to write the meta array?
  //  - a miss refill always does;
  //  - a probe only when it hit a way and the probe changes that way's state;
  //  - a store / AMO only when the access changes the hit way's state.
  val miss_update_meta = s3_req.miss
  val probe_update_meta = s3_req.probe && s3_tag_match && s3_coh =/= probe_new_coh
  val store_update_meta = s3_req.isStore && !s3_req.probe && s3_hit_coh =/= s3_new_hit_coh
  val amo_update_meta = s3_req.isAMO && !s3_req.probe && s3_hit_coh =/= s3_new_hit_coh
  val update_meta = miss_update_meta || probe_update_meta || store_update_meta || amo_update_meta
321

322 323 324 325 326 327 328 329 330 331 332 333 334 335
  /** Coherence state to install after a miss refill.
    *
    * The lookup key is the concatenation of the command category, the grant `param`
    * and the `dirty` flag; any combination not listed in the table yields `Nothing`.
    *
    * @param cmd   memory command of the request
    * @param param permission parameter of the refill
    * @param dirty whether the refilled block is dirty
    */
  def missCohGen(cmd: UInt, param: UInt, dirty: Bool) = {
    val category = categorize(cmd)
    val lookupKey = Cat(category, param, dirty)
    // (category, param, dirty, next state)
    val transitions = Seq(
      (rd, toB, false.B, Branch),
      (rd, toB, true.B,  Branch),
      (rd, toT, false.B, Trunk),
      (rd, toT, true.B,  Dirty),
      (wi, toT, false.B, Trunk),
      (wi, toT, true.B,  Dirty),
      (wr, toT, false.B, Dirty),
      (wr, toT, true.B,  Dirty)
    )
    val table = transitions.map { case (cat, perm, isDirty, next) =>
      Cat(cat, perm, isDirty) -> next
    }
    MuxLookup(lookupKey, Nothing, table)
  }
  // Coherence state a miss refill installs, derived from the request's cmd / param / dirty.
  val miss_new_coh = ClientMetadata(missCohGen(s3_req.cmd, s3_req.miss_param, s3_req.miss_dirty))

  // State written to the meta array, by priority: miss refill, then probe, then
  // store/AMO hit. The reset state is only a filler for the case where nothing is written.
  val new_coh = Mux(
    miss_update_meta,
    miss_new_coh,
    Mux(
      probe_update_meta,
      probe_new_coh,
      Mux(
        store_update_meta || amo_update_meta,
        s3_new_hit_coh,
        ClientMetadata.onReset
      )
    )
  )
350 351 352 353 354 355 356 357

  // LR, SC and AMO
  // Debug bookkeeping: address and count of consecutive failing SCs.
  val debug_sc_fail_addr = RegInit(0.U)
  val debug_sc_fail_cnt  = RegInit(0.U(8.W))

  // Reservation state: the reservation counts as valid while the down-counter is
  // above lrscBackoff; lrsc_addr holds the reserved block address.
  val lrsc_count = RegInit(0.U(log2Ceil(lrscCycles).W))
  val lrsc_valid = lrsc_count > lrscBackoff.U
  val lrsc_addr  = Reg(UInt())
  val s3_lr = !s3_req.probe && s3_req.isAMO && s3_req.cmd === M_XLR
  val s3_sc = !s3_req.probe && s3_req.isAMO && s3_req.cmd === M_XSC
  val s3_lrsc_addr_match = lrsc_valid && lrsc_addr === get_block_addr(s3_req.addr)
  // An SC fails unless a valid reservation exists for the same block.
  val s3_sc_fail = s3_sc && !s3_lrsc_addr_match
  val s3_sc_resp = Mux(s3_sc_fail, 1.U, 0.U)

  // An AMO can execute in s3 either as a refilled AMO miss or as an AMO hit.
  val s3_can_do_amo = (s3_req.miss && !s3_req.probe && s3_req.source === AMO_SOURCE.U) || s3_amo_hit
  val s3_can_do_amo_write = s3_can_do_amo && isWrite(s3_req.cmd) && !s3_sc_fail

  // An executable LR (re)starts the reservation; an SC, or an LR that cannot execute,
  // clears it. Otherwise the counter runs down to zero.
  when (s3_valid && (s3_lr || s3_sc)) {
    when (s3_can_do_amo && s3_lr) {
      lrsc_count := (lrscCycles - 1).U
      lrsc_addr := get_block_addr(s3_req.addr)
    } .otherwise {
      lrsc_count := 0.U
    }
  } .elsewhen (lrsc_count > 0.U) {
    lrsc_count := lrsc_count - 1.U
  }

  io.lrsc_locked_block.valid := lrsc_valid
  io.lrsc_locked_block.bits  := lrsc_addr

  // when we release this block,
  // we invalidate this reservation set
  // (this assignment comes last, so it overrides the updates above)
  when (io.invalid_resv_set) {
    lrsc_count := 0.U
  }

  // Track SCs that keep failing on the same address; a successful SC to that address
  // resets the count, a failing SC to a different address restarts tracking there.
  when (s3_valid) {
    when (s3_req.addr === debug_sc_fail_addr) {
      when (s3_sc_fail) {
        debug_sc_fail_cnt := debug_sc_fail_cnt + 1.U
      } .elsewhen (s3_sc) {
        debug_sc_fail_cnt := 0.U
      }
    } .otherwise {
      when (s3_sc_fail) {
        debug_sc_fail_addr := s3_req.addr
        debug_sc_fail_cnt  := 1.U
      }
    }
  }
  assert(debug_sc_fail_cnt < 100.U, "L1DCache failed too many SCs in a row")

Z
zhanglinjuan 已提交
403

404
  // An AMO writes exactly the bank selected by word_idx.
  val banked_amo_wmask = UIntToOH(s3_req.word_idx)
//  val banked_wmask = s3_banked_store_wmask
  // Bank write mask by priority: a miss refill writes every bank, a store hit writes the
  // banks its mask touches, a writing AMO writes its word's bank, anything else writes none.
  val banked_wmask = Mux(
    s3_req.miss,
    banked_full_wmask,
    Mux(
      s3_store_hit,
      s3_banked_store_wmask,
      Mux(
        s3_can_do_amo_write,
        banked_amo_wmask,
        banked_none_wmask
      )
    )
  )
  val update_data = banked_wmask.asUInt.orR
420 421 422 423 424 425 426 427 428 429

  // generate write data
  // AMO hits
  // lhs is the word selected in s2 (array data, or merged store data), rhs the AMO operand.
  val amoalu   = Module(new AMOALU(wordBits))
  amoalu.io.mask := s3_req.amo_mask
  amoalu.io.cmd  := s3_req.cmd
  amoalu.io.lhs  := s3_data_word
  amoalu.io.rhs  := s3_req.amo_data

  // merge amo write data
  // Only the bank addressed by word_idx takes the ALU result, and only when the AMO
  // is allowed to write; every other bank passes the s2-merged data through unchanged.
  val s3_amo_data_merged = Wire(Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W)))
  for (i <- 0 until DCacheBanks) {
    val old_data = s3_store_data_merged(i)
    val new_data = amoalu.io.out
    val wmask = Mux(s3_can_do_amo_write && s3_req.word_idx === i.U,
      ~0.U(wordBytes.W), 0.U(wordBytes.W))
    s3_amo_data_merged(i) := mergePutData(old_data, new_data, wmask)
  }

Z
zhanglinjuan 已提交
439 440 441
  // A miss writes back when it replaces a way whose state is not Nothing;
  // every probe produces a write-back-queue request.
  val miss_wb = s3_req.miss && s3_need_replacement && s3_coh.state =/= ClientStates.Nothing
  val probe_wb = s3_req.probe
  val need_wb = miss_wb || probe_wb

  // Shrink parameter for evicting the victim (computed as a flush of its current state).
  val (_, miss_shrink_param, _) = s3_coh.onCacheControl(M_FLUSH)
  val writeback_param = Mux(miss_wb, miss_shrink_param, probe_shrink_param)
  // Whether the write-back carries data. The choice between the two forms is made at
  // elaboration time; with alwaysReleaseData a replacing miss also sends data.
  val writeback_data = if (dcacheParameters.alwaysReleaseData) {
    s3_tag_match && s3_req.probe && s3_req.probe_need_data ||
      s3_coh === ClientStates.Dirty || miss_wb && s3_coh.state =/= ClientStates.Nothing
  } else {
    s3_tag_match && s3_req.probe && s3_req.probe_need_data || s3_coh === ClientStates.Dirty
  }
451

Z
zhanglinjuan 已提交
452 453 454 455 456 457 458 459 460 461 462 463 464 465
  // Per-request-kind conditions for leaving s3: every port the request will use must be ready.
  val s3_probe_can_go = s3_req.probe && io.wb.ready && (io.meta_write.ready || !probe_update_meta)
  val s3_store_can_go = s3_req.isStore && !s3_req.probe && (io.meta_write.ready || !store_update_meta) && (io.data_write.ready || !update_data)
  val s3_amo_can_go = s3_amo_hit && (io.meta_write.ready || !amo_update_meta) && (io.data_write.ready || !update_data)
  // NOTE(review): the meta-write term below is gated by amo_update_meta although a miss
  // always sets miss_update_meta; this is harmless as long as io.meta_write.ready is
  // always high (asserted near the top of this module) -- confirm before relaxing that.
  // io.wb.ready is required here whether or not a write-back is actually needed.
  val s3_miss_can_go = s3_req.miss &&
    (io.meta_write.ready || !amo_update_meta) &&
    (io.data_write.ready || !update_data) &&
    io.tag_write.ready &&
    io.wb.ready
  val s3_can_go = s3_probe_can_go || s3_store_can_go || s3_amo_can_go || s3_miss_can_go
  val s3_fire = s3_valid && s3_can_go
  when (s2_fire_to_s3) {
    s3_valid := true.B
  }.elsewhen (s3_fire) {
    s3_valid := false.B
  }
Z
zhanglinjuan 已提交
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
  s3_ready := !s3_valid || s3_can_go
  s3_s0_set_conflict := s3_valid && s3_idx === s0_idx
  assert(RegNext(!s3_valid || !(s3_req.isStore && !s3_req.probe) || s3_hit)) // miss store should never come to s3


  // s0 handshake with the arbiter
  req.ready := s0_can_go

  // s0: issue meta and tag reads for every way of the indexed set
  io.meta_read.valid := req.valid && s1_ready && !set_conflict
  io.meta_read.bits.idx := get_idx(s0_req.vaddr)
  io.meta_read.bits.way_en := ~0.U(nWays.W)

  io.tag_read.valid := req.valid && s1_ready && !set_conflict
  io.tag_read.bits.idx := get_idx(s0_req.vaddr)
  io.tag_read.bits.way_en := ~0.U(nWays.W)

  // s1: issue the data read (only when the request needs data and s2 can accept it)
  io.data_read.valid := s1_valid && s1_need_data && s2_ready
  io.data_read.bits.rmask := s1_banked_rmask
  io.data_read.bits.way_en := s1_way_en
  io.data_read.bits.addr := s1_req.vaddr

  // s2: store / AMO that did not hit is handed to the miss queue
  io.miss.valid := s2_valid && s2_can_go_to_mq
  val miss = io.miss.bits
  // fields not assigned below are left unconnected on purpose
  miss := DontCare
  miss.source := s2_req.source
  miss.cmd := s2_req.cmd
  miss.addr := s2_req.addr
  miss.vaddr := s2_req.vaddr
  miss.way_en := s2_way_en
  miss.store_data := s2_req.store_data
  miss.store_mask := s2_req.store_mask
  miss.word_idx := s2_req.word_idx
  miss.amo_data := s2_req.amo_data
  miss.amo_mask := s2_req.amo_mask
  miss.req_coh := s2_hit_coh
  miss.replace_coh := s2_repl_coh
  miss.replace_tag := s2_repl_tag
  miss.id := s2_req.id

  // s2: the miss queue did not accept a store miss -> report a replay
  io.store_replay_resp.valid := s2_valid && s2_can_go_to_mq && replay && s2_req.isStore
  io.store_replay_resp.bits.data := DontCare
  io.store_replay_resp.bits.miss := true.B
  io.store_replay_resp.bits.replay := true.B
  io.store_replay_resp.bits.id := s2_req.id

  // s3: store hit completes
  io.store_hit_resp.valid := s3_valid && s3_store_can_go
  io.store_hit_resp.bits.data := DontCare
  io.store_hit_resp.bits.miss := false.B
  io.store_hit_resp.bits.replay := false.B
  io.store_hit_resp.bits.id := s3_req.id

  // s3: a store / AMO hit that writes data also publishes the written line on release_update
  io.release_update.valid := s3_valid && (s3_store_can_go || s3_amo_can_go) && s3_hit && update_data
  io.release_update.bits.addr := s3_req.addr
  io.release_update.bits.mask := Mux(s3_store_hit, s3_banked_store_wmask, banked_amo_wmask)
  io.release_update.bits.data := s3_amo_data_merged.asUInt

  // Atomic response when the operation completes in s3: SC returns its status code,
  // everything else returns the word that fed the AMO ALU.
  val atomic_hit_resp = Wire(new AtomicsResp)
  atomic_hit_resp.data := Mux(s3_sc, s3_sc_resp, s3_data_word)
  atomic_hit_resp.miss := false.B
  atomic_hit_resp.miss_id := s3_req.miss_id
  atomic_hit_resp.replay := false.B
  atomic_hit_resp.ack_miss_queue := s3_req.miss
  // NOTE(review): `id` carries lrsc_valid here rather than s3_req.id -- presumably the
  // consumer reads it as a reservation-valid flag; confirm against the atomics unit.
  atomic_hit_resp.id := lrsc_valid
  // Atomic response when an AMO miss could not enter the miss queue in s2.
  val atomic_replay_resp = Wire(new AtomicsResp)
  atomic_replay_resp.data := DontCare
  atomic_replay_resp.miss := true.B
  atomic_replay_resp.miss_id := DontCare
  atomic_replay_resp.replay := true.B
  atomic_replay_resp.ack_miss_queue := false.B
  atomic_replay_resp.id := DontCare
  val atomic_replay_resp_valid = s2_valid && s2_can_go_to_mq && replay && s2_req.isAMO
  val atomic_hit_resp_valid = s3_valid && (s3_amo_can_go || s3_miss_can_go && s3_req.isAMO)
  // If both are valid in the same cycle the replay response wins the mux.
  io.atomic_resp.valid := atomic_replay_resp_valid || atomic_hit_resp_valid
  io.atomic_resp.bits := Mux(atomic_replay_resp_valid, atomic_replay_resp, atomic_hit_resp)
540

Z
zhanglinjuan 已提交
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594
  // s3: array writes, all issued in the cycle the request leaves s3

  // meta: new coherence state for the selected way
  io.meta_write.valid := s3_fire && update_meta
  io.meta_write.bits.idx := s3_idx
  io.meta_write.bits.way_en := s3_way_en
  io.meta_write.bits.tag := get_tag(s3_req.addr)
  io.meta_write.bits.meta.coh := new_coh

  // tag: only a miss refill installs a new tag
  io.tag_write.valid := s3_fire && s3_req.miss
  io.tag_write.bits.idx := s3_idx
  io.tag_write.bits.way_en := s3_way_en
  io.tag_write.bits.tag := get_tag(s3_req.addr)

  // data: banks selected by banked_wmask receive the store/AMO-merged line
  io.data_write.valid := s3_fire && update_data
  io.data_write.bits.way_en := s3_way_en
  io.data_write.bits.addr := s3_req.vaddr
  io.data_write.bits.wmask := banked_wmask
  io.data_write.bits.data := s3_amo_data_merged

  // Write-back request. `valid` repeats the probe / miss go-conditions of s3 without
  // the io.wb.ready term, so that valid does not depend on this port's own ready.
  io.wb.valid := s3_valid && (
    // probe can go to wbq
    s3_req.probe && (io.meta_write.ready || !probe_update_meta) ||
      // amo miss can go to wbq
      s3_req.miss &&
        (io.meta_write.ready || !amo_update_meta) &&
        (io.data_write.ready || !update_data) &&
        io.tag_write.ready
    ) && need_wb
  // Address of the line in the selected way: its stored tag plus the untagged part of vaddr.
  io.wb.bits.addr := get_block_addr(Cat(s3_tag, get_untag(s3_req.vaddr)))
  io.wb.bits.param := writeback_param
  io.wb.bits.voluntary := s3_req.miss
  io.wb.bits.hasData := writeback_data
  io.wb.bits.dirty := s3_coh === ClientStates.Dirty
  // paren-less asUInt, matching the other asUInt calls in this file
  io.wb.bits.data := s3_data.asUInt
  io.wb.bits.delay_release := false.B
  io.wb.bits.miss_id := DontCare

  // Tell the replacement logic about store / AMO accesses that matched a way; reported
  // one cycle after s1 fires, i.e. while the request sits in s2.
  io.replace_access.valid := RegNext(s1_fire && (s1_req.isAMO || s1_req.isStore) && !s1_req.probe && s1_tag_match)
  io.replace_access.bits.set := s2_idx
  io.replace_access.bits.way := RegNext(OHToUInt(s1_way_en))

  // Ask for a victim way for the set of the request that has just entered s1.
  io.replace_way.set.valid := RegNext(s0_fire)
  io.replace_way.set.bits := s1_idx

  // TODO: consider block policy of a finer granularity
  // Export which set (and way, for s1..s3) each pipeline stage currently occupies.
  io.status.s0_set.valid := req.valid
  io.status.s0_set.bits := get_idx(s0_req.vaddr)
  io.status.s1.valid := s1_valid
  io.status.s1.bits.set := s1_idx
  io.status.s1.bits.way_en := s1_way_en
  io.status.s2.valid := s2_valid
  io.status.s2.bits.set := s2_idx
  io.status.s2.bits.way_en := s2_way_en
  io.status.s3.valid := s3_valid
  io.status.s3.bits.set := s3_idx
  io.status.s3.bits.way_en := s3_way_en
595 596 597 598 599 600 601 602 603 604 605 606

  // Performance-counter outputs. The bundle is sized for 2 events, which matches the
  // number of entries in perfEvents below; keep the two in sync when adding events.
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(2))
  })
  // (event name, per-cycle increment): requests accepted in s0, and the number of
  // occupied pipeline stages in the current cycle.
  val perfEvents = Seq(
    ("dcache_mp_req                    ", s0_fire                                                                     ),
    ("dcache_mp_total_penalty          ", (PopCount(VecInit(Seq(s0_fire, s1_valid, s2_valid, s3_valid))))             ),
  )

  // Each increment is registered once before it is driven onto the matching output.
  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
607
}