/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

19
import chipsalliance.rocketchip.config.Parameters
A
Allen 已提交
20 21
import chisel3._
import chisel3.util._
22
import utils._
23
import freechips.rocketchip.tilelink._
Z
zhanglinjuan 已提交
24 25 26
import freechips.rocketchip.tilelink.ClientStates._
import freechips.rocketchip.tilelink.MemoryOpCategories._
import freechips.rocketchip.tilelink.TLPermissions._
27
import difftest._
28
import huancun.{AliasKey, DirtyKey, PreferCacheKey, PrefetchKey}
A
Allen 已提交
29

Z
zhanglinjuan 已提交
30
/** A miss request handed to the miss queue.
  *
  * One bundle carries the fields for every request source; which of the
  * store / AMO fields are meaningful depends on `source` (see `isLoad`,
  * `isStore`, `isAMO`).
  */
class MissReq(implicit p: Parameters) extends DCacheBundle {
  // requester type, compared against LOAD_SOURCE / STORE_SOURCE / AMO_SOURCE below
  val source = UInt(sourceTypeWidth.W)
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  // one bit per way
  val way_en = UInt(DCacheWays.W)

  // store: a whole block of data with a per-byte mask
  val store_data = UInt((cfg.blockBytes * 8).W)
  val store_mask = UInt(cfg.blockBytes.W)

  // which word does amo work on?
  val word_idx = UInt(log2Up(blockWords).W)
  val amo_data = UInt(DataBits.W)
  val amo_mask = UInt((DataBits / 8).W)

  // coherence state seen by the request for its own block
  val req_coh = new ClientMetadata
  // coherence state and tag of the block in the way selected by way_en
  // NOTE(review): presumably the replacement victim -- confirm against the producer of this bundle
  val replace_coh = new ClientMetadata
  val replace_tag = UInt(tagBits.W)
  val id = UInt(reqIdWidth.W)

  /** True when the request comes from the load source. */
  def isLoad: Bool = source === LOAD_SOURCE.U

  /** True when the request comes from the store source. */
  def isStore: Bool = source === STORE_SOURCE.U

  /** True when the request comes from the AMO source. */
  def isAMO: Bool = source === AMO_SOURCE.U

  /** True when `req_coh` holds a valid state, i.e. the block is present in some state. */
  def hit: Bool = req_coh.isValid()
}

57
/** One miss-queue entry: tracks a single outstanding block miss from
  * allocation until the block has been handed to the refill pipe (or, for an
  * AMO, to the main pipe).
  *
  * Progress is recorded in two families of flag registers that all reset to
  * `true`:
  *  - `s_*` flags are cleared when an action still has to be issued and are
  *    set again when the corresponding handshake fires;
  *  - `w_*` flags are cleared while a response is outstanding and are set
  *    when it arrives.
  *
  * The entry is released once `s_grantack && s_refill && w_mainpipe_resp`.
  *
  * @param edge TileLink client edge used to build Acquire / GrantAck messages
  *             and to count Grant beats
  */
class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // MSHR ID
    val id = Input(UInt(log2Up(cfg.nMissEntries).W))
    // client requests
    // allocate this entry for new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_ready = Output(Bool())
    // this entry is busy and it can not merge the new req
    val secondary_reject = Output(Bool())
    val req    = Flipped(ValidIO(new MissReq))
    val refill_to_ldq = ValidIO(new Refill)
    // TODO: bypass refill data to load pipe

    // bus
    val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))

    // refill pipe
    val refill_pipe_req = DecoupledIO(new RefillPipeReq)

    // replace pipe
    val replace_pipe_req = DecoupledIO(new ReplacePipeReq)
    val replace_pipe_resp = Input(Bool())

    // main pipe: amo miss
    val main_pipe_req = DecoupledIO(new MainPipeReq)
    val main_pipe_resp = Input(Bool())

    // block address of this entry; valid between the last Grant beat and the refill
    val block_addr = ValidIO(UInt(PAddrBits.W))

    val debug_early_replace = ValidIO(new Bundle() {
      // info about the block that has been replaced
      val idx = UInt(idxBits.W) // vaddr
      val tag = UInt(tagBits.W) // paddr
    })
  })

  // the queue must never try to allocate an entry that is not free
  assert(!RegNext(io.primary_valid && !io.primary_ready))

  val req = Reg(new MissReq)
  val req_valid = RegInit(false.B)
  val set = addr_to_dcache_set(req.vaddr)

  // "still to send" flags
  val s_acquire = RegInit(true.B)
  val s_grantack = RegInit(true.B)
  val s_replace_req = RegInit(true.B)
  val s_refill = RegInit(true.B)
  val s_mainpipe_req = RegInit(true.B)

  // "still waiting for" flags
  val w_grantfirst = RegInit(true.B)
  val w_grantlast = RegInit(true.B)
  val w_replace_resp = RegInit(true.B)
  val w_mainpipe_resp = RegInit(true.B)

  val release_entry = s_grantack && s_refill && w_mainpipe_resp

  // Acquire is pending and is not being accepted this cycle
  val acquire_not_sent = !s_acquire && !io.mem_acquire.ready
  val data_not_refilled = !w_grantlast

  // whether some merged request wants the data forwarded on refill_to_ldq;
  // the wire lets a load merged this cycle take effect immediately
  val should_refill_data_reg =  Reg(Bool())
  val should_refill_data = WireInit(should_refill_data_reg)

  // a store that writes every byte of the block only needs permission, not data
  val full_overwrite = req.isStore && req.store_mask.andR

  val (_, _, refill_done, refill_count) = edge.count(io.mem_grant)
  val grant_param = Reg(UInt(TLPermissions.bdWidth.W))

  // number of GrantData beats received so far
  // NOTE(review): beatBits is also used below as the width of one beat of data
  // (refill_data_raw), so this counter looks far wider than needed -- confirm.
  val grant_beats = RegInit(0.U(beatBits.W))

  // primary allocation: latch the request and arm the state flags
  when (io.req.valid && io.primary_ready && io.primary_valid) {
    req_valid := true.B
    req := io.req.bits
    req.addr := get_block_addr(io.req.bits.addr)

    s_acquire := false.B
    s_grantack := false.B

    w_grantfirst := false.B
    w_grantlast := false.B

    // loads and stores go through the refill pipe
    when (!io.req.bits.isAMO) {
      s_refill := false.B
    }

    // a miss on a way that currently holds a valid block must replace it first
    when (!io.req.bits.hit && io.req.bits.replace_coh.isValid() && !io.req.bits.isAMO) {
      s_replace_req := false.B
      w_replace_resp := false.B
    }

    // AMOs are replayed through the main pipe instead
    when (io.req.bits.isAMO) {
      s_mainpipe_req := false.B
      w_mainpipe_resp := false.B
    }

    should_refill_data_reg := io.req.bits.isLoad
    grant_beats := 0.U
  }.elsewhen (release_entry) {
    req_valid := false.B
  }

  // secondary request merged into this busy entry
  when (io.req.valid && io.secondary_ready) {
    assert(io.req.bits.req_coh.state <= req.req_coh.state)
    assert(!(io.req.bits.isAMO || req.isAMO))
    // use the most uptodate meta
    req.req_coh := io.req.bits.req_coh

    when (io.req.bits.isStore) {
      // take over the store's fields, but keep the block-aligned address and
      // the way / victim information chosen at primary allocation
      req := io.req.bits
      req.addr := get_block_addr(io.req.bits.addr)
      req.way_en := req.way_en
      req.replace_coh := req.replace_coh
      req.replace_tag := req.replace_tag
    }

    should_refill_data := should_refill_data_reg || io.req.bits.isLoad
    should_refill_data_reg := should_refill_data
  }

  when (io.mem_acquire.fire()) {
    s_acquire := true.B
  }

  // refill_data holds the block after merging store data; refill_data_raw
  // holds the Grant beats exactly as received
  val refill_data = Reg(Vec(blockRows, UInt(rowBits.W)))
  val refill_data_raw = Reg(Vec(blockBytes/beatBytes, UInt(beatBits.W)))
  val new_data = Wire(Vec(blockRows, UInt(rowBits.W)))
  val new_mask = Wire(Vec(blockRows, UInt(rowBytes.W)))

  /** Byte-wise select between `old_data` and `new_data`: bytes whose bit in
    * `wmask` is set come from `new_data`, all others from `old_data`.
    */
  def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(8, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }
  for (i <- 0 until blockRows) {
    new_data(i) := req.store_data(rowBits * (i + 1) - 1, rowBits * i)
    // we only need to merge data for Store
    new_mask(i) := Mux(req.isStore, req.store_mask(rowBytes * (i + 1) - 1, rowBytes * i), 0.U)
  }
  val hasData = RegInit(true.B)
  val isDirty = RegInit(false.B)
  when (io.mem_grant.fire()) {
    w_grantfirst := true.B
    grant_param := io.mem_grant.bits.param
    when (edge.hasData(io.mem_grant.bits)) {
      // GrantData
      for (i <- 0 until beatRows) {
        val idx = (refill_count << log2Floor(beatRows)) + i.U
        val grant_row = io.mem_grant.bits.data(rowBits * (i + 1) - 1, rowBits * i)
        refill_data(idx) := mergePutData(grant_row, new_data(idx), new_mask(idx))
      }
      w_grantlast := w_grantlast || refill_done
      hasData := true.B
      grant_beats := grant_beats + 1.U
    }.otherwise {
      // Grant
      // a data-less Grant is only expected when the store overwrites the whole block
      assert(full_overwrite)
      for (i <- 0 until blockRows) {
        refill_data(i) := new_data(i)
      }
      w_grantlast := true.B
      hasData := false.B
    }

    refill_data_raw(refill_count) := io.mem_grant.bits.data
    isDirty := io.mem_grant.bits.echo.lift(DirtyKey).getOrElse(false.B)
  }

  when (io.mem_finish.fire()) {
    s_grantack := true.B
  }

  when (io.replace_pipe_req.fire()) {
    s_replace_req := true.B
  }

  when (io.replace_pipe_resp) {
    w_replace_resp := true.B
  }

  when (io.refill_pipe_req.fire()) {
    s_refill := true.B
  }

  when (io.main_pipe_req.fire()) {
    s_mainpipe_req := true.B
  }

  when (io.main_pipe_resp) {
    w_mainpipe_resp := true.B
  }

  /** Merge window 1: the Acquire has not been accepted yet, the entry holds
    * a load, and the newcomer is a load or a store.
    */
  def before_read_sent_can_merge(new_req: MissReq): Bool = {
    acquire_not_sent && req.isLoad && (new_req.isLoad || new_req.isStore)
  }

  /** Merge window 2: the last Grant beat has not arrived yet, the entry holds
    * a load or a store, and the newcomer is a load.
    */
  def before_data_refill_can_merge(new_req: MissReq): Bool = {
    data_not_refilled && (req.isLoad || req.isStore) && new_req.isLoad
  }

  /** True when `new_req` falls into either merge window, ignoring whether its
    * block address matches. For the second window the beat addressed by
    * `new_req` must not be below the number of beats already received.
    */
  private def in_merge_window(new_req: MissReq): Bool = {
    val beat_match = new_req.addr(blockOffBits - 1, beatOffBits) >= grant_beats
    before_read_sent_can_merge(new_req) ||
      beat_match && before_data_refill_can_merge(new_req)
  }

  /** `new_req` targets this entry's block and can still be merged.
    *
    * There is no explicit `req_valid` term: both merge windows need
    * `!s_acquire` or `!w_grantlast`, and those flags reset to `true`
    * (presumably they are also `true` again by the time the entry is released).
    */
  def should_merge(new_req: MissReq): Bool = {
    val block_match = req.addr === get_block_addr(new_req.addr)
    block_match && in_merge_window(new_req)
  }

  /** `new_req` conflicts with this busy entry: either it targets the same
    * block but can no longer be merged, or it targets a different block in
    * the same set and the same way.
    */
  def should_reject(new_req: MissReq): Bool = {
    val block_match = req.addr === get_block_addr(new_req.addr)
    val set_match = set === addr_to_dcache_set(new_req.vaddr)

    req_valid &&
      Mux(
        block_match,
        !in_merge_window(new_req),
        set_match && new_req.way_en === req.way_en
      )
  }

  io.primary_ready := !req_valid
  io.secondary_ready := should_merge(io.req.bits)
  io.secondary_reject := should_reject(io.req.bits)

  // should not allocate, merge or reject at the same time
  assert(RegNext(PopCount(Seq(io.primary_ready, io.secondary_ready, io.secondary_reject)) <= 1.U))

  // refill_data re-sliced into l1BusDataWidth-wide pieces for the load queue
  val refill_data_splited = WireInit(VecInit(Seq.tabulate(cfg.blockBytes * 8 / l1BusDataWidth)(i => {
    val data = refill_data.asUInt
    data((i + 1) * l1BusDataWidth - 1, i * l1BusDataWidth)
  })))
  // reported one cycle after each Grant beat, once refill_data has been written
  io.refill_to_ldq.valid := RegNext(!w_grantlast && io.mem_grant.fire()) && should_refill_data
  io.refill_to_ldq.bits.addr := RegNext(req.addr + (refill_count << refillOffBits))
  io.refill_to_ldq.bits.data := refill_data_splited(RegNext(refill_count))
  io.refill_to_ldq.bits.refill_done := RegNext(refill_done && io.mem_grant.fire())
  io.refill_to_ldq.bits.hasdata := hasData
  io.refill_to_ldq.bits.data_raw := refill_data_raw.asUInt

  io.mem_acquire.valid := !s_acquire
  val grow_param = req.req_coh.onAccess(req.cmd)._2
  val acquireBlock = edge.AcquireBlock(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = (log2Up(cfg.blockBytes)).U,
    growPermissions = grow_param
  )._2
  val acquirePerm = edge.AcquirePerm(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = (log2Up(cfg.blockBytes)).U,
    growPermissions = grow_param
  )._2
  io.mem_acquire.bits := Mux(full_overwrite, acquirePerm, acquireBlock)
  // resolve cache alias by L2
  io.mem_acquire.bits.user.lift(AliasKey).foreach( _ := req.vaddr(13, 12))
  // trigger prefetch
  io.mem_acquire.bits.user.lift(PrefetchKey).foreach(_ := true.B)
  // prefer not to cache data in L2 by default
  io.mem_acquire.bits.user.lift(PreferCacheKey).foreach(_ := false.B)
  require(nSets <= 256)

  // accept Grant beats only after the Acquire has gone out
  io.mem_grant.ready := !w_grantlast && s_acquire

  val grantack = RegEnable(edge.GrantAck(io.mem_grant.bits), io.mem_grant.fire())
  assert(RegNext(!io.mem_grant.fire() || edge.isRequest(io.mem_grant.bits)))
  // GrantAck may be sent as soon as the first Grant beat has been seen
  io.mem_finish.valid := !s_grantack && w_grantfirst
  io.mem_finish.bits := grantack

  io.replace_pipe_req.valid := !s_replace_req
  val replace = io.replace_pipe_req.bits
  replace.miss_id := io.id
  replace.way_en := req.way_en
  replace.vaddr := req.vaddr
  replace.tag := req.replace_tag

  // refill only after the victim has been replaced and all data has arrived
  io.refill_pipe_req.valid := !s_refill && w_replace_resp && w_grantlast
  val refill = io.refill_pipe_req.bits
  refill.source := req.source
  refill.addr := req.addr
  refill.way_en := req.way_en
  // write every bank unless this was a data-less Grant for a store, in which
  // case only banks touched by the store mask are written
  refill.wmask := Mux(
    hasData || req.isLoad,
    ~0.U(DCacheBanks.W),
    VecInit((0 until DCacheBanks).map(i => get_mask_of_bank(i, req.store_mask).orR)).asUInt
  )
  refill.data := refill_data.asTypeOf((new RefillPipeReq).data)
  refill.miss_id := io.id
  refill.id := req.id

  /** Coherence state to install after the miss, looked up from the command
    * category, the Grant's permission `param` and its dirty bit. Combinations
    * not listed fall back to `Nothing`.
    */
  def missCohGen(cmd: UInt, param: UInt, dirty: Bool) = {
    val c = categorize(cmd)
    MuxLookup(Cat(c, param, dirty), Nothing, Seq(
      //(effect param) -> (next)
      Cat(rd, toB, false.B)  -> Branch,
      Cat(rd, toB, true.B)   -> Branch,
      Cat(rd, toT, false.B)  -> Trunk,
      Cat(rd, toT, true.B)   -> Dirty,
      Cat(wi, toT, false.B)  -> Trunk,
      Cat(wi, toT, true.B)   -> Dirty,
      Cat(wr, toT, false.B)  -> Dirty,
      Cat(wr, toT, true.B)   -> Dirty))
  }
  refill.meta.coh := ClientMetadata(missCohGen(req.cmd, grant_param, isDirty))
  refill.alias := req.vaddr(13, 12) // TODO

  io.main_pipe_req.valid := !s_mainpipe_req && w_grantlast
  io.main_pipe_req.bits := DontCare
  io.main_pipe_req.bits.miss := true.B
  io.main_pipe_req.bits.miss_id := io.id
  io.main_pipe_req.bits.miss_param := grant_param
  io.main_pipe_req.bits.miss_dirty := isDirty
  io.main_pipe_req.bits.probe := false.B
  io.main_pipe_req.bits.source := req.source
  io.main_pipe_req.bits.cmd := req.cmd
  io.main_pipe_req.bits.vaddr := req.vaddr
  io.main_pipe_req.bits.addr := req.addr
  io.main_pipe_req.bits.store_data := refill_data.asUInt
  io.main_pipe_req.bits.store_mask := ~0.U(blockBytes.W)
  io.main_pipe_req.bits.word_idx := req.word_idx
  io.main_pipe_req.bits.amo_data := req.amo_data
  io.main_pipe_req.bits.amo_mask := req.amo_mask
  io.main_pipe_req.bits.id := req.id

  io.block_addr.valid := req_valid && w_grantlast && !s_refill
  io.block_addr.bits := req.addr

  io.debug_early_replace.valid := BoolStopWatch(io.replace_pipe_resp, io.refill_pipe_req.fire())
  io.debug_early_replace.bits.idx := addr_to_dcache_set(req.vaddr)
  io.debug_early_replace.bits.tag := req.replace_tag

  // performance counters
  XSPerfAccumulate("miss_req_primary", io.req.valid && io.primary_ready && io.primary_valid)
  XSPerfAccumulate("miss_req_merged", io.req.valid && io.secondary_ready)
  XSPerfAccumulate("load_miss_penalty_to_use",
    should_refill_data &&
      BoolStopWatch(io.req.valid && io.primary_ready && io.primary_valid, io.refill_to_ldq.valid, true)
  )
  XSPerfAccumulate("main_pipe_penalty", BoolStopWatch(io.main_pipe_req.fire(), io.main_pipe_resp))
  XSPerfAccumulate("penalty_blocked_by_channel_A", io.mem_acquire.valid && !io.mem_acquire.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", s_acquire && !w_grantlast && !io.mem_grant.valid)
  XSPerfAccumulate("penalty_waiting_for_channel_E", io.mem_finish.valid && !io.mem_finish.ready)
  XSPerfAccumulate("penalty_from_grant_to_refill", !s_refill && w_grantlast)
  XSPerfAccumulate("soft_prefetch_number", io.req.valid && io.primary_ready && io.primary_valid && io.req.bits.source === SOFT_PREFETCH.U)

  val (mshr_penalty_sample, mshr_penalty) = TransactionLatencyCounter(RegNext(io.req.valid && io.primary_ready && io.primary_valid), release_entry)
  XSPerfHistogram("miss_penalty", mshr_penalty, mshr_penalty_sample, 0, 20, 1, true, true)
  XSPerfHistogram("miss_penalty", mshr_penalty, mshr_penalty_sample, 20, 100, 10, true, false)

  val load_miss_begin = io.req.valid && io.primary_ready && io.primary_valid && io.req.bits.isLoad
  val refill_finished = RegNext(!w_grantlast && refill_done) && should_refill_data
  val (load_miss_penalty_sample, load_miss_penalty) = TransactionLatencyCounter(load_miss_begin, refill_finished) // not real refill finish time
  XSPerfHistogram("load_miss_penalty_to_use", load_miss_penalty, load_miss_penalty_sample, 0, 20, 1, true, true)
  XSPerfHistogram("load_miss_penalty_to_use", load_miss_penalty, load_miss_penalty_sample, 20, 100, 10, true, false)

  val (a_to_d_penalty_sample, a_to_d_penalty) = TransactionLatencyCounter(io.mem_acquire.fire(), io.mem_grant.fire() && refill_done)
  XSPerfHistogram("a_to_d_penalty", a_to_d_penalty, a_to_d_penalty_sample, 0, 20, 1, true, true)
  XSPerfHistogram("a_to_d_penalty", a_to_d_penalty, a_to_d_penalty_sample, 20, 100, 10, true, false)
}

Z
zhanglinjuan 已提交
417
/** The miss queue: `cfg.nMissEntries` [[MissEntry]] instances behind one
  * request port.
  *
  * An incoming request is broadcast to every entry. It is merged if some
  * entry reports `secondary_ready`; otherwise, if no entry rejects it, it is
  * allocated to the lowest-indexed free entry. Outgoing TileLink and pipeline
  * requests from the entries are arbitrated onto the shared ports.
  *
  * @param edge TileLink client edge shared by all entries
  */
class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new MissReq))
    val refill_to_ldq = ValidIO(new Refill)

    val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))

    val refill_pipe_req = DecoupledIO(new RefillPipeReq)

    val replace_pipe_req = DecoupledIO(new ReplacePipeReq)
    val replace_pipe_resp = Flipped(Vec(numReplaceRespPorts, ValidIO(new ReplacePipeResp)))

    val main_pipe_req = DecoupledIO(new MainPipeReq)
    val main_pipe_resp = Flipped(ValidIO(new AtomicsResp))

    // block probe
    val probe_addr = Input(UInt(PAddrBits.W))
    val probe_block = Output(Bool())

    val full = Output(Bool())

    // only for performance counter
    // This is valid when an mshr has finished replacing a block (w_replace_resp),
    // but hasn't received Grant from L2 (!w_grantlast)
    val debug_early_replace = Vec(cfg.nMissEntries, ValidIO(new Bundle() {
      // info about the block that has been replaced
      val idx = UInt(idxBits.W) // vaddr
      val tag = UInt(tagBits.W) // paddr
    }))
  })

  // 128KBL1: FIXME: provide vaddr for l2

  val entries = Seq.fill(cfg.nMissEntries)(Module(new MissEntry(edge)))

  val primary_ready_vec = entries.map(_.io.primary_ready)
  val secondary_ready_vec = entries.map(_.io.secondary_ready)
  val secondary_reject_vec = entries.map(_.io.secondary_reject)
  // an entry blocks a probe while it publishes the probed block address
  val probe_block_vec = entries.map { case e => e.io.block_addr.valid && e.io.block_addr.bits === io.probe_addr }

  val merge = Cat(secondary_ready_vec).orR
  // val merge_idx = PriorityEncoder(secondary_ready_vec)

  val reject = Cat(secondary_reject_vec).orR

  val alloc = !reject && !merge && Cat(primary_ready_vec).orR
  // val alloc_idx = PriorityEncoder(primary_ready_vec)

  val accept = alloc || merge
  // val entry_idx = Mux(alloc, alloc_idx, merge_idx)

  assert(RegNext(PopCount(secondary_ready_vec) <= 1.U))
//  assert(RegNext(PopCount(secondary_reject_vec) <= 1.U))
  // It is possible that one mshr wants to merge a req, while another mshr wants to reject it.
  // That is, a coming req has the same paddr as that of mshr_0 (merge),
  // while it has the same set and the same way as mshr_1 (reject).
  // In this situation, the coming req should be merged by mshr_0
//  assert(RegNext(PopCount(Seq(merge, reject)) <= 1.U))

  /** Connects `in` to `out` through a chisel3.util `Arbiter`, optionally
    * naming the arbiter instance `<name>_arb`.
    */
  def arbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  /** Same as `arbiter`, but instantiates an `RRArbiter`. */
  def rrArbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new RRArbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  /** Forwards the single valid input to `out` without arbitration; asserts
    * that at most one input is valid at a time.
    */
  def select_valid_one[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {

    if (name.nonEmpty) { out.suggestName(s"${name.get}_select") }
    out.valid := Cat(in.map(_.valid)).orR
    out.bits := ParallelMux(in.map(_.valid) zip in.map(_.bits))
    in.foreach(_.ready := out.ready)
    assert(!RegNext(out.valid && PopCount(Cat(in.map(_.valid))) > 1.U))
  }

  // default; overridden below by the entry whose id matches the Grant's source
  io.mem_grant.ready := false.B

  entries.zipWithIndex.foreach {
    case (e, i) =>
      // some lower-indexed entry is free, so it takes the allocation instead
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR

      e.io.id := i.U
      e.io.req.valid := io.req.valid
      e.io.primary_valid := io.req.valid &&
        !merge &&
        !reject &&
        !former_primary_ready &&
        e.io.primary_ready
      e.io.req.bits := io.req.bits

      // route the Grant to the entry named by its source field
      e.io.mem_grant.valid := false.B
      e.io.mem_grant.bits := DontCare
      when (io.mem_grant.bits.source === i.U) {
        e.io.mem_grant <> io.mem_grant
      }

      e.io.replace_pipe_resp := Cat(io.replace_pipe_resp.map { case r => r.valid && r.bits.miss_id === i.U }).orR
      e.io.main_pipe_resp := io.main_pipe_resp.valid && io.main_pipe_resp.bits.ack_miss_queue && io.main_pipe_resp.bits.miss_id === i.U

      io.debug_early_replace(i) := e.io.debug_early_replace
  }

  io.req.ready := accept
  io.refill_to_ldq.valid := Cat(entries.map(_.io.refill_to_ldq.valid)).orR
  io.refill_to_ldq.bits := ParallelMux(entries.map(_.io.refill_to_ldq.valid) zip entries.map(_.io.refill_to_ldq.bits))

  TLArbiter.lowest(edge, io.mem_acquire, entries.map(_.io.mem_acquire):_*)
  TLArbiter.lowest(edge, io.mem_finish, entries.map(_.io.mem_finish):_*)

  arbiter(entries.map(_.io.refill_pipe_req), io.refill_pipe_req, Some("refill_pipe_req"))
  arbiter(entries.map(_.io.replace_pipe_req), io.replace_pipe_req, Some("replace_pipe_req"))
  arbiter(entries.map(_.io.main_pipe_req), io.main_pipe_req, Some("main_pipe_req"))

  io.probe_block := Cat(probe_block_vec).orR

  if (env.EnableDifftest) {
    val difftest = Module(new DifftestRefillEvent)
    difftest.io.clock := clock
    difftest.io.coreid := hardId.U
    difftest.io.valid := io.refill_to_ldq.valid && io.refill_to_ldq.bits.hasdata && io.refill_to_ldq.bits.refill_done
    difftest.io.addr := io.refill_to_ldq.bits.addr
    difftest.io.data := io.refill_to_ldq.bits.data_raw.asTypeOf(difftest.io.data)
  }

  XSPerfAccumulate("miss_req", io.req.fire())
  XSPerfAccumulate("miss_req_allocate", io.req.fire() && alloc)
  XSPerfAccumulate("miss_req_merge_load", io.req.fire() && merge && io.req.bits.isLoad)
  XSPerfAccumulate("miss_req_reject_load", io.req.valid && reject && io.req.bits.isLoad)
  XSPerfAccumulate("probe_blocked_by_miss", io.probe_block)
  val max_inflight = RegInit(0.U((log2Up(cfg.nMissEntries) + 1).W))
  // number of busy entries (an entry is busy when it is not primary_ready)
  val num_valids = PopCount(~Cat(primary_ready_vec).asUInt)
  when (num_valids > max_inflight) {
    max_inflight := num_valids
  }
  // max inflight (average) = max_inflight_total / cycle cnt
  XSPerfAccumulate("max_inflight", max_inflight)
  QueuePerf(cfg.nMissEntries, num_valids, num_valids === cfg.nMissEntries.U)
  // the single driver of io.full: every entry is busy
  io.full := num_valids === cfg.nMissEntries.U
  XSPerfHistogram("num_valids", num_valids, true.B, 0, cfg.nMissEntries, 1)
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  // occupancy thresholds, computed at elaboration time (integer division)
  private val occupancy_quarter = (cfg.nMissEntries / 4).U
  private val occupancy_half = (cfg.nMissEntries / 2).U
  private val occupancy_three_quarters = (cfg.nMissEntries * 3 / 4).U
  // The four occupancy buckets are [0, 1/4], (1/4, 1/2], (1/2, 3/4], (3/4, 1].
  // The first bucket includes its upper bound so that an occupancy of exactly
  // nMissEntries/4 is counted somewhere.
  val perfEvents = Seq(
    ("dcache_missq_req          ", io.req.fire()),
    ("dcache_missq_1/4_valid    ", num_valids <= occupancy_quarter),
    ("dcache_missq_2/4_valid    ", num_valids > occupancy_quarter && num_valids <= occupancy_half),
    ("dcache_missq_3/4_valid    ", num_valids > occupancy_half && num_valids <= occupancy_three_quarters),
    ("dcache_missq_4/4_valid    ", num_valids > occupancy_three_quarters)
  )

  for ((perf_out, (_, perf)) <- perfinfo.perfEvents.perf_events.zip(perfEvents)) {
    perf_out.incr_step := RegNext(perf)
  }
}