Commit 846c5699 authored by Allen

Let DCache pipeline pass meta down.

Added some debug logs.
Parent 123bb4bc
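
The change threads the request's meta field through the DCache pipeline registers (s0_meta, s1_meta, s2_meta) and drives it onto the response, which previously hard-coded cache_resp(w).bits.meta := 0.U; this lets the LSU and the testbench match each response back to the request that produced it. A minimal, self-contained sketch of the same pattern (the module and parameter names below are illustrative, not the actual DCache interfaces, which use MemoryOpConstants.META_SZ and per-channel Vecs):

import chisel3._

// Sketch only: carry request metadata down a 3-stage pipeline and return it
// with the response. MetaSketch and metaWidth are hypothetical names.
class MetaSketch(metaWidth: Int = 8) extends Module {
  val io = IO(new Bundle {
    val reqMeta  = Input(UInt(metaWidth.W))
    val respMeta = Output(UInt(metaWidth.W))
  })

  val s0_meta = io.reqMeta        // stage 0: meta selected together with the request
  val s1_meta = RegNext(s0_meta)  // stage 1: latched alongside s1_req
  val s2_meta = RegNext(s1_meta)  // stage 2: travels with the hit/miss result

  io.respMeta := s2_meta          // the response carries the original meta back
}

In the commit itself, stage 0 also selects where the meta comes from: LSU requests supply it per channel, while MSHR replays supply mshrs.io.replay.bits.meta on channel 0.
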
......@@ -8,6 +8,8 @@ import xiangshan.mem.{DCacheReq, DCacheResp, LSUDMemIO}
import xiangshan.utils.XSDebug
import bus.tilelink._
import _root_.utils.{Code, RandomReplacement, Transpose}
import xiangshan.mem.MemoryOpConstants
// DCache specific parameters
// L1 DCache is 64-set, 8-way set-associative, with 64-byte blocks, 32KB in total
......@@ -430,6 +432,17 @@ class DCache extends DCacheModule
}
}
def dump_pipeline_valids(pipeline_stage_name: String, signal_name: String, valid: Vec[Bool]) = {
  val anyValid = valid.reduce(_||_)
  when (anyValid) {
    (0 until memWidth) map { w =>
      when (valid(w)) {
        XSDebug(s"$pipeline_stage_name channel %d: $signal_name\n", w.U)
      }
    }
  }
}
// stage 0
val s0_valid = Mux(io.lsu.req.fire(), VecInit(io.lsu.req.bits.map(_.valid)),
               Mux(mshrs.io.replay.fire(), VecInit(1.U(memWidth.W).asBools),
......@@ -437,6 +450,9 @@ class DCache extends DCacheModule
val s0_req = Mux(io.lsu.req.fire(), VecInit(io.lsu.req.bits.map(_.bits)),
             replay_req)
val s0_type = Mux(io.lsu.req.fire(), t_lsu, t_replay)
val s0_meta = Mux(io.lsu.req.fire(), VecInit(io.lsu.req.bits.map(_.bits.meta)),
              Mux(mshrs.io.replay.fire(), VecInit(mshrs.io.replay.bits.meta, 0.U(MemoryOpConstants.META_SZ.W)),
              VecInit(0.U(MemoryOpConstants.META_SZ.W), 0.U(MemoryOpConstants.META_SZ.W))))
dump_pipeline_reqs("DCache s0", s0_valid, s0_req, s0_type)
......@@ -451,9 +467,8 @@ class DCache extends DCacheModule
// we send a nack
// all pipeline requests require a response or a nack
// only MSHR replayed loads need to send a resp
val s0_send_resp_or_nack = Mux(io.lsu.req.fire(), s0_valid,
  VecInit(Mux(mshrs.io.replay.fire() && isRead(mshrs.io.replay.bits.cmd), 1.U(memWidth.W), 0.U(memWidth.W)).asBools))
// all requests should send a response back
val s0_send_resp_or_nack = s0_valid
// stage 1
val s1_req = RegNext(s0_req)
......@@ -463,6 +478,7 @@ class DCache extends DCacheModule
val s1_nack = VecInit(0.U(memWidth.W).asBools)
val s1_send_resp_or_nack = RegNext(s0_send_resp_or_nack)
val s1_type = RegNext(s0_type)
val s1_meta = RegNext(s0_meta)
// For replays, the metadata isn't written yet
val s1_replay_way_en = RegNext(mshrs.io.replay.bits.way_en)
......@@ -479,6 +495,7 @@ class DCache extends DCacheModule
// stage 2
val s2_req = RegNext(s1_req)
val s2_type = RegNext(s1_type)
val s2_meta = RegNext(s1_meta)
val s2_valid = widthMap(w =>
  RegNext(s1_valid(w), init = false.B))
......@@ -539,6 +556,14 @@ class DCache extends DCacheModule
for (w <- 0 until memWidth)
  assert(!(s2_send_resp(w) && s2_send_nack(w)))
dump_pipeline_valids("DCache s2", "s2_hit", s2_hit)
dump_pipeline_valids("DCache s2", "s2_nack", s2_nack)
dump_pipeline_valids("DCache s2", "s2_nack_hit", s2_nack_hit)
dump_pipeline_valids("DCache s2", "s2_nack_set_busy", s2_nack_set_busy)
dump_pipeline_valids("DCache s2", "s2_nack_no_mshr", s2_nack_no_mshr)
dump_pipeline_valids("DCache s2", "s2_send_resp", s2_send_resp)
dump_pipeline_valids("DCache s2", "s2_send_nack", s2_send_nack)
// hits always send a response
// If no MSHR is available, the LSU has to replay this request later
// If an MSHR is available and this is only a store (not an AMO), we don't need to wait for a resp later
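
One possible reading of that policy, restated as a small standalone sketch (hypothetical names, not code from this commit):

// Sketch only, not the DCache source: summarizes the comment above.
// hit                          -> respond with data now
// miss, no free MSHR           -> nack; the LSU replays the request later
// miss, MSHR free, plain store -> ack now; the LSU does not wait for the refill
def respOrNack(hit: Boolean, mshrFree: Boolean, isStore: Boolean, isAmo: Boolean): (Boolean, Boolean) = {
  val sendResp = hit || (mshrFree && isStore && !isAmo)
  val sendNack = !hit && !mshrFree
  (sendResp, sendNack) // never both at once, matching the assert in stage 2
}
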
......@@ -613,7 +638,7 @@ class DCache extends DCacheModule
for (w <- 0 until memWidth) {
  cache_resp(w).valid := s2_valid(w) && (s2_send_resp(w) || s2_send_nack(w))
  cache_resp(w).bits.data := s2_data_word(w)
  cache_resp(w).bits.meta := 0.U
  cache_resp(w).bits.meta := s2_meta(w)
  cache_resp(w).bits.nack := s2_send_nack(w)
}
......@@ -622,6 +647,12 @@ class DCache extends DCacheModule
// return the results to the LSU
for (w <- 0 until memWidth) {
  io.lsu.resp(w) <> resp(w)
  val channel_resp = io.lsu.resp(w).bits
  when (io.lsu.resp(w).valid) {
    XSDebug(s"DCache resp channel $w: data: %x meta: %d nack: %b\n",
      channel_resp.data, channel_resp.meta, channel_resp.nack)
  }
}
// Store/amo hits
......
......@@ -95,6 +95,8 @@ class MSHR extends DCacheModule
io.tag.bits := req_tag
io.way.bits := req.way_en
XSDebug("mshr: %d state: %d idx_valid: %b\n", io.id, state, io.idx.valid)
// assign default values to output signals
io.req_pri_rdy := false.B
......@@ -335,6 +337,15 @@ class MSHRFile extends DCacheModule
  idx_matches(w)(i) := mshr.io.idx.valid && mshr.io.idx.bits === io.req(w).bits.addr(untagBits-1,blockOffBits)
  tag_matches(w)(i) := mshr.io.tag.valid && mshr.io.tag.bits === io.req(w).bits.addr >> untagBits
  way_matches(w)(i) := mshr.io.way.valid && mshr.io.way.bits === io.req(w).bits.way_en
  when (idx_matches(w)(i)) {
    XSDebug(s"mshr: $i channel: $w idx_match\n")
  }
  when (tag_matches(w)(i)) {
    XSDebug(s"mshr: $i channel: $w tag_match\n")
  }
  when (way_matches(w)(i)) {
    XSDebug(s"mshr: $i channel: $w way_match\n")
  }
}
wb_tag_list(i) := mshr.io.wb_req.bits.tag
......@@ -398,7 +409,7 @@ class MSHRFile extends DCacheModule
// block hit
(0 until memWidth) map { w =>
  XSDebug(io.req(w).valid && io.block_hit(w), "channel %d req block hit\n", w.U)
  XSDebug(io.block_hit(w), "channel %d req block hit\n", w.U)
}
// print refill
......
......@@ -154,10 +154,15 @@ class DCacheTest extends FlatSpec with ChiselScalatestTester with Matchers {
for (i <- 0 to 1) {
  val resp = c.io.in.resp(i)
  if (resp.valid.peek().litToBoolean) {
    println(s"clock: $global_clock channel: $i resp: $resp")
    val original_req = all_requests(resp.bits.meta.peek().litValue.longValue)
    val data = resp.bits.data.peek().litValue.longValue
    val meta = resp.bits.meta.peek().litValue.longValue
    val nack = resp.bits.nack.peek().litToBoolean
    println(f"clock: $global_clock%d channel: $i%d nack: $nack%b data: $data%x meta: $meta%x")
    val original_req = all_requests(meta)
    // needs to be replayed
    if (resp.bits.nack.peek().litToBoolean) {
    if (nack) {
      issue_queue.enqueue(Array[Req](original_req))
    } else {
      num_retired_reqs += 1
......