未验证 提交 d34e06b2 编写于 作者: Z zhanglinjuan 提交者: GitHub

DCache: add performance counters (#677)

* DCache: fix failure to update access info for PLRU replacement

* DCache: add performance counters
上级 c634f99b
......@@ -4,7 +4,7 @@ import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.ClientMetadata
import utils.XSDebug
import utils.{XSDebug, XSPerf}
class LoadPipe extends DCacheModule {
def metaBits = (new L1Metadata).getWidth
......@@ -214,4 +214,14 @@ class LoadPipe extends DCacheModule {
XSDebug(s"$pipeline_stage_name $signal_name\n")
}
}
// performance counters
XSPerf("load_req", io.lsu.req.fire())
XSPerf("load_s1_kill", s1_fire && io.lsu.s1_kill)
XSPerf("load_hit_way", s1_fire && s1_tag_match)
XSPerf("load_replay", io.lsu.resp.fire() && resp.bits.replay)
XSPerf("load_replay_for_data_nack", io.lsu.resp.fire() && resp.bits.replay && s2_nack_data)
XSPerf("load_replay_for_no_mshr", io.lsu.resp.fire() && resp.bits.replay && s2_nack_no_mshr)
XSPerf("load_hit", io.lsu.resp.fire() && !resp.bits.miss)
XSPerf("load_miss", io.lsu.resp.fire() && resp.bits.miss)
}
......@@ -2,11 +2,9 @@ package xiangshan.cache
import chisel3._
import chisel3.util._
import utils._
import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions}
import utils.{XSDebug, OneHot, ReplacementPolicy}
class MainPipeReq extends DCacheBundle
{
// for request that comes from MissQueue
......@@ -672,4 +670,28 @@ class MainPipe extends DCacheModule {
}
}
// performance counters
// average penalty per request in the pipeline = pipe_total_penalty / pipe_req
XSPerf("pipe_req", s0_fire)
XSPerf("pipe_total_penalty", PopCount(VecInit(Seq(s0_fire, s1_valid, s2_valid, s3_valid))))
XSPerf("pipe_blocked_by_wbu", s3_valid && need_writeback && !io.wb_req.ready)
XSPerf("pipe_blocked_by_nack_data", s1_valid && s1_need_data && !io.data_read.ready)
XSPerf("pipe_reject_req_for_nack_meta", s0_valid && !meta_ready)
XSPerf("pipe_reject_req_for_set_conflict", s0_valid && set_conflict)
for (i <- 0 until LoadPipelineWidth) {
for (w <- 0 until nWays) {
XSPerf("load_pipe_" + Integer.toString(i,10) + "_access_way_" + Integer.toString(w, 10),
io.replace_access(i).valid && io.replace_access(i).bits.way === w.U)
}
}
for (w <- 0 until nWays) {
XSPerf("main_pipe_access_way_" + Integer.toString(w, 10),
access_bundle.valid && access_bundle.bits.way === w.U)
XSPerf("main_pipe_choose_way_" + Integer.toString(w, 10),
RegNext(s0_fire) && s1_repl_way_en === UIntToOH(w.U))
}
}
......@@ -342,6 +342,15 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
when (state === s_release_entry) {
state := s_invalid
}
XSPerf("miss_req", io.req_valid && io.primary_ready)
XSPerf("miss_penalty", BoolStopWatch(io.req_valid && io.primary_ready, state === s_release_entry))
XSPerf("load_miss_penalty_to_use", should_refill_data && BoolStopWatch(io.req_valid && io.primary_ready, io.refill.valid, true))
XSPerf("pipeline_penalty", BoolStopWatch(io.pipe_req.fire(), io.pipe_resp.fire()))
XSPerf("penalty_blocked_by_channel_A", io.mem_acquire.valid && !io.mem_acquire.ready)
XSPerf("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_refill_resp)
XSPerf("penalty_blocked_by_channel_E", io.mem_finish.valid && !io.mem_finish.ready)
XSPerf("penalty_blocked_by_pipeline", io.pipe_req.valid && !io.pipe_req.ready)
}
......@@ -513,5 +522,6 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
XSDebug(p"block probe req ${Hexadecimal(io.probe_req)}\n")
}
XSPerf("dcache_miss", io.req.fire())
XSPerf("miss_req", io.req.fire())
XSPerf("probe_blocked_by_miss", io.probe_block)
}
......@@ -6,7 +6,7 @@ import chisel3.util._
import utils.XSDebug
import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleB, TLMessages, TLPermissions}
import utils.{HasTLDump, XSDebug}
import utils.{HasTLDump, XSDebug, XSPerf}
class ProbeReq extends DCacheBundle
{
......@@ -76,6 +76,12 @@ class ProbeEntry extends DCacheModule {
state := s_invalid
}
}
// performance counters
XSPerf("probe_req", state === s_invalid && io.req.fire())
XSPerf("probe_penalty", state =/= s_invalid)
XSPerf("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr)
XSPerf("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
class ProbeQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
......
......@@ -3,7 +3,7 @@ package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import utils.{XSDebug, XSPerf}
import bus.tilelink._
class StoreReplayEntry extends DCacheModule
......@@ -117,6 +117,14 @@ class StoreReplayEntry extends DCacheModule
when (io.lsu.resp.fire()) {
XSDebug(s"StoreReplayEntryTransaction resp %d\n", io.id)
}
// performance counters
XSPerf("store_req", io.lsu.req.fire())
XSPerf("store_penalty", state =/= s_invalid)
// the store_hit counter below is not useful, so it is left disabled
// XSPerf("store_hit", state === s_pipe_resp && io.pipe_resp.fire() && !io.pipe_resp.bits.miss)
XSPerf("store_replay", state === s_pipe_resp && io.pipe_resp.fire() && io.pipe_resp.bits.miss && io.pipe_resp.bits.replay)
XSPerf("store_miss", state === s_pipe_resp && io.pipe_resp.fire() && io.pipe_resp.bits.miss)
}
......@@ -190,4 +198,7 @@ class StoreReplayQueue extends DCacheModule
when (io.pipe_resp.fire()) {
io.pipe_resp.bits.dump()
}
// performance counters
XSPerf("store_req", io.lsu.req.fire())
}
......@@ -2,7 +2,7 @@ package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.{XSDebug, HasTLDump}
import utils.{XSDebug, HasTLDump, XSPerf}
import freechips.rocketchip.tilelink.{TLBundleC, TLBundleD, TLEdgeOut, TLPermissions, TLArbiter}
class WritebackReq extends DCacheBundle {
......@@ -129,6 +129,13 @@ class WritebackEntry(edge: TLEdgeOut) extends DCacheModule with HasTLDump
state := s_invalid
}
}
// performance counters
XSPerf("wb_req", io.req.fire())
XSPerf("wb_release", state === s_release_req && release_done && req.voluntary)
XSPerf("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
XSPerf("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
XSPerf("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}
class WritebackQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
......@@ -203,4 +210,7 @@ class WritebackQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
when (io.block_miss_req) {
XSDebug("block_miss_req\n")
}
// performance counters
XSPerf("wb_req", io.req.fire())
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册