From d34e06b278e4b4babbadeb3dc5b291285c499286 Mon Sep 17 00:00:00 2001 From: zhanglinjuan <37037507+ZLJ987@users.noreply.github.com> Date: Wed, 10 Mar 2021 22:55:09 +0800 Subject: [PATCH] DCache: add performance counters (#677) * DCache: fix bug in failing to update access info of plru replacement * DCache: add performance counters --- src/main/scala/xiangshan/cache/LoadPipe.scala | 12 +++++++- src/main/scala/xiangshan/cache/MainPipe.scala | 28 +++++++++++++++++-- .../scala/xiangshan/cache/MissQueue.scala | 12 +++++++- src/main/scala/xiangshan/cache/Probe.scala | 8 +++++- .../xiangshan/cache/StoreReplayUnit.scala | 13 ++++++++- .../xiangshan/cache/WritebackQueue.scala | 12 +++++++- 6 files changed, 77 insertions(+), 8 deletions(-) diff --git a/src/main/scala/xiangshan/cache/LoadPipe.scala b/src/main/scala/xiangshan/cache/LoadPipe.scala index feaa580c3..baa8281e5 100644 --- a/src/main/scala/xiangshan/cache/LoadPipe.scala +++ b/src/main/scala/xiangshan/cache/LoadPipe.scala @@ -4,7 +4,7 @@ import chisel3._ import chisel3.util._ import freechips.rocketchip.tilelink.ClientMetadata -import utils.XSDebug +import utils.{XSDebug, XSPerf} class LoadPipe extends DCacheModule { def metaBits = (new L1Metadata).getWidth @@ -214,4 +214,14 @@ class LoadPipe extends DCacheModule { XSDebug(s"$pipeline_stage_name $signal_name\n") } } + + // performance counters + XSPerf("load_req", io.lsu.req.fire()) + XSPerf("load_s1_kill", s1_fire && io.lsu.s1_kill) + XSPerf("load_hit_way", s1_fire && s1_tag_match) + XSPerf("load_replay", io.lsu.resp.fire() && resp.bits.replay) + XSPerf("load_replay_for_data_nack", io.lsu.resp.fire() && resp.bits.replay && s2_nack_data) + XSPerf("load_replay_for_no_mshr", io.lsu.resp.fire() && resp.bits.replay && s2_nack_no_mshr) + XSPerf("load_hit", io.lsu.resp.fire() && !resp.bits.miss) + XSPerf("load_miss", io.lsu.resp.fire() && resp.bits.miss) } diff --git a/src/main/scala/xiangshan/cache/MainPipe.scala b/src/main/scala/xiangshan/cache/MainPipe.scala index 
ef7b641e7..940328e53 100644 --- a/src/main/scala/xiangshan/cache/MainPipe.scala +++ b/src/main/scala/xiangshan/cache/MainPipe.scala @@ -2,11 +2,9 @@ package xiangshan.cache import chisel3._ import chisel3.util._ - +import utils._ import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions} -import utils.{XSDebug, OneHot, ReplacementPolicy} - class MainPipeReq extends DCacheBundle { // for request that comes from MissQueue @@ -672,4 +670,28 @@ class MainPipe extends DCacheModule { } } + // performance counters + // penalty for each req in pipeline in average = pipe_total_penalty / pipe_req + XSPerf("pipe_req", s0_fire) + XSPerf("pipe_total_penalty", PopCount(VecInit(Seq(s0_fire, s1_valid, s2_valid, s3_valid)))) + + XSPerf("pipe_blocked_by_wbu", s3_valid && need_writeback && !io.wb_req.ready) + XSPerf("pipe_blocked_by_nack_data", s1_valid && s1_need_data && !io.data_read.ready) + XSPerf("pipe_reject_req_for_nack_meta", s0_valid && !meta_ready) + XSPerf("pipe_reject_req_for_set_conflict", s0_valid && set_conflict) + + for (i <- 0 until LoadPipelineWidth) { + for (w <- 0 until nWays) { + XSPerf("load_pipe_" + Integer.toString(i,10) + "_access_way_" + Integer.toString(w, 10), + io.replace_access(i).valid && io.replace_access(i).bits.way === w.U) + } + } + + for (w <- 0 until nWays) { + XSPerf("main_pipe_access_way_" + Integer.toString(w, 10), + access_bundle.valid && access_bundle.bits.way === w.U) + XSPerf("main_pipe_choose_way_" + Integer.toString(w, 10), + RegNext(s0_fire) && s1_repl_way_en === UIntToOH(w.U)) + } + } diff --git a/src/main/scala/xiangshan/cache/MissQueue.scala b/src/main/scala/xiangshan/cache/MissQueue.scala index a82b78480..05a291ca5 100644 --- a/src/main/scala/xiangshan/cache/MissQueue.scala +++ b/src/main/scala/xiangshan/cache/MissQueue.scala @@ -342,6 +342,15 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule when (state === s_release_entry) { state := s_invalid } + + XSPerf("miss_req", io.req_valid && io.primary_ready) 
+ XSPerf("miss_penalty", BoolStopWatch(io.req_valid && io.primary_ready, state === s_release_entry)) + XSPerf("load_miss_penalty_to_use", should_refill_data && BoolStopWatch(io.req_valid && io.primary_ready, io.refill.valid, true)) + XSPerf("pipeline_penalty", BoolStopWatch(io.pipe_req.fire(), io.pipe_resp.fire())) + XSPerf("penalty_blocked_by_channel_A", io.mem_acquire.valid && !io.mem_acquire.ready) + XSPerf("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_refill_resp) + XSPerf("penalty_blocked_by_channel_E", io.mem_finish.valid && !io.mem_finish.ready) + XSPerf("penalty_blocked_by_pipeline", io.pipe_req.valid && !io.pipe_req.ready) } @@ -513,5 +522,6 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump XSDebug(p"block probe req ${Hexadecimal(io.probe_req)}\n") } - XSPerf("dcache_miss", io.req.fire()) + XSPerf("miss_req", io.req.fire()) + XSPerf("probe_blocked_by_miss", io.probe_block) } diff --git a/src/main/scala/xiangshan/cache/Probe.scala b/src/main/scala/xiangshan/cache/Probe.scala index ff8890290..35aa36830 100644 --- a/src/main/scala/xiangshan/cache/Probe.scala +++ b/src/main/scala/xiangshan/cache/Probe.scala @@ -6,7 +6,7 @@ import chisel3.util._ import utils.XSDebug import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleB, TLMessages, TLPermissions} -import utils.{HasTLDump, XSDebug} +import utils.{HasTLDump, XSDebug, XSPerf} class ProbeReq extends DCacheBundle { @@ -76,6 +76,12 @@ class ProbeEntry extends DCacheModule { state := s_invalid } } + + // performance counters + XSPerf("probe_req", state === s_invalid && io.req.fire()) + XSPerf("probe_penalty", state =/= s_invalid) + XSPerf("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr) + XSPerf("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready) } class ProbeQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump diff 
--git a/src/main/scala/xiangshan/cache/StoreReplayUnit.scala b/src/main/scala/xiangshan/cache/StoreReplayUnit.scala index ddde1f9f1..315b27c61 100644 --- a/src/main/scala/xiangshan/cache/StoreReplayUnit.scala +++ b/src/main/scala/xiangshan/cache/StoreReplayUnit.scala @@ -3,7 +3,7 @@ package xiangshan.cache import chisel3._ import chisel3.util._ -import utils.XSDebug +import utils.{XSDebug, XSPerf} import bus.tilelink._ class StoreReplayEntry extends DCacheModule @@ -117,6 +117,14 @@ class StoreReplayEntry extends DCacheModule when (io.lsu.resp.fire()) { XSDebug(s"StoreReplayEntryTransaction resp %d\n", io.id) } + + // performance counters + XSPerf("store_req", io.lsu.req.fire()) + XSPerf("store_penalty", state =/= s_invalid) + // this is useless + // XSPerf("store_hit", state === s_pipe_resp && io.pipe_resp.fire() && !io.pipe_resp.bits.miss) + XSPerf("store_replay", state === s_pipe_resp && io.pipe_resp.fire() && io.pipe_resp.bits.miss && io.pipe_resp.bits.replay) + XSPerf("store_miss", state === s_pipe_resp && io.pipe_resp.fire() && io.pipe_resp.bits.miss) } @@ -190,4 +198,7 @@ class StoreReplayQueue extends DCacheModule when (io.pipe_resp.fire()) { io.pipe_resp.bits.dump() } + + // performance counters + XSPerf("store_req", io.lsu.req.fire()) } diff --git a/src/main/scala/xiangshan/cache/WritebackQueue.scala b/src/main/scala/xiangshan/cache/WritebackQueue.scala index 90eab42b9..72fc045a6 100644 --- a/src/main/scala/xiangshan/cache/WritebackQueue.scala +++ b/src/main/scala/xiangshan/cache/WritebackQueue.scala @@ -2,7 +2,7 @@ package xiangshan.cache import chisel3._ import chisel3.util._ -import utils.{XSDebug, HasTLDump} +import utils.{XSDebug, HasTLDump, XSPerf} import freechips.rocketchip.tilelink.{TLBundleC, TLBundleD, TLEdgeOut, TLPermissions, TLArbiter} class WritebackReq extends DCacheBundle { @@ -129,6 +129,13 @@ class WritebackEntry(edge: TLEdgeOut) extends DCacheModule with HasTLDump state := s_invalid } } + + // performance counters + XSPerf("wb_req", 
io.req.fire()) + XSPerf("wb_release", state === s_release_req && release_done && req.voluntary) + XSPerf("wb_probe_resp", state === s_release_req && release_done && !req.voluntary) + XSPerf("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready) + XSPerf("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp) } class WritebackQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump @@ -203,4 +210,7 @@ class WritebackQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump when (io.block_miss_req) { XSDebug("block_miss_req\n") } + + // performance counters + XSPerf("wb_req", io.req.fire()) } -- GitLab