diff --git a/src/main/scala/xiangshan/cache/DCacheWrapper.scala b/src/main/scala/xiangshan/cache/DCacheWrapper.scala
index 621bbf938d8bf69a39bdb5df5bc42a5a74734a5d..4ffbd7f058b1b9b5127d3c56ad6ede94372398b3 100644
--- a/src/main/scala/xiangshan/cache/DCacheWrapper.scala
+++ b/src/main/scala/xiangshan/cache/DCacheWrapper.scala
@@ -322,6 +322,8 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
     assert (!bus.d.fire())
   }
 
+  //----------------------------------------
+  // assertions
   // dcache should only deal with DRAM addresses
   when (bus.a.fire()) {
     assert(bus.a.bits.address >= 0x80000000L.U)
@@ -333,9 +335,16 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
     assert(bus.c.bits.address >= 0x80000000L.U)
   }
 
+  //----------------------------------------
+  // utility functions
   def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
     sink.valid := source.valid && !block_signal
     source.ready := sink.ready && !block_signal
     sink.bits := source.bits
   }
+
+  //----------------------------------------
+  // performance counters
+  val num_loads = PopCount(ldu.map(e => e.io.lsu.req.fire()))
+  XSPerf("num_loads", num_loads)
 }
diff --git a/src/main/scala/xiangshan/cache/MissQueue.scala b/src/main/scala/xiangshan/cache/MissQueue.scala
index e650baf0a6fdabfb7237b71062e0ec57940ebbaf..5eb394886c8fd4629cedb3e3655c0df42bb7a14a 100644
--- a/src/main/scala/xiangshan/cache/MissQueue.scala
+++ b/src/main/scala/xiangshan/cache/MissQueue.scala
@@ -5,7 +5,7 @@ import chisel3.util._
 import chisel3.ExcitingUtils._
 import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleA, TLBundleD, TLBundleE, TLPermissions, TLArbiter, ClientMetadata}
 
-import utils.{HasTLDump, XSDebug, BoolStopWatch, OneHot, XSPerf}
+import utils.{HasTLDump, XSDebug, BoolStopWatch, OneHot, XSPerf, TransactionLatencyCounter}
 
 class MissReq extends DCacheBundle
 {
@@ -351,6 +351,17 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
   XSPerf("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_refill_resp)
   XSPerf("penalty_blocked_by_channel_E", io.mem_finish.valid && !io.mem_finish.ready)
   XSPerf("penalty_blocked_by_pipeline", io.pipe_req.valid && !io.pipe_req.ready)
+
+
+  val (mshr_penalty_sample, mshr_penalty) = TransactionLatencyCounter(io.req_valid && io.primary_ready, state === s_release_entry)
+  XSPerf("miss_penalty", mshr_penalty, mshr_penalty_sample, 0, 100, 10)
+
+  val load_miss_begin = io.req_valid && (io.primary_ready || io.secondary_ready) && io.req.source === LOAD_SOURCE.U
+  val (load_miss_penalty_sample, load_miss_penalty) = TransactionLatencyCounter(load_miss_begin, io.refill.valid)
+  XSPerf("load_miss_penalty_to_use", load_miss_penalty, load_miss_penalty_sample, 0, 100, 10)
+
+  val (a_to_d_penalty_sample, a_to_d_penalty) = TransactionLatencyCounter(io.mem_acquire.fire(), io.mem_grant.fire() && refill_done)
+  XSPerf("a_to_d_penalty", a_to_d_penalty, a_to_d_penalty_sample, 0, 100, 10)
 }
 
 
@@ -532,4 +543,6 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
   // max inflight (average) = max_inflight_total / cycle cnt
   XSPerf("max_inflight", max_inflight)
   XSPerf("num_valids", num_valids)
+
+  XSPerf("num_valids", num_valids, true.B, 0, cfg.nMissEntries, 1)
 }
diff --git a/src/main/scala/xiangshan/cache/StoreReplayUnit.scala b/src/main/scala/xiangshan/cache/StoreReplayUnit.scala
index 315b27c6106adea886437dbe0050f4d5913cda04..0ea551e5fab39a43b790a9243c72243154073eb4 100644
--- a/src/main/scala/xiangshan/cache/StoreReplayUnit.scala
+++ b/src/main/scala/xiangshan/cache/StoreReplayUnit.scala
@@ -3,7 +3,7 @@ package xiangshan.cache
 import chisel3._
 import chisel3.util._
 
-import utils.{XSDebug, XSPerf}
+import utils.{XSDebug, XSPerf, TransactionLatencyCounter}
 import bus.tilelink._
 
 class StoreReplayEntry extends DCacheModule
@@ -125,6 +125,9 @@ class StoreReplayEntry extends DCacheModule
   // XSPerf("store_hit", state === s_pipe_resp && io.pipe_resp.fire() && !io.pipe_resp.bits.miss)
   XSPerf("store_replay", state === s_pipe_resp && io.pipe_resp.fire() && io.pipe_resp.bits.miss && io.pipe_resp.bits.replay)
   XSPerf("store_miss", state === s_pipe_resp && io.pipe_resp.fire() && io.pipe_resp.bits.miss)
+
+  val (store_latency_sample, store_latency) = TransactionLatencyCounter(io.lsu.req.fire(), io.lsu.resp.fire())
+  XSPerf("store_latency", store_latency, store_latency_sample, 0, 100, 10)
 }
 
 
@@ -201,4 +204,6 @@
 
   // performance counters
   XSPerf("store_req", io.lsu.req.fire())
+  val num_valids = PopCount(entries.map(e => !e.io.lsu.req.ready))
+  XSPerf("num_valids", num_valids, true.B, 0, cfg.nStoreReplayEntries, 1)
 }
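Note: the counters added above depend on two helpers from the utils package that are not part of this diff: XSPerf, called here with extra arguments that appear to be histogram parameters (enable, lower bound, upper bound, bucket step), and TransactionLatencyCounter. As a rough illustration only (an assumed sketch, not the actual XiangShan implementation), a latency counter with the (start, stop) => (sample, latency) interface used above could look like:

import chisel3._

// Hypothetical sketch of a transaction latency counter.
// `start` pulses when a transaction begins, `stop` when it completes.
// Returns (sample, latency): `sample` is high on the completion cycle,
// and `latency` then holds the number of cycles from start to stop.
object TransactionLatencyCounter {
  def apply(start: Bool, stop: Bool): (Bool, UInt) = {
    val busy    = RegInit(false.B)   // a transaction is currently in flight
    val counter = RegInit(0.U(16.W)) // cycles elapsed since `start`

    when (start) {
      busy    := true.B
      counter := 1.U                 // count the start cycle itself
    } .elsewhen (busy) {
      counter := counter + 1.U
    }
    when (stop) {
      busy := false.B
    }

    (stop, counter)
  }
}

With such a helper, XSPerf("miss_penalty", mshr_penalty, mshr_penalty_sample, 0, 100, 10) would sample the MSHR miss penalty only on cycles where a miss transaction finishes and, assuming the extra arguments are histogram bounds and step, bucket the observed latencies from 0 to 100 cycles in steps of 10.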