提交 e0a152a4 编写于 作者: A Allen

Added several performance counters to L1DCache.

Not tested yet.

Added:
* L1 MSHR occupancy
* L1 MSHR latency
* L1 Load Miss latency
* L1 Store latency
* L1 Store occupancy
* L1 Load req count
上级 125034f7
......@@ -322,6 +322,8 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
assert (!bus.d.fire())
}
//----------------------------------------
// assertions
// dcache should only deal with DRAM addresses
when (bus.a.fire()) {
assert(bus.a.bits.address >= 0x80000000L.U)
......@@ -333,9 +335,16 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
assert(bus.c.bits.address >= 0x80000000L.U)
}
//----------------------------------------
// utility functions
// Wire `source` to `sink` while letting `block_signal` veto the handshake.
//   - block_signal high: sink.valid and source.ready are both forced low,
//     so no transfer can happen in either direction.
//   - block_signal low:  valid and ready pass straight through.
// The payload (`bits`) is always forwarded, regardless of `block_signal`.
def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
  // Shared gate term: true only when the path is not blocked.
  val pass = !block_signal
  sink.bits := source.bits
  source.ready := pass && sink.ready
  sink.valid := pass && source.valid
}
//----------------------------------------
// performance counters
// Number of `ldu` elements whose `io.lsu.req` handshake fires, summed with PopCount.
// NOTE(review): `ldu` is declared outside this view; presumably the load pipelines — confirm.
val num_loads = PopCount(ldu.map(e => e.io.lsu.req.fire()))
// Hand the count to XSPerf under the counter name "num_loads".
XSPerf("num_loads", num_loads)
}
......@@ -5,7 +5,7 @@ import chisel3.util._
import chisel3.ExcitingUtils._
import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleA, TLBundleD, TLBundleE, TLPermissions, TLArbiter, ClientMetadata}
import utils.{HasTLDump, XSDebug, BoolStopWatch, OneHot, XSPerf}
import utils.{HasTLDump, XSDebug, BoolStopWatch, OneHot, XSPerf, TransactionLatencyCounter}
class MissReq extends DCacheBundle
{
......@@ -351,6 +351,17 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
// Stall conditions reported to XSPerf, one counter name per condition.
// mem_grant is ready but carries no valid beat while the entry is in s_refill_resp.
XSPerf("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_refill_resp)
// mem_finish is valid but not being accepted.
XSPerf("penalty_blocked_by_channel_E", io.mem_finish.valid && !io.mem_finish.ready)
// pipe_req is valid but not being accepted.
XSPerf("penalty_blocked_by_pipeline", io.pipe_req.valid && !io.pipe_req.ready)
// Latency measured from (req_valid && primary_ready) to the state being s_release_entry.
// NOTE(review): TransactionLatencyCounter lives in utils and is not visible here; it is
// assumed to take (start, stop) and return (sample-valid, latency) — confirm.
val (mshr_penalty_sample, mshr_penalty) = TransactionLatencyCounter(io.req_valid && io.primary_ready, state === s_release_entry)
// NOTE(review): the trailing (0, 100, 10) arguments look like a histogram range and
// step — confirm against the XSPerf signature.
XSPerf("miss_penalty", mshr_penalty, mshr_penalty_sample, 0, 100, 10)
// Start event for load misses: a valid request whose source equals LOAD_SOURCE and for
// which either primary_ready or secondary_ready is asserted.
// NOTE(review): because secondary_ready is included, the start signal can pulse again
// before io.refill.valid; check how TransactionLatencyCounter treats repeated starts.
val load_miss_begin = io.req_valid && (io.primary_ready || io.secondary_ready) && io.req.source === LOAD_SOURCE.U
// Latency measured from load_miss_begin to io.refill.valid.
val (load_miss_penalty_sample, load_miss_penalty) = TransactionLatencyCounter(load_miss_begin, io.refill.valid)
XSPerf("load_miss_penalty_to_use", load_miss_penalty, load_miss_penalty_sample, 0, 100, 10)
// Latency measured from mem_acquire firing to mem_grant firing with refill_done set.
val (a_to_d_penalty_sample, a_to_d_penalty) = TransactionLatencyCounter(io.mem_acquire.fire(), io.mem_grant.fire() && refill_done)
XSPerf("a_to_d_penalty", a_to_d_penalty, a_to_d_penalty_sample, 0, 100, 10)
}
......@@ -532,4 +543,6 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
// max inflight (average) = max_inflight_total / cycle cnt
XSPerf("max_inflight", max_inflight)
// NOTE(review): "num_valids" is registered twice below. This text looks like a diff
// whose +/- markers were stripped, so the first call is presumably the removed line and
// the second its replacement — confirm that only one of them exists in the real file.
XSPerf("num_valids", num_valids)
// Same counter with extra arguments: true.B plus (0, cfg.nMissEntries, 1), presumably a
// sample-enable followed by a histogram range and step — confirm against XSPerf.
XSPerf("num_valids", num_valids, true.B, 0, cfg.nMissEntries, 1)
}
......@@ -3,7 +3,7 @@ package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.{XSDebug, XSPerf}
import utils.{XSDebug, XSPerf, TransactionLatencyCounter}
import bus.tilelink._
class StoreReplayEntry extends DCacheModule
......@@ -125,6 +125,9 @@ class StoreReplayEntry extends DCacheModule
// XSPerf("store_hit", state === s_pipe_resp && io.pipe_resp.fire() && !io.pipe_resp.bits.miss)
// Counted when, in s_pipe_resp, a pipe response fires with both `miss` and `replay` set.
XSPerf("store_replay", state === s_pipe_resp && io.pipe_resp.fire() && io.pipe_resp.bits.miss && io.pipe_resp.bits.replay)
// Counted when, in s_pipe_resp, a pipe response fires with `miss` set (replayed or not).
XSPerf("store_miss", state === s_pipe_resp && io.pipe_resp.fire() && io.pipe_resp.bits.miss)
// Latency measured from io.lsu.req firing to io.lsu.resp firing.
// NOTE(review): TransactionLatencyCounter is defined in utils and not visible here; it is
// assumed to take (start, stop) and return (sample-valid, latency) — confirm.
val (store_latency_sample, store_latency) = TransactionLatencyCounter(io.lsu.req.fire(), io.lsu.resp.fire())
// NOTE(review): the trailing (0, 100, 10) arguments look like a histogram range and
// step — confirm against the XSPerf signature.
XSPerf("store_latency", store_latency, store_latency_sample, 0, 100, 10)
}
......@@ -201,4 +204,6 @@ class StoreReplayQueue extends DCacheModule
// performance counters
// Counted whenever the queue-level io.lsu.req handshake fires.
XSPerf("store_req", io.lsu.req.fire())
// Number of entries whose io.lsu.req.ready is low.
// NOTE(review): presumably an entry deasserts ready while it is busy, which would make
// this the queue occupancy — confirm against StoreReplayEntry.
val num_valids = PopCount(entries.map(e => !e.io.lsu.req.ready))
// Reported with true.B plus (0, cfg.nStoreReplayEntries, 1), presumably a sample-enable
// followed by a histogram range and step — confirm against XSPerf.
XSPerf("num_valids", num_valids, true.B, 0, cfg.nStoreReplayEntries, 1)
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册