提交 e843dec5 编写于 作者: L Leo Yan 提交者: Arnaldo Carvalho de Melo

perf mem: Add statistics for peer snooping

The flag PERF_MEM_SNOOPX_PEER was added to support cache snooping from a
peer cache line; the snooped data can come from a peer core, a peer
cluster, or a remote NUMA node.

This patch adds statistics for the flag PERF_MEM_SNOOPX_PEER.  Note that
PERF_MEM_SNOOPX_PEER is treated as supplementary information that works
together with the cache level statistics.  Therefore, when the flag
PERF_MEM_SNOOPX_PEER is set, load operations are accounted in both the
cache level's metrics (e.g. ld_l2hit, ld_llchit, etc.) and the peer
related metrics.

So three new metrics are introduced: 'lcl_peer' is for local cache
access, the metric 'rmt_peer' is for remote access (includes remote DRAM
and any caches in remote node), and the metric 'tot_peer' is accounting
the sum value of 'lcl_peer' and 'rmt_peer'.
Reviewed-by: Ali Saidi <alisaidi@amazon.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Tested-by: Ali Saidi <alisaidi@amazon.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Like Xu <likexu@tencent.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Timothy Hayes <timothy.hayes@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20220811062451.435810-5-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
上级 4e6430cb
...@@ -525,6 +525,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) ...@@ -525,6 +525,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
u64 op = data_src->mem_op; u64 op = data_src->mem_op;
u64 lvl = data_src->mem_lvl; u64 lvl = data_src->mem_lvl;
u64 snoop = data_src->mem_snoop; u64 snoop = data_src->mem_snoop;
u64 snoopx = data_src->mem_snoopx;
u64 lock = data_src->mem_lock; u64 lock = data_src->mem_lock;
u64 blk = data_src->mem_blk; u64 blk = data_src->mem_blk;
/* /*
...@@ -544,6 +545,12 @@ do { \ ...@@ -544,6 +545,12 @@ do { \
stats->tot_hitm++; \ stats->tot_hitm++; \
} while (0) } while (0)
#define PEER_INC(__f) \
do { \
stats->__f++; \
stats->tot_peer++; \
} while (0)
#define P(a, b) PERF_MEM_##a##_##b #define P(a, b) PERF_MEM_##a##_##b
stats->nr_entries++; stats->nr_entries++;
...@@ -567,12 +574,20 @@ do { \ ...@@ -567,12 +574,20 @@ do { \
if (lvl & P(LVL, IO)) stats->ld_io++; if (lvl & P(LVL, IO)) stats->ld_io++;
if (lvl & P(LVL, LFB)) stats->ld_fbhit++; if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
if (lvl & P(LVL, L1 )) stats->ld_l1hit++; if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
if (lvl & P(LVL, L2 )) stats->ld_l2hit++; if (lvl & P(LVL, L2)) {
stats->ld_l2hit++;
if (snoopx & P(SNOOPX, PEER))
PEER_INC(lcl_peer);
}
if (lvl & P(LVL, L3 )) { if (lvl & P(LVL, L3 )) {
if (snoop & P(SNOOP, HITM)) if (snoop & P(SNOOP, HITM))
HITM_INC(lcl_hitm); HITM_INC(lcl_hitm);
else else
stats->ld_llchit++; stats->ld_llchit++;
if (snoopx & P(SNOOPX, PEER))
PEER_INC(lcl_peer);
} }
if (lvl & P(LVL, LOC_RAM)) { if (lvl & P(LVL, LOC_RAM)) {
...@@ -597,10 +612,14 @@ do { \ ...@@ -597,10 +612,14 @@ do { \
if ((lvl & P(LVL, REM_CCE1)) || if ((lvl & P(LVL, REM_CCE1)) ||
(lvl & P(LVL, REM_CCE2)) || (lvl & P(LVL, REM_CCE2)) ||
mrem) { mrem) {
if (snoop & P(SNOOP, HIT)) if (snoop & P(SNOOP, HIT)) {
stats->rmt_hit++; stats->rmt_hit++;
else if (snoop & P(SNOOP, HITM)) } else if (snoop & P(SNOOP, HITM)) {
HITM_INC(rmt_hitm); HITM_INC(rmt_hitm);
} else if (snoopx & P(SNOOPX, PEER)) {
stats->rmt_hit++;
PEER_INC(rmt_peer);
}
} }
if ((lvl & P(LVL, MISS))) if ((lvl & P(LVL, MISS)))
...@@ -664,6 +683,9 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) ...@@ -664,6 +683,9 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
stats->lcl_hitm += add->lcl_hitm; stats->lcl_hitm += add->lcl_hitm;
stats->rmt_hitm += add->rmt_hitm; stats->rmt_hitm += add->rmt_hitm;
stats->tot_hitm += add->tot_hitm; stats->tot_hitm += add->tot_hitm;
stats->lcl_peer += add->lcl_peer;
stats->rmt_peer += add->rmt_peer;
stats->tot_peer += add->tot_peer;
stats->rmt_hit += add->rmt_hit; stats->rmt_hit += add->rmt_hit;
stats->lcl_dram += add->lcl_dram; stats->lcl_dram += add->lcl_dram;
stats->rmt_dram += add->rmt_dram; stats->rmt_dram += add->rmt_dram;
......
...@@ -78,6 +78,9 @@ struct c2c_stats { ...@@ -78,6 +78,9 @@ struct c2c_stats {
u32 lcl_hitm; /* count of loads with local HITM */ u32 lcl_hitm; /* count of loads with local HITM */
u32 rmt_hitm; /* count of loads with remote HITM */ u32 rmt_hitm; /* count of loads with remote HITM */
u32 tot_hitm; /* count of loads with local and remote HITM */ u32 tot_hitm; /* count of loads with local and remote HITM */
u32 lcl_peer; /* count of loads with local peer cache */
u32 rmt_peer; /* count of loads with remote peer cache */
u32 tot_peer; /* count of loads with local and remote peer cache */
u32 rmt_hit; /* count of loads with remote hit clean; */ u32 rmt_hit; /* count of loads with remote hit clean; */
u32 lcl_dram; /* count of loads miss to local DRAM */ u32 lcl_dram; /* count of loads miss to local DRAM */
u32 rmt_dram; /* count of loads miss to remote DRAM */ u32 rmt_dram; /* count of loads miss to remote DRAM */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册