diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index f67e83c7b3cef6816ef2101e3d5281167f8f57eb..322961c863f04e6c75d7f9fe3a84f2309b4e6474 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -16,6 +16,10 @@
 #include "arm-spe-pkt-decoder.h"
 #include "arm-spe-decoder.h"
 
+#ifndef BIT
+#define BIT(n)		(1ULL << (n))
+#endif
+
 struct arm_spe_decoder {
 	int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
 	void *data;
@@ -40,8 +44,12 @@ struct arm_spe_decoder {
 static uint64_t arm_spe_calc_ip(uint64_t ip)
 {
 	/* fill high 8 bits for kernel virtual address */
-	if (ip & 0x1000000000000ULL)
-		ip |= (uint64_t)0xff00000000000000ULL;
+	/* Per the Armv8 Architecture Reference Manual, Xn[55] selects the
+	 * upper or lower address range for address tagging purposes. Test it
+	 * with a 64-bit constant so the shift stays defined when long is 32 bits.
+	 */
+	if (ip & (1ULL << 55))
+		ip |= (uint64_t)(0xffULL << 56);
 
 	return ip;
 }
@@ -195,17 +203,17 @@ static int arm_spe_walk_trace(struct arm_spe_decoder *decoder)
 			}
 			break;
 		case ARM_SPE_EVENTS:
-			if (payload & 0x20) {
+			if (payload & BIT(EV_TLB_REFILL)) {
 				decoder->state.type |= ARM_SPE_TLB_MISS;
 				decoder->state.is_tlb_miss = true;
 			}
-			if (payload & 0x80)
+			if (payload & BIT(EV_MISPRED))
 				decoder->state.type |= ARM_SPE_BRANCH_MISS;
-			if (idx > 1 && (payload & 0x200)) {
+			if (idx > 1 && (payload & BIT(EV_LLC_REFILL))) {
 				decoder->state.type |= ARM_SPE_LLC_MISS;
 				decoder->state.is_llc_miss = true;
 			}
-			if (idx > 1 && (payload & 0x400)) {
+			if (idx > 1 && (payload & BIT(EV_REMOTE_ACCESS))) {
 				decoder->state.type |= ARM_SPE_REMOTE_ACCESS;
 				decoder->state.is_remote = true;
 			}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 17db7d1c14a929d3745ca772e270cd3e6b779244..36d593eda7782b96527e50e35878825b8b44d80a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -6,6 +6,20 @@
 #include
 #include
 
+enum arm_spe_events {
+	EV_EXCEPTION_GEN,
+	EV_RETIRED,
+	EV_L1D_ACCESS,
+	EV_L1D_REFILL,
+	EV_TLB_ACCESS,
+	EV_TLB_REFILL,
+	EV_NOT_TAKEN,
+	EV_MISPRED,
+	EV_LLC_ACCESS,
+	EV_LLC_REFILL,
+	EV_REMOTE_ACCESS,
+};
+
 enum arm_spe_sample_type {
 	ARM_SPE_LLC_MISS	= 1 << 0,
 	ARM_SPE_TLB_MISS	= 1 << 1,
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index bd6d311625c47ad1737ad344fa582144b948cb29..1f97e432d125d1407f231e3e88c43869434b5427 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -31,7 +31,8 @@
 #include "arm-spe-decoder/arm-spe-decoder.h"
 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
 
-#define MAX_TIMESTAMP (~0ULL)
+#define MAX_TIMESTAMP	(~0ULL)
+#define IN_CACHELINE	(0x3FULL)
 
 struct arm_spe {
 	struct auxtrace			auxtrace;
@@ -763,8 +764,8 @@ static void arm_spe_c2c_get_samples(void *arg)
 
 		sampleB = rb_entry(nodeB, struct spe_c2c_sample, rb_node);
 		xor = sampleA->state.phys_addr ^ sampleB->state.phys_addr;
-		if (!(xor & 0xFFFFFFFFFFFFFFC0)
-		    && (tshare || (xor & 0x3F))
+		if (!(xor & (uint64_t)~IN_CACHELINE)
+		    && (tshare || (xor & IN_CACHELINE))
 		    && (sampleA->tid != sampleB->tid)) {
 			pthread_mutex_lock(&mut);
 			arm_spe_c2c_sample(queues, sampleA);