From 3e2d6d5313574022b10640cb30c8a361cc2ca1fa Mon Sep 17 00:00:00 2001 From: Wei Li Date: Mon, 25 Nov 2019 10:58:17 +0800 Subject: [PATCH] arm_spe: fix getting the wrong tid hulk inclusion category: feature bugzilla: 18657 CVE: NA ------------------------------------------- Currently the tid/pid attributed to samples is wrong when recording multiple threads. To fix this, introduce task switch events to identify the thread context; the timestamp of each SPE record is also required. Signed-off-by: Wei Li Reviewed-by: Xuefeng Wang Reviewed-by: Tan Xiaojun Signed-off-by: Yang Yingliang --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/arm-spe.c | 157 +++++++++- tools/perf/arch/arm64/util/tsc.c | 83 ++++++ .../util/arm-spe-decoder/arm-spe-decoder.c | 1 + .../util/arm-spe-decoder/arm-spe-decoder.h | 1 + tools/perf/util/arm-spe.c | 268 ++++++++++++++---- tools/perf/util/arm-spe.h | 6 + 7 files changed, 464 insertions(+), 53 deletions(-) create mode 100644 tools/perf/arch/arm64/util/tsc.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 68f8a8eb3ad0..9962187892c0 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ libperf-y += header.o +libperf-y += tsc.o libperf-y += sym-handling.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 1419391e3a3e..3b1cb50b5083 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -18,6 +18,7 @@ #include "../../util/pmu.h" #include "../../util/debug.h" #include "../../util/auxtrace.h" +#include "../../util/tsc.h" #include "../../util/arm-spe.h" #define KiB(x) ((x) * 1024) @@ -27,6 +28,7 @@ struct arm_spe_recording { struct auxtrace_record itr; struct perf_pmu *arm_spe_pmu; struct perf_evlist *evlist; + int have_sched_switch; }; static size_t @@ -36,6 +38,42 @@ 
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, return ARM_SPE_AUXTRACE_PRIV_SIZE; } +static int arm_spe_parse_terms_with_default(struct list_head *formats, + const char *str, + u64 *config) +{ + struct list_head *terms; + struct perf_event_attr attr = { .size = 0, }; + int err; + + terms = malloc(sizeof(struct list_head)); + if (!terms) + return -ENOMEM; + + INIT_LIST_HEAD(terms); + + err = parse_events_terms(terms, str); + if (err) + goto out_free; + + attr.config = *config; + err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); + if (err) + goto out_free; + + *config = attr.config; +out_free: + parse_events_terms__delete(terms); + return err; +} + +static int arm_spe_parse_terms(struct list_head *formats, const char *str, + u64 *config) +{ + *config = 0; + return arm_spe_parse_terms_with_default(formats, str, config); +} + static int arm_spe_info_fill(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, @@ -44,15 +82,69 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false; + u64 ts_enable; + int err; if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) return -EINVAL; + arm_spe_parse_terms(&arm_spe_pmu->format, "ts_enable", &ts_enable); + if (!session->evlist->nr_mmaps) return -EINVAL; + pc = session->evlist->mmap[0].base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("ARM SPE: TSC not available\n"); + } + auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_TIME_SHIFT] = tc.time_shift; + 
auxtrace_info->priv[ARM_SPE_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[ARM_SPE_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[ARM_SPE_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[ARM_SPE_TS_ENABLE] = ts_enable; + auxtrace_info->priv[ARM_SPE_HAVE_SCHED_SWITCH] = sper->have_sched_switch; + + return 0; +} + +static int arm_spe_track_switches(struct perf_evlist *evlist) +{ + const char *sched_switch = "sched:sched_switch"; + struct perf_evsel *evsel; + int err; + + if (!perf_evlist__can_select_event(evlist, sched_switch)) + return -EPERM; + + err = parse_events(evlist, sched_switch, NULL); + if (err) { + pr_debug2("%s: failed to parse %s, error %d\n", + __func__, sched_switch, err); + return err; + } + + evsel = perf_evlist__last(evlist); + + perf_evsel__set_sample_bit(evsel, CPU); + perf_evsel__set_sample_bit(evsel, TIME); + + evsel->system_wide = true; + evsel->no_aux_samples = true; + evsel->immediate = true; return 0; } @@ -64,7 +156,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + bool need_immediate = false; struct perf_evsel *evsel, *arm_spe_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; struct perf_evsel *tracking_evsel; int err; @@ -84,6 +178,58 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, } } + /* + * Per-cpu recording needs sched_switch events to distinguish different + * threads. 
+ */ + if (!cpu_map__empty(cpus)) { + if (perf_can_record_switch_events()) { + bool cpu_wide = !target__none(&opts->target) && + !target__has_task(&opts->target); + + if (!cpu_wide && perf_can_record_cpu_wide()) { + struct perf_evsel *switch_evsel; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + switch_evsel = perf_evlist__last(evlist); + + switch_evsel->attr.freq = 0; + switch_evsel->attr.sample_period = 1; + switch_evsel->attr.context_switch = 1; + + switch_evsel->system_wide = true; + switch_evsel->no_aux_samples = true; + switch_evsel->immediate = true; + + perf_evsel__set_sample_bit(switch_evsel, TID); + perf_evsel__set_sample_bit(switch_evsel, TIME); + perf_evsel__set_sample_bit(switch_evsel, CPU); + perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); + + opts->record_switch_events = false; + sper->have_sched_switch = 3; + } else { + opts->record_switch_events = true; + need_immediate = true; + if (cpu_wide) + sper->have_sched_switch = 3; + else + sper->have_sched_switch = 2; + } + } else { + err = arm_spe_track_switches(evlist); + if (err == -EPERM) + pr_debug2("Unable to select sched:sched_switch\n"); + else if (err) + return err; + else + sper->have_sched_switch = 1; + } + } + if (!opts->full_auxtrace) return 0; @@ -110,7 +256,6 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, } } - /* * To obtain the auxtrace buffer file descriptor, the auxtrace event * must come first. @@ -131,10 +276,20 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->attr.freq = 0; tracking_evsel->attr.sample_period = 1; + if (need_immediate) + tracking_evsel->immediate = true; perf_evsel__set_sample_bit(tracking_evsel, TIME); perf_evsel__set_sample_bit(tracking_evsel, CPU); perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + /* + * Warn the user when we do not have enough information to decode i.e. + * per-cpu with no sched_switch (except workload-only). 
+ */ + if (!sper->have_sched_switch && !cpu_map__empty(cpus) && + !target__none(&opts->target)) + ui__warning("ARM SPE decoding will not be possible except for kernel tracing!\n"); + return 0; } diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c new file mode 100644 index 000000000000..54107e260535 --- /dev/null +++ b/tools/perf/arch/arm64/util/tsc.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include +#include +#include + +#include "../../perf.h" +#include +#include "../../util/debug.h" +#include "../../util/tsc.h" + +int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, + struct perf_tsc_conversion *tc) +{ + bool cap_user_time_zero; + u32 seq; + int i = 0; + + while (1) { + seq = pc->lock; + rmb(); + tc->time_mult = pc->time_mult; + tc->time_shift = pc->time_shift; + tc->time_zero = pc->time_zero; + cap_user_time_zero = pc->cap_user_time_zero; + rmb(); + if (pc->lock == seq && !(seq & 1)) + break; + if (++i > 10000) { + pr_debug("failed to get perf_event_mmap_page lock\n"); + return -EINVAL; + } + } + + if (!cap_user_time_zero) + return -EOPNOTSUPP; + + return 0; +} + +u64 rdtsc(void) +{ + u64 val; + + asm volatile("mrs %0, CNTVCT_EL0" : "=r" (val)); + + return val; +} + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event = { + .time_conv = { + .header = { + .type = PERF_RECORD_TIME_CONV, + .size = sizeof(struct time_conv_event), + }, + }, + }; + struct perf_tsc_conversion tc; + int err; + + if (!pc) + return 0; + err = perf_read_tsc_conversion(pc, &tc); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + pr_debug2("Synthesizing TSC conversion information\n"); + + event.time_conv.time_mult = tc.time_mult; + event.time_conv.time_shift = tc.time_shift; + event.time_conv.time_zero = tc.time_zero; + + return process(tool, &event, NULL, 
machine); +} diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 608b9372a677..f67e83c7b3ce 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -184,6 +184,7 @@ static int arm_spe_walk_trace(struct arm_spe_decoder *decoder) case ARM_SPE_COUNTER: break; case ARM_SPE_CONTEXT: + decoder->state.contextidr = payload; break; case ARM_SPE_OP_TYPE: if (idx == 0x1) { diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 9d2a96a62cc4..17db7d1c14a9 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -31,6 +31,7 @@ struct arm_spe_state { uint64_t addr; uint64_t phys_addr; uint64_t timestamp; + uint64_t contextidr; /* if available, means tid */ }; struct arm_spe_insn; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index eac09380f5f5..52ba76a6fc17 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -26,6 +26,7 @@ #include "symbol.h" #include "debug.h" #include "auxtrace.h" +#include "tsc.h" #include "arm-spe.h" #include "arm-spe-decoder/arm-spe-decoder.h" #include "arm-spe-decoder/arm-spe-pkt-decoder.h" @@ -57,6 +58,11 @@ struct arm_spe { u64 kernel_start; unsigned long num_events; + int have_sched_switch; + struct perf_evsel *switch_evsel; + u64 ts_bit; + struct perf_tsc_conversion tc; + bool cap_user_time_zero; }; struct arm_spe_queue { @@ -81,6 +87,9 @@ struct arm_spe_queue { struct spe_c2c_sample { struct rb_node rb_node; struct arm_spe_state state; + pid_t pid; + pid_t tid; + }; struct spe_c2c_sample_queues { @@ -272,7 +281,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe, struct perf_sample *sample) { if (!spe->timeless_decoding) - sample->time = speq->timestamp; + sample->time = tsc_to_perf_time(speq->timestamp, &spe->tc); sample->ip = 
speq->state->from_ip; sample->cpumode = arm_spe_cpumode(spe, sample->ip); @@ -387,6 +396,11 @@ static void arm_spe_c2c_queue_store(struct arm_spe_queue *speq, struct rb_root *root; int ret = 0; + if (!speq->have_sample) + return; + + speq->have_sample = false; + if (state->ts && (state->is_ld || state->is_st)) { sample = zalloc(sizeof(struct spe_c2c_sample)); if (!sample) { @@ -397,6 +411,8 @@ static void arm_spe_c2c_queue_store(struct arm_spe_queue *speq, root = state->is_ld ? &(spe_c2cq->ld_list) : &(spe_c2cq->st_list); memcpy(&(sample->state), state, sizeof(struct arm_spe_state)); + sample->pid = speq->pid; + sample->tid = speq->tid; ret = spe_sample_insert(root, sample); if (ret) { @@ -423,6 +439,8 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp, if (!spe->kernel_start) spe->kernel_start = machine__kernel_start(spe->machine); + pr_debug4("queue %u decoding cpu %d pid %d tid %d\n", + speq->queue_nr, speq->cpu, speq->pid, speq->tid); while (1) { if (spe->sample_c2c_mode) { if (spe_c2cq) @@ -445,6 +463,10 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp, speq->state = state; speq->have_sample = true; + if (state->timestamp > speq->timestamp) { + speq->timestamp = state->timestamp; + } + if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { *timestamp = speq->timestamp; return 0; @@ -460,20 +482,20 @@ static int arm_spe__setup_queue(struct arm_spe *spe, { struct arm_spe_queue *speq = queue->priv; - if (list_empty(&queue->head) || speq) + if (list_empty(&queue->head)) return 0; - speq = arm_spe__alloc_queue(spe, queue_nr); - - if (!speq) - return -ENOMEM; - - queue->priv = speq; + if (!speq) { + speq = arm_spe__alloc_queue(spe, queue_nr); + if (!speq) + return -ENOMEM; - if (queue->cpu != -1) - speq->cpu = queue->cpu; + queue->priv = speq; - speq->tid = queue->tid; + if (queue->cpu != -1) + speq->cpu = queue->cpu; + speq->tid = queue->tid; + } if (!speq->on_heap) { const struct arm_spe_state *state; 
@@ -482,18 +504,26 @@ static int arm_spe__setup_queue(struct arm_spe *spe, if (spe->timeless_decoding) return 0; -retry: - state = arm_spe_decode(speq->decoder); - if (state->err) { - if (state->err == -ENODATA) { - pr_debug("queue %u has no timestamp\n", - queue_nr); - return 0; + pr_debug4("queue %u getting timestamp\n", queue_nr); + pr_debug4("queue %u decoding cpu %d pid %d tid %d\n", + queue_nr, speq->cpu, speq->pid, speq->tid); + while (1) { + state = arm_spe_decode(speq->decoder); + if (state->err) { + if (state->err == -ENODATA) { + pr_debug("queue %u has no timestamp\n", + queue_nr); + return 0; + } + continue; } - goto retry; + if (state->timestamp) + break; } speq->timestamp = state->timestamp; + pr_debug4("queue %u timestamp 0x%" PRIx64 "\n", + queue_nr, speq->timestamp); speq->state = state; speq->have_sample = true; ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp); @@ -529,19 +559,36 @@ static int arm_spe__update_queues(struct arm_spe *spe) return 0; } -static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) +static bool arm_spe_get_config(struct arm_spe *spe, + struct perf_event_attr *attr, u64 *config) +{ + if (attr->type == spe->pmu_type) { + if (config) + *config = attr->config; + return true; + } + + return false; +} + +static bool arm_spe_is_timeless_decoding(struct arm_spe *spe) { struct perf_evsel *evsel; - struct perf_evlist *evlist = spe->session->evlist; bool timeless_decoding = true; - - /* - * Circle through the list of event and complain if we find one - * with the time bit set. 
- */ - evlist__for_each_entry(evlist, evsel) { - if ((evsel->attr.sample_type & PERF_SAMPLE_TIME)) - timeless_decoding = false; + u64 config; + + if (!spe->ts_bit || !spe->cap_user_time_zero) + return true; + + evlist__for_each_entry(spe->session->evlist, evsel) { + if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) + return true; + if (arm_spe_get_config(spe, &evsel->attr, &config)) { + if (config & spe->ts_bit) + timeless_decoding = false; + else + return true; + } } return timeless_decoding; @@ -552,7 +599,7 @@ static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, { struct arm_spe_queue *speq = queue->priv; - if (queue->tid == -1) { + if (queue->tid == -1 || spe->have_sched_switch) { speq->tid = machine__get_current_tid(spe->machine, speq->cpu); thread__zput(speq->thread); } @@ -587,7 +634,8 @@ arm_spe_get_c2c_queue(struct arm_spe_queue *speq) } } - pr_warning("spe_c2c: Now only support sample for two cpus!\n"); + pr_warning("spe_c2c: Now only support sample for %u cpus!\n", + SPE_C2C_SAMPLE_Q_MAX); return NULL; } @@ -606,7 +654,6 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) if (!spe->heap.heap_cnt) { return 0; } - if (spe->heap.heap_array[0].ordinal >= timestamp) { return 0; } @@ -614,6 +661,10 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) queue = &spe->queues.queue_array[queue_nr]; speq = queue->priv; + pr_debug4("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", + queue_nr, spe->heap.heap_array[0].ordinal, + timestamp); + auxtrace_heap__pop(&spe->heap); if (spe->heap.heap_cnt) { @@ -677,8 +728,8 @@ static void arm_spe_c2c_sample(struct spe_c2c_sample_queues *c2c_queues, sample.ip = c2c_sample->state.from_ip; sample.cpumode = arm_spe_cpumode(speq->spe, sample.ip); - sample.pid = speq->pid; - sample.tid = speq->tid; + sample.pid = c2c_sample->pid; + sample.tid = c2c_sample->tid; sample.addr = c2c_sample->state.addr; sample.data_src = src.val; sample.phys_addr = c2c_sample->state.phys_addr; @@ -710,7 
+761,8 @@ static void arm_spe_c2c_get_samples(void *arg) sampleB = rb_entry(nodeB, struct spe_c2c_sample, rb_node); xor = sampleA->state.phys_addr ^ sampleB->state.phys_addr; - if (!(xor & 0xFFFFFFFFFFFFFFC0) && (xor & 0x3F)) { + if (!(xor & 0xFFFFFFFFFFFFFFC0) && (xor & 0x3F) + && sampleA->tid != sampleB->tid) { pthread_mutex_lock(&mut); arm_spe_c2c_sample(queues, sampleA); arm_spe_c2c_sample(oppoqs, sampleB); @@ -799,6 +851,66 @@ static int arm_spe_c2c_process(struct arm_spe *spe __maybe_unused) return ret; } +static int arm_spe_process_switch(struct arm_spe *spe, + struct perf_sample *sample) +{ + struct perf_evsel *evsel; + pid_t tid; + int cpu; + + evsel = perf_evlist__id2evsel(spe->session->evlist, sample->id); + if (evsel != spe->switch_evsel) + return 0; + + tid = perf_evsel__intval(evsel, sample, "next_pid"); + cpu = sample->cpu; + + pr_debug4("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, tid, sample->time, perf_time_to_tsc(sample->time, + &spe->tc)); + + return machine__set_current_tid(spe->machine, cpu, -1, tid); +} + +static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, + struct perf_sample *sample) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + pid_t pid, tid; + int cpu; + + cpu = sample->cpu; + + if (out) + return 0; + pid = sample->pid; + tid = sample->tid; + + if (tid == -1) { + pr_err("context_switch event has no tid\n"); + return -EINVAL; + } + + pr_debug4("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, + &spe->tc)); + + return machine__set_current_tid(spe->machine, cpu, pid, tid); +} + +static int arm_spe_process_itrace_start(struct arm_spe *spe, + union perf_event *event, + struct perf_sample *sample) +{ + pr_debug4("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + sample->cpu, event->itrace_start.pid, + event->itrace_start.tid, sample->time, + 
perf_time_to_tsc(sample->time, &spe->tc)); + + return machine__set_current_tid(spe->machine, sample->cpu, + event->itrace_start.pid, + event->itrace_start.tid); +} static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, u64 time_) { @@ -834,12 +946,12 @@ static int arm_spe_process_event(struct perf_session *session, return 0; if (!tool->ordered_events) { - pr_err("CoreSight SPE Trace requires ordered events\n"); + pr_err("ARM SPE requires ordered events\n"); return -EINVAL; } if (sample->time && (sample->time != (u64) -1)) - timestamp = sample->time; + timestamp = perf_time_to_tsc(sample->time, &spe->tc); else timestamp = 0; @@ -856,19 +968,23 @@ static int arm_spe_process_event(struct perf_session *session, sample->time); } } else if (timestamp) { - if (event->header.type == PERF_RECORD_EXIT) { - err = arm_spe_process_queues(spe, timestamp); - if (err) - return err; - - if (spe->sample_c2c_mode) { - err = arm_spe_c2c_process(spe); - if (err) - return err; - } - } + err = arm_spe_process_queues(spe, timestamp); + if (err) + return err; } + if (spe->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) + err = arm_spe_process_switch(spe, sample); + else if (event->header.type == PERF_RECORD_ITRACE_START) + err = arm_spe_process_itrace_start(spe, event, sample); + else if (event->header.type == PERF_RECORD_SWITCH || + event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) + err = arm_spe_context_switch(spe, event, sample); + + pr_debug4("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", + perf_event__name(event->header.type), event->header.type, + sample->cpu, sample->time, timestamp); + return err; } @@ -1033,6 +1149,32 @@ static void arm_spe_set_event_name(struct perf_evlist *evlist, u64 id, } } +static struct perf_evsel *arm_spe_find_sched_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry_reverse(evlist, evsel) { + const char *name = perf_evsel__name(evsel); + + if (!strcmp(name, 
"sched:sched_switch")) + return evsel; + } + + return NULL; +} + +static bool arm_spe_find_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.context_switch) + return true; + } + + return false; +} + static int arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) { struct perf_evlist *evlist = session->evlist; @@ -1050,7 +1192,7 @@ static int arm_spe_synth_events(struct arm_spe *spe, struct perf_session *sessio } if (!found) { - pr_debug("No selected events with CoreSight Trace data\n"); + pr_debug("No selected events with ARM SPE data\n"); return 0; } @@ -1139,7 +1281,6 @@ int arm_spe_process_auxtrace_info(union perf_event *event, struct arm_spe *spe; int err; - if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + min_sz) return -EINVAL; @@ -1156,8 +1297,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->machine = &session->machines.host; /* No kvm support */ spe->auxtrace_type = auxtrace_info->type; spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; - - spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); + spe->tc.time_shift = auxtrace_info->priv[ARM_SPE_TIME_SHIFT]; + spe->tc.time_mult = auxtrace_info->priv[ARM_SPE_TIME_MULT]; + spe->tc.time_zero = auxtrace_info->priv[ARM_SPE_TIME_ZERO]; + spe->cap_user_time_zero = auxtrace_info->priv[ARM_SPE_CAP_USER_TIME_ZERO]; + spe->ts_bit = auxtrace_info->priv[ARM_SPE_TS_ENABLE]; + spe->have_sched_switch = auxtrace_info->priv[ARM_SPE_HAVE_SCHED_SWITCH]; + + spe->timeless_decoding = arm_spe_is_timeless_decoding(spe); spe->auxtrace.process_event = arm_spe_process_event; spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; spe->auxtrace.flush_events = arm_spe_flush; @@ -1170,6 +1317,20 @@ int arm_spe_process_auxtrace_info(union perf_event *event, if (dump_trace) return 0; + if (spe->have_sched_switch == 1) { + spe->switch_evsel = 
arm_spe_find_sched_switch(session->evlist); + if (!spe->switch_evsel) { + pr_err("%s: missing sched_switch event\n", __func__); + err = -EINVAL; + goto err_free_queues; + } + } else if (spe->have_sched_switch == 2 && + !arm_spe_find_switch(session->evlist)) { + pr_err("%s: missing context_switch attribute flag\n", __func__); + err = -EINVAL; + goto err_free_queues; + } + if (session->arm_spe_synth_opts && (session->arm_spe_synth_opts->set || session->arm_spe_synth_opts->c2c_mode)) spe->synth_opts = *session->arm_spe_synth_opts; @@ -1192,6 +1353,9 @@ int arm_spe_process_auxtrace_info(union perf_event *event, if (spe->queues.populated) spe->data_queued = true; + if (spe->timeless_decoding) + pr_debug2("ARM SPE decoding without timestamps\n"); + return 0; err_free_queues: diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h index 98d3235781c3..2610a56af879 100644 --- a/tools/perf/util/arm-spe.h +++ b/tools/perf/util/arm-spe.h @@ -12,6 +12,12 @@ enum { ARM_SPE_PMU_TYPE, ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_TIME_SHIFT, + ARM_SPE_TIME_MULT, + ARM_SPE_CAP_USER_TIME_ZERO, + ARM_SPE_TIME_ZERO, + ARM_SPE_TS_ENABLE, + ARM_SPE_HAVE_SCHED_SWITCH, ARM_SPE_AUXTRACE_PRIV_MAX, }; -- GitLab