提交 3e2d6d53 编写于 作者: W Wei Li 提交者: Xie XiuQi

arm_spe: fix getting the wrong tid

hulk inclusion
category: feature
bugzilla: 18657
CVE: NA

-------------------------------------------

Currently the tid/pid attributed to samples is wrong when recording
multi-threaded workloads. To fix this, we introduce task switch events
to recognize the thread context; the timestamp of each SPE record is
also required to correlate records with those events.
Signed-off-by: Wei Li <liwei391@huawei.com>
Reviewed-by: Xuefeng Wang <wxf.wang@hisilicon.com>
Reviewed-by: Tan Xiaojun <tanxiaojun@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 86656703
libperf-y += header.o libperf-y += header.o
libperf-y += tsc.o
libperf-y += sym-handling.o libperf-y += sym-handling.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "../../util/pmu.h" #include "../../util/pmu.h"
#include "../../util/debug.h" #include "../../util/debug.h"
#include "../../util/auxtrace.h" #include "../../util/auxtrace.h"
#include "../../util/tsc.h"
#include "../../util/arm-spe.h" #include "../../util/arm-spe.h"
#define KiB(x) ((x) * 1024) #define KiB(x) ((x) * 1024)
...@@ -27,6 +28,7 @@ struct arm_spe_recording { ...@@ -27,6 +28,7 @@ struct arm_spe_recording {
struct auxtrace_record itr; struct auxtrace_record itr;
struct perf_pmu *arm_spe_pmu; struct perf_pmu *arm_spe_pmu;
struct perf_evlist *evlist; struct perf_evlist *evlist;
int have_sched_switch;
}; };
static size_t static size_t
...@@ -36,6 +38,42 @@ arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, ...@@ -36,6 +38,42 @@ arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
return ARM_SPE_AUXTRACE_PRIV_SIZE; return ARM_SPE_AUXTRACE_PRIV_SIZE;
} }
/*
 * Parse the event term string @str and apply the resulting terms, via the
 * PMU format list @formats, on top of the value currently stored in @config.
 *
 * @formats: PMU format list handed to perf_pmu__config_terms().
 * @str:     term string handed to parse_events_terms().
 * @config:  in/out; read as the starting config value and overwritten with
 *           the resulting attr.config only when both steps succeed.
 *
 * Returns 0 on success, -ENOMEM if the term list head cannot be allocated,
 * or the error returned by parse_events_terms() / perf_pmu__config_terms().
 */
static int arm_spe_parse_terms_with_default(struct list_head *formats,
					    const char *str,
					    u64 *config)
{
	struct perf_event_attr attr = { .size = 0, };
	struct list_head *term_list = malloc(sizeof(*term_list));
	int ret;

	if (term_list == NULL)
		return -ENOMEM;

	INIT_LIST_HEAD(term_list);

	ret = parse_events_terms(term_list, str);
	if (!ret) {
		/* Seed the attr with the caller's value before applying terms. */
		attr.config = *config;
		ret = perf_pmu__config_terms(formats, &attr, term_list, true, NULL);
		if (!ret)
			*config = attr.config;
	}

	/* The list is handed back on every path once it has been allocated. */
	parse_events_terms__delete(term_list);
	return ret;
}
/*
 * Same as arm_spe_parse_terms_with_default(), but with @config cleared to 0
 * first, so the value produced does not depend on what @config held on
 * entry.
 *
 * Note that @config is zeroed even when parsing subsequently fails.
 *
 * Returns whatever arm_spe_parse_terms_with_default() returns.
 */
static int arm_spe_parse_terms(struct list_head *formats, const char *str,
			       u64 *config)
{
	*config = 0;

	return arm_spe_parse_terms_with_default(formats, str, config);
}
static int arm_spe_info_fill(struct auxtrace_record *itr, static int arm_spe_info_fill(struct auxtrace_record *itr,
struct perf_session *session, struct perf_session *session,
struct auxtrace_info_event *auxtrace_info, struct auxtrace_info_event *auxtrace_info,
...@@ -44,15 +82,69 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, ...@@ -44,15 +82,69 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
struct arm_spe_recording *sper = struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr); container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
struct perf_event_mmap_page *pc;
struct perf_tsc_conversion tc = { .time_mult = 0, };
bool cap_user_time_zero = false;
u64 ts_enable;
int err;
if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
return -EINVAL; return -EINVAL;
arm_spe_parse_terms(&arm_spe_pmu->format, "ts_enable", &ts_enable);
if (!session->evlist->nr_mmaps) if (!session->evlist->nr_mmaps)
return -EINVAL; return -EINVAL;
pc = session->evlist->mmap[0].base;
if (pc) {
err = perf_read_tsc_conversion(pc, &tc);
if (err) {
if (err != -EOPNOTSUPP)
return err;
} else {
cap_user_time_zero = tc.time_mult != 0;
}
if (!cap_user_time_zero)
ui__warning("ARM SPE: TSC not available\n");
}
auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
auxtrace_info->priv[ARM_SPE_TIME_SHIFT] = tc.time_shift;
auxtrace_info->priv[ARM_SPE_TIME_MULT] = tc.time_mult;
auxtrace_info->priv[ARM_SPE_TIME_ZERO] = tc.time_zero;
auxtrace_info->priv[ARM_SPE_CAP_USER_TIME_ZERO] = cap_user_time_zero;
auxtrace_info->priv[ARM_SPE_TS_ENABLE] = ts_enable;
auxtrace_info->priv[ARM_SPE_HAVE_SCHED_SWITCH] = sper->have_sched_switch;
return 0;
}
/*
 * Add a "sched:sched_switch" event to @evlist and configure it.
 *
 * Returns -EPERM when perf_evlist__can_select_event() rejects the event, the
 * parse_events() error when parsing fails, and 0 otherwise.
 *
 * NOTE(review): the doubled tokens on the final two lines look like residue
 * of the side-by-side diff rendering, not source text - confirm against the
 * real file.
 */
static int arm_spe_track_switches(struct perf_evlist *evlist)
{
const char *sched_switch = "sched:sched_switch";
struct perf_evsel *evsel;
int err;
if (!perf_evlist__can_select_event(evlist, sched_switch))
return -EPERM;
err = parse_events(evlist, sched_switch, NULL);
if (err) {
pr_debug2("%s: failed to parse %s, error %d\n",
__func__, sched_switch, err);
return err;
}
/* The event just parsed is taken to be the last entry of the list. */
evsel = perf_evlist__last(evlist);
/* Request CPU and TIME in each sample of this event. */
perf_evsel__set_sample_bit(evsel, CPU);
perf_evsel__set_sample_bit(evsel, TIME);
evsel->system_wide = true;
evsel->no_aux_samples = true;
evsel->immediate = true;
return 0; return 0;
} }
...@@ -64,7 +156,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, ...@@ -64,7 +156,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
struct arm_spe_recording *sper = struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr); container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
bool need_immediate = false;
struct perf_evsel *evsel, *arm_spe_evsel = NULL; struct perf_evsel *evsel, *arm_spe_evsel = NULL;
const struct cpu_map *cpus = evlist->cpus;
bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
struct perf_evsel *tracking_evsel; struct perf_evsel *tracking_evsel;
int err; int err;
...@@ -84,6 +178,58 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, ...@@ -84,6 +178,58 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
} }
} }
/*
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
if (!cpu_map__empty(cpus)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
!target__has_task(&opts->target);
if (!cpu_wide && perf_can_record_cpu_wide()) {
struct perf_evsel *switch_evsel;
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
switch_evsel = perf_evlist__last(evlist);
switch_evsel->attr.freq = 0;
switch_evsel->attr.sample_period = 1;
switch_evsel->attr.context_switch = 1;
switch_evsel->system_wide = true;
switch_evsel->no_aux_samples = true;
switch_evsel->immediate = true;
perf_evsel__set_sample_bit(switch_evsel, TID);
perf_evsel__set_sample_bit(switch_evsel, TIME);
perf_evsel__set_sample_bit(switch_evsel, CPU);
perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
opts->record_switch_events = false;
sper->have_sched_switch = 3;
} else {
opts->record_switch_events = true;
need_immediate = true;
if (cpu_wide)
sper->have_sched_switch = 3;
else
sper->have_sched_switch = 2;
}
} else {
err = arm_spe_track_switches(evlist);
if (err == -EPERM)
pr_debug2("Unable to select sched:sched_switch\n");
else if (err)
return err;
else
sper->have_sched_switch = 1;
}
}
if (!opts->full_auxtrace) if (!opts->full_auxtrace)
return 0; return 0;
...@@ -110,7 +256,6 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, ...@@ -110,7 +256,6 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
} }
} }
/* /*
* To obtain the auxtrace buffer file descriptor, the auxtrace event * To obtain the auxtrace buffer file descriptor, the auxtrace event
* must come first. * must come first.
...@@ -131,10 +276,20 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, ...@@ -131,10 +276,20 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->attr.freq = 0; tracking_evsel->attr.freq = 0;
tracking_evsel->attr.sample_period = 1; tracking_evsel->attr.sample_period = 1;
if (need_immediate)
tracking_evsel->immediate = true;
perf_evsel__set_sample_bit(tracking_evsel, TIME); perf_evsel__set_sample_bit(tracking_evsel, TIME);
perf_evsel__set_sample_bit(tracking_evsel, CPU); perf_evsel__set_sample_bit(tracking_evsel, CPU);
perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
/*
* Warn the user when we do not have enough information to decode i.e.
* per-cpu with no sched_switch (except workload-only).
*/
if (!sper->have_sched_switch && !cpu_map__empty(cpus) &&
!target__none(&opts->target))
ui__warning("ARM SPE decoding will not be possible except for kernel tracing!\n");
return 0; return 0;
} }
......
// SPDX-License-Identifier: GPL-2.0
#include <stdbool.h>
#include <errno.h>
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/stringify.h>
#include "../../perf.h"
#include <linux/types.h>
#include "../../util/debug.h"
#include "../../util/tsc.h"
/*
 * Copy the time conversion parameters (time_mult, time_shift, time_zero)
 * from the mmap control page @pc into @tc.
 *
 * The fields are re-read until pc->lock holds the same, even value before
 * and after the copy. After more than 10000 failed attempts the function
 * gives up.
 *
 * Returns 0 on success, -EINVAL if no stable snapshot could be obtained, or
 * -EOPNOTSUPP if the page's cap_user_time_zero flag is clear (@tc has still
 * been filled in by then).
 */
int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
			     struct perf_tsc_conversion *tc)
{
	bool have_time_zero;
	u32 seq;
	int attempt;

	for (attempt = 1; ; attempt++) {
		seq = pc->lock;
		rmb();

		tc->time_mult = pc->time_mult;
		tc->time_shift = pc->time_shift;
		tc->time_zero = pc->time_zero;
		have_time_zero = pc->cap_user_time_zero;

		rmb();
		/* Accept the copy only if the lock word is unchanged and even. */
		if (pc->lock == seq && !(seq & 1))
			break;

		if (attempt > 10000) {
			pr_debug("failed to get perf_event_mmap_page lock\n");
			return -EINVAL;
		}
	}

	return have_time_zero ? 0 : -EOPNOTSUPP;
}
/*
 * Return the current value of the CNTVCT_EL0 system register, read with a
 * single "mrs" instruction.
 *
 * NOTE(review): no barrier instruction accompanies the read in this block;
 * confirm whether any caller relies on ordering against surrounding code.
 */
u64 rdtsc(void)
{
u64 val;
/* "volatile" keeps the compiler from dropping or merging the register read. */
asm volatile("mrs %0, CNTVCT_EL0" : "=r" (val));
return val;
}
/*
 * Build a PERF_RECORD_TIME_CONV event from the conversion parameters found
 * in the mmap control page @pc and pass it to @process.
 *
 * @pc:      mmap control page; when NULL nothing is synthesized.
 * @tool:    forwarded to @process.
 * @process: callback that receives the synthesized event.
 * @machine: forwarded to @process.
 *
 * Returns 0 when there is nothing to synthesize (@pc is NULL, or
 * perf_read_tsc_conversion() reports -EOPNOTSUPP), any other error from
 * perf_read_tsc_conversion(), or the return value of @process.
 */
int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
				struct perf_tool *tool,
				perf_event__handler_t process,
				struct machine *machine)
{
	union perf_event event = {
		.time_conv = {
			.header = {
				.type = PERF_RECORD_TIME_CONV,
				.size = sizeof(struct time_conv_event),
			},
		},
	};
	struct perf_tsc_conversion conv;
	int ret;

	if (pc == NULL)
		return 0;

	ret = perf_read_tsc_conversion(pc, &conv);
	switch (ret) {
	case 0:
		break;
	case -EOPNOTSUPP:
		/* No usable conversion data: not an error, just skip the event. */
		return 0;
	default:
		return ret;
	}

	pr_debug2("Synthesizing TSC conversion information\n");

	event.time_conv.time_mult = conv.time_mult;
	event.time_conv.time_shift = conv.time_shift;
	event.time_conv.time_zero = conv.time_zero;

	return process(tool, &event, NULL, machine);
}
...@@ -184,6 +184,7 @@ static int arm_spe_walk_trace(struct arm_spe_decoder *decoder) ...@@ -184,6 +184,7 @@ static int arm_spe_walk_trace(struct arm_spe_decoder *decoder)
case ARM_SPE_COUNTER: case ARM_SPE_COUNTER:
break; break;
case ARM_SPE_CONTEXT: case ARM_SPE_CONTEXT:
decoder->state.contextidr = payload;
break; break;
case ARM_SPE_OP_TYPE: case ARM_SPE_OP_TYPE:
if (idx == 0x1) { if (idx == 0x1) {
......
...@@ -31,6 +31,7 @@ struct arm_spe_state { ...@@ -31,6 +31,7 @@ struct arm_spe_state {
uint64_t addr; uint64_t addr;
uint64_t phys_addr; uint64_t phys_addr;
uint64_t timestamp; uint64_t timestamp;
uint64_t contextidr; /* if available, means tid */
}; };
struct arm_spe_insn; struct arm_spe_insn;
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "symbol.h" #include "symbol.h"
#include "debug.h" #include "debug.h"
#include "auxtrace.h" #include "auxtrace.h"
#include "tsc.h"
#include "arm-spe.h" #include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h" #include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h" #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
...@@ -57,6 +58,11 @@ struct arm_spe { ...@@ -57,6 +58,11 @@ struct arm_spe {
u64 kernel_start; u64 kernel_start;
unsigned long num_events; unsigned long num_events;
int have_sched_switch;
struct perf_evsel *switch_evsel;
u64 ts_bit;
struct perf_tsc_conversion tc;
bool cap_user_time_zero;
}; };
struct arm_spe_queue { struct arm_spe_queue {
...@@ -81,6 +87,9 @@ struct arm_spe_queue { ...@@ -81,6 +87,9 @@ struct arm_spe_queue {
struct spe_c2c_sample { struct spe_c2c_sample {
struct rb_node rb_node; struct rb_node rb_node;
struct arm_spe_state state; struct arm_spe_state state;
pid_t pid;
pid_t tid;
}; };
struct spe_c2c_sample_queues { struct spe_c2c_sample_queues {
...@@ -272,7 +281,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe, ...@@ -272,7 +281,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
struct perf_sample *sample) struct perf_sample *sample)
{ {
if (!spe->timeless_decoding) if (!spe->timeless_decoding)
sample->time = speq->timestamp; sample->time = tsc_to_perf_time(speq->timestamp, &spe->tc);
sample->ip = speq->state->from_ip; sample->ip = speq->state->from_ip;
sample->cpumode = arm_spe_cpumode(spe, sample->ip); sample->cpumode = arm_spe_cpumode(spe, sample->ip);
...@@ -387,6 +396,11 @@ static void arm_spe_c2c_queue_store(struct arm_spe_queue *speq, ...@@ -387,6 +396,11 @@ static void arm_spe_c2c_queue_store(struct arm_spe_queue *speq,
struct rb_root *root; struct rb_root *root;
int ret = 0; int ret = 0;
if (!speq->have_sample)
return;
speq->have_sample = false;
if (state->ts && (state->is_ld || state->is_st)) { if (state->ts && (state->is_ld || state->is_st)) {
sample = zalloc(sizeof(struct spe_c2c_sample)); sample = zalloc(sizeof(struct spe_c2c_sample));
if (!sample) { if (!sample) {
...@@ -397,6 +411,8 @@ static void arm_spe_c2c_queue_store(struct arm_spe_queue *speq, ...@@ -397,6 +411,8 @@ static void arm_spe_c2c_queue_store(struct arm_spe_queue *speq,
root = state->is_ld ? &(spe_c2cq->ld_list) : &(spe_c2cq->st_list); root = state->is_ld ? &(spe_c2cq->ld_list) : &(spe_c2cq->st_list);
memcpy(&(sample->state), state, sizeof(struct arm_spe_state)); memcpy(&(sample->state), state, sizeof(struct arm_spe_state));
sample->pid = speq->pid;
sample->tid = speq->tid;
ret = spe_sample_insert(root, sample); ret = spe_sample_insert(root, sample);
if (ret) { if (ret) {
...@@ -423,6 +439,8 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp, ...@@ -423,6 +439,8 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp,
if (!spe->kernel_start) if (!spe->kernel_start)
spe->kernel_start = machine__kernel_start(spe->machine); spe->kernel_start = machine__kernel_start(spe->machine);
pr_debug4("queue %u decoding cpu %d pid %d tid %d\n",
speq->queue_nr, speq->cpu, speq->pid, speq->tid);
while (1) { while (1) {
if (spe->sample_c2c_mode) { if (spe->sample_c2c_mode) {
if (spe_c2cq) if (spe_c2cq)
...@@ -445,6 +463,10 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp, ...@@ -445,6 +463,10 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp,
speq->state = state; speq->state = state;
speq->have_sample = true; speq->have_sample = true;
if (state->timestamp > speq->timestamp) {
speq->timestamp = state->timestamp;
}
if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
*timestamp = speq->timestamp; *timestamp = speq->timestamp;
return 0; return 0;
...@@ -460,20 +482,20 @@ static int arm_spe__setup_queue(struct arm_spe *spe, ...@@ -460,20 +482,20 @@ static int arm_spe__setup_queue(struct arm_spe *spe,
{ {
struct arm_spe_queue *speq = queue->priv; struct arm_spe_queue *speq = queue->priv;
if (list_empty(&queue->head) || speq) if (list_empty(&queue->head))
return 0; return 0;
speq = arm_spe__alloc_queue(spe, queue_nr); if (!speq) {
speq = arm_spe__alloc_queue(spe, queue_nr);
if (!speq) if (!speq)
return -ENOMEM; return -ENOMEM;
queue->priv = speq;
if (queue->cpu != -1) queue->priv = speq;
speq->cpu = queue->cpu;
speq->tid = queue->tid; if (queue->cpu != -1)
speq->cpu = queue->cpu;
speq->tid = queue->tid;
}
if (!speq->on_heap) { if (!speq->on_heap) {
const struct arm_spe_state *state; const struct arm_spe_state *state;
...@@ -482,18 +504,26 @@ static int arm_spe__setup_queue(struct arm_spe *spe, ...@@ -482,18 +504,26 @@ static int arm_spe__setup_queue(struct arm_spe *spe,
if (spe->timeless_decoding) if (spe->timeless_decoding)
return 0; return 0;
retry: pr_debug4("queue %u getting timestamp\n", queue_nr);
state = arm_spe_decode(speq->decoder); pr_debug4("queue %u decoding cpu %d pid %d tid %d\n",
if (state->err) { queue_nr, speq->cpu, speq->pid, speq->tid);
if (state->err == -ENODATA) { while (1) {
pr_debug("queue %u has no timestamp\n", state = arm_spe_decode(speq->decoder);
queue_nr); if (state->err) {
return 0; if (state->err == -ENODATA) {
pr_debug("queue %u has no timestamp\n",
queue_nr);
return 0;
}
continue;
} }
goto retry; if (state->timestamp)
break;
} }
speq->timestamp = state->timestamp; speq->timestamp = state->timestamp;
pr_debug4("queue %u timestamp 0x%" PRIx64 "\n",
queue_nr, speq->timestamp);
speq->state = state; speq->state = state;
speq->have_sample = true; speq->have_sample = true;
ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp); ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
...@@ -529,19 +559,36 @@ static int arm_spe__update_queues(struct arm_spe *spe) ...@@ -529,19 +559,36 @@ static int arm_spe__update_queues(struct arm_spe *spe)
return 0; return 0;
} }
static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) static bool arm_spe_get_config(struct arm_spe *spe,
struct perf_event_attr *attr, u64 *config)
{
if (attr->type == spe->pmu_type) {
if (config)
*config = attr->config;
return true;
}
return false;
}
static bool arm_spe_is_timeless_decoding(struct arm_spe *spe)
{ {
struct perf_evsel *evsel; struct perf_evsel *evsel;
struct perf_evlist *evlist = spe->session->evlist;
bool timeless_decoding = true; bool timeless_decoding = true;
u64 config;
/*
* Circle through the list of event and complain if we find one if (!spe->ts_bit || !spe->cap_user_time_zero)
* with the time bit set. return true;
*/
evlist__for_each_entry(evlist, evsel) { evlist__for_each_entry(spe->session->evlist, evsel) {
if ((evsel->attr.sample_type & PERF_SAMPLE_TIME)) if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
timeless_decoding = false; return true;
if (arm_spe_get_config(spe, &evsel->attr, &config)) {
if (config & spe->ts_bit)
timeless_decoding = false;
else
return true;
}
} }
return timeless_decoding; return timeless_decoding;
...@@ -552,7 +599,7 @@ static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, ...@@ -552,7 +599,7 @@ static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
{ {
struct arm_spe_queue *speq = queue->priv; struct arm_spe_queue *speq = queue->priv;
if (queue->tid == -1) { if (queue->tid == -1 || spe->have_sched_switch) {
speq->tid = machine__get_current_tid(spe->machine, speq->cpu); speq->tid = machine__get_current_tid(spe->machine, speq->cpu);
thread__zput(speq->thread); thread__zput(speq->thread);
} }
...@@ -587,7 +634,8 @@ arm_spe_get_c2c_queue(struct arm_spe_queue *speq) ...@@ -587,7 +634,8 @@ arm_spe_get_c2c_queue(struct arm_spe_queue *speq)
} }
} }
pr_warning("spe_c2c: Now only support sample for two cpus!\n"); pr_warning("spe_c2c: Now only support sample for %u cpus!\n",
SPE_C2C_SAMPLE_Q_MAX);
return NULL; return NULL;
} }
...@@ -606,7 +654,6 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) ...@@ -606,7 +654,6 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
if (!spe->heap.heap_cnt) { if (!spe->heap.heap_cnt) {
return 0; return 0;
} }
if (spe->heap.heap_array[0].ordinal >= timestamp) { if (spe->heap.heap_array[0].ordinal >= timestamp) {
return 0; return 0;
} }
...@@ -614,6 +661,10 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) ...@@ -614,6 +661,10 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
queue = &spe->queues.queue_array[queue_nr]; queue = &spe->queues.queue_array[queue_nr];
speq = queue->priv; speq = queue->priv;
pr_debug4("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
queue_nr, spe->heap.heap_array[0].ordinal,
timestamp);
auxtrace_heap__pop(&spe->heap); auxtrace_heap__pop(&spe->heap);
if (spe->heap.heap_cnt) { if (spe->heap.heap_cnt) {
...@@ -677,8 +728,8 @@ static void arm_spe_c2c_sample(struct spe_c2c_sample_queues *c2c_queues, ...@@ -677,8 +728,8 @@ static void arm_spe_c2c_sample(struct spe_c2c_sample_queues *c2c_queues,
sample.ip = c2c_sample->state.from_ip; sample.ip = c2c_sample->state.from_ip;
sample.cpumode = arm_spe_cpumode(speq->spe, sample.ip); sample.cpumode = arm_spe_cpumode(speq->spe, sample.ip);
sample.pid = speq->pid; sample.pid = c2c_sample->pid;
sample.tid = speq->tid; sample.tid = c2c_sample->tid;
sample.addr = c2c_sample->state.addr; sample.addr = c2c_sample->state.addr;
sample.data_src = src.val; sample.data_src = src.val;
sample.phys_addr = c2c_sample->state.phys_addr; sample.phys_addr = c2c_sample->state.phys_addr;
...@@ -710,7 +761,8 @@ static void arm_spe_c2c_get_samples(void *arg) ...@@ -710,7 +761,8 @@ static void arm_spe_c2c_get_samples(void *arg)
sampleB = rb_entry(nodeB, struct spe_c2c_sample, rb_node); sampleB = rb_entry(nodeB, struct spe_c2c_sample, rb_node);
xor = sampleA->state.phys_addr ^ sampleB->state.phys_addr; xor = sampleA->state.phys_addr ^ sampleB->state.phys_addr;
if (!(xor & 0xFFFFFFFFFFFFFFC0) && (xor & 0x3F)) { if (!(xor & 0xFFFFFFFFFFFFFFC0) && (xor & 0x3F)
&& sampleA->tid != sampleB->tid) {
pthread_mutex_lock(&mut); pthread_mutex_lock(&mut);
arm_spe_c2c_sample(queues, sampleA); arm_spe_c2c_sample(queues, sampleA);
arm_spe_c2c_sample(oppoqs, sampleB); arm_spe_c2c_sample(oppoqs, sampleB);
...@@ -799,6 +851,66 @@ static int arm_spe_c2c_process(struct arm_spe *spe __maybe_unused) ...@@ -799,6 +851,66 @@ static int arm_spe_c2c_process(struct arm_spe *spe __maybe_unused)
return ret; return ret;
} }
/*
 * Handle a sample that may belong to the sched_switch event.
 *
 * The sample's id is mapped back to its evsel; samples from any evsel other
 * than spe->switch_evsel are ignored. Otherwise the "next_pid" field of the
 * sample is recorded as the current tid of the sample's CPU, with the pid
 * passed as -1.
 *
 * Returns 0 for ignored samples, else the result of
 * machine__set_current_tid().
 */
static int arm_spe_process_switch(struct arm_spe *spe,
				  struct perf_sample *sample)
{
	struct perf_evsel *sample_evsel =
		perf_evlist__id2evsel(spe->session->evlist, sample->id);
	pid_t next_tid;
	int cpu_nr;

	if (sample_evsel != spe->switch_evsel)
		return 0;

	cpu_nr = sample->cpu;
	next_tid = perf_evsel__intval(sample_evsel, sample, "next_pid");

	pr_debug4("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		  cpu_nr, next_tid, sample->time,
		  perf_time_to_tsc(sample->time, &spe->tc));

	return machine__set_current_tid(spe->machine, cpu_nr, -1, next_tid);
}
/*
 * Handle a context switch event.
 *
 * Events with PERF_RECORD_MISC_SWITCH_OUT set in header.misc are ignored.
 * For the others, the sample's pid/tid are recorded as the current task of
 * the sample's CPU.
 *
 * Returns 0 for switch-out events, -EINVAL when the sample carries no tid
 * (tid == -1), else the result of machine__set_current_tid().
 */
static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	int cpu_nr = sample->cpu;
	pid_t in_pid, in_tid;

	if (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)
		return 0;

	in_pid = sample->pid;
	in_tid = sample->tid;

	if (in_tid == -1) {
		pr_err("context_switch event has no tid\n");
		return -EINVAL;
	}

	pr_debug4("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		  cpu_nr, in_pid, in_tid, sample->time,
		  perf_time_to_tsc(sample->time, &spe->tc));

	return machine__set_current_tid(spe->machine, cpu_nr, in_pid, in_tid);
}
/*
 * Handle an itrace start event: record the pid/tid carried in
 * event->itrace_start as the current task of the sample's CPU.
 *
 * Returns the result of machine__set_current_tid().
 */
static int arm_spe_process_itrace_start(struct arm_spe *spe,
					union perf_event *event,
					struct perf_sample *sample)
{
	pr_debug4("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		  sample->cpu,
		  event->itrace_start.pid,
		  event->itrace_start.tid,
		  sample->time,
		  perf_time_to_tsc(sample->time, &spe->tc));

	return machine__set_current_tid(spe->machine,
					sample->cpu,
					event->itrace_start.pid,
					event->itrace_start.tid);
}
static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
u64 time_) u64 time_)
{ {
...@@ -834,12 +946,12 @@ static int arm_spe_process_event(struct perf_session *session, ...@@ -834,12 +946,12 @@ static int arm_spe_process_event(struct perf_session *session,
return 0; return 0;
if (!tool->ordered_events) { if (!tool->ordered_events) {
pr_err("CoreSight SPE Trace requires ordered events\n"); pr_err("ARM SPE requires ordered events\n");
return -EINVAL; return -EINVAL;
} }
if (sample->time && (sample->time != (u64) -1)) if (sample->time && (sample->time != (u64) -1))
timestamp = sample->time; timestamp = perf_time_to_tsc(sample->time, &spe->tc);
else else
timestamp = 0; timestamp = 0;
...@@ -856,19 +968,23 @@ static int arm_spe_process_event(struct perf_session *session, ...@@ -856,19 +968,23 @@ static int arm_spe_process_event(struct perf_session *session,
sample->time); sample->time);
} }
} else if (timestamp) { } else if (timestamp) {
if (event->header.type == PERF_RECORD_EXIT) { err = arm_spe_process_queues(spe, timestamp);
err = arm_spe_process_queues(spe, timestamp); if (err)
if (err) return err;
return err;
if (spe->sample_c2c_mode) {
err = arm_spe_c2c_process(spe);
if (err)
return err;
}
}
} }
if (spe->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
err = arm_spe_process_switch(spe, sample);
else if (event->header.type == PERF_RECORD_ITRACE_START)
err = arm_spe_process_itrace_start(spe, event, sample);
else if (event->header.type == PERF_RECORD_SWITCH ||
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
err = arm_spe_context_switch(spe, event, sample);
pr_debug4("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
perf_event__name(event->header.type), event->header.type,
sample->cpu, sample->time, timestamp);
return err; return err;
} }
...@@ -1033,6 +1149,32 @@ static void arm_spe_set_event_name(struct perf_evlist *evlist, u64 id, ...@@ -1033,6 +1149,32 @@ static void arm_spe_set_event_name(struct perf_evlist *evlist, u64 id,
} }
} }
/*
 * Walk @evlist from the end towards the start and return the first evsel
 * whose name is exactly "sched:sched_switch", or NULL if none is found.
 */
static struct perf_evsel *arm_spe_find_sched_switch(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry_reverse(evlist, pos) {
		if (strcmp(perf_evsel__name(pos), "sched:sched_switch") == 0)
			return pos;
	}

	return NULL;
}
/*
 * Report whether any evsel in @evlist has the context_switch attribute flag
 * set.
 */
static bool arm_spe_find_switch(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;
	bool found = false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->attr.context_switch) {
			found = true;
			break;
		}
	}

	return found;
}
static int arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) static int arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{ {
struct perf_evlist *evlist = session->evlist; struct perf_evlist *evlist = session->evlist;
...@@ -1050,7 +1192,7 @@ static int arm_spe_synth_events(struct arm_spe *spe, struct perf_session *sessio ...@@ -1050,7 +1192,7 @@ static int arm_spe_synth_events(struct arm_spe *spe, struct perf_session *sessio
} }
if (!found) { if (!found) {
pr_debug("No selected events with CoreSight Trace data\n"); pr_debug("No selected events with ARM SPE data\n");
return 0; return 0;
} }
...@@ -1139,7 +1281,6 @@ int arm_spe_process_auxtrace_info(union perf_event *event, ...@@ -1139,7 +1281,6 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
struct arm_spe *spe; struct arm_spe *spe;
int err; int err;
if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
min_sz) min_sz)
return -EINVAL; return -EINVAL;
...@@ -1156,8 +1297,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, ...@@ -1156,8 +1297,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->machine = &session->machines.host; /* No kvm support */ spe->machine = &session->machines.host; /* No kvm support */
spe->auxtrace_type = auxtrace_info->type; spe->auxtrace_type = auxtrace_info->type;
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
spe->tc.time_shift = auxtrace_info->priv[ARM_SPE_TIME_SHIFT];
spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); spe->tc.time_mult = auxtrace_info->priv[ARM_SPE_TIME_MULT];
spe->tc.time_zero = auxtrace_info->priv[ARM_SPE_TIME_ZERO];
spe->cap_user_time_zero = auxtrace_info->priv[ARM_SPE_CAP_USER_TIME_ZERO];
spe->ts_bit = auxtrace_info->priv[ARM_SPE_TS_ENABLE];
spe->have_sched_switch = auxtrace_info->priv[ARM_SPE_HAVE_SCHED_SWITCH];
spe->timeless_decoding = arm_spe_is_timeless_decoding(spe);
spe->auxtrace.process_event = arm_spe_process_event; spe->auxtrace.process_event = arm_spe_process_event;
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
spe->auxtrace.flush_events = arm_spe_flush; spe->auxtrace.flush_events = arm_spe_flush;
...@@ -1170,6 +1317,20 @@ int arm_spe_process_auxtrace_info(union perf_event *event, ...@@ -1170,6 +1317,20 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
if (dump_trace) if (dump_trace)
return 0; return 0;
if (spe->have_sched_switch == 1) {
spe->switch_evsel = arm_spe_find_sched_switch(session->evlist);
if (!spe->switch_evsel) {
pr_err("%s: missing sched_switch event\n", __func__);
err = -EINVAL;
goto err_free_queues;
}
} else if (spe->have_sched_switch == 2 &&
!arm_spe_find_switch(session->evlist)) {
pr_err("%s: missing context_switch attribute flag\n", __func__);
err = -EINVAL;
goto err_free_queues;
}
if (session->arm_spe_synth_opts && (session->arm_spe_synth_opts->set if (session->arm_spe_synth_opts && (session->arm_spe_synth_opts->set
|| session->arm_spe_synth_opts->c2c_mode)) || session->arm_spe_synth_opts->c2c_mode))
spe->synth_opts = *session->arm_spe_synth_opts; spe->synth_opts = *session->arm_spe_synth_opts;
...@@ -1192,6 +1353,9 @@ int arm_spe_process_auxtrace_info(union perf_event *event, ...@@ -1192,6 +1353,9 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
if (spe->queues.populated) if (spe->queues.populated)
spe->data_queued = true; spe->data_queued = true;
if (spe->timeless_decoding)
pr_debug2("ARM SPE decoding without timestamps\n");
return 0; return 0;
err_free_queues: err_free_queues:
......
...@@ -12,6 +12,12 @@ ...@@ -12,6 +12,12 @@
enum { enum {
ARM_SPE_PMU_TYPE, ARM_SPE_PMU_TYPE,
ARM_SPE_PER_CPU_MMAPS, ARM_SPE_PER_CPU_MMAPS,
ARM_SPE_TIME_SHIFT,
ARM_SPE_TIME_MULT,
ARM_SPE_CAP_USER_TIME_ZERO,
ARM_SPE_TIME_ZERO,
ARM_SPE_TS_ENABLE,
ARM_SPE_HAVE_SCHED_SWITCH,
ARM_SPE_AUXTRACE_PRIV_MAX, ARM_SPE_AUXTRACE_PRIV_MAX,
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册