diff --git a/tools/perf/Build b/tools/perf/Build index e5232d567611b36aa5092262c8252f527e543c74..37442cc189b1261093979598bbe044ddfdb540de 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -24,6 +24,7 @@ perf-y += builtin-mem.o perf-y += builtin-data.o perf-y += builtin-version.o perf-y += builtin-c2c.o +perf-y += builtin-spe-c2c.o perf-$(CONFIG_TRACE) += builtin-trace.o perf-$(CONFIG_LIBELF) += builtin-probe.o diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 1452e5153c604addec427302bdcff0a8d2924459..e45fd0fabeb1ec1e123b9a57236146fef56712dc 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -356,6 +356,8 @@ static struct perf_c2c c2c = { .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, .ordered_events = true, .ordering_requires_timestamps = true, }, @@ -2704,9 +2706,11 @@ static int setup_coalesce(const char *coalesce, bool no_source) return 0; } -static int perf_c2c__report(int argc, const char **argv) +int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; + struct itrace_synth_opts itrace_synth_opts = { .set = 0, }; + struct arm_spe_synth_opts arm_spe_synth_opts = { .c2c_mode = true, .set = 0, }; struct ui_progress prog; struct perf_data data = { .mode = PERF_DATA_MODE_READ, @@ -2737,6 +2741,12 @@ static int perf_c2c__report(int argc, const char **argv) "print_type,threshold[,print_limit],order,sort_key[,branch],value", callchain_help, &parse_callchain_opt, callchain_default_opt), + OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", + "Instruction Tracing options", + itrace_parse_synth_opts), + OPT_CALLBACK_OPTARG(0, "spe", &arm_spe_synth_opts, NULL, "spe opts", + "ARM SPE Tracing options", + arm_spe_parse_synth_opts), OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"), OPT_STRING('c', "coalesce", &coalesce, "coalesce fields", "coalesce fields: pid,tid,iaddr,dso"), @@ -2782,6 +2792,9 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } + session->itrace_synth_opts = &itrace_synth_opts; + session->arm_spe_synth_opts = &arm_spe_synth_opts; + err = setup_nodes(session); if (err) { pr_err("Failed setup nodes\n"); diff --git a/tools/perf/builtin-spe-c2c.c b/tools/perf/builtin-spe-c2c.c new file mode 100644 index 0000000000000000000000000000000000000000..4349f8041c2e777390a6ef615f66ca616d119355 --- /dev/null +++ b/tools/perf/builtin-spe-c2c.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for ARM SPE c2c, such as intel c2c. + */ +#include +#include "util.h" +#include "debug.h" +#include "builtin.h" +#include + +static const struct option spe_c2c_options[] = { + OPT_END() +}; + +static const char * const spe_c2c_usage[] = { + "perf spe-c2c {record|report}", + NULL +}; + +static int perf_spe_c2c__record(int argc, const char **argv) +{ + int rec_argc, i = 0, j; + const char **rec_argv; + int ret; + + rec_argc = argc + 5; /* max number of arguments */ + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + if (!rec_argv) + return -1; + + rec_argv[i++] = argv[0]; + rec_argv[i++] = "-e"; + rec_argv[i++] = "arm_spe_0/ts_enable=1," + "pct_enable=1,pa_enable=1,load_filter=1," + "jitter=1,store_filter=1,min_latency=0/"; + + for (j = 1; j < argc; j++) { + rec_argv[i++] = argv[j]; + } + + ret = cmd_record(i, rec_argv); + free(rec_argv); + return ret; +} + +int cmd_spe_c2c(int argc, const char **argv) +{ + argc = parse_options(argc, argv, spe_c2c_options, spe_c2c_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + if (!argc) + usage_with_options(spe_c2c_usage, spe_c2c_options); + + if (!strncmp(argv[0], "rec", 3)) { + return perf_spe_c2c__record(argc, argv); + } else if (!strncmp(argv[0], "rep", 3)) { + return perf_c2c__report(argc, argv); + } else { + usage_with_options(spe_c2c_usage, spe_c2c_options); + } + + return 0; +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 05745f3ce912dadf75495297d2b9978327d97774..4d9e5d1ea5b48fd03f639b4830bc9c767418c51f 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -16,6 +16,8 @@ int cmd_buildid_cache(int argc, const char **argv); int cmd_buildid_list(int argc, const char **argv); int cmd_config(int argc, const char **argv); int cmd_c2c(int argc, const char **argv); +int perf_c2c__report(int argc, const char **argv); +int cmd_spe_c2c(int argc, const char **argv); int cmd_diff(int argc, const char **argv); int cmd_evlist(int argc, const char **argv); int cmd_help(int argc, const char **argv); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 80f8ae8b13666a6834c9e9b45dd25c6e490c589c..426318433f40771cdab31cc37aa479f66e4ca557 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -50,6 +50,7 @@ static struct cmd_struct commands[] = { { "buildid-list", cmd_buildid_list, 0 }, { "config", cmd_config, 0 }, { "c2c", cmd_c2c, 0 }, + { "spe-c2c", cmd_spe_c2c, 0 }, { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index b8a168c91aafbbf5279278d7b8f9b5e6bfc9a1cd..608b9372a6774971d6a36d1ba693eca264b321ff 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -37,10 +37,8 @@ struct arm_spe_decoder { unsigned char temp_buf[ARM_SPE_PKT_MAX_SZ]; }; -static uint64_t arm_spe_calc_ip(uint64_t payload) +static uint64_t arm_spe_calc_ip(uint64_t ip) { - uint64_t ip = (payload & ~(0xffULL << 56)); - /* fill high 8 bits for kernel virtual address */ if (ip & 0x1000000000000ULL) ip |= (uint64_t)0xff00000000000000ULL; @@ -154,32 +152,62 @@ static int arm_spe_walk_trace(struct arm_spe_decoder *decoder) switch (decoder->packet.type) { case ARM_SPE_TIMESTAMP: decoder->sample_timestamp = payload; + decoder->state.ts = payload; return 0; case ARM_SPE_END: decoder->sample_timestamp = 0; + decoder->state.ts = 0; return 0; case ARM_SPE_ADDRESS: - decoder->ip = arm_spe_calc_ip(payload); - if (idx == 0) - decoder->state.from_ip = decoder->ip; - else if (idx == 1) - decoder->state.to_ip = decoder->ip; + switch(idx) { + case 0: + case 1: + payload &= ~(0xffULL << 56); + decoder->ip = arm_spe_calc_ip(payload); + if (idx == 0) + decoder->state.from_ip = decoder->ip; + else + decoder->state.to_ip = decoder->ip; + break; + case 2: + decoder->ip = arm_spe_calc_ip(payload); + decoder->state.addr = decoder->ip; + break; + case 3: + payload &= ~(0xffULL << 56); + decoder->state.phys_addr = payload; + break; + default: + break; + } break; case ARM_SPE_COUNTER: break; case ARM_SPE_CONTEXT: break; case ARM_SPE_OP_TYPE: + if (idx == 0x1) { + if (payload & 0x1) + decoder->state.is_st = true; + else + decoder->state.is_ld = true; + } break; case ARM_SPE_EVENTS: - if (payload & 0x20) + if (payload & 0x20) { decoder->state.type |= ARM_SPE_TLB_MISS; + decoder->state.is_tlb_miss = true; + } if (payload & 0x80) decoder->state.type |= ARM_SPE_BRANCH_MISS; - if (idx > 1 && (payload & 0x200)) + if (idx > 1 && (payload & 0x200)) { decoder->state.type |= ARM_SPE_LLC_MISS; - if (idx > 1 && (payload & 0x400)) + decoder->state.is_llc_miss = true; + } + if (idx > 1 && (payload & 0x400)) { decoder->state.type |= ARM_SPE_REMOTE_ACCESS; + decoder->state.is_remote = true; + } break; case ARM_SPE_DATA_SOURCE: @@ -199,7 +227,7 @@ const struct arm_spe_state *arm_spe_decode(struct arm_spe_decoder *decoder) { int err; - decoder->state.type = 0; + memset(&(decoder->state), 0, sizeof(struct arm_spe_state)); err = arm_spe_walk_trace(decoder); if (err) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 13ebf281fa06af30f37186e05217addb252acb95..9d2a96a62cc4bd7856ddd0a8f656535002ac0548 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -17,8 +17,19 @@ enum arm_spe_sample_type { struct arm_spe_state { enum arm_spe_sample_type type; int err; + bool is_ld; /* Is load ? */ + bool is_st; /* Is store ? */ + bool is_l1d_miss; /* Is l1d miss ? */ + bool is_l2d_miss; /* Is l2d miss ? */ + bool is_llc_miss; /* Is llc miss ? */ + bool is_tlb_miss; /* Is tlb miss ? */ + bool is_remote; /* Is remote access ? */ + uint64_t ts; /* timestamp */ uint64_t from_ip; uint64_t to_ip; + uint64_t data_src; + uint64_t addr; + uint64_t phys_addr; uint64_t timestamp; }; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index af96caad761cf9863644e5e17940b1456a353d70..eac09380f5f5858520e87b746bfcfc84b51371a1 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "cpumap.h" #include "color.h" @@ -48,6 +49,7 @@ struct arm_spe { u8 sample_tlb_miss; u8 sample_branch_miss; u8 sample_remote_access; + u8 sample_c2c_mode; u64 llc_miss_id; u64 tlb_miss_id; u64 branch_miss_id; @@ -76,6 +78,51 @@ struct arm_spe_queue { bool have_sample; }; +struct spe_c2c_sample { + struct rb_node rb_node; + struct arm_spe_state state; +}; + +struct spe_c2c_sample_queues { + struct rb_root ld_list; + struct rb_root st_list; + + struct arm_spe_queue *speq; + bool valid; + int cpu; + uint64_t ld_num; + uint64_t st_num; +}; + +struct spe_c2c_compare_lists { + struct rb_root *listA; + struct rb_root *listB; + struct spe_c2c_sample_queues *queues; + struct spe_c2c_sample_queues *oppoqs; /* the oppo queues */ +}; + +#define SPE_C2C_SAMPLE_Q_MAX 128 + +int spe_c2c_q_num; + +struct spe_c2c_sample_queues spe_c2c_sample_list[SPE_C2C_SAMPLE_Q_MAX]; + +static void spe_c2c_sample_init(void) +{ + int i; + for (i = 0; i < SPE_C2C_SAMPLE_Q_MAX; i++) { + spe_c2c_sample_list[i].ld_list = RB_ROOT; + spe_c2c_sample_list[i].st_list = RB_ROOT; + spe_c2c_sample_list[i].valid = false; + spe_c2c_sample_list[i].cpu = -1; + spe_c2c_sample_list[i].speq = NULL; + spe_c2c_sample_list[i].ld_num = 0; + spe_c2c_sample_list[i].st_num = 0; + } + + spe_c2c_q_num = 0; +} + static void arm_spe_dump(struct arm_spe *spe __maybe_unused, unsigned char *buf, size_t len) { @@ -231,7 +278,8 @@ static void arm_spe_prep_sample(struct arm_spe *spe, sample->cpumode = arm_spe_cpumode(spe, sample->ip); sample->pid = speq->pid; sample->tid = speq->tid; - sample->addr = speq->state->to_ip; + sample->addr = speq->state->addr; + sample->phys_addr = speq->state->phys_addr; sample->period = 1; sample->cpu = speq->cpu; @@ -254,7 +302,7 @@ static inline int arm_spe_deliver_synth_event(struct arm_spe *spe, return ret; } -static int arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, u64 spe_events_id) +static int arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, u64 spe_events_id __maybe_unused) { struct arm_spe *spe = speq->spe; union perf_event *event = speq->event_buf; @@ -306,7 +354,67 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return 0; } -static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) +static int spe_sample_insert(struct rb_root *root, struct spe_c2c_sample *data) +{ + struct rb_node **tmp = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*tmp) { + struct spe_c2c_sample *this = container_of(*tmp, + struct spe_c2c_sample, rb_node); + + parent = *tmp; + if (data->state.ts < this->state.ts) + tmp = &((*tmp)->rb_left); + else if (data->state.ts > this->state.ts) + tmp = &((*tmp)->rb_right); + else + return -1; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->rb_node, parent, tmp); + rb_insert_color(&data->rb_node, root); + + return 0; +} + +static void arm_spe_c2c_queue_store(struct arm_spe_queue *speq, + struct spe_c2c_sample_queues *spe_c2cq) +{ + const struct arm_spe_state *state = speq->state; + struct spe_c2c_sample *sample; + struct rb_root *root; + int ret = 0; + + if (state->ts && (state->is_ld || state->is_st)) { + sample = zalloc(sizeof(struct spe_c2c_sample)); + if (!sample) { + pr_err("spe_c2c: Allocate sample error!\n"); + return; + } + + root = state->is_ld ? &(spe_c2cq->ld_list) : &(spe_c2cq->st_list); + + memcpy(&(sample->state), state, sizeof(struct arm_spe_state)); + + ret = spe_sample_insert(root, sample); + if (ret) { + pr_err("spe_c2c: The %lx(%lx) already exists.", + state->addr, state->ts); + free(sample); + return; + } + + if (state->is_ld) + spe_c2cq->ld_num++; + else + spe_c2cq->st_num++; + } +} + +static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp, + struct spe_c2c_sample_queues *spe_c2cq __maybe_unused) { const struct arm_spe_state *state = speq->state; struct arm_spe *spe = speq->spe; @@ -316,9 +424,14 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) spe->kernel_start = machine__kernel_start(spe->machine); while (1) { - err = arm_spe_sample(speq); - if (err) - return err; + if (spe->sample_c2c_mode) { + if (spe_c2cq) + arm_spe_c2c_queue_store(speq, spe_c2cq); + } else { + err = arm_spe_sample(speq); + if (err) + return err; + } state = arm_spe_decode(speq->decoder); if (state->err) { @@ -456,8 +569,32 @@ static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, } } +static struct spe_c2c_sample_queues* +arm_spe_get_c2c_queue(struct arm_spe_queue *speq) +{ + int i; + + for (i = 0; i < SPE_C2C_SAMPLE_Q_MAX; i++) { + if (!spe_c2c_sample_list[i].valid) { + spe_c2c_sample_list[i].valid = true; + spe_c2c_sample_list[i].cpu = speq->cpu; + spe_c2c_sample_list[i].speq = speq; + spe_c2c_q_num++; + return &spe_c2c_sample_list[i]; + } else { + if (spe_c2c_sample_list[i].cpu == speq->cpu) + return &spe_c2c_sample_list[i]; + } + } + + pr_warning("spe_c2c: Now only support sample for two cpus!\n"); + + return NULL; +} + static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) { + struct spe_c2c_sample_queues *spe_c2cq; unsigned int queue_nr; u64 ts; int ret; @@ -489,7 +626,9 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) arm_spe_set_pid_tid_cpu(spe, queue); - ret = arm_spe_run_decoder(speq, &ts); + spe_c2cq = arm_spe_get_c2c_queue(speq); + + ret = arm_spe_run_decoder(speq, &ts, spe_c2cq); if (ret < 0) { auxtrace_heap__add(&spe->heap, queue_nr, ts); return ret; @@ -507,9 +646,163 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) return 0; } +pthread_mutex_t mut; + +static void arm_spe_c2c_sample(struct spe_c2c_sample_queues *c2c_queues, + struct spe_c2c_sample *c2c_sample) +{ + struct arm_spe_queue *speq = c2c_queues->speq; + union perf_event *event = speq->event_buf; + struct perf_sample sample = { .ip = 0, }; + union perf_mem_data_src src; + int ret; + + memset(&src, 0, sizeof(src)); + + src.mem_op = PERF_MEM_OP_LOAD; + src.mem_snoop = PERF_MEM_SNOOP_HITM; + + if (c2c_sample->state.is_tlb_miss) + src.mem_dtlb = PERF_MEM_TLB_MISS; + else + src.mem_dtlb = PERF_MEM_TLB_HIT; + + if (speq->spe->synth_opts.c2c_remote) { + if (c2c_sample->state.is_remote) + src.mem_lvl = PERF_MEM_LVL_REM_CCE2; + else + return; + } else + src.mem_lvl = PERF_MEM_LVL_HIT | PERF_MEM_LVL_L3; + + sample.ip = c2c_sample->state.from_ip; + sample.cpumode = arm_spe_cpumode(speq->spe, sample.ip); + sample.pid = speq->pid; + sample.tid = speq->tid; + sample.addr = c2c_sample->state.addr; + sample.data_src = src.val; + sample.phys_addr = c2c_sample->state.phys_addr; + sample.period = 1; + sample.cpu = c2c_queues->cpu; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = sample.cpumode; + event->sample.header.size = sizeof(struct perf_event_header); + + ret = perf_session__deliver_synth_event(speq->spe->session, event, &sample); + if (ret) + pr_err("ARM SPE: failed to deliver event, error %d\n", ret); +} + +static void arm_spe_c2c_get_samples(void *arg) +{ + struct rb_root *listA = ((struct spe_c2c_compare_lists *)arg)->listA; + struct rb_root *listB = ((struct spe_c2c_compare_lists *)arg)->listB; + struct spe_c2c_sample_queues *queues = ((struct spe_c2c_compare_lists *)arg)->queues; + struct spe_c2c_sample_queues *oppoqs = ((struct spe_c2c_compare_lists *)arg)->oppoqs; + struct rb_node *nodeA, *nodeB; + struct spe_c2c_sample *sampleA, *sampleB; + uint64_t xor; + + for (nodeA = rb_first(listA); nodeA; nodeA = rb_next(nodeA)) { + for (nodeB = rb_first(listB); nodeB; nodeB = rb_next(nodeB)) { + sampleA = rb_entry(nodeA, struct spe_c2c_sample, rb_node); + sampleB = rb_entry(nodeB, struct spe_c2c_sample, rb_node); + + xor = sampleA->state.phys_addr ^ sampleB->state.phys_addr; + if (!(xor & 0xFFFFFFFFFFFFFFC0) && (xor & 0x3F)) { + pthread_mutex_lock(&mut); + arm_spe_c2c_sample(queues, sampleA); + arm_spe_c2c_sample(oppoqs, sampleB); + pthread_mutex_unlock(&mut); + break; + + } + + } + } +} + +static int arm_spe_c2c_process(struct arm_spe *spe __maybe_unused) +{ + int i, j, k, ret, size; + int store = spe->synth_opts.c2c_store ? 1 : 0; + pthread_t *c2c_threads; + struct spe_c2c_compare_lists *c2c_lists; + + if (spe_c2c_q_num == 0) + return 0; + + if (spe_c2c_q_num < 2) { + pr_err("ARM SPE: c2c mode requires data recorded on at least two CPUs!\n"); + return -1; + } + + k = 0; + size = (2 + store) * spe_c2c_q_num * (spe_c2c_q_num - 1) / 2; + + c2c_threads = (pthread_t *)zalloc(size * sizeof(pthread_t)); + c2c_lists = (struct spe_c2c_compare_lists *)zalloc(size * sizeof(struct spe_c2c_compare_lists)); + + for (i = 0; i < spe_c2c_q_num; i++) { + for (j = i + 1; j < spe_c2c_q_num; j++) { + c2c_lists[k].listA = &(spe_c2c_sample_list[i].ld_list); + c2c_lists[k].listB = &(spe_c2c_sample_list[j].st_list); + c2c_lists[k].queues = &spe_c2c_sample_list[i]; + c2c_lists[k].oppoqs = &spe_c2c_sample_list[j]; + ret = pthread_create(&c2c_threads[k], NULL, (void *)arm_spe_c2c_get_samples, + (void *)&c2c_lists[k]); + if (ret) { + pr_info("ARM SPE: c2c process thread[ld->st] create failed! ret=%d\n", ret); + return ret; + } + + k++; + c2c_lists[k].listA = &(spe_c2c_sample_list[j].ld_list); + c2c_lists[k].listB = &(spe_c2c_sample_list[i].st_list); + c2c_lists[k].queues = &spe_c2c_sample_list[j]; + c2c_lists[k].oppoqs = &spe_c2c_sample_list[i]; + ret = pthread_create(&c2c_threads[k], NULL, (void *)arm_spe_c2c_get_samples, + (void *)&c2c_lists[k]); + if (ret) { + pr_info("ARM SPE: c2c process thread[st->ld] create failed! ret=%d\n", ret); + return ret; + } + + if (store) { + k++; + c2c_lists[k].listA = &(spe_c2c_sample_list[i].st_list); + c2c_lists[k].listB = &(spe_c2c_sample_list[j].st_list); + c2c_lists[k].queues = &spe_c2c_sample_list[i]; + c2c_lists[k].oppoqs = &spe_c2c_sample_list[j]; + ret = pthread_create(&c2c_threads[k], NULL, (void *)arm_spe_c2c_get_samples, + (void *)&c2c_lists[k]); + if (ret) { + pr_info("ARM SPE: c2c process thread[st->st] create failed! ret=%d\n", ret); + return ret; + } + } + k++; + } + } + + for (i = 0; i < size; i++) { + ret = pthread_join(c2c_threads[i], NULL); + BUG_ON(ret); + } + + free(c2c_threads); + free(c2c_lists); + + spe_c2c_q_num = 0; + + return ret; +} + static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, u64 time_) { + struct spe_c2c_sample_queues *spe_c2cq = NULL; struct auxtrace_queues *queues = &spe->queues; unsigned int i; u64 ts = 0; @@ -521,7 +814,7 @@ static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, if (speq && (tid == -1 || speq->tid == tid)) { speq->time = time_; arm_spe_set_pid_tid_cpu(spe, queue); - arm_spe_run_decoder(speq, &ts); + arm_spe_run_decoder(speq, &ts, spe_c2cq); } } return 0; @@ -567,6 +860,12 @@ static int arm_spe_process_event(struct perf_session *session, err = arm_spe_process_queues(spe, timestamp); if (err) return err; + + if (spe->sample_c2c_mode) { + err = arm_spe_c2c_process(spe); + if (err) + return err; + } } } @@ -633,8 +932,12 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return arm_spe_process_timeless_queues(spe, -1, MAX_TIMESTAMP - 1); - return arm_spe_process_queues(spe, MAX_TIMESTAMP); - return 0; + if (spe->sample_c2c_mode) + ret = arm_spe_c2c_process(spe); + else + ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); + + return ret; } static void arm_spe_free_queue(void *priv) @@ -867,14 +1170,20 @@ int arm_spe_process_auxtrace_info(union perf_event *event, if (dump_trace) return 0; - if (session->arm_spe_synth_opts && session->arm_spe_synth_opts->set) + if (session->arm_spe_synth_opts && (session->arm_spe_synth_opts->set + || session->arm_spe_synth_opts->c2c_mode)) spe->synth_opts = *session->arm_spe_synth_opts; else arm_spe_synth_opts__set_default(&spe->synth_opts); - err = arm_spe_synth_events(spe, session); - if (err) - goto err_free_queues; + if (spe->synth_opts.c2c_mode) { + spe->sample_c2c_mode = true; + spe_c2c_sample_init(); + } else { + err = arm_spe_synth_events(spe, session); + if (err) + goto err_free_queues; + } err = auxtrace_queues__process_index(&spe->queues, session); if (err) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5fba8e6548fc8c5200353d0f1edeb9d1e661b5f2..4a06986856594aa60b2bc466891baea9867e8005 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1142,6 +1142,8 @@ void arm_spe_synth_opts__set_default(struct arm_spe_synth_opts *synth_opts) synth_opts->tlb_miss = true; synth_opts->branch_miss = true; synth_opts->remote_access = true; + synth_opts->c2c_remote = false; + synth_opts->c2c_store = false; } int arm_spe_parse_synth_opts(const struct option *opt, const char *str, @@ -1169,7 +1171,14 @@ int arm_spe_parse_synth_opts(const struct option *opt, const char *str, synth_opts->branch_miss = true; break; case 'r': - synth_opts->remote_access = true; + if (synth_opts->c2c_mode) + synth_opts->c2c_remote = true; + else + synth_opts->remote_access = true; + break; + case 's': + if (synth_opts->c2c_mode) + synth_opts->c2c_store = true; break; case ' ': case ',': diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 1959e5d727ba664f1931718a91fd6184a372fb39..615f42d8c694bfc9a7b30adca960575132d6a06b 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -118,6 +118,9 @@ struct arm_spe_synth_opts { bool tlb_miss; bool branch_miss; bool remote_access; + bool c2c_mode; + bool c2c_remote; + bool c2c_store; }; /**