diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 03b76904ca5236c6e173dbd920ef624712c4e529..59811b39430caf724e594a2155655f96035e4969 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -72,6 +72,7 @@ struct intel_pt {
 	bool use_thread_stack;
 	bool callstack;
 	unsigned int br_stack_sz;
+	unsigned int br_stack_sz_plus;
 	int have_sched_switch;
 	u32 pmu_type;
 	u64 kernel_start;
@@ -130,6 +131,7 @@ struct intel_pt {
 	unsigned int range_cnt;
 
 	struct ip_callchain *chain;
+	struct branch_stack *br_stack;
 };
 
 enum switch_state {
@@ -911,6 +913,44 @@ static void intel_pt_add_callchain(struct intel_pt *pt,
 	sample->callchain = pt->chain;
 }
 
+static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt)
+{
+	size_t sz = sizeof(struct branch_stack);
+
+	sz += pt->br_stack_sz * sizeof(struct branch_entry);
+	return zalloc(sz);
+}
+
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+	}
+
+	pt->br_stack = intel_pt_alloc_br_stack(pt);
+	if (!pt->br_stack)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+				  struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(pt->machine,
+							sample->pid,
+							sample->tid);
+
+	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+				     pt->br_stack_sz, sample->ip,
+				     pt->kernel_start);
+
+	sample->branch_stack = pt->br_stack;
+}
+
 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 						   unsigned int queue_nr)
 {
@@ -929,10 +969,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	}
 
 	if (pt->synth_opts.last_branch) {
-		size_t sz = sizeof(struct branch_stack);
-
-		sz += pt->br_stack_sz * sizeof(struct branch_entry);
-		ptq->last_branch = zalloc(sz);
+		ptq->last_branch = intel_pt_alloc_br_stack(pt);
 		if (!ptq->last_branch)
 			goto out_free;
 	}
@@ -1963,7 +2000,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
 				    state->from_ip, state->to_ip, ptq->insn_len,
 				    state->trace_nr, pt->callstack,
-				    pt->br_stack_sz,
+				    pt->br_stack_sz_plus,
 				    pt->mispred_all);
 	} else {
 		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
@@ -2609,6 +2646,8 @@ static int intel_pt_process_event(struct perf_session *session,
 	if (event->header.type == PERF_RECORD_SAMPLE) {
 		if (pt->synth_opts.add_callchain && !sample->callchain)
 			intel_pt_add_callchain(pt, sample);
+		if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+			intel_pt_add_br_stack(pt, sample);
 	}
 
 	if (event->header.type == PERF_RECORD_AUX &&
@@ -3370,13 +3409,33 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 		goto err_delete_thread;
 	}
 
-	if (pt->synth_opts.last_branch)
+	if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
 		pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+		pt->br_stack_sz_plus = pt->br_stack_sz;
+	}
+
+	if (pt->synth_opts.add_last_branch) {
+		err = intel_pt_br_stack_init(pt);
+		if (err)
+			goto err_delete_thread;
+		/*
+		 * Additional branch stack size to cater for tracing from the
+		 * actual sample ip to where the sample time is recorded.
+		 * Measured at about 200 branches, but generously set to 1024.
+		 * If kernel space is not being traced, then add just 1 for the
+		 * branch to kernel space.
+		 */
+		if (intel_pt_tracing_kernel(pt))
+			pt->br_stack_sz_plus += 1024;
+		else
+			pt->br_stack_sz_plus += 1;
+	}
 
 	pt->use_thread_stack = pt->synth_opts.callchain ||
 			       pt->synth_opts.add_callchain ||
 			       pt->synth_opts.thread_stack ||
-			       pt->synth_opts.last_branch;
+			       pt->synth_opts.last_branch ||
+			       pt->synth_opts.add_last_branch;
 
 	pt->callstack = pt->synth_opts.callchain ||
 			pt->synth_opts.add_callchain ||
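
For reference, intel_pt_alloc_br_stack() leans on the flexible-array layout of
struct branch_stack: one fixed header followed by br_stack_sz trailing
struct branch_entry records, zero-initialized via zalloc(). Below is a minimal
standalone sketch of that sizing pattern together with the br_stack_sz_plus
headroom arithmetic from the last hunk; the types and the alloc_br_stack()
helper are simplified stand-ins for illustration, not perf's real definitions.

/* Standalone sketch of the sizing pattern in intel_pt_alloc_br_stack(). */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct branch_entry {		/* stand-in: one from/to branch record */
	uint64_t from;
	uint64_t to;
};

struct branch_stack {		/* stand-in: header plus flexible array */
	uint64_t nr;
	struct branch_entry entries[];
};

/* Header size plus sz trailing entries, zeroed (perf's zalloc() is
 * calloc-like). */
static struct branch_stack *alloc_br_stack(unsigned int sz)
{
	return calloc(1, sizeof(struct branch_stack) +
			 sz * sizeof(struct branch_entry));
}

int main(void)
{
	unsigned int br_stack_sz = 64;	/* e.g. synth_opts.last_branch_sz */
	int tracing_kernel = 1;	/* stand-in for intel_pt_tracing_kernel() */
	/*
	 * Headroom as in the patch: a generous 1024 branches to cover the
	 * distance from the sample ip to where the sample time is recorded,
	 * or just 1 for the branch into kernel space.
	 */
	unsigned int br_stack_sz_plus = br_stack_sz +
					(tracing_kernel ? 1024 : 1);
	struct branch_stack *bs = alloc_br_stack(br_stack_sz);

	if (!bs)
		return 1;
	bs->nr = 1;
	bs->entries[0].from = 0x1000;
	bs->entries[0].to = 0x2000;
	printf("%zu bytes for %u entries; thread-stack sized for %u\n",
	       sizeof(*bs) + br_stack_sz * sizeof(bs->entries[0]),
	       br_stack_sz, br_stack_sz_plus);
	free(bs);
	return 0;
}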