diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index a0fbb5d71f7d623d03421589cd0ba3d3e823f40e..be764f9ec7691a3d2357214cbe1af9c6c333ad92 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -764,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is removed and replaced with the synthesized events. e.g. perf inject --itrace -i perf.data -o perf.data.new + +Below is an example of using Intel PT with AutoFDO. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) +amended to take the number of elements as a parameter. + + $ gcc-5 -O3 sort.c -o sort_optimized + $ ./sort_optimized 30000 + Bubble sorting array of 30000 elements + 2254 ms + + $ cat ~/.perfconfig + [intel-pt] + mispred-all + + $ perf record -e intel_pt//u ./sort 3000 + Bubble sorting array of 3000 elements + 58 ms + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 3.939 MB perf.data ] + $ perf inject -i perf.data -o inj --itrace=i100usle --strip + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ ./sort_autofdo 30000 + Bubble sorting array of 30000 elements + 2155 ms + +Note there is currently no advantage to using Intel PT instead of LBR, but +that may change in the future if greater use is made of the data. 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 05e8fcc5188bce410934d0d5731aebf48d70e1b3..03ff072b59938d70b6af30c8b062686425203116 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -64,6 +64,7 @@ struct intel_pt { bool data_queued; bool est_tsc; bool sync_switch; + bool mispred_all; /* set from the intel-pt.mispred-all perf config key */ int have_sched_switch; u32 pmu_type; u64 kernel_start; @@ -943,6 +944,7 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); /* No support for mispredict */ + be->flags.mispred = ptq->pt->mispred_all; /* ...so the flag just mirrors the intel-pt.mispred-all setting */ if (bs->nr < ptq->pt->synth_opts.last_branch_sz) bs->nr += 1; @@ -1967,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist) return false; } +static int intel_pt_perf_config(const char *var, const char *value, void *data) +{ + struct intel_pt *pt = data; /* the struct intel_pt handed to perf_config() */ + + if (!strcmp(var, "intel-pt.mispred-all")) + pt->mispred_all = perf_config_bool(var, value); + + return 0; /* always 0: any other key is ignored */ +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -2011,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (!pt) return -ENOMEM; + perf_config(intel_pt_perf_config, pt); /* callback stores intel-pt.mispred-all in pt */ + err = auxtrace_queues__init(&pt->queues); if (err) goto err_free;