diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 553c3e08fa4aae4db409adbafc21a579e1e6304a..8914335db84b23b04f0900c0d03be0b58e39c45f 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -157,6 +157,15 @@ of instructions and number of cycles since the last update, and thus represent the average IPC since the last IPC for that event type. Note IPC for "branches" events is calculated separately from IPC for "instructions" events. +Even with the 'cyc' config term, it is possible to produce IPC information for +every change of timestamp, but at the expense of accuracy. That is selected by +specifying the itrace 'A' option. Due to the granularity of timestamps, the +actual number of cycles increases even though the cycles reported does not. +The number of instructions is known, but if IPC is reported, cycles can be too +low and so IPC is too high. Note that inaccuracy decreases as the period of +sampling increases i.e. if the number of cycles is too low by a small amount, +that becomes less significant if the number of cycles is large. + Also note that the IPC instruction count may or may not include the current instruction. If the cycle count is associated with an asynchronous branch (e.g. page fault or interrupt), then the instruction count does not include the @@ -873,6 +882,7 @@ The letters are: L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) "Instructions" events look like they were recorded by "perf record -e diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5ab631702769b85c26cb5a924e0a8ca3dd6acd66..5f83937bf8f3cbf653b2bb9f4a9356641a75f601 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -608,6 +608,7 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) { decoder->sample_timestamp = decoder->timestamp; decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + decoder->state.cycles = decoder->tot_cyc_cnt; } static void intel_pt_reposition(struct intel_pt_decoder *decoder) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 4b5e79fcf557f45274b0bb0b63e507a164a932fd..8fd68f7a0963732fcd44661e2c890a4c59087ff0 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -218,6 +218,7 @@ struct intel_pt_state { uint64_t to_ip; uint64_t tot_insn_cnt; uint64_t tot_cyc_cnt; + uint64_t cycles; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index c9542fada8fb6f9b7b3f5d916c85b00ac97a368b..0ee5005e9837ce805c6936ab5f618c27a15e378c 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -172,6 +172,7 @@ struct intel_pt_queue { bool step_through_buffers; bool use_buffer_pid_tid; bool sync_switch; + bool sample_ipc; pid_t pid, tid; int cpu; int switch_state; @@ -1581,7 +1582,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + if (ptq->sample_ipc) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; @@ -1632,7 +1633,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) else sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + if (ptq->sample_ipc) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; @@ -2245,8 +2246,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; - ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + if (pt->synth_opts.approx_ipc) { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->cycles; + ptq->sample_ipc = true; + } else { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; + } /* * Do PEBS first to allow for the possibility that the PEBS timestamp