diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 809c7721cd2451a5e1a8e3bbe5a3994965106346..a7ecf8f469f47921ec0734eb4142a4da64f6c5a7 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep) return err; } +int filename__write_int(const char *filename, int value) +{ + int fd = open(filename, O_WRONLY), err = -1, len; + char buf[64]; + + if (fd < 0) + return err; + /* Write only the formatted digits, not the uninitialized tail of buf. */ + len = snprintf(buf, sizeof(buf), "%d", value); + if (write(fd, buf, len) == len) + err = 0; + + close(fd); + return err; +} + int procfs__read_str(const char *entry, char **buf, size_t *sizep) { char path[PATH_MAX]; @@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value) return filename__read_int(path, value); } + +int sysfs__write_int(const char *entry, int value) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX) + return -1; + + return filename__write_int(path, value); +} diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 956c21127d1ef7d9817c0bbb8a9423ff4bdf1de8..45605348461e2f40e308c7a446ddfe32808c12ee 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); int filename__read_str(const char *filename, char **buf, size_t *sizep); +int filename__write_int(const char *filename, int value); + int procfs__read_str(const char *entry, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); @@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); int sysfs__read_str(const char *entry, char **buf, size_t *sizep); int sysfs__read_bool(const char *entry, bool *value); + +int sysfs__write_int(const char 
*entry, int value); #endif /* __API_FS__ */ diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index b0b3007d3c9c0ff9b3288e8005d533efe48c0b57..d157dee7a4ec89151ba795c13f54063b189e39e1 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -364,6 +364,42 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc CYC packets are not requested by default. +pt Specifies pass-through which enables the 'branch' config term. + + The default config selects 'pt' if it is available, so a user will + never need to specify this term. + +branch Enable branch tracing. Branch tracing is enabled by default so to + disable branch tracing use 'branch=0'. + + The default config selects 'branch' if it is available. + +ptw Enable PTWRITE packets which are produced when a ptwrite instruction + is executed. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/ptwrite + + which contains "1" if the feature is supported and + "0" otherwise. + +fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet + provides the address of the ptwrite instruction. In the absence of + fup_on_ptw, the decoder will use the address of the previous branch + if branch tracing is enabled, otherwise the address will be zero. + Note that fup_on_ptw will work even when branch tracing is disabled. + +pwr_evt Enable power events. The power events provide information about + changes to the CPU C-state. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/power_event_trace + + which contains "1" if the feature is supported and + "0" otherwise. 
+ new snapshot option ------------------- diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index bd0e4417f2be63f892a870ec5ad60ae0fd5d9994..698076313606a7e22df2bda7bfa365c42673a32a 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -239,6 +239,20 @@ taskset. --no-merge:: Do not merge results from same PMUs. +--smi-cost:: +Measure SMI cost if msr/aperf/ and msr/smi/ events are supported. + +During the measurement, the /sys/devices/cpu/freeze_on_smi will be set to +freeze core counters on SMI. +The aperf counter will not be affected by the setting. +The cost of SMI can be measured by (aperf - unhalted core cycles). + +In practice, the percentage of SMI cycles is very useful for performance +oriented analysis. --metric-only will be applied by default. +The output is SMI cycles%, equal to (aperf - unhalted core cycles) / aperf + +Users who want to get the actual value can apply --no-metric-only. + EXAMPLES -------- diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 1f4fbc9a3292e06b6ae48c47297e795ccbc45809..bdf0e87f9b2938c33dc94b0541c8a63d1965cb35 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -61,7 +61,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. 
-ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc)) NO_LIBDW_DWARF_UNWIND := 1 endif diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 90ad64b231cd821abe8756ed92d68d7c4c367804..2e6595310420104a40fe4ee813f41ee0bc213349 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -5,4 +5,6 @@ libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o + libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c new file mode 100644 index 0000000000000000000000000000000000000000..3a24b3c4327386dd0d32b6e00863bdf3a02e0aa6 --- /dev/null +++ b/tools/perf/arch/powerpc/util/unwind-libdw.c @@ -0,0 +1,73 @@ +#include <elfutils/libdwfl.h> +#include "../../util/unwind-libdw.h" +#include "../../util/perf_regs.h" +#include "../../util/event.h" + +/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. 
*/ +static const int special_regs[3][2] = { + { 65, PERF_REG_POWERPC_LINK }, + { 101, PERF_REG_POWERPC_XER }, + { 109, PERF_REG_POWERPC_CTR }, +}; + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[32], dwarf_nip; + size_t i; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r); \ + val; \ +}) + + dwarf_regs[0] = REG(R0); + dwarf_regs[1] = REG(R1); + dwarf_regs[2] = REG(R2); + dwarf_regs[3] = REG(R3); + dwarf_regs[4] = REG(R4); + dwarf_regs[5] = REG(R5); + dwarf_regs[6] = REG(R6); + dwarf_regs[7] = REG(R7); + dwarf_regs[8] = REG(R8); + dwarf_regs[9] = REG(R9); + dwarf_regs[10] = REG(R10); + dwarf_regs[11] = REG(R11); + dwarf_regs[12] = REG(R12); + dwarf_regs[13] = REG(R13); + dwarf_regs[14] = REG(R14); + dwarf_regs[15] = REG(R15); + dwarf_regs[16] = REG(R16); + dwarf_regs[17] = REG(R17); + dwarf_regs[18] = REG(R18); + dwarf_regs[19] = REG(R19); + dwarf_regs[20] = REG(R20); + dwarf_regs[21] = REG(R21); + dwarf_regs[22] = REG(R22); + dwarf_regs[23] = REG(R23); + dwarf_regs[24] = REG(R24); + dwarf_regs[25] = REG(R25); + dwarf_regs[26] = REG(R26); + dwarf_regs[27] = REG(R27); + dwarf_regs[28] = REG(R28); + dwarf_regs[29] = REG(R29); + dwarf_regs[30] = REG(R30); + dwarf_regs[31] = REG(R31); + if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs)) + return false; + + dwarf_nip = REG(NIP); + dwfl_thread_state_register_pc(thread, dwarf_nip); + for (i = 0; i < ARRAY_SIZE(special_regs); i++) { + Dwarf_Word val = 0; + perf_reg_value(&val, user_regs, special_regs[i][1]); + if (!dwfl_thread_state_registers(thread, + special_regs[i][0], 1, + &val)) + return false; + } + + return true; +} diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 6fe667b3269eed8e563502f120757749435380ba..9535be57033f0c02b12d4471b2b61a272d630de0 100644 --- 
a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -192,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) int psb_cyc, psb_periods, psb_period; int pos = 0; u64 config; + char c; pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); @@ -225,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) } } + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1) + pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch"); + pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index db5261c3f719977bd4517c934c8bf0a1e63226e5..4bce7d8679cbad191d97905e418ea24c2ad6407c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -385,7 +385,7 @@ static int perf_session__check_output_opt(struct perf_session *session) */ if (!evsel && output[j].user_set && !output[j].wildcard_set) { pr_err("%s events do not exist. 
" - "Remove corresponding -f option to proceed.\n", + "Remove corresponding -F option to proceed.\n", event_type(j)); return -1; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ad9324d1daf9f29a990a0d8f903563873ac91ef9..324363054c3fe1b8380acba5899d75473626a189 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -86,6 +86,7 @@ #define DEFAULT_SEPARATOR " " #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" +#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" static void print_counters(struct timespec *ts, int argc, const char **argv); @@ -122,6 +123,14 @@ static const char * topdown_attrs[] = { NULL, }; +static const char *smi_cost_attrs = { + "{" + "msr/aperf/," + "msr/smi/," + "cycles" + "}" +}; + static struct perf_evlist *evsel_list; static struct target target = { @@ -137,6 +146,8 @@ static bool null_run = false; static int detailed_run = 0; static bool transaction_run; static bool topdown_run = false; +static bool smi_cost = false; +static bool smi_reset = false; static bool big_num = true; static int big_num_opt = -1; static const char *csv_sep = NULL; @@ -1782,6 +1793,8 @@ static const struct option stat_options[] = { "Only print computed metrics. 
No raw values", enable_metric_only), OPT_BOOLEAN(0, "topdown", &topdown_run, "measure topdown level 1 statistics"), + OPT_BOOLEAN(0, "smi-cost", &smi_cost, + "measure SMI cost"), OPT_END() }; @@ -2160,6 +2173,39 @@ static int add_default_attributes(void) return 0; } + if (smi_cost) { + int smi; + + if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { + fprintf(stderr, "freeze_on_smi is not supported.\n"); + return -1; + } + + if (!smi) { + if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { + fprintf(stderr, "Failed to set freeze_on_smi.\n"); + return -1; + } + smi_reset = true; + } + + if (pmu_have_event("msr", "aperf") && + pmu_have_event("msr", "smi")) { + if (!force_metric_only) + metric_only = true; + err = parse_events(evsel_list, smi_cost_attrs, NULL); + } else { + fprintf(stderr, "To measure SMI cost, it needs " + "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); + return -1; + } + if (err) { + fprintf(stderr, "Cannot set up SMI cost events\n"); + return -1; + } + return 0; + } + if (topdown_run) { char *str = NULL; bool warn = false; @@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv) perf_stat__exit_aggr_mode(); perf_evlist__free_stats(evsel_list); out: + if (smi_cost && smi_reset) + sysfs__write_int(FREEZE_ON_SMI_PATH, 0); + perf_evlist__delete(evsel_list); return status; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7f78f27f53824f1f17dda10fb0d37224d557fc99..6f4882f8d61fb2e1f23000c581134f837722712c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -11,6 +11,7 @@ #include <errno.h> #include <inttypes.h> #include <linux/bitops.h> +#include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <traceevent/event-parse.h> #include <linux/hw_breakpoint.h> @@ -19,6 +20,8 @@ #include <linux/err.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/types.h> +#include <dirent.h> #include "asm/bug.h" #include "callchain.h" #include "cgroup.h" @@ -2472,6 +2475,42 @@ bool 
perf_evsel__fallback(struct perf_evsel *evsel, int err, return false; } +static bool find_process(const char *name) +{ + size_t len = strlen(name); + DIR *dir; + struct dirent *d; + int ret = -1; + + dir = opendir(procfs__mountpoint()); + if (!dir) + return false; + + /* Walk through the directory. */ + while (ret && (d = readdir(dir)) != NULL) { + char path[PATH_MAX]; + char *data; + size_t size; + + if ((d->d_type != DT_DIR) || + !strcmp(".", d->d_name) || + !strcmp("..", d->d_name)) + continue; + + scnprintf(path, sizeof(path), "%s/%s/comm", + procfs__mountpoint(), d->d_name); + + if (filename__read_str(path, &data, &size)) + continue; + + ret = strncmp(name, data, len); + free(data); + } + + closedir(dir); + return ret ? false : true; +} + int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, int err, char *msg, size_t size) { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 7cf7f7aca4d2e80cd3f39cf83bc515b26daaf497..5dea06289db591df192f22ced0d21ad1f50fe73a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -64,6 +64,25 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_FUP_NO_TIP, }; +static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) +{ + switch (pkt_state) { + case INTEL_PT_STATE_NO_PSB: + case INTEL_PT_STATE_NO_IP: + case INTEL_PT_STATE_ERR_RESYNC: + case INTEL_PT_STATE_IN_SYNC: + case INTEL_PT_STATE_TNT: + return true; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + case INTEL_PT_STATE_FUP: + case INTEL_PT_STATE_FUP_NO_TIP: + return false; + default: + return true; + }; +} + #ifdef INTEL_PT_STRICT #define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB @@ -87,11 +106,13 @@ struct intel_pt_decoder { const unsigned char *buf; size_t len; bool return_compression; + bool branch_enable; bool mtc_insn; bool pge; bool 
have_tma; bool have_cyc; bool fixup_last_mtc; + bool have_last_ip; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -99,6 +120,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; uint64_t ctc_delta; @@ -119,6 +141,7 @@ struct intel_pt_decoder { int pkt_len; int last_packet_type; unsigned int cbr; + unsigned int cbr_seen; unsigned int max_non_turbo_ratio; double max_non_turbo_ratio_fp; double cbr_cyc_to_tsc; @@ -136,9 +159,18 @@ struct intel_pt_decoder { bool continuous_period; bool overflow; bool set_fup_tx_flags; + bool set_fup_ptw; + bool set_fup_mwait; + bool set_fup_pwre; + bool set_fup_exstop; unsigned int fup_tx_flags; unsigned int tx_flags; + uint64_t fup_ptw_payload; + uint64_t fup_mwait_payload; + uint64_t fup_pwre_payload; + uint64_t cbr_payload; uint64_t timestamp_insn_cnt; + uint64_t sample_insn_cnt; uint64_t stuck_ip; int no_progress; int stuck_ip_prd; @@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->pgd_ip = params->pgd_ip; decoder->data = params->data; decoder->return_compression = params->return_compression; + decoder->branch_enable = params->branch_enable; decoder->period = params->period; decoder->period_type = params->period_type; @@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); + decoder->have_last_ip = true; } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) case INTEL_PT_PAD: case INTEL_PT_VMCS: case INTEL_PT_MNT: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: return 0; case INTEL_PT_MTC: @@ -733,6 +769,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info 
*pkt_info) case INTEL_PT_TIP_PGD: case INTEL_PT_TRACESTOP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: case INTEL_PT_OVF: case INTEL_PT_BAD: /* Does not happen */ default: @@ -898,6 +939,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; + decoder->sample_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; if (err) { @@ -990,6 +1032,57 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, return err; } +static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) +{ + bool ret = false; + + if (decoder->set_fup_tx_flags) { + decoder->set_fup_tx_flags = false; + decoder->tx_flags = decoder->fup_tx_flags; + decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.flags = decoder->fup_tx_flags; + return true; + } + if (decoder->set_fup_ptw) { + decoder->set_fup_ptw = false; + decoder->state.type = INTEL_PT_PTW; + decoder->state.flags |= INTEL_PT_FUP_IP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.ptw_payload = decoder->fup_ptw_payload; + return true; + } + if (decoder->set_fup_mwait) { + decoder->set_fup_mwait = false; + decoder->state.type = INTEL_PT_MWAIT_OP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.mwait_payload = decoder->fup_mwait_payload; + ret = true; + } + if (decoder->set_fup_pwre) { + decoder->set_fup_pwre = false; + decoder->state.type |= INTEL_PT_PWR_ENTRY; + decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwre_payload = decoder->fup_pwre_payload; + ret = true; + } + if (decoder->set_fup_exstop) { + decoder->set_fup_exstop = false; + decoder->state.type |= INTEL_PT_EX_STOP; + decoder->state.type &= ~INTEL_PT_BRANCH; + 
decoder->state.flags |= INTEL_PT_FUP_IP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + ret = true; + } + return ret; +} + static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) { struct intel_pt_insn intel_pt_insn; @@ -1003,15 +1096,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) if (err == INTEL_PT_RETURN) return 0; if (err == -EAGAIN) { - if (decoder->set_fup_tx_flags) { - decoder->set_fup_tx_flags = false; - decoder->tx_flags = decoder->fup_tx_flags; - decoder->state.type = INTEL_PT_TRANSACTION; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - decoder->state.flags = decoder->fup_tx_flags; + if (intel_pt_fup_event(decoder)) return 0; - } return err; } decoder->set_fup_tx_flags = false; @@ -1360,7 +1446,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) { - unsigned int cbr = decoder->packet.payload; + unsigned int cbr = decoder->packet.payload & 0xff; + + decoder->cbr_payload = decoder->packet.payload; if (decoder->cbr == cbr) return; @@ -1417,6 +1505,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_TRACESTOP: case INTEL_PT_BAD: case INTEL_PT_PSB: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); return -EAGAIN; @@ -1446,7 +1541,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - intel_pt_set_last_ip(decoder); + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); break; case INTEL_PT_MODE_TSX: @@ -1497,6 +1593,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_MODE_TSX: case INTEL_PT_BAD: case INTEL_PT_PSBEND: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case 
INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: intel_pt_log("ERROR: Missing TIP after FUP\n"); decoder->pkt_state = INTEL_PT_STATE_ERR3; return -ENOENT; @@ -1625,6 +1728,15 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) break; } intel_pt_set_last_ip(decoder); + if (!decoder->branch_enable) { + decoder->ip = decoder->last_ip; + if (intel_pt_fup_event(decoder)) + return 0; + no_tip = false; + break; + } + if (decoder->set_fup_mwait) + no_tip = true; err = intel_pt_walk_fup(decoder); if (err != -EAGAIN) { if (err) @@ -1650,6 +1762,8 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psbend(decoder); if (err == -EAGAIN) @@ -1696,6 +1810,16 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) case INTEL_PT_CBR: intel_pt_calc_cbr(decoder); + if (!decoder->branch_enable && + decoder->cbr != decoder->cbr_seen) { + decoder->cbr_seen = decoder->cbr; + decoder->state.type = INTEL_PT_CBR_CHG; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.cbr_payload = + decoder->packet.payload; + return 0; + } break; case INTEL_PT_MODE_EXEC: @@ -1722,6 +1846,71 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) case INTEL_PT_PAD: break; + case INTEL_PT_PTWRITE_IP: + decoder->fup_ptw_payload = decoder->packet.payload; + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->set_fup_ptw = true; + no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after PTWRITE", + decoder->pos); + } + goto next; + + case INTEL_PT_PTWRITE: + decoder->state.type = INTEL_PT_PTW; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.ptw_payload = decoder->packet.payload; + return 0; + + case 
INTEL_PT_MWAIT: + decoder->fup_mwait_payload = decoder->packet.payload; + decoder->set_fup_mwait = true; + break; + + case INTEL_PT_PWRE: + if (decoder->set_fup_mwait) { + decoder->fup_pwre_payload = + decoder->packet.payload; + decoder->set_fup_pwre = true; + break; + } + decoder->state.type = INTEL_PT_PWR_ENTRY; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwre_payload = decoder->packet.payload; + return 0; + + case INTEL_PT_EXSTOP_IP: + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->set_fup_exstop = true; + no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after EXSTOP", + decoder->pos); + } + goto next; + + case INTEL_PT_EXSTOP: + decoder->state.type = INTEL_PT_EX_STOP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; + + case INTEL_PT_PWRX: + decoder->state.type = INTEL_PT_PWR_EXIT; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwrx_payload = decoder->packet.payload; + return 0; + default: return intel_pt_bug(decoder); } @@ -1730,8 +1919,9 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) { - return decoder->last_ip || decoder->packet.count == 0 || - decoder->packet.count == 3 || decoder->packet.count == 6; + return decoder->packet.count && + (decoder->have_last_ip || decoder->packet.count == 3 || + decoder->packet.count == 6); } /* Walk PSB+ packets to get in sync. 
*/ @@ -1750,6 +1940,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) __fallthrough; case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: intel_pt_log("ERROR: Unexpected packet\n"); return -ENOENT; @@ -1854,14 +2051,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_FUP: - if (decoder->overflow) { - if (intel_pt_have_ip(decoder)) - intel_pt_set_ip(decoder); - if (decoder->ip) - return 0; - } - if (decoder->packet.count) - intel_pt_set_last_ip(decoder); + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; break; case INTEL_PT_MTC: @@ -1910,6 +2103,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; + intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psb(decoder); if (err) return err; @@ -1925,6 +2121,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_VMCS: case INTEL_PT_MNT: case INTEL_PT_PAD: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: default: break; } @@ -1935,6 +2138,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) { int err; + decoder->set_fup_tx_flags = false; + decoder->set_fup_ptw = false; + decoder->set_fup_mwait = false; + decoder->set_fup_pwre = false; + decoder->set_fup_exstop = false; + + if (!decoder->branch_enable) { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->overflow = false; + decoder->state.type = 0; /* Do not have a sample */ + return 0; + } + intel_pt_log("Scanning for full IP\n"); err = intel_pt_walk_to_ip(decoder); if (err) @@ -2043,6 +2259,7 @@ static int intel_pt_sync(struct 
intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; + decoder->have_last_ip = false; decoder->last_ip = 0; decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); @@ -2051,6 +2268,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) if (err) return err; + decoder->have_last_ip = true; decoder->pkt_state = INTEL_PT_STATE_NO_IP; err = intel_pt_walk_psb(decoder); @@ -2069,7 +2287,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) { - uint64_t est = decoder->timestamp_insn_cnt << 1; + uint64_t est = decoder->sample_insn_cnt << 1; if (!decoder->cbr || !decoder->max_non_turbo_ratio) goto out; @@ -2077,7 +2295,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) est *= decoder->max_non_turbo_ratio; est /= decoder->cbr; out: - return decoder->timestamp + est; + return decoder->sample_timestamp + est; } const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) @@ -2093,8 +2311,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_sync(decoder); break; case INTEL_PT_STATE_NO_IP: + decoder->have_last_ip = false; decoder->last_ip = 0; - /* Fall through */ + decoder->ip = 0; + __fallthrough; case INTEL_PT_STATE_ERR_RESYNC: err = intel_pt_sync_ip(decoder); break; @@ -2130,15 +2350,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) } } while (err == -ENOLINK); - decoder->state.err = err ? 
intel_pt_ext_err(err) : 0; - decoder->state.timestamp = decoder->timestamp; + if (err) { + decoder->state.err = intel_pt_ext_err(err); + decoder->state.from_ip = decoder->ip; + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } else { + decoder->state.err = 0; + if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { + decoder->cbr_seen = decoder->cbr; + decoder->state.type |= INTEL_PT_CBR_CHG; + decoder->state.cbr_payload = decoder->cbr_payload; + } + if (intel_pt_sample_time(decoder->pkt_state)) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } + } + + decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; - if (err) - decoder->state.from_ip = decoder->ip; - return &decoder->state; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index e90619a43c0cefdd6edebb0369e77dbf46017c21..921b22e8ca0eb5a00a1172e1d10f89f4752d429f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -25,11 +25,18 @@ #define INTEL_PT_IN_TX (1 << 0) #define INTEL_PT_ABORT_TX (1 << 1) #define INTEL_PT_ASYNC (1 << 2) +#define INTEL_PT_FUP_IP (1 << 3) enum intel_pt_sample_type { INTEL_PT_BRANCH = 1 << 0, INTEL_PT_INSTRUCTION = 1 << 1, INTEL_PT_TRANSACTION = 1 << 2, + INTEL_PT_PTW = 1 << 3, + INTEL_PT_MWAIT_OP = 1 << 4, + INTEL_PT_PWR_ENTRY = 1 << 5, + INTEL_PT_EX_STOP = 1 << 6, + INTEL_PT_PWR_EXIT = 1 << 7, + INTEL_PT_CBR_CHG = 1 << 8, }; enum intel_pt_period_type { @@ -63,6 +70,11 @@ struct intel_pt_state { uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; + uint64_t ptw_payload; + uint64_t mwait_payload; + uint64_t pwre_payload; + uint64_t pwrx_payload; + uint64_t 
cbr_payload; uint32_t flags; enum intel_pt_insn_op insn_op; int insn_len; @@ -87,6 +99,7 @@ struct intel_pt_params { bool (*pgd_ip)(uint64_t ip, void *data); void *data; bool return_compression; + bool branch_enable; uint64_t period; enum intel_pt_period_type period_type; unsigned max_non_turbo_ratio; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 7528ae4f7e28e1d419c699759125c979e8dc1397..ba4c9dd186434a33c8c33a59ab8884fd7c679dd3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -64,6 +64,13 @@ static const char * const packet_name[] = { [INTEL_PT_PIP] = "PIP", [INTEL_PT_OVF] = "OVF", [INTEL_PT_MNT] = "MNT", + [INTEL_PT_PTWRITE] = "PTWRITE", + [INTEL_PT_PTWRITE_IP] = "PTWRITE", + [INTEL_PT_EXSTOP] = "EXSTOP", + [INTEL_PT_EXSTOP_IP] = "EXSTOP", + [INTEL_PT_MWAIT] = "MWAIT", + [INTEL_PT_PWRE] = "PWRE", + [INTEL_PT_PWRX] = "PWRX", }; const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) @@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len, if (len < 4) return INTEL_PT_NEED_MORE_BYTES; packet->type = INTEL_PT_CBR; - packet->payload = buf[2]; + packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2)); return 4; } @@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len, } } +static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + packet->count = (buf[1] >> 5) & 0x3; + packet->type = buf[1] & BIT(7) ? 
INTEL_PT_PTWRITE_IP : + INTEL_PT_PTWRITE; + + switch (packet->count) { + case 0: + if (len < 6) + return INTEL_PT_NEED_MORE_BYTES; + packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2)); + return 6; + case 1: + if (len < 10) + return INTEL_PT_NEED_MORE_BYTES; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2)); + return 10; + default: + return INTEL_PT_BAD_PACKET; + } +} + +static int intel_pt_get_exstop(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_EXSTOP; + return 2; +} + +static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_EXSTOP_IP; + return 2; +} + +static int intel_pt_get_mwait(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 10) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MWAIT; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2)); + return 10; +} + +static int intel_pt_get_pwre(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 4) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_PWRE; + memcpy_le64(&packet->payload, buf + 2, 2); + return 4; +} + +static int intel_pt_get_pwrx(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 7) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_PWRX; + memcpy_le64(&packet->payload, buf + 2, 5); + return 7; +} + static int intel_pt_get_ext(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { if (len < 2) return INTEL_PT_NEED_MORE_BYTES; + if ((buf[1] & 0x1f) == 0x12) + return intel_pt_get_ptwrite(buf, len, packet); + switch (buf[1]) { case 0xa3: /* Long TNT */ return intel_pt_get_long_tnt(buf, len, packet); @@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len, return intel_pt_get_tma(buf, len, packet); case 0xC3: /* 3-byte header */ return intel_pt_get_3byte(buf, len, packet); + case 0x62: /* EXSTOP no IP */ + return intel_pt_get_exstop(packet); + case 0xE2: /* EXSTOP with IP */ + 
return intel_pt_get_exstop_ip(packet); + case 0xC2: /* MWAIT */ + return intel_pt_get_mwait(buf, len, packet); + case 0x22: /* PWRE */ + return intel_pt_get_pwre(buf, len, packet); + case 0xA2: /* PWRX */ + return intel_pt_get_pwrx(buf, len, packet); default: return INTEL_PT_BAD_PACKET; } @@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", name, payload, nr); return ret; + case INTEL_PT_PTWRITE: + return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload); + case INTEL_PT_PTWRITE_IP: + return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload); + case INTEL_PT_EXSTOP: + return snprintf(buf, buf_len, "%s IP:0", name); + case INTEL_PT_EXSTOP_IP: + return snprintf(buf, buf_len, "%s IP:1", name); + case INTEL_PT_MWAIT: + return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x", + name, payload, (unsigned int)(payload & 0xff), + (unsigned int)((payload >> 32) & 0x3)); + case INTEL_PT_PWRE: + return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u", + name, payload, !!(payload & 0x80), + (unsigned int)((payload >> 12) & 0xf), + (unsigned int)((payload >> 8) & 0xf)); + case INTEL_PT_PWRX: + return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x", + name, payload, + (unsigned int)((payload >> 4) & 0xf), + (unsigned int)(payload & 0xf), + (unsigned int)((payload >> 8) & 0xf)); default: break; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h index 781bb79883bd612bb803318daff7248710f6c107..73ddc3a88d0749eddc31b4a7a98982921c4edc78 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -52,6 +52,13 @@ enum intel_pt_pkt_type { INTEL_PT_PIP, INTEL_PT_OVF, INTEL_PT_MNT, + INTEL_PT_PTWRITE, + INTEL_PT_PTWRITE_IP, + INTEL_PT_EXSTOP, + INTEL_PT_EXSTOP_IP, + INTEL_PT_MWAIT, 
+ INTEL_PT_PWRE, + INTEL_PT_PWRX, }; struct intel_pt_pkt { diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4c7718f87a0890ee64e2f78e0b97291b67ecf33e..6df836469f2b4987651c8197f4bfafc823819c36 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -668,6 +668,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt) return true; } +static bool intel_pt_branch_enable(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config) && + (config & 1) && !(config & 0x2000)) + return false; + } + return true; +} + static unsigned int intel_pt_mtc_period(struct intel_pt *pt) { struct perf_evsel *evsel; @@ -799,6 +812,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.walk_insn = intel_pt_walk_next_insn; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); + params.branch_enable = intel_pt_branch_enable(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; @@ -1308,18 +1322,14 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; if (pt->sample_instructions && - (state->type & INTEL_PT_INSTRUCTION) && - (!pt->synth_opts.initial_skip || - pt->num_events++ >= pt->synth_opts.initial_skip)) { + (state->type & INTEL_PT_INSTRUCTION)) { err = intel_pt_synth_instruction_sample(ptq); if (err) return err; } if (pt->sample_transactions && - (state->type & INTEL_PT_TRANSACTION) && - (!pt->synth_opts.initial_skip || - pt->num_events++ >= pt->synth_opts.initial_skip)) { + (state->type & INTEL_PT_TRANSACTION)) { err = intel_pt_synth_transaction_sample(ptq); if (err) return err; @@ -2025,6 +2035,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, return err; } pt->sample_transactions = true; + pt->transactions_sample_type = 
attr.sample_type; pt->transactions_id = id; id += 1; evlist__for_each_entry(evlist, evsel) { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7dc1096264c575cbbd1cd41d07f041585a997453..d19c40a8104027117a2d30ad2a6aad6af8207025 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) { pr_err("File does not contain CPU events. " - "Remove -c option to proceed.\n"); + "Remove -C option to proceed.\n"); return -1; } } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index ac10cc675d39579bfca249abbc4353e7c9accc6e..719d6cb86952e5c6482e7a09070370360efe2030 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; static struct rblist runtime_saved_values; static bool have_frontend_stalled; @@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void) memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); + memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); + memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); next = rb_first(&runtime_saved_values.entries); while (next) { @@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_dtlb_cache_stats[ctx][cpu], 
count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, SMI_NUM)) + update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, APERF)) + update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); if (counter->collect_stat) { struct saved_value *v = saved_value_lookup(counter, cpu, ctx, @@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu) return sanitize_val(1.0 - sum); } +static void print_smi_cost(int cpu, struct perf_evsel *evsel, + struct perf_stat_output_ctx *out) +{ + double smi_num, aperf, cycles, cost = 0.0; + int ctx = evsel_context(evsel); + const char *color = NULL; + + smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); + aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); + cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + + if ((cycles == 0) || (aperf == 0)) + return; + + if (smi_num) + cost = (aperf - cycles) / aperf * 100.00; + + if (cost > 10) + color = PERF_COLOR_RED; + out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost); + out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); +} + void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out) @@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); + } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { + print_smi_cost(cpu, evsel, out); } else { print_metric(ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c58174443dc12c7fad840d8b67a34223e5283f9f..53b9a994a3dc9e50aca6da360d4781d93f52dfc9 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), 
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), + ID(SMI_NUM, msr/smi/), + ID(APERF, msr/aperf/), }; #undef ID diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0a65ae23f49504874bf82134d4e9ed40d5408a7f..7522bf10b03e2fcbf26b9c67bebd41e56bff310b 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -22,6 +22,8 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, + PERF_STAT_EVSEL_ID__SMI_NUM, + PERF_STAT_EVSEL_ID__APERF, PERF_STAT_EVSEL_ID__MAX, }; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 28c9f335006c962a5df924e30d9a45687775c201..988111e0bab592369ecf197d43127dab0dc57975 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -343,43 +343,6 @@ int perf_event_paranoid(void) return value; } - -bool find_process(const char *name) -{ - size_t len = strlen(name); - DIR *dir; - struct dirent *d; - int ret = -1; - - dir = opendir(procfs__mountpoint()); - if (!dir) - return false; - - /* Walk through the directory. */ - while (ret && (d = readdir(dir)) != NULL) { - char path[PATH_MAX]; - char *data; - size_t size; - - if ((d->d_type != DT_DIR) || - !strcmp(".", d->d_name) || - !strcmp("..", d->d_name)) - continue; - - scnprintf(path, sizeof(path), "%s/%s/comm", - procfs__mountpoint(), d->d_name); - - if (filename__read_str(path, &data, &size)) - continue; - - ret = strncmp(name, data, len); - free(data); - } - - closedir(dir); - return ret ? 
false : true; -} - static int fetch_ubuntu_kernel_version(unsigned int *puint) { @@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint) size_t line_len = 0; char *ptr, *line = NULL; int version, patchlevel, sublevel, err; - FILE *vsig = fopen("/proc/version_signature", "r"); + FILE *vsig; + + if (!puint) + return 0; + vsig = fopen("/proc/version_signature", "r"); if (!vsig) { pr_debug("Open /proc/version_signature failed: %s\n", strerror(errno)); @@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint) goto errout; } - if (puint) - *puint = (version << 16) + (patchlevel << 8) + sublevel; + *puint = (version << 16) + (patchlevel << 8) + sublevel; err = 0; errout: free(line); @@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str, str[str_size - 1] = '\0'; } + if (!puint || int_ver_ready) + return 0; + err = sscanf(utsname.release, "%d.%d.%d", &version, &patchlevel, &sublevel); @@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str, return -1; } - if (puint && !int_ver_ready) - *puint = (version << 16) + (patchlevel << 8) + sublevel; + *puint = (version << 16) + (patchlevel << 8) + sublevel; return 0; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 21c6db173bcc45d331f803b7e2da3fd6d39fedd3..978572dfeb14351494bf0a939e8ccada7b278f30 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,7 +1,6 @@ #ifndef GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H -#define _ALL_SOURCE 1 #define _BSD_SOURCE 1 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 @@ -49,8 +48,6 @@ int hex2u64(const char *ptr, u64 *val); extern unsigned int page_size; extern int cacheline_size; -bool find_process(const char *name); - int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); #define KVER_VERSION(x) (((x) >> 16) & 0xff)