diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 9aa107a0ce8cba0c1344b7312017fed9e0f4c87c..2a912df6771bf5c6abd880667fbece1dcf1e1005 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4905,8 +4905,8 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event else ls = 2; - if (*(ptr+1) == 'F' || - *(ptr+1) == 'f') { + if (*(ptr+1) == 'F' || *(ptr+1) == 'f' || + *(ptr+1) == 'S' || *(ptr+1) == 's') { ptr++; show_func = *ptr; } else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') { diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b027d28658f2a709999feffec076c4174aac03ef..7ff6a9d0ea0d7733e74a00f7d5af60c0bc331b9e 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -144,7 +144,7 @@ OPTIONS --call-graph:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -g. + implies -g. Default is "fp". Allows specifying "fp" (frame pointer) or "dwarf" (DWARF's CFI - Call Frame Information) or "lbr" @@ -154,13 +154,18 @@ OPTIONS In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus call graphs, using "dwarf", if available (perf tools linked to - the libunwind library) should be used instead. + the libunwind or libdw library) should be used instead. Using the "lbr" method doesn't require any compiler options. It will produce call graphs from the hardware LBR registers. The main limition is that it is only available on new Intel platforms, such as Haswell. It can only get user call chain. It doesn't work with branch stack sampling at the same time. + When "dwarf" recording is used, perf also records (user) stack dump + when sampled. Default size of the stack dump is 8192 (bytes). + User can change the size by passing the size after comma like + "--call-graph dwarf,4096". 
+ -q:: --quiet:: Don't print any message, useful for scripting. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index e4fdeeb5112320670f5d39c71f3d8c8577c8f05b..ab1fd64e36271d46426adfeb7e7a8648b1110dcc 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -169,30 +169,40 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min[,limit],order[,key][,branch]]:: ---call-graph:: - Display call chains using type, min percent threshold, optional print - limit and order. - type can be either: +-g:: +--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch]>:: + Display call chains using type, min percent threshold, print limit, + call order, sort key and branch. Note that ordering of parameters is not + fixed so any parameter can be given in an arbitrary order. One exception + is the print_limit which should be preceded by threshold. + + print_type can be either: - flat: single column, linear exposure of call chains. - - graph: use a graph tree, displaying absolute overhead rates. + - graph: use a graph tree, displaying absolute overhead rates. (default) - fractal: like graph, but displays relative rates. Each branch of - the tree is considered as a new profiled object. + + the tree is considered as a new profiled object. + - none: disable call chain display. + + threshold is a percentage value which specifies a minimum percent to be + included in the output call graph. Default is 0.5 (%). + + print_limit is only applied when stdio interface is used. It's to limit + number of call graph entries in a single hist entry. Note that it needs + to be given after threshold (but not necessarily consecutive). + Default is 0 (unlimited). order can be either: - callee: callee based call graph. - caller: inverted caller based call graph. + Default is 'caller' when --children is used, otherwise 'callee'. 
- key can be: - - function: compare on functions + sort_key can be: + - function: compare on functions (default) - address: compare on individual code addresses branch can be: - - branch: include last branch information in callgraph - when available. Usually more convenient to use --branch-history - for this. - - Default: graph,0.5,caller + - branch: include last branch information in callgraph when available. + Usually more convenient to use --branch-history for this. --children:: Accumulate callchain of children to parent entry so that then can diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f6a23eb294e77ad2f364f7275d4916336ca60809..556cec09bf50cc4d5d9a50104d8efabd7879e2c7 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -160,9 +160,10 @@ Default is to monitor all CPUS. -g:: Enables call-graph (stack chain/backtrace) recording. ---call-graph:: +--call-graph [mode,type,min[,limit],order[,key][,branch]]:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -g. + implies -g. See `--call-graph` section in perf-record and + perf-report man pages for details. 
--children:: Accumulate callchain of children to parent entry so that then can diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 24ace2f318c1ba499e19ba95f3c9f5543ae7615a..2740d7a82ae80943ca45b18059d2e40ff28f7e35 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1010,13 +1010,8 @@ static struct record record = { }, }; -#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " - -#ifdef HAVE_DWARF_UNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; -#else -const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; -#endif +const char record_callchain_help[] = CALLCHAIN_RECORD_HELP + "\n\t\t\t\tDefault: fp"; /* * XXX Will stay a global variable till we fix builtin-script.c to stop messing @@ -1064,7 +1059,7 @@ struct option __record_options[] = { NULL, "enables call-graph recording" , &record_callchain_opt), OPT_CALLBACK(0, "call-graph", &record.opts, - "mode[,dump_size]", record_callchain_help, + "record_mode[,record_size]", record_callchain_help, &record_parse_callchain_opt), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3b23b25d1589cded3f29501a1b3b4ff1392a3f02..50dd4d3d866768f4ef74892b922a85a34ffb9973 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -625,6 +625,12 @@ parse_percent_limit(const struct option *opt, const char *str, return 0; } +#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function" + +const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" + CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; + int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_session *session; @@ -633,7 +639,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) bool has_br_stack = false; int 
branch_mode = -1; bool branch_call_mode = false; - char callchain_default_opt[] = "graph,0.5,caller"; + char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char * const report_usage[] = { "perf report []", NULL @@ -699,9 +705,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. " - "Default: graph,0.5,caller", &report_parse_callchain_opt, callchain_default_opt), + OPT_CALLBACK_DEFAULT('g', "call-graph", &report, + "print_type,threshold[,print_limit],order,sort_key[,branch]", + report_callchain_help, &report_parse_callchain_opt, + callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &report.max_stack, @@ -808,6 +815,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; + if (symbol_conf.cumulate_callchain && !callchain_param.order_set) + callchain_param.order = ORDER_CALLER; if (itrace_synth_opts.callchain && (int)itrace_synth_opts.callchain_sz > report.max_stack) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6f641fd682966b97beb1f13b2f0f14912b8fc0cf..7e2e72e6d9d16323c3c448986372fa13234d368a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1053,8 +1053,22 @@ callchain_opt(const struct option *opt, const char *arg, int unset) static int parse_callchain_opt(const struct option *opt, const char *arg, int unset) { - 
symbol_conf.use_callchain = true; - return record_parse_callchain_opt(opt, arg, unset); + struct record_opts *record = (struct record_opts *)opt->value; + + record->callgraph_set = true; + callchain_param.enabled = !unset; + callchain_param.record_mode = CALLCHAIN_FP; + + /* + * --no-call-graph + */ + if (unset) { + symbol_conf.use_callchain = false; + callchain_param.record_mode = CALLCHAIN_NONE; + return 0; + } + + return parse_callchain_top_opt(arg); } static int perf_top_config(const char *var, const char *value, void *cb) @@ -1079,6 +1093,9 @@ parse_percent_limit(const struct option *opt, const char *arg, return 0; } +const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: fp,graph,0.5,caller,function"; + int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) { char errbuf[BUFSIZ]; @@ -1154,11 +1171,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts, - NULL, "enables call-graph recording", + NULL, "enables call-graph recording and display", &callchain_opt), OPT_CALLBACK(0, "call-graph", &top.record_opts, - "mode[,dump_size]", record_callchain_help, - &parse_callchain_opt), + "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]", + top_callchain_help, &parse_callchain_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &top.max_stack, @@ -1288,6 +1305,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) perf_hpp__cancel_cumulate(); } + if (symbol_conf.cumulate_callchain && !callchain_param.order_set) + callchain_param.order = ORDER_CALLER; + symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.try_vmlinux_path = 
(symbol_conf.vmlinux_name == NULL); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index bec0b62d8e3897eab0e42368bdeceacb77e8a92e..d4d7cc27252f1184bf2d678b6460f138099efd28 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1125,8 +1125,8 @@ static struct annotate_config { ANNOTATE_CFG(jump_arrows), ANNOTATE_CFG(show_linenr), ANNOTATE_CFG(show_nr_jumps), - ANNOTATE_CFG(use_offset), ANNOTATE_CFG(show_total_period), + ANNOTATE_CFG(use_offset), }; #undef ANNOTATE_CFG @@ -1152,9 +1152,9 @@ static int annotate__config(const char *var, const char *value, sizeof(struct annotate_config), annotate_config__cmp); if (cfg == NULL) - return -1; - - *cfg->value = perf_config_bool(name, value); + ui__warning("%s variable unknown, ignoring...", var); + else + *cfg->value = perf_config_bool(name, value); return 0; } diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 60d1f29b4b50a9fedf0a163855056edfce1ed22b..7dfeba0a91f37c33c87b826e9ed79ec4ad59869c 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -141,10 +141,6 @@ int ui__init(void) SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB); - ui_helpline__init(); - ui_browser__init(); - tui_progress__init(); - signal(SIGSEGV, ui__signal_backtrace); signal(SIGFPE, ui__signal_backtrace); signal(SIGINT, ui__signal); @@ -153,6 +149,10 @@ int ui__init(void) perf_error__register(&perf_tui_eops); + ui_helpline__init(); + ui_browser__init(); + tui_progress__init(); + hist_browser__init_hpp(); out: return err; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 773fe13ce6271b192bc9482ac8496578a9f41a81..735ad48e1858b0382c9aacc84e0b912e9329c86a 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -51,10 +51,12 @@ static int parse_callchain_order(const char *value) { if (!strncmp(value, "caller", strlen(value))) { callchain_param.order = ORDER_CALLER; + callchain_param.order_set = 
true; return 0; } if (!strncmp(value, "callee", strlen(value))) { callchain_param.order = ORDER_CALLEE; + callchain_param.order_set = true; return 0; } return -1; @@ -77,12 +79,14 @@ static int parse_callchain_sort_key(const char *value) return -1; } -int -parse_callchain_report_opt(const char *arg) +static int +__parse_callchain_report_opt(const char *arg, bool allow_record_opt) { char *tok; char *endptr; bool minpcnt_set = false; + bool record_opt_set = false; + bool try_stack_size = false; symbol_conf.use_callchain = true; @@ -100,6 +104,28 @@ parse_callchain_report_opt(const char *arg) !parse_callchain_order(tok) || !parse_callchain_sort_key(tok)) { /* parsing ok - move on to the next */ + try_stack_size = false; + goto next; + } else if (allow_record_opt && !record_opt_set) { + if (parse_callchain_record(tok, &callchain_param)) + goto try_numbers; + + /* assume that number followed by 'dwarf' is stack size */ + if (callchain_param.record_mode == CALLCHAIN_DWARF) + try_stack_size = true; + + record_opt_set = true; + goto next; + } + +try_numbers: + if (try_stack_size) { + unsigned long size = 0; + + if (get_stack_size(tok, &size) < 0) + return -1; + callchain_param.dump_size = size; + try_stack_size = false; } else if (!minpcnt_set) { /* try to get the min percent */ callchain_param.min_percent = strtod(tok, &endptr); @@ -112,7 +138,7 @@ parse_callchain_report_opt(const char *arg) if (tok == endptr) return -1; } - +next: arg = NULL; } @@ -123,6 +149,16 @@ parse_callchain_report_opt(const char *arg) return 0; } +int parse_callchain_report_opt(const char *arg) +{ + return __parse_callchain_report_opt(arg, false); +} + +int parse_callchain_top_opt(const char *arg) +{ + return __parse_callchain_report_opt(arg, true); +} + int perf_callchain_config(const char *var, const char *value) { char *endptr; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index acee2b3cd801737985fc7ca44443027a03ad4d54..fce8161e54dbbbae16f0667c0cd86cb50d7b58a2 100644 
--- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -7,6 +7,30 @@ #include "event.h" #include "symbol.h" +#define HELP_PAD "\t\t\t\t" + +#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n" +#else +# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n" +#endif + +#define RECORD_SIZE_HELP \ + HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording ()\n" \ + HELP_PAD "\t\tdefault: 8192 (bytes)\n" + +#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP + +#define CALLCHAIN_REPORT_HELP \ + HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \ + HELP_PAD "threshold:\tminimum call graph inclusion threshold ()\n" \ + HELP_PAD "print_limit:\tmaximum number of call graph entry ()\n" \ + HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \ + HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \ + HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" + enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, @@ -63,6 +87,7 @@ struct callchain_param { double min_percent; sort_chain_func_t sort; enum chain_order order; + bool order_set; enum chain_key key; bool branch_callstack; }; @@ -180,6 +205,7 @@ extern const char record_callchain_help[]; extern int parse_callchain_record(const char *arg, struct callchain_param *param); int parse_callchain_record_opt(const char *arg, struct callchain_param *param); int parse_callchain_report_opt(const char *arg); +int parse_callchain_top_opt(const char *arg); int perf_callchain_config(const char *var, const char *value); static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 
c1bf9ff210b0e878542f416eb2902af94d0a7dcc..cd12c25e4ea408b1b873d8368f5081d3e2a72113 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -19,7 +19,7 @@ struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_ABS, .min_percent = 0.5, - .order = ORDER_CALLER, + .order = ORDER_CALLEE, .key = CCKEY_FUNCTION };