diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 747ff50284b7c84419cc431996d820485bb069e9..2b8097ee39d83c194fd8393a0a1a3e2b67e40b65 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -115,7 +115,7 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min[,limit],order]:: +-g [type,min[,limit],order[,key]]:: --call-graph:: Display call chains using type, min percent threshold, optional print limit and order. @@ -129,7 +129,11 @@ OPTIONS - callee: callee based call graph. - caller: inverted caller based call graph. - Default: fractal,0.5,callee. + key can be: + - function: compare on functions + - address: compare on individual code addresses + + Default: fractal,0.5,callee,function. -G:: --inverted:: diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 2a690267714153340d22f18738ec82eb6df6a8ef..024680b23ddc323701699f452e17432d2c68c7ed 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -631,10 +631,10 @@ $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< $(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter 
-Wno-nested-externs -Wno-undef -Wno-switch-default $< $(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 25fd3f1966f193e50ee60d12802d0538f8e18693..8cdca43016b250109d8bdd3ea7568eb13229a125 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -117,6 +117,8 @@ static void alloc_mem(void **dst, void **src, size_t length) *src = zalloc(length); if (!*src) die("memory allocation failed - maybe length is too large?\n"); + /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ + memset(*src, 0, length); } static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a34c587900c77edfbc8f274b68a958f1271fb2b8..d785d89ed226a8cd8047b7159386450e559bc686 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -667,12 +667,23 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) } /* get the call chain order */ - if (!strcmp(tok2, "caller")) + if (!strncmp(tok2, "caller", strlen("caller"))) callchain_param.order = ORDER_CALLER; - else if (!strcmp(tok2, "callee")) + else if (!strncmp(tok2, "callee", strlen("callee"))) callchain_param.order = ORDER_CALLEE; else return -1; + + /* Get the sort key */ + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + if (!strncmp(tok2, "function", strlen("function"))) + callchain_param.key = CCKEY_FUNCTION; + else if (!strncmp(tok2, "address", strlen("address"))) + callchain_param.key = CCKEY_ADDRESS; + else + return -1; setup: if (callchain_register_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain params\n"); @@ -784,8 +795,8 @@ int cmd_report(int argc, const 
char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. " - "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt), + "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " + "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ecb697998d3b2f0a4ccee329141b009f2ef5936c..1cad370146738b85536efa2a2de9c979d87e4963 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -397,10 +397,10 @@ static void print_sample_bts(union perf_event *event, static void process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct addr_location *al) + struct thread *thread, + struct addr_location *al __maybe_unused) { struct perf_event_attr *attr = &evsel->attr; - struct thread *thread = al->thread; if (output[attr->type].fields == 0) return; @@ -511,7 +511,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, sample, evsel, machine, &al); + scripting_ops->process_event(event, sample, evsel, machine, thread, &al); evsel->hists.stats.total_period += sample->period; return 0; diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 
c441a28751283579652f01017aed8243e44c6246..2ca0abf1b2b6090165a4ccdb091729da4e7a1c48 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -1,6 +1,8 @@ PERF := . MK := Makefile +has = $(shell which $1 2>/dev/null) + # standard single make variable specified make_clean_all := clean all make_python_perf_so := python/perf.so @@ -25,6 +27,13 @@ make_help := help make_doc := doc make_perf_o := perf.o make_util_map_o := util/map.o +make_install := install +make_install_bin := install-bin +make_install_doc := install-doc +make_install_man := install-man +make_install_html := install-html +make_install_info := install-info +make_install_pdf := install-pdf # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 @@ -50,14 +59,27 @@ run += make_no_backtrace run += make_no_libnuma run += make_no_libaudit run += make_no_libbionic -run += make_tags -run += make_cscope run += make_help run += make_doc run += make_perf_o run += make_util_map_o +run += make_install +run += make_install_bin +# FIXME 'install-*' commented out till they're fixed +# run += make_install_doc +# run += make_install_man +# run += make_install_html +# run += make_install_info +# run += make_install_pdf run += make_minimal +ifneq ($(call has,ctags),) +run += make_tags +endif +ifneq ($(call has,cscope),) +run += make_cscope +endif + # $(run_O) contains same portion of $(run) tests with '_O' attached # to distinguish O=... 
tests run_O := $(addsuffix _O,$(run)) @@ -84,6 +106,31 @@ test_make_python_perf_so := test -f $(PERF)/python/perf.so test_make_perf_o := test -f $(PERF)/perf.o test_make_util_map_o := test -f $(PERF)/util/map.o +test_make_install := test -x $$TMP_DEST/bin/perf +test_make_install_O := $(test_make_install) +test_make_install_bin := $(test_make_install) +test_make_install_bin_O := $(test_make_install) + +# FIXME nothing gets installed +test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 +test_make_install_man_O := $(test_make_install_man) + +# FIXME nothing gets installed +test_make_install_doc := $(test_ok) +test_make_install_doc_O := $(test_ok) + +# FIXME nothing gets installed +test_make_install_html := $(test_ok) +test_make_install_html_O := $(test_ok) + +# FIXME nothing gets installed +test_make_install_info := $(test_ok) +test_make_install_info_O := $(test_ok) + +# FIXME nothing gets installed +test_make_install_pdf := $(test_ok) +test_make_install_pdf_O := $(test_ok) + # Kbuild tests only #test_make_python_perf_so_O := test -f $$TMP/tools/perf/python/perf.so #test_make_perf_o_O := test -f $$TMP/tools/perf/perf.o @@ -95,7 +142,7 @@ test_make_util_map_o_O := true test_default = test -x $(PERF)/perf test = $(if $(test_$1),$(test_$1),$(test_default)) -test_default_O = test -x $$TMP/perf +test_default_O = test -x $$TMP_O/perf test_O = $(if $(test_$1),$(test_$1),$(test_default_O)) all: @@ -111,23 +158,27 @@ clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null) $(run): $(call clean) - @cmd="cd $(PERF) && make -f $(MK) $($@)"; \ + @TMP_DEST=$$(mktemp -d); \ + cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \ echo "- $@: $$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)"; \ $(call test,$@) && \ - rm -f $@ + rm -f $@ && \ + rm -rf $$TMP_DEST $(run_O): $(call clean) - @TMP=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) $($(patsubst %_O,%,$@)) O=$$TMP"; \ + @TMP_O=$$(mktemp -d); \ + TMP_DEST=$$(mktemp 
-d); \ + cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ echo "- $@: $$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)"; \ $(call test_O,$@) && \ rm -f $@ && \ - rm -rf $$TMP + rm -rf $$TMP_O && \ + rm -rf $$TMP_DEST all: $(run) $(run_O) @echo OK diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 42b6a632fe7b75bb297433e9ad9962814e5016d9..4fee33b229b0f8a67b664edbc2f092dc0bc0d235 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -15,6 +15,7 @@ #include #include +#include "hist.h" #include "util.h" #include "callchain.h" @@ -327,7 +328,8 @@ append_chain(struct callchain_node *root, /* * Lookup in the current node * If we have a symbol, then compare the start to match - * anywhere inside a function. + * anywhere inside a function, unless function + * mode is disabled. */ list_for_each_entry(cnode, &root->val, list) { struct callchain_cursor_node *node; @@ -339,7 +341,8 @@ append_chain(struct callchain_node *root, sym = node->sym; - if (cnode->ms.sym && sym) { + if (cnode->ms.sym && sym && + callchain_param.key == CCKEY_FUNCTION) { if (cnode->ms.sym->start != sym->start) break; } else if (cnode->ip != node->ip) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3ee9f67d5af0bed457cb7ae22d5efe99ccdbfd8f..812d5a0ff2bcf7da3ce89315e6ad130a173adf10 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -41,12 +41,18 @@ struct callchain_param; typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *, u64, struct callchain_param *); +enum chain_key { + CCKEY_FUNCTION, + CCKEY_ADDRESS +}; + struct callchain_param { enum chain_mode mode; u32 print_limit; double min_percent; sort_chain_func_t sort; enum chain_order order; + enum chain_key key; }; struct callchain_list { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 
a6354619fa5daf2b289be16a4fa8ffa2a6af805e..8bed0c1a1399c4dabbb4efc52595927c1425a5ee 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1482,7 +1482,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, bool perf_evsel__fallback(struct perf_evsel *evsel, int err, char *msg, size_t msgsize) { - if ((err == ENOENT || err == ENXIO) && + if ((err == ENOENT || err == ENXIO || err == ENODEV) && evsel->attr.type == PERF_TYPE_HARDWARE && evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) { /* diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a9dd1b9d8907c185cf620ce381fa703f1089bd29..46a0d35a05e1f21aae097e7a67cea89bfe9ecddf 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -24,7 +24,8 @@ enum hist_filter { struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_REL, .min_percent = 0.5, - .order = ORDER_CALLEE + .order = ORDER_CALLEE, + .key = CCKEY_FUNCTION }; u16 hists__col_len(struct hists *hists, enum hist_column col) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index eacec859f2996143a7fda07e4c96544db1057e1d..a85e4ae5f3ac582381740f8b29460e6f83ae6bd6 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -261,7 +261,8 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __maybe_unused, - struct addr_location *al) + struct thread *thread, + struct addr_location *al) { struct format_field *field; static char handler[256]; @@ -272,7 +273,6 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; - struct thread *thread = al->thread; char *comm = thread->comm; dSP; @@ -351,7 +351,8 @@ static void perl_process_event_generic(union perf_event 
*event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __maybe_unused, - struct addr_location *al __maybe_unused) + struct thread *thread __maybe_unused, + struct addr_location *al __maybe_unused) { dSP; @@ -377,10 +378,11 @@ static void perl_process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct addr_location *al) + struct thread *thread, + struct addr_location *al) { - perl_process_tracepoint(event, sample, evsel, machine, al); - perl_process_event_generic(event, sample, evsel, machine, al); + perl_process_tracepoint(event, sample, evsel, machine, thread, al); + perl_process_event_generic(event, sample, evsel, machine, thread, al); } static void run_start_sub(void) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index e87aa5d9696b41cca90c23bc9ba81529e449eab8..cc75a3cef388065f3164fe270487d2b06d394c72 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -225,6 +225,7 @@ static void python_process_tracepoint(union perf_event *perf_event struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __maybe_unused, + struct thread *thread, struct addr_location *al) { PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; @@ -238,7 +239,6 @@ static void python_process_tracepoint(union perf_event *perf_event int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; - struct thread *thread = al->thread; char *comm = thread->comm; t = PyTuple_New(MAX_FIELDS); @@ -345,12 +345,12 @@ static void python_process_general_event(union perf_event *perf_event struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __maybe_unused, + struct thread *thread, struct addr_location *al) { PyObject *handler, *retval, *t, *dict; static char 
handler_name[64]; unsigned n = 0; - struct thread *thread = al->thread; /* * Use the MAX_FIELDS to make the function expandable, though @@ -404,17 +404,18 @@ static void python_process_event(union perf_event *perf_event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, + struct thread *thread, struct addr_location *al) { switch (evsel->attr.type) { case PERF_TYPE_TRACEPOINT: python_process_tracepoint(perf_event, sample, evsel, - machine, al); + machine, thread, al); break; /* Reserve for future process_hw/sw/raw APIs */ default: python_process_general_event(perf_event, sample, evsel, - machine, al); + machine, thread, al); } } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index cb2b108635ee44b33528d231920df389f797bbb9..5f118a089519a46bb6b370536660476bc6880eda 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -874,6 +874,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_PARENT, "parent", sort_parent), DIM(SORT_CPU, "cpu", sort_cpu), DIM(SORT_SRCLINE, "srcline", sort_srcline), + DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), + DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), }; #undef DIM @@ -893,8 +895,6 @@ static struct sort_dimension bstack_sort_dimensions[] = { #define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) } static struct sort_dimension memory_sort_dimensions[] = { - DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), - DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym), DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso), DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked), diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 586022dc3264e13d491c2ad1ba32b06f9e11ce5f..4e80dbd271e77e245d41f2f36afdffdf8ae7ab4e 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -143,6 +143,8 @@ enum sort_type { SORT_PARENT, SORT_CPU, 
SORT_SRCLINE, + SORT_LOCAL_WEIGHT, + SORT_GLOBAL_WEIGHT, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -154,9 +156,7 @@ enum sort_type { /* memory mode specific sort keys */ __SORT_MEMORY_MODE, - SORT_LOCAL_WEIGHT = __SORT_MEMORY_MODE, - SORT_GLOBAL_WEIGHT, - SORT_MEM_DADDR_SYMBOL, + SORT_MEM_DADDR_SYMBOL = __SORT_MEMORY_MODE, SORT_MEM_DADDR_DSO, SORT_MEM_LOCKED, SORT_MEM_TLB, diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 8715a1006d004b2c12e21d386b34ffae0165e17d..95199e4eea978c3961a86e5e2439e61b6a3ef8a1 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -39,7 +39,8 @@ static void process_event_unsupported(union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, struct perf_evsel *evsel __maybe_unused, struct machine *machine __maybe_unused, - struct addr_location *al __maybe_unused) + struct thread *thread __maybe_unused, + struct addr_location *al __maybe_unused) { } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 669a64a660d7865d7857fda480dd0ee368d68645..fafe1a40444a2b0785e4d41b048ee0926786fcf9 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -9,6 +9,7 @@ struct machine; struct perf_sample; union perf_event; struct perf_tool; +struct thread; extern struct pevent *perf_pevent; @@ -68,7 +69,8 @@ struct scripting_ops { struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct addr_location *al); + struct thread *thread, + struct addr_location *al); int (*generate_script) (struct pevent *pevent, const char *outfile); };