builtin-annotate.c 17.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2 3 4 5 6 7 8 9 10 11
/*
 * builtin-annotate.c
 *
 * Builtin annotate command: Analyze the perf.data input file,
 * look up and read DSOs and symbol information and display
 * a histogram of results, along various sorting keys.
 */
#include "builtin.h"

#include "util/color.h"
12
#include <linux/list.h>
13
#include "util/cache.h"
14
#include <linux/rbtree.h>
15
#include <linux/zalloc.h>
16 17 18
#include "util/symbol.h"

#include "perf.h"
19
#include "util/debug.h"
20

21 22
#include "util/evlist.h"
#include "util/evsel.h"
23
#include "util/annotate.h"
24
#include "util/event.h"
25
#include <subcmd/parse-options.h>
26
#include "util/parse-events.h"
27
#include "util/sort.h"
28
#include "util/hist.h"
29
#include "util/dso.h"
30
#include "util/machine.h"
31
#include "util/map.h"
32
#include "util/session.h"
33
#include "util/tool.h"
34
#include "util/data.h"
35
#include "arch/common.h"
36
#include "util/block-range.h"
37 38
#include "util/map_symbol.h"
#include "util/branch.h"
39

40
#include <dlfcn.h>
41
#include <errno.h>
42
#include <linux/bitmap.h>
43
#include <linux/err.h>
44

45
struct perf_annotate {
46
	struct perf_tool tool;
47
	struct perf_session *session;
48
	struct annotation_options opts;
49
	bool	   use_tui, use_stdio, use_stdio2, use_gtk;
50
	bool	   skip_missing;
51
	bool	   has_br_stack;
52
	bool	   group_set;
53 54 55
	const char *sym_hist_filter;
	const char *cpu_list;
	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
56
};
57

58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
/*
 * Given one basic block:
 *
 *	from	to		branch_i
 *	* ----> *
 *		|
 *		| block
 *		v
 *		* ----> *
 *		from	to	branch_i+1
 *
 * where the horizontal are the branches and the vertical is the executed
 * block of instructions.
 *
 * We count, for each 'instruction', the number of blocks that covered it as
 * well as count the ratio each branch is taken.
 *
 * We can do this without knowing the actual instruction stream by keeping
 * track of the address ranges. We break down ranges such that there is no
 * overlap and iterate from the start until the end.
 *
 * @acme: once we parse the objdump output _before_ processing the samples,
 * we can easily fold the branch.cycles IPC bits in.
 */
static void process_basic_block(struct addr_map_symbol *start,
				struct addr_map_symbol *end,
				struct branch_flags *flags)
{
86
	struct symbol *sym = start->ms.sym;
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
	struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
	struct block_range_iter iter;
	struct block_range *entry;

	/*
	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
	 */
	if (!start->addr || start->addr > end->addr)
		return;

	iter = block_range__create(start->addr, end->addr);
	if (!block_range_iter__valid(&iter))
		return;

	/*
	 * First block in range is a branch target.
	 */
	entry = block_range_iter(&iter);
	assert(entry->is_target);
	entry->entry++;

	do {
		entry = block_range_iter(&iter);

		entry->coverage++;
		entry->sym = sym;

		if (notes)
			notes->max_coverage = max(notes->max_coverage, entry->coverage);

	} while (block_range_iter__next(&iter));

	/*
	 * Last block in rage is a branch.
	 */
	entry = block_range_iter(&iter);
	assert(entry->is_branch);
	entry->taken++;
	if (flags->predicted)
		entry->pred++;
}

/*
 * Walk the resolved branch stack of @sample from its last entry down to
 * index 0.  The target of one branch and the source of the next one visited
 * delimit a basic block, which is handed to process_basic_block().
 *
 * Nothing is done when there is no branch stack, when it is empty, or when
 * it cannot be resolved.
 */
static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
				 struct perf_sample *sample)
{
	struct addr_map_symbol *block_start = NULL;
	struct branch_info *entries;
	int idx;

	if (!bs)
		return;
	if (!bs->nr)
		return;

	entries = sample__resolve_bstack(sample, al);
	if (!entries)
		return;

	idx = bs->nr - 1;
	while (idx >= 0) {
		struct branch_info *cur = &entries[idx];

		/*
		 * XXX filter against symbol
		 */
		if (block_start)
			process_basic_block(block_start, &cur->from, &cur->flags);

		/* The next block starts where this branch landed */
		block_start = &cur->to;
		idx--;
	}

	free(entries);
}

155 156 157 158 159 160 161 162
/*
 * add_entry_cb used by process_branch_callback(): account the sample on both
 * ends of the branch attached to the hist entry just added.
 *
 * Returns 0 on success, otherwise the error from
 * addr_map_symbol__inc_samples(); the 'to' side is skipped when the 'from'
 * side fails.
 */
static int hist_iter__branch_callback(struct hist_entry_iter *iter,
				      struct addr_location *al __maybe_unused,
				      bool single __maybe_unused,
				      void *arg __maybe_unused)
{
	struct hist_entry *he = iter->he;
	struct branch_info *bi;
	struct perf_sample *sample = iter->sample;
	struct evsel *evsel = iter->evsel;
	int err;

	bi = he->branch_info;
	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
	if (err)
		return err;

	return addr_map_symbol__inc_samples(&bi->to, sample, evsel);
}

178
/*
 * Branch mode sample handling: resolve the sample again into a private
 * addr_location and feed it to the branch hist iterator, which calls
 * hist_iter__branch_callback() for the entries it adds.
 *
 * Returns -1 when the sample cannot be resolved, 0 when it resolves to no
 * symbol, otherwise the result of hist_entry_iter__add().
 */
static int process_branch_callback(struct evsel *evsel,
				   struct perf_sample *sample,
				   struct addr_location *al __maybe_unused,
				   struct perf_annotate *ann,
				   struct machine *machine)
{
	struct hist_entry_iter iter = {
		.evsel		= evsel,
		.sample		= sample,
		.add_entry_cb	= hist_iter__branch_callback,
		.hide_unresolved	= symbol_conf.hide_unresolved,
		.ops		= &hist_iter_branch,
	};

	struct addr_location a;
	int ret = 0;

	/* Nothing to release when the lookup itself fails */
	if (machine__resolve(machine, &a, sample) < 0)
		return -1;

	if (a.sym == NULL)
		goto out_put;

	if (a.map != NULL)
		a.map->dso->hit = 1;

	hist__account_cycles(sample->branch_stack, al, sample, false, NULL);

	ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);

out_put:
	/*
	 * Release what machine__resolve() stored in 'a', the same way
	 * process_sample_event() does for its own addr_location.
	 */
	addr_location__put(&a);
	return ret;
}

210 211 212 213 214
/*
 * True when ui__has_annotation() says so, or when the --stdio2 front end
 * was requested.
 */
static bool has_annotation(struct perf_annotate *ann)
{
	if (ui__has_annotation())
		return true;

	return ann->use_stdio2;
}

215 216 217
static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample,
			     struct addr_location *al, struct perf_annotate *ann,
			     struct machine *machine)
218
{
219
	struct hists *hists = evsel__hists(evsel);
220
	struct hist_entry *he;
221
	int ret;
222

223
	if ((!ann->has_br_stack || !has_annotation(ann)) &&
224
	    ann->sym_hist_filter != NULL &&
225 226
	    (al->sym == NULL ||
	     strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
227
		/* We're only interested in a symbol named sym_hist_filter */
228 229 230 231
		/*
		 * FIXME: why isn't this done in the symbol_filter when loading
		 * the DSO?
		 */
232
		if (al->sym != NULL) {
233
			rb_erase_cached(&al->sym->rb_node,
234
				 &al->map->dso->symbols);
235
			symbol__delete(al->sym);
236
			dso__reset_find_symbol_cache(al->map->dso);
237 238 239 240
		}
		return 0;
	}

241
	/*
242
	 * XXX filtered samples can still have branch entries pointing into our
243 244 245 246
	 * symbol and are missed.
	 */
	process_branch_stack(sample->branch_stack, al, sample);

247
	if (ann->has_br_stack && has_annotation(ann))
248 249
		return process_branch_callback(evsel, sample, al, ann, machine);

250
	he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
251
	if (he == NULL)
252
		return -ENOMEM;
253

254
	ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
255
	hists__inc_nr_samples(hists, true);
256
	return ret;
257 258
}

259
static int process_sample_event(struct perf_tool *tool,
260
				union perf_event *event,
261
				struct perf_sample *sample,
262
				struct evsel *evsel,
263
				struct machine *machine)
264
{
265
	struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
266
	struct addr_location al;
267
	int ret = 0;
268

269
	if (machine__resolve(machine, &al, sample) < 0) {
270 271
		pr_warning("problem processing %d event, skipping it.\n",
			   event->header.type);
272 273 274
		return -1;
	}

275
	if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
276
		goto out_put;
277

278
	if (!al.filtered &&
279
	    evsel__add_sample(evsel, sample, &al, ann, machine)) {
280 281
		pr_warning("problem incrementing symbol count, "
			   "skipping event\n");
282
		ret = -1;
283
	}
284 285 286
out_put:
	addr_location__put(&al);
	return ret;
287 288
}

289 290
static int process_feature_event(struct perf_session *session,
				 union perf_event *event)
291 292
{
	if (event->feat.feat_id < HEADER_LAST_FEATURE)
293
		return perf_event__process_feature(session, event);
294 295 296
	return 0;
}

297
static int hist_entry__tty_annotate(struct hist_entry *he,
298
				    struct evsel *evsel,
299
				    struct perf_annotate *ann)
300
{
301
	if (!ann->use_stdio2)
302
		return symbol__tty_annotate(&he->ms, evsel, &ann->opts);
303

304
	return symbol__tty_annotate2(&he->ms, evsel, &ann->opts);
305 306
}

307
static void hists__find_annotations(struct hists *hists,
308
				    struct evsel *evsel,
309
				    struct perf_annotate *ann)
310
{
D
Davidlohr Bueso 已提交
311
	struct rb_node *nd = rb_first_cached(&hists->entries), *next;
312
	int key = K_RIGHT;
313

314
	while (nd) {
315
		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
316
		struct annotation *notes;
317

318 319
		if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
			goto find_next;
320

321 322 323 324
		if (ann->sym_hist_filter &&
		    (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
			goto find_next;

325
		notes = symbol__annotation(he->ms.sym);
326
		if (notes->src == NULL) {
327
find_next:
328
			if (key == K_LEFT)
329 330 331
				nd = rb_prev(nd);
			else
				nd = rb_next(nd);
332
			continue;
333
		}
334

335
		if (use_browser == 2) {
336
			int ret;
337
			int (*annotate)(struct hist_entry *he,
338
					struct evsel *evsel,
339 340 341 342 343 344 345 346
					struct hist_browser_timer *hbt);

			annotate = dlsym(perf_gtk_handle,
					 "hist_entry__gtk_annotate");
			if (annotate == NULL) {
				ui__error("GTK browser not found!\n");
				return;
			}
347

348
			ret = annotate(he, evsel, NULL);
349 350 351 352 353
			if (!ret || !ann->skip_missing)
				return;

			/* skip missing symbols */
			nd = rb_next(nd);
354
		} else if (use_browser == 1) {
355
			key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts);
356

357
			switch (key) {
358 359 360 361
			case -1:
				if (!ann->skip_missing)
					return;
				/* fall through */
362
			case K_RIGHT:
363
				next = rb_next(nd);
364
				break;
365
			case K_LEFT:
366
				next = rb_prev(nd);
367
				break;
368 369
			default:
				return;
370
			}
371 372 373

			if (next != NULL)
				nd = next;
374
		} else {
375
			hist_entry__tty_annotate(he, evsel, ann);
376 377
			nd = rb_next(nd);
		}
378 379 380
	}
}

381
static int __cmd_annotate(struct perf_annotate *ann)
382
{
L
Li Zefan 已提交
383
	int ret;
384
	struct perf_session *session = ann->session;
385
	struct evsel *pos;
386
	u64 total_nr_samples;
387

388 389 390
	if (ann->cpu_list) {
		ret = perf_session__cpu_bitmap(session, ann->cpu_list,
					       ann->cpu_bitmap);
391
		if (ret)
392
			goto out;
393 394
	}

395 396 397
	if (!ann->opts.objdump_path) {
		ret = perf_env__lookup_objdump(&session->header.env,
					       &ann->opts.objdump_path);
398
		if (ret)
399
			goto out;
400 401
	}

402
	ret = perf_session__process_events(session);
L
Li Zefan 已提交
403
	if (ret)
404
		goto out;
405

406
	if (dump_trace) {
407 408
		perf_session__fprintf_nr_events(session, stdout, false);
		evlist__fprintf_nr_events(session->evlist, stdout, false);
409
		goto out;
410
	}
411

412
	if (verbose > 3)
413
		perf_session__fprintf(session, stdout);
414

415
	if (verbose > 2)
416
		perf_session__fprintf_dsos(session, stdout);
417

418
	total_nr_samples = 0;
419
	evlist__for_each_entry(session->evlist, pos) {
420
		struct hists *hists = evsel__hists(pos);
421
		u32 nr_samples = hists->stats.nr_samples;
422 423 424

		if (nr_samples > 0) {
			total_nr_samples += nr_samples;
425
			hists__collapse_resort(hists, NULL);
426
			/* Don't sort callchain */
427
			evsel__reset_sample_bit(pos, CALLCHAIN);
428
			evsel__output_resort(pos, NULL);
429

430
			if (symbol_conf.event_group && !evsel__is_group_leader(pos))
431 432
				continue;

433
			hists__find_annotations(hists, pos, ann);
434 435
		}
	}
436

437
	if (total_nr_samples == 0) {
J
Jiri Olsa 已提交
438
		ui__error("The %s data has no samples!\n", session->data->path);
439
		goto out;
440
	}
441

442 443 444 445 446 447 448
	if (use_browser == 2) {
		void (*show_annotations)(void);

		show_annotations = dlsym(perf_gtk_handle,
					 "perf_gtk__show_annotations");
		if (show_annotations == NULL) {
			ui__error("GTK browser not found!\n");
449
			goto out;
450 451 452
		}
		show_annotations();
	}
453

454
out:
L
Li Zefan 已提交
455
	return ret;
456 457 458
}

/* NULL terminated usage lines handed to the option parser */
static const char * const annotate_usage[] = {
	"perf annotate [<options>]",
	NULL
};

463
int cmd_annotate(int argc, const char **argv)
464 465
{
	struct perf_annotate annotate = {
466
		.tool = {
467 468
			.sample	= process_sample_event,
			.mmap	= perf_event__process_mmap,
469
			.mmap2	= perf_event__process_mmap2,
470
			.comm	= perf_event__process_comm,
471
			.exit	= perf_event__process_exit,
472
			.fork	= perf_event__process_fork,
473
			.namespaces = perf_event__process_namespaces,
474 475
			.attr	= perf_event__process_attr,
			.build_id = perf_event__process_build_id,
476
			.tracing_data   = perf_event__process_tracing_data,
477 478 479
			.id_index	= perf_event__process_id_index,
			.auxtrace_info	= perf_event__process_auxtrace_info,
			.auxtrace	= perf_event__process_auxtrace,
480
			.feature	= process_feature_event,
481
			.ordered_events = true,
482 483
			.ordering_requires_timestamps = true,
		},
484
		.opts = annotation__default_options,
485
	};
486
	struct perf_data data = {
487 488
		.mode  = PERF_DATA_MODE_READ,
	};
489 490 491
	struct itrace_synth_opts itrace_synth_opts = {
		.set = 0,
	};
492
	struct option options[] = {
493
	OPT_STRING('i', "input", &input_name, "file",
494
		    "input file name"),
495 496
	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
497
	OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
498
		    "symbol to annotate"),
499
	OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
500
	OPT_INCR('v', "verbose", &verbose,
501
		    "be more verbose (show symbol address, etc)"),
502
	OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
503 504
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
505
	OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
506 507
	OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
508
	OPT_BOOLEAN(0, "stdio2", &annotate.use_stdio2, "Use the stdio interface"),
509 510
	OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
                    "don't load vmlinux even if found"),
511 512 513
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
514
		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
515
	OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines,
516
		    "print matching source lines (may be slow)"),
517
	OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path,
518
		    "Don't shorten the displayed pathnames"),
519 520
	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
		    "Skip symbols that cannot be annotated"),
521 522 523
	OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
			&annotate.group_set,
			"Show event group information together"),
524
	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
525 526 527
	OPT_CALLBACK(0, "symfs", NULL, "directory",
		     "Look for files with symbols relative to this directory",
		     symbol__config_symfs),
528
	OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src,
529
		    "Interleave source code with assembly code (default)"),
530
	OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw,
531
		    "Display raw encoding of assembly instructions (default)"),
532
	OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style",
533
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
534 535 536 537
	OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix",
		    "Add prefix to source file path names in programs (with --prefix-strip)"),
	OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N",
		    "Strip first N entries of source file path name in programs (with --prefix)"),
538
	OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path",
539
		   "objdump binary to use for disassembly and annotations"),
540 541 542 543
	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
		    "Enable symbol demangling"),
	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
		    "Enable kernel symbol demangling"),
544 545
	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
		    "Show event group information together"),
546 547
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
548 549
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
550 551 552
	OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
			     "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
			     stdio__config_color, "always"),
553 554 555
	OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period",
		     "Set percent type local/global-period/hits",
		     annotate_parse_percent_type),
556 557 558
	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
			    "Instruction Tracing options\n" ITRACE_HELP,
			    itrace_parse_synth_opts),
559

560
	OPT_END()
561
	};
562 563 564 565 566
	int ret;

	set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE);
	set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE);

567

568
	ret = hists__init();
569 570
	if (ret < 0)
		return ret;
571

572 573
	annotation_config__init(&annotate.opts);

574
	argc = parse_options(argc, argv, options, annotate_usage, 0);
575 576 577 578 579 580 581 582 583 584
	if (argc) {
		/*
		 * Special case: if there's an argument left then assume that
		 * it's a symbol filter:
		 */
		if (argc > 1)
			usage_with_options(annotate_usage, options);

		annotate.sym_hist_filter = argv[0];
	}
585

586 587 588
	if (annotate_check_args(&annotate.opts) < 0)
		return -EINVAL;

589 590
	if (symbol_conf.show_nr_samples && annotate.use_gtk) {
		pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
591 592 593
		return ret;
	}

594 595 596
	if (quiet)
		perf_quiet_option();

J
Jiri Olsa 已提交
597
	data.path = input_name;
598

599
	annotate.session = perf_session__new(&data, &annotate.tool);
600 601
	if (IS_ERR(annotate.session))
		return PTR_ERR(annotate.session);
602

603 604
	annotate.session->itrace_synth_opts = &itrace_synth_opts;

605 606 607
	annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
						      HEADER_BRANCH_STACK);

608
	if (annotate.group_set)
609
		evlist__force_leader(annotate.session->evlist);
610

611 612 613 614
	ret = symbol__annotation_init();
	if (ret < 0)
		goto out_delete;

615 616
	symbol_conf.try_vmlinux_path = true;

617
	ret = symbol__init(&annotate.session->header.env);
618 619
	if (ret < 0)
		goto out_delete;
620

621
	if (annotate.use_stdio || annotate.use_stdio2)
622 623 624 625 626 627 628 629
		use_browser = 0;
	else if (annotate.use_tui)
		use_browser = 1;
	else if (annotate.use_gtk)
		use_browser = 2;

	setup_browser(true);

630 631 632 633 634 635 636 637 638 639 640 641
	/*
	 * Events of different processes may correspond to the same
	 * symbol, we do not care about the processes in annotate,
	 * set sort order to avoid repeated output.
	 */
	sort_order = "dso,symbol";

	/*
	 * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle
	 * if branch info is in perf data in TUI mode.
	 */
	if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack)
642
		sort__mode = SORT_MODE__BRANCH;
643 644 645

	if (setup_sorting(NULL) < 0)
		usage_with_options(annotate_usage, options);
646

647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
	ret = __cmd_annotate(&annotate);

out_delete:
	/*
	 * Speed up the exit process, for large files this can
	 * take quite a while.
	 *
	 * XXX Enable this when using valgrind or if we ever
	 * librarize this command.
	 *
	 * Also experiment with obstacks to see how much speed
	 * up we'll get here.
	 *
	 * perf_session__delete(session);
	 */
	return ret;
663
}