builtin-annotate.c 16.9 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2 3 4 5 6 7 8 9 10 11
/*
 * builtin-annotate.c
 *
 * Builtin annotate command: Analyze the perf.data input file,
 * look up and read DSOs and symbol information and display
 * a histogram of results, along various sorting keys.
 */
#include "builtin.h"

#include "util/color.h"
12
#include <linux/list.h>
13
#include "util/cache.h"
14
#include <linux/rbtree.h>
15
#include <linux/zalloc.h>
16 17 18
#include "util/symbol.h"

#include "perf.h"
19
#include "util/debug.h"
20

21 22
#include "util/evlist.h"
#include "util/evsel.h"
23
#include "util/annotate.h"
24
#include "util/event.h"
25
#include <subcmd/parse-options.h>
26
#include "util/parse-events.h"
27
#include "util/sort.h"
28
#include "util/hist.h"
29
#include "util/dso.h"
30
#include "util/machine.h"
31
#include "util/map.h"
32
#include "util/session.h"
33
#include "util/tool.h"
34
#include "util/data.h"
35
#include "arch/common.h"
36
#include "util/block-range.h"
37 38
#include "util/map_symbol.h"
#include "util/branch.h"
39

40
#include <dlfcn.h>
41
#include <errno.h>
42
#include <linux/bitmap.h>
43
#include <linux/err.h>
44

45
struct perf_annotate {
46
	struct perf_tool tool;
47
	struct perf_session *session;
48
	struct annotation_options opts;
49
	bool	   use_tui, use_stdio, use_stdio2, use_gtk;
50
	bool	   skip_missing;
51
	bool	   has_br_stack;
52
	bool	   group_set;
53 54 55
	const char *sym_hist_filter;
	const char *cpu_list;
	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
56
};
57

58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
/*
 * Given one basic block:
 *
 *	from	to		branch_i
 *	* ----> *
 *		|
 *		| block
 *		v
 *		* ----> *
 *		from	to	branch_i+1
 *
 * where the horizontal are the branches and the vertical is the executed
 * block of instructions.
 *
 * We count, for each 'instruction', the number of blocks that covered it as
 * well as count the ratio each branch is taken.
 *
 * We can do this without knowing the actual instruction stream by keeping
 * track of the address ranges. We break down ranges such that there is no
 * overlap and iterate from the start until the end.
 *
 * @acme: once we parse the objdump output _before_ processing the samples,
 * we can easily fold the branch.cycles IPC bits in.
 */
static void process_basic_block(struct addr_map_symbol *start,
				struct addr_map_symbol *end,
				struct branch_flags *flags)
{
86
	struct symbol *sym = start->ms.sym;
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
	struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
	struct block_range_iter iter;
	struct block_range *entry;

	/*
	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
	 */
	if (!start->addr || start->addr > end->addr)
		return;

	iter = block_range__create(start->addr, end->addr);
	if (!block_range_iter__valid(&iter))
		return;

	/*
	 * First block in range is a branch target.
	 */
	entry = block_range_iter(&iter);
	assert(entry->is_target);
	entry->entry++;

	do {
		entry = block_range_iter(&iter);

		entry->coverage++;
		entry->sym = sym;

		if (notes)
			notes->max_coverage = max(notes->max_coverage, entry->coverage);

	} while (block_range_iter__next(&iter));

	/*
	 * Last block in rage is a branch.
	 */
	entry = block_range_iter(&iter);
	assert(entry->is_branch);
	entry->taken++;
	if (flags->predicted)
		entry->pred++;
}

/*
 * Turn the branch stack of a sample into basic blocks.
 *
 * The resolved entries are walked from the last index down to 0; each pair
 * of consecutive entries yields one block running from the previous entry's
 * 'to' address to the current entry's 'from' address.
 */
static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
				 struct perf_sample *sample)
{
	struct addr_map_symbol *block_start = NULL;
	struct branch_info *entries;
	int idx;

	if (bs == NULL || bs->nr == 0)
		return;

	entries = sample__resolve_bstack(sample, al);
	if (entries == NULL)
		return;

	idx = bs->nr - 1;
	while (idx >= 0) {
		struct branch_info *cur = &entries[idx];

		/*
		 * XXX filter against symbol
		 */
		if (block_start != NULL)
			process_basic_block(block_start, &cur->from, &cur->flags);

		block_start = &cur->to;
		idx--;
	}

	free(entries);
}

155 156 157 158 159 160 161 162
/*
 * hist_entry_iter add_entry_cb for branch mode: bump the sample counters of
 * both ends ('from', then 'to') of the entry's branch.
 *
 * Returns the first non-zero result of addr_map_symbol__inc_samples(), or 0.
 */
static int hist_iter__branch_callback(struct hist_entry_iter *iter,
				      struct addr_location *al __maybe_unused,
				      bool single __maybe_unused,
				      void *arg __maybe_unused)
{
	struct branch_info *bi = iter->he->branch_info;
	int err;

	err = addr_map_symbol__inc_samples(&bi->from, iter->sample, iter->evsel);
	if (err == 0)
		err = addr_map_symbol__inc_samples(&bi->to, iter->sample, iter->evsel);

	return err;
}

178
/*
 * Add a sample to the histograms in branch mode.
 *
 * The sample is resolved again into a private addr_location, which is what
 * the hist entry iterator operates on; the caller's @al is only handed to
 * hist__account_cycles().
 *
 * Returns -1 if the sample cannot be resolved, 0 if it resolves to no
 * symbol, otherwise the result of hist_entry_iter__add().
 */
static int process_branch_callback(struct evsel *evsel,
				   struct perf_sample *sample,
				   struct addr_location *al __maybe_unused,
				   struct perf_annotate *ann,
				   struct machine *machine)
{
	struct hist_entry_iter iter = {
		.evsel		= evsel,
		.sample		= sample,
		.add_entry_cb	= hist_iter__branch_callback,
		.hide_unresolved	= symbol_conf.hide_unresolved,
		.ops		= &hist_iter_branch,
	};

	struct addr_location a;
	int ret = 0;

	if (machine__resolve(machine, &a, sample) < 0)
		return -1;

	if (a.sym == NULL)
		goto out_put;

	if (a.map != NULL)
		a.map->dso->hit = 1;

	hist__account_cycles(sample->branch_stack, al, sample, false, NULL);

	ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);

out_put:
	/*
	 * Pair the successful machine__resolve() above with a put, the same
	 * way process_sample_event() does for its own addr_location;
	 * otherwise what 'a' holds is leaked for every sample.
	 */
	addr_location__put(&a);
	return ret;
}

210 211 212 213 214
/*
 * True when ui__has_annotation() says so, or when the --stdio2 front end
 * was requested.
 */
static bool has_annotation(struct perf_annotate *ann)
{
	if (ui__has_annotation())
		return true;

	return ann->use_stdio2;
}

215
static int perf_evsel__add_sample(struct evsel *evsel,
216
				  struct perf_sample *sample,
217
				  struct addr_location *al,
218 219
				  struct perf_annotate *ann,
				  struct machine *machine)
220
{
221
	struct hists *hists = evsel__hists(evsel);
222
	struct hist_entry *he;
223
	int ret;
224

225
	if ((!ann->has_br_stack || !has_annotation(ann)) &&
226
	    ann->sym_hist_filter != NULL &&
227 228
	    (al->sym == NULL ||
	     strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
229
		/* We're only interested in a symbol named sym_hist_filter */
230 231 232 233
		/*
		 * FIXME: why isn't this done in the symbol_filter when loading
		 * the DSO?
		 */
234
		if (al->sym != NULL) {
235
			rb_erase_cached(&al->sym->rb_node,
236
				 &al->map->dso->symbols);
237
			symbol__delete(al->sym);
238
			dso__reset_find_symbol_cache(al->map->dso);
239 240 241 242
		}
		return 0;
	}

243 244 245 246 247 248
	/*
	 * XXX filtered samples can still have branch entires pointing into our
	 * symbol and are missed.
	 */
	process_branch_stack(sample->branch_stack, al, sample);

249
	if (ann->has_br_stack && has_annotation(ann))
250 251
		return process_branch_callback(evsel, sample, al, ann, machine);

252
	he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
253
	if (he == NULL)
254
		return -ENOMEM;
255

256
	ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
257
	hists__inc_nr_samples(hists, true);
258
	return ret;
259 260
}

261
static int process_sample_event(struct perf_tool *tool,
262
				union perf_event *event,
263
				struct perf_sample *sample,
264
				struct evsel *evsel,
265
				struct machine *machine)
266
{
267
	struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
268
	struct addr_location al;
269
	int ret = 0;
270

271
	if (machine__resolve(machine, &al, sample) < 0) {
272 273
		pr_warning("problem processing %d event, skipping it.\n",
			   event->header.type);
274 275 276
		return -1;
	}

277
	if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
278
		goto out_put;
279

280 281
	if (!al.filtered &&
	    perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
282 283
		pr_warning("problem incrementing symbol count, "
			   "skipping event\n");
284
		ret = -1;
285
	}
286 287 288
out_put:
	addr_location__put(&al);
	return ret;
289 290
}

291 292
static int process_feature_event(struct perf_session *session,
				 union perf_event *event)
293 294
{
	if (event->feat.feat_id < HEADER_LAST_FEATURE)
295
		return perf_event__process_feature(session, event);
296 297 298
	return 0;
}

299
static int hist_entry__tty_annotate(struct hist_entry *he,
300
				    struct evsel *evsel,
301
				    struct perf_annotate *ann)
302
{
303
	if (!ann->use_stdio2)
304
		return symbol__tty_annotate(&he->ms, evsel, &ann->opts);
305

306
	return symbol__tty_annotate2(&he->ms, evsel, &ann->opts);
307 308
}

309
static void hists__find_annotations(struct hists *hists,
310
				    struct evsel *evsel,
311
				    struct perf_annotate *ann)
312
{
D
Davidlohr Bueso 已提交
313
	struct rb_node *nd = rb_first_cached(&hists->entries), *next;
314
	int key = K_RIGHT;
315

316
	while (nd) {
317
		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
318
		struct annotation *notes;
319

320 321
		if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
			goto find_next;
322

323 324 325 326
		if (ann->sym_hist_filter &&
		    (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
			goto find_next;

327
		notes = symbol__annotation(he->ms.sym);
328
		if (notes->src == NULL) {
329
find_next:
330
			if (key == K_LEFT)
331 332 333
				nd = rb_prev(nd);
			else
				nd = rb_next(nd);
334
			continue;
335
		}
336

337
		if (use_browser == 2) {
338
			int ret;
339
			int (*annotate)(struct hist_entry *he,
340
					struct evsel *evsel,
341 342 343 344 345 346 347 348
					struct hist_browser_timer *hbt);

			annotate = dlsym(perf_gtk_handle,
					 "hist_entry__gtk_annotate");
			if (annotate == NULL) {
				ui__error("GTK browser not found!\n");
				return;
			}
349

350
			ret = annotate(he, evsel, NULL);
351 352 353 354 355
			if (!ret || !ann->skip_missing)
				return;

			/* skip missing symbols */
			nd = rb_next(nd);
356
		} else if (use_browser == 1) {
357
			key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts);
358

359
			switch (key) {
360 361 362 363
			case -1:
				if (!ann->skip_missing)
					return;
				/* fall through */
364
			case K_RIGHT:
365
				next = rb_next(nd);
366
				break;
367
			case K_LEFT:
368
				next = rb_prev(nd);
369
				break;
370 371
			default:
				return;
372
			}
373 374 375

			if (next != NULL)
				nd = next;
376
		} else {
377
			hist_entry__tty_annotate(he, evsel, ann);
378 379 380
			nd = rb_next(nd);
			/*
			 * Since we have a hist_entry per IP for the same
381
			 * symbol, free he->ms.sym->src to signal we already
382 383
			 * processed this symbol.
			 */
384
			zfree(&notes->src->cycles_hist);
385
			zfree(&notes->src);
386
		}
387 388 389
	}
}

390
static int __cmd_annotate(struct perf_annotate *ann)
391
{
L
Li Zefan 已提交
392
	int ret;
393
	struct perf_session *session = ann->session;
394
	struct evsel *pos;
395
	u64 total_nr_samples;
396

397 398 399
	if (ann->cpu_list) {
		ret = perf_session__cpu_bitmap(session, ann->cpu_list,
					       ann->cpu_bitmap);
400
		if (ret)
401
			goto out;
402 403
	}

404 405 406
	if (!ann->opts.objdump_path) {
		ret = perf_env__lookup_objdump(&session->header.env,
					       &ann->opts.objdump_path);
407
		if (ret)
408
			goto out;
409 410
	}

411
	ret = perf_session__process_events(session);
L
Li Zefan 已提交
412
	if (ret)
413
		goto out;
414

415
	if (dump_trace) {
416
		perf_session__fprintf_nr_events(session, stdout);
417
		perf_evlist__fprintf_nr_events(session->evlist, stdout);
418
		goto out;
419
	}
420

421
	if (verbose > 3)
422
		perf_session__fprintf(session, stdout);
423

424
	if (verbose > 2)
425
		perf_session__fprintf_dsos(session, stdout);
426

427
	total_nr_samples = 0;
428
	evlist__for_each_entry(session->evlist, pos) {
429
		struct hists *hists = evsel__hists(pos);
430 431 432 433
		u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];

		if (nr_samples > 0) {
			total_nr_samples += nr_samples;
434
			hists__collapse_resort(hists, NULL);
435 436
			/* Don't sort callchain */
			perf_evsel__reset_sample_bit(pos, CALLCHAIN);
437
			perf_evsel__output_resort(pos, NULL);
438 439 440 441 442

			if (symbol_conf.event_group &&
			    !perf_evsel__is_group_leader(pos))
				continue;

443
			hists__find_annotations(hists, pos, ann);
444 445
		}
	}
446

447
	if (total_nr_samples == 0) {
J
Jiri Olsa 已提交
448
		ui__error("The %s data has no samples!\n", session->data->path);
449
		goto out;
450
	}
451

452 453 454 455 456 457 458
	if (use_browser == 2) {
		void (*show_annotations)(void);

		show_annotations = dlsym(perf_gtk_handle,
					 "perf_gtk__show_annotations");
		if (show_annotations == NULL) {
			ui__error("GTK browser not found!\n");
459
			goto out;
460 461 462
		}
		show_annotations();
	}
463

464
out:
L
Li Zefan 已提交
465
	return ret;
466 467 468
}

/* NULL-terminated usage lines handed to the option parser. */
static const char * const annotate_usage[] = {
	"perf annotate [<options>]",
	NULL,
};

473
int cmd_annotate(int argc, const char **argv)
474 475
{
	struct perf_annotate annotate = {
476
		.tool = {
477 478
			.sample	= process_sample_event,
			.mmap	= perf_event__process_mmap,
479
			.mmap2	= perf_event__process_mmap2,
480
			.comm	= perf_event__process_comm,
481
			.exit	= perf_event__process_exit,
482
			.fork	= perf_event__process_fork,
483
			.namespaces = perf_event__process_namespaces,
484 485
			.attr	= perf_event__process_attr,
			.build_id = perf_event__process_build_id,
486
			.tracing_data   = perf_event__process_tracing_data,
487
			.feature	= process_feature_event,
488
			.ordered_events = true,
489 490
			.ordering_requires_timestamps = true,
		},
491
		.opts = annotation__default_options,
492
	};
493
	struct perf_data data = {
494 495
		.mode  = PERF_DATA_MODE_READ,
	};
496
	struct option options[] = {
497
	OPT_STRING('i', "input", &input_name, "file",
498
		    "input file name"),
499 500
	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
501
	OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
502
		    "symbol to annotate"),
503
	OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
504
	OPT_INCR('v', "verbose", &verbose,
505
		    "be more verbose (show symbol address, etc)"),
506
	OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
507 508
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
509
	OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
510 511
	OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
512
	OPT_BOOLEAN(0, "stdio2", &annotate.use_stdio2, "Use the stdio interface"),
513 514
	OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
                    "don't load vmlinux even if found"),
515 516 517
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
518
		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
519
	OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines,
520
		    "print matching source lines (may be slow)"),
521
	OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path,
522
		    "Don't shorten the displayed pathnames"),
523 524
	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
		    "Skip symbols that cannot be annotated"),
525 526 527
	OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
			&annotate.group_set,
			"Show event group information together"),
528
	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
529 530 531
	OPT_CALLBACK(0, "symfs", NULL, "directory",
		     "Look for files with symbols relative to this directory",
		     symbol__config_symfs),
532
	OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src,
533
		    "Interleave source code with assembly code (default)"),
534
	OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw,
535
		    "Display raw encoding of assembly instructions (default)"),
536
	OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style",
537
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
538 539 540 541
	OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix",
		    "Add prefix to source file path names in programs (with --prefix-strip)"),
	OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N",
		    "Strip first N entries of source file path name in programs (with --prefix)"),
542
	OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path",
543
		   "objdump binary to use for disassembly and annotations"),
544 545
	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
		    "Show event group information together"),
546 547
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
548 549
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
550 551 552
	OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
			     "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
			     stdio__config_color, "always"),
553 554 555 556
	OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period",
		     "Set percent type local/global-period/hits",
		     annotate_parse_percent_type),

557
	OPT_END()
558
	};
559 560 561 562 563
	int ret;

	set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE);
	set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE);

564

565
	ret = hists__init();
566 567
	if (ret < 0)
		return ret;
568

569 570
	annotation_config__init(&annotate.opts);

571
	argc = parse_options(argc, argv, options, annotate_usage, 0);
572 573 574 575 576 577 578 579 580 581
	if (argc) {
		/*
		 * Special case: if there's an argument left then assume that
		 * it's a symbol filter:
		 */
		if (argc > 1)
			usage_with_options(annotate_usage, options);

		annotate.sym_hist_filter = argv[0];
	}
582

583 584 585
	if (annotate_check_args(&annotate.opts) < 0)
		return -EINVAL;

586 587
	if (symbol_conf.show_nr_samples && annotate.use_gtk) {
		pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
588 589 590
		return ret;
	}

591 592 593
	if (quiet)
		perf_quiet_option();

J
Jiri Olsa 已提交
594
	data.path = input_name;
595

596
	annotate.session = perf_session__new(&data, false, &annotate.tool);
597 598
	if (IS_ERR(annotate.session))
		return PTR_ERR(annotate.session);
599

600 601 602
	annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
						      HEADER_BRANCH_STACK);

603 604 605
	if (annotate.group_set)
		perf_evlist__force_leader(annotate.session->evlist);

606 607 608 609
	ret = symbol__annotation_init();
	if (ret < 0)
		goto out_delete;

610 611
	symbol_conf.try_vmlinux_path = true;

612
	ret = symbol__init(&annotate.session->header.env);
613 614
	if (ret < 0)
		goto out_delete;
615

616
	if (annotate.use_stdio || annotate.use_stdio2)
617 618 619 620 621 622 623 624
		use_browser = 0;
	else if (annotate.use_tui)
		use_browser = 1;
	else if (annotate.use_gtk)
		use_browser = 2;

	setup_browser(true);

625
	if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) {
626 627 628 629 630 631 632 633
		sort__mode = SORT_MODE__BRANCH;
		if (setup_sorting(annotate.session->evlist) < 0)
			usage_with_options(annotate_usage, options);
	} else {
		if (setup_sorting(NULL) < 0)
			usage_with_options(annotate_usage, options);
	}

634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649
	ret = __cmd_annotate(&annotate);

out_delete:
	/*
	 * Speed up the exit process, for large files this can
	 * take quite a while.
	 *
	 * XXX Enable this when using valgrind or if we ever
	 * librarize this command.
	 *
	 * Also experiment with obstacks to see how much speed
	 * up we'll get here.
	 *
	 * perf_session__delete(session);
	 */
	return ret;
650
}