// SPDX-License-Identifier: GPL-2.0-only
/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 */
#include "builtin.h"

#include "perf.h"

#include "util/annotate.h"
#include "util/bpf-event.h"
#include "util/config.h"
#include "util/color.h"
#include "util/dso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/event.h"
#include "util/machine.h"
#include "util/map.h"
#include "util/mmap.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/callchain.h"
#include "util/cpumap.h"
#include "util/sort.h"
#include "util/string2.h"
#include "util/term.h"
#include "util/intlist.h"
#include "util/parse-branch-options.h"
#include "arch/common.h"
#include "ui/ui.h"

#include "util/debug.h"
#include "util/ordered-events.h"

#include <assert.h>
#include <elf.h>
#include <fcntl.h>

#include <stdio.h>
#include <termios.h>
#include <unistd.h>
#include <inttypes.h>

#include <errno.h>
#include <time.h>
#include <sched.h>
#include <signal.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/mman.h>

#include <linux/stringify.h>
#include <linux/time64.h>
#include <linux/types.h>
#include <linux/err.h>

#include <linux/ctype.h>

static volatile int done;
static volatile int resize;

#define HEADER_LINE_NR  5

static void perf_top__update_print_entries(struct perf_top *top)
{
	top->print_entries = top->winsize.ws_row - HEADER_LINE_NR;
}

static void winch_sig(int sig __maybe_unused)
{
	resize = 1;
}

static void perf_top__resize(struct perf_top *top)
{
	get_term_dimensions(&top->winsize);
	perf_top__update_print_entries(top);
}

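/*
 * Set up annotation for the given hist entry: check that the backing DSO
 * can be disassembled, allocate its annotation histograms, run the
 * disassembler and make 'he' the current sym_filter_entry on success.
 */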
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
{
	struct evsel *evsel;
	struct symbol *sym;
	struct annotation *notes;
	struct map *map;
	int err = -1;

	if (!he || !he->ms.sym)
		return -1;

	evsel = hists_to_evsel(he->hists);

	sym = he->ms.sym;
	map = he->ms.map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
	    !dso__is_kcore(map->dso)) {
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
		return -1;
	}

	notes = symbol__annotation(sym);
	pthread_mutex_lock(&notes->lock);

	if (!symbol__hists(sym, top->evlist->core.nr_entries)) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return err;
	}

	err = symbol__annotate(sym, map, evsel, 0, &top->annotation_opts, NULL);
	if (err == 0) {
		top->sym_filter_entry = he;
	} else {
		char msg[BUFSIZ];
		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
		pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
	}

	pthread_mutex_unlock(&notes->lock);
	return err;
}

static void __zero_source_counters(struct hist_entry *he)
{
	struct symbol *sym = he->ms.sym;
	symbol__annotate_zero_histograms(sym);
}

static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
{
	struct utsname uts;
	int err = uname(&uts);

	ui__warning("Out of bounds address found:\n\n"
		    "Addr:   %" PRIx64 "\n"
		    "DSO:    %s %c\n"
		    "Map:    %" PRIx64 "-%" PRIx64 "\n"
		    "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
		    "Arch:   %s\n"
		    "Kernel: %s\n"
		    "Tools:  %s\n\n"
		    "Not all samples will be on the annotation output.\n\n"
		    "Please report to linux-kernel@vger.kernel.org\n",
		    ip, map->dso->long_name, dso__symtab_origin(map->dso),
		    map->start, map->end, sym->start, sym->end,
		    sym->binding == STB_GLOBAL ? 'g' :
		    sym->binding == STB_LOCAL  ? 'l' : 'w', sym->name,
		    err ? "[unknown]" : uts.machine,
		    err ? "[unknown]" : uts.release, perf_version_string);
	if (use_browser <= 0)
		sleep(5);

	map->erange_warned = true;
}

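/*
 * Record one sample's exact IP into the symbol's annotation histograms.
 * A trylock is used so the sampling path never stalls on a contended
 * annotation lock; the sample is simply skipped when the lock is busy.
 */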
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					struct perf_sample *sample,
					struct evsel *evsel, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym = he->ms.sym;
	int err = 0;

	if (sym == NULL || (use_browser == 0 &&
			    (top->sym_filter_entry == NULL ||
			     top->sym_filter_entry->ms.sym != sym)))
		return;

	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock))
		return;

	err = hist_entry__inc_addr_samples(he, sample, evsel, ip);

	pthread_mutex_unlock(&notes->lock);

	if (unlikely(err)) {
		/*
		 * This function is now called with he->hists->lock held.
		 * Release it before going to sleep.
		 */
		pthread_mutex_unlock(&he->hists->lock);

		if (err == -ERANGE && !he->ms.map->erange_warned)
			ui__warn_map_erange(he->ms.map, sym, ip);
		else if (err == -ENOMEM) {
			pr_err("Not enough memory for annotating '%s' symbol!\n",
			       sym->name);
			sleep(1);
		}

		pthread_mutex_lock(&he->hists->lock);
	}
}

static void perf_top__show_details(struct perf_top *top)
{
	struct hist_entry *he = top->sym_filter_entry;
	struct evsel *evsel;
	struct annotation *notes;
	struct symbol *symbol;
	int more;

	if (!he)
		return;

	evsel = hists_to_evsel(he->hists);

	symbol = he->ms.sym;
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	symbol__calc_percent(symbol, evsel);

	if (notes->src == NULL)
		goto out_unlock;

	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);

	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, &top->annotation_opts);

	if (top->evlist->enabled) {
		if (top->zero)
			symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
		else
			symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
	}
	if (more != 0)
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
out_unlock:
	pthread_mutex_unlock(&notes->lock);
}

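/*
 * Re-sort the hist entries of every evsel: decay or zero out stale
 * entries, collapse, link group members to their leader and rebuild the
 * output tree that the display code walks.
 */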
static void perf_top__resort_hists(struct perf_top *t)
{
	struct evlist *evlist = t->evlist;
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		struct hists *hists = evsel__hists(pos);

		/*
		 * unlink existing entries so that they can be linked
		 * in a correct order in hists__match() below.
		 */
		hists__unlink(hists);

		if (evlist->enabled) {
			if (t->zero) {
				hists__delete_entries(hists);
			} else {
				hists__decay_entries(hists, t->hide_user_symbols,
						     t->hide_kernel_symbols);
			}
		}

		hists__collapse_resort(hists, NULL);

		/* Non-group events are considered as leader */
		if (symbol_conf.event_group &&
		    !perf_evsel__is_group_leader(pos)) {
			struct hists *leader_hists = evsel__hists(pos->leader);

			hists__match(leader_hists, hists);
			hists__link(leader_hists, hists);
		}
	}

	evlist__for_each_entry(evlist, pos) {
		perf_evsel__output_resort(pos, NULL);
	}
}

static void perf_top__print_sym_table(struct perf_top *top)
{
	char bf[160];
	int printed = 0;
	const int win_width = top->winsize.ws_col - 1;
	struct evsel *evsel = top->sym_evsel;
	struct hists *hists = evsel__hists(evsel);

	puts(CONSOLE_CLEAR);

	perf_top__header_snprintf(top, bf, sizeof(bf));
	printf("%s\n", bf);

	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);

	if (!top->record_opts.overwrite &&
	    (hists->stats.nr_lost_warned !=
	    hists->stats.nr_events[PERF_RECORD_LOST])) {
		hists->stats.nr_lost_warned =
			      hists->stats.nr_events[PERF_RECORD_LOST];
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
			      hists->stats.nr_lost_warned);
		++printed;
	}

	if (top->sym_filter_entry) {
		perf_top__show_details(top);
		return;
	}

	perf_top__resort_hists(top);

	hists__output_recalc_col_len(hists, top->print_entries - printed);
	putchar('\n');
	hists__fprintf(hists, false, top->print_entries - printed, win_width,
		       top->min_percent, stdout, !symbol_conf.use_callchain);
}

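/*
 * Interactive prompt helpers: read a line from stdin, accepting it only
 * if it is all digits (prompt_integer) and, for prompt_percent, only if
 * the value lies within 0..100.
 */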
static void prompt_integer(int *target, const char *msg)
{
	char *buf = malloc(0), *p;
	size_t dummy = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		return;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	p = buf;
	while (*p) {
		if (!isdigit(*p))
			goto out_free;
		p++;
	}
	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}

static void prompt_percent(int *target, const char *msg)
{
	int tmp = 0;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
{
	char *buf = malloc(0), *p;
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
	struct hists *hists = evsel__hists(top->sym_evsel);
	struct rb_node *next;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
		top->sym_filter_entry = NULL;
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	next = rb_first_cached(&hists->entries);
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
			break;
		}
		next = rb_next(&n->rb_node);
	}

	if (!found) {
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
		sleep(1);
	} else
		perf_top__parse_source(top, found);

out_free:
	free(buf);
}

static void perf_top__print_mapped_keys(struct perf_top *top)
{
	char *name = NULL;

	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);

	if (top->evlist->core.nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));

	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);

	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");

	fprintf(stdout,
		"\t[K]     hide kernel symbols.             \t(%s)\n",
		top->hide_kernel_symbols ? "yes" : "no");
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int perf_top__key_mapped(struct perf_top *top, int c)
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
		case 'K':
		case 'U':
		case 'F':
		case 's':
		case 'S':
			return 1;
		case 'E':
			return top->evlist->core.nr_entries > 1 ? 1 : 0;
		default:
			break;
	}

	return 0;
}

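/*
 * Handle one interactive key. An unmapped key first brings up the key
 * listing and reads another key. Returns false when the user quits.
 */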
static bool perf_top__handle_keypress(struct perf_top *top, int c)
{
	bool ret = true;

	if (!perf_top__key_mapped(top, c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios save;

		perf_top__print_mapped_keys(top);
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		set_term_quiet_input(&save);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!perf_top__key_mapped(top, c))
			return ret;
	}

	switch (c) {
		case 'd':
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
			break;
		case 'e':
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				perf_top__resize(top);
				signal(SIGWINCH, winch_sig);
			} else {
				signal(SIGWINCH, SIG_DFL);
			}
			break;
		case 'E':
			if (top->evlist->core.nr_entries > 1) {
				/* Select 0 as the default event: */
				int counter = 0;

				fprintf(stderr, "\nAvailable events:");

				evlist__for_each_entry(top->evlist, top->sym_evsel)
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));

				prompt_integer(&counter, "Enter details event counter");

				if (counter >= top->evlist->core.nr_entries) {
					top->sym_evsel = evlist__first(top->evlist);
					fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
					sleep(1);
					break;
				}
				evlist__for_each_entry(top->evlist, top->sym_evsel)
					if (top->sym_evsel->idx == counter)
						break;
			} else
				top->sym_evsel = evlist__first(top->evlist);
			break;
		case 'f':
			prompt_integer(&top->count_filter, "Enter display event count filter");
			break;
		case 'F':
			prompt_percent(&top->annotation_opts.min_pcnt,
				       "Enter details display event filter (percent)");
			break;
		case 'K':
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
			break;
		case 'q':
		case 'Q':
			printf("exiting.\n");
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
			ret = false;
			break;
		case 's':
			perf_top__prompt_symbol(top, "Enter details symbol");
			break;
		case 'S':
			if (!top->sym_filter_entry)
				break;
			else {
				struct hist_entry *syme = top->sym_filter_entry;

				top->sym_filter_entry = NULL;
				__zero_source_counters(syme);
			}
			break;
		case 'U':
			top->hide_user_symbols = !top->hide_user_symbols;
			break;
		case 'z':
			top->zero = !top->zero;
			break;
		default:
			break;
	}

	return ret;
}

static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;

	if (t->evlist->selected != NULL)
		t->sym_evsel = t->evlist->selected;

	perf_top__resort_hists(t);

	if (t->lost || t->drop)
		pr_warning("Too slow to read ring buffer (change period (-c/-F) or limit CPUs (-C))\n");
}

static void stop_top(void)
{
	session_done = 1;
	done = 1;
}

static void *display_thread_tui(void *arg)
{
	struct evsel *pos;
	struct perf_top *top = arg;
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";
	struct hist_browser_timer hbt = {
		.timer		= perf_top__sort_new_samples,
		.arg		= top,
		.refresh	= top->delay_secs,
	};

	/* In order to read symbols from other namespaces perf needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing.
	 */
	unshare(CLONE_FS);

	prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);

	perf_top__sort_new_samples(top);

	/*
	 * Initialize the uid_filter_str, in the future the TUI will allow
	 * zooming in/out UIDs. For now just use whatever the user passed
	 * via --uid.
	 */
	evlist__for_each_entry(top->evlist, pos) {
		struct hists *hists = evsel__hists(pos);
		hists->uid_filter_str = top->record_opts.target.uid_str;
	}

	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
				      top->min_percent,
				      &top->session->header.env,
				      !top->record_opts.overwrite,
				      &top->annotation_opts);

	stop_top();
	return NULL;
}

static void display_sig(int sig __maybe_unused)
{
	stop_top();
}

static void display_setup_sig(void)
{
	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);
	signal(SIGINT,  display_sig);
	signal(SIGQUIT, display_sig);
	signal(SIGTERM, display_sig);
}

static void *display_thread(void *arg)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios save;
	struct perf_top *top = arg;
	int delay_msecs, c;

	/* In order to read symbols from other namespaces perf needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing.
	 */
	unshare(CLONE_FS);

	prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);

	display_setup_sig();
	pthread__unblock_sigwinch();
repeat:
	delay_msecs = top->delay_secs * MSEC_PER_SEC;
	set_term_quiet_input(&save);
	/* trash return */
	getc(stdin);

	while (!done) {
		perf_top__print_sym_table(top);
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			__fallthrough;
		default:
			c = getc(stdin);
			tcsetattr(0, TCSAFLUSH, &save);

			if (perf_top__handle_keypress(top, c))
				goto repeat;
			stop_top();
		}
	}

	tcsetattr(0, TCSAFLUSH, &save);
	return NULL;
}

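/*
 * Callback run for each entry added by the hist_entry_iter: feed the
 * sample's exact IP to the annotation code and account branch cycles
 * when branch stacks are being collected.
 */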
static int hist_iter__top_callback(struct hist_entry_iter *iter,
				   struct addr_location *al, bool single,
				   void *arg)
{
	struct perf_top *top = arg;
	struct hist_entry *he = iter->he;
	struct evsel *evsel = iter->evsel;

	if (perf_hpp_list.sym && single)
		perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);

	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
		     !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
	return 0;
}

static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
				       struct evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_top *top = container_of(tool, struct perf_top, tool);
	struct addr_location al;
	int err;

	if (!machine && perf_guest) {
		static struct intlist *seen;

		if (!seen)
			seen = intlist__new(NULL);

		if (!intlist__has_entry(seen, sample->pid)) {
			pr_err("Can't find guest [%d]'s kernel information\n",
				sample->pid);
			intlist__add(seen, sample->pid);
		}
		return;
	}

	if (!machine) {
		pr_err("%u unprocessable samples recorded.\r",
		       top->session->evlist->stats.nr_unprocessable_samples++);
		return;
	}

	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
		top->exact_samples++;

	if (machine__resolve(machine, &al, sample) < 0)
		return;

	if (!machine->kptr_restrict_warned &&
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		if (!perf_evlist__exclude_kernel(top->session->evlist)) {
			ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Kernel%s samples will not be resolved.\n",
			  al.map && map__has_symbols(al.map) ?
			  " modules" : "");
			if (use_browser <= 0)
				sleep(5);
		}
		machine->kptr_restrict_warned = true;
	}

	if (al.sym == NULL && al.map != NULL) {
		const char *msg = "Kernel samples will not be resolved.\n";
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
		if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
		    __map__is_kernel(al.map) && map__has_symbols(al.map)) {
			if (symbol_conf.vmlinux_name) {
				char serr[256];
				dso__strerror_load(al.map->dso, serr, sizeof(serr));
				ui__warning("The %s file can't be used: %s\n%s",
					    symbol_conf.vmlinux_name, serr, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
			top->vmlinux_warned = true;
		}
	}

	if (al.sym == NULL || !al.sym->idle) {
		struct hists *hists = evsel__hists(evsel);
		struct hist_entry_iter iter = {
			.evsel		= evsel,
			.sample		= sample,
			.add_entry_cb	= hist_iter__top_callback,
		};

		if (symbol_conf.cumulate_callchain)
			iter.ops = &hist_iter_cumulative;
		else
			iter.ops = &hist_iter_normal;

		pthread_mutex_lock(&hists->lock);

		err = hist_entry_iter__add(&iter, &al, top->max_stack, top);
		if (err < 0)
			pr_err("Problem incrementing symbol period, skipping event\n");

		pthread_mutex_unlock(&hists->lock);
	}

	addr_location__put(&al);
}

static void
perf_top__process_lost(struct perf_top *top, union perf_event *event,
		       struct evsel *evsel)
{
	struct hists *hists = evsel__hists(evsel);

	top->lost += event->lost.lost;
	top->lost_total += event->lost.lost;
	hists->stats.total_lost += event->lost.lost;
}

static void
perf_top__process_lost_samples(struct perf_top *top,
			       union perf_event *event,
			       struct evsel *evsel)
{
	struct hists *hists = evsel__hists(evsel);

	top->lost += event->lost_samples.lost;
	top->lost_total += event->lost_samples.lost;
	hists->stats.total_lost_samples += event->lost_samples.lost;
}

static u64 last_timestamp;

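/*
 * Drain one mmap ring: timestamp each event and queue it on the active
 * ordered_events queue, acknowledging a pending queue rotation
 * requested by the process thread.
 */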
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
{
	struct record_opts *opts = &top->record_opts;
	struct evlist *evlist = top->evlist;
	struct mmap *md;
	union perf_event *event;

	md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
	if (perf_mmap__read_init(md) < 0)
		return;

	while ((event = perf_mmap__read_event(md)) != NULL) {
		int ret;

		ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
		if (ret && ret != -1)
			break;

		ret = ordered_events__queue(top->qe.in, event, last_timestamp, 0);
		if (ret)
			break;

		perf_mmap__consume(md);

		if (top->qe.rotate) {
			pthread_mutex_lock(&top->qe.mutex);
			top->qe.rotate = false;
			pthread_cond_signal(&top->qe.cond);
			pthread_mutex_unlock(&top->qe.mutex);
		}
	}

	perf_mmap__read_done(md);
}

static void perf_top__mmap_read(struct perf_top *top)
{
	bool overwrite = top->record_opts.overwrite;
	struct evlist *evlist = top->evlist;
	int i;

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);

	for (i = 0; i < top->evlist->core.nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);

	if (overwrite) {
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
	}
}

/*
 * Check the per-event overwrite term.
 * perf top should support a consistent term for all events.
 * - No event has a per-event term
 *   E.g. "cpu/cpu-cycles/,cpu/instructions/"
 *   Nothing changes, return 0.
 * - All events have the same per-event term
 *   E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/"
 *   Use the per-event setting to replace opts->overwrite if they
 *   differ, then return 0.
 * - Events have different per-event terms
 *   E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
 *   Return -1
 * - Some events set a per-event term, but others do not.
 *   E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
 *   Return -1
 */
static int perf_top__overwrite_check(struct perf_top *top)
{
	struct record_opts *opts = &top->record_opts;
	struct evlist *evlist = top->evlist;
	struct perf_evsel_config_term *term;
	struct list_head *config_terms;
	struct evsel *evsel;
	int set, overwrite = -1;

	evlist__for_each_entry(evlist, evsel) {
		set = -1;
		config_terms = &evsel->config_terms;
		list_for_each_entry(term, config_terms, list) {
			if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
				set = term->val.overwrite ? 1 : 0;
		}

		/* no term for current and previous event (likely) */
		if ((overwrite < 0) && (set < 0))
			continue;

		/* has term for both current and previous event, compare */
		if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
			return -1;

		/* no term for current event but has term for previous one */
		if ((overwrite >= 0) && (set < 0))
			return -1;

		/* has term for current event */
		if ((overwrite < 0) && (set >= 0)) {
			/* if it's first event, set overwrite */
			if (evsel == evlist__first(evlist))
				overwrite = set;
			else
				return -1;
		}
	}

	if ((overwrite >= 0) && (opts->overwrite != overwrite))
		opts->overwrite = overwrite;

	return 0;
}

static int perf_top_overwrite_fallback(struct perf_top *top,
				       struct evsel *evsel)
{
	struct record_opts *opts = &top->record_opts;
	struct evlist *evlist = top->evlist;
	struct evsel *counter;

	if (!opts->overwrite)
		return 0;

	/* only fall back when first event fails */
	if (evsel != evlist__first(evlist))
		return 0;

	evlist__for_each_entry(evlist, counter)
		counter->core.attr.write_backward = false;
	opts->overwrite = false;
	pr_debug2("fall back to non-overwrite mode\n");
	return 1;
}

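/*
 * Open all counters and mmap the ring buffers, first dropping back to
 * non-overwrite mode or to a fallback event encoding when the kernel
 * lacks support for what was asked.
 */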
static int perf_top__start_counters(struct perf_top *top)
{
	char msg[BUFSIZ];
	struct evsel *counter;
	struct evlist *evlist = top->evlist;
	struct record_opts *opts = &top->record_opts;

	if (perf_top__overwrite_check(top)) {
		ui__error("perf top only supports a consistent per-event "
			  "overwrite setting for all events\n");
		goto out_err;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, counter) {
try_again:
		if (evsel__open(counter, top->evlist->core.cpus,
				     top->evlist->core.threads) < 0) {

			/*
			 * Specially handle overwrite fall back.
			 * Because perf top is the only tool that has
			 * overwrite mode by default, it must support
			 * both overwrite and non-overwrite modes, and
			 * requires a consistent mode for all events.
			 *
			 * This may move to generic code once more tools
			 * have a similar attribute.
			 */
			if (perf_missing_features.write_backward &&
			    perf_top_overwrite_fallback(top, counter))
				goto try_again;

			if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			perf_evsel__open_strerror(counter, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out_err;
		}
	}

	if (evlist__mmap(evlist, opts->mmap_pages) < 0) {
		ui__error("Failed to mmap with %d (%s)\n",
			    errno, str_error_r(errno, msg, sizeof(msg)));
		goto out_err;
	}

	return 0;

out_err:
	return -1;
}

static int callchain_param__setup_sample_type(struct callchain_param *callchain)
{
	if (callchain->mode != CHAIN_NONE) {
		if (callchain_register_param(callchain) < 0) {
			ui__error("Can't register callchain params.\n");
			return -EINVAL;
		}
	}

	return 0;
}

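/*
 * The reader and process threads exchange two ordered_events queues:
 * while one queue is filled from the mmap rings, the other is flushed,
 * time-sorted, through deliver_event().
 */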
static struct ordered_events *rotate_queues(struct perf_top *top)
{
	struct ordered_events *in = top->qe.in;

	if (top->qe.in == &top->qe.data[1])
		top->qe.in = &top->qe.data[0];
	else
		top->qe.in = &top->qe.data[1];

	return in;
}

static void *process_thread(void *arg)
{
	struct perf_top *top = arg;

	while (!done) {
		struct ordered_events *out, *in = top->qe.in;

		if (!in->nr_events) {
			usleep(100);
			continue;
		}

		out = rotate_queues(top);

		pthread_mutex_lock(&top->qe.mutex);
		top->qe.rotate = true;
		pthread_cond_wait(&top->qe.cond, &top->qe.mutex);
		pthread_mutex_unlock(&top->qe.mutex);

		if (ordered_events__flush(out, OE_FLUSH__TOP))
			pr_err("failed to process events\n");
	}

	return NULL;
}

/*
 * Allow only 'top->delay_secs' seconds behind samples.
 */
static int should_drop(struct ordered_event *qevent, struct perf_top *top)
{
	union perf_event *event = qevent->event;
	u64 delay_timestamp;

	if (event->header.type != PERF_RECORD_SAMPLE)
		return false;

	delay_timestamp = qevent->timestamp + top->delay_secs * NSEC_PER_SEC;
	return delay_timestamp < last_timestamp;
}

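/*
 * Consume one queued event: drop it when the tool has fallen too far
 * behind, otherwise resolve the evsel and machine and route the event
 * to the sample/lost handlers above.
 */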
static int deliver_event(struct ordered_events *qe,
			 struct ordered_event *qevent)
{
	struct perf_top *top = qe->data;
	struct evlist *evlist = top->evlist;
	struct perf_session *session = top->session;
	union perf_event *event = qevent->event;
	struct perf_sample sample;
	struct evsel *evsel;
	struct machine *machine;
	int ret = -1;

	if (should_drop(qevent, top)) {
		top->drop++;
		top->drop_total++;
		return 0;
	}

	ret = perf_evlist__parse_sample(evlist, event, &sample);
	if (ret) {
		pr_err("Can't parse sample, err = %d\n", ret);
		goto next_event;
	}

	evsel = perf_evlist__id2evsel(session->evlist, sample.id);
	assert(evsel != NULL);

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evswitch__discard(&top->evswitch, evsel))
			return 0;
		++top->samples;
	}

	switch (sample.cpumode) {
	case PERF_RECORD_MISC_USER:
		++top->us_samples;
		if (top->hide_user_symbols)
			goto next_event;
		machine = &session->machines.host;
		break;
	case PERF_RECORD_MISC_KERNEL:
		++top->kernel_samples;
		if (top->hide_kernel_symbols)
			goto next_event;
		machine = &session->machines.host;
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
		++top->guest_kernel_samples;
		machine = perf_session__find_machine(session,
						     sample.pid);
		break;
	case PERF_RECORD_MISC_GUEST_USER:
		++top->guest_us_samples;
		/*
		 * TODO: we don't process guest user from host side
		 * except simple counting.
		 */
		goto next_event;
	default:
		if (event->header.type == PERF_RECORD_SAMPLE)
			goto next_event;
		machine = &session->machines.host;
		break;
	}

	if (event->header.type == PERF_RECORD_SAMPLE) {
		perf_event__process_sample(&top->tool, event, evsel,
					   &sample, machine);
	} else if (event->header.type == PERF_RECORD_LOST) {
		perf_top__process_lost(top, event, evsel);
	} else if (event->header.type == PERF_RECORD_LOST_SAMPLES) {
		perf_top__process_lost_samples(top, event, evsel);
	} else if (event->header.type < PERF_RECORD_MAX) {
		hists__inc_nr_events(evsel__hists(evsel), event->header.type);
		machine__process_event(machine, event, &sample);
	} else
		++session->evlist->stats.nr_unknown_events;

	ret = 0;
next_event:
	return ret;
}

static void init_process_thread(struct perf_top *top)
{
	ordered_events__init(&top->qe.data[0], deliver_event, top);
	ordered_events__init(&top->qe.data[1], deliver_event, top);
	ordered_events__set_copy_on_queue(&top->qe.data[0], true);
	ordered_events__set_copy_on_queue(&top->qe.data[1], true);
	top->qe.in = &top->qe.data[0];
	pthread_mutex_init(&top->qe.mutex, NULL);
	pthread_cond_init(&top->qe.cond, NULL);
}

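/*
 * Main body of 'perf top': synthesize existing threads and BPF events,
 * start the counters, spawn the display and process threads, then keep
 * draining the mmap rings until told to stop.
 */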
static int __cmd_top(struct perf_top *top)
{
	struct record_opts *opts = &top->record_opts;
	pthread_t thread, thread_process;
	int ret;

	if (!top->annotation_opts.objdump_path) {
		ret = perf_env__lookup_objdump(&top->session->header.env,
					       &top->annotation_opts.objdump_path);
		if (ret)
			return ret;
	}

	ret = callchain_param__setup_sample_type(&callchain_param);
	if (ret)
		return ret;

	if (perf_session__register_idle_thread(top->session) < 0)
		return ret;

	if (top->nr_threads_synthesize > 1)
		perf_set_multithreaded();

	init_process_thread(top);

	if (opts->record_namespaces)
		top->tool.namespace_events = true;

	ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
						&top->session->machines.host,
						&top->record_opts);
	if (ret < 0)
		pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n");

	machine__synthesize_threads(&top->session->machines.host, &opts->target,
				    top->evlist->core.threads, false,
				    top->nr_threads_synthesize);

	if (top->nr_threads_synthesize > 1)
		perf_set_singlethreaded();

	if (perf_hpp_list.socket) {
		ret = perf_env__read_cpu_topology_map(&perf_env);
		if (ret < 0) {
			char errbuf[BUFSIZ];
			const char *err = str_error_r(-ret, errbuf, sizeof(errbuf));

			ui__error("Could not read the CPU topology map: %s\n", err);
			return ret;
		}
	}

	ret = perf_top__start_counters(top);
	if (ret)
		return ret;

	top->session->evlist = top->evlist;
	perf_session__set_id_hdr_size(top->session);

	/*
	 * When perf is starting the traced process, all the events (apart from
	 * group members) have enable_on_exec=1 set, so don't spoil it by
	 * prematurely enabling them.
	 *
	 * XXX 'top' still doesn't start workloads like record, trace, but should,
	 * so leave the check here.
	 */
	if (!target__none(&opts->target))
		evlist__enable(top->evlist);

	ret = -1;
	if (pthread_create(&thread_process, NULL, process_thread, top)) {
		ui__error("Could not create process thread.\n");
		return ret;
	}

	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
							    display_thread), top)) {
		ui__error("Could not create display thread.\n");
		goto out_join_thread;
	}

	if (top->realtime_prio) {
		struct sched_param param;

		param.sched_priority = top->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			ui__error("Could not set realtime priority.\n");
			goto out_join;
		}
	}

	/* Wait for a minimal set of events before starting the snapshot */
	evlist__poll(top->evlist, 100);

	perf_top__mmap_read(top);

	while (!done) {
		u64 hits = top->samples;

		perf_top__mmap_read(top);

		if (opts->overwrite || (hits == top->samples))
			ret = evlist__poll(top->evlist, 100);

		if (resize) {
			perf_top__resize(top);
			resize = 0;
		}
	}

	ret = 0;
out_join:
	pthread_join(thread, NULL);
out_join_thread:
	pthread_cond_signal(&top->qe.cond);
	pthread_join(thread_process, NULL);
	return ret;
}

static int
callchain_opt(const struct option *opt, const char *arg, int unset)
{
	symbol_conf.use_callchain = true;
	return record_callchain_opt(opt, arg, unset);
}

static int
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = !unset;
	callchain->record_mode = CALLCHAIN_FP;

	/*
	 * --no-call-graph
	 */
	if (unset) {
		symbol_conf.use_callchain = false;
		callchain->record_mode = CALLCHAIN_NONE;
		return 0;
	}

	return parse_callchain_top_opt(arg);
}

static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
{
	if (!strcmp(var, "top.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}
	if (!strcmp(var, "top.children")) {
		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
		return 0;
	}

	return 0;
}

static int
parse_percent_limit(const struct option *opt, const char *arg,
		    int unset __maybe_unused)
{
	struct perf_top *top = opt->value;

	top->min_percent = strtof(arg, NULL);
	return 0;
}

const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP
	"\n\t\t\t\tDefault: fp,graph,0.5,caller,function";

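/*
 * Entry point: parse options, initialize symbols, sorting, the evlist
 * and the session, start the BPF side-band thread and hand control to
 * __cmd_top().
 */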
int cmd_top(int argc, const char **argv)
{
	char errbuf[BUFSIZ];
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.record_opts = {
			.mmap_pages	= UINT_MAX,
			.user_freq	= UINT_MAX,
			.user_interval	= ULLONG_MAX,
			.freq		= 4000, /* 4 KHz */
			.target		= {
				.uses_mmap   = true,
			},
			/*
			 * FIXME: This will lose PERF_RECORD_MMAP and other metadata
			 * when we pause, fix that and reenable. Probably using a
			 * separate evlist with a dummy event, i.e. a non-overwrite
			 * ring buffer just for metadata events, while PERF_RECORD_SAMPLE
			 * stays in overwrite mode. -acme
			 */
			.overwrite	= 0,
			.sample_time	= true,
			.sample_time_set = true,
		},
		.max_stack	     = sysctl__max_stack(),
		.annotation_opts     = annotation__default_options,
		.nr_threads_synthesize = UINT_MAX,
	};
	struct record_opts *opts = &top.record_opts;
	struct target *target = &opts->target;
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
	OPT_STRING('p', "pid", &target->pid, "pid",
		    "profile events on existing process id"),
	OPT_STRING('t', "tid", &target->tid, "tid",
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
		    "don't load vmlinux even if found"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN(0, "group", &opts->group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
	OPT_CALLBACK('F', "freq", &top.record_opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
		   " Please refer to the man page for the complete list."),
	OPT_STRING(0, "fields", &field_order, "key[,keys...]",
		   "output field(s): overhead, period, sample plus all of sort keys"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording and display",
			   &callchain_opt),
	OPT_CALLBACK(0, "call-graph", &callchain_param,
		     "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
		     top_callchain_help, &parse_callchain_opt),
	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
		    "Accumulate callchains of children and show total overhead as well"),
	OPT_INTEGER(0, "max-stack", &top.max_stack,
		    "Set the maximum stack depth when parsing the callchain. "
		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
		   "ignore callees of these functions in call graphs",
		   report_parse_ignore_callees_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
		    "Enable kernel symbol demangling"),
	OPT_BOOLEAN(0, "no-bpf-event", &top.record_opts.no_bpf_event, "do not record bpf events"),
	OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path",
		    "objdump binary to use for disassembly and annotations"),
	OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
	OPT_CALLBACK(0, "percent-limit", &top, "percent",
		     "Don't show entries under that percent", parse_percent_limit),
	OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
		     "How to display percentage of filtered entries", parse_filter_percentage),
	OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
		   "width[,width...]",
		   "don't try to adjust column width, use these fixed values"),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),
	OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
		    "Show raw trace event output (do not use print fmt or plugins)"),
	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
		    "Show entries in a hierarchy"),
	OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
		    "Use a backward ring buffer, default: no"),
	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
			"number of threads to run event synthesize"),
	OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
		    "Record namespaces events"),
	OPTS_EVSWITCH(&top.evswitch),
	OPT_END()
	};
	struct evlist *sb_evlist = NULL;
	const char * const top_usage[] = {
		"perf top [<options>]",
		NULL
	};
	int status = hists__init();

	if (status < 0)
		return status;

	top.annotation_opts.min_pcnt = 5;
	top.annotation_opts.context  = 4;

	top.evlist = evlist__new();
	if (top.evlist == NULL)
		return -ENOMEM;

	status = perf_config(perf_top_config, &top);
	if (status)
		return status;

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	if (!top.evlist->core.nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_delete_evlist;
	}

	status = evswitch__init(&top.evswitch, top.evlist, stderr);
	if (status)
		goto out_delete_evlist;

	if (symbol_conf.report_hierarchy) {
		/* disable incompatible options */
		symbol_conf.event_group = false;
		symbol_conf.cumulate_callchain = false;

		if (field_order) {
			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
			parse_options_usage(top_usage, options, "fields", 0);
			parse_options_usage(NULL, options, "hierarchy", 0);
			goto out_delete_evlist;
		}
	}

	if (opts->branch_stack && callchain_param.enabled)
		symbol_conf.show_branchflag_count = true;

	sort__mode = SORT_MODE__TOP;
	/* display thread wants entries to be collapsed in a different tree */
	perf_hpp_list.need_collapse = 1;

	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	if (setup_sorting(top.evlist) < 0) {
		if (sort_order)
			parse_options_usage(top_usage, options, "s", 1);
		if (field_order)
			parse_options_usage(sort_order ? NULL : top_usage,
					    options, "fields", 0);
		goto out_delete_evlist;
	}

	status = target__validate(target);
	if (status) {
		target__strerror(target, status, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	status = target__parse_uid(target);
	if (status) {
		int saved_errno = errno;

		target__strerror(target, status, errbuf, BUFSIZ);
		ui__error("%s\n", errbuf);

		status = -saved_errno;
		goto out_delete_evlist;
	}

	if (target__none(target))
		target->system_wide = true;

	if (perf_evlist__create_maps(top.evlist, target) < 0) {
		ui__error("Couldn't create thread/CPU maps: %s\n",
			  errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
		goto out_delete_evlist;
	}

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	if (record_opts__config(opts)) {
		status = -EINVAL;
		goto out_delete_evlist;
	}

	top.sym_evsel = evlist__first(top.evlist);

	if (!callchain_param.enabled) {
		symbol_conf.cumulate_callchain = false;
		perf_hpp__cancel_cumulate();
	}

	if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
		callchain_param.order = ORDER_CALLER;

	status = symbol__annotation_init();
	if (status < 0)
		goto out_delete_evlist;

	annotation_config__init();

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	status = symbol__init(NULL);
	if (status < 0)
		goto out_delete_evlist;

	sort__setup_elide(stdout);

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		perf_top__update_print_entries(&top);
		signal(SIGWINCH, winch_sig);
	}

	top.session = perf_session__new(NULL, false, NULL);
	if (IS_ERR(top.session)) {
		status = PTR_ERR(top.session);
		goto out_delete_evlist;
	}

	if (!top.record_opts.no_bpf_event)
		bpf_event__add_sb_event(&sb_evlist, &perf_env);

	if (perf_evlist__start_sb_thread(sb_evlist, target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	status = __cmd_top(&top);

	if (!opts->no_bpf_event)
		perf_evlist__stop_sb_thread(sb_evlist);

out_delete_evlist:
	evlist__delete(top.evlist);
	perf_session__delete(top.session);

	return status;
}