/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "builtin.h"

#include "perf.h"

#include "util/annotate.h"
#include "util/cache.h"
#include "util/color.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/xyarray.h"
#include "util/sort.h"

#include "util/debug.h"

#include <assert.h>
#include <fcntl.h>

#include <stdio.h>
#include <termios.h>
#include <unistd.h>
#include <inttypes.h>

#include <errno.h>
#include <limits.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

/*
 * Read the environment variable @name as a terminal dimension.
 *
 * Like atoi(), only the leading decimal number is considered. Returns that
 * number when it lies in 1..USHRT_MAX, and 0 when the variable is unset,
 * does not start with a number, or is out of range (so it can never wrap
 * when stored in an unsigned short winsize field).
 */
static unsigned short term_dimension_from_env(const char *name)
{
	const char *s = getenv(name);
	char *end;
	long v;

	if (s == NULL)
		return 0;

	errno = 0;
	v = strtol(s, &end, 10);
	if (end == s || errno != 0 || v <= 0 || v > USHRT_MAX)
		return 0;

	return (unsigned short)v;
}

/*
 * Fill @ws with the terminal size.
 *
 * Sources are tried in this order:
 *   1. the LINES and COLUMNS environment variables (both must be usable),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when it reports non-zero
 *      rows and columns,
 *   3. a fixed 25x80 fallback.
 *
 * Only ws_row and ws_col are guaranteed to be set on return.
 */
void get_term_dimensions(struct winsize *ws)
{
	unsigned short rows = term_dimension_from_env("LINES");
	unsigned short cols = term_dimension_from_env("COLUMNS");

	if (rows && cols) {
		ws->ws_row = rows;
		ws->ws_col = cols;
		return;
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

static void perf_top__update_print_entries(struct perf_top *top)
90
{
91
	top->print_entries = top->winsize.ws_row;
92

93 94
	if (top->print_entries > 9)
		top->print_entries -= 9;
95 96
}

/*
 * Window-size-change handler: refresh the cached terminal dimensions and
 * the number of entries that fit.
 *
 * @sig and @info are unused. @arg is treated as the struct perf_top to
 * update; perf_top__handle_keypress() passes it explicitly when calling
 * this function directly.
 *
 * NOTE(review): this function is also installed with sigaction() and
 * SA_SIGINFO. For a real signal delivery POSIX defines the third handler
 * argument as a ucontext pointer, not caller data, so @arg would not be a
 * struct perf_top in that case - verify how the signal path is meant to
 * obtain it.
 */
static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
{
	struct perf_top *top = arg;

	get_term_dimensions(&top->winsize);
	perf_top__update_print_entries(top);
}

static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
106 107
{
	struct symbol *sym;
108
	struct annotation *notes;
109
	struct map *map;
110
	int err = -1;
111

112
	if (!he || !he->ms.sym)
113 114
		return -1;

115 116
	sym = he->ms.sym;
	map = he->ms.map;
117 118 119 120

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
121
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
122 123 124
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
125
		return -1;
126 127
	}

128 129 130
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
131 132 133
		goto out_assign;
	}

134
	pthread_mutex_lock(&notes->lock);
135

136
	if (symbol__alloc_hist(sym) < 0) {
137
		pthread_mutex_unlock(&notes->lock);
138 139
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
140
		sleep(1);
141
		return err;
142
	}
143

144
	err = symbol__annotate(sym, map, 0);
145
	if (err == 0) {
146
out_assign:
147
		top->sym_filter_entry = he;
148
	}
149

150
	pthread_mutex_unlock(&notes->lock);
151
	return err;
152 153
}

static void __zero_source_counters(struct hist_entry *he)
155
{
156
	struct symbol *sym = he->ms.sym;
157
	symbol__annotate_zero_histograms(sym);
158 159
}

/*
 * Account one sample at address @ip for event index @counter in the
 * per-address histogram of the symbol behind @he.
 *
 * Nothing is recorded when @he or its symbol is NULL, or when the symbol is
 * not the one currently selected for annotation and use_browser is not 1.
 * The annotation lock is only tried, never waited for: if it is busy the
 * sample is dropped.
 */
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;

	if (he == NULL || he->ms.sym == NULL ||
	    ((top->sym_filter_entry == NULL ||
	      top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
		return;

	sym = he->ms.sym;
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock))
		return;

	/* Allocate the histogram on first use. */
	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	/* Translate the sampled address through the map before recording it. */
	ip = he->ms.map->map_ip(he->ms.map, ip);
	symbol__inc_addr_samples(sym, he->ms.map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

static void perf_top__show_details(struct perf_top *top)
193
{
194
	struct hist_entry *he = top->sym_filter_entry;
195
	struct annotation *notes;
196
	struct symbol *symbol;
197
	int more;
198

199
	if (!he)
200 201
		return;

202
	symbol = he->ms.sym;
203 204 205 206 207 208
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
209

210 211
	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
212

213 214 215 216
	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
217
	else
218
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
219
	if (more != 0)
220
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
221 222
out_unlock:
	pthread_mutex_unlock(&notes->lock);
223
}

/*
 * String written with puts() at the start of every refresh in
 * perf_top__print_sym_table().
 *
 * NOTE(review): the literal is empty in this copy, so only the newline that
 * puts() appends is printed. Given the name, it presumably should hold a
 * terminal clear-screen escape sequence whose control bytes were lost -
 * confirm against the upstream file.
 */
static const char		CONSOLE_CLEAR[] = "";

static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
230
{
231 232 233 234 235 236 237 238
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
239
}
static void perf_top__print_sym_table(struct perf_top *top)
242
{
243 244
	char bf[160];
	int printed = 0;
245
	const int win_width = top->winsize.ws_col - 1;
246

247
	puts(CONSOLE_CLEAR);
248

249
	perf_top__header_snprintf(top, bf, sizeof(bf));
250
	printf("%s\n", bf);
251

252
	perf_top__reset_sample_counters(top);
253

254
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
255

256 257 258 259
	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
260 261
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
262
			      top->sym_evsel->hists.stats.nr_lost_warned);
263
		++printed;
264 265
	}

266 267
	if (top->sym_filter_entry) {
		perf_top__show_details(top);
268 269 270
		return;
	}

271 272 273 274 275 276 277
	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
278
	putchar('\n');
279 280
	hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
281 282
}

/*
 * Print "\n<msg>: " on stdout and read one line from stdin as a
 * non-negative decimal number.
 *
 * @target is written only when the line consists solely of decimal digits
 * and the value fits in an int. An empty line counts as 0; callers rely on
 * that (the "display entries" key uses 0 to switch back to automatic
 * sizing). On EOF, read error, any non-digit character, an over-long line
 * or a value above INT_MAX, @target is left untouched.
 *
 * A fixed buffer is used, so there is no allocation to leak on the error
 * paths.
 */
static void prompt_integer(int *target, const char *msg)
{
	char buf[64];
	unsigned long val;
	char *p;

	fprintf(stdout, "\n%s: ", msg);
	if (fgets(buf, sizeof(buf), stdin) == NULL)
		return;

	p = strchr(buf, '\n');
	if (p) {
		*p = '\0';
	} else if (strlen(buf) == sizeof(buf) - 1) {
		int c;

		/*
		 * The line did not fit: it cannot be a valid int. Discard the
		 * remainder so it is not mistaken for the next answer.
		 */
		while ((c = getc(stdin)) != EOF && c != '\n')
			;
		return;
	}

	for (p = buf; *p; p++) {
		/* <ctype.h> functions need an unsigned char value. */
		if (!isdigit((unsigned char)*p))
			return;
	}

	/* strtoul() saturates at ULONG_MAX on overflow, which is rejected here. */
	val = strtoul(buf, NULL, 10);
	if (val > INT_MAX)
		return;

	*target = (int)val;
}

/*
 * Prompt with @msg for a percentage and store it in @target.
 *
 * @target is updated only when a valid number in the range 0..100 was
 * entered; invalid input or an out-of-range value leaves it unchanged.
 */
static void prompt_percent(int *target, const char *msg)
{
	/* -1 is never produced by prompt_integer(), so it marks "no input". */
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
319 320
{
	char *buf = malloc(0), *p;
321
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
322
	struct rb_node *next;
323 324 325 326 327
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
328
		top->sym_filter_entry = NULL;
329 330 331 332 333 334 335 336 337 338
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

339
	next = rb_first(&top->sym_evsel->hists.entries);
340 341 342 343
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
344 345
			break;
		}
346
		next = rb_next(&n->rb_node);
347 348 349
	}

	if (!found) {
350
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
351 352
		sleep(1);
	} else
353
		perf_top__parse_source(top, found);
354 355 356 357 358

out_free:
	free(buf);
}

static void perf_top__print_mapped_keys(struct perf_top *top)
360
{
361 362
	char *name = NULL;

363 364
	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
365 366 367 368
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
369 370
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
371

372 373
	if (top->evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
374

375
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
376

377
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
378 379
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
380

381
	fprintf(stdout,
382
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
383
		top->hide_kernel_symbols ? "yes" : "no");
384 385
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
386 387
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
388 389 390
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int perf_top__key_mapped(struct perf_top *top, int c)
392 393 394 395 396 397 398 399
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
400 401
		case 'K':
		case 'U':
402 403 404
		case 'F':
		case 's':
		case 'S':
405 406
			return 1;
		case 'E':
407
			return top->evlist->nr_entries > 1 ? 1 : 0;
408 409
		default:
			break;
410 411 412
	}

	return 0;
413 414
}

static void perf_top__handle_keypress(struct perf_top *top, int c)
416
{
417
	if (!perf_top__key_mapped(top, c)) {
418 419 420
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

421
		perf_top__print_mapped_keys(top);
422 423 424 425 426 427 428 429 430 431 432 433 434 435
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
436
		if (!perf_top__key_mapped(top, c))
437 438 439
			return;
	}

440 441
	switch (c) {
		case 'd':
442 443 444
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
445 446
			break;
		case 'e':
447 448 449 450 451 452 453 454
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
455 456
			} else
				signal(SIGWINCH, SIG_DFL);
457 458
			break;
		case 'E':
459
			if (top->evlist->nr_entries > 1) {
460 461 462
				/* Select 0 as the default event: */
				int counter = 0;

463
				fprintf(stderr, "\nAvailable events:");
464

465 466
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
467

468
				prompt_integer(&counter, "Enter details event counter");
469

470 471 472
				if (counter >= top->evlist->nr_entries) {
					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
473
					sleep(1);
474
					break;
475
				}
476 477
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
478
						break;
479
			} else
480
				top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
481 482
			break;
		case 'f':
483
			prompt_integer(&top->count_filter, "Enter display event count filter");
484 485
			break;
		case 'F':
486 487
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
488
			break;
489
		case 'K':
490
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
491
			break;
492 493 494
		case 'q':
		case 'Q':
			printf("exiting.\n");
495 496
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
497 498
			exit(0);
		case 's':
499
			perf_top__prompt_symbol(top, "Enter details symbol");
500 501
			break;
		case 'S':
502
			if (!top->sym_filter_entry)
503 504
				break;
			else {
505
				struct hist_entry *syme = top->sym_filter_entry;
506

507
				top->sym_filter_entry = NULL;
508 509 510
				__zero_source_counters(syme);
			}
			break;
511
		case 'U':
512
			top->hide_user_symbols = !top->hide_user_symbols;
513
			break;
514
		case 'z':
515
			top->zero = !top->zero;
516
			break;
517 518
		default:
			break;
519 520 521
	}
}

/*
 * Refresh callback handed to the TUI browser; @arg is the struct perf_top.
 *
 * Resets the sample counters, adopts the event selected in the evlist (if
 * any) as the active event, then collapses, re-sorts and decays that
 * event's histogram entries.
 */
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;
	perf_top__reset_sample_counters(t);

	if (t->evlist->selected != NULL)
		t->sym_evsel = t->evlist->selected;

	hists__collapse_resort_threaded(&t->sym_evsel->hists);
	hists__output_resort_threaded(&t->sym_evsel->hists);
	hists__decay_entries_threaded(&t->sym_evsel->hists,
				      t->hide_user_symbols,
				      t->hide_kernel_symbols);
}

static void *display_thread_tui(void *arg)
538
{
539
	struct perf_evsel *pos;
540
	struct perf_top *top = arg;
541 542
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";

543
	perf_top__sort_new_samples(top);
544 545 546 547 548 549 550 551 552

	/*
	 * Initialize the uid_filter_str, in the future the TUI will allow
	 * Zooming in/out UIDs. For now juse use whatever the user passed
	 * via --uid.
	 */
	list_for_each_entry(pos, &top->evlist->entries, node)
		pos->hists.uid_filter_str = top->uid_str;

553
	perf_evlist__tui_browse_hists(top->evlist, help,
554
				      perf_top__sort_new_samples,
555
				      top, top->delay_secs);
556

557 558 559 560 561
	exit_browser(0);
	exit(0);
	return NULL;
}

static void *display_thread(void *arg)
563
{
564
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
565
	struct termios tc, save;
566
	struct perf_top *top = arg;
567 568 569 570 571 572 573
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
574

575
	pthread__unblock_sigwinch();
576
repeat:
577
	delay_msecs = top->delay_secs * 1000;
578 579 580
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
581

582
	while (1) {
583
		perf_top__print_sym_table(top);
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall trhu */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
600 601 602
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

603
	perf_top__handle_keypress(top, c);
604
	goto repeat;
605 606 607 608

	return NULL;
}

/*
 * Tag samples to be skipped: symbol_filter() marks a symbol as ignored when
 * its name equals one of these entries. The table is NULL-terminated and
 * never modified.
 */
static const char * const skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

/*
 * Symbol filter passed to perf_event__preprocess_sample().
 *
 * Returns 1 for section-marker and module entry/exit style names (_text,
 * _etext, _sinittext, init_module*, cleanup_module*, anything containing
 * _text_start or _text_end) and 0 for every other symbol. Symbols listed in
 * skip_symbols[] still return 0 but get sym->ignore set. @map is unused.
 */
static int symbol_filter(struct map *map __used, struct symbol *sym)
{
	const char *name = sym->name;
	int i;

	/*
	 * ppc64 uses function descriptors and prepends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
		return 1;

	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			sym->ignore = true;
			break;
		}
	}

	return 0;
}

static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
657
				       struct perf_evsel *evsel,
658
				       struct perf_sample *sample,
659
				       struct machine *machine)
660
{
661
	struct perf_top *top = container_of(tool, struct perf_top, tool);
662
	struct symbol *parent = NULL;
663
	u64 ip = event->ip.ip;
664
	struct addr_location al;
665
	int err;
666

667
	if (!machine && perf_guest) {
668
		pr_err("Can't find guest [%d]'s kernel information\n",
669
			event->ip.pid);
670 671 672
		return;
	}

673
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
674
		top->exact_samples++;
675

676
	if (perf_event__preprocess_sample(event, machine, &al, sample,
677
					  symbol_filter) < 0 ||
678
	    al.filtered)
679
		return;
680

681
	if (!top->kptr_restrict_warned &&
682 683 684 685 686 687 688 689 690 691
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
692
		top->kptr_restrict_warned = true;
693 694
	}

695
	if (al.sym == NULL) {
696
		const char *msg = "Kernel samples will not be resolved.\n";
697 698 699 700 701 702 703 704 705 706 707
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
708
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
709
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
710
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
711 712 713 714 715 716 717 718 719 720
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
721
			top->vmlinux_warned = true;
722
		}
723 724
	}

725 726
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
727

728 729
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
730 731
			err = machine__resolve_callchain(machine, evsel, al.thread,
							 sample->callchain, &parent);
732 733 734 735
			if (err)
				return;
		}

736
		he = perf_evsel__add_hist_entry(evsel, &al, sample);
737 738 739
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
740
		}
741

742
		if (symbol_conf.use_callchain) {
743
			err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
744 745 746 747 748
					       sample->period);
			if (err)
				return;
		}

749 750
		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
751
	}
752 753

	return;
754 755
}

static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
757
{
758
	struct perf_sample sample;
759
	struct perf_evsel *evsel;
760
	struct perf_session *session = top->session;
761
	union perf_event *event;
762 763
	struct machine *machine;
	u8 origin;
764
	int ret;
765

766 767
	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
		ret = perf_session__parse_sample(session, event, &sample);
768 769 770 771
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
772

773
		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
774 775
		assert(evsel != NULL);

776 777
		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

778
		if (event->header.type == PERF_RECORD_SAMPLE)
779
			++top->samples;
780 781 782

		switch (origin) {
		case PERF_RECORD_MISC_USER:
783 784
			++top->us_samples;
			if (top->hide_user_symbols)
785
				continue;
786
			machine = perf_session__find_host_machine(session);
787 788
			break;
		case PERF_RECORD_MISC_KERNEL:
789 790
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
791
				continue;
792
			machine = perf_session__find_host_machine(session);
793 794
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
795 796
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
797 798
			break;
		case PERF_RECORD_MISC_GUEST_USER:
799
			++top->guest_us_samples;
800 801 802 803 804 805 806 807 808 809
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


810 811 812 813
		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
814
			hists__inc_nr_events(&evsel->hists, event->header.type);
815
			perf_event__process(&top->tool, event, &sample, machine);
816
		} else
817
			++session->hists.stats.nr_unknown_events;
818 819 820
	}
}

static void perf_top__mmap_read(struct perf_top *top)
822
{
823 824
	int i;

825 826
	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
827 828
}

static void perf_top__start_counters(struct perf_top *top)
830
{
831
	struct perf_evsel *counter, *first;
832
	struct perf_evlist *evlist = top->evlist;
833 834

	first = list_entry(evlist->entries.next, struct perf_evsel, node);
835

836 837
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
838 839
		struct xyarray *group_fd = NULL;

840
		if (top->group && counter != first)
841
			group_fd = first->fd;
842

843 844
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

845
		if (top->freq) {
846 847
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
848
			attr->sample_freq = top->freq;
849
		}
850

851 852 853 854 855
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

856 857 858
		if (symbol_conf.use_callchain)
			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

859
		attr->mmap = 1;
860
		attr->comm = 1;
861
		attr->inherit = top->inherit;
862
retry_sample_id:
863
		attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
864
try_again:
865 866
		if (perf_evsel__open(counter, top->evlist->cpus,
				     top->evlist->threads, top->group,
867
				     group_fd) < 0) {
868 869
			int err = errno;

870
			if (err == EPERM || err == EACCES) {
871
				ui__error_paranoid();
872
				goto out_err;
873
			} else if (err == EINVAL && top->sample_id_all_avail) {
874 875 876
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
877
				top->sample_id_all_avail = false;
878
				goto retry_sample_id;
879
			}
880 881 882 883 884
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
885 886
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
887
				if (verbose)
888 889
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
890 891 892 893 894

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
895

896 897 898 899
			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(counter));
				goto out_err;
900 901 902 903
			} else if (err == EMFILE) {
				ui__warning("Too many events are opened.\n"
					    "Try again after reducing the number of events\n");
				goto out_err;
904 905
			}

906 907 908 909 910 911
			ui__warning("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
912
		}
913
	}
914

915
	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
916 917 918 919 920 921 922 923 924 925
		ui__warning("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
926 927
}

static int perf_top__setup_sample_type(struct perf_top *top)
929
{
930
	if (!top->sort_has_symbols) {
931 932 933 934
		if (symbol_conf.use_callchain) {
			ui__warning("Selected -g but \"sym\" not present in --sort/-s.");
			return -EINVAL;
		}
935
	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
936 937 938 939 940 941 942 943 944
		if (callchain_register_param(&callchain_param) < 0) {
			ui__warning("Can't register callchain params.\n");
			return -EINVAL;
		}
	}

	return 0;
}

static int __cmd_top(struct perf_top *top)
946 947
{
	pthread_t thread;
948
	int ret;
949
	/*
950 951
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
952
	 */
953 954
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
955
		return -ENOMEM;
956

957
	ret = perf_top__setup_sample_type(top);
958 959 960
	if (ret)
		goto out_delete;

961
	if (top->target_tid != -1 || top->uid != UINT_MAX)
962
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
963
						  perf_event__process,
964
						  &top->session->host_machine);
965
	else
966 967 968 969 970
		perf_event__synthesize_threads(&top->tool, perf_event__process,
					       &top->session->host_machine);
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
	perf_session__update_sample_type(top->session);
971

972
	/* Wait for a minimal set of events before starting the snapshot */
973
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
974

975
	perf_top__mmap_read(top);
976

977
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
978
							    display_thread), top)) {
979 980 981 982
		printf("Could not create display thread.\n");
		exit(-1);
	}

983
	if (top->realtime_prio) {
984 985
		struct sched_param param;

986
		param.sched_priority = top->realtime_prio;
987 988 989 990 991 992 993
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
994
		u64 hits = top->samples;
995

996
		perf_top__mmap_read(top);
997

998 999
		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1000 1001
	}

1002
out_delete:
1003 1004
	perf_session__delete(top->session);
	top->session = NULL;
1005 1006 1007 1008 1009

	return 0;
}

static int
1010
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1011
{
1012
	struct perf_top *top = (struct perf_top *)opt->value;
1013 1014 1015 1016 1017 1018 1019
	char *tok, *tok2;
	char *endptr;

	/*
	 * --no-call-graph
	 */
	if (unset) {
1020
		top->dont_use_callchains = true;
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
		return 0;
	}

	symbol_conf.use_callchain = true;

	if (!arg)
		return 0;

	tok = strtok((char *)arg, ",");
	if (!tok)
		return -1;

	/* get the output mode */
	if (!strncmp(tok, "graph", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_ABS;

	else if (!strncmp(tok, "flat", strlen(arg)))
		callchain_param.mode = CHAIN_FLAT;

	else if (!strncmp(tok, "fractal", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_REL;

	else if (!strncmp(tok, "none", strlen(arg))) {
		callchain_param.mode = CHAIN_NONE;
		symbol_conf.use_callchain = false;

		return 0;
1048
	} else
1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083
		return -1;

	/* get the min percentage */
	tok = strtok(NULL, ",");
	if (!tok)
		goto setup;

	callchain_param.min_percent = strtod(tok, &endptr);
	if (tok == endptr)
		return -1;

	/* get the print limit */
	tok2 = strtok(NULL, ",");
	if (!tok2)
		goto setup;

	if (tok2[0] != 'c') {
		callchain_param.print_limit = strtod(tok2, &endptr);
		tok2 = strtok(NULL, ",");
		if (!tok2)
			goto setup;
	}

	/* get the call chain order */
	if (!strcmp(tok2, "caller"))
		callchain_param.order = ORDER_CALLER;
	else if (!strcmp(tok2, "callee"))
		callchain_param.order = ORDER_CALLEE;
	else
		return -1;
setup:
	if (callchain_register_param(&callchain_param) < 0) {
		fprintf(stderr, "Can't register callchain params\n");
		return -1;
	}
1084 1085
	return 0;
}

/* Usage strings for the "perf top" command; the array is NULL-terminated. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL
};

/*
 * cmd_top - entry point of the 'perf top' builtin.
 *
 * @argc:   number of entries in @argv, as handed to parse_options().
 * @argv:   command line arguments.
 * @prefix: unused.
 *
 * Parses the command line into a local struct perf_top, creates the event
 * list and its maps, settles the sampling period/frequency, initialises the
 * symbol and sort machinery and then runs __cmd_top().
 *
 * Returns the status of __cmd_top() on the normal path.  Returns -ENOMEM
 * when the event list cannot be allocated, when no default event can be
 * added, or when parse_target_uid() yields UINT_MAX - 1 for a user supplied
 * --uid (status still holds its initial value there).  Returns -1 when
 * symbol__init() fails.  The process exits with EXIT_FAILURE when both the
 * frequency and the count are zero.
 *
 * Every exit taken after the event list has been allocated goes through
 * out_delete_evlist so that the list is released.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.target_pid	     = -1,
		.target_tid	     = -1,
		.uid		     = UINT_MAX,
		.freq		     = 1000, /* 1 KHz */
		.sample_id_all_avail = true,
		.mmap_pages	     = 128,
		.sym_pcnt_filter     = 5,
	};
	/* Writable copy: the call-graph option parser tokenises it in place. */
	char callchain_default_opt[] = "fractal,0.5,callee";
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_INTEGER('c', "count", &top.default_interval,
		    "event period to sample"),
	OPT_INTEGER('p', "pid", &top.target_pid,
		    "profile events on existing process id"),
	OPT_INTEGER('t', "tid", &top.target_tid,
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &top.group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "inherit", &top.inherit,
		    "child tasks inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero,
		    "zero history across updates"),
	OPT_INTEGER('F', "freq", &top.freq,
		    "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
		     "Default: fractal,0.5,callee", &parse_callchain_opt,
		     callchain_default_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
	OPT_END()
	};

	/* Nothing to release yet, so a plain return is fine here. */
	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	/* 'perf top' takes no positional arguments. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* Replace the generic default sort order with top's own default. */
	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	/* --stdio wins over --tui when both are given. */
	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid);
	if (top.uid_str != NULL && top.uid == UINT_MAX - 1)
		goto out_delete_evlist;

	/* CPU and PID are mutually exclusive */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.uid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	/* No -e given: fall back to the default event. */
	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		/* status is still -ENOMEM; release the evlist on the way out. */
		goto out_delete_evlist;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (top.default_interval) {
		top.freq = 0;
	} else if (top.freq) {
		top.default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (!pos->attr.sample_period)
			pos->attr.sample_period = top.default_interval;
	}

	/* Start with the first event in the list selected. */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = sizeof(struct annotation);

	/* Only probe the vmlinux search path when -k was not given. */
	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		status = -1;
		goto out_delete_evlist;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		/*
		 * No explicit -E: size the display from the terminal and
		 * install a SIGWINCH handler.
		 */
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);

out_delete_evlist:
	perf_evlist__delete(top.evlist);

	return status;
}