builtin-top.c 31.9 KB
Newer Older
1
/*
2 3 4 5 6 7
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
8
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
9 10 11 12 13 14 15 16 17 18
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
19
 */
20
#include "builtin.h"
21

22
#include "perf.h"
23

24
#include "util/annotate.h"
25
#include "util/cache.h"
26
#include "util/color.h"
27
#include "util/evlist.h"
28
#include "util/evsel.h"
29 30
#include "util/session.h"
#include "util/symbol.h"
31
#include "util/thread.h"
32
#include "util/thread_map.h"
33
#include "util/top.h"
34
#include "util/util.h"
35
#include <linux/rbtree.h>
36 37
#include "util/parse-options.h"
#include "util/parse-events.h"
38
#include "util/cpumap.h"
39
#include "util/xyarray.h"
40
#include "util/sort.h"
41

42 43
#include "util/debug.h"

44 45
#include <assert.h>
#include <fcntl.h>
46

47
#include <stdio.h>
48 49
#include <termios.h>
#include <unistd.h>
50
#include <inttypes.h>
51

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

67

68
/*
 * Read a terminal dimension from the environment variable @name.
 *
 * Leading decimal digits are parsed the way atoi() would, but the result
 * is only accepted when it is strictly positive and fits in an unsigned
 * short (the type of winsize.ws_row / ws_col).  Returns 0 when the
 * variable is unset, not a number, negative, zero or too large.
 */
static unsigned short term_dimension_from_env(const char *name)
{
	const char *s = getenv(name);
	char *end;
	long v;

	if (s == NULL)
		return 0;

	errno = 0;
	v = strtol(s, &end, 10);
	if (end == s || errno != 0 || v <= 0 || v != (long)(unsigned short)v)
		return 0;

	return (unsigned short)v;
}

/*
 * Fill @ws with the terminal size.
 *
 * Sources are tried in this order:
 *   1. the LINES and COLUMNS environment variables (both must be valid),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available,
 *   3. a fixed 25x80 fallback.
 */
void get_term_dimensions(struct winsize *ws)
{
	unsigned short rows = term_dimension_from_env("LINES");
	unsigned short cols = term_dimension_from_env("COLUMNS");

	if (rows && cols) {
		ws->ws_row = rows;
		ws->ws_col = cols;
		return;
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

90
static void perf_top__update_print_entries(struct perf_top *top)
91
{
92
	top->print_entries = top->winsize.ws_row;
93

94 95
	if (top->print_entries > 9)
		top->print_entries -= 9;
96 97
}

98
/*
 * Window-resize handler.  @arg is interpreted as the struct perf_top whose
 * cached window size and number of printable entries are refreshed; @sig
 * and @info are unused.
 */
static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
{
	struct perf_top *session_top = (struct perf_top *)arg;

	get_term_dimensions(&session_top->winsize);
	perf_top__update_print_entries(session_top);
}

106
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
107 108
{
	struct symbol *sym;
109
	struct annotation *notes;
110
	struct map *map;
111
	int err = -1;
112

113
	if (!he || !he->ms.sym)
114 115
		return -1;

116 117
	sym = he->ms.sym;
	map = he->ms.map;
118 119 120 121

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
122
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
123 124 125
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
126
		return -1;
127 128
	}

129 130 131
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
132 133 134
		goto out_assign;
	}

135
	pthread_mutex_lock(&notes->lock);
136

137
	if (symbol__alloc_hist(sym) < 0) {
138
		pthread_mutex_unlock(&notes->lock);
139 140
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
141
		sleep(1);
142
		return err;
143
	}
144

145
	err = symbol__annotate(sym, map, 0);
146
	if (err == 0) {
147
out_assign:
148
		top->sym_filter_entry = he;
149
	}
150

151
	pthread_mutex_unlock(&notes->lock);
152
	return err;
153 154
}

155
static void __zero_source_counters(struct hist_entry *he)
156
{
157
	struct symbol *sym = he->ms.sym;
158
	symbol__annotate_zero_histograms(sym);
159 160
}

161 162 163
/*
 * Account one sample at address @ip for event @counter in the annotation
 * histogram of @he's symbol.
 *
 * Nothing is recorded when there is no entry/symbol, or when use_browser
 * is not 1 and the symbol is not the one currently selected in
 * top->sym_filter_entry.  The annotation lock is only tried, never waited
 * for: if it is busy the sample is dropped.
 */
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;
	struct map *map;

	if (he == NULL || he->ms.sym == NULL)
		return;

	sym = he->ms.sym;

	if (use_browser != 1 &&
	    (top->sym_filter_entry == NULL ||
	     top->sym_filter_entry->ms.sym != sym))
		return;

	notes = symbol__annotation(sym);
	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	/* Allocate the histogram on first use. */
	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	map = he->ms.map;
	ip = map->map_ip(map, ip);
	symbol__inc_addr_samples(sym, map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

193
static void perf_top__show_details(struct perf_top *top)
194
{
195
	struct hist_entry *he = top->sym_filter_entry;
196
	struct annotation *notes;
197
	struct symbol *symbol;
198
	int more;
199

200
	if (!he)
201 202
		return;

203
	symbol = he->ms.sym;
204 205 206 207 208 209
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
210

211 212
	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
213

214 215 216 217
	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
218
	else
219
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
220
	if (more != 0)
221
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
222 223
out_unlock:
	pthread_mutex_unlock(&notes->lock);
224
}
225 226 227

/*
 * ANSI/VT100 control sequence written before each refresh: "ESC [ H" moves
 * the cursor to the top-left corner, "ESC [ 2 J" erases the whole display.
 * Spelled with octal escapes so no raw control bytes live in the source.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

228 229 230
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
231
{
232 233 234 235 236 237 238 239
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
240
}
241

242
static void perf_top__print_sym_table(struct perf_top *top)
243
{
244 245
	char bf[160];
	int printed = 0;
246
	const int win_width = top->winsize.ws_col - 1;
247

248
	puts(CONSOLE_CLEAR);
249

250
	perf_top__header_snprintf(top, bf, sizeof(bf));
251
	printf("%s\n", bf);
252

253
	perf_top__reset_sample_counters(top);
254

255
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
256

257 258 259 260
	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
261 262
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
263
			      top->sym_evsel->hists.stats.nr_lost_warned);
264
		++printed;
265 266
	}

267 268
	if (top->sym_filter_entry) {
		perf_top__show_details(top);
269 270 271
		return;
	}

272 273 274 275 276 277 278
	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
279
	putchar('\n');
280 281
	hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
282 283
}

284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
/*
 * Print "\n<msg>: " on stdout, read one line from stdin and, when the line
 * consists only of decimal digits and the value fits in a non-negative
 * int, store it in *target.
 *
 * *target is left untouched on read failure/EOF, on any non-digit
 * character and on out-of-range values.  An empty line stores 0.
 */
static void prompt_integer(int *target, const char *msg)
{
	/* getline() allocates the buffer itself when given NULL/0. */
	char *buf = NULL, *p;
	size_t len = 0;
	unsigned long value;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;	/* buf may have been allocated even on failure */

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* ctype functions need an unsigned char value */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	value = strtoul(buf, NULL, 10);
	/* (unsigned int)-1 >> 1 is the largest value an int can hold */
	if (errno == ERANGE || value > ((unsigned int)-1 >> 1))
		goto out_free;

	*target = (int)value;
out_free:
	free(buf);
}

/*
 * Prompt for a percentage with @msg and store it in *target when the
 * entered value lies in [0, 100].
 *
 * The scratch value starts out as the current *target, so input that
 * prompt_integer() rejects leaves the setting as it was instead of
 * resetting it to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = *target;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

319
static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
320 321
{
	char *buf = malloc(0), *p;
322
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
323
	struct rb_node *next;
324 325 326 327 328
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
329
		top->sym_filter_entry = NULL;
330 331 332 333 334 335 336 337 338 339
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

340
	next = rb_first(&top->sym_evsel->hists.entries);
341 342 343 344
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
345 346
			break;
		}
347
		next = rb_next(&n->rb_node);
348 349 350
	}

	if (!found) {
351
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
352 353
		sleep(1);
	} else
354
		perf_top__parse_source(top, found);
355 356 357 358 359

out_free:
	free(buf);
}

360
static void perf_top__print_mapped_keys(struct perf_top *top)
361
{
362 363
	char *name = NULL;

364 365
	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
366 367 368 369
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
370 371
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
372

373 374
	if (top->evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
375

376
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
377

378
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
379 380
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
381

382
	fprintf(stdout,
383
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
384
		top->hide_kernel_symbols ? "yes" : "no");
385 386
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
387 388
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
389 390 391
	fprintf(stdout, "\t[qQ]    quit.\n");
}

392
static int perf_top__key_mapped(struct perf_top *top, int c)
393 394 395 396 397 398 399 400
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
401 402
		case 'K':
		case 'U':
403 404 405
		case 'F':
		case 's':
		case 'S':
406 407
			return 1;
		case 'E':
408
			return top->evlist->nr_entries > 1 ? 1 : 0;
409 410
		default:
			break;
411 412 413
	}

	return 0;
414 415
}

416
static void perf_top__handle_keypress(struct perf_top *top, int c)
417
{
418
	if (!perf_top__key_mapped(top, c)) {
419 420 421
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

422
		perf_top__print_mapped_keys(top);
423 424 425 426 427 428 429 430 431 432 433 434 435 436
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
437
		if (!perf_top__key_mapped(top, c))
438 439 440
			return;
	}

441 442
	switch (c) {
		case 'd':
443 444 445
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
446 447
			break;
		case 'e':
448 449 450 451 452 453 454 455
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
456 457
			} else
				signal(SIGWINCH, SIG_DFL);
458 459
			break;
		case 'E':
460
			if (top->evlist->nr_entries > 1) {
461 462 463
				/* Select 0 as the default event: */
				int counter = 0;

464
				fprintf(stderr, "\nAvailable events:");
465

466 467
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
468

469
				prompt_integer(&counter, "Enter details event counter");
470

471 472 473
				if (counter >= top->evlist->nr_entries) {
					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
474
					sleep(1);
475
					break;
476
				}
477 478
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
479
						break;
480
			} else
481
				top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
482 483
			break;
		case 'f':
484
			prompt_integer(&top->count_filter, "Enter display event count filter");
485 486
			break;
		case 'F':
487 488
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
489
			break;
490
		case 'K':
491
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
492
			break;
493 494 495
		case 'q':
		case 'Q':
			printf("exiting.\n");
496 497
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
498 499
			exit(0);
		case 's':
500
			perf_top__prompt_symbol(top, "Enter details symbol");
501 502
			break;
		case 'S':
503
			if (!top->sym_filter_entry)
504 505
				break;
			else {
506
				struct hist_entry *syme = top->sym_filter_entry;
507

508
				top->sym_filter_entry = NULL;
509 510 511
				__zero_source_counters(syme);
			}
			break;
512
		case 'U':
513
			top->hide_user_symbols = !top->hide_user_symbols;
514
			break;
515
		case 'z':
516
			top->zero = !top->zero;
517
			break;
518 519
		default:
			break;
520 521 522
	}
}

523 524 525 526 527 528 529 530 531 532
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;
	perf_top__reset_sample_counters(t);

	if (t->evlist->selected != NULL)
		t->sym_evsel = t->evlist->selected;

	hists__collapse_resort_threaded(&t->sym_evsel->hists);
	hists__output_resort_threaded(&t->sym_evsel->hists);
533
	hists__decay_entries_threaded(&t->sym_evsel->hists,
534 535
				      t->hide_user_symbols,
				      t->hide_kernel_symbols);
536 537
}

538
static void *display_thread_tui(void *arg)
539
{
540
	struct perf_top *top = arg;
541 542
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";

543 544
	perf_top__sort_new_samples(top);
	perf_evlist__tui_browse_hists(top->evlist, help,
545
				      perf_top__sort_new_samples,
546
				      top, top->delay_secs);
547

548 549 550 551 552
	exit_browser(0);
	exit(0);
	return NULL;
}

553
static void *display_thread(void *arg)
554
{
555
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
556
	struct termios tc, save;
557
	struct perf_top *top = arg;
558 559 560 561 562 563 564
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
565

566
	pthread__unblock_sigwinch();
567
repeat:
568
	delay_msecs = top->delay_secs * 1000;
569 570 571
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
572

573
	while (1) {
574
		perf_top__print_sym_table(top);
575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall trhu */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
591 592 593
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

594
	perf_top__handle_keypress(top, c);
595
	goto repeat;
596 597 598 599

	return NULL;
}

600
/*
 * NULL-terminated list of symbol names; symbol_filter() marks a symbol as
 * ignored when its name is found here.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

615
/*
 * Symbol filter passed to perf_event__preprocess_sample().
 *
 * Returns 1 for section-marker style names (_text, _etext, _sinittext,
 * names starting with init_module/cleanup_module, names containing
 * _text_start/_text_end) and 0 otherwise.  As a side effect, symbols whose
 * name is listed in skip_symbols[] get sym->ignore set.  @map is unused.
 */
static int symbol_filter(struct map *map __used, struct symbol *sym)
{
	const char *name = sym->name;
	const char **skip;

	/*
	 * ppc64 uses function descriptors and the symbol name carries a
	 * leading '.'.  Skip it before comparing.
	 */
	if (*name == '.')
		++name;

	if (strcmp(name, "_text") == 0 ||
	    strcmp(name, "_etext") == 0 ||
	    strcmp(name, "_sinittext") == 0 ||
	    strncmp("init_module", name, 11) == 0 ||
	    strncmp("cleanup_module", name, 14) == 0 ||
	    strstr(name, "_text_start") != NULL ||
	    strstr(name, "_text_end") != NULL)
		return 1;

	for (skip = skip_symbols; *skip != NULL; skip++) {
		if (strcmp(*skip, name) == 0) {
			sym->ignore = true;
			break;
		}
	}

	return 0;
}

646 647
static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
648
				       struct perf_evsel *evsel,
649
				       struct perf_sample *sample,
650
				       struct machine *machine)
651
{
652
	struct perf_top *top = container_of(tool, struct perf_top, tool);
653
	struct symbol *parent = NULL;
654
	u64 ip = event->ip.ip;
655
	struct addr_location al;
656
	int err;
657

658
	if (!machine && perf_guest) {
659
		pr_err("Can't find guest [%d]'s kernel information\n",
660
			event->ip.pid);
661 662 663
		return;
	}

664
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
665
		top->exact_samples++;
666

667
	if (perf_event__preprocess_sample(event, machine, &al, sample,
668
					  symbol_filter) < 0 ||
669
	    al.filtered)
670
		return;
671

672
	if (!top->kptr_restrict_warned &&
673 674 675 676 677 678 679 680 681 682
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
683
		top->kptr_restrict_warned = true;
684 685
	}

686
	if (al.sym == NULL) {
687
		const char *msg = "Kernel samples will not be resolved.\n";
688 689 690 691 692 693 694 695 696 697 698
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
699
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
700
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
701
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
702 703 704 705 706 707 708 709 710 711
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
712
			top->vmlinux_warned = true;
713
		}
714 715
	}

716 717
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
718

719 720
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
721 722
			err = machine__resolve_callchain(machine, evsel, al.thread,
							 sample->callchain, &parent);
723 724 725 726
			if (err)
				return;
		}

727
		he = perf_evsel__add_hist_entry(evsel, &al, sample);
728 729 730
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
731
		}
732

733
		if (symbol_conf.use_callchain) {
734
			err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
735 736 737 738 739
					       sample->period);
			if (err)
				return;
		}

740 741
		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
742
	}
743 744

	return;
745 746
}

747
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
748
{
749
	struct perf_sample sample;
750
	struct perf_evsel *evsel;
751
	struct perf_session *session = top->session;
752
	union perf_event *event;
753 754
	struct machine *machine;
	u8 origin;
755
	int ret;
756

757 758
	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
		ret = perf_session__parse_sample(session, event, &sample);
759 760 761 762
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
763

764
		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
765 766
		assert(evsel != NULL);

767 768
		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

769
		if (event->header.type == PERF_RECORD_SAMPLE)
770
			++top->samples;
771 772 773

		switch (origin) {
		case PERF_RECORD_MISC_USER:
774 775
			++top->us_samples;
			if (top->hide_user_symbols)
776
				continue;
777
			machine = perf_session__find_host_machine(session);
778 779
			break;
		case PERF_RECORD_MISC_KERNEL:
780 781
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
782
				continue;
783
			machine = perf_session__find_host_machine(session);
784 785
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
786 787
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
788 789
			break;
		case PERF_RECORD_MISC_GUEST_USER:
790
			++top->guest_us_samples;
791 792 793 794 795 796 797 798 799 800
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


801 802 803 804
		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
805
			hists__inc_nr_events(&evsel->hists, event->header.type);
806
			perf_event__process(&top->tool, event, &sample, machine);
807
		} else
808
			++session->hists.stats.nr_unknown_events;
809 810 811
	}
}

812
static void perf_top__mmap_read(struct perf_top *top)
813
{
814 815
	int i;

816 817
	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
818 819
}

820
static void perf_top__start_counters(struct perf_top *top)
821
{
822
	struct perf_evsel *counter, *first;
823
	struct perf_evlist *evlist = top->evlist;
824 825

	first = list_entry(evlist->entries.next, struct perf_evsel, node);
826

827 828
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
829 830
		struct xyarray *group_fd = NULL;

831
		if (top->group && counter != first)
832
			group_fd = first->fd;
833

834 835
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

836
		if (top->freq) {
837 838
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
839
			attr->sample_freq = top->freq;
840
		}
841

842 843 844 845 846
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

847 848 849
		if (symbol_conf.use_callchain)
			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

850
		attr->mmap = 1;
851
		attr->comm = 1;
852
		attr->inherit = top->inherit;
853
retry_sample_id:
854
		attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
855
try_again:
856 857
		if (perf_evsel__open(counter, top->evlist->cpus,
				     top->evlist->threads, top->group,
858
				     group_fd) < 0) {
859 860
			int err = errno;

861
			if (err == EPERM || err == EACCES) {
862
				ui__error_paranoid();
863
				goto out_err;
864
			} else if (err == EINVAL && top->sample_id_all_avail) {
865 866 867
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
868
				top->sample_id_all_avail = false;
869
				goto retry_sample_id;
870
			}
871 872 873 874 875
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
876 877
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
878
				if (verbose)
879 880
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
881 882 883 884 885

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
886

887 888 889 890
			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(counter));
				goto out_err;
891 892 893 894
			} else if (err == EMFILE) {
				ui__warning("Too many events are opened.\n"
					    "Try again after reducing the number of events\n");
				goto out_err;
895 896
			}

897 898 899 900 901 902
			ui__warning("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
903
		}
904
	}
905

906
	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
907 908 909 910 911 912 913 914 915 916
		ui__warning("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
917 918
}

919
static int perf_top__setup_sample_type(struct perf_top *top)
920
{
921
	if (!top->sort_has_symbols) {
922 923 924 925
		if (symbol_conf.use_callchain) {
			ui__warning("Selected -g but \"sym\" not present in --sort/-s.");
			return -EINVAL;
		}
926
	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
927 928 929 930 931 932 933 934 935
		if (callchain_register_param(&callchain_param) < 0) {
			ui__warning("Can't register callchain params.\n");
			return -EINVAL;
		}
	}

	return 0;
}

936
static int __cmd_top(struct perf_top *top)
937 938
{
	pthread_t thread;
939
	int ret;
940
	/*
941 942
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
943
	 */
944 945
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
946
		return -ENOMEM;
947

948
	ret = perf_top__setup_sample_type(top);
949 950 951
	if (ret)
		goto out_delete;

952 953
	if (top->target_tid != -1)
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
954
						  perf_event__process,
955
						  &top->session->host_machine);
956
	else
957 958 959 960 961
		perf_event__synthesize_threads(&top->tool, perf_event__process,
					       &top->session->host_machine);
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
	perf_session__update_sample_type(top->session);
962

963
	/* Wait for a minimal set of events before starting the snapshot */
964
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
965

966
	perf_top__mmap_read(top);
967

968
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
969
							    display_thread), top)) {
970 971 972 973
		printf("Could not create display thread.\n");
		exit(-1);
	}

974
	if (top->realtime_prio) {
975 976
		struct sched_param param;

977
		param.sched_priority = top->realtime_prio;
978 979 980 981 982 983 984
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
985
		u64 hits = top->samples;
986

987
		perf_top__mmap_read(top);
988

989 990
		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
991 992
	}

993
out_delete:
994 995
	perf_session__delete(top->session);
	top->session = NULL;
996 997 998 999 1000

	return 0;
}

static int
1001
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1002
{
1003
	struct perf_top *top = (struct perf_top *)opt->value;
1004 1005 1006 1007 1008 1009 1010
	char *tok, *tok2;
	char *endptr;

	/*
	 * --no-call-graph
	 */
	if (unset) {
1011
		top->dont_use_callchains = true;
1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038
		return 0;
	}

	symbol_conf.use_callchain = true;

	if (!arg)
		return 0;

	tok = strtok((char *)arg, ",");
	if (!tok)
		return -1;

	/* get the output mode */
	if (!strncmp(tok, "graph", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_ABS;

	else if (!strncmp(tok, "flat", strlen(arg)))
		callchain_param.mode = CHAIN_FLAT;

	else if (!strncmp(tok, "fractal", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_REL;

	else if (!strncmp(tok, "none", strlen(arg))) {
		callchain_param.mode = CHAIN_NONE;
		symbol_conf.use_callchain = false;

		return 0;
1039
	} else
1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074
		return -1;

	/* get the min percentage */
	tok = strtok(NULL, ",");
	if (!tok)
		goto setup;

	callchain_param.min_percent = strtod(tok, &endptr);
	if (tok == endptr)
		return -1;

	/* get the print limit */
	tok2 = strtok(NULL, ",");
	if (!tok2)
		goto setup;

	if (tok2[0] != 'c') {
		callchain_param.print_limit = strtod(tok2, &endptr);
		tok2 = strtok(NULL, ",");
		if (!tok2)
			goto setup;
	}

	/* get the call chain order */
	if (!strcmp(tok2, "caller"))
		callchain_param.order = ORDER_CALLER;
	else if (!strcmp(tok2, "callee"))
		callchain_param.order = ORDER_CALLEE;
	else
		return -1;
setup:
	if (callchain_register_param(&callchain_param) < 0) {
		fprintf(stderr, "Can't register callchain params\n");
		return -1;
	}
1075 1076
	return 0;
}
1077 1078 1079 1080 1081 1082

/* NULL-terminated usage lines for 'perf top'. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL,
};

1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098
/*
 * cmd_top - entry point of the builtin "top" command.
 *
 * Builds a struct perf_top with its defaults, parses the command line into
 * it, creates the event list and its thread/cpu maps, initializes the symbol
 * and sorting state and then hands control to __cmd_top().
 *
 * @argc, @argv: command line, passed straight to parse_options(); any
 *               argument left over after option parsing prints the usage
 *               text via usage_with_options().
 * @prefix:      not referenced in this function.
 *
 * Returns the value of __cmd_top() on the normal path, -ENOMEM when the
 * event list cannot be allocated or the default event cannot be added, and
 * -1 when symbol__init() fails.  The process is terminated with
 * exit(EXIT_FAILURE) when both the sampling frequency and the sampling
 * count end up being zero.
 *
 * Every return taken after top.evlist has been allocated goes through the
 * out_delete_evlist label so that the list is released on error paths too.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.target_pid	     = -1,
		.target_tid	     = -1,
		.freq		     = 1000, /* 1 KHz */
		.sample_id_all_avail = true,
		.mmap_pages	     = 128,
		.sym_pcnt_filter     = 5,
	};
	/* Writable copy: used as the default argument of --call-graph. */
	char callchain_default_opt[] = "fractal,0.5,callee";
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_INTEGER('c', "count", &top.default_interval,
		    "event period to sample"),
	OPT_INTEGER('p', "pid", &top.target_pid,
		    "profile events on existing process id"),
	OPT_INTEGER('t', "tid", &top.target_tid,
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &top.group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "inherit", &top.inherit,
		    "child tasks inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero,
		    "zero history across updates"),
	OPT_INTEGER('F', "freq", &top.freq,
		    "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
		     "Default: fractal,0.5,callee", &parse_callchain_opt,
		     callchain_default_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_END()
	};

	/* Must exist before option parsing: -e stores events into it. */
	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* No explicit --sort: fall back to top's own default keys. */
	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	/* --stdio takes precedence over --tui when both are given. */
	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	/*
	 * CPU and PID are mutually exclusive
	 *
	 * NOTE(review): this test runs before target_pid is copied into
	 * target_tid below, so it only triggers for -t, not for -p.  Confirm
	 * whether -p combined with -C is meant to be accepted.
	 */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	/* No -e given: fall back to the default event. */
	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		status = -ENOMEM;
		goto out_delete_evlist;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (top.default_interval) {
		top.freq = 0;
	} else if (top.freq) {
		top.default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (!pos->attr.sample_period)
			pos->attr.sample_period = top.default_interval;
	}

	/* The first event on the list is the initially selected one. */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = sizeof(struct annotation);

	/* Only probe the vmlinux search path when -k was not given. */
	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		status = -1;
		goto out_delete_evlist;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		/*
		 * No -E given: derive the entry count now and install a
		 * SIGWINCH handler (perf_top__sig_winch).
		 */
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);

out_delete_evlist:
	perf_evlist__delete(top.evlist);

	return status;
}