builtin-top.c 31.7 KB
Newer Older
1
/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
20
#include "builtin.h"
21

22
#include "perf.h"
23

24
#include "util/annotate.h"
25
#include "util/cache.h"
26
#include "util/color.h"
27
#include "util/evlist.h"
28
#include "util/evsel.h"
29 30
#include "util/session.h"
#include "util/symbol.h"
31
#include "util/thread.h"
32
#include "util/thread_map.h"
33
#include "util/top.h"
34
#include "util/util.h"
35
#include <linux/rbtree.h>
36 37
#include "util/parse-options.h"
#include "util/parse-events.h"
38
#include "util/cpumap.h"
39
#include "util/xyarray.h"
40
#include "util/sort.h"
41

42 43
#include "util/debug.h"

44 45
#include <assert.h>
#include <fcntl.h>
46

47
#include <stdio.h>
48 49
#include <termios.h>
#include <unistd.h>
50
#include <inttypes.h>
51

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

67

68
/*
 * Fill @ws with the terminal dimensions.
 *
 * Sources are tried in this order and the first one that yields a
 * non-zero row and column count wins:
 *   1. the LINES and COLUMNS environment variables (both must be set),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when the ioctl exists,
 *   3. a fixed 25x80 fallback.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *value = getenv("LINES");

	if (value != NULL) {
		ws->ws_row = atoi(value);
		/* COLUMNS is only consulted when LINES was present. */
		value = getenv("COLUMNS");
	}

	if (value != NULL) {
		ws->ws_col = atoi(value);
		if (ws->ws_row != 0 && ws->ws_col != 0)
			return;
	}

#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 && ws->ws_row != 0 && ws->ws_col != 0)
		return;
#endif

	ws->ws_row = 25;
	ws->ws_col = 80;
}

90
static void perf_top__update_print_entries(struct perf_top *top)
91
{
92
	top->print_entries = top->winsize.ws_row;
93

94 95
	if (top->print_entries > 9)
		top->print_entries -= 9;
96 97
}

98
/*
 * SIGWINCH handler: re-read the terminal size into top->winsize and
 * refresh the number of printable entries.
 *
 * @arg is treated as the struct perf_top to update, which is what the
 * direct call in the hotkey handler passes.
 * NOTE(review): when the kernel invokes this as an SA_SIGINFO handler the
 * third argument is the ucontext pointer, not a struct perf_top — confirm
 * how the asynchronous path is expected to find the perf_top.
 */
static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
{
	struct perf_top *t = arg;
	struct winsize *ws = &t->winsize;

	get_term_dimensions(ws);
	perf_top__update_print_entries(t);
}

106
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
107 108
{
	struct symbol *sym;
109
	struct annotation *notes;
110
	struct map *map;
111
	int err = -1;
112

113
	if (!he || !he->ms.sym)
114 115
		return -1;

116 117
	sym = he->ms.sym;
	map = he->ms.map;
118 119 120 121

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
122
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
123 124 125
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
126
		return -1;
127 128
	}

129 130 131
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
132 133 134
		goto out_assign;
	}

135
	pthread_mutex_lock(&notes->lock);
136

137
	if (symbol__alloc_hist(sym) < 0) {
138
		pthread_mutex_unlock(&notes->lock);
139 140
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
141
		sleep(1);
142
		return err;
143
	}
144

145
	err = symbol__annotate(sym, map, 0);
146
	if (err == 0) {
147
out_assign:
148
		top->sym_filter_entry = he;
149
	}
150

151
	pthread_mutex_unlock(&notes->lock);
152
	return err;
153 154
}

155
static void __zero_source_counters(struct hist_entry *he)
156
{
157
	struct symbol *sym = he->ms.sym;
158
	symbol__annotate_zero_histograms(sym);
159 160
}

161 162 163
/*
 * Account one sample at @ip for event @counter in the annotation
 * histogram of the symbol behind @he.
 *
 * Nothing is recorded when @he has no symbol, when use_browser != 1 and
 * the symbol is not the one currently selected for annotation, or when
 * the annotation lock is busy (the sample is then dropped rather than
 * waited for).
 */
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;

	if (he == NULL || he->ms.sym == NULL)
		return;

	if (use_browser != 1 &&
	    (top->sym_filter_entry == NULL ||
	     top->sym_filter_entry->ms.sym != he->ms.sym))
		return;

	sym = he->ms.sym;
	notes = symbol__annotation(sym);

	/* Non-blocking: skip this sample if somebody else holds the lock. */
	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	/* Translate the sampled address through the map before accounting. */
	ip = he->ms.map->map_ip(he->ms.map, ip);
	symbol__inc_addr_samples(sym, he->ms.map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

193
static void perf_top__show_details(struct perf_top *top)
194
{
195
	struct hist_entry *he = top->sym_filter_entry;
196
	struct annotation *notes;
197
	struct symbol *symbol;
198
	int more;
199

200
	if (!he)
201 202
		return;

203
	symbol = he->ms.sym;
204 205 206 207 208 209
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
210

211 212
	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
213

214 215 216 217
	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
218
	else
219
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
220
	if (more != 0)
221
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
222 223
out_unlock:
	pthread_mutex_unlock(&notes->lock);
224
}
225 226 227

/* ANSI escape sequence: move the cursor home (ESC[H), then erase the display (ESC[2J). */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

228 229 230
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
231
{
232 233 234 235 236 237
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

238
	evsel->hists.stats.total_period += sample->period;
239 240
	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
241
}
242

243
static void perf_top__print_sym_table(struct perf_top *top)
244
{
245 246
	char bf[160];
	int printed = 0;
247
	const int win_width = top->winsize.ws_col - 1;
248

249
	puts(CONSOLE_CLEAR);
250

251
	perf_top__header_snprintf(top, bf, sizeof(bf));
252
	printf("%s\n", bf);
253

254
	perf_top__reset_sample_counters(top);
255

256
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
257

258 259 260 261
	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
262 263
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
264
			      top->sym_evsel->hists.stats.nr_lost_warned);
265
		++printed;
266 267
	}

268 269
	if (top->sym_filter_entry) {
		perf_top__show_details(top);
270 271 272
		return;
	}

273 274 275 276 277 278 279
	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
280
	putchar('\n');
281 282
	hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
283 284
}

285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
/*
 * Print "\n<msg>: " and read one line from stdin as a decimal number.
 *
 * *target is overwritten only when the line consists solely of decimal
 * digits; an empty line therefore stores 0.  On read failure/EOF or when
 * any other character is present, *target is left untouched.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t len = 0;

	fprintf(stdout, "\n%s: ", msg);
	/* getline() may have allocated even when it fails, so always free. */
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> functions need an unsigned char value. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	*target = strtoul(buf, NULL, 10);
out_free:
	free(buf);
}

/*
 * Prompt for a percentage and store it in *target when it lies in
 * [0, 100].
 *
 * The scratch value starts out of range so that input rejected by
 * prompt_integer() (which then leaves it untouched) does not silently
 * reset *target to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

320
static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
321 322
{
	char *buf = malloc(0), *p;
323
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
324
	struct rb_node *next;
325 326 327 328 329
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
330
		top->sym_filter_entry = NULL;
331 332 333 334 335 336 337 338 339 340
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

341
	next = rb_first(&top->sym_evsel->hists.entries);
342 343 344 345
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
346 347
			break;
		}
348
		next = rb_next(&n->rb_node);
349 350 351
	}

	if (!found) {
352
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
353 354
		sleep(1);
	} else
355
		perf_top__parse_source(top, found);
356 357 358 359 360

out_free:
	free(buf);
}

361
static void perf_top__print_mapped_keys(struct perf_top *top)
362
{
363 364
	char *name = NULL;

365 366
	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
367 368 369 370
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
371 372
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
373

374 375
	if (top->evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
376

377
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
378

379
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
380 381
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
382

383
	fprintf(stdout,
384
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
385
		top->hide_kernel_symbols ? "yes" : "no");
386 387
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
388 389
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
390 391 392
	fprintf(stdout, "\t[qQ]    quit.\n");
}

393
static int perf_top__key_mapped(struct perf_top *top, int c)
394 395 396 397 398 399 400 401
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
402 403
		case 'K':
		case 'U':
404 405 406
		case 'F':
		case 's':
		case 'S':
407 408
			return 1;
		case 'E':
409
			return top->evlist->nr_entries > 1 ? 1 : 0;
410 411
		default:
			break;
412 413 414
	}

	return 0;
415 416
}

417
static void perf_top__handle_keypress(struct perf_top *top, int c)
418
{
419
	if (!perf_top__key_mapped(top, c)) {
420 421 422
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

423
		perf_top__print_mapped_keys(top);
424 425 426 427 428 429 430 431 432 433 434 435 436 437
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
438
		if (!perf_top__key_mapped(top, c))
439 440 441
			return;
	}

442 443
	switch (c) {
		case 'd':
444 445 446
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
447 448
			break;
		case 'e':
449 450 451 452 453 454 455 456
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
457 458
			} else
				signal(SIGWINCH, SIG_DFL);
459 460
			break;
		case 'E':
461
			if (top->evlist->nr_entries > 1) {
462 463 464
				/* Select 0 as the default event: */
				int counter = 0;

465
				fprintf(stderr, "\nAvailable events:");
466

467 468
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
469

470
				prompt_integer(&counter, "Enter details event counter");
471

472 473 474
				if (counter >= top->evlist->nr_entries) {
					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
475
					sleep(1);
476
					break;
477
				}
478 479
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
480
						break;
481
			} else
482
				top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
483 484
			break;
		case 'f':
485
			prompt_integer(&top->count_filter, "Enter display event count filter");
486 487
			break;
		case 'F':
488 489
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
490
			break;
491
		case 'K':
492
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
493
			break;
494 495 496
		case 'q':
		case 'Q':
			printf("exiting.\n");
497 498
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
499 500
			exit(0);
		case 's':
501
			perf_top__prompt_symbol(top, "Enter details symbol");
502 503
			break;
		case 'S':
504
			if (!top->sym_filter_entry)
505 506
				break;
			else {
507
				struct hist_entry *syme = top->sym_filter_entry;
508

509
				top->sym_filter_entry = NULL;
510 511 512
				__zero_source_counters(syme);
			}
			break;
513
		case 'U':
514
			top->hide_user_symbols = !top->hide_user_symbols;
515
			break;
516
		case 'z':
517
			top->zero = !top->zero;
518
			break;
519 520
		default:
			break;
521 522 523
	}
}

524 525 526 527 528 529 530 531 532 533
/*
 * Callback used by the TUI thread: reset the sample counters, follow the
 * event list's selected event (if any) and collapse, resort and decay the
 * histogram entries of the active event.
 *
 * @arg is the struct perf_top being displayed.
 */
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *top = arg;
	struct perf_evsel *selected;

	perf_top__reset_sample_counters(top);

	selected = top->evlist->selected;
	if (selected != NULL)
		top->sym_evsel = selected;

	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
}

539
static void *display_thread_tui(void *arg)
540
{
541
	struct perf_top *top = arg;
542 543
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";

544 545
	perf_top__sort_new_samples(top);
	perf_evlist__tui_browse_hists(top->evlist, help,
546
				      perf_top__sort_new_samples,
547
				      top, top->delay_secs);
548

549 550 551 552 553
	exit_browser(0);
	exit(0);
	return NULL;
}

554
static void *display_thread(void *arg)
555
{
556
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
557
	struct termios tc, save;
558
	struct perf_top *top = arg;
559 560 561 562 563 564 565
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
566

567
	pthread__unblock_sigwinch();
568
repeat:
569
	delay_msecs = top->delay_secs * 1000;
570 571 572
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
573

574
	while (1) {
575
		perf_top__print_sym_table(top);
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall trhu */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
592 593 594
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

595
	perf_top__handle_keypress(top, c);
596
	goto repeat;
597 598 599 600

	return NULL;
}

601
/*
 * Names of symbols whose samples get tagged to be skipped;
 * the list is NULL-terminated.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL	/* terminator */
};

616
/*
 * Symbol filter handed to perf_event__preprocess_sample().
 *
 * Returns 1 for section-marker style symbols (_text, _etext, _sinittext,
 * init_module*, cleanup_module*, *_text_start*, *_text_end*) and 0 for
 * everything else.  Symbols listed in skip_symbols[] still return 0 but
 * get their ->ignore flag set.
 */
static int symbol_filter(struct map *map __used, struct symbol *sym)
{
	const char *name = sym->name;
	const char **skip;

	/*
	 * ppc64 uses function descriptors and prefixes every instruction
	 * address symbol with a '.'. Skip it.
	 */
	if (*name == '.')
		name++;

	if (strcmp(name, "_text") == 0 ||
	    strcmp(name, "_etext") == 0 ||
	    strcmp(name, "_sinittext") == 0 ||
	    strncmp("init_module", name, 11) == 0 ||
	    strncmp("cleanup_module", name, 14) == 0 ||
	    strstr(name, "_text_start") != NULL ||
	    strstr(name, "_text_end") != NULL)
		return 1;

	for (skip = skip_symbols; *skip != NULL; skip++) {
		if (strcmp(*skip, name) == 0) {
			sym->ignore = true;
			break;
		}
	}

	return 0;
}

647 648
static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
649
				       struct perf_evsel *evsel,
650
				       struct perf_sample *sample,
651
				       struct machine *machine)
652
{
653
	struct perf_top *top = container_of(tool, struct perf_top, tool);
654
	struct symbol *parent = NULL;
655
	u64 ip = event->ip.ip;
656
	struct addr_location al;
657
	int err;
658

659
	if (!machine && perf_guest) {
660
		pr_err("Can't find guest [%d]'s kernel information\n",
661
			event->ip.pid);
662 663 664
		return;
	}

665
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
666
		top->exact_samples++;
667

668
	if (perf_event__preprocess_sample(event, machine, &al, sample,
669
					  symbol_filter) < 0 ||
670
	    al.filtered)
671
		return;
672

673
	if (!top->kptr_restrict_warned &&
674 675 676 677 678 679 680 681 682 683
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
684
		top->kptr_restrict_warned = true;
685 686
	}

687
	if (al.sym == NULL) {
688
		const char *msg = "Kernel samples will not be resolved.\n";
689 690 691 692 693 694 695 696 697 698 699
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
700
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
701
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
702
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
703 704 705 706 707 708 709 710 711 712
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
713
			top->vmlinux_warned = true;
714
		}
715 716
	}

717 718
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
719

720 721
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
722 723
			err = machine__resolve_callchain(machine, evsel, al.thread,
							 sample->callchain, &parent);
724 725 726 727
			if (err)
				return;
		}

728
		he = perf_evsel__add_hist_entry(evsel, &al, sample);
729 730 731
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
732
		}
733

734
		if (symbol_conf.use_callchain) {
735
			err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
736 737 738 739 740
					       sample->period);
			if (err)
				return;
		}

741 742
		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
743
	}
744 745

	return;
746 747
}

748
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
749
{
750
	struct perf_sample sample;
751
	struct perf_evsel *evsel;
752
	struct perf_session *session = top->session;
753
	union perf_event *event;
754 755
	struct machine *machine;
	u8 origin;
756
	int ret;
757

758 759
	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
		ret = perf_session__parse_sample(session, event, &sample);
760 761 762 763
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
764

765
		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
766 767
		assert(evsel != NULL);

768 769
		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

770
		if (event->header.type == PERF_RECORD_SAMPLE)
771
			++top->samples;
772 773 774

		switch (origin) {
		case PERF_RECORD_MISC_USER:
775 776
			++top->us_samples;
			if (top->hide_user_symbols)
777
				continue;
778
			machine = perf_session__find_host_machine(session);
779 780
			break;
		case PERF_RECORD_MISC_KERNEL:
781 782
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
783
				continue;
784
			machine = perf_session__find_host_machine(session);
785 786
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
787 788
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
789 790
			break;
		case PERF_RECORD_MISC_GUEST_USER:
791
			++top->guest_us_samples;
792 793 794 795 796 797 798 799 800 801
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


802 803 804 805
		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
806
			hists__inc_nr_events(&evsel->hists, event->header.type);
807
			perf_event__process(&top->tool, event, &sample, machine);
808
		} else
809
			++session->hists.stats.nr_unknown_events;
810 811 812
	}
}

813
static void perf_top__mmap_read(struct perf_top *top)
814
{
815 816
	int i;

817 818
	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
819 820
}

821
static void perf_top__start_counters(struct perf_top *top)
822
{
823
	struct perf_evsel *counter, *first;
824
	struct perf_evlist *evlist = top->evlist;
825 826

	first = list_entry(evlist->entries.next, struct perf_evsel, node);
827

828 829
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
830 831
		struct xyarray *group_fd = NULL;

832
		if (top->group && counter != first)
833
			group_fd = first->fd;
834

835 836
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

837
		if (top->freq) {
838 839
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
840
			attr->sample_freq = top->freq;
841
		}
842

843 844 845 846 847
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

848 849 850
		if (symbol_conf.use_callchain)
			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

851
		attr->mmap = 1;
852
		attr->comm = 1;
853
		attr->inherit = top->inherit;
854
retry_sample_id:
855
		attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
856
try_again:
857 858
		if (perf_evsel__open(counter, top->evlist->cpus,
				     top->evlist->threads, top->group,
859
				     group_fd) < 0) {
860 861
			int err = errno;

862
			if (err == EPERM || err == EACCES) {
863
				ui__error_paranoid();
864
				goto out_err;
865
			} else if (err == EINVAL && top->sample_id_all_avail) {
866 867 868
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
869
				top->sample_id_all_avail = false;
870
				goto retry_sample_id;
871
			}
872 873 874 875 876
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
877 878
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
879
				if (verbose)
880 881
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
882 883 884 885 886

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
887

888 889 890 891 892 893
			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(counter));
				goto out_err;
			}

894 895 896 897 898 899
			ui__warning("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
900
		}
901
	}
902

903
	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
904 905 906 907 908 909 910 911 912 913
		ui__warning("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
914 915
}

916
static int perf_top__setup_sample_type(struct perf_top *top)
917
{
918
	if (!top->sort_has_symbols) {
919 920 921 922
		if (symbol_conf.use_callchain) {
			ui__warning("Selected -g but \"sym\" not present in --sort/-s.");
			return -EINVAL;
		}
923
	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
924 925 926 927 928 929 930 931 932
		if (callchain_register_param(&callchain_param) < 0) {
			ui__warning("Can't register callchain params.\n");
			return -EINVAL;
		}
	}

	return 0;
}

933
static int __cmd_top(struct perf_top *top)
934 935
{
	pthread_t thread;
936
	int ret;
937
	/*
938 939
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
940
	 */
941 942
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
943
		return -ENOMEM;
944

945
	ret = perf_top__setup_sample_type(top);
946 947 948
	if (ret)
		goto out_delete;

949 950
	if (top->target_tid != -1)
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
951
						  perf_event__process,
952
						  &top->session->host_machine);
953
	else
954 955 956 957 958
		perf_event__synthesize_threads(&top->tool, perf_event__process,
					       &top->session->host_machine);
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
	perf_session__update_sample_type(top->session);
959

960
	/* Wait for a minimal set of events before starting the snapshot */
961
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
962

963
	perf_top__mmap_read(top);
964

965
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
966
							    display_thread), top)) {
967 968 969 970
		printf("Could not create display thread.\n");
		exit(-1);
	}

971
	if (top->realtime_prio) {
972 973
		struct sched_param param;

974
		param.sched_priority = top->realtime_prio;
975 976 977 978 979 980 981
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
982
		u64 hits = top->samples;
983

984
		perf_top__mmap_read(top);
985

986 987
		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
988 989
	}

990
out_delete:
991 992
	perf_session__delete(top->session);
	top->session = NULL;
993 994 995 996 997

	return 0;
}

static int
998
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
999
{
1000
	struct perf_top *top = (struct perf_top *)opt->value;
1001 1002 1003 1004 1005 1006 1007
	char *tok, *tok2;
	char *endptr;

	/*
	 * --no-call-graph
	 */
	if (unset) {
1008
		top->dont_use_callchains = true;
1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
		return 0;
	}

	symbol_conf.use_callchain = true;

	if (!arg)
		return 0;

	tok = strtok((char *)arg, ",");
	if (!tok)
		return -1;

	/* get the output mode */
	if (!strncmp(tok, "graph", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_ABS;

	else if (!strncmp(tok, "flat", strlen(arg)))
		callchain_param.mode = CHAIN_FLAT;

	else if (!strncmp(tok, "fractal", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_REL;

	else if (!strncmp(tok, "none", strlen(arg))) {
		callchain_param.mode = CHAIN_NONE;
		symbol_conf.use_callchain = false;

		return 0;
1036
	} else
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071
		return -1;

	/* get the min percentage */
	tok = strtok(NULL, ",");
	if (!tok)
		goto setup;

	callchain_param.min_percent = strtod(tok, &endptr);
	if (tok == endptr)
		return -1;

	/* get the print limit */
	tok2 = strtok(NULL, ",");
	if (!tok2)
		goto setup;

	if (tok2[0] != 'c') {
		callchain_param.print_limit = strtod(tok2, &endptr);
		tok2 = strtok(NULL, ",");
		if (!tok2)
			goto setup;
	}

	/* get the call chain order */
	if (!strcmp(tok2, "caller"))
		callchain_param.order = ORDER_CALLER;
	else if (!strcmp(tok2, "callee"))
		callchain_param.order = ORDER_CALLEE;
	else
		return -1;
setup:
	if (callchain_register_param(&callchain_param) < 0) {
		fprintf(stderr, "Can't register callchain params\n");
		return -1;
	}
1072 1073
	return 0;
}
1074 1075 1076 1077 1078 1079

/* Usage strings for "perf top"; the list is NULL-terminated. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL	/* terminator */
};

1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095
/*
 * cmd_top - entry point of the 'perf top' builtin.
 *
 * Fills a struct perf_top from the command line, creates the event list and
 * its cpu/thread maps, applies the default sampling period to every event
 * that has none, initializes the symbol layer and then runs __cmd_top().
 *
 * @argc:   number of entries in @argv
 * @argv:   command line arguments, handed to parse_options()
 * @prefix: unused
 *
 * Returns the status of __cmd_top(), -ENOMEM when the event list cannot be
 * allocated or populated with the default event, or -1 when symbol__init()
 * fails.  The event list is released on every path that returns after it
 * has been allocated.  The process is terminated with exit(EXIT_FAILURE)
 * when both the sampling frequency and the sample count are zero.
 *
 * NOTE(review): the code following each usage_with_options() call is
 * written as if that helper does not return - confirm.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.target_pid	     = -1,
		.target_tid	     = -1,
		.freq		     = 1000, /* 1 KHz */
		.sample_id_all_avail = true,
		.mmap_pages	     = 128,
		.sym_pcnt_filter     = 5,
	};
	char callchain_default_opt[] = "fractal,0.5,callee";
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_INTEGER('c', "count", &top.default_interval,
		    "event period to sample"),
	OPT_INTEGER('p', "pid", &top.target_pid,
		    "profile events on existing process id"),
	OPT_INTEGER('t', "tid", &top.target_tid,
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &top.group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "inherit", &top.inherit,
		    "child tasks inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero,
		    "zero history across updates"),
	OPT_INTEGER('F', "freq", &top.freq,
		    "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
		     "Default: fractal,0.5,callee", &parse_callchain_opt,
		     callchain_default_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_END()
	};

	/* The -e callback stores into top.evlist, so it must exist first. */
	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	/* Any argument left over after option parsing is a usage error. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* Pointer comparison: true only when -s did not replace the default. */
	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	/* --stdio takes precedence over --tui when both are given. */
	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	/*
	 * CPU and PID are mutually exclusive
	 *
	 * NOTE(review): this test runs before target_pid is copied into
	 * target_tid below, so only -t (not -p) triggers the override -
	 * confirm this is intended.
	 */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	/* No -e given: fall back to the default event. */
	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		status = -ENOMEM;
		goto out_delete_evlist;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	/* Refresh at most once per second. */
	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (top.default_interval)
		top.freq = 0;
	else if (top.freq) {
		top.default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (!pos->attr.sample_period)
			pos->attr.sample_period = top.default_interval;
	}

	/* Start with the first event on the list selected. */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = sizeof(struct annotation);

	/* Only search the vmlinux path when -k gave no explicit file. */
	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		status = -1;
		goto out_delete_evlist;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		/*
		 * No -E given: let perf_top__update_print_entries() choose
		 * the count and install a SIGWINCH handler.
		 */
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);

out_delete_evlist:
	/* Single release point for the event list on success and failure. */
	perf_evlist__delete(top.evlist);

	return status;
}