builtin-top.c 32.0 KB
Newer Older
1
/*
2 3 4 5 6 7
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
8
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
9 10 11 12 13 14 15 16 17 18
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
19
 */
20
#include "builtin.h"
21

22
#include "perf.h"
23

24
#include "util/annotate.h"
25
#include "util/cache.h"
26
#include "util/color.h"
27
#include "util/evlist.h"
28
#include "util/evsel.h"
29 30
#include "util/session.h"
#include "util/symbol.h"
31
#include "util/thread.h"
32
#include "util/thread_map.h"
33
#include "util/top.h"
34
#include "util/util.h"
35
#include <linux/rbtree.h>
36 37
#include "util/parse-options.h"
#include "util/parse-events.h"
38
#include "util/cpumap.h"
39
#include "util/xyarray.h"
40
#include "util/sort.h"
41

42 43
#include "util/debug.h"

44 45
#include <assert.h>
#include <fcntl.h>
46

47
#include <stdio.h>
48 49
#include <termios.h>
#include <unistd.h>
50
#include <inttypes.h>
51

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

67

68
/*
 * Fill @ws with the terminal size.
 *
 * Sources, in order of preference:
 *   1. the LINES and COLUMNS environment variables (COLUMNS is only looked
 *      at when LINES is set), accepted when both parse to non-zero values;
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, where available;
 *   3. a fixed 25x80 fallback.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *rows = getenv("LINES");
	const char *cols = NULL;

	if (rows != NULL) {
		ws->ws_row = atoi(rows);
		cols = getenv("COLUMNS");
	}

	if (cols != NULL) {
		ws->ws_col = atoi(cols);
		if (ws->ws_row != 0 && ws->ws_col != 0)
			return;
	}

#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0) {
		if (ws->ws_row != 0 && ws->ws_col != 0)
			return;
	}
#endif

	/* Nothing usable: assume a classic 25x80 screen. */
	ws->ws_row = 25;
	ws->ws_col = 80;
}

90
static void perf_top__update_print_entries(struct perf_top *top)
91
{
92
	top->print_entries = top->winsize.ws_row;
93

94 95
	if (top->print_entries > 9)
		top->print_entries -= 9;
96 97
}

98
/*
 * SIGWINCH handler (also called directly): @arg is the struct perf_top
 * whose cached window size and entry count must be refreshed.
 */
static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
{
	struct perf_top *session_top = arg;

	get_term_dimensions(&session_top->winsize);
	perf_top__update_print_entries(session_top);
}

106
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
107 108
{
	struct symbol *sym;
109
	struct annotation *notes;
110
	struct map *map;
111
	int err = -1;
112

113
	if (!he || !he->ms.sym)
114 115
		return -1;

116 117
	sym = he->ms.sym;
	map = he->ms.map;
118 119 120 121

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
122
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
123 124 125
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
126
		return -1;
127 128
	}

129 130 131
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
132 133 134
		goto out_assign;
	}

135
	pthread_mutex_lock(&notes->lock);
136

137
	if (symbol__alloc_hist(sym) < 0) {
138
		pthread_mutex_unlock(&notes->lock);
139 140
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
141
		sleep(1);
142
		return err;
143
	}
144

145
	err = symbol__annotate(sym, map, 0);
146
	if (err == 0) {
147
out_assign:
148
		top->sym_filter_entry = he;
149
	}
150

151
	pthread_mutex_unlock(&notes->lock);
152
	return err;
153 154
}

155
static void __zero_source_counters(struct hist_entry *he)
156
{
157
	struct symbol *sym = he->ms.sym;
158
	symbol__annotate_zero_histograms(sym);
159 160
}

161 162 163
/*
 * Account one sample at @ip for event @counter in the annotation histogram
 * of @he's symbol.
 *
 * Nothing is recorded when @he has no symbol, when use_browser is not 1 and
 * the symbol is not the one currently being annotated, or when the
 * annotation lock is busy (trylock failure drops the sample).
 */
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;

	if (he == NULL || he->ms.sym == NULL)
		return;

	if (use_browser != 1 &&
	    (top->sym_filter_entry == NULL ||
	     top->sym_filter_entry->ms.sym != he->ms.sym))
		return;

	sym = he->ms.sym;
	notes = symbol__annotation(sym);

	/* Never block the sample path on the annotation lock. */
	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	/* Translate the sampled address through the map before counting it. */
	ip = he->ms.map->map_ip(he->ms.map, ip);
	symbol__inc_addr_samples(sym, he->ms.map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

193
static void perf_top__show_details(struct perf_top *top)
194
{
195
	struct hist_entry *he = top->sym_filter_entry;
196
	struct annotation *notes;
197
	struct symbol *symbol;
198
	int more;
199

200
	if (!he)
201 202
		return;

203
	symbol = he->ms.sym;
204 205 206 207 208 209
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
210

211 212
	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
213

214 215 216 217
	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
218
	else
219
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
220
	if (more != 0)
221
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
222 223
out_unlock:
	pthread_mutex_unlock(&notes->lock);
224
}
225 226 227

/*
 * ANSI escape sequence written before every refresh: ESC[H moves the cursor
 * to the home position and ESC[2J erases the whole screen.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

228 229 230
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
231
{
232 233 234 235 236 237
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

238
	evsel->hists.stats.total_period += sample->period;
239 240
	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
241
}
242

243
static void perf_top__print_sym_table(struct perf_top *top)
244
{
245 246
	char bf[160];
	int printed = 0;
247
	const int win_width = top->winsize.ws_col - 1;
248

249
	puts(CONSOLE_CLEAR);
250

251
	perf_top__header_snprintf(top, bf, sizeof(bf));
252
	printf("%s\n", bf);
253

254
	perf_top__reset_sample_counters(top);
255

256
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
257

258 259 260 261
	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
262 263
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
264
			      top->sym_evsel->hists.stats.nr_lost_warned);
265
		++printed;
266 267
	}

268 269
	if (top->sym_filter_entry) {
		perf_top__show_details(top);
270 271 272
		return;
	}

273 274 275 276 277 278 279
	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
280
	putchar('\n');
281 282
	hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
283 284
}

285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
/*
 * Print "<msg>: " and read one line from stdin as a non-negative decimal
 * integer.
 *
 * *target is only written when the whole line (minus the newline) consists
 * of decimal digits and the value fits in a non-negative int; an empty line
 * is accepted and yields 0.  On read failure, non-digit input or an
 * out-of-range value *target is left untouched.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	unsigned long val;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	/* getline() may have allocated even when it fails, so always free. */
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	/* <ctype.h> functions require an unsigned char value. */
	for (p = buf; *p; p++) {
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	val = strtoul(buf, NULL, 10);
	tmp = (int)val;
	/* Reject values that do not survive the conversion to int. */
	if (tmp < 0 || (unsigned long)tmp != val)
		goto out_free;

	*target = tmp;
out_free:
	free(buf);
}

/*
 * Prompt for a percentage.  *target is updated only when the entered value
 * lies in [0, 100].
 *
 * The scratch value starts out of range so that input rejected by
 * prompt_integer() (read failure, non-digits) leaves *target unchanged
 * instead of silently resetting it to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

320
static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
321 322
{
	char *buf = malloc(0), *p;
323
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
324
	struct rb_node *next;
325 326 327 328 329
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
330
		top->sym_filter_entry = NULL;
331 332 333 334 335 336 337 338 339 340
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

341
	next = rb_first(&top->sym_evsel->hists.entries);
342 343 344 345
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
346 347
			break;
		}
348
		next = rb_next(&n->rb_node);
349 350 351
	}

	if (!found) {
352
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
353 354 355
		sleep(1);
		return;
	} else
356
		perf_top__parse_source(top, found);
357 358 359 360 361

out_free:
	free(buf);
}

362
static void perf_top__print_mapped_keys(struct perf_top *top)
363
{
364 365
	char *name = NULL;

366 367
	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
368 369 370 371
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
372 373
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
374

375 376
	if (top->evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
377

378
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
379

380
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
381 382
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
383

384
	fprintf(stdout,
385
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
386
		top->hide_kernel_symbols ? "yes" : "no");
387 388
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
389 390
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
391 392 393
	fprintf(stdout, "\t[qQ]    quit.\n");
}

394
static int perf_top__key_mapped(struct perf_top *top, int c)
395 396 397 398 399 400 401 402
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
403 404
		case 'K':
		case 'U':
405 406 407
		case 'F':
		case 's':
		case 'S':
408 409
			return 1;
		case 'E':
410
			return top->evlist->nr_entries > 1 ? 1 : 0;
411 412
		default:
			break;
413 414 415
	}

	return 0;
416 417
}

418
static void perf_top__handle_keypress(struct perf_top *top, int c)
419
{
420
	if (!perf_top__key_mapped(top, c)) {
421 422 423
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

424
		perf_top__print_mapped_keys(top);
425 426 427 428 429 430 431 432 433 434 435 436 437 438
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
439
		if (!perf_top__key_mapped(top, c))
440 441 442
			return;
	}

443 444
	switch (c) {
		case 'd':
445 446 447
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
448 449
			break;
		case 'e':
450 451 452 453 454 455 456 457
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
458 459
			} else
				signal(SIGWINCH, SIG_DFL);
460 461
			break;
		case 'E':
462
			if (top->evlist->nr_entries > 1) {
463 464 465
				/* Select 0 as the default event: */
				int counter = 0;

466
				fprintf(stderr, "\nAvailable events:");
467

468 469
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
470

471
				prompt_integer(&counter, "Enter details event counter");
472

473 474 475
				if (counter >= top->evlist->nr_entries) {
					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
476
					sleep(1);
477
					break;
478
				}
479 480
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
481
						break;
482
			} else
483
				top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
484 485
			break;
		case 'f':
486
			prompt_integer(&top->count_filter, "Enter display event count filter");
487 488
			break;
		case 'F':
489 490
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
491
			break;
492
		case 'K':
493
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
494
			break;
495 496 497
		case 'q':
		case 'Q':
			printf("exiting.\n");
498 499
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
500 501
			exit(0);
		case 's':
502
			perf_top__prompt_symbol(top, "Enter details symbol");
503 504
			break;
		case 'S':
505
			if (!top->sym_filter_entry)
506 507
				break;
			else {
508
				struct hist_entry *syme = top->sym_filter_entry;
509

510
				top->sym_filter_entry = NULL;
511 512 513
				__zero_source_counters(syme);
			}
			break;
514
		case 'U':
515
			top->hide_user_symbols = !top->hide_user_symbols;
516
			break;
517
		case 'z':
518
			top->zero = !top->zero;
519
			break;
520 521
		default:
			break;
522 523 524
	}
}

525 526 527 528 529 530 531 532 533 534
/*
 * Refresh callback for the TUI browser; @arg is the struct perf_top.
 *
 * Resets the sample counters, adopts the event selected in the evlist (if
 * any) as the active one, and then collapses, resorts and decays its
 * histogram.
 */
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *top = arg;
	struct perf_evsel *selected;

	perf_top__reset_sample_counters(top);

	selected = top->evlist->selected;
	if (selected != NULL)
		top->sym_evsel = selected;

	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
}

540
static void *display_thread_tui(void *arg)
541
{
542
	struct perf_top *top = arg;
543 544
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";

545 546
	perf_top__sort_new_samples(top);
	perf_evlist__tui_browse_hists(top->evlist, help,
547
				      perf_top__sort_new_samples,
548
				      top, top->delay_secs);
549

550 551 552 553 554
	exit_browser(0);
	exit(0);
	return NULL;
}

555
static void *display_thread(void *arg)
556
{
557
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
558
	struct termios tc, save;
559
	struct perf_top *top = arg;
560 561 562 563 564 565 566
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
567

568
	pthread__unblock_sigwinch();
569
repeat:
570
	delay_msecs = top->delay_secs * 1000;
571 572 573
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
574

575
	while (1) {
576
		perf_top__print_sym_table(top);
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall trhu */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
593 594 595
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

596
	perf_top__handle_keypress(top, c);
597
	goto repeat;
598 599 600 601

	return NULL;
}

602
/* Tag samples to be skipped. */
603
static const char *skip_symbols[] = {
604
	"default_idle",
605
	"native_safe_halt",
606 607 608 609
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
610
	"mwait_idle_with_hints",
611
	"poll_idle",
612 613
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
614 615 616
	NULL
};

617
static int symbol_filter(struct map *map __used, struct symbol *sym)
618
{
619
	const char *name = sym->name;
620
	int i;
621

622 623 624 625 626 627 628
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

629 630 631 632 633 634 635
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
636 637
		return 1;

638 639
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
640
			sym->ignore = true;
641 642 643
			break;
		}
	}
644 645 646 647

	return 0;
}

648 649
static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
650
				       struct perf_evsel *evsel,
651
				       struct perf_sample *sample,
652
				       struct machine *machine)
653
{
654
	struct perf_top *top = container_of(tool, struct perf_top, tool);
655
	struct symbol *parent = NULL;
656
	u64 ip = event->ip.ip;
657
	struct addr_location al;
658
	int err;
659

660
	if (!machine && perf_guest) {
661
		pr_err("Can't find guest [%d]'s kernel information\n",
662
			event->ip.pid);
663 664 665
		return;
	}

666
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
667
		top->exact_samples++;
668

669
	if (perf_event__preprocess_sample(event, machine, &al, sample,
670
					  symbol_filter) < 0 ||
671
	    al.filtered)
672
		return;
673

674
	if (!top->kptr_restrict_warned &&
675 676 677 678 679 680 681 682 683 684
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
685
		top->kptr_restrict_warned = true;
686 687
	}

688
	if (al.sym == NULL) {
689
		const char *msg = "Kernel samples will not be resolved.\n";
690 691 692 693 694 695 696 697 698 699 700
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
701
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
702
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
703
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
704 705 706 707 708 709 710 711 712 713
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
714
			top->vmlinux_warned = true;
715
		}
716 717
	}

718 719
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
720

721 722
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
723 724
			err = machine__resolve_callchain(machine, evsel, al.thread,
							 sample->callchain, &parent);
725 726 727 728
			if (err)
				return;
		}

729
		he = perf_evsel__add_hist_entry(evsel, &al, sample);
730 731 732
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
733
		}
734

735
		if (symbol_conf.use_callchain) {
736
			err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
737 738 739 740 741
					       sample->period);
			if (err)
				return;
		}

742 743
		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
744
	}
745 746

	return;
747 748
}

749
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
750
{
751
	struct perf_sample sample;
752
	struct perf_evsel *evsel;
753
	struct perf_session *session = top->session;
754
	union perf_event *event;
755 756
	struct machine *machine;
	u8 origin;
757
	int ret;
758

759 760
	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
		ret = perf_session__parse_sample(session, event, &sample);
761 762 763 764
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
765

766
		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
767 768
		assert(evsel != NULL);

769 770
		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

771
		if (event->header.type == PERF_RECORD_SAMPLE)
772
			++top->samples;
773 774 775

		switch (origin) {
		case PERF_RECORD_MISC_USER:
776 777
			++top->us_samples;
			if (top->hide_user_symbols)
778
				continue;
779
			machine = perf_session__find_host_machine(session);
780 781
			break;
		case PERF_RECORD_MISC_KERNEL:
782 783
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
784
				continue;
785
			machine = perf_session__find_host_machine(session);
786 787
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
788 789
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
790 791
			break;
		case PERF_RECORD_MISC_GUEST_USER:
792
			++top->guest_us_samples;
793 794 795 796 797 798 799 800 801 802
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


803 804 805 806
		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
807
			hists__inc_nr_events(&evsel->hists, event->header.type);
808
			perf_event__process(&top->tool, event, &sample, machine);
809
		} else
810
			++session->hists.stats.nr_unknown_events;
811 812 813
	}
}

814
static void perf_top__mmap_read(struct perf_top *top)
815
{
816 817
	int i;

818 819
	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
820 821
}

822
static void perf_top__start_counters(struct perf_top *top)
823
{
824
	struct perf_evsel *counter, *first;
825
	struct perf_evlist *evlist = top->evlist;
826 827

	first = list_entry(evlist->entries.next, struct perf_evsel, node);
828

829 830
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
831 832
		struct xyarray *group_fd = NULL;

833
		if (top->group && counter != first)
834
			group_fd = first->fd;
835

836 837
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

838
		if (top->freq) {
839 840
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
841
			attr->sample_freq = top->freq;
842
		}
843

844 845 846 847 848
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

849 850 851
		if (symbol_conf.use_callchain)
			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

852
		attr->mmap = 1;
853
		attr->comm = 1;
854
		attr->inherit = top->inherit;
855
retry_sample_id:
856
		attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
857
try_again:
858 859
		if (perf_evsel__open(counter, top->evlist->cpus,
				     top->evlist->threads, top->group,
860
				     group_fd) < 0) {
861 862
			int err = errno;

863
			if (err == EPERM || err == EACCES) {
864
				ui__error_paranoid();
865
				goto out_err;
866
			} else if (err == EINVAL && top->sample_id_all_avail) {
867 868 869
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
870
				top->sample_id_all_avail = false;
871
				goto retry_sample_id;
872
			}
873 874 875 876 877
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
878 879
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
880
				if (verbose)
881 882
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
883 884 885 886 887

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
888

889 890 891 892 893 894
			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(counter));
				goto out_err;
			}

895 896 897 898 899 900
			ui__warning("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
901
		}
902
	}
903

904
	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
905 906 907 908 909 910 911 912 913 914
		ui__warning("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
915 916
}

917
static int perf_top__setup_sample_type(struct perf_top *top)
918
{
919
	if (!top->sort_has_symbols) {
920 921 922 923
		if (symbol_conf.use_callchain) {
			ui__warning("Selected -g but \"sym\" not present in --sort/-s.");
			return -EINVAL;
		}
924
	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
925 926 927 928 929 930 931 932 933
		if (callchain_register_param(&callchain_param) < 0) {
			ui__warning("Can't register callchain params.\n");
			return -EINVAL;
		}
	}

	return 0;
}

934
static int __cmd_top(struct perf_top *top)
935 936
{
	pthread_t thread;
937
	int ret;
938
	/*
939 940
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
941
	 */
942 943
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
944
		return -ENOMEM;
945

946
	ret = perf_top__setup_sample_type(top);
947 948 949
	if (ret)
		goto out_delete;

950 951
	if (top->target_tid != -1)
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
952
						  perf_event__process,
953
						  &top->session->host_machine);
954
	else
955 956 957 958 959
		perf_event__synthesize_threads(&top->tool, perf_event__process,
					       &top->session->host_machine);
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
	perf_session__update_sample_type(top->session);
960

961
	/* Wait for a minimal set of events before starting the snapshot */
962
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
963

964
	perf_top__mmap_read(top);
965

966
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
967
							    display_thread), top)) {
968 969 970 971
		printf("Could not create display thread.\n");
		exit(-1);
	}

972
	if (top->realtime_prio) {
973 974
		struct sched_param param;

975
		param.sched_priority = top->realtime_prio;
976 977 978 979 980 981 982
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
983
		u64 hits = top->samples;
984

985
		perf_top__mmap_read(top);
986

987 988
		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
989 990
	}

991
out_delete:
992 993
	perf_session__delete(top->session);
	top->session = NULL;
994 995 996 997 998

	return 0;
}

static int
999
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1000
{
1001
	struct perf_top *top = (struct perf_top *)opt->value;
1002 1003 1004 1005 1006 1007 1008
	char *tok, *tok2;
	char *endptr;

	/*
	 * --no-call-graph
	 */
	if (unset) {
1009
		top->dont_use_callchains = true;
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074
		return 0;
	}

	symbol_conf.use_callchain = true;

	if (!arg)
		return 0;

	tok = strtok((char *)arg, ",");
	if (!tok)
		return -1;

	/* get the output mode */
	if (!strncmp(tok, "graph", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_ABS;

	else if (!strncmp(tok, "flat", strlen(arg)))
		callchain_param.mode = CHAIN_FLAT;

	else if (!strncmp(tok, "fractal", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_REL;

	else if (!strncmp(tok, "none", strlen(arg))) {
		callchain_param.mode = CHAIN_NONE;
		symbol_conf.use_callchain = false;

		return 0;
	}

	else
		return -1;

	/* get the min percentage */
	tok = strtok(NULL, ",");
	if (!tok)
		goto setup;

	callchain_param.min_percent = strtod(tok, &endptr);
	if (tok == endptr)
		return -1;

	/* get the print limit */
	tok2 = strtok(NULL, ",");
	if (!tok2)
		goto setup;

	if (tok2[0] != 'c') {
		callchain_param.print_limit = strtod(tok2, &endptr);
		tok2 = strtok(NULL, ",");
		if (!tok2)
			goto setup;
	}

	/* get the call chain order */
	if (!strcmp(tok2, "caller"))
		callchain_param.order = ORDER_CALLER;
	else if (!strcmp(tok2, "callee"))
		callchain_param.order = ORDER_CALLEE;
	else
		return -1;
setup:
	if (callchain_register_param(&callchain_param) < 0) {
		fprintf(stderr, "Can't register callchain params\n");
		return -1;
	}
1075 1076
	return 0;
}
1077 1078 1079 1080 1081 1082

/* NULL-terminated list of usage strings for the top command. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL
};

1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098
/*
 * cmd_top - entry point of the 'perf top' builtin.
 *
 * @argc:   number of command line arguments
 * @argv:   command line arguments, parsed against the option table below
 * @prefix: unused
 *
 * Builds a struct perf_top with its defaults, parses the command line into
 * it, creates the thread/cpu maps and the event list, fills in the sample
 * period of every event that did not get one explicitly, initializes the
 * symbol machinery and finally hands control to __cmd_top().
 *
 * Returns the status of __cmd_top() on the normal path, -ENOMEM when one of
 * the allocations fails and -1 when symbol__init() fails.  Once top.evlist
 * has been allocated, every returning error path goes through out_free_fd so
 * that the event list is released.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	/* Default for the allocation failures that jump to out_free_fd. */
	int status = -ENOMEM;
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.target_pid	     = -1,
		.target_tid	     = -1,
		.freq		     = 1000, /* 1 KHz */
		.sample_id_all_avail = true,
		.mmap_pages	     = 128,
		.sym_pcnt_filter     = 5,
	};
	/*
	 * Writable array rather than a string literal: it is handed to
	 * parse_callchain_opt() as the default argument of -G.
	 */
	char callchain_default_opt[] = "fractal,0.5,callee";
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_INTEGER('c', "count", &top.default_interval,
		    "event period to sample"),
	OPT_INTEGER('p', "pid", &top.target_pid,
		    "profile events on existing process id"),
	OPT_INTEGER('t', "tid", &top.target_tid,
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &top.group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "inherit", &top.inherit,
		    "child tasks inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero,
		    "zero history across updates"),
	OPT_INTEGER('F', "freq", &top.freq,
		    "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
		     "Default: fractal,0.5,callee", &parse_callchain_opt,
		     callchain_default_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_END()
	};

	/* Nothing to release yet, so a plain return is fine here. */
	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	/* 'perf top' takes no positional arguments: leftovers are an error. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* Replace the generic default sort order unless -s was given. */
	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	/* --stdio takes precedence over --tui when both are given. */
	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	/*
	 * CPU and PID are mutually exclusive.
	 *
	 * NOTE(review): this test runs before target_pid is copied into
	 * target_tid below, so only an explicit -t triggers the warning.
	 */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	/* No -e given: fall back to the default event. */
	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		/* status is still -ENOMEM; release the evlist on the way out. */
		goto out_free_fd;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (top.default_interval)
		top.freq = 0;
	else if (top.freq) {
		top.default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
					 top.evlist->threads->nr) < 0)
			goto out_free_fd;
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (pos->attr.sample_period)
			continue;

		pos->attr.sample_period = top.default_interval;
	}

	if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
	    perf_evlist__alloc_mmap(top.evlist) < 0)
		goto out_free_fd;

	/* The first event on the list is the one initially selected. */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = sizeof(struct annotation);

	/* Only search the vmlinux path when no vmlinux was named with -k. */
	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		/* Keep the historical -1 return value, but free the evlist. */
		status = -1;
		goto out_free_fd;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		/*
		 * No explicit -E: size the output from the terminal and
		 * install a SIGWINCH handler.
		 */
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);
out_free_fd:
	perf_evlist__delete(top.evlist);

	return status;
}