/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
20
#include "builtin.h"
21

22
#include "perf.h"
23

24
#include "util/annotate.h"
25
#include "util/cache.h"
26
#include "util/color.h"
27
#include "util/evlist.h"
28
#include "util/evsel.h"
29 30
#include "util/session.h"
#include "util/symbol.h"
31
#include "util/thread.h"
32
#include "util/thread_map.h"
33
#include "util/top.h"
34
#include "util/util.h"
35
#include <linux/rbtree.h>
36 37
#include "util/parse-options.h"
#include "util/parse-events.h"
38
#include "util/cpumap.h"
39
#include "util/xyarray.h"
40
#include "util/sort.h"
41

42 43
#include "util/debug.h"

44 45
#include <assert.h>
#include <fcntl.h>
46

47
#include <stdio.h>
48 49
#include <termios.h>
#include <unistd.h>
50
#include <inttypes.h>
51

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

67 68 69 70 71 72 73
/* Shared 'perf top' state, pre-loaded with the built-in defaults. */
static struct perf_top top = {
	.count_filter	= 5,
	.delay_secs	= 2,
	.target_pid	= -1,
	.target_tid	= -1,
	.freq		= 1000,	/* 1 KHz */
};

/* -a/--all-cpus */
static bool system_wide;

/* --tui / --stdio front-end requests */
static bool use_tui, use_stdio;

/* Gates per-address sample recording and the callchain setup checks. */
static bool sort_has_symbols;

/* Set by --no-call-graph. */
static bool dont_use_callchains;
/* Argument used by -G/--call-graph when none is given. */
static char callchain_default_opt[] = "fractal,0.5,callee";

/* -c/--count: event period to sample */
static int default_interval;

/* One-shot flags so each warning is shown only once. */
static bool kptr_restrict_warned;
static bool vmlinux_warned;

/* -i, -r and -g respectively */
static bool inherit;
static int realtime_prio;
static bool group;

/* Cleared when opening a counter with sample_id_all fails with EINVAL. */
static bool sample_id_all_avail = true;

/* -m/--mmap-pages */
static unsigned int mmap_pages = 128;

/* -D/--dump-symtab */
static bool dump_symtab;

/* Last known terminal size. */
static struct winsize winsize;

/* --sym-annotate symbol name and the annotate percentage filter ([F] key) */
static const char *sym_filter;
static int sym_pcnt_filter = 5;
101

102 103 104 105
/*
 * Source functions
 */

106
/*
 * Convert an environment-supplied terminal dimension.
 *
 * Returns the leading decimal number of @s when it is positive and fits in
 * an unsigned short (the type of the winsize fields), otherwise 0.  Like
 * atoi(), trailing non-digit characters are ignored.
 */
static unsigned short parse_term_dimension(const char *s)
{
	unsigned short dim;
	char *end;
	long val;

	if (s == NULL)
		return 0;

	errno = 0;
	val = strtol(s, &end, 10);
	if (end == s || errno != 0 || val <= 0)
		return 0;

	/* Reject values that do not survive the narrowing conversion. */
	dim = (unsigned short)val;
	if ((long)dim != val)
		return 0;

	return dim;
}

/*
 * Fill @ws with the terminal size.
 *
 * Sources, in order of preference:
 *   1. the LINES and COLUMNS environment variables, when both hold a
 *      usable positive value;
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available and it
 *      reports non-zero rows and columns;
 *   3. a fixed 25x80 fallback.
 *
 * Out-of-range or negative environment values are treated as unset rather
 * than being truncated into the unsigned short fields.
 */
void get_term_dimensions(struct winsize *ws)
{
	unsigned short rows = parse_term_dimension(getenv("LINES"));
	unsigned short cols = parse_term_dimension(getenv("COLUMNS"));

	if (rows && cols) {
		ws->ws_row = rows;
		ws->ws_col = cols;
		return;
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

128
static void update_print_entries(struct winsize *ws)
129
{
130
	top.print_entries = ws->ws_row;
131

132 133
	if (top.print_entries > 9)
		top.print_entries -= 9;
134 135 136 137
}

/*
 * SIGWINCH handler: re-read the terminal size into the global winsize and
 * recompute how many entries fit.  The signal number is unused.
 */
static void sig_winch_handler(int sig __used)
{
	struct winsize *ws = &winsize;

	get_term_dimensions(ws);
	update_print_entries(ws);
}

142
static int parse_source(struct hist_entry *he)
143 144
{
	struct symbol *sym;
145
	struct annotation *notes;
146
	struct map *map;
147
	int err = -1;
148

149
	if (!he || !he->ms.sym)
150 151
		return -1;

152 153
	sym = he->ms.sym;
	map = he->ms.map;
154 155 156 157

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
158
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
159 160 161
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
162
		return -1;
163 164
	}

165 166 167
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
168 169 170
		goto out_assign;
	}

171
	pthread_mutex_lock(&notes->lock);
172

173
	if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
174
		pthread_mutex_unlock(&notes->lock);
175 176
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
177
		sleep(1);
178
		return err;
179
	}
180

181
	err = symbol__annotate(sym, map, 0);
182
	if (err == 0) {
183
out_assign:
184
		top.sym_filter_entry = he;
185
	}
186

187
	pthread_mutex_unlock(&notes->lock);
188
	return err;
189 190
}

191
static void __zero_source_counters(struct hist_entry *he)
192
{
193
	struct symbol *sym = he->ms.sym;
194
	symbol__annotate_zero_histograms(sym);
195 196
}

197
/*
 * Account one sample at address @ip for event index @counter in the
 * annotation histogram of @he's symbol.
 *
 * Nothing is recorded when the entry has no symbol, when the entry is not
 * the active annotation target and use_browser is not 1, or when the
 * annotation lock is currently held elsewhere.
 */
static void record_precise_ip(struct hist_entry *he, int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;

	if (he == NULL || he->ms.sym == NULL)
		return;

	if (he != top.sym_filter_entry && use_browser != 1)
		return;

	sym = he->ms.sym;
	notes = symbol__annotation(sym);

	/* Never block here: skip the sample if the lock is busy. */
	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	if (notes->src == NULL &&
	    symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	/* Translate the sampled address through the map before counting. */
	ip = he->ms.map->map_ip(he->ms.map, ip);
	symbol__inc_addr_samples(sym, he->ms.map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

227
static void show_details(struct hist_entry *he)
228
{
229
	struct annotation *notes;
230
	struct symbol *symbol;
231
	int more;
232

233
	if (!he)
234 235
		return;

236
	symbol = he->ms.sym;
237 238 239 240 241 242
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
243

244
	printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
245 246
	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);

247
	more = symbol__annotate_printf(symbol, he->ms.map, top.sym_evsel->idx,
248
				       0, sym_pcnt_filter, top.print_entries, 4);
249 250 251
	if (top.zero)
		symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
	else
252
		symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
253
	if (more != 0)
254
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
255 256
out_unlock:
	pthread_mutex_unlock(&notes->lock);
257
}
258 259 260

/* ANSI escape sequence: cursor to home position, then erase the display. */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

261 262 263 264 265
static struct hist_entry *
	perf_session__add_hist_entry(struct perf_session *session,
				     struct addr_location *al,
				     struct perf_sample *sample,
				     struct perf_evsel *evsel)
266
{
267 268 269 270 271 272 273 274 275
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

	session->hists.stats.total_period += sample->period;
	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
276
}
277

278
static void print_sym_table(void)
279
{
280 281
	char bf[160];
	int printed = 0;
282
	const int win_width = winsize.ws_col - 1;
283

284
	puts(CONSOLE_CLEAR);
285

286 287
	perf_top__header_snprintf(&top, bf, sizeof(bf));
	printf("%s\n", bf);
288

289
	perf_top__reset_sample_counters(&top);
290

291
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
292

293 294 295 296 297 298 299
	if (top.sym_evsel->hists.stats.nr_lost_warned !=
	    top.sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top.sym_evsel->hists.stats.nr_lost_warned =
			top.sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
			      top.sym_evsel->hists.stats.nr_lost_warned);
300
		++printed;
301 302
	}

303 304
	if (top.sym_filter_entry) {
		show_details(top.sym_filter_entry);
305 306 307
		return;
	}

308 309
	hists__collapse_resort_threaded(&top.sym_evsel->hists);
	hists__output_resort_threaded(&top.sym_evsel->hists);
310 311 312
	hists__decay_entries_threaded(&top.sym_evsel->hists,
				      top.hide_user_symbols,
				      top.hide_kernel_symbols);
313
	hists__output_recalc_col_len(&top.sym_evsel->hists, winsize.ws_row - 3);
314
	putchar('\n');
315 316
	hists__fprintf(&top.sym_evsel->hists, NULL, false, false,
		       winsize.ws_row - 4 - printed, win_width, stdout);
317 318
}

319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
/*
 * Prompt with @msg on stdout and read one line from stdin.
 *
 * *@target is updated only when the line consists solely of decimal digits
 * and the value fits in an int.  An empty line stores 0.  On EOF, a read
 * error, non-digit input or an out-of-range value, *@target is left
 * untouched.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t len = 0;
	unsigned long val;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	fflush(stdout);

	/* getline() may have allocated even when it fails: always free. */
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* The cast avoids passing a negative value for high-bit chars. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	val = strtoul(buf, NULL, 10);
	if (errno == ERANGE)
		goto out_free;

	/* Accept the value only if it survives the conversion to int. */
	tmp = (int)val;
	if (tmp < 0 || (unsigned long)tmp != val)
		goto out_free;

	*target = tmp;
out_free:
	free(buf);
}

/*
 * Prompt for a percentage with @msg; *@target is updated only when the
 * value read lies within 0..100.
 */
static void prompt_percent(int *target, const char *msg)
{
	int pct = 0;

	prompt_integer(&pct, msg);
	if (pct < 0 || pct > 100)
		return;

	*target = pct;
}

354
static void prompt_symbol(struct hist_entry **target, const char *msg)
355 356
{
	char *buf = malloc(0), *p;
357 358
	struct hist_entry *syme = *target, *n, *found = NULL;
	struct rb_node *next;
359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
		*target = NULL;
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

375 376 377 378 379
	next = rb_first(&top.sym_evsel->hists.entries);
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
380 381
			break;
		}
382
		next = rb_next(&n->rb_node);
383 384 385
	}

	if (!found) {
386
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
387 388 389 390 391 392 393 394 395
		sleep(1);
		return;
	} else
		parse_source(found);

out_free:
	free(buf);
}

396
static void print_mapped_keys(void)
397
{
398 399
	char *name = NULL;

400
	if (top.sym_filter_entry) {
401
		struct symbol *sym = top.sym_filter_entry->ms.sym;
402 403 404 405
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
406 407
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top.delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top.print_entries);
408

409 410
	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top.sym_evsel));
411

412
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top.count_filter);
413

414 415 416
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
417

418
	fprintf(stdout,
419
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
420
		top.hide_kernel_symbols ? "yes" : "no");
421 422
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
423 424
		top.hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top.zero ? 1 : 0);
425 426 427 428 429 430 431 432 433 434 435 436
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int key_mapped(int c)
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
437 438
		case 'K':
		case 'U':
439 440 441
		case 'F':
		case 's':
		case 'S':
442 443
			return 1;
		case 'E':
444
			return top.evlist->nr_entries > 1 ? 1 : 0;
445 446
		default:
			break;
447 448 449
	}

	return 0;
450 451
}

452
static void handle_keypress(int c)
453
{
454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
	if (!key_mapped(c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		print_mapped_keys();
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!key_mapped(c))
			return;
	}

477 478
	switch (c) {
		case 'd':
479 480 481
			prompt_integer(&top.delay_secs, "Enter display delay");
			if (top.delay_secs < 1)
				top.delay_secs = 1;
482 483
			break;
		case 'e':
484 485
			prompt_integer(&top.print_entries, "Enter display entries (lines)");
			if (top.print_entries == 0) {
486
				sig_winch_handler(SIGWINCH);
487 488 489
				signal(SIGWINCH, sig_winch_handler);
			} else
				signal(SIGWINCH, SIG_DFL);
490 491
			break;
		case 'E':
492
			if (top.evlist->nr_entries > 1) {
493 494 495
				/* Select 0 as the default event: */
				int counter = 0;

496
				fprintf(stderr, "\nAvailable events:");
497

498 499
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
500

501
				prompt_integer(&counter, "Enter details event counter");
502

503
				if (counter >= top.evlist->nr_entries) {
504 505
					top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
506
					sleep(1);
507
					break;
508
				}
509
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
510
					if (top.sym_evsel->idx == counter)
511
						break;
512 513
			} else
				top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
514 515
			break;
		case 'f':
516
			prompt_integer(&top.count_filter, "Enter display event count filter");
517 518 519 520
			break;
		case 'F':
			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
			break;
521
		case 'K':
522
			top.hide_kernel_symbols = !top.hide_kernel_symbols;
523
			break;
524 525 526
		case 'q':
		case 'Q':
			printf("exiting.\n");
527
			if (dump_symtab)
528
				perf_session__fprintf_dsos(top.session, stderr);
529 530
			exit(0);
		case 's':
531
			prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
532 533
			break;
		case 'S':
534
			if (!top.sym_filter_entry)
535 536
				break;
			else {
537
				struct hist_entry *syme = top.sym_filter_entry;
538

539
				top.sym_filter_entry = NULL;
540 541 542
				__zero_source_counters(syme);
			}
			break;
543
		case 'U':
544
			top.hide_user_symbols = !top.hide_user_symbols;
545
			break;
546
		case 'z':
547
			top.zero = !top.zero;
548
			break;
549 550
		default:
			break;
551 552 553
	}
}

554 555 556 557 558 559 560 561 562 563
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;
	perf_top__reset_sample_counters(t);

	if (t->evlist->selected != NULL)
		t->sym_evsel = t->evlist->selected;

	hists__collapse_resort_threaded(&t->sym_evsel->hists);
	hists__output_resort_threaded(&t->sym_evsel->hists);
564 565 566
	hists__decay_entries_threaded(&t->sym_evsel->hists,
				      top.hide_user_symbols,
				      top.hide_kernel_symbols);
567 568
}

569 570
/*
 * Thread body for the TUI front end: sort the initial samples, run the
 * histogram browser, and terminate the whole process once it returns.
 */
static void *display_thread_tui(void *arg __used)
{
	const char *hint = "For a higher level overview, try: perf top --sort comm,dso";

	perf_top__sort_new_samples(&top);
	perf_evlist__tui_browse_hists(top.evlist, hint,
				      perf_top__sort_new_samples,
				      &top, top.delay_secs);

	exit_browser(0);
	exit(0);
	return NULL;
}

583
/*
 * Thread body for the stdio front end.  Repeatedly redraws the symbol
 * table every top.delay_secs seconds (or when poll() is interrupted) and
 * hands any key read from stdin to handle_keypress().  Never returns.
 */
static void *display_thread(void *arg __used)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios raw, saved;
	int timeout_ms, key;

	tcgetattr(0, &saved);
	raw = saved;
	raw.c_lflag &= ~(ICANON | ECHO);
	raw.c_cc[VMIN] = 0;
	raw.c_cc[VTIME] = 0;

	pthread__unblock_sigwinch();

	for (;;) {
		timeout_ms = top.delay_secs * 1000;
		tcsetattr(0, TCSANOW, &raw);
		/* trash return */
		getc(stdin);

		for (;;) {
			int rc;

			print_sym_table();

			/*
			 * Either timeout expired or we got an EINTR due to
			 * SIGWINCH, refresh screen in both cases.  Anything
			 * else is handled as a hotkey.
			 */
			rc = poll(&stdin_poll, 1, timeout_ms);
			if (rc == 0)
				continue;
			if (rc == -1 && errno == EINTR)
				continue;
			break;
		}

		key = getc(stdin);
		tcsetattr(0, TCSAFLUSH, &saved);

		handle_keypress(key);
	}

	return NULL;
}

629
/* Tag samples to be skipped. */
630
static const char *skip_symbols[] = {
631
	"default_idle",
632
	"native_safe_halt",
633 634 635 636
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
637
	"mwait_idle_with_hints",
638
	"poll_idle",
639 640
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
641 642 643
	NULL
};

644
static int symbol_filter(struct map *map __used, struct symbol *sym)
645
{
646
	const char *name = sym->name;
647
	int i;
648

649 650 651 652 653 654 655
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

656 657 658 659 660 661 662
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
663 664
		return 1;

665 666
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
667
			sym->ignore = true;
668 669 670
			break;
		}
	}
671 672 673 674

	return 0;
}

675
static void perf_event__process_sample(const union perf_event *event,
676
				       struct perf_evsel *evsel,
677 678
				       struct perf_sample *sample,
				       struct perf_session *session)
679
{
680
	struct symbol *parent = NULL;
681
	u64 ip = event->ip.ip;
682
	struct addr_location al;
683
	struct machine *machine;
684
	int err;
685
	u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
686

687
	++top.samples;
688

689
	switch (origin) {
690
	case PERF_RECORD_MISC_USER:
691 692
		++top.us_samples;
		if (top.hide_user_symbols)
693
			return;
694
		machine = perf_session__find_host_machine(session);
695
		break;
696
	case PERF_RECORD_MISC_KERNEL:
697 698
		++top.kernel_samples;
		if (top.hide_kernel_symbols)
699
			return;
700
		machine = perf_session__find_host_machine(session);
701 702
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
703
		++top.guest_kernel_samples;
704
		machine = perf_session__find_machine(session, event->ip.pid);
705
		break;
706
	case PERF_RECORD_MISC_GUEST_USER:
707
		++top.guest_us_samples;
708 709 710 711 712
		/*
		 * TODO: we don't process guest user from host side
		 * except simple counting.
		 */
		return;
713 714 715 716
	default:
		return;
	}

717
	if (!machine && perf_guest) {
718
		pr_err("Can't find guest [%d]'s kernel information\n",
719
			event->ip.pid);
720 721 722
		return;
	}

723
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
724
		top.exact_samples++;
725

726 727
	if (perf_event__preprocess_sample(event, session, &al, sample,
					  symbol_filter) < 0 ||
728
	    al.filtered)
729
		return;
730

731 732 733 734 735 736 737 738 739 740 741 742 743 744
	if (!kptr_restrict_warned &&
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
		kptr_restrict_warned = true;
	}

745
	if (al.sym == NULL) {
746
		const char *msg = "Kernel samples will not be resolved.\n";
747 748 749 750 751 752 753 754 755 756 757
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
758 759
		if (!kptr_restrict_warned && !vmlinux_warned &&
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
760
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
761 762 763 764 765 766 767 768 769 770 771
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
			vmlinux_warned = true;
772
		}
773 774
	}

775 776
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
777

778 779 780 781 782 783 784 785
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
			err = perf_session__resolve_callchain(session, al.thread,
							      sample->callchain, &parent);
			if (err)
				return;
		}

786 787 788 789
		he = perf_session__add_hist_entry(session, &al, sample, evsel);
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
790
		}
791

792 793 794 795 796 797 798 799 800
		if (symbol_conf.use_callchain) {
			err = callchain_append(he->callchain, &session->callchain_cursor,
					       sample->period);
			if (err)
				return;
		}

		if (sort_has_symbols)
			record_precise_ip(he, evsel->idx, ip);
801
	}
802 803

	return;
804 805
}

806
static void perf_session__mmap_read_idx(struct perf_session *self, int idx)
807
{
808
	struct perf_sample sample;
809
	struct perf_evsel *evsel;
810
	union perf_event *event;
811
	int ret;
812

813
	while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) {
814 815 816 817 818
		ret = perf_session__parse_sample(self, event, &sample);
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
819

820 821 822
		evsel = perf_evlist__id2evsel(self->evlist, sample.id);
		assert(evsel != NULL);

823
		if (event->header.type == PERF_RECORD_SAMPLE)
824 825 826
			perf_event__process_sample(event, evsel, &sample, self);
		else if (event->header.type < PERF_RECORD_MAX) {
			hists__inc_nr_events(&evsel->hists, event->header.type);
827
			perf_event__process(event, &sample, self);
828 829
		} else
			++self->hists.stats.nr_unknown_events;
830 831 832
	}
}

833
static void perf_session__mmap_read(struct perf_session *self)
834
{
835 836
	int i;

837 838
	for (i = 0; i < top.evlist->nr_mmaps; i++)
		perf_session__mmap_read_idx(self, i);
839 840
}

841 842
static void start_counters(struct perf_evlist *evlist)
{
843 844 845
	struct perf_evsel *counter, *first;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);
846

847 848
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
849 850 851 852
		struct xyarray *group_fd = NULL;

		if (group && counter != first)
			group_fd = first->fd;
853

854 855
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

856
		if (top.freq) {
857 858
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
859
			attr->sample_freq = top.freq;
860
		}
861

862 863 864 865 866
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

867 868 869
		if (symbol_conf.use_callchain)
			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

870
		attr->mmap = 1;
871
		attr->comm = 1;
872
		attr->inherit = inherit;
873 874
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
875
try_again:
876
		if (perf_evsel__open(counter, top.evlist->cpus,
877 878
				     top.evlist->threads, group,
				     group_fd) < 0) {
879 880
			int err = errno;

881
			if (err == EPERM || err == EACCES) {
882
				ui__error_paranoid();
883
				goto out_err;
884 885 886 887 888 889
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				goto retry_sample_id;
890
			}
891 892 893 894 895
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
896 897
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
898
				if (verbose)
899 900
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
901 902 903 904 905

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
906

907 908 909 910 911 912
			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(counter));
				goto out_err;
			}

913 914 915 916 917 918
			ui__warning("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
919
		}
920
	}
921

922 923 924 925 926 927 928 929 930 931 932
	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) {
		ui__warning("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
933 934
}

935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951
/*
 * Validate the callchain configuration against the sort keys.
 *
 * Call graphs (-G/--call-graph) need the "sym" sort key; without it the
 * request is rejected.  With it, and unless callchains were disabled or
 * set to mode "none", the callchain parameters are registered.
 *
 * Returns 0 on success, -EINVAL otherwise.
 */
static int setup_sample_type(void)
{
	if (!sort_has_symbols) {
		if (symbol_conf.use_callchain) {
			/* In this tool -g means --group; call graphs are -G. */
			ui__warning("Selected -G but \"sym\" not present in --sort/-s.\n");
			return -EINVAL;
		}
	} else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
		if (callchain_register_param(&callchain_param) < 0) {
			ui__warning("Can't register callchain params.\n");
			return -EINVAL;
		}
	}

	return 0;
}

952 953 954
static int __cmd_top(void)
{
	pthread_t thread;
955
	int ret;
956
	/*
957 958
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
959
	 */
960 961
	top.session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top.session == NULL)
962
		return -ENOMEM;
963

964 965 966 967
	ret = setup_sample_type();
	if (ret)
		goto out_delete;

968
	if (top.target_tid != -1)
969
		perf_event__synthesize_thread_map(top.evlist->threads,
970
						  perf_event__process, top.session);
971
	else
972
		perf_event__synthesize_threads(perf_event__process, top.session);
973

974
	start_counters(top.evlist);
975 976
	top.session->evlist = top.evlist;
	perf_session__update_sample_type(top.session);
977

978
	/* Wait for a minimal set of events before starting the snapshot */
979
	poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
980

981
	perf_session__mmap_read(top.session);
982

983
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
984
							     display_thread), NULL)) {
985 986 987 988 989 990 991 992 993 994 995 996 997 998 999
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
1000
		u64 hits = top.samples;
1001

1002
		perf_session__mmap_read(top.session);
1003

1004 1005
		if (hits == top.samples)
			ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
1006 1007
	}

1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
out_delete:
	perf_session__delete(top.session);
	top.session = NULL;

	return 0;
}

/*
 * Option callback for -G/--call-graph.
 *
 * @arg has the form "output_type[,min_percent[,print_limit][,order]]" where
 * output_type is graph, flat, fractal or none (unambiguous prefixes are
 * accepted) and order is caller or callee.  @arg is tokenised in place.
 * With @unset (--no-call-graph) callchains are disabled instead.
 *
 * Returns 0 on success and -1 on a malformed argument or when the
 * callchain parameters cannot be registered.
 */
static int
parse_callchain_opt(const struct option *opt __used, const char *arg,
		    int unset)
{
	char *tok, *tok2;
	char *endptr;
	size_t len;

	/*
	 * --no-call-graph
	 */
	if (unset) {
		dont_use_callchains = true;
		return 0;
	}

	symbol_conf.use_callchain = true;

	if (!arg)
		return 0;

	tok = strtok((char *)arg, ",");
	if (!tok)
		return -1;

	/*
	 * Compare over the length of the token itself: strtok() skips
	 * leading delimiters, so tok need not start where arg does.
	 */
	len = strlen(tok);

	/* get the output mode */
	if (!strncmp(tok, "graph", len))
		callchain_param.mode = CHAIN_GRAPH_ABS;

	else if (!strncmp(tok, "flat", len))
		callchain_param.mode = CHAIN_FLAT;

	else if (!strncmp(tok, "fractal", len))
		callchain_param.mode = CHAIN_GRAPH_REL;

	else if (!strncmp(tok, "none", len)) {
		callchain_param.mode = CHAIN_NONE;
		symbol_conf.use_callchain = false;

		return 0;
	}

	else
		return -1;

	/* get the min percentage */
	tok = strtok(NULL, ",");
	if (!tok)
		goto setup;

	callchain_param.min_percent = strtod(tok, &endptr);
	if (tok == endptr)
		return -1;

	/* get the print limit */
	tok2 = strtok(NULL, ",");
	if (!tok2)
		goto setup;

	/* A token starting with 'c' is the order, not a print limit. */
	if (tok2[0] != 'c') {
		callchain_param.print_limit = strtod(tok2, &endptr);
		tok2 = strtok(NULL, ",");
		if (!tok2)
			goto setup;
	}

	/* get the call chain order */
	if (!strcmp(tok2, "caller"))
		callchain_param.order = ORDER_CALLER;
	else if (!strcmp(tok2, "callee"))
		callchain_param.order = ORDER_CALLEE;
	else
		return -1;
setup:
	if (callchain_register_param(&callchain_param) < 0) {
		fprintf(stderr, "Can't register callchain params\n");
		return -1;
	}
	return 0;
}
1094 1095 1096 1097 1098 1099 1100

/* NULL-terminated usage lines handed to the option parser for 'perf top'. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL
};

static const struct option options[] = {
1101
	OPT_CALLBACK('e', "event", &top.evlist, "event",
1102
		     "event selector. use 'perf list' to list available events",
1103
		     parse_events_option),
1104 1105
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
1106
	OPT_INTEGER('p', "pid", &top.target_pid,
1107
		    "profile events on existing process id"),
1108
	OPT_INTEGER('t', "tid", &top.target_tid,
1109
		    "profile events on existing thread id"),
1110 1111
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
1112
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
1113
		    "list of cpus to monitor"),
1114 1115
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
1116
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1117
		    "hide kernel symbols"),
1118
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
1119 1120
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
1121
	OPT_INTEGER('d', "delay", &top.delay_secs,
1122 1123 1124
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
			    "dump the symbol table used for profiling"),
1125
	OPT_INTEGER('f', "count-filter", &top.count_filter,
1126 1127 1128
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &group,
			    "put the counters into a counter group"),
1129 1130
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
1131
	OPT_STRING(0, "sym-annotate", &sym_filter, "symbol name",
1132
		    "symbol to annotate"),
1133
	OPT_BOOLEAN('z', "zero", &top.zero,
1134
		    "zero history across updates"),
1135
	OPT_INTEGER('F', "freq", &top.freq,
1136
		    "profile at this frequency"),
1137
	OPT_INTEGER('E', "entries", &top.print_entries,
1138
		    "display this many functions"),
1139
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1140
		    "hide user symbols"),
1141 1142
	OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
1143
	OPT_INCR('v', "verbose", &verbose,
1144
		    "be more verbose (show counter open errors, etc)"),
1145 1146 1147 1148
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
1149 1150 1151 1152
	OPT_CALLBACK_DEFAULT('G', "call-graph", NULL, "output_type,min_percent, call_order",
		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
		     "Default: fractal,0.5,callee", &parse_callchain_opt,
		     callchain_default_opt),
1153 1154 1155 1156 1157 1158 1159 1160
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
1161 1162 1163 1164 1165 1166
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
1167 1168 1169
	OPT_END()
};

1170
/*
 * cmd_top - entry point of the 'top' builtin.
 *
 * Parses the command line, sets up sorting and the browser, creates the
 * maps for the requested pid/tid/cpu list, makes sure the evlist holds at
 * least one event, resolves the sampling period, allocates the per-event
 * fds plus the evlist pollfd/mmap storage, initializes the symbol layer and
 * finally hands over to __cmd_top().
 *
 * @argc, @argv: command line as handed to the builtin.
 * @prefix:      unused.
 *
 * Returns the value of __cmd_top(), -ENOMEM when the evlist, the default
 * event, the fds or the pollfd/mmap storage cannot be set up, or -1 when
 * symbol__init() fails. Bad command lines are reported through
 * usage_with_options(); a zero frequency and count calls exit().
 *
 * Once top.evlist has been created, every error return goes through
 * out_delete_evlist so the list is not leaked.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	if (use_stdio)
		use_browser = 0;
	else if (use_tui)
		use_browser = 1;

	setup_browser(false);

	/*
	 * Fold the pid into the tid before the exclusivity check below,
	 * otherwise a pid given together with a CPU list slips past it.
	 */
	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	/* CPU and PID are mutually exclusive */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	/* No event was selected on the command line: add the default one. */
	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		/* status is still -ENOMEM here */
		goto out_delete_evlist;
	}

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval) {
		top.freq = 0;
	} else if (top.freq) {
		default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
					 top.evlist->threads->nr) < 0)
			goto out_delete_evlist;
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (pos->attr.sample_period)
			continue;

		pos->attr.sample_period = default_interval;
	}

	if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
	    perf_evlist__alloc_mmap(top.evlist) < 0)
		goto out_delete_evlist;

	/* The first event on the list is the one initially displayed. */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = sizeof(struct annotation);

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		status = -1;
		goto out_delete_evlist;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&winsize);
	if (top.print_entries == 0) {
		/* No explicit -E: size the output from the terminal and track resizes. */
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	status = __cmd_top();
out_delete_evlist:
	perf_evlist__delete(top.evlist);

	return status;
}