/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
20
#include "builtin.h"
21

22
#include "perf.h"
23

24
#include "util/annotate.h"
25
#include "util/cache.h"
26
#include "util/color.h"
27
#include "util/evlist.h"
28
#include "util/evsel.h"
29 30
#include "util/session.h"
#include "util/symbol.h"
31
#include "util/thread.h"
32
#include "util/thread_map.h"
33
#include "util/top.h"
34
#include "util/util.h"
35
#include <linux/rbtree.h>
36 37
#include "util/parse-options.h"
#include "util/parse-events.h"
38
#include "util/cpumap.h"
39
#include "util/xyarray.h"
40
#include "util/sort.h"
41

42 43
#include "util/debug.h"

44
#include <assert.h>
45
#include <elf.h>
46
#include <fcntl.h>
47

48
#include <stdio.h>
49 50
#include <termios.h>
#include <unistd.h>
51
#include <inttypes.h>
52

53 54 55 56 57 58 59 60 61 62
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
63
#include <sys/utsname.h>
64 65 66 67 68
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

69
/*
 * Fill @ws with the terminal size.
 *
 * Sources are tried in this order:
 *   1. the LINES and COLUMNS environment variables (both must be set and
 *      both must convert to a non-zero value),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available,
 *   3. a fixed 25x80 fallback.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *lines = getenv("LINES");
	/* COLUMNS is only consulted when LINES is present. */
	const char *columns = lines ? getenv("COLUMNS") : NULL;

	if (lines)
		ws->ws_row = atoi(lines);

	if (columns) {
		ws->ws_col = atoi(columns);
		if (ws->ws_row != 0 && ws->ws_col != 0)
			return;
	}

#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 && ws->ws_row != 0 && ws->ws_col != 0)
		return;
#endif

	/* Nothing usable was found: fall back to a classic 25x80 screen. */
	ws->ws_row = 25;
	ws->ws_col = 80;
}

91
static void perf_top__update_print_entries(struct perf_top *top)
92
{
93 94
	if (top->print_entries > 9)
		top->print_entries -= 9;
95 96
}

97
/*
 * Window-size change handler; @arg is the struct perf_top to update.
 *
 * Re-reads the terminal dimensions and then reconciles them with
 * top->print_entries:
 *  - if no entry count is set, or the count plus 4 does not fit in the
 *    terminal, the count becomes the terminal row count;
 *  - otherwise the count grows by 4 and the recorded row count is set to it.
 * perf_top__update_print_entries() is applied afterwards in both cases.
 */
static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
{
	struct perf_top *top = arg;
	bool fits;

	get_term_dimensions(&top->winsize);

	fits = top->print_entries &&
	       (top->print_entries + 4) <= top->winsize.ws_row;

	if (fits) {
		top->print_entries += 4;
		top->winsize.ws_row = top->print_entries;
	} else {
		top->print_entries = top->winsize.ws_row;
	}

	perf_top__update_print_entries(top);
}

112
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
113 114
{
	struct symbol *sym;
115
	struct annotation *notes;
116
	struct map *map;
117
	int err = -1;
118

119
	if (!he || !he->ms.sym)
120 121
		return -1;

122 123
	sym = he->ms.sym;
	map = he->ms.map;
124 125 126 127

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
128
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
129 130 131
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
132
		return -1;
133 134
	}

135 136 137
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
138 139 140
		goto out_assign;
	}

141
	pthread_mutex_lock(&notes->lock);
142

143
	if (symbol__alloc_hist(sym) < 0) {
144
		pthread_mutex_unlock(&notes->lock);
145 146
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
147
		sleep(1);
148
		return err;
149
	}
150

151
	err = symbol__annotate(sym, map, 0);
152
	if (err == 0) {
153
out_assign:
154
		top->sym_filter_entry = he;
155
	}
156

157
	pthread_mutex_unlock(&notes->lock);
158
	return err;
159 160
}

161
static void __zero_source_counters(struct hist_entry *he)
162
{
163
	struct symbol *sym = he->ms.sym;
164
	symbol__annotate_zero_histograms(sym);
165 166
}

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
/*
 * Warn that @ip fell outside the bounds expected for @sym in @map.
 *
 * The message includes the address, the DSO, the map and symbol ranges, the
 * symbol binding ('g' global, 'l' local, 'w' for anything else) and, when
 * uname() succeeds, the machine architecture and kernel release.
 * Outside of the browser UI the function sleeps 5 seconds after warning.
 * The map is flagged (erange_warned) once the warning has been issued.
 */
static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
{
	struct utsname uts;
	const int uname_failed = uname(&uts);
	const char *arch = uname_failed ? "[unknown]" : uts.machine;
	const char *release = uname_failed ? "[unknown]" : uts.release;
	char binding;

	if (sym->binding == STB_GLOBAL)
		binding = 'g';
	else if (sym->binding == STB_LOCAL)
		binding = 'l';
	else
		binding = 'w';

	ui__warning("Out of bounds address found:\n\n"
		    "Addr:   %" PRIx64 "\n"
		    "DSO:    %s %c\n"
		    "Map:    %" PRIx64 "-%" PRIx64 "\n"
		    "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
		    "Arch:   %s\n"
		    "Kernel: %s\n"
		    "Tools:  %s\n\n"
		    "Not all samples will be on the annotation output.\n\n"
		    "Please report to linux-kernel@vger.kernel.org\n",
		    ip, map->dso->long_name, dso__symtab_origin(map->dso),
		    map->start, map->end, sym->start, sym->end,
		    binding, sym->name,
		    arch, release, perf_version_string);

	if (use_browser <= 0)
		sleep(5);

	map->erange_warned = true;
}

194 195 196
/*
 * Account one sample at address @ip for the symbol of @he, for event index
 * @counter.
 *
 * Nothing is recorded when @he or its symbol is NULL, or when the browser UI
 * is not in use (use_browser != 1) and the symbol is not the one currently
 * selected in top->sym_filter_entry.
 *
 * The annotation lock is only tried, never waited for: if it is busy the
 * sample is dropped.  An -ERANGE result from the accounting call triggers a
 * one-time warning per map.
 */
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;
	struct map *map;
	int err;

	if (he == NULL || he->ms.sym == NULL)
		return;

	sym = he->ms.sym;

	if (use_browser != 1 &&
	    (top->sym_filter_entry == NULL ||
	     top->sym_filter_entry->ms.sym != sym))
		return;

	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	map = he->ms.map;
	/* Translate the sampled address with the map's own map_ip callback. */
	ip = map->map_ip(map, ip);
	err = symbol__inc_addr_samples(sym, map, counter, ip);

	pthread_mutex_unlock(&notes->lock);

	if (err == -ERANGE && !map->erange_warned)
		ui__warn_map_erange(map, sym, ip);
}

230
static void perf_top__show_details(struct perf_top *top)
231
{
232
	struct hist_entry *he = top->sym_filter_entry;
233
	struct annotation *notes;
234
	struct symbol *symbol;
235
	int more;
236

237
	if (!he)
238 239
		return;

240
	symbol = he->ms.sym;
241 242 243 244 245 246
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
247

248 249
	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
250

251 252 253 254
	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
255
	else
256
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
257
	if (more != 0)
258
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
259 260
out_unlock:
	pthread_mutex_unlock(&notes->lock);
261
}
262 263 264

/*
 * Escape sequence written before every screen refresh: "ESC [ H" moves the
 * cursor to the top-left corner and "ESC [ 2 J" erases the whole display.
 * The literal was empty, so puts(CONSOLE_CLEAR) only emitted a newline.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

265 266 267
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
268
{
269 270 271 272 273 274 275 276
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
277
}
278

279
static void perf_top__print_sym_table(struct perf_top *top)
280
{
281 282
	char bf[160];
	int printed = 0;
283
	const int win_width = top->winsize.ws_col - 1;
284

285
	puts(CONSOLE_CLEAR);
286

287
	perf_top__header_snprintf(top, bf, sizeof(bf));
288
	printf("%s\n", bf);
289

290
	perf_top__reset_sample_counters(top);
291

292
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
293

294 295 296 297
	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
298 299
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
300
			      top->sym_evsel->hists.stats.nr_lost_warned);
301
		++printed;
302 303
	}

304 305
	if (top->sym_filter_entry) {
		perf_top__show_details(top);
306 307 308
		return;
	}

309 310 311 312 313 314 315
	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
316
	putchar('\n');
317 318
	hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
319 320
}

321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
/*
 * Print "@msg: " on stdout, read one line from stdin and, when the line
 * consists solely of decimal digits, store its value in *@target.
 *
 * *@target is left untouched when reading fails (EOF or error) or when the
 * line contains any non-digit character.  An empty line counts as valid and
 * stores 0, matching strtoul() on an empty string.
 *
 * The line buffer is released on every path, including a getline() failure
 * (POSIX requires the caller to free it even then); the previous code
 * returned without freeing it in that case.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t buf_size = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &buf_size, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> functions need an unsigned char value. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}

/*
 * Prompt for a percentage and store it in *@target when it lies in 0..100.
 *
 * The scratch value starts at -1, which is outside the accepted range, so
 * input that prompt_integer() rejects (and therefore does not store) leaves
 * *@target unchanged.  It used to start at 0, which made any rejected input
 * silently reset the percentage to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

356
static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
357 358
{
	char *buf = malloc(0), *p;
359
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
360
	struct rb_node *next;
361 362 363 364 365
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
366
		top->sym_filter_entry = NULL;
367 368 369 370 371 372 373 374 375 376
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

377
	next = rb_first(&top->sym_evsel->hists.entries);
378 379 380 381
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
382 383
			break;
		}
384
		next = rb_next(&n->rb_node);
385 386 387
	}

	if (!found) {
388
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
389 390
		sleep(1);
	} else
391
		perf_top__parse_source(top, found);
392 393 394 395 396

out_free:
	free(buf);
}

397
static void perf_top__print_mapped_keys(struct perf_top *top)
398
{
399 400
	char *name = NULL;

401 402
	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
403 404 405 406
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
407 408
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
409

410 411
	if (top->evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
412

413
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
414

415
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
416 417
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
418

419
	fprintf(stdout,
420
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
421
		top->hide_kernel_symbols ? "yes" : "no");
422 423
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
424 425
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
426 427 428
	fprintf(stdout, "\t[qQ]    quit.\n");
}

429
static int perf_top__key_mapped(struct perf_top *top, int c)
430 431 432 433 434 435 436 437
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
438 439
		case 'K':
		case 'U':
440 441 442
		case 'F':
		case 's':
		case 'S':
443 444
			return 1;
		case 'E':
445
			return top->evlist->nr_entries > 1 ? 1 : 0;
446 447
		default:
			break;
448 449 450
	}

	return 0;
451 452
}

453
static void perf_top__handle_keypress(struct perf_top *top, int c)
454
{
455
	if (!perf_top__key_mapped(top, c)) {
456 457 458
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

459
		perf_top__print_mapped_keys(top);
460 461 462 463 464 465 466 467 468 469 470 471 472 473
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
474
		if (!perf_top__key_mapped(top, c))
475 476 477
			return;
	}

478 479
	switch (c) {
		case 'd':
480 481 482
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
483 484
			break;
		case 'e':
485 486 487 488 489 490 491 492
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
493 494
			} else {
				perf_top__sig_winch(SIGWINCH, NULL, top);
495
				signal(SIGWINCH, SIG_DFL);
496
			}
497 498
			break;
		case 'E':
499
			if (top->evlist->nr_entries > 1) {
500 501 502
				/* Select 0 as the default event: */
				int counter = 0;

503
				fprintf(stderr, "\nAvailable events:");
504

505 506
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
507

508
				prompt_integer(&counter, "Enter details event counter");
509

510 511 512
				if (counter >= top->evlist->nr_entries) {
					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
513
					sleep(1);
514
					break;
515
				}
516 517
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
518
						break;
519
			} else
520
				top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
521 522
			break;
		case 'f':
523
			prompt_integer(&top->count_filter, "Enter display event count filter");
524 525
			break;
		case 'F':
526 527
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
528
			break;
529
		case 'K':
530
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
531
			break;
532 533 534
		case 'q':
		case 'Q':
			printf("exiting.\n");
535 536
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
537 538
			exit(0);
		case 's':
539
			perf_top__prompt_symbol(top, "Enter details symbol");
540 541
			break;
		case 'S':
542
			if (!top->sym_filter_entry)
543 544
				break;
			else {
545
				struct hist_entry *syme = top->sym_filter_entry;
546

547
				top->sym_filter_entry = NULL;
548 549 550
				__zero_source_counters(syme);
			}
			break;
551
		case 'U':
552
			top->hide_user_symbols = !top->hide_user_symbols;
553
			break;
554
		case 'z':
555
			top->zero = !top->zero;
556
			break;
557 558
		default:
			break;
559 560 561
	}
}

562 563 564 565 566 567 568 569 570 571
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;
	perf_top__reset_sample_counters(t);

	if (t->evlist->selected != NULL)
		t->sym_evsel = t->evlist->selected;

	hists__collapse_resort_threaded(&t->sym_evsel->hists);
	hists__output_resort_threaded(&t->sym_evsel->hists);
572
	hists__decay_entries_threaded(&t->sym_evsel->hists,
573 574
				      t->hide_user_symbols,
				      t->hide_kernel_symbols);
575 576
}

577
static void *display_thread_tui(void *arg)
578
{
579
	struct perf_evsel *pos;
580
	struct perf_top *top = arg;
581 582
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";

583
	perf_top__sort_new_samples(top);
584 585 586 587 588 589 590

	/*
	 * Initialize the uid_filter_str, in the future the TUI will allow
	 * Zooming in/out UIDs. For now juse use whatever the user passed
	 * via --uid.
	 */
	list_for_each_entry(pos, &top->evlist->entries, node)
591
		pos->hists.uid_filter_str = top->target.uid_str;
592

593
	perf_evlist__tui_browse_hists(top->evlist, help,
594
				      perf_top__sort_new_samples,
595
				      top, top->delay_secs);
596

597 598 599 600 601
	exit_browser(0);
	exit(0);
	return NULL;
}

602
static void *display_thread(void *arg)
603
{
604
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
605
	struct termios tc, save;
606
	struct perf_top *top = arg;
607 608 609 610 611 612 613
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
614

615
	pthread__unblock_sigwinch();
616
repeat:
617
	delay_msecs = top->delay_secs * 1000;
618 619 620
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
621

622
	while (1) {
623
		perf_top__print_sym_table(top);
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall trhu */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
640 641 642
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

643
	perf_top__handle_keypress(top, c);
644
	goto repeat;
645 646 647 648

	return NULL;
}

649
/* Tag samples to be skipped. */
650
static const char *skip_symbols[] = {
651
	"intel_idle",
652
	"default_idle",
653
	"native_safe_halt",
654 655 656 657
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
658
	"mwait_idle_with_hints",
659
	"poll_idle",
660 661
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
662 663 664
	NULL
};

665
static int symbol_filter(struct map *map __used, struct symbol *sym)
666
{
667
	const char *name = sym->name;
668
	int i;
669

670 671 672 673 674 675 676
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

677 678 679 680 681 682 683
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
684 685
		return 1;

686 687
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
688
			sym->ignore = true;
689 690 691
			break;
		}
	}
692 693 694 695

	return 0;
}

696 697
static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
698
				       struct perf_evsel *evsel,
699
				       struct perf_sample *sample,
700
				       struct machine *machine)
701
{
702
	struct perf_top *top = container_of(tool, struct perf_top, tool);
703
	struct symbol *parent = NULL;
704
	u64 ip = event->ip.ip;
705
	struct addr_location al;
706
	int err;
707

708
	if (!machine && perf_guest) {
709
		pr_err("Can't find guest [%d]'s kernel information\n",
710
			event->ip.pid);
711 712 713
		return;
	}

714 715 716 717 718 719
	if (!machine) {
		pr_err("%u unprocessable samples recorded.",
		       top->session->hists.stats.nr_unprocessable_samples++);
		return;
	}

720
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
721
		top->exact_samples++;
722

723
	if (perf_event__preprocess_sample(event, machine, &al, sample,
724
					  symbol_filter) < 0 ||
725
	    al.filtered)
726
		return;
727

728
	if (!top->kptr_restrict_warned &&
729 730 731 732 733 734 735 736 737 738
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
739
		top->kptr_restrict_warned = true;
740 741
	}

742
	if (al.sym == NULL) {
743
		const char *msg = "Kernel samples will not be resolved.\n";
744 745 746 747 748 749 750 751 752 753 754
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
755
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
756
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
757
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
758 759 760 761 762 763 764 765 766 767
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
768
			top->vmlinux_warned = true;
769
		}
770 771
	}

772 773
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
774

775 776
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
777 778
			err = machine__resolve_callchain(machine, evsel, al.thread,
							 sample->callchain, &parent);
779 780 781 782
			if (err)
				return;
		}

783
		he = perf_evsel__add_hist_entry(evsel, &al, sample);
784 785 786
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
787
		}
788

789
		if (symbol_conf.use_callchain) {
790
			err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
791 792 793 794 795
					       sample->period);
			if (err)
				return;
		}

796 797
		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
798
	}
799 800

	return;
801 802
}

803
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
804
{
805
	struct perf_sample sample;
806
	struct perf_evsel *evsel;
807
	struct perf_session *session = top->session;
808
	union perf_event *event;
809 810
	struct machine *machine;
	u8 origin;
811
	int ret;
812

813 814
	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
		ret = perf_session__parse_sample(session, event, &sample);
815 816 817 818
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
819

820
		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
821 822
		assert(evsel != NULL);

823 824
		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

825
		if (event->header.type == PERF_RECORD_SAMPLE)
826
			++top->samples;
827 828 829

		switch (origin) {
		case PERF_RECORD_MISC_USER:
830 831
			++top->us_samples;
			if (top->hide_user_symbols)
832
				continue;
833
			machine = perf_session__find_host_machine(session);
834 835
			break;
		case PERF_RECORD_MISC_KERNEL:
836 837
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
838
				continue;
839
			machine = perf_session__find_host_machine(session);
840 841
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
842 843
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
844 845
			break;
		case PERF_RECORD_MISC_GUEST_USER:
846
			++top->guest_us_samples;
847 848 849 850 851 852 853 854 855 856
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


857 858 859 860
		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
861
			hists__inc_nr_events(&evsel->hists, event->header.type);
862
			perf_event__process(&top->tool, event, &sample, machine);
863
		} else
864
			++session->hists.stats.nr_unknown_events;
865 866 867
	}
}

868
static void perf_top__mmap_read(struct perf_top *top)
869
{
870 871
	int i;

872 873
	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
874 875
}

876
static void perf_top__start_counters(struct perf_top *top)
877
{
878
	struct perf_evsel *counter, *first;
879
	struct perf_evlist *evlist = top->evlist;
880 881

	first = list_entry(evlist->entries.next, struct perf_evsel, node);
882

883 884
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
885 886
		struct xyarray *group_fd = NULL;

887
		if (top->group && counter != first)
888
			group_fd = first->fd;
889

890 891
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

892
		if (top->freq) {
893 894
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
895
			attr->sample_freq = top->freq;
896
		}
897

898 899 900 901 902
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

903 904 905
		if (symbol_conf.use_callchain)
			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

906
		attr->mmap = 1;
907
		attr->comm = 1;
908
		attr->inherit = top->inherit;
909 910 911
fallback_missing_features:
		if (top->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
912
retry_sample_id:
913
		attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
914
try_again:
915 916
		if (perf_evsel__open(counter, top->evlist->cpus,
				     top->evlist->threads, top->group,
917
				     group_fd) < 0) {
918 919
			int err = errno;

920
			if (err == EPERM || err == EACCES) {
921
				ui__error_paranoid();
922
				goto out_err;
923 924 925 926 927 928 929
			} else if (err == EINVAL) {
				if (!top->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					top->exclude_guest_missing = true;
					goto fallback_missing_features;
930
				} else if (!top->sample_id_all_missing) {
931 932 933
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
934
					top->sample_id_all_missing = true;
935 936
					goto retry_sample_id;
				}
937
			}
938 939 940 941 942
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
943 944
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
945
				if (verbose)
946 947
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
948 949 950 951 952

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
953

954 955 956 957
			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(counter));
				goto out_err;
958 959 960 961
			} else if (err == EMFILE) {
				ui__warning("Too many events are opened.\n"
					    "Try again after reducing the number of events\n");
				goto out_err;
962 963
			}

964 965 966 967 968 969
			ui__warning("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
970
		}
971
	}
972

973
	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
974 975 976 977 978 979 980 981 982 983
		ui__warning("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
984 985
}

986
static int perf_top__setup_sample_type(struct perf_top *top)
987
{
988
	if (!top->sort_has_symbols) {
989 990 991 992
		if (symbol_conf.use_callchain) {
			ui__warning("Selected -g but \"sym\" not present in --sort/-s.");
			return -EINVAL;
		}
993
	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
994 995 996 997 998 999 1000 1001 1002
		if (callchain_register_param(&callchain_param) < 0) {
			ui__warning("Can't register callchain params.\n");
			return -EINVAL;
		}
	}

	return 0;
}

1003
static int __cmd_top(struct perf_top *top)
1004 1005
{
	pthread_t thread;
1006
	int ret;
1007
	/*
1008 1009
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
1010
	 */
1011 1012
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
1013
		return -ENOMEM;
1014

1015
	ret = perf_top__setup_sample_type(top);
1016 1017 1018
	if (ret)
		goto out_delete;

1019
	if (!perf_target__no_task(&top->target))
1020
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
1021
						  perf_event__process,
1022
						  &top->session->host_machine);
1023
	else
1024 1025 1026 1027 1028
		perf_event__synthesize_threads(&top->tool, perf_event__process,
					       &top->session->host_machine);
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
	perf_session__update_sample_type(top->session);
1029

1030
	/* Wait for a minimal set of events before starting the snapshot */
1031
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1032

1033
	perf_top__mmap_read(top);
1034

1035
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
1036
							    display_thread), top)) {
1037 1038 1039 1040
		printf("Could not create display thread.\n");
		exit(-1);
	}

1041
	if (top->realtime_prio) {
1042 1043
		struct sched_param param;

1044
		param.sched_priority = top->realtime_prio;
1045 1046 1047 1048 1049 1050 1051
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
1052
		u64 hits = top->samples;
1053

1054
		perf_top__mmap_read(top);
1055

1056 1057
		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1058 1059
	}

1060
out_delete:
1061 1062
	perf_session__delete(top->session);
	top->session = NULL;
1063 1064 1065 1066 1067

	return 0;
}

static int
1068
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1069
{
1070
	struct perf_top *top = (struct perf_top *)opt->value;
1071 1072 1073 1074 1075 1076 1077
	char *tok, *tok2;
	char *endptr;

	/*
	 * --no-call-graph
	 */
	if (unset) {
1078
		top->dont_use_callchains = true;
1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105
		return 0;
	}

	symbol_conf.use_callchain = true;

	if (!arg)
		return 0;

	tok = strtok((char *)arg, ",");
	if (!tok)
		return -1;

	/* get the output mode */
	if (!strncmp(tok, "graph", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_ABS;

	else if (!strncmp(tok, "flat", strlen(arg)))
		callchain_param.mode = CHAIN_FLAT;

	else if (!strncmp(tok, "fractal", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_REL;

	else if (!strncmp(tok, "none", strlen(arg))) {
		callchain_param.mode = CHAIN_NONE;
		symbol_conf.use_callchain = false;

		return 0;
1106
	} else
1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141
		return -1;

	/* get the min percentage */
	tok = strtok(NULL, ",");
	if (!tok)
		goto setup;

	callchain_param.min_percent = strtod(tok, &endptr);
	if (tok == endptr)
		return -1;

	/* get the print limit */
	tok2 = strtok(NULL, ",");
	if (!tok2)
		goto setup;

	if (tok2[0] != 'c') {
		callchain_param.print_limit = strtod(tok2, &endptr);
		tok2 = strtok(NULL, ",");
		if (!tok2)
			goto setup;
	}

	/* get the call chain order */
	if (!strcmp(tok2, "caller"))
		callchain_param.order = ORDER_CALLER;
	else if (!strcmp(tok2, "callee"))
		callchain_param.order = ORDER_CALLEE;
	else
		return -1;
setup:
	if (callchain_register_param(&callchain_param) < 0) {
		fprintf(stderr, "Can't register callchain params\n");
		return -1;
	}
1142 1143
	return 0;
}
1144 1145 1146 1147 1148 1149

/*
 * NULL-terminated list of usage synopsis lines for 'perf top'; handed to
 * parse_options() and usage_with_options() by cmd_top().
 */
static const char * const top_usage[] = { "perf top [<options>]", NULL };

1150 1151 1152
/*
 * cmd_top - entry point of the 'perf top' builtin.
 *
 * @argc/@argv: command line arguments, parsed against the option table below.
 * @prefix:     unused.
 *
 * Parses the options, validates the profiling target, sets up the event
 * list, sampling period/frequency, symbol handling and terminal size, then
 * hands over to __cmd_top().
 *
 * Returns the status of __cmd_top(), or a negative value when setup fails.
 * Invalid usage and a zero frequency/count terminate the process instead of
 * returning.  Once the event list has been allocated, every returning path
 * leaves through out_delete_evlist so the list is always released.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status;
	char errbuf[BUFSIZ];
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.freq		     = 1000, /* 1 KHz */
		.mmap_pages	     = 128,
		.sym_pcnt_filter     = 5,
	};
	/* Writable copy: parse_callchain_opt() tokenizes its argument in place. */
	char callchain_default_opt[] = "fractal,0.5,callee";
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_INTEGER('c', "count", &top.default_interval,
		    "event period to sample"),
	OPT_STRING('p', "pid", &top.target.pid, "pid",
		    "profile events on existing process id"),
	OPT_STRING('t', "tid", &top.target.tid, "tid",
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &top.group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "inherit", &top.inherit,
		    "child tasks inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero,
		    "zero history across updates"),
	OPT_INTEGER('F', "freq", &top.freq,
		    "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
		     "Default: fractal,0.5,callee", &parse_callchain_opt,
		     callchain_default_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"),
	OPT_END()
	};

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	/* Any argument left over after option parsing is a usage error. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* If sort_order is still the default, sort by dso and symbol. */
	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	/* --stdio takes precedence over --tui when both are given. */
	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	/* A validation problem is only reported; setup carries on. */
	status = perf_target__validate(&top.target);
	if (status) {
		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	status = perf_target__parse_uid(&top.target);
	if (status) {
		/* Save errno before the calls below can overwrite it. */
		int saved_errno = errno;

		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);

		status = -saved_errno;
		goto out_delete_evlist;
	}

	/* No explicit target given: profile system wide. */
	if (perf_target__none(&top.target))
		top.target.system_wide = true;

	if (perf_evlist__create_maps(top.evlist, &top.target) < 0)
		usage_with_options(top_usage, options);

	/* No -e given: fall back to the default event. */
	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		status = -ENOMEM;
		goto out_delete_evlist;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (top.default_interval)
		top.freq = 0;
	else if (top.freq) {
		top.default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (!pos->attr.sample_period)
			pos->attr.sample_period = top.default_interval;
	}

	/* Start with the first event on the list selected. */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = sizeof(struct annotation);

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		status = -1;
		goto out_delete_evlist;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	/*
	 * print_entries left at 0: derive it from the terminal size now and
	 * install a SIGWINCH handler.
	 */
	if (top.print_entries == 0) {
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);

out_delete_evlist:
	perf_evlist__delete(top.evlist);

	return status;
}