/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "builtin.h"

#include "perf.h"

#include "util/annotate.h"
#include "util/cache.h"
#include "util/color.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/xyarray.h"
#include "util/sort.h"

#include "util/debug.h"

#include <assert.h>
#include <elf.h>
#include <fcntl.h>

#include <stdio.h>
#include <termios.h>
#include <unistd.h>
#include <inttypes.h>

#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

/*
 * Fill @ws with the terminal size.
 *
 * Sources, in order of preference:
 *   1. the LINES and COLUMNS environment variables, when both are set and
 *      both convert (via atoi) to non-zero values;
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when that request is
 *      defined and it reports non-zero rows and columns;
 *   3. a fixed 25 rows by 80 columns fallback.
 *
 * Note that ws_row (and possibly ws_col) may be written from the
 * environment even when that source is finally rejected; a later source
 * then overwrites them.
 */
void get_term_dimensions(struct winsize *ws)
{
	char *s = getenv("LINES");

	if (s != NULL) {
		ws->ws_row = atoi(s);
		s = getenv("COLUMNS");
		if (s != NULL) {
			ws->ws_col = atoi(s);
			if (ws->ws_row && ws->ws_col)
				return;
		}
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

static void perf_top__update_print_entries(struct perf_top *top)
92
{
93 94
	if (top->print_entries > 9)
		top->print_entries -= 9;
95 96
}

97
/*
 * Recompute the display geometry after a terminal resize.
 *
 * @arg is treated as the struct perf_top to update.  The terminal size is
 * re-read into top->winsize; then
 *   - if print_entries is 0, or print_entries + 4 does not fit in the
 *     terminal height, print_entries becomes the terminal height;
 *   - otherwise print_entries grows by 4 and winsize.ws_row is forced to
 *     that value.
 * Finally perf_top__update_print_entries() trims the result.
 *
 * NOTE(review): this function is also installed as an SA_SIGINFO handler;
 * in that case the third argument supplied by the system is a ucontext
 * pointer, not a struct perf_top -- confirm how @arg is meant to be
 * provided on real signal delivery.
 */
static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
{
	struct perf_top *top = arg;

	get_term_dimensions(&top->winsize);
	if (!top->print_entries
	    || (top->print_entries+4) > top->winsize.ws_row) {
		top->print_entries = top->winsize.ws_row;
	} else {
		top->print_entries += 4;
		top->winsize.ws_row = top->print_entries;
	}
	perf_top__update_print_entries(top);
}

static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
113 114
{
	struct symbol *sym;
115
	struct annotation *notes;
116
	struct map *map;
117
	int err = -1;
118

119
	if (!he || !he->ms.sym)
120 121
		return -1;

122 123
	sym = he->ms.sym;
	map = he->ms.map;
124 125 126 127

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
128
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
129 130 131
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
132
		return -1;
133 134
	}

135 136 137
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
138 139 140
		goto out_assign;
	}

141
	pthread_mutex_lock(&notes->lock);
142

143
	if (symbol__alloc_hist(sym) < 0) {
144
		pthread_mutex_unlock(&notes->lock);
145 146
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
147
		sleep(1);
148
		return err;
149
	}
150

151
	err = symbol__annotate(sym, map, 0);
152
	if (err == 0) {
153
out_assign:
154
		top->sym_filter_entry = he;
155
	}
156

157
	pthread_mutex_unlock(&notes->lock);
158
	return err;
159 160
}

161
static void __zero_source_counters(struct hist_entry *he)
162
{
163
	struct symbol *sym = he->ms.sym;
164
	symbol__annotate_zero_histograms(sym);
165 166
}

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
/*
 * Warn that sample address @ip fell outside of what @map / @sym cover.
 *
 * The warning lists the address, the DSO, the map and symbol ranges, the
 * symbol binding ('g' global, 'l' local, 'w' otherwise), and the machine
 * and kernel release reported by uname() ("[unknown]" when uname() fails).
 * When no browser is in use (use_browser <= 0) the function then sleeps for
 * five seconds.  map->erange_warned is set so that callers can avoid
 * repeating the warning for the same map.
 */
static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
{
	struct utsname uts;
	int err = uname(&uts);

	ui__warning("Out of bounds address found:\n\n"
		    "Addr:   %" PRIx64 "\n"
		    "DSO:    %s %c\n"
		    "Map:    %" PRIx64 "-%" PRIx64 "\n"
		    "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
		    "Arch:   %s\n"
		    "Kernel: %s\n"
		    "Tools:  %s\n\n"
		    "Not all samples will be on the annotation output.\n\n"
		    "Please report to linux-kernel@vger.kernel.org\n",
		    ip, map->dso->long_name, dso__symtab_origin(map->dso),
		    map->start, map->end, sym->start, sym->end,
		    sym->binding == STB_GLOBAL ? 'g' :
		    sym->binding == STB_LOCAL  ? 'l' : 'w', sym->name,
		    err ? "[unknown]" : uts.machine,
		    err ? "[unknown]" : uts.release, perf_version_string);
	if (use_browser <= 0)
		sleep(5);

	map->erange_warned = true;
}

/*
 * Account one sample at address @ip against the annotation histogram of
 * the symbol behind @he, for event index @counter.
 *
 * Nothing is recorded when @he or its symbol is NULL, or when the symbol is
 * not the one selected in top->sym_filter_entry and use_browser is not 1.
 * The annotation lock is only tried, never waited for: if it is already
 * held the sample is dropped.  The histogram is allocated on first use.
 *
 * If symbol__inc_addr_samples() reports -ERANGE and the map has not been
 * warned about yet, an out-of-bounds warning is shown (after the lock has
 * been released).
 */
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;
	int err;

	if (he == NULL || he->ms.sym == NULL ||
	    ((top->sym_filter_entry == NULL ||
	      top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
		return;

	sym = he->ms.sym;
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock))
		return;

	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	/* Translate the sampled address with the map's own map_ip hook. */
	ip = he->ms.map->map_ip(he->ms.map, ip);
	err = symbol__inc_addr_samples(sym, he->ms.map, counter, ip);

	pthread_mutex_unlock(&notes->lock);

	if (err == -ERANGE && !he->ms.map->erange_warned)
		ui__warn_map_erange(he->ms.map, sym, ip);
}

static void perf_top__show_details(struct perf_top *top)
231
{
232
	struct hist_entry *he = top->sym_filter_entry;
233
	struct annotation *notes;
234
	struct symbol *symbol;
235
	int more;
236

237
	if (!he)
238 239
		return;

240
	symbol = he->ms.sym;
241 242 243 244 245 246
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
247

248 249
	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
250

251 252 253 254
	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
255
	else
256
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
257
	if (more != 0)
258
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
259 260
out_unlock:
	pthread_mutex_unlock(&notes->lock);
261
}
262 263 264

/*
 * Terminal control string written before each refresh: ESC[H moves the
 * cursor to the home position and ESC[2J erases the whole display.
 *
 * The escape bytes are spelled as octal escapes so they stay visible and
 * survive tools that strip raw control characters (the literal had ended
 * up empty, which made the "clear" a plain newline).
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
268
{
269 270 271 272 273 274 275 276
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
277
}
278

279
static void perf_top__print_sym_table(struct perf_top *top)
280
{
281 282
	char bf[160];
	int printed = 0;
283
	const int win_width = top->winsize.ws_col - 1;
284

285
	puts(CONSOLE_CLEAR);
286

287
	perf_top__header_snprintf(top, bf, sizeof(bf));
288
	printf("%s\n", bf);
289

290
	perf_top__reset_sample_counters(top);
291

292
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
293

294 295 296 297
	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
298 299
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
300
			      top->sym_evsel->hists.stats.nr_lost_warned);
301
		++printed;
302 303
	}

304 305
	if (top->sym_filter_entry) {
		perf_top__show_details(top);
306 307 308
		return;
	}

309 310 311 312 313 314 315
	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
316
	putchar('\n');
317 318
	hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
319 320
}

321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
/*
 * Prompt with @msg on stdout and read one line from stdin.
 *
 * When the line (without its trailing newline) consists only of decimal
 * digits, it is converted with strtoul() and stored in *@target; an empty
 * line therefore stores 0.  On read failure or when any other character is
 * present, *@target is left unchanged.
 *
 * The line buffer is owned by this function and released on every path,
 * including a failed getline(), which may still have allocated it.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t len = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* ctype functions require an unsigned char value. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}
	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}

/*
 * Prompt for an integer with @msg and store it in *@target only when it
 * lies in the range 0..100.  Because the scratch value starts at 0, input
 * that prompt_integer() rejects results in 0 being stored.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = 0;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
357 358
{
	char *buf = malloc(0), *p;
359
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
360
	struct rb_node *next;
361 362 363 364 365
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
366
		top->sym_filter_entry = NULL;
367 368 369 370 371 372 373 374 375 376
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

377
	next = rb_first(&top->sym_evsel->hists.entries);
378 379 380 381
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
382 383
			break;
		}
384
		next = rb_next(&n->rb_node);
385 386 387
	}

	if (!found) {
388
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
389 390
		sleep(1);
	} else
391
		perf_top__parse_source(top, found);
392 393 394 395 396

out_free:
	free(buf);
}

397
static void perf_top__print_mapped_keys(struct perf_top *top)
398
{
399 400
	char *name = NULL;

401 402
	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
403 404 405 406
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
407 408
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
409

410 411
	if (top->evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
412

413
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
414

415
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
416 417
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
418

419
	fprintf(stdout,
420
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
421
		top->hide_kernel_symbols ? "yes" : "no");
422 423
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
424 425
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
426 427 428
	fprintf(stdout, "\t[qQ]    quit.\n");
}

429
static int perf_top__key_mapped(struct perf_top *top, int c)
430 431 432 433 434 435 436 437
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
438 439
		case 'K':
		case 'U':
440 441 442
		case 'F':
		case 's':
		case 'S':
443 444
			return 1;
		case 'E':
445
			return top->evlist->nr_entries > 1 ? 1 : 0;
446 447
		default:
			break;
448 449 450
	}

	return 0;
451 452
}

453
static void perf_top__handle_keypress(struct perf_top *top, int c)
454
{
455
	if (!perf_top__key_mapped(top, c)) {
456 457 458
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

459
		perf_top__print_mapped_keys(top);
460 461 462 463 464 465 466 467 468 469 470 471 472 473
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
474
		if (!perf_top__key_mapped(top, c))
475 476 477
			return;
	}

478 479
	switch (c) {
		case 'd':
480 481 482
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
483 484
			break;
		case 'e':
485 486 487 488 489 490 491 492
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
493 494
			} else {
				perf_top__sig_winch(SIGWINCH, NULL, top);
495
				signal(SIGWINCH, SIG_DFL);
496
			}
497 498
			break;
		case 'E':
499
			if (top->evlist->nr_entries > 1) {
500 501 502
				/* Select 0 as the default event: */
				int counter = 0;

503
				fprintf(stderr, "\nAvailable events:");
504

505 506
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
507

508
				prompt_integer(&counter, "Enter details event counter");
509

510 511 512
				if (counter >= top->evlist->nr_entries) {
					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
513
					sleep(1);
514
					break;
515
				}
516 517
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
518
						break;
519
			} else
520
				top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
521 522
			break;
		case 'f':
523
			prompt_integer(&top->count_filter, "Enter display event count filter");
524 525
			break;
		case 'F':
526 527
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
528
			break;
529
		case 'K':
530
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
531
			break;
532 533 534
		case 'q':
		case 'Q':
			printf("exiting.\n");
535 536
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
537 538
			exit(0);
		case 's':
539
			perf_top__prompt_symbol(top, "Enter details symbol");
540 541
			break;
		case 'S':
542
			if (!top->sym_filter_entry)
543 544
				break;
			else {
545
				struct hist_entry *syme = top->sym_filter_entry;
546

547
				top->sym_filter_entry = NULL;
548 549 550
				__zero_source_counters(syme);
			}
			break;
551
		case 'U':
552
			top->hide_user_symbols = !top->hide_user_symbols;
553
			break;
554
		case 'z':
555
			top->zero = !top->zero;
556
			break;
557 558
		default:
			break;
559 560 561
	}
}

562 563 564 565 566 567 568 569 570 571
/*
 * Refresh callback taking the struct perf_top as an opaque @arg.
 *
 * Resets the sample counters, switches the active event to the event list's
 * selected one when there is a selection, and then collapses, resorts and
 * decays the histograms of the active event.
 */
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;
	perf_top__reset_sample_counters(t);

	if (t->evlist->selected != NULL)
		t->sym_evsel = t->evlist->selected;

	hists__collapse_resort_threaded(&t->sym_evsel->hists);
	hists__output_resort_threaded(&t->sym_evsel->hists);
	hists__decay_entries_threaded(&t->sym_evsel->hists,
				      t->hide_user_symbols,
				      t->hide_kernel_symbols);
}

static void *display_thread_tui(void *arg)
578
{
579
	struct perf_evsel *pos;
580
	struct perf_top *top = arg;
581 582
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";

583
	perf_top__sort_new_samples(top);
584 585 586 587 588 589 590

	/*
	 * Initialize the uid_filter_str, in the future the TUI will allow
	 * Zooming in/out UIDs. For now juse use whatever the user passed
	 * via --uid.
	 */
	list_for_each_entry(pos, &top->evlist->entries, node)
591
		pos->hists.uid_filter_str = top->target.uid_str;
592

593
	perf_evlist__tui_browse_hists(top->evlist, help,
594
				      perf_top__sort_new_samples,
595
				      top, top->delay_secs);
596

597 598 599 600 601
	exit_browser(0);
	exit(0);
	return NULL;
}

602
static void *display_thread(void *arg)
603
{
604
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
605
	struct termios tc, save;
606
	struct perf_top *top = arg;
607 608 609 610 611 612 613
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
614

615
	pthread__unblock_sigwinch();
616
repeat:
617
	delay_msecs = top->delay_secs * 1000;
618 619 620
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
621

622
	while (1) {
623
		perf_top__print_sym_table(top);
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall trhu */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
640 641 642
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

643
	perf_top__handle_keypress(top, c);
644
	goto repeat;
645 646 647 648

	return NULL;
}

649
/*
 * Tag samples to be skipped: symbols whose name matches one of these
 * entries get their ->ignore flag set by symbol_filter().  The list is
 * NULL-terminated.
 */
static const char *skip_symbols[] = {
	"intel_idle",
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

/*
 * Symbol filter callback.
 *
 * Returns 1 for section-marker style names (_text, _etext, _sinittext,
 * names starting with init_module or cleanup_module, names containing
 * _text_start or _text_end) and 0 for everything else.  Symbols whose name
 * is listed in skip_symbols[] are kept (0 is returned) but have ->ignore
 * set.  A single leading '.' in the name is skipped before comparing.
 *
 * NOTE(review): a return of 1 presumably tells the symbol loader to drop
 * the symbol -- confirm against the filter callback's contract.
 */
static int symbol_filter(struct map *map __used, struct symbol *sym)
{
	const char *name = sym->name;
	int i;

	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
		return 1;

	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			sym->ignore = true;
			break;
		}
	}

	return 0;
}

static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
698
				       struct perf_evsel *evsel,
699
				       struct perf_sample *sample,
700
				       struct machine *machine)
701
{
702
	struct perf_top *top = container_of(tool, struct perf_top, tool);
703
	struct symbol *parent = NULL;
704
	u64 ip = event->ip.ip;
705
	struct addr_location al;
706
	int err;
707

708
	if (!machine && perf_guest) {
709
		pr_err("Can't find guest [%d]'s kernel information\n",
710
			event->ip.pid);
711 712 713
		return;
	}

714 715 716 717 718 719
	if (!machine) {
		pr_err("%u unprocessable samples recorded.",
		       top->session->hists.stats.nr_unprocessable_samples++);
		return;
	}

720
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
721
		top->exact_samples++;
722

723
	if (perf_event__preprocess_sample(event, machine, &al, sample,
724
					  symbol_filter) < 0 ||
725
	    al.filtered)
726
		return;
727

728
	if (!top->kptr_restrict_warned &&
729 730 731 732 733 734 735 736 737 738
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
739
		top->kptr_restrict_warned = true;
740 741
	}

742
	if (al.sym == NULL) {
743
		const char *msg = "Kernel samples will not be resolved.\n";
744 745 746 747 748 749 750 751 752 753 754
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
755
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
756
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
757
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
758 759 760 761 762 763 764 765 766 767
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
768
			top->vmlinux_warned = true;
769
		}
770 771
	}

772 773
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
774

775 776
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
777 778
			err = machine__resolve_callchain(machine, evsel, al.thread,
							 sample->callchain, &parent);
779 780 781 782
			if (err)
				return;
		}

783
		he = perf_evsel__add_hist_entry(evsel, &al, sample);
784 785 786
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
787
		}
788

789
		if (symbol_conf.use_callchain) {
790
			err = callchain_append(he->callchain, &callchain_cursor,
791 792 793 794 795
					       sample->period);
			if (err)
				return;
		}

796 797
		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
798
	}
799 800

	return;
801 802
}

803
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
804
{
805
	struct perf_sample sample;
806
	struct perf_evsel *evsel;
807
	struct perf_session *session = top->session;
808
	union perf_event *event;
809 810
	struct machine *machine;
	u8 origin;
811
	int ret;
812

813 814
	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
		ret = perf_session__parse_sample(session, event, &sample);
815 816 817 818
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
819

820
		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
821 822
		assert(evsel != NULL);

823 824
		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

825
		if (event->header.type == PERF_RECORD_SAMPLE)
826
			++top->samples;
827 828 829

		switch (origin) {
		case PERF_RECORD_MISC_USER:
830 831
			++top->us_samples;
			if (top->hide_user_symbols)
832
				continue;
833
			machine = perf_session__find_host_machine(session);
834 835
			break;
		case PERF_RECORD_MISC_KERNEL:
836 837
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
838
				continue;
839
			machine = perf_session__find_host_machine(session);
840 841
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
842 843
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
844 845
			break;
		case PERF_RECORD_MISC_GUEST_USER:
846
			++top->guest_us_samples;
847 848 849 850 851 852 853 854 855 856
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


857 858 859 860
		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
861
			hists__inc_nr_events(&evsel->hists, event->header.type);
862
			perf_event__process(&top->tool, event, &sample, machine);
863
		} else
864
			++session->hists.stats.nr_unknown_events;
865 866 867
	}
}

868
static void perf_top__mmap_read(struct perf_top *top)
869
{
870 871
	int i;

872 873
	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
874 875
}

876
static void perf_top__start_counters(struct perf_top *top)
877
{
878
	struct perf_evsel *counter, *first;
879
	struct perf_evlist *evlist = top->evlist;
880 881

	first = list_entry(evlist->entries.next, struct perf_evsel, node);
882

883 884
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
885 886
		struct xyarray *group_fd = NULL;

887
		if (top->group && counter != first)
888
			group_fd = first->fd;
889

890 891
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

892
		if (top->freq) {
893 894
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
895
			attr->sample_freq = top->freq;
896
		}
897

898 899 900 901 902
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

903 904 905
		if (perf_target__has_cpu(&top->target))
			attr->sample_type |= PERF_SAMPLE_CPU;

906 907 908
		if (symbol_conf.use_callchain)
			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

909
		attr->mmap = 1;
910
		attr->comm = 1;
911
		attr->inherit = top->inherit;
912 913 914
fallback_missing_features:
		if (top->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
915
retry_sample_id:
916
		attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
917
try_again:
918 919
		if (perf_evsel__open(counter, top->evlist->cpus,
				     top->evlist->threads, top->group,
920
				     group_fd) < 0) {
921 922
			int err = errno;

923
			if (err == EPERM || err == EACCES) {
924
				ui__error_paranoid();
925
				goto out_err;
926 927 928 929 930 931 932
			} else if (err == EINVAL) {
				if (!top->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					top->exclude_guest_missing = true;
					goto fallback_missing_features;
933
				} else if (!top->sample_id_all_missing) {
934 935 936
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
937
					top->sample_id_all_missing = true;
938 939
					goto retry_sample_id;
				}
940
			}
941 942 943 944 945
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
946 947
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
948
				if (verbose)
949 950
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
951 952 953

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
954 955
				if (counter->name) {
					free(counter->name);
956
					counter->name = NULL;
957
				}
958 959
				goto try_again;
			}
960

961
			if (err == ENOENT) {
962
				ui__error("The %s event is not supported.\n",
963 964
					    event_name(counter));
				goto out_err;
965
			} else if (err == EMFILE) {
966
				ui__error("Too many events are opened.\n"
967 968
					    "Try again after reducing the number of events\n");
				goto out_err;
969 970
			}

971
			ui__error("The sys_perf_event_open() syscall "
972 973 974 975 976
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
977
		}
978
	}
979

980
	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
981
		ui__error("Failed to mmap with %d (%s)\n",
982 983 984 985 986 987 988 989 990
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
991 992
}

993
static int perf_top__setup_sample_type(struct perf_top *top)
994
{
995
	if (!top->sort_has_symbols) {
996
		if (symbol_conf.use_callchain) {
997
			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
998 999
			return -EINVAL;
		}
1000
	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
1001
		if (callchain_register_param(&callchain_param) < 0) {
1002
			ui__error("Can't register callchain params.\n");
1003 1004 1005 1006 1007 1008 1009
			return -EINVAL;
		}
	}

	return 0;
}

1010
static int __cmd_top(struct perf_top *top)
1011 1012
{
	pthread_t thread;
1013
	int ret;
1014
	/*
1015 1016
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
1017
	 */
1018 1019
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
1020
		return -ENOMEM;
1021

1022
	ret = perf_top__setup_sample_type(top);
1023 1024 1025
	if (ret)
		goto out_delete;

1026
	if (perf_target__has_task(&top->target))
1027
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
1028
						  perf_event__process,
1029
						  &top->session->host_machine);
1030
	else
1031 1032 1033 1034 1035
		perf_event__synthesize_threads(&top->tool, perf_event__process,
					       &top->session->host_machine);
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
	perf_session__update_sample_type(top->session);
1036

1037
	/* Wait for a minimal set of events before starting the snapshot */
1038
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1039

1040
	perf_top__mmap_read(top);
1041

1042
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
1043
							    display_thread), top)) {
1044
		ui__error("Could not create display thread.\n");
1045 1046 1047
		exit(-1);
	}

1048
	if (top->realtime_prio) {
1049 1050
		struct sched_param param;

1051
		param.sched_priority = top->realtime_prio;
1052
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1053
			ui__error("Could not set realtime priority.\n");
1054 1055 1056 1057 1058
			exit(-1);
		}
	}

	while (1) {
1059
		u64 hits = top->samples;
1060

1061
		perf_top__mmap_read(top);
1062

1063 1064
		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1065 1066
	}

1067
out_delete:
1068 1069
	perf_session__delete(top->session);
	top->session = NULL;
1070 1071 1072 1073 1074

	return 0;
}

static int
1075
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1076
{
1077
	struct perf_top *top = (struct perf_top *)opt->value;
1078 1079 1080 1081 1082 1083 1084
	char *tok, *tok2;
	char *endptr;

	/*
	 * --no-call-graph
	 */
	if (unset) {
1085
		top->dont_use_callchains = true;
1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112
		return 0;
	}

	symbol_conf.use_callchain = true;

	if (!arg)
		return 0;

	tok = strtok((char *)arg, ",");
	if (!tok)
		return -1;

	/* get the output mode */
	if (!strncmp(tok, "graph", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_ABS;

	else if (!strncmp(tok, "flat", strlen(arg)))
		callchain_param.mode = CHAIN_FLAT;

	else if (!strncmp(tok, "fractal", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_REL;

	else if (!strncmp(tok, "none", strlen(arg))) {
		callchain_param.mode = CHAIN_NONE;
		symbol_conf.use_callchain = false;

		return 0;
1113
	} else
1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
		return -1;

	/* get the min percentage */
	tok = strtok(NULL, ",");
	if (!tok)
		goto setup;

	callchain_param.min_percent = strtod(tok, &endptr);
	if (tok == endptr)
		return -1;

	/* get the print limit */
	tok2 = strtok(NULL, ",");
	if (!tok2)
		goto setup;

	if (tok2[0] != 'c') {
		callchain_param.print_limit = strtod(tok2, &endptr);
		tok2 = strtok(NULL, ",");
		if (!tok2)
			goto setup;
	}

	/* get the call chain order */
	if (!strcmp(tok2, "caller"))
		callchain_param.order = ORDER_CALLER;
	else if (!strcmp(tok2, "callee"))
		callchain_param.order = ORDER_CALLEE;
	else
		return -1;
setup:
	if (callchain_register_param(&callchain_param) < 0) {
		fprintf(stderr, "Can't register callchain params\n");
		return -1;
	}
1149 1150
	return 0;
}
1151 1152 1153 1154 1155 1156

/*
 * NULL-terminated usage synopsis for 'perf top'; cmd_top() hands it to
 * parse_options(), usage_with_options() and setup_sorting().
 */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL
};

1157 1158 1159
/*
 * Entry point of the 'perf top' builtin.
 *
 * Parses the command line into a struct perf_top, validates the target,
 * creates the cpu/thread maps and the event list, initializes the symbol
 * subsystem and finally hands control to __cmd_top().
 *
 * Returns a negative value on failure, otherwise whatever __cmd_top()
 * returns.  Invalid usage and a zero frequency/count terminate the process
 * through usage_with_options()/exit().  Every error return after the evlist
 * has been allocated goes through out_delete_evlist so the evlist is freed.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status;
	char errbuf[BUFSIZ];
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.freq		     = 4000, /* 4 KHz */
		.mmap_pages	     = 128,
		.sym_pcnt_filter     = 5,
		.target		     = {
			.uses_mmap   = true,
		},
	};
	char callchain_default_opt[] = "fractal,0.5,callee";
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_INTEGER('c', "count", &top.default_interval,
		    "event period to sample"),
	OPT_STRING('p', "pid", &top.target.pid, "pid",
		    "profile events on existing process id"),
	OPT_STRING('t', "tid", &top.target.tid, "tid",
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &top.group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "inherit", &top.inherit,
		    "child tasks inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero,
		    "zero history across updates"),
	OPT_INTEGER('F', "freq", &top.freq,
		    "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
		     "Default: fractal,0.5,callee", &parse_callchain_opt,
		     callchain_default_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"),
	OPT_END()
	};

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	/* 'perf top' takes no positional arguments. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	/* --stdio takes precedence over --tui when both are given. */
	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	/* A validation problem is only reported as a warning; carry on. */
	status = perf_target__validate(&top.target);
	if (status) {
		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	status = perf_target__parse_uid(&top.target);
	if (status) {
		/* Save errno before the reporting helpers get a chance to clobber it. */
		int saved_errno = errno;

		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		status = -saved_errno;
		goto out_delete_evlist;
	}

	/* No explicit target given: fall back to system wide profiling. */
	if (perf_target__none(&top.target))
		top.target.system_wide = true;

	if (perf_evlist__create_maps(top.evlist, &top.target) < 0)
		usage_with_options(top_usage, options);

	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		ui__error("Not enough memory for event selector list\n");
		/* Go through the common exit path so the evlist is released. */
		status = -ENOMEM;
		goto out_delete_evlist;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (top.default_interval)
		top.freq = 0;
	else if (top.freq) {
		top.default_interval = top.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (!pos->attr.sample_period)
			pos->attr.sample_period = top.default_interval;
	}

	/* Start with the first event in the list as the selected one. */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = sizeof(struct annotation);

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		/* Go through the common exit path so the evlist is released. */
		status = -1;
		goto out_delete_evlist;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		/*
		 * No explicit -E: size the output from the terminal and
		 * install a SIGWINCH handler for it.
		 */
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);

out_delete_evlist:
	perf_evlist__delete(top.evlist);

	return status;
}