builtin-top.c 32.5 KB
Newer Older
1
/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
20
#include "builtin.h"
21

22
#include "perf.h"
23

24
#include "util/annotate.h"
25
#include "util/cache.h"
26
#include "util/color.h"
27
#include "util/evlist.h"
28
#include "util/evsel.h"
29
#include "util/machine.h"
30 31
#include "util/session.h"
#include "util/symbol.h"
32
#include "util/thread.h"
33
#include "util/thread_map.h"
34
#include "util/top.h"
35
#include "util/util.h"
36
#include <linux/rbtree.h>
37 38
#include "util/parse-options.h"
#include "util/parse-events.h"
39
#include "util/cpumap.h"
40
#include "util/xyarray.h"
41
#include "util/sort.h"
42
#include "util/intlist.h"
43

44 45
#include "util/debug.h"

46
#include <assert.h>
47
#include <elf.h>
48
#include <fcntl.h>
49

50
#include <stdio.h>
51 52
#include <termios.h>
#include <unistd.h>
53
#include <inttypes.h>
54

55 56 57 58 59 60 61 62 63 64
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
65
#include <sys/utsname.h>
66 67 68 69 70
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

71
/*
 * Convert a LINES/COLUMNS style environment value into a terminal dimension.
 *
 * Leading whitespace and trailing garbage are tolerated, as atoi() did.
 * Returns 0 when @str is NULL, has no leading number, is not strictly
 * positive, or does not fit in an unsigned short (the type of the winsize
 * fields), so that the caller falls back to another source.
 */
static unsigned short term_dimension_from_env(const char *str)
{
	char *end;
	long val;

	if (str == NULL)
		return 0;

	errno = 0;
	val = strtol(str, &end, 10);
	if (end == str || errno == ERANGE || val <= 0 || val > USHRT_MAX)
		return 0;

	return (unsigned short)val;
}

/*
 * Fill @ws with the terminal size.
 *
 * Sources are tried in order:
 *   1. the LINES and COLUMNS environment variables (both must yield a
 *      non-zero, in-range value),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available and when it
 *      reports non-zero rows and columns,
 *   3. a fixed 25x80 fallback.
 *
 * On return both ws->ws_row and ws->ws_col are non-zero.
 */
void get_term_dimensions(struct winsize *ws)
{
	unsigned short rows = term_dimension_from_env(getenv("LINES"));
	unsigned short cols = term_dimension_from_env(getenv("COLUMNS"));

	if (rows && cols) {
		ws->ws_row = rows;
		ws->ws_col = cols;
		return;
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

93
static void perf_top__update_print_entries(struct perf_top *top)
94
{
95 96
	if (top->print_entries > 9)
		top->print_entries -= 9;
97 98
}

99 100
/*
 * Window-size change handler, also called directly with @arg pointing at the
 * perf_top instance.
 *
 * Re-reads the terminal dimensions, then reconciles them with the requested
 * number of entries: when a non-zero entry count plus four lines fits in the
 * terminal, the window height is clamped to that; otherwise the entry count
 * follows the terminal height.
 */
static void perf_top__sig_winch(int sig __maybe_unused,
				siginfo_t *info __maybe_unused, void *arg)
{
	struct perf_top *top = arg;
	int request_fits;

	get_term_dimensions(&top->winsize);

	request_fits = top->print_entries &&
		       (top->print_entries + 4) <= top->winsize.ws_row;

	if (request_fits) {
		top->print_entries += 4;
		top->winsize.ws_row = top->print_entries;
	} else {
		top->print_entries = top->winsize.ws_row;
	}

	perf_top__update_print_entries(top);
}

115
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
116 117
{
	struct symbol *sym;
118
	struct annotation *notes;
119
	struct map *map;
120
	int err = -1;
121

122
	if (!he || !he->ms.sym)
123 124
		return -1;

125 126
	sym = he->ms.sym;
	map = he->ms.map;
127 128 129 130

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
131
	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
132 133 134
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
135
		return -1;
136 137
	}

138 139 140
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
141 142 143
		goto out_assign;
	}

144
	pthread_mutex_lock(&notes->lock);
145

146
	if (symbol__alloc_hist(sym) < 0) {
147
		pthread_mutex_unlock(&notes->lock);
148 149
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
150
		sleep(1);
151
		return err;
152
	}
153

154
	err = symbol__annotate(sym, map, 0);
155
	if (err == 0) {
156
out_assign:
157
		top->sym_filter_entry = he;
158
	}
159

160
	pthread_mutex_unlock(&notes->lock);
161
	return err;
162 163
}

164
static void __zero_source_counters(struct hist_entry *he)
165
{
166
	struct symbol *sym = he->ms.sym;
167
	symbol__annotate_zero_histograms(sym);
168 169
}

170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
/*
 * Warn that address @ip fell outside @sym, printing the map, the symbol and
 * the running system's identification (from uname(), or "[unknown]" when
 * that call fails).  The warning is remembered in map->erange_warned.
 */
static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
{
	struct utsname uts;
	const char *arch = "[unknown]";
	const char *release = "[unknown]";
	char binding;

	if (uname(&uts) == 0) {
		arch = uts.machine;
		release = uts.release;
	}

	switch (sym->binding) {
	case STB_GLOBAL:
		binding = 'g';
		break;
	case STB_LOCAL:
		binding = 'l';
		break;
	default:
		binding = 'w';
		break;
	}

	ui__warning("Out of bounds address found:\n\n"
		    "Addr:   %" PRIx64 "\n"
		    "DSO:    %s %c\n"
		    "Map:    %" PRIx64 "-%" PRIx64 "\n"
		    "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
		    "Arch:   %s\n"
		    "Kernel: %s\n"
		    "Tools:  %s\n\n"
		    "Not all samples will be on the annotation output.\n\n"
		    "Please report to linux-kernel@vger.kernel.org\n",
		    ip, map->dso->long_name, dso__symtab_origin(map->dso),
		    map->start, map->end, sym->start, sym->end,
		    binding, sym->name,
		    arch, release, perf_version_string);

	/* Outside the browser, pause so the message can be read. */
	if (use_browser <= 0)
		sleep(5);

	map->erange_warned = true;
}

197 198 199
/*
 * Account one sample at @ip for event @counter in the annotation histogram
 * of @he's symbol.
 *
 * Nothing is recorded when @he has no symbol, or when the browser is not in
 * use (use_browser != 1) and the symbol is not the one currently selected
 * for annotation.  The annotation lock is only tried, never waited for: a
 * sample is dropped when the lock is busy.
 */
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;
	struct map *map;
	int err;

	if (he == NULL || he->ms.sym == NULL)
		return;

	if (use_browser != 1) {
		struct hist_entry *selected = top->sym_filter_entry;

		if (selected == NULL || selected->ms.sym != he->ms.sym)
			return;
	}

	sym = he->ms.sym;
	map = he->ms.map;
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	/* Translate the sampled address through the map before accounting. */
	ip = map->map_ip(map, ip);
	err = symbol__inc_addr_samples(sym, map, counter, ip);

	pthread_mutex_unlock(&notes->lock);

	/* Warn only once per map about out-of-range addresses. */
	if (err == -ERANGE && !map->erange_warned)
		ui__warn_map_erange(map, sym, ip);
}

233
static void perf_top__show_details(struct perf_top *top)
234
{
235
	struct hist_entry *he = top->sym_filter_entry;
236
	struct annotation *notes;
237
	struct symbol *symbol;
238
	int more;
239

240
	if (!he)
241 242
		return;

243
	symbol = he->ms.sym;
244 245 246 247 248 249
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
250

251
	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
252
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
253

254 255 256 257
	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
258
	else
259
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
260
	if (more != 0)
261
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
262 263
out_unlock:
	pthread_mutex_unlock(&notes->lock);
264
}
265 266 267

/*
 * Terminal control string written before each refresh: CSI H (cursor to the
 * home position) followed by CSI 2 J (erase the whole display).  Spelled
 * with \033 escapes so the bytes survive editors and text tooling.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

268 269 270
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
271
{
272 273 274 275 276 277 278 279
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
280
}
281

282
static void perf_top__print_sym_table(struct perf_top *top)
283
{
284 285
	char bf[160];
	int printed = 0;
286
	const int win_width = top->winsize.ws_col - 1;
287

288
	puts(CONSOLE_CLEAR);
289

290
	perf_top__header_snprintf(top, bf, sizeof(bf));
291
	printf("%s\n", bf);
292

293
	perf_top__reset_sample_counters(top);
294

295
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
296

297 298 299 300
	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
301 302
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
303
			      top->sym_evsel->hists.stats.nr_lost_warned);
304
		++printed;
305 306
	}

307 308
	if (top->sym_filter_entry) {
		perf_top__show_details(top);
309 310 311
		return;
	}

312 313 314 315 316 317 318
	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
319
	putchar('\n');
320
	hists__fprintf(&top->sym_evsel->hists, false,
321
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
322 323
}

324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
/*
 * Prompt with @msg on stdout and read one line from stdin as a non-negative
 * decimal integer.
 *
 * *target is written only when the whole line (minus the trailing newline)
 * consists of decimal digits and the value fits in an int.  An empty line
 * yields 0.  On read failure, non-digit input or an out-of-range value
 * *target is left untouched.  The line buffer is released on every path.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t len = 0;
	unsigned long val;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;	/* getline may have allocated even on failure */

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	val = strtoul(buf, NULL, 10);
	if (errno == ERANGE || val > INT_MAX)
		goto out_free;

	*target = (int)val;
out_free:
	free(buf);
}

/*
 * Prompt for a percentage with @msg and store it in *target when the answer
 * lies in 0..100.  Anything else leaves *target unchanged.
 */
static void prompt_percent(int *target, const char *msg)
{
	/*
	 * Start from an out-of-range sentinel: prompt_integer() leaves its
	 * argument alone on bad input, and that must not be mistaken for an
	 * explicit "0".
	 */
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

359
static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
360 361
{
	char *buf = malloc(0), *p;
362
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
363
	struct rb_node *next;
364 365 366 367 368
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
369
		top->sym_filter_entry = NULL;
370 371 372 373 374 375 376 377 378 379
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

380
	next = rb_first(&top->sym_evsel->hists.entries);
381 382 383 384
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
385 386
			break;
		}
387
		next = rb_next(&n->rb_node);
388 389 390
	}

	if (!found) {
391
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
392 393
		sleep(1);
	} else
394
		perf_top__parse_source(top, found);
395 396 397 398 399

out_free:
	free(buf);
}

400
static void perf_top__print_mapped_keys(struct perf_top *top)
401
{
402 403
	char *name = NULL;

404 405
	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
406 407 408 409
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
410 411
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
412

413
	if (top->evlist->nr_entries > 1)
414
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));
415

416
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
417

418
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
419 420
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
421

422
	fprintf(stdout,
423
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
424
		top->hide_kernel_symbols ? "yes" : "no");
425 426
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
427 428
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
429 430 431
	fprintf(stdout, "\t[qQ]    quit.\n");
}

432
static int perf_top__key_mapped(struct perf_top *top, int c)
433 434 435 436 437 438 439 440
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
441 442
		case 'K':
		case 'U':
443 444 445
		case 'F':
		case 's':
		case 'S':
446 447
			return 1;
		case 'E':
448
			return top->evlist->nr_entries > 1 ? 1 : 0;
449 450
		default:
			break;
451 452 453
	}

	return 0;
454 455
}

456
static void perf_top__handle_keypress(struct perf_top *top, int c)
457
{
458
	if (!perf_top__key_mapped(top, c)) {
459 460 461
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

462
		perf_top__print_mapped_keys(top);
463 464 465 466 467 468 469 470 471 472 473 474 475 476
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
477
		if (!perf_top__key_mapped(top, c))
478 479 480
			return;
	}

481 482
	switch (c) {
		case 'd':
483 484 485
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
486 487
			break;
		case 'e':
488 489 490 491 492 493 494 495
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
496 497
			} else {
				perf_top__sig_winch(SIGWINCH, NULL, top);
498
				signal(SIGWINCH, SIG_DFL);
499
			}
500 501
			break;
		case 'E':
502
			if (top->evlist->nr_entries > 1) {
503 504 505
				/* Select 0 as the default event: */
				int counter = 0;

506
				fprintf(stderr, "\nAvailable events:");
507

508
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
509
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
510

511
				prompt_integer(&counter, "Enter details event counter");
512

513
				if (counter >= top->evlist->nr_entries) {
514
					top->sym_evsel = perf_evlist__first(top->evlist);
515
					fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
516
					sleep(1);
517
					break;
518
				}
519 520
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
521
						break;
522
			} else
523
				top->sym_evsel = perf_evlist__first(top->evlist);
524 525
			break;
		case 'f':
526
			prompt_integer(&top->count_filter, "Enter display event count filter");
527 528
			break;
		case 'F':
529 530
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
531
			break;
532
		case 'K':
533
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
534
			break;
535 536 537
		case 'q':
		case 'Q':
			printf("exiting.\n");
538 539
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
540 541
			exit(0);
		case 's':
542
			perf_top__prompt_symbol(top, "Enter details symbol");
543 544
			break;
		case 'S':
545
			if (!top->sym_filter_entry)
546 547
				break;
			else {
548
				struct hist_entry *syme = top->sym_filter_entry;
549

550
				top->sym_filter_entry = NULL;
551 552 553
				__zero_source_counters(syme);
			}
			break;
554
		case 'U':
555
			top->hide_user_symbols = !top->hide_user_symbols;
556
			break;
557
		case 'z':
558
			top->zero = !top->zero;
559
			break;
560 561
		default:
			break;
562 563 564
	}
}

565 566 567 568 569 570 571 572 573 574
/*
 * Browser timer callback (@arg is the perf_top instance): reset the sample
 * counters, follow the event selected in the evlist if there is one, and
 * resort/decay that event's histograms.
 */
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;
	struct perf_evsel *selected;

	perf_top__reset_sample_counters(t);

	selected = t->evlist->selected;
	if (selected != NULL)
		t->sym_evsel = selected;

	hists__collapse_resort_threaded(&t->sym_evsel->hists);
	hists__output_resort_threaded(&t->sym_evsel->hists);
	hists__decay_entries_threaded(&t->sym_evsel->hists,
				      t->hide_user_symbols,
				      t->hide_kernel_symbols);
}

580
static void *display_thread_tui(void *arg)
581
{
582
	struct perf_evsel *pos;
583
	struct perf_top *top = arg;
584
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";
585 586 587 588 589
	struct hist_browser_timer hbt = {
		.timer		= perf_top__sort_new_samples,
		.arg		= top,
		.refresh	= top->delay_secs,
	};
590

591
	perf_top__sort_new_samples(top);
592 593 594 595 596 597 598

	/*
	 * Initialize the uid_filter_str, in the future the TUI will allow
	 * Zooming in/out UIDs. For now juse use whatever the user passed
	 * via --uid.
	 */
	list_for_each_entry(pos, &top->evlist->entries, node)
599
		pos->hists.uid_filter_str = top->record_opts.target.uid_str;
600

601 602
	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
				      &top->session->header.env);
603

604 605 606 607 608
	exit_browser(0);
	exit(0);
	return NULL;
}

609
static void *display_thread(void *arg)
610
{
611
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
612
	struct termios tc, save;
613
	struct perf_top *top = arg;
614 615 616 617 618 619 620
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
621

622
	pthread__unblock_sigwinch();
623
repeat:
624
	delay_msecs = top->delay_secs * 1000;
625 626 627
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
628

629
	while (1) {
630
		perf_top__print_sym_table(top);
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall trhu */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
647 648 649
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

650
	perf_top__handle_keypress(top, c);
651
	goto repeat;
652 653 654 655

	return NULL;
}

656
/*
 * NULL-terminated list of symbol names whose samples are tagged to be
 * skipped (symbol_filter() marks a matching symbol as ignored).
 */
static const char *skip_symbols[] = {
	"intel_idle",		"default_idle",
	"native_safe_halt",	"cpu_idle",
	"enter_idle",		"exit_idle",
	"mwait_idle",		"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",	"pseries_dedicated_idle_sleep",
	NULL
};

672
/*
 * Symbol filter callback.
 *
 * Returns 1 for text-section marker symbols and module init/cleanup entry
 * points, 0 for everything else.  As a side effect, a symbol whose name is
 * listed in skip_symbols[] gets sym->ignore set.
 */
static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
{
	const char *name = sym->name;
	const char **skip;

	/*
	 * ppc64 uses function descriptors and prefixes the name of every
	 * instruction address with a '.'; compare without it.
	 */
	if (*name == '.')
		name++;

	if (strcmp(name, "_text") == 0 ||
	    strcmp(name, "_etext") == 0 ||
	    strcmp(name, "_sinittext") == 0)
		return 1;

	/* Prefix matches: 11 and 14 are the lengths of the two literals. */
	if (strncmp("init_module", name, 11) == 0 ||
	    strncmp("cleanup_module", name, 14) == 0)
		return 1;

	if (strstr(name, "_text_start") != NULL ||
	    strstr(name, "_text_end") != NULL)
		return 1;

	for (skip = skip_symbols; *skip != NULL; skip++) {
		if (strcmp(*skip, name) == 0) {
			sym->ignore = true;
			break;
		}
	}

	return 0;
}

703 704
static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
705
				       struct perf_evsel *evsel,
706
				       struct perf_sample *sample,
707
				       struct machine *machine)
708
{
709
	struct perf_top *top = container_of(tool, struct perf_top, tool);
710
	struct symbol *parent = NULL;
711
	u64 ip = event->ip.ip;
712
	struct addr_location al;
713
	int err;
714

715
	if (!machine && perf_guest) {
716 717 718 719 720 721 722 723 724 725
		static struct intlist *seen;

		if (!seen)
			seen = intlist__new();

		if (!intlist__has_entry(seen, event->ip.pid)) {
			pr_err("Can't find guest [%d]'s kernel information\n",
				event->ip.pid);
			intlist__add(seen, event->ip.pid);
		}
726 727 728
		return;
	}

729
	if (!machine) {
730
		pr_err("%u unprocessable samples recorded.\n",
731 732 733 734
		       top->session->hists.stats.nr_unprocessable_samples++);
		return;
	}

735
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
736
		top->exact_samples++;
737

738
	if (perf_event__preprocess_sample(event, machine, &al, sample,
739
					  symbol_filter) < 0 ||
740
	    al.filtered)
741
		return;
742

743
	if (!top->kptr_restrict_warned &&
744 745 746 747 748 749 750 751 752 753
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
754
		top->kptr_restrict_warned = true;
755 756
	}

757
	if (al.sym == NULL) {
758
		const char *msg = "Kernel samples will not be resolved.\n";
759 760 761 762 763 764 765 766 767 768 769
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
770
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
771
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
772
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
773 774 775 776 777 778 779 780 781 782
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
783
			top->vmlinux_warned = true;
784
		}
785 786
	}

787 788
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
789

790 791
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
792 793 794 795
			err = machine__resolve_callchain(machine, evsel,
							 al.thread, sample,
							 &parent);

796 797 798 799
			if (err)
				return;
		}

800
		he = perf_evsel__add_hist_entry(evsel, &al, sample);
801 802 803
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
804
		}
805

806
		if (symbol_conf.use_callchain) {
807
			err = callchain_append(he->callchain, &callchain_cursor,
808 809 810 811 812
					       sample->period);
			if (err)
				return;
		}

813 814
		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
815
	}
816 817

	return;
818 819
}

820
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
821
{
822
	struct perf_sample sample;
823
	struct perf_evsel *evsel;
824
	struct perf_session *session = top->session;
825
	union perf_event *event;
826 827
	struct machine *machine;
	u8 origin;
828
	int ret;
829

830
	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
831
		ret = perf_evlist__parse_sample(top->evlist, event, &sample);
832 833 834 835
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
836

837
		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
838 839
		assert(evsel != NULL);

840 841
		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

842
		if (event->header.type == PERF_RECORD_SAMPLE)
843
			++top->samples;
844 845 846

		switch (origin) {
		case PERF_RECORD_MISC_USER:
847 848
			++top->us_samples;
			if (top->hide_user_symbols)
849
				continue;
850
			machine = perf_session__find_host_machine(session);
851 852
			break;
		case PERF_RECORD_MISC_KERNEL:
853 854
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
855
				continue;
856
			machine = perf_session__find_host_machine(session);
857 858
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
859 860
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
861 862
			break;
		case PERF_RECORD_MISC_GUEST_USER:
863
			++top->guest_us_samples;
864 865 866 867 868 869 870 871 872 873
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


874 875 876 877
		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
878
			hists__inc_nr_events(&evsel->hists, event->header.type);
879
			machine__process_event(machine, event);
880
		} else
881
			++session->hists.stats.nr_unknown_events;
882 883 884
	}
}

885
static void perf_top__mmap_read(struct perf_top *top)
886
{
887 888
	int i;

889 890
	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
891 892
}

893
static void perf_top__start_counters(struct perf_top *top)
894
{
895
	struct perf_evsel *counter;
896
	struct perf_evlist *evlist = top->evlist;
897
	struct perf_record_opts *opts = &top->record_opts;
898

899
	perf_evlist__config(evlist, opts);
900

901 902 903
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
try_again:
904
		if (perf_evsel__open(counter, top->evlist->cpus,
905
				     top->evlist->threads) < 0) {
906 907
			int err = errno;

908
			if (err == EPERM || err == EACCES) {
909
				ui__error_paranoid();
910 911
				goto out_err;
			}
912 913 914 915 916
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
917 918 919 920
			if ((err == ENOENT || err == ENXIO) &&
			    (attr->type == PERF_TYPE_HARDWARE) &&
			    (attr->config == PERF_COUNT_HW_CPU_CYCLES)) {

921
				if (verbose)
922 923
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
924 925 926

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
927 928
				if (counter->name) {
					free(counter->name);
929
					counter->name = NULL;
930
				}
931 932
				goto try_again;
			}
933

934
			if (err == ENOENT) {
935
				ui__error("The %s event is not supported.\n",
936
					  perf_evsel__name(counter));
937
				goto out_err;
938
			} else if (err == EMFILE) {
939
				ui__error("Too many events are opened.\n"
940 941
					    "Try again after reducing the number of events\n");
				goto out_err;
942 943 944 945
			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
				ui__error("\'precise\' request may not be supported. "
					  "Try removing 'p' modifier\n");
				goto out_err;
946 947
			}

948
			ui__error("The sys_perf_event_open() syscall "
949 950 951 952 953
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
954
		}
955
	}
956

957
	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
958
		ui__error("Failed to mmap with %d (%s)\n",
959 960 961 962 963 964 965 966 967
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
968 969
}

970
static int perf_top__setup_sample_type(struct perf_top *top)
971
{
972
	if (!top->sort_has_symbols) {
973
		if (symbol_conf.use_callchain) {
974
			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
975 976
			return -EINVAL;
		}
977
	} else if (callchain_param.mode != CHAIN_NONE) {
978
		if (callchain_register_param(&callchain_param) < 0) {
979
			ui__error("Can't register callchain params.\n");
980 981 982 983 984 985 986
			return -EINVAL;
		}
	}

	return 0;
}

987
static int __cmd_top(struct perf_top *top)
988
{
989
	struct perf_record_opts *opts = &top->record_opts;
990
	pthread_t thread;
991
	int ret;
992
	/*
993 994
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
995
	 */
996 997
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
998
		return -ENOMEM;
999

1000
	ret = perf_top__setup_sample_type(top);
1001 1002 1003
	if (ret)
		goto out_delete;

1004
	if (perf_target__has_task(&opts->target))
1005
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
1006
						  perf_event__process,
1007
						  &top->session->host_machine);
1008
	else
1009 1010 1011 1012
		perf_event__synthesize_threads(&top->tool, perf_event__process,
					       &top->session->host_machine);
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
1013
	perf_session__set_id_hdr_size(top->session);
1014

1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025
	/*
	 * When perf is starting the traced process, all the events (apart from
	 * group members) have enable_on_exec=1 set, so don't spoil it by
	 * prematurely enabling them.
	 *
	 * XXX 'top' still doesn't start workloads like record, trace, but should,
	 * so leave the check here.
	 */
        if (!perf_target__none(&opts->target))
                perf_evlist__enable(top->evlist);

1026
	/* Wait for a minimal set of events before starting the snapshot */
1027
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1028

1029
	perf_top__mmap_read(top);
1030

1031
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
1032
							    display_thread), top)) {
1033
		ui__error("Could not create display thread.\n");
1034 1035 1036
		exit(-1);
	}

1037
	if (top->realtime_prio) {
1038 1039
		struct sched_param param;

1040
		param.sched_priority = top->realtime_prio;
1041
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1042
			ui__error("Could not set realtime priority.\n");
1043 1044 1045 1046 1047
			exit(-1);
		}
	}

	while (1) {
1048
		u64 hits = top->samples;
1049

1050
		perf_top__mmap_read(top);
1051

1052 1053
		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1054 1055
	}

1056
out_delete:
1057 1058
	perf_session__delete(top->session);
	top->session = NULL;
1059 1060 1061 1062 1063

	return 0;
}

static int
1064
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1065 1066 1067 1068
{
	/*
	 * --no-call-graph
	 */
1069
	if (unset)
1070 1071 1072 1073
		return 0;

	symbol_conf.use_callchain = true;

1074
	return record_parse_callchain_opt(opt, arg, unset);
1075
}
1076

1077
/*
 * cmd_top - entry point of the 'perf top' builtin.
 *
 * @argc/@argv: command line, handed to parse_options()
 * @prefix:     unused
 *
 * Allocates the event list, parses the options, validates the target,
 * creates the cpu/thread maps, settles the sampling frequency/period,
 * initializes symbol handling and the terminal size, then hands over to
 * __cmd_top().
 *
 * Returns the status of __cmd_top(), or a negative value when setup fails:
 * -ENOMEM when the event list cannot be allocated or the default event
 * cannot be added, -errno when the uid cannot be parsed, -EINVAL when both
 * frequency and count are zero, and -1 when symbol__init() fails.
 *
 * Every failure after perf_evlist__new() leaves through out_delete_evlist
 * so that the event list is released.
 */
int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int status;
	char errbuf[BUFSIZ];
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.record_opts = {
			.mmap_pages	= UINT_MAX,
			/*
			 * UINT_MAX/ULLONG_MAX mean "not given on the command
			 * line"; see the user_freq/user_interval checks below.
			 */
			.user_freq	= UINT_MAX,
			.user_interval	= ULLONG_MAX,
			.freq		= 4000, /* 4 KHz */
			.target		     = {
				.uses_mmap   = true,
			},
		},
		.sym_pcnt_filter     = 5,
	};
	struct perf_record_opts *opts = &top.record_opts;
	struct perf_target *target = &opts->target;
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
	OPT_STRING('p', "pid", &target->pid, "pid",
		    "profile events on existing process id"),
	OPT_STRING('t', "tid", &target->tid, "tid",
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages,
		     "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &opts->group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
	OPT_UINTEGER('F', "freq", &opts->user_freq, "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
			     "mode[,dump_size]", record_callchain_help,
			     &parse_callchain_opt, "fp"),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
	OPT_END()
	};
	const char * const top_usage[] = {
		"perf top [<options>]",
		NULL
	};

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	/* 'perf top' takes no positional arguments: leftovers are an error. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* Only override the sort order when the user did not pick one. */
	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	/* --stdio wins over --tui when both are given. */
	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	/* A validation problem is only reported as a warning; setup goes on. */
	status = perf_target__validate(target);
	if (status) {
		perf_target__strerror(target, status, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	status = perf_target__parse_uid(target);
	if (status) {
		/* Capture errno before the reporting calls can overwrite it. */
		int saved_errno = errno;

		perf_target__strerror(target, status, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		status = -saved_errno;
		goto out_delete_evlist;
	}

	/* No target selected at all: fall back to system-wide profiling. */
	if (perf_target__none(target))
		target->system_wide = true;

	if (perf_evlist__create_maps(top.evlist, target) < 0)
		usage_with_options(top_usage, options);

	/* No -e given: add the default event. */
	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		ui__error("Not enough memory for event selector list\n");
		/* Leave through the cleanup label so the evlist is not leaked. */
		status = -ENOMEM;
		goto out_delete_evlist;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	if (opts->user_interval != ULLONG_MAX)
		opts->default_interval = opts->user_interval;
	if (opts->user_freq != UINT_MAX)
		opts->freq = opts->user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (opts->default_interval)
		opts->freq = 0;
	else if (opts->freq) {
		opts->default_interval = opts->freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		status = -EINVAL;
		goto out_delete_evlist;
	}

	top.sym_evsel = perf_evlist__first(top.evlist);

	symbol_conf.priv_size = sizeof(struct annotation);

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		/* Leave through the cleanup label so the evlist is not leaked. */
		status = -1;
		goto out_delete_evlist;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		/*
		 * No explicit -E value: let perf_top__update_print_entries()
		 * choose it and install perf_top__sig_winch for SIGWINCH.
		 */
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);

out_delete_evlist:
	perf_evlist__delete(top.evlist);

	return status;
}