/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "builtin.h"

#include "perf.h"

#include "util/annotate.h"
#include "util/cache.h"
#include "util/color.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/xyarray.h"
#include "util/sort.h"
#include "util/intlist.h"

#include "util/debug.h"

#include <assert.h>
#include <elf.h>
#include <fcntl.h>

#include <stdio.h>
#include <termios.h>
#include <unistd.h>
#include <inttypes.h>

#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

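/*
 * Determine the terminal size: use the LINES/COLUMNS environment variables
 * when both are set, otherwise query the tty with TIOCGWINSZ, and fall back
 * to 80 columns by 25 rows.
 */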
void get_term_dimensions(struct winsize *ws)
{
	char *s = getenv("LINES");

	if (s != NULL) {
		ws->ws_row = atoi(s);
		s = getenv("COLUMNS");
		if (s != NULL) {
			ws->ws_col = atoi(s);
			if (ws->ws_row && ws->ws_col)
				return;
		}
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

static void perf_top__update_print_entries(struct perf_top *top)
{
	if (top->print_entries > 9)
		top->print_entries -= 9;
}

static void perf_top__sig_winch(int sig __maybe_unused,
				siginfo_t *info __maybe_unused, void *arg)
{
	struct perf_top *top = arg;

	get_term_dimensions(&top->winsize);
	if (!top->print_entries
	    || (top->print_entries+4) > top->winsize.ws_row) {
		top->print_entries = top->winsize.ws_row;
	} else {
		top->print_entries += 4;
		top->winsize.ws_row = top->print_entries;
	}
	perf_top__update_print_entries(top);
}

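/*
 * Set up annotation for the symbol backing this hist entry so that its
 * annotated source/assembly can be shown in the details view.
 */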
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
{
	struct symbol *sym;
	struct annotation *notes;
	struct map *map;
	int err = -1;

	if (!he || !he->ms.sym)
		return -1;

	sym = he->ms.sym;
	map = he->ms.map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
		return -1;
	}

	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
		goto out_assign;
	}

	pthread_mutex_lock(&notes->lock);

	if (symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return err;
	}

	err = symbol__annotate(sym, map, 0);
	if (err == 0) {
out_assign:
		top->sym_filter_entry = he;
	}

	pthread_mutex_unlock(&notes->lock);
	return err;
}

static void __zero_source_counters(struct hist_entry *he)
{
	struct symbol *sym = he->ms.sym;
	symbol__annotate_zero_histograms(sym);
}

static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
{
	struct utsname uts;
	int err = uname(&uts);

	ui__warning("Out of bounds address found:\n\n"
		    "Addr:   %" PRIx64 "\n"
		    "DSO:    %s %c\n"
		    "Map:    %" PRIx64 "-%" PRIx64 "\n"
		    "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
		    "Arch:   %s\n"
		    "Kernel: %s\n"
		    "Tools:  %s\n\n"
		    "Not all samples will be on the annotation output.\n\n"
		    "Please report to linux-kernel@vger.kernel.org\n",
		    ip, map->dso->long_name, dso__symtab_origin(map->dso),
		    map->start, map->end, sym->start, sym->end,
		    sym->binding == STB_GLOBAL ? 'g' :
		    sym->binding == STB_LOCAL  ? 'l' : 'w', sym->name,
		    err ? "[unknown]" : uts.machine,
		    err ? "[unknown]" : uts.release, perf_version_string);
	if (use_browser <= 0)
		sleep(5);
	
	map->erange_warned = true;
}

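/*
 * Account a sample at 'ip' in the annotation histogram of the entry's symbol,
 * so the annotation view can show per-address hit counts.
 */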
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;
	int err;

	if (he == NULL || he->ms.sym == NULL ||
	    ((top->sym_filter_entry == NULL ||
	      top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
		return;

	sym = he->ms.sym;
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock))
		return;

	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	ip = he->ms.map->map_ip(he->ms.map, ip);
	err = symbol__inc_addr_samples(sym, he->ms.map, counter, ip);

	pthread_mutex_unlock(&notes->lock);

	if (err == -ERANGE && !he->ms.map->erange_warned)
		ui__warn_map_erange(he->ms.map, sym, ip);
}

static void perf_top__show_details(struct perf_top *top)
{
	struct hist_entry *he = top->sym_filter_entry;
	struct annotation *notes;
	struct symbol *symbol;
	int more;

	if (!he)
		return;

	symbol = he->ms.sym;
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;

	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);

	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
	else
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
	if (more != 0)
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
out_unlock:
	pthread_mutex_unlock(&notes->lock);
}

static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
{
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
}

static void perf_top__print_sym_table(struct perf_top *top)
{
	char bf[160];
	int printed = 0;
	const int win_width = top->winsize.ws_col - 1;

	puts(CONSOLE_CLEAR);

	perf_top__header_snprintf(top, bf, sizeof(bf));
	printf("%s\n", bf);

	perf_top__reset_sample_counters(top);

	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);

	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
			      top->sym_evsel->hists.stats.nr_lost_warned);
		++printed;
	}

	if (top->sym_filter_entry) {
		perf_top__show_details(top);
		return;
	}

	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
	putchar('\n');
	hists__fprintf(&top->sym_evsel->hists, false,
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
}

static void prompt_integer(int *target, const char *msg)
{
	char *buf = malloc(0), *p;
	size_t dummy = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		return;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	p = buf;
	while(*p) {
		if (!isdigit(*p))
			goto out_free;
		p++;
	}
	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}

static void prompt_percent(int *target, const char *msg)
{
	int tmp = 0;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
{
	char *buf = malloc(0), *p;
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
	struct rb_node *next;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
		top->sym_filter_entry = NULL;
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	next = rb_first(&top->sym_evsel->hists.entries);
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
			break;
		}
		next = rb_next(&n->rb_node);
	}

	if (!found) {
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
		sleep(1);
	} else
		perf_top__parse_source(top, found);

out_free:
	free(buf);
}

static void perf_top__print_mapped_keys(struct perf_top *top)
{
	char *name = NULL;

	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);

	if (top->evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));

	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);

	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");

	fprintf(stdout,
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
		top->hide_kernel_symbols ? "yes" : "no");
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int perf_top__key_mapped(struct perf_top *top, int c)
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
		case 'K':
		case 'U':
		case 'F':
		case 's':
		case 'S':
			return 1;
		case 'E':
			return top->evlist->nr_entries > 1 ? 1 : 0;
		default:
			break;
	}

	return 0;
}

static void perf_top__handle_keypress(struct perf_top *top, int c)
{
	if (!perf_top__key_mapped(top, c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		perf_top__print_mapped_keys(top);
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!perf_top__key_mapped(top, c))
			return;
	}

	switch (c) {
		case 'd':
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
			break;
		case 'e':
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
			} else {
				perf_top__sig_winch(SIGWINCH, NULL, top);
				signal(SIGWINCH, SIG_DFL);
			}
			break;
		case 'E':
			if (top->evlist->nr_entries > 1) {
				/* Select 0 as the default event: */
				int counter = 0;

				fprintf(stderr, "\nAvailable events:");

				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));

				prompt_integer(&counter, "Enter details event counter");

				if (counter >= top->evlist->nr_entries) {
					top->sym_evsel = perf_evlist__first(top->evlist);
					fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
					sleep(1);
					break;
				}
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
						break;
			} else
				top->sym_evsel = perf_evlist__first(top->evlist);
			break;
		case 'f':
			prompt_integer(&top->count_filter, "Enter display event count filter");
			break;
		case 'F':
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
			break;
		case 'K':
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
			break;
		case 'q':
		case 'Q':
			printf("exiting.\n");
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
			exit(0);
		case 's':
			perf_top__prompt_symbol(top, "Enter details symbol");
			break;
		case 'S':
			if (!top->sym_filter_entry)
				break;
			else {
				struct hist_entry *syme = top->sym_filter_entry;

				top->sym_filter_entry = NULL;
				__zero_source_counters(syme);
			}
			break;
		case 'U':
			top->hide_user_symbols = !top->hide_user_symbols;
			break;
		case 'z':
			top->zero = !top->zero;
			break;
		default:
			break;
	}
}

static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;
	perf_top__reset_sample_counters(t);

	if (t->evlist->selected != NULL)
		t->sym_evsel = t->evlist->selected;

	hists__collapse_resort_threaded(&t->sym_evsel->hists);
	hists__output_resort_threaded(&t->sym_evsel->hists);
	hists__decay_entries_threaded(&t->sym_evsel->hists,
				      t->hide_user_symbols,
				      t->hide_kernel_symbols);
}

static void *display_thread_tui(void *arg)
{
	struct perf_evsel *pos;
	struct perf_top *top = arg;
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";
	struct hist_browser_timer hbt = {
		.timer		= perf_top__sort_new_samples,
		.arg		= top,
		.refresh	= top->delay_secs,
	};

	perf_top__sort_new_samples(top);

	/*
	 * Initialize the uid_filter_str, in the future the TUI will allow
	 * Zooming in/out UIDs. For now just use whatever the user passed
	 * via --uid.
	 */
	list_for_each_entry(pos, &top->evlist->entries, node)
		pos->hists.uid_filter_str = top->record_opts.target.uid_str;

	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
				      &top->session->header.env);

	exit_browser(0);
	exit(0);
	return NULL;
}

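/* stdio display loop: periodically reprint the symbol table and handle hotkeys. */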
static void *display_thread(void *arg)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios tc, save;
	struct perf_top *top = arg;
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;

	pthread__unblock_sigwinch();
repeat:
	delay_msecs = top->delay_secs * 1000;
	tcsetattr(0, TCSANOW, &tc);
	/* trash return */
	getc(stdin);

	while (1) {
		perf_top__print_sym_table(top);
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall thru */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

	perf_top__handle_keypress(top, c);
	goto repeat;

	return NULL;
}

/* Tag samples to be skipped. */
static const char *skip_symbols[] = {
	"intel_idle",
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
{
	const char *name = sym->name;
	int i;

	/*
	 * ppc64 uses function descriptors and prepends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
		return 1;

	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			sym->ignore = true;
			break;
		}
	}

	return 0;
}

static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_top *top = container_of(tool, struct perf_top, tool);
	struct symbol *parent = NULL;
	u64 ip = event->ip.ip;
	struct addr_location al;
	int err;

	if (!machine && perf_guest) {
		static struct intlist *seen;

		if (!seen)
			seen = intlist__new();

		if (!intlist__has_entry(seen, event->ip.pid)) {
			pr_err("Can't find guest [%d]'s kernel information\n",
				event->ip.pid);
			intlist__add(seen, event->ip.pid);
		}
		return;
	}

	if (!machine) {
		pr_err("%u unprocessable samples recorded.\n",
		       top->session->hists.stats.nr_unprocessable_samples++);
		return;
	}

	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
		top->exact_samples++;

	if (perf_event__preprocess_sample(event, machine, &al, sample,
					  symbol_filter) < 0 ||
	    al.filtered)
		return;

	if (!top->kptr_restrict_warned &&
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
		top->kptr_restrict_warned = true;
	}

	if (al.sym == NULL) {
		const char *msg = "Kernel samples will not be resolved.\n";
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
			top->vmlinux_warned = true;
		}
	}

	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;

		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
			err = machine__resolve_callchain(machine, evsel,
							 al.thread, sample,
							 &parent);

			if (err)
				return;
		}

		he = perf_evsel__add_hist_entry(evsel, &al, sample);
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
		}

		if (symbol_conf.use_callchain) {
			err = callchain_append(he->callchain, &callchain_cursor,
					       sample->period);
			if (err)
				return;
		}

		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
	}

	return;
}

static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
{
	struct perf_sample sample;
	struct perf_evsel *evsel;
	struct perf_session *session = top->session;
	union perf_event *event;
	struct machine *machine;
	u8 origin;
	int ret;

	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
		ret = perf_evlist__parse_sample(top->evlist, event, &sample);
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}

		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
		assert(evsel != NULL);

		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

		if (event->header.type == PERF_RECORD_SAMPLE)
			++top->samples;

		switch (origin) {
		case PERF_RECORD_MISC_USER:
			++top->us_samples;
			if (top->hide_user_symbols)
				continue;
			machine = perf_session__find_host_machine(session);
			break;
		case PERF_RECORD_MISC_KERNEL:
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
				continue;
			machine = perf_session__find_host_machine(session);
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
			break;
		case PERF_RECORD_MISC_GUEST_USER:
			++top->guest_us_samples;
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
			hists__inc_nr_events(&evsel->hists, event->header.type);
			machine__process_event(machine, event);
		} else
			++session->hists.stats.nr_unknown_events;
	}
}

static void perf_top__mmap_read(struct perf_top *top)
{
	int i;

	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
}

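/*
 * Open the configured events on the monitored CPUs/threads, falling back to
 * reduced feature sets or the cpu-clock software event on older kernels, then
 * mmap the ring buffers for reading.
 */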
static void perf_top__start_counters(struct perf_top *top)
{
	struct perf_evsel *counter;
	struct perf_evlist *evlist = top->evlist;
	struct perf_record_opts *opts = &top->record_opts;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;

fallback_missing_features:
		if (top->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(counter, top->evlist->cpus,
				     top->evlist->threads) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				goto out_err;
			} else if (err == EINVAL) {
				if (!top->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					top->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!top->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_all field
					 */
					top->sample_id_all_missing = true;
					goto retry_sample_id;
				}
			}
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if ((err == ENOENT || err == ENXIO) &&
			    (attr->type == PERF_TYPE_HARDWARE) &&
			    (attr->config == PERF_COUNT_HW_CPU_CYCLES)) {

				if (verbose)
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				if (counter->name) {
					free(counter->name);
					counter->name = NULL;
				}
				goto try_again;
			}

			if (err == ENOENT) {
				ui__error("The %s event is not supported.\n",
					  perf_evsel__name(counter));
				goto out_err;
			} else if (err == EMFILE) {
				ui__error("Too many events are opened.\n"
					    "Try again after reducing the number of events\n");
				goto out_err;
			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
				ui__error("\'precise\' request may not be supported. "
					  "Try removing 'p' modifier\n");
				goto out_err;
			}

			ui__error("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
		}
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		ui__error("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
}

static int perf_top__setup_sample_type(struct perf_top *top)
{
	if (!top->sort_has_symbols) {
		if (symbol_conf.use_callchain) {
			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
			return -EINVAL;
		}
	} else if (callchain_param.mode != CHAIN_NONE) {
		if (callchain_register_param(&callchain_param) < 0) {
			ui__error("Can't register callchain params.\n");
			return -EINVAL;
		}
	}

	return 0;
}

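/*
 * Main 'perf top' loop: create the session, synthesize existing threads,
 * start the counters, kick off the display thread and keep reading the
 * mmap'ed events.
 */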
static int __cmd_top(struct perf_top *top)
{
	struct perf_record_opts *opts = &top->record_opts;
	pthread_t thread;
	int ret;
	/*
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
	 */
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
		return -ENOMEM;

	ret = perf_top__setup_sample_type(top);
	if (ret)
		goto out_delete;

	if (perf_target__has_task(&opts->target))
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
						  perf_event__process,
						  &top->session->host_machine);
	else
		perf_event__synthesize_threads(&top->tool, perf_event__process,
					       &top->session->host_machine);
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
	perf_session__set_id_hdr_size(top->session);

	/*
	 * When perf is starting the traced process, all the events (apart from
	 * group members) have enable_on_exec=1 set, so don't spoil it by
	 * prematurely enabling them.
	 *
	 * XXX 'top' still doesn't start workloads like record, trace, but should,
	 * so leave the check here.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(top->evlist);

	/* Wait for a minimal set of events before starting the snapshot */
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);

	perf_top__mmap_read(top);

	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
							    display_thread), top)) {
		ui__error("Could not create display thread.\n");
		exit(-1);
	}

	if (top->realtime_prio) {
		struct sched_param param;

		param.sched_priority = top->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			ui__error("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
		u64 hits = top->samples;

		perf_top__mmap_read(top);

		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
	}

out_delete:
	perf_session__delete(top->session);
	top->session = NULL;

	return 0;
}

static int
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
	/*
	 * --no-call-graph
	 */
	if (unset)
		return 0;

	symbol_conf.use_callchain = true;

	return record_parse_callchain_opt(opt, arg, unset);
}

int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int status;
	char errbuf[BUFSIZ];
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.record_opts = {
			.mmap_pages	= UINT_MAX,
			.user_freq	= UINT_MAX,
			.user_interval	= ULLONG_MAX,
			.freq		= 4000, /* 4 KHz */
			.target		     = {
				.uses_mmap   = true,
			},
		},
		.sym_pcnt_filter     = 5,
	};
	struct perf_record_opts *opts = &top.record_opts;
	struct perf_target *target = &opts->target;
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
	OPT_STRING('p', "pid", &target->pid, "pid",
		    "profile events on existing process id"),
	OPT_STRING('t', "tid", &target->tid, "tid",
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages,
		     "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &opts->group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
	OPT_UINTEGER('F', "freq", &opts->user_freq, "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
			     "mode[,dump_size]", record_callchain_help,
			     &parse_callchain_opt, "fp"),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
	OPT_END()
	};
	const char * const top_usage[] = {
		"perf top [<options>]",
		NULL
	};

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	status = perf_target__validate(target);
	if (status) {
		perf_target__strerror(target, status, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	status = perf_target__parse_uid(target);
	if (status) {
		int saved_errno = errno;

		perf_target__strerror(target, status, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		status = -saved_errno;
		goto out_delete_evlist;
	}

	if (perf_target__none(target))
		target->system_wide = true;

	if (perf_evlist__create_maps(top.evlist, target) < 0)
		usage_with_options(top_usage, options);

	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		ui__error("Not enough memory for event selector list\n");
		return -ENOMEM;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	if (opts->user_interval != ULLONG_MAX)
		opts->default_interval = opts->user_interval;
	if (opts->user_freq != UINT_MAX)
		opts->freq = opts->user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (opts->default_interval)
		opts->freq = 0;
	else if (opts->freq) {
		opts->default_interval = opts->freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		status = -EINVAL;
		goto out_delete_evlist;
	}

	top.sym_evsel = perf_evlist__first(top.evlist);

	symbol_conf.priv_size = sizeof(struct annotation);

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0)
		return -1;

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);

out_delete_evlist:
	perf_evlist__delete(top.evlist);

	return status;
}