builtin-top.c 31.3 KB
Newer Older
1
/*
2 3 4 5 6 7
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
8
 *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
9 10 11 12 13 14 15 16 17 18
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
19
 */
20
#include "builtin.h"
21

22
#include "perf.h"
23

24
#include "util/annotate.h"
25
#include "util/cache.h"
26
#include "util/color.h"
27
#include "util/evlist.h"
28
#include "util/evsel.h"
29
#include "util/machine.h"
30 31
#include "util/session.h"
#include "util/symbol.h"
32
#include "util/thread.h"
33
#include "util/thread_map.h"
34
#include "util/top.h"
35
#include "util/util.h"
36
#include <linux/rbtree.h>
37 38
#include "util/parse-options.h"
#include "util/parse-events.h"
39
#include "util/cpumap.h"
40
#include "util/xyarray.h"
41
#include "util/sort.h"
42
#include "util/intlist.h"
43

44 45
#include "util/debug.h"

46
#include <assert.h>
47
#include <elf.h>
48
#include <fcntl.h>
49

50
#include <stdio.h>
51 52
#include <termios.h>
#include <unistd.h>
53
#include <inttypes.h>
54

55 56 57 58 59 60 61 62 63 64
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
65
#include <sys/utsname.h>
66 67 68 69 70
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

71
/*
 * Fill @ws with the terminal size.
 *
 * Sources are tried in this order:
 *   1. the LINES and COLUMNS environment variables (both must be set and
 *      parse to non-zero values),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available,
 *   3. a hard-coded 25x80 fallback.
 *
 * Note that @ws may be partially written by an earlier source even when a
 * later one ends up providing the final values.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *env = getenv("LINES");

	if (env != NULL) {
		ws->ws_row = atoi(env);
		/* COLUMNS is only consulted when LINES is present */
		env = getenv("COLUMNS");
	}

	if (env != NULL) {
		ws->ws_col = atoi(env);
		if (ws->ws_row != 0 && ws->ws_col != 0)
			return;
	}

#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row != 0 && ws->ws_col != 0)
		return;
#endif

	ws->ws_row = 25;
	ws->ws_col = 80;
}

93
static void perf_top__update_print_entries(struct perf_top *top)
94
{
95 96
	if (top->print_entries > 9)
		top->print_entries -= 9;
97 98
}

99 100
/*
 * Re-read the terminal size and reconcile it with top->print_entries.
 *
 * @arg is the struct perf_top to update. When print_entries is zero or,
 * with 4 extra lines, would not fit in the window, it is set to the window
 * height; otherwise it grows by 4 and the recorded window height is set to
 * that value. perf_top__update_print_entries() is applied afterwards.
 */
static void perf_top__sig_winch(int sig __maybe_unused,
				siginfo_t *info __maybe_unused, void *arg)
{
	struct perf_top *top = arg;

	get_term_dimensions(&top->winsize);

	if (top->print_entries &&
	    (top->print_entries + 4) <= top->winsize.ws_row) {
		/* requested entries (plus padding) fit: keep the request */
		top->print_entries += 4;
		top->winsize.ws_row = top->print_entries;
	} else {
		/* no explicit request, or it does not fit: use the window */
		top->print_entries = top->winsize.ws_row;
	}

	perf_top__update_print_entries(top);
}

115
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
116 117
{
	struct symbol *sym;
118
	struct annotation *notes;
119
	struct map *map;
120
	int err = -1;
121

122
	if (!he || !he->ms.sym)
123 124
		return -1;

125 126
	sym = he->ms.sym;
	map = he->ms.map;
127 128 129 130

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
131
	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
132 133 134
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
135
		return -1;
136 137
	}

138 139 140
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
141 142 143
		goto out_assign;
	}

144
	pthread_mutex_lock(&notes->lock);
145

146
	if (symbol__alloc_hist(sym) < 0) {
147
		pthread_mutex_unlock(&notes->lock);
148 149
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
150
		sleep(1);
151
		return err;
152
	}
153

154
	err = symbol__annotate(sym, map, 0);
155
	if (err == 0) {
156
out_assign:
157
		top->sym_filter_entry = he;
158
	}
159

160
	pthread_mutex_unlock(&notes->lock);
161
	return err;
162 163
}

164
static void __zero_source_counters(struct hist_entry *he)
165
{
166
	struct symbol *sym = he->ms.sym;
167
	symbol__annotate_zero_histograms(sym);
168 169
}

170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
/*
 * Warn that address @ip fell outside the bounds expected for @sym in @map,
 * including DSO, map/symbol ranges and uname() details in the report.
 *
 * Outside the browser UI the warning is held for 5 seconds. The map is then
 * flagged (map->erange_warned) so callers can avoid repeating the warning.
 */
static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
{
	struct utsname uts;
	const char *arch = "[unknown]";
	const char *kernel = "[unknown]";
	char binding;

	/* Only trust the utsname fields when uname() reports success (0). */
	if (uname(&uts) == 0) {
		arch = uts.machine;
		kernel = uts.release;
	}

	if (sym->binding == STB_GLOBAL)
		binding = 'g';
	else if (sym->binding == STB_LOCAL)
		binding = 'l';
	else
		binding = 'w';

	ui__warning("Out of bounds address found:\n\n"
		    "Addr:   %" PRIx64 "\n"
		    "DSO:    %s %c\n"
		    "Map:    %" PRIx64 "-%" PRIx64 "\n"
		    "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
		    "Arch:   %s\n"
		    "Kernel: %s\n"
		    "Tools:  %s\n\n"
		    "Not all samples will be on the annotation output.\n\n"
		    "Please report to linux-kernel@vger.kernel.org\n",
		    ip, map->dso->long_name, dso__symtab_origin(map->dso),
		    map->start, map->end, sym->start, sym->end,
		    binding, sym->name,
		    arch, kernel, perf_version_string);

	if (use_browser <= 0)
		sleep(5);

	map->erange_warned = true;
}

197 198 199
/*
 * Account a sample at @ip for event @counter in the annotation histogram
 * of the symbol behind @he.
 *
 * Nothing is recorded when @he has no symbol, or when use_browser != 1 and
 * the symbol is not the one currently selected in top->sym_filter_entry.
 * The annotation lock is only tried, never waited for: if it is busy the
 * sample is dropped. An -ERANGE result triggers a one-time warning per map.
 */
static void perf_top__record_precise_ip(struct perf_top *top,
					struct hist_entry *he,
					int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;
	struct map *map;
	int err;

	if (he == NULL || he->ms.sym == NULL)
		return;

	if (use_browser != 1 &&
	    (top->sym_filter_entry == NULL ||
	     top->sym_filter_entry->ms.sym != he->ms.sym))
		return;

	sym = he->ms.sym;
	map = he->ms.map;
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	/* Allocate the histogram lazily, on the first sample for this symbol. */
	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return;
	}

	ip = map->map_ip(map, ip);
	err = symbol__inc_addr_samples(sym, map, counter, ip);

	pthread_mutex_unlock(&notes->lock);

	/* The warning is issued after the lock has been dropped. */
	if (err == -ERANGE && !map->erange_warned)
		ui__warn_map_erange(map, sym, ip);
}

233
static void perf_top__show_details(struct perf_top *top)
234
{
235
	struct hist_entry *he = top->sym_filter_entry;
236
	struct annotation *notes;
237
	struct symbol *symbol;
238
	int more;
239

240
	if (!he)
241 242
		return;

243
	symbol = he->ms.sym;
244 245 246 247 248 249
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
250

251
	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
252
	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
253

254 255 256 257
	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
				       0, top->sym_pcnt_filter, top->print_entries, 4);
	if (top->zero)
		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
258
	else
259
		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
260
	if (more != 0)
261
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
262 263
out_unlock:
	pthread_mutex_unlock(&notes->lock);
264
}
265 266 267

/*
 * ANSI/VT100 control sequence used to clear the console before a redraw:
 * ESC[H moves the cursor to the home position, ESC[2J erases the display.
 * Spelled with octal escapes so the bytes survive editors and transports
 * that strip raw control characters.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

268 269 270
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
						     struct addr_location *al,
						     struct perf_sample *sample)
271
{
272 273 274 275 276 277 278 279
	struct hist_entry *he;

	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
	if (he == NULL)
		return NULL;

	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	return he;
280
}
281

282
static void perf_top__print_sym_table(struct perf_top *top)
283
{
284 285
	char bf[160];
	int printed = 0;
286
	const int win_width = top->winsize.ws_col - 1;
287

288
	puts(CONSOLE_CLEAR);
289

290
	perf_top__header_snprintf(top, bf, sizeof(bf));
291
	printf("%s\n", bf);
292

293
	perf_top__reset_sample_counters(top);
294

295
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
296

297 298 299 300
	if (top->sym_evsel->hists.stats.nr_lost_warned !=
	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
		top->sym_evsel->hists.stats.nr_lost_warned =
			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
301 302
		color_fprintf(stdout, PERF_COLOR_RED,
			      "WARNING: LOST %d chunks, Check IO/CPU overload",
303
			      top->sym_evsel->hists.stats.nr_lost_warned);
304
		++printed;
305 306
	}

307 308
	if (top->sym_filter_entry) {
		perf_top__show_details(top);
309 310 311
		return;
	}

312 313 314 315 316 317 318
	hists__collapse_resort_threaded(&top->sym_evsel->hists);
	hists__output_resort_threaded(&top->sym_evsel->hists);
	hists__decay_entries_threaded(&top->sym_evsel->hists,
				      top->hide_user_symbols,
				      top->hide_kernel_symbols);
	hists__output_recalc_col_len(&top->sym_evsel->hists,
				     top->winsize.ws_row - 3);
319
	putchar('\n');
320
	hists__fprintf(&top->sym_evsel->hists, false,
321
		       top->winsize.ws_row - 4 - printed, win_width, stdout);
322 323
}

324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
/*
 * Print "@msg: " on stdout, read one line from stdin and, if it consists
 * solely of decimal digits and fits in an int, store it in *@target.
 *
 * *@target is left untouched on EOF/read error, on any non-digit character
 * and on values that do not fit in an int. An empty line is accepted and
 * stores 0 (callers rely on 0 being enterable this way).
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t len = 0;
	unsigned long val;

	fprintf(stdout, "\n%s: ", msg);
	/* getline() may allocate even when it fails, so always free buf. */
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> functions require an unsigned char value */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	val = strtoul(buf, NULL, 10);
	/* Reject values that would be truncated when stored in an int. */
	if (errno == ERANGE || val > INT_MAX)
		goto out_free;

	*target = (int)val;
out_free:
	free(buf);
}

/*
 * Prompt for a percentage with @msg and store it in *@target when the
 * entered value lies in [0, 100].
 *
 * The scratch value starts out as the current *@target, so a failed or
 * invalid prompt (which leaves the scratch value untouched) keeps the
 * current setting instead of silently resetting it to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = *target;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

359
static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
360 361
{
	char *buf = malloc(0), *p;
362
	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
363
	struct rb_node *next;
364 365 366 367 368
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
369
		top->sym_filter_entry = NULL;
370 371 372 373 374 375 376 377 378 379
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

380
	next = rb_first(&top->sym_evsel->hists.entries);
381 382 383 384
	while (next) {
		n = rb_entry(next, struct hist_entry, rb_node);
		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
			found = n;
385 386
			break;
		}
387
		next = rb_next(&n->rb_node);
388 389 390
	}

	if (!found) {
391
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
392 393
		sleep(1);
	} else
394
		perf_top__parse_source(top, found);
395 396 397 398 399

out_free:
	free(buf);
}

400
static void perf_top__print_mapped_keys(struct perf_top *top)
401
{
402 403
	char *name = NULL;

404 405
	if (top->sym_filter_entry) {
		struct symbol *sym = top->sym_filter_entry->ms.sym;
406 407 408 409
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
410 411
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
412

413
	if (top->evlist->nr_entries > 1)
414
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));
415

416
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
417

418
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
419 420
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
421

422
	fprintf(stdout,
423
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
424
		top->hide_kernel_symbols ? "yes" : "no");
425 426
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
427 428
		top->hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
429 430 431
	fprintf(stdout, "\t[qQ]    quit.\n");
}

432
static int perf_top__key_mapped(struct perf_top *top, int c)
433 434 435 436 437 438 439 440
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
441 442
		case 'K':
		case 'U':
443 444 445
		case 'F':
		case 's':
		case 'S':
446 447
			return 1;
		case 'E':
448
			return top->evlist->nr_entries > 1 ? 1 : 0;
449 450
		default:
			break;
451 452 453
	}

	return 0;
454 455
}

456
static void perf_top__handle_keypress(struct perf_top *top, int c)
457
{
458
	if (!perf_top__key_mapped(top, c)) {
459 460 461
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

462
		perf_top__print_mapped_keys(top);
463 464 465 466 467 468 469 470 471 472 473 474 475 476
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
477
		if (!perf_top__key_mapped(top, c))
478 479 480
			return;
	}

481 482
	switch (c) {
		case 'd':
483 484 485
			prompt_integer(&top->delay_secs, "Enter display delay");
			if (top->delay_secs < 1)
				top->delay_secs = 1;
486 487
			break;
		case 'e':
488 489 490 491 492 493 494 495
			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			if (top->print_entries == 0) {
				struct sigaction act = {
					.sa_sigaction = perf_top__sig_winch,
					.sa_flags     = SA_SIGINFO,
				};
				perf_top__sig_winch(SIGWINCH, NULL, top);
				sigaction(SIGWINCH, &act, NULL);
496 497
			} else {
				perf_top__sig_winch(SIGWINCH, NULL, top);
498
				signal(SIGWINCH, SIG_DFL);
499
			}
500 501
			break;
		case 'E':
502
			if (top->evlist->nr_entries > 1) {
503 504 505
				/* Select 0 as the default event: */
				int counter = 0;

506
				fprintf(stderr, "\nAvailable events:");
507

508
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
509
					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
510

511
				prompt_integer(&counter, "Enter details event counter");
512

513
				if (counter >= top->evlist->nr_entries) {
514
					top->sym_evsel = perf_evlist__first(top->evlist);
515
					fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
516
					sleep(1);
517
					break;
518
				}
519 520
				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
					if (top->sym_evsel->idx == counter)
521
						break;
522
			} else
523
				top->sym_evsel = perf_evlist__first(top->evlist);
524 525
			break;
		case 'f':
526
			prompt_integer(&top->count_filter, "Enter display event count filter");
527 528
			break;
		case 'F':
529 530
			prompt_percent(&top->sym_pcnt_filter,
				       "Enter details display event filter (percent)");
531
			break;
532
		case 'K':
533
			top->hide_kernel_symbols = !top->hide_kernel_symbols;
534
			break;
535 536 537
		case 'q':
		case 'Q':
			printf("exiting.\n");
538 539
			if (top->dump_symtab)
				perf_session__fprintf_dsos(top->session, stderr);
540 541
			exit(0);
		case 's':
542
			perf_top__prompt_symbol(top, "Enter details symbol");
543 544
			break;
		case 'S':
545
			if (!top->sym_filter_entry)
546 547
				break;
			else {
548
				struct hist_entry *syme = top->sym_filter_entry;
549

550
				top->sym_filter_entry = NULL;
551 552 553
				__zero_source_counters(syme);
			}
			break;
554
		case 'U':
555
			top->hide_user_symbols = !top->hide_user_symbols;
556
			break;
557
		case 'z':
558
			top->zero = !top->zero;
559
			break;
560 561
		default:
			break;
562 563 564
	}
}

565 566 567 568 569 570 571 572 573 574
/*
 * Timer callback used by the TUI (see display_thread_tui): reset the sample
 * counters, follow the event selected in the evlist (if any), then
 * collapse, resort and decay that event's histogram entries.
 *
 * @arg is the struct perf_top.
 */
static void perf_top__sort_new_samples(void *arg)
{
	struct perf_top *t = arg;
	struct perf_evsel *selected;

	perf_top__reset_sample_counters(t);

	selected = t->evlist->selected;
	if (selected != NULL)
		t->sym_evsel = selected;

	hists__collapse_resort_threaded(&t->sym_evsel->hists);
	hists__output_resort_threaded(&t->sym_evsel->hists);
	hists__decay_entries_threaded(&t->sym_evsel->hists,
				      t->hide_user_symbols,
				      t->hide_kernel_symbols);
}

580
static void *display_thread_tui(void *arg)
581
{
582
	struct perf_evsel *pos;
583
	struct perf_top *top = arg;
584
	const char *help = "For a higher level overview, try: perf top --sort comm,dso";
585 586 587 588 589
	struct hist_browser_timer hbt = {
		.timer		= perf_top__sort_new_samples,
		.arg		= top,
		.refresh	= top->delay_secs,
	};
590

591
	perf_top__sort_new_samples(top);
592 593 594 595 596 597 598

	/*
	 * Initialize the uid_filter_str, in the future the TUI will allow
	 * Zooming in/out UIDs. For now juse use whatever the user passed
	 * via --uid.
	 */
	list_for_each_entry(pos, &top->evlist->entries, node)
599
		pos->hists.uid_filter_str = top->record_opts.target.uid_str;
600

601 602
	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
				      &top->session->header.env);
603

604 605 606 607 608
	exit_browser(0);
	exit(0);
	return NULL;
}

609
static void *display_thread(void *arg)
610
{
611
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
612
	struct termios tc, save;
613
	struct perf_top *top = arg;
614 615 616 617 618 619 620
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
621

622
	pthread__unblock_sigwinch();
623
repeat:
624
	delay_msecs = top->delay_secs * 1000;
625 626 627
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
628

629
	while (1) {
630
		perf_top__print_sym_table(top);
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
		/*
		 * Either timeout expired or we got an EINTR due to SIGWINCH,
		 * refresh screen in both cases.
		 */
		switch (poll(&stdin_poll, 1, delay_msecs)) {
		case 0:
			continue;
		case -1:
			if (errno == EINTR)
				continue;
			/* Fall trhu */
		default:
			goto process_hotkey;
		}
	}
process_hotkey:
647 648 649
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

650
	perf_top__handle_keypress(top, c);
651
	goto repeat;
652 653 654 655

	return NULL;
}

656
/*
 * NULL-terminated list of symbol names whose samples are tagged to be
 * skipped: symbol_filter() sets sym->ignore on an exact name match.
 */
static const char *skip_symbols[] = {
	"intel_idle",
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL	/* sentinel */
};

672
/*
 * Symbol filter passed to perf_event__preprocess_sample().
 *
 * Returns 1 for section-marker style names (_text, _etext, _sinittext,
 * anything starting with init_module/cleanup_module, anything containing
 * _text_start/_text_end) and 0 for everything else. Symbols listed in
 * skip_symbols are not rejected here; they are flagged via sym->ignore.
 */
static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
{
	static const char *const rejected_names[] = {
		"_text", "_etext", "_sinittext", NULL
	};
	const char *name = sym->name;
	int i;

	/*
	 * ppc64 uses function descriptors and prepends a '.' to the
	 * name of every instruction address symbol. Skip it.
	 */
	if (*name == '.')
		name++;

	for (i = 0; rejected_names[i] != NULL; i++) {
		if (strcmp(name, rejected_names[i]) == 0)
			return 1;
	}

	/* prefix matches */
	if (strncmp("init_module", name, 11) == 0 ||
	    strncmp("cleanup_module", name, 14) == 0)
		return 1;

	/* substring matches */
	if (strstr(name, "_text_start") != NULL ||
	    strstr(name, "_text_end") != NULL)
		return 1;

	for (i = 0; skip_symbols[i] != NULL; i++) {
		if (strcmp(skip_symbols[i], name) == 0) {
			sym->ignore = true;
			break;
		}
	}

	return 0;
}

703 704
static void perf_event__process_sample(struct perf_tool *tool,
				       const union perf_event *event,
705
				       struct perf_evsel *evsel,
706
				       struct perf_sample *sample,
707
				       struct machine *machine)
708
{
709
	struct perf_top *top = container_of(tool, struct perf_top, tool);
710
	struct symbol *parent = NULL;
711
	u64 ip = event->ip.ip;
712
	struct addr_location al;
713
	int err;
714

715
	if (!machine && perf_guest) {
716 717 718 719 720 721 722 723 724 725
		static struct intlist *seen;

		if (!seen)
			seen = intlist__new();

		if (!intlist__has_entry(seen, event->ip.pid)) {
			pr_err("Can't find guest [%d]'s kernel information\n",
				event->ip.pid);
			intlist__add(seen, event->ip.pid);
		}
726 727 728
		return;
	}

729
	if (!machine) {
730
		pr_err("%u unprocessable samples recorded.\n",
731
		       top->session->stats.nr_unprocessable_samples++);
732 733 734
		return;
	}

735
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
736
		top->exact_samples++;
737

738
	if (perf_event__preprocess_sample(event, machine, &al, sample,
739
					  symbol_filter) < 0 ||
740
	    al.filtered)
741
		return;
742

743
	if (!top->kptr_restrict_warned &&
744 745 746 747 748 749 750 751 752 753
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
754
		top->kptr_restrict_warned = true;
755 756
	}

757
	if (al.sym == NULL) {
758
		const char *msg = "Kernel samples will not be resolved.\n";
759 760 761 762 763 764 765 766 767 768 769
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
770
		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
771
		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
772
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
773 774 775 776 777 778 779 780 781 782
			if (symbol_conf.vmlinux_name) {
				ui__warning("The %s file can't be used.\n%s",
					    symbol_conf.vmlinux_name, msg);
			} else {
				ui__warning("A vmlinux file was not found.\n%s",
					    msg);
			}

			if (use_browser <= 0)
				sleep(5);
783
			top->vmlinux_warned = true;
784
		}
785 786
	}

787 788
	if (al.sym == NULL || !al.sym->ignore) {
		struct hist_entry *he;
789

790 791
		if ((sort__has_parent || symbol_conf.use_callchain) &&
		    sample->callchain) {
792 793 794 795
			err = machine__resolve_callchain(machine, evsel,
							 al.thread, sample,
							 &parent);

796 797 798 799
			if (err)
				return;
		}

800
		he = perf_evsel__add_hist_entry(evsel, &al, sample);
801 802 803
		if (he == NULL) {
			pr_err("Problem incrementing symbol period, skipping event\n");
			return;
804
		}
805

806
		if (symbol_conf.use_callchain) {
807
			err = callchain_append(he->callchain, &callchain_cursor,
808 809 810 811 812
					       sample->period);
			if (err)
				return;
		}

813 814
		if (top->sort_has_symbols)
			perf_top__record_precise_ip(top, he, evsel->idx, ip);
815
	}
816 817

	return;
818 819
}

820
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
821
{
822
	struct perf_sample sample;
823
	struct perf_evsel *evsel;
824
	struct perf_session *session = top->session;
825
	union perf_event *event;
826 827
	struct machine *machine;
	u8 origin;
828
	int ret;
829

830
	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
831
		ret = perf_evlist__parse_sample(top->evlist, event, &sample);
832 833 834 835
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
836

837
		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
838 839
		assert(evsel != NULL);

840 841
		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

842
		if (event->header.type == PERF_RECORD_SAMPLE)
843
			++top->samples;
844 845 846

		switch (origin) {
		case PERF_RECORD_MISC_USER:
847 848
			++top->us_samples;
			if (top->hide_user_symbols)
849
				continue;
850
			machine = &session->machines.host;
851 852
			break;
		case PERF_RECORD_MISC_KERNEL:
853 854
			++top->kernel_samples;
			if (top->hide_kernel_symbols)
855
				continue;
856
			machine = &session->machines.host;
857 858
			break;
		case PERF_RECORD_MISC_GUEST_KERNEL:
859 860
			++top->guest_kernel_samples;
			machine = perf_session__find_machine(session, event->ip.pid);
861 862
			break;
		case PERF_RECORD_MISC_GUEST_USER:
863
			++top->guest_us_samples;
864 865 866 867 868 869 870 871 872 873
			/*
			 * TODO: we don't process guest user from host side
			 * except simple counting.
			 */
			/* Fall thru */
		default:
			continue;
		}


874 875 876 877
		if (event->header.type == PERF_RECORD_SAMPLE) {
			perf_event__process_sample(&top->tool, event, evsel,
						   &sample, machine);
		} else if (event->header.type < PERF_RECORD_MAX) {
878
			hists__inc_nr_events(&evsel->hists, event->header.type);
879
			machine__process_event(machine, event);
880
		} else
881
			++session->stats.nr_unknown_events;
882 883 884
	}
}

885
static void perf_top__mmap_read(struct perf_top *top)
886
{
887 888
	int i;

889 890
	for (i = 0; i < top->evlist->nr_mmaps; i++)
		perf_top__mmap_read_idx(top, i);
891 892
}

893
static void perf_top__start_counters(struct perf_top *top)
894
{
895
	char msg[512];
896
	struct perf_evsel *counter;
897
	struct perf_evlist *evlist = top->evlist;
898
	struct perf_record_opts *opts = &top->record_opts;
899

900
	perf_evlist__config(evlist, opts);
901

902 903
	list_for_each_entry(counter, &evlist->entries, node) {
try_again:
904
		if (perf_evsel__open(counter, top->evlist->cpus,
905
				     top->evlist->threads) < 0) {
906
			if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
907
				if (verbose)
908
					ui__warning("%s\n", msg);
909 910
				goto try_again;
			}
911

912 913 914
			perf_evsel__open_strerror(counter, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
915
			goto out_err;
916
		}
917
	}
918

919
	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
920
		ui__error("Failed to mmap with %d (%s)\n",
921 922 923 924 925 926 927 928 929
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
930 931
}

932
static int perf_top__setup_sample_type(struct perf_top *top)
933
{
934
	if (!top->sort_has_symbols) {
935
		if (symbol_conf.use_callchain) {
936
			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
937 938
			return -EINVAL;
		}
939
	} else if (callchain_param.mode != CHAIN_NONE) {
940
		if (callchain_register_param(&callchain_param) < 0) {
941
			ui__error("Can't register callchain params.\n");
942 943 944 945 946 947 948
			return -EINVAL;
		}
	}

	return 0;
}

949
static int __cmd_top(struct perf_top *top)
950
{
951
	struct perf_record_opts *opts = &top->record_opts;
952
	pthread_t thread;
953
	int ret;
954
	/*
955 956
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
957
	 */
958 959
	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (top->session == NULL)
960
		return -ENOMEM;
961

962
	ret = perf_top__setup_sample_type(top);
963 964 965
	if (ret)
		goto out_delete;

966
	if (perf_target__has_task(&opts->target))
967
		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
968
						  perf_event__process,
969
						  &top->session->machines.host);
970
	else
971
		perf_event__synthesize_threads(&top->tool, perf_event__process,
972
					       &top->session->machines.host);
973 974
	perf_top__start_counters(top);
	top->session->evlist = top->evlist;
975
	perf_session__set_id_hdr_size(top->session);
976

977 978 979 980 981 982 983 984 985 986 987
	/*
	 * When perf is starting the traced process, all the events (apart from
	 * group members) have enable_on_exec=1 set, so don't spoil it by
	 * prematurely enabling them.
	 *
	 * XXX 'top' still doesn't start workloads like record, trace, but should,
	 * so leave the check here.
	 */
        if (!perf_target__none(&opts->target))
                perf_evlist__enable(top->evlist);

988
	/* Wait for a minimal set of events before starting the snapshot */
989
	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
990

991
	perf_top__mmap_read(top);
992

993
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
994
							    display_thread), top)) {
995
		ui__error("Could not create display thread.\n");
996 997 998
		exit(-1);
	}

999
	if (top->realtime_prio) {
1000 1001
		struct sched_param param;

1002
		param.sched_priority = top->realtime_prio;
1003
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1004
			ui__error("Could not set realtime priority.\n");
1005 1006 1007 1008 1009
			exit(-1);
		}
	}

	while (1) {
1010
		u64 hits = top->samples;
1011

1012
		perf_top__mmap_read(top);
1013

1014 1015
		if (hits == top->samples)
			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1016 1017
	}

1018
out_delete:
1019 1020
	perf_session__delete(top->session);
	top->session = NULL;
1021 1022 1023 1024 1025

	return 0;
}

static int
1026
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1027 1028 1029 1030
{
	/*
	 * --no-call-graph
	 */
1031
	if (unset)
1032 1033 1034 1035
		return 0;

	symbol_conf.use_callchain = true;

1036
	return record_parse_callchain_opt(opt, arg, unset);
1037
}
1038

1039
/*
 * cmd_top - entry point of the 'perf top' builtin.
 *
 * @argc, @argv: command line handed to parse_options()
 * @prefix:      unused
 *
 * Builds a struct perf_top with its defaults, parses the command line,
 * configures sorting, the UI, the profiling target and the sampling
 * period/frequency, initializes the symbol layer and then hands control to
 * __cmd_top().
 *
 * Returns the status of __cmd_top(), or a negative value when setup fails:
 *   -ENOMEM  the event list or the default event could not be allocated
 *   -errno   (as saved right after perf_target__parse_uid() failed)
 *   -EINVAL  both the sampling frequency and the event count are zero
 *   -1       symbol__init() failed
 *
 * Once the event list has been allocated, every error return goes through
 * the out_delete_evlist label so the list is released on all paths.
 */
int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int status;
	char errbuf[BUFSIZ];
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.record_opts = {
			/*
			 * UINT_MAX / ULLONG_MAX act as "not given by the user"
			 * markers for user_freq / user_interval; they are
			 * tested after option parsing below.
			 */
			.mmap_pages	= UINT_MAX,
			.user_freq	= UINT_MAX,
			.user_interval	= ULLONG_MAX,
			.freq		= 4000, /* 4 KHz */
			.target		     = {
				.uses_mmap   = true,
			},
		},
		.sym_pcnt_filter     = 5,
	};
	struct perf_record_opts *opts = &top.record_opts;
	struct perf_target *target = &opts->target;
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
	OPT_STRING('p', "pid", &target->pid, "pid",
		    "profile events on existing process id"),
	OPT_STRING('t', "tid", &target->tid, "tid",
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages,
		     "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &opts->group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
	OPT_UINTEGER('F', "freq", &opts->user_freq, "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
			     "mode[,dump_size]", record_callchain_help,
			     &parse_callchain_opt, "fp"),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
	OPT_END()
	};
	const char * const top_usage[] = {
		"perf top [<options>]",
		NULL
	};

	/* Nothing else has been allocated yet, so a plain return is fine. */
	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	symbol_conf.exclude_other = false;

	/* Any argument left over after option parsing is a usage error. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* 'top' replaces the generic default sort order with its own. */
	if (sort_order == default_sort_order)
		sort_order = "dso,symbol";

	setup_sorting(top_usage, options);

	/* --stdio takes precedence over --tui when both are given. */
	if (top.use_stdio)
		use_browser = 0;
	else if (top.use_tui)
		use_browser = 1;

	setup_browser(false);

	/* A target validation problem is only reported; setup continues. */
	status = perf_target__validate(target);
	if (status) {
		perf_target__strerror(target, status, errbuf, sizeof(errbuf));
		ui__warning("%s", errbuf);
	}

	status = perf_target__parse_uid(target);
	if (status) {
		/* Grab errno before the reporting calls get a chance to change it. */
		int saved_errno = errno;

		perf_target__strerror(target, status, errbuf, sizeof(errbuf));
		ui__error("%s", errbuf);

		status = -saved_errno;
		goto out_delete_evlist;
	}

	/* If perf_target__none() says so, default to system-wide collection. */
	if (perf_target__none(target))
		target->system_wide = true;

	if (perf_evlist__create_maps(top.evlist, target) < 0)
		usage_with_options(top_usage, options);

	/* No event selected with -e: fall back to the default event. */
	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		ui__error("Not enough memory for event selector list\n");
		/* Release the event list instead of returning directly. */
		status = -ENOMEM;
		goto out_delete_evlist;
	}

	symbol_conf.nr_events = top.evlist->nr_entries;

	/* Refresh delay is clamped to a minimum of one second. */
	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/* Apply -c / -F only when the sentinels show they were given. */
	if (opts->user_interval != ULLONG_MAX)
		opts->default_interval = opts->user_interval;
	if (opts->user_freq != UINT_MAX)
		opts->freq = opts->user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (opts->default_interval)
		opts->freq = 0;
	else if (opts->freq) {
		opts->default_interval = opts->freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		status = -EINVAL;
		goto out_delete_evlist;
	}

	top.sym_evsel = perf_evlist__first(top.evlist);

	symbol_conf.priv_size = sizeof(struct annotation);

	/* Only try the vmlinux search path when -k did not name a file. */
	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		/* Same return value as before, but release the event list. */
		status = -1;
		goto out_delete_evlist;
	}

	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);

	/*
	 * Avoid annotation data structures overhead when symbols aren't on the
	 * sort list.
	 */
	top.sort_has_symbols = sort_sym.list.next != NULL;

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		/*
		 * print_entries is still 0: let
		 * perf_top__update_print_entries() set it up and install
		 * perf_top__sig_winch as the SIGWINCH handler.
		 */
		struct sigaction act = {
			.sa_sigaction = perf_top__sig_winch,
			.sa_flags     = SA_SIGINFO,
		};
		perf_top__update_print_entries(&top);
		sigaction(SIGWINCH, &act, NULL);
	}

	status = __cmd_top(&top);

out_delete_evlist:
	perf_evlist__delete(top.evlist);

	return status;
}