/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
18
 */
19
#include "builtin.h"
20

21
#include "perf.h"
22

23
#include "util/annotate.h"
24
#include "util/cache.h"
25
#include "util/color.h"
26
#include "util/evlist.h"
27
#include "util/evsel.h"
28 29
#include "util/session.h"
#include "util/symbol.h"
30
#include "util/thread.h"
31
#include "util/thread_map.h"
32
#include "util/top.h"
33
#include "util/util.h"
34
#include <linux/rbtree.h>
35 36
#include "util/parse-options.h"
#include "util/parse-events.h"
37
#include "util/cpumap.h"
38
#include "util/xyarray.h"
39

40 41
#include "util/debug.h"

42 43
#include <assert.h>
#include <fcntl.h>
44

45
#include <stdio.h>
46 47
#include <termios.h>
#include <unistd.h>
48
#include <inttypes.h>
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

65
/*
 * Access, as an int lvalue, the entry at (x, y) of the xyarray hanging off
 * e->fd.  The argument e is parenthesized so that any pointer-valued
 * expression can be passed safely.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
66

67 68 69 70 71 72 73 74 75 76
/*
 * State of the running "top" instance together with its startup defaults.
 * Members that are not named here are zero-initialized.
 */
static struct perf_top top = {
	.freq			= 1000, /* 1 KHz */
	.delay_secs		= 2,
	.count_filter		= 5,
	.display_weighted	= -1,
	.target_pid		= -1,
	.target_tid		= -1,
	.active_symbols_lock	= PTHREAD_MUTEX_INITIALIZER,
	.active_symbols		= LIST_HEAD_INIT(top.active_symbols),
};
77

78
/* Settings filled in from the command line (see the options[] table). */
static bool			system_wide			=  false;

static bool			use_tui, use_stdio;

static int			default_interval		=      0;

static bool			inherit				=  false;
static int			realtime_prio			=      0;
static bool			group				=  false;
/* Set from sysconf(_SC_PAGE_SIZE) in cmd_top(). */
static unsigned int		page_size;
static unsigned int		mmap_pages			=    128;

static bool			dump_symtab                     =  false;

/* Last known terminal dimensions, refreshed by get_term_dimensions(). */
static struct winsize		winsize;

/* Name of the symbol to annotate, as given with -s/--sym-annotate. */
static const char		*sym_filter			=   NULL;
/* Symbol currently being annotated, or NULL when none is selected. */
struct sym_entry		*sym_filter_entry		=   NULL;
/* Symbol scheduled by symbol_filter() to become sym_filter_entry. */
struct sym_entry		*sym_filter_entry_sched		=   NULL;
/* Percentage threshold passed to the annotation printer. */
static int			sym_pcnt_filter			=      5;
98

99 100 101 102
/*
 * Source functions
 */

103
/*
 * Read environment variable @name as a terminal dimension.
 *
 * Returns the leading decimal number when it lies in 1..65535 (the range of
 * the unsigned short ws_row/ws_col fields), or 0 when the variable is unset,
 * does not start with a number, or is out of range.  Unlike atoi(), negative
 * or oversized values are rejected instead of silently wrapping.
 */
static unsigned short term_dimension_from_env(const char *name)
{
	const char *s = getenv(name);
	char *end;
	long val;

	if (s == NULL)
		return 0;

	val = strtol(s, &end, 10);
	if (end == s || val <= 0 || val > 65535)
		return 0;

	return (unsigned short)val;
}

/*
 * Fill @ws with the terminal size.
 *
 * Sources are tried in this order:
 *   1. the LINES and COLUMNS environment variables (both must be usable),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available,
 *   3. a fixed 25x80 fallback.
 */
void get_term_dimensions(struct winsize *ws)
{
	unsigned short rows = term_dimension_from_env("LINES");
	unsigned short cols = term_dimension_from_env("COLUMNS");

	if (rows && cols) {
		ws->ws_row = rows;
		ws->ws_col = cols;
		return;
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

125
static void update_print_entries(struct winsize *ws)
126
{
127
	top.print_entries = ws->ws_row;
128

129 130
	if (top.print_entries > 9)
		top.print_entries -= 9;
131 132 133 134
}

/*
 * SIGWINCH handler: re-read the terminal size into the global winsize and
 * recompute the number of entries to print.  The signal number is unused.
 */
static void sig_winch_handler(int sig __used)
{
	get_term_dimensions(&winsize);
	update_print_entries(&winsize);
}

139
static int parse_source(struct sym_entry *syme)
140 141
{
	struct symbol *sym;
142
	struct annotation *notes;
143
	struct map *map;
144
	int err = -1;
145 146

	if (!syme)
147 148 149 150 151 152 153 154
		return -1;

	sym = sym_entry__symbol(syme);
	map = syme->map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
155 156 157 158
	if (map->dso->origin == DSO__ORIG_KERNEL) {
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
159
		return -1;
160 161
	}

162 163 164
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
165 166 167
		goto out_assign;
	}

168
	pthread_mutex_lock(&notes->lock);
169

170 171 172
	if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
173
		sleep(1);
174
		goto out_unlock;
175
	}
176

177
	err = symbol__annotate(sym, syme->map, 0);
178
	if (err == 0) {
179 180
out_assign:
	sym_filter_entry = syme;
181 182
	}
out_unlock:
183
	pthread_mutex_unlock(&notes->lock);
184
	return err;
185 186 187 188
}

/* Zero the annotation histograms of the symbol behind @syme. */
static void __zero_source_counters(struct sym_entry *syme)
{
	struct symbol *sym = sym_entry__symbol(syme);

	symbol__annotate_zero_histograms(sym);
}

/*
 * Account one sample at address @ip, for event index @counter, against the
 * annotation histogram of @syme.
 *
 * Only the symbol currently selected for annotation (sym_filter_entry) is
 * recorded; samples for any other symbol are ignored.  The annotation lock
 * is taken with trylock, so when it is busy the sample is dropped rather
 * than waiting.
 */
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;

	if (syme != sym_filter_entry)
		return;

	sym = sym_entry__symbol(syme);
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock))
		return;

	/* Translate the sampled address through the map before accounting it. */
	ip = syme->map->map_ip(syme->map, ip);
	symbol__inc_addr_samples(sym, syme->map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

static void show_details(struct sym_entry *syme)
{
215
	struct annotation *notes;
216
	struct symbol *symbol;
217
	int more;
218 219 220 221

	if (!syme)
		return;

222
	symbol = sym_entry__symbol(syme);
223 224 225 226 227 228
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
229

230
	printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
231 232
	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);

233 234
	more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
				       0, sym_pcnt_filter, top.print_entries);
235 236 237
	if (top.zero)
		symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
	else
238
		symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
239
	if (more != 0)
240
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
241 242
out_unlock:
	pthread_mutex_unlock(&notes->lock);
243
}
244 245 246

/*
 * ANSI/VT100 sequence written before every refresh: ESC[H moves the cursor
 * to the top-left corner, ESC[2J erases the whole screen.  The escapes are
 * spelled as octal so no raw control bytes live in the source file.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

247
static void __list_insert_active_sym(struct sym_entry *syme)
248
{
249
	list_add(&syme->node, &top.active_symbols);
250
}
251

252
static void print_sym_table(struct perf_session *session)
253
{
254 255
	char bf[160];
	int printed = 0;
256
	struct rb_node *nd;
257 258
	struct sym_entry *syme;
	struct rb_root tmp = RB_ROOT;
259
	const int win_width = winsize.ws_col - 1;
260 261
	int sym_width, dso_width, dso_short_width;
	float sum_ksamples = perf_top__decay_samples(&top, &tmp);
262

263
	puts(CONSOLE_CLEAR);
264

265 266
	perf_top__header_snprintf(&top, bf, sizeof(bf));
	printf("%s\n", bf);
267

268
	perf_top__reset_sample_counters(&top);
269

270
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
271

272 273 274 275 276 277
	if (session->hists.stats.total_lost != 0) {
		color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
		printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
		       session->hists.stats.total_lost);
	}

278 279 280 281 282
	if (sym_filter_entry) {
		show_details(sym_filter_entry);
		return;
	}

283 284
	perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
			      &sym_width);
285

286 287 288 289 290
	if (sym_width + dso_width > winsize.ws_col - 29) {
		dso_width = dso_short_width;
		if (sym_width + dso_width > winsize.ws_col - 29)
			sym_width = winsize.ws_col - dso_width - 29;
	}
291
	putchar('\n');
292
	if (top.evlist->nr_entries == 1)
293
		printf("             samples  pcnt");
294
	else
295
		printf("   weight    samples  pcnt");
296

297 298
	if (verbose)
		printf("         RIP       ");
299
	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
300
	printf("   %s    _______ _____",
301
	       top.evlist->nr_entries == 1 ? "      " : "______");
302
	if (verbose)
303
		printf(" ________________");
304
	printf(" %-*.*s", sym_width, sym_width, graph_line);
305
	printf(" %-*.*s", dso_width, dso_width, graph_line);
306
	puts("\n");
307

308
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
309
		struct symbol *sym;
310
		double pcnt;
311

312
		syme = rb_entry(nd, struct sym_entry, rb_node);
313
		sym = sym_entry__symbol(syme);
314 315
		if (++printed > top.print_entries ||
		    (int)syme->snap_count < top.count_filter)
316
			continue;
317

318 319
		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
					 sum_ksamples));
320

321
		if (top.evlist->nr_entries == 1 || !top.display_weighted)
322
			printf("%20.2f ", syme->weight);
323
		else
324
			printf("%9.1f %10ld ", syme->weight, syme->snap_count);
325

326
		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
327
		if (verbose)
328
			printf(" %016" PRIx64, sym->start);
329
		printf(" %-*.*s", sym_width, sym_width, sym->name);
330 331 332 333
		printf(" %-*.*s\n", dso_width, dso_width,
		       dso_width >= syme->map->dso->long_name_len ?
					syme->map->dso->long_name :
					syme->map->dso->short_name);
334 335 336
	}
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
/*
 * Prompt with @msg on stdout and read one line from stdin.
 *
 * *target is overwritten only when the line (without its newline) consists
 * solely of decimal digits and the value fits in a non-negative int; an
 * empty line yields 0.  On read failure or invalid input *target is left
 * untouched.
 */
static void prompt_integer(int *target, const char *msg)
{
	/* getline() allocates the buffer itself when given NULL/0. */
	char *buf = NULL, *p;
	size_t dummy = 0;
	unsigned long val;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	/* The buffer must be freed even when getline() fails. */
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	p = buf;
	while (*p) {
		/* Cast: passing a negative char to isdigit() is undefined. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
		p++;
	}

	val = strtoul(buf, NULL, 10);
	tmp = (int)val;
	/* Reject values that do not survive the narrowing to int. */
	if (tmp < 0 || (unsigned long)tmp != val)
		goto out_free;

	*target = tmp;
out_free:
	free(buf);
}

/*
 * Prompt for a percentage with @msg.  The value read by prompt_integer()
 * (0 when nothing valid was entered) is stored into *target only when it
 * lies within 0..100; otherwise *target keeps its previous value.
 */
static void prompt_percent(int *target, const char *msg)
{
	int pct = 0;

	prompt_integer(&pct, msg);

	if (pct < 0 || pct > 100)
		return;

	*target = pct;
}

static void prompt_symbol(struct sym_entry **target, const char *msg)
{
	char *buf = malloc(0), *p;
	struct sym_entry *syme = *target, *n, *found = NULL;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
		*target = NULL;
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

392 393 394
	pthread_mutex_lock(&top.active_symbols_lock);
	syme = list_entry(top.active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&top.active_symbols_lock);
395

396
	list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
397
		struct symbol *sym = sym_entry__symbol(syme);
398 399 400 401 402 403 404 405

		if (!strcmp(buf, sym->name)) {
			found = syme;
			break;
		}
	}

	if (!found) {
406
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
407 408 409 410 411 412 413 414 415
		sleep(1);
		return;
	} else
		parse_source(found);

out_free:
	free(buf);
}

416
static void print_mapped_keys(void)
417
{
418 419 420
	char *name = NULL;

	if (sym_filter_entry) {
421
		struct symbol *sym = sym_entry__symbol(sym_filter_entry);
422 423 424 425
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
426 427
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top.delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top.print_entries);
428

429 430
	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top.sym_evsel));
431

432
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top.count_filter);
433

434 435 436
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
437

438 439
	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);
440

441
	fprintf(stdout,
442
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
443
		top.hide_kernel_symbols ? "yes" : "no");
444 445
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
446 447
		top.hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top.zero ? 1 : 0);
448 449 450 451 452 453 454 455 456 457 458 459
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int key_mapped(int c)
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
460 461
		case 'K':
		case 'U':
462 463 464
		case 'F':
		case 's':
		case 'S':
465 466 467
			return 1;
		case 'E':
		case 'w':
468
			return top.evlist->nr_entries > 1 ? 1 : 0;
469 470
		default:
			break;
471 472 473
	}

	return 0;
474 475
}

476
static void handle_keypress(struct perf_session *session, int c)
477
{
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
	if (!key_mapped(c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		print_mapped_keys();
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!key_mapped(c))
			return;
	}

501 502
	switch (c) {
		case 'd':
503 504 505
			prompt_integer(&top.delay_secs, "Enter display delay");
			if (top.delay_secs < 1)
				top.delay_secs = 1;
506 507
			break;
		case 'e':
508 509
			prompt_integer(&top.print_entries, "Enter display entries (lines)");
			if (top.print_entries == 0) {
510
				sig_winch_handler(SIGWINCH);
511 512 513
				signal(SIGWINCH, sig_winch_handler);
			} else
				signal(SIGWINCH, SIG_DFL);
514 515
			break;
		case 'E':
516
			if (top.evlist->nr_entries > 1) {
517
				fprintf(stderr, "\nAvailable events:");
518

519 520
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
521

522
				prompt_integer(&top.sym_counter, "Enter details event counter");
523

524 525 526 527
				if (top.sym_counter >= top.evlist->nr_entries) {
					top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
					top.sym_counter = 0;
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
528
					sleep(1);
529
					break;
530
				}
531 532
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
					if (top.sym_evsel->idx == top.sym_counter)
533
						break;
534
			} else top.sym_counter = 0;
535 536
			break;
		case 'f':
537
			prompt_integer(&top.count_filter, "Enter display event count filter");
538 539 540 541
			break;
		case 'F':
			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
			break;
542
		case 'K':
543
			top.hide_kernel_symbols = !top.hide_kernel_symbols;
544
			break;
545 546 547
		case 'q':
		case 'Q':
			printf("exiting.\n");
548
			if (dump_symtab)
549
				perf_session__fprintf_dsos(session, stderr);
550 551 552 553 554 555 556 557 558 559 560 561 562 563
			exit(0);
		case 's':
			prompt_symbol(&sym_filter_entry, "Enter details symbol");
			break;
		case 'S':
			if (!sym_filter_entry)
				break;
			else {
				struct sym_entry *syme = sym_filter_entry;

				sym_filter_entry = NULL;
				__zero_source_counters(syme);
			}
			break;
564
		case 'U':
565
			top.hide_user_symbols = !top.hide_user_symbols;
566
			break;
567
		case 'w':
568
			top.display_weighted = ~top.display_weighted;
569
			break;
570
		case 'z':
571
			top.zero = !top.zero;
572
			break;
573 574
		default:
			break;
575 576 577
	}
}

578 579 580 581 582 583 584 585
/*
 * Display thread entry point used when the TUI is selected: run the TUI
 * browser and, once it returns, shut the browser down and exit the whole
 * process with status 0.  The thread argument is not used.
 */
static void *display_thread_tui(void *arg __used)
{
	perf_top__tui_browser(&top);

	/* The browser has returned: tear it down and leave. */
	exit_browser(0);
	exit(0);

	/* Not reached; present only to satisfy the thread signature. */
	return NULL;
}

586
/*
 * Display thread entry point for the stdio interface.  @arg is the
 * struct perf_session to display.
 *
 * Loops forever: put the terminal in non-canonical, no-echo mode, redraw the
 * symbol table every top.delay_secs seconds until input is available on
 * stdin, then restore the terminal and hand the key to handle_keypress().
 */
static void *display_thread(void *arg __used)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios tc, save;
	int delay_msecs, c;
	struct perf_session *session = (struct perf_session *) arg;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;

	while (1) {
		int ready;

		delay_msecs = top.delay_secs * 1000;
		tcsetattr(0, TCSANOW, &tc);
		/* trash return*/
		getc(stdin);

		/*
		 * Keep refreshing while poll() times out.  A poll()
		 * interrupted by a signal (EINTR, e.g. SIGWINCH on a
		 * terminal resize) is not a keypress either: just redraw.
		 */
		do {
			print_sym_table(session);
			ready = poll(&stdin_poll, 1, delay_msecs);
		} while (ready == 0 || (ready < 0 && errno == EINTR));

		c = getc(stdin);
		tcsetattr(0, TCSAFLUSH, &save);

		handle_keypress(session, c);
	}

	return NULL;
}

618
/* Tag samples to be skipped. */
619
/*
 * NULL-terminated list of symbol names whose sym_entry gets its skip flag
 * set by symbol_filter().
 */
static const char *skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

633
static int symbol_filter(struct map *map, struct symbol *sym)
634
{
635 636
	struct sym_entry *syme;
	const char *name = sym->name;
637
	int i;
638

639 640 641 642 643 644 645
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

646 647 648 649 650 651 652
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
653 654
		return 1;

655
	syme = symbol__priv(sym);
656
	syme->map = map;
657
	symbol__annotate_init(map, sym);
658 659 660 661 662 663

	if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
		/* schedule initial sym_filter_entry setup */
		sym_filter_entry_sched = syme;
		sym_filter = NULL;
	}
664

665 666 667 668 669 670
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			syme->skip = 1;
			break;
		}
	}
671 672 673 674

	return 0;
}

675 676 677
static void perf_event__process_sample(const union perf_event *event,
				       struct perf_sample *sample,
				       struct perf_session *session)
678
{
679
	u64 ip = event->ip.ip;
680
	struct sym_entry *syme;
681
	struct addr_location al;
682
	struct machine *machine;
683
	u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
684

685
	++top.samples;
686

687
	switch (origin) {
688
	case PERF_RECORD_MISC_USER:
689 690
		++top.us_samples;
		if (top.hide_user_symbols)
691
			return;
692
		machine = perf_session__find_host_machine(session);
693
		break;
694
	case PERF_RECORD_MISC_KERNEL:
695 696
		++top.kernel_samples;
		if (top.hide_kernel_symbols)
697
			return;
698
		machine = perf_session__find_host_machine(session);
699 700
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
701
		++top.guest_kernel_samples;
702
		machine = perf_session__find_machine(session, event->ip.pid);
703
		break;
704
	case PERF_RECORD_MISC_GUEST_USER:
705
		++top.guest_us_samples;
706 707 708 709 710
		/*
		 * TODO: we don't process guest user from host side
		 * except simple counting.
		 */
		return;
711 712 713 714
	default:
		return;
	}

715
	if (!machine && perf_guest) {
716
		pr_err("Can't find guest [%d]'s kernel information\n",
717
			event->ip.pid);
718 719 720
		return;
	}

721
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
722
		top.exact_samples++;
723

724 725
	if (perf_event__preprocess_sample(event, session, &al, sample,
					  symbol_filter) < 0 ||
726
	    al.filtered)
727
		return;
728

729 730 731 732 733 734 735 736 737 738 739 740
	if (al.sym == NULL) {
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
741
		if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
742 743 744 745 746 747 748 749 750
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
			pr_err("The %s file can't be used\n",
			       symbol_conf.vmlinux_name);
			exit(1);
		}

		return;
	}

751 752 753 754
	/* let's see, whether we need to install initial sym_filter_entry */
	if (sym_filter_entry_sched) {
		sym_filter_entry = sym_filter_entry_sched;
		sym_filter_entry_sched = NULL;
755 756 757 758 759 760
		if (parse_source(sym_filter_entry) < 0) {
			struct symbol *sym = sym_entry__symbol(sym_filter_entry);

			pr_err("Can't annotate %s", sym->name);
			if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
				pr_err(": No vmlinux file was found in the path:\n");
761
				machine__fprintf_vmlinux_path(machine, stderr);
762 763 764 765
			} else
				pr_err(".\n");
			exit(1);
		}
766 767
	}

768
	syme = symbol__priv(al.sym);
769
	if (!syme->skip) {
770 771
		struct perf_evsel *evsel;

772
		syme->origin = origin;
773
		evsel = perf_evlist__id2evsel(top.evlist, sample->id);
774 775
		assert(evsel != NULL);
		syme->count[evsel->idx]++;
776
		record_precise_ip(syme, evsel->idx, ip);
777
		pthread_mutex_lock(&top.active_symbols_lock);
778 779
		if (list_empty(&syme->node) || !syme->node.next)
			__list_insert_active_sym(syme);
780
		pthread_mutex_unlock(&top.active_symbols_lock);
781
	}
782 783
}

784
static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
785
{
786
	struct perf_sample sample;
787
	union perf_event *event;
788

789
	while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) {
790
		perf_session__parse_sample(self, event, &sample);
791

792
		if (event->header.type == PERF_RECORD_SAMPLE)
793
			perf_event__process_sample(event, &sample, self);
794
		else
795
			perf_event__process(event, &sample, self);
796 797 798
	}
}

799
static void perf_session__mmap_read(struct perf_session *self)
800
{
801 802
	int i;

803
	for (i = 0; i < top.evlist->cpus->nr; i++)
804
		perf_session__mmap_read_cpu(self, i);
805 806
}

807 808 809
static void start_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *counter;
810

811 812
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
813

814 815
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

816
		if (top.freq) {
817 818
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
819
			attr->sample_freq = top.freq;
820
		}
821

822 823 824 825 826
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

827 828
		attr->mmap = 1;
try_again:
829 830
		if (perf_evsel__open(counter, top.evlist->cpus,
				     top.evlist->threads, group, inherit) < 0) {
831 832 833
			int err = errno;

			if (err == EPERM || err == EACCES)
834 835 836
				die("Permission error - are you root?\n"
					"\t Consider tweaking"
					" /proc/sys/kernel/perf_event_paranoid.\n");
837 838 839 840 841
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
842 843
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
844 845 846 847 848 849 850 851 852

				if (verbose)
					warning(" ... trying to fall back to cpu-clock-ticks\n");

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
			printf("\n");
853 854 855
			error("sys_perf_event_open() syscall returned with %d "
			      "(%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));
856 857 858
			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			exit(-1);
		}
859
	}
860

861
	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
862
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
863 864 865 866 867
}

static int __cmd_top(void)
{
	pthread_t thread;
868
	struct perf_evsel *first;
K
Kyle McMartin 已提交
869
	int ret __used;
870
	/*
871 872
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
873
	 */
874
	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
875 876
	if (session == NULL)
		return -ENOMEM;
877

878 879
	if (top.target_tid != -1)
		perf_event__synthesize_thread(top.target_tid, perf_event__process,
880
					      session);
881
	else
882
		perf_event__synthesize_threads(perf_event__process, session);
883

884 885
	start_counters(top.evlist);
	first = list_entry(top.evlist->entries.next, struct perf_evsel, node);
886
	perf_session__set_sample_type(session, first->attr.sample_type);
887

888
	/* Wait for a minimal set of events before starting the snapshot */
889
	poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
890

891
	perf_session__mmap_read(session);
892

893 894
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
							     display_thread), session)) {
895 896 897 898 899 900 901 902 903 904 905 906 907 908 909
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
910
		u64 hits = top.samples;
911

912
		perf_session__mmap_read(session);
913

914 915
		if (hits == top.samples)
			ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
916 917 918 919
	}

	return 0;
}
920 921 922 923 924 925 926

/* NULL-terminated usage lines handed to the option parser. */
static const char * const top_usage[] = {
	[0] = "perf top [<options>]",
	[1] = NULL,
};

static const struct option options[] = {
927
	OPT_CALLBACK('e', "event", &top.evlist, "event",
928 929
		     "event selector. use 'perf list' to list available events",
		     parse_events),
930 931
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
932
	OPT_INTEGER('p', "pid", &top.target_pid,
933
		    "profile events on existing process id"),
934
	OPT_INTEGER('t', "tid", &top.target_tid,
935
		    "profile events on existing thread id"),
936 937
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
938
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
939
		    "list of cpus to monitor"),
940 941
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
942
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
943
		    "hide kernel symbols"),
944
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
945 946
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
947
	OPT_INTEGER('d', "delay", &top.delay_secs,
948 949 950
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
			    "dump the symbol table used for profiling"),
951
	OPT_INTEGER('f', "count-filter", &top.count_filter,
952 953 954
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &group,
			    "put the counters into a counter group"),
955 956
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
957
	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
958
		    "symbol to annotate"),
959
	OPT_BOOLEAN('z', "zero", &top.zero,
960
		    "zero history across updates"),
961
	OPT_INTEGER('F', "freq", &top.freq,
962
		    "profile at this frequency"),
963
	OPT_INTEGER('E', "entries", &top.print_entries,
964
		    "display this many functions"),
965
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
966
		    "hide user symbols"),
967 968
	OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
969
	OPT_INCR('v', "verbose", &verbose,
970
		    "be more verbose (show counter open errors, etc)"),
971 972 973
	OPT_END()
};

974
/*
 * Entry point of the "perf top" builtin.
 *
 * Parses the command line, creates the event list and its cpu/thread maps,
 * fills in default event and sampling settings, initializes symbol handling
 * and the terminal size, and then runs __cmd_top().
 *
 * Returns the status of __cmd_top(), or a negative value when setup fails.
 * Once the event list has been created, every failure that returns to the
 * caller releases it through the common exit path.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	page_size = sysconf(_SC_PAGE_SIZE);

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/*
	 * XXX For now start disabled, only using TUI if explicitly asked for.
	 * Change that when handle_keys equivalent gets written, live annotation
	 * done, etc.
	 */
	use_browser = 0;

	if (use_stdio)
		use_browser = 0;
	else if (use_tui)
		use_browser = 1;

	setup_browser(false);

	/* CPU and PID are mutually exclusive */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		/* status is still -ENOMEM; free the evlist on the way out. */
		goto out_free_fd;
	}

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		top.freq = 0;
	else if (top.freq) {
		default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
					 top.evlist->threads->nr) < 0)
			goto out_free_fd;
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (pos->attr.sample_period)
			continue;

		pos->attr.sample_period = default_interval;
	}

	if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
	    perf_evlist__alloc_mmap(top.evlist) < 0)
		goto out_free_fd;

	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	/* Per-symbol private area: sym_entry, annotation and event counters. */
	symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
				 (top.evlist->nr_entries + 1) * sizeof(unsigned long));

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		/* Previously returned -1 directly and leaked the evlist. */
		status = -1;
		goto out_free_fd;
	}

	get_term_dimensions(&winsize);
	/* No -E given: size the table from the terminal and track resizes. */
	if (top.print_entries == 0) {
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	status = __cmd_top();
out_free_fd:
	perf_evlist__delete(top.evlist);

	return status;
}