/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19
#include "builtin.h"
20

21
#include "perf.h"
22

23
#include "util/annotate.h"
24
#include "util/cache.h"
25
#include "util/color.h"
26
#include "util/evlist.h"
27
#include "util/evsel.h"
28 29
#include "util/session.h"
#include "util/symbol.h"
30
#include "util/thread.h"
31
#include "util/thread_map.h"
32
#include "util/top.h"
33
#include "util/util.h"
34
#include <linux/rbtree.h>
35 36
#include "util/parse-options.h"
#include "util/parse-events.h"
37
#include "util/cpumap.h"
38
#include "util/xyarray.h"
39

40 41
#include "util/debug.h"

42 43
#include <assert.h>
#include <fcntl.h>
44

45
#include <stdio.h>
46 47
#include <termios.h>
#include <unistd.h>
48
#include <inttypes.h>
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

65
/*
 * FD(e, x, y): lvalue for the int stored at position (x, y) of the
 * xyarray hanging off e->fd.  Every macro argument is parenthesized so
 * that expression arguments expand with the intended precedence.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, (x), (y)))
66

67 68 69 70 71 72 73 74 75 76
/*
 * State of the running 'perf top' instance.  Only the non-zero defaults
 * are spelled out here; every other member starts out zeroed.
 */
static struct perf_top top = {
	.freq			= 1000,	/* 1 KHz */
	.delay_secs		= 2,
	.count_filter		= 5,
	.display_weighted	= -1,
	.target_pid		= -1,
	.target_tid		= -1,
	.active_symbols_lock	= PTHREAD_MUTEX_INITIALIZER,
	.active_symbols		= LIST_HEAD_INIT(top.active_symbols),
};
77

78
/* -a/--all-cpus: system-wide collection from all CPUs. */
static bool			system_wide;

/* --tui / --stdio: interface selection flags. */
static bool			use_tui;
static bool			use_stdio;

/* -c/--count: event period to sample (0 means "not given"). */
static int			default_interval;

/* -i/--inherit: child tasks inherit counters. */
static bool			inherit;
/* -r/--realtime: SCHED_FIFO priority to collect with (0 = off). */
static int			realtime_prio;
/* -g/--group: put the counters into a counter group. */
static bool			group;
/* Filled in by cmd_top() from sysconf(_SC_PAGE_SIZE). */
static unsigned int		page_size;
/* -m/--mmap-pages: number of mmap data pages. */
static unsigned int		mmap_pages		= 128;

/* -D/--dump-symtab: dump the DSO list to stderr on quit. */
static bool			dump_symtab;

/* Last known terminal dimensions, see get_term_dimensions(). */
static struct winsize		winsize;

/* -s/--sym-annotate: name of the symbol to annotate, if any. */
static const char		*sym_filter;
/* Entry picked by symbol_filter(), installed by the sample handler. */
struct sym_entry		*sym_filter_entry_sched;
/* Minimum percentage shown in the annotation view ([F] key). */
static int			sym_pcnt_filter		= 5;
97

98 99 100 101
/*
 * Source functions
 */

102
/*
 * get_term_dimensions - work out the size of the terminal.
 * @ws: receives the row and column counts.
 *
 * The LINES and COLUMNS environment variables win when both are set and
 * both convert to non-zero values.  Otherwise the TIOCGWINSZ ioctl on
 * file descriptor 1 is tried (where available), and as a last resort
 * 25 rows by 80 columns is assumed.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *rows = getenv("LINES");
	const char *cols;

	if (rows == NULL)
		goto query_tty;

	ws->ws_row = atoi(rows);
	cols = getenv("COLUMNS");
	if (cols == NULL)
		goto query_tty;

	ws->ws_col = atoi(cols);
	if (ws->ws_row != 0 && ws->ws_col != 0)
		return;

query_tty:
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 && ws->ws_row != 0 && ws->ws_col != 0)
		return;
#endif
	/* Nothing usable was found: fall back to a classic 80x25 screen. */
	ws->ws_row = 25;
	ws->ws_col = 80;
}

124
static void update_print_entries(struct winsize *ws)
125
{
126
	top.print_entries = ws->ws_row;
127

128 129
	if (top.print_entries > 9)
		top.print_entries -= 9;
130 131 132 133
}

/*
 * SIGWINCH handler: re-read the terminal size and recompute how many
 * entries fit on screen.  The signal number itself is not used.
 */
static void sig_winch_handler(int sig __used)
{
	struct winsize *ws = &winsize;

	get_term_dimensions(ws);
	update_print_entries(ws);
}

138
static int parse_source(struct sym_entry *syme)
139 140
{
	struct symbol *sym;
141
	struct annotation *notes;
142
	struct map *map;
143
	int err = -1;
144 145

	if (!syme)
146 147 148 149 150 151 152 153
		return -1;

	sym = sym_entry__symbol(syme);
	map = syme->map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
154 155 156 157
	if (map->dso->origin == DSO__ORIG_KERNEL) {
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
158
		return -1;
159 160
	}

161 162 163
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
164 165 166
		goto out_assign;
	}

167
	pthread_mutex_lock(&notes->lock);
168

169
	if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
170
		pthread_mutex_unlock(&notes->lock);
171 172
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
173
		sleep(1);
174
		return err;
175
	}
176

177
	err = symbol__annotate(sym, syme->map, 0);
178
	if (err == 0) {
179
out_assign:
180
		top.sym_filter_entry = syme;
181
	}
182

183
	pthread_mutex_unlock(&notes->lock);
184
	return err;
185 186 187 188
}

/* Reset the annotation histograms of the symbol behind @syme. */
static void __zero_source_counters(struct sym_entry *syme)
{
	symbol__annotate_zero_histograms(sym_entry__symbol(syme));
}

/*
 * record_precise_ip - account one sample address for the annotated symbol.
 * @syme:    entry the sample resolved to.
 * @counter: index of the event the sample belongs to.
 * @ip:      sampled instruction pointer.
 *
 * Only the entry currently selected for annotation is tracked.  The
 * annotation lock is taken with trylock, so the sample is dropped
 * rather than waited for when the lock is busy.
 */
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	struct symbol *sym;
	struct annotation *notes;

	if (top.sym_filter_entry != syme)
		return;

	sym = sym_entry__symbol(syme);
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	/* Translate the address through the map before accounting it. */
	ip = syme->map->map_ip(syme->map, ip);
	symbol__inc_addr_samples(sym, syme->map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

static void show_details(struct sym_entry *syme)
{
215
	struct annotation *notes;
216
	struct symbol *symbol;
217
	int more;
218 219 220 221

	if (!syme)
		return;

222
	symbol = sym_entry__symbol(syme);
223 224 225 226 227 228
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
229

230
	printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
231 232
	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);

233
	more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
234
				       0, sym_pcnt_filter, top.print_entries, 4);
235 236 237
	if (top.zero)
		symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
	else
238
		symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
239
	if (more != 0)
240
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
241 242
out_unlock:
	pthread_mutex_unlock(&notes->lock);
243
}
244 245 246

/*
 * Terminal control string written before every redraw: ESC [ H moves
 * the cursor to the top-left corner, ESC [ 2 J erases the screen.
 * Spelled with octal escapes so the bytes survive editors and tooling.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

247
static void __list_insert_active_sym(struct sym_entry *syme)
248
{
249
	list_add(&syme->node, &top.active_symbols);
250
}
251

252
static void print_sym_table(struct perf_session *session)
253
{
254 255
	char bf[160];
	int printed = 0;
256
	struct rb_node *nd;
257 258
	struct sym_entry *syme;
	struct rb_root tmp = RB_ROOT;
259
	const int win_width = winsize.ws_col - 1;
260 261
	int sym_width, dso_width, dso_short_width;
	float sum_ksamples = perf_top__decay_samples(&top, &tmp);
262

263
	puts(CONSOLE_CLEAR);
264

265 266
	perf_top__header_snprintf(&top, bf, sizeof(bf));
	printf("%s\n", bf);
267

268
	perf_top__reset_sample_counters(&top);
269

270
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
271

272 273 274 275 276 277
	if (session->hists.stats.total_lost != 0) {
		color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
		printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
		       session->hists.stats.total_lost);
	}

278 279
	if (top.sym_filter_entry) {
		show_details(top.sym_filter_entry);
280 281 282
		return;
	}

283 284
	perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
			      &sym_width);
285

286 287 288 289 290
	if (sym_width + dso_width > winsize.ws_col - 29) {
		dso_width = dso_short_width;
		if (sym_width + dso_width > winsize.ws_col - 29)
			sym_width = winsize.ws_col - dso_width - 29;
	}
291
	putchar('\n');
292
	if (top.evlist->nr_entries == 1)
293
		printf("             samples  pcnt");
294
	else
295
		printf("   weight    samples  pcnt");
296

297 298
	if (verbose)
		printf("         RIP       ");
299
	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
300
	printf("   %s    _______ _____",
301
	       top.evlist->nr_entries == 1 ? "      " : "______");
302
	if (verbose)
303
		printf(" ________________");
304
	printf(" %-*.*s", sym_width, sym_width, graph_line);
305
	printf(" %-*.*s", dso_width, dso_width, graph_line);
306
	puts("\n");
307

308
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
309
		struct symbol *sym;
310
		double pcnt;
311

312
		syme = rb_entry(nd, struct sym_entry, rb_node);
313
		sym = sym_entry__symbol(syme);
314 315
		if (++printed > top.print_entries ||
		    (int)syme->snap_count < top.count_filter)
316
			continue;
317

318 319
		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
					 sum_ksamples));
320

321
		if (top.evlist->nr_entries == 1 || !top.display_weighted)
322
			printf("%20.2f ", syme->weight);
323
		else
324
			printf("%9.1f %10ld ", syme->weight, syme->snap_count);
325

326
		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
327
		if (verbose)
328
			printf(" %016" PRIx64, sym->start);
329
		printf(" %-*.*s", sym_width, sym_width, sym->name);
330 331 332 333
		printf(" %-*.*s\n", dso_width, dso_width,
		       dso_width >= syme->map->dso->long_name_len ?
					syme->map->dso->long_name :
					syme->map->dso->short_name);
334 335 336
	}
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
/*
 * prompt_integer - ask the user for a non-negative decimal number.
 * @target: updated only when the whole line consists of decimal digits.
 * @msg:    prompt text, printed as "\n<msg>: ".
 *
 * Reads one line from stdin.  A line containing anything but digits
 * (including a sign) and a failed read both leave *target untouched.
 * An empty line stores 0, because strtoul() converts "" to 0.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;	/* getline() allocates the buffer for us */
	size_t len = 0;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;	/* the buffer must be released on failure too */

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> needs an unsigned char value, not a plain char */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	*target = strtoul(buf, NULL, 10);
out_free:
	free(buf);
}

/*
 * prompt_percent - ask the user for a percentage.
 * @target: updated only when a value in the range 0..100 was entered.
 * @msg:    prompt text handed to prompt_integer().
 *
 * The scratch value starts out of range, so input that prompt_integer()
 * rejects leaves *target unchanged instead of resetting it to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

static void prompt_symbol(struct sym_entry **target, const char *msg)
{
	char *buf = malloc(0), *p;
	struct sym_entry *syme = *target, *n, *found = NULL;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
		*target = NULL;
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

392 393 394
	pthread_mutex_lock(&top.active_symbols_lock);
	syme = list_entry(top.active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&top.active_symbols_lock);
395

396
	list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
397
		struct symbol *sym = sym_entry__symbol(syme);
398 399 400 401 402 403 404 405

		if (!strcmp(buf, sym->name)) {
			found = syme;
			break;
		}
	}

	if (!found) {
406
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
407 408 409 410 411 412 413 414 415
		sleep(1);
		return;
	} else
		parse_source(found);

out_free:
	free(buf);
}

416
static void print_mapped_keys(void)
417
{
418 419
	char *name = NULL;

420 421
	if (top.sym_filter_entry) {
		struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
422 423 424 425
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
426 427
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top.delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top.print_entries);
428

429 430
	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top.sym_evsel));
431

432
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top.count_filter);
433

434 435 436
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
437

438 439
	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);
440

441
	fprintf(stdout,
442
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
443
		top.hide_kernel_symbols ? "yes" : "no");
444 445
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
446 447
		top.hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top.zero ? 1 : 0);
448 449 450 451 452 453 454 455 456 457 458 459
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int key_mapped(int c)
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
460 461
		case 'K':
		case 'U':
462 463 464
		case 'F':
		case 's':
		case 'S':
465 466 467
			return 1;
		case 'E':
		case 'w':
468
			return top.evlist->nr_entries > 1 ? 1 : 0;
469 470
		default:
			break;
471 472 473
	}

	return 0;
474 475
}

476
static void handle_keypress(struct perf_session *session, int c)
477
{
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
	if (!key_mapped(c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		print_mapped_keys();
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!key_mapped(c))
			return;
	}

501 502
	switch (c) {
		case 'd':
503 504 505
			prompt_integer(&top.delay_secs, "Enter display delay");
			if (top.delay_secs < 1)
				top.delay_secs = 1;
506 507
			break;
		case 'e':
508 509
			prompt_integer(&top.print_entries, "Enter display entries (lines)");
			if (top.print_entries == 0) {
510
				sig_winch_handler(SIGWINCH);
511 512 513
				signal(SIGWINCH, sig_winch_handler);
			} else
				signal(SIGWINCH, SIG_DFL);
514 515
			break;
		case 'E':
516
			if (top.evlist->nr_entries > 1) {
517
				fprintf(stderr, "\nAvailable events:");
518

519 520
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
521

522
				prompt_integer(&top.sym_counter, "Enter details event counter");
523

524 525 526 527
				if (top.sym_counter >= top.evlist->nr_entries) {
					top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
					top.sym_counter = 0;
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
528
					sleep(1);
529
					break;
530
				}
531 532
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
					if (top.sym_evsel->idx == top.sym_counter)
533
						break;
534
			} else top.sym_counter = 0;
535 536
			break;
		case 'f':
537
			prompt_integer(&top.count_filter, "Enter display event count filter");
538 539 540 541
			break;
		case 'F':
			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
			break;
542
		case 'K':
543
			top.hide_kernel_symbols = !top.hide_kernel_symbols;
544
			break;
545 546 547
		case 'q':
		case 'Q':
			printf("exiting.\n");
548
			if (dump_symtab)
549
				perf_session__fprintf_dsos(session, stderr);
550 551
			exit(0);
		case 's':
552
			prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
553 554
			break;
		case 'S':
555
			if (!top.sym_filter_entry)
556 557
				break;
			else {
558
				struct sym_entry *syme = top.sym_filter_entry;
559

560
				top.sym_filter_entry = NULL;
561 562 563
				__zero_source_counters(syme);
			}
			break;
564
		case 'U':
565
			top.hide_user_symbols = !top.hide_user_symbols;
566
			break;
567
		case 'w':
568
			top.display_weighted = ~top.display_weighted;
569
			break;
570
		case 'z':
571
			top.zero = !top.zero;
572
			break;
573 574
		default:
			break;
575 576 577
	}
}

578 579 580 581 582 583 584 585
/*
 * Display thread body for the TUI: runs the browser and terminates the
 * whole process once it returns.  The thread argument is not used.
 */
static void *display_thread_tui(void *arg __used)
{
	perf_top__tui_browser(&top);

	/* The browser is done: shut it down and leave. */
	exit_browser(0);
	exit(0);

	return NULL;
}

586
/*
 * display_thread - display thread body for the stdio interface.
 * @arg: the struct perf_session handed to pthread_create().
 *
 * Redraws the symbol table every top.delay_secs seconds until input
 * shows up on stdin, then hands the key to handle_keypress() and starts
 * over.  Never returns.
 */
static void *display_thread(void *arg __used)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios tc, save;
	int delay_msecs, c;
	struct perf_session *session = (struct perf_session *) arg;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;

	for (;;) {
		delay_msecs = top.delay_secs * 1000;
		tcsetattr(0, TCSANOW, &tc);
		/* trash return*/
		getc(stdin);

		for (;;) {
			int nready;

			print_sym_table(session);
			nready = poll(&stdin_poll, 1, delay_msecs);

			/* Timeout: simply refresh the table again. */
			if (nready == 0)
				continue;
			/*
			 * A signal such as SIGWINCH interrupted the wait; that
			 * is not a keypress, so redraw and keep waiting.
			 */
			if (nready < 0 && errno == EINTR)
				continue;
			break;
		}

		c = getc(stdin);
		tcsetattr(0, TCSAFLUSH, &save);

		handle_keypress(session, c);
	}

	return NULL;
}

618
/*
 * Names of symbols whose samples are to be skipped.  symbol_filter()
 * sets the skip flag on every entry whose name appears here; the list
 * is NULL terminated.
 */
static const char *skip_symbols[] = {
	"default_idle",		"native_safe_halt",
	"cpu_idle",		"enter_idle",
	"exit_idle",		"mwait_idle",
	"mwait_idle_with_hints", "poll_idle",
	"ppc64_runlatch_off",	"pseries_dedicated_idle_sleep",
	NULL
};

633
static int symbol_filter(struct map *map, struct symbol *sym)
634
{
635 636
	struct sym_entry *syme;
	const char *name = sym->name;
637
	int i;
638

639 640 641 642 643 644 645
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

646 647 648 649 650 651 652
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
653 654
		return 1;

655
	syme = symbol__priv(sym);
656
	syme->map = map;
657
	symbol__annotate_init(map, sym);
658

659
	if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
660 661 662 663
		/* schedule initial sym_filter_entry setup */
		sym_filter_entry_sched = syme;
		sym_filter = NULL;
	}
664

665 666 667 668 669 670
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			syme->skip = 1;
			break;
		}
	}
671 672 673 674

	return 0;
}

675 676 677
static void perf_event__process_sample(const union perf_event *event,
				       struct perf_sample *sample,
				       struct perf_session *session)
678
{
679
	u64 ip = event->ip.ip;
680
	struct sym_entry *syme;
681
	struct addr_location al;
682
	struct machine *machine;
683
	u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
684

685
	++top.samples;
686

687
	switch (origin) {
688
	case PERF_RECORD_MISC_USER:
689 690
		++top.us_samples;
		if (top.hide_user_symbols)
691
			return;
692
		machine = perf_session__find_host_machine(session);
693
		break;
694
	case PERF_RECORD_MISC_KERNEL:
695 696
		++top.kernel_samples;
		if (top.hide_kernel_symbols)
697
			return;
698
		machine = perf_session__find_host_machine(session);
699 700
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
701
		++top.guest_kernel_samples;
702
		machine = perf_session__find_machine(session, event->ip.pid);
703
		break;
704
	case PERF_RECORD_MISC_GUEST_USER:
705
		++top.guest_us_samples;
706 707 708 709 710
		/*
		 * TODO: we don't process guest user from host side
		 * except simple counting.
		 */
		return;
711 712 713 714
	default:
		return;
	}

715
	if (!machine && perf_guest) {
716
		pr_err("Can't find guest [%d]'s kernel information\n",
717
			event->ip.pid);
718 719 720
		return;
	}

721
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
722
		top.exact_samples++;
723

724 725
	if (perf_event__preprocess_sample(event, session, &al, sample,
					  symbol_filter) < 0 ||
726
	    al.filtered)
727
		return;
728

729 730 731 732 733 734 735 736 737 738 739 740
	if (al.sym == NULL) {
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
741
		if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
742
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
743 744 745
			ui__warning("The %s file can't be used\n",
				    symbol_conf.vmlinux_name);
			exit_browser(0);
746 747 748 749 750 751
			exit(1);
		}

		return;
	}

752 753
	/* let's see, whether we need to install initial sym_filter_entry */
	if (sym_filter_entry_sched) {
754
		top.sym_filter_entry = sym_filter_entry_sched;
755
		sym_filter_entry_sched = NULL;
756 757
		if (parse_source(top.sym_filter_entry) < 0) {
			struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
758 759

			pr_err("Can't annotate %s", sym->name);
760
			if (top.sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
761
				pr_err(": No vmlinux file was found in the path:\n");
762
				machine__fprintf_vmlinux_path(machine, stderr);
763 764 765 766
			} else
				pr_err(".\n");
			exit(1);
		}
767 768
	}

769
	syme = symbol__priv(al.sym);
770
	if (!syme->skip) {
771 772
		struct perf_evsel *evsel;

773
		syme->origin = origin;
774
		evsel = perf_evlist__id2evsel(top.evlist, sample->id);
775 776
		assert(evsel != NULL);
		syme->count[evsel->idx]++;
777
		record_precise_ip(syme, evsel->idx, ip);
778
		pthread_mutex_lock(&top.active_symbols_lock);
779 780
		if (list_empty(&syme->node) || !syme->node.next)
			__list_insert_active_sym(syme);
781
		pthread_mutex_unlock(&top.active_symbols_lock);
782
	}
783 784
}

785
static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
786
{
787
	struct perf_sample sample;
788
	union perf_event *event;
789

790
	while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) {
791
		perf_session__parse_sample(self, event, &sample);
792

793
		if (event->header.type == PERF_RECORD_SAMPLE)
794
			perf_event__process_sample(event, &sample, self);
795
		else
796
			perf_event__process(event, &sample, self);
797 798 799
	}
}

800
static void perf_session__mmap_read(struct perf_session *self)
801
{
802 803
	int i;

804
	for (i = 0; i < top.evlist->cpus->nr; i++)
805
		perf_session__mmap_read_cpu(self, i);
806 807
}

808 809 810
static void start_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *counter;
811

812 813
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
814

815 816
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

817
		if (top.freq) {
818 819
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
820
			attr->sample_freq = top.freq;
821
		}
822

823 824 825 826 827
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

828 829
		attr->mmap = 1;
try_again:
830 831
		if (perf_evsel__open(counter, top.evlist->cpus,
				     top.evlist->threads, group, inherit) < 0) {
832 833 834
			int err = errno;

			if (err == EPERM || err == EACCES)
835 836 837
				die("Permission error - are you root?\n"
					"\t Consider tweaking"
					" /proc/sys/kernel/perf_event_paranoid.\n");
838 839 840 841 842
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
843 844
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
845 846 847 848 849 850 851 852 853

				if (verbose)
					warning(" ... trying to fall back to cpu-clock-ticks\n");

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
			printf("\n");
854 855 856
			error("sys_perf_event_open() syscall returned with %d "
			      "(%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));
857 858 859
			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			exit(-1);
		}
860
	}
861

862
	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
863
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
864 865 866 867 868
}

static int __cmd_top(void)
{
	pthread_t thread;
869
	struct perf_evsel *first;
K
Kyle McMartin 已提交
870
	int ret __used;
871
	/*
872 873
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
874
	 */
875
	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
876 877
	if (session == NULL)
		return -ENOMEM;
878

879
	if (top.target_tid != -1)
880 881
		perf_event__synthesize_thread_map(top.evlist->threads,
						  perf_event__process, session);
882
	else
883
		perf_event__synthesize_threads(perf_event__process, session);
884

885 886
	start_counters(top.evlist);
	first = list_entry(top.evlist->entries.next, struct perf_evsel, node);
887
	perf_session__set_sample_type(session, first->attr.sample_type);
888

889
	/* Wait for a minimal set of events before starting the snapshot */
890
	poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
891

892
	perf_session__mmap_read(session);
893

894 895
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
							     display_thread), session)) {
896 897 898 899 900 901 902 903 904 905 906 907 908 909 910
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
911
		u64 hits = top.samples;
912

913
		perf_session__mmap_read(session);
914

915 916
		if (hits == top.samples)
			ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
917 918 919 920
	}

	return 0;
}
921 922 923 924 925 926 927

/* Usage lines handed to the option parser; NULL terminated. */
static const char * const top_usage[] = {
	"perf top [<options>]", NULL
};

static const struct option options[] = {
928
	OPT_CALLBACK('e', "event", &top.evlist, "event",
929 930
		     "event selector. use 'perf list' to list available events",
		     parse_events),
931 932
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
933
	OPT_INTEGER('p', "pid", &top.target_pid,
934
		    "profile events on existing process id"),
935
	OPT_INTEGER('t', "tid", &top.target_tid,
936
		    "profile events on existing thread id"),
937 938
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
939
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
940
		    "list of cpus to monitor"),
941 942
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
943
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
944
		    "hide kernel symbols"),
945
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
946 947
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
948
	OPT_INTEGER('d', "delay", &top.delay_secs,
949 950 951
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
			    "dump the symbol table used for profiling"),
952
	OPT_INTEGER('f', "count-filter", &top.count_filter,
953 954 955
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &group,
			    "put the counters into a counter group"),
956 957
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
958
	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
959
		    "symbol to annotate"),
960
	OPT_BOOLEAN('z', "zero", &top.zero,
961
		    "zero history across updates"),
962
	OPT_INTEGER('F', "freq", &top.freq,
963
		    "profile at this frequency"),
964
	OPT_INTEGER('E', "entries", &top.print_entries,
965
		    "display this many functions"),
966
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
967
		    "hide user symbols"),
968 969
	OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
970
	OPT_INCR('v', "verbose", &verbose,
971
		    "be more verbose (show counter open errors, etc)"),
972 973 974
	OPT_END()
};

975
/*
 * cmd_top - entry point of 'perf top'.
 * @argc:   number of command line arguments.
 * @argv:   command line arguments.
 * @prefix: unused.
 *
 * Parses the options, builds the event list with its cpu/thread maps
 * and hands over to __cmd_top().  Returns a negative value on failure.
 * Once the event list exists, every error path leaves through the
 * common exit label so the list is always released.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	page_size = sysconf(_SC_PAGE_SIZE);

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/*
	 * XXX For now start disabled, only using TUI if explicitly asked for.
	 * Change that when handle_keys equivalent gets written, live annotation
	 * done, etc.
	 */
	use_browser = 0;

	if (use_stdio)
		use_browser = 0;
	else if (use_tui)
		use_browser = 1;

	setup_browser(false);

	/* CPU and PID are mutually exclusive */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		/* status is still -ENOMEM; release the list on the way out */
		goto out_free_fd;
	}

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		top.freq = 0;
	else if (top.freq) {
		default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
					 top.evlist->threads->nr) < 0)
			goto out_free_fd;
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (pos->attr.sample_period)
			continue;

		pos->attr.sample_period = default_interval;
	}

	if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
	    perf_evlist__alloc_mmap(top.evlist) < 0)
		goto out_free_fd;

	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	/* Per-symbol private area: sym_entry, annotation and the counters. */
	symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
				 (top.evlist->nr_entries + 1) * sizeof(unsigned long));

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		status = -1;
		goto out_free_fd;
	}

	get_term_dimensions(&winsize);
	/* No -E given: size the table to the terminal and track resizes. */
	if (top.print_entries == 0) {
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	status = __cmd_top();
out_free_fd:
	perf_evlist__delete(top.evlist);

	return status;
}