/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "builtin.h"

#include "perf.h"

#include "util/annotate.h"
#include "util/cache.h"
#include "util/color.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/xyarray.h"

#include "util/debug.h"

#include <assert.h>
#include <fcntl.h>

#include <stdio.h>
#include <termios.h>
#include <unistd.h>
#include <inttypes.h>

#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

static struct perf_top top = {
	.count_filter		= 5,
	.delay_secs		= 2,
	.display_weighted	= -1,
	.target_pid		= -1,
	.target_tid		= -1,
	.active_symbols		= LIST_HEAD_INIT(top.active_symbols),
	.active_symbols_lock	= PTHREAD_MUTEX_INITIALIZER,
	.active_symbols_cond	= PTHREAD_COND_INITIALIZER,
	.freq			= 1000, /* 1 KHz */
};

static bool			system_wide			=  false;

static bool			use_tui, use_stdio;

static int			default_interval		=      0;

static bool			inherit				=  false;
static int			realtime_prio			=      0;
static bool			group				=  false;
static unsigned int		page_size;
static unsigned int		mmap_pages			=    128;

static bool			dump_symtab                     =  false;

static struct winsize		winsize;

static const char		*sym_filter			=   NULL;
struct sym_entry		*sym_filter_entry_sched		=   NULL;
static int			sym_pcnt_filter			=      5;

/*
 * Source functions
 */

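/*
 * Terminal size: honour the LINES/COLUMNS environment overrides first,
 * then fall back to TIOCGWINSZ, and finally to an 80x25 default.
 */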
void get_term_dimensions(struct winsize *ws)
{
	char *s = getenv("LINES");

	if (s != NULL) {
		ws->ws_row = atoi(s);
		s = getenv("COLUMNS");
		if (s != NULL) {
			ws->ws_col = atoi(s);
			if (ws->ws_row && ws->ws_col)
				return;
		}
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

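/* Size the symbol table to the terminal, keeping room for the header lines. */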
static void update_print_entries(struct winsize *ws)
{
	top.print_entries = ws->ws_row;

	if (top.print_entries > 9)
		top.print_entries -= 9;
}

static void sig_winch_handler(int sig __used)
{
	get_term_dimensions(&winsize);
	update_print_entries(&winsize);
}

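/*
 * Set up annotation for one symbol so it can be browsed with the 's' key.
 * Needs a real symtab: annotating from /proc/kallsyms alone is refused.
 */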
static int parse_source(struct sym_entry *syme)
{
	struct symbol *sym;
	struct annotation *notes;
	struct map *map;
	int err = -1;

	if (!syme)
		return -1;

	sym = sym_entry__symbol(syme);
	map = syme->map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
		return -1;
	}

	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
		goto out_assign;
	}

	pthread_mutex_lock(&notes->lock);

	if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
		pthread_mutex_unlock(&notes->lock);
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
		sleep(1);
		return err;
	}

	err = symbol__annotate(sym, syme->map, 0);
	if (err == 0) {
out_assign:
		top.sym_filter_entry = syme;
	}

	pthread_mutex_unlock(&notes->lock);
	return err;
}

static void __zero_source_counters(struct sym_entry *syme)
{
	struct symbol *sym = sym_entry__symbol(syme);
	symbol__annotate_zero_histograms(sym);
}

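/*
 * Feed one sample address into the annotation histogram of the symbol
 * currently being annotated. trylock keeps the sampling path from
 * blocking on the display thread.
 */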
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	struct annotation *notes;
	struct symbol *sym;

	if (syme != top.sym_filter_entry)
		return;

	sym = sym_entry__symbol(syme);
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock))
		return;

	ip = syme->map->map_ip(syme->map, ip);
	symbol__inc_addr_samples(sym, syme->map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

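/*
 * Print the annotated lines of the selected symbol, then zero or decay
 * its histogram depending on the 'z' (zeroing) setting.
 */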
static void show_details(struct sym_entry *syme)
{
	struct annotation *notes;
	struct symbol *symbol;
	int more;

	if (!syme)
		return;

	symbol = sym_entry__symbol(syme);
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;

	printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);

	more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
				       0, sym_pcnt_filter, top.print_entries, 4);
	if (top.zero)
		symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
	else
		symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
	if (more != 0)
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
out_unlock:
	pthread_mutex_unlock(&notes->lock);
}

static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

static void __list_insert_active_sym(struct sym_entry *syme)
{
	list_add(&syme->node, &top.active_symbols);
}

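/*
 * One refresh of the stdio display: decay the per-symbol counts, print
 * the header and then one line per symbol that passes the count filter.
 */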
static void print_sym_table(struct perf_session *session)
{
	char bf[160];
	int printed = 0;
	struct rb_node *nd;
	struct sym_entry *syme;
	struct rb_root tmp = RB_ROOT;
	const int win_width = winsize.ws_col - 1;
	int sym_width, dso_width, dso_short_width;
	float sum_ksamples = perf_top__decay_samples(&top, &tmp);

	puts(CONSOLE_CLEAR);

	perf_top__header_snprintf(&top, bf, sizeof(bf));
	printf("%s\n", bf);

	perf_top__reset_sample_counters(&top);

	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);

	if (session->hists.stats.total_lost != 0) {
		color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
		printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
		       session->hists.stats.total_lost);
	}

	if (top.sym_filter_entry) {
		show_details(top.sym_filter_entry);
		return;
	}

	perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
			      &sym_width);

	if (sym_width + dso_width > winsize.ws_col - 29) {
		dso_width = dso_short_width;
		if (sym_width + dso_width > winsize.ws_col - 29)
			sym_width = winsize.ws_col - dso_width - 29;
	}
	putchar('\n');
	if (top.evlist->nr_entries == 1)
		printf("             samples  pcnt");
	else
		printf("   weight    samples  pcnt");

	if (verbose)
		printf("         RIP       ");
	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
	printf("   %s    _______ _____",
	       top.evlist->nr_entries == 1 ? "      " : "______");
	if (verbose)
		printf(" ________________");
	printf(" %-*.*s", sym_width, sym_width, graph_line);
	printf(" %-*.*s", dso_width, dso_width, graph_line);
	puts("\n");

	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
		struct symbol *sym;
		double pcnt;

		syme = rb_entry(nd, struct sym_entry, rb_node);
		sym = sym_entry__symbol(syme);
		if (++printed > top.print_entries ||
		    (int)syme->snap_count < top.count_filter)
			continue;

		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
					 sum_ksamples));

		if (top.evlist->nr_entries == 1 || !top.display_weighted)
			printf("%20.2f ", syme->weight);
		else
			printf("%9.1f %10ld ", syme->weight, syme->snap_count);

		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
		if (verbose)
			printf(" %016" PRIx64, sym->start);
		printf(" %-*.*s", sym_width, sym_width, sym->name);
		printf(" %-*.*s\n", dso_width, dso_width,
		       dso_width >= syme->map->dso->long_name_len ?
					syme->map->dso->long_name :
					syme->map->dso->short_name);
	}
}

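/* Line-oriented prompt helpers used by handle_keypress(). */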
static void prompt_integer(int *target, const char *msg)
{
	char *buf = malloc(0), *p;
	size_t dummy = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		return;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	p = buf;
	while (*p) {
		if (!isdigit(*p))
			goto out_free;
		p++;
	}
	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}

static void prompt_percent(int *target, const char *msg)
{
	int tmp = 0;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

static void prompt_symbol(struct sym_entry **target, const char *msg)
{
	char *buf = malloc(0), *p;
	struct sym_entry *syme = *target, *n, *found = NULL;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
		*target = NULL;
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	pthread_mutex_lock(&top.active_symbols_lock);
	syme = list_entry(top.active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&top.active_symbols_lock);

	list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
		struct symbol *sym = sym_entry__symbol(syme);

		if (!strcmp(buf, sym->name)) {
			found = syme;
			break;
		}
	}

	if (!found) {
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
		sleep(1);
		return;
	} else
		parse_source(found);

out_free:
	free(buf);
}

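/* Interactive help: one line per key accepted by handle_keypress(). */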
static void print_mapped_keys(void)
{
	char *name = NULL;

	if (top.sym_filter_entry) {
		struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top.delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top.print_entries);

	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top.sym_evsel));

	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top.count_filter);

	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");

	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);

	fprintf(stdout,
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
		top.hide_kernel_symbols ? "yes" : "no");
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
		top.hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top.zero ? 1 : 0);
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int key_mapped(int c)
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
		case 'K':
		case 'U':
		case 'F':
		case 's':
		case 'S':
			return 1;
		case 'E':
		case 'w':
			return top.evlist->nr_entries > 1 ? 1 : 0;
		default:
			break;
	}

	return 0;
}

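/*
 * If 'c' is not a mapped key, print the help, read one more key in
 * non-canonical mode, and then act on it.
 */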
static void handle_keypress(struct perf_session *session, int c)
{
	if (!key_mapped(c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		print_mapped_keys();
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!key_mapped(c))
			return;
	}

	switch (c) {
		case 'd':
			prompt_integer(&top.delay_secs, "Enter display delay");
			if (top.delay_secs < 1)
				top.delay_secs = 1;
			break;
		case 'e':
			prompt_integer(&top.print_entries, "Enter display entries (lines)");
			if (top.print_entries == 0) {
				sig_winch_handler(SIGWINCH);
				signal(SIGWINCH, sig_winch_handler);
			} else
				signal(SIGWINCH, SIG_DFL);
			break;
		case 'E':
			if (top.evlist->nr_entries > 1) {
				/* Select 0 as the default event: */
				int counter = 0;

				fprintf(stderr, "\nAvailable events:");

				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));

				prompt_integer(&counter, "Enter details event counter");

				if (counter >= top.evlist->nr_entries) {
					top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
					sleep(1);
					break;
				}
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
					if (top.sym_evsel->idx == counter)
						break;
			} else
				top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
			break;
		case 'f':
			prompt_integer(&top.count_filter, "Enter display event count filter");
			break;
		case 'F':
			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
			break;
		case 'K':
			top.hide_kernel_symbols = !top.hide_kernel_symbols;
			break;
		case 'q':
		case 'Q':
			printf("exiting.\n");
			if (dump_symtab)
				perf_session__fprintf_dsos(session, stderr);
			exit(0);
		case 's':
			prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
			break;
		case 'S':
			if (!top.sym_filter_entry)
				break;
			else {
				struct sym_entry *syme = top.sym_filter_entry;

				top.sym_filter_entry = NULL;
				__zero_source_counters(syme);
			}
			break;
		case 'U':
			top.hide_user_symbols = !top.hide_user_symbols;
			break;
		case 'w':
			top.display_weighted = ~top.display_weighted;
			break;
		case 'z':
			top.zero = !top.zero;
			break;
		default:
			break;
	}
}

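/* TUI display thread: wait for the first active symbol, then run the browser. */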
static void *display_thread_tui(void *arg __used)
{
	int err = 0;
	pthread_mutex_lock(&top.active_symbols_lock);
	while (list_empty(&top.active_symbols)) {
		err = pthread_cond_wait(&top.active_symbols_cond,
					&top.active_symbols_lock);
		if (err)
			break;
	}
	pthread_mutex_unlock(&top.active_symbols_lock);
	if (!err)
		perf_top__tui_browser(&top);
	exit_browser(0);
	exit(0);
	return NULL;
}

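/*
 * stdio display thread: switch the terminal to non-canonical mode,
 * refresh the symbol table every delay_secs, and hand any keypress to
 * handle_keypress().
 */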
static void *display_thread(void *arg __used)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios tc, save;
	int delay_msecs, c;
	struct perf_session *session = (struct perf_session *) arg;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;

repeat:
	delay_msecs = top.delay_secs * 1000;
	tcsetattr(0, TCSANOW, &tc);
	/* trash return */
	getc(stdin);

	do {
		print_sym_table(session);
	} while (!poll(&stdin_poll, 1, delay_msecs) == 1);

	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

	handle_keypress(session, c);
	goto repeat;

	return NULL;
}

/* Tag samples to be skipped. */
static const char *skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

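/*
 * Called for every symbol as symtabs are loaded: reject linker boundary
 * symbols, hook up the per-symbol private area and mark idle routines so
 * their samples are ignored.
 */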
static int symbol_filter(struct map *map, struct symbol *sym)
{
	struct sym_entry *syme;
	const char *name = sym->name;
	int i;

	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
		return 1;

	syme = symbol__priv(sym);
	syme->map = map;
	symbol__annotate_init(map, sym);

	if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
		/* schedule initial sym_filter_entry setup */
		sym_filter_entry_sched = syme;
		sym_filter = NULL;
	}

	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			sym->ignore = true;
			break;
		}
	}

	return 0;
}

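/*
 * Resolve one PERF_RECORD_SAMPLE to a machine, map and symbol, honour
 * the hide-kernel/hide-user filters, and bump the per-event counts.
 */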
static void perf_event__process_sample(const union perf_event *event,
				       struct perf_sample *sample,
				       struct perf_session *session)
{
	u64 ip = event->ip.ip;
	struct sym_entry *syme;
	struct addr_location al;
	struct machine *machine;
	u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

	++top.samples;

	switch (origin) {
	case PERF_RECORD_MISC_USER:
		++top.us_samples;
		if (top.hide_user_symbols)
			return;
		machine = perf_session__find_host_machine(session);
		break;
	case PERF_RECORD_MISC_KERNEL:
		++top.kernel_samples;
		if (top.hide_kernel_symbols)
			return;
		machine = perf_session__find_host_machine(session);
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
		++top.guest_kernel_samples;
		machine = perf_session__find_machine(session, event->ip.pid);
		break;
	case PERF_RECORD_MISC_GUEST_USER:
		++top.guest_us_samples;
		/*
		 * TODO: we don't process guest user from host side
		 * except simple counting.
		 */
		return;
	default:
		return;
	}

	if (!machine && perf_guest) {
		pr_err("Can't find guest [%d]'s kernel information\n",
			event->ip.pid);
		return;
	}

	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
		top.exact_samples++;

	if (perf_event__preprocess_sample(event, session, &al, sample,
					  symbol_filter) < 0 ||
	    al.filtered)
		return;

	if (al.sym == NULL) {
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
		if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
			ui__warning("The %s file can't be used\n",
				    symbol_conf.vmlinux_name);
			exit_browser(0);
			exit(1);
		}

		return;
	}

	/* let's see, whether we need to install initial sym_filter_entry */
	if (sym_filter_entry_sched) {
		top.sym_filter_entry = sym_filter_entry_sched;
		sym_filter_entry_sched = NULL;
		if (parse_source(top.sym_filter_entry) < 0) {
			struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);

			pr_err("Can't annotate %s", sym->name);
			if (top.sym_filter_entry->map->dso->symtab_type == SYMTAB__KALLSYMS) {
				pr_err(": No vmlinux file was found in the path:\n");
				machine__fprintf_vmlinux_path(machine, stderr);
			} else
				pr_err(".\n");
			exit(1);
		}
	}

	syme = symbol__priv(al.sym);
	if (!al.sym->ignore) {
		struct perf_evsel *evsel;

		evsel = perf_evlist__id2evsel(top.evlist, sample->id);
		assert(evsel != NULL);
		syme->count[evsel->idx]++;
		record_precise_ip(syme, evsel->idx, ip);
		pthread_mutex_lock(&top.active_symbols_lock);
		if (list_empty(&syme->node) || !syme->node.next) {
			static bool first = true;
			__list_insert_active_sym(syme);
			if (first) {
				pthread_cond_broadcast(&top.active_symbols_cond);
				first = false;
			}
		}
		pthread_mutex_unlock(&top.active_symbols_lock);
	}
}

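/* Drain one mmap ring buffer, dispatching samples and other event types. */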
static void perf_session__mmap_read_idx(struct perf_session *self, int idx)
{
	struct perf_sample sample;
	union perf_event *event;
	int ret;

	while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) {
		ret = perf_session__parse_sample(self, event, &sample);
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}

		if (event->header.type == PERF_RECORD_SAMPLE)
			perf_event__process_sample(event, &sample, self);
		else
			perf_event__process(event, &sample, self);
	}
}

static void perf_session__mmap_read(struct perf_session *self)
{
	int i;

	for (i = 0; i < top.evlist->nr_mmaps; i++)
		perf_session__mmap_read_idx(self, i);
}

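/*
 * Open one counter per event. If the hardware cycles event is not
 * available, fall back to the cpu-clock software event, then mmap the
 * ring buffers.
 */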
static void start_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *counter;

	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;

		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

		if (top.freq) {
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
			attr->sample_freq = top.freq;
		}

		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

		attr->mmap = 1;
		attr->inherit = inherit;
try_again:
		if (perf_evsel__open(counter, top.evlist->cpus,
				     top.evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				goto out_err;
			}
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
				if (verbose)
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(counter));
				goto out_err;
			}

			ui__warning("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
		}
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) {
		ui__warning("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
}

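/*
 * Main loop: synthesize the existing threads, start the counters, spawn
 * the display thread and keep draining the mmap buffers.
 */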
static int __cmd_top(void)
{
	pthread_t thread;
	int ret __used;
	/*
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
	 */
	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
	if (session == NULL)
		return -ENOMEM;

	if (top.target_tid != -1)
		perf_event__synthesize_thread_map(top.evlist->threads,
						  perf_event__process, session);
	else
		perf_event__synthesize_threads(perf_event__process, session);

	start_counters(top.evlist);
	session->evlist = top.evlist;
	perf_session__update_sample_type(session);

	/* Wait for a minimal set of events before starting the snapshot */
	poll(top.evlist->pollfd, top.evlist->nr_fds, 100);

	perf_session__mmap_read(session);

	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
							     display_thread), session)) {
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
		u64 hits = top.samples;

		perf_session__mmap_read(session);

		if (hits == top.samples)
			ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
	}

	return 0;
}

static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL
};

static const struct option options[] = {
	OPT_CALLBACK('e', "event", &top.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events),
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
	OPT_INTEGER('p', "pid", &top.target_pid,
		    "profile events on existing process id"),
	OPT_INTEGER('t', "tid", &top.target_tid,
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &group,
			    "put the counters into a counter group"),
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero,
		    "zero history across updates"),
	OPT_INTEGER('F', "freq", &top.freq,
		    "profile at this frequency"),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
	OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_END()
};

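/*
 * Entry point: parse options, build the evlist and thread/cpu maps,
 * size the per-event fds and mmaps, then hand off to __cmd_top().
 */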
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	page_size = sysconf(_SC_PAGE_SIZE);

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/*
	 * XXX For now start disabled, only using TUI if explicitly asked for.
	 * Change that when handle_keys equivalent gets written, live annotation
	 * done, etc.
	 */
	use_browser = 0;

	if (use_stdio)
		use_browser = 0;
	else if (use_tui)
		use_browser = 1;

	setup_browser(false);

	/* CPU and PID are mutually exclusive */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		return -ENOMEM;
	}

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		top.freq = 0;
	else if (top.freq) {
		default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
					 top.evlist->threads->nr) < 0)
			goto out_free_fd;
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (pos->attr.sample_period)
			continue;

		pos->attr.sample_period = default_interval;
	}

	if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
	    perf_evlist__alloc_mmap(top.evlist) < 0)
		goto out_free_fd;

	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
				 (top.evlist->nr_entries + 1) * sizeof(unsigned long));

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0)
		return -1;

	get_term_dimensions(&winsize);
	if (top.print_entries == 0) {
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	status = __cmd_top();
out_free_fd:
	perf_evlist__delete(top.evlist);

	return status;
}