builtin-top.c 27.5 KB
Newer Older
1
/*
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
18
 */
19
#include "builtin.h"
20

21
#include "perf.h"
22

23
#include "util/annotate.h"
24
#include "util/cache.h"
25
#include "util/color.h"
26
#include "util/evlist.h"
27
#include "util/evsel.h"
28 29
#include "util/session.h"
#include "util/symbol.h"
30
#include "util/thread.h"
31
#include "util/thread_map.h"
32
#include "util/top.h"
33
#include "util/util.h"
34
#include <linux/rbtree.h>
35 36
#include "util/parse-options.h"
#include "util/parse-events.h"
37
#include "util/cpumap.h"
38
#include "util/xyarray.h"
39

40 41
#include "util/debug.h"

42 43
#include <assert.h>
#include <fcntl.h>
44

45
#include <stdio.h>
46 47
#include <termios.h>
#include <unistd.h>
48
#include <inttypes.h>
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
#include <errno.h>
#include <time.h>
#include <sched.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

65 66 67 68 69 70 71 72
static struct perf_top top = {
	.count_filter		= 5,
	.delay_secs		= 2,
	.display_weighted	= -1,
	.target_pid		= -1,
	.target_tid		= -1,
	.active_symbols		= LIST_HEAD_INIT(top.active_symbols),
	.active_symbols_lock	= PTHREAD_MUTEX_INITIALIZER,
73
	.active_symbols_cond	= PTHREAD_COND_INITIALIZER,
74 75
	.freq			= 1000, /* 1 KHz */
};
76

77
/*
 * File-scope state.  Objects without an explicit initializer start zeroed
 * (false / 0 / NULL).
 */

/* Values filled in by the command line parser (see options[]). */
static bool			system_wide;
static bool			use_tui, use_stdio;
static bool			inherit;
static bool			group;
static bool			dump_symtab;
static int			default_interval;
static int			realtime_prio;
static unsigned int		mmap_pages		= 128;
static const char		*sym_filter;

/* Percentage threshold handed to the annotation printer. */
static int			sym_pcnt_filter		= 5;

/* Entry matching sym_filter, waiting to become the annotation target. */
struct sym_entry		*sym_filter_entry_sched;

/* Runtime bookkeeping. */
static bool			kptr_restrict_warned;
static unsigned int		page_size;
static struct winsize		winsize;
97

98 99 100 101
/*
 * Source functions
 */

102
/*
 * Fill @ws with the terminal dimensions.
 *
 * Sources are tried in this order:
 *   1. the LINES and COLUMNS environment variables (both must be set and
 *      yield non-zero values),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when that ioctl exists,
 *   3. a fixed 25x80 fallback.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *lines = getenv("LINES");

	if (lines != NULL) {
		const char *columns;

		ws->ws_row = atoi(lines);
		columns = getenv("COLUMNS");
		if (columns != NULL) {
			ws->ws_col = atoi(columns);
			if (ws->ws_row != 0 && ws->ws_col != 0)
				return;
		}
	}
#ifdef TIOCGWINSZ
	if (!ioctl(1, TIOCGWINSZ, ws) && ws->ws_row != 0 && ws->ws_col != 0)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

124
static void update_print_entries(struct winsize *ws)
125
{
126
	top.print_entries = ws->ws_row;
127

128 129
	if (top.print_entries > 9)
		top.print_entries -= 9;
130 131 132 133
}

/*
 * SIGWINCH handler: re-read the terminal size into the file-scope winsize
 * and recompute how many entries fit on screen.
 */
static void sig_winch_handler(int sig __used)
{
	struct winsize *ws = &winsize;

	get_term_dimensions(ws);
	update_print_entries(ws);
}

138
static int parse_source(struct sym_entry *syme)
139 140
{
	struct symbol *sym;
141
	struct annotation *notes;
142
	struct map *map;
143
	int err = -1;
144 145

	if (!syme)
146 147 148 149 150 151 152 153
		return -1;

	sym = sym_entry__symbol(syme);
	map = syme->map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
154
	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
155 156 157
		pr_err("Can't annotate %s: No vmlinux file was found in the "
		       "path\n", sym->name);
		sleep(1);
158
		return -1;
159 160
	}

161 162 163
	notes = symbol__annotation(sym);
	if (notes->src != NULL) {
		pthread_mutex_lock(&notes->lock);
164 165 166
		goto out_assign;
	}

167
	pthread_mutex_lock(&notes->lock);
168

169
	if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
170
		pthread_mutex_unlock(&notes->lock);
171 172
		pr_err("Not enough memory for annotating '%s' symbol!\n",
		       sym->name);
173
		sleep(1);
174
		return err;
175
	}
176

177
	err = symbol__annotate(sym, syme->map, 0);
178
	if (err == 0) {
179
out_assign:
180
		top.sym_filter_entry = syme;
181
	}
182

183
	pthread_mutex_unlock(&notes->lock);
184
	return err;
185 186 187 188
}

/* Zero the annotation histograms of the symbol behind @syme. */
static void __zero_source_counters(struct sym_entry *syme)
{
	symbol__annotate_zero_histograms(sym_entry__symbol(syme));
}

/*
 * Account one sample at address @ip for event index @counter in the
 * annotation histogram of @syme, but only when @syme is the symbol
 * currently being annotated.
 *
 * The annotation lock is only tried, never waited for: when it is busy
 * the sample is dropped.
 */
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	struct symbol *sym;
	struct annotation *notes;

	if (top.sym_filter_entry != syme)
		return;

	sym = sym_entry__symbol(syme);
	notes = symbol__annotation(sym);

	if (pthread_mutex_trylock(&notes->lock) != 0)
		return;

	/* Translate the sampled address through the map before accounting. */
	ip = syme->map->map_ip(syme->map, ip);
	symbol__inc_addr_samples(sym, syme->map, counter, ip);

	pthread_mutex_unlock(&notes->lock);
}

static void show_details(struct sym_entry *syme)
{
215
	struct annotation *notes;
216
	struct symbol *symbol;
217
	int more;
218 219 220 221

	if (!syme)
		return;

222
	symbol = sym_entry__symbol(syme);
223 224 225 226 227 228
	notes = symbol__annotation(symbol);

	pthread_mutex_lock(&notes->lock);

	if (notes->src == NULL)
		goto out_unlock;
229

230
	printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
231 232
	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);

233
	more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
234
				       0, sym_pcnt_filter, top.print_entries, 4);
235 236 237
	if (top.zero)
		symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
	else
238
		symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
239
	if (more != 0)
240
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
241 242
out_unlock:
	pthread_mutex_unlock(&notes->lock);
243
}
244 245 246

/*
 * Terminal control sequence written before every refresh:
 * ESC [ H moves the cursor to the home position, ESC [ 2 J erases the
 * display.  Spelled with octal escapes so the bytes survive tools that
 * strip raw control characters (an empty string here clears nothing).
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

247
static void __list_insert_active_sym(struct sym_entry *syme)
248
{
249
	list_add(&syme->node, &top.active_symbols);
250
}
251

252
static void print_sym_table(struct perf_session *session)
253
{
254 255
	char bf[160];
	int printed = 0;
256
	struct rb_node *nd;
257 258
	struct sym_entry *syme;
	struct rb_root tmp = RB_ROOT;
259
	const int win_width = winsize.ws_col - 1;
260 261
	int sym_width, dso_width, dso_short_width;
	float sum_ksamples = perf_top__decay_samples(&top, &tmp);
262

263
	puts(CONSOLE_CLEAR);
264

265 266
	perf_top__header_snprintf(&top, bf, sizeof(bf));
	printf("%s\n", bf);
267

268
	perf_top__reset_sample_counters(&top);
269

270
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
271

272 273 274 275 276 277
	if (session->hists.stats.total_lost != 0) {
		color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
		printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
		       session->hists.stats.total_lost);
	}

278 279
	if (top.sym_filter_entry) {
		show_details(top.sym_filter_entry);
280 281 282
		return;
	}

283 284
	perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
			      &sym_width);
285

286 287 288 289 290
	if (sym_width + dso_width > winsize.ws_col - 29) {
		dso_width = dso_short_width;
		if (sym_width + dso_width > winsize.ws_col - 29)
			sym_width = winsize.ws_col - dso_width - 29;
	}
291
	putchar('\n');
292
	if (top.evlist->nr_entries == 1)
293
		printf("             samples  pcnt");
294
	else
295
		printf("   weight    samples  pcnt");
296

297 298
	if (verbose)
		printf("         RIP       ");
299
	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
300
	printf("   %s    _______ _____",
301
	       top.evlist->nr_entries == 1 ? "      " : "______");
302
	if (verbose)
303
		printf(" ________________");
304
	printf(" %-*.*s", sym_width, sym_width, graph_line);
305
	printf(" %-*.*s", dso_width, dso_width, graph_line);
306
	puts("\n");
307

308
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
309
		struct symbol *sym;
310
		double pcnt;
311

312
		syme = rb_entry(nd, struct sym_entry, rb_node);
313
		sym = sym_entry__symbol(syme);
314 315
		if (++printed > top.print_entries ||
		    (int)syme->snap_count < top.count_filter)
316
			continue;
317

318 319
		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
					 sum_ksamples));
320

321
		if (top.evlist->nr_entries == 1 || !top.display_weighted)
322
			printf("%20.2f ", syme->weight);
323
		else
324
			printf("%9.1f %10ld ", syme->weight, syme->snap_count);
325

326
		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
327
		if (verbose)
328
			printf(" %016" PRIx64, sym->start);
329
		printf(" %-*.*s", sym_width, sym_width, sym->name);
330 331 332 333
		printf(" %-*.*s\n", dso_width, dso_width,
		       dso_width >= syme->map->dso->long_name_len ?
					syme->map->dso->long_name :
					syme->map->dso->short_name);
334 335 336
	}
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
/*
 * Print @msg as a prompt on stdout and read one line from stdin.
 *
 * *@target is overwritten only when the line consists solely of decimal
 * digits and the value fits in an int; an empty line therefore stores 0.
 * On read failure, non-digit input or an out-of-range value *@target is
 * left untouched.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	unsigned long val;

	fprintf(stdout, "\n%s: ", msg);
	/* getline() may have allocated even when it fails, so always free. */
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> functions need an unsigned char value. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	val = strtoul(buf, NULL, 10);
	/* ~0U >> 1 is the largest positive int; reject what cannot fit. */
	if (errno == ERANGE || val > (unsigned long)(~0U >> 1))
		goto out_free;

	*target = (int)val;
out_free:
	free(buf);
}

/*
 * Prompt for an integer with @msg and store it in *@target when it lies
 * in the range 0..100; otherwise *@target keeps its previous value.
 */
static void prompt_percent(int *target, const char *msg)
{
	int pct = 0;

	prompt_integer(&pct, msg);
	if (pct < 0 || pct > 100)
		return;

	*target = pct;
}

static void prompt_symbol(struct sym_entry **target, const char *msg)
{
	char *buf = malloc(0), *p;
	struct sym_entry *syme = *target, *n, *found = NULL;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
		__zero_source_counters(syme);
		*target = NULL;
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

392 393 394
	pthread_mutex_lock(&top.active_symbols_lock);
	syme = list_entry(top.active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&top.active_symbols_lock);
395

396
	list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
397
		struct symbol *sym = sym_entry__symbol(syme);
398 399 400 401 402 403 404 405

		if (!strcmp(buf, sym->name)) {
			found = syme;
			break;
		}
	}

	if (!found) {
406
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
407 408 409 410 411 412 413 414 415
		sleep(1);
		return;
	} else
		parse_source(found);

out_free:
	free(buf);
}

416
static void print_mapped_keys(void)
417
{
418 419
	char *name = NULL;

420 421
	if (top.sym_filter_entry) {
		struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
422 423 424 425
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
426 427
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top.delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top.print_entries);
428

429 430
	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top.sym_evsel));
431

432
	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top.count_filter);
433

434 435 436
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
437

438 439
	if (top.evlist->nr_entries > 1)
		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);
440

441
	fprintf(stdout,
442
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
443
		top.hide_kernel_symbols ? "yes" : "no");
444 445
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
446 447
		top.hide_user_symbols ? "yes" : "no");
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top.zero ? 1 : 0);
448 449 450 451 452 453 454 455 456 457 458 459
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int key_mapped(int c)
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
460 461
		case 'K':
		case 'U':
462 463 464
		case 'F':
		case 's':
		case 'S':
465 466 467
			return 1;
		case 'E':
		case 'w':
468
			return top.evlist->nr_entries > 1 ? 1 : 0;
469 470
		default:
			break;
471 472 473
	}

	return 0;
474 475
}

476
static void handle_keypress(struct perf_session *session, int c)
477
{
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
	if (!key_mapped(c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		print_mapped_keys();
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!key_mapped(c))
			return;
	}

501 502
	switch (c) {
		case 'd':
503 504 505
			prompt_integer(&top.delay_secs, "Enter display delay");
			if (top.delay_secs < 1)
				top.delay_secs = 1;
506 507
			break;
		case 'e':
508 509
			prompt_integer(&top.print_entries, "Enter display entries (lines)");
			if (top.print_entries == 0) {
510
				sig_winch_handler(SIGWINCH);
511 512 513
				signal(SIGWINCH, sig_winch_handler);
			} else
				signal(SIGWINCH, SIG_DFL);
514 515
			break;
		case 'E':
516
			if (top.evlist->nr_entries > 1) {
517 518 519
				/* Select 0 as the default event: */
				int counter = 0;

520
				fprintf(stderr, "\nAvailable events:");
521

522 523
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
					fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
524

525
				prompt_integer(&counter, "Enter details event counter");
526

527
				if (counter >= top.evlist->nr_entries) {
528 529
					top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
530
					sleep(1);
531
					break;
532
				}
533
				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
534
					if (top.sym_evsel->idx == counter)
535
						break;
536 537
			} else
				top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
538 539
			break;
		case 'f':
540
			prompt_integer(&top.count_filter, "Enter display event count filter");
541 542 543 544
			break;
		case 'F':
			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
			break;
545
		case 'K':
546
			top.hide_kernel_symbols = !top.hide_kernel_symbols;
547
			break;
548 549 550
		case 'q':
		case 'Q':
			printf("exiting.\n");
551
			if (dump_symtab)
552
				perf_session__fprintf_dsos(session, stderr);
553 554
			exit(0);
		case 's':
555
			prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
556 557
			break;
		case 'S':
558
			if (!top.sym_filter_entry)
559 560
				break;
			else {
561
				struct sym_entry *syme = top.sym_filter_entry;
562

563
				top.sym_filter_entry = NULL;
564 565 566
				__zero_source_counters(syme);
			}
			break;
567
		case 'U':
568
			top.hide_user_symbols = !top.hide_user_symbols;
569
			break;
570
		case 'w':
571
			top.display_weighted = ~top.display_weighted;
572
			break;
573
		case 'z':
574
			top.zero = !top.zero;
575
			break;
576 577
		default:
			break;
578 579 580
	}
}

581 582
/*
 * Display thread for the TUI: wait until at least one symbol has been
 * put on top.active_symbols, run the browser, then terminate the whole
 * program.  If the condition wait fails the browser is skipped.
 */
static void *display_thread_tui(void *arg __used)
{
	int err = 0;

	pthread_mutex_lock(&top.active_symbols_lock);
	while (!err && list_empty(&top.active_symbols))
		err = pthread_cond_wait(&top.active_symbols_cond,
					&top.active_symbols_lock);
	pthread_mutex_unlock(&top.active_symbols_lock);

	if (err == 0)
		perf_top__tui_browser(&top);

	exit_browser(0);
	exit(0);
	return NULL;
}

599
static void *display_thread(void *arg __used)
600
{
601
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
602 603
	struct termios tc, save;
	int delay_msecs, c;
604
	struct perf_session *session = (struct perf_session *) arg;
605 606 607 608 609 610

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;
611

612
repeat:
613
	delay_msecs = top.delay_secs * 1000;
614 615 616
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);
617

618
	do {
619
		print_sym_table(session);
620 621
	} while (!poll(&stdin_poll, 1, delay_msecs) == 1);

622 623 624
	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

625
	handle_keypress(session, c);
626
	goto repeat;
627 628 629 630

	return NULL;
}

631
/*
 * Names of symbols whose samples are tagged to be skipped.
 * The list is terminated by a NULL entry.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL,
};

646
static int symbol_filter(struct map *map, struct symbol *sym)
647
{
648 649
	struct sym_entry *syme;
	const char *name = sym->name;
650
	int i;
651

652 653 654 655 656 657 658
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

659 660 661 662 663 664 665
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
666 667
		return 1;

668
	syme = symbol__priv(sym);
669
	syme->map = map;
670
	symbol__annotate_init(map, sym);
671

672
	if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
673 674 675 676
		/* schedule initial sym_filter_entry setup */
		sym_filter_entry_sched = syme;
		sym_filter = NULL;
	}
677

678 679
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
680
			sym->ignore = true;
681 682 683
			break;
		}
	}
684 685 686 687

	return 0;
}

688 689 690
static void perf_event__process_sample(const union perf_event *event,
				       struct perf_sample *sample,
				       struct perf_session *session)
691
{
692
	u64 ip = event->ip.ip;
693
	struct sym_entry *syme;
694
	struct addr_location al;
695
	struct machine *machine;
696
	u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
697

698
	++top.samples;
699

700
	switch (origin) {
701
	case PERF_RECORD_MISC_USER:
702 703
		++top.us_samples;
		if (top.hide_user_symbols)
704
			return;
705
		machine = perf_session__find_host_machine(session);
706
		break;
707
	case PERF_RECORD_MISC_KERNEL:
708 709
		++top.kernel_samples;
		if (top.hide_kernel_symbols)
710
			return;
711
		machine = perf_session__find_host_machine(session);
712 713
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
714
		++top.guest_kernel_samples;
715
		machine = perf_session__find_machine(session, event->ip.pid);
716
		break;
717
	case PERF_RECORD_MISC_GUEST_USER:
718
		++top.guest_us_samples;
719 720 721 722 723
		/*
		 * TODO: we don't process guest user from host side
		 * except simple counting.
		 */
		return;
724 725 726 727
	default:
		return;
	}

728
	if (!machine && perf_guest) {
729
		pr_err("Can't find guest [%d]'s kernel information\n",
730
			event->ip.pid);
731 732 733
		return;
	}

734
	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
735
		top.exact_samples++;
736

737 738
	if (perf_event__preprocess_sample(event, session, &al, sample,
					  symbol_filter) < 0 ||
739
	    al.filtered)
740
		return;
741

742 743 744 745 746 747 748 749 750 751 752 753 754 755
	if (!kptr_restrict_warned &&
	    symbol_conf.kptr_restrict &&
	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
		ui__warning(
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
			  " modules" : "");
		if (use_browser <= 0)
			sleep(5);
		kptr_restrict_warned = true;
	}

756 757 758 759 760 761 762 763 764 765 766 767
	if (al.sym == NULL) {
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
768
		if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
769
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
770 771 772
			ui__warning("The %s file can't be used\n",
				    symbol_conf.vmlinux_name);
			exit_browser(0);
773 774 775 776 777 778
			exit(1);
		}

		return;
	}

779 780
	/* let's see, whether we need to install initial sym_filter_entry */
	if (sym_filter_entry_sched) {
781
		top.sym_filter_entry = sym_filter_entry_sched;
782
		sym_filter_entry_sched = NULL;
783 784
		if (parse_source(top.sym_filter_entry) < 0) {
			struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
785 786

			pr_err("Can't annotate %s", sym->name);
787
			if (top.sym_filter_entry->map->dso->symtab_type == SYMTAB__KALLSYMS) {
788
				pr_err(": No vmlinux file was found in the path:\n");
789
				machine__fprintf_vmlinux_path(machine, stderr);
790 791 792 793
			} else
				pr_err(".\n");
			exit(1);
		}
794 795
	}

796
	syme = symbol__priv(al.sym);
797
	if (!al.sym->ignore) {
798 799
		struct perf_evsel *evsel;

800
		evsel = perf_evlist__id2evsel(top.evlist, sample->id);
801 802
		assert(evsel != NULL);
		syme->count[evsel->idx]++;
803
		record_precise_ip(syme, evsel->idx, ip);
804
		pthread_mutex_lock(&top.active_symbols_lock);
805 806
		if (list_empty(&syme->node) || !syme->node.next) {
			static bool first = true;
807
			__list_insert_active_sym(syme);
808 809 810 811 812
			if (first) {
				pthread_cond_broadcast(&top.active_symbols_cond);
				first = false;
			}
		}
813
		pthread_mutex_unlock(&top.active_symbols_lock);
814
	}
815 816
}

817
static void perf_session__mmap_read_idx(struct perf_session *self, int idx)
818
{
819
	struct perf_sample sample;
820
	union perf_event *event;
821
	int ret;
822

823
	while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) {
824 825 826 827 828
		ret = perf_session__parse_sample(self, event, &sample);
		if (ret) {
			pr_err("Can't parse sample, err = %d\n", ret);
			continue;
		}
829

830
		if (event->header.type == PERF_RECORD_SAMPLE)
831
			perf_event__process_sample(event, &sample, self);
832
		else
833
			perf_event__process(event, &sample, self);
834 835 836
	}
}

837
static void perf_session__mmap_read(struct perf_session *self)
838
{
839 840
	int i;

841 842
	for (i = 0; i < top.evlist->nr_mmaps; i++)
		perf_session__mmap_read_idx(self, i);
843 844
}

845 846 847
static void start_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *counter;
848

849 850
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
851

852 853
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

854
		if (top.freq) {
855 856
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
857
			attr->sample_freq = top.freq;
858
		}
859

860 861 862 863 864
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

865
		attr->mmap = 1;
866
		attr->inherit = inherit;
867
try_again:
868
		if (perf_evsel__open(counter, top.evlist->cpus,
869
				     top.evlist->threads, group) < 0) {
870 871
			int err = errno;

872 873 874 875
			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				goto out_err;
			}
876 877 878 879 880
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
881 882
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
883
				if (verbose)
884 885
					ui__warning("Cycles event not supported,\n"
						    "trying to fall back to cpu-clock-ticks\n");
886 887 888 889 890

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
891

892 893 894 895 896 897
			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(counter));
				goto out_err;
			}

898 899 900 901 902 903
			ui__warning("The sys_perf_event_open() syscall "
				    "returned with %d (%s).  /bin/dmesg "
				    "may provide additional information.\n"
				    "No CONFIG_PERF_EVENTS=y kernel support "
				    "configured?\n", err, strerror(err));
			goto out_err;
904
		}
905
	}
906

907 908 909 910 911 912 913 914 915 916 917
	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) {
		ui__warning("Failed to mmap with %d (%s)\n",
			    errno, strerror(errno));
		goto out_err;
	}

	return;

out_err:
	exit_browser(0);
	exit(0);
918 919 920 921 922
}

static int __cmd_top(void)
{
	pthread_t thread;
K
Kyle McMartin 已提交
923
	int ret __used;
924
	/*
925 926
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
927
	 */
928
	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
929 930
	if (session == NULL)
		return -ENOMEM;
931

932
	if (top.target_tid != -1)
933 934
		perf_event__synthesize_thread_map(top.evlist->threads,
						  perf_event__process, session);
935
	else
936
		perf_event__synthesize_threads(perf_event__process, session);
937

938
	start_counters(top.evlist);
939 940
	session->evlist = top.evlist;
	perf_session__update_sample_type(session);
941

942
	/* Wait for a minimal set of events before starting the snapshot */
943
	poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
944

945
	perf_session__mmap_read(session);
946

947 948
	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
							     display_thread), session)) {
949 950 951 952 953 954 955 956 957 958 959 960 961 962 963
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
964
		u64 hits = top.samples;
965

966
		perf_session__mmap_read(session);
967

968 969
		if (hits == top.samples)
			ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
970 971 972 973
	}

	return 0;
}
974 975 976 977 978 979 980

/* NULL-terminated usage lines passed to the option parser. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL,
};

static const struct option options[] = {
981
	OPT_CALLBACK('e', "event", &top.evlist, "event",
982 983
		     "event selector. use 'perf list' to list available events",
		     parse_events),
984 985
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
986
	OPT_INTEGER('p', "pid", &top.target_pid,
987
		    "profile events on existing process id"),
988
	OPT_INTEGER('t', "tid", &top.target_tid,
989
		    "profile events on existing thread id"),
990 991
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
992
	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
993
		    "list of cpus to monitor"),
994 995
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
996
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
997
		    "hide kernel symbols"),
998
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
999 1000
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
1001
	OPT_INTEGER('d', "delay", &top.delay_secs,
1002 1003 1004
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
			    "dump the symbol table used for profiling"),
1005
	OPT_INTEGER('f', "count-filter", &top.count_filter,
1006 1007 1008
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &group,
			    "put the counters into a counter group"),
1009 1010
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
1011
	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1012
		    "symbol to annotate"),
1013
	OPT_BOOLEAN('z', "zero", &top.zero,
1014
		    "zero history across updates"),
1015
	OPT_INTEGER('F', "freq", &top.freq,
1016
		    "profile at this frequency"),
1017
	OPT_INTEGER('E', "entries", &top.print_entries,
1018
		    "display this many functions"),
1019
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1020
		    "hide user symbols"),
1021 1022
	OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
1023
	OPT_INCR('v', "verbose", &verbose,
1024
		    "be more verbose (show counter open errors, etc)"),
1025 1026 1027
	OPT_END()
};

1028
/*
 * Entry point of the 'perf top' builtin.
 *
 * Parses the command line, builds the event list and the cpu/thread maps,
 * settles the sampling period or frequency, allocates the per-event file
 * descriptor, pollfd and mmap bookkeeping, initializes symbol handling and
 * finally runs __cmd_top().
 *
 * Returns a negative value on failure.  Once the event list exists, every
 * exit through a return statement releases it again.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	page_size = sysconf(_SC_PAGE_SIZE);

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/*
	 * XXX For now start disabled, only using TUI if explicitly asked for.
	 * Change that when handle_keys equivalent gets written, live annotation
	 * done, etc.
	 */
	use_browser = 0;

	if (use_stdio)
		use_browser = 0;
	else if (use_tui)
		use_browser = 1;

	setup_browser(false);

	/* CPU and PID are mutually exclusive */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		/* status is still -ENOMEM; leave through the cleanup path. */
		goto out_free_fd;
	}

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		top.freq = 0;
	else if (top.freq) {
		default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
					 top.evlist->threads->nr) < 0)
			goto out_free_fd;
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (pos->attr.sample_period)
			continue;

		pos->attr.sample_period = default_interval;
	}

	if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
	    perf_evlist__alloc_mmap(top.evlist) < 0)
		goto out_free_fd;

	/* Annotate against the first event until the user picks another. */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	/* Room reserved per symbol: sym_entry, annotation, per-event counts. */
	symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
				 (top.evlist->nr_entries + 1) * sizeof(unsigned long));

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		/* Used to return directly and leak top.evlist. */
		status = -1;
		goto out_free_fd;
	}

	get_term_dimensions(&winsize);
	if (top.print_entries == 0) {
		/* No -E given: size to the terminal and follow resizes. */
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	status = __cmd_top();
out_free_fd:
	perf_evlist__delete(top.evlist);

	return status;
}