/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19
#include "builtin.h"
20

21
#include "perf.h"
22

23
#include "util/color.h"
24 25
#include "util/session.h"
#include "util/symbol.h"
26
#include "util/thread.h"
27
#include "util/util.h"
28
#include <linux/rbtree.h>
29 30
#include "util/parse-options.h"
#include "util/parse-events.h"
31
#include "util/cpumap.h"
32

33 34
#include "util/debug.h"

35 36
#include <assert.h>
#include <fcntl.h>
37

38
#include <stdio.h>
39 40
#include <termios.h>
#include <unistd.h>
41

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
#include <errno.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

58
/* Perf event file descriptors, indexed by [cpu slot][counter]. */
static int			fd[MAX_NR_CPUS][MAX_COUNTERS];

static int			system_wide			=      0;

static int			default_interval		=      0;

/* Minimum snapshot count a symbol needs to be listed ([f] key). */
static int			count_filter			=      5;
/* Number of table rows to print ([e] key, or derived from the terminal). */
static int			print_entries;

/* PID handed to sys_perf_event_open(); -1 means no specific task. */
static int			target_pid			=     -1;
static int			inherit				=      0;
/* CPU to profile; -1 means use cpumap[] (when no target_pid is set). */
static int			profile_cpu			=     -1;
static int			nr_cpus				=      0;
/* Non-zero: SCHED_FIFO priority requested for the reader thread. */
static unsigned int		realtime_prio			=      0;
/* Non-zero: the first counter opened on a CPU becomes the group leader. */
static int			group				=      0;
static unsigned int		page_size;
/* Size of each ring buffer's data area, in pages. */
static unsigned int		mmap_pages			=     16;
static int			freq				=   1000; /* 1 KHz */

/* Seconds between display refreshes ([d] key). */
static int			delay_secs			=      2;
/* Non-zero: counts are zeroed after each refresh instead of decayed ([z] key). */
static int			zero                            =      0;
/* Non-zero: dump the DSO/symbol tables to stderr on quit. */
static int			dump_symtab                     =      0;

static bool			hide_kernel_symbols		=  false;
static bool			hide_user_symbols		=  false;
/* Last known terminal size, refreshed by sig_winch_handler(). */
static struct winsize		winsize;
84

85 86 87 88 89 90 91 92 93 94 95
/*
 * Source
 */

/*
 * One line of objdump output for the symbol being annotated, kept in a
 * singly linked list built by parse_source().
 */
struct source_line {
	u64			eip;	/* address parsed from the line; 0 when the line carries none */
	unsigned long		count[MAX_COUNTERS];	/* hits per counter, see record_precise_ip() */
	char			*line;	/* line text as returned by getline(), newline stripped */
	struct source_line	*next;
};

96 97
/* Name of the symbol to annotate from start-up; cleared once it has been matched. */
static char			*sym_filter			=   NULL;
/* Symbol currently being annotated, or NULL when showing the symbol table. */
struct sym_entry		*sym_filter_entry		=   NULL;
/* Entry matched against sym_filter, installed on the next processed sample. */
struct sym_entry		*sym_filter_entry_sched		=   NULL;
/* Minimum percentage a source line needs to be shown in annotation ([F] key). */
static int			sym_pcnt_filter			=      5;
/* Index of the counter whose counts are displayed ([E] key). */
static int			sym_counter			=      0;
/* Zero: sort by raw count of sym_counter; non-zero: sort by sym_weight(). */
static int			display_weighted		=     -1;
102

103 104 105 106
/*
 * Symbols
 */

107 108 109 110 111 112 113
/*
 * Annotation state of a symbol, allocated on first use by parse_source().
 */
struct sym_entry_source {
	struct source_line	*source;	/* line matching the symbol start, set by lookup_sym_source() */
	struct source_line	*lines;		/* head of the objdump output list */
	struct source_line	**lines_tail;	/* where the next parsed line gets linked */
	pthread_mutex_t		lock;		/* taken around accesses to the line list */
};

114
struct sym_entry {
115 116
	struct rb_node		rb_node;
	struct list_head	node;
117 118
	unsigned long		snap_count;
	double			weight;
119
	int			skip;
120
	u16			name_len;
121
	u8			origin;
122
	struct map		*map;
123
	struct sym_entry_source	*src;
124
	unsigned long		count[0];
125 126
};

127 128 129 130
/*
 * Source functions
 */

131 132
static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
{
133
       return ((void *)self) + symbol_conf.priv_size;
134 135
}

136
/*
 * get_term_dimensions - fill @ws with the terminal size.
 *
 * Sources are tried in this order:
 *   1. the LINES and COLUMNS environment variables (both must be non-zero),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available,
 *   3. a fixed 25x80 fallback.
 *
 * On return ws->ws_row and ws->ws_col are always non-zero.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *lines = getenv("LINES");

	if (lines != NULL) {
		const char *columns;

		ws->ws_row = atoi(lines);
		columns = getenv("COLUMNS");
		if (columns != NULL) {
			ws->ws_col = atoi(columns);
			if (ws->ws_row && ws->ws_col)
				return;
		}
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 && ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

158
static void update_print_entries(struct winsize *ws)
159
{
160 161
	print_entries = ws->ws_row;

162 163 164 165 166 167
	if (print_entries > 9)
		print_entries -= 9;
}

/*
 * SIGWINCH handler: re-read the terminal size into the global winsize and
 * recompute print_entries from it. Also called directly by handle_keypress().
 */
static void sig_winch_handler(int sig __used)
{
	get_term_dimensions(&winsize);
	update_print_entries(&winsize);
}

172
static int parse_source(struct sym_entry *syme)
173 174
{
	struct symbol *sym;
175
	struct sym_entry_source *source;
176
	struct map *map;
177
	FILE *file;
178
	char command[PATH_MAX*2];
179 180
	const char *path;
	u64 len;
181 182

	if (!syme)
183 184 185 186 187 188 189 190 191 192
		return -1;

	sym = sym_entry__symbol(syme);
	map = syme->map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
	if (map->dso->origin == DSO__ORIG_KERNEL)
		return -1;
193

194
	if (syme->src == NULL) {
195
		syme->src = zalloc(sizeof(*source));
196
		if (syme->src == NULL)
197
			return -1;
198 199 200 201 202 203 204
		pthread_mutex_init(&syme->src->lock, NULL);
	}

	source = syme->src;

	if (source->lines) {
		pthread_mutex_lock(&source->lock);
205 206
		goto out_assign;
	}
207
	path = map->dso->long_name;
208 209 210

	len = sym->end - sym->start;

211
	sprintf(command,
212 213 214
		"objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s",
		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
215 216 217

	file = popen(command, "r");
	if (!file)
218
		return -1;
219

220 221
	pthread_mutex_lock(&source->lock);
	source->lines_tail = &source->lines;
222 223 224
	while (!feof(file)) {
		struct source_line *src;
		size_t dummy = 0;
225
		char *c, *sep;
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240

		src = malloc(sizeof(struct source_line));
		assert(src != NULL);
		memset(src, 0, sizeof(struct source_line));

		if (getline(&src->line, &dummy, file) < 0)
			break;
		if (!src->line)
			break;

		c = strchr(src->line, '\n');
		if (c)
			*c = 0;

		src->next = NULL;
241 242
		*source->lines_tail = src;
		source->lines_tail = &src->next;
243

244 245 246 247 248
		src->eip = strtoull(src->line, &sep, 16);
		if (*sep == ':')
			src->eip = map__objdump_2ip(map, src->eip);
		else /* this line has no ip info (e.g. source line) */
			src->eip = 0;
249 250 251 252
	}
	pclose(file);
out_assign:
	sym_filter_entry = syme;
253
	pthread_mutex_unlock(&source->lock);
254
	return 0;
255 256 257 258 259 260 261
}

static void __zero_source_counters(struct sym_entry *syme)
{
	int i;
	struct source_line *line;

262
	line = syme->src->lines;
263 264 265 266 267 268 269 270 271 272 273 274 275 276
	while (line) {
		for (i = 0; i < nr_counters; i++)
			line->count[i] = 0;
		line = line->next;
	}
}

/*
 * record_precise_ip - account a sample at @ip to the matching annotated line.
 *
 * Only samples of the symbol currently being annotated (sym_filter_entry)
 * are recorded. The source lock is only tried, never waited for: when it
 * is busy the sample is dropped. The line walk stops at the first line
 * whose address is greater than @ip.
 */
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	struct sym_entry_source *src;
	struct source_line *line;

	if (syme != sym_filter_entry)
		return;

	/* Check for missing annotation state before touching its lock. */
	src = syme->src;
	if (src == NULL)
		return;

	if (pthread_mutex_trylock(&src->lock))
		return;

	if (src->source == NULL)
		goto out_unlock;

	for (line = src->lines; line; line = line->next) {
		/* skip lines without IP info */
		if (line->eip == 0)
			continue;
		if (line->eip == ip) {
			line->count[counter]++;
			break;
		}
		if (line->eip > ip)
			break;
	}
out_unlock:
	pthread_mutex_unlock(&src->lock);
}

298 299
#define PATTERN_LEN		(BITS_PER_LONG / 4 + 2)

300 301
static void lookup_sym_source(struct sym_entry *syme)
{
302
	struct symbol *symbol = sym_entry__symbol(syme);
303
	struct source_line *line;
304
	char pattern[PATTERN_LEN + 1];
305

306 307
	sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4,
		map__rip_2objdump(syme->map, symbol->start));
308

309 310
	pthread_mutex_lock(&syme->src->lock);
	for (line = syme->src->lines; line; line = line->next) {
311
		if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
312
			syme->src->source = line;
313 314 315
			break;
		}
	}
316
	pthread_mutex_unlock(&syme->src->lock);
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
}

/*
 * show_lines - print @count consecutive lines starting at @queue.
 *
 * Each line is printed with its hit count for sym_counter and that count as
 * a percentage of @total. A @total of zero yields 0.0% instead of dividing
 * by zero.
 */
static void show_lines(struct source_line *queue, int count, int total)
{
	struct source_line *line = queue;
	int i;

	for (i = 0; i < count; i++) {
		unsigned long hits = line->count[sym_counter];
		float pcnt = total ? 100.0*(float)hits/(float)total : 0.0;

		/* count[] is unsigned long, hence %lu */
		printf("%8lu %4.1f%%\t%s\n", hits, pcnt, line->line);
		line = line->next;
	}
}

#define TRACE_COUNT     3

static void show_details(struct sym_entry *syme)
{
	struct symbol *symbol;
	struct source_line *line;
	struct source_line *line_queue = NULL;
	int displayed = 0;
	int line_queue_count = 0, total = 0, more = 0;

	if (!syme)
		return;

346
	if (!syme->src->source)
347 348
		lookup_sym_source(syme);

349
	if (!syme->src->source)
350 351
		return;

352
	symbol = sym_entry__symbol(syme);
353 354 355
	printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);

356 357
	pthread_mutex_lock(&syme->src->lock);
	line = syme->src->source;
358 359 360 361 362
	while (line) {
		total += line->count[sym_counter];
		line = line->next;
	}

363
	line = syme->src->source;
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
	while (line) {
		float pcnt = 0.0;

		if (!line_queue_count)
			line_queue = line;
		line_queue_count++;

		if (line->count[sym_counter])
			pcnt = 100.0 * line->count[sym_counter] / (float)total;
		if (pcnt >= (float)sym_pcnt_filter) {
			if (displayed <= print_entries)
				show_lines(line_queue, line_queue_count, total);
			else more++;
			displayed += line_queue_count;
			line_queue_count = 0;
			line_queue = NULL;
		} else if (line_queue_count > TRACE_COUNT) {
			line_queue = line_queue->next;
			line_queue_count--;
		}

		line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
		line = line->next;
	}
388
	pthread_mutex_unlock(&syme->src->lock);
389 390 391
	if (more)
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
}
392

393
/*
 * Symbols are added to this list by event__process_sample() and removed
 * again by print_sym_table() once their count has decayed to zero or they
 * are hidden. active_symbols_lock protects insertion and removal.
 */
static LIST_HEAD(active_symbols);
static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
399 400 401 402 403 404

/*
 * Ordering weight: count-1 * count-2 * ... / count-n
 */
static double sym_weight(const struct sym_entry *sym)
{
405
	double weight = sym->snap_count;
406 407
	int counter;

408 409 410
	if (!display_weighted)
		return weight;

411 412 413 414 415 416 417 418
	for (counter = 1; counter < nr_counters-1; counter++)
		weight *= sym->count[counter];

	weight /= (sym->count[counter] + 1);

	return weight;
}

419 420
/* Counters accumulated by event__process_sample(), reset on every refresh. */
static long			samples;
static long			userspace_samples;
static long			exact_samples;
/*
 * Sequence written at the start of every refresh: ANSI "cursor home"
 * followed by "erase display".
 * NOTE(review): the literal was empty in this copy of the file, which makes
 * puts(CONSOLE_CLEAR) print nothing but a newline; the escape bytes were
 * presumably lost in transit -- confirm against the upstream source.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

424
static void __list_insert_active_sym(struct sym_entry *syme)
425 426 427 428
{
	list_add(&syme->node, &active_symbols);
}

429 430 431 432 433 434 435
/*
 * Unlink @syme from active_symbols under active_symbols_lock, leaving its
 * list node re-initialised.
 */
static void list_remove_active_sym(struct sym_entry *syme)
{
	pthread_mutex_t *lock = &active_symbols_lock;
	struct list_head *entry = &syme->node;

	pthread_mutex_lock(lock);
	list_del_init(entry);
	pthread_mutex_unlock(lock);
}

436 437 438 439 440 441 442 443 444 445
/*
 * Insert @se into @tree, ordered by descending weight: heavier entries go
 * to the left, so rb_first() yields the heaviest symbol. Entries of equal
 * weight are placed to the right of the existing ones.
 */
static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
{
	struct rb_node **p = &tree->rb_node;
	struct rb_node *parent = NULL;
	struct sym_entry *iter;

	while (*p != NULL) {
		parent = *p;
		iter = rb_entry(parent, struct sym_entry, rb_node);

		if (se->weight > iter->weight)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	rb_link_node(&se->rb_node, parent, p);
	rb_insert_color(&se->rb_node, tree);
}
455 456 457

static void print_sym_table(void)
{
458
	int printed = 0, j;
459
	int counter, snap = !display_weighted ? sym_counter : 0;
460 461
	float samples_per_sec = samples/delay_secs;
	float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
462
	float esamples_percent = (100.0*exact_samples)/samples;
463
	float sum_ksamples = 0.0;
464 465 466
	struct sym_entry *syme, *n;
	struct rb_root tmp = RB_ROOT;
	struct rb_node *nd;
467
	int sym_width = 0, dso_width = 0, dso_short_width = 0;
468
	const int win_width = winsize.ws_col - 1;
469

470
	samples = userspace_samples = exact_samples = 0;
471

472
	/* Sort the active symbols */
473 474 475 476 477
	pthread_mutex_lock(&active_symbols_lock);
	syme = list_entry(active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&active_symbols_lock);

	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
478
		syme->snap_count = syme->count[snap];
479
		if (syme->snap_count != 0) {
480

481 482 483 484 485 486 487
			if ((hide_user_symbols &&
			     syme->origin == PERF_RECORD_MISC_USER) ||
			    (hide_kernel_symbols &&
			     syme->origin == PERF_RECORD_MISC_KERNEL)) {
				list_remove_active_sym(syme);
				continue;
			}
488
			syme->weight = sym_weight(syme);
489
			rb_insert_active_sym(&tmp, syme);
490
			sum_ksamples += syme->snap_count;
491 492

			for (j = 0; j < nr_counters; j++)
493 494
				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
		} else
495
			list_remove_active_sym(syme);
496 497
	}

498
	puts(CONSOLE_CLEAR);
499

500
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
501
	printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%%  exact: %4.1f%% [",
502
		samples_per_sec,
503 504
		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)),
		esamples_percent);
505

506
	if (nr_counters == 1 || !display_weighted) {
507
		printf("%Ld", (u64)attrs[0].sample_period);
I
Ingo Molnar 已提交
508 509 510 511 512
		if (freq)
			printf("Hz ");
		else
			printf(" ");
	}
513

514 515 516
	if (!display_weighted)
		printf("%s", event_name(sym_counter));
	else for (counter = 0; counter < nr_counters; counter++) {
517 518 519 520 521 522 523 524
		if (counter)
			printf("/");

		printf("%s", event_name(counter));
	}

	printf( "], ");

525 526
	if (target_pid != -1)
		printf(" (target_pid: %d", target_pid);
527 528 529 530 531 532
	else
		printf(" (all");

	if (profile_cpu != -1)
		printf(", cpu: %d)\n", profile_cpu);
	else {
533
		if (target_pid != -1)
534 535 536 537 538
			printf(")\n");
		else
			printf(", %d CPUs)\n", nr_cpus);
	}

539
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
540

541 542 543 544 545
	if (sym_filter_entry) {
		show_details(sym_filter_entry);
		return;
	}

546 547 548 549 550 551 552 553 554
	/*
	 * Find the longest symbol name that will be displayed
	 */
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
		syme = rb_entry(nd, struct sym_entry, rb_node);
		if (++printed > print_entries ||
		    (int)syme->snap_count < count_filter)
			continue;

555 556 557
		if (syme->map->dso->long_name_len > dso_width)
			dso_width = syme->map->dso->long_name_len;

558 559 560
		if (syme->map->dso->short_name_len > dso_short_width)
			dso_short_width = syme->map->dso->short_name_len;

561 562 563 564 565 566
		if (syme->name_len > sym_width)
			sym_width = syme->name_len;
	}

	printed = 0;

567 568 569 570 571
	if (sym_width + dso_width > winsize.ws_col - 29) {
		dso_width = dso_short_width;
		if (sym_width + dso_width > winsize.ws_col - 29)
			sym_width = winsize.ws_col - dso_width - 29;
	}
572
	putchar('\n');
573
	if (nr_counters == 1)
574
		printf("             samples  pcnt");
575
	else
576
		printf("   weight    samples  pcnt");
577

578 579
	if (verbose)
		printf("         RIP       ");
580
	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
581
	printf("   %s    _______ _____",
582 583
	       nr_counters == 1 ? "      " : "______");
	if (verbose)
584
		printf(" ________________");
585
	printf(" %-*.*s", sym_width, sym_width, graph_line);
586
	printf(" %-*.*s", dso_width, dso_width, graph_line);
587
	puts("\n");
588

589
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
590
		struct symbol *sym;
591
		double pcnt;
592

593
		syme = rb_entry(nd, struct sym_entry, rb_node);
594
		sym = sym_entry__symbol(syme);
595

596
		if (++printed > print_entries || (int)syme->snap_count < count_filter)
597
			continue;
598

599 600
		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
					 sum_ksamples));
601

602
		if (nr_counters == 1 || !display_weighted)
603
			printf("%20.2f ", syme->weight);
604
		else
605
			printf("%9.1f %10ld ", syme->weight, syme->snap_count);
606

607
		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
608
		if (verbose)
609
			printf(" %016llx", sym->start);
610
		printf(" %-*.*s", sym_width, sym_width, sym->name);
611 612 613 614
		printf(" %-*.*s\n", dso_width, dso_width,
		       dso_width >= syme->map->dso->long_name_len ?
					syme->map->dso->long_name :
					syme->map->dso->short_name);
615 616 617
	}
}

618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660
/*
 * prompt_integer - print @msg and read a non-negative decimal from stdin.
 *
 * *@target is only updated when the whole line consists of digits; an empty
 * line therefore stores 0. On a read error or any non-digit character
 * *@target is left untouched. The line buffer is released on every path.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	/* getline() can allocate even when it fails, so always free buf */
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> functions need an unsigned char value */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}
	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}

/*
 * Prompt with @msg via prompt_integer() and store the answer in *@target
 * only when it lies within 0..100; otherwise *@target keeps its value.
 */
static void prompt_percent(int *target, const char *msg)
{
	int value = 0;

	prompt_integer(&value, msg);
	if (value < 0 || value > 100)
		return;

	*target = value;
}

static void prompt_symbol(struct sym_entry **target, const char *msg)
{
	char *buf = malloc(0), *p;
	struct sym_entry *syme = *target, *n, *found = NULL;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
661
		pthread_mutex_lock(&syme->src->lock);
662 663
		__zero_source_counters(syme);
		*target = NULL;
664
		pthread_mutex_unlock(&syme->src->lock);
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	pthread_mutex_lock(&active_symbols_lock);
	syme = list_entry(active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&active_symbols_lock);

	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
680
		struct symbol *sym = sym_entry__symbol(syme);
681 682 683 684 685 686 687 688

		if (!strcmp(buf, sym->name)) {
			found = syme;
			break;
		}
	}

	if (!found) {
689
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
690 691 692 693 694 695 696 697 698
		sleep(1);
		return;
	} else
		parse_source(found);

out_free:
	free(buf);
}

699
static void print_mapped_keys(void)
700
{
701 702 703
	char *name = NULL;

	if (sym_filter_entry) {
704
		struct symbol *sym = sym_entry__symbol(sym_filter_entry);
705 706 707 708 709 710 711 712 713 714 715 716
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", print_entries);

	if (nr_counters > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(sym_counter));

	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);

717 718 719
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
720 721 722 723

	if (nr_counters > 1)
		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);

724
	fprintf(stdout,
725
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
726 727 728 729
		hide_kernel_symbols ? "yes" : "no");
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
		hide_user_symbols ? "yes" : "no");
730
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", zero ? 1 : 0);
731 732 733 734 735 736 737 738 739 740 741 742
	fprintf(stdout, "\t[qQ]    quit.\n");
}

/*
 * Return 1 if key @c is bound to an action, 0 otherwise. 'E' and 'w' only
 * count as bound when more than one counter is in use.
 */
static int key_mapped(int c)
{
	switch (c) {
	case 'd':
	case 'e':
	case 'f':
	case 'z':
	case 'q':
	case 'Q':
	case 'K':
	case 'U':
	case 'F':
	case 's':
	case 'S':
		return 1;
	case 'E':
	case 'w':
		return nr_counters > 1 ? 1 : 0;
	default:
		break;
	}

	return 0;
}

static void handle_keypress(int c)
{
761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
	if (!key_mapped(c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		print_mapped_keys();
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!key_mapped(c))
			return;
	}

784 785 786
	switch (c) {
		case 'd':
			prompt_integer(&delay_secs, "Enter display delay");
787 788
			if (delay_secs < 1)
				delay_secs = 1;
789 790 791
			break;
		case 'e':
			prompt_integer(&print_entries, "Enter display entries (lines)");
792
			if (print_entries == 0) {
793
				sig_winch_handler(SIGWINCH);
794 795 796
				signal(SIGWINCH, sig_winch_handler);
			} else
				signal(SIGWINCH, SIG_DFL);
797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820
			break;
		case 'E':
			if (nr_counters > 1) {
				int i;

				fprintf(stderr, "\nAvailable events:");
				for (i = 0; i < nr_counters; i++)
					fprintf(stderr, "\n\t%d %s", i, event_name(i));

				prompt_integer(&sym_counter, "Enter details event counter");

				if (sym_counter >= nr_counters) {
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
					sym_counter = 0;
					sleep(1);
				}
			} else sym_counter = 0;
			break;
		case 'f':
			prompt_integer(&count_filter, "Enter display event count filter");
			break;
		case 'F':
			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
			break;
821 822 823
		case 'K':
			hide_kernel_symbols = !hide_kernel_symbols;
			break;
824 825 826
		case 'q':
		case 'Q':
			printf("exiting.\n");
827 828
			if (dump_symtab)
				dsos__fprintf(stderr);
829 830 831 832 833 834 835 836 837 838
			exit(0);
		case 's':
			prompt_symbol(&sym_filter_entry, "Enter details symbol");
			break;
		case 'S':
			if (!sym_filter_entry)
				break;
			else {
				struct sym_entry *syme = sym_filter_entry;

839
				pthread_mutex_lock(&syme->src->lock);
840 841
				sym_filter_entry = NULL;
				__zero_source_counters(syme);
842
				pthread_mutex_unlock(&syme->src->lock);
843 844
			}
			break;
845 846 847
		case 'U':
			hide_user_symbols = !hide_user_symbols;
			break;
848 849 850
		case 'w':
			display_weighted = ~display_weighted;
			break;
851 852 853
		case 'z':
			zero = ~zero;
			break;
854 855
		default:
			break;
856 857 858
	}
}

859
/*
 * display_thread - body of the display thread; never returns.
 *
 * The table is redrawn every delay_secs seconds for as long as no input is
 * pending on stdin. When a key arrives, the original terminal settings are
 * restored and the key is handed to handle_keypress(). A poll() interrupted
 * by a signal only triggers a redraw and is not mistaken for a keypress.
 */
static void *display_thread(void *arg __used)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios tc, save;
	int delay_msecs, c, ret;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;

	for (;;) {
		delay_msecs = delay_secs * 1000;
		tcsetattr(0, TCSANOW, &tc);
		/* trash return*/
		getc(stdin);

		do {
			print_sym_table();
			ret = poll(&stdin_poll, 1, delay_msecs);
		} while (ret == 0 || (ret < 0 && errno == EINTR));

		c = getc(stdin);
		tcsetattr(0, TCSAFLUSH, &save);

		handle_keypress(c);
	}
}

890
/*
 * Names of symbols whose samples are skipped; symbol_filter() sets the skip
 * flag of a matching sym_entry. The list is NULL-terminated.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

904
static int symbol_filter(struct map *map, struct symbol *sym)
905
{
906 907
	struct sym_entry *syme;
	const char *name = sym->name;
908
	int i;
909

910 911 912 913 914 915 916
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

917 918 919 920 921 922 923
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
924 925
		return 1;

926
	syme = symbol__priv(sym);
927
	syme->map = map;
928
	syme->src = NULL;
929 930 931 932 933 934

	if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
		/* schedule initial sym_filter_entry setup */
		sym_filter_entry_sched = syme;
		sym_filter = NULL;
	}
935

936 937 938 939 940 941
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			syme->skip = 1;
			break;
		}
	}
942

943 944 945
	if (!syme->skip)
		syme->name_len = strlen(sym->name);

946 947 948
	return 0;
}

949 950
static void event__process_sample(const event_t *self,
				 struct perf_session *session, int counter)
951
{
952 953
	u64 ip = self->ip.ip;
	struct sym_entry *syme;
954
	struct addr_location al;
955
	u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
956

957 958
	++samples;

959
	switch (origin) {
960
	case PERF_RECORD_MISC_USER:
961
		++userspace_samples;
962 963
		if (hide_user_symbols)
			return;
964
		break;
965
	case PERF_RECORD_MISC_KERNEL:
966 967
		if (hide_kernel_symbols)
			return;
968 969 970 971 972
		break;
	default:
		return;
	}

973 974 975
	if (self->header.misc & PERF_RECORD_MISC_EXACT)
		exact_samples++;

976
	if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
977
	    al.filtered)
978
		return;
979

980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001
	if (al.sym == NULL) {
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
		if (al.map == session->vmlinux_maps[MAP__FUNCTION] &&
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
			pr_err("The %s file can't be used\n",
			       symbol_conf.vmlinux_name);
			exit(1);
		}

		return;
	}

1002 1003 1004 1005
	/* let's see, whether we need to install initial sym_filter_entry */
	if (sym_filter_entry_sched) {
		sym_filter_entry = sym_filter_entry_sched;
		sym_filter_entry_sched = NULL;
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
		if (parse_source(sym_filter_entry) < 0) {
			struct symbol *sym = sym_entry__symbol(sym_filter_entry);

			pr_err("Can't annotate %s", sym->name);
			if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
				pr_err(": No vmlinux file was found in the path:\n");
				vmlinux_path__fprintf(stderr);
			} else
				pr_err(".\n");
			exit(1);
		}
1017 1018
	}

1019
	syme = symbol__priv(al.sym);
1020 1021
	if (!syme->skip) {
		syme->count[counter]++;
1022
		syme->origin = origin;
1023 1024 1025 1026 1027 1028
		record_precise_ip(syme, counter, ip);
		pthread_mutex_lock(&active_symbols_lock);
		if (list_empty(&syme->node) || !syme->node.next)
			__list_insert_active_sym(syme);
		pthread_mutex_unlock(&active_symbols_lock);
	}
1029 1030
}

1031
/*
 * event__process - dispatch a non-sample event to its handler.
 *
 * COMM, MMAP and FORK/EXIT records are forwarded to the matching
 * event__process_*() helper; every other type is ignored. Always returns 0.
 */
static int event__process(event_t *event, struct perf_session *session)
{
	switch (event->header.type) {
	case PERF_RECORD_COMM:
		event__process_comm(event, session);
		break;
	case PERF_RECORD_MMAP:
		event__process_mmap(event, session);
		break;
	case PERF_RECORD_FORK:
	case PERF_RECORD_EXIT:
		event__process_task(event, session);
		break;
	default:
		break;
	}

	return 0;
}

/*
 * Reader state of one perf event ring buffer.
 */
struct mmap_data {
	int			counter;	/* counter index passed on with each sample */
	void			*base;		/* start of the mapping; event data begins page_size bytes in */
	int			mask;		/* size of the data area minus one */
	unsigned int		prev;		/* position up to which events have been consumed */
};

static unsigned int mmap_read_head(struct mmap_data *md)
{
1060
	struct perf_event_mmap_page *pc = md->base;
1061 1062 1063 1064 1065 1066 1067 1068
	int head;

	head = pc->data_head;
	rmb();

	return head;
}

1069 1070
static void perf_session__mmap_read_counter(struct perf_session *self,
					    struct mmap_data *md)
1071 1072 1073 1074 1075 1076 1077 1078
{
	unsigned int head = mmap_read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	int diff;

	/*
	 * If we're further behind than half the buffer, there's a chance
1079
	 * the writer will bite our tail and mess up the samples under us.
1080 1081 1082 1083 1084 1085 1086
	 *
	 * If we somehow ended up ahead of the head, we got messed up.
	 *
	 * In either case, truncate and restart at head.
	 */
	diff = head - old;
	if (diff > md->mask / 2 || diff < 0) {
1087
		fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099

		/*
		 * head points to a known good entry, start there.
		 */
		old = head;
	}

	for (; old != head;) {
		event_t *event = (event_t *)&data[old & md->mask];

		event_t event_copy;

1100
		size_t size = event->header.size;
1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = &event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = &event_copy;
		}

1122
		if (event->header.type == PERF_RECORD_SAMPLE)
1123
			event__process_sample(event, self, md->counter);
1124
		else
1125
			event__process(event, self);
1126 1127 1128 1129 1130 1131
		old += size;
	}

	md->prev = old;
}

M
Mike Galbraith 已提交
1132 1133 1134
/* One pollfd per opened counter; the first nr_poll slots are filled by start_counter(). */
static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
/* Ring buffer reader state, indexed like fd[][]: [cpu slot][counter]. */
static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];

1135
static void perf_session__mmap_read(struct perf_session *self)
1136 1137 1138 1139 1140
{
	int i, counter;

	for (i = 0; i < nr_cpus; i++) {
		for (counter = 0; counter < nr_counters; counter++)
1141
			perf_session__mmap_read_counter(self, &mmap_array[i][counter]);
1142 1143 1144
	}
}

1145 1146 1147 1148
/* Number of entries of event_array[] in use; incremented by start_counter(). */
int nr_poll;
/* Group leader fd passed to sys_perf_event_open(); reset to -1 for each CPU slot by __cmd_top(). */
int group_fd;

static void start_counter(int i, int counter)
1149
{
1150
	struct perf_event_attr *attr;
1151
	int cpu;
1152 1153 1154

	cpu = profile_cpu;
	if (target_pid == -1 && profile_cpu == -1)
1155
		cpu = cpumap[i];
1156 1157 1158 1159

	attr = attrs + counter;

	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
1160 1161 1162 1163 1164 1165 1166

	if (freq) {
		attr->sample_type	|= PERF_SAMPLE_PERIOD;
		attr->freq		= 1;
		attr->sample_freq	= freq;
	}

1167
	attr->inherit		= (cpu < 0) && inherit;
1168
	attr->mmap		= 1;
1169 1170

try_again:
1171
	fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0);
1172 1173 1174 1175

	if (fd[i][counter] < 0) {
		int err = errno;

P
Pekka Enberg 已提交
1176
		if (err == EPERM || err == EACCES)
1177
			die("No permission - are you root?\n");
1178 1179 1180 1181 1182 1183
		/*
		 * If it's cycles then fall back to hrtimer
		 * based cpu-clock-tick sw counter, which
		 * is always available even if no PMU support:
		 */
		if (attr->type == PERF_TYPE_HARDWARE
1184
			&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
1185

1186 1187 1188
			if (verbose)
				warning(" ... trying to fall back to cpu-clock-ticks\n");

1189
			attr->type = PERF_TYPE_SOFTWARE;
1190
			attr->config = PERF_COUNT_SW_CPU_CLOCK;
1191 1192
			goto try_again;
		}
1193 1194 1195
		printf("\n");
		error("perfcounter syscall returned with %d (%s)\n",
			fd[i][counter], strerror(err));
1196
		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
		exit(-1);
	}
	assert(fd[i][counter] >= 0);
	fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);

	/*
	 * First counter acts as the group leader:
	 */
	if (group && group_fd == -1)
		group_fd = fd[i][counter];

	event_array[nr_poll].fd = fd[i][counter];
	event_array[nr_poll].events = POLLIN;
	nr_poll++;

	mmap_array[i][counter].counter = counter;
	mmap_array[i][counter].prev = 0;
	mmap_array[i][counter].mask = mmap_pages*page_size - 1;
	mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
			PROT_READ, MAP_SHARED, fd[i][counter], 0);
	if (mmap_array[i][counter].base == MAP_FAILED)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}

static int __cmd_top(void)
{
	pthread_t thread;
	int i, counter;
1225
	int ret;
1226
	/*
1227 1228
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
1229
	 */
1230
	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false);
1231 1232
	if (session == NULL)
		return -ENOMEM;
1233

1234
	if (target_pid != -1)
1235
		event__synthesize_thread(target_pid, event__process, session);
1236
	else
1237
		event__synthesize_threads(event__process, session);
1238

1239 1240
	for (i = 0; i < nr_cpus; i++) {
		group_fd = -1;
1241 1242
		for (counter = 0; counter < nr_counters; counter++)
			start_counter(i, counter);
1243 1244
	}

1245 1246 1247
	/* Wait for a minimal set of events before starting the snapshot */
	poll(event_array, nr_poll, 100);

1248
	perf_session__mmap_read(session);
1249

1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265
	if (pthread_create(&thread, NULL, display_thread, NULL)) {
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
1266
		int hits = samples;
1267

1268
		perf_session__mmap_read(session);
1269

1270
		if (hits == samples)
1271 1272 1273 1274 1275
			ret = poll(event_array, nr_poll, 100);
	}

	return 0;
}
1276 1277 1278 1279 1280 1281 1282 1283

/*
 * Usage synopsis handed to the option parser by cmd_top().
 * The list is terminated by a NULL entry.
 */
static const char * const top_usage[] = {
	[0] = "perf top [<options>]",
	[1] = NULL,
};

static const struct option options[] = {
	OPT_CALLBACK('e', "event", NULL, "event",
1284 1285
		     "event selector. use 'perf list' to list available events",
		     parse_events),
1286 1287 1288 1289 1290 1291 1292 1293
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
	OPT_INTEGER('p', "pid", &target_pid,
		    "profile events on existing pid"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_INTEGER('C', "CPU", &profile_cpu,
		    "CPU to profile on"),
1294 1295
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
1296 1297
	OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
		    "hide kernel symbols"),
1298 1299 1300 1301
	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
		    "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
M
Mike Galbraith 已提交
1302
	OPT_INTEGER('d', "delay", &delay_secs,
1303 1304 1305
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
			    "dump the symbol table used for profiling"),
1306
	OPT_INTEGER('f', "count-filter", &count_filter,
1307 1308 1309
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &group,
			    "put the counters into a counter group"),
1310 1311
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
1312
	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1313
		    "symbol to annotate"),
A
Anton Blanchard 已提交
1314
	OPT_BOOLEAN('z', "zero", &zero,
1315
		    "zero history across updates"),
1316
	OPT_INTEGER('F', "freq", &freq,
1317
		    "profile at this frequency"),
1318 1319
	OPT_INTEGER('E', "entries", &print_entries,
		    "display this many functions"),
1320 1321
	OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
		    "hide user symbols"),
1322 1323
	OPT_BOOLEAN('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
1324 1325 1326
	OPT_END()
};

1327
/*
 * Entry point of the 'perf top' builtin.
 *
 * Parses the command line, resolves conflicting or missing settings
 * (pid vs. cpu, count vs. frequency, refresh delay, number of counters),
 * initializes the symbol layer and the terminal dimensions, then hands
 * over to __cmd_top().
 *
 * @argc/@argv: command line as passed to the builtin
 * @prefix:     unused
 *
 * Returns -1 if symbol__init() fails, otherwise whatever __cmd_top()
 * returns.  Exits the process when both the sample count and the
 * frequency are zero.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	int counter;

	page_size = sysconf(_SC_PAGE_SIZE);

	/* Any argument left over after option parsing is a usage error. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* CPU and PID are mutually exclusive */
	if (target_pid != -1 && profile_cpu != -1) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		profile_cpu = -1;
	}

	/* No -e given: fall back to a single counter. */
	if (!nr_counters)
		nr_counters = 1;

	/*
	 * Per-symbol private area: a struct sym_entry followed by
	 * nr_counters + 1 unsigned long slots.
	 */
	symbol_conf.priv_size = (sizeof(struct sym_entry) +
				 (nr_counters + 1) * sizeof(unsigned long));

	/* Only search the default vmlinux paths when -k was not given. */
	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0)
		return -1;

	if (delay_secs < 1)
		delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	/*
	 * Fill in the ones not specifically initialized via -c:
	 */
	for (counter = 0; counter < nr_counters; counter++) {
		if (attrs[counter].sample_period)
			continue;

		attrs[counter].sample_period = default_interval;
	}

	/* A single pid or a single CPU needs just one counter slot. */
	if (target_pid != -1 || profile_cpu != -1)
		nr_cpus = 1;
	else
		nr_cpus = read_cpu_map();

	get_term_dimensions(&winsize);
	if (print_entries == 0) {
		/*
		 * No explicit -E: size the listing from the terminal and
		 * install a SIGWINCH handler.
		 */
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	return __cmd_top();
}