builtin-top.c 33.3 KB
Newer Older
1
/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19
#include "builtin.h"
20

21
#include "perf.h"
22

23
#include "util/color.h"
24 25
#include "util/session.h"
#include "util/symbol.h"
26
#include "util/thread.h"
27
#include "util/util.h"
28
#include <linux/rbtree.h>
29 30
#include "util/parse-options.h"
#include "util/parse-events.h"
31
#include "util/cpumap.h"
32

33 34
#include "util/debug.h"

35 36
#include <assert.h>
#include <fcntl.h>
37

38
#include <stdio.h>
39 40
#include <termios.h>
#include <unistd.h>
41

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
#include <errno.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

58
/*
 * Counter file descriptors: fd[cpu index][counter] points to an array
 * indexed by thread (see start_counter()).
 */
static int			*fd[MAX_NR_CPUS][MAX_COUNTERS];

/*
 * Profiling target selection.
 */
static int			system_wide			=      0;
static int			target_pid			=     -1;
static int			target_tid			=     -1;
static pid_t			*all_tids			=      NULL;
static int			thread_num			=      0;
static int			inherit				=      0;
static int			profile_cpu			=     -1;
static int			nr_cpus				=      0;

/*
 * Sampling parameters.
 */
static int			default_interval		=      0;
static unsigned int		realtime_prio			=      0;
static int			group				=      0;
static unsigned int		page_size;
static unsigned int		mmap_pages			=     16;
static int			freq				=   1000; /* 1 KHz */

/*
 * Display state; most of it can be changed at run time from
 * handle_keypress().
 */
static int			count_filter			=      5;
static int			print_entries;
static int			delay_secs			=      2;
static int			zero                            =      0;
static int			dump_symtab                     =      0;
static bool			hide_kernel_symbols		=  false;
static bool			hide_user_symbols		=  false;
static struct winsize		winsize;
87

88 89 90 91 92 93 94 95 96 97 98
/*
 * Source
 */

/*
 * One line of objdump output for the annotated symbol, kept in a singly
 * linked list built by parse_source().
 */
struct source_line {
	u64			eip;			/* address parsed from the line, 0 if the line carries none */
	unsigned long		count[MAX_COUNTERS];	/* per-counter hits, see record_precise_ip() */
	char			*line;			/* text of the line as returned by getline() */
	struct source_line	*next;			/* next line, NULL at the tail */
};

99 100
/* Name of the symbol to annotate; cleared by symbol_filter() once matched. */
static char			*sym_filter			=   NULL;
/* Symbol currently being annotated, NULL when annotation is off. */
struct sym_entry		*sym_filter_entry		=   NULL;
/* Entry picked by symbol_filter(), installed by event__process_sample(). */
struct sym_entry		*sym_filter_entry_sched		=   NULL;
/* Minimum percentage for an annotated line to be shown ([F] key). */
static int			sym_pcnt_filter			=      5;
/* Counter whose samples are shown in annotation ([E] key). */
static int			sym_counter			=      0;
/* Non-zero: sort by sym_weight() across counters ([w] key). */
static int			display_weighted		=     -1;
105

106 107 108 109
/*
 * Symbols
 */

110 111 112 113 114 115 116
/*
 * Annotation state of a symbol, allocated on first use by parse_source().
 */
struct sym_entry_source {
	struct source_line	*source;	/* line matching the symbol start, set by lookup_sym_source() */
	struct source_line	*lines;		/* head of the objdump line list */
	struct source_line	**lines_tail;	/* where the next parsed line gets linked */
	pthread_mutex_t		lock;		/* taken around accesses to the line list */
};

117
struct sym_entry {
118 119
	struct rb_node		rb_node;
	struct list_head	node;
120 121
	unsigned long		snap_count;
	double			weight;
122
	int			skip;
123
	u16			name_len;
124
	u8			origin;
125
	struct map		*map;
126
	struct sym_entry_source	*src;
127
	unsigned long		count[0];
128 129
};

130 131 132 133
/*
 * Source functions
 */

134 135
static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
{
136
       return ((void *)self) + symbol_conf.priv_size;
137 138
}

139
/*
 * Parse a terminal dimension from the environment string @s.
 *
 * Returns 0 and stores the value in @out when @s starts with a number in
 * the range 1..65535 that survives the round trip through unsigned short;
 * returns -1 (leaving @out untouched) for NULL, non-numeric, non-positive
 * or out-of-range input.
 */
static int parse_term_dimension(const char *s, unsigned short *out)
{
	char *end;
	long v;

	if (s == NULL)
		return -1;

	errno = 0;
	v = strtol(s, &end, 10);
	if (errno || end == s || v <= 0 || v != (long)(unsigned short)v)
		return -1;

	*out = (unsigned short)v;
	return 0;
}

/*
 * Fill @ws with the terminal size.
 *
 * Sources, in order of preference:
 *   1. the LINES and COLUMNS environment variables (both must be valid),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1,
 *   3. a fixed 25x80 fallback.
 *
 * Negative or oversized environment values are rejected rather than
 * being truncated into the unsigned short fields of struct winsize.
 */
void get_term_dimensions(struct winsize *ws)
{
	unsigned short rows, cols;

	if (parse_term_dimension(getenv("LINES"), &rows) == 0 &&
	    parse_term_dimension(getenv("COLUMNS"), &cols) == 0) {
		ws->ws_row = rows;
		ws->ws_col = cols;
		return;
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}

161
static void update_print_entries(struct winsize *ws)
162
{
163 164
	print_entries = ws->ws_row;

165 166 167 168 169 170
	if (print_entries > 9)
		print_entries -= 9;
}

/*
 * SIGWINCH handler: re-read the terminal size into the global winsize and
 * recompute print_entries from it. The signal number is ignored.
 */
static void sig_winch_handler(int sig __used)
{
	struct winsize *ws = &winsize;

	get_term_dimensions(ws);
	update_print_entries(ws);
}

175
static int parse_source(struct sym_entry *syme)
176 177
{
	struct symbol *sym;
178
	struct sym_entry_source *source;
179
	struct map *map;
180
	FILE *file;
181
	char command[PATH_MAX*2];
182 183
	const char *path;
	u64 len;
184 185

	if (!syme)
186 187 188 189 190 191 192 193 194 195
		return -1;

	sym = sym_entry__symbol(syme);
	map = syme->map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
	if (map->dso->origin == DSO__ORIG_KERNEL)
		return -1;
196

197
	if (syme->src == NULL) {
198
		syme->src = zalloc(sizeof(*source));
199
		if (syme->src == NULL)
200
			return -1;
201 202 203 204 205 206 207
		pthread_mutex_init(&syme->src->lock, NULL);
	}

	source = syme->src;

	if (source->lines) {
		pthread_mutex_lock(&source->lock);
208 209
		goto out_assign;
	}
210
	path = map->dso->long_name;
211 212 213

	len = sym->end - sym->start;

214
	sprintf(command,
215 216 217
		"objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s",
		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
218 219 220

	file = popen(command, "r");
	if (!file)
221
		return -1;
222

223 224
	pthread_mutex_lock(&source->lock);
	source->lines_tail = &source->lines;
225 226 227
	while (!feof(file)) {
		struct source_line *src;
		size_t dummy = 0;
228
		char *c, *sep;
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243

		src = malloc(sizeof(struct source_line));
		assert(src != NULL);
		memset(src, 0, sizeof(struct source_line));

		if (getline(&src->line, &dummy, file) < 0)
			break;
		if (!src->line)
			break;

		c = strchr(src->line, '\n');
		if (c)
			*c = 0;

		src->next = NULL;
244 245
		*source->lines_tail = src;
		source->lines_tail = &src->next;
246

247 248 249 250 251
		src->eip = strtoull(src->line, &sep, 16);
		if (*sep == ':')
			src->eip = map__objdump_2ip(map, src->eip);
		else /* this line has no ip info (e.g. source line) */
			src->eip = 0;
252 253 254 255
	}
	pclose(file);
out_assign:
	sym_filter_entry = syme;
256
	pthread_mutex_unlock(&source->lock);
257
	return 0;
258 259 260 261 262 263 264
}

static void __zero_source_counters(struct sym_entry *syme)
{
	int i;
	struct source_line *line;

265
	line = syme->src->lines;
266 267 268 269 270 271 272 273 274 275 276 277 278 279
	while (line) {
		for (i = 0; i < nr_counters; i++)
			line->count[i] = 0;
		line = line->next;
	}
}

/*
 * Account one sample at address @ip for @counter on the matching
 * annotated line of @syme.
 *
 * Nothing is recorded unless @syme is the symbol currently being
 * annotated, its annotation state exists, its start line has been looked
 * up, and the line lock can be taken without blocking (trylock: the sample
 * is dropped rather than stalling the caller).
 */
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	struct source_line *line;

	if (syme != sym_filter_entry)
		return;

	/* src must be validated before its lock is touched */
	if (syme->src == NULL)
		return;

	if (pthread_mutex_trylock(&syme->src->lock))
		return;

	if (syme->src->source == NULL)
		goto out_unlock;

	for (line = syme->src->lines; line; line = line->next) {
		/* skip lines without IP info */
		if (line->eip == 0)
			continue;
		if (line->eip == ip) {
			line->count[counter]++;
			break;
		}
		/* stop at the first address past @ip */
		if (line->eip > ip)
			break;
	}
out_unlock:
	pthread_mutex_unlock(&syme->src->lock);
}

301 302
#define PATTERN_LEN		(BITS_PER_LONG / 4 + 2)

303 304
static void lookup_sym_source(struct sym_entry *syme)
{
305
	struct symbol *symbol = sym_entry__symbol(syme);
306
	struct source_line *line;
307
	char pattern[PATTERN_LEN + 1];
308

309 310
	sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4,
		map__rip_2objdump(syme->map, symbol->start));
311

312 313
	pthread_mutex_lock(&syme->src->lock);
	for (line = syme->src->lines; line; line = line->next) {
314
		if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
315
			syme->src->source = line;
316 317 318
			break;
		}
	}
319
	pthread_mutex_unlock(&syme->src->lock);
320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
}

/*
 * Print @count consecutive annotated lines starting at @queue: hit count
 * for the selected counter, its share of @total in percent, and the line
 * text.
 */
static void show_lines(struct source_line *queue, int count, int total)
{
	struct source_line *cur = queue;
	int remaining;

	for (remaining = count; remaining > 0; remaining--, cur = cur->next) {
		float pcnt = 100.0*(float)cur->count[sym_counter]/(float)total;

		printf("%8li %4.1f%%\t%s\n", cur->count[sym_counter], pcnt, cur->line);
	}
}

#define TRACE_COUNT     3

static void show_details(struct sym_entry *syme)
{
	struct symbol *symbol;
	struct source_line *line;
	struct source_line *line_queue = NULL;
	int displayed = 0;
	int line_queue_count = 0, total = 0, more = 0;

	if (!syme)
		return;

349
	if (!syme->src->source)
350 351
		lookup_sym_source(syme);

352
	if (!syme->src->source)
353 354
		return;

355
	symbol = sym_entry__symbol(syme);
356 357 358
	printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);

359 360
	pthread_mutex_lock(&syme->src->lock);
	line = syme->src->source;
361 362 363 364 365
	while (line) {
		total += line->count[sym_counter];
		line = line->next;
	}

366
	line = syme->src->source;
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
	while (line) {
		float pcnt = 0.0;

		if (!line_queue_count)
			line_queue = line;
		line_queue_count++;

		if (line->count[sym_counter])
			pcnt = 100.0 * line->count[sym_counter] / (float)total;
		if (pcnt >= (float)sym_pcnt_filter) {
			if (displayed <= print_entries)
				show_lines(line_queue, line_queue_count, total);
			else more++;
			displayed += line_queue_count;
			line_queue_count = 0;
			line_queue = NULL;
		} else if (line_queue_count > TRACE_COUNT) {
			line_queue = line_queue->next;
			line_queue_count--;
		}

		line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
		line = line->next;
	}
391
	pthread_mutex_unlock(&syme->src->lock);
392 393 394
	if (more)
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
}
395

396
/*
 * Symbols are added to this list in event__process_sample() and are
 * removed again by print_sym_table() once their count has decayed to zero.
 */
static LIST_HEAD(active_symbols);
static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
402 403 404 405 406 407

/*
 * Ordering weight: count-1 * count-2 * ... / count-n
 */
static double sym_weight(const struct sym_entry *sym)
{
408
	double weight = sym->snap_count;
409 410
	int counter;

411 412 413
	if (!display_weighted)
		return weight;

414 415 416 417 418 419 420 421
	for (counter = 1; counter < nr_counters-1; counter++)
		weight *= sym->count[counter];

	weight /= (sym->count[counter] + 1);

	return weight;
}

422 423
/*
 * Sample counters for the current refresh interval; incremented in
 * event__process_sample() and reset by print_sym_table().
 */
static long			samples;
static long			userspace_samples;
static long			exact_samples;

/*
 * Terminal control string written before every refresh: ESC [ H moves the
 * cursor to the top-left corner and ESC [ 2 J erases the display. Spelled
 * with octal escapes so the bytes survive tools that strip raw control
 * characters.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

427
static void __list_insert_active_sym(struct sym_entry *syme)
428 429 430 431
{
	list_add(&syme->node, &active_symbols);
}

432 433 434 435 436 437 438
/*
 * Unlink @syme from active_symbols under active_symbols_lock and
 * re-initialise its list node.
 */
static void list_remove_active_sym(struct sym_entry *syme)
{
	struct list_head *entry = &syme->node;

	pthread_mutex_lock(&active_symbols_lock);
	list_del_init(entry);
	pthread_mutex_unlock(&active_symbols_lock);
}

439 440 441 442 443 444 445 446 447 448
/*
 * Insert @se into @tree keyed by weight. Heavier entries go to the left,
 * so an in-order walk starting at rb_first() visits them in descending
 * weight order; entries of equal weight go to the right.
 */
static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
{
	struct rb_node **link = &tree->rb_node;
	struct rb_node *parent = NULL;

	while (*link != NULL) {
		struct sym_entry *cur;

		parent = *link;
		cur = rb_entry(parent, struct sym_entry, rb_node);

		link = se->weight > cur->weight ? &parent->rb_left
						: &parent->rb_right;
	}

	rb_link_node(&se->rb_node, parent, link);
	rb_insert_color(&se->rb_node, tree);
}
458 459 460

static void print_sym_table(void)
{
461
	int printed = 0, j;
462
	int counter, snap = !display_weighted ? sym_counter : 0;
463 464
	float samples_per_sec = samples/delay_secs;
	float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
465
	float esamples_percent = (100.0*exact_samples)/samples;
466
	float sum_ksamples = 0.0;
467 468 469
	struct sym_entry *syme, *n;
	struct rb_root tmp = RB_ROOT;
	struct rb_node *nd;
470
	int sym_width = 0, dso_width = 0, dso_short_width = 0;
471
	const int win_width = winsize.ws_col - 1;
472

473
	samples = userspace_samples = exact_samples = 0;
474

475
	/* Sort the active symbols */
476 477 478 479 480
	pthread_mutex_lock(&active_symbols_lock);
	syme = list_entry(active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&active_symbols_lock);

	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
481
		syme->snap_count = syme->count[snap];
482
		if (syme->snap_count != 0) {
483

484 485 486 487 488 489 490
			if ((hide_user_symbols &&
			     syme->origin == PERF_RECORD_MISC_USER) ||
			    (hide_kernel_symbols &&
			     syme->origin == PERF_RECORD_MISC_KERNEL)) {
				list_remove_active_sym(syme);
				continue;
			}
491
			syme->weight = sym_weight(syme);
492
			rb_insert_active_sym(&tmp, syme);
493
			sum_ksamples += syme->snap_count;
494 495

			for (j = 0; j < nr_counters; j++)
496 497
				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
		} else
498
			list_remove_active_sym(syme);
499 500
	}

501
	puts(CONSOLE_CLEAR);
502

503
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
504
	printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%%  exact: %4.1f%% [",
505
		samples_per_sec,
506 507
		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)),
		esamples_percent);
508

509
	if (nr_counters == 1 || !display_weighted) {
510
		printf("%Ld", (u64)attrs[0].sample_period);
I
Ingo Molnar 已提交
511 512 513 514 515
		if (freq)
			printf("Hz ");
		else
			printf(" ");
	}
516

517 518 519
	if (!display_weighted)
		printf("%s", event_name(sym_counter));
	else for (counter = 0; counter < nr_counters; counter++) {
520 521 522 523 524 525 526 527
		if (counter)
			printf("/");

		printf("%s", event_name(counter));
	}

	printf( "], ");

528 529
	if (target_pid != -1)
		printf(" (target_pid: %d", target_pid);
530 531
	else if (target_tid != -1)
		printf(" (target_tid: %d", target_tid);
532 533 534 535 536 537
	else
		printf(" (all");

	if (profile_cpu != -1)
		printf(", cpu: %d)\n", profile_cpu);
	else {
538
		if (target_tid != -1)
539 540 541 542 543
			printf(")\n");
		else
			printf(", %d CPUs)\n", nr_cpus);
	}

544
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
545

546 547 548 549 550
	if (sym_filter_entry) {
		show_details(sym_filter_entry);
		return;
	}

551 552 553 554 555 556 557 558 559
	/*
	 * Find the longest symbol name that will be displayed
	 */
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
		syme = rb_entry(nd, struct sym_entry, rb_node);
		if (++printed > print_entries ||
		    (int)syme->snap_count < count_filter)
			continue;

560 561 562
		if (syme->map->dso->long_name_len > dso_width)
			dso_width = syme->map->dso->long_name_len;

563 564 565
		if (syme->map->dso->short_name_len > dso_short_width)
			dso_short_width = syme->map->dso->short_name_len;

566 567 568 569 570 571
		if (syme->name_len > sym_width)
			sym_width = syme->name_len;
	}

	printed = 0;

572 573 574 575 576
	if (sym_width + dso_width > winsize.ws_col - 29) {
		dso_width = dso_short_width;
		if (sym_width + dso_width > winsize.ws_col - 29)
			sym_width = winsize.ws_col - dso_width - 29;
	}
577
	putchar('\n');
578
	if (nr_counters == 1)
579
		printf("             samples  pcnt");
580
	else
581
		printf("   weight    samples  pcnt");
582

583 584
	if (verbose)
		printf("         RIP       ");
585
	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
586
	printf("   %s    _______ _____",
587 588
	       nr_counters == 1 ? "      " : "______");
	if (verbose)
589
		printf(" ________________");
590
	printf(" %-*.*s", sym_width, sym_width, graph_line);
591
	printf(" %-*.*s", dso_width, dso_width, graph_line);
592
	puts("\n");
593

594
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
595
		struct symbol *sym;
596
		double pcnt;
597

598
		syme = rb_entry(nd, struct sym_entry, rb_node);
599
		sym = sym_entry__symbol(syme);
600

601
		if (++printed > print_entries || (int)syme->snap_count < count_filter)
602
			continue;
603

604 605
		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
					 sum_ksamples));
606

607
		if (nr_counters == 1 || !display_weighted)
608
			printf("%20.2f ", syme->weight);
609
		else
610
			printf("%9.1f %10ld ", syme->weight, syme->snap_count);
611

612
		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
613
		if (verbose)
614
			printf(" %016llx", sym->start);
615
		printf(" %-*.*s", sym_width, sym_width, sym->name);
616 617 618 619
		printf(" %-*.*s\n", dso_width, dso_width,
		       dso_width >= syme->map->dso->long_name_len ?
					syme->map->dso->long_name :
					syme->map->dso->short_name);
620 621 622
	}
}

623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
/*
 * Print "@msg: " and read one line from stdin.
 *
 * When the line (without its trailing newline) consists only of decimal
 * digits, its value is stored in *@target; an empty line therefore stores
 * 0. Any other input, EOF or a read error leaves *@target unchanged.
 *
 * The line buffer is owned by this function and released on every path,
 * including a failed getline(), which may still have allocated it.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* cast: isdigit() is undefined for negative char values */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}
	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}

/*
 * Prompt for an integer with prompt_integer() and store it in *@target
 * only when it lies in 0..100. Rejected input from prompt_integer() leaves
 * the local value at 0, which is then stored.
 */
static void prompt_percent(int *target, const char *msg)
{
	int value = 0;

	prompt_integer(&value, msg);
	if (value < 0 || value > 100)
		return;

	*target = value;
}

static void prompt_symbol(struct sym_entry **target, const char *msg)
{
	char *buf = malloc(0), *p;
	struct sym_entry *syme = *target, *n, *found = NULL;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
666
		pthread_mutex_lock(&syme->src->lock);
667 668
		__zero_source_counters(syme);
		*target = NULL;
669
		pthread_mutex_unlock(&syme->src->lock);
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	pthread_mutex_lock(&active_symbols_lock);
	syme = list_entry(active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&active_symbols_lock);

	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
685
		struct symbol *sym = sym_entry__symbol(syme);
686 687 688 689 690 691 692 693

		if (!strcmp(buf, sym->name)) {
			found = syme;
			break;
		}
	}

	if (!found) {
694
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
695 696 697 698 699 700 701 702 703
		sleep(1);
		return;
	} else
		parse_source(found);

out_free:
	free(buf);
}

704
static void print_mapped_keys(void)
705
{
706 707 708
	char *name = NULL;

	if (sym_filter_entry) {
709
		struct symbol *sym = sym_entry__symbol(sym_filter_entry);
710 711 712 713 714 715 716 717 718 719 720 721
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", print_entries);

	if (nr_counters > 1)
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(sym_counter));

	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);

722 723 724
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
725 726 727 728

	if (nr_counters > 1)
		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);

729
	fprintf(stdout,
730
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
731 732 733 734
		hide_kernel_symbols ? "yes" : "no");
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
		hide_user_symbols ? "yes" : "no");
735
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", zero ? 1 : 0);
736 737 738 739 740 741 742 743 744 745 746 747
	fprintf(stdout, "\t[qQ]    quit.\n");
}

/*
 * Return 1 when @c is a key handled by handle_keypress(), 0 otherwise.
 * 'E' and 'w' only count as mapped when more than one counter is in use.
 */
static int key_mapped(int c)
{
	if (c == 'E' || c == 'w')
		return nr_counters > 1 ? 1 : 0;

	switch (c) {
	case 'd':
	case 'e':
	case 'f':
	case 'z':
	case 'q':
	case 'Q':
	case 'K':
	case 'U':
	case 'F':
	case 's':
	case 'S':
		return 1;
	default:
		return 0;
	}
}

static void handle_keypress(int c)
{
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788
	if (!key_mapped(c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		print_mapped_keys();
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!key_mapped(c))
			return;
	}

789 790 791
	switch (c) {
		case 'd':
			prompt_integer(&delay_secs, "Enter display delay");
792 793
			if (delay_secs < 1)
				delay_secs = 1;
794 795 796
			break;
		case 'e':
			prompt_integer(&print_entries, "Enter display entries (lines)");
797
			if (print_entries == 0) {
798
				sig_winch_handler(SIGWINCH);
799 800 801
				signal(SIGWINCH, sig_winch_handler);
			} else
				signal(SIGWINCH, SIG_DFL);
802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825
			break;
		case 'E':
			if (nr_counters > 1) {
				int i;

				fprintf(stderr, "\nAvailable events:");
				for (i = 0; i < nr_counters; i++)
					fprintf(stderr, "\n\t%d %s", i, event_name(i));

				prompt_integer(&sym_counter, "Enter details event counter");

				if (sym_counter >= nr_counters) {
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
					sym_counter = 0;
					sleep(1);
				}
			} else sym_counter = 0;
			break;
		case 'f':
			prompt_integer(&count_filter, "Enter display event count filter");
			break;
		case 'F':
			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
			break;
826 827 828
		case 'K':
			hide_kernel_symbols = !hide_kernel_symbols;
			break;
829 830 831
		case 'q':
		case 'Q':
			printf("exiting.\n");
832 833
			if (dump_symtab)
				dsos__fprintf(stderr);
834 835 836 837 838 839 840 841 842 843
			exit(0);
		case 's':
			prompt_symbol(&sym_filter_entry, "Enter details symbol");
			break;
		case 'S':
			if (!sym_filter_entry)
				break;
			else {
				struct sym_entry *syme = sym_filter_entry;

844
				pthread_mutex_lock(&syme->src->lock);
845 846
				sym_filter_entry = NULL;
				__zero_source_counters(syme);
847
				pthread_mutex_unlock(&syme->src->lock);
848 849
			}
			break;
850 851 852
		case 'U':
			hide_user_symbols = !hide_user_symbols;
			break;
853 854 855
		case 'w':
			display_weighted = ~display_weighted;
			break;
856 857 858
		case 'z':
			zero = ~zero;
			break;
859 860
		default:
			break;
861 862 863
	}
}

864
/*
 * Display thread body: never returns.
 *
 * Each round switches stdin to non-canonical, no-echo mode, redraws the
 * table every delay_secs seconds for as long as poll() on stdin times out,
 * then restores the terminal settings and passes the pressed key to
 * handle_keypress(). A poll() error ends the redraw loop in the same way
 * as pending input.
 */
static void *display_thread(void *arg __used)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios tc, save;
	int delay_msecs, c;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;

	for (;;) {
		/* re-read the delay: handle_keypress() may have changed it */
		delay_msecs = delay_secs * 1000;
		tcsetattr(0, TCSANOW, &tc);
		/* trash return*/
		getc(stdin);

		do {
			print_sym_table();
		} while (poll(&stdin_poll, 1, delay_msecs) == 0);

		c = getc(stdin);
		tcsetattr(0, TCSAFLUSH, &save);

		handle_keypress(c);
	}
}

895
/* Tag samples to be skipped. */
896
/*
 * NULL-terminated list of symbol names; symbol_filter() sets the skip flag
 * on any symbol whose name matches one of them exactly.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

909
static int symbol_filter(struct map *map, struct symbol *sym)
910
{
911 912
	struct sym_entry *syme;
	const char *name = sym->name;
913
	int i;
914

915 916 917 918 919 920 921
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

922 923 924 925 926 927 928
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
929 930
		return 1;

931
	syme = symbol__priv(sym);
932
	syme->map = map;
933
	syme->src = NULL;
934 935 936 937 938 939

	if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
		/* schedule initial sym_filter_entry setup */
		sym_filter_entry_sched = syme;
		sym_filter = NULL;
	}
940

941 942 943 944 945 946
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			syme->skip = 1;
			break;
		}
	}
947

948 949 950
	if (!syme->skip)
		syme->name_len = strlen(sym->name);

951 952 953
	return 0;
}

954 955
static void event__process_sample(const event_t *self,
				 struct perf_session *session, int counter)
956
{
957 958
	u64 ip = self->ip.ip;
	struct sym_entry *syme;
959
	struct addr_location al;
960
	u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
961

962 963
	++samples;

964
	switch (origin) {
965
	case PERF_RECORD_MISC_USER:
966
		++userspace_samples;
967 968
		if (hide_user_symbols)
			return;
969
		break;
970
	case PERF_RECORD_MISC_KERNEL:
971 972
		if (hide_kernel_symbols)
			return;
973 974 975 976 977
		break;
	default:
		return;
	}

978 979 980
	if (self->header.misc & PERF_RECORD_MISC_EXACT)
		exact_samples++;

981
	if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
982
	    al.filtered)
983
		return;
984

985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006
	if (al.sym == NULL) {
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
		if (al.map == session->vmlinux_maps[MAP__FUNCTION] &&
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
			pr_err("The %s file can't be used\n",
			       symbol_conf.vmlinux_name);
			exit(1);
		}

		return;
	}

1007 1008 1009 1010
	/* let's see, whether we need to install initial sym_filter_entry */
	if (sym_filter_entry_sched) {
		sym_filter_entry = sym_filter_entry_sched;
		sym_filter_entry_sched = NULL;
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021
		if (parse_source(sym_filter_entry) < 0) {
			struct symbol *sym = sym_entry__symbol(sym_filter_entry);

			pr_err("Can't annotate %s", sym->name);
			if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
				pr_err(": No vmlinux file was found in the path:\n");
				vmlinux_path__fprintf(stderr);
			} else
				pr_err(".\n");
			exit(1);
		}
1022 1023
	}

1024
	syme = symbol__priv(al.sym);
1025 1026
	if (!syme->skip) {
		syme->count[counter]++;
1027
		syme->origin = origin;
1028 1029 1030 1031 1032 1033
		record_precise_ip(syme, counter, ip);
		pthread_mutex_lock(&active_symbols_lock);
		if (list_empty(&syme->node) || !syme->node.next)
			__list_insert_active_sym(syme);
		pthread_mutex_unlock(&active_symbols_lock);
	}
1034 1035
}

1036
/*
 * Dispatch a non-sample event to its handler: COMM, MMAP and FORK/EXIT
 * records are processed, every other type is ignored. Always returns 0;
 * the handlers' return values are not looked at.
 */
static int event__process(event_t *event, struct perf_session *session)
{
	const unsigned int type = event->header.type;

	if (type == PERF_RECORD_COMM)
		event__process_comm(event, session);
	else if (type == PERF_RECORD_MMAP)
		event__process_mmap(event, session);
	else if (type == PERF_RECORD_FORK || type == PERF_RECORD_EXIT)
		event__process_task(event, session);

	return 0;
}

/*
 * Read state of one mmap'ed event ring buffer.
 */
struct mmap_data {
	int			counter;	/* counter index passed on to event__process_sample() */
	void			*base;		/* start of the mapping; the data area begins page_size bytes in */
	int			mask;		/* offset mask applied to positions in the data area */
	unsigned int		prev;		/* position up to which events have been consumed */
};

static unsigned int mmap_read_head(struct mmap_data *md)
{
1065
	struct perf_event_mmap_page *pc = md->base;
1066 1067 1068 1069 1070 1071 1072 1073
	int head;

	head = pc->data_head;
	rmb();

	return head;
}

1074 1075
static void perf_session__mmap_read_counter(struct perf_session *self,
					    struct mmap_data *md)
1076 1077 1078 1079 1080 1081 1082 1083
{
	unsigned int head = mmap_read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	int diff;

	/*
	 * If we're further behind than half the buffer, there's a chance
1084
	 * the writer will bite our tail and mess up the samples under us.
1085 1086 1087 1088 1089 1090 1091
	 *
	 * If we somehow ended up ahead of the head, we got messed up.
	 *
	 * In either case, truncate and restart at head.
	 */
	diff = head - old;
	if (diff > md->mask / 2 || diff < 0) {
1092
		fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104

		/*
		 * head points to a known good entry, start there.
		 */
		old = head;
	}

	for (; old != head;) {
		event_t *event = (event_t *)&data[old & md->mask];

		event_t event_copy;

1105
		size_t size = event->header.size;
1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = &event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = &event_copy;
		}

1127
		if (event->header.type == PERF_RECORD_SAMPLE)
1128
			event__process_sample(event, self, md->counter);
1129
		else
1130
			event__process(event, self);
1131 1132 1133 1134 1135 1136
		old += size;
	}

	md->prev = old;
}

1137 1138
/* NOTE(review): not used in the visible part of this file; presumably the poll set for the counter fds -- confirm. */
static struct pollfd *event_array;
/* Ring-buffer read state: mmap_array[cpu index][counter] is an array indexed by thread (see perf_session__mmap_read()). */
static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
M
Mike Galbraith 已提交
1139

1140
static void perf_session__mmap_read(struct perf_session *self)
1141
{
1142
	int i, counter, thread_index;
1143 1144 1145

	for (i = 0; i < nr_cpus; i++) {
		for (counter = 0; counter < nr_counters; counter++)
1146 1147 1148 1149 1150 1151
			for (thread_index = 0;
				thread_index < thread_num;
				thread_index++) {
				perf_session__mmap_read_counter(self,
					&mmap_array[i][counter][thread_index]);
			}
1152 1153 1154
	}
}

1155 1156 1157 1158
/* NOTE(review): not used in the visible part of this file; presumably the number of entries in event_array -- confirm. */
int nr_poll;
/* Passed as the group_fd argument of sys_perf_event_open() in start_counter(); set there to the first opened fd when grouping is enabled. */
int group_fd;

static void start_counter(int i, int counter)
1159
{
1160
	struct perf_event_attr *attr;
1161
	int cpu;
1162
	int thread_index;
1163 1164

	cpu = profile_cpu;
1165
	if (target_tid == -1 && profile_cpu == -1)
1166
		cpu = cpumap[i];
1167 1168 1169 1170

	attr = attrs + counter;

	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
1171 1172 1173 1174 1175 1176 1177

	if (freq) {
		attr->sample_type	|= PERF_SAMPLE_PERIOD;
		attr->freq		= 1;
		attr->sample_freq	= freq;
	}

1178
	attr->inherit		= (cpu < 0) && inherit;
1179
	attr->mmap		= 1;
1180

1181
	for (thread_index = 0; thread_index < thread_num; thread_index++) {
1182
try_again:
1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
		fd[i][counter][thread_index] = sys_perf_event_open(attr,
				all_tids[thread_index], cpu, group_fd, 0);

		if (fd[i][counter][thread_index] < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES)
				die("No permission - are you root?\n");
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					warning(" ... trying to fall back to cpu-clock-ticks\n");

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
			printf("\n");
			error("perfcounter syscall returned with %d (%s)\n",
					fd[i][counter][thread_index], strerror(err));
			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			exit(-1);
		}
		assert(fd[i][counter][thread_index] >= 0);
		fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK);
1214 1215

		/*
1216
		 * First counter acts as the group leader:
1217
		 */
1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231
		if (group && group_fd == -1)
			group_fd = fd[i][counter][thread_index];

		event_array[nr_poll].fd = fd[i][counter][thread_index];
		event_array[nr_poll].events = POLLIN;
		nr_poll++;

		mmap_array[i][counter][thread_index].counter = counter;
		mmap_array[i][counter][thread_index].prev = 0;
		mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1;
		mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
				PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0);
		if (mmap_array[i][counter][thread_index].base == MAP_FAILED)
			die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1232 1233 1234 1235 1236 1237 1238
	}
}

static int __cmd_top(void)
{
	pthread_t thread;
	int i, counter;
1239
	int ret;
1240
	/*
1241 1242
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
1243
	 */
1244
	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false);
1245 1246
	if (session == NULL)
		return -ENOMEM;
1247

1248 1249
	if (target_tid != -1)
		event__synthesize_thread(target_tid, event__process, session);
1250
	else
1251
		event__synthesize_threads(event__process, session);
1252

1253 1254
	for (i = 0; i < nr_cpus; i++) {
		group_fd = -1;
1255 1256
		for (counter = 0; counter < nr_counters; counter++)
			start_counter(i, counter);
1257 1258
	}

1259
	/* Wait for a minimal set of events before starting the snapshot */
1260
	poll(&event_array[0], nr_poll, 100);
1261

1262
	perf_session__mmap_read(session);
1263

1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279
	if (pthread_create(&thread, NULL, display_thread, NULL)) {
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
1280
		int hits = samples;
1281

1282
		perf_session__mmap_read(session);
1283

1284
		if (hits == samples)
1285 1286 1287 1288 1289
			ret = poll(event_array, nr_poll, 100);
	}

	return 0;
}
1290 1291 1292 1293 1294 1295 1296 1297

/* NULL-terminated usage synopsis passed to parse_options() and usage_with_options(). */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL
};

static const struct option options[] = {
	OPT_CALLBACK('e', "event", NULL, "event",
1298 1299
		     "event selector. use 'perf list' to list available events",
		     parse_events),
1300 1301 1302
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
	OPT_INTEGER('p', "pid", &target_pid,
1303 1304 1305
		    "profile events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "profile events on existing thread id"),
1306 1307 1308 1309
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_INTEGER('C', "CPU", &profile_cpu,
		    "CPU to profile on"),
1310 1311
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
1312 1313
	OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
		    "hide kernel symbols"),
1314 1315 1316 1317
	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
		    "number of mmap data pages"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
M
Mike Galbraith 已提交
1318
	OPT_INTEGER('d', "delay", &delay_secs,
1319 1320 1321
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
			    "dump the symbol table used for profiling"),
1322
	OPT_INTEGER('f', "count-filter", &count_filter,
1323 1324 1325
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &group,
			    "put the counters into a counter group"),
1326 1327
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
1328
	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1329
		    "symbol to annotate"),
A
Anton Blanchard 已提交
1330
	OPT_BOOLEAN('z', "zero", &zero,
1331
		    "zero history across updates"),
1332
	OPT_INTEGER('F', "freq", &freq,
1333
		    "profile at this frequency"),
1334 1335
	OPT_INTEGER('E', "entries", &print_entries,
		    "display this many functions"),
1336 1337
	OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
		    "hide user symbols"),
1338 1339
	OPT_BOOLEAN('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
1340 1341 1342
	OPT_END()
};

1343
/*
 * Entry point of 'perf top': parse the command line, build the list of
 * monitored threads, allocate the per-cpu/per-counter fd and mmap tables,
 * settle the sampling period/frequency defaults and hand over to
 * __cmd_top().
 *
 * Returns -ENOMEM on allocation failure, -1 if symbol initialisation
 * fails, otherwise whatever __cmd_top() returns.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	int counter;
	int cpu_slot, ctr_slot;

	page_size = sysconf(_SC_PAGE_SIZE);

	/* Any leftover non-option argument is a usage error. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	if (target_pid == -1) {
		/* No pid given: monitor the single tid (which may itself be -1). */
		all_tids = malloc(sizeof(*all_tids));
		if (all_tids == NULL)
			return -ENOMEM;

		all_tids[0] = target_tid;
		thread_num = 1;
	} else {
		/* A pid was given: monitor every thread of that process. */
		target_tid = target_pid;
		thread_num = find_all_tid(target_pid, &all_tids);
		if (thread_num <= 0) {
			fprintf(stderr, "Can't find all threads of pid %d\n",
				target_pid);
			usage_with_options(top_usage, options);
		}
	}

	/* One fd and one mmap_data per thread for every (cpu, counter) slot. */
	for (cpu_slot = 0; cpu_slot < MAX_NR_CPUS; cpu_slot++) {
		for (ctr_slot = 0; ctr_slot < MAX_COUNTERS; ctr_slot++) {
			fd[cpu_slot][ctr_slot] =
				malloc(sizeof(*fd[cpu_slot][ctr_slot]) * thread_num);
			mmap_array[cpu_slot][ctr_slot] =
				malloc(sizeof(*mmap_array[cpu_slot][ctr_slot]) * thread_num);

			if (fd[cpu_slot][ctr_slot] == NULL ||
			    mmap_array[cpu_slot][ctr_slot] == NULL)
				return -ENOMEM;
		}
	}

	event_array = malloc(sizeof(*event_array) * MAX_NR_CPUS * MAX_COUNTERS * thread_num);
	if (event_array == NULL)
		return -ENOMEM;

	/* CPU and PID are mutually exclusive */
	if (profile_cpu != -1 && target_tid > 0) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		profile_cpu = -1;
	}

	if (nr_counters == 0)
		nr_counters = 1;

	symbol_conf.priv_size = (sizeof(struct sym_entry) +
				 (nr_counters + 1) * sizeof(unsigned long));

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0)
		return -1;

	/* Refresh at most once per second. */
	if (delay_secs < 1)
		delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval) {
		freq = 0;
	} else if (!freq) {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	} else {
		default_interval = freq;
	}

	/*
	 * Fill in the ones not specifically initialized via -c:
	 */
	for (counter = 0; counter < nr_counters; counter++) {
		if (!attrs[counter].sample_period)
			attrs[counter].sample_period = default_interval;
	}

	/* A single slot suffices when bound to one thread or one CPU. */
	if (target_tid == -1 && profile_cpu == -1)
		nr_cpus = read_cpu_map();
	else
		nr_cpus = 1;

	get_term_dimensions(&winsize);
	if (!print_entries) {
		/* No explicit -E: size the listing from the terminal and track resizes. */
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	return __cmd_top();
}