builtin-top.c 33.3 KB
Newer Older
1
/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19
#include "builtin.h"

#include "perf.h"

#include "util/color.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/xyarray.h"

#include "util/debug.h"

#include <assert.h>
#include <fcntl.h>

#include <stdio.h>
#include <termios.h>
#include <unistd.h>
#include <inttypes.h>
#include <limits.h>

#include <errno.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

63
/* Yields, as an int lvalue, the cell (x, y) of evsel e's fd xyarray. */
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
64

65 66
/* Events being profiled; walked via ->entries and counted by ->nr_entries. */
struct perf_evlist *evsel_list;

static bool system_wide = false;
static int default_interval = 0;

/* Rows whose snap_count is below this are not listed by print_sym_table(). */
static int count_filter = 5;
/* Number of rows to display; derived from the terminal height or set with [e]. */
static int print_entries;

/* -1 means "not set"; shown in the header line when set. */
static int target_pid = -1;
static int target_tid = -1;
/* Passed to perf_evsel__open() and perf_evlist__mmap(). */
static struct thread_map *threads;
static bool inherit = false;
static struct cpu_map *cpus;
static int realtime_prio = 0;
static bool group = false;
static unsigned int page_size;
/* Passed to perf_evlist__mmap() as the number of pages to map. */
static unsigned int mmap_pages = 128;
/* Sampling frequency requested in start_counters() when non-zero. */
static int freq = 1000; /* 1 KHz */

/* Seconds between two screen refreshes. */
static int delay_secs = 2;
/* When set, counts are reset after every refresh instead of decayed by 7/8. */
static bool zero = false;
/* When set, the DSO list is printed to stderr on quit. */
static bool dump_symtab = false;

static bool hide_kernel_symbols = false;
static bool hide_user_symbols = false;
/* Terminal size last obtained through get_term_dimensions(). */
static struct winsize winsize;
92

93 94 95 96 97 98 99 100 101 102 103
/*
 * Source
 */

/* One line of the objdump listing of the annotated symbol (singly linked). */
struct source_line {
	u64			eip;	/* address parsed from the line; 0 when the line carries none */
	unsigned long		count[MAX_COUNTERS];	/* hits per counter index */
	char			*line;	/* text read by getline(), newline stripped */
	struct source_line	*next;	/* next line of the listing, NULL at the end */
};

104
/* Symbol name to annotate; cleared by symbol_filter() once the symbol is found. */
static const char *sym_filter = NULL;
/* Symbol currently being annotated, NULL when annotation is off. */
struct sym_entry *sym_filter_entry = NULL;
/* Entry picked by symbol_filter(), installed later by event__process_sample(). */
struct sym_entry *sym_filter_entry_sched = NULL;
/* Minimum percentage for a source line to be shown by show_details(). */
static int sym_pcnt_filter = 5;
/* Index of the counter used for annotation and for the unweighted display. */
static int sym_counter = 0;
static struct perf_evsel *sym_evsel = NULL;
/* Toggled between -1 and 0 by the [w] key; 0 selects the plain count display. */
static int display_weighted = -1;
static const char *cpu_list;
112

113 114 115 116
/*
 * Symbols
 */

117 118 119 120 121 122 123
/* Annotation state of a sym_entry: the objdump listing and the lock guarding it. */
struct sym_entry_source {
	struct source_line	*source;	/* line matching the symbol start, set by lookup_sym_source() */
	struct source_line	*lines;	/* head of the listing built by parse_source() */
	struct source_line	**lines_tail;	/* link where parse_source() appends the next line */
	pthread_mutex_t		lock;	/* held while the listing is read or updated */
};

124
struct sym_entry {
125 126
	struct rb_node		rb_node;
	struct list_head	node;
127 128
	unsigned long		snap_count;
	double			weight;
129
	int			skip;
130
	u16			name_len;
131
	u8			origin;
132
	struct map		*map;
133
	struct sym_entry_source	*src;
134
	unsigned long		count[0];
135 136
};

137 138 139 140
/*
 * Source functions
 */

141 142
static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
{
143
       return ((void *)self) + symbol_conf.priv_size;
144 145
}

146
/*
 * Fill *ws with the terminal size.
 *
 * Sources, in order: the LINES and COLUMNS environment variables (both must
 * be set and non-zero), the TIOCGWINSZ ioctl on fd 1, and finally 25x80.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *rows = getenv("LINES");

	if (rows != NULL) {
		const char *cols;

		ws->ws_row = atoi(rows);
		cols = getenv("COLUMNS");
		if (cols != NULL) {
			ws->ws_col = atoi(cols);
			if (ws->ws_row != 0 && ws->ws_col != 0)
				return;
		}
	}

#ifdef TIOCGWINSZ
	if (!ioctl(1, TIOCGWINSZ, ws) && ws->ws_row != 0 && ws->ws_col != 0)
		return;
#endif

	/* Nothing usable: fall back to a classic 80x25 terminal. */
	ws->ws_row = 25;
	ws->ws_col = 80;
}

168
static void update_print_entries(struct winsize *ws)
169
{
170 171
	print_entries = ws->ws_row;

172 173 174 175 176 177
	if (print_entries > 9)
		print_entries -= 9;
}

/* SIGWINCH handler: re-read the terminal size and recompute print_entries. */
static void sig_winch_handler(int sig __used)
{
	struct winsize *ws = &winsize;

	get_term_dimensions(ws);
	update_print_entries(ws);
}

182
static int parse_source(struct sym_entry *syme)
183 184
{
	struct symbol *sym;
185
	struct sym_entry_source *source;
186
	struct map *map;
187
	FILE *file;
188
	char command[PATH_MAX*2];
189 190
	const char *path;
	u64 len;
191 192

	if (!syme)
193 194 195 196 197 198 199 200 201 202
		return -1;

	sym = sym_entry__symbol(syme);
	map = syme->map;

	/*
	 * We can't annotate with just /proc/kallsyms
	 */
	if (map->dso->origin == DSO__ORIG_KERNEL)
		return -1;
203

204
	if (syme->src == NULL) {
205
		syme->src = zalloc(sizeof(*source));
206
		if (syme->src == NULL)
207
			return -1;
208 209 210 211 212 213 214
		pthread_mutex_init(&syme->src->lock, NULL);
	}

	source = syme->src;

	if (source->lines) {
		pthread_mutex_lock(&source->lock);
215 216
		goto out_assign;
	}
217
	path = map->dso->long_name;
218 219 220

	len = sym->end - sym->start;

221
	sprintf(command,
222
		"objdump --start-address=%#0*" PRIx64 " --stop-address=%#0*" PRIx64 " -dS %s",
223 224
		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
225 226 227

	file = popen(command, "r");
	if (!file)
228
		return -1;
229

230 231
	pthread_mutex_lock(&source->lock);
	source->lines_tail = &source->lines;
232 233 234
	while (!feof(file)) {
		struct source_line *src;
		size_t dummy = 0;
235
		char *c, *sep;
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250

		src = malloc(sizeof(struct source_line));
		assert(src != NULL);
		memset(src, 0, sizeof(struct source_line));

		if (getline(&src->line, &dummy, file) < 0)
			break;
		if (!src->line)
			break;

		c = strchr(src->line, '\n');
		if (c)
			*c = 0;

		src->next = NULL;
251 252
		*source->lines_tail = src;
		source->lines_tail = &src->next;
253

254 255 256 257 258
		src->eip = strtoull(src->line, &sep, 16);
		if (*sep == ':')
			src->eip = map__objdump_2ip(map, src->eip);
		else /* this line has no ip info (e.g. source line) */
			src->eip = 0;
259 260 261 262
	}
	pclose(file);
out_assign:
	sym_filter_entry = syme;
263
	pthread_mutex_unlock(&source->lock);
264
	return 0;
265 266 267 268 269 270 271
}

static void __zero_source_counters(struct sym_entry *syme)
{
	int i;
	struct source_line *line;

272
	line = syme->src->lines;
273
	while (line) {
274
		for (i = 0; i < evsel_list->nr_entries; i++)
275 276 277 278 279 280 281 282 283 284 285 286
			line->count[i] = 0;
		line = line->next;
	}
}

/*
 * Account one sample at address @ip for counter index @counter in the
 * listing of the annotated symbol.
 *
 * Does nothing unless @syme is the symbol being annotated and its listing
 * has been located. The lock is only tried, never waited for: a sample that
 * arrives while the listing is in use is dropped.
 */
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	struct source_line *line;

	if (syme != sym_filter_entry)
		return;

	/* The lock lives inside src, so src must be checked before locking. */
	if (syme->src == NULL)
		return;

	if (pthread_mutex_trylock(&syme->src->lock))
		return;

	if (syme->src->source == NULL)
		goto out_unlock;

	for (line = syme->src->lines; line; line = line->next) {
		/* skip lines without IP info */
		if (line->eip == 0)
			continue;
		if (line->eip == ip) {
			line->count[counter]++;
			break;
		}
		/* stop at the first address past ip */
		if (line->eip > ip)
			break;
	}
out_unlock:
	pthread_mutex_unlock(&syme->src->lock);
}

308 309
#define PATTERN_LEN		(BITS_PER_LONG / 4 + 2)

310 311
static void lookup_sym_source(struct sym_entry *syme)
{
312
	struct symbol *symbol = sym_entry__symbol(syme);
313
	struct source_line *line;
314
	char pattern[PATTERN_LEN + 1];
315

316
	sprintf(pattern, "%0*" PRIx64 " <", BITS_PER_LONG / 4,
317
		map__rip_2objdump(syme->map, symbol->start));
318

319 320
	pthread_mutex_lock(&syme->src->lock);
	for (line = syme->src->lines; line; line = line->next) {
321
		if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
322
			syme->src->source = line;
323 324 325
			break;
		}
	}
326
	pthread_mutex_unlock(&syme->src->lock);
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
}

/*
 * Print up to @count listing lines starting at @queue, each with its hit
 * count for sym_counter and that count as a percentage of @total.
 * A zero @total prints 0.0% instead of dividing by zero.
 */
static void show_lines(struct source_line *queue, int count, int total)
{
	int i;
	struct source_line *line;

	line = queue;
	for (i = 0; i < count && line; i++) {
		float pcnt = 0.0;

		if (total)
			pcnt = 100.0*(float)line->count[sym_counter]/(float)total;

		printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
		line = line->next;
	}
}

#define TRACE_COUNT     3

static void show_details(struct sym_entry *syme)
{
	struct symbol *symbol;
	struct source_line *line;
	struct source_line *line_queue = NULL;
	int displayed = 0;
	int line_queue_count = 0, total = 0, more = 0;

	if (!syme)
		return;

356
	if (!syme->src->source)
357 358
		lookup_sym_source(syme);

359
	if (!syme->src->source)
360 361
		return;

362
	symbol = sym_entry__symbol(syme);
363
	printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name);
364 365
	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);

366 367
	pthread_mutex_lock(&syme->src->lock);
	line = syme->src->source;
368 369 370 371 372
	while (line) {
		total += line->count[sym_counter];
		line = line->next;
	}

373
	line = syme->src->source;
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
	while (line) {
		float pcnt = 0.0;

		if (!line_queue_count)
			line_queue = line;
		line_queue_count++;

		if (line->count[sym_counter])
			pcnt = 100.0 * line->count[sym_counter] / (float)total;
		if (pcnt >= (float)sym_pcnt_filter) {
			if (displayed <= print_entries)
				show_lines(line_queue, line_queue_count, total);
			else more++;
			displayed += line_queue_count;
			line_queue_count = 0;
			line_queue = NULL;
		} else if (line_queue_count > TRACE_COUNT) {
			line_queue = line_queue->next;
			line_queue_count--;
		}

		line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
		line = line->next;
	}
398
	pthread_mutex_unlock(&syme->src->lock);
399 400 401
	if (more)
		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
}
402

403
/*
 * Symbols are added to this list by event__process_sample() and are removed
 * by print_sym_table() once their count has decayed to zero or their origin
 * is hidden.
 */
static LIST_HEAD(active_symbols);
408
/*
 * Taken around insertions into and removals from active_symbols. The list
 * walks in this file hold it only while fetching the first entry.
 */
static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
409 410 411 412 413 414

/*
 * Ordering weight: count-1 * count-2 * ... / count-n
 */
static double sym_weight(const struct sym_entry *sym)
{
415
	double weight = sym->snap_count;
416 417
	int counter;

418 419 420
	if (!display_weighted)
		return weight;

421
	for (counter = 1; counter < evsel_list->nr_entries - 1; counter++)
422 423 424 425 426 427 428
		weight *= sym->count[counter];

	weight /= (sym->count[counter] + 1);

	return weight;
}

429
/*
 * Sample tallies for the current refresh interval: incremented by
 * event__process_sample(), reset by print_sym_table().
 */
static long			samples;
static long			kernel_samples, us_samples;
static long			exact_samples;
static long			guest_us_samples, guest_kernel_samples;
/* ANSI sequence: cursor home (ESC [ H) then erase display (ESC [ 2 J). */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";

435
static void __list_insert_active_sym(struct sym_entry *syme)
436 437 438 439
{
	list_add(&syme->node, &active_symbols);
}

440 441 442 443 444 445 446
/* Unlink @syme from active_symbols under active_symbols_lock. */
static void list_remove_active_sym(struct sym_entry *syme)
{
	struct list_head *entry = &syme->node;

	pthread_mutex_lock(&active_symbols_lock);
	list_del_init(entry);
	pthread_mutex_unlock(&active_symbols_lock);
}

447 448 449 450 451 452 453 454 455 456
/*
 * Insert @se into @tree keyed by weight. Heavier entries go to the left, so
 * an in-order walk visits symbols from heaviest to lightest.
 */
static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
{
	struct rb_node **link = &tree->rb_node;
	struct rb_node *parent = NULL;

	while (*link != NULL) {
		struct sym_entry *cur;

		parent = *link;
		cur = rb_entry(parent, struct sym_entry, rb_node);
		link = se->weight > cur->weight ? &parent->rb_left
						: &parent->rb_right;
	}

	rb_link_node(&se->rb_node, parent, link);
	rb_insert_color(&se->rb_node, tree);
}
466 467 468

static void print_sym_table(void)
{
469
	int printed = 0, j;
470 471
	struct perf_evsel *counter;
	int snap = !display_weighted ? sym_counter : 0;
472
	float samples_per_sec = samples/delay_secs;
473 474 475 476
	float ksamples_per_sec = kernel_samples/delay_secs;
	float us_samples_per_sec = (us_samples)/delay_secs;
	float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs;
	float guest_us_samples_per_sec = (guest_us_samples)/delay_secs;
477
	float esamples_percent = (100.0*exact_samples)/samples;
478
	float sum_ksamples = 0.0;
479 480 481
	struct sym_entry *syme, *n;
	struct rb_root tmp = RB_ROOT;
	struct rb_node *nd;
482
	int sym_width = 0, dso_width = 0, dso_short_width = 0;
483
	const int win_width = winsize.ws_col - 1;
484

485 486
	samples = us_samples = kernel_samples = exact_samples = 0;
	guest_kernel_samples = guest_us_samples = 0;
487

488
	/* Sort the active symbols */
489 490 491 492 493
	pthread_mutex_lock(&active_symbols_lock);
	syme = list_entry(active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&active_symbols_lock);

	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
494
		syme->snap_count = syme->count[snap];
495
		if (syme->snap_count != 0) {
496

497 498 499 500 501 502 503
			if ((hide_user_symbols &&
			     syme->origin == PERF_RECORD_MISC_USER) ||
			    (hide_kernel_symbols &&
			     syme->origin == PERF_RECORD_MISC_KERNEL)) {
				list_remove_active_sym(syme);
				continue;
			}
504
			syme->weight = sym_weight(syme);
505
			rb_insert_active_sym(&tmp, syme);
506
			sum_ksamples += syme->snap_count;
507

508
			for (j = 0; j < evsel_list->nr_entries; j++)
509 510
				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
		} else
511
			list_remove_active_sym(syme);
512 513
	}

514
	puts(CONSOLE_CLEAR);
515

516
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
	if (!perf_guest) {
		printf("   PerfTop:%8.0f irqs/sec  kernel:%4.1f%%"
			"  exact: %4.1f%% [",
			samples_per_sec,
			100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
					 samples_per_sec)),
			esamples_percent);
	} else {
		printf("   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% us:%4.1f%%"
			" guest kernel:%4.1f%% guest us:%4.1f%%"
			" exact: %4.1f%% [",
			samples_per_sec,
			100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) /
					  samples_per_sec)),
			100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) /
					  samples_per_sec)),
			100.0 - (100.0 * ((samples_per_sec -
						guest_kernel_samples_per_sec) /
					  samples_per_sec)),
			100.0 - (100.0 * ((samples_per_sec -
					   guest_us_samples_per_sec) /
					  samples_per_sec)),
			esamples_percent);
	}
541

542
	if (evsel_list->nr_entries == 1 || !display_weighted) {
543
		struct perf_evsel *first;
544
		first = list_entry(evsel_list->entries.next, struct perf_evsel, node);
545
		printf("%" PRIu64, (uint64_t)first->attr.sample_period);
I
Ingo Molnar 已提交
546 547 548 549 550
		if (freq)
			printf("Hz ");
		else
			printf(" ");
	}
551

552
	if (!display_weighted)
553
		printf("%s", event_name(sym_evsel));
554
	else list_for_each_entry(counter, &evsel_list->entries, node) {
555
		if (counter->idx)
556 557 558 559 560 561 562
			printf("/");

		printf("%s", event_name(counter));
	}

	printf( "], ");

563 564
	if (target_pid != -1)
		printf(" (target_pid: %d", target_pid);
565 566
	else if (target_tid != -1)
		printf(" (target_tid: %d", target_tid);
567 568 569
	else
		printf(" (all");

570
	if (cpu_list)
571
		printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
572
	else {
573
		if (target_tid != -1)
574 575
			printf(")\n");
		else
576
			printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
577 578
	}

579
	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
580

581 582 583 584 585
	if (sym_filter_entry) {
		show_details(sym_filter_entry);
		return;
	}

586 587 588 589 590 591 592 593 594
	/*
	 * Find the longest symbol name that will be displayed
	 */
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
		syme = rb_entry(nd, struct sym_entry, rb_node);
		if (++printed > print_entries ||
		    (int)syme->snap_count < count_filter)
			continue;

595 596 597
		if (syme->map->dso->long_name_len > dso_width)
			dso_width = syme->map->dso->long_name_len;

598 599 600
		if (syme->map->dso->short_name_len > dso_short_width)
			dso_short_width = syme->map->dso->short_name_len;

601 602 603 604 605 606
		if (syme->name_len > sym_width)
			sym_width = syme->name_len;
	}

	printed = 0;

607 608 609 610 611
	if (sym_width + dso_width > winsize.ws_col - 29) {
		dso_width = dso_short_width;
		if (sym_width + dso_width > winsize.ws_col - 29)
			sym_width = winsize.ws_col - dso_width - 29;
	}
612
	putchar('\n');
613
	if (evsel_list->nr_entries == 1)
614
		printf("             samples  pcnt");
615
	else
616
		printf("   weight    samples  pcnt");
617

618 619
	if (verbose)
		printf("         RIP       ");
620
	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
621
	printf("   %s    _______ _____",
622
	       evsel_list->nr_entries == 1 ? "      " : "______");
623
	if (verbose)
624
		printf(" ________________");
625
	printf(" %-*.*s", sym_width, sym_width, graph_line);
626
	printf(" %-*.*s", dso_width, dso_width, graph_line);
627
	puts("\n");
628

629
	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
630
		struct symbol *sym;
631
		double pcnt;
632

633
		syme = rb_entry(nd, struct sym_entry, rb_node);
634
		sym = sym_entry__symbol(syme);
635
		if (++printed > print_entries || (int)syme->snap_count < count_filter)
636
			continue;
637

638 639
		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
					 sum_ksamples));
640

641
		if (evsel_list->nr_entries == 1 || !display_weighted)
642
			printf("%20.2f ", syme->weight);
643
		else
644
			printf("%9.1f %10ld ", syme->weight, syme->snap_count);
645

646
		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
647
		if (verbose)
648
			printf(" %016" PRIx64, sym->start);
649
		printf(" %-*.*s", sym_width, sym_width, sym->name);
650 651 652 653
		printf(" %-*.*s\n", dso_width, dso_width,
		       dso_width >= syme->map->dso->long_name_len ?
					syme->map->dso->long_name :
					syme->map->dso->short_name);
654 655 656
	}
}

657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
/*
 * Print "@msg: " and read one line from stdin as a non-negative decimal.
 *
 * *target is updated only when the line consists solely of digits and the
 * value fits in an int; an empty line yields 0. On read failure, non-digit
 * input or overflow *target is left untouched.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	unsigned long tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* the cast keeps isdigit() defined for bytes >= 0x80 */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	tmp = strtoul(buf, NULL, 10);
	if (errno == ERANGE || tmp > INT_MAX)
		goto out_free;
	*target = tmp;
out_free:
	/* getline() may have allocated buf even when it failed */
	free(buf);
}

/*
 * Prompt for a percentage with @msg. *target is updated only when the value
 * read lies in 0..100; input that prompt_integer() rejects leaves it as is.
 */
static void prompt_percent(int *target, const char *msg)
{
	/* out-of-range sentinel: stays -1 when prompt_integer() stores nothing */
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}

static void prompt_symbol(struct sym_entry **target, const char *msg)
{
	char *buf = malloc(0), *p;
	struct sym_entry *syme = *target, *n, *found = NULL;
	size_t dummy = 0;

	/* zero counters of active symbol */
	if (syme) {
700
		pthread_mutex_lock(&syme->src->lock);
701 702
		__zero_source_counters(syme);
		*target = NULL;
703
		pthread_mutex_unlock(&syme->src->lock);
704 705 706 707 708 709 710 711 712 713 714 715 716 717 718
	}

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	pthread_mutex_lock(&active_symbols_lock);
	syme = list_entry(active_symbols.next, struct sym_entry, node);
	pthread_mutex_unlock(&active_symbols_lock);

	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
719
		struct symbol *sym = sym_entry__symbol(syme);
720 721 722 723 724 725 726 727

		if (!strcmp(buf, sym->name)) {
			found = syme;
			break;
		}
	}

	if (!found) {
728
		fprintf(stderr, "Sorry, %s is not active.\n", buf);
729 730 731 732 733 734 735 736 737
		sleep(1);
		return;
	} else
		parse_source(found);

out_free:
	free(buf);
}

738
static void print_mapped_keys(void)
739
{
740 741 742
	char *name = NULL;

	if (sym_filter_entry) {
743
		struct symbol *sym = sym_entry__symbol(sym_filter_entry);
744 745 746 747 748 749 750
		name = sym->name;
	}

	fprintf(stdout, "\nMapped keys:\n");
	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", delay_secs);
	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", print_entries);

751
	if (evsel_list->nr_entries > 1)
752
		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(sym_evsel));
753 754 755

	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);

756 757 758
	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
	fprintf(stdout, "\t[S]     stop annotation.\n");
759

760
	if (evsel_list->nr_entries > 1)
761 762
		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);

763
	fprintf(stdout,
764
		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
765 766 767 768
		hide_kernel_symbols ? "yes" : "no");
	fprintf(stdout,
		"\t[U]     hide user symbols.               \t(%s)\n",
		hide_user_symbols ? "yes" : "no");
769
	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", zero ? 1 : 0);
770 771 772 773 774 775 776 777 778 779 780 781
	fprintf(stdout, "\t[qQ]    quit.\n");
}

static int key_mapped(int c)
{
	switch (c) {
		case 'd':
		case 'e':
		case 'f':
		case 'z':
		case 'q':
		case 'Q':
782 783
		case 'K':
		case 'U':
784 785 786
		case 'F':
		case 's':
		case 'S':
787 788 789
			return 1;
		case 'E':
		case 'w':
790
			return evsel_list->nr_entries > 1 ? 1 : 0;
791 792
		default:
			break;
793 794 795
	}

	return 0;
796 797
}

798
static void handle_keypress(struct perf_session *session, int c)
799
{
800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
	if (!key_mapped(c)) {
		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
		struct termios tc, save;

		print_mapped_keys();
		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
		fflush(stdout);

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 0;
		tcsetattr(0, TCSANOW, &tc);

		poll(&stdin_poll, 1, -1);
		c = getc(stdin);

		tcsetattr(0, TCSAFLUSH, &save);
		if (!key_mapped(c))
			return;
	}

823 824 825
	switch (c) {
		case 'd':
			prompt_integer(&delay_secs, "Enter display delay");
826 827
			if (delay_secs < 1)
				delay_secs = 1;
828 829 830
			break;
		case 'e':
			prompt_integer(&print_entries, "Enter display entries (lines)");
831
			if (print_entries == 0) {
832
				sig_winch_handler(SIGWINCH);
833 834 835
				signal(SIGWINCH, sig_winch_handler);
			} else
				signal(SIGWINCH, SIG_DFL);
836 837
			break;
		case 'E':
838
			if (evsel_list->nr_entries > 1) {
839
				fprintf(stderr, "\nAvailable events:");
840

841
				list_for_each_entry(sym_evsel, &evsel_list->entries, node)
842
					fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel));
843 844 845

				prompt_integer(&sym_counter, "Enter details event counter");

846 847
				if (sym_counter >= evsel_list->nr_entries) {
					sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node);
848
					sym_counter = 0;
849
					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel));
850
					sleep(1);
851
					break;
852
				}
853
				list_for_each_entry(sym_evsel, &evsel_list->entries, node)
854 855
					if (sym_evsel->idx == sym_counter)
						break;
856 857 858 859 860 861 862 863
			} else sym_counter = 0;
			break;
		case 'f':
			prompt_integer(&count_filter, "Enter display event count filter");
			break;
		case 'F':
			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
			break;
864 865 866
		case 'K':
			hide_kernel_symbols = !hide_kernel_symbols;
			break;
867 868 869
		case 'q':
		case 'Q':
			printf("exiting.\n");
870
			if (dump_symtab)
871
				perf_session__fprintf_dsos(session, stderr);
872 873 874 875 876 877 878 879 880 881
			exit(0);
		case 's':
			prompt_symbol(&sym_filter_entry, "Enter details symbol");
			break;
		case 'S':
			if (!sym_filter_entry)
				break;
			else {
				struct sym_entry *syme = sym_filter_entry;

882
				pthread_mutex_lock(&syme->src->lock);
883 884
				sym_filter_entry = NULL;
				__zero_source_counters(syme);
885
				pthread_mutex_unlock(&syme->src->lock);
886 887
			}
			break;
888 889 890
		case 'U':
			hide_user_symbols = !hide_user_symbols;
			break;
891 892 893
		case 'w':
			display_weighted = ~display_weighted;
			break;
894
		case 'z':
895
			zero = !zero;
896
			break;
897 898
		default:
			break;
899 900 901
	}
}

902
/*
 * Display thread body: redraw the table every delay_secs seconds until a key
 * is typed, hand that key to handle_keypress(), and start over.
 *
 * @arg is the struct perf_session passed on to handle_keypress().
 * Never returns; the process ends through exit() on 'q'.
 */
static void *display_thread(void *arg __used)
{
	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
	struct termios tc, save;
	int delay_msecs, c;
	struct perf_session *session = (struct perf_session *) arg;

	tcgetattr(0, &save);
	tc = save;
	tc.c_lflag &= ~(ICANON | ECHO);
	tc.c_cc[VMIN] = 0;
	tc.c_cc[VTIME] = 0;

repeat:
	delay_msecs = delay_secs * 1000;
	tcsetattr(0, TCSANOW, &tc);
	/* trash return*/
	getc(stdin);

	for (;;) {
		int ready;

		print_sym_table();

		ready = poll(&stdin_poll, 1, delay_msecs);
		/*
		 * Either the timeout expired or poll() was interrupted by a
		 * signal such as SIGWINCH: refresh the screen in both cases
		 * instead of mistaking the interruption for a keypress.
		 */
		if (ready == 0 || (ready < 0 && errno == EINTR))
			continue;
		break;
	}

	c = getc(stdin);
	tcsetattr(0, TCSAFLUSH, &save);

	handle_keypress(session, c);
	goto repeat;

	return NULL;
}

934
/* Tag samples to be skipped. */
935
/* NULL-terminated list of symbol names that symbol_filter() marks as skipped. */
static const char *skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};

949
static int symbol_filter(struct map *map, struct symbol *sym)
950
{
951 952
	struct sym_entry *syme;
	const char *name = sym->name;
953
	int i;
954

955 956 957 958 959 960 961
	/*
	 * ppc64 uses function descriptors and appends a '.' to the
	 * start of every instruction address. Remove it.
	 */
	if (name[0] == '.')
		name++;

962 963 964 965 966 967 968
	if (!strcmp(name, "_text") ||
	    !strcmp(name, "_etext") ||
	    !strcmp(name, "_sinittext") ||
	    !strncmp("init_module", name, 11) ||
	    !strncmp("cleanup_module", name, 14) ||
	    strstr(name, "_text_start") ||
	    strstr(name, "_text_end"))
969 970
		return 1;

971
	syme = symbol__priv(sym);
972
	syme->map = map;
973
	syme->src = NULL;
974 975 976 977 978 979

	if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
		/* schedule initial sym_filter_entry setup */
		sym_filter_entry_sched = syme;
		sym_filter = NULL;
	}
980

981 982 983 984 985 986
	for (i = 0; skip_symbols[i]; i++) {
		if (!strcmp(skip_symbols[i], name)) {
			syme->skip = 1;
			break;
		}
	}
987

988 989 990
	if (!syme->skip)
		syme->name_len = strlen(sym->name);

991 992 993
	return 0;
}

994
static void event__process_sample(const event_t *self,
995
				  struct sample_data *sample,
996
				  struct perf_session *session)
997
{
998 999
	u64 ip = self->ip.ip;
	struct sym_entry *syme;
1000
	struct addr_location al;
1001
	struct machine *machine;
1002
	u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1003

1004 1005
	++samples;

1006
	switch (origin) {
1007
	case PERF_RECORD_MISC_USER:
1008
		++us_samples;
1009 1010
		if (hide_user_symbols)
			return;
1011
		machine = perf_session__find_host_machine(session);
1012
		break;
1013
	case PERF_RECORD_MISC_KERNEL:
1014
		++kernel_samples;
1015 1016
		if (hide_kernel_symbols)
			return;
1017
		machine = perf_session__find_host_machine(session);
1018 1019 1020
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
		++guest_kernel_samples;
1021
		machine = perf_session__find_machine(session, self->ip.pid);
1022
		break;
1023 1024 1025 1026 1027 1028 1029
	case PERF_RECORD_MISC_GUEST_USER:
		++guest_us_samples;
		/*
		 * TODO: we don't process guest user from host side
		 * except simple counting.
		 */
		return;
1030 1031 1032 1033
	default:
		return;
	}

1034
	if (!machine && perf_guest) {
1035 1036 1037 1038 1039
		pr_err("Can't find guest [%d]'s kernel information\n",
			self->ip.pid);
		return;
	}

P
Peter Zijlstra 已提交
1040
	if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
1041 1042
		exact_samples++;

1043
	if (event__preprocess_sample(self, session, &al, sample,
1044
				     symbol_filter) < 0 ||
1045
	    al.filtered)
1046
		return;
1047

1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059
	if (al.sym == NULL) {
		/*
		 * As we do lazy loading of symtabs we only will know if the
		 * specified vmlinux file is invalid when we actually have a
		 * hit in kernel space and then try to load it. So if we get
		 * here and there are _no_ symbols in the DSO backing the
		 * kernel map, bail out.
		 *
		 * We may never get here, for instance, if we use -K/
		 * --hide-kernel-symbols, even if the user specifies an
		 * invalid --vmlinux ;-)
		 */
1060
		if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
1061 1062 1063 1064 1065 1066 1067 1068 1069
		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
			pr_err("The %s file can't be used\n",
			       symbol_conf.vmlinux_name);
			exit(1);
		}

		return;
	}

1070 1071 1072 1073
	/* let's see, whether we need to install initial sym_filter_entry */
	if (sym_filter_entry_sched) {
		sym_filter_entry = sym_filter_entry_sched;
		sym_filter_entry_sched = NULL;
1074 1075 1076 1077 1078 1079
		if (parse_source(sym_filter_entry) < 0) {
			struct symbol *sym = sym_entry__symbol(sym_filter_entry);

			pr_err("Can't annotate %s", sym->name);
			if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
				pr_err(": No vmlinux file was found in the path:\n");
1080
				machine__fprintf_vmlinux_path(machine, stderr);
1081 1082 1083 1084
			} else
				pr_err(".\n");
			exit(1);
		}
1085 1086
	}

1087
	syme = symbol__priv(al.sym);
1088
	if (!syme->skip) {
1089 1090
		struct perf_evsel *evsel;

1091
		syme->origin = origin;
1092 1093 1094
		evsel = perf_evlist__id2evsel(evsel_list, sample->id);
		assert(evsel != NULL);
		syme->count[evsel->idx]++;
1095
		record_precise_ip(syme, evsel->idx, ip);
1096 1097 1098 1099 1100
		pthread_mutex_lock(&active_symbols_lock);
		if (list_empty(&syme->node) || !syme->node.next)
			__list_insert_active_sym(syme);
		pthread_mutex_unlock(&active_symbols_lock);
	}
1101 1102
}

1103
static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
1104
{
1105
	struct sample_data sample;
1106
	event_t *event;
1107

1108
	while ((event = perf_evlist__read_on_cpu(evsel_list, cpu)) != NULL) {
1109
		event__parse_sample(event, self, &sample);
1110

1111
		if (event->header.type == PERF_RECORD_SAMPLE)
1112
			event__process_sample(event, &sample, self);
1113
		else
1114
			event__process(event, &sample, self);
1115 1116 1117
	}
}

1118
static void perf_session__mmap_read(struct perf_session *self)
1119
{
1120 1121 1122 1123
	int i;

	for (i = 0; i < cpus->nr; i++)
		perf_session__mmap_read_cpu(self, i);
1124 1125
}

1126 1127 1128
static void start_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *counter;
1129

1130 1131
	list_for_each_entry(counter, &evlist->entries, node) {
		struct perf_event_attr *attr = &counter->attr;
1132

1133 1134 1135 1136 1137 1138 1139
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

		if (freq) {
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq	  = 1;
			attr->sample_freq = freq;
		}
1140

1141 1142 1143 1144 1145
		if (evlist->nr_entries > 1) {
			attr->sample_type |= PERF_SAMPLE_ID;
			attr->read_format |= PERF_FORMAT_ID;
		}

1146 1147 1148
		attr->mmap = 1;
try_again:
		if (perf_evsel__open(counter, cpus, threads, group, inherit) < 0) {
1149 1150 1151
			int err = errno;

			if (err == EPERM || err == EACCES)
1152 1153 1154
				die("Permission error - are you root?\n"
					"\t Consider tweaking"
					" /proc/sys/kernel/perf_event_paranoid.\n");
1155 1156 1157 1158 1159
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
1160 1161
			if (attr->type == PERF_TYPE_HARDWARE &&
			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
1162 1163 1164 1165 1166 1167 1168 1169 1170

				if (verbose)
					warning(" ... trying to fall back to cpu-clock-ticks\n");

				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
			printf("\n");
1171 1172 1173
			error("sys_perf_event_open() syscall returned with %d "
			      "(%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));
1174 1175 1176
			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			exit(-1);
		}
1177
	}
1178 1179 1180

	if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, true) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1181 1182 1183 1184 1185
}

static int __cmd_top(void)
{
	pthread_t thread;
1186
	struct perf_evsel *first;
1187
	int ret;
1188
	/*
1189 1190
	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
1191
	 */
1192
	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
1193 1194
	if (session == NULL)
		return -ENOMEM;
1195

1196 1197
	if (target_tid != -1)
		event__synthesize_thread(target_tid, event__process, session);
1198
	else
1199
		event__synthesize_threads(event__process, session);
1200

1201
	start_counters(evsel_list);
1202 1203
	first = list_entry(evsel_list->entries.next, struct perf_evsel, node);
	perf_session__set_sample_type(session, first->attr.sample_type);
1204

1205
	/* Wait for a minimal set of events before starting the snapshot */
1206
	poll(evsel_list->pollfd, evsel_list->nr_fds, 100);
1207

1208
	perf_session__mmap_read(session);
1209

1210
	if (pthread_create(&thread, NULL, display_thread, session)) {
1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225
		printf("Could not create display thread.\n");
		exit(-1);
	}

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			printf("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	while (1) {
1226
		int hits = samples;
1227

1228
		perf_session__mmap_read(session);
1229

1230
		if (hits == samples)
1231
			ret = poll(evsel_list->pollfd, evsel_list->nr_fds, 100);
1232 1233 1234 1235
	}

	return 0;
}
1236 1237 1238 1239 1240 1241 1242

/* NULL-terminated usage synopsis passed to parse_options()/usage_with_options(). */
static const char * const top_usage[] = { "perf top [<options>]", NULL };

static const struct option options[] = {
1243
	OPT_CALLBACK('e', "event", &evsel_list, "event",
1244 1245
		     "event selector. use 'perf list' to list available events",
		     parse_events),
1246 1247 1248
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
	OPT_INTEGER('p', "pid", &target_pid,
1249 1250 1251
		    "profile events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "profile events on existing thread id"),
1252 1253
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
1254 1255
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
1256 1257
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
1258 1259
	OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
		    "hide kernel symbols"),
1260
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
1261 1262
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
M
Mike Galbraith 已提交
1263
	OPT_INTEGER('d', "delay", &delay_secs,
1264 1265 1266
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
			    "dump the symbol table used for profiling"),
1267
	OPT_INTEGER('f', "count-filter", &count_filter,
1268 1269 1270
		    "only display functions with more events than this"),
	OPT_BOOLEAN('g', "group", &group,
			    "put the counters into a counter group"),
1271 1272
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
1273
	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1274
		    "symbol to annotate"),
A
Anton Blanchard 已提交
1275
	OPT_BOOLEAN('z', "zero", &zero,
1276
		    "zero history across updates"),
1277
	OPT_INTEGER('F', "freq", &freq,
1278
		    "profile at this frequency"),
1279 1280
	OPT_INTEGER('E', "entries", &print_entries,
		    "display this many functions"),
1281 1282
	OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
		    "hide user symbols"),
1283
	OPT_INCR('v', "verbose", &verbose,
1284
		    "be more verbose (show counter open errors, etc)"),
1285 1286 1287
	OPT_END()
};

1288
/*
 * Entry point of the 'perf top' builtin.
 *
 * Parses the command line, builds the thread and cpu maps, makes sure at
 * least one event is selected, allocates the per-event fd/pollfd/mmap
 * bookkeeping, initializes symbol handling and finally runs __cmd_top().
 *
 * @argc, @argv: command line, consumed by parse_options()
 * @prefix:      unused
 *
 * Returns the status of __cmd_top(), -ENOMEM when the event list or its
 * bookkeeping cannot be allocated, or -1 when symbol__init() fails. Every
 * exit path after the event list has been created goes through the
 * out_free_fd label so that the list is deleted.
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	page_size = sysconf(_SC_PAGE_SIZE);

	/* Any argument left over after option parsing is a usage error. */
	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* -p takes precedence: monitoring a pid overrides any -t value. */
	if (target_pid != -1)
		target_tid = target_pid;

	threads = thread_map__new(target_pid, target_tid);
	if (threads == NULL) {
		pr_err("Problems finding threads of monitor\n");
		usage_with_options(top_usage, options);
	}

	/* CPU and PID are mutually exclusive */
	if (target_tid > 0 && cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		cpu_list = NULL;
	}

	/* No -e given: fall back to the default event. */
	if (!evsel_list->nr_entries &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		/* status is still -ENOMEM; release the list on the way out. */
		goto out_free_fd;
	}

	if (delay_secs < 1)
		delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	/* When following a thread, a dummy cpu map is used instead of -C. */
	if (target_tid != -1)
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(cpu_list);

	if (cpus == NULL)
		usage_with_options(top_usage, options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
			goto out_free_fd;
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (pos->attr.sample_period)
			continue;

		pos->attr.sample_period = default_interval;
	}

	if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0 ||
	    perf_evlist__alloc_mmap(evsel_list, cpus->nr) < 0)
		goto out_free_fd;

	/* The first event in the list is the initially selected one. */
	sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node);

	/*
	 * Private per-symbol area: a struct sym_entry followed by
	 * nr_entries + 1 unsigned longs (presumably its per-event count[]
	 * array -- confirm against the struct sym_entry definition).
	 */
	symbol_conf.priv_size = (sizeof(struct sym_entry) +
				 (evsel_list->nr_entries + 1) * sizeof(unsigned long));

	/* Only search the vmlinux path when no explicit -k file was given. */
	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		status = -1;
		goto out_free_fd;
	}

	get_term_dimensions(&winsize);
	if (print_entries == 0) {
		/* No -E given: size the listing from the terminal and track resizes. */
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	status = __cmd_top();
out_free_fd:
	perf_evlist__delete(evsel_list);

	return status;
}