diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f139f1ab933358c551af7b43d7f53d9e887e4653..d58701346b1ed516a92cf8403f14cc5ea605096c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -31,6 +31,8 @@
 #include <fcntl.h>
 
 #include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
 
 #include <errno.h>
 #include <time.h>
@@ -54,7 +56,7 @@ static int			system_wide			=  0;
 
 static int			default_interval		= 100000;
 
-static u64			count_filter			=  5;
+static int			count_filter			=  5;
 static int			print_entries			= 15;
 
 static int			target_pid			= -1;
@@ -69,14 +71,26 @@ static int			freq				=  0;
 static int			verbose				=  0;
 static char			*vmlinux			=  NULL;
 
-static char			*sym_filter;
-static unsigned long		filter_start;
-static unsigned long		filter_end;
-
 static int			delay_secs			=  2;
 static int			zero;
 static int			dump_symtab;
 
+/*
+ * Source
+ */
+
+struct source_line {
+	u64			eip;
+	unsigned long		count[MAX_COUNTERS];
+	char			*line;
+	struct source_line	*next;
+};
+
+static char			*sym_filter			=  NULL;
+struct sym_entry		*sym_filter_entry		=  NULL;
+static int			sym_pcnt_filter			=  5;
+static int			sym_counter			=  0;
+
 /*
  * Symbols
  */
@@ -91,9 +105,237 @@ struct sym_entry {
 	unsigned long		snap_count;
 	double			weight;
 	int			skip;
+	struct source_line	*source;
+	struct source_line	*lines;
+	struct source_line	**lines_tail;
+	pthread_mutex_t		source_lock;
 };
 
-struct sym_entry		*sym_filter_entry;
+/*
+ * Source functions
+ */
+
+static void parse_source(struct sym_entry *syme)
+{
+	struct symbol *sym;
+	struct module *module;
+	struct section *section = NULL;
+	FILE *file;
+	char command[PATH_MAX*2], *path = vmlinux;
+	u64 start, end, len;
+
+	if (!syme)
+		return;
+
+	if (syme->lines) {
+		pthread_mutex_lock(&syme->source_lock);
+		goto out_assign;
+	}
+
+	sym = (struct symbol *)(syme + 1);
+	module = sym->module;
+
+	if (module)
+		path = module->path;
+	if (!path)
+		return;
+
+	start = sym->obj_start;
+	if (!start)
+		start = sym->start;
+
+	if (module) {
+		section = module->sections->find_section(module->sections, ".text");
+		if (section)
+			start -= section->vma;
+	}
+
+	end = start + sym->end - sym->start + 1;
+	len = sym->end - sym->start;
+
+	sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
+
+	file = popen(command, "r");
+	if (!file)
+		return;
+
+	pthread_mutex_lock(&syme->source_lock);
+	syme->lines_tail = &syme->lines;
+	while (!feof(file)) {
+		struct source_line *src;
+		size_t dummy = 0;
+		char *c;
+
+		src = malloc(sizeof(struct source_line));
+		assert(src != NULL);
+		memset(src, 0, sizeof(struct source_line));
+
+		if (getline(&src->line, &dummy, file) < 0)
+			break;
+		if (!src->line)
+			break;
+
+		c = strchr(src->line, '\n');
+		if (c)
+			*c = 0;
+
+		src->next = NULL;
+		*syme->lines_tail = src;
+		syme->lines_tail = &src->next;
+
+		if (strlen(src->line)>8 && src->line[8] == ':') {
+			src->eip = strtoull(src->line, NULL, 16);
+			if (section)
+				src->eip += section->vma;
+		}
+		if (strlen(src->line)>8 && src->line[16] == ':') {
+			src->eip = strtoull(src->line, NULL, 16);
+			if (section)
+				src->eip += section->vma;
+		}
+	}
+	pclose(file);
+out_assign:
+	sym_filter_entry = syme;
+	pthread_mutex_unlock(&syme->source_lock);
+}
+
+static void __zero_source_counters(struct sym_entry *syme)
+{
+	int i;
+	struct source_line *line;
+
+	line = syme->lines;
+	while (line) {
+		for (i = 0; i < nr_counters; i++)
+			line->count[i] = 0;
+		line = line->next;
+	}
+}
+
+static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
+{
+	struct source_line *line;
+
+	if (syme != sym_filter_entry)
+		return;
+
+	if (pthread_mutex_trylock(&syme->source_lock))
+		return;
+
+	if (!syme->source)
+		goto out_unlock;
+
+	for (line = syme->lines; line; line = line->next) {
+		if (line->eip == ip) {
+			line->count[counter]++;
+			break;
+		}
+		if (line->eip > ip)
+			break;
+	}
+out_unlock:
+	pthread_mutex_unlock(&syme->source_lock);
+}
+
+static void lookup_sym_source(struct sym_entry *syme)
+{
+	struct symbol *symbol = (struct symbol *)(syme + 1);
+	struct source_line *line;
+	char pattern[PATH_MAX];
+	char *idx;
+
+	sprintf(pattern, "<%s>:", symbol->name);
+
+	if (symbol->module) {
+		idx = strstr(pattern, "\t");
+		if (idx)
+			*idx = 0;
+	}
+
+	pthread_mutex_lock(&syme->source_lock);
+	for (line = syme->lines; line; line = line->next) {
+		if (strstr(line->line, pattern)) {
+			syme->source = line;
+			break;
+		}
+	}
+	pthread_mutex_unlock(&syme->source_lock);
+}
+
+static void show_lines(struct source_line *queue, int count, int total)
+{
+	int i;
+	struct source_line *line;
+
+	line = queue;
+	for (i = 0; i < count; i++) {
+		float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
+
+		printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
+		line = line->next;
+	}
+}
+
+#define TRACE_COUNT     3
+
+static void show_details(struct sym_entry *syme)
+{
+	struct symbol *symbol;
+	struct source_line *line;
+	struct source_line *line_queue = NULL;
+	int displayed = 0;
+	int line_queue_count = 0, total = 0, more = 0;
+
+	if (!syme)
+		return;
+
+	if (!syme->source)
+		lookup_sym_source(syme);
+
+	if (!syme->source)
+		return;
+
+	symbol = (struct symbol *)(syme + 1);
+	printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
+	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);
+
+	pthread_mutex_lock(&syme->source_lock);
+	line = syme->source;
+	while (line) {
+		total += line->count[sym_counter];
+		line = line->next;
+	}
+
+	line = syme->source;
+	while (line) {
+		float pcnt = 0.0;
+
+		if (!line_queue_count)
+			line_queue = line;
+		line_queue_count++;
+
+		if (line->count[sym_counter])
+			pcnt = 100.0 * line->count[sym_counter] / (float)total;
+		if (pcnt >= (float)sym_pcnt_filter) {
+			if (displayed <= print_entries)
+				show_lines(line_queue, line_queue_count, total);
+			else more++;
+			displayed += line_queue_count;
+			line_queue_count = 0;
+			line_queue = NULL;
+		} else if (line_queue_count > TRACE_COUNT) {
+			line_queue = line_queue->next;
+			line_queue_count--;
+		}
+
+		line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
+		line = line->next;
+	}
+	pthread_mutex_unlock(&syme->source_lock);
+	if (more)
+		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
+}
 
 struct dso			*kernel_dso;
 
@@ -228,6 +470,11 @@ static void print_sym_table(void)
 
 	printf("------------------------------------------------------------------------------\n\n");
 
+	if (sym_filter_entry) {
+		show_details(sym_filter_entry);
+		return;
+	}
+
 	if (nr_counters == 1)
 		printf("             samples    pcnt");
 	else
@@ -242,7 +489,7 @@ static void print_sym_table(void)
 		struct symbol *sym = (struct symbol *)(syme + 1);
 		double pcnt;
 
-		if (++printed > print_entries || syme->snap_count < count_filter)
+		if (++printed > print_entries || (int)syme->snap_count < count_filter)
 			continue;
 
 		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
@@ -261,19 +508,208 @@ static void print_sym_table(void)
 	}
 }
 
+static void prompt_integer(int *target, const char *msg)
+{
+	char *buf = malloc(0), *p;
+	size_t dummy = 0;
+	int tmp;
+
+	fprintf(stdout, "\n%s: ", msg);
+	if (getline(&buf, &dummy, stdin) < 0)
+		return;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = 0;
+
+	p = buf;
+	while(*p) {
+		if (!isdigit(*p))
+			goto out_free;
+		p++;
+	}
+	tmp = strtoul(buf, NULL, 10);
+	*target = tmp;
+out_free:
+	free(buf);
+}
+
+static void prompt_percent(int *target, const char *msg)
+{
+	int tmp = 0;
+
+	prompt_integer(&tmp, msg);
+	if (tmp >= 0 && tmp <= 100)
+		*target = tmp;
+}
+
+static void prompt_symbol(struct sym_entry **target, const char *msg)
+{
+	char *buf = malloc(0), *p;
+	struct sym_entry *syme = *target, *n, *found = NULL;
+	size_t dummy = 0;
+
+	/* zero counters of active symbol */
+	if (syme) {
+		pthread_mutex_lock(&syme->source_lock);
+		__zero_source_counters(syme);
+		*target = NULL;
+		pthread_mutex_unlock(&syme->source_lock);
+	}
+
+	fprintf(stdout, "\n%s: ", msg);
+	if (getline(&buf, &dummy, stdin) < 0)
+		goto out_free;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = 0;
+
+	pthread_mutex_lock(&active_symbols_lock);
+	syme = list_entry(active_symbols.next, struct sym_entry, node);
+	pthread_mutex_unlock(&active_symbols_lock);
+
+	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+		struct symbol *sym = (struct symbol *)(syme + 1);
+
+		if (!strcmp(buf, sym->name)) {
+			found = syme;
+			break;
+		}
+	}
+
+	if (!found) {
+		fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
+		sleep(1);
+		return;
+	} else
+		parse_source(found);
+
+out_free:
+	free(buf);
+}
+
+static void print_known_keys(void)
+{
+	fprintf(stdout, "\nknown keys:\n");
+	fprintf(stdout, "\t[d]     select display delay.\n");
+	fprintf(stdout, "\t[e]     select display entries (lines).\n");
+	fprintf(stdout, "\t[E]     select annotation event counter.\n");
+	fprintf(stdout, "\t[f]     select normal display count filter.\n");
+	fprintf(stdout, "\t[F]     select annotation display count filter (percentage).\n");
+	fprintf(stdout, "\t[qQ]    quit.\n");
+	fprintf(stdout, "\t[s]     select annotation symbol and start annotation.\n");
+	fprintf(stdout, "\t[S]     stop annotation, revert to normal display.\n");
+	fprintf(stdout, "\t[z]     toggle event count zeroing.\n");
+}
+
+static void handle_keypress(int c)
+{
+	int once = 0;
+repeat:
+	switch (c) {
+		case 'd':
+			prompt_integer(&delay_secs, "Enter display delay");
+			break;
+		case 'e':
+			prompt_integer(&print_entries, "Enter display entries (lines)");
+			break;
+		case 'E':
+			if (nr_counters > 1) {
+				int i;
+
+				fprintf(stderr, "\nAvailable events:");
+				for (i = 0; i < nr_counters; i++)
+					fprintf(stderr, "\n\t%d %s", i, event_name(i));
+
+				prompt_integer(&sym_counter, "Enter details event counter");
+
+				if (sym_counter >= nr_counters) {
+					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
+					sym_counter = 0;
+					sleep(1);
+				}
+			} else sym_counter = 0;
+			break;
+		case 'f':
+			prompt_integer(&count_filter, "Enter display event count filter");
+			break;
+		case 'F':
+			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
+			break;
+		case 'q':
+		case 'Q':
+			printf("exiting.\n");
+			exit(0);
+		case 's':
+			prompt_symbol(&sym_filter_entry, "Enter details symbol");
+			break;
+		case 'S':
+			if (!sym_filter_entry)
+				break;
+			else {
+				struct sym_entry *syme = sym_filter_entry;
+
+				pthread_mutex_lock(&syme->source_lock);
+				sym_filter_entry = NULL;
+				__zero_source_counters(syme);
+				pthread_mutex_unlock(&syme->source_lock);
+			}
+			break;
+		case 'z':
+			zero = ~zero;
+			break;
+		default: {
+			struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+			struct termios tc, save;
+
+			if (!once) {
+				print_known_keys();
+				once++;
+			}
+
+			tcgetattr(0, &save);
+			tc = save;
+			tc.c_lflag &= ~(ICANON | ECHO);
+			tc.c_cc[VMIN] = 0;
+			tc.c_cc[VTIME] = 0;
+			tcsetattr(0, TCSANOW, &tc);
+
+			poll(&stdin_poll, 1, -1);
+			c = getc(stdin);
+
+			tcsetattr(0, TCSAFLUSH, &save);
+			goto repeat;
+		}
+	}
+}
+
 static void *display_thread(void *arg __used)
 {
 	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
-	int delay_msecs = delay_secs * 1000;
-
-	printf("PerfTop refresh period: %d seconds\n", delay_secs);
+	struct termios tc, save;
+	int delay_msecs, c;
+
+	tcgetattr(0, &save);
+	tc = save;
+	tc.c_lflag &= ~(ICANON | ECHO);
+	tc.c_cc[VMIN] = 0;
+	tc.c_cc[VTIME] = 0;
+repeat:
+	delay_msecs = delay_secs * 1000;
+	tcsetattr(0, TCSANOW, &tc);
+	/* trash return*/
+	getc(stdin);
 
 	do {
 		print_sym_table();
 	} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
 
-	printf("key pressed - exiting.\n");
-	exit(0);
+	c = getc(stdin);
+	tcsetattr(0, TCSAFLUSH, &save);
+
+	handle_keypress(c);
+	goto repeat;
 
 	return NULL;
 }
@@ -293,7 +729,6 @@ static const char *skip_symbols[] = {
 
 static int symbol_filter(struct dso *self, struct symbol *sym)
 {
-	static int filter_match;
 	struct sym_entry *syme;
 	const char *name = sym->name;
 	int i;
@@ -315,6 +750,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
 		return 1;
 
 	syme = dso__sym_priv(self, sym);
+	pthread_mutex_init(&syme->source_lock, NULL);
+	if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
+		sym_filter_entry = syme;
+
 	for (i = 0; skip_symbols[i]; i++) {
 		if (!strcmp(skip_symbols[i], name)) {
 			syme->skip = 1;
@@ -322,29 +761,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
 		}
 	}
 
-	if (filter_match == 1) {
-		filter_end = sym->start;
-		filter_match = -1;
-		if (filter_end - filter_start > 10000) {
-			fprintf(stderr,
-				"hm, too large filter symbol <%s> - skipping.\n",
-				sym_filter);
-			fprintf(stderr, "symbol filter start: %016lx\n",
-				filter_start);
-			fprintf(stderr, "                end: %016lx\n",
-				filter_end);
-			filter_end = filter_start = 0;
-			sym_filter = NULL;
-			sleep(1);
-		}
-	}
-
-	if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
-		filter_match = 1;
-		filter_start = sym->start;
-	}
-
-
 	return 0;
 }
 
@@ -380,8 +796,6 @@ static int parse_symbols(void)
 	return -1;
 }
 
-#define TRACE_COUNT     3
-
 /*
  * Binary search in the histogram table and record the hit:
  */
@@ -394,6 +808,7 @@ static void record_ip(u64 ip, int counter)
 
 		if (!syme->skip) {
 			syme->count[counter]++;
+			record_precise_ip(syme, counter, ip);
 			pthread_mutex_lock(&active_symbols_lock);
 			if (list_empty(&syme->node) || !syme->node.next)
 				__list_insert_active_sym(syme);
@@ -690,8 +1105,8 @@ static const struct option options[] = {
 			    "put the counters into a counter group"),
 	OPT_BOOLEAN('i', "inherit", &inherit,
 		    "child tasks inherit counters"),
-	OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
-		    "only display symbols matchig this pattern"),
+	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
+		    "symbol to annotate - requires -k option"),
 	OPT_BOOLEAN('z', "zero", &zero,
 		    "zero history across updates"),
 	OPT_INTEGER('F', "freq", &freq,
@@ -734,6 +1149,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 		delay_secs = 1;
 
 	parse_symbols();
+	parse_source(sym_filter_entry);
 
 	/*
 	 * Fill in the ones not specifically initialized via -c: