diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 9c4dc30cdc13bba5ed018be4ddaed077cadd4e1a..a9281cca411412be75e12a867b1224af37cbfa45 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -416,6 +416,7 @@ LIB_H += util/thread.h
 LIB_H += util/trace-event.h
 LIB_H += util/probe-finder.h
 LIB_H += util/probe-event.h
+LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
@@ -451,6 +452,7 @@ LIB_OBJS += $(OUTPUT)util/callchain.o
 LIB_OBJS += $(OUTPUT)util/values.o
 LIB_OBJS += $(OUTPUT)util/debug.o
 LIB_OBJS += $(OUTPUT)util/map.o
+LIB_OBJS += $(OUTPUT)util/pstack.o
 LIB_OBJS += $(OUTPUT)util/session.o
 LIB_OBJS += $(OUTPUT)util/thread.o
 LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index fd1b786c8f35a510fd3b82cfa3584bcca0a570f5..77bcc9b130f5b3d6120f8c458b70f3c384ff2672 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -365,7 +365,7 @@ static int __cmd_annotate(void)
 		goto out_delete;
 
 	if (dump_trace) {
-		event__print_totals();
+		perf_session__fprintf_nr_events(session, stdout);
 		goto out_delete;
 	}
 
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 3a95a0260a5b7c5e5894ac3e17ca80b43cacf9c1..a6e2fdc7a04e16613087a4c65e70d832ab903505 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -23,9 +23,9 @@ static bool  force;
 static bool show_displacement;
 
 static int hists__add_entry(struct hists *self,
-			    struct addr_location *al, u64 count)
+			    struct addr_location *al, u64 period)
 {
-	if (__hists__add_entry(self, al, NULL, count) != NULL)
+	if (__hists__add_entry(self, al, NULL, period) != NULL)
 		return 0;
 	return -ENOMEM;
 }
@@ -50,11 +50,11 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
 	event__parse_sample(event, session->sample_type, &data);
 
 	if (hists__add_entry(&session->hists, &al, data.period)) {
-		pr_warning("problem incrementing symbol count, skipping event\n");
+		pr_warning("problem incrementing symbol period, skipping event\n");
 		return -1;
 	}
 
-	session->hists.stats.total += data.period;
+	session->hists.stats.total_period += data.period;
 	return 0;
 }
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 04de3387de3ff20b4177b94c9ca3cadef9899a39..68265120ee07f38c47a48bf0ce44fdd5900bc606 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -138,8 +138,14 @@ static int add_event_total(struct perf_session *session,
 	if (!hists)
 		return -ENOMEM;
 
-	hists->stats.total += data->period;
-	session->hists.stats.total += data->period;
+	hists->stats.total_period += data->period;
+	/*
+	 * FIXME: add_event_total should be moved from here to
+	 * perf_session__process_event so that the proper hist is passed to
+	 * the event_op methods.
+	 */
+	hists__inc_nr_events(hists, PERF_RECORD_SAMPLE);
+	session->hists.stats.total_period += data->period;
 	return 0;
 }
 
@@ -182,14 +188,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 		return 0;
 
 	if (perf_session__add_hist_entry(session, &al, &data)) {
-		pr_debug("problem incrementing symbol count, skipping event\n");
+		pr_debug("problem incrementing symbol period, skipping event\n");
 		return -1;
 	}
 
 	attr = perf_header__find_attr(data.id, &session->header);
 
 	if (add_event_total(session, &data, attr)) {
-		pr_debug("problem adding event count\n");
+		pr_debug("problem adding event period\n");
 		return -1;
 	}
 
@@ -263,11 +269,25 @@ static struct perf_event_ops event_ops = {
 
 extern volatile int session_done;
 
-static void sig_handler(int sig __attribute__((__unused__)))
+static void sig_handler(int sig __used)
 {
 	session_done = 1;
 }
 
+/* Print the "# Events: <n><unit>[ <evname>]" header; returns the fprintf() total. */
+static size_t hists__fprintf_nr_sample_events(struct hists *self,
+					      const char *evname, FILE *fp)
+{
+	char suffix;
+	unsigned long scaled = convert_unit(self->stats.nr_events[PERF_RECORD_SAMPLE],
+					    &suffix);
+	size_t printed = fprintf(fp, "# Events: %lu%c", scaled, suffix);
+
+	if (evname)
+		printed += fprintf(fp, " %s", evname);
+	return printed + fprintf(fp, "\n#\n");
+}
+
 static int __cmd_report(void)
 {
 	int ret = -EINVAL;
@@ -293,7 +313,7 @@ static int __cmd_report(void)
 		goto out_delete;
 
 	if (dump_trace) {
-		event__print_totals();
+		perf_session__fprintf_nr_events(session, stdout);
 		goto out_delete;
 	}
 
@@ -313,14 +333,12 @@ static int __cmd_report(void)
 		if (use_browser)
 			hists__browse(hists, help, input_name);
 		else {
-			if (rb_first(&session->hists.entries) ==
+			const char *evname = NULL;
+			if (rb_first(&session->hists.entries) !=
 			    rb_last(&session->hists.entries))
-				fprintf(stdout, "# Samples: %Ld\n#\n",
-					hists->stats.total);
-			else
-				fprintf(stdout, "# Samples: %Ld %s\n#\n",
-					hists->stats.total,
-					__event_name(hists->type, hists->config));
+				evname = __event_name(hists->type, hists->config);
+
+			hists__fprintf_nr_sample_events(hists, evname, stdout);
 
 			hists__fprintf(hists, NULL, false, stdout);
 			fprintf(stdout, "\n\n");
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index aef6ed0e119c86a49d235c39db9c2061f6e9e3fb..be7bc9264710979913e71745d4cf616b9b48c09d 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1641,19 +1641,10 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	return 0;
 }
 
-static int process_lost_event(event_t *event __used,
-			      struct perf_session *session __used)
-{
-	nr_lost_chunks++;
-	nr_lost_events += event->lost.lost;
-
-	return 0;
-}
-
 static struct perf_event_ops event_ops = {
 	.sample			= process_sample_event,
 	.comm			= event__process_comm,
-	.lost			= process_lost_event,
+	.lost			= event__process_lost,
 	.ordered_samples	= true,
 };
 
@@ -1664,8 +1655,12 @@ static int read_events(void)
 	if (session == NULL)
 		return -ENOMEM;
 
-	if (perf_session__has_traces(session, "record -R"))
+	if (perf_session__has_traces(session, "record -R")) {
 		err = perf_session__process_events(session, &event_ops);
+		nr_events      = session->hists.stats.nr_events[0];
+		nr_lost_events = session->hists.stats.total_lost;
+		nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
+	}
 
 	perf_session__delete(session);
 	return err;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 95fcb0517a9d1e3d3f7bd092cbcf2af5b5294362..dddf3f01b5ab3c91db25611ac4a2ed6c5adcf064 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -109,7 +109,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 					     data.time, thread->comm);
 	}
 
-	session->hists.stats.total += data.period;
+	session->hists.stats.total_period += data.period;
 	return 0;
 }
 
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index cce006ec8f05651f0fd725ccf5dd66fd0e23ce2e..50771b5813ee6e15a6843a232b108415f77a4a29 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -7,6 +7,23 @@
 #include "strlist.h"
 #include "thread.h"
 
+const char *event__name[PERF_RECORD_HEADER_MAX] = {	/* types without a name stay NULL */
+	[0]			 = "TOTAL",
+	[PERF_RECORD_MMAP]	 = "MMAP",
+	[PERF_RECORD_LOST]	 = "LOST",
+	[PERF_RECORD_COMM]	 = "COMM",
+	[PERF_RECORD_EXIT]	 = "EXIT",
+	[PERF_RECORD_THROTTLE]	 = "THROTTLE",
+	[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
+	[PERF_RECORD_FORK]	 = "FORK",
+	[PERF_RECORD_READ]	 = "READ",
+	[PERF_RECORD_SAMPLE]	 = "SAMPLE",
+	[PERF_RECORD_HEADER_ATTR]	 = "ATTR",
+	[PERF_RECORD_HEADER_EVENT_TYPE]	 = "EVENT_TYPE",
+	[PERF_RECORD_HEADER_TRACING_DATA]	 = "TRACING_DATA",
+	[PERF_RECORD_HEADER_BUILD_ID]	 = "BUILD_ID",
+};
+
 static pid_t event__synthesize_comm(pid_t pid, int full,
 				    event__handler_t process,
 				    struct perf_session *session)
@@ -368,7 +385,7 @@ int event__process_comm(event_t *self, struct perf_session *session)
 int event__process_lost(event_t *self, struct perf_session *session)
 {
 	dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
-	session->hists.stats.lost += self->lost.lost;
+	session->hists.stats.total_lost += self->lost.lost;
 	return 0;
 }
 
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 48c2cc9dae4f8bf9345a54df1244009fdf52608a..8577085db067bb3d7b6a7530fab877b3e28afe96 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -160,4 +160,6 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
 			     struct addr_location *al, symbol_filter_t filter);
 int event__parse_sample(event_t *event, u64 type, struct sample_data *data);
 
+extern const char *event__name[];
+
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 5dc4f8429eda0bc7e375fa41a2665c03a2d0828f..f75c5f62401c3dda011c4b52f4356507efc78f1a 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -9,21 +9,21 @@ struct callchain_param	callchain_param = {
 	.min_percent = 0.5
 };
 
-static void hist_entry__add_cpumode_count(struct hist_entry *self,
-					  unsigned int cpumode, u64 count)
+static void hist_entry__add_cpumode_period(struct hist_entry *self,
+					   unsigned int cpumode, u64 period)
 {
 	switch (cpumode) {
 	case PERF_RECORD_MISC_KERNEL:
-		self->count_sys += count;
+		self->period_sys += period;
 		break;
 	case PERF_RECORD_MISC_USER:
-		self->count_us += count;
+		self->period_us += period;
 		break;
 	case PERF_RECORD_MISC_GUEST_KERNEL:
-		self->count_guest_sys += count;
+		self->period_guest_sys += period;
 		break;
 	case PERF_RECORD_MISC_GUEST_USER:
-		self->count_guest_us += count;
+		self->period_guest_us += period;
 		break;
 	default:
 		break;
@@ -31,7 +31,7 @@ static void hist_entry__add_cpumode_count(struct hist_entry *self,
 }
 
 /*
- * histogram, sorted on item, collects counts
+ * histogram, sorted on item, collects periods
  */
 
 static struct hist_entry *hist_entry__new(struct hist_entry *template)
@@ -41,6 +41,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
 
 	if (self != NULL) {
 		*self = *template;
+		self->nr_events = 1;
 		if (symbol_conf.use_callchain)
 			callchain_init(self->callchain);
 	}
@@ -57,7 +58,7 @@ static void hists__inc_nr_entries(struct hists *self, struct hist_entry *entry)
 
 struct hist_entry *__hists__add_entry(struct hists *self,
 				      struct addr_location *al,
-				      struct symbol *sym_parent, u64 count)
+				      struct symbol *sym_parent, u64 period)
 {
 	struct rb_node **p = &self->entries.rb_node;
 	struct rb_node *parent = NULL;
@@ -70,7 +71,7 @@ struct hist_entry *__hists__add_entry(struct hists *self,
 		},
 		.ip	= al->addr,
 		.level	= al->level,
-		.count	= count,
+		.period	= period,
 		.parent = sym_parent,
 	};
 	int cmp;
@@ -82,7 +83,8 @@ struct hist_entry *__hists__add_entry(struct hists *self,
 		cmp = hist_entry__cmp(&entry, he);
 
 		if (!cmp) {
-			he->count += count;
+			he->period += period;
+			++he->nr_events;
 			goto out;
 		}
 
@@ -99,7 +101,7 @@ struct hist_entry *__hists__add_entry(struct hists *self,
 	rb_insert_color(&he->rb_node, &self->entries);
 	hists__inc_nr_entries(self, he);
 out:
-	hist_entry__add_cpumode_count(he, al->cpumode, count);
+	hist_entry__add_cpumode_period(he, al->cpumode, period);
 	return he;
 }
 
@@ -160,7 +162,7 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
 		cmp = hist_entry__collapse(iter, he);
 
 		if (!cmp) {
-			iter->count += he->count;
+			iter->period += he->period;
 			hist_entry__free(he);
 			return false;
 		}
@@ -203,7 +205,7 @@ void hists__collapse_resort(struct hists *self)
 }
 
 /*
- * reverse the map, sort on count.
+ * reverse the map, sort on period.
  */
 
 static void __hists__insert_output_entry(struct rb_root *entries,
@@ -222,7 +224,7 @@ static void __hists__insert_output_entry(struct rb_root *entries,
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node);
 
-		if (he->count > iter->count)
+		if (he->period > iter->period)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -239,7 +241,7 @@ void hists__output_resort(struct hists *self)
 	struct hist_entry *n;
 	u64 min_callchain_hits;
 
-	min_callchain_hits = self->stats.total * (callchain_param.min_percent / 100);
+	min_callchain_hits = self->stats.total_period * (callchain_param.min_percent / 100);
 
 	tmp = RB_ROOT;
 	next = rb_first(&self->entries);
@@ -288,7 +290,7 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
 }
 
 static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
-				     int depth, int depth_mask, int count,
+				     int depth, int depth_mask, int period,
 				     u64 total_samples, int hits,
 				     int left_margin)
 {
@@ -301,7 +303,7 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
 			ret += fprintf(fp, "|");
 		else
 			ret += fprintf(fp, " ");
-		if (!count && i == depth - 1) {
+		if (!period && i == depth - 1) {
 			double percent;
 
 			percent = hits * 100.0 / total_samples;
@@ -516,7 +518,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
 			 long displacement, bool color, u64 session_total)
 {
 	struct sort_entry *se;
-	u64 count, total, count_sys, count_us, count_guest_sys, count_guest_us;
+	u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
 	const char *sep = symbol_conf.field_sep;
 	int ret;
 
@@ -524,57 +526,57 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
 		return 0;
 
 	if (pair_hists) {
-		count = self->pair ? self->pair->count : 0;
-		total = pair_hists->stats.total;
-		count_sys = self->pair ? self->pair->count_sys : 0;
-		count_us = self->pair ? self->pair->count_us : 0;
-		count_guest_sys = self->pair ? self->pair->count_guest_sys : 0;
-		count_guest_us = self->pair ? self->pair->count_guest_us : 0;
+		period = self->pair ? self->pair->period : 0;
+		total = pair_hists->stats.total_period;
+		period_sys = self->pair ? self->pair->period_sys : 0;
+		period_us = self->pair ? self->pair->period_us : 0;
+		period_guest_sys = self->pair ? self->pair->period_guest_sys : 0;
+		period_guest_us = self->pair ? self->pair->period_guest_us : 0;
 	} else {
-		count = self->count;
+		period = self->period;
 		total = session_total;
-		count_sys = self->count_sys;
-		count_us = self->count_us;
-		count_guest_sys = self->count_guest_sys;
-		count_guest_us = self->count_guest_us;
+		period_sys = self->period_sys;
+		period_us = self->period_us;
+		period_guest_sys = self->period_guest_sys;
+		period_guest_us = self->period_guest_us;
 	}
 
 	if (total) {
 		if (color)
 			ret = percent_color_snprintf(s, size,
 						     sep ? "%.2f" : "   %6.2f%%",
-						     (count * 100.0) / total);
+						     (period * 100.0) / total);
 		else
 			ret = snprintf(s, size, sep ? "%.2f" : "   %6.2f%%",
-				       (count * 100.0) / total);
+				       (period * 100.0) / total);
 		if (symbol_conf.show_cpu_utilization) {
 			ret += percent_color_snprintf(s + ret, size - ret,
 					sep ? "%.2f" : "   %6.2f%%",
-					(count_sys * 100.0) / total);
+					(period_sys * 100.0) / total);
 			ret += percent_color_snprintf(s + ret, size - ret,
 					sep ? "%.2f" : "   %6.2f%%",
-					(count_us * 100.0) / total);
+					(period_us * 100.0) / total);
 			if (perf_guest) {
 				ret += percent_color_snprintf(s + ret,
 						size - ret,
 						sep ? "%.2f" : "   %6.2f%%",
-						(count_guest_sys * 100.0) /
+						(period_guest_sys * 100.0) /
 								total);
 				ret += percent_color_snprintf(s + ret,
 						size - ret,
 						sep ? "%.2f" : "   %6.2f%%",
-						(count_guest_us * 100.0) /
+						(period_guest_us * 100.0) /
 								total);
 			}
 		}
 	} else
-		ret = snprintf(s, size, sep ? "%lld" : "%12lld ", count);
+		ret = snprintf(s, size, sep ? "%lld" : "%12lld ", period);
 
 	if (symbol_conf.show_nr_samples) {
 		if (sep)
-			ret += snprintf(s + ret, size - ret, "%c%lld", *sep, count);
+			ret += snprintf(s + ret, size - ret, "%c%lld", *sep, period);
 		else
-			ret += snprintf(s + ret, size - ret, "%11lld", count);
+			ret += snprintf(s + ret, size - ret, "%11lld", period);
 	}
 
 	if (pair_hists) {
@@ -582,9 +584,9 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
 		double old_percent = 0, new_percent = 0, diff;
 
 		if (total > 0)
-			old_percent = (count * 100.0) / total;
+			old_percent = (period * 100.0) / total;
 		if (session_total > 0)
-			new_percent = (self->count * 100.0) / session_total;
+			new_percent = (self->period * 100.0) / session_total;
 
 		diff = new_percent - old_percent;
 
@@ -769,10 +771,10 @@ size_t hists__fprintf(struct hists *self, struct hists *pair,
 			++position;
 		}
 		ret += hist_entry__fprintf(h, pair, show_displacement,
-					   displacement, fp, self->stats.total);
+					   displacement, fp, self->stats.total_period);
 
 		if (symbol_conf.use_callchain)
-			ret += hist_entry__fprintf_callchain(h, fp, self->stats.total);
+			ret += hist_entry__fprintf_callchain(h, fp, self->stats.total_period);
 
 		if (h->ms.map == NULL && verbose > 1) {
 			__map_groups__fprintf_maps(&h->thread->mg,
@@ -795,7 +797,8 @@ void hists__filter_by_dso(struct hists *self, const struct dso *dso)
 {
 	struct rb_node *nd;
 
-	self->nr_entries = self->stats.total = 0;
+	self->nr_entries = self->stats.total_period = 0;
+	self->stats.nr_events[PERF_RECORD_SAMPLE] = 0;
 	self->max_sym_namelen = 0;
 
 	for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
@@ -812,7 +815,8 @@ void hists__filter_by_dso(struct hists *self, const struct dso *dso)
 		h->filtered &= ~(1 << HIST_FILTER__DSO);
 		if (!h->filtered) {
 			++self->nr_entries;
-			self->stats.total += h->count;
+			self->stats.total_period += h->period;
+			self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events;
 			if (h->ms.sym &&
 			    self->max_sym_namelen < h->ms.sym->namelen)
 				self->max_sym_namelen = h->ms.sym->namelen;
@@ -824,7 +828,8 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
 {
 	struct rb_node *nd;
 
-	self->nr_entries = self->stats.total = 0;
+	self->nr_entries = self->stats.total_period = 0;
+	self->stats.nr_events[PERF_RECORD_SAMPLE] = 0;
 	self->max_sym_namelen = 0;
 
 	for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
@@ -837,7 +842,8 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
 		h->filtered &= ~(1 << HIST_FILTER__THREAD);
 		if (!h->filtered) {
 			++self->nr_entries;
-			self->stats.total += h->count;
+			self->stats.total_period += h->period;
+			self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events;
 			if (h->ms.sym &&
 			    self->max_sym_namelen < h->ms.sym->namelen)
 				self->max_sym_namelen = h->ms.sym->namelen;
@@ -881,7 +887,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip)
 	h->sum++;
 	h->ip[offset]++;
 
-	pr_debug3("%#Lx %s: count++ [ip: %#Lx, %#Lx] => %Ld\n", self->ms.sym->start,
+	pr_debug3("%#Lx %s: period++ [ip: %#Lx, %#Lx] => %Ld\n", self->ms.sym->start,
 		  self->ms.sym->name, ip, ip - self->ms.sym->start, h->ip[offset]);
 	return 0;
 }
@@ -1028,3 +1034,39 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head)
 	pclose(file);
 	return 0;
 }
+
+/*
+ * Account for one event of the given record type: slot 0 of nr_events[]
+ * keeps the running total, slot 'type' the per-type count.
+ *
+ * NOTE(review): 'type' is used unchecked as an index into
+ * nr_events[PERF_RECORD_HEADER_MAX]; callers are presumably expected to
+ * pass type < PERF_RECORD_HEADER_MAX - confirm at the call sites.
+ */
+void hists__inc_nr_events(struct hists *self, u32 type)
+{
+	++self->stats.nr_events[0];
+	++self->stats.nr_events[type];
+}
+
+/*
+ * Print one "<name> events: <count>" line to fp for every record type that
+ * has an entry in event__name[], in index order.
+ *
+ * Returns the sum of the fprintf() return values.
+ */
+size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
+{
+	int i;
+	size_t ret = 0;
+
+	for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
+		if (!event__name[i])
+			continue;
+		/* nr_events[] holds u32 counters, so print them unsigned */
+		ret += fprintf(fp, "%10s events: %10u\n",
+			       event__name[i], self->stats.nr_events[i]);
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 0b4c8df914bd0e5e67509ef34481236a50b2d045..6f17dcd8412c6c6e0894aa4b078f7982b10cd69a 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -37,9 +37,23 @@ struct sym_priv {
 	struct sym_ext	*ext;
 };
 
+/*
+ * The kernel collects the number of events it couldn't send in a stretch and
+ * when possible sends this number in a PERF_RECORD_LOST event. The number of
+ * such "chunks" of lost events is stored in .nr_events[PERF_RECORD_LOST] while
+ * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
+ * the sum of all struct lost_event.lost fields reported.
+ *
+ * The total_period is needed because by default auto-freq is used, so
+ * multiplying nr_events[PERF_RECORD_SAMPLE] by a frequency can't give
+ * the total number of low level events; it is necessary to sum all struct
+ * sample_event.period and stash the result in total_period.
+ */
 struct events_stats {
-	u64 total;
-	u64 lost;
+	u64 total_period;
+	u64 total_lost;
+	u32 nr_events[PERF_RECORD_HEADER_MAX];
+	u32 nr_unknown_events;
 };
 
 struct hists {
@@ -55,7 +69,7 @@ struct hists {
 
 struct hist_entry *__hists__add_entry(struct hists *self,
 				      struct addr_location *al,
-				      struct symbol *parent, u64 count);
+				      struct symbol *parent, u64 period);
 extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
 extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
 int hist_entry__fprintf(struct hist_entry *self, struct hists *pair_hists,
@@ -68,6 +82,10 @@ void hist_entry__free(struct hist_entry *);
 
 void hists__output_resort(struct hists *self);
 void hists__collapse_resort(struct hists *self);
+
+void hists__inc_nr_events(struct hists *self, u32 type);
+size_t hists__fprintf_nr_events(struct hists *self, FILE *fp);
+
 size_t hists__fprintf(struct hists *self, struct hists *pair,
 		      bool show_displacement, FILE *fp);
 
diff --git a/tools/perf/util/newt.c b/tools/perf/util/newt.c
index ba6acd04c082fac6be1feff82a210a10ad94cf84..e74df1240ef64c0525d532e2fadb48890d412095 100644
--- a/tools/perf/util/newt.c
+++ b/tools/perf/util/newt.c
@@ -9,6 +9,7 @@
 
 #include "cache.h"
 #include "hist.h"
+#include "pstack.h"
 #include "session.h"
 #include "sort.h"
 #include "symbol.h"
@@ -680,16 +681,18 @@ static int hist_browser__populate(struct hist_browser *self, struct hists *hists
 	struct ui_progress *progress;
 	struct rb_node *nd;
 	u64 curr_hist = 0;
-	char seq[] = ".";
+	char seq[] = ".", unit;
 	char str[256];
+	unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE];
 
 	if (self->form) {
 		newtFormDestroy(self->form);
 		newtPopWindow();
 	}
 
-	snprintf(str, sizeof(str), "Samples: %Ld                            ",
-		 hists->stats.total);
+	nr_events = convert_unit(nr_events, &unit);
+	snprintf(str, sizeof(str), "Events: %lu%c                            ",
+		 nr_events, unit);
 	newtDrawRootText(0, 0, str);
 
 	newtGetScreenSize(NULL, &rows);
@@ -718,12 +721,12 @@ static int hist_browser__populate(struct hist_browser *self, struct hists *hists
 		if (h->filtered)
 			continue;
 
-		len = hist_entry__append_browser(h, self->tree, hists->stats.total);
+		len = hist_entry__append_browser(h, self->tree, hists->stats.total_period);
 		if (len > max_len)
 			max_len = len;
 		if (symbol_conf.use_callchain)
 			hist_entry__append_callchain_browser(h, self->tree,
-							     hists->stats.total, idx++);
+							     hists->stats.total_period, idx++);
 		++curr_hist;
 		if (curr_hist % 5)
 			ui_progress__update(progress, curr_hist);
@@ -748,6 +751,7 @@ static int hist_browser__populate(struct hist_browser *self, struct hists *hists
 	newtFormAddHotKey(self->form, 'A');
 	newtFormAddHotKey(self->form, 'a');
 	newtFormAddHotKey(self->form, NEWT_KEY_RIGHT);
+	newtFormAddHotKey(self->form, NEWT_KEY_LEFT);
 	newtFormAddComponents(self->form, self->tree, NULL);
 	self->selection = newt__symbol_tree_get_current(self->tree);
 
@@ -799,6 +803,7 @@ static int hist_browser__title(char *bf, size_t size, const char *input_name,
 int hists__browse(struct hists *self, const char *helpline, const char *input_name)
 {
 	struct hist_browser *browser = hist_browser__new();
+	struct pstack *fstack;	/* filter stack; allocated below, after the browser NULL check */
 	const struct thread *thread_filter = NULL;
 	const struct dso *dso_filter = NULL;
 	struct newtExitStruct es;
@@ -808,12 +813,16 @@ int hists__browse(struct hists *self, const char *helpline, const char *input_na
 	if (browser == NULL)
 		return -1;
 
+	fstack = pstack__new(2);
+	if (fstack == NULL)
+		goto out;
+
 	ui_helpline__push(helpline);
 
 	hist_browser__title(msg, sizeof(msg), input_name,
 			    dso_filter, thread_filter);
 	if (hist_browser__populate(browser, self, msg) < 0)
-		goto out;
+		goto out_free_stack;
 
 	while (1) {
 		const struct thread *thread;
@@ -834,6 +843,19 @@ int hists__browse(struct hists *self, const char *helpline, const char *input_na
 				else
 					continue;
 			}
+
+			if (es.u.key == NEWT_KEY_LEFT) {
+				const void *top;
+
+				if (pstack__empty(fstack))
+					continue;
+				top = pstack__pop(fstack);
+				if (top == &dso_filter)
+					goto zoom_out_dso;
+				if (top == &thread_filter)
+					goto zoom_out_thread;
+				continue;
+			}
 		}
 
 		if (browser->selection->sym != NULL &&
@@ -886,12 +908,15 @@ int hists__browse(struct hists *self, const char *helpline, const char *input_na
 			hist_entry__annotate_browser(he);
 		} else if (choice == zoom_dso) {
 			if (dso_filter) {
+				pstack__remove(fstack, &dso_filter);
+zoom_out_dso:
 				ui_helpline__pop();
 				dso_filter = NULL;
 			} else {
-				ui_helpline__fpush("To zoom out press -> + \"Zoom out of %s DSO\"",
+				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
 						   dso->kernel ? "the Kernel" : dso->short_name);
 				dso_filter = dso;
+				pstack__push(fstack, &dso_filter);
 			}
 			hists__filter_by_dso(self, dso_filter);
 			hist_browser__title(msg, sizeof(msg), input_name,
@@ -900,13 +925,16 @@ int hists__browse(struct hists *self, const char *helpline, const char *input_na
 				goto out;
 		} else if (choice == zoom_thread) {
 			if (thread_filter) {
+				pstack__remove(fstack, &thread_filter);
+zoom_out_thread:
 				ui_helpline__pop();
 				thread_filter = NULL;
 			} else {
-				ui_helpline__fpush("To zoom out press -> + \"Zoom out of %s(%d) thread\"",
+				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
 						   thread->comm_set ? thread->comm : "",
 						   thread->pid);
 				thread_filter = thread;
+				pstack__push(fstack, &thread_filter);
 			}
 			hists__filter_by_thread(self, thread_filter);
 			hist_browser__title(msg, sizeof(msg), input_name,
@@ -916,6 +944,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *input_na
 		}
 	}
 	err = 0;
+out_free_stack:
+	pstack__delete(fstack);
 out:
 	hist_browser__delete(browser);
 	return err;
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
new file mode 100644
index 0000000000000000000000000000000000000000..13d36faf64eb4da3e2bed21813f0d791550f5053
--- /dev/null
+++ b/tools/perf/util/pstack.c
@@ -0,0 +1,100 @@
+/*
+ * Simple pointer stack
+ *
+ * (c) 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "util.h"
+#include "pstack.h"
+#include <linux/kernel.h>
+#include <stdlib.h>
+
+/*
+ * Fixed-capacity LIFO of opaque pointers. 'top' is the number of entries
+ * currently stored, i.e. the index of the next free slot in entries[].
+ */
+struct pstack {
+	unsigned short	top;
+	unsigned short	max_nr_entries;
+	void		*entries[];	/* flexible array member, sized by pstack__new() */
+};
+
+/*
+ * Allocate a stack with room for max_nr_entries pointers.
+ *
+ * 'top' is never set explicitly: this relies on zalloc() handing back
+ * zero-filled memory. Returns NULL if the allocation fails.
+ */
+struct pstack *pstack__new(unsigned short max_nr_entries)
+{
+	struct pstack *self = zalloc((sizeof(*self) +
+				     max_nr_entries * sizeof(void *)));
+	if (self != NULL)
+		self->max_nr_entries = max_nr_entries;
+	return self;
+}
+
+/* Release a stack obtained from pstack__new(); the stored pointers are not freed. */
+void pstack__delete(struct pstack *self)
+{
+	free(self);
+}
+
+/* True when no entry is currently stored. */
+bool pstack__empty(const struct pstack *self)
+{
+	return self->top == 0;
+}
+
+/*
+ * Remove the most recently pushed occurrence of key, shifting the entries
+ * above it down by one so the remaining order is preserved. If key is not
+ * on the stack this is reported with pr_err() and the stack is left unchanged.
+ */
+void pstack__remove(struct pstack *self, void *key)
+{
+	unsigned short i = self->top;
+
+	while (i-- != 0) {
+		/* top >= 1 inside the loop, so 'last' cannot wrap around */
+		unsigned short last = self->top - 1;
+
+		if (self->entries[i] != key)
+			continue;
+		if (i < last)
+			memmove(self->entries + i,
+				self->entries + i + 1,
+				(last - i) * sizeof(void *));
+		--self->top;
+		return;
+	}
+	pr_err("%s: %p not on the pstack!\n", __func__, key);
+}
+
+/* Push key; if the stack is already full this is reported with pr_err() and key is dropped. */
+void pstack__push(struct pstack *self, void *key)
+{
+	if (self->top == self->max_nr_entries) {
+		pr_err("%s: top=%d, overflow!\n", __func__, self->top);
+		return;
+	}
+	self->entries[self->top++] = key;
+}
+
+/*
+ * Pop and return the top entry, clearing its slot. On an empty stack this
+ * is reported with pr_err() and NULL is returned.
+ */
+void *pstack__pop(struct pstack *self)
+{
+	void *ret;
+
+	if (self->top == 0) {
+		pr_err("%s: underflow!\n", __func__);
+		return NULL;
+	}
+
+	ret = self->entries[--self->top];
+	self->entries[self->top] = NULL;
+	return ret;
+}
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ad07023504bb834e6334bbc5628e269faaa21ce
--- /dev/null
+++ b/tools/perf/util/pstack.h
@@ -0,0 +1,22 @@
+#ifndef _PERF_PSTACK_
+#define _PERF_PSTACK_
+
+#include <stdbool.h>
+
+/* Opaque stack of pointers with a capacity fixed at creation time. */
+struct pstack;
+
+/* Create a stack able to hold max_nr_entries pointers; NULL on allocation failure. */
+struct pstack *pstack__new(unsigned short max_nr_entries);
+/* Free the stack itself (not the pointers stored in it). */
+void pstack__delete(struct pstack *self);
+/* True when the stack holds no entries. */
+bool pstack__empty(const struct pstack *self);
+/* Remove key from wherever it sits in the stack. */
+void pstack__remove(struct pstack *self, void *key);
+/* Push key on top of the stack. */
+void pstack__push(struct pstack *self, void *key);
+/* Pop and return the top entry; NULL if the stack is empty. */
+void *pstack__pop(struct pstack *self);
+
+#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 72a7f6ae0293e707fadd8a2a3e28a0df197a6c3d..25bfca4f10f0ff98a2ef06d90f1362424547e8d3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -94,7 +94,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 	self->mmap_window = 32;
 	self->cwd = NULL;
 	self->cwdlen = 0;
-	self->unknown_events = 0;
 	self->machines = RB_ROOT;
 	self->repipe = repipe;
 	INIT_LIST_HEAD(&self->ordered_samples.samples_head);
@@ -241,36 +240,6 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
 	}
 }
 
-static const char *event__name[] = {
-	[0]			 = "TOTAL",
-	[PERF_RECORD_MMAP]	 = "MMAP",
-	[PERF_RECORD_LOST]	 = "LOST",
-	[PERF_RECORD_COMM]	 = "COMM",
-	[PERF_RECORD_EXIT]	 = "EXIT",
-	[PERF_RECORD_THROTTLE]	 = "THROTTLE",
-	[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
-	[PERF_RECORD_FORK]	 = "FORK",
-	[PERF_RECORD_READ]	 = "READ",
-	[PERF_RECORD_SAMPLE]	 = "SAMPLE",
-	[PERF_RECORD_HEADER_ATTR]	 = "ATTR",
-	[PERF_RECORD_HEADER_EVENT_TYPE]	 = "EVENT_TYPE",
-	[PERF_RECORD_HEADER_TRACING_DATA]	 = "TRACING_DATA",
-	[PERF_RECORD_HEADER_BUILD_ID]	 = "BUILD_ID",
-};
-
-unsigned long event__total[PERF_RECORD_HEADER_MAX];
-
-void event__print_totals(void)
-{
-	int i;
-	for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
-		if (!event__name[i])
-			continue;
-		pr_info("%10s events: %10ld\n",
-			event__name[i], event__total[i]);
-	}
-}
-
 void mem_bswap_64(void *src, int byte_size)
 {
 	u64 *m = src;
@@ -580,8 +549,7 @@ static int perf_session__process_event(struct perf_session *self,
 		dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
 			    offset + head, event->header.size,
 			    event__name[event->header.type]);
-		++event__total[0];
-		++event__total[event->header.type];
+		hists__inc_nr_events(&self->hists, event->header.type);
 	}
 
 	if (self->header.needs_swap && event__swap_ops[event->header.type])
@@ -619,7 +587,7 @@ static int perf_session__process_event(struct perf_session *self,
 	case PERF_RECORD_FINISHED_ROUND:
 		return ops->finished_round(event, self, ops);
 	default:
-		self->unknown_events++;
+		++self->hists.stats.nr_unknown_events;
 		return -1;
 	}
 }
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index ce00fa6cdeda1e97ddf86c46270444659a8d1341..e7fce486ebe23a299d0a3b5a5240de0795a99ebc 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -30,8 +30,6 @@ struct perf_session {
 	struct machine		host_machine;
 	struct rb_root		machines;
 	struct rb_root		hists_tree;
-	unsigned long		event_total[PERF_RECORD_MAX];
-	unsigned long		unknown_events;
 	/*
 	 * FIXME: should point to the first entry in hists_tree and
 	 *        be a hists instance. Right now its only 'report'
@@ -140,4 +138,10 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
 {
 	return machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
 }
+
+static inline
+size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp)
+{
+	return hists__fprintf_nr_events(&self->hists, fp);	/* per-type counters are kept in self->hists */
+}
 #endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index af301acc461ce16715b2f2fc5d696fda91ad8e20..eab2e0b3b74ec3dd965938012867fbab085ec1a0 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -43,14 +43,15 @@ extern enum sort_type sort__first_dimension;
 
 struct hist_entry {
 	struct rb_node		rb_node;
-	u64			count;
-	u64			count_sys;
-	u64			count_us;
-	u64			count_guest_sys;
-	u64			count_guest_us;
+	u64			period;
+	u64			period_sys;
+	u64			period_us;
+	u64			period_guest_sys;
+	u64			period_guest_us;
 	struct map_symbol	ms;
 	struct thread		*thread;
 	u64			ip;
+	u32			nr_events;
 	char			level;
 	u8			filtered;
 	struct symbol		*parent;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index f9b890fde681931eaaa8089705cf06f7b90f6ac1..214265674ddda0a59488cc0871a0943b20fc6bd4 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -92,3 +92,26 @@ int copyfile(const char *from, const char *to)
 out:
 	return err;
 }
+
+/*
+ * Scale value down for compact display: divide by 1000 up to three times,
+ * as long as at least 1000 remains, and store the matching suffix
+ * ('K', 'M' or 'G', or ' ' when no scaling was needed) in *unit.
+ *
+ * Returns the scaled (truncated) value.
+ */
+unsigned long convert_unit(unsigned long value, char *unit)
+{
+	static const char suffixes[] = { 'K', 'M', 'G' };
+	size_t i;
+
+	*unit = ' ';
+
+	/* >= so that exact multiples (1000, 1000000, ...) move to the next unit */
+	for (i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]) && value >= 1000; i++) {
+		value /= 1000;
+		*unit = suffixes[i];
+	}
+
+	return value;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index fbf45d1b26f701a6f94f6ce561a166f346039cec..0795bf304b19495b0b7f88ca366fcace7302f1fa 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -423,6 +423,7 @@ char **argv_split(const char *str, int *argcp);
 void argv_free(char **argv);
 bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
+unsigned long convert_unit(unsigned long value, char *unit);
 
 #define _STR(x) #x
 #define STR(x) _STR(x)