diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 4212ed62235b46bc687929cdc38f1002a452b6c2..8143536b462a54a9d633e455615085b8a842aa01 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -110,3 +110,59 @@ int bpf_map_update_elem(int fd, void *key, void *value,
 
 	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 }
+
+int bpf_map_lookup_elem(int fd, void *key, void *value)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+
+	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_delete_elem(int fd, void *key)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+
+	return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_get_next_key(int fd, void *key, void *next_key)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.next_key = ptr_to_u64(next_key);
+
+	return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+}
+
+int bpf_obj_pin(int fd, const char *pathname)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.pathname = ptr_to_u64((void *)pathname);
+	attr.bpf_fd = fd;
+
+	return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.pathname = ptr_to_u64((void *)pathname);
+
+	return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index e8ba540874972c4f0c02204a6f28b00782b7673e..253c3dbb06b420a39558512f89b80ce81a719287 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -35,4 +35,11 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
 
 int bpf_map_update_elem(int fd, void *key, void *value,
 			u64 flags);
+
+int bpf_map_lookup_elem(int fd, void *key, void *value);
+int bpf_map_delete_elem(int fd, void *key);
+int bpf_map_get_next_key(int fd, void *key, void *next_key);
+int bpf_obj_pin(int fd, const char *pathname);
+int bpf_obj_get(const char *pathname);
+
 #endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 96a2b2ff1212e75fa99b86821cbeb23d24c2c5c7..2e974593f3e8dc2b14b47d61b9354b31f7c8c0c5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -229,6 +229,10 @@ struct bpf_object {
 	 * all objects.
 	 */
 	struct list_head list;
+
+	void *priv;
+	bpf_object_clear_priv_t clear_priv;
+
 	char path[];
 };
 #define obj_elf_valid(o)	((o)->efile.elf)
@@ -1229,6 +1233,9 @@ void bpf_object__close(struct bpf_object *obj)
 	if (!obj)
 		return;
 
+	if (obj->clear_priv)
+		obj->clear_priv(obj, obj->priv);
+
 	bpf_object__elf_finish(obj);
 	bpf_object__unload(obj);
 
@@ -1282,6 +1289,22 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
 	return obj ? obj->kern_version : 0;
 }
 
+int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+			 bpf_object_clear_priv_t clear_priv)
+{
+	if (obj->priv && obj->clear_priv)
+		obj->clear_priv(obj, obj->priv);
+
+	obj->priv = priv;
+	obj->clear_priv = clear_priv;
+	return 0;
+}
+
+void *bpf_object__priv(struct bpf_object *obj)
+{
+	return obj ? obj->priv : ERR_PTR(-EINVAL);
+}
+
 struct bpf_program *
 bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
 {
@@ -1501,3 +1524,15 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
 	}
 	return NULL;
 }
+
+struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
+{
+	int i;
+
+	for (i = 0; i < obj->nr_maps; i++) {
+		if (obj->maps[i].offset == offset)
+			return &obj->maps[i];
+	}
+	return ERR_PTR(-ENOENT);
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index dd7a513efb10e6337d962fa50f554ad73831ba67..a5a8b86a06fe0794623a3f9ac3fb3bf719553ba9 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -24,6 +24,7 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/err.h>
+#include <sys/types.h>  // for size_t
 
 enum libbpf_errno {
 	__LIBBPF_ERRNO__START = 4000,
@@ -79,6 +80,11 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev);
 	     (pos) != NULL;				\
 	     (pos) = (tmp), (tmp) = bpf_object__next(tmp))
 
+typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
+int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+			 bpf_object_clear_priv_t clear_priv);
+void *bpf_object__priv(struct bpf_object *prog);
+
 /* Accessors of bpf_program. */
 struct bpf_program;
 struct bpf_program *bpf_program__next(struct bpf_program *prog,
@@ -195,6 +201,13 @@ struct bpf_map;
 struct bpf_map *
 bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
 
+/*
+ * Get bpf_map through the offset of corresponding struct bpf_map_def
+ * in the bpf object file.
+ */
+struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
+
 struct bpf_map *
 bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
 #define bpf_map__for_each(pos, obj)		\
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index ff0f433b3fce1ee402a375b62703d7034c468e34..479fc3261a5035ea3e9a6c6d0898c371355c72bb 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -61,6 +61,13 @@ OPTIONS
 	default, but this option shows live (currently allocated) pages
 	instead.  (This option works with --page option only)
 
+--time::
+	Only analyze samples within given time window: <start>,<stop>. Times
+	have the format seconds.microseconds. If start is not given (i.e., time
+	string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e, time string is 'x.y,') then analysis goes
+	to end of file.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 2d1746295abf1206b5feed03ef8ad344aea6ff59..3a166ae4a4d318d10122acfc3b3499514d3286ca 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -382,6 +382,13 @@ OPTIONS
 --header-only::
 	Show only perf.data header (forces --stdio).
 
+--time::
+	Only analyze samples within given time window: <start>,<stop>. Times
+	have the format seconds.microseconds. If start is not given (i.e., time
+	string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e, time string is 'x.y,') then analysis goes
+	to end of file.
+
 --itrace::
 	Options for decoding instruction tracing data. The options are:
 
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index fb9e52d65fca2b0f4179aaedc1a5030ed939be5c..7775b1eb2bee6bd092fe854b63b12150aa50699f 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -128,6 +128,18 @@ OPTIONS for 'perf sched timehist'
 --wakeups::
 	Show wakeup events.
 
+-M::
+--migrations::
+	Show migration events.
+
+--time::
+	Only analyze samples within given time window: <start>,<stop>. Times
+	have the format seconds.microseconds. If start is not given (i.e., time
+	string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e, time string is 'x.y,') then analysis goes
+	to end of file.
+
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index c01904f388ce807cb4acefc607df9314a56fcbb4..5dc5c6a09ac4f4113fd88f56f16c8713ff2e9646 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -212,6 +212,9 @@ OPTIONS
 --hide-call-graph::
         When printing symbols do not display call chain.
 
+--stop-bt::
+        Stop display of callgraph at these symbols
+
 -C::
 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
 	be provided as a comma-separated list with no space: 0,1. Ranges of
@@ -289,6 +292,13 @@ include::itrace.txt[]
 --force::
 	Don't do ownership validation.
 
+--time::
+	Only analyze samples within given time window: <start>,<stop>. Times
+	have the format seconds.microseconds. If start is not given (i.e., time
+	string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e, time string is 'x.y,') then analysis goes
+	to end of file.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
new file mode 100644
index 0000000000000000000000000000000000000000..44eafd6f2d5008e62aa68b21a2b415d1db736a49
--- /dev/null
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -0,0 +1,62 @@
+#include <sys/types.h>
+#include <regex.h>
+
+struct arm64_annotate {
+	regex_t call_insn,
+		jump_insn;
+};
+
+static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name)
+{
+	struct arm64_annotate *arm = arch->priv;
+	struct ins_ops *ops;
+	regmatch_t match[2];
+
+	if (!regexec(&arm->jump_insn, name, 2, match, 0))
+		ops = &jump_ops;
+	else if (!regexec(&arm->call_insn, name, 2, match, 0))
+		ops = &call_ops;
+	else if (!strcmp(name, "ret"))
+		ops = &ret_ops;
+	else
+		return NULL;
+
+	arch__associate_ins_ops(arch, name, ops);
+	return ops;
+}
+
+static int arm64__annotate_init(struct arch *arch)
+{
+	struct arm64_annotate *arm;
+	int err;
+
+	if (arch->initialized)
+		return 0;
+
+	arm = zalloc(sizeof(*arm));
+	if (!arm)
+		return -1;
+
+	/* bl, blr */
+	err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED);
+	if (err)
+		goto out_free_arm;
+	/* b, b.cond, br, cbz/cbnz, tbz/tbnz */
+	err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl)?n?z?$",
+		      REG_EXTENDED);
+	if (err)
+		goto out_free_call;
+
+	arch->initialized = true;
+	arch->priv	  = arm;
+	arch->associate_instruction_ops   = arm64__associate_instruction_ops;
+	arch->objdump.comment_char	  = ';';
+	arch->objdump.skip_functions_char = '+';
+	return 0;
+
+out_free_call:
+	regfree(&arm->call_insn);
+out_free_arm:
+	free(arm);
+	return -1;
+}
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 2218cb64f8409c4b7f487f846f766b11dadcf722..99d66191e56ccf8df7ae4f2a432f6f74782dd57d 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -4,27 +4,27 @@
 
 struct test arch_tests[] = {
 	{
-		.desc = "x86 rdpmc test",
+		.desc = "x86 rdpmc",
 		.func = test__rdpmc,
 	},
 	{
-		.desc = "Test converting perf time to TSC",
+		.desc = "Convert perf time to TSC",
 		.func = test__perf_time_to_tsc,
 	},
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 	{
-		.desc = "Test dwarf unwind",
+		.desc = "DWARF unwind",
 		.func = test__dwarf_unwind,
 	},
 #endif
 #ifdef HAVE_AUXTRACE_SUPPORT
 	{
-		.desc = "Test x86 instruction decoder - new instructions",
+		.desc = "x86 instruction decoder - new instructions",
 		.func = test__insn_x86,
 	},
 #endif
 	{
-		.desc = "Test intel cqm nmi context read",
+		.desc = "Intel cqm nmi context read",
 		.func = test__intel_cqm_count_nmi_context,
 	},
 	{
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index d426dcb18ce9a0d9756c274844bc84af9bcc5b0a..35a02f8e5a4aa90548c643c0de80e85b07a0b3bb 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -11,6 +11,7 @@
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/callchain.h"
+#include "util/time-utils.h"
 
 #include <subcmd/parse-options.h>
 #include "util/trace-event.h"
@@ -49,6 +50,7 @@ struct alloc_stat {
 	u64	ptr;
 	u64	bytes_req;
 	u64	bytes_alloc;
+	u64	last_alloc;
 	u32	hit;
 	u32	pingpong;
 
@@ -62,9 +64,13 @@ static struct rb_root root_alloc_sorted;
 static struct rb_root root_caller_stat;
 static struct rb_root root_caller_sorted;
 
-static unsigned long total_requested, total_allocated;
+static unsigned long total_requested, total_allocated, total_freed;
 static unsigned long nr_allocs, nr_cross_allocs;
 
+/* filters for controlling start and stop of time of analysis */
+static struct perf_time_interval ptime;
+const char *time_str;
+
 static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
 			     int bytes_req, int bytes_alloc, int cpu)
 {
@@ -105,6 +111,8 @@ static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
 	}
 	data->call_site = call_site;
 	data->alloc_cpu = cpu;
+	data->last_alloc = bytes_alloc;
+
 	return 0;
 }
 
@@ -223,6 +231,8 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel,
 	if (!s_alloc)
 		return 0;
 
+	total_freed += s_alloc->last_alloc;
+
 	if ((short)sample->cpu != s_alloc->alloc_cpu) {
 		s_alloc->pingpong++;
 
@@ -907,6 +917,15 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	return 0;
 }
 
+static bool perf_kmem__skip_sample(struct perf_sample *sample)
+{
+	/* skip sample based on time? */
+	if (perf_time__skip_sample(&ptime, sample->time))
+		return true;
+
+	return false;
+}
+
 typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
 				  struct perf_sample *sample);
 
@@ -926,6 +945,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 		return -1;
 	}
 
+	if (perf_kmem__skip_sample(sample))
+		return 0;
+
 	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
 
 	if (evsel->handler != NULL) {
@@ -1128,6 +1150,11 @@ static void print_slab_summary(void)
 	printf("\n========================\n");
 	printf("Total bytes requested: %'lu\n", total_requested);
 	printf("Total bytes allocated: %'lu\n", total_allocated);
+	printf("Total bytes freed:     %'lu\n", total_freed);
+	if (total_allocated > total_freed) {
+		printf("Net total bytes allocated: %'lu\n",
+		total_allocated - total_freed);
+	}
 	printf("Total bytes wasted on internal fragmentation: %'lu\n",
 	       total_allocated - total_requested);
 	printf("Internal fragmentation: %f%%\n",
@@ -1884,6 +1911,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
 			   parse_page_opt),
 	OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
+	OPT_STRING(0, "time", &time_str, "str",
+		   "Time span of interest (start,stop)"),
 	OPT_END()
 	};
 	const char *const kmem_subcommands[] = { "record", "stat", NULL };
@@ -1944,6 +1973,11 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	symbol__init(&session->header.env);
 
+	if (perf_time__parse_str(&ptime, time_str) != 0) {
+		pr_err("Invalid time string\n");
+		return -EINVAL;
+	}
+
 	if (!strcmp(argv[0], "stat")) {
 		setlocale(LC_ALL, "");
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 67d2a9003294af22d70db9a34b606e8165bf0595..fa26865364b64808e39407e5a3a6e5c9aa277d64 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -37,6 +37,7 @@
 #include "util/llvm-utils.h"
 #include "util/bpf-loader.h"
 #include "util/trigger.h"
+#include "util/perf-hooks.h"
 #include "asm/bug.h"
 
 #include <unistd.h>
@@ -206,6 +207,12 @@ static void sig_handler(int sig)
 	done = 1;
 }
 
+static void sigsegv_handler(int sig)
+{
+	perf_hooks__recover();
+	sighandler_dump_stack(sig);
+}
+
 static void record__sig_exit(void)
 {
 	if (signr == -1)
@@ -833,6 +840,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
 	signal(SIGTERM, sig_handler);
+	signal(SIGSEGV, sigsegv_handler);
 
 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
 		signal(SIGUSR2, snapshot_sig_handler);
@@ -970,6 +978,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
 	trigger_ready(&auxtrace_snapshot_trigger);
 	trigger_ready(&switch_output_trigger);
+	perf_hooks__invoke_record_start();
 	for (;;) {
 		unsigned long long hits = rec->samples;
 
@@ -1114,6 +1123,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		}
 	}
 
+	perf_hooks__invoke_record_end();
+
 	if (!err && !quiet) {
 		char samples[128];
 		const char *postfix = rec->timestamp_filename ?
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 3dfbfffe2ecdbc5bb31a2640a8c65d35284a6717..d2afbe4a240dbc5c90168ff15a60405fa324bf8f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -36,7 +36,7 @@
 #include "util/hist.h"
 #include "util/data.h"
 #include "arch/common.h"
-
+#include "util/time-utils.h"
 #include "util/auxtrace.h"
 
 #include <dlfcn.h>
@@ -59,6 +59,8 @@ struct report {
 	const char		*pretty_printing_style;
 	const char		*cpu_list;
 	const char		*symbol_filter_str;
+	const char		*time_str;
+	struct perf_time_interval ptime;
 	float			min_percent;
 	u64			nr_entries;
 	u64			queue_size;
@@ -158,6 +160,9 @@ static int process_sample_event(struct perf_tool *tool,
 	};
 	int ret = 0;
 
+	if (perf_time__skip_sample(&rep->ptime, sample->time))
+		return 0;
+
 	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
@@ -830,6 +835,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
 			     "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
 			     stdio__config_color, "always"),
+	OPT_STRING(0, "time", &report.time_str, "str",
+		   "Time span of interest (start,stop)"),
 	OPT_END()
 	};
 	struct perf_data_file file = {
@@ -1015,6 +1022,11 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (symbol__init(&session->header.env) < 0)
 		goto error;
 
+	if (perf_time__parse_str(&report.ptime, report.time_str) != 0) {
+		pr_err("Invalid time string\n");
+		return -EINVAL;
+	}
+
 	sort__setup_elide(stdout);
 
 	ret = __cmd_report(&report);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index a49a032f5b155467ad69fe1e1e15fc17f6b599db..870d94cd20ba104bf98930aebffc32fa1acdc7d8 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -15,6 +15,7 @@
 #include "util/color.h"
 #include "util/stat.h"
 #include "util/callchain.h"
+#include "util/time-utils.h"
 
 #include <subcmd/parse-options.h>
 #include "util/trace-event.h"
@@ -203,7 +204,10 @@ struct perf_sched {
 	unsigned int	max_stack;
 	bool		show_cpu_visual;
 	bool		show_wakeups;
+	bool		show_migrations;
 	u64		skipped_samples;
+	const char	*time_str;
+	struct perf_time_interval ptime;
 };
 
 /* per thread run time data */
@@ -216,6 +220,8 @@ struct thread_runtime {
 
 	struct stats run_stats;
 	u64 total_run_time;
+
+	u64 migrations;
 };
 
 /* per event run time data */
@@ -1834,13 +1840,14 @@ static void timehist_header(struct perf_sched *sched)
 static void timehist_print_sample(struct perf_sched *sched,
 				  struct perf_sample *sample,
 				  struct addr_location *al,
-				  struct thread *thread)
+				  struct thread *thread,
+				  u64 t)
 {
 	struct thread_runtime *tr = thread__priv(thread);
 	u32 max_cpus = sched->max_cpu + 1;
 	char tstr[64];
 
-	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
+	timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
 	printf("%15s [%04d] ", tstr, sample->cpu);
 
 	if (sched->show_cpu_visual) {
@@ -2191,12 +2198,94 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool,
 		tr->ready_to_run = sample->time;
 
 	/* show wakeups if requested */
-	if (sched->show_wakeups)
+	if (sched->show_wakeups &&
+	    !perf_time__skip_sample(&sched->ptime, sample->time))
 		timehist_print_wakeup_event(sched, sample, machine, thread);
 
 	return 0;
 }
 
+static void timehist_print_migration_event(struct perf_sched *sched,
+					struct perf_evsel *evsel,
+					struct perf_sample *sample,
+					struct machine *machine,
+					struct thread *migrated)
+{
+	struct thread *thread;
+	char tstr[64];
+	u32 max_cpus = sched->max_cpu + 1;
+	u32 ocpu, dcpu;
+
+	if (sched->summary_only)
+		return;
+
+	max_cpus = sched->max_cpu + 1;
+	ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
+	dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");
+
+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	if (thread == NULL)
+		return;
+
+	if (timehist_skip_sample(sched, thread) &&
+	    timehist_skip_sample(sched, migrated)) {
+		return;
+	}
+
+	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
+	printf("%15s [%04d] ", tstr, sample->cpu);
+
+	if (sched->show_cpu_visual) {
+		u32 i;
+		char c;
+
+		printf("  ");
+		for (i = 0; i < max_cpus; ++i) {
+			c = (i == sample->cpu) ? 'm' : ' ';
+			printf("%c", c);
+		}
+		printf("  ");
+	}
+
+	printf(" %-*s ", comm_width, timehist_get_commstr(thread));
+
+	/* dt spacer */
+	printf("  %9s  %9s  %9s ", "", "", "");
+
+	printf("migrated: %s", timehist_get_commstr(migrated));
+	printf(" cpu %d => %d", ocpu, dcpu);
+
+	printf("\n");
+}
+
+static int timehist_migrate_task_event(struct perf_tool *tool,
+				       union perf_event *event __maybe_unused,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	struct thread *thread;
+	struct thread_runtime *tr = NULL;
+	/* want pid of migrated task not pid in sample */
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+
+	thread = machine__findnew_thread(machine, 0, pid);
+	if (thread == NULL)
+		return -1;
+
+	tr = thread__get_runtime(thread);
+	if (tr == NULL)
+		return -1;
+
+	tr->migrations++;
+
+	/* show migrations if requested */
+	timehist_print_migration_event(sched, evsel, sample, machine, thread);
+
+	return 0;
+}
+
 static int timehist_sched_change_event(struct perf_tool *tool,
 				       union perf_event *event,
 				       struct perf_evsel *evsel,
@@ -2204,10 +2293,11 @@ static int timehist_sched_change_event(struct perf_tool *tool,
 				       struct machine *machine)
 {
 	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	struct perf_time_interval *ptime = &sched->ptime;
 	struct addr_location al;
 	struct thread *thread;
 	struct thread_runtime *tr = NULL;
-	u64 tprev;
+	u64 tprev, t = sample->time;
 	int rc = 0;
 
 	if (machine__resolve(machine, &al, sample) < 0) {
@@ -2234,9 +2324,35 @@ static int timehist_sched_change_event(struct perf_tool *tool,
 
 	tprev = perf_evsel__get_time(evsel, sample->cpu);
 
-	timehist_update_runtime_stats(tr, sample->time, tprev);
+	/*
+	 * If start time given:
+	 * - sample time is under window user cares about - skip sample
+	 * - tprev is under window user cares about  - reset to start of window
+	 */
+	if (ptime->start && ptime->start > t)
+		goto out;
+
+	if (ptime->start > tprev)
+		tprev = ptime->start;
+
+	/*
+	 * If end time given:
+	 * - previous sched event is out of window - we are done
+	 * - sample time is beyond window user cares about - reset it
+	 *   to close out stats for time window interest
+	 */
+	if (ptime->end) {
+		if (tprev > ptime->end)
+			goto out;
+
+		if (t > ptime->end)
+			t = ptime->end;
+	}
+
+	timehist_update_runtime_stats(tr, t, tprev);
+
 	if (!sched->summary_only)
-		timehist_print_sample(sched, sample, &al, thread);
+		timehist_print_sample(sched, sample, &al, thread, t);
 
 out:
 	if (tr) {
@@ -2295,6 +2411,7 @@ static void print_thread_runtime(struct thread *t,
 	print_sched_time(r->run_stats.max, 6);
 	printf("  ");
 	printf("%5.2f", stddev);
+	printf("   %5" PRIu64, r->migrations);
 	printf("\n");
 }
 
@@ -2356,10 +2473,10 @@ static void timehist_print_summary(struct perf_sched *sched,
 
 	printf("\nRuntime summary\n");
 	printf("%*s  parent   sched-in  ", comm_width, "comm");
-	printf("   run-time    min-run     avg-run     max-run  stddev\n");
+	printf("   run-time    min-run     avg-run     max-run  stddev  migrations\n");
 	printf("%*s            (count)  ", comm_width, "");
 	printf("     (msec)     (msec)      (msec)      (msec)       %%\n");
-	printf("%.105s\n", graph_dotted_line);
+	printf("%.117s\n", graph_dotted_line);
 
 	machine__for_each_thread(m, show_thread_runtime, &totals);
 	task_count = totals.task_count;
@@ -2460,6 +2577,9 @@ static int perf_sched__timehist(struct perf_sched *sched)
 		{ "sched:sched_wakeup",	      timehist_sched_wakeup_event, },
 		{ "sched:sched_wakeup_new",   timehist_sched_wakeup_event, },
 	};
+	const struct perf_evsel_str_handler migrate_handlers[] = {
+		{ "sched:sched_migrate_task", timehist_migrate_task_event, },
+	};
 	struct perf_data_file file = {
 		.path = input_name,
 		.mode = PERF_DATA_MODE_READ,
@@ -2495,6 +2615,11 @@ static int perf_sched__timehist(struct perf_sched *sched)
 
 	symbol__init(&session->header.env);
 
+	if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
+		pr_err("Invalid time string\n");
+		return -EINVAL;
+	}
+
 	if (timehist_check_attr(sched, evlist) != 0)
 		goto out;
 
@@ -2507,6 +2632,10 @@ static int perf_sched__timehist(struct perf_sched *sched)
 	if (!perf_session__has_traces(session, "record -R"))
 		goto out;
 
+	if (sched->show_migrations &&
+	    perf_session__set_tracepoints_handlers(session, migrate_handlers))
+		goto out;
+
 	/* pre-allocate struct for per-CPU idle stats */
 	sched->max_cpu = session->header.env.nr_cpus_online;
 	if (sched->max_cpu == 0)
@@ -2903,7 +3032,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('S', "with-summary", &sched.summary,
 		    "Show all syscalls and summary with statistics"),
 	OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
+	OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
 	OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
+	OPT_STRING(0, "time", &sched.time_str, "str",
+		   "Time span for analysis (start,stop)"),
 	OPT_PARENT(sched_options)
 	};
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e1daff36d0702920038e9685c47e881d6b7195db..2f3ff69fc4e7f0e40c50b2d10d1fc73396f74d4d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
 #include "util/thread_map.h"
 #include "util/stat.h"
 #include "util/thread-stack.h"
+#include "util/time-utils.h"
 #include <linux/bitmap.h>
 #include <linux/stringify.h>
 #include <linux/time64.h>
@@ -833,6 +834,8 @@ struct perf_script {
 	struct cpu_map		*cpus;
 	struct thread_map	*threads;
 	int			name_width;
+	const char              *time_str;
+	struct perf_time_interval ptime;
 };
 
 static int perf_evlist__max_name_len(struct perf_evlist *evlist)
@@ -1014,6 +1017,9 @@ static int process_sample_event(struct perf_tool *tool,
 	struct perf_script *scr = container_of(tool, struct perf_script, tool);
 	struct addr_location al;
 
+	if (perf_time__skip_sample(&scr->ptime, sample->time))
+		return 0;
+
 	if (debug_mode) {
 		if (sample->time < last_timestamp) {
 			pr_err("Samples misordered, previous: %" PRIu64
@@ -2151,6 +2157,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
 		   "only consider these symbols"),
+	OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
+		   "Stop display of callgraph at these symbols"),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
 		   "only display events for these comms"),
@@ -2184,7 +2192,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 			"Enable symbol demangling"),
 	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
 			"Enable kernel symbol demangling"),
-
+	OPT_STRING(0, "time", &script.time_str, "str",
+		   "Time span of interest (start,stop)"),
 	OPT_END()
 	};
 	const char * const script_subcommands[] = { "record", "report", NULL };
@@ -2463,6 +2472,12 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (err < 0)
 		goto out_delete;
 
+	/* needs to be parsed after looking up reference time */
+	if (perf_time__parse_str(&script.ptime, script.time_str) != 0) {
+		pr_err("Invalid time string\n");
+		return -EINVAL;
+	}
+
 	err = __cmd_script(&script);
 
 	flush_scripting();
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5f45166c892d6bb4f6b4e3ae261bf0d3794378b5..206bf72b77fcb9c795486ec9fba184b9d6321b7b 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -74,8 +74,6 @@ struct trace {
 		size_t		nr;
 		int		*entries;
 	}			ev_qualifier_ids;
-	struct intlist		*tid_list;
-	struct intlist		*pid_list;
 	struct {
 		size_t		nr;
 		pid_t		*entries;
@@ -1890,18 +1888,6 @@ static int trace__pgfault(struct trace *trace,
 	return err;
 }
 
-static bool skip_sample(struct trace *trace, struct perf_sample *sample)
-{
-	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
-	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
-		return false;
-
-	if (trace->pid_list || trace->tid_list)
-		return true;
-
-	return false;
-}
-
 static void trace__set_base_time(struct trace *trace,
 				 struct perf_evsel *evsel,
 				 struct perf_sample *sample)
@@ -1926,11 +1912,13 @@ static int trace__process_sample(struct perf_tool *tool,
 				 struct machine *machine __maybe_unused)
 {
 	struct trace *trace = container_of(tool, struct trace, tool);
+	struct thread *thread;
 	int err = 0;
 
 	tracepoint_handler handler = evsel->handler;
 
-	if (skip_sample(trace, sample))
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	if (thread && thread__is_filtered(thread))
 		return 0;
 
 	trace__set_base_time(trace, evsel, sample);
@@ -1943,27 +1931,6 @@ static int trace__process_sample(struct perf_tool *tool,
 	return err;
 }
 
-static int parse_target_str(struct trace *trace)
-{
-	if (trace->opts.target.pid) {
-		trace->pid_list = intlist__new(trace->opts.target.pid);
-		if (trace->pid_list == NULL) {
-			pr_err("Error parsing process id string\n");
-			return -EINVAL;
-		}
-	}
-
-	if (trace->opts.target.tid) {
-		trace->tid_list = intlist__new(trace->opts.target.tid);
-		if (trace->tid_list == NULL) {
-			pr_err("Error parsing thread id string\n");
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-
 static int trace__record(struct trace *trace, int argc, const char **argv)
 {
 	unsigned int rec_argc, i, j;
@@ -2460,6 +2427,12 @@ static int trace__replay(struct trace *trace)
 	if (session == NULL)
 		return -1;
 
+	if (trace->opts.target.pid)
+		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
+
+	if (trace->opts.target.tid)
+		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
+
 	if (symbol__init(&session->header.env) < 0)
 		goto out;
 
@@ -2503,10 +2476,6 @@ static int trace__replay(struct trace *trace)
 			evsel->handler = trace__pgfault;
 	}
 
-	err = parse_target_str(trace);
-	if (err != 0)
-		goto out;
-
 	setup_pager();
 
 	err = perf_session__process_events(session);
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 8a4ce492f7b22be2cf8e0dcfd751365cbc601220..af3ec94869aafa65bb4f818045428be47a1f242f 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -42,6 +42,7 @@ perf-y += backward-ring-buffer.o
 perf-y += sdt.o
 perf-y += is_printable_array.o
 perf-y += bitmap.o
+perf-y += perf-hooks.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 8f0298aff222b5d28cd2e60762875e3a2036a1e6..92343f43e44aa660e5ee1e37c2d721aabafc5fcf 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -57,7 +57,7 @@ static struct {
 } bpf_testcase_table[] = {
 	{
 		LLVM_TESTCASE_BASE,
-		"Test basic BPF filtering",
+		"Basic BPF filtering",
 		"[basic_bpf_test]",
 		"fix 'perf test LLVM' first",
 		"load bpf object failed",
@@ -67,7 +67,7 @@ static struct {
 #ifdef HAVE_BPF_PROLOGUE
 	{
 		LLVM_TESTCASE_BPF_PROLOGUE,
-		"Test BPF prologue generation",
+		"BPF prologue generation",
 		"[bpf_prologue_test]",
 		"fix kbuild first",
 		"check your vmlinux setting?",
@@ -77,7 +77,7 @@ static struct {
 #endif
 	{
 		LLVM_TESTCASE_BPF_RELOCATION,
-		"Test BPF relocation checker",
+		"BPF relocation checker",
 		"[bpf_relocation_test]",
 		"fix 'perf test LLVM' first",
 		"libbpf error when dealing with relocation",
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 778668a2a9661c7402135d62f4cefe69ceb04001..d1bec0444be7e0c47da31fc0a1379db25bed698f 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -28,119 +28,119 @@ static struct test generic_tests[] = {
 		.func = test__vmlinux_matches_kallsyms,
 	},
 	{
-		.desc = "detect openat syscall event",
+		.desc = "Detect openat syscall event",
 		.func = test__openat_syscall_event,
 	},
 	{
-		.desc = "detect openat syscall event on all cpus",
+		.desc = "Detect openat syscall event on all cpus",
 		.func = test__openat_syscall_event_on_all_cpus,
 	},
 	{
-		.desc = "read samples using the mmap interface",
+		.desc = "Read samples using the mmap interface",
 		.func = test__basic_mmap,
 	},
 	{
-		.desc = "parse events tests",
+		.desc = "Parse event definition strings",
 		.func = test__parse_events,
 	},
 	{
-		.desc = "Validate PERF_RECORD_* events & perf_sample fields",
+		.desc = "PERF_RECORD_* events & perf_sample fields",
 		.func = test__PERF_RECORD,
 	},
 	{
-		.desc = "Test perf pmu format parsing",
+		.desc = "Parse perf pmu format",
 		.func = test__pmu,
 	},
 	{
-		.desc = "Test dso data read",
+		.desc = "DSO data read",
 		.func = test__dso_data,
 	},
 	{
-		.desc = "Test dso data cache",
+		.desc = "DSO data cache",
 		.func = test__dso_data_cache,
 	},
 	{
-		.desc = "Test dso data reopen",
+		.desc = "DSO data reopen",
 		.func = test__dso_data_reopen,
 	},
 	{
-		.desc = "roundtrip evsel->name check",
+		.desc = "Roundtrip evsel->name",
 		.func = test__perf_evsel__roundtrip_name_test,
 	},
 	{
-		.desc = "Check parsing of sched tracepoints fields",
+		.desc = "Parse sched tracepoints fields",
 		.func = test__perf_evsel__tp_sched_test,
 	},
 	{
-		.desc = "Generate and check syscalls:sys_enter_openat event fields",
+		.desc = "syscalls:sys_enter_openat event fields",
 		.func = test__syscall_openat_tp_fields,
 	},
 	{
-		.desc = "struct perf_event_attr setup",
+		.desc = "Setup struct perf_event_attr",
 		.func = test__attr,
 	},
 	{
-		.desc = "Test matching and linking multiple hists",
+		.desc = "Match and link multiple hists",
 		.func = test__hists_link,
 	},
 	{
-		.desc = "Try 'import perf' in python, checking link problems",
+		.desc = "'import perf' in python",
 		.func = test__python_use,
 	},
 	{
-		.desc = "Test breakpoint overflow signal handler",
+		.desc = "Breakpoint overflow signal handler",
 		.func = test__bp_signal,
 	},
 	{
-		.desc = "Test breakpoint overflow sampling",
+		.desc = "Breakpoint overflow sampling",
 		.func = test__bp_signal_overflow,
 	},
 	{
-		.desc = "Test number of exit event of a simple workload",
+		.desc = "Number of exit events of a simple workload",
 		.func = test__task_exit,
 	},
 	{
-		.desc = "Test software clock events have valid period values",
+		.desc = "Software clock events period values",
 		.func = test__sw_clock_freq,
 	},
 	{
-		.desc = "Test object code reading",
+		.desc = "Object code reading",
 		.func = test__code_reading,
 	},
 	{
-		.desc = "Test sample parsing",
+		.desc = "Sample parsing",
 		.func = test__sample_parsing,
 	},
 	{
-		.desc = "Test using a dummy software event to keep tracking",
+		.desc = "Use a dummy software event to keep tracking",
 		.func = test__keep_tracking,
 	},
 	{
-		.desc = "Test parsing with no sample_id_all bit set",
+		.desc = "Parse with no sample_id_all bit set",
 		.func = test__parse_no_sample_id_all,
 	},
 	{
-		.desc = "Test filtering hist entries",
+		.desc = "Filter hist entries",
 		.func = test__hists_filter,
 	},
 	{
-		.desc = "Test mmap thread lookup",
+		.desc = "Lookup mmap thread",
 		.func = test__mmap_thread_lookup,
 	},
 	{
-		.desc = "Test thread mg sharing",
+		.desc = "Share thread mg",
 		.func = test__thread_mg_share,
 	},
 	{
-		.desc = "Test output sorting of hist entries",
+		.desc = "Sort output of hist entries",
 		.func = test__hists_output,
 	},
 	{
-		.desc = "Test cumulation of child hist entries",
+		.desc = "Cumulate child hist entries",
 		.func = test__hists_cumulate,
 	},
 	{
-		.desc = "Test tracking with sched_switch",
+		.desc = "Track with sched_switch",
 		.func = test__switch_tracking,
 	},
 	{
@@ -152,15 +152,15 @@ static struct test generic_tests[] = {
 		.func = test__fdarray__add,
 	},
 	{
-		.desc = "Test kmod_path__parse function",
+		.desc = "kmod_path__parse",
 		.func = test__kmod_path__parse,
 	},
 	{
-		.desc = "Test thread map",
+		.desc = "Thread map",
 		.func = test__thread_map,
 	},
 	{
-		.desc = "Test LLVM searching and compiling",
+		.desc = "LLVM search and compile",
 		.func = test__llvm,
 		.subtest = {
 			.skip_if_fail	= true,
@@ -169,11 +169,11 @@ static struct test generic_tests[] = {
 		},
 	},
 	{
-		.desc = "Test topology in session",
+		.desc = "Session topology",
 		.func = test_session_topology,
 	},
 	{
-		.desc = "Test BPF filter",
+		.desc = "BPF filter",
 		.func = test__bpf,
 		.subtest = {
 			.skip_if_fail	= true,
@@ -182,53 +182,57 @@ static struct test generic_tests[] = {
 		},
 	},
 	{
-		.desc = "Test thread map synthesize",
+		.desc = "Synthesize thread map",
 		.func = test__thread_map_synthesize,
 	},
 	{
-		.desc = "Test cpu map synthesize",
+		.desc = "Synthesize cpu map",
 		.func = test__cpu_map_synthesize,
 	},
 	{
-		.desc = "Test stat config synthesize",
+		.desc = "Synthesize stat config",
 		.func = test__synthesize_stat_config,
 	},
 	{
-		.desc = "Test stat synthesize",
+		.desc = "Synthesize stat",
 		.func = test__synthesize_stat,
 	},
 	{
-		.desc = "Test stat round synthesize",
+		.desc = "Synthesize stat round",
 		.func = test__synthesize_stat_round,
 	},
 	{
-		.desc = "Test attr update synthesize",
+		.desc = "Synthesize attr update",
 		.func = test__event_update,
 	},
 	{
-		.desc = "Test events times",
+		.desc = "Event times",
 		.func = test__event_times,
 	},
 	{
-		.desc = "Test backward reading from ring buffer",
+		.desc = "Read backward ring buffer",
 		.func = test__backward_ring_buffer,
 	},
 	{
-		.desc = "Test cpu map print",
+		.desc = "Print cpu map",
 		.func = test__cpu_map_print,
 	},
 	{
-		.desc = "Test SDT event probing",
+		.desc = "Probe SDT events",
 		.func = test__sdt_event,
 	},
 	{
-		.desc = "Test is_printable_array function",
+		.desc = "is_printable_array",
 		.func = test__is_printable_array,
 	},
 	{
-		.desc = "Test bitmap print",
+		.desc = "Print bitmap",
 		.func = test__bitmap_print,
 	},
+	{
+		.desc = "perf hooks",
+		.func = test__perf_hooks,
+	},
 	{
 		.func = NULL,
 	},
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index b798a4bfd23892677b225e41009a784cd4e06d8d..02a33ebcd992b68a3d1adbf05a2e7bb13298a45f 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -34,19 +34,19 @@ static struct {
 } bpf_source_table[__LLVM_TESTCASE_MAX] = {
 	[LLVM_TESTCASE_BASE] = {
 		.source = test_llvm__bpf_base_prog,
-		.desc = "Basic BPF llvm compiling test",
+		.desc = "Basic BPF llvm compile",
 	},
 	[LLVM_TESTCASE_KBUILD] = {
 		.source = test_llvm__bpf_test_kbuild_prog,
-		.desc = "Test kbuild searching",
+		.desc = "kbuild searching",
 	},
 	[LLVM_TESTCASE_BPF_PROLOGUE] = {
 		.source = test_llvm__bpf_test_prologue_prog,
-		.desc = "Compile source for BPF prologue generation test",
+		.desc = "Compile source for BPF prologue generation",
 	},
 	[LLVM_TESTCASE_BPF_RELOCATION] = {
 		.source = test_llvm__bpf_test_relocation,
-		.desc = "Compile source for BPF relocation test",
+		.desc = "Compile source for BPF relocation",
 		.should_load_fail = true,
 	},
 };
diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c
new file mode 100644
index 0000000000000000000000000000000000000000..9338cb2c25ab1d54e7aafdd0776390aef5e73ffb
--- /dev/null
+++ b/tools/perf/tests/perf-hooks.c
@@ -0,0 +1,44 @@
+#include <signal.h>
+#include <stdlib.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+#include "perf-hooks.h"
+
+static void sigsegv_handler(int sig __maybe_unused)
+{
+	pr_debug("SIGSEGV is observed as expected, try to recover.\n");
+	perf_hooks__recover();
+	signal(SIGSEGV, SIG_DFL);
+	raise(SIGSEGV);
+	exit(-1);
+}
+
+static int hook_flags;
+
+static void the_hook(void)
+{
+	int *p = NULL;
+
+	hook_flags = 1234;
+
+	/* Generate a segfault, test perf_hooks__recover */
+	*p = 0;
+}
+
+int test__perf_hooks(int subtest __maybe_unused)
+{
+	signal(SIGSEGV, sigsegv_handler);
+	perf_hooks__set_hook("test", the_hook);
+	perf_hooks__invoke_test();
+
+	/* hook is triggered? */
+	if (hook_flags != 1234)
+		return TEST_FAIL;
+
+	/* the buggy hook is removed? */
+	if (perf_hooks__get_hook("test"))
+		return TEST_FAIL;
+	return TEST_OK;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 7c196c585472f482755bda72a3abf1bcd68a6c44..3a1f98f291ba2223fc2bb138a40a3e06369d8c16 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -91,6 +91,7 @@ int test__cpu_map_print(int subtest);
 int test__sdt_event(int subtest);
 int test__is_printable_array(int subtest);
 int test__bitmap_print(int subtest);
+int test__perf_hooks(int subtest);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index cee0eee31ce6de28c2559a0f17b860a2a083cad3..ec7a30fad14918bcdf8f01b6c90970da8f43a676 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -543,14 +543,16 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows
 static bool annotate_browser__jump(struct annotate_browser *browser)
 {
 	struct disasm_line *dl = browser->selection;
+	u64 offset;
 	s64 idx;
 
 	if (!ins__is_jump(&dl->ins))
 		return false;
 
-	dl = annotate_browser__find_offset(browser, dl->ops.target.offset, &idx);
+	offset = dl->ops.target.offset;
+	dl = annotate_browser__find_offset(browser, offset, &idx);
 	if (dl == NULL) {
-		ui_helpline__puts("Invalid jump offset");
+		ui_helpline__printf("Invalid jump offset: %" PRIx64, offset);
 		return true;
 	}
 
diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c
index 5b74a7eba210b20cb693dbedaee482508adcd2cd..379039ab00d82d8cb00e6583864761f1e8f84a5a 100644
--- a/tools/perf/ui/helpline.c
+++ b/tools/perf/ui/helpline.c
@@ -72,3 +72,13 @@ int ui_helpline__vshow(const char *fmt, va_list ap)
 {
 	return helpline_fns->show(fmt, ap);
 }
+
+void ui_helpline__printf(const char *fmt, ...)
+{
+	va_list ap;
+
+	ui_helpline__pop();
+	va_start(ap, fmt);
+	ui_helpline__vpush(fmt, ap);
+	va_end(ap);
+}
diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h
index 46181f4fc07ebf2133e97898bd21acf5fe9c7551..d52d0a1a881b885dccfc6fd3d3aaecf8017f277c 100644
--- a/tools/perf/ui/helpline.h
+++ b/tools/perf/ui/helpline.h
@@ -21,6 +21,7 @@ void ui_helpline__push(const char *msg);
 void ui_helpline__vpush(const char *fmt, va_list ap);
 void ui_helpline__fpush(const char *fmt, ...);
 void ui_helpline__puts(const char *msg);
+void ui_helpline__printf(const char *fmt, ...);
 int  ui_helpline__vshow(const char *fmt, va_list ap);
 
 extern char ui_helpline__current[512];
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 1dc67efad6340f7aef55f1727f00f7f20d8baa2c..bdad82a9812d46bd054ca4017df32cd3c2b63b64 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -87,6 +87,7 @@ libperf-y += help-unknown-cmd.o
 libperf-y += mem-events.o
 libperf-y += vsprintf.o
 libperf-y += drv_configs.o
+libperf-y += time-utils.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
@@ -123,6 +124,8 @@ libperf-$(CONFIG_LIBELF) += genelf.o
 libperf-$(CONFIG_DWARF) += genelf_debug.o
 endif
 
+libperf-y += perf-hooks.o
+
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 # avoid compiler warnings in 32-bit mode
 CFLAGS_genelf_debug.o  += -Wno-packed
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 3e34ee0fde2830132ed171dc2fddac691aa62e20..4012b1de2813113b86dc5d389579dbabb0632d07 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -105,6 +105,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
 }
 
 #include "arch/arm/annotate/instructions.c"
+#include "arch/arm64/annotate/instructions.c"
 #include "arch/x86/annotate/instructions.c"
 #include "arch/powerpc/annotate/instructions.c"
 
@@ -113,6 +114,10 @@ static struct arch architectures[] = {
 		.name = "arm",
 		.init = arm__annotate_init,
 	},
+	{
+		.name = "arm64",
+		.init = arm64__annotate_init,
+	},
 	{
 		.name = "x86",
 		.instructions = x86__instructions,
@@ -408,7 +413,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops
 	if (ops->target.raw == NULL)
 		return -1;
 
-	comment = strchr(s, '#');
+	comment = strchr(s, arch->objdump.comment_char);
 	if (comment == NULL)
 		return 0;
 
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 5a6f52284452e2ea07be12f5eff85ea306356470..6b2925542c0a641d92ca15db702a43fbe15946c4 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -166,6 +166,14 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
 			if (!print_oneline)
 				printed += fprintf(fp, "\n");
 
+			if (symbol_conf.bt_stop_list &&
+			    node->sym &&
+			    node->sym->name &&
+			    strlist__has_entry(symbol_conf.bt_stop_list,
+					       node->sym->name)) {
+				break;
+			}
+
 			first = false;
 next:
 			callchain_cursor_advance(cursor);
diff --git a/tools/perf/util/perf-hooks-list.h b/tools/perf/util/perf-hooks-list.h
new file mode 100644
index 0000000000000000000000000000000000000000..2867c07ee84e75d013a875f760a35f22e47f3252
--- /dev/null
+++ b/tools/perf/util/perf-hooks-list.h
@@ -0,0 +1,3 @@
+PERF_HOOK(record_start)
+PERF_HOOK(record_end)
+PERF_HOOK(test)
diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c
new file mode 100644
index 0000000000000000000000000000000000000000..4ce88e37dd63d73c0c0bd8fced63e05f11b517bd
--- /dev/null
+++ b/tools/perf/util/perf-hooks.c
@@ -0,0 +1,84 @@
+/*
+ * perf_hooks.c
+ *
+ * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2016 Huawei Inc.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <linux/err.h>
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/perf-hooks.h"
+
+static sigjmp_buf jmpbuf;
+static const struct perf_hook_desc *current_perf_hook;
+
+void perf_hooks__invoke(const struct perf_hook_desc *desc)
+{
+	if (!(desc && desc->p_hook_func && *desc->p_hook_func))
+		return;
+
+	if (sigsetjmp(jmpbuf, 1)) {
+		pr_warning("Fatal error (SEGFAULT) in perf hook '%s'\n",
+			   desc->hook_name);
+		*(current_perf_hook->p_hook_func) = NULL;
+	} else {
+		current_perf_hook = desc;
+		(**desc->p_hook_func)();
+	}
+	current_perf_hook = NULL;
+}
+
+void perf_hooks__recover(void)
+{
+	if (current_perf_hook)
+		siglongjmp(jmpbuf, 1);
+}
+
+#define PERF_HOOK(name)					\
+perf_hook_func_t __perf_hook_func_##name = NULL;	\
+struct perf_hook_desc __perf_hook_desc_##name =		\
+	{.hook_name = #name, .p_hook_func = &__perf_hook_func_##name};
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+#define PERF_HOOK(name)		\
+	&__perf_hook_desc_##name,
+
+static struct perf_hook_desc *perf_hooks[] = {
+#include "perf-hooks-list.h"
+};
+#undef PERF_HOOK
+
+int perf_hooks__set_hook(const char *hook_name,
+			 perf_hook_func_t hook_func)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+		if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0)
+			continue;
+
+		if (*(perf_hooks[i]->p_hook_func))
+			pr_warning("Overwrite existing hook: %s\n", hook_name);
+		*(perf_hooks[i]->p_hook_func) = hook_func;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+perf_hook_func_t perf_hooks__get_hook(const char *hook_name)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+		if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0)
+			continue;
+
+		return *(perf_hooks[i]->p_hook_func);
+	}
+	return ERR_PTR(-ENOENT);
+}
diff --git a/tools/perf/util/perf-hooks.h b/tools/perf/util/perf-hooks.h
new file mode 100644
index 0000000000000000000000000000000000000000..1d482b26b4b9cb874b65c52c5e82adce207eb12f
--- /dev/null
+++ b/tools/perf/util/perf-hooks.h
@@ -0,0 +1,37 @@
+#ifndef PERF_UTIL_PERF_HOOKS_H
+#define PERF_UTIL_PERF_HOOKS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*perf_hook_func_t)(void);
+struct perf_hook_desc {
+	const char * const hook_name;
+	perf_hook_func_t * const p_hook_func;
+};
+
+extern void perf_hooks__invoke(const struct perf_hook_desc *);
+extern void perf_hooks__recover(void);
+
+#define PERF_HOOK(name)					\
+extern struct perf_hook_desc __perf_hook_desc_##name;	\
+static inline void perf_hooks__invoke_##name(void)	\
+{ 							\
+	perf_hooks__invoke(&__perf_hook_desc_##name);	\
+}
+
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+extern int
+perf_hooks__set_hook(const char *hook_name,
+		     perf_hook_func_t hook_func);
+
+extern perf_hook_func_t
+perf_hooks__get_hook(const char *hook_name);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 420ada9de22fa3464fcac7954762151f46112694..df2482b2ba45c47883a254db2ff4d6551aeea83b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -2032,6 +2032,10 @@ int symbol__init(struct perf_env *env)
 		       symbol_conf.sym_list_str, "symbol") < 0)
 		goto out_free_tid_list;
 
+	if (setup_list(&symbol_conf.bt_stop_list,
+		       symbol_conf.bt_stop_list_str, "symbol") < 0)
+		goto out_free_sym_list;
+
 	/*
 	 * A path to symbols of "/" is identical to ""
 	 * reset here for simplicity.
@@ -2049,6 +2053,8 @@ int symbol__init(struct perf_env *env)
 	symbol_conf.initialized = true;
 	return 0;
 
+out_free_sym_list:
+	strlist__delete(symbol_conf.sym_list);
 out_free_tid_list:
 	intlist__delete(symbol_conf.tid_list);
 out_free_pid_list:
@@ -2064,6 +2070,7 @@ void symbol__exit(void)
 {
 	if (!symbol_conf.initialized)
 		return;
+	strlist__delete(symbol_conf.bt_stop_list);
 	strlist__delete(symbol_conf.sym_list);
 	strlist__delete(symbol_conf.dso_list);
 	strlist__delete(symbol_conf.comm_list);
@@ -2071,6 +2078,7 @@ void symbol__exit(void)
 	intlist__delete(symbol_conf.pid_list);
 	vmlinux_path__exit();
 	symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
+	symbol_conf.bt_stop_list = NULL;
 	symbol_conf.initialized = false;
 }
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 1bcbefc0c325d327213e1368f453e8604e82eb67..6c358b7ed336c72fe9984c8f5b33fb3d200c638c 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -132,14 +132,16 @@ struct symbol_conf {
 			*pid_list_str,
 			*tid_list_str,
 			*sym_list_str,
-			*col_width_list_str;
+			*col_width_list_str,
+			*bt_stop_list_str;
        struct strlist	*dso_list,
 			*comm_list,
 			*sym_list,
 			*dso_from_list,
 			*dso_to_list,
 			*sym_from_list,
-			*sym_to_list;
+			*sym_to_list,
+			*bt_stop_list;
 	struct intlist	*pid_list,
 			*tid_list;
 	const char	*symfs;
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
new file mode 100644
index 0000000000000000000000000000000000000000..d1b21c72206d8b1a6a52697adb3091ff09e81ec2
--- /dev/null
+++ b/tools/perf/util/time-utils.c
@@ -0,0 +1,119 @@
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <linux/time64.h>
+#include <time.h>
+#include <errno.h>
+#include <inttypes.h>
+
+#include "perf.h"
+#include "debug.h"
+#include "time-utils.h"
+
+int parse_nsec_time(const char *str, u64 *ptime)
+{
+	u64 time_sec, time_nsec;
+	char *end;
+
+	time_sec = strtoul(str, &end, 10);
+	if (*end != '.' && *end != '\0')
+		return -1;
+
+	if (*end == '.') {
+		int i;
+		char nsec_buf[10];
+
+		if (strlen(++end) > 9)
+			return -1;
+
+		strncpy(nsec_buf, end, 9);
+		nsec_buf[9] = '\0';
+
+		/* make it nsec precision */
+		for (i = strlen(nsec_buf); i < 9; i++)
+			nsec_buf[i] = '0';
+
+		time_nsec = strtoul(nsec_buf, &end, 10);
+		if (*end != '\0')
+			return -1;
+	} else
+		time_nsec = 0;
+
+	*ptime = time_sec * NSEC_PER_SEC + time_nsec;
+	return 0;
+}
+
+static int parse_timestr_sec_nsec(struct perf_time_interval *ptime,
+				  char *start_str, char *end_str)
+{
+	if (start_str && (*start_str != '\0') &&
+	    (parse_nsec_time(start_str, &ptime->start) != 0)) {
+		return -1;
+	}
+
+	if (end_str && (*end_str != '\0') &&
+	    (parse_nsec_time(end_str, &ptime->end) != 0)) {
+		return -1;
+	}
+
+	return 0;
+}
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
+{
+	char *start_str, *end_str;
+	char *d, *str;
+	int rc = 0;
+
+	if (ostr == NULL || *ostr == '\0')
+		return 0;
+
+	/* copy original string because we need to modify it */
+	str = strdup(ostr);
+	if (str == NULL)
+		return -ENOMEM;
+
+	ptime->start = 0;
+	ptime->end = 0;
+
+	/* str has the format: <start>,<stop>
+	 * variations: <start>,
+	 *             ,<stop>
+	 *             ,
+	 */
+	start_str = str;
+	d = strchr(start_str, ',');
+	if (d) {
+		*d = '\0';
+		++d;
+	}
+	end_str = d;
+
+	rc = parse_timestr_sec_nsec(ptime, start_str, end_str);
+
+	free(str);
+
+	/* make sure end time is after start time if it was given */
+	if (rc == 0 && ptime->end && ptime->end < ptime->start)
+		return -EINVAL;
+
+	pr_debug("start time %" PRIu64 ", ", ptime->start);
+	pr_debug("end time %" PRIu64 "\n", ptime->end);
+
+	return rc;
+}
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
+{
+	/* if time is not set don't drop sample */
+	if (timestamp == 0)
+		return false;
+
+	/* otherwise compare sample time to time window */
+	if ((ptime->start && timestamp < ptime->start) ||
+	    (ptime->end && timestamp > ptime->end)) {
+		return true;
+	}
+
+	return false;
+}
diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..c1f197c4af6c9aa019cdebc9625b07020f30ad4e
--- /dev/null
+++ b/tools/perf/util/time-utils.h
@@ -0,0 +1,14 @@
+#ifndef _TIME_UTILS_H_
+#define _TIME_UTILS_H_
+
+struct perf_time_interval {
+	u64 start, end;
+};
+
+int parse_nsec_time(const char *str, u64 *ptime);
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
+
+#endif
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 67ac765da27a10789b44971bdcd50fb8ad55a320..9ddd98827d12dffa0b4482deb72be3749dc33145 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -400,39 +400,6 @@ void sighandler_dump_stack(int sig)
 	raise(sig);
 }
 
-int parse_nsec_time(const char *str, u64 *ptime)
-{
-	u64 time_sec, time_nsec;
-	char *end;
-
-	time_sec = strtoul(str, &end, 10);
-	if (*end != '.' && *end != '\0')
-		return -1;
-
-	if (*end == '.') {
-		int i;
-		char nsec_buf[10];
-
-		if (strlen(++end) > 9)
-			return -1;
-
-		strncpy(nsec_buf, end, 9);
-		nsec_buf[9] = '\0';
-
-		/* make it nsec precision */
-		for (i = strlen(nsec_buf); i < 9; i++)
-			nsec_buf[i] = '0';
-
-		time_nsec = strtoul(nsec_buf, &end, 10);
-		if (*end != '\0')
-			return -1;
-	} else
-		time_nsec = 0;
-
-	*ptime = time_sec * NSEC_PER_SEC + time_nsec;
-	return 0;
-}
-
 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
 {
 	u64  sec = timestamp / NSEC_PER_SEC;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 79662d67891e1a06c1c85e484d8d8cbe9ecc4562..1d639e38aa82d0b495183af4dd36a0fdf3f47542 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -179,8 +179,6 @@ static inline void *zalloc(size_t size)
 #undef tolower
 #undef toupper
 
-int parse_nsec_time(const char *str, u64 *ptime);
-
 extern unsigned char sane_ctype[256];
 #define GIT_SPACE		0x01
 #define GIT_DIGIT		0x02