diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6cc29469c1dff7ab252c65e2fbcec9c54d18d21f..40fb808e83965095c00b679aa2621fa3b0f0ae90 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2336,6 +2336,15 @@ void arch_perf_update_userpage(struct perf_event *event,
         cyc2ns_read_end();
 }

+/*
+ * Determine whether the regs were taken from an irq/exception handler rather
+ * than from perf_arch_fetch_caller_regs().
+ */
+static bool perf_hw_regs(struct pt_regs *regs)
+{
+        return regs->flags & X86_EFLAGS_FIXED;
+}
+
 void
 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
@@ -2347,11 +2356,15 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
                 return;
         }

-        if (perf_callchain_store(entry, regs->ip))
-                return;
+        if (perf_hw_regs(regs)) {
+                if (perf_callchain_store(entry, regs->ip))
+                        return;
+                unwind_start(&state, current, regs, NULL);
+        } else {
+                unwind_start(&state, current, NULL, (void *)regs->sp);
+        }

-        for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
-             unwind_next_frame(&state)) {
+        for (; !unwind_done(&state); unwind_next_frame(&state)) {
                 addr = unwind_get_return_address(&state);
                 if (!addr || perf_callchain_store(entry, addr))
                         return;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f6c4915a863e03313d4ded9b7722eb6d74e52cd8..620c3e3564beff29d43d9684647527f7fbb073f5 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -264,14 +264,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
  */
 #define perf_arch_fetch_caller_regs(regs, __ip)         {       \
         (regs)->ip = (__ip);                                    \
-        (regs)->bp = caller_frame_pointer();                    \
+        (regs)->sp = (unsigned long)__builtin_frame_address(0); \
         (regs)->cs = __KERNEL_CS;                               \
         regs->flags = 0;                                        \
-        asm volatile(                                           \
-                _ASM_MOV "%%"_ASM_SP ", %0\n"                   \
-                : "=m" ((regs)->sp)                             \
-                :: "memory"                                     \
-        );                                                      \
 }

 struct perf_guest_switch_msr {
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f335aad404a479e98e4a5d38dc41ee5e5aa419ec..beef7ad9e43a4a7391aad3db4231de806f4799d6 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -98,19 +98,6 @@ struct stack_frame_ia32 {
         u32 return_address;
 };

-static inline unsigned long caller_frame_pointer(void)
-{
-        struct stack_frame *frame;
-
-        frame = __builtin_frame_address(0);
-
-#ifdef CONFIG_FRAME_POINTER
-        frame = frame->next_frame;
-#endif
-
-        return (unsigned long)frame;
-}
-
 void show_opcodes(struct pt_regs *regs, const char *loglvl);
 void show_ip(struct pt_regs *regs, const char *loglvl);
 #endif /* _ASM_X86_STACKTRACE_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d8b4d31acd18eda8e3f4ef8073d6e7f1b36d4e26..cf3da70056f723c18298167c3649c927914510cb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1050,12 +1050,18 @@ static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned lo
 #endif

 /*
- * Take a snapshot of the regs. Skip ip and frame pointer to
- * the nth caller. We only need a few of the regs:
+ * When generating a perf sample in-line, instead of from an interrupt /
+ * exception, we lack a pt_regs. This is typically used from software events
+ * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints.
+ *
+ * We typically don't need a full set, but (for x86) do require:
  * - ip for PERF_SAMPLE_IP
  * - cs for user_mode() tests
- * - bp for callchains
- * - eflags, for future purposes, just in case
+ * - sp for PERF_SAMPLE_CALLCHAIN
+ * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs())
+
+ * NOTE: assumes @regs is otherwise already 0 filled; this is important for
+ * things like PERF_SAMPLE_REGS_INTR.
  */
 static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 {
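The dispatch this patch adds to perf_callchain_kernel() hinges on one observation: bit 1 of EFLAGS (X86_EFLAGS_FIXED) is architecturally always set in register state saved by the CPU on an interrupt or exception, while perf_arch_fetch_caller_regs() stores regs->flags = 0, so that single bit tells the two cases apart. The plain-C sketch below is illustrative only and not part of the patch; fake_regs, FLAGS_FIXED, hw_regs() and start_callchain() are simplified stand-ins for the kernel's pt_regs, X86_EFLAGS_FIXED, perf_hw_regs() and the unwind_start() call variants.

/*
 * Userspace-only model of the perf_hw_regs() dispatch; the types and helpers
 * here are stand-ins for kernel internals, not kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

#define FLAGS_FIXED 0x2UL       /* stand-in for X86_EFLAGS_FIXED: bit 1, always set in real EFLAGS */

struct fake_regs {              /* stand-in for the few pt_regs fields the patch relies on */
        unsigned long ip;
        unsigned long sp;
        unsigned long flags;
};

/*
 * Same test as the new perf_hw_regs(): hardware-saved regs carry the fixed
 * EFLAGS bit; regs fabricated by perf_arch_fetch_caller_regs() have flags == 0.
 */
static bool hw_regs(const struct fake_regs *regs)
{
        return regs->flags & FLAGS_FIXED;
}

static void start_callchain(const struct fake_regs *regs)
{
        if (hw_regs(regs))
                printf("irq/exception regs: store ip=%#lx, unwind from the full regs\n", regs->ip);
        else
                printf("fabricated regs: no trustworthy ip, unwind from sp=%#lx only\n", regs->sp);
}

int main(void)
{
        struct fake_regs from_irq  = { .ip = 0x1010, .sp = 0x7000, .flags = FLAGS_FIXED };
        struct fake_regs from_perf = { .ip = 0,      .sp = 0x7f00, .flags = 0 };

        start_callchain(&from_irq);
        start_callchain(&from_perf);
        return 0;
}

This also shows why the old caller_frame_pointer() helper could go away: with regs->sp recorded via __builtin_frame_address(0), the unwinder can be started from the stack pointer alone and no longer depends on a frame-pointer chain.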