提交 c56d3406 编写于 作者: Ingo Molnar

Merge branch 'perf/uprobes' into perf/core

These bits from Oleg are fully cooked, ship them to Linus.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
...@@ -98,7 +98,6 @@ static inline int get_si_code(unsigned long condition) ...@@ -98,7 +98,6 @@ static inline int get_si_code(unsigned long condition)
extern int panic_on_unrecovered_nmi; extern int panic_on_unrecovered_nmi;
void math_error(struct pt_regs *, int, int);
void math_emulate(struct math_emu_info *); void math_emulate(struct math_emu_info *);
#ifndef CONFIG_X86_32 #ifndef CONFIG_X86_32
asmlinkage void smp_thermal_interrupt(void); asmlinkage void smp_thermal_interrupt(void);
......
...@@ -41,18 +41,18 @@ struct arch_uprobe { ...@@ -41,18 +41,18 @@ struct arch_uprobe {
u8 ixol[MAX_UINSN_BYTES]; u8 ixol[MAX_UINSN_BYTES];
}; };
u16 fixups;
const struct uprobe_xol_ops *ops; const struct uprobe_xol_ops *ops;
union { union {
#ifdef CONFIG_X86_64
unsigned long rip_rela_target_address;
#endif
struct { struct {
s32 offs; s32 offs;
u8 ilen; u8 ilen;
u8 opc1; u8 opc1;
} branch; } branch;
struct {
u8 fixups;
u8 ilen;
} def;
}; };
}; };
......
...@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32) ...@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32)
set_thread_flag(TIF_ADDR32); set_thread_flag(TIF_ADDR32);
/* Mark the associated mm as containing 32-bit tasks. */ /* Mark the associated mm as containing 32-bit tasks. */
if (current->mm)
current->mm->context.ia32_compat = 1;
if (x32) { if (x32) {
clear_thread_flag(TIF_IA32); clear_thread_flag(TIF_IA32);
set_thread_flag(TIF_X32); set_thread_flag(TIF_X32);
if (current->mm)
current->mm->context.ia32_compat = TIF_X32;
current->personality &= ~READ_IMPLIES_EXEC; current->personality &= ~READ_IMPLIES_EXEC;
/* is_compat_task() uses the presence of the x32 /* is_compat_task() uses the presence of the x32
syscall bit flag to determine compat status */ syscall bit flag to determine compat status */
...@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32) ...@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32)
} else { } else {
set_thread_flag(TIF_IA32); set_thread_flag(TIF_IA32);
clear_thread_flag(TIF_X32); clear_thread_flag(TIF_X32);
if (current->mm)
current->mm->context.ia32_compat = TIF_IA32;
current->personality |= force_personality32; current->personality |= force_personality32;
/* Prepare the first "return" to user space */ /* Prepare the first "return" to user space */
current_thread_info()->status |= TS_COMPAT; current_thread_info()->status |= TS_COMPAT;
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/uprobes.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/errno.h> #include <linux/errno.h>
...@@ -136,6 +137,37 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, ...@@ -136,6 +137,37 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
return -1; return -1;
} }
/*
 * fill_trap_info - populate the siginfo delivered for a trap.
 * @regs:   register state at the time of the trap
 * @signr:  signal number to store in @info
 * @trapnr: x86 trap number; selects the si_code / si_addr pair
 * @info:   caller-provided siginfo to fill in
 *
 * For X86_TRAP_DE, X86_TRAP_UD and X86_TRAP_AC the function fills @info
 * and returns it.  For every other trap number @info is left untouched
 * and SEND_SIG_PRIV is returned instead.
 */
static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
				 siginfo_t *info)
{
	unsigned long addr;
	int code;

	if (trapnr == X86_TRAP_DE) {
		code = FPE_INTDIV;
		addr = uprobe_get_trap_addr(regs);
	} else if (trapnr == X86_TRAP_UD) {
		code = ILL_ILLOPN;
		addr = uprobe_get_trap_addr(regs);
	} else if (trapnr == X86_TRAP_AC) {
		/* Alignment check: si_addr is reported as 0. */
		code = BUS_ADRALN;
		addr = 0;
	} else {
		/* No dedicated siginfo for this trap number. */
		return SEND_SIG_PRIV;
	}

	info->si_signo = signr;
	info->si_errno = 0;
	info->si_code = code;
	info->si_addr = (void __user *)addr;

	return info;
}
static void __kprobes static void __kprobes
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
long error_code, siginfo_t *info) long error_code, siginfo_t *info)
...@@ -168,60 +200,42 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, ...@@ -168,60 +200,42 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
} }
#endif #endif
if (info) force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
force_sig_info(signr, info, tsk);
else
force_sig(signr, tsk);
} }
#define DO_ERROR(trapnr, signr, str, name) \ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ unsigned long trapnr, int signr)
{ \ {
enum ctx_state prev_state; \ enum ctx_state prev_state = exception_enter();
\ siginfo_t info;
prev_state = exception_enter(); \
if (notify_die(DIE_TRAP, str, regs, error_code, \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
trapnr, signr) == NOTIFY_STOP) { \ NOTIFY_STOP) {
exception_exit(prev_state); \ conditional_sti(regs);
return; \ do_trap(trapnr, signr, str, regs, error_code,
} \ fill_trap_info(regs, signr, trapnr, &info));
conditional_sti(regs); \ }
do_trap(trapnr, signr, str, regs, error_code, NULL); \
exception_exit(prev_state); \ exception_exit(prev_state);
} }
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ #define DO_ERROR(trapnr, signr, str, name) \
dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \ { \
siginfo_t info; \ do_error_trap(regs, error_code, str, trapnr, signr); \
enum ctx_state prev_state; \
\
info.si_signo = signr; \
info.si_errno = 0; \
info.si_code = sicode; \
info.si_addr = (void __user *)siaddr; \
prev_state = exception_enter(); \
if (notify_die(DIE_TRAP, str, regs, error_code, \
trapnr, signr) == NOTIFY_STOP) { \
exception_exit(prev_state); \
return; \
} \
conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, &info); \
exception_exit(prev_state); \
} }
DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun)
DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
#endif #endif
DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* Runs on IST stack */ /* Runs on IST stack */
...@@ -305,7 +319,7 @@ do_general_protection(struct pt_regs *regs, long error_code) ...@@ -305,7 +319,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
pr_cont("\n"); pr_cont("\n");
} }
force_sig(SIGSEGV, tsk); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
exit: exit:
exception_exit(prev_state); exception_exit(prev_state);
} }
...@@ -488,7 +502,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) ...@@ -488,7 +502,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
* the correct behaviour even in the presence of the asynchronous * the correct behaviour even in the presence of the asynchronous
* IRQ13 behaviour * IRQ13 behaviour
*/ */
void math_error(struct pt_regs *regs, int error_code, int trapnr) static void math_error(struct pt_regs *regs, int error_code, int trapnr)
{ {
struct task_struct *task = current; struct task_struct *task = current;
siginfo_t info; siginfo_t info;
...@@ -518,7 +532,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) ...@@ -518,7 +532,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
task->thread.error_code = error_code; task->thread.error_code = error_code;
info.si_signo = SIGFPE; info.si_signo = SIGFPE;
info.si_errno = 0; info.si_errno = 0;
info.si_addr = (void __user *)regs->ip; info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
if (trapnr == X86_TRAP_MF) { if (trapnr == X86_TRAP_MF) {
unsigned short cwd, swd; unsigned short cwd, swd;
/* /*
...@@ -645,7 +659,7 @@ void math_state_restore(void) ...@@ -645,7 +659,7 @@ void math_state_restore(void)
*/ */
if (unlikely(restore_fpu_checking(tsk))) { if (unlikely(restore_fpu_checking(tsk))) {
drop_init_fpu(tsk); drop_init_fpu(tsk);
force_sig(SIGSEGV, tsk); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
return; return;
} }
......
此差异已折叠。
...@@ -102,6 +102,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u ...@@ -102,6 +102,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u
extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
extern bool __weak is_trap_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
...@@ -130,6 +131,9 @@ extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *r ...@@ -130,6 +131,9 @@ extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *r
#else /* !CONFIG_UPROBES */ #else /* !CONFIG_UPROBES */
struct uprobes_state { struct uprobes_state {
}; };
#define uprobe_get_trap_addr(regs) instruction_pointer(regs)
static inline int static inline int
uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{ {
......
...@@ -127,7 +127,7 @@ struct xol_area { ...@@ -127,7 +127,7 @@ struct xol_area {
*/ */
static bool valid_vma(struct vm_area_struct *vma, bool is_register) static bool valid_vma(struct vm_area_struct *vma, bool is_register)
{ {
vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
if (is_register) if (is_register)
flags |= VM_WRITE; flags |= VM_WRITE;
...@@ -279,18 +279,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t ...@@ -279,18 +279,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
* supported by that architecture then we need to modify is_trap_at_addr and * supported by that architecture then we need to modify is_trap_at_addr and
* uprobe_write_opcode accordingly. This would never be a problem for archs * uprobe_write_opcode accordingly. This would never be a problem for archs
* that have fixed length instructions. * that have fixed length instructions.
*/ *
/*
* uprobe_write_opcode - write the opcode at a given virtual address. * uprobe_write_opcode - write the opcode at a given virtual address.
* @mm: the probed process address space. * @mm: the probed process address space.
* @vaddr: the virtual address to store the opcode. * @vaddr: the virtual address to store the opcode.
* @opcode: opcode to be written at @vaddr. * @opcode: opcode to be written at @vaddr.
* *
* Called with mm->mmap_sem held (for read and with a reference to * Called with mm->mmap_sem held for write.
* mm).
*
* For mm @mm, write the opcode at @vaddr.
* Return 0 (success) or a negative errno. * Return 0 (success) or a negative errno.
*/ */
int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
...@@ -310,21 +305,25 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, ...@@ -310,21 +305,25 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
if (ret <= 0) if (ret <= 0)
goto put_old; goto put_old;
ret = anon_vma_prepare(vma);
if (ret)
goto put_old;
ret = -ENOMEM; ret = -ENOMEM;
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
if (!new_page) if (!new_page)
goto put_old; goto put_old;
__SetPageUptodate(new_page); if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
goto put_new;
__SetPageUptodate(new_page);
copy_highpage(new_page, old_page); copy_highpage(new_page, old_page);
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
ret = anon_vma_prepare(vma);
if (ret)
goto put_new;
ret = __replace_page(vma, vaddr, old_page, new_page); ret = __replace_page(vma, vaddr, old_page, new_page);
if (ret)
mem_cgroup_uncharge_page(new_page);
put_new: put_new:
page_cache_release(new_page); page_cache_release(new_page);
...@@ -1352,6 +1351,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) ...@@ -1352,6 +1351,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
} }
/*
 * uprobe_get_trap_addr - address to report for a trap taken by current.
 * @regs: register state at the time of the trap
 *
 * If the current task has uprobe state with an active uprobe, return the
 * virtual address recorded in that state (presumably the address of the
 * probed instruction -- confirm against where utask->vaddr is set).
 * Otherwise return the instruction pointer taken from @regs.
 */
unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;

	/* An active uprobe is the rare case; hint the branch accordingly. */
	return unlikely(utask && utask->active_uprobe) ?
		utask->vaddr : instruction_pointer(regs);
}
/* /*
* Called with no locks held. * Called with no locks held.
* Called in context of a exiting or a exec-ing thread. * Called in context of a exiting or a exec-ing thread.
......
...@@ -1009,56 +1009,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) ...@@ -1009,56 +1009,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
} }
static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
{ {
bool done; bool done;
write_lock(&tu->filter.rwlock); write_lock(&tu->filter.rwlock);
if (event->hw.tp_target) { if (event->hw.tp_target) {
/* list_del(&event->hw.tp_list);
* event->parent != NULL means copy_process(), we can avoid
* uprobe_apply(). current->mm must be probed and we can rely
* on dup_mmap() which preserves the already installed bp's.
*
* attr.enable_on_exec means that exec/mmap will install the
* breakpoints we need.
*/
done = tu->filter.nr_systemwide || done = tu->filter.nr_systemwide ||
event->parent || event->attr.enable_on_exec || (event->hw.tp_target->flags & PF_EXITING) ||
uprobe_filter_event(tu, event); uprobe_filter_event(tu, event);
list_add(&event->hw.tp_list, &tu->filter.perf_events);
} else { } else {
tu->filter.nr_systemwide--;
done = tu->filter.nr_systemwide; done = tu->filter.nr_systemwide;
tu->filter.nr_systemwide++;
} }
write_unlock(&tu->filter.rwlock); write_unlock(&tu->filter.rwlock);
if (!done) if (!done)
uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
return 0; return 0;
} }
static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
{ {
bool done; bool done;
int err;
write_lock(&tu->filter.rwlock); write_lock(&tu->filter.rwlock);
if (event->hw.tp_target) { if (event->hw.tp_target) {
list_del(&event->hw.tp_list); /*
* event->parent != NULL means copy_process(), we can avoid
* uprobe_apply(). current->mm must be probed and we can rely
* on dup_mmap() which preserves the already installed bp's.
*
* attr.enable_on_exec means that exec/mmap will install the
* breakpoints we need.
*/
done = tu->filter.nr_systemwide || done = tu->filter.nr_systemwide ||
(event->hw.tp_target->flags & PF_EXITING) || event->parent || event->attr.enable_on_exec ||
uprobe_filter_event(tu, event); uprobe_filter_event(tu, event);
list_add(&event->hw.tp_list, &tu->filter.perf_events);
} else { } else {
tu->filter.nr_systemwide--;
done = tu->filter.nr_systemwide; done = tu->filter.nr_systemwide;
tu->filter.nr_systemwide++;
} }
write_unlock(&tu->filter.rwlock); write_unlock(&tu->filter.rwlock);
if (!done) err = 0;
uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); if (!done) {
err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
return 0; if (err)
uprobe_perf_close(tu, event);
}
return err;
} }
static bool uprobe_perf_filter(struct uprobe_consumer *uc, static bool uprobe_perf_filter(struct uprobe_consumer *uc,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册