diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad9bcad4b8c48c62279d7b73eb0499305d7..433d2e5c98a7976951b134c90e4bf4984c8bfe18 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -759,6 +759,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+extern void set_task_blockstep(struct task_struct *task, bool on);
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index f3971bbcd1de3ee52ddda97b8bd02fbcc815aef1..8ff8be7835ab47eec29a13b013bff17722418b6b 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -42,10 +42,11 @@ struct arch_uprobe {
 };
 
 struct arch_uprobe_task {
-	unsigned long			saved_trap_nr;
 #ifdef CONFIG_X86_64
 	unsigned long			saved_scratch_register;
 #endif
+	unsigned int			saved_trap_nr;
+	unsigned int			saved_tf;
 };
 
 extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index c346d116148866758ca5e602684eb91fcb77f219..cd3b2438a9800b8568b0d79788ef3fdb438aa1ca 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child)
 	return 1;
 }
 
+void set_task_blockstep(struct task_struct *task, bool on)
+{
+	unsigned long debugctl;
+
+	/*
+	 * Ensure irq/preemption can't change debugctl in between.
+	 * Note also that both TIF_BLOCKSTEP and debugctl should
+	 * be changed atomically wrt preemption.
+	 * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
+	 * wrong if task != current, SIGKILL can wakeup the stopped
+	 * tracee and set/clear can play with the running task, this
+	 * can confuse the next __switch_to_xtra().
+	 */
+	local_irq_disable();
+	debugctl = get_debugctlmsr();
+	if (on) {
+		debugctl |= DEBUGCTLMSR_BTF;
+		set_tsk_thread_flag(task, TIF_BLOCKSTEP);
+	} else {
+		debugctl &= ~DEBUGCTLMSR_BTF;
+		clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
+	}
+	if (task == current)
+		update_debugctlmsr(debugctl);
+	local_irq_enable();
+}
+
 /*
  * Enable single or block step.
  */
@@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block)
 	 * So no one should try to use debugger block stepping in a program
 	 * that uses user-mode single stepping itself.
	 */
-	if (enable_single_step(child) && block) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl |= DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		set_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	} else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl &= ~DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	}
+	if (enable_single_step(child) && block)
+		set_task_blockstep(child, true);
+	else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
+		set_task_blockstep(child, false);
 }
 
 void user_enable_single_step(struct task_struct *child)
@@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child)
 	/*
 	 * Make sure block stepping (BTF) is disabled.
 	 */
-	if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl &= ~DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	}
+	if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
+		set_task_blockstep(child, false);
 
 	/* Always clear TIF_SINGLESTEP... */
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
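For context on what set_task_blockstep() serves: DEBUGCTLMSR_BTF is the bit behind ptrace's PTRACE_SINGLEBLOCK request, which reaches enable_step(child, true) via user_enable_block_step(). A minimal userspace sketch of that consumer (x86 only; PTRACE_SINGLEBLOCK is declared in <sys/ptrace.h> on glibc; error handling trimmed, the loop body is an arbitrary stand-in):

	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t pid = fork();

		if (pid == 0) {
			ptrace(PTRACE_TRACEME, 0, NULL, NULL);
			raise(SIGSTOP);			/* hand control to the tracer */
			for (volatile int i = 0; i < 100; i++)
				;			/* a few branches to block-step over */
			_exit(0);
		}

		waitpid(pid, NULL, 0);			/* child stopped itself */
		/* resume until the next branch/call/ret, not the next insn */
		if (ptrace(PTRACE_SINGLEBLOCK, pid, NULL, NULL) == -1)
			perror("PTRACE_SINGLEBLOCK");
		waitpid(pid, NULL, 0);			/* SIGTRAP after the taken branch */
		ptrace(PTRACE_CONT, pid, NULL, NULL);
		waitpid(pid, NULL, 0);
		return 0;
	}

The tracee stops with SIGTRAP only once control transfers, which is exactly the BTF semantics the helper toggles together with TIF_BLOCKSTEP.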
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 36fd42091fa7283d71e189ddb55cf29a5e940790..9538f00827a9b25257debf63aaa438ef6e40d68b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,6 +41,9 @@
 /* Adjust the return address of a call insn */
 #define UPROBE_FIX_CALL	0x2
 
+/* Instruction will modify TF, don't change it */
+#define UPROBE_FIX_SETF	0x4
+
 #define UPROBE_FIX_RIP_AX	0x8000
 #define UPROBE_FIX_RIP_CX	0x4000
 
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
 	insn_get_opcode(insn);	/* should be a nop */
 
 	switch (OPCODE1(insn)) {
+	case 0x9d:
+		/* popf */
+		auprobe->fixups |= UPROBE_FIX_SETF;
+		break;
 	case 0xc3:		/* ret/lret */
 	case 0xcb:
 	case 0xc2:
@@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
  * Skip these instructions as per the currently known x86 ISA.
  * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
  */
-bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	int i;
 
@@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	}
 	return false;
 }
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	bool ret = __skip_sstep(auprobe, regs);
+	if (ret && (regs->flags & X86_EFLAGS_TF))
+		send_sig(SIGTRAP, current, 0);
+	return ret;
+}
+
+void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
+{
+	struct task_struct *task = current;
+	struct arch_uprobe_task *autask = &task->utask->autask;
+	struct pt_regs *regs = task_pt_regs(task);
+
+	autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
+
+	regs->flags |= X86_EFLAGS_TF;
+	if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
+		set_task_blockstep(task, false);
+}
+
+void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
+{
+	struct task_struct *task = current;
+	struct arch_uprobe_task *autask = &task->utask->autask;
+	bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
+	struct pt_regs *regs = task_pt_regs(task);
+	/*
+	 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
+	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
+	 * make it right.
+	 */
+	if (unlikely(trapped)) {
+		if (!autask->saved_tf)
+			regs->flags &= ~X86_EFLAGS_TF;
+	} else {
+		if (autask->saved_tf)
+			send_sig(SIGTRAP, task, 0);
+		else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+			regs->flags &= ~X86_EFLAGS_TF;
+	}
+}
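The new popf case exists because a popf single-stepped out-of-line may itself set or clear TF, and arch_uprobe_disable_step() must not overwrite that result; UPROBE_FIX_SETF records this at analysis time. The userspace contract being preserved is the classic set-TF-on-yourself trick. A standalone sketch, x86-64 with GNU basic asm, run outside a debugger (handler and counter names are mine):

	#include <signal.h>
	#include <stdio.h>

	static volatile sig_atomic_t traps;

	static void on_trap(int sig)
	{
		traps++;
	}

	int main(void)
	{
		signal(SIGTRAP, on_trap);

		/* popf sets EFLAGS.TF: every insn after it raises SIGTRAP */
		__asm__ volatile ("pushf; orl $0x100, (%rsp); popf");
		__asm__ volatile ("nop; nop; nop");
		/* popf clears TF again and the trap stream stops */
		__asm__ volatile ("pushf; andl $0xfffffeff, (%rsp); popf");

		printf("observed %d single-step traps\n", (int)traps);
		return 0;
	}

Place a uprobe on either popf and, without UPROBE_FIX_SETF, the kernel would restore its saved idea of TF over the value the instruction just wrote, changing how many traps this program sees.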
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3667c332e61de85b70e656cd54fd89b73365458b..255661d48834ba0a5e500ae23a1d175bf3f15a07 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -446,7 +446,8 @@ extern int get_dumpable(struct mm_struct *mm);
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
 #define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
 
-#define MMF_HAS_UPROBES		19	/* might have uprobes */
+#define MMF_HAS_UPROBES		19	/* has uprobes */
+#define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 6d4fe79a1a6af97eb256fac3792a39b5c00c8050..e6f0331e3d456281c87370e8c0275961c04ca084 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -112,6 +112,8 @@ extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
 extern void uprobe_free_utask(struct task_struct *t);
 extern void uprobe_copy_process(struct task_struct *t);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch);
+extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch);
 extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
 extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 1666632e6edfcfc07c867c91d5b7039dc84ce2f6..912ef48d28ab485d2948b46d6862fbed91be84b5 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -411,11 +411,10 @@ static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
 static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
 {
 	struct uprobe *uprobe;
-	unsigned long flags;
 
-	spin_lock_irqsave(&uprobes_treelock, flags);
+	spin_lock(&uprobes_treelock);
 	uprobe = __find_uprobe(inode, offset);
-	spin_unlock_irqrestore(&uprobes_treelock, flags);
+	spin_unlock(&uprobes_treelock);
 
 	return uprobe;
 }
@@ -462,12 +461,11 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
  */
 static struct uprobe *insert_uprobe(struct uprobe *uprobe)
 {
-	unsigned long flags;
 	struct uprobe *u;
 
-	spin_lock_irqsave(&uprobes_treelock, flags);
+	spin_lock(&uprobes_treelock);
 	u = __insert_uprobe(uprobe);
-	spin_unlock_irqrestore(&uprobes_treelock, flags);
+	spin_unlock(&uprobes_treelock);
 
 	/* For now assume that the instruction need not be single-stepped */
 	uprobe->flags |= UPROBE_SKIP_SSTEP;
@@ -686,7 +684,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 		set_bit(MMF_HAS_UPROBES, &mm->flags);
 
 	ret = set_swbp(&uprobe->arch, mm, vaddr);
-	if (ret && first_uprobe)
+	if (!ret)
+		clear_bit(MMF_RECALC_UPROBES, &mm->flags);
+	else if (first_uprobe)
 		clear_bit(MMF_HAS_UPROBES, &mm->flags);
 
 	return ret;
@@ -695,6 +695,11 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 static void
 remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
+	/* can happen if uprobe_register() fails */
+	if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
+		return;
+
+	set_bit(MMF_RECALC_UPROBES, &mm->flags);
 	set_orig_insn(&uprobe->arch, mm, vaddr);
 }
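The net effect of the install/remove changes: MMF_HAS_UPROBES may now overshoot, and MMF_RECALC_UPROBES flags it as possibly stale; a successful install makes the hint exact again, while removal only marks it for lazy recalculation. A toy compile-and-run model of that protocol (struct and helper names here are illustrative, not kernel API):

	#include <stdio.h>

	#define MMF_HAS_UPROBES		(1u << 19)	/* "this mm may contain breakpoints" */
	#define MMF_RECALC_UPROBES	(1u << 20)	/* "the bit above may be stale" */

	struct toy_mm { unsigned int flags; };

	/* set_swbp() succeeded: the hint is known-true again */
	static void breakpoint_installed(struct toy_mm *mm)
	{
		mm->flags |= MMF_HAS_UPROBES;
		mm->flags &= ~MMF_RECALC_UPROBES;
	}

	/* a breakpoint went away: the hint may overshoot from now on */
	static void breakpoint_removed(struct toy_mm *mm)
	{
		if (mm->flags & MMF_HAS_UPROBES)
			mm->flags |= MMF_RECALC_UPROBES;
	}

	/* breakpoint slow path, cf. find_active_uprobe() further down */
	static void recalc(struct toy_mm *mm, int some_vma_has_uprobes)
	{
		if (!(mm->flags & MMF_RECALC_UPROBES))
			return;
		mm->flags &= ~MMF_RECALC_UPROBES;
		if (!some_vma_has_uprobes)
			mm->flags &= ~MMF_HAS_UPROBES;
	}

	int main(void)
	{
		struct toy_mm mm = { 0 };

		breakpoint_installed(&mm);
		breakpoint_removed(&mm);
		recalc(&mm, 0);
		printf("flags after recalc: %#x\n", mm.flags);	/* 0 */
		return 0;
	}

The walk that answers "does any vma still have a probe?" thus runs at most once per stale period, in the trap slow path, instead of on every unregister or munmap.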
@@ -705,11 +710,9 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
  */
 static void delete_uprobe(struct uprobe *uprobe)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&uprobes_treelock, flags);
+	spin_lock(&uprobes_treelock);
 	rb_erase(&uprobe->rb_node, &uprobes_tree);
-	spin_unlock_irqrestore(&uprobes_treelock, flags);
+	spin_unlock(&uprobes_treelock);
 	iput(uprobe->inode);
 	put_uprobe(uprobe);
 	atomic_dec(&uprobe_events);
@@ -897,7 +900,8 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	}
 
 	mutex_unlock(uprobes_hash(inode));
-	put_uprobe(uprobe);
+	if (uprobe)
+		put_uprobe(uprobe);
 
 	return ret;
 }
@@ -967,7 +971,6 @@ static void build_probe_list(struct inode *inode,
 			     struct list_head *head)
 {
 	loff_t min, max;
-	unsigned long flags;
 	struct rb_node *n, *t;
 	struct uprobe *u;
 
@@ -975,7 +978,7 @@ static void build_probe_list(struct inode *inode,
 	min = vaddr_to_offset(vma, start);
 	max = min + (end - start) - 1;
 
-	spin_lock_irqsave(&uprobes_treelock, flags);
+	spin_lock(&uprobes_treelock);
 	n = find_node_in_range(inode, min, max);
 	if (n) {
 		for (t = n; t; t = rb_prev(t)) {
@@ -993,7 +996,7 @@ static void build_probe_list(struct inode *inode,
 			atomic_inc(&u->ref);
 		}
 	}
-	spin_unlock_irqrestore(&uprobes_treelock, flags);
+	spin_unlock(&uprobes_treelock);
 }
 
 /*
@@ -1030,6 +1033,25 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	return 0;
 }
 
+static bool
+vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+	loff_t min, max;
+	struct inode *inode;
+	struct rb_node *n;
+
+	inode = vma->vm_file->f_mapping->host;
+
+	min = vaddr_to_offset(vma, start);
+	max = min + (end - start) - 1;
+
+	spin_lock(&uprobes_treelock);
+	n = find_node_in_range(inode, min, max);
+	spin_unlock(&uprobes_treelock);
+
+	return !!n;
+}
+
 /*
  * Called in context of a munmap of a vma.
  */
@@ -1041,10 +1063,12 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 	if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
 		return;
 
-	if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
+	if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) ||
+	     test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags))
 		return;
 
-	/* TODO: unmapping uprobe(s) will need more work */
+	if (vma_has_uprobes(vma, start, end))
+		set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
 }
 
 /* Slot allocation for XOL */
@@ -1150,8 +1174,11 @@ void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
 {
 	newmm->uprobes_state.xol_area = NULL;
 
-	if (test_bit(MMF_HAS_UPROBES, &oldmm->flags))
+	if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
 		set_bit(MMF_HAS_UPROBES, &newmm->flags);
+		/* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
+		set_bit(MMF_RECALC_UPROBES, &newmm->flags);
+	}
 }
 
 /*
@@ -1369,6 +1396,25 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
 	return false;
 }
 
+static void mmf_recalc_uprobes(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!valid_vma(vma, false))
+			continue;
+		/*
+		 * This is not strictly accurate, we can race with
+		 * uprobe_unregister() and see the already removed
+		 * uprobe if delete_uprobe() was not yet called.
+		 */
+		if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
+			return;
+	}
+
+	clear_bit(MMF_HAS_UPROBES, &mm->flags);
+}
+
 static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 {
 	struct mm_struct *mm = current->mm;
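vma_has_uprobes() and mmf_recalc_uprobes() both boil down to one rbtree range query keyed by file offset; the virtual-to-offset translation is the same vaddr_to_offset() arithmetic build_probe_list() uses. A userspace rendering of just that arithmetic, assuming 4K pages (the example values are mine):

	#include <stdio.h>

	#define PAGE_SHIFT 12

	/* mirror of vaddr_to_offset(): file offset backing a vaddr */
	static long long vaddr_to_offset(unsigned long vm_start,
					 unsigned long vm_pgoff,
					 unsigned long vaddr)
	{
		return ((long long)vm_pgoff << PAGE_SHIFT) + (vaddr - vm_start);
	}

	int main(void)
	{
		/* vma mapping file page 0 at 0x400000; query the probe
		 * range for an unmap of [0x401000, 0x403000) */
		long long min = vaddr_to_offset(0x400000, 0, 0x401000);
		long long max = min + (0x403000 - 0x401000) - 1;

		printf("look for probes with offset in [%#llx, %#llx]\n", min, max);
		return 0;
	}

The inclusive max matches find_node_in_range()'s closed-interval search.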
@@ -1390,11 +1436,24 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	} else {
 		*is_swbp = -EFAULT;
 	}
+
+	if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
+		mmf_recalc_uprobes(mm);
 	up_read(&mm->mmap_sem);
 
 	return uprobe;
 }
 
+void __weak arch_uprobe_enable_step(struct arch_uprobe *arch)
+{
+	user_enable_single_step(current);
+}
+
+void __weak arch_uprobe_disable_step(struct arch_uprobe *arch)
+{
+	user_disable_single_step(current);
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -1441,7 +1500,7 @@ static void handle_swbp(struct pt_regs *regs)
 
 	utask->state = UTASK_SSTEP;
 	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
-		user_enable_single_step(current);
+		arch_uprobe_enable_step(&uprobe->arch);
 		return;
 	}
 
@@ -1477,10 +1536,10 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 	else
 		WARN_ON_ONCE(1);
 
+	arch_uprobe_disable_step(&uprobe->arch);
 	put_uprobe(uprobe);
 	utask->active_uprobe = NULL;
 	utask->state = UTASK_RUNNING;
-	user_disable_single_step(current);
 	xol_free_insn_slot(current);
 
 	spin_lock_irq(&current->sighand->siglock);
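The __weak bodies above are the portable fallback: any architecture without its own TF bookkeeping keeps the old user_{enable,disable}_single_step() behaviour, and x86 wins at link time with the strong definitions from arch/x86/kernel/uprobes.c. A self-contained illustration of that weak/strong linkage (the demo symbol name is mine):

	#include <stdio.h>

	/* generic fallback; dropped if a strong definition is linked in */
	void __attribute__((weak)) arch_hook_demo(void)
	{
		puts("generic: user_enable_single_step()");
	}

	/*
	 * An "arch" file would provide the strong override:
	 *
	 *	void arch_hook_demo(void)
	 *	{
	 *		puts("x86: set TF, drop TIF_BLOCKSTEP");
	 *	}
	 */

	int main(void)
	{
		arch_hook_demo();
		return 0;
	}

Linking both files makes the strong symbol win, which is exactly how the x86 versions replace the __weak defaults in uprobes.c.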