提交 b57c0b51 编写于 作者: Ingo Molnar

Merge tag 'pr-20150201-x86-entry' of...

Merge tag 'pr-20150201-x86-entry' of git://git.kernel.org/pub/scm/linux/kernel/git/luto/linux into x86/asm

Pull "x86: Entry cleanups and a bugfix for 3.20" from Andy Lutomirski:

 " This fixes a bug in the RCU code I added in ist_enter.  It also includes
   the sysret stuff discussed here:

     http://lkml.kernel.org/g/cover.1421453410.git.luto%40amacapital.net "
Signed-off-by: Ingo Molnar <mingo@kernel.org>
...@@ -361,15 +361,12 @@ system_call_fastpath: ...@@ -361,15 +361,12 @@ system_call_fastpath:
* Has incomplete stack frame and undefined top of stack. * Has incomplete stack frame and undefined top of stack.
*/ */
ret_from_sys_call: ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/* edi: flagmask */ jnz int_ret_from_sys_call_fixup /* Go to the slow path */
sysret_check:
LOCKDEP_SYS_EXIT LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_NONE) DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF TRACE_IRQS_OFF
movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
andl %edi,%edx
jnz sysret_careful
CFI_REMEMBER_STATE CFI_REMEMBER_STATE
/* /*
* sysretq will re-enable interrupts: * sysretq will re-enable interrupts:
...@@ -383,49 +380,10 @@ sysret_check: ...@@ -383,49 +380,10 @@ sysret_check:
USERGS_SYSRET64 USERGS_SYSRET64
CFI_RESTORE_STATE CFI_RESTORE_STATE
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
SCHEDULE_USER
popq_cfi %rdi
jmp sysret_check
/* Handle a signal */ int_ret_from_sys_call_fixup:
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
bt $TIF_SYSCALL_AUDIT,%edx
jc sysret_audit
#endif
/*
* We have a signal, or exit tracing or single-step.
* These all wind up with the iret return path anyway,
* so just join that path right now.
*/
FIXUP_TOP_OF_STACK %r11, -ARGOFFSET FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
jmp int_check_syscall_exit_work jmp int_ret_from_sys_call
#ifdef CONFIG_AUDITSYSCALL
/*
* Return fast path for syscall audit. Call __audit_syscall_exit()
* directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
* masked off.
*/
sysret_audit:
movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
setbe %al /* 1 if so, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
call __audit_syscall_exit
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
/* Do syscall tracing */ /* Do syscall tracing */
tracesys: tracesys:
...@@ -794,6 +752,60 @@ retint_swapgs: /* return to user-space */ ...@@ -794,6 +752,60 @@ retint_swapgs: /* return to user-space */
*/ */
DISABLE_INTERRUPTS(CLBR_ANY) DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ TRACE_IRQS_IRETQ
/*
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context.
*/
movq (RCX-R11)(%rsp), %rcx
cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */
jne opportunistic_sysret_failed
/*
* On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
* in kernel space. This essentially lets the user take over
* the kernel, since userspace controls RSP. It's not worth
* testing for canonicalness exactly -- this check detects any
* of the 17 high bits set, which is true for non-canonical
* or kernel addresses. (This will pessimize vsyscall=native.
* Big deal.)
*
* If virtual addresses ever become wider, this will need
* to be updated to remain correct on both old and new CPUs.
*/
.ifne __VIRTUAL_MASK_SHIFT - 47
.error "virtual address width changed -- sysret checks need update"
.endif
shr $__VIRTUAL_MASK_SHIFT, %rcx
jnz opportunistic_sysret_failed
cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */
jne opportunistic_sysret_failed
movq (R11-ARGOFFSET)(%rsp), %r11
cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */
jne opportunistic_sysret_failed
testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */
jnz opportunistic_sysret_failed
/* nothing to check for RSP */
cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */
jne opportunistic_sysret_failed
/*
* We win! This label is here just for ease of understanding
* perf profiles. Nothing jumps here.
*/
irq_return_via_sysret:
CFI_REMEMBER_STATE
RESTORE_ARGS 1,8,1
movq (RSP-RIP)(%rsp),%rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
opportunistic_sysret_failed:
SWAPGS SWAPGS
jmp restore_args jmp restore_args
......
...@@ -110,15 +110,11 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) ...@@ -110,15 +110,11 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
enum ctx_state ist_enter(struct pt_regs *regs) enum ctx_state ist_enter(struct pt_regs *regs)
{ {
/* enum ctx_state prev_state;
* We are atomic because we're on the IST stack (or we're on x86_32,
* in which case we still shouldn't schedule.
*/
preempt_count_add(HARDIRQ_OFFSET);
if (user_mode_vm(regs)) { if (user_mode_vm(regs)) {
/* Other than that, we're just an exception. */ /* Other than that, we're just an exception. */
return exception_enter(); prev_state = exception_enter();
} else { } else {
/* /*
* We might have interrupted pretty much anything. In * We might have interrupted pretty much anything. In
...@@ -127,12 +123,27 @@ enum ctx_state ist_enter(struct pt_regs *regs) ...@@ -127,12 +123,27 @@ enum ctx_state ist_enter(struct pt_regs *regs)
* but we need to notify RCU. * but we need to notify RCU.
*/ */
rcu_nmi_enter(); rcu_nmi_enter();
return IN_KERNEL; /* the value is irrelevant. */ prev_state = IN_KERNEL; /* the value is irrelevant. */
} }
/*
* We are atomic because we're on the IST stack (or we're on x86_32,
* in which case we still shouldn't schedule).
*
* This must be after exception_enter(), because exception_enter()
* won't do anything if in_interrupt() returns true.
*/
preempt_count_add(HARDIRQ_OFFSET);
/* This code is a bit fragile. Test it. */
rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");
return prev_state;
} }
void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
{ {
/* Must be before exception_exit. */
preempt_count_sub(HARDIRQ_OFFSET); preempt_count_sub(HARDIRQ_OFFSET);
if (user_mode_vm(regs)) if (user_mode_vm(regs))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册