提交 30bfa7b3 编写于 作者: A Andy Lutomirski 提交者: Thomas Gleixner

x86/entry: Restore traditional SYSENTER calling convention

It turns out that some Android versions hardcode the SYSENTER
calling convention.  This is buggy and will cause problems no
matter what the kernel does.  Nonetheless, we should try to
support it.

Credit goes to Linus for pointing out a clean way to handle
the SYSENTER/SYSCALL clobber differences while preserving
straightforward DWARF annotations.

I believe that the original offending Android commit was:

https://android.googlesource.com/platform%2Fbionic/+/7dc3684d7a2587e43e6d2a8e0e3f39bf759bd535

Reported-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-and-tested-by: Borislav Petkov <bp@alien8.de>
Cc: <mark.gross@intel.com>
Cc: Su Tao <tao.su@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: <frank.wang@intel.com>
Cc: <borun.fu@intel.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Mingwei Shi <mingwei.shi@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
上级 6a613ac6
...@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) ...@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
regs->ip = landing_pad; regs->ip = landing_pad;
/* /*
* Fetch ECX from where the vDSO stashed it. * Fetch EBP from where the vDSO stashed it.
* *
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention! * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
*/ */
...@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) ...@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
* Micro-optimization: the pointer we're following is explicitly * Micro-optimization: the pointer we're following is explicitly
* 32 bits, so it can't be out of range. * 32 bits, so it can't be out of range.
*/ */
__get_user(*(u32 *)&regs->cx, __get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp) (u32 __user __force *)(unsigned long)(u32)regs->sp)
#else #else
get_user(*(u32 *)&regs->cx, get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp) (u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif #endif
) { ) {
......
...@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32) ...@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp: sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */ pushl $__USER_DS /* pt_regs->ss */
pushl %ecx /* pt_regs->sp (stashed in cx) */ pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */ pushfl /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */ pushl $__USER_CS /* pt_regs->cs */
......
...@@ -63,7 +63,7 @@ ENTRY(entry_SYSENTER_compat) ...@@ -63,7 +63,7 @@ ENTRY(entry_SYSENTER_compat)
/* Construct struct pt_regs on stack */ /* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */ pushq $__USER32_DS /* pt_regs->ss */
pushq %rcx /* pt_regs->sp */ pushq %rbp /* pt_regs->sp (stashed in bp) */
/* /*
* Push flags. This is nasty. First, interrupts are currently * Push flags. This is nasty. First, interrupts are currently
...@@ -82,14 +82,14 @@ ENTRY(entry_SYSENTER_compat) ...@@ -82,14 +82,14 @@ ENTRY(entry_SYSENTER_compat)
pushq %rdi /* pt_regs->di */ pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */ pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */ pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx (will be overwritten) */ pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */ pushq $-ENOSYS /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 = 0 */ pushq %r8 /* pt_regs->r8 = 0 */
pushq %r8 /* pt_regs->r9 = 0 */ pushq %r8 /* pt_regs->r9 = 0 */
pushq %r8 /* pt_regs->r10 = 0 */ pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */ pushq %r8 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */ pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp */ pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */ pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */ pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */ pushq %r8 /* pt_regs->r14 = 0 */
...@@ -179,7 +179,7 @@ ENTRY(entry_SYSCALL_compat) ...@@ -179,7 +179,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %rdi /* pt_regs->di */ pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */ pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */ pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx (will be overwritten) */ pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */ pushq $-ENOSYS /* pt_regs->ax */
xorq %r8,%r8 xorq %r8,%r8
pushq %r8 /* pt_regs->r8 = 0 */ pushq %r8 /* pt_regs->r8 = 0 */
...@@ -187,7 +187,7 @@ ENTRY(entry_SYSCALL_compat) ...@@ -187,7 +187,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %r8 /* pt_regs->r10 = 0 */ pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */ pushq %r8 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */ pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp */ pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */ pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */ pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */ pushq %r8 /* pt_regs->r14 = 0 */
......
...@@ -21,35 +21,67 @@ __kernel_vsyscall: ...@@ -21,35 +21,67 @@ __kernel_vsyscall:
/* /*
* Reshuffle regs so that all of any of the entry instructions * Reshuffle regs so that all of any of the entry instructions
* will preserve enough state. * will preserve enough state.
*
* A really nice entry sequence would be:
* pushl %edx
* pushl %ecx
* movl %esp, %ecx
*
* Unfortunately, naughty Android versions between July and December
* 2015 actually hardcode the traditional Linux SYSENTER entry
* sequence. That is severely broken for a number of reasons (ask
* anyone with an AMD CPU, for example). Nonetheless, we try to keep
* it working approximately as well as it ever worked.
*
* This link may elucidate some of the history:
* https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
* personally, I find it hard to understand what's going on there.
*
* Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
* Execute an indirect call to the address in the AT_SYSINFO auxv
* entry. That is the ONLY correct way to make a fast 32-bit system
* call on Linux. (Open-coding int $0x80 is also fine, but it's
* slow.)
*/ */
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0
pushl %edx pushl %edx
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0 CFI_REL_OFFSET edx, 0
pushl %ecx pushl %ebp
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0 CFI_REL_OFFSET ebp, 0
movl %esp, %ecx
#define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter"
#define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall"
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */ /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \ ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
"syscall", X86_FEATURE_SYSCALL32 SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32
#else #else
ALTERNATIVE "", "sysenter", X86_FEATURE_SEP ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
#endif #endif
/* Enter using int $0x80 */ /* Enter using int $0x80 */
movl (%esp), %ecx
int $0x80 int $0x80
GLOBAL(int80_landing_pad) GLOBAL(int80_landing_pad)
/* Restore ECX and EDX in case they were clobbered. */ /*
popl %ecx * Restore EDX and ECX in case they were clobbered. EBP is not
CFI_RESTORE ecx * clobbered (the kernel restores it), but it's cleaner and
* probably faster to pop it than to adjust ESP using addl.
*/
popl %ebp
CFI_RESTORE ebp
CFI_ADJUST_CFA_OFFSET -4 CFI_ADJUST_CFA_OFFSET -4
popl %edx popl %edx
CFI_RESTORE edx CFI_RESTORE edx
CFI_ADJUST_CFA_OFFSET -4 CFI_ADJUST_CFA_OFFSET -4
popl %ecx
CFI_RESTORE ecx
CFI_ADJUST_CFA_OFFSET -4
ret ret
CFI_ENDPROC CFI_ENDPROC
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册