提交 650e5455 编写于 作者: L Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "A handful of x86 fixes:

   - a syscall ABI fix, fixing an Android breakage
   - a Xen PV guest fix relating to the RTC device, causing a
     non-working console
   - a Xen guest syscall stack frame fix
   - an MCE hotplug CPU crash fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/numachip: Fix NumaConnect2 MMCFG PCI access
  x86/entry: Restore traditional SYSENTER calling convention
  x86/entry: Fix some comments
  x86/paravirt: Prevent rtc_cmos platform device init on PV guests
  x86/xen: Avoid fast syscall path for Xen PV guests
  x86/mce: Ensure offline CPUs don't participate in rendezvous process
......@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
regs->ip = landing_pad;
/*
* Fetch ECX from where the vDSO stashed it.
* Fetch EBP from where the vDSO stashed it.
*
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
*/
......@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
* Micro-optimization: the pointer we're following is explicitly
* 32 bits, so it can't be out of range.
*/
__get_user(*(u32 *)&regs->cx,
__get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
get_user(*(u32 *)&regs->cx,
get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
) {
......
......@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
pushl %ecx /* pt_regs->cx */
pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
......@@ -308,8 +308,9 @@ sysenter_past_esp:
movl %esp, %eax
call do_fast_syscall_32
testl %eax, %eax
jz .Lsyscall_32_done
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
/* Opportunistic SYSEXIT */
TRACE_IRQS_ON /* User mode traces as IRQs on. */
......
......@@ -63,7 +63,7 @@ ENTRY(entry_SYSENTER_compat)
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
pushq %rcx /* pt_regs->sp */
pushq %rbp /* pt_regs->sp (stashed in bp) */
/*
* Push flags. This is nasty. First, interrupts are currently
......@@ -82,14 +82,14 @@ ENTRY(entry_SYSENTER_compat)
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx (will be overwritten) */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 = 0 */
pushq %r8 /* pt_regs->r9 = 0 */
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */
......@@ -121,8 +121,9 @@ sysenter_flags_fixed:
movq %rsp, %rdi
call do_fast_syscall_32
testl %eax, %eax
jz .Lsyscall_32_done
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
jmp sysret32_from_system_call
sysenter_fix_flags:
......@@ -178,7 +179,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx (will be overwritten) */
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
xorq %r8,%r8
pushq %r8 /* pt_regs->r8 = 0 */
......@@ -186,7 +187,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */
......@@ -200,8 +201,9 @@ ENTRY(entry_SYSCALL_compat)
movq %rsp, %rdi
call do_fast_syscall_32
testl %eax, %eax
jz .Lsyscall_32_done
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
/* Opportunistic SYSRET */
sysret32_from_system_call:
......
/*
* Code for the vDSO. This version uses the old int $0x80 method.
* AT_SYSINFO entry point
*/
#include <asm/dwarf2.h>
......@@ -21,35 +21,67 @@ __kernel_vsyscall:
/*
* Reshuffle regs so that all of any of the entry instructions
* will preserve enough state.
*
* A really nice entry sequence would be:
* pushl %edx
* pushl %ecx
* movl %esp, %ecx
*
* Unfortunately, naughty Android versions between July and December
* 2015 actually hardcode the traditional Linux SYSENTER entry
* sequence. That is severely broken for a number of reasons (ask
* anyone with an AMD CPU, for example). Nonetheless, we try to keep
* it working approximately as well as it ever worked.
*
* This link may eludicate some of the history:
* https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
* personally, I find it hard to understand what's going on there.
*
* Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
* Execute an indirect call to the address in the AT_SYSINFO auxv
* entry. That is the ONLY correct way to make a fast 32-bit system
* call on Linux. (Open-coding int $0x80 is also fine, but it's
* slow.)
*/
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0
pushl %ecx
pushl %ebp
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0
movl %esp, %ecx
CFI_REL_OFFSET ebp, 0
#define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter"
#define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall"
#ifdef CONFIG_X86_64
/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
"syscall", X86_FEATURE_SYSCALL32
ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32
#else
ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
#endif
/* Enter using int $0x80 */
movl (%esp), %ecx
int $0x80
GLOBAL(int80_landing_pad)
/* Restore ECX and EDX in case they were clobbered. */
popl %ecx
CFI_RESTORE ecx
/*
* Restore EDX and ECX in case they were clobbered. EBP is not
* clobbered (the kernel restores it), but it's cleaner and
* probably faster to pop it than to adjust ESP using addl.
*/
popl %ebp
CFI_RESTORE ebp
CFI_ADJUST_CFA_OFFSET -4
popl %edx
CFI_RESTORE edx
CFI_ADJUST_CFA_OFFSET -4
popl %ecx
CFI_RESTORE ecx
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
......
......@@ -216,6 +216,7 @@
#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
......
......@@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
return pv_info.paravirt_enabled;
}
static inline int paravirt_has_feature(unsigned int feature)
{
WARN_ON_ONCE(!pv_info.paravirt_enabled);
return (pv_info.features & feature);
}
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
......
......@@ -70,9 +70,14 @@ struct pv_info {
#endif
int paravirt_enabled;
unsigned int features; /* valid only if paravirt_enabled is set */
const char *name;
};
#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
/* Supported features */
#define PV_SUPPORTED_RTC (1<<0)
struct pv_init_ops {
/*
* Patch may replace one of the defined code sequences with
......
......@@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
#else
#define __cpuid native_cpuid
#define paravirt_enabled() 0
#define paravirt_has(x) 0
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
......
......@@ -193,20 +193,17 @@ static int __init numachip_system_init(void)
case 1:
init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
numachip_apic_icr_write = numachip1_apic_icr_write;
x86_init.pci.arch_init = pci_numachip_init;
break;
case 2:
init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
numachip_apic_icr_write = numachip2_apic_icr_write;
/* Use MCFG config cycles rather than locked CF8 cycles */
raw_pci_ops = &pci_mmcfg;
break;
default:
return 0;
}
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
x86_init.pci.arch_init = pci_numachip_init;
return 0;
}
......
......@@ -999,6 +999,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int flags = MF_ACTION_REQUIRED;
int lmce = 0;
/* If this CPU is offline, just bail out. */
if (cpu_is_offline(smp_processor_id())) {
u64 mcgstatus;
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
if (mcgstatus & MCG_STATUS_RIPV) {
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
return;
}
}
ist_enter(regs);
this_cpu_inc(mce_exception_count);
......
......@@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
}
#endif
if (paravirt_enabled() && !paravirt_has(RTC))
return -ENODEV;
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
"registered platform RTC device (no PNP device found)\n");
......
......@@ -1414,6 +1414,7 @@ __init void lguest_init(void)
pv_info.kernel_rpl = 1;
/* Everyone except Xen runs with this set. */
pv_info.shared_kernel_pmd = 1;
pv_info.features = 0;
/*
* We set up all the lguest overrides for sensitive operations. These
......
......@@ -1192,7 +1192,7 @@ static const struct pv_info xen_info __initconst = {
#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
#endif
.features = 0,
.name = "Xen",
};
......@@ -1535,6 +1535,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
if (xen_initial_domain())
pv_info.features |= PV_SUPPORTED_RTC;
pv_init_ops = xen_init_ops;
pv_apic_ops = xen_apic_ops;
if (!xen_pvh_domain()) {
......@@ -1886,8 +1888,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
static void xen_set_cpu_features(struct cpuinfo_x86 *c)
{
if (xen_pv_domain())
if (xen_pv_domain()) {
clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
set_cpu_cap(c, X86_FEATURE_XENPV);
}
}
const struct hypervisor_x86 x86_hyper_xen = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册