diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 03390a1ef8e7f5e7661aefedba40f4dcca8aa409..2901d5df4366c0409e6d295d0eb1585aa1568ea9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -367,6 +367,7 @@ #define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index a338a6deb950bf86b8b6ca5419aaba59dec1f6b3..5e658ba2654a7f687886276b504657b4d035e22a 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -89,8 +89,15 @@ #define INTEL_FAM6_COMETLAKE 0xA5 #define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_ROCKETLAKE 0xA7 + #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F +/* Hybrid Core/Atom Processors */ + +#define INTEL_FAM6_LAKEFIELD 0x8A +#define INTEL_FAM6_ALDERLAKE 0x97 + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 03b7c4ca425a620880719e97bd2a6d2b09857730..68ba42fdd184c0b7b30132ff053c220574babd80 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -678,70 +678,6 @@ static inline unsigned int cpuid_edx(unsigned int op) return edx; } -/* - * This function forces the icache and prefetched instruction stream to - * catch up with reality in two very specific cases: - * - * a) Text was modified using one virtual address and is about to be executed - * from the same physical page at a different virtual address. - * - * b) Text was modified on a different CPU, may subsequently be - * executed on this CPU, and you want to make sure the new version - * gets executed. This generally means you're calling this in a IPI. - * - * If you're calling this for a different reason, you're probably doing - * it wrong. - */ -static inline void sync_core(void) -{ - /* - * There are quite a few ways to do this. IRET-to-self is nice - * because it works on every CPU, at any CPL (so it's compatible - * with paravirtualization), and it never exits to a hypervisor. - * The only down sides are that it's a bit slow (it seems to be - * a bit more than 2x slower than the fastest options) and that - * it unmasks NMIs. The "push %cs" is needed because, in - * paravirtual environments, __KERNEL_CS may not be a valid CS - * value when we do IRET directly. - * - * In case NMI unmasking or performance ever becomes a problem, - * the next best option appears to be MOV-to-CR2 and an - * unconditional jump. That sequence also works on all CPUs, - * but it will fault at CPL3 (i.e. Xen PV). - * - * CPUID is the conventional way, but it's nasty: it doesn't - * exist on some 486-like CPUs, and it usually exits to a - * hypervisor. - * - * Like all of Linux's memory ordering operations, this is a - * compiler barrier as well. - */ -#ifdef CONFIG_X86_32 - asm volatile ( - "pushfl\n\t" - "pushl %%cs\n\t" - "pushl $1f\n\t" - "iret\n\t" - "1:" - : ASM_CALL_CONSTRAINT : : "memory"); -#else - unsigned int tmp; - - asm volatile ( - "mov %%ss, %0\n\t" - "pushq %q0\n\t" - "pushq %%rsp\n\t" - "addq $8, (%%rsp)\n\t" - "pushfq\n\t" - "mov %%cs, %0\n\t" - "pushq %q0\n\t" - "pushq $1f\n\t" - "iretq\n\t" - "1:" - : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); -#endif -} - extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void amd_e400_c1e_apic_setup(void); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index eb8e781c4353946f0dd4aca6363a88411896ea2c..59a3e13204c348d1cabc1c63dc5e4b202211b668 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -234,7 +234,6 @@ static inline void clwb(volatile void *__p) #define nop() asm volatile ("nop") - #endif /* __KERNEL__ */ #endif /* _ASM_X86_SPECIAL_INSNS_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 9804a7957f4e99fa547923995562c415e451c573..7fb482f0f25b062c073294ef72fd6936bf36c41c 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -90,6 +90,15 @@ static __always_inline void boot_init_stack_canary(void) #endif } +static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) +{ +#ifdef CONFIG_X86_64 + per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary; +#else + per_cpu(stack_canary.canary, cpu) = idle->stack_canary; +#endif +} + static inline void setup_stack_canary_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -119,6 +128,9 @@ static inline void load_stack_canary_segment(void) static inline void setup_stack_canary_segment(int cpu) { } +static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) +{ } + static inline void load_stack_canary_segment(void) { #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index c67caafd3381722dd6bb76dac8c5639f6bf2bba8..fdb5b356e59b07c215738667c8090c16c10aaebf 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -6,6 +6,78 @@ #include <asm/processor.h> #include <asm/cpufeature.h> +#ifdef CONFIG_X86_32 +static inline void iret_to_self(void) +{ + asm volatile ( + "pushfl\n\t" + "pushl %%cs\n\t" + "pushl $1f\n\t" + "iret\n\t" + "1:" + : ASM_CALL_CONSTRAINT : : "memory"); +} +#else +static inline void iret_to_self(void) +{ + unsigned int tmp; + + asm volatile ( + "mov %%ss, %0\n\t" + "pushq %q0\n\t" + "pushq %%rsp\n\t" + "addq $8, (%%rsp)\n\t" + "pushfq\n\t" + "mov %%cs, %0\n\t" + "pushq %q0\n\t" + "pushq $1f\n\t" + "iretq\n\t" + "1:" + : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); +} +#endif /* CONFIG_X86_32 */ + +/* + * This function forces the icache and prefetched instruction stream to + * catch up with reality in two very specific cases: + * + * a) Text was modified using one virtual address and is about to be executed + * from the same physical page at a different virtual address. + * + * b) Text was modified on a different CPU, may subsequently be + * executed on this CPU, and you want to make sure the new version + * gets executed. This generally means you're calling this in a IPI. + * + * If you're calling this for a different reason, you're probably doing + * it wrong. + */ +static inline void sync_core(void) +{ + /* + * There are quite a few ways to do this. IRET-to-self is nice + * because it works on every CPU, at any CPL (so it's compatible + * with paravirtualization), and it never exits to a hypervisor. + * The only down sides are that it's a bit slow (it seems to be + * a bit more than 2x slower than the fastest options) and that + * it unmasks NMIs. The "push %cs" is needed because, in + * paravirtual environments, __KERNEL_CS may not be a valid CS + * value when we do IRET directly. + * + * In case NMI unmasking or performance ever becomes a problem, + * the next best option appears to be MOV-to-CR2 and an + * unconditional jump. That sequence also works on all CPUs, + * but it will fault at CPL3 (i.e. Xen PV). + * + * CPUID is the conventional way, but it's nasty: it doesn't + * exist on some 486-like CPUs, and it usually exits to a + * hypervisor. + * + * Like all of Linux's memory ordering operations, this is a + * compiler barrier as well. + */ + iret_to_self(); +} + /* * Ensure that a core serializing instruction is issued before returning * to user-mode. x86 implements return to user-space through sysexit, diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 20e07feb4064d16881bea8ff836fd7af0ff58b5d..3abc1316f91b8b6aafc3e802a3ae580b720036aa 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -16,6 +16,7 @@ #include <linux/kprobes.h> #include <linux/mmu_context.h> #include <linux/bsearch.h> +#include <linux/sync_core.h> #include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0b71970d2d3d2a65848a6377d6026d9ea45929be..7beaefa9d198c08e03414509257693444c262569 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -763,10 +763,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* - * If enhanced IBRS is enabled or SMT impossible, STIBP is not + * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not * required. */ - if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + if (!boot_cpu_has(X86_FEATURE_STIBP) || + !smt_possible || + spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* @@ -778,12 +780,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; - /* - * If STIBP is not available, clear the STIBP mode. - */ - if (!boot_cpu_has(X86_FEATURE_STIBP)) - mode = SPECTRE_V2_USER_NONE; - spectre_v2_user_stibp = mode; set_mode: @@ -1270,7 +1266,6 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always disabled in strict * mode. It can neither be enabled if it was force-disabled * by a previous prctl call. - */ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0ab48f1cdf848faf6e9e6429d85f4dc815324f68..b6b7b38dff5fd73bc7a9e9266f56c7d7ca457698 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1156,6 +1156,8 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1), {} }; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 14e4b4d17ee5bcbe909f47c5db3c550e9c07c08b..9246595c07d736d095ef73615a6dcc5bb8811575 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -42,6 +42,7 @@ #include <linux/export.h> #include <linux/jump_label.h> #include <linux/set_memory.h> +#include <linux/sync_core.h> #include <linux/task_work.h> #include <linux/hardirq.h> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 518ac6bf752e0da9dd016633ddb36f6c0a15a293..27aa04a9570211c0d89e4d7a3112b65c286198dc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -51,7 +51,6 @@ #include <linux/err.h> #include <linux/nmi.h> #include <linux/tboot.h> -#include <linux/stackprotector.h> #include <linux/gfp.h> #include <linux/cpuidle.h> #include <linux/numa.h> @@ -81,6 +80,7 @@ #include <asm/cpu_device_id.h> #include <asm/spec-ctrl.h> #include <asm/hw_irq.h> +#include <asm/stackprotector.h> /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); @@ -260,21 +260,10 @@ static void notrace start_secondary(void *unused) /* enable local interrupts */ local_irq_enable(); - /* to prevent fake stack check failure in clock setup */ - boot_init_stack_canary(); - x86_cpuinit.setup_percpu_clockev(); wmb(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); - - /* - * Prevent tail call to cpu_startup_entry() because the stack protector - * guard has been changed a couple of function calls up, in - * boot_init_stack_canary() and must not be checked before tail calling - * another function. - */ - prevent_tail_call_optimization(); } /** @@ -1012,6 +1001,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) alternatives_enable_smp(); per_cpu(current_task, cpu) = idle; + cpu_init_stack_canary(cpu, idle); /* Initialize the interrupt stack(s) */ ret = irq_init_percpu_irqstack(cpu); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 171aff1b11f24e38923b529a060d88f60247225b..9ea598dcc132fc4c7ef02cd23f918892082d65b6 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -92,9 +92,7 @@ static void cpu_bringup(void) asmlinkage __visible void cpu_bringup_and_idle(void) { cpu_bringup(); - boot_init_stack_canary(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); - prevent_tail_call_optimization(); } void xen_smp_intr_free_pv(unsigned int cpu) diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index b1521112dbbde46683d1ee3a2fbe1a5374eabc67..723825524ea0cb333949f92a8b988f3640683e6f 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -20,6 +20,7 @@ #include <linux/io.h> #include <linux/uaccess.h> #include <linux/security.h> +#include <linux/sync_core.h> #include <linux/prefetch.h> #include "gru.h" #include "grutables.h" diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index f7224f90f4131f68636faee891e1f3b6fba42208..1d75d5e540bc992b35bf0274692ef889fb14e7bb 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -16,6 +16,7 @@ #define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) #define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) #else +#include <linux/sync_core.h> #include <asm/tsc.h> #define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) #define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 0197441a1eae7c28dea29b98449d9c33ded13030..f6e600bfac5d23b9f8c8c28d747436227af6fd38 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -16,6 +16,7 @@ #include <linux/miscdevice.h> #include <linux/proc_fs.h> #include <linux/interrupt.h> +#include <linux/sync_core.h> #include <linux/uaccess.h> #include <linux/delay.h> #include <linux/export.h>