提交 2853d5fa 编写于 作者: L Linus Torvalds

Merge tag 'x86-splitlock-2020-03-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 splitlock updates from Thomas Gleixner:
 "Support for 'split lock' detection:

  Atomic operations (lock prefixed instructions) which span two cache
  lines have to acquire the global bus lock. This is at least 1k cycles
  slower than an atomic operation within a cache line and disrupts
  performance on other cores. Aside of performance disruption this is a
  unpriviledged form of DoS.

  Some newer CPUs have the capability to raise an #AC trap when such an
  operation is attempted. The detection is by default enabled in warning
  mode which will warn once when a user space application is caught. A
  command line option allows to disable the detection or to select fatal
  mode which will terminate offending applications with SIGBUS"

* tag 'x86-splitlock-2020-03-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/split_lock: Avoid runtime reads of the TEST_CTRL MSR
  x86/split_lock: Rework the initialization flow of split lock detection
  x86/split_lock: Enable split lock detection by kernel
...@@ -4711,6 +4711,28 @@ ...@@ -4711,6 +4711,28 @@
spia_pedr= spia_pedr=
spia_peddr= spia_peddr=
split_lock_detect=
[X86] Enable split lock detection
When enabled (and if hardware support is present), atomic
instructions that access data across cache line
boundaries will result in an alignment check exception.
off - not enabled
warn - the kernel will emit rate limited warnings
about applications triggering the #AC
exception. This mode is the default on CPUs
that supports split lock detection.
fatal - the kernel will send SIGBUS to applications
that trigger the #AC exception.
If an #AC exception is hit in the kernel or in
firmware (i.e. not while executing in user mode)
the kernel will oops in either "warn" or "fatal"
mode.
srcutree.counter_wrap_check [KNL] srcutree.counter_wrap_check [KNL]
Specifies how frequently to check for Specifies how frequently to check for
grace-period sequence counter wrap for the grace-period sequence counter wrap for the
......
...@@ -40,4 +40,16 @@ int mwait_usable(const struct cpuinfo_x86 *); ...@@ -40,4 +40,16 @@ int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig); unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig); unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig); unsigned int x86_stepping(unsigned int sig);
#ifdef CONFIG_CPU_SUP_INTEL
extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
#else
static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
static inline void switch_to_sld(unsigned long tifn) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
return false;
}
#endif
#endif /* _ASM_X86_CPU_H */ #endif /* _ASM_X86_CPU_H */
...@@ -285,6 +285,7 @@ ...@@ -285,6 +285,7 @@
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
...@@ -368,6 +369,7 @@ ...@@ -368,6 +369,7 @@
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
/* /*
......
...@@ -41,6 +41,10 @@ ...@@ -41,6 +41,10 @@
/* Intel MSRs. Some also available on other CPUs */ /* Intel MSRs. Some also available on other CPUs */
#define MSR_TEST_CTRL 0x00000033
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
...@@ -70,6 +74,11 @@ ...@@ -70,6 +74,11 @@
*/ */
#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
#define MSR_IA32_CORE_CAPS 0x000000cf
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26) #define NHM_C1_AUTO_DEMOTE (1UL << 26)
......
...@@ -92,6 +92,7 @@ struct thread_info { ...@@ -92,6 +92,7 @@ struct thread_info {
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_SLD 18 /* Restore split lock detection on context switch */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */
...@@ -121,6 +122,7 @@ struct thread_info { ...@@ -121,6 +122,7 @@ struct thread_info {
#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32) #define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_SLD (1 << TIF_SLD)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
...@@ -139,7 +141,7 @@ struct thread_info { ...@@ -139,7 +141,7 @@ struct thread_info {
/* flags to check in __switch_to() */ /* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \ #define _TIF_WORK_CTXSW_BASE \
(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
/* /*
* Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
......
...@@ -1224,6 +1224,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) ...@@ -1224,6 +1224,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
cpu_set_bug_bits(c); cpu_set_bug_bits(c);
cpu_set_core_cap_bits(c);
fpu__init_system(c); fpu__init_system(c);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
......
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
#include <asm/microcode_intel.h> #include <asm/microcode_intel.h>
#include <asm/hwcap2.h> #include <asm/hwcap2.h>
#include <asm/elf.h> #include <asm/elf.h>
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#include <linux/topology.h> #include <linux/topology.h>
...@@ -31,6 +33,20 @@ ...@@ -31,6 +33,20 @@
#include <asm/apic.h> #include <asm/apic.h>
#endif #endif
enum split_lock_detect_state {
sld_off = 0,
sld_warn,
sld_fatal,
};
/*
* Default to sld_off because most systems do not support split lock detection
* split_lock_setup() will switch this to sld_warn on systems that support
* split lock detect, unless there is a command line override.
*/
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;
/* /*
* Processors which have self-snooping capability can handle conflicting * Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists * memory type across CPUs by snooping its own cache. However, there exists
...@@ -570,6 +586,8 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c) ...@@ -570,6 +586,8 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr); wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
} }
static void split_lock_init(void);
static void init_intel(struct cpuinfo_x86 *c) static void init_intel(struct cpuinfo_x86 *c)
{ {
early_init_intel(c); early_init_intel(c);
...@@ -684,6 +702,8 @@ static void init_intel(struct cpuinfo_x86 *c) ...@@ -684,6 +702,8 @@ static void init_intel(struct cpuinfo_x86 *c)
tsx_enable(); tsx_enable();
if (tsx_ctrl_state == TSX_CTRL_DISABLE) if (tsx_ctrl_state == TSX_CTRL_DISABLE)
tsx_disable(); tsx_disable();
split_lock_init();
} }
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
...@@ -945,3 +965,166 @@ static const struct cpu_dev intel_cpu_dev = { ...@@ -945,3 +965,166 @@ static const struct cpu_dev intel_cpu_dev = {
}; };
cpu_dev_register(intel_cpu_dev); cpu_dev_register(intel_cpu_dev);
#undef pr_fmt
#define pr_fmt(fmt) "x86/split lock detection: " fmt
static const struct {
const char *option;
enum split_lock_detect_state state;
} sld_options[] __initconst = {
{ "off", sld_off },
{ "warn", sld_warn },
{ "fatal", sld_fatal },
};
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt);
return len == arglen && !strncmp(arg, opt, len);
}
static bool split_lock_verify_msr(bool on)
{
u64 ctrl, tmp;
if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
return false;
if (on)
ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
else
ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
return false;
rdmsrl(MSR_TEST_CTRL, tmp);
return ctrl == tmp;
}
static void __init split_lock_setup(void)
{
enum split_lock_detect_state state = sld_warn;
char arg[20];
int i, ret;
if (!split_lock_verify_msr(false)) {
pr_info("MSR access failed: Disabled\n");
return;
}
ret = cmdline_find_option(boot_command_line, "split_lock_detect",
arg, sizeof(arg));
if (ret >= 0) {
for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
if (match_option(arg, ret, sld_options[i].option)) {
state = sld_options[i].state;
break;
}
}
}
switch (state) {
case sld_off:
pr_info("disabled\n");
return;
case sld_warn:
pr_info("warning about user-space split_locks\n");
break;
case sld_fatal:
pr_info("sending SIGBUS on user-space split_locks\n");
break;
}
rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
if (!split_lock_verify_msr(true)) {
pr_info("MSR access failed: Disabled\n");
return;
}
sld_state = state;
setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}
/*
* MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
* is not implemented as one thread could undo the setting of the other
* thread immediately after dropping the lock anyway.
*/
static void sld_update_msr(bool on)
{
u64 test_ctrl_val = msr_test_ctrl_cache;
if (on)
test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
}
static void split_lock_init(void)
{
split_lock_verify_msr(sld_state != sld_off);
}
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
return false;
pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
current->comm, current->pid, regs->ip);
/*
* Disable the split lock detection for this task so it can make
* progress and set TIF_SLD so the detection is re-enabled via
* switch_to_sld() when the task is scheduled out.
*/
sld_update_msr(false);
set_tsk_thread_flag(current, TIF_SLD);
return true;
}
/*
* This function is called only when switching between tasks with
* different split-lock detection modes. It sets the MSR for the
* mode of the new task. This is right most of the time, but since
* the MSR is shared by hyperthreads on a physical core there can
* be glitches when the two threads need different modes.
*/
void switch_to_sld(unsigned long tifn)
{
sld_update_msr(!(tifn & _TIF_SLD));
}
#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY}
/*
* The following processors have the split lock detection feature. But
* since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot
* be enumerated. Enable it by family and model matching on these
* processors.
*/
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X),
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L),
{}
};
void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
{
u64 ia32_core_caps = 0;
if (c->x86_vendor != X86_VENDOR_INTEL)
return;
if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) {
/* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */
rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
} else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
/* Enumerate split lock detection by family and model. */
if (x86_match_cpu(split_lock_cpu_ids))
ia32_core_caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT;
}
if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
split_lock_setup();
}
...@@ -649,6 +649,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -649,6 +649,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
/* Enforce MSR update to ensure consistent state */ /* Enforce MSR update to ensure consistent state */
__speculation_ctrl_update(~tifn, tifn); __speculation_ctrl_update(~tifn, tifn);
} }
if ((tifp ^ tifn) & _TIF_SLD)
switch_to_sld(tifn);
} }
/* /*
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include <asm/traps.h> #include <asm/traps.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/fpu/internal.h> #include <asm/fpu/internal.h>
#include <asm/cpu.h>
#include <asm/cpu_entry_area.h> #include <asm/cpu_entry_area.h>
#include <asm/mce.h> #include <asm/mce.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
...@@ -242,7 +243,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, ...@@ -242,7 +243,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
{ {
struct task_struct *tsk = current; struct task_struct *tsk = current;
if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
return; return;
...@@ -288,9 +288,29 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overru ...@@ -288,9 +288,29 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overru
DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS) DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS)
DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present) DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present)
DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment) DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment)
DO_ERROR(X86_TRAP_AC, SIGBUS, BUS_ADRALN, NULL, "alignment check", alignment_check)
#undef IP #undef IP
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
{
char *str = "alignment check";
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP)
return;
if (!user_mode(regs))
die("Split lock detected\n", regs, error_code);
local_irq_enable();
if (handle_user_split_lock(regs, error_code))
return;
do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
error_code, BUS_ADRALN, NULL);
}
#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_VMAP_STACK
__visible void __noreturn handle_stack_overflow(const char *message, __visible void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs, struct pt_regs *regs,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册