Commit e4b5c954 authored by Peter Zijlstra, committed by Zheng Zengkai

intel_idle: Disable IBRS during long idle

stable inclusion
from stable-v5.10.133
commit 55bba093fd91a76971134e3a4e3576e536c08f5c
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5PTAS
CVE: CVE-2022-29900,CVE-2022-23816,CVE-2022-29901

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=55bba093fd91a76971134e3a4e3576e536c08f5c

--------------------------------

commit bf5835bc upstream.

Having IBRS enabled while the SMT sibling is idle unnecessarily slows
down the running sibling. On the other hand, disabling IBRS around idle
takes two MSR writes, which will increase the idle latency.

Therefore, only disable IBRS around deeper idle states. Shallow idle
states are bounded by the tick in duration, since NOHZ is not allowed
for them by virtue of their short target residency.

Only do this for mwait-driven idle, since that keeps interrupts disabled
across idle, which makes disabling IBRS vs IRQ-entry a non-issue.

Note: C6 is a somewhat arbitrary threshold; most importantly, C1 probably
shouldn't disable IBRS. Benchmarking is needed.
Suggested-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
[cascardo: no CPUIDLE_FLAG_IRQ_ENABLE]
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Lin Yujun <linyujun809@huawei.com>
Reviewed-by: Zhang Jianhua <chris.zjh@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 2c373cc2
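
The heart of the change is the new enter-method wrapper shown in the
intel_idle.c hunks below: when an SMT sibling exists, the per-CPU SPEC_CTRL
value is saved, the MSR is cleared before entering idle (turning IBRS off for
the sibling's benefit), and the saved value is restored on exit. An annotated
sketch of that pattern (kernel context assumed; it mirrors the new
intel_idle_ibrs() in the diff):

static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
				     struct cpuidle_driver *drv, int index)
{
	bool smt_active = sched_smt_active();	/* is a sibling thread online? */
	u64 spec_ctrl = spec_ctrl_current();	/* per-CPU cached SPEC_CTRL value */
	int ret;

	if (smt_active)
		wrmsrl(MSR_IA32_SPEC_CTRL, 0);	/* 1st MSR write: IBRS off */

	ret = intel_idle(dev, drv, index);	/* mwait idle; IRQs stay disabled */

	if (smt_active)
		wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);	/* 2nd MSR write: restore */

	return ret;
}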
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
@@ -263,6 +263,7 @@ static inline void indirect_branch_prediction_barrier(void)
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
 extern void write_spec_ctrl_current(u64 val, bool force);
+extern u64 spec_ctrl_current(void);
 
 /*
  * With retpoline, we must use IBRS to restrict branch prediction
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
@@ -78,6 +78,12 @@ void write_spec_ctrl_current(u64 val, bool force)
 		wrmsrl(MSR_IA32_SPEC_CTRL, val);
 }
 
+u64 spec_ctrl_current(void)
+{
+	return this_cpu_read(x86_spec_ctrl_current);
+}
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
+
 /*
  * The vendor and possibly platform specific bits which can be modified in
  * x86_spec_ctrl_base.
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
@@ -47,11 +47,13 @@
 #include <linux/tick.h>
 #include <trace/events/power.h>
 #include <linux/sched.h>
+#include <linux/sched/smt.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/moduleparam.h>
 
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
+#include <asm/nospec-branch.h>
 #include <asm/mwait.h>
 #include <asm/msr.h>
@@ -105,6 +107,12 @@ static unsigned int mwait_substates __initdata;
  */
 #define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)
 
+/*
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS		BIT(16)
+
 /*
  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
  * the C-state (top nibble) and sub-state (bottom nibble)
@@ -144,6 +152,24 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
 	return index;
 }
 
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
+				     struct cpuidle_driver *drv, int index)
+{
+	bool smt_active = sched_smt_active();
+	u64 spec_ctrl = spec_ctrl_current();
+	int ret;
+
+	if (smt_active)
+		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+	ret = intel_idle(dev, drv, index);
+
+	if (smt_active)
+		wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+
+	return ret;
+}
+
 /**
  * intel_idle_s2idle - Ask the processor to enter the given idle state.
  * @dev: cpuidle device of the target CPU.
@@ -665,7 +691,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 	{
 		.name = "C6",
 		.desc = "MWAIT 0x20",
-		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 85,
 		.target_residency = 200,
 		.enter = &intel_idle,
@@ -673,7 +699,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 	{
 		.name = "C7s",
 		.desc = "MWAIT 0x33",
-		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 124,
 		.target_residency = 800,
 		.enter = &intel_idle,
@@ -681,7 +707,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 	{
 		.name = "C8",
 		.desc = "MWAIT 0x40",
-		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 200,
 		.target_residency = 800,
 		.enter = &intel_idle,
@@ -689,7 +715,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 	{
 		.name = "C9",
 		.desc = "MWAIT 0x50",
-		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 480,
 		.target_residency = 5000,
 		.enter = &intel_idle,
@@ -697,7 +723,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 	{
 		.name = "C10",
 		.desc = "MWAIT 0x60",
-		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 890,
 		.target_residency = 5000,
 		.enter = &intel_idle,
@@ -726,7 +752,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
 	{
 		.name = "C6",
 		.desc = "MWAIT 0x20",
-		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 133,
 		.target_residency = 600,
 		.enter = &intel_idle,
@@ -1668,6 +1694,11 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 		/* Structure copy. */
 		drv->states[drv->state_count] = cpuidle_state_table[cstate];
 
+		if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+		    cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
+			drv->states[drv->state_count].enter = intel_idle_ibrs;
+		}
+
 		if ((disabled_states_mask & BIT(drv->state_count)) ||
 		    ((icpu->use_acpi || force_use_acpi) &&
 		     intel_idle_off_by_default(mwait_hint) &&
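
For reference, the new flag coexists with the MWAIT hint encoding mentioned in
the comment above: intel_idle packs the 8-bit MWAIT hint into the top byte of
the cpuidle flags word via MWAIT2flg(), leaving the low bits free for flags
such as CPUIDLE_FLAG_TLB_FLUSHED and the new CPUIDLE_FLAG_IBRS. A small sketch
of how, for example, the SKX C6 flags value decomposes (MWAIT2flg() as defined
in drivers/idle/intel_idle.c):

#define MWAIT2flg(eax)		((eax & 0xFF) << 24)	/* hint in bits 31..24 */
#define CPUIDLE_FLAG_IBRS	BIT(16)			/* added by this patch */

/* SKX C6: hint 0x20 -> 0x20000000, OR'ed with the low-order flag bits,
 * so the hint byte can never collide with flag bits such as BIT(16). */
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,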