Commit 66570e96 authored by Oliver Upton, committed by Paolo Bonzini

kvm: x86: only provide PV features if enabled in guest's CPUID

KVM unconditionally provides PV features to the guest, regardless of the
configured CPUID. An unwitting guest that doesn't check
KVM_CPUID_FEATURES before use could access paravirt features that
userspace did not intend to provide. Fix this by checking the guest's
CPUID before performing any paravirtual operations.

Introduce a capability, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, to gate the
aforementioned enforcement. Migrating a VM from a host w/o this patch to
a host with this patch could silently change the ABI exposed to the
guest, warranting that we default to the old behavior and make the
new behavior opt-in.

Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Peter Shier <pshier@google.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Change-Id: I202a0926f65035b872bfe8ad15307c026de59a98
Message-Id: <20200818152429.1923996-4-oupton@google.com>
Reviewed-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Parent 210dfd93
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6380,3 +6380,14 @@ ranges that KVM should reject access to.
 In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
 limit the attack surface on KVM's MSR emulation code.
+
+8.26 KVM_CAP_ENFORCE_PV_CPUID
+-----------------------------
+
+Architectures: x86
+
+When enabled, KVM will disable paravirtual features provided to the
+guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
+(0x40000001). Otherwise, a guest may use the paravirtual features
+regardless of what has actually been exposed through the CPUID leaf.
+
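For illustration (not part of this patch): userspace opts in per vCPU via
KVM_ENABLE_CAP. A minimal sketch, assuming an already-created vCPU fd and
omitting error handling:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Opt a vCPU in to PV feature enforcement (hypothetical helper name). */
    static int enable_pv_cpuid_enforcement(int vcpu_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID,
                    .args = { 1 },  /* non-zero: enforce KVM_CPUID_FEATURES */
            };

            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }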
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -789,6 +789,21 @@ struct kvm_vcpu_arch {
 	/* AMD MSRC001_0015 Hardware Configuration */
 	u64 msr_hwcr;
+
+	/* pv related cpuid info */
+	struct {
+		/*
+		 * value of the eax register in the KVM_CPUID_FEATURES CPUID
+		 * leaf.
+		 */
+		u32 features;
+
+		/*
+		 * indicates whether pv emulation should be disabled if features
+		 * are not present in the guest's cpuid
+		 */
+		bool enforce;
+	} pv_cpuid;
 };
 
 struct kvm_lpage_info {
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -107,6 +107,13 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 	    (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
 		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
 
+	/*
+	 * save the feature bitmap to avoid cpuid lookup for every PV
+	 * operation
+	 */
+	if (best)
+		vcpu->arch.pv_cpuid.features = best->eax;
+
 	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
 		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
 		if (best)
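Userspace, not KVM, decides which PV features exist: whatever it leaves set
in the EAX of leaf 0x40000001 via KVM_SET_CPUID2 becomes the cached bitmap
above. A hypothetical sketch of hiding one feature before setting the table
(the helper name is an assumption; bit 9 is KVM_FEATURE_PV_TLB_FLUSH per
uapi/asm/kvm_para.h):

    #include <linux/kvm.h>

    /* Clear KVM_FEATURE_PV_TLB_FLUSH (bit 9) in leaf 0x40000001 before
     * handing the table to KVM_SET_CPUID2 (hypothetical helper).
     */
    static void hide_pv_tlb_flush(struct kvm_cpuid2 *cpuid2)
    {
            unsigned int i;

            for (i = 0; i < cpuid2->nent; i++) {
                    if (cpuid2->entries[i].function == 0x40000001)
                            cpuid2->entries[i].eax &= ~(1u << 9);
            }
    }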
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -5,6 +5,7 @@
 #include "x86.h"
 #include <asm/cpu.h>
 #include <asm/processor.h>
+#include <uapi/asm/kvm_para.h>
 
 extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
 void kvm_set_cpu_caps(void);
@@ -313,4 +314,13 @@ static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
 	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
 }
 
+static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
+					 unsigned int kvm_feature)
+{
+	if (!vcpu->arch.pv_cpuid.enforce)
+		return true;
+
+	return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
+}
+
 #endif
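guest_pv_has() is the host-side gate; the well-behaved guest-side counterpart
is to read the KVM_CPUID_FEATURES leaf before touching any PV interface. A
minimal guest sketch (assumptions: x86 inline asm, and KVM_FEATURE_STEAL_TIME
is bit 5 per uapi/asm/kvm_para.h):

    #include <stdint.h>

    #define KVM_CPUID_FEATURES      0x40000001
    #define KVM_FEATURE_STEAL_TIME  5

    /* Return the PV feature bitmap (EAX of CPUID leaf 0x40000001). */
    static inline uint32_t kvm_pv_features(void)
    {
            uint32_t eax, ebx, ecx, edx;

            __asm__ volatile("cpuid"
                             : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                             : "a"(KVM_CPUID_FEATURES));
            return eax;
    }

    /* Example: only program MSR_KVM_STEAL_TIME when advertised, since an
     * enforcing host now fails the write (a #GP for guest-initiated MSR
     * accesses); setup_steal_time() is a hypothetical caller:
     *
     *     if (kvm_pv_features() & (1u << KVM_FEATURE_STEAL_TIME))
     *             setup_steal_time();
     */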
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2877,6 +2877,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 	if (data & 0x30)
 		return 1;
 
+	if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
+	    (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
+		return 1;
+
+	if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
+	    (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
+		return 1;
+
 	if (!lapic_in_kernel(vcpu))
 		return data ? 1 : 0;
@@ -2954,10 +2962,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 		 * Doing a TLB flush here, on the guest's behalf, can avoid
 		 * expensive IPIs.
 		 */
-		trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
-				       st->preempted & KVM_VCPU_FLUSH_TLB);
-		if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
-			kvm_vcpu_flush_tlb_guest(vcpu);
+		if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+			trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+					       st->preempted & KVM_VCPU_FLUSH_TLB);
+			if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+				kvm_vcpu_flush_tlb_guest(vcpu);
+		}
 
 		vcpu->arch.st.preempted = 0;
@@ -3118,30 +3128,54 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.smi_count = data;
 		break;
 	case MSR_KVM_WALL_CLOCK_NEW:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+			return 1;
+
+		kvm_write_wall_clock(vcpu->kvm, data);
+		break;
 	case MSR_KVM_WALL_CLOCK:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+			return 1;
+
 		kvm_write_wall_clock(vcpu->kvm, data);
 		break;
 	case MSR_KVM_SYSTEM_TIME_NEW:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+			return 1;
+
 		kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
 		break;
 	case MSR_KVM_SYSTEM_TIME:
-		kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+			return 1;
+
+		kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
 		break;
 	case MSR_KVM_ASYNC_PF_EN:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+			return 1;
+
 		if (kvm_pv_enable_async_pf(vcpu, data))
 			return 1;
 		break;
 	case MSR_KVM_ASYNC_PF_INT:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+			return 1;
+
 		if (kvm_pv_enable_async_pf_int(vcpu, data))
 			return 1;
 		break;
 	case MSR_KVM_ASYNC_PF_ACK:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+			return 1;
+
 		if (data & 0x1) {
 			vcpu->arch.apf.pageready_pending = false;
 			kvm_check_async_pf_completion(vcpu);
 		}
 		break;
 	case MSR_KVM_STEAL_TIME:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+			return 1;
+
 		if (unlikely(!sched_info_on()))
 			return 1;
@@ -3158,11 +3192,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_KVM_PV_EOI_EN:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+			return 1;
+
 		if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
 			return 1;
 		break;
 	case MSR_KVM_POLL_CONTROL:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+			return 1;
+
 		/* only enable bit supported */
 		if (data & (-1ULL << 1))
 			return 1;
@@ -3658,6 +3698,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_LAST_CPU:
 	case KVM_CAP_X86_USER_SPACE_MSR:
 	case KVM_CAP_X86_MSR_FILTER:
+	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -4528,6 +4569,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		return kvm_x86_ops.enable_direct_tlbflush(vcpu);
 
+	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+		vcpu->arch.pv_cpuid.enforce = cap->args[0];
+
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -8000,11 +8046,16 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	ret = -KVM_ENOSYS;
+
 	switch (nr) {
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
 	case KVM_HC_KICK_CPU:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
+			break;
+
 		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
 		kvm_sched_yield(vcpu->kvm, a1);
 		ret = 0;
@@ -8015,9 +8066,15 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		break;
 #endif
 	case KVM_HC_SEND_IPI:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
+			break;
+
 		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
 		break;
 	case KVM_HC_SCHED_YIELD:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
+			break;
+
 		kvm_sched_yield(vcpu->kvm, a0);
 		ret = 0;
 		break;
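Because ret now defaults to -KVM_ENOSYS, a gated hypercall fails in a way the
guest can detect and fall back from. A guest-side sketch (assumptions: Intel
vmcall encoding; KVM_HC_SCHED_YIELD = 11 and KVM_ENOSYS = 1000 per
uapi/linux/kvm_para.h; do_native_yield() is a hypothetical fallback):

    #define KVM_HC_SCHED_YIELD 11
    #define KVM_ENOSYS         1000

    /* Single-argument KVM hypercall; vmcall is the Intel encoding
     * (AMD uses vmmcall).
     */
    static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
    {
            long ret;

            __asm__ volatile("vmcall"
                             : "=a"(ret)
                             : "a"(nr), "b"(p1)
                             : "memory");
            return ret;
    }

    /* Fall back when the host gates the feature off:
     *
     *     if (kvm_hypercall1(KVM_HC_SCHED_YIELD, dest) == -KVM_ENOSYS)
     *             do_native_yield();
     */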
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1052,6 +1052,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_STEAL_TIME 187
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
+#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 
 #ifdef KVM_CAP_IRQ_ROUTING