Unverified · Commit 3eb8e5e6 · Authored by openeuler-ci-bot, committed via Gitee

!161 SPR: IPI Virtualization Support

Merge Pull Request from: @x56Jason 
 
This PR enables IPI virtualization support for Intel SPR (Sapphire Rapids) and fixes the resulting kABI changes.

## Intel-Kernel Issue
#I5ODSC

## Test

1. Build and boot succeed with CONFIG_SMP enabled or disabled.
2. Used the IPI benchmark (https://lore.kernel.org/kvm/20171219085010.4081-1-ynorov@caviumnetworks.com) for unicast IPI testing (the "Normal IPI" case in the benchmark); see the sketch after this list:
 - With IPI virtualization disabled on the host and x2APIC mode enabled in the guest, a large number of MSR_WR VM-exits are observed.
 - With IPI virtualization enabled on the host and x2APIC mode enabled in the guest, the MSR_WR VM-exits caused by IPIs disappear.
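
For reference, one way to run the benchmark and observe the MSR-write exit counts is sketched below. This is an illustration only: the module name `ipi_benchmark.ko` comes from the lore patch above, and `32` is assumed to be `EXIT_REASON_MSR_WRITE` on VMX; neither command is part of this PR.

```sh
# Guest: run the "Normal IPI" case; the benchmark prints results to the kernel log.
insmod ipi_benchmark.ko && dmesg | tail

# Host: count MSR-write VM-exits for ~10s while the benchmark runs.
perf stat -a -e kvm:kvm_exit --filter 'exit_reason==32' -- sleep 10
```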

## Known Issue
N/A

## Default Config Change
N/A
 
 
Link: https://gitee.com/openeuler/kernel/pulls/161 
Reviewed-by: Chen Wei <chenwei@xfusion.com> 
Reviewed-by: Kevin Zhu <zhukeqian1@huawei.com> 
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com> 
@@ -6220,6 +6220,27 @@ default.
 See Documentation/x86/sgx.rst for more details.
 
+7.23 KVM_CAP_MAX_VCPU_ID
+------------------------
+
+:Architectures: x86
+:Target: VM
+:Parameters: args[0] - maximum APIC ID value set for current VM
+:Returns: 0 on success, -EINVAL if args[0] is beyond KVM_MAX_VCPU_ID
+          supported in KVM or if it has been set.
+
+This capability allows userspace to specify maximum possible APIC ID
+assigned for current VM session prior to the creation of vCPUs, saving
+memory for data structures indexed by the APIC ID. Userspace is able
+to calculate the limit to APIC ID values from designated
+CPU topology.
+
+The value can be changed only until KVM_ENABLE_CAP is set to a nonzero
+value or until a vCPU is created. Upon creation of the first vCPU,
+if the value was set to zero or KVM_ENABLE_CAP was not invoked, KVM
+uses the return value of KVM_CHECK_EXTENSION(KVM_CAP_MAX_VCPU_ID) as
+the maximum APIC ID.
+
 8. Other capabilities.
 ======================
......
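As a usage illustration for the KVM_CAP_MAX_VCPU_ID documentation above, here is a minimal userspace sketch (not part of this PR; error handling elided, and the limit of 8 is chosen arbitrarily):

```c
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);

	/* Cap the per-VM APIC ID space before any vCPU exists, so KVM can
	 * size APIC-ID-indexed structures (e.g. the IPIv PID table). */
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_MAX_VCPU_ID,
		.args[0] = 8,
	};
	ioctl(vm_fd, KVM_ENABLE_CAP, &cap); /* must precede KVM_CREATE_VCPU */

	ioctl(vm_fd, KVM_CREATE_VCPU, 0);   /* vCPU IDs 0..7 are now valid */
	return 0;
}
```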
@@ -2944,9 +2944,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
 	if (!sclp.has_esca || !sclp.has_64bscao)
 		return false;
 
-	mutex_lock(&kvm->lock);
 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
-	mutex_unlock(&kvm->lock);
 
 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
 }
......
@@ -1020,6 +1020,12 @@ struct kvm_arch {
 	struct list_head tdp_mmu_roots;
 	/* List of struct tdp_mmu_pages not being used as roots */
 	struct list_head tdp_mmu_pages;
+
+	/*
+	 * VM-scope maximum vCPU ID. Used to determine the size of structures
+	 * that increase along with the maximum vCPU ID, in which case, using
+	 * the global KVM_MAX_VCPU_ID may lead to significant memory waste.
+	 */
+	u32 max_vcpu_ids;
 };
 
 struct kvm_vm_stat {
@@ -1121,6 +1127,7 @@ struct kvm_x86_ops {
 	void (*vm_destroy)(struct kvm *kvm);
 
 	/* Create, but do not attach this VCPU */
+	int (*vcpu_precreate)(struct kvm *kvm);
 	int (*vcpu_create)(struct kvm_vcpu *vcpu);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
 	void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
......
@@ -956,6 +956,7 @@
 #define MSR_IA32_VMX_TRUE_EXIT_CTLS	0x0000048f
 #define MSR_IA32_VMX_TRUE_ENTRY_CTLS	0x00000490
 #define MSR_IA32_VMX_VMFUNC		0x00000491
+#define MSR_IA32_VMX_PROCBASED_CTLS3	0x00000492
 
 /* VMX_BASIC bits and bitmasks */
 #define VMX_BASIC_VMCS_SIZE_SHIFT	32
......
@@ -142,6 +142,12 @@ struct cpuinfo_x86 {
 	unsigned		initialized : 1;
 } __randomize_layout;
 
+struct extra_cpuinfo_x86 {
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+	__u32		vmx_tertiary_capability[NVMX_TERTIARY_INTS];
+#endif
+} __randomize_layout;
+
 struct cpuid_regs {
 	u32 eax, ebx, ecx, edx;
 };
@@ -172,6 +178,8 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86	boot_cpu_data;
 extern struct cpuinfo_x86	new_cpu_data;
 
+extern struct extra_cpuinfo_x86	extra_boot_cpu_data;
+
 extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];
 extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS];
@@ -183,6 +191,14 @@ DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 #define cpu_data(cpu)		boot_cpu_data
 #endif
 
+#ifdef CONFIG_SMP
+DECLARE_PER_CPU_READ_MOSTLY(struct extra_cpuinfo_x86, extra_cpu_info);
+#define extra_cpu_data(cpu)	per_cpu(extra_cpu_info, cpu)
+#else
+#define extra_cpu_info		extra_boot_cpu_data
+#define extra_cpu_data(cpu)	extra_boot_cpu_data
+#endif
+
 extern const struct seq_operations cpuinfo_op;
 
 #define cache_line_size()	(boot_cpu_data.x86_cache_alignment)
......
@@ -18,6 +18,7 @@
 #include <asm/vmxfeatures.h>
 
 #define VMCS_CONTROL_BIT(x)		BIT(VMX_FEATURE_##x & 0x1f)
+#define VMCS_TERTIARY_CONTROL_BIT(x)	BIT(VMX_TERTIARY_FEATURE_##x & 0x1f)
 
 /*
  * Definitions of Primary Processor-Based VM-Execution Controls.
@@ -31,6 +32,7 @@
 #define CPU_BASED_RDTSC_EXITING			VMCS_CONTROL_BIT(RDTSC_EXITING)
 #define CPU_BASED_CR3_LOAD_EXITING		VMCS_CONTROL_BIT(CR3_LOAD_EXITING)
 #define CPU_BASED_CR3_STORE_EXITING		VMCS_CONTROL_BIT(CR3_STORE_EXITING)
+#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS	VMCS_CONTROL_BIT(TERTIARY_CONTROLS)
 #define CPU_BASED_CR8_LOAD_EXITING		VMCS_CONTROL_BIT(CR8_LOAD_EXITING)
 #define CPU_BASED_CR8_STORE_EXITING		VMCS_CONTROL_BIT(CR8_STORE_EXITING)
 #define CPU_BASED_TPR_SHADOW			VMCS_CONTROL_BIT(VIRTUAL_TPR)
@@ -74,6 +76,11 @@
 #define SECONDARY_EXEC_TSC_SCALING		VMCS_CONTROL_BIT(TSC_SCALING)
 #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE	VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
 
+/*
+ * Definitions of Tertiary Processor-Based VM-Execution Controls.
+ */
+#define TERTIARY_EXEC_IPI_VIRT			VMCS_TERTIARY_CONTROL_BIT(IPI_VIRT)
+
 #define PIN_BASED_EXT_INTR_MASK			VMCS_CONTROL_BIT(INTR_EXITING)
 #define PIN_BASED_NMI_EXITING			VMCS_CONTROL_BIT(NMI_EXITING)
 #define PIN_BASED_VIRTUAL_NMIS			VMCS_CONTROL_BIT(VIRTUAL_NMIS)
@@ -156,6 +163,7 @@ static inline int vmx_misc_mseg_revid(u64 vmx_misc)
 enum vmcs_field {
 	VIRTUAL_PROCESSOR_ID            = 0x00000000,
 	POSTED_INTR_NV                  = 0x00000002,
+	LAST_PID_POINTER_INDEX		= 0x00000008,
 	GUEST_ES_SELECTOR               = 0x00000800,
 	GUEST_CS_SELECTOR               = 0x00000802,
 	GUEST_SS_SELECTOR               = 0x00000804,
@@ -219,6 +227,10 @@ enum vmcs_field {
 	ENCLS_EXITING_BITMAP_HIGH	= 0x0000202F,
 	TSC_MULTIPLIER                  = 0x00002032,
 	TSC_MULTIPLIER_HIGH             = 0x00002033,
+	TERTIARY_VM_EXEC_CONTROL	= 0x00002034,
+	TERTIARY_VM_EXEC_CONTROL_HIGH	= 0x00002035,
+	PID_POINTER_TABLE		= 0x00002042,
+	PID_POINTER_TABLE_HIGH		= 0x00002043,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,
......
@@ -6,6 +6,7 @@
  * Defines VMX CPU feature bits
  */
 #define NVMXINTS			3 /* N 32-bit words worth of info */
+#define NVMX_TERTIARY_INTS		2 /* N 32-bit words worth of info */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
@@ -43,6 +44,7 @@
 #define VMX_FEATURE_RDTSC_EXITING	( 1*32+ 12) /* "" VM-Exit on RDTSC */
 #define VMX_FEATURE_CR3_LOAD_EXITING	( 1*32+ 15) /* "" VM-Exit on writes to CR3 */
 #define VMX_FEATURE_CR3_STORE_EXITING	( 1*32+ 16) /* "" VM-Exit on reads from CR3 */
+#define VMX_FEATURE_TERTIARY_CONTROLS	( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */
 #define VMX_FEATURE_CR8_LOAD_EXITING	( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
 #define VMX_FEATURE_CR8_STORE_EXITING	( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
 #define VMX_FEATURE_VIRTUAL_TPR		( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
@@ -84,4 +86,6 @@
 #define VMX_FEATURE_USR_WAIT_PAUSE	( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
 #define VMX_FEATURE_ENCLV_EXITING	( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
 
+/* Tertiary Processor-Based VM-Execution Controls, word 3 */
+#define VMX_TERTIARY_FEATURE_IPI_VIRT	( 3*32+  4) /* Enable IPI virtualization */
 #endif /* _ASM_X86_VMXFEATURES_H */
@@ -1596,6 +1596,15 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
 #endif
 }
 
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+static void init_extra_cpu_data(u16 cpu_index)
+{
+	struct extra_cpuinfo_x86 *e = &extra_cpu_data(cpu_index);
+
+	memset(&e->vmx_tertiary_capability, 0, sizeof(e->vmx_tertiary_capability));
+}
+#endif
+
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -1626,6 +1635,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
 #ifdef CONFIG_X86_VMX_FEATURE_NAMES
 	memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
+	init_extra_cpu_data(c->cpu_index);
 #endif
 
 	generic_identify(c);
......
@@ -18,8 +18,24 @@ enum vmx_feature_leafs {
 	NR_VMX_FEATURE_WORDS,
 };
 
+enum vmx_tertiary_feature_leafs {
+	TERTIARY_CTLS_LOW = 0,
+	TERTIARY_CTLS_HIGH,
+};
+
 #define VMX_F(x) BIT(VMX_FEATURE_##x & 0x1f)
 
+static void init_vmx_tertiary_capabilities(u16 cpu_index)
+{
+	struct extra_cpuinfo_x86 *e = &extra_cpu_data(cpu_index);
+	u32 low, high;
+
+	/* All 64 bits of tertiary controls MSR are allowed-1 settings. */
+	rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS3, &low, &high);
+	e->vmx_tertiary_capability[TERTIARY_CTLS_LOW] = low;
+	e->vmx_tertiary_capability[TERTIARY_CTLS_HIGH] = high;
+}
+
 static void init_vmx_capabilities(struct cpuinfo_x86 *c)
 {
 	u32 supported, funcs, ept, vpid, ign;
@@ -42,6 +58,8 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
 	rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported);
 	c->vmx_capability[SECONDARY_CTLS] = supported;
 
+	init_vmx_tertiary_capabilities(c->cpu_index);
+
 	rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported);
 	rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs);
......
@@ -68,6 +68,9 @@ trap 'rm "$OUT"' EXIT
 	echo "#include <asm/vmxfeatures.h>"
 	echo "#endif"
 	dump_array "x86_vmx_flags" "NVMXINTS*32" "VMX_FEATURE_" "" $3
+	echo ""
+	dump_array "x86_vmx_tertiary_flags" "NVMX_TERTIARY_INTS*32" "VMX_TERTIARY_FEATURE_" "NVMXINTS*32" $3
+
 	echo "#endif /* CONFIG_X86_VMX_FEATURE_NAMES */"
 ) > $OUT
......
 // SPDX-License-Identifier: GPL-2.0
+#include "asm/vmxfeatures.h"
 #include <linux/smp.h>
 #include <linux/timex.h>
 #include <linux/string.h>
@@ -9,6 +10,7 @@
 
 #ifdef CONFIG_X86_VMX_FEATURE_NAMES
 extern const char * const x86_vmx_flags[NVMXINTS*32];
+extern const char * const x86_vmx_tertiary_flags[NVMX_TERTIARY_INTS*32];
 #endif
 
 /*
@@ -108,12 +110,18 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 #ifdef CONFIG_X86_VMX_FEATURE_NAMES
 	if (cpu_has(c, X86_FEATURE_VMX) && c->vmx_capability[0]) {
+		struct extra_cpuinfo_x86 *e = &extra_cpu_data(c->cpu_index);
 		seq_puts(m, "\nvmx flags\t:");
 		for (i = 0; i < 32*NVMXINTS; i++) {
 			if (test_bit(i, (unsigned long *)c->vmx_capability) &&
 			    x86_vmx_flags[i] != NULL)
 				seq_printf(m, " %s", x86_vmx_flags[i]);
 		}
+		for (i = 0; i < 32*NVMX_TERTIARY_INTS; i++) {
+			if (test_bit(i, (unsigned long *)e->vmx_tertiary_capability) &&
+			    x86_vmx_tertiary_flags[i] != NULL)
+				seq_printf(m, " %s", x86_vmx_tertiary_flags[i]);
+		}
 	}
 #endif
......
@@ -5,6 +5,7 @@
  * This file contains the setup_arch() code, which handles the architecture-dependent
  * parts of early kernel initialization.
  */
+#include "asm/processor.h"
 #include <linux/console.h>
 #include <linux/crash_dump.h>
 #include <linux/dma-map-ops.h>
@@ -141,6 +142,7 @@ struct cpuinfo_x86 boot_cpu_data __read_mostly;
 EXPORT_SYMBOL(boot_cpu_data);
 #endif
 
+struct extra_cpuinfo_x86 extra_boot_cpu_data __read_mostly;
 
 #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
 __visible unsigned long mmu_cr4_features __ro_after_init;
......
@@ -99,6 +99,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 /* Per CPU bogomips and other parameters */
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
+DEFINE_PER_CPU_READ_MOSTLY(struct extra_cpuinfo_x86, extra_cpu_info);
+EXPORT_PER_CPU_SYMBOL(extra_cpu_info);
 
 /* Logical package management. We might want to allocate that dynamically */
 unsigned int __max_logical_packages __read_mostly;
......
@@ -2174,15 +2174,21 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
 /* emulate APIC access in a trap manner */
 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
 {
-	u32 val = 0;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u64 val = 0;
 
 	/* hw has done the conditional check and inst decode */
 	offset &= 0xff0;
 
-	kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
+	/* exception dealing with 64bit data on vICR in x2apic mode */
+	if ((offset == APIC_ICR) && apic_x2apic_mode(apic)) {
+		val = kvm_lapic_get_reg64(apic, offset);
+		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(val>>32));
+	} else
+		kvm_lapic_reg_read(apic, offset, 4, &val);
 
 	/* TODO: optimize to just emulate side effect w/o one more write */
-	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
+	kvm_lapic_reg_write(apic, offset, (u32)val);
 }
 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
......
@@ -162,6 +162,11 @@ static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off)
 	return *((u32 *) (apic->regs + reg_off));
 }
 
+static inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg_off)
+{
+	return *((u64 *) (apic->regs + reg_off));
+}
+
 static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
 {
 	*((u32 *) (regs + reg_off)) = val;
......
@@ -13,6 +13,7 @@ extern bool __read_mostly enable_unrestricted_guest;
 extern bool __read_mostly enable_ept_ad_bits;
 extern bool __read_mostly enable_pml;
 extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_ipiv;
 extern int __read_mostly pt_mode;
 
 #define PT_MODE_SYSTEM		0
@@ -60,6 +61,7 @@ struct vmcs_config {
 	u32 pin_based_exec_ctrl;
 	u32 cpu_based_exec_ctrl;
 	u32 cpu_based_2nd_exec_ctrl;
+	u64 cpu_based_3rd_exec_ctrl;
 	u32 vmexit_ctrl;
 	u32 vmentry_ctrl;
 	struct nested_vmx_msrs nested;
@@ -133,6 +135,12 @@ static inline bool cpu_has_secondary_exec_ctrls(void)
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 }
 
+static inline bool cpu_has_tertiary_exec_ctrls(void)
+{
+	return vmcs_config.cpu_based_exec_ctrl &
+		CPU_BASED_ACTIVATE_TERTIARY_CONTROLS;
+}
+
 static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -272,6 +280,11 @@ static inline bool cpu_has_vmx_apicv(void)
 		cpu_has_vmx_posted_intr();
 }
 
+static inline bool cpu_has_vmx_ipiv(void)
+{
+	return vmcs_config.cpu_based_3rd_exec_ctrl & TERTIARY_EXEC_IPI_VIRT;
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
 	return cpu_has_vmx_tpr_shadow() &&
......
@@ -299,8 +299,10 @@ const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
 
 __init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
 {
+	vmcs_conf->cpu_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_EXEC_CTRL;
 	vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
 	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+	vmcs_conf->cpu_based_3rd_exec_ctrl = 0;
 
 	vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
 	vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
......
@@ -50,6 +50,7 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
  */
 #define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
 				    PIN_BASED_VMX_PREEMPTION_TIMER)
+#define EVMCS1_UNSUPPORTED_EXEC_CTRL (CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
 #define EVMCS1_UNSUPPORTED_2NDEXEC					\
 	(SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |				\
 	 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |			\
......
@@ -89,7 +89,7 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 {
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
-	if (!vmx_can_use_vtd_pi(vcpu->kvm))
+	if (!(vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm)))
 		return;
 
 	/* Set SN when the vCPU is preempted */
@@ -147,7 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
 	struct pi_desc old, new;
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
-	if (!vmx_can_use_vtd_pi(vcpu->kvm))
+	if (!(vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm)))
 		return 0;
 
 	WARN_ON(irqs_disabled());
......
@@ -5,6 +5,8 @@
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_SN  1
 
+#define PID_TABLE_ENTRY_VALID 1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
 	u32 pir[8];     /* Posted interrupt requested */
......
@@ -48,6 +48,7 @@ struct vmcs_controls_shadow {
 	u32 pin;
 	u32 exec;
 	u32 secondary_exec;
+	u64 tertiary_exec;
 };
 
 /*
......
@@ -104,6 +104,9 @@ module_param(fasteoi, bool, S_IRUGO);
 bool __read_mostly enable_apicv = 1;
 module_param(enable_apicv, bool, S_IRUGO);
 
+bool __read_mostly enable_ipiv = true;
+module_param(enable_ipiv, bool, 0444);
+
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -2568,6 +2571,15 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
 	return 0;
 }
 
+static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
+{
+	u64 allowed;
+
+	rdmsrl(msr, allowed);
+
+	return ctl_opt & allowed;
+}
+
 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 				    struct vmx_capability *vmx_cap)
 {
@@ -2576,6 +2588,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	u32 _pin_based_exec_control = 0;
 	u32 _cpu_based_exec_control = 0;
 	u32 _cpu_based_2nd_exec_control = 0;
+	u64 _cpu_based_3rd_exec_control = 0;
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
opt = CPU_BASED_TPR_SHADOW | opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |
CPU_BASED_ACTIVATE_TERTIARY_CONTROLS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
&_cpu_based_exec_control) < 0) &_cpu_based_exec_control) < 0)
return -EIO; return -EIO;
@@ -2670,6 +2684,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 			"1-setting enable VPID VM-execution control\n");
 	}
 
+	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) {
+		u64 opt3 = TERTIARY_EXEC_IPI_VIRT;
+
+		_cpu_based_3rd_exec_control = adjust_vmx_controls64(opt3,
+					      MSR_IA32_VMX_PROCBASED_CTLS3);
+	}
+
 	min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
@@ -2757,6 +2778,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
 	vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
 	vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
+	vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control;
 	vmcs_conf->vmexit_ctrl         = _vmexit_control;
 	vmcs_conf->vmentry_ctrl        = _vmentry_control;
@@ -4041,6 +4063,8 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
 		vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW);
 		vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
 		vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
+		if (enable_ipiv)
+			vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR), MSR_TYPE_RW);
 	}
 }
@@ -4307,15 +4331,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
-	if (kvm_vcpu_apicv_active(vcpu))
-		secondary_exec_controls_setbit(vmx,
-			      SECONDARY_EXEC_APIC_REGISTER_VIRT |
-			      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
-	else
-		secondary_exec_controls_clearbit(vmx,
-			      SECONDARY_EXEC_APIC_REGISTER_VIRT |
-			      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+	if (cpu_has_secondary_exec_ctrls()) {
+		if (kvm_vcpu_apicv_active(vcpu)) {
+			secondary_exec_controls_setbit(vmx,
+				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+			if (enable_ipiv)
+				tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT);
+		} else {
+			secondary_exec_controls_clearbit(vmx,
+				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+			if (enable_ipiv)
+				tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT);
+		}
+	}
 
 	if (cpu_has_vmx_msr_bitmap())
@@ -4348,6 +4376,20 @@ u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	return exec_control;
 }
 
+static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx)
+{
+	u64 exec_control = vmcs_config.cpu_based_3rd_exec_ctrl;
+
+	/*
+	 * IPI virtualization relies on APICv. Disable IPI virtualization if
+	 * APICv is inhibited.
+	 */
+	if (!enable_ipiv || !kvm_vcpu_apicv_active(&vmx->vcpu))
+		exec_control &= ~TERTIARY_EXEC_IPI_VIRT;
+
+	return exec_control;
+}
+
 /*
  * Adjust a single secondary execution control bit to intercept/allow an
  * instruction in the guest. This is usually done based on whether or not a
@@ -4488,6 +4530,35 @@ static void ept_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0);
 }
 
+static inline int vmx_get_pid_table_order(struct kvm *kvm)
+{
+	return get_order(kvm->arch.max_vcpu_ids * sizeof(*to_kvm_vmx(kvm)->pid_table));
+}
+
+static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
+{
+	struct page *pages;
+	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
+
+	if (!irqchip_in_kernel(kvm) || !enable_ipiv)
+		return 0;
+
+	if (kvm_vmx->pid_table)
+		return 0;
+
+	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
+	if (!pages)
+		return -ENOMEM;
+
+	kvm_vmx->pid_table = (void *)page_address(pages);
+	return 0;
+}
+
+static int vmx_vcpu_precreate(struct kvm *kvm)
+{
+	return vmx_alloc_ipiv_pid_table(kvm);
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 
 /*
@@ -4496,6 +4567,9 @@ static void ept_set_mmio_spte_mask(void)
  */
 static void init_vmcs(struct vcpu_vmx *vmx)
 {
+	struct kvm *kvm = vmx->vcpu.kvm;
+	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
+
 	if (nested)
 		nested_vmx_set_vmcs_shadowing_bitmap();
@@ -4514,6 +4588,9 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 		secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
 	}
 
+	if (cpu_has_tertiary_exec_ctrls())
+		tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));
+
 	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
@@ -4526,7 +4603,12 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
 	}
 
-	if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
+	if (vmx_can_use_ipiv(&vmx->vcpu)) {
+		vmcs_write64(PID_POINTER_TABLE, __pa(kvm_vmx->pid_table));
+		vmcs_write16(LAST_PID_POINTER_INDEX, kvm->arch.max_vcpu_ids - 1);
+	}
+
+	if (!kvm_pause_in_guest(kvm)) {
 		vmcs_write32(PLE_GAP, ple_gap);
 		vmx->ple_window = ple_window;
 		vmx->ple_window_dirty = true;
@@ -6019,6 +6101,7 @@ void dump_vmcs(void)
 {
 	u32 vmentry_ctl, vmexit_ctl;
 	u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
+	u64 tertiary_exec_control;
 	unsigned long cr4;
 
 	if (!dump_invalid_vmcs) {
@@ -6031,9 +6114,16 @@ void dump_vmcs(void)
 	cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
 	cr4 = vmcs_readl(GUEST_CR4);
-	secondary_exec_control = 0;
+
 	if (cpu_has_secondary_exec_ctrls())
 		secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	else
+		secondary_exec_control = 0;
+
+	if (cpu_has_tertiary_exec_ctrls())
+		tertiary_exec_control = vmcs_read64(TERTIARY_VM_EXEC_CONTROL);
+	else
+		tertiary_exec_control = 0;
 
 	pr_err("*** Guest State ***\n");
 	pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
@@ -6116,9 +6206,10 @@ void dump_vmcs(void)
 	       vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
 	pr_err("*** Control State ***\n");
-	pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
-	       pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
-	pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
+	pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n",
+	       cpu_based_exec_ctrl, secondary_exec_control, tertiary_exec_control);
+	pr_err("PinBased=0x%08x EntryControls=%08x ExitControls=%08x\n",
+	       pin_based_exec_ctrl, vmentry_ctl, vmexit_ctl);
 	pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
 	       vmcs_read32(EXCEPTION_BITMAP),
 	       vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
@@ -7214,6 +7305,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 	vmx->ept_pointer = INVALID_PAGE;
 
+	if (vmx_can_use_ipiv(vcpu))
+		WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id],
+			   __pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID);
+
 	return 0;
 
 free_vmcs:
@@ -7877,6 +7972,13 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
 	return supported & BIT(bit);
 }
 
+static void vmx_vm_destroy(struct kvm *kvm)
+{
+	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
+
+	free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.hardware_unsetup = hardware_unsetup,
@@ -7887,7 +7989,9 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.vm_size = sizeof(struct kvm_vmx),
 	.vm_init = vmx_vm_init,
+	.vm_destroy = vmx_vm_destroy,
 
+	.vcpu_precreate = vmx_vcpu_precreate,
 	.vcpu_create = vmx_create_vcpu,
 	.vcpu_free = vmx_free_vcpu,
 	.vcpu_reset = vmx_vcpu_reset,
@@ -8089,6 +8193,9 @@ static __init int hardware_setup(void)
 		vmx_x86_ops.sync_pir_to_irr = NULL;
 	}
 
+	if (!enable_apicv || !cpu_has_vmx_ipiv())
+		enable_ipiv = false;
+
 	if (cpu_has_vmx_tsc_scaling()) {
 		kvm_has_tsc_control = true;
 		kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
......
@@ -358,6 +358,8 @@ struct kvm_vmx {
 	enum ept_pointers_status ept_pointers_match;
 	spinlock_t ept_pointer_lock;
+
+	/* Posted Interrupt Descriptor (PID) table for IPI virtualization */
+	u64 *pid_table;
 };
 
 bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
@@ -409,31 +411,36 @@ static inline u8 vmx_get_rvi(void)
 	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
 }
 
-#define BUILD_CONTROLS_SHADOW(lname, uname)				    \
-static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val)	    \
+#define BUILD_CONTROLS_SHADOW(lname, uname, bits)			    \
+static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \
 {									    \
 	if (vmx->loaded_vmcs->controls_shadow.lname != val) {		    \
-		vmcs_write32(uname, val);				    \
+		vmcs_write##bits(uname, val);				    \
 		vmx->loaded_vmcs->controls_shadow.lname = val;		    \
 	}								    \
 }									    \
-static inline u32 lname##_controls_get(struct vcpu_vmx *vmx)		    \
+static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs)   \
 {									    \
-	return vmx->loaded_vmcs->controls_shadow.lname;			    \
+	return vmcs->controls_shadow.lname;				    \
 }									    \
-static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val)  \
+static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx)	    \
 {									    \
-	lname##_controls_set(vmx, lname##_controls_get(vmx) | val);	    \
+	return __##lname##_controls_get(vmx->loaded_vmcs);		    \
 }									    \
-static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \
+static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \
 {									    \
+	lname##_controls_set(vmx, lname##_controls_get(vmx) | val);	    \
+}									    \
+static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \
+{									    \
 	lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val);	    \
 }
-BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS)
-BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS)
-BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
-BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
-BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
+BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
+BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
+BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32)
+BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32)
+BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32)
+BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
 
 static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu)
 {
@@ -553,4 +560,9 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
 
 void dump_vmcs(void);
 
+static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
+{
+	return lapic_in_kernel(vcpu) && enable_ipiv;
+}
+
 #endif /* __KVM_X86_VMX_H */
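To make the BUILD_CONTROLS_SHADOW rework above concrete, the new 64-bit instantiation expands (approximately) to the accessors below; this expansion is written out here purely for illustration, it is not a separate change in the PR:

```c
/* BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) yields: */
static inline void tertiary_exec_controls_set(struct vcpu_vmx *vmx, u64 val)
{
	if (vmx->loaded_vmcs->controls_shadow.tertiary_exec != val) {
		vmcs_write64(TERTIARY_VM_EXEC_CONTROL, val); /* 64-bit VMCS write */
		vmx->loaded_vmcs->controls_shadow.tertiary_exec = val;
	}
}

static inline u64 __tertiary_exec_controls_get(struct loaded_vmcs *vmcs)
{
	return vmcs->controls_shadow.tertiary_exec;
}

static inline u64 tertiary_exec_controls_get(struct vcpu_vmx *vmx)
{
	return __tertiary_exec_controls_get(vmx->loaded_vmcs);
}

static inline void tertiary_exec_controls_setbit(struct vcpu_vmx *vmx, u64 val)
{
	tertiary_exec_controls_set(vmx, tertiary_exec_controls_get(vmx) | val);
}
```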
@@ -5432,6 +5432,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		break;
 	}
 #endif
+	case KVM_CAP_MAX_VCPU_ID:
+		r = -EINVAL;
+		if (cap->args[0] > KVM_MAX_VCPU_ID)
+			break;
+
+		mutex_lock(&kvm->lock);
+		if (kvm->arch.max_vcpu_ids == cap->args[0]) {
+			r = 0;
+		} else if (!kvm->arch.max_vcpu_ids) {
+			kvm->arch.max_vcpu_ids = cap->args[0];
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -10104,11 +10118,17 @@ static void fx_init(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 {
-	if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+	if (kvm_check_tsc_unstable() && kvm->created_vcpus)
 		pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
 			     "guest TSC will not be reliable\n");
 
-	return 0;
+	if (!kvm->arch.max_vcpu_ids)
+		kvm->arch.max_vcpu_ids = KVM_MAX_VCPU_ID;
+
+	if (id >= kvm->arch.max_vcpu_ids)
+		return -EINVAL;
+
+	return kvm_x86_ops.vcpu_precreate ? kvm_x86_ops.vcpu_precreate(kvm) : 0;
 }
 
 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
......
@@ -9,6 +9,7 @@
 /x86_64/evmcs_test
 /x86_64/kvm_pv_test
 /x86_64/hyperv_cpuid
+/x86_64/max_vcpuid_cap_test
 /x86_64/mmio_warning_test
 /x86_64/platform_info_test
 /x86_64/set_sregs_test
......
@@ -59,6 +59,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
+TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
......
@@ -62,6 +62,7 @@ enum vm_mem_backing_src_type {
 };
 
 int kvm_check_cap(long cap);
+int vm_check_cap(struct kvm_vm *vm, long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
 		    struct kvm_enable_cap *cap);
......
@@ -63,6 +63,33 @@ int kvm_check_cap(long cap)
 	return ret;
 }
 
+/* VM Check Capability
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   On success, the value corresponding to the capability (KVM_CAP_*)
+ *   specified by the value of cap. On failure a TEST_ASSERT failure
+ *   is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap);
+	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n"
+		"  rc: %i errno: %i", ret, errno);
+
+	return ret;
+}
+
 /* VM Enable Capability
  *
  * Input Args:
......
// SPDX-License-Identifier: GPL-2.0-only
/*
 * maximum APIC ID capability tests
 *
 * Copyright (C) 2022, Intel, Inc.
 *
 * Tests for getting/setting maximum APIC ID capability
 */

#include "kvm_util.h"
#include "../lib/kvm_util_internal.h"

#define MAX_VCPU_ID	2

int main(int argc, char *argv[])
{
	struct kvm_vm *vm;
	struct kvm_enable_cap cap = { 0 };
	int ret;

	vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);

	/* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
	ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);

	/* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
	cap.cap = KVM_CAP_MAX_VCPU_ID;
	cap.args[0] = ret + 1;
	ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap);
	TEST_ASSERT(ret < 0,
		    "Unexpected success to enable KVM_CAP_MAX_VCPU_ID beyond KVM cap!\n");

	/* Set KVM_CAP_MAX_VCPU_ID */
	cap.cap = KVM_CAP_MAX_VCPU_ID;
	cap.args[0] = MAX_VCPU_ID;
	ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap);
	TEST_ASSERT(ret == 0,
		    "Unexpected failure to enable KVM_CAP_MAX_VCPU_ID!\n");

	/* Try to set KVM_CAP_MAX_VCPU_ID again */
	cap.args[0] = MAX_VCPU_ID + 1;
	ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap);
	TEST_ASSERT(ret < 0,
		    "Unexpected success to enable KVM_CAP_MAX_VCPU_ID again\n");

	/* Create a vCPU with an id beyond the KVM_CAP_MAX_VCPU_ID cap */
	ret = ioctl(vm->fd, KVM_CREATE_VCPU, MAX_VCPU_ID);
	TEST_ASSERT(ret < 0,
		    "Unexpected success in creating a vCPU with VCPU ID out of range\n");

	kvm_vm_free(vm);
	return 0;
}
@@ -3180,13 +3180,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 		return -EINVAL;
 	}
 
+	r = kvm_arch_vcpu_precreate(kvm, id);
+	if (r) {
+		mutex_unlock(&kvm->lock);
+		return r;
+	}
+
 	kvm->created_vcpus++;
 	mutex_unlock(&kvm->lock);
 
-	r = kvm_arch_vcpu_precreate(kvm, id);
-	if (r)
-		goto vcpu_decrement;
-
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 		r = -ENOMEM;
......