diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index d542bd99a9a416c02d2af232026223dffbfa2294..19438a5c942b64b9d157deff213998b243e54846 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6220,6 +6220,27 @@ default. See Documentation/x86/sgx.rst for more details. +7.23 KVM_CAP_MAX_VCPU_ID +------------------------ + +:Architectures: x86 +:Target: VM +:Parameters: args[0] - maximum APIC ID value set for current VM +:Returns: 0 on success, -EINVAL if args[0] is beyond KVM_MAX_VCPU_ID + supported in KVM or if it has been set. + +This capability allows userspace to specify maximum possible APIC ID +assigned for current VM session prior to the creation of vCPUs, saving +memory for data structures indexed by the APIC ID. Userspace is able +to calculate the limit to APIC ID values from designated +CPU topology. + +The value can be changed only until KVM_ENABLE_CAP is set to a nonzero +value or until a vCPU is created. Upon creation of the first vCPU, +if the value was set to zero or KVM_ENABLE_CAP was not invoked, KVM +uses the return value of KVM_CHECK_EXTENSION(KVM_CAP_MAX_VCPU_ID) as +the maximum APIC ID. + 8. Other capabilities. ====================== diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8e9239c24ffc62c91a0f280fe5bddd89029a664..8b35774d3319f89eba58f1ce6481d14e8e6db653 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2944,9 +2944,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) if (!sclp.has_esca || !sclp.has_64bscao) return false; - mutex_lock(&kvm->lock); rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); - mutex_unlock(&kvm->lock); return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c54e81d2a409c29bc9519f1717e86f055c1d3827..b2e3a2b812ed863dd81414d56ee58d9898092c8f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1020,6 +1020,12 @@ struct kvm_arch { struct list_head tdp_mmu_roots; /* List of struct tdp_mmu_pages not being used as roots */ struct list_head tdp_mmu_pages; + /* + * VM-scope maximum vCPU ID. Used to determine the size of structures + * that increase along with the maximum vCPU ID, in which case, using + * the global KVM_MAX_VCPU_ID may lead to significant memory waste. + */ + u32 max_vcpu_ids; }; struct kvm_vm_stat { @@ -1121,6 +1127,7 @@ struct kvm_x86_ops { void (*vm_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ + int (*vcpu_precreate)(struct kvm *kvm); int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 03622e5e1dec25bc1ae67e8033d016c1cd25d64b..3995770d723acbdad84abc338546d659e2d6bb39 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -956,6 +956,7 @@ #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 #define MSR_IA32_VMX_VMFUNC 0x00000491 +#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a3d0152a4361386eab160edab502be504b3531c1..ea694ad7eda77a59cff891b871ce2db348046232 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -142,6 +142,12 @@ struct cpuinfo_x86 { unsigned initialized : 1; } __randomize_layout; +struct extra_cpuinfo_x86 { +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + __u32 vmx_tertiary_capability[NVMX_TERTIARY_INTS]; +#endif +} __randomize_layout; + struct cpuid_regs { u32 eax, ebx, ecx, edx; }; @@ -172,6 +178,8 @@ enum cpuid_regs_idx { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; +extern struct extra_cpuinfo_x86 extra_boot_cpu_data; + extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; @@ -183,6 +191,14 @@ DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) boot_cpu_data #endif +#ifdef CONFIG_SMP +DECLARE_PER_CPU_READ_MOSTLY(struct extra_cpuinfo_x86, extra_cpu_info); +#define extra_cpu_data(cpu) per_cpu(extra_cpu_info, cpu) +#else +#define extra_cpu_info extra_boot_cpu_data +#define extra_cpu_data(cpu) extra_boot_cpu_data +#endif + extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index c6f028bac3ff869e2e3a65a28575d663a39eda82..ecae8a49e6009bf68a66f7b98c7e423df5fd86c2 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -18,6 +18,7 @@ #include #define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) +#define VMCS_TERTIARY_CONTROL_BIT(x) BIT(VMX_TERTIARY_FEATURE_##x & 0x1f) /* * Definitions of Primary Processor-Based VM-Execution Controls. @@ -31,6 +32,7 @@ #define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING) #define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING) #define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING) +#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS VMCS_CONTROL_BIT(TERTIARY_CONTROLS) #define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) #define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) #define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) @@ -74,6 +76,11 @@ #define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) +/* + * Definitions of Tertiary Processor-Based VM-Execution Controls. + */ +#define TERTIARY_EXEC_IPI_VIRT VMCS_TERTIARY_CONTROL_BIT(IPI_VIRT) + #define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) #define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) #define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) @@ -156,6 +163,7 @@ static inline int vmx_misc_mseg_revid(u64 vmx_misc) enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, POSTED_INTR_NV = 0x00000002, + LAST_PID_POINTER_INDEX = 0x00000008, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ -219,6 +227,10 @@ enum vmcs_field { ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, TSC_MULTIPLIER = 0x00002032, TSC_MULTIPLIER_HIGH = 0x00002033, + TERTIARY_VM_EXEC_CONTROL = 0x00002034, + TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035, + PID_POINTER_TABLE = 0x00002042, + PID_POINTER_TABLE_HIGH = 0x00002043, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index 9915990fd8cfab51a9cd287017e2d71d93f1f3f0..f26dea5769d8b50bc3b2d2e55b33e8e495767238 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -6,6 +6,7 @@ * Defines VMX CPU feature bits */ #define NVMXINTS 3 /* N 32-bit words worth of info */ +#define NVMX_TERTIARY_INTS 2 /* N 32-bit words worth of info */ /* * Note: If the comment begins with a quoted string, that string is used @@ -43,6 +44,7 @@ #define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */ #define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */ #define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */ +#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */ #define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */ #define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */ #define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */ @@ -84,4 +86,6 @@ #define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ #define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ +/* Tertiary Processor-Based VM-Execution Controls, word 3 */ +#define VMX_TERTIARY_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ #endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1ee24f9541a56b95c7febc3aca8201da749a9162..bc0e27070a09abb80a95edad4bf323008b3428b3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1596,6 +1596,15 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c) #endif } +#ifdef CONFIG_X86_VMX_FEATURE_NAMES +static void init_extra_cpu_data(u16 cpu_index) +{ + struct extra_cpuinfo_x86 *e = &extra_cpu_data(cpu_index); + + memset(&e->vmx_tertiary_capability, 0, sizeof(e->vmx_tertiary_capability)); +} +#endif + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -1626,6 +1635,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) memset(&c->x86_capability, 0, sizeof(c->x86_capability)); #ifdef CONFIG_X86_VMX_FEATURE_NAMES memset(&c->vmx_capability, 0, sizeof(c->vmx_capability)); + init_extra_cpu_data(c->cpu_index); #endif generic_identify(c); diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index da696eb4821a0b159e14ea0246beea0bcaf22cfc..09cd64ac6d4f9dc9c38e72ca349671361dbfc0ec 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -18,8 +18,24 @@ enum vmx_feature_leafs { NR_VMX_FEATURE_WORDS, }; +enum vmx_tertiary_feature_leafs { + TERTIARY_CTLS_LOW = 0, + TERTIARY_CTLS_HIGH, +}; + #define VMX_F(x) BIT(VMX_FEATURE_##x & 0x1f) +static void init_vmx_tertiary_capabilities(u16 cpu_index) +{ + struct extra_cpuinfo_x86 *e = &extra_cpu_data(cpu_index); + u32 low, high; + + /* All 64 bits of tertiary controls MSR are allowed-1 settings. */ + rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS3, &low, &high); + e->vmx_tertiary_capability[TERTIARY_CTLS_LOW] = low; + e->vmx_tertiary_capability[TERTIARY_CTLS_HIGH] = high; +} + static void init_vmx_capabilities(struct cpuinfo_x86 *c) { u32 supported, funcs, ept, vpid, ign; @@ -42,6 +58,8 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c) rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported); c->vmx_capability[SECONDARY_CTLS] = supported; + init_vmx_tertiary_capabilities(c->cpu_index); + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported); rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs); diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 1db560ed2ca35a0d3572f01c25ea4f9f96d015fd..b030882d2f6f390d9202b4da93fb904ca31d2e4c 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -68,6 +68,9 @@ trap 'rm "$OUT"' EXIT echo "#include " echo "#endif" dump_array "x86_vmx_flags" "NVMXINTS*32" "VMX_FEATURE_" "" $3 + echo "" + + dump_array "x86_vmx_tertiary_flags" "NVMX_TERTIARY_INTS*32" "VMX_TERTIARY_FEATURE_" "NVMXINTS*32" $3 echo "#endif /* CONFIG_X86_VMX_FEATURE_NAMES */" ) > $OUT diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 4eec8889b0ff1de48787fd162985bf930b89fd11..7810c75abdd4e5bd3d6c44b08a2e40622529be08 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include "asm/vmxfeatures.h" #include #include #include @@ -9,6 +10,7 @@ #ifdef CONFIG_X86_VMX_FEATURE_NAMES extern const char * const x86_vmx_flags[NVMXINTS*32]; +extern const char * const x86_vmx_tertiary_flags[NVMX_TERTIARY_INTS*32]; #endif /* @@ -108,12 +110,18 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_X86_VMX_FEATURE_NAMES if (cpu_has(c, X86_FEATURE_VMX) && c->vmx_capability[0]) { + struct extra_cpuinfo_x86 *e = &extra_cpu_data(c->cpu_index); seq_puts(m, "\nvmx flags\t:"); for (i = 0; i < 32*NVMXINTS; i++) { if (test_bit(i, (unsigned long *)c->vmx_capability) && x86_vmx_flags[i] != NULL) seq_printf(m, " %s", x86_vmx_flags[i]); } + for (i = 0; i < 32*NVMX_TERTIARY_INTS; i++) { + if (test_bit(i, (unsigned long *)e->vmx_tertiary_capability) && + x86_vmx_tertiary_flags[i] != NULL) + seq_printf(m, " %s", x86_vmx_tertiary_flags[i]); + } } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 85979c1a404e933654642d7f07ba062976e9cd2b..4d48ebb18fe8b0201f5d480059d4bae5c3ae4ee6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -5,6 +5,7 @@ * This file contains the setup_arch() code, which handles the architecture-dependent * parts of early kernel initialization. */ +#include "asm/processor.h" #include #include #include @@ -141,6 +142,7 @@ struct cpuinfo_x86 boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); #endif +struct extra_cpuinfo_x86 extra_boot_cpu_data __read_mostly; #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) __visible unsigned long mmu_cr4_features __ro_after_init; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8baff500914ea12f0e0f0310171a9d2f257edb83..0c02358130456d60268f23fe763c7981c60704ce 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -99,6 +99,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +DEFINE_PER_CPU_READ_MOSTLY(struct extra_cpuinfo_x86, extra_cpu_info); +EXPORT_PER_CPU_SYMBOL(extra_cpu_info); /* Logical package management. We might want to allocate that dynamically */ unsigned int __max_logical_packages __read_mostly; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 28248e312efa267e4fd0984d990b7e2d148ada09..4ea8119b50105751e66f3e2c61dd4bf56a79dd16 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2174,15 +2174,21 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); /* emulate APIC access in a trap manner */ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) { - u32 val = 0; + struct kvm_lapic *apic = vcpu->arch.apic; + u64 val = 0; /* hw has done the conditional check and inst decode */ offset &= 0xff0; - kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val); + /* exception dealing with 64bit data on vICR in x2apic mode */ + if ((offset == APIC_ICR) && apic_x2apic_mode(apic)) { + val = kvm_lapic_get_reg64(apic, offset); + kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(val>>32)); + } else + kvm_lapic_reg_read(apic, offset, 4, &val); /* TODO: optimize to just emulate side effect w/o one more write */ - kvm_lapic_reg_write(vcpu->arch.apic, offset, val); + kvm_lapic_reg_write(apic, offset, (u32)val); } EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 4fb86e3a9dd3db76328ac13f36eafc8f009d50b2..49e85dea5e3f332ace39d4db008ef8aaf73d06dd 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -162,6 +162,11 @@ static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off) return *((u32 *) (apic->regs + reg_off)); } +static inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg_off) +{ + return *((u64 *) (apic->regs + reg_off)); +} + static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val) { *((u32 *) (regs + reg_off)) = val; diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 7c47c7f7911bb70b28ac193ec274e5796265aa2f..1fc2d141588907c2e710ecac2a9ca7d5dd3238f1 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -13,6 +13,7 @@ extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_ept_ad_bits; extern bool __read_mostly enable_pml; extern bool __read_mostly enable_apicv; +extern bool __read_mostly enable_ipiv; extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 @@ -60,6 +61,7 @@ struct vmcs_config { u32 pin_based_exec_ctrl; u32 cpu_based_exec_ctrl; u32 cpu_based_2nd_exec_ctrl; + u64 cpu_based_3rd_exec_ctrl; u32 vmexit_ctrl; u32 vmentry_ctrl; struct nested_vmx_msrs nested; @@ -133,6 +135,12 @@ static inline bool cpu_has_secondary_exec_ctrls(void) CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; } +static inline bool cpu_has_tertiary_exec_ctrls(void) +{ + return vmcs_config.cpu_based_exec_ctrl & + CPU_BASED_ACTIVATE_TERTIARY_CONTROLS; +} + static inline bool cpu_has_vmx_virtualize_apic_accesses(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -272,6 +280,11 @@ static inline bool cpu_has_vmx_apicv(void) cpu_has_vmx_posted_intr(); } +static inline bool cpu_has_vmx_ipiv(void) +{ + return vmcs_config.cpu_based_3rd_exec_ctrl & TERTIARY_EXEC_IPI_VIRT; +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 5b68034ec5f9c336d628ad9036b6589ed5970550..a406a7a59ffd5528df351b0169e89d1b9fbcaabf 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -299,8 +299,10 @@ const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); __init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) { + vmcs_conf->cpu_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_EXEC_CTRL; vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC; + vmcs_conf->cpu_based_3rd_exec_ctrl = 0; vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 011929a63823084e21e2dc5575e1567bbc478ee7..fecaa026b78ddeb197e06c4f36d9894b80b88294 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -50,6 +50,7 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); */ #define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \ PIN_BASED_VMX_PREEMPTION_TIMER) +#define EVMCS1_UNSUPPORTED_EXEC_CTRL (CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) #define EVMCS1_UNSUPPORTED_2NDEXEC \ (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \ diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 5f8acd2faa7c1ea4848b257a5564e54376fe25ec..9689541fa02768a63dd6c09f4cc2f43517122857 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -89,7 +89,7 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (!vmx_can_use_vtd_pi(vcpu->kvm)) + if (!(vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm))) return; /* Set SN when the vCPU is preempted */ @@ -147,7 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu) struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (!vmx_can_use_vtd_pi(vcpu->kvm)) + if (!(vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm))) return 0; WARN_ON(irqs_disabled()); diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 0bdc41391c5b8fae1d0f581944a6b21ee9156d26..39d92d52caa90341ae254c5c87ce354c3a591f20 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -5,6 +5,8 @@ #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 +#define PID_TABLE_ENTRY_VALID 1 + /* Posted-Interrupt Descriptor */ struct pi_desc { u32 pir[8]; /* Posted interrupt requested */ diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 69c147df957fde613cf6dac3b9c9d4bfe327c2b7..995a51635467b7e710b9a6881b2e673cccf4ba64 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -48,6 +48,7 @@ struct vmcs_controls_shadow { u32 pin; u32 exec; u32 secondary_exec; + u64 tertiary_exec; }; /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index af23ededb0952abbedc33193c3cbc27b46a5bd9d..c115c57261c1efa5428e12c041623a19ad51b37a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -104,6 +104,9 @@ module_param(fasteoi, bool, S_IRUGO); bool __read_mostly enable_apicv = 1; module_param(enable_apicv, bool, S_IRUGO); +bool __read_mostly enable_ipiv = true; +module_param(enable_ipiv, bool, 0444); + /* * If nested=1, nested virtualization is supported, i.e., guests may use * VMX and be a hypervisor for its own guests. If nested=0, guests may not @@ -2568,6 +2571,15 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, return 0; } +static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) +{ + u64 allowed; + + rdmsrl(msr, allowed); + + return ctl_opt & allowed; +} + static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { @@ -2576,6 +2588,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, u32 _pin_based_exec_control = 0; u32 _cpu_based_exec_control = 0; u32 _cpu_based_2nd_exec_control = 0; + u64 _cpu_based_3rd_exec_control = 0; u32 _vmexit_control = 0; u32 _vmentry_control = 0; @@ -2597,7 +2610,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | + CPU_BASED_ACTIVATE_TERTIARY_CONTROLS; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, &_cpu_based_exec_control) < 0) return -EIO; @@ -2670,6 +2684,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, "1-setting enable VPID VM-execution control\n"); } + if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) { + u64 opt3 = TERTIARY_EXEC_IPI_VIRT; + + _cpu_based_3rd_exec_control = adjust_vmx_controls64(opt3, + MSR_IA32_VMX_PROCBASED_CTLS3); + } + min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; @@ -2757,6 +2778,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; + vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control; vmcs_conf->vmexit_ctrl = _vmexit_control; vmcs_conf->vmentry_ctrl = _vmentry_control; @@ -4041,6 +4063,8 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode) vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW); vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + if (enable_ipiv) + vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR), MSR_TYPE_RW); } } @@ -4307,15 +4331,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - if (cpu_has_secondary_exec_ctrls()) { - if (kvm_vcpu_apicv_active(vcpu)) - secondary_exec_controls_setbit(vmx, - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - else - secondary_exec_controls_clearbit(vmx, - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); + + if (kvm_vcpu_apicv_active(vcpu)) { + secondary_exec_controls_setbit(vmx, + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); + if (enable_ipiv) + tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT); + } else { + secondary_exec_controls_clearbit(vmx, + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); + if (enable_ipiv) + tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT); } if (cpu_has_vmx_msr_bitmap()) @@ -4348,6 +4376,20 @@ u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } +static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx) +{ + u64 exec_control = vmcs_config.cpu_based_3rd_exec_ctrl; + + /* + * IPI virtualization relies on APICv. Disable IPI virtualization if + * APICv is inhibited. + */ + if (!enable_ipiv || !kvm_vcpu_apicv_active(&vmx->vcpu)) + exec_control &= ~TERTIARY_EXEC_IPI_VIRT; + + return exec_control; +} + /* * Adjust a single secondary execution control bit to intercept/allow an * instruction in the guest. This is usually done based on whether or not a @@ -4488,6 +4530,35 @@ static void ept_set_mmio_spte_mask(void) kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0); } +static inline int vmx_get_pid_table_order(struct kvm *kvm) +{ + return get_order(kvm->arch.max_vcpu_ids * sizeof(*to_kvm_vmx(kvm)->pid_table)); +} + +static int vmx_alloc_ipiv_pid_table(struct kvm *kvm) +{ + struct page *pages; + struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); + + if (!irqchip_in_kernel(kvm) || !enable_ipiv) + return 0; + + if (kvm_vmx->pid_table) + return 0; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm)); + if (!pages) + return -ENOMEM; + + kvm_vmx->pid_table = (void *)page_address(pages); + return 0; +} + +static int vmx_vcpu_precreate(struct kvm *kvm) +{ + return vmx_alloc_ipiv_pid_table(kvm); +} + #define VMX_XSS_EXIT_BITMAP 0 /* @@ -4496,6 +4567,9 @@ static void ept_set_mmio_spte_mask(void) */ static void init_vmcs(struct vcpu_vmx *vmx) { + struct kvm *kvm = vmx->vcpu.kvm; + struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); + if (nested) nested_vmx_set_vmcs_shadowing_bitmap(); @@ -4514,6 +4588,9 @@ static void init_vmcs(struct vcpu_vmx *vmx) secondary_exec_controls_set(vmx, vmx->secondary_exec_control); } + if (cpu_has_tertiary_exec_ctrls()) + tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx)); + if (kvm_vcpu_apicv_active(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); @@ -4526,7 +4603,12 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } - if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { + if (vmx_can_use_ipiv(&vmx->vcpu)) { + vmcs_write64(PID_POINTER_TABLE, __pa(kvm_vmx->pid_table)); + vmcs_write16(LAST_PID_POINTER_INDEX, kvm->arch.max_vcpu_ids - 1); + } + + if (!kvm_pause_in_guest(kvm)) { vmcs_write32(PLE_GAP, ple_gap); vmx->ple_window = ple_window; vmx->ple_window_dirty = true; @@ -6019,6 +6101,7 @@ void dump_vmcs(void) { u32 vmentry_ctl, vmexit_ctl; u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; + u64 tertiary_exec_control; unsigned long cr4; if (!dump_invalid_vmcs) { @@ -6031,9 +6114,16 @@ void dump_vmcs(void) cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); cr4 = vmcs_readl(GUEST_CR4); - secondary_exec_control = 0; + if (cpu_has_secondary_exec_ctrls()) secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + else + secondary_exec_control = 0; + + if (cpu_has_tertiary_exec_ctrls()) + tertiary_exec_control = vmcs_read64(TERTIARY_VM_EXEC_CONTROL); + else + tertiary_exec_control = 0; pr_err("*** Guest State ***\n"); pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", @@ -6116,9 +6206,10 @@ void dump_vmcs(void) vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); pr_err("*** Control State ***\n"); - pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", - pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); - pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); + pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n", + cpu_based_exec_ctrl, secondary_exec_control, tertiary_exec_control); + pr_err("PinBased=0x%08x EntryControls=%08x ExitControls=%08x\n", + pin_based_exec_ctrl, vmentry_ctl, vmexit_ctl); pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", vmcs_read32(EXCEPTION_BITMAP), vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), @@ -7214,6 +7305,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) vmx->ept_pointer = INVALID_PAGE; + if (vmx_can_use_ipiv(vcpu)) + WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id], + __pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID); + return 0; free_vmcs: @@ -7877,6 +7972,13 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit) return supported & BIT(bit); } +static void vmx_vm_destroy(struct kvm *kvm) +{ + struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); + + free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm)); +} + static struct kvm_x86_ops vmx_x86_ops __initdata = { .hardware_unsetup = hardware_unsetup, @@ -7887,7 +7989,9 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .vm_size = sizeof(struct kvm_vmx), .vm_init = vmx_vm_init, + .vm_destroy = vmx_vm_destroy, + .vcpu_precreate = vmx_vcpu_precreate, .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, .vcpu_reset = vmx_vcpu_reset, @@ -8089,6 +8193,9 @@ static __init int hardware_setup(void) vmx_x86_ops.sync_pir_to_irr = NULL; } + if (!enable_apicv || !cpu_has_vmx_ipiv()) + enable_ipiv = false; + if (cpu_has_vmx_tsc_scaling()) { kvm_has_tsc_control = true; kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 9d40633b080ee11b2709c495f93c96d19a3fde26..bcedbe5ff6098736f5abe848339df79e8efcf7f2 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -358,6 +358,8 @@ struct kvm_vmx { enum ept_pointers_status ept_pointers_match; spinlock_t ept_pointer_lock; + /* Posted Interrupt Descriptor (PID) table for IPI virtualization */ + u64 *pid_table; }; bool nested_vmx_allowed(struct kvm_vcpu *vcpu); @@ -409,31 +411,36 @@ static inline u8 vmx_get_rvi(void) return vmcs_read16(GUEST_INTR_STATUS) & 0xff; } -#define BUILD_CONTROLS_SHADOW(lname, uname) \ -static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \ -{ \ - if (vmx->loaded_vmcs->controls_shadow.lname != val) { \ - vmcs_write32(uname, val); \ - vmx->loaded_vmcs->controls_shadow.lname = val; \ - } \ -} \ -static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \ -{ \ - return vmx->loaded_vmcs->controls_shadow.lname; \ -} \ -static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \ -{ \ - lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \ -} \ -static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \ -{ \ - lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ +#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \ +static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + if (vmx->loaded_vmcs->controls_shadow.lname != val) { \ + vmcs_write##bits(uname, val); \ + vmx->loaded_vmcs->controls_shadow.lname = val; \ + } \ +} \ +static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \ +{ \ + return vmcs->controls_shadow.lname; \ +} \ +static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \ +{ \ + return __##lname##_controls_get(vmx->loaded_vmcs); \ +} \ +static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \ +} \ +static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ } -BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS) -BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS) -BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL) -BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL) -BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL) +BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32) +BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32) +BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32) +BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32) +BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32) +BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu) { @@ -553,4 +560,9 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu) void dump_vmcs(void); +static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu) +{ + return lapic_in_kernel(vcpu) && enable_ipiv; +} + #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7cc18c4ffb048c9ba399b1f41c0b3823ca015ade..c7b4dfd2a2c43e0d4f8ee1da2ac13c4711e618ee 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5432,6 +5432,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; } #endif + case KVM_CAP_MAX_VCPU_ID: + r = -EINVAL; + if (cap->args[0] > KVM_MAX_VCPU_ID) + break; + + mutex_lock(&kvm->lock); + if (kvm->arch.max_vcpu_ids == cap->args[0]) { + r = 0; + } else if (!kvm->arch.max_vcpu_ids) { + kvm->arch.max_vcpu_ids = cap->args[0]; + r = 0; + } + mutex_unlock(&kvm->lock); + break; default: r = -EINVAL; break; @@ -10104,11 +10118,17 @@ static void fx_init(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { - if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) + if (kvm_check_tsc_unstable() && kvm->created_vcpus) pr_warn_once("kvm: SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); - return 0; + if (!kvm->arch.max_vcpu_ids) + kvm->arch.max_vcpu_ids = KVM_MAX_VCPU_ID; + + if (id >= kvm->arch.max_vcpu_ids) + return -EINVAL; + + return kvm_x86_ops.vcpu_precreate ? kvm_x86_ops.vcpu_precreate(kvm) : 0; } int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 7a2c242b7152e3e56da3341f9e7efb287175b5fd..d4768897eda6e2ad13e0010ff52ef18896ec9372 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -9,6 +9,7 @@ /x86_64/evmcs_test /x86_64/kvm_pv_test /x86_64/hyperv_cpuid +/x86_64/max_vcpuid_cap_test /x86_64/mmio_warning_test /x86_64/platform_info_test /x86_64/set_sregs_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3d14ef77755e537c504f151decb74ad9a4d35f1c..25618728dc4afde0536a5d678e022297f13cda3d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -59,6 +59,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test +TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 7d29aa78695968d90da325fbe814a3cdd152b60d..85b621383fa1701549c1380557b06dbf4af13326 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -62,6 +62,7 @@ enum vm_mem_backing_src_type { }; int kvm_check_cap(long cap); +int vm_check_cap(struct kvm_vm *vm, long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_enable_cap *cap); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 49805fd16fdf53c48c7499df6b4b8c169778e21c..d9de68b44783449806db32e7bdcb7c42e75b2770 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -63,6 +63,33 @@ int kvm_check_cap(long cap) return ret; } +/* VM Check Capability + * + * Input Args: + * vm - Virtual Machine + * cap - Capability + * + * Output Args: None + * + * Return: + * On success, the Value corresponding to the capability (KVM_CAP_*) + * specified by the value of cap. On failure a TEST_ASSERT failure + * is produced. + * + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +int vm_check_cap(struct kvm_vm *vm, long cap) +{ + int ret; + + ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap); + TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n" + " rc: %i errno: %i", ret, errno); + + return ret; +} + /* VM Enable Capability * * Input Args: diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c new file mode 100644 index 0000000000000000000000000000000000000000..3f6c1ad86cc637afe02cf7a45c0425f7e193757e --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * maximum APIC ID capability tests + * + * Copyright (C) 2022, Intel, Inc. + * + * Tests for getting/setting maximum APIC ID capability + */ + +#include "kvm_util.h" +#include "../lib/kvm_util_internal.h" + +#define MAX_VCPU_ID 2 + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_enable_cap cap = { 0 }; + int ret; + + vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + + /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */ + ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); + + /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */ + cap.cap = KVM_CAP_MAX_VCPU_ID; + cap.args[0] = ret + 1; + ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap); + TEST_ASSERT(ret < 0, + "Unexpected success to enable KVM_CAP_MAX_VCPU_ID" + "beyond KVM cap!\n"); + + /* Set KVM_CAP_MAX_VCPU_ID */ + cap.cap = KVM_CAP_MAX_VCPU_ID; + cap.args[0] = MAX_VCPU_ID; + ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap); + TEST_ASSERT(ret == 0, + "Unexpected failure to enable KVM_CAP_MAX_VCPU_ID!\n"); + + /* Try to set KVM_CAP_MAX_VCPU_ID again */ + cap.args[0] = MAX_VCPU_ID + 1; + ret = ioctl(vm->fd, KVM_ENABLE_CAP, &cap); + TEST_ASSERT(ret < 0, + "Unexpected success to enable KVM_CAP_MAX_VCPU_ID again\n"); + + /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/ + ret = ioctl(vm->fd, KVM_CREATE_VCPU, MAX_VCPU_ID); + TEST_ASSERT(ret < 0, + "Unexpected success in creating a vCPU with VCPU ID out of range\n"); + + kvm_vm_free(vm); + return 0; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a5c107bbf022fd066a8842b7e31895ed8fd9d84a..93cd86267eba0baaf51d6468aea2a0d692e9ad5b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3180,13 +3180,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) return -EINVAL; } + r = kvm_arch_vcpu_precreate(kvm, id); + if (r) { + mutex_unlock(&kvm->lock); + return r; + } + kvm->created_vcpus++; mutex_unlock(&kvm->lock); - r = kvm_arch_vcpu_precreate(kvm, id); - if (r) - goto vcpu_decrement; - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) { r = -ENOMEM;