commit de3a0021
Author: Jim Mattson
Committer: Paolo Bonzini

KVM: nVMX: Eliminate vmcs02 pool

The potential performance advantages of a vmcs02 pool have never been
realized. To simplify the code, eliminate the pool. Instead, a single
vmcs02 is allocated per VCPU when the VCPU enters VMX operation.
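
The resulting lifecycle, condensed from the hunks below into a rough
sketch (error paths and unrelated setup omitted; not a verbatim excerpt):

    /* VMXON, in enter_vmx_operation(): allocate the one vmcs02. */
    vmx->nested.vmcs02.vmcs = alloc_vmcs();
    vmx->nested.vmcs02.shadow_vmcs = NULL;
    if (!vmx->nested.vmcs02.vmcs)
            goto out_vmcs02;
    loaded_vmcs_init(&vmx->nested.vmcs02);

    /* Nested VM-entry, in enter_vmx_non_root_mode(): no pool lookup,
     * just switch to the per-VCPU vmcs02. */
    vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);

    /* VMXOFF / vCPU teardown, in free_nested(): release it. */
    free_loaded_vmcs(&vmx->nested.vmcs02);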

Cc: stable@vger.kernel.org       # prereq for Spectre mitigation
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
Reviewed-by: Ameya More <ameya.more@oracle.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Parent: ba804bb4
@@ -185,7 +185,6 @@ module_param(ple_window_max, int, S_IRUGO);
 extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
 
 struct vmcs {
 	u32 revision_id;
@@ -226,7 +225,7 @@ struct shared_msr_entry {
  * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
  * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
  * More than one of these structures may exist, if L1 runs multiple L2 guests.
- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
  * underlying hardware which will be used to run L2.
  * This structure is packed to ensure that its layout is identical across
  * machines (necessary for live migration).
@@ -409,13 +408,6 @@ struct __packed vmcs12 {
  */
 #define VMCS12_SIZE 0x1000
 
-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
-struct vmcs02_list {
-	struct list_head list;
-	gpa_t vmptr;
-	struct loaded_vmcs vmcs02;
-};
-
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -440,15 +432,15 @@ struct nested_vmx {
 	 */
 	bool sync_shadow_vmcs;
 
-	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
-	struct list_head vmcs02_pool;
-	int vmcs02_num;
 	bool change_vmcs01_virtual_x2apic_mode;
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
+
+	struct loaded_vmcs vmcs02;
+
 	/*
-	 * Guest pages referred to in vmcs02 with host-physical pointers, so
-	 * we must keep them pinned while L2 runs.
+	 * Guest pages referred to in the vmcs02 with host-physical
+	 * pointers, so we must keep them pinned while L2 runs.
 	 */
 	struct page *apic_access_page;
 	struct page *virtual_apic_page;
@@ -6973,94 +6965,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
 	return handle_nop(vcpu);
 }
 
-/*
- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
- * We could reuse a single VMCS for all the L2 guests, but we also want the
- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
- * allows keeping them loaded on the processor, and in the future will allow
- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
- * every entry if they never change.
- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
- *
- * The following functions allocate and free a vmcs02 in this pool.
- */
-
-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
-{
-	struct vmcs02_list *item;
-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
-		if (item->vmptr == vmx->nested.current_vmptr) {
-			list_move(&item->list, &vmx->nested.vmcs02_pool);
-			return &item->vmcs02;
-		}
-
-	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
-		/* Recycle the least recently used VMCS. */
-		item = list_last_entry(&vmx->nested.vmcs02_pool,
-				       struct vmcs02_list, list);
-		item->vmptr = vmx->nested.current_vmptr;
-		list_move(&item->list, &vmx->nested.vmcs02_pool);
-		return &item->vmcs02;
-	}
-
-	/* Create a new VMCS */
-	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
-	if (!item)
-		return NULL;
-	item->vmcs02.vmcs = alloc_vmcs();
-	item->vmcs02.shadow_vmcs = NULL;
-	if (!item->vmcs02.vmcs) {
-		kfree(item);
-		return NULL;
-	}
-	loaded_vmcs_init(&item->vmcs02);
-	item->vmptr = vmx->nested.current_vmptr;
-	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
-	vmx->nested.vmcs02_num++;
-	return &item->vmcs02;
-}
-
-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
-{
-	struct vmcs02_list *item;
-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
-		if (item->vmptr == vmptr) {
-			free_loaded_vmcs(&item->vmcs02);
-			list_del(&item->list);
-			kfree(item);
-			vmx->nested.vmcs02_num--;
-			return;
-		}
-}
-
-/*
- * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
- * must be &vmx->vmcs01.
- */
-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
-{
-	struct vmcs02_list *item, *n;
-
-	WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
-	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
-		/*
-		 * Something will leak if the above WARN triggers. Better than
-		 * a use-after-free.
-		 */
-		if (vmx->loaded_vmcs == &item->vmcs02)
-			continue;
-
-		free_loaded_vmcs(&item->vmcs02);
-		list_del(&item->list);
-		kfree(item);
-		vmx->nested.vmcs02_num--;
-	}
-}
-
 /*
  * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
  * set the success or error code of an emulated VMX instruction, as specified
@@ -7242,6 +7146,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs *shadow_vmcs;
 
+	vmx->nested.vmcs02.vmcs = alloc_vmcs();
+	vmx->nested.vmcs02.shadow_vmcs = NULL;
+	if (!vmx->nested.vmcs02.vmcs)
+		goto out_vmcs02;
+	loaded_vmcs_init(&vmx->nested.vmcs02);
+
 	if (cpu_has_vmx_msr_bitmap()) {
 		vmx->nested.msr_bitmap =
 			(unsigned long *)__get_free_page(GFP_KERNEL);
@@ -7264,9 +7174,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
 	}
 
-	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
-	vmx->nested.vmcs02_num = 0;
-
 	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_REL_PINNED);
 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -7281,6 +7188,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	free_page((unsigned long)vmx->nested.msr_bitmap);
 
 out_msr_bitmap:
+	free_loaded_vmcs(&vmx->nested.vmcs02);
+
+out_vmcs02:
 	return -ENOMEM;
 }
@@ -7434,7 +7344,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->vmcs01.shadow_vmcs = NULL;
 	}
 	kfree(vmx->nested.cached_vmcs12);
-	/* Unpin physical memory we referred to in current vmcs02 */
+	/* Unpin physical memory we referred to in the vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_dirty(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
@@ -7450,7 +7360,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->nested.pi_desc = NULL;
 	}
 
-	nested_free_all_saved_vmcss(vmx);
+	free_loaded_vmcs(&vmx->nested.vmcs02);
 }
 
 /* Emulate the VMXOFF instruction */
@@ -7493,8 +7403,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 			vmptr + offsetof(struct vmcs12, launch_state),
 			&zero, sizeof(zero));
 
-	nested_free_vmcs02(vmx, vmptr);
-
 	nested_vmx_succeed(vcpu);
 	return kvm_skip_emulated_instruction(vcpu);
 }
@@ -8406,10 +8314,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 
 	/*
 	 * The host physical addresses of some pages of guest memory
-	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
-	 * may write to these pages via their host physical address while
-	 * L2 is running, bypassing any address-translation-based dirty
-	 * tracking (e.g. EPT write protection).
+	 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
+	 * Page). The CPU may write to these pages via their host
+	 * physical address while L2 is running, bypassing any
+	 * address-translation-based dirty tracking (e.g. EPT write
+	 * protection).
 	 *
 	 * Mark them dirty on every exit from L2 to prevent them from
 	 * getting out of sync with dirty tracking.
@@ -10903,20 +10812,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	struct loaded_vmcs *vmcs02;
 	u32 msr_entry_idx;
 	u32 exit_qual;
 
-	vmcs02 = nested_get_current_vmcs02(vmx);
-	if (!vmcs02)
-		return -ENOMEM;
-
 	enter_guest_mode(vcpu);
 
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
-	vmx_switch_vmcs(vcpu, vmcs02);
+	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
 	vmx_segment_cache_clear(vmx);
 
 	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
@@ -11534,10 +11438,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	vm_exit_controls_reset_shadow(vmx);
 	vmx_segment_cache_clear(vmx);
 
-	/* if no vmcs02 cache requested, remove the one we used */
-	if (VMCS02_POOL_SIZE == 0)
-		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
-
 	/* Update any VMCS fields that might have changed while L2 ran */
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
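
For reference, the pool deleted earlier in this patch was a small
move-to-front LRU list keyed by vmptr: a lookup hit was rotated to the
head of the list with list_move(), and once the pool was full the tail
entry (the least recently used vmcs02) was recycled instead of allocating
another. A minimal self-contained userspace sketch of that idiom follows;
struct entry, lookup(), POOL_MAX and main() are illustrative stand-ins,
not kernel code.

    #include <stdlib.h>

    /* Intrusive doubly-linked list node, mirroring the kernel's list_head. */
    struct list { struct list *prev, *next; };

    static void list_del_entry(struct list *n)
    {
            n->prev->next = n->next;
            n->next->prev = n->prev;
    }

    static void list_add_front(struct list *n, struct list *head)
    {
            n->next = head->next;
            n->prev = head;
            head->next->prev = n;
            head->next = n;
    }

    struct entry {
            struct list node;       /* must stay first for the casts below */
            unsigned long key;      /* plays the role of item->vmptr */
    };

    #define POOL_MAX 4

    /* Hit: move to front. Miss with a full pool: recycle the LRU tail.
     * Miss otherwise: allocate a fresh entry, as the old code did. */
    static struct entry *lookup(struct list *head, int *nr, unsigned long key)
    {
            for (struct list *p = head->next; p != head; p = p->next) {
                    struct entry *e = (struct entry *)p;
                    if (e->key == key) {
                            list_del_entry(p);
                            list_add_front(p, head);
                            return e;
                    }
            }
            if (*nr >= POOL_MAX) {
                    struct entry *e = (struct entry *)head->prev; /* LRU */
                    e->key = key;
                    list_del_entry(&e->node);
                    list_add_front(&e->node, head);
                    return e;
            }
            struct entry *e = calloc(1, sizeof(*e));
            if (!e)
                    return NULL;
            e->key = key;
            list_add_front(&e->node, head);
            (*nr)++;
            return e;
    }

    int main(void)
    {
            struct list head = { &head, &head };
            int nr = 0;

            lookup(&head, &nr, 0x1000);     /* miss: allocate */
            lookup(&head, &nr, 0x2000);     /* miss: allocate */
            lookup(&head, &nr, 0x1000);     /* hit: rotate to front */
            return 0;
    }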