提交 843e4330 编写于 作者: K Kai Huang 提交者: Paolo Bonzini

KVM: VMX: Add PML support in VMX

This patch adds PML support in VMX. A new module parameter 'enable_pml' is added
to allow user to enable/disable it manually.
Signed-off-by: NKai Huang <kai.huang@linux.intel.com>
Reviewed-by: NXiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: NPaolo Bonzini <pbonzini@redhat.com>
上级 88178fd4
......@@ -69,6 +69,7 @@
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
......@@ -121,6 +122,7 @@ enum vmcs_field {
GUEST_LDTR_SELECTOR = 0x0000080c,
GUEST_TR_SELECTOR = 0x0000080e,
GUEST_INTR_STATUS = 0x00000810,
GUEST_PML_INDEX = 0x00000812,
HOST_ES_SELECTOR = 0x00000c00,
HOST_CS_SELECTOR = 0x00000c02,
HOST_SS_SELECTOR = 0x00000c04,
......@@ -140,6 +142,8 @@ enum vmcs_field {
VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
PML_ADDRESS = 0x0000200e,
PML_ADDRESS_HIGH = 0x0000200f,
TSC_OFFSET = 0x00002010,
TSC_OFFSET_HIGH = 0x00002011,
VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
......
......@@ -73,6 +73,7 @@
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
#define EXIT_REASON_INVPCID 58
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
......
......@@ -848,6 +848,24 @@ TRACE_EVENT(kvm_track_tsc,
#endif /* CONFIG_X86_64 */
/*
* Tracepoint for PML full VMEXIT.
*/
TRACE_EVENT(kvm_pml_full,
TP_PROTO(unsigned int vcpu_id),
TP_ARGS(vcpu_id),
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
),
TP_printk("vcpu %d: PML full", __entry->vcpu_id)
);
TRACE_EVENT(kvm_ple_window,
TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
TP_ARGS(grow, vcpu_id, new, old),
......
......@@ -101,6 +101,9 @@ module_param(nested, bool, S_IRUGO);
static u64 __read_mostly host_xss;
static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
......@@ -516,6 +519,10 @@ struct vcpu_vmx {
/* Dynamic PLE window. */
int ple_window;
bool ple_window_dirty;
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
};
enum segment_cache_field {
......@@ -1068,6 +1075,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void)
SECONDARY_EXEC_SHADOW_VMCS;
}
static inline bool cpu_has_vmx_pml(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
}
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
......@@ -2924,7 +2936,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES;
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
......@@ -4355,6 +4368,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
a current VMCS12
*/
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
/* PML is enabled/disabled in creating/destorying vcpu */
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
return exec_control;
}
......@@ -5942,6 +5958,20 @@ static __init int hardware_setup(void)
update_ple_window_actual_max();
/*
* Only enable PML when hardware supports PML feature, and both EPT
* and EPT A/D bit features are enabled -- PML depends on them to work.
*/
if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
enable_pml = 0;
if (!enable_pml) {
kvm_x86_ops->slot_enable_log_dirty = NULL;
kvm_x86_ops->slot_disable_log_dirty = NULL;
kvm_x86_ops->flush_log_dirty = NULL;
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
return alloc_kvm_area();
out7:
......@@ -6971,6 +7001,31 @@ static bool vmx_test_pir(struct kvm_vcpu *vcpu, int vector)
return pi_test_pir(vector, &vmx->pi_desc);
}
static int handle_pml_full(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
trace_kvm_pml_full(vcpu->vcpu_id);
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
/*
* PML buffer FULL happened while executing iret from NMI,
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
/*
* PML buffer already flushed at beginning of VMEXIT. Nothing to do
* here.., and there's no userspace involvement needed for PML.
*/
return 1;
}
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
......@@ -7019,6 +7074,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_INVVPID] = handle_invvpid,
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
};
static const int kvm_vmx_max_exit_handlers =
......@@ -7325,6 +7381,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
static int vmx_enable_pml(struct vcpu_vmx *vmx)
{
struct page *pml_pg;
u32 exec_control;
pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!pml_pg)
return -ENOMEM;
vmx->pml_pg = pml_pg;
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
exec_control |= SECONDARY_EXEC_ENABLE_PML;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
return 0;
}
static void vmx_disable_pml(struct vcpu_vmx *vmx)
{
u32 exec_control;
ASSERT(vmx->pml_pg);
__free_page(vmx->pml_pg);
vmx->pml_pg = NULL;
exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
{
struct kvm *kvm = vmx->vcpu.kvm;
u64 *pml_buf;
u16 pml_idx;
pml_idx = vmcs_read16(GUEST_PML_INDEX);
/* Do nothing if PML buffer is empty */
if (pml_idx == (PML_ENTITY_NUM - 1))
return;
/* PML index always points to next available PML buffer entity */
if (pml_idx >= PML_ENTITY_NUM)
pml_idx = 0;
else
pml_idx++;
pml_buf = page_address(vmx->pml_pg);
for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
u64 gpa;
gpa = pml_buf[pml_idx];
WARN_ON(gpa & (PAGE_SIZE - 1));
mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
}
/* reset PML index */
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
/*
* Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
* Called before reporting dirty_bitmap to userspace.
*/
static void kvm_flush_pml_buffers(struct kvm *kvm)
{
int i;
struct kvm_vcpu *vcpu;
/*
* We only need to kick vcpu out of guest mode here, as PML buffer
* is flushed at beginning of all VMEXITs, and it's obvious that only
* vcpus running in guest are possible to have unflushed GPAs in PML
* buffer.
*/
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vcpu_kick(vcpu);
}
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
......@@ -7335,6 +7474,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 exit_reason = vmx->exit_reason;
u32 vectoring_info = vmx->idt_vectoring_info;
/*
* Flush logged GPAs PML buffer, this will make dirty_bitmap more
* updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
* querying dirty_bitmap, we only need to kick all vcpus out of guest
* mode as if vcpus is in root mode, the PML buffer must has been
* flushed already.
*/
if (enable_pml)
vmx_flush_pml_buffer(vmx);
/* If guest state is invalid, start emulating */
if (vmx->emulation_required)
return handle_invalid_guest_state(vcpu);
......@@ -7981,6 +8130,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (enable_pml)
vmx_disable_pml(vmx);
free_vpid(vmx);
leave_guest_mode(vcpu);
vmx_load_vmcs01(vcpu);
......@@ -8051,6 +8202,18 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->nested.current_vmptr = -1ull;
vmx->nested.current_vmcs12 = NULL;
/*
* If PML is turned on, failure on enabling PML just results in failure
* of creating the vcpu, therefore we can simplify PML logic (by
* avoiding dealing with cases, such as enabling PML partially on vcpus
* for the guest, etc.
*/
if (enable_pml) {
err = vmx_enable_pml(vmx);
if (err)
goto free_vmcs;
}
return &vmx->vcpu;
free_vmcs:
......@@ -9492,6 +9655,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
shrink_ple_window(vcpu);
}
static void vmx_slot_enable_log_dirty(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
}
static void vmx_slot_disable_log_dirty(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
kvm_mmu_slot_set_dirty(kvm, slot);
}
static void vmx_flush_log_dirty(struct kvm *kvm)
{
kvm_flush_pml_buffers(kvm);
}
static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *memslot,
gfn_t offset, unsigned long mask)
{
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
......@@ -9601,6 +9789,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_nested_events = vmx_check_nested_events,
.sched_in = vmx_sched_in,
.slot_enable_log_dirty = vmx_slot_enable_log_dirty,
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
};
static int __init vmx_init(void)
......
......@@ -7880,3 +7880,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册