提交 44a95dae 编写于 作者: S Suravee Suthikulpanit 提交者: Paolo Bonzini

KVM: x86: Detect and Initialize AVIC support

This patch introduces AVIC-related data structure, and AVIC
initialization code.

There are three main data structures for AVIC:
    * Virtual APIC (vAPIC) backing page (per-VCPU)
    * Physical APIC ID table (per-VM)
    * Logical APIC ID table (per-VM)

Currently, AVIC is disabled by default. Users can manually
enable AVIC via kernel boot option kvm-amd.avic=1 or during
kvm-amd module loading with parameter avic=1.
Signed-off-by: NSuravee Suthikulpanit <suravee.suthikulpanit@amd.com>
[Avoid extra indentation (Boris). - Paolo]
Signed-off-by: NPaolo Bonzini <pbonzini@redhat.com>
上级 3d5615e5
...@@ -773,6 +773,10 @@ struct kvm_arch { ...@@ -773,6 +773,10 @@ struct kvm_arch {
u8 nr_reserved_ioapic_pins; u8 nr_reserved_ioapic_pins;
bool disabled_lapic_found; bool disabled_lapic_found;
/* Struct members for AVIC */
struct page *avic_logical_id_table_page;
struct page *avic_physical_id_table_page;
}; };
struct kvm_vm_stat { struct kvm_vm_stat {
......
...@@ -116,6 +116,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { ...@@ -116,6 +116,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
#define SVM_INTERRUPT_SHADOW_MASK 1 #define SVM_INTERRUPT_SHADOW_MASK 1
#define SVM_IOIO_STR_SHIFT 2 #define SVM_IOIO_STR_SHIFT 2
......
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
* the COPYING file in the top-level directory. * the COPYING file in the top-level directory.
* *
*/ */
#define pr_fmt(fmt) "SVM: " fmt
#include <linux/kvm_host.h> #include <linux/kvm_host.h>
#include "irq.h" #include "irq.h"
...@@ -78,6 +81,14 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); ...@@ -78,6 +81,14 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#define TSC_RATIO_MIN 0x0000000000000001ULL #define TSC_RATIO_MIN 0x0000000000000001ULL
#define TSC_RATIO_MAX 0x000000ffffffffffULL #define TSC_RATIO_MAX 0x000000ffffffffffULL
#define AVIC_HPA_MASK ~((0xFFFULL << 52) || 0xFFF)
/*
* 0xff is broadcast, so the max index allowed for physical APIC ID
* table is 0xfe. APIC IDs above 0xff are reserved.
*/
#define AVIC_MAX_PHYSICAL_ID_COUNT 255
static bool erratum_383_found __read_mostly; static bool erratum_383_found __read_mostly;
static const u32 host_save_user_msrs[] = { static const u32 host_save_user_msrs[] = {
...@@ -162,8 +173,19 @@ struct vcpu_svm { ...@@ -162,8 +173,19 @@ struct vcpu_svm {
/* cached guest cpuid flags for faster access */ /* cached guest cpuid flags for faster access */
bool nrips_enabled : 1; bool nrips_enabled : 1;
struct page *avic_backing_page;
u64 *avic_physical_id_cache;
}; };
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
static DEFINE_PER_CPU(u64, current_tsc_ratio); static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT 0x0100000000ULL #define TSC_RATIO_DEFAULT 0x0100000000ULL
...@@ -205,6 +227,10 @@ module_param(npt, int, S_IRUGO); ...@@ -205,6 +227,10 @@ module_param(npt, int, S_IRUGO);
static int nested = true; static int nested = true;
module_param(nested, int, S_IRUGO); module_param(nested, int, S_IRUGO);
/* enable / disable AVIC */
static int avic;
module_param(avic, int, S_IRUGO);
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu); static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static void svm_complete_interrupts(struct vcpu_svm *svm); static void svm_complete_interrupts(struct vcpu_svm *svm);
...@@ -228,12 +254,18 @@ enum { ...@@ -228,12 +254,18 @@ enum {
VMCB_SEG, /* CS, DS, SS, ES, CPL */ VMCB_SEG, /* CS, DS, SS, ES, CPL */
VMCB_CR2, /* CR2 only */ VMCB_CR2, /* CR2 only */
VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
* AVIC PHYSICAL_TABLE pointer,
* AVIC LOGICAL_TABLE pointer
*/
VMCB_DIRTY_MAX, VMCB_DIRTY_MAX,
}; };
/* TPR and CR2 are always written before VMRUN */ /* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
static inline void mark_all_dirty(struct vmcb *vmcb) static inline void mark_all_dirty(struct vmcb *vmcb)
{ {
vmcb->control.clean = 0; vmcb->control.clean = 0;
...@@ -255,6 +287,12 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) ...@@ -255,6 +287,12 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu); return container_of(vcpu, struct vcpu_svm, vcpu);
} }
static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
mark_dirty(svm->vmcb, VMCB_AVIC);
}
static void recalc_intercepts(struct vcpu_svm *svm) static void recalc_intercepts(struct vcpu_svm *svm)
{ {
struct vmcb_control_area *c, *h; struct vmcb_control_area *c, *h;
...@@ -923,6 +961,12 @@ static __init int svm_hardware_setup(void) ...@@ -923,6 +961,12 @@ static __init int svm_hardware_setup(void)
} else } else
kvm_disable_tdp(); kvm_disable_tdp();
if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)))
avic = false;
if (avic)
pr_info("AVIC enabled\n");
return 0; return 0;
err: err:
...@@ -1000,6 +1044,22 @@ static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) ...@@ -1000,6 +1044,22 @@ static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS); mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
} }
static void avic_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb;
struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
svm->vcpu.arch.apicv_active = true;
}
static void init_vmcb(struct vcpu_svm *svm) static void init_vmcb(struct vcpu_svm *svm)
{ {
struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_control_area *control = &svm->vmcb->control;
...@@ -1110,9 +1170,131 @@ static void init_vmcb(struct vcpu_svm *svm) ...@@ -1110,9 +1170,131 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_PAUSE); set_intercept(svm, INTERCEPT_PAUSE);
} }
if (avic)
avic_init_vmcb(svm);
mark_all_dirty(svm->vmcb); mark_all_dirty(svm->vmcb);
enable_gif(svm); enable_gif(svm);
}
static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
{
u64 *avic_physical_id_table;
struct kvm_arch *vm_data = &vcpu->kvm->arch;
if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
return NULL;
avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page);
return &avic_physical_id_table[index];
}
/**
* Note:
* AVIC hardware walks the nested page table to check permissions,
* but does not use the SPA address specified in the leaf page
* table entry since it uses address in the AVIC_BACKING_PAGE pointer
* field of the VMCB. Therefore, we set up the
* APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
*/
static int avic_init_access_page(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
int ret;
if (kvm->arch.apic_access_page_done)
return 0;
ret = x86_set_memory_region(kvm,
APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
APIC_DEFAULT_PHYS_BASE,
PAGE_SIZE);
if (ret)
return ret;
kvm->arch.apic_access_page_done = true;
return 0;
}
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
int ret;
u64 *entry, new_entry;
int id = vcpu->vcpu_id;
struct vcpu_svm *svm = to_svm(vcpu);
ret = avic_init_access_page(vcpu);
if (ret)
return ret;
if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
return -EINVAL;
if (!svm->vcpu.arch.apic->regs)
return -EINVAL;
svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
/* Setting AVIC backing page address in the phy APIC ID table */
entry = avic_get_physical_id_entry(vcpu, id);
if (!entry)
return -EINVAL;
new_entry = READ_ONCE(*entry);
new_entry = (page_to_phys(svm->avic_backing_page) &
AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
WRITE_ONCE(*entry, new_entry);
svm->avic_physical_id_cache = entry;
return 0;
}
static void avic_vm_destroy(struct kvm *kvm)
{
struct kvm_arch *vm_data = &kvm->arch;
if (vm_data->avic_logical_id_table_page)
__free_page(vm_data->avic_logical_id_table_page);
if (vm_data->avic_physical_id_table_page)
__free_page(vm_data->avic_physical_id_table_page);
}
static int avic_vm_init(struct kvm *kvm)
{
int err = -ENOMEM;
struct kvm_arch *vm_data = &kvm->arch;
struct page *p_page;
struct page *l_page;
if (!avic)
return 0;
/* Allocating physical APIC ID table (4KB) */
p_page = alloc_page(GFP_KERNEL);
if (!p_page)
goto free_avic;
vm_data->avic_physical_id_table_page = p_page;
clear_page(page_address(p_page));
/* Allocating logical APIC ID table (4KB) */
l_page = alloc_page(GFP_KERNEL);
if (!l_page)
goto free_avic;
vm_data->avic_logical_id_table_page = l_page;
clear_page(page_address(l_page));
return 0;
free_avic:
avic_vm_destroy(kvm);
return err;
} }
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
...@@ -1131,6 +1313,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) ...@@ -1131,6 +1313,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
kvm_register_write(vcpu, VCPU_REGS_RDX, eax); kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
} }
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
...@@ -1169,6 +1354,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) ...@@ -1169,6 +1354,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (!hsave_page) if (!hsave_page)
goto free_page3; goto free_page3;
if (avic) {
err = avic_init_backing_page(&svm->vcpu);
if (err)
goto free_page4;
}
svm->nested.hsave = page_address(hsave_page); svm->nested.hsave = page_address(hsave_page);
svm->msrpm = page_address(msrpm_pages); svm->msrpm = page_address(msrpm_pages);
...@@ -1187,6 +1378,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) ...@@ -1187,6 +1378,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
return &svm->vcpu; return &svm->vcpu;
free_page4:
__free_page(hsave_page);
free_page3: free_page3:
__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
free_page2: free_page2:
...@@ -3212,6 +3405,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ...@@ -3212,6 +3405,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE: case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break; break;
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
/* Follow through */
default: default:
return kvm_set_msr_common(vcpu, msr); return kvm_set_msr_common(vcpu, msr);
} }
...@@ -3375,10 +3572,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) ...@@ -3375,10 +3572,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
pr_err("%-20s%08x\n", "event_inj:", control->event_inj); pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
pr_err("VMCB State Save Area:\n"); pr_err("VMCB State Save Area:\n");
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"es:", "es:",
...@@ -3606,11 +3807,28 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) ...@@ -3606,11 +3807,28 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
static bool svm_get_enable_apicv(void) static bool svm_get_enable_apicv(void)
{ {
return false; return avic;
}
static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
}
static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
{
} }
/* Note: Currently only used by Hyper-V. */
static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
if (!avic)
return;
vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
mark_dirty(vmcb, VMCB_INTR);
} }
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
...@@ -4322,6 +4540,9 @@ static struct kvm_x86_ops svm_x86_ops = { ...@@ -4322,6 +4540,9 @@ static struct kvm_x86_ops svm_x86_ops = {
.vcpu_free = svm_free_vcpu, .vcpu_free = svm_free_vcpu,
.vcpu_reset = svm_vcpu_reset, .vcpu_reset = svm_vcpu_reset,
.vm_init = avic_vm_init,
.vm_destroy = avic_vm_destroy,
.prepare_guest_switch = svm_prepare_guest_switch, .prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load, .vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put, .vcpu_put = svm_vcpu_put,
...@@ -4382,6 +4603,8 @@ static struct kvm_x86_ops svm_x86_ops = { ...@@ -4382,6 +4603,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = svm_load_eoi_exitmap, .load_eoi_exitmap = svm_load_eoi_exitmap,
.sync_pir_to_irr = svm_sync_pir_to_irr, .sync_pir_to_irr = svm_sync_pir_to_irr,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
.set_tss_addr = svm_set_tss_addr, .set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level, .get_tdp_level = get_npt_level,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册