提交 2fc67756 编写于 作者: L Linus Torvalds

Merge git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Marcelo Tosatti:
 "KVM bug fixes (ARM and x86)"

* git://git.kernel.org/pub/scm/virt/kvm/kvm:
  arm/arm64: KVM: Keep elrsr/aisr in sync with software model
  KVM: VMX: Set msr bitmap correctly if vcpu is in guest mode
  arm/arm64: KVM: fix missing unlock on error in kvm_vgic_create()
  kvm: x86: i8259: return initialized data on invalid-size read
  arm64: KVM: Fix outdated comment about VTCR_EL2.PS
  arm64: KVM: Do not use pgd_index to index stage-2 pgd
  arm64: KVM: Fix stage-2 PGD allocation to have per-page refcounting
  kvm: move advertising of KVM_CAP_IRQFD to common code
......@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
#define kvm_pgd_index(addr) pgd_index(addr)
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(kvm, pudp) (0)
#define KVM_PREALLOC_LEVEL 0
static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
return 0;
return kvm->arch.pgd;
}
static inline void kvm_free_hwpgd(struct kvm *kvm) { }
static inline void *kvm_get_hwpgd(struct kvm *kvm)
static inline unsigned int kvm_get_hwpgd_size(void)
{
return kvm->arch.pgd;
return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
struct kvm;
......
......@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
pgd = pgdp + pgd_index(addr);
pgd = pgdp + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
if (!pgd_none(*pgd))
......@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
pgd = kvm->arch.pgd + pgd_index(addr);
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
......@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
/* Free the HW pgd, one page at a time */
static void kvm_free_hwpgd(void *hwpgd)
{
free_pages_exact(hwpgd, kvm_get_hwpgd_size());
}
/* Allocate the HW PGD, making sure that each page gets its own refcount */
static void *kvm_alloc_hwpgd(void)
{
unsigned int size = kvm_get_hwpgd_size();
return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
}
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
......@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
int ret;
pgd_t *pgd;
void *hwpgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
hwpgd = kvm_alloc_hwpgd();
if (!hwpgd)
return -ENOMEM;
/* When the kernel uses more levels of page tables than the
* guest, we allocate a fake PGD and pre-populate it to point
* to the next-level page table, which will be the real
* initial page table pointed to by the VTTBR.
*
* When KVM_PREALLOC_LEVEL==2, we allocate a single page for
* the PMD and the kernel will use folded pud.
* When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
* pages.
*/
if (KVM_PREALLOC_LEVEL > 0) {
int i;
/*
* Allocate fake pgd for the page table manipulation macros to
* work. This is not used by the hardware and we have no
......@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
*/
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
GFP_KERNEL | __GFP_ZERO);
if (!pgd) {
kvm_free_hwpgd(hwpgd);
return -ENOMEM;
}
/* Plug the HW PGD into the fake one. */
for (i = 0; i < PTRS_PER_S2_PGD; i++) {
if (KVM_PREALLOC_LEVEL == 1)
pgd_populate(NULL, pgd + i,
(pud_t *)hwpgd + i * PTRS_PER_PUD);
else if (KVM_PREALLOC_LEVEL == 2)
pud_populate(NULL, pud_offset(pgd, 0) + i,
(pmd_t *)hwpgd + i * PTRS_PER_PMD);
}
} else {
/*
* Allocate actual first-level Stage-2 page table used by the
* hardware for Stage-2 page table walks.
*/
pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
pgd = (pgd_t *)hwpgd;
}
if (!pgd)
return -ENOMEM;
ret = kvm_prealloc_hwpgd(kvm, pgd);
if (ret)
goto out_err;
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
out_err:
if (KVM_PREALLOC_LEVEL > 0)
kfree(pgd);
else
free_pages((unsigned long)pgd, S2_PGD_ORDER);
return ret;
}
/**
......@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
kvm_free_hwpgd(kvm);
kvm_free_hwpgd(kvm_get_hwpgd(kvm));
if (KVM_PREALLOC_LEVEL > 0)
kfree(kvm->arch.pgd);
else
free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
kvm->arch.pgd = NULL;
}
......@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pgd_t *pgd;
pud_t *pud;
pgd = kvm->arch.pgd + pgd_index(addr);
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
if (WARN_ON(pgd_none(*pgd))) {
if (!cache)
return NULL;
......@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
pgd_t *pgd;
phys_addr_t next;
pgd = kvm->arch.pgd + pgd_index(addr);
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
......
......@@ -129,6 +129,9 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
* VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
* (see hyp-init.S).
*
* Note that when using 4K pages, we concatenate two first level page tables
* together.
*
......@@ -138,7 +141,6 @@
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
* 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
......@@ -150,7 +152,6 @@
#else
/*
* Stage2 translation configuration:
* 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
......
......@@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
/*
* If we are concatenating first level stage-2 page tables, we would have less
* than or equal to 16 pointers in the fake PGD, because that's what the
......@@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define KVM_PREALLOC_LEVEL (0)
#endif
/**
* kvm_prealloc_hwpgd - allocate inital table for VTTBR
* @kvm: The KVM struct pointer for the VM.
* @pgd: The kernel pseudo pgd
*
* When the kernel uses more levels of page tables than the guest, we allocate
* a fake PGD and pre-populate it to point to the next-level page table, which
* will be the real initial page table pointed to by the VTTBR.
*
* When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
* the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
* allocate 2 consecutive PUD pages.
*/
static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
{
unsigned int i;
unsigned long hwpgd;
if (KVM_PREALLOC_LEVEL == 0)
return 0;
hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
if (!hwpgd)
return -ENOMEM;
for (i = 0; i < PTRS_PER_S2_PGD; i++) {
if (KVM_PREALLOC_LEVEL == 1)
pgd_populate(NULL, pgd + i,
(pud_t *)hwpgd + i * PTRS_PER_PUD);
else if (KVM_PREALLOC_LEVEL == 2)
pud_populate(NULL, pud_offset(pgd, 0) + i,
(pmd_t *)hwpgd + i * PTRS_PER_PMD);
}
return 0;
}
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
pgd_t *pgd = kvm->arch.pgd;
......@@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
return pmd_offset(pud, 0);
}
static inline void kvm_free_hwpgd(struct kvm *kvm)
static inline unsigned int kvm_get_hwpgd_size(void)
{
if (KVM_PREALLOC_LEVEL > 0) {
unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
}
if (KVM_PREALLOC_LEVEL > 0)
return PTRS_PER_S2_PGD * PAGE_SIZE;
return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
static inline bool kvm_page_empty(void *ptr)
......
......@@ -165,7 +165,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
......
......@@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s,
return -EOPNOTSUPP;
if (len != 1) {
memset(val, 0, len);
pr_pic_unimpl("non byte read\n");
return 0;
}
......
......@@ -2168,7 +2168,10 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
{
unsigned long *msr_bitmap;
if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
if (is_guest_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_nested;
else if (irqchip_in_kernel(vcpu->kvm) &&
apic_x2apic_mode(vcpu->arch.apic)) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
......@@ -9218,9 +9221,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}
if (cpu_has_vmx_msr_bitmap() &&
exec_control & CPU_BASED_USE_MSR_BITMAPS &&
nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
exec_control & CPU_BASED_USE_MSR_BITMAPS) {
nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
/* MSR_BITMAP will be set by following vmx_set_efer. */
} else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
......
......@@ -2744,7 +2744,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_USER_NMI:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_IOEVENTFD_NO_LENGTH:
case KVM_CAP_PIT2:
......
......@@ -114,6 +114,7 @@ struct vgic_ops {
void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
void (*clear_eisr)(struct kvm_vcpu *vcpu);
u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
void (*enable_underflow)(struct kvm_vcpu *vcpu);
void (*disable_underflow)(struct kvm_vcpu *vcpu);
......
......@@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
else
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
}
static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
......@@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
}
static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
{
vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
}
static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
......@@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = {
.sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
.get_elrsr = vgic_v2_get_elrsr,
.get_eisr = vgic_v2_get_eisr,
.clear_eisr = vgic_v2_clear_eisr,
.get_interrupt_status = vgic_v2_get_interrupt_status,
.enable_underflow = vgic_v2_enable_underflow,
.disable_underflow = vgic_v2_disable_underflow,
......
......@@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
else
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
}
static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
......@@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
}
static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
{
vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
}
static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
......@@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = {
.sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
.get_elrsr = vgic_v3_get_elrsr,
.get_eisr = vgic_v3_get_eisr,
.clear_eisr = vgic_v3_clear_eisr,
.get_interrupt_status = vgic_v3_get_interrupt_status,
.enable_underflow = vgic_v3_enable_underflow,
.disable_underflow = vgic_v3_disable_underflow,
......
......@@ -883,6 +883,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
return vgic_ops->get_eisr(vcpu);
}
static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
{
vgic_ops->clear_eisr(vcpu);
}
static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
{
return vgic_ops->get_interrupt_status(vcpu);
......@@ -922,6 +927,7 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
vgic_set_lr(vcpu, lr_nr, vlr);
clear_bit(lr_nr, vgic_cpu->lr_used);
vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}
/*
......@@ -978,6 +984,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
vlr.state |= LR_STATE_PENDING;
vgic_set_lr(vcpu, lr, vlr);
vgic_sync_lr_elrsr(vcpu, lr, vlr);
return true;
}
}
......@@ -999,6 +1006,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
vlr.state |= LR_EOI_INT;
vgic_set_lr(vcpu, lr, vlr);
vgic_sync_lr_elrsr(vcpu, lr, vlr);
return true;
}
......@@ -1136,6 +1144,14 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
if (status & INT_STATUS_UNDERFLOW)
vgic_disable_underflow(vcpu);
/*
* In the next iterations of the vcpu loop, if we sync the vgic state
* after flushing it, but before entering the guest (this happens for
* pending signals and vmid rollovers), then make sure we don't pick
* up any old maintenance interrupts here.
*/
vgic_clear_eisr(vcpu);
return level_pending;
}
......@@ -1583,8 +1599,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
* emulation. So check this here again. KVM_CREATE_DEVICE does
* the proper checks already.
*/
if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2)
return -ENODEV;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
ret = -ENODEV;
goto out;
}
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the
......
......@@ -2492,6 +2492,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_SIGNAL_MSI:
#endif
#ifdef CONFIG_HAVE_KVM_IRQFD
case KVM_CAP_IRQFD:
case KVM_CAP_IRQFD_RESAMPLE:
#endif
case KVM_CAP_CHECK_EXTENSION_VM:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册