提交 4217fdde 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM:
   - a fix for a build failure introduced in -rc1 when tracepoints are
     enabled on 32-bit ARM.

   - disable use of stack pointer protection in the hyp code which can
     cause panics.

   - a handful of VGIC fixes.

   - a fix to the init of the redistributors on GICv3 systems that
     prevented boot with kvmtool on GICv3 systems introduced in -rc1.

   - a number of race conditions fixed in our MMU handling code.

   - a fix for the guest being able to program the debug extensions for
     the host on the 32-bit side.

  PPC:
   - fixes for build failures with PR KVM configurations.

   - a fix for a host crash that can occur on POWER9 with radix guests.

  x86:
   - fixes for nested PML and nested EPT.

   - a fix for crashes caused by reserved bits in SSE MXCSR that could
     have been set by userspace.

   - an optimization of halt polling that fixes high CPU overhead.

   - fixes for four reports from Dan Carpenter's static checker.

   - a protection around code that shouldn't have been preemptible.

   - a fix for port IO emulation"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (27 commits)
  KVM: x86: prevent uninitialized variable warning in check_svme()
  KVM: x86/vPMU: fix undefined shift in intel_pmu_refresh()
  KVM: x86: zero base3 of unusable segments
  KVM: X86: Fix read out-of-bounds vulnerability in kvm pio emulation
  KVM: x86: Fix potential preemption when get the current kvmclock timestamp
  KVM: Silence underflow warning in avic_get_physical_id_entry()
  KVM: arm/arm64: Hold slots_lock when unregistering kvm io bus devices
  KVM: arm/arm64: Fix bug when registering redist iodevs
  KVM: x86: lower default for halt_poll_ns
  kvm: arm/arm64: Fix use after free of stage2 page table
  kvm: arm/arm64: Force reading uncached stage2 PGD
  KVM: nVMX: fix EPT permissions as reported in exit qualification
  KVM: VMX: Don't enable EPT A/D feature if EPT feature is disabled
  KVM: x86: Fix load damaged SSEx MXCSR register
  kvm: nVMX: off by one in vmx_write_pml_buffer()
  KVM: arm: rename pm_fake handler to trap_raz_wi
  KVM: arm: plug potential guest hardware debug leakage
  kvm: arm/arm64: Fix race in resetting stage2 PGD
  KVM: arm/arm64: vgic-v3: Use PREbits to infer the number of ICH_APxRn_EL2 registers
  KVM: arm/arm64: vgic-v3: Do not use Active+Pending state for a HW interrupt
  ...
......@@ -31,7 +31,8 @@ void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table);
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
......
......@@ -32,6 +32,7 @@
#include <asm/vfp.h>
#include "../vfp/vfpinstr.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "coproc.h"
......@@ -111,12 +112,6 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
kvm_inject_undefined(vcpu);
return 1;
}
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
{
/*
......@@ -284,7 +279,7 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
* must always support PMCCNTR (the cycle counter): we just RAZ/WI for
* all PM registers, which doesn't crash the guest kernel at least.
*/
static bool pm_fake(struct kvm_vcpu *vcpu,
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
const struct coproc_reg *r)
{
......@@ -294,19 +289,19 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
#define access_pmcr pm_fake
#define access_pmcntenset pm_fake
#define access_pmcntenclr pm_fake
#define access_pmovsr pm_fake
#define access_pmselr pm_fake
#define access_pmceid0 pm_fake
#define access_pmceid1 pm_fake
#define access_pmccntr pm_fake
#define access_pmxevtyper pm_fake
#define access_pmxevcntr pm_fake
#define access_pmuserenr pm_fake
#define access_pmintenset pm_fake
#define access_pmintenclr pm_fake
#define access_pmcr trap_raz_wi
#define access_pmcntenset trap_raz_wi
#define access_pmcntenclr trap_raz_wi
#define access_pmovsr trap_raz_wi
#define access_pmselr trap_raz_wi
#define access_pmceid0 trap_raz_wi
#define access_pmceid1 trap_raz_wi
#define access_pmccntr trap_raz_wi
#define access_pmxevtyper trap_raz_wi
#define access_pmxevcntr trap_raz_wi
#define access_pmuserenr trap_raz_wi
#define access_pmintenset trap_raz_wi
#define access_pmintenclr trap_raz_wi
/* Architected CP15 registers.
* CRn denotes the primary register number, but is copied to the CRm in the
......@@ -532,12 +527,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
return 1;
}
/**
* kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
static struct coproc_params decode_64bit_hsr(struct kvm_vcpu *vcpu)
{
struct coproc_params params;
......@@ -551,9 +541,38 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
params.CRm = 0;
return params;
}
/**
* kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct coproc_params params = decode_64bit_hsr(vcpu);
return emulate_cp15(vcpu, &params);
}
/**
* kvm_handle_cp14_64 -- handles a mrrc/mcrr trap on a guest CP14 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct coproc_params params = decode_64bit_hsr(vcpu);
/* raz_wi cp14 */
trap_raz_wi(vcpu, &params, NULL);
/* handled */
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
static void reset_coproc_regs(struct kvm_vcpu *vcpu,
const struct coproc_reg *table, size_t num)
{
......@@ -564,12 +583,7 @@ static void reset_coproc_regs(struct kvm_vcpu *vcpu,
table[i].reset(vcpu, &table[i]);
}
/**
* kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu)
{
struct coproc_params params;
......@@ -583,9 +597,37 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7;
params.Rt2 = 0;
return params;
}
/**
* kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct coproc_params params = decode_32bit_hsr(vcpu);
return emulate_cp15(vcpu, &params);
}
/**
* kvm_handle_cp14_32 -- handles a mrc/mcr trap on a guest CP14 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct coproc_params params = decode_32bit_hsr(vcpu);
/* raz_wi cp14 */
trap_raz_wi(vcpu, &params, NULL);
/* handled */
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
/******************************************************************************
* Userspace API
*****************************************************************************/
......
......@@ -95,9 +95,9 @@ static exit_handle_fn arm_exit_handlers[] = {
[HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
[HSR_EC_CP14_MR] = kvm_handle_cp14_access,
[HSR_EC_CP14_MR] = kvm_handle_cp14_32,
[HSR_EC_CP14_LS] = kvm_handle_cp14_load_store,
[HSR_EC_CP14_64] = kvm_handle_cp14_access,
[HSR_EC_CP14_64] = kvm_handle_cp14_64,
[HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access,
[HSR_EC_CP10_ID] = kvm_handle_cp10_id,
[HSR_EC_HVC] = handle_hvc,
......
......@@ -2,6 +2,8 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
ccflags-y += -fno-stack-protector
KVM=../../../../virt/kvm
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
......
......@@ -48,7 +48,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
write_sysreg(HSTR_T(15), HSTR);
write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
val = read_sysreg(HDCR);
write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR);
val |= HDCR_TPM | HDCR_TPMCR; /* trap performance monitors */
val |= HDCR_TDRA | HDCR_TDOSA | HDCR_TDA; /* trap debug regs */
write_sysreg(val, HDCR);
}
static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
......
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H
#if !defined(_TRACE_ARM_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_ARM_KVM_H
#include <linux/tracepoint.h>
......@@ -74,10 +74,10 @@ TRACE_EVENT(kvm_hvc,
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
#endif /* _TRACE_KVM_H */
#endif /* _TRACE_ARM_KVM_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH arch/arm/kvm
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
......
......@@ -2,6 +2,8 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
ccflags-y += -fno-stack-protector
KVM=../../../../virt/kvm
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
......
......@@ -67,7 +67,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
select SPAPR_TCE_IOMMU if IOMMU_SUPPORT
select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV)
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
......
......@@ -46,7 +46,7 @@ kvm-e500mc-objs := \
e500_emulate.o
kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
kvm-book3s_64-builtin-objs-$(CONFIG_SPAPR_TCE_IOMMU) := \
book3s_64_vio_hv.o
kvm-pr-y := \
......@@ -90,11 +90,11 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
kvm-book3s_64-module-objs := \
$(common-objs-y) \
book3s.o \
book3s_64_vio.o \
book3s_rtas.o \
$(kvm-book3s_64-objs-y)
......
......@@ -301,6 +301,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
/* For radix, we might be in virtual mode, so punt */
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
......@@ -381,6 +385,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
bool prereg = false;
struct kvmppc_spapr_tce_iommu_table *stit;
/* For radix, we might be in virtual mode, so punt */
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
......@@ -491,6 +499,10 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
long i, ret;
struct kvmppc_spapr_tce_iommu_table *stit;
/* For radix, we might be in virtual mode, so punt */
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
......@@ -527,6 +539,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
return H_SUCCESS;
}
/* This can be called in either virtual mode or real mode */
long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba)
{
......
......@@ -207,7 +207,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_h_random(struct kvm_vcpu *vcpu)
{
if (powernv_get_random_real_mode(&vcpu->arch.gpr[4]))
int r;
/* Only need to do the expensive mfmsr() on radix */
if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
r = powernv_get_random_long(&vcpu->arch.gpr[4]);
else
r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]);
if (r)
return H_SUCCESS;
return H_HARDWARE;
......
......@@ -50,7 +50,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
pteg_addr = get_pteg_addr(vcpu, pte_index);
mutex_lock(&vcpu->kvm->arch.hpt_mutex);
copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
ret = H_FUNCTION;
if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)))
goto done;
hpte = pteg;
ret = H_PTEG_FULL;
......@@ -71,7 +73,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
pteg_addr += i * HPTE_SIZE;
copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
ret = H_FUNCTION;
if (copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE))
goto done;
kvmppc_set_gpr(vcpu, 4, pte_index | i);
ret = H_SUCCESS;
......@@ -93,7 +97,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
pteg = get_pteg_addr(vcpu, pte_index);
mutex_lock(&vcpu->kvm->arch.hpt_mutex);
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
ret = H_FUNCTION;
if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
goto done;
pte[0] = be64_to_cpu((__force __be64)pte[0]);
pte[1] = be64_to_cpu((__force __be64)pte[1]);
......@@ -103,7 +109,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
goto done;
copy_to_user((void __user *)pteg, &v, sizeof(v));
ret = H_FUNCTION;
if (copy_to_user((void __user *)pteg, &v, sizeof(v)))
goto done;
rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
......@@ -171,7 +179,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
}
pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) {
ret = H_FUNCTION;
break;
}
pte[0] = be64_to_cpu((__force __be64)pte[0]);
pte[1] = be64_to_cpu((__force __be64)pte[1]);
......@@ -184,7 +195,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
tsh |= H_BULK_REMOVE_NOT_FOUND;
} else {
/* Splat the pteg in (userland) hpt */
copy_to_user((void __user *)pteg, &v, sizeof(v));
if (copy_to_user((void __user *)pteg, &v, sizeof(v))) {
ret = H_FUNCTION;
break;
}
rb = compute_tlbie_rb(pte[0], pte[1],
tsh & H_BULK_REMOVE_PTEX);
......@@ -211,7 +225,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
pteg = get_pteg_addr(vcpu, pte_index);
mutex_lock(&vcpu->kvm->arch.hpt_mutex);
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
ret = H_FUNCTION;
if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
goto done;
pte[0] = be64_to_cpu((__force __be64)pte[0]);
pte[1] = be64_to_cpu((__force __be64)pte[1]);
......@@ -234,7 +250,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
pte[0] = (__force u64)cpu_to_be64(pte[0]);
pte[1] = (__force u64)cpu_to_be64(pte[1]);
copy_to_user((void __user *)pteg, pte, sizeof(pte));
ret = H_FUNCTION;
if (copy_to_user((void __user *)pteg, pte, sizeof(pte)))
goto done;
ret = H_SUCCESS;
done:
......@@ -244,36 +262,37 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
unsigned long tce = kvmppc_get_gpr(vcpu, 6);
long rc;
rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
rc = kvmppc_h_logical_ci_load(vcpu);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
{
long rc;
rc = kvmppc_h_logical_ci_load(vcpu);
rc = kvmppc_h_logical_ci_store(vcpu);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
#ifdef CONFIG_SPAPR_TCE_IOMMU
static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
unsigned long tce = kvmppc_get_gpr(vcpu, 6);
long rc;
rc = kvmppc_h_logical_ci_store(vcpu);
rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
......@@ -311,6 +330,23 @@ static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
#else /* CONFIG_SPAPR_TCE_IOMMU */
static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
{
return EMULATE_FAIL;
}
static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
{
return EMULATE_FAIL;
}
static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
{
return EMULATE_FAIL;
}
#endif /* CONFIG_SPAPR_TCE_IOMMU */
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
......
......@@ -1749,7 +1749,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
break;
}
#ifdef CONFIG_PPC_BOOK3S_64
#ifdef CONFIG_SPAPR_TCE_IOMMU
case KVM_CREATE_SPAPR_TCE_64: {
struct kvm_create_spapr_tce_64 create_tce_64;
......@@ -1780,6 +1780,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
#endif
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_PPC_GET_SMMU_INFO: {
struct kvm_ppc_smmu_info info;
struct kvm *kvm = filp->private_data;
......
......@@ -43,7 +43,7 @@
#define KVM_PRIVATE_MEM_SLOTS 3
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#define KVM_HALT_POLL_NS_DEFAULT 400000
#define KVM_HALT_POLL_NS_DEFAULT 200000
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
......
......@@ -90,6 +90,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
* Boot time FPU feature detection code:
*/
unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
EXPORT_SYMBOL_GPL(mxcsr_feature_mask);
static void __init fpu__init_system_mxcsr(void)
{
......
......@@ -4173,7 +4173,7 @@ static int check_dr_write(struct x86_emulate_ctxt *ctxt)
static int check_svme(struct x86_emulate_ctxt *ctxt)
{
u64 efer;
u64 efer = 0;
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
......
......@@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pt_element_t pte;
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
u64 pt_access, pte_access;
unsigned index, accessed_dirty, pte_pkey;
unsigned nested_access;
gpa_t pte_gpa;
bool have_ad;
int offset;
u64 walk_nx_mask = 0;
const int write_fault = access & PFERR_WRITE_MASK;
const int user_fault = access & PFERR_USER_MASK;
const int fetch_fault = access & PFERR_FETCH_MASK;
......@@ -302,6 +304,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
#if PTTYPE == 64
walk_nx_mask = 1ULL << PT64_NX_SHIFT;
if (walker->level == PT32E_ROOT_LEVEL) {
pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
......@@ -313,8 +316,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
walker->max_level = walker->level;
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
/*
* FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
* by the MOV to CR instruction are treated as reads and do not cause the
......@@ -322,14 +323,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
*/
nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
pt_access = pte_access = ACC_ALL;
pte_access = ~0;
++walker->level;
do {
gfn_t real_gfn;
unsigned long host_addr;
pt_access &= pte_access;
pt_access = pte_access;
--walker->level;
index = PT_INDEX(addr, walker->level);
......@@ -371,6 +372,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
trace_kvm_mmu_paging_element(pte, walker->level);
/*
* Inverting the NX it lets us AND it like other
* permission bits.
*/
pte_access = pt_access & (pte ^ walk_nx_mask);
if (unlikely(!FNAME(is_present_gpte)(pte)))
goto error;
......@@ -379,14 +386,16 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
goto error;
}
accessed_dirty &= pte;
pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
walker->ptes[walker->level - 1] = pte;
} while (!is_last_gpte(mmu, walker->level, pte));
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
/* Convert to ACC_*_MASK flags for struct guest_walker. */
walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
if (unlikely(errcode))
goto error;
......@@ -403,7 +412,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
walker->gfn = real_gpa >> PAGE_SHIFT;
if (!write_fault)
FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
else
/*
* On a write fault, fold the dirty bit into accessed_dirty.
......@@ -421,10 +430,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
goto retry_walk;
}
walker->pt_access = pt_access;
walker->pte_access = pte_access;
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
__func__, (u64)pte, pte_access, pt_access);
__func__, (u64)pte, walker->pte_access, walker->pt_access);
return 1;
error:
......@@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
*/
if (!(errcode & PFERR_RSVD_MASK)) {
vcpu->arch.exit_qualification &= 0x187;
vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
}
#endif
walker->fault.address = addr;
......
......@@ -294,7 +294,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
((u64)1 << edx.split.bit_width_fixed) - 1;
}
pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
pmu->global_ctrl_mask = ~pmu->global_ctrl;
......
......@@ -1272,7 +1272,8 @@ static void init_vmcb(struct vcpu_svm *svm)
}
static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
unsigned int index)
{
u64 *avic_physical_id_table;
struct kvm_arch *vm_data = &vcpu->kvm->arch;
......
......@@ -6504,7 +6504,7 @@ static __init int hardware_setup(void)
enable_ept_ad_bits = 0;
}
if (!cpu_has_vmx_ept_ad_bits())
if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
enable_ept_ad_bits = 0;
if (!cpu_has_vmx_unrestricted_guest())
......@@ -11213,7 +11213,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
if (!nested_cpu_has_pml(vmcs12))
return 0;
if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
vmx->nested.pml_full = true;
return 1;
}
......
......@@ -1763,6 +1763,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
{
struct kvm_arch *ka = &kvm->arch;
struct pvclock_vcpu_time_info hv_clock;
u64 ret;
spin_lock(&ka->pvclock_gtod_sync_lock);
if (!ka->use_master_clock) {
......@@ -1774,10 +1775,17 @@ u64 get_kvmclock_ns(struct kvm *kvm)
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
spin_unlock(&ka->pvclock_gtod_sync_lock);
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
return __pvclock_read_cycles(&hv_clock, rdtsc());
ret = __pvclock_read_cycles(&hv_clock, rdtsc());
put_cpu();
return ret;
}
static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
......@@ -3288,11 +3296,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
}
}
#define XSAVE_MXCSR_OFFSET 24
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
u64 xstate_bv =
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
/*
......@@ -3300,11 +3311,13 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
* with old userspace.
*/
if (xstate_bv & ~kvm_supported_xcr0())
if (xstate_bv & ~kvm_supported_xcr0() ||
mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
load_xsave(vcpu, (u8 *)guest_xsave->region);
} else {
if (xstate_bv & ~XFEATURE_MASK_FPSSE)
if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state.fxsave,
guest_xsave->region, sizeof(struct fxregs_state));
......@@ -4818,16 +4831,20 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
{
/* TODO: String I/O for in kernel device */
int r;
int r = 0, i;
if (vcpu->arch.pio.in)
r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
vcpu->arch.pio.size, pd);
else
r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
vcpu->arch.pio.port, vcpu->arch.pio.size,
pd);
for (i = 0; i < vcpu->arch.pio.count; i++) {
if (vcpu->arch.pio.in)
r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
vcpu->arch.pio.size, pd);
else
r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
vcpu->arch.pio.port, vcpu->arch.pio.size,
pd);
if (r)
break;
pd += vcpu->arch.pio.size;
}
return r;
}
......@@ -4865,6 +4882,8 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
if (vcpu->arch.pio.count)
goto data_avail;
memset(vcpu->arch.pio_data, 0, size * count);
ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
if (ret) {
data_avail:
......@@ -5048,6 +5067,8 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
if (var.unusable) {
memset(desc, 0, sizeof(*desc));
if (base3)
*base3 = 0;
return false;
}
......
......@@ -195,7 +195,10 @@ struct vgic_dist {
/* either a GICv2 CPU interface */
gpa_t vgic_cpu_base;
/* or a number of GICv3 redistributor regions */
gpa_t vgic_redist_base;
struct {
gpa_t vgic_redist_base;
gpa_t vgic_redist_free_offset;
};
};
/* distributor enabled */
......
......@@ -22,7 +22,7 @@
#include <asm/kvm_hyp.h>
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1)
#define vtr_to_nr_pre_bits(v) (((u32)(v) >> 26) + 1)
static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
{
......@@ -135,13 +135,13 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
if (used_lrs) {
int i;
u32 nr_pri_bits;
u32 nr_pre_bits;
cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
write_gicreg(0, ICH_HCR_EL2);
val = read_gicreg(ICH_VTR_EL2);
nr_pri_bits = vtr_to_nr_pri_bits(val);
nr_pre_bits = vtr_to_nr_pre_bits(val);
for (i = 0; i < used_lrs; i++) {
if (cpu_if->vgic_elrsr & (1 << i))
......@@ -152,7 +152,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
__gic_v3_set_lr(0, i);
}
switch (nr_pri_bits) {
switch (nr_pre_bits) {
case 7:
cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
......@@ -162,7 +162,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
}
switch (nr_pri_bits) {
switch (nr_pre_bits) {
case 7:
cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
......@@ -198,7 +198,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u64 val;
u32 nr_pri_bits;
u32 nr_pre_bits;
int i;
/*
......@@ -217,12 +217,12 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
}
val = read_gicreg(ICH_VTR_EL2);
nr_pri_bits = vtr_to_nr_pri_bits(val);
nr_pre_bits = vtr_to_nr_pre_bits(val);
if (used_lrs) {
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
switch (nr_pri_bits) {
switch (nr_pre_bits) {
case 7:
write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
......@@ -232,7 +232,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
}
switch (nr_pri_bits) {
switch (nr_pre_bits) {
case 7:
write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
......
......@@ -295,6 +295,13 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
assert_spin_locked(&kvm->mmu_lock);
pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
/*
* Make sure the page table is still active, as another thread
* could have possibly freed the page table, while we released
* the lock.
*/
if (!READ_ONCE(kvm->arch.pgd))
break;
next = stage2_pgd_addr_end(addr, end);
if (!stage2_pgd_none(*pgd))
unmap_stage2_puds(kvm, pgd, addr, next);
......@@ -829,22 +836,22 @@ void stage2_unmap_vm(struct kvm *kvm)
* Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
* underlying level-2 and level-3 tables before freeing the actual level-1 table
* and setting the struct pointer to NULL.
*
* Note we don't need locking here as this is only called when the VM is
* destroyed, which can only be done once.
*/
void kvm_free_stage2_pgd(struct kvm *kvm)
{
if (kvm->arch.pgd == NULL)
return;
void *pgd = NULL;
spin_lock(&kvm->mmu_lock);
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
if (kvm->arch.pgd) {
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
}
spin_unlock(&kvm->mmu_lock);
/* Free the HW pgd, one page at a time */
free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
kvm->arch.pgd = NULL;
if (pgd)
free_pages_exact(pgd, S2_PGD_SIZE);
}
static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
......@@ -1170,11 +1177,13 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
* large. Otherwise, we may see kernel panics with
* CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
* CONFIG_LOCKDEP. Additionally, holding the lock too long
* will also starve other vCPUs.
* will also starve other vCPUs. We have to also make sure
* that the page tables are not freed while we released
* the lock.
*/
if (need_resched() || spin_needbreak(&kvm->mmu_lock))
cond_resched_lock(&kvm->mmu_lock);
cond_resched_lock(&kvm->mmu_lock);
if (!READ_ONCE(kvm->arch.pgd))
break;
next = stage2_pgd_addr_end(addr, end);
if (stage2_pgd_present(*pgd))
stage2_wp_puds(pgd, addr, next);
......
......@@ -242,8 +242,11 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
* If we are creating a VCPU with a GICv3 we must also register the
* KVM io device for the redistributor that belongs to this VCPU.
*/
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
mutex_lock(&vcpu->kvm->lock);
ret = vgic_register_redist_iodev(vcpu);
mutex_unlock(&vcpu->kvm->lock);
}
return ret;
}
......
......@@ -586,7 +586,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
if (!vgic_v3_check_base(kvm))
return -EINVAL;
rd_base = vgic->vgic_redist_base + kvm_vcpu_get_idx(vcpu) * SZ_64K * 2;
rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset;
sgi_base = rd_base + SZ_64K;
kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
......@@ -614,11 +614,15 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
SZ_64K, &sgi_dev->dev);
mutex_unlock(&kvm->slots_lock);
if (ret)
if (ret) {
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
&rd_dev->dev);
goto out;
}
vgic->vgic_redist_free_offset += 2 * SZ_64K;
out:
mutex_unlock(&kvm->slots_lock);
return ret;
}
......@@ -644,10 +648,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
if (ret) {
/* The current c failed, so we start with the previous one. */
mutex_lock(&kvm->slots_lock);
for (c--; c >= 0; c--) {
vcpu = kvm_get_vcpu(kvm, c);
vgic_unregister_redist_iodev(vcpu);
}
mutex_unlock(&kvm->slots_lock);
}
return ret;
......
......@@ -149,6 +149,13 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (irq->hw) {
val |= GICH_LR_HW;
val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
/*
* Never set pending+active on a HW interrupt, as the
* pending state is kept at the physical distributor
* level.
*/
if (irq->active && irq_is_pending(irq))
val &= ~GICH_LR_PENDING_BIT;
} else {
if (irq->config == VGIC_CONFIG_LEVEL)
val |= GICH_LR_EOI;
......
......@@ -127,6 +127,13 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (irq->hw) {
val |= ICH_LR_HW;
val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
/*
* Never set pending+active on a HW interrupt, as the
* pending state is kept at the physical distributor
* level.
*/
if (irq->active && irq_is_pending(irq))
val &= ~ICH_LR_PENDING_BIT;
} else {
if (irq->config == VGIC_CONFIG_LEVEL)
val |= ICH_LR_EOI;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册