提交 df14a68d 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:

 - Fix FPU refactoring ("kvm: x86: fix load xsave feature warning")

 - Fix eager FPU mode (Cc stable)

 - AMD bits of MTRR virtualization

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: x86: fix load xsave feature warning
  KVM: x86: apply guest MTRR virtualization on host reserved pages
  KVM: SVM: Sync g_pat with guest-written PAT value
  KVM: SVM: use NPT page attributes
  KVM: count number of assigned devices
  KVM: VMX: fix vmwrite to invalid VMCS
  KVM: x86: reintroduce kvm_is_mmio_pfn
  x86: hyperv: add CPUID bit for crash handlers
......@@ -604,6 +604,8 @@ struct kvm_arch {
bool iommu_noncoherent;
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
atomic_t noncoherent_dma_count;
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
atomic_t assigned_device_count;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
......
......@@ -108,6 +108,8 @@
#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4)
/* Support for a virtual guest idle state is available */
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5)
/* Guest crash data handler available */
#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
/*
* Implementation recommendations. Indicates which behaviors the hypervisor
......
......@@ -98,6 +98,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
if (vcpu->arch.eager_fpu)
kvm_x86_ops->fpu_activate(vcpu);
/*
* The existing code assumes virtual address is 48-bit in the canonical
......
......@@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
goto out_unmap;
}
kvm_arch_start_assignment(kvm);
pci_set_dev_assigned(pdev);
dev_info(&pdev->dev, "kvm assign device\n");
......@@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
iommu_detach_device(domain, &pdev->dev);
pci_clear_dev_assigned(pdev);
kvm_arch_end_assignment(kvm);
dev_info(&pdev->dev, "kvm deassign device\n");
......
......@@ -2479,6 +2479,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
return 0;
}
static bool kvm_is_mmio_pfn(pfn_t pfn)
{
if (pfn_valid(pfn))
return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
return true;
}
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int level,
gfn_t gfn, pfn_t pfn, bool speculative,
......@@ -2506,7 +2514,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= PT_PAGE_SIZE_MASK;
if (tdp_enabled)
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
kvm_is_reserved_pfn(pfn));
kvm_is_mmio_pfn(pfn));
if (host_writable)
spte |= SPTE_HOST_WRITEABLE;
......
......@@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
#define MTRR_TYPE_UC_MINUS 7
#define MTRR2PROTVAL_INVALID 0xff
static u8 mtrr2protval[8];
static u8 fallback_mtrr_type(int mtrr)
{
/*
* WT and WP aren't always available in the host PAT. Treat
* them as UC and UC- respectively. Everything else should be
* there.
*/
switch (mtrr)
{
case MTRR_TYPE_WRTHROUGH:
return MTRR_TYPE_UNCACHABLE;
case MTRR_TYPE_WRPROT:
return MTRR_TYPE_UC_MINUS;
default:
BUG();
}
}
static void build_mtrr2protval(void)
{
int i;
u64 pat;
for (i = 0; i < 8; i++)
mtrr2protval[i] = MTRR2PROTVAL_INVALID;
/* Ignore the invalid MTRR types. */
mtrr2protval[2] = 0;
mtrr2protval[3] = 0;
/*
* Use host PAT value to figure out the mapping from guest MTRR
* values to nested page table PAT/PCD/PWT values. We do not
* want to change the host PAT value every time we enter the
* guest.
*/
rdmsrl(MSR_IA32_CR_PAT, pat);
for (i = 0; i < 8; i++) {
u8 mtrr = pat >> (8 * i);
if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
mtrr2protval[mtrr] = __cm_idx2pte(i);
}
for (i = 0; i < 8; i++) {
if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
u8 fallback = fallback_mtrr_type(i);
mtrr2protval[i] = mtrr2protval[fallback];
BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
}
}
}
static __init int svm_hardware_setup(void)
{
int cpu;
......@@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
build_mtrr2protval();
return 0;
err:
......@@ -1085,6 +1144,39 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
return target_tsc - tsc;
}
static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
/* Unlike Intel, AMD takes the guest's CR0.CD into account.
*
* AMD does not have IPAT. To emulate it for the case of guests
* with no assigned devices, just set everything to WB. If guests
* have assigned devices, however, we cannot force WB for RAM
* pages only, so use the guest PAT directly.
*/
if (!kvm_arch_has_assigned_device(vcpu->kvm))
*g_pat = 0x0606060606060606;
else
*g_pat = vcpu->arch.pat;
}
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
u8 mtrr;
/*
* 1. MMIO: trust guest MTRR, so same as item 3.
* 2. No passthrough: always map as WB, and force guest PAT to WB as well
* 3. Passthrough: can't guarantee the result, try to trust guest.
*/
if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
return 0;
mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
return mtrr2protval[mtrr];
}
static void init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct vmcb_control_area *control = &svm->vmcb->control;
......@@ -1180,6 +1272,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
save->g_pat = svm->vcpu.arch.pat;
svm_set_guest_pat(svm, &save->g_pat);
save->cr3 = 0;
save->cr4 = 0;
}
......@@ -3254,6 +3347,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
case MSR_IA32_CR_PAT:
if (npt_enabled) {
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
vcpu->arch.pat = data;
svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
mark_dirty(svm->vmcb, VMCB_NPT);
break;
}
/* fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}
......@@ -4088,11 +4191,6 @@ static bool svm_has_high_real_mode_segbase(void)
return true;
}
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
return 0;
}
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}
......
......@@ -8632,22 +8632,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
u64 ipat = 0;
/* For VT-d and EPT combination
* 1. MMIO: always map as UC
* 1. MMIO: guest may want to apply WC, trust it.
* 2. EPT with VT-d:
* a. VT-d without snooping control feature: can't guarantee the
* result, try to trust guest.
* result, try to trust guest. So the same as item 1.
* b. VT-d with snooping control feature: snooping control feature of
* VT-d engine can guarantee the cache correctness. Just set it
* to WB to keep consistent with host. So the same as item 3.
* 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
* consistent with host MTRR
*/
if (is_mmio) {
cache = MTRR_TYPE_UNCACHABLE;
goto exit;
}
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
ipat = VMX_EPT_IPAT_BIT;
cache = MTRR_TYPE_WRBACK;
goto exit;
......
......@@ -3157,8 +3157,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
cpuid_count(XSTATE_CPUID, index,
&size, &offset, &ecx, &edx);
memcpy(dest, src + offset, size);
} else
WARN_ON_ONCE(1);
}
valid -= feature;
}
......@@ -7315,11 +7314,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu = kvm_x86_ops->vcpu_create(kvm, id);
/*
* Activate fpu unconditionally in case the guest needs eager FPU. It will be
* deactivated soon if it doesn't.
*/
kvm_x86_ops->fpu_activate(vcpu);
return vcpu;
}
......@@ -8218,6 +8212,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
kvm_x86_ops->interrupt_allowed(vcpu);
}
void kvm_arch_start_assignment(struct kvm *kvm)
{
atomic_inc(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
void kvm_arch_end_assignment(struct kvm *kvm)
{
atomic_dec(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
return atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
{
atomic_inc(&kvm->arch.noncoherent_dma_count);
......
......@@ -734,6 +734,24 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
return false;
}
#endif
#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE
void kvm_arch_start_assignment(struct kvm *kvm);
void kvm_arch_end_assignment(struct kvm *kvm);
bool kvm_arch_has_assigned_device(struct kvm *kvm);
#else
static inline void kvm_arch_start_assignment(struct kvm *kvm)
{
}
static inline void kvm_arch_end_assignment(struct kvm *kvm)
{
}
static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
return false;
}
#endif
static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
{
......
......@@ -155,6 +155,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
list_add_tail(&kvg->node, &kv->group_list);
kvg->vfio_group = vfio_group;
kvm_arch_start_assignment(dev->kvm);
mutex_unlock(&kv->lock);
kvm_vfio_update_coherency(dev);
......@@ -190,6 +192,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
break;
}
kvm_arch_end_assignment(dev->kvm);
mutex_unlock(&kv->lock);
kvm_vfio_group_put_external_user(vfio_group);
......@@ -239,6 +243,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
kvm_vfio_group_put_external_user(kvg->vfio_group);
list_del(&kvg->node);
kfree(kvg);
kvm_arch_end_assignment(dev->kvm);
}
kvm_vfio_update_coherency(dev);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册