diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2a7f5d782c332d1965ac1c5a23a33289cfce7352..49ec9038ec14102a286c9b4bed126a6825613439 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -604,6 +604,8 @@ struct kvm_arch { bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; +#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE + atomic_t assigned_device_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 8fba544e9cc4164261f76c6c5d894b80a92f0377..f36d56bd76324543f6f0c208bea569ea96e6e802 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -108,6 +108,8 @@ #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) /* Support for a virtual guest idle state is available */ #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) +/* Guest crash data handler available */ +#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 << 10) /* * Implementation recommendations. Indicates which behaviors the hypervisor diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 64dd467930997adbfa6aecd25641ef2004c5488c..2fbea2544f2437bc0ae50ef00288dc320effd81e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -98,6 +98,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); + if (vcpu->arch.eager_fpu) + kvm_x86_ops->fpu_activate(vcpu); /* * The existing code assumes virtual address is 48-bit in the canonical diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 7dbced309ddb526d99ae41114bf3afd8488ccaa8..5c520ebf6343270272679e213b19cd9380b63293 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) goto out_unmap; } + kvm_arch_start_assignment(kvm); pci_set_dev_assigned(pdev); dev_info(&pdev->dev, "kvm assign device\n"); @@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) iommu_detach_device(domain, &pdev->dev); pci_clear_dev_assigned(pdev); + kvm_arch_end_assignment(kvm); dev_info(&pdev->dev, "kvm deassign device\n"); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f807496b62c2cc76e82a60cd58ee187f0cdc77c2..44171462bd2a31561645aff21c83584b82eed6ff 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2479,6 +2479,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, return 0; } +static bool kvm_is_mmio_pfn(pfn_t pfn) +{ + if (pfn_valid(pfn)) + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); + + return true; +} + static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int level, gfn_t gfn, pfn_t pfn, bool speculative, @@ -2506,7 +2514,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_PAGE_SIZE_MASK; if (tdp_enabled) spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, - kvm_is_reserved_pfn(pfn)); + kvm_is_mmio_pfn(pfn)); if (host_writable) spte |= SPTE_HOST_WRITEABLE; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 602b974a60a626e18d11965ed5ad1461c329bee8..bbc678a66b18719287b091787db96cf1f9f98981 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } +#define MTRR_TYPE_UC_MINUS 7 +#define MTRR2PROTVAL_INVALID 0xff + +static u8 mtrr2protval[8]; + +static u8 fallback_mtrr_type(int mtrr) +{ + /* + * WT and WP aren't always available in the host PAT. Treat + * them as UC and UC- respectively. Everything else should be + * there. + */ + switch (mtrr) + { + case MTRR_TYPE_WRTHROUGH: + return MTRR_TYPE_UNCACHABLE; + case MTRR_TYPE_WRPROT: + return MTRR_TYPE_UC_MINUS; + default: + BUG(); + } +} + +static void build_mtrr2protval(void) +{ + int i; + u64 pat; + + for (i = 0; i < 8; i++) + mtrr2protval[i] = MTRR2PROTVAL_INVALID; + + /* Ignore the invalid MTRR types. */ + mtrr2protval[2] = 0; + mtrr2protval[3] = 0; + + /* + * Use host PAT value to figure out the mapping from guest MTRR + * values to nested page table PAT/PCD/PWT values. We do not + * want to change the host PAT value every time we enter the + * guest. + */ + rdmsrl(MSR_IA32_CR_PAT, pat); + for (i = 0; i < 8; i++) { + u8 mtrr = pat >> (8 * i); + + if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID) + mtrr2protval[mtrr] = __cm_idx2pte(i); + } + + for (i = 0; i < 8; i++) { + if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) { + u8 fallback = fallback_mtrr_type(i); + mtrr2protval[i] = mtrr2protval[fallback]; + BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID); + } + } +} + static __init int svm_hardware_setup(void) { int cpu; @@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); + build_mtrr2protval(); return 0; err: @@ -1085,6 +1144,39 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + /* Unlike Intel, AMD takes the guest's CR0.CD into account. + * + * AMD does not have IPAT. To emulate it for the case of guests + * with no assigned devices, just set everything to WB. If guests + * have assigned devices, however, we cannot force WB for RAM + * pages only, so use the guest PAT directly. + */ + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + *g_pat = 0x0606060606060606; + else + *g_pat = vcpu->arch.pat; +} + +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + u8 mtrr; + + /* + * 1. MMIO: trust guest MTRR, so same as item 3. + * 2. No passthrough: always map as WB, and force guest PAT to WB as well + * 3. Passthrough: can't guarantee the result, try to trust guest. + */ + if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) + return 0; + + mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); + return mtrr2protval[mtrr]; +} + static void init_vmcb(struct vcpu_svm *svm, bool init_event) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1180,6 +1272,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; + svm_set_guest_pat(svm, &save->g_pat); save->cr3 = 0; save->cr4 = 0; } @@ -3254,6 +3347,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_IA32_CR_PAT: + if (npt_enabled) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; + vcpu->arch.pat = data; + svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); + mark_dirty(svm->vmcb, VMCB_NPT); + break; + } + /* fall through */ default: return kvm_set_msr_common(vcpu, msr); } @@ -4088,11 +4191,6 @@ static bool svm_has_high_real_mode_segbase(void) return true; } -static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - return 0; -} - static void svm_cpuid_update(struct kvm_vcpu *vcpu) { } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e856dd566f4c2a6c10a4a6dc9c6ea018fe72cdda..5b4e9384717a17257ea20ef47031dd5f158273a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8632,22 +8632,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: always map as UC + * 1. MMIO: guest may want to apply WC, trust it. * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. + * result, try to trust guest. So the same as item 1. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (is_mmio) { - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ipat = VMX_EPT_IPAT_BIT; cache = MTRR_TYPE_WRBACK; goto exit; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bbaf44e8f0d3cdd7100c40c98ccf2ab40ae1ea2f..5ef2560075bfb80e6fdabcdf51f71258091e4339 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3157,8 +3157,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); memcpy(dest, src + offset, size); - } else - WARN_ON_ONCE(1); + } valid -= feature; } @@ -7315,11 +7314,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu = kvm_x86_ops->vcpu_create(kvm, id); - /* - * Activate fpu unconditionally in case the guest needs eager FPU. It will be - * deactivated soon if it doesn't. - */ - kvm_x86_ops->fpu_activate(vcpu); return vcpu; } @@ -8218,6 +8212,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) kvm_x86_ops->interrupt_allowed(vcpu); } +void kvm_arch_start_assignment(struct kvm *kvm) +{ + atomic_inc(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); + +void kvm_arch_end_assignment(struct kvm *kvm) +{ + atomic_dec(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + void kvm_arch_register_noncoherent_dma(struct kvm *kvm) { atomic_inc(&kvm->arch.noncoherent_dma_count); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9564fd78c547b6128ddf8304639e2215b6190e3f..05e99b8ef465bcc10ca979b26b68277191951d86 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -734,6 +734,24 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) return false; } #endif +#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE +void kvm_arch_start_assignment(struct kvm *kvm); +void kvm_arch_end_assignment(struct kvm *kvm); +bool kvm_arch_has_assigned_device(struct kvm *kvm); +#else +static inline void kvm_arch_start_assignment(struct kvm *kvm) +{ +} + +static inline void kvm_arch_end_assignment(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return false; +} +#endif static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 620e37f741b868a231a414a71511cd8872704a3c..1dd087da6f31ae2f38c70042213ee6e5159cee10 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -155,6 +155,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) list_add_tail(&kvg->node, &kv->group_list); kvg->vfio_group = vfio_group; + kvm_arch_start_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_update_coherency(dev); @@ -190,6 +192,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) break; } + kvm_arch_end_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_group_put_external_user(vfio_group); @@ -239,6 +243,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); kfree(kvg); + kvm_arch_end_assignment(dev->kvm); } kvm_vfio_update_coherency(dev);