diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0d3f5cf3ff3eae84e05d723bd76fdc44492d83dc..f6a911b4db31dd79dcf7d28b7d45814e8a49a8cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1141,6 +1141,8 @@ struct kvm_x86_ops { int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*get_msr_feature)(struct kvm_msr_entry *entry); + + bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 88940261fb5379841c7e516f2c78aa912baddb57..691f8994504755b94ee5ec13f1a6097b137c9ca3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5279,10 +5279,12 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * This can happen if a guest gets a page-fault on data access but the HW * table walker is not able to read the instruction page (e.g instruction * page is not present in memory). In those cases we simply restart the - * guest. + * guest, with the exception of AMD Erratum 1096 which is unrecoverable. */ - if (unlikely(insn && !insn_len)) - return 1; + if (unlikely(insn && !insn_len)) { + if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu)) + return 1; + } er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 50dcb755608939f40dc92dba10fb152c8b4a78a3..3ef1bad27728ca9f00057fa4217bea8cef7b820e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7103,6 +7103,36 @@ static int svm_unregister_enc_region(struct kvm *kvm, return ret; } +static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + bool is_user, smap; + + is_user = svm_get_cpl(vcpu) == 3; + smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); + + /* + * Detect and workaround Errata 1096 Fam_17h_00_0Fh + * + * In non SEV guest, hypervisor will be able to read the guest + * memory to decode the instruction pointer when insn_len is zero + * so we return true to indicate that decoding is possible. + * + * But in the SEV guest, the guest memory is encrypted with the + * guest specific key and hypervisor will not be able to decode the + * instruction pointer so we will not able to workaround it. Lets + * print the error and request to kill the guest. + */ + if (is_user && smap) { + if (!sev_guest(vcpu->kvm)) + return true; + + pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n"); + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } + + return false; +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -7233,6 +7263,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .mem_enc_op = svm_mem_enc_op, .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, + + .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6f7b3acdab263b6ef26ac70d55e02c11d6bf95b3..17803afab1b9b2fbc3bcdb256f621e8f17f35cf1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7865,6 +7865,11 @@ static void vmx_enable_tdp(void) kvm_enable_tdp(); } +static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + return 0; +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; @@ -14466,6 +14471,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .pre_enter_smm = vmx_pre_enter_smm, .pre_leave_smm = vmx_pre_leave_smm, .enable_smi_window = enable_smi_window, + .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, }; static void vmx_cleanup_l1d_flush(void)