diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6cbf97a2ebc4c07167d0005fe5ec85d2234e03b9..89d1fdb39c46d66acb88cdabb4b6795a24cd636f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -647,6 +647,7 @@ struct kvm_vcpu_arch { u64 smi_count; bool tpr_access_reporting; bool xsaves_enabled; + bool xfd_no_write_intercept; u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b8b7f5c7b3df54cc83ed527618e58bbecd1d3f2b..15e30602782b3cd388a8c84e1f721a9791883586 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -162,6 +162,7 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = { MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, #endif MSR_IA32_SYSENTER_CS, @@ -764,10 +765,11 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu) } /* - * Trap #NM if guest xfd contains a non-zero value so guest XFD_ERR - * can be saved timely. + * Disabling xfd interception indicates that dynamic xfeatures + * might be used in the guest. Always trap #NM in this case + * to save guest xfd_err timely. */ - if (vcpu->arch.guest_fpu.fpstate->xfd) + if (vcpu->arch.xfd_no_write_intercept) eb |= (1u << NM_VECTOR); vmcs_write32(EXCEPTION_BITMAP, eb); @@ -1978,9 +1980,21 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_XFD: ret = kvm_set_msr_common(vcpu, msr_info); - /* Update #NM interception according to guest xfd */ - if (!ret) + /* + * Always intercepting WRMSR could incur non-negligible + * overhead given xfd might be changed frequently in + * guest context switch. Disable write interception + * upon the first write with a non-zero value (indicating + * potential usage on dynamic xfeatures). Also update + * exception bitmap to trap #NM for proper virtualization + * of guest xfd_err. + */ + if (!ret && data) { + vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD, + MSR_TYPE_RW); + vcpu->arch.xfd_no_write_intercept = true; vmx_update_exception_bitmap(vcpu); + } break; #endif case MSR_IA32_SYSENTER_CS: diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 69dd2f85abdc3b51ad5caa9a5621e93663f43a8f..f8fc7441baea93da21762f92b93e45e45849cf81 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -349,7 +349,7 @@ struct vcpu_vmx { struct lbr_desc lbr_desc; /* Save desired MSR intercept (read: pass-through) state */ -#define MAX_POSSIBLE_PASSTHROUGH_MSRS 14 +#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15 struct { DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS); DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bde18ca657db6318b1dadb5e1bc6b2399d627120..60da2331ec32c0dfcb9ad5e2a379003c870c39dc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10083,6 +10083,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); + /* + * Sync xfd before calling handle_exit_irqoff() which may + * rely on the fact that guest_fpu::xfd is up-to-date (e.g. + * in #NM irqoff handler). + */ + if (vcpu->arch.xfd_no_write_intercept) + fpu_sync_guest_vmexit_xfd_state(); + static_call(kvm_x86_handle_exit_irqoff)(vcpu); if (vcpu->arch.guest_fpu.xfd_err)