提交 12703759 编写于 作者: S Sean Christopherson 提交者: Paolo Bonzini

KVM: x86/mmu: Bail early from final #PF handling on spurious faults

Detect spurious page faults, e.g. page faults that occur when multiple
vCPUs simultaneously access a not-present page, and skip the SPTE write,
prefetch, and stats update for spurious faults.

Note, the performance benefits of skipping the write and prefetch are
likely negligible, and the false positive stats adjustment is probably
lost in the noise.  The primary motivation is to play nice with TDX's
SEPT in the long term.  SEAMCALLs (to program SEPT entries) are quite
costly, e.g. thousands of cycles, and a spurious SEPT update will result
in a SEAMCALL error (which KVM will ideally treat as fatal).
Reported-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200923220425.18402-5-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
上级 c4371c2a
...@@ -2985,6 +2985,7 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) ...@@ -2985,6 +2985,7 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
/* Bits which may be returned by set_spte() */ /* Bits which may be returned by set_spte() */
#define SET_SPTE_WRITE_PROTECTED_PT BIT(0) #define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1) #define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
#define SET_SPTE_SPURIOUS BIT(2)
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned int pte_access, int level, unsigned int pte_access, int level,
...@@ -3073,7 +3074,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -3073,7 +3074,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte = mark_spte_for_access_track(spte); spte = mark_spte_for_access_track(spte);
set_pte: set_pte:
if (mmu_spte_update(sptep, spte)) if (*sptep == spte)
ret |= SET_SPTE_SPURIOUS;
else if (mmu_spte_update(sptep, spte))
ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH; ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
return ret; return ret;
} }
...@@ -3128,6 +3131,15 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -3128,6 +3131,15 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (unlikely(is_mmio_spte(*sptep))) if (unlikely(is_mmio_spte(*sptep)))
ret = RET_PF_EMULATE; ret = RET_PF_EMULATE;
/*
* The fault is fully spurious if and only if the new SPTE and old SPTE
* are identical, and emulation is not required.
*/
if ((set_spte_ret & SET_SPTE_SPURIOUS) && ret == RET_PF_FIXED) {
WARN_ON_ONCE(!was_rmapped);
return RET_PF_SPURIOUS;
}
pgprintk("%s: setting spte %llx\n", __func__, *sptep); pgprintk("%s: setting spte %llx\n", __func__, *sptep);
trace_kvm_mmu_set_spte(level, gfn, sptep); trace_kvm_mmu_set_spte(level, gfn, sptep);
if (!was_rmapped && is_large_pte(*sptep)) if (!was_rmapped && is_large_pte(*sptep))
...@@ -3364,6 +3376,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, ...@@ -3364,6 +3376,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
write, level, base_gfn, pfn, prefault, write, level, base_gfn, pfn, prefault,
map_writable); map_writable);
if (ret == RET_PF_SPURIOUS)
return ret;
direct_pte_prefetch(vcpu, it.sptep); direct_pte_prefetch(vcpu, it.sptep);
++vcpu->stat.pf_fixed; ++vcpu->stat.pf_fixed;
return ret; return ret;
......
...@@ -711,6 +711,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, ...@@ -711,6 +711,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
it.level, base_gfn, pfn, prefault, map_writable); it.level, base_gfn, pfn, prefault, map_writable);
if (ret == RET_PF_SPURIOUS)
return ret;
FNAME(pte_prefetch)(vcpu, gw, it.sptep); FNAME(pte_prefetch)(vcpu, gw, it.sptep);
++vcpu->stat.pf_fixed; ++vcpu->stat.pf_fixed;
return ret; return ret;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册