提交 8ea667f2 编写于 作者: A Avi Kivity

KVM: MMU: Push clean gpte write protection out of gpte_access()

gpte_access() computes the access permissions of a guest pte and also
write-protects clean gptes.  This is wrong when we are servicing a
write fault (since we'll be setting the dirty bit momentarily) but
correct when instantiating a speculative spte, or when servicing a
read fault (since we'll want to trap a following write in order to
set the dirty bit).

It doesn't seem to hurt in practice, but in order to make the code
readable, push the write protection out of gpte_access() and into
a new protect_clean_gpte() which is called explicitly when needed.
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
上级 879238fe
...@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) ...@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
} }
/*
 * Remove write permission from *access when the guest pte is clean.
 *
 * The gpte's dirty bit is shifted down onto the writable bit position, so
 * that a dirty gpte leaves the write bit of *access as it was, while a
 * clean gpte clears it.  All other bits of *access are left untouched.
 */
static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
{
	unsigned dirty_as_writable;

	/* The shift trick below only works if both masks name the same bit. */
	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);

	/* Non-zero (== the write bit) only for dirty gptes. */
	dirty_as_writable = (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) &
			    PT_WRITABLE_MASK;

	/* Keep every non-write bit; keep the write bit only if dirty. */
	*access &= ((unsigned)~ACC_WRITE_MASK | dirty_as_writable);
}
static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
int *nr_present) int *nr_present)
{ {
......
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
#define PT_PCD_MASK (1ULL << 4) #define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_SHIFT 5 #define PT_ACCESSED_SHIFT 5
#define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
#define PT_DIRTY_MASK (1ULL << 6) #define PT_DIRTY_SHIFT 6
#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
#define PT_PAGE_SIZE_MASK (1ULL << 7) #define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7) #define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8) #define PT_GLOBAL_MASK (1ULL << 8)
......
...@@ -101,14 +101,11 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ...@@ -101,14 +101,11 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
return (ret != orig_pte); return (ret != orig_pte);
} }
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte, static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
bool last)
{ {
unsigned access; unsigned access;
access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
if (last && !is_dirty_gpte(gpte))
access &= ~ACC_WRITE_MASK;
#if PTTYPE == 64 #if PTTYPE == 64
if (vcpu->arch.mmu.nx) if (vcpu->arch.mmu.nx)
...@@ -222,8 +219,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, ...@@ -222,8 +219,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
if (last_gpte) { if (last_gpte) {
pte_access = pt_access & pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
FNAME(gpte_access)(vcpu, pte, true);
/* check if the kernel is fetching from user page */ /* check if the kernel is fetching from user page */
if (unlikely(pte_access & PT_USER_MASK) && if (unlikely(pte_access & PT_USER_MASK) &&
kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
...@@ -274,7 +270,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, ...@@ -274,7 +270,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
break; break;
} }
pt_access &= FNAME(gpte_access)(vcpu, pte, false); pt_access &= FNAME(gpte_access)(vcpu, pte);
--walker->level; --walker->level;
} }
...@@ -283,7 +279,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, ...@@ -283,7 +279,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
goto error; goto error;
} }
if (write_fault && unlikely(!is_dirty_gpte(pte))) { if (!write_fault)
protect_clean_gpte(&pte_access, pte);
else if (unlikely(!is_dirty_gpte(pte))) {
int ret; int ret;
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
...@@ -368,7 +366,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, ...@@ -368,7 +366,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return; return;
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
protect_clean_gpte(&pte_access, gpte);
pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
if (mmu_invalid_pfn(pfn)) if (mmu_invalid_pfn(pfn))
return; return;
...@@ -441,8 +440,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, ...@@ -441,8 +440,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
continue; continue;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
true); protect_clean_gpte(&pte_access, gpte);
gfn = gpte_to_gfn(gpte); gfn = gpte_to_gfn(gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
pte_access & ACC_WRITE_MASK); pte_access & ACC_WRITE_MASK);
...@@ -794,7 +793,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ...@@ -794,7 +793,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte); gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access; pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(vcpu, gpte, true); pte_access &= FNAME(gpte_access)(vcpu, gpte);
protect_clean_gpte(&pte_access, gpte);
if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
continue; continue;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册