Commit ba6a3541 authored by Paolo Bonzini, committed by Gleb Natapov

KVM: mmu: allow page tables to be in read-only slots

Page tables in a read-only memory slot will currently cause a triple
fault because the page walker uses gfn_to_hva and it fails on such a slot.

OVMF uses such a page table; however, real hardware seems to be fine with
that as long as the accessed/dirty bits are set.  Save whether the slot
is readonly, and later check it when updating the accessed and dirty bits.
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Parent 3261107e
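For context, a "read-only memory slot" is one registered with the KVM_MEM_READONLY flag. Below is a minimal userspace sketch, not part of this patch, of setting up the kind of slot that OVMF's page tables live in; KVM_MEM_READONLY and the KVM_SET_USER_MEMORY_REGION ioctl are standard KVM API, while the map_rom_readonly helper, the vm_fd argument, and the slot number are illustrative:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Illustrative only: map a page-aligned firmware image `rom` into the
 * guest at `gpa` as a read-only slot.  Guest reads are served from the
 * ROM; guest writes exit to userspace as MMIO.  Page tables placed in
 * such a slot must already have their accessed/dirty bits set. */
static int map_rom_readonly(int vm_fd, void *rom, __u64 gpa, __u64 size)
{
        struct kvm_userspace_memory_region region;

        memset(&region, 0, sizeof(region));
        region.slot = 1;                  /* hypothetical slot number */
        region.flags = KVM_MEM_READONLY;
        region.guest_phys_addr = gpa;
        region.memory_size = size;
        region.userspace_addr = (unsigned long)rom;

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}

Because writes to such a slot exit as MMIO rather than reaching the backing memory, the patch can only skip the accessed/dirty update for these slots, never perform it.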
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -99,6 +99,7 @@ struct guest_walker {
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
 	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+	bool pte_writable[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 		if (pte == orig_pte)
 			continue;
 
+		/*
+		 * If the slot is read-only, simply do not process the accessed
+		 * and dirty bits.  This is the correct thing to do if the slot
+		 * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+		 * are only supported if the accessed and dirty bits are already
+		 * set in the ROM (so that MMIO writes are never needed).
+		 *
+		 * Note that NPT does not allow this at all and faults, since
+		 * it always wants nested page table entries for the guest
+		 * page tables to be writable.  And EPT works but will simply
+		 * overwrite the read-only memory to set the accessed and dirty
+		 * bits.
+		 */
+		if (unlikely(!walker->pte_writable[level - 1]))
+			continue;
+
 		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
 		if (ret)
 			return ret;
@@ -309,7 +326,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 			goto error;
 		real_gfn = gpa_to_gfn(real_gfn);
 
-		host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+		host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+					    &walker->pte_writable[walker->level - 1]);
 		if (unlikely(kvm_is_error_hva(host_addr)))
 			goto error;
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -533,6 +533,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
- * The hva returned by this function is only allowed to be read.
- * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ * If writable is set to false, the hva returned by this function is only
+ * allowed to be read.
  */
-static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	if (writable)
+		*writable = !memslot_is_readonly(slot);
+
 	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 }
@@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 	int r;
 	unsigned long addr;
 
-	addr = gfn_to_hva_read(kvm, gfn);
+	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	r = kvm_read_hva(data, (void __user *)addr + offset, len);
@@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	int offset = offset_in_page(gpa);
 
-	addr = gfn_to_hva_read(kvm, gfn);
+	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	pagefault_disable();
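As a usage note, the contract of the new helper can be sketched as follows; this is illustrative kernel-style code, not from the patch, and update_gpte_example is a hypothetical name. Callers that only read through the returned hva pass NULL, as kvm_read_guest_page() does above, while the page walker saves the flag per level and consults it before touching the accessed/dirty bits:

/* Illustrative caller pattern (assumes kernel context with a valid
 * struct kvm *kvm; gfn_to_hva_prot and kvm_is_error_hva are the real
 * helpers touched by this patch). */
static int update_gpte_example(struct kvm *kvm, gfn_t gfn)
{
        bool writable;
        unsigned long hva = gfn_to_hva_prot(kvm, gfn, &writable);

        if (kvm_is_error_hva(hva))
                return -EFAULT;         /* gfn not backed by any memslot */

        if (!writable)
                return 0;               /* read-only slot: skip A/D updates */

        /* ... otherwise cmpxchg the accessed/dirty bits through hva ... */
        return 0;
}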