diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index c21e46da4a3baa61cf04576ffac62aa0dbeb17c8..b0c08b142770d91ad87fddd15bf6add06911bccd 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -121,6 +121,22 @@ static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; } +static inline int hpte_is_writable(unsigned long ptel) +{ + unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP); + + return pp != PP_RXRX && pp != PP_RXXX; +} + +static inline unsigned long hpte_make_readonly(unsigned long ptel) +{ + if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX) + ptel = (ptel & ~HPTE_R_PP) | PP_RXXX; + else + ptel |= PP_RXRX; + return ptel; +} + static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) { unsigned int wimg = ptel & HPTE_R_WIMG; @@ -140,7 +156,7 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) * Lock and read a linux PTE. If it's present and writable, atomically * set dirty and referenced bits and return the PTE, otherwise return 0. */ -static inline pte_t kvmppc_read_update_linux_pte(pte_t *p) +static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing) { pte_t pte, tmp; @@ -158,7 +174,7 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p) if (pte_present(pte)) { pte = pte_mkyoung(pte); - if (pte_write(pte)) + if (writing && pte_write(pte)) pte = pte_mkdirty(pte); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 83761dd8a924141aa2bf507210612ad75eb6036e..66d6452c1081fa2badbbf37b19332d765d0e53d8 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -503,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct page *page, *pages[1]; long index, ret, npages; unsigned long is_io; + unsigned int writing, write_ok; struct vm_area_struct *vma; /* @@ -553,8 +554,11 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pfn = 0; page = NULL; pte_size = PAGE_SIZE; + writing = (dsisr & DSISR_ISSTORE) != 0; + /* If writing != 0, then the HPTE must allow writing, if we get here */ + write_ok = writing; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); + npages = get_user_pages_fast(hva, 1, writing, pages); if (npages < 1) { /* Check if it's an I/O mapping */ down_read(¤t->mm->mmap_sem); @@ -565,6 +569,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ((hva - vma->vm_start) >> PAGE_SHIFT); pte_size = psize; is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); + write_ok = vma->vm_flags & VM_WRITE; } up_read(¤t->mm->mmap_sem); if (!pfn) @@ -575,6 +580,24 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, page = compound_head(page); pte_size <<= compound_order(page); } + /* if the guest wants write access, see if that is OK */ + if (!writing && hpte_is_writable(r)) { + pte_t *ptep, pte; + + /* + * We need to protect against page table destruction + * while looking up and updating the pte. + */ + rcu_read_lock_sched(); + ptep = find_linux_pte_or_hugepte(current->mm->pgd, + hva, NULL); + if (ptep && pte_present(*ptep)) { + pte = kvmppc_read_update_linux_pte(ptep, 1); + if (pte_write(pte)) + write_ok = 1; + } + rcu_read_unlock_sched(); + } pfn = page_to_pfn(page); } @@ -595,6 +618,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Set the HPTE to point to pfn */ r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); + if (hpte_is_writable(r) && !write_ok) + r = hpte_make_readonly(r); ret = RESUME_GUEST; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) @@ -614,14 +639,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unlock_rmap(rmap); goto out_unlock; } - kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); + + if (hptep[0] & HPTE_V_VALID) { + /* HPTE was previously valid, so we need to invalidate it */ + unlock_rmap(rmap); + hptep[0] |= HPTE_V_ABSENT; + kvmppc_invalidate_hpte(kvm, hptep, index); + } else { + kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); + } hptep[1] = r; eieio(); hptep[0] = hpte[0]; asm volatile("ptesync" : : : "memory"); preempt_enable(); - if (page) + if (page && hpte_is_writable(r)) SetPageDirty(page); out_put: diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 81d16ed9767df248d8579541386de4e81ee61c1e..d3e36fc77e2b8ecf84405a2c2935296cf2ebd817 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -120,7 +120,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, } static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva, - unsigned long *pte_sizep) + int writing, unsigned long *pte_sizep) { pte_t *ptep; unsigned long ps = *pte_sizep; @@ -137,7 +137,7 @@ static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva, return __pte(0); if (!pte_present(*ptep)) return __pte(0); - return kvmppc_read_update_linux_pte(ptep); + return kvmppc_read_update_linux_pte(ptep, writing); } long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, @@ -154,12 +154,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long is_io; unsigned long *rmap; pte_t pte; + unsigned int writing; unsigned long mmu_seq; bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; psize = hpte_page_size(pteh, ptel); if (!psize) return H_PARAMETER; + writing = hpte_is_writable(ptel); pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); /* used later to detect if we might have been invalidated */ @@ -208,8 +210,11 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, /* Look up the Linux PTE for the backing page */ pte_size = psize; - pte = lookup_linux_pte(vcpu, hva, &pte_size); + pte = lookup_linux_pte(vcpu, hva, writing, &pte_size); if (pte_present(pte)) { + if (writing && !pte_write(pte)) + /* make the actual HPTE be read-only */ + ptel = hpte_make_readonly(ptel); is_io = hpte_cache_bits(pte_val(pte)); pa = pte_pfn(pte) << PAGE_SHIFT; } @@ -678,7 +683,9 @@ EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte); /* * Called in real mode to check whether an HPTE not found fault - * is due to accessing a paged-out page or an emulated MMIO page. + * is due to accessing a paged-out page or an emulated MMIO page, + * or if a protection fault is due to accessing a page that the + * guest wanted read/write access to but which we made read-only. * Returns a possibly modified status (DSISR) value if not * (i.e. pass the interrupt to the guest), * -1 to pass the fault up to host kernel mode code, -2 to do that @@ -696,12 +703,17 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, struct revmap_entry *rev; unsigned long pp, key; - valid = HPTE_V_VALID | HPTE_V_ABSENT; + /* For protection fault, expect to find a valid HPTE */ + valid = HPTE_V_VALID; + if (status & DSISR_NOHPTE) + valid |= HPTE_V_ABSENT; index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); - if (index < 0) - return status; /* there really was no HPTE */ - + if (index < 0) { + if (status & DSISR_NOHPTE) + return status; /* there really was no HPTE */ + return 0; /* for prot fault, HPTE disappeared */ + } hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); v = hpte[0] & ~HPTE_V_HVLOCK; r = hpte[1]; @@ -712,8 +724,8 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, asm volatile("lwsync" : : : "memory"); hpte[0] = v; - /* If the HPTE is valid by now, retry the instruction */ - if (v & HPTE_V_VALID) + /* For not found, if the HPTE is valid by now, retry the instruction */ + if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) return 0; /* Check access permissions to the page */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 7d4990665d00454e52af07cded97cbe0a16efce0..b70bf22a3ff39615c90d82c78764f897d4fa1677 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1114,8 +1114,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) kvmppc_hdsi: mfspr r4, SPRN_HDAR mfspr r6, SPRN_HDSISR - /* HPTE not found fault? */ - andis. r0, r6, DSISR_NOHPTE@h + /* HPTE not found fault or protection fault? */ + andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h beq 1f /* if not, send it to the guest */ andi. r0, r11, MSR_DR /* data relocation enabled? */ beq 3f