diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 6981a52b3887079fe2b2870b1fcc78b17e909878..5c6b18a71d55a68ab8ea4b9234520a465ee418d8 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -104,6 +104,7 @@ #define HPTE_R_C ASM_CONST(0x0000000000000080) #define HPTE_R_R ASM_CONST(0x0000000000000100) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) +#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 7cea76f11c26c66fc52e08aae5b37e6534ac79a8..83596f32f50b49672d7ced5f3ee806c921406114 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -104,6 +104,10 @@ struct kvmppc_host_state { u8 napping; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * hwthread_req/hwthread_state pair is used to pull sibling threads + * out of guest on pre-ISAv3.0B CPUs where threads share MMU. + */ u8 hwthread_req; u8 hwthread_state; u8 host_ipi; diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index afae9a336136a4916c9912d3fd7f8c8f268406f2..eb9d57defb7509314bb693aa99f98c0e05a85a8a 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -66,16 +66,8 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif -pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *shift); -static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *shift) -{ - VM_WARN(!arch_irqs_disabled(), - "%s called with irq enabled\n", __func__); - return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); -} +/* can we use this in kvm */ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h new file mode 100644 index 0000000000000000000000000000000000000000..2d633e9d686c4f9d3b4ea39ebd160d93806467a7 --- /dev/null +++ b/arch/powerpc/include/asm/pte-walk.h @@ -0,0 +1,35 @@ +#ifndef _ASM_POWERPC_PTE_WALK_H +#define _ASM_POWERPC_PTE_WALK_H + +#include + +/* Don't use this directly */ +extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift); + +static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift) +{ + VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); + return __find_linux_pte(pgdir, ea, is_thp, hshift); +} + +static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) +{ + pgd_t *pgdir = init_mm.pgd; + return __find_linux_pte(pgdir, ea, NULL, hshift); +} +/* + * This is what we should always use. Any other lockless page table lookup needs + * careful audit against THP split. + */ +static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift) +{ + VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); + VM_WARN(pgdir != current->mm->pgd, + "%s lock less page table lookup called on wrong mm\n", __func__); + return __find_linux_pte(pgdir, ea, is_thp, hshift); +} + +#endif /* _ASM_POWERPC_PTE_WALK_H */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 63992b2d8e15684a17e05981fbfd8260ee45af50..5e6887c40528530a04bfb5c3b86be1b513b80b4a 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -44,6 +44,7 @@ #include #include #include +#include /** Overview: @@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) * worried about _PAGE_SPLITTING/collapse. Also we will not hit * page table free, because of init_mm. */ - ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, - NULL, &hugepage_shift); + ptep = find_init_mm_pte(token, &hugepage_shift); if (!ptep) return token; WARN_ON(hugepage_shift); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index e6252c5a57a4a06f10bd13710ceecdcf72c3d26e..67ef4bb916d6c6498bb211e6774be9696423b7a6 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -242,13 +242,20 @@ enter_winkle: /* * r3 - PSSCR value corresponding to the requested stop state. */ -power_enter_stop: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ +power_enter_stop_kvm_rm: + /* + * This is currently unused because POWER9 KVM does not have to + * gather secondary threads into sibling mode, but the code is + * here in case that function is required. + * + * Tell KVM we're entering idle. + */ li r4,KVM_HWTHREAD_IN_IDLE /* DO THIS IN REAL MODE! See comment above. */ stb r4,HSTATE_HWTHREAD_STATE(r13) #endif +power_enter_stop: /* * Check if we are executing the lite variant with ESL=EC=0 */ @@ -411,6 +418,18 @@ pnv_powersave_wakeup_mce: b pnv_powersave_wakeup +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +kvm_start_guest_check: + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beqlr + b kvm_start_guest +#endif + /* * Called from reset vector for powersave wakeups. * cr3 - set to gt if waking up with partial/complete hypervisor state loss @@ -435,15 +454,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mr r3,r12 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 1f - b kvm_start_guest -1: +BEGIN_FTR_SECTION + bl kvm_start_guest_check +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #endif /* Return SRR1 from power7_nap() */ diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index a582e0d4252552c1a0e40ec97fbbc4bcfa439864..bbe85f5aea71bd93c00a65ff8950031c8f8faf9f 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -19,6 +19,8 @@ #include #include #include +#include + #define IOWA_MAX_BUS 8 @@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) * We won't find huge pages here (iomem). Also can't hit * a page table free due to init_mm */ - ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr, - NULL, &hugepage_shift); + ptep = find_init_mm_pte(vaddr, &hugepage_shift); if (ptep == NULL) paddr = 0; else { diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index b42812e014c04b2fc96cdf5d183c23df0ee88b53..7c62967d672caa818d12c26620520603fb3c49c7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "trace_hv.h" @@ -599,8 +600,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * hugepage split and collapse. */ local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, + hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); if (__pte_write(pte)) @@ -1940,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); if (ret < 0) { + kfree(ctx); kvm_put_kvm(kvm); return ret; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index f6b3e67c576294f96cbb5a5e5178f09563307c1b..7d719c8aa0bb02e820bb90e6116cf9b35f75cb13 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * Supported radix tree geometry. @@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (writing) pgflags |= _PAGE_DIRTY; local_irq_save(flags); - ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva, - NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL); if (ptep) { pte = READ_ONCE(*ptep); if (pte_present(pte) && @@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, spin_unlock(&kvm->mmu_lock); return RESUME_GUEST; } - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, - gpa, NULL, &shift); + /* + * We are walking the secondary page table here. We can do this + * without disabling irq. + */ + ptep = __find_linux_pte(kvm->arch.pgtable, + gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); @@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pgflags |= _PAGE_WRITE; } else { local_irq_save(flags); - ptep = __find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, + hva, NULL, NULL); if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) pgflags |= _PAGE_WRITE; local_irq_restore(flags); @@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; unsigned long old; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, gpa, shift); @@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; int ref = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift); @@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; int ref = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) ref = 1; return ref; @@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, unsigned int shift; int ret = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { ret = 1; if (shift) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 3adfd2f5301c686762007e496717d453c258d37c..c32e9bfe75b1abbf6b27a574f91bf5b3fcaf5e66 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_BUG @@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, pte_t *ptep, pte; unsigned shift = 0; - ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift); + /* + * Called in real mode with MSR_EE = 0. We are safe here. + * It is ok to do the lookup with arch.pgdir here, because + * we are doing this on secondary cpus and current task there + * is not the hypervisor. Also this is safe against THP in the + * host, because an IPI to primary thread will wait for the secondary + * to exit which will agains result in the below page table walk + * to finish. + */ + ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); if (!ptep || !pte_present(*ptep)) return -ENXIO; pte = *ptep; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 359c79cdf0cc821d87a4e4322177294648b42ee3..18e974a34fced58328ecb062fee539279f3df7cd 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, switch (subfunc) { case H_VPA_REG_VPA: /* register VPA */ - if (len < sizeof(struct lppaca)) + /* + * The size of our lppaca is 1kB because of the way we align + * it for the guest to avoid crossing a 4kB boundary. We only + * use 640 bytes of the structure though, so we should accept + * clients that set a size of 640. + */ + if (len < 640) break; vpap = &tvcpu->arch.vpa; err = 0; @@ -2111,6 +2117,15 @@ static int kvmppc_grab_hwthread(int cpu) struct paca_struct *tpaca; long timeout = 10000; + /* + * ISA v3.0 idle routines do not set hwthread_state or test + * hwthread_req, so they can not grab idle threads. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + WARN(1, "KVM: can not control sibling threads\n"); + return -EBUSY; + } + tpaca = &paca[cpu]; /* Ensure the thread won't go into the kernel if it wakes */ @@ -2145,10 +2160,12 @@ static void kvmppc_release_hwthread(int cpu) struct paca_struct *tpaca; tpaca = &paca[cpu]; - tpaca->kvm_hstate.hwthread_req = 0; tpaca->kvm_hstate.kvm_vcpu = NULL; tpaca->kvm_hstate.kvm_vcore = NULL; tpaca->kvm_hstate.kvm_split_mode = NULL; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + tpaca->kvm_hstate.hwthread_req = 0; + } static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) @@ -3325,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, if (radix_enabled()) return -EINVAL; + /* + * POWER7, POWER8 and POWER9 all support 32 storage keys for data. + * POWER7 doesn't support keys for instruction accesses, + * POWER8 and POWER9 do. + */ + info->data_keys = 32; + info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0; + info->flags = KVM_PPC_PAGE_SIZES_REAL; if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) info->flags |= KVM_PPC_1T_SEGMENTS; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 584c74c8119f0db6f4cb76023dfa07ee26a941bd..4efe364f11881b573acb1c18356be40bd34a553a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -22,6 +22,7 @@ #include #include #include +#include /* Translate address of a vmalloc'd thing to a linear map address */ static void *real_vmalloc_addr(void *x) @@ -31,9 +32,9 @@ static void *real_vmalloc_addr(void *x) /* * assume we don't have huge pages in vmalloc space... * So don't worry about THP collapse/split. Called - * Only in realmode, hence won't need irq_save/restore. + * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. */ - p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL); + p = find_init_mm_pte(addr, NULL); if (!p || !pte_present(*p)) return NULL; addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); @@ -230,14 +231,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, * If we had a page table table change after lookup, we would * retry via mmu_notifier_retry. */ - if (realmode) - ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL, - &hpage_shift); - else { + if (!realmode) local_irq_save(irq_flags); - ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, - &hpage_shift); - } + /* + * If called in real mode we have MSR_EE = 0. Otherwise + * we disable irq above. + */ + ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; @@ -269,7 +269,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, if (!realmode) local_irq_restore(irq_flags); - ptel &= ~(HPTE_R_PP0 - psize); + ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); ptel |= pa; if (pa) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c52184a8efdf025c1efffc6dd7310e9374dca630..9dd6b54a43dcf9a55b6dde00a16ae2d7115730a7 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -149,9 +149,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) subf r4, r4, r3 mtspr SPRN_DEC, r4 +BEGIN_FTR_SECTION /* hwthread_req may have got set by cede or no vcpu, so clear it */ li r0, 0 stb r0, HSTATE_HWTHREAD_REQ(r13) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* * For external interrupts we need to call the Linux @@ -314,6 +316,7 @@ kvm_novcpu_exit: * Relocation is off and most register values are lost. * r13 points to the PACA. * r3 contains the SRR1 wakeup value, SRR1 is trashed. + * This is not used by ISAv3.0B processors. */ .globl kvm_start_guest kvm_start_guest: @@ -432,6 +435,9 @@ kvm_secondary_got_guest: * While waiting we also need to check if we get given a vcpu to run. */ kvm_no_guest: +BEGIN_FTR_SECTION + twi 31,0,0 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r3, HSTATE_HWTHREAD_REQ(r13) cmpwi r3, 0 bne 53f @@ -976,7 +982,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) #ifdef CONFIG_KVM_XICS /* We are entering the guest on that thread, push VCPU to XIVE */ ld r10, HSTATE_XIVE_TIMA_PHYS(r13) - cmpldi cr0, r10, r0 + cmpldi cr0, r10, 0 beq no_xive ld r11, VCPU_XIVE_SAVED_STATE(r4) li r9, TM_QW1_OS @@ -1280,7 +1286,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER bne 2f mfspr r3,SPRN_HDEC - cmpwi r3,0 + EXTEND_HDEC(r3) + cmpdi r3,0 mr r4,r9 bge fast_guest_return 2: @@ -2509,8 +2516,10 @@ kvm_do_nap: clrrdi r0, r0, 1 mtspr SPRN_CTRLT, r0 +BEGIN_FTR_SECTION li r0,1 stb r0,HSTATE_HWTHREAD_REQ(r13) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mfspr r5,SPRN_LPCR ori r5,r5,LPCR_PECE0 | LPCR_PECE1 BEGIN_FTR_SECTION diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 32fdab57d604d02eb16886818b0631b6612e3e61..f9f6468f41712549666771fa730c24bdf6673684 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -455,16 +455,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, if (err) goto free_vcpu; - if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) { + err = -ENOMEM; goto uninit_vcpu; + } err = kvmppc_e500_tlb_init(vcpu_e500); if (err) goto uninit_id; vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!vcpu->arch.shared) + if (!vcpu->arch.shared) { + err = -ENOMEM; goto uninit_tlb; + } return vcpu; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 77fd043b3ecc50857ac25cc266fd559be715e9de..c6c734424c707002810ccf59d78c6cb670c74555 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "e500.h" #include "timing.h" @@ -476,7 +477,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, * can't run hence pfn won't change. */ local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, NULL); + ptep = find_linux_pte(pgdir, hva, NULL, NULL); if (ptep) { pte_t pte = READ_ONCE(*ptep); diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index f48a0c22e8f9024e87965df7b0821596d43c926a..d0b6b5788afcff7b15f63fb80f9d2c0c3d656d3b 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -331,8 +331,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, goto uninit_vcpu; vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - if (!vcpu->arch.shared) + if (!vcpu->arch.shared) { + err = -ENOMEM; goto uninit_tlb; + } return vcpu; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7a20669c19e7258d979d60a19f3acec378c9dbad..5b10b4fcbf76220044d9c49baeb92fd148f31790 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -61,6 +61,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -1297,7 +1298,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, #endif /* CONFIG_PPC_64K_PAGES */ /* Get PTE and page size from page tables */ - ptep = __find_linux_pte_or_hugepte(pgdir, ea, &is_thp, &hugeshift); + ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift); if (ptep == NULL || !pte_present(*ptep)) { DBG_LOW(" no PTE !\n"); rc = 1; @@ -1526,7 +1527,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, * THP pages use update_mmu_cache_pmd. We don't do * hash preload there. Hence can ignore THP here */ - ptep = find_linux_pte_or_hugepte(pgdir, ea, NULL, &hugepage_shift); + ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift); if (!ptep) goto out_exit; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index e1bf5ca397fe13c00ebe194e879a21ac759f8f28..70a3a2bdf06ce1d1805f21df514619e5fc4620ad 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -24,6 +24,8 @@ #include #include #include +#include + #ifdef CONFIG_HUGETLB_PAGE @@ -60,8 +62,11 @@ static unsigned nr_gpages; pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { - /* Only called for hugetlbfs pages, hence can ignore THP */ - return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL); + /* + * Only called for hugetlbfs pages, hence can ignore THP and the + * irq disabled walk. + */ + return __find_linux_pte(mm->pgd, addr, NULL, NULL); } static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, @@ -886,9 +891,8 @@ void flush_dcache_icache_hugepage(struct page *page) * This function need to be called with interrupts disabled. We use this variant * when we have MSR[EE] = 0 but the paca->soft_enabled = 1 */ - -pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *shift) +pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hpage_shift) { pgd_t pgd, *pgdp; pud_t pud, *pudp; @@ -897,8 +901,8 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, hugepd_t *hpdp = NULL; unsigned pdshift = PGDIR_SHIFT; - if (shift) - *shift = 0; + if (hpage_shift) + *hpage_shift = 0; if (is_thp) *is_thp = false; @@ -968,11 +972,11 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: - if (shift) - *shift = pdshift; + if (hpage_shift) + *hpage_shift = pdshift; return ret_pte; } -EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte); +EXPORT_SYMBOL_GPL(__find_linux_pte); int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index b5b0fb97b9c03c4e8bfa5ab71a3b153353aa2314..71cb2742bd16123ef9f9d4b18e0ccc9bde51a951 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -29,6 +29,8 @@ #include #include #include +#include + #include @@ -207,8 +209,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, local_irq_save(flags); arch_enter_lazy_mmu_mode(); for (; start < end; start += PAGE_SIZE) { - pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, &is_thp, - &hugepage_shift); + pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp, + &hugepage_shift); unsigned long pte; if (ptep == NULL) diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 0fc26714780acd95e3df8043c4a764d1e823aefc..0af051a1974e0e1151eb3964c327dc5ee8967c7d 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -22,6 +22,7 @@ #ifdef CONFIG_PPC64 #include "../kernel/ppc32.h" #endif +#include /* @@ -127,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb) return -EFAULT; local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(pgdir, addr, NULL, &shift); + ptep = find_current_mm_pte(pgdir, addr, NULL, &shift); if (!ptep) goto err_out; if (!shift) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6cd63c18708ae1d23dbc280ed49aed55f817a2f5..83888758741184f969f3b8fd1738e38bdc24da78 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size { struct kvm_ppc_smmu_info { __u64 flags; __u32 slb_size; - __u32 pad; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; };