Commit 78809a68 authored by Radim Krčmář

Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

KVM/PPC update for 4.14

There are various minor fixes and cleanups.  The only new feature is
that we now export information about storage key support to userspace,
so it can advertise it to the guest.

I have pulled in Michael Ellerman's topic/ppc-kvm branch from the
powerpc tree to get a couple of fixes that touch both KVM PPC code and
other PPC code.  That's why there is some arch/powerpc stuff in the
diffstat that isn't arch/powerpc/kvm.
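
Since the storage-key feature is the user-visible piece here, a quick illustration may help: the counts land in struct kvm_ppc_smmu_info (last hunk below) and are fetched through the existing KVM_PPC_GET_SMMU_INFO vm ioctl. A minimal userspace sketch, with error handling trimmed and assuming a kernel with this series applied:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_ppc_smmu_info info;
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);	/* default VM type */

	if (ioctl(vm, KVM_PPC_GET_SMMU_INFO, &info) < 0) {
		perror("KVM_PPC_GET_SMMU_INFO");
		return 1;
	}
	/* with this series, an HPT-mode host reports 32 data keys and,
	 * on POWER8/POWER9, 32 instruction keys as well */
	printf("data keys: %u, instr keys: %u\n",
	       info.data_keys, info.instr_keys);
	return 0;
}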
@@ -104,6 +104,7 @@
 #define HPTE_R_C		ASM_CONST(0x0000000000000080)
 #define HPTE_R_R		ASM_CONST(0x0000000000000100)
 #define HPTE_R_KEY_LO		ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY		(HPTE_R_KEY_LO | HPTE_R_KEY_HI)
 #define HPTE_V_1TB_SEG		ASM_CONST(0x4000000000000000)
 #define HPTE_V_VRMA_MASK	ASM_CONST(0x4001ffffff000000)
......
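The new HPTE_R_KEY mask covers both halves of the 5-bit storage key: HPTE_R_KEY_HI (0x3000000000000000, defined a few lines above this hunk) holds the upper two bits and HPTE_R_KEY_LO the lower three, which is where the 32 keys advertised later in the series come from. A hypothetical helper, not part of this series, showing how the key number can be assembled from the second HPTE doubleword:

/* hypothetical helper: HPTE_R_KEY_HI >> 57 puts its two bits at
 * positions 3-4, above the three HPTE_R_KEY_LO bits shifted down
 * from bit 9; together they form the 5-bit key number (0-31). */
static inline unsigned int hpte_get_skey(unsigned long hpte_r)
{
	return ((hpte_r & HPTE_R_KEY_HI) >> 57) |
	       ((hpte_r & HPTE_R_KEY_LO) >> 9);
}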
@@ -104,6 +104,10 @@ struct kvmppc_host_state {
 	u8 napping;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/*
+	 * hwthread_req/hwthread_state pair is used to pull sibling threads
+	 * out of guest on pre-ISAv3.0B CPUs where threads share MMU.
+	 */
 	u8 hwthread_req;
 	u8 hwthread_state;
 	u8 host_ipi;
......
@@ -66,16 +66,8 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_large(pmd)		0
 #endif
-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-				   bool *is_thp, unsigned *shift);
-static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-					       bool *is_thp, unsigned *shift)
-{
-	VM_WARN(!arch_irqs_disabled(),
-		"%s called with irq enabled\n", __func__);
-	return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
-}
 /* can we use this in kvm */
 unsigned long vmalloc_to_phys(void *vmalloc_addr);
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
......
+#ifndef _ASM_POWERPC_PTE_WALK_H
+#define _ASM_POWERPC_PTE_WALK_H
+
+#include <linux/sched.h>
+
+/* Don't use this directly */
+extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+			       bool *is_thp, unsigned *hshift);
+
+static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
+				    bool *is_thp, unsigned *hshift)
+{
+	VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
+	return __find_linux_pte(pgdir, ea, is_thp, hshift);
+}
+
+static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
+{
+	pgd_t *pgdir = init_mm.pgd;
+	return __find_linux_pte(pgdir, ea, NULL, hshift);
+}
+
+/*
+ * This is what we should always use. Any other lockless page table lookup needs
+ * careful audit against THP split.
+ */
+static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
+					 bool *is_thp, unsigned *hshift)
+{
+	VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
+	VM_WARN(pgdir != current->mm->pgd,
+		"%s lock less page table lookup called on wrong mm\n", __func__);
+	return __find_linux_pte(pgdir, ea, is_thp, hshift);
+}
+
+#endif /* _ASM_POWERPC_PTE_WALK_H */
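
The new header makes the locking contract explicit: __find_linux_pte() is the raw walker, and each wrapper documents why the walk is safe (interrupts disabled for the current mm, init_mm never freeing page tables, or real mode with MSR_EE = 0). A sketch of the common calling pattern, modelled on the call sites converted below; the function itself is illustrative, not from the series:

/* illustrative caller: the pattern used by the converted call sites */
static bool hva_pte_present(unsigned long hva)
{
	unsigned long flags;
	unsigned int shift;
	pte_t *ptep;
	bool ret = false;

	/* IRQs off blocks THP split/collapse and page-table freeing */
	local_irq_save(flags);
	ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, &shift);
	if (ptep)
		ret = pte_present(READ_ONCE(*ptep));
	local_irq_restore(flags);
	return ret;
}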
@@ -44,6 +44,7 @@
 #include <asm/machdep.h>
 #include <asm/ppc-pci.h>
 #include <asm/rtas.h>
+#include <asm/pte-walk.h>

 /** Overview:
@@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
 	 * page table free, because of init_mm.
 	 */
-	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
-					   NULL, &hugepage_shift);
+	ptep = find_init_mm_pte(token, &hugepage_shift);
 	if (!ptep)
 		return token;
 	WARN_ON(hugepage_shift);
......
@@ -242,13 +242,20 @@ enter_winkle:
 /*
  * r3 - PSSCR value corresponding to the requested stop state.
  */
-power_enter_stop:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/* Tell KVM we're entering idle */
+power_enter_stop_kvm_rm:
+	/*
+	 * This is currently unused because POWER9 KVM does not have to
+	 * gather secondary threads into sibling mode, but the code is
+	 * here in case that function is required.
+	 *
+	 * Tell KVM we're entering idle.
+	 */
 	li	r4,KVM_HWTHREAD_IN_IDLE
 	/* DO THIS IN REAL MODE! See comment above. */
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
+power_enter_stop:
 	/*
 	 * Check if we are executing the lite variant with ESL=EC=0
 	 */
@@ -411,6 +418,18 @@ pnv_powersave_wakeup_mce:
 	b	pnv_powersave_wakeup

+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm_start_guest_check:
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beqlr
+	b	kvm_start_guest
+#endif
+
 /*
  * Called from reset vector for powersave wakeups.
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -435,15 +454,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	mr	r3,r12
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	b	kvm_start_guest
-1:
+BEGIN_FTR_SECTION
+	bl	kvm_start_guest_check
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #endif

 	/* Return SRR1 from power7_nap() */
......
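For readers following the hwthread_req/hwthread_state changes here and in book3s_hv.c further down: the two PACA fields form a handshake between KVM and the idle loop on pre-ISAv3.0B CPUs where sibling threads share the MMU, and the sync above is the idle-side half of a barrier pairing. Roughly, in simplified C (the field and constant names are real; the control flow is condensed from kvmppc_grab_hwthread() and kvm_start_guest_check):

/* grabbing side, cf. kvmppc_grab_hwthread() in book3s_hv.c (simplified) */
tpaca->kvm_hstate.hwthread_req = 1;	/* ask the sibling to come to KVM */
smp_mb();				/* pairs with the sync in kvm_start_guest_check */
while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL)
	udelay(1);			/* wait for the sibling to go back to nap */

/* idle-thread side, a C rendering of kvm_start_guest_check above */
local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
smp_mb();				/* the "sync": order the store vs. the load */
if (local_paca->kvm_hstate.hwthread_req)
	kvm_start_guest();		/* otherwise return to the idle loop */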
@@ -19,6 +19,8 @@
 #include <asm/pgtable.h>
 #include <asm/ppc-pci.h>
 #include <asm/io-workarounds.h>
+#include <asm/pte-walk.h>

 #define IOWA_MAX_BUS 8
@@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 		 * We won't find huge pages here (iomem). Also can't hit
 		 * a page table free due to init_mm
 		 */
-		ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
-						   NULL, &hugepage_shift);
+		ptep = find_init_mm_pte(vaddr, &hugepage_shift);
 		if (ptep == NULL)
 			paddr = 0;
 		else {
......
@@ -37,6 +37,7 @@
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
+#include <asm/pte-walk.h>

 #include "trace_hv.h"
@@ -599,8 +600,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * hugepage split and collapse.
 			 */
 			local_irq_save(flags);
-			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, NULL, NULL);
+			ptep = find_current_mm_pte(current->mm->pgd,
+						   hva, NULL, NULL);
 			if (ptep) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (__pte_write(pte))
@@ -1940,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
 	rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
 	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
 	if (ret < 0) {
+		kfree(ctx);
 		kvm_put_kvm(kvm);
 		return ret;
 	}
......
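The kfree(ctx) line fixes a leak on the error path: anon_inode_getfd() transfers ownership of ctx to the new file's release handler only on success, so a failing call leaves the allocation with the caller. The general shape of the fixed path, sketched:

/* shape of the fixed error path (sketch; ctx is the allocated context) */
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
	kfree(ctx);		/* no fd was installed, ctx is still ours */
	kvm_put_kvm(kvm);
	return ret;
}
/* from here on, the fd's release handler is responsible for ctx */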
@@ -17,6 +17,7 @@
 #include <asm/mmu.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/pte-walk.h>

 /*
  * Supported radix tree geometry.
@@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (writing)
 			pgflags |= _PAGE_DIRTY;
 		local_irq_save(flags);
-		ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva,
-						   NULL, NULL);
+		ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
 		if (ptep) {
 			pte = READ_ONCE(*ptep);
 			if (pte_present(pte) &&
@@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			spin_unlock(&kvm->mmu_lock);
 			return RESUME_GUEST;
 		}
-		ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable,
-						   gpa, NULL, &shift);
+		/*
+		 * We are walking the secondary page table here. We can do this
+		 * without disabling irq.
+		 */
+		ptep = __find_linux_pte(kvm->arch.pgtable,
+					gpa, NULL, &shift);
 		if (ptep && pte_present(*ptep)) {
 			kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
 						gpa, shift);
@@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			pgflags |= _PAGE_WRITE;
 		} else {
 			local_irq_save(flags);
-			ptep = __find_linux_pte_or_hugepte(current->mm->pgd,
-							   hva, NULL, NULL);
+			ptep = find_current_mm_pte(current->mm->pgd,
+						   hva, NULL, NULL);
 			if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
 				pgflags |= _PAGE_WRITE;
 			local_irq_restore(flags);
@@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	unsigned int shift;
 	unsigned long old;

-	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
-					   NULL, &shift);
+	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep)) {
 		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
 					      gpa, shift);
@@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	unsigned int shift;
 	int ref = 0;

-	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
-					   NULL, &shift);
+	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
 		kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
 					gpa, shift);
@@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	unsigned int shift;
 	int ref = 0;

-	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
-					   NULL, &shift);
+	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_young(*ptep))
 		ref = 1;
 	return ref;
@@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
 	unsigned int shift;
 	int ret = 0;

-	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
-					   NULL, &shift);
+	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
 		ret = 1;
 		if (shift)
......
@@ -39,6 +39,7 @@
 #include <asm/udbg.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
+#include <asm/pte-walk.h>

 #ifdef CONFIG_BUG
@@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
 	pte_t *ptep, pte;
 	unsigned shift = 0;

-	ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
+	/*
+	 * Called in real mode with MSR_EE = 0. We are safe here.
+	 * It is ok to do the lookup with arch.pgdir here, because
+	 * we are doing this on secondary cpus and current task there
+	 * is not the hypervisor. Also this is safe against THP in the
+	 * host, because an IPI to primary thread will wait for the secondary
+	 * to exit which will again result in the below page table walk
+	 * to finish.
+	 */
+	ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
 	if (!ptep || !pte_present(*ptep))
 		return -ENXIO;
 	pte = *ptep;
......
@@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 	switch (subfunc) {
 	case H_VPA_REG_VPA:		/* register VPA */
-		if (len < sizeof(struct lppaca))
+		/*
+		 * The size of our lppaca is 1kB because of the way we align
+		 * it for the guest to avoid crossing a 4kB boundary. We only
+		 * use 640 bytes of the structure though, so we should accept
+		 * clients that set a size of 640.
+		 */
+		if (len < 640)
 			break;
 		vpap = &tvcpu->arch.vpa;
 		err = 0;
@@ -2111,6 +2117,15 @@ static int kvmppc_grab_hwthread(int cpu)
 	struct paca_struct *tpaca;
 	long timeout = 10000;

+	/*
+	 * ISA v3.0 idle routines do not set hwthread_state or test
+	 * hwthread_req, so they can not grab idle threads.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		WARN(1, "KVM: can not control sibling threads\n");
+		return -EBUSY;
+	}
+
 	tpaca = &paca[cpu];

 	/* Ensure the thread won't go into the kernel if it wakes */
@@ -2145,10 +2160,12 @@ static void kvmppc_release_hwthread(int cpu)
 	struct paca_struct *tpaca;

 	tpaca = &paca[cpu];
-	tpaca->kvm_hstate.hwthread_req = 0;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
 	tpaca->kvm_hstate.kvm_vcore = NULL;
 	tpaca->kvm_hstate.kvm_split_mode = NULL;
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		tpaca->kvm_hstate.hwthread_req = 0;
 }

 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
@@ -3325,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
 	if (radix_enabled())
 		return -EINVAL;

+	/*
+	 * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
+	 * POWER7 doesn't support keys for instruction accesses,
+	 * POWER8 and POWER9 do.
+	 */
+	info->data_keys = 32;
+	info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
+
 	info->flags = KVM_PPC_PAGE_SIZES_REAL;
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		info->flags |= KVM_PPC_1T_SEGMENTS;
......
@@ -22,6 +22,7 @@
 #include <asm/hvcall.h>
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
+#include <asm/pte-walk.h>

 /* Translate address of a vmalloc'd thing to a linear map address */
 static void *real_vmalloc_addr(void *x)
@@ -31,9 +32,9 @@ static void *real_vmalloc_addr(void *x)
 	/*
 	 * assume we don't have huge pages in vmalloc space...
 	 * So don't worry about THP collapse/split. Called
-	 * Only in realmode, hence won't need irq_save/restore.
+	 * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
 	 */
-	p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL);
+	p = find_init_mm_pte(addr, NULL);
 	if (!p || !pte_present(*p))
 		return NULL;
 	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
@@ -230,14 +231,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	 * If we had a page table change after lookup, we would
 	 * retry via mmu_notifier_retry.
 	 */
-	if (realmode)
-		ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL,
-						   &hpage_shift);
-	else {
+	if (!realmode)
 		local_irq_save(irq_flags);
-		ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL,
-						 &hpage_shift);
-	}
+	/*
+	 * If called in real mode we have MSR_EE = 0. Otherwise
+	 * we disable irq above.
+	 */
+	ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift);
 	if (ptep) {
 		pte_t pte;
 		unsigned int host_pte_size;
@@ -269,7 +269,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	if (!realmode)
 		local_irq_restore(irq_flags);

-	ptel &= ~(HPTE_R_PP0 - psize);
+	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
 	ptel |= pa;

 	if (pa)
......
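The ptel mask change at the end is what actually lets guest-supplied storage keys through H_ENTER; the old expression cleared the key bits together with everything between PP0 and the page-size bits. Working both masks for a 4K base page (psize = 0x1000), using the HPTE_R_* constants from the first hunk of the series:

/* old: ~(HPTE_R_PP0 - psize)
 *    = ~(0x8000000000000000 - 0x1000)
 *    =   0x8000000000000fff            -> key bits cleared
 * new:  HPTE_R_KEY | HPTE_R_PP0 | (psize - 1)
 *    =   0x3000000000000e00 | 0x8000000000000000 | 0xfff
 *    =   0xb000000000000fff            -> HPTE_R_KEY_HI/LO preserved
 */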
@@ -149,9 +149,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	subf	r4, r4, r3
 	mtspr	SPRN_DEC, r4

+BEGIN_FTR_SECTION
 	/* hwthread_req may have got set by cede or no vcpu, so clear it */
 	li	r0, 0
 	stb	r0, HSTATE_HWTHREAD_REQ(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)

 	/*
 	 * For external interrupts we need to call the Linux
@@ -314,6 +316,7 @@ kvm_novcpu_exit:
  * Relocation is off and most register values are lost.
  * r13 points to the PACA.
  * r3 contains the SRR1 wakeup value, SRR1 is trashed.
+ * This is not used by ISAv3.0B processors.
  */
 	.globl	kvm_start_guest
 kvm_start_guest:
@@ -432,6 +435,9 @@ kvm_secondary_got_guest:
  * While waiting we also need to check if we get given a vcpu to run.
  */
 kvm_no_guest:
+BEGIN_FTR_SECTION
+	twi	31,0,0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
 	cmpwi	r3, 0
 	bne	53f
@@ -976,7 +982,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 #ifdef CONFIG_KVM_XICS
 	/* We are entering the guest on that thread, push VCPU to XIVE */
 	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
-	cmpldi	cr0, r10, r0
+	cmpldi	cr0, r10, 0
 	beq	no_xive
 	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
 	li	r9, TM_QW1_OS
@@ -1280,7 +1286,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
 	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,0
+	EXTEND_HDEC(r3)
+	cmpdi	r3,0
 	mr	r4,r9
 	bge	fast_guest_return
 2:
@@ -2509,8 +2516,10 @@ kvm_do_nap:
 	clrrdi	r0, r0, 1
 	mtspr	SPRN_CTRLT, r0

+BEGIN_FTR_SECTION
 	li	r0,1
 	stb	r0,HSTATE_HWTHREAD_REQ(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mfspr	r5,SPRN_LPCR
 	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
 BEGIN_FTR_SECTION
......
@@ -455,16 +455,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
 	if (err)
 		goto free_vcpu;

-	if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+	if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) {
+		err = -ENOMEM;
 		goto uninit_vcpu;
+	}

 	err = kvmppc_e500_tlb_init(vcpu_e500);
 	if (err)
 		goto uninit_id;

 	vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-	if (!vcpu->arch.shared)
+	if (!vcpu->arch.shared) {
+		err = -ENOMEM;
 		goto uninit_tlb;
+	}

 	return vcpu;
......
@@ -30,6 +30,7 @@
 #include <linux/vmalloc.h>
 #include <linux/hugetlb.h>
 #include <asm/kvm_ppc.h>
+#include <asm/pte-walk.h>

 #include "e500.h"
 #include "timing.h"
@@ -476,7 +477,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	 * can't run hence pfn won't change.
 	 */
 	local_irq_save(flags);
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, NULL);
+	ptep = find_linux_pte(pgdir, hva, NULL, NULL);
 	if (ptep) {
 		pte_t pte = READ_ONCE(*ptep);
......
@@ -331,8 +331,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
 		goto uninit_vcpu;

 	vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-	if (!vcpu->arch.shared)
+	if (!vcpu->arch.shared) {
+		err = -ENOMEM;
 		goto uninit_tlb;
+	}

 	return vcpu;
......
@@ -61,6 +61,7 @@
 #include <asm/tm.h>
 #include <asm/trace.h>
 #include <asm/ps3.h>
+#include <asm/pte-walk.h>

 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -1297,7 +1298,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 #endif /* CONFIG_PPC_64K_PAGES */

 	/* Get PTE and page size from page tables */
-	ptep = __find_linux_pte_or_hugepte(pgdir, ea, &is_thp, &hugeshift);
+	ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift);
 	if (ptep == NULL || !pte_present(*ptep)) {
 		DBG_LOW(" no PTE !\n");
 		rc = 1;
@@ -1526,7 +1527,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	 * THP pages use update_mmu_cache_pmd. We don't do
 	 * hash preload there. Hence can ignore THP here
 	 */
-	ptep = find_linux_pte_or_hugepte(pgdir, ea, NULL, &hugepage_shift);
+	ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift);
 	if (!ptep)
 		goto out_exit;
......
@@ -24,6 +24,8 @@
 #include <asm/tlb.h>
 #include <asm/setup.h>
 #include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+
 #ifdef CONFIG_HUGETLB_PAGE
@@ -60,8 +62,11 @@ static unsigned nr_gpages;

 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
-	/* Only called for hugetlbfs pages, hence can ignore THP */
-	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
+	/*
+	 * Only called for hugetlbfs pages, hence can ignore THP and the
+	 * irq disabled walk.
+	 */
+	return __find_linux_pte(mm->pgd, addr, NULL, NULL);
 }

 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -886,9 +891,8 @@ void flush_dcache_icache_hugepage(struct page *page)
  * This function needs to be called with interrupts disabled. We use this variant
  * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
  */
-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-				   bool *is_thp, unsigned *shift)
+pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+			bool *is_thp, unsigned *hpage_shift)
 {
 	pgd_t pgd, *pgdp;
 	pud_t pud, *pudp;
@@ -897,8 +901,8 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 	hugepd_t *hpdp = NULL;
 	unsigned pdshift = PGDIR_SHIFT;

-	if (shift)
-		*shift = 0;
+	if (hpage_shift)
+		*hpage_shift = 0;
 	if (is_thp)
 		*is_thp = false;
@@ -968,11 +972,11 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 		ret_pte = hugepte_offset(*hpdp, ea, pdshift);
 		pdshift = hugepd_shift(*hpdp);
 out:
-	if (shift)
-		*shift = pdshift;
+	if (hpage_shift)
+		*hpage_shift = pdshift;
 	return ret_pte;
 }
-EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
+EXPORT_SYMBOL_GPL(__find_linux_pte);

 int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
......
@@ -29,6 +29,8 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/bug.h>
+#include <asm/pte-walk.h>
+
 #include <trace/events/thp.h>
@@ -207,8 +209,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 	local_irq_save(flags);
 	arch_enter_lazy_mmu_mode();
 	for (; start < end; start += PAGE_SIZE) {
-		pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, &is_thp,
-							&hugepage_shift);
+		pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp,
+						  &hugepage_shift);
 		unsigned long pte;

 		if (ptep == NULL)
......
@@ -22,6 +22,7 @@
 #ifdef CONFIG_PPC64
 #include "../kernel/ppc32.h"
 #endif
+#include <asm/pte-walk.h>

 /*
@@ -127,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 		return -EFAULT;

 	local_irq_save(flags);
-	ptep = find_linux_pte_or_hugepte(pgdir, addr, NULL, &shift);
+	ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
 	if (!ptep)
 		goto err_out;
 	if (!shift)
......
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
 struct kvm_ppc_smmu_info {
 	__u64 flags;
 	__u32 slb_size;
-	__u32 pad;
+	__u16 data_keys;	/* # storage keys supported for data */
+	__u16 instr_keys;	/* # storage keys supported for instructions */
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
......
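One layout observation on this last hunk, worth noting for userspace consumers of the struct:

#include <linux/types.h>

/* The two new __u16 fields occupy exactly the old __u32 pad, keeping
 * struct kvm_ppc_smmu_info's size and field offsets unchanged. Kernels
 * without this patch return zeroed padding here, so userspace can treat
 * data_keys == 0 as "key counts not reported". (Illustrative check.) */
_Static_assert(2 * sizeof(__u16) == sizeof(__u32),
	       "data_keys + instr_keys must exactly replace the old pad");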