diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 930126698a0f5b0f6e2c458b53604d581b201063..310fe508d9cd6b95ab1ecb13471a0f80a3edc5e4 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1930,6 +1930,42 @@ The "pte_enc" field provides a value that can OR'ed into the hash PTE's RPN field (ie, it needs to be shifted left by 12 to OR it into the hash PTE second double word). + +4.75 KVM_PPC_ALLOCATE_HTAB + +Capability: KVM_CAP_PPC_ALLOC_HTAB +Architectures: powerpc +Type: vm ioctl +Parameters: Pointer to u32 containing hash table order (in/out) +Returns: 0 on success, -1 on error + +This requests the host kernel to allocate an MMU hash table for a +guest using the PAPR paravirtualization interface. This only does +anything if the kernel is configured to use the Book 3S HV style of +virtualization. Otherwise the capability doesn't exist and the ioctl +returns an ENOTTY error. The rest of this description assumes Book 3S +HV. + +There must be no vcpus running when this ioctl is called; if there +are, it will do nothing and return an EBUSY error. + +The parameter is a pointer to a 32-bit unsigned integer variable +containing the order (log base 2) of the desired size of the hash +table, which must be between 18 and 46. On successful return from the +ioctl, it will have been updated with the order of the hash table that +was allocated. + +If no hash table has been allocated when any vcpu is asked to run +(with the KVM_RUN ioctl), the host kernel will allocate a +default-sized hash table (16 MB). + +If this ioctl is called when a hash table has already been allocated, +the kernel will clear out the existing hash table (zero all HPTEs) and +return the hash table order in the parameter. (If the guest is using +the virtualized real-mode area (VRMA) facility, the kernel will +re-create the VMRA HPTEs on the next KVM_RUN of any vcpu.) + + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index b0c08b142770d91ad87fddd15bf6add06911bccd..0dd1d86d3e3168303e1e714977dc8439db86cb29 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -36,11 +36,8 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #define SPAPR_TCE_SHIFT 12 #ifdef CONFIG_KVM_BOOK3S_64_HV -/* For now use fixed-size 16MB page table */ -#define HPT_ORDER 24 -#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ -#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */ -#define HPT_HASH_MASK (HPT_NPTEG - 1) +#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ +extern int kvm_hpt_order; /* order of preallocated HPTs */ #endif #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d848cdc4971523936550b435b7e3d8e7f08bba60..dd783beb88b3e8fe577413117ee8ab80e259e630 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -237,6 +237,10 @@ struct kvm_arch { unsigned long vrma_slb_v; int rma_setup_done; int using_mmu_notifiers; + u32 hpt_order; + atomic_t vcpus_running; + unsigned long hpt_npte; + unsigned long hpt_mask; spinlock_t slot_phys_lock; unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; int slot_npages[KVM_MEM_SLOTS_NUM]; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f68c22fa2fcebd0751447f96d92507d253b27114..0124937a23b97e104260f9cd4bbe1c409bdf1ff5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -119,7 +119,8 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); -extern long kvmppc_alloc_hpt(struct kvm *kvm); +extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp); +extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp); extern void kvmppc_free_hpt(struct kvm *kvm); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 80a57751758444a427797ecaff2a6f2e07a1ba03..d03eb6f7b0584e368bdd96e0f1969af58e885371 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,56 +37,121 @@ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 -long kvmppc_alloc_hpt(struct kvm *kvm) +/* Power architecture requires HPT is at least 256kB */ +#define PPC_MIN_HPT_ORDER 18 + +long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) { unsigned long hpt; - long lpid; struct revmap_entry *rev; struct kvmppc_linear_info *li; + long order = kvm_hpt_order; - /* Allocate guest's hashed page table */ - li = kvm_alloc_hpt(); - if (li) { - /* using preallocated memory */ - hpt = (ulong)li->base_virt; - kvm->arch.hpt_li = li; - } else { - /* using dynamic memory */ + if (htab_orderp) { + order = *htab_orderp; + if (order < PPC_MIN_HPT_ORDER) + order = PPC_MIN_HPT_ORDER; + } + + /* + * If the user wants a different size from default, + * try first to allocate it from the kernel page allocator. + */ + hpt = 0; + if (order != kvm_hpt_order) { hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| - __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT); + __GFP_NOWARN, order - PAGE_SHIFT); + if (!hpt) + --order; } + /* Next try to allocate from the preallocated pool */ if (!hpt) { - pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n"); - return -ENOMEM; + li = kvm_alloc_hpt(); + if (li) { + hpt = (ulong)li->base_virt; + kvm->arch.hpt_li = li; + order = kvm_hpt_order; + } } + + /* Lastly try successively smaller sizes from the page allocator */ + while (!hpt && order > PPC_MIN_HPT_ORDER) { + hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| + __GFP_NOWARN, order - PAGE_SHIFT); + if (!hpt) + --order; + } + + if (!hpt) + return -ENOMEM; + kvm->arch.hpt_virt = hpt; + kvm->arch.hpt_order = order; + /* HPTEs are 2**4 bytes long */ + kvm->arch.hpt_npte = 1ul << (order - 4); + /* 128 (2**7) bytes in each HPTEG */ + kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; /* Allocate reverse map array */ - rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE); + rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); if (!rev) { pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); goto out_freehpt; } kvm->arch.revmap = rev; + kvm->arch.sdr1 = __pa(hpt) | (order - 18); - lpid = kvmppc_alloc_lpid(); - if (lpid < 0) - goto out_freeboth; + pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", + hpt, order, kvm->arch.lpid); - kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); - kvm->arch.lpid = lpid; - - pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); + if (htab_orderp) + *htab_orderp = order; return 0; - out_freeboth: - vfree(rev); out_freehpt: - free_pages(hpt, HPT_ORDER - PAGE_SHIFT); + if (kvm->arch.hpt_li) + kvm_release_hpt(kvm->arch.hpt_li); + else + free_pages(hpt, order - PAGE_SHIFT); return -ENOMEM; } +long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) +{ + long err = -EBUSY; + long order; + + mutex_lock(&kvm->lock); + if (kvm->arch.rma_setup_done) { + kvm->arch.rma_setup_done = 0; + /* order rma_setup_done vs. vcpus_running */ + smp_mb(); + if (atomic_read(&kvm->arch.vcpus_running)) { + kvm->arch.rma_setup_done = 1; + goto out; + } + } + if (kvm->arch.hpt_virt) { + order = kvm->arch.hpt_order; + /* Set the entire HPT to 0, i.e. invalid HPTEs */ + memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); + /* + * Set the whole last_vcpu array to an invalid vcpu number. + * This ensures that each vcpu will flush its TLB on next entry. + */ + memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu)); + *htab_orderp = order; + err = 0; + } else { + err = kvmppc_alloc_hpt(kvm, htab_orderp); + order = *htab_orderp; + } + out: + mutex_unlock(&kvm->lock); + return err; +} + void kvmppc_free_hpt(struct kvm *kvm) { kvmppc_free_lpid(kvm->arch.lpid); @@ -94,7 +159,8 @@ void kvmppc_free_hpt(struct kvm *kvm) if (kvm->arch.hpt_li) kvm_release_hpt(kvm->arch.hpt_li); else - free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); + free_pages(kvm->arch.hpt_virt, + kvm->arch.hpt_order - PAGE_SHIFT); } /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ @@ -119,6 +185,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long psize; unsigned long hp0, hp1; long ret; + struct kvm *kvm = vcpu->kvm; psize = 1ul << porder; npages = memslot->npages >> (porder - PAGE_SHIFT); @@ -127,8 +194,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, if (npages > 1ul << (40 - porder)) npages = 1ul << (40 - porder); /* Can't use more than 1 HPTE per HPTEG */ - if (npages > HPT_NPTEG) - npages = HPT_NPTEG; + if (npages > kvm->arch.hpt_mask + 1) + npages = kvm->arch.hpt_mask + 1; hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); @@ -138,7 +205,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, for (i = 0; i < npages; ++i) { addr = i << porder; /* can't use hpt_hash since va > 64 bits */ - hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; + hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask; /* * We assume that the hash table is empty and no * vcpus are using it at this stage. Since we create diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c6af1d6238395947725a2e53ff0fbd6d6614b2e7..d084e412b3c53317d88fe59ac6f5f4baed70ba7a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -56,7 +56,7 @@ /* #define EXIT_DEBUG_INT */ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); -static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu); +static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { @@ -1068,11 +1068,15 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) return -EINTR; } - /* On the first time here, set up VRMA or RMA */ + atomic_inc(&vcpu->kvm->arch.vcpus_running); + /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */ + smp_mb(); + + /* On the first time here, set up HTAB and VRMA or RMA */ if (!vcpu->kvm->arch.rma_setup_done) { - r = kvmppc_hv_setup_rma(vcpu); + r = kvmppc_hv_setup_htab_rma(vcpu); if (r) - return r; + goto out; } flush_fp_to_thread(current); @@ -1090,6 +1094,9 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); } } while (r == RESUME_GUEST); + + out: + atomic_dec(&vcpu->kvm->arch.vcpus_running); return r; } @@ -1305,7 +1312,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, { } -static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) +static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; @@ -1324,6 +1331,15 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) if (kvm->arch.rma_setup_done) goto out; /* another vcpu beat us to it */ + /* Allocate hashed page table (if not done already) and reset it */ + if (!kvm->arch.hpt_virt) { + err = kvmppc_alloc_hpt(kvm, NULL); + if (err) { + pr_err("KVM: Couldn't alloc HPT\n"); + goto out; + } + } + /* Look up the memslot for guest physical address 0 */ memslot = gfn_to_memslot(kvm, 0); @@ -1435,13 +1451,14 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) int kvmppc_core_init_vm(struct kvm *kvm) { - long r; - unsigned long lpcr; + unsigned long lpcr, lpid; - /* Allocate hashed page table */ - r = kvmppc_alloc_hpt(kvm); - if (r) - return r; + /* Allocate the guest's logical partition ID */ + + lpid = kvmppc_alloc_lpid(); + if (lpid < 0) + return -ENOMEM; + kvm->arch.lpid = lpid; INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); @@ -1451,7 +1468,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) if (cpu_has_feature(CPU_FTR_ARCH_201)) { /* PPC970; HID4 is effectively the LPCR */ - unsigned long lpid = kvm->arch.lpid; kvm->arch.host_lpid = 0; kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index e1b60f56f2a1eb73bdd00ebc36faa44e5e3a0aa2..fb4eac290fefc1c438b46a1cc5865c0b3200ce57 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -25,6 +25,9 @@ static void __init kvm_linear_init_one(ulong size, int count, int type); static struct kvmppc_linear_info *kvm_alloc_linear(int type); static void kvm_release_linear(struct kvmppc_linear_info *ri); +int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER; +EXPORT_SYMBOL_GPL(kvm_hpt_order); + /*************** RMA *************/ /* @@ -209,7 +212,7 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri) void __init kvm_linear_init(void) { /* HPT */ - kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT); + kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT); /* RMA */ /* Only do this on PPC970 in HV mode */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index cec4daddbf31632a97cecb23d94d2773ccb8b803..5c70d19494f9251bee3968ba5f23a7c56cd6c926 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -237,7 +237,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, /* Find and lock the HPTEG slot to use */ do_insert: - if (pte_index >= HPT_NPTE) + if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; @@ -352,7 +352,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long v, r, rb; struct revmap_entry *rev; - if (pte_index >= HPT_NPTE) + if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) @@ -419,7 +419,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) i = 4; break; } - if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) { + if (req != 1 || flags == 3 || + pte_index >= kvm->arch.hpt_npte) { /* parameter error */ args[j] = ((0xa0 | flags) << 56) + pte_index; ret = H_PARAMETER; @@ -521,7 +522,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; - if (pte_index >= HPT_NPTE) + if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); @@ -583,7 +584,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, int i, n = 1; struct revmap_entry *rev = NULL; - if (pte_index >= HPT_NPTE) + if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; if (flags & H_READ_4) { pte_index &= ~3; @@ -678,7 +679,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, somask = (1UL << 28) - 1; vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; } - hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK; + hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask; avpn = slb_v & ~(somask >> 16); /* also includes B */ avpn |= (eaddr & somask) >> 16; @@ -723,7 +724,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, if (val & HPTE_V_SECONDARY) break; val |= HPTE_V_SECONDARY; - hash = hash ^ HPT_HASH_MASK; + hash = hash ^ kvm->arch.hpt_mask; } return -1; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1493c8de947b92abc84b9a155f87b1e5a9bb0673..87f4dc886076d3ad2d6ecede5dfbe1990cd568e0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -246,6 +246,7 @@ int kvm_dev_ioctl_check_extension(long ext) #endif #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: + case KVM_CAP_PPC_ALLOC_HTAB: r = 1; break; #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -802,6 +803,23 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + + case KVM_PPC_ALLOCATE_HTAB: { + struct kvm *kvm = filp->private_data; + u32 htab_order; + + r = -EFAULT; + if (get_user(htab_order, (u32 __user *)argp)) + break; + r = kvmppc_alloc_reset_hpt(kvm, &htab_order); + if (r) + break; + r = -EFAULT; + if (put_user(htab_order, (u32 __user *)argp)) + break; + r = 0; + break; + } #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 09f2b3aa2da7e8b674943ac36b9bdd618f0b5611..2ce09aa7d3b3d12b3cb27fbfb2d4896390b3045f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -617,6 +617,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_SIGNAL_MSI 77 #define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_S390_COW 79 +#define KVM_CAP_PPC_ALLOC_HTAB 80 #ifdef KVM_CAP_IRQ_ROUTING @@ -828,6 +829,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) /* Available with KVM_CAP_PPC_GET_SMMU_INFO */ #define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) +/* Available with KVM_CAP_PPC_ALLOC_HTAB */ +#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) /* * ioctls for vcpu fds