diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a0b17f9f1ea4e5c40e1b2d7a2c6e1aa913759ea5..c95c6518bf27079a729cb1026a54a0b78b8b6bbb 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -322,6 +322,11 @@ #define H_GET_24X7_DATA 0xF07C #define H_GET_PERF_COUNTER_INFO 0xF080 +/* Platform-specific hcalls used for nested HV KVM */ +#define H_SET_PARTITION_TABLE 0xF800 +#define H_ENTER_NESTED 0xF804 +#define H_TLB_INVALIDATE 0xF808 + /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 #define H_SET_MODE_RESOURCE_SET_DAWR 2 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 91c977948828155c09e598d61e5df35b28ac5a27..43f212e38b89e86f8b2fed6274d9bf83acb2d4a6 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -274,6 +274,13 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {} static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} #endif +long kvmhv_nested_init(void); +void kvmhv_nested_exit(void); +void kvmhv_vm_nested_init(struct kvm *kvm); +long kvmhv_set_partition_table(struct kvm_vcpu *vcpu); +void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); +void kvmhv_release_all_nested(struct kvm *kvm); + void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); extern int kvm_irq_bypass; @@ -387,9 +394,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu); /* TO = 31 for unconditional trap */ #define INS_TW 0x7fe00008 -/* LPIDs we support with this build -- runtime limit may be lower */ -#define KVMPPC_NR_LPIDS (LPID_RSVD + 1) - #define SPLIT_HACK_MASK 0xff000000 #define SPLIT_HACK_OFFS 0xfb000000 diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 5c0e2d9a7e15149dd1354fd75036a7d7e73daabf..6d67b6a9e78468b35f0d29f507ec6966b58dfdcb 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -23,6 +23,39 @@ #include #include #include +#include + +#ifdef CONFIG_PPC_PSERIES +static inline bool kvmhv_on_pseries(void) +{ + return !cpu_has_feature(CPU_FTR_HVMODE); +} +#else +static inline bool kvmhv_on_pseries(void) +{ + return false; +} +#endif + +/* + * Structure for a nested guest, that is, for a guest that is managed by + * one of our guests. + */ +struct kvm_nested_guest { + struct kvm *l1_host; /* L1 VM that owns this nested guest */ + int l1_lpid; /* lpid L1 guest thinks this guest is */ + int shadow_lpid; /* real lpid of this nested guest */ + pgd_t *shadow_pgtable; /* our page table for this guest */ + u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */ + u64 process_table; /* process table entry for this guest */ + long refcnt; /* number of pointers to this struct */ + struct mutex tlb_lock; /* serialize page faults and tlbies */ + struct kvm_nested_guest *next; +}; + +struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid, + bool create); +void kvmhv_put_nested(struct kvm_nested_guest *gp); /* Power architecture requires HPT is at least 256kiB, at most 64TiB */ #define PPC_MIN_HPT_ORDER 18 diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index d978fdf698af2ad5e89a4243e7bf2efe3e4e15bb..eb3ba6390108215c2a0c628c43a27266b1444ff5 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -25,6 +25,9 @@ #define XICS_MFRR 0xc #define XICS_IPI 2 /* interrupt source # for IPIs */ +/* LPIDs we support with this build -- runtime limit may be lower */ +#define KVMPPC_NR_LPIDS (LPID_RSVD + 1) + /* Maximum number of threads per physical core */ #define MAX_SMT_THREADS 8 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c9cc42f73b3c57b92a74a8214582266d4917c270..c35d4f2c4d908adf1053addce1f4e5cdd9bc51ba 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #include /* for MAX_SMT_THREADS */ #define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES) +#define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS #else #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS @@ -287,6 +288,7 @@ struct kvm_arch { u8 radix; u8 fwnmi_enabled; bool threads_indep; + bool nested_enable; pgd_t *pgtable; u64 process_table; struct dentry *debugfs_dir; @@ -312,6 +314,9 @@ struct kvm_arch { #endif struct kvmppc_ops *kvm_ops; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + u64 l1_ptcr; + int max_nested_lpid; + struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS]; /* This array can grow quite large, keep it at the end */ struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; #endif diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index f872c04bb5b1bb1185a7fcc9df1597540d3d3120..e814f40ab836e3bfc66f9e529299fb8202af2f6a 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -75,7 +75,8 @@ kvm-hv-y += \ book3s_hv.o \ book3s_hv_interrupts.o \ book3s_64_mmu_hv.o \ - book3s_64_mmu_radix.o + book3s_64_mmu_radix.o \ + book3s_hv_nested.o kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ book3s_hv_tm.o diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8c20a90a68519737677ccf97865850fc57f64064..d8fc49effab0b9ce0a6336eaa4b01b34254b5560 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -934,6 +934,19 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (ret == H_TOO_HARD) return RESUME_HOST; break; + + case H_SET_PARTITION_TABLE: + ret = H_FUNCTION; + if (vcpu->kvm->arch.nested_enable) + ret = kvmhv_set_partition_table(vcpu); + break; + case H_ENTER_NESTED: + ret = H_FUNCTION; + break; + case H_TLB_INVALIDATE: + ret = H_FUNCTION; + break; + default: return RESUME_HOST; } @@ -4157,8 +4170,7 @@ void kvmppc_setup_partition_table(struct kvm *kvm) __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE; dw1 = PATB_GR | kvm->arch.process_table; } - - mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); + kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1); } /* @@ -4254,6 +4266,10 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { + if (kvm->arch.nested_enable) { + kvm->arch.nested_enable = false; + kvmhv_release_all_nested(kvm); + } kvmppc_free_radix(kvm); kvmppc_update_lpcr(kvm, LPCR_VPM1, LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); @@ -4374,6 +4390,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvmppc_alloc_host_rm_ops(); + kvmhv_vm_nested_init(kvm); + /* * Since we don't flush the TLB when tearing down a VM, * and this lpid might have previously been used, @@ -4517,8 +4535,10 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) /* Perform global invalidation and return lpid to the pool */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (kvm->arch.nested_enable) + kvmhv_release_all_nested(kvm); kvm->arch.process_table = 0; - kvmppc_setup_partition_table(kvm); + kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); } kvmppc_free_lpid(kvm->arch.lpid); @@ -4989,6 +5009,10 @@ static int kvmppc_book3s_init_hv(void) if (r < 0) return -ENODEV; + r = kvmhv_nested_init(); + if (r) + return r; + r = kvm_init_subcore_bitmap(); if (r) return r; @@ -5047,6 +5071,7 @@ static void kvmppc_book3s_exit_hv(void) if (kvmppc_radix_possible()) kvmppc_radix_exit(); kvmppc_hv_ops = NULL; + kvmhv_nested_exit(); } module_init(kvmppc_book3s_init_hv); diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c new file mode 100644 index 0000000000000000000000000000000000000000..327826248314d75f07750b6ce97b7604b7546454 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corporation, 2018 + * Authors Suraj Jitindar Singh + * Paul Mackerras + * + * Description: KVM functions specific to running nested KVM-HV guests + * on Book3S processors (specifically POWER9 and later). + */ + +#include +#include + +#include +#include +#include +#include + +static struct patb_entry *pseries_partition_tb; + +static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp); + +long kvmhv_nested_init(void) +{ + long int ptb_order; + unsigned long ptcr; + long rc; + + if (!kvmhv_on_pseries()) + return 0; + if (!radix_enabled()) + return -ENODEV; + + /* find log base 2 of KVMPPC_NR_LPIDS, rounding up */ + ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1; + if (ptb_order < 8) + ptb_order = 8; + pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order, + GFP_KERNEL); + if (!pseries_partition_tb) { + pr_err("kvm-hv: failed to allocated nested partition table\n"); + return -ENOMEM; + } + + ptcr = __pa(pseries_partition_tb) | (ptb_order - 8); + rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr); + if (rc != H_SUCCESS) { + pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n", + rc); + kfree(pseries_partition_tb); + pseries_partition_tb = NULL; + return -ENODEV; + } + + return 0; +} + +void kvmhv_nested_exit(void) +{ + /* + * N.B. the kvmhv_on_pseries() test is there because it enables + * the compiler to remove the call to plpar_hcall_norets() + * when CONFIG_PPC_PSERIES=n. + */ + if (kvmhv_on_pseries() && pseries_partition_tb) { + plpar_hcall_norets(H_SET_PARTITION_TABLE, 0); + kfree(pseries_partition_tb); + pseries_partition_tb = NULL; + } +} + +void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1) +{ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + mmu_partition_table_set_entry(lpid, dw0, dw1); + } else { + pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0); + pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1); + } +} + +static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp) +{ + unsigned long dw0; + + dw0 = PATB_HR | radix__get_tree_size() | + __pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE; + kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table); +} + +void kvmhv_vm_nested_init(struct kvm *kvm) +{ + kvm->arch.max_nested_lpid = -1; +} + +/* + * Handle the H_SET_PARTITION_TABLE hcall. + * r4 = guest real address of partition table + log_2(size) - 12 + * (formatted as for the PTCR). + */ +long kvmhv_set_partition_table(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long ptcr = kvmppc_get_gpr(vcpu, 4); + int srcu_idx; + long ret = H_SUCCESS; + + srcu_idx = srcu_read_lock(&kvm->srcu); + /* + * Limit the partition table to 4096 entries (because that's what + * hardware supports), and check the base address. + */ + if ((ptcr & PRTS_MASK) > 12 - 8 || + !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT)) + ret = H_PARAMETER; + srcu_read_unlock(&kvm->srcu, srcu_idx); + if (ret == H_SUCCESS) + kvm->arch.l1_ptcr = ptcr; + return ret; +} + +/* + * Reload the partition table entry for a guest. + * Caller must hold gp->tlb_lock. + */ +static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) +{ + int ret; + struct patb_entry ptbl_entry; + unsigned long ptbl_addr; + struct kvm *kvm = gp->l1_host; + + ret = -EFAULT; + ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4); + if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) + ret = kvm_read_guest(kvm, ptbl_addr, + &ptbl_entry, sizeof(ptbl_entry)); + if (ret) { + gp->l1_gr_to_hr = 0; + gp->process_table = 0; + } else { + gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0); + gp->process_table = be64_to_cpu(ptbl_entry.patb1); + } + kvmhv_set_nested_ptbl(gp); +} + +struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid) +{ + struct kvm_nested_guest *gp; + long shadow_lpid; + + gp = kzalloc(sizeof(*gp), GFP_KERNEL); + if (!gp) + return NULL; + gp->l1_host = kvm; + gp->l1_lpid = lpid; + mutex_init(&gp->tlb_lock); + gp->shadow_pgtable = pgd_alloc(kvm->mm); + if (!gp->shadow_pgtable) + goto out_free; + shadow_lpid = kvmppc_alloc_lpid(); + if (shadow_lpid < 0) + goto out_free2; + gp->shadow_lpid = shadow_lpid; + + return gp; + + out_free2: + pgd_free(kvm->mm, gp->shadow_pgtable); + out_free: + kfree(gp); + return NULL; +} + +/* + * Free up any resources allocated for a nested guest. + */ +static void kvmhv_release_nested(struct kvm_nested_guest *gp) +{ + kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0); + kvmppc_free_lpid(gp->shadow_lpid); + if (gp->shadow_pgtable) + pgd_free(gp->l1_host->mm, gp->shadow_pgtable); + kfree(gp); +} + +static void kvmhv_remove_nested(struct kvm_nested_guest *gp) +{ + struct kvm *kvm = gp->l1_host; + int lpid = gp->l1_lpid; + long ref; + + spin_lock(&kvm->mmu_lock); + if (gp == kvm->arch.nested_guests[lpid]) { + kvm->arch.nested_guests[lpid] = NULL; + if (lpid == kvm->arch.max_nested_lpid) { + while (--lpid >= 0 && !kvm->arch.nested_guests[lpid]) + ; + kvm->arch.max_nested_lpid = lpid; + } + --gp->refcnt; + } + ref = gp->refcnt; + spin_unlock(&kvm->mmu_lock); + if (ref == 0) + kvmhv_release_nested(gp); +} + +/* + * Free up all nested resources allocated for this guest. + * This is called with no vcpus of the guest running, when + * switching the guest to HPT mode or when destroying the + * guest. + */ +void kvmhv_release_all_nested(struct kvm *kvm) +{ + int i; + struct kvm_nested_guest *gp; + struct kvm_nested_guest *freelist = NULL; + + spin_lock(&kvm->mmu_lock); + for (i = 0; i <= kvm->arch.max_nested_lpid; i++) { + gp = kvm->arch.nested_guests[i]; + if (!gp) + continue; + kvm->arch.nested_guests[i] = NULL; + if (--gp->refcnt == 0) { + gp->next = freelist; + freelist = gp; + } + } + kvm->arch.max_nested_lpid = -1; + spin_unlock(&kvm->mmu_lock); + while ((gp = freelist) != NULL) { + freelist = gp->next; + kvmhv_release_nested(gp); + } +} + +/* caller must hold gp->tlb_lock */ +void kvmhv_flush_nested(struct kvm_nested_guest *gp) +{ + kvmhv_update_ptbl_cache(gp); + if (gp->l1_gr_to_hr == 0) + kvmhv_remove_nested(gp); +} + +struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid, + bool create) +{ + struct kvm_nested_guest *gp, *newgp; + + if (l1_lpid >= KVM_MAX_NESTED_GUESTS || + l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4))) + return NULL; + + spin_lock(&kvm->mmu_lock); + gp = kvm->arch.nested_guests[l1_lpid]; + if (gp) + ++gp->refcnt; + spin_unlock(&kvm->mmu_lock); + + if (gp || !create) + return gp; + + newgp = kvmhv_alloc_nested(kvm, l1_lpid); + if (!newgp) + return NULL; + spin_lock(&kvm->mmu_lock); + if (kvm->arch.nested_guests[l1_lpid]) { + /* someone else beat us to it */ + gp = kvm->arch.nested_guests[l1_lpid]; + } else { + kvm->arch.nested_guests[l1_lpid] = newgp; + ++newgp->refcnt; + gp = newgp; + newgp = NULL; + if (l1_lpid > kvm->arch.max_nested_lpid) + kvm->arch.max_nested_lpid = l1_lpid; + } + ++gp->refcnt; + spin_unlock(&kvm->mmu_lock); + + if (newgp) + kvmhv_release_nested(newgp); + + return gp; +} + +void kvmhv_put_nested(struct kvm_nested_guest *gp) +{ + struct kvm *kvm = gp->l1_host; + long ref; + + spin_lock(&kvm->mmu_lock); + ref = --gp->refcnt; + spin_unlock(&kvm->mmu_lock); + if (ref == 0) + kvmhv_release_nested(gp); +}