提交 43d8ac22 编写于 作者: M Marc Zyngier

Merge branch kvm-arm64/pkvm-hyp-sharing into kvmarm-master/next

* kvm-arm64/pkvm-hyp-sharing:
  : .
  : Series from Quentin Perret, implementing HYP page share/unshare:
  :
  : This series implements an unshare hypercall at EL2 in nVHE
  : protected mode, and makes use of it to unmmap guest-specific
  : data-structures from EL2 stage-1 during guest tear-down.
  : Crucially, the implementation of the share and unshare
  : routines use page refcounts in the host kernel to avoid
  : accidentally unmapping data-structures that overlap a common
  : page.
  : [...]
  : .
  KVM: arm64: pkvm: Unshare guest structs during teardown
  KVM: arm64: Expose unshare hypercall to the host
  KVM: arm64: Implement do_unshare() helper for unsharing memory
  KVM: arm64: Implement __pkvm_host_share_hyp() using do_share()
  KVM: arm64: Implement do_share() helper for sharing memory
  KVM: arm64: Introduce wrappers for host and hyp spin lock accessors
  KVM: arm64: Extend pkvm_page_state enumeration to handle absent pages
  KVM: arm64: pkvm: Refcount the pages shared with EL2
  KVM: arm64: Introduce kvm_share_hyp()
  KVM: arm64: Implement kvm_pgtable_hyp_unmap() at EL2
  KVM: arm64: Hook up ->page_count() for hypervisor stage-1 page-table
  KVM: arm64: Fixup hyp stage-1 refcount
  KVM: arm64: Refcount hyp stage-1 pgtable pages
  KVM: arm64: Provide {get,put}_page() stubs for early hyp allocator
Signed-off-by: NMarc Zyngier <maz@kernel.org>
......@@ -63,6 +63,7 @@ enum __kvm_host_smccc_func {
/* Hypercalls available after pKVM finalisation */
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
......
......@@ -321,6 +321,7 @@ struct kvm_vcpu_arch {
struct kvm_guest_debug_arch external_debug_state;
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
struct task_struct *parent_task;
struct {
/* {Break,watch}point registers */
......@@ -737,6 +738,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
......
......@@ -150,6 +150,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
int kvm_share_hyp(void *from, void *to);
void kvm_unshare_hyp(void *from, void *to);
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
......
......@@ -251,6 +251,27 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot);
/**
* kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
* @addr: Virtual address from which to remove the mapping.
* @size: Size of the mapping.
*
* The offset of @addr within a page is ignored, @size is rounded-up to
* the next page boundary and @phys is rounded-down to the previous page
* boundary.
*
* TLB invalidation is performed for each page-table entry cleared during the
* unmapping operation and the reference count for the page-table page
* containing the cleared entry is decremented, with unreferenced pages being
* freed. The unmapping operation will stop early if it encounters either an
* invalid page-table entry or a valid block mapping which maps beyond the range
* being unmapped.
*
* Return: Number of bytes unmapped, which may be 0.
*/
u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
/**
* kvm_get_vtcr() - Helper to construct VTCR_EL2
* @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
......
......@@ -146,7 +146,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
return ret;
ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
goto out_free_stage2_pgd;
......@@ -188,6 +188,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
}
}
atomic_set(&kvm->online_vcpus, 0);
kvm_unshare_hyp(kvm, kvm + 1);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
......@@ -342,7 +344,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
return err;
return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
return kvm_share_hyp(vcpu, vcpu + 1);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
......
......@@ -14,6 +14,19 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
{
struct task_struct *p = vcpu->arch.parent_task;
struct user_fpsimd_state *fpsimd;
if (!is_protected_kvm_enabled() || !p)
return;
fpsimd = &p->thread.uw.fpsimd_state;
kvm_unshare_hyp(fpsimd, fpsimd + 1);
put_task_struct(p);
}
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
* once and the most recent prior KVM_RUN for this vcpu was called from
......@@ -29,12 +42,27 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
kvm_vcpu_unshare_task_fp(vcpu);
/* Make sure the host task fpsimd state is visible to hyp: */
ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
if (!ret)
vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
ret = kvm_share_hyp(fpsimd, fpsimd + 1);
if (ret)
return ret;
vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
/*
* We need to keep current's task_struct pinned until its data has been
* unshared with the hypervisor to make sure it is not re-used by the
* kernel and donated to someone else while already shared -- see
* kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
*/
if (is_protected_kvm_enabled()) {
get_task_struct(current);
vcpu->arch.parent_task = current;
}
return ret;
return 0;
}
/*
......
......@@ -24,6 +24,11 @@ enum pkvm_page_state {
PKVM_PAGE_OWNED = 0ULL,
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
__PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
KVM_PGTABLE_PROT_SW1,
/* Meta-states which aren't encoded directly in the PTE's SW bits */
PKVM_NOPAGE,
};
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
......@@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id;
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
int __pkvm_host_unshare_hyp(u64 pfn);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
......
......@@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg)
return hyp_early_alloc_contig(1);
}
static void hyp_early_alloc_get_page(void *addr) { }
static void hyp_early_alloc_put_page(void *addr) { }
void hyp_early_alloc_init(void *virt, unsigned long size)
{
base = cur = (unsigned long)virt;
......@@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size)
hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page;
hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page;
}
......@@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
}
static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
}
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
......@@ -184,6 +191,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_prot_finalize),
HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__pkvm_host_unshare_hyp),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
......
......@@ -28,6 +28,26 @@ static struct hyp_pool host_s2_pool;
const u8 pkvm_hyp_id = 1;
static void host_lock_component(void)
{
hyp_spin_lock(&host_kvm.lock);
}
static void host_unlock_component(void)
{
hyp_spin_unlock(&host_kvm.lock);
}
static void hyp_lock_component(void)
{
hyp_spin_lock(&pkvm_pgd_lock);
}
static void hyp_unlock_component(void)
{
hyp_spin_unlock(&pkvm_pgd_lock);
}
static void *host_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
......@@ -339,116 +359,446 @@ static int host_stage2_idmap(u64 addr)
prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
hyp_spin_lock(&host_kvm.lock);
host_lock_component();
ret = host_stage2_adjust_range(addr, &range);
if (ret)
goto unlock;
ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
hyp_spin_unlock(&host_kvm.lock);
host_unlock_component();
return ret;
}
static inline bool check_prot(enum kvm_pgtable_prot prot,
enum kvm_pgtable_prot required,
enum kvm_pgtable_prot denied)
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
return (prot & (required | denied)) == required;
struct kvm_vcpu_fault_info fault;
u64 esr, addr;
int ret = 0;
esr = read_sysreg_el2(SYS_ESR);
BUG_ON(!__get_fault_info(esr, &fault));
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
ret = host_stage2_idmap(addr);
BUG_ON(ret && ret != -EAGAIN);
}
int __pkvm_host_share_hyp(u64 pfn)
/* This corresponds to locking order */
enum pkvm_component_id {
PKVM_ID_HOST,
PKVM_ID_HYP,
};
struct pkvm_mem_transition {
u64 nr_pages;
struct {
enum pkvm_component_id id;
/* Address in the initiator's address space */
u64 addr;
union {
struct {
/* Address in the completer's address space */
u64 completer_addr;
} host;
};
} initiator;
struct {
enum pkvm_component_id id;
} completer;
};
struct pkvm_mem_share {
const struct pkvm_mem_transition tx;
const enum kvm_pgtable_prot completer_prot;
};
struct check_walk_data {
enum pkvm_page_state desired;
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
};
static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
phys_addr_t addr = hyp_pfn_to_phys(pfn);
enum kvm_pgtable_prot prot, cur;
void *virt = __hyp_va(addr);
enum pkvm_page_state state;
kvm_pte_t pte;
int ret;
struct check_walk_data *d = arg;
kvm_pte_t pte = *ptep;
if (!addr_is_memory(addr))
if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
return -EINVAL;
hyp_spin_lock(&host_kvm.lock);
hyp_spin_lock(&pkvm_pgd_lock);
return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
}
static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct check_walk_data *data)
{
struct kvm_pgtable_walker walker = {
.cb = __check_page_state_visitor,
.arg = data,
.flags = KVM_PGTABLE_WALK_LEAF,
};
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
{
if (!kvm_pte_valid(pte) && pte)
return PKVM_NOPAGE;
return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}
static int __host_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
struct check_walk_data d = {
.desired = state,
.get_page_state = host_get_page_state,
};
hyp_assert_lock_held(&host_kvm.lock);
return check_page_state_range(&host_kvm.pgt, addr, size, &d);
}
static int __host_set_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
return host_stage2_idmap_locked(addr, size, prot);
}
static int host_request_owned_transition(u64 *completer_addr,
const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
u64 addr = tx->initiator.addr;
*completer_addr = tx->initiator.host.completer_addr;
return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
}
static int host_request_unshare(u64 *completer_addr,
const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
u64 addr = tx->initiator.addr;
*completer_addr = tx->initiator.host.completer_addr;
return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}
static int host_initiate_share(u64 *completer_addr,
const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
u64 addr = tx->initiator.addr;
*completer_addr = tx->initiator.host.completer_addr;
return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}
static int host_initiate_unshare(u64 *completer_addr,
const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
u64 addr = tx->initiator.addr;
*completer_addr = tx->initiator.host.completer_addr;
return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
}
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
{
if (!kvm_pte_valid(pte))
return PKVM_NOPAGE;
return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}
static int __hyp_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
struct check_walk_data d = {
.desired = state,
.get_page_state = hyp_get_page_state,
};
hyp_assert_lock_held(&pkvm_pgd_lock);
return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}
static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
{
return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
tx->initiator.id != PKVM_ID_HOST);
}
static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
enum kvm_pgtable_prot perms)
{
u64 size = tx->nr_pages * PAGE_SIZE;
if (perms != PAGE_HYP)
return -EPERM;
if (__hyp_ack_skip_pgtable_check(tx))
return 0;
return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
}
static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
if (__hyp_ack_skip_pgtable_check(tx))
return 0;
return __hyp_check_page_state_range(addr, size,
PKVM_PAGE_SHARED_BORROWED);
}
static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
enum kvm_pgtable_prot perms)
{
void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
enum kvm_pgtable_prot prot;
prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
return pkvm_create_mappings_locked(start, end, prot);
}
static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
return (ret != size) ? -EFAULT : 0;
}
static int check_share(struct pkvm_mem_share *share)
{
const struct pkvm_mem_transition *tx = &share->tx;
u64 completer_addr;
int ret;
switch (tx->initiator.id) {
case PKVM_ID_HOST:
ret = host_request_owned_transition(&completer_addr, tx);
break;
default:
ret = -EINVAL;
}
ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
if (ret)
goto unlock;
if (!pte)
goto map_shared;
return ret;
/*
* Check attributes in the host stage-2 PTE. We need the page to be:
* - mapped RWX as we're sharing memory;
* - not borrowed, as that implies absence of ownership.
* Otherwise, we can't let it got through
*/
cur = kvm_pgtable_stage2_pte_prot(pte);
prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
ret = -EPERM;
goto unlock;
switch (tx->completer.id) {
case PKVM_ID_HYP:
ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
break;
default:
ret = -EINVAL;
}
state = pkvm_getstate(cur);
if (state == PKVM_PAGE_OWNED)
goto map_shared;
return ret;
}
/*
* Tolerate double-sharing the same page, but this requires
* cross-checking the hypervisor stage-1.
*/
if (state != PKVM_PAGE_SHARED_OWNED) {
ret = -EPERM;
goto unlock;
static int __do_share(struct pkvm_mem_share *share)
{
const struct pkvm_mem_transition *tx = &share->tx;
u64 completer_addr;
int ret;
switch (tx->initiator.id) {
case PKVM_ID_HOST:
ret = host_initiate_share(&completer_addr, tx);
break;
default:
ret = -EINVAL;
}
ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
if (ret)
goto unlock;
return ret;
/*
* If the page has been shared with the hypervisor, it must be
* already mapped as SHARED_BORROWED in its stage-1.
*/
cur = kvm_pgtable_hyp_pte_prot(pte);
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
if (!check_prot(cur, prot, ~prot))
ret = -EPERM;
goto unlock;
switch (tx->completer.id) {
case PKVM_ID_HYP:
ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
break;
default:
ret = -EINVAL;
}
map_shared:
/*
* If the page is not yet shared, adjust mappings in both page-tables
* while both locks are held.
*/
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
BUG_ON(ret);
return ret;
}
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
BUG_ON(ret);
/*
* do_share():
*
* The page owner grants access to another component with a given set
* of permissions.
*
* Initiator: OWNED => SHARED_OWNED
* Completer: NOPAGE => SHARED_BORROWED
*/
static int do_share(struct pkvm_mem_share *share)
{
int ret;
unlock:
hyp_spin_unlock(&pkvm_pgd_lock);
hyp_spin_unlock(&host_kvm.lock);
ret = check_share(share);
if (ret)
return ret;
return WARN_ON(__do_share(share));
}
static int check_unshare(struct pkvm_mem_share *share)
{
const struct pkvm_mem_transition *tx = &share->tx;
u64 completer_addr;
int ret;
switch (tx->initiator.id) {
case PKVM_ID_HOST:
ret = host_request_unshare(&completer_addr, tx);
break;
default:
ret = -EINVAL;
}
if (ret)
return ret;
switch (tx->completer.id) {
case PKVM_ID_HYP:
ret = hyp_ack_unshare(completer_addr, tx);
break;
default:
ret = -EINVAL;
}
return ret;
}
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
static int __do_unshare(struct pkvm_mem_share *share)
{
struct kvm_vcpu_fault_info fault;
u64 esr, addr;
int ret = 0;
const struct pkvm_mem_transition *tx = &share->tx;
u64 completer_addr;
int ret;
esr = read_sysreg_el2(SYS_ESR);
BUG_ON(!__get_fault_info(esr, &fault));
switch (tx->initiator.id) {
case PKVM_ID_HOST:
ret = host_initiate_unshare(&completer_addr, tx);
break;
default:
ret = -EINVAL;
}
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
ret = host_stage2_idmap(addr);
BUG_ON(ret && ret != -EAGAIN);
if (ret)
return ret;
switch (tx->completer.id) {
case PKVM_ID_HYP:
ret = hyp_complete_unshare(completer_addr, tx);
break;
default:
ret = -EINVAL;
}
return ret;
}
/*
* do_unshare():
*
* The page owner revokes access from another component for a range of
* pages which were previously shared using do_share().
*
* Initiator: SHARED_OWNED => OWNED
* Completer: SHARED_BORROWED => NOPAGE
*/
static int do_unshare(struct pkvm_mem_share *share)
{
int ret;
ret = check_unshare(share);
if (ret)
return ret;
return WARN_ON(__do_unshare(share));
}
int __pkvm_host_share_hyp(u64 pfn)
{
int ret;
u64 host_addr = hyp_pfn_to_phys(pfn);
u64 hyp_addr = (u64)__hyp_va(host_addr);
struct pkvm_mem_share share = {
.tx = {
.nr_pages = 1,
.initiator = {
.id = PKVM_ID_HOST,
.addr = host_addr,
.host = {
.completer_addr = hyp_addr,
},
},
.completer = {
.id = PKVM_ID_HYP,
},
},
.completer_prot = PAGE_HYP,
};
host_lock_component();
hyp_lock_component();
ret = do_share(&share);
hyp_unlock_component();
host_unlock_component();
return ret;
}
int __pkvm_host_unshare_hyp(u64 pfn)
{
int ret;
u64 host_addr = hyp_pfn_to_phys(pfn);
u64 hyp_addr = (u64)__hyp_va(host_addr);
struct pkvm_mem_share share = {
.tx = {
.nr_pages = 1,
.initiator = {
.id = PKVM_ID_HOST,
.addr = host_addr,
.host = {
.completer_addr = hyp_addr,
},
},
.completer = {
.id = PKVM_ID_HYP,
},
},
.completer_prot = PAGE_HYP,
};
host_lock_component();
hyp_lock_component();
ret = do_unshare(&share);
hyp_unlock_component();
host_unlock_component();
return ret;
}
......@@ -166,6 +166,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
kvm_pte_t pte = *ptep;
......@@ -174,6 +175,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
if (!kvm_pte_valid(pte))
return 0;
/*
* Fix-up the refcount for the page-table pages as the early allocator
* was unable to access the hyp_vmemmap and so the buddy allocator has
* initialised the refcount to '1'.
*/
mm_ops->get_page(ptep);
if (flag != KVM_PGTABLE_WALK_LEAF)
return 0;
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
......@@ -206,7 +216,8 @@ static int finalize_host_mappings(void)
{
struct kvm_pgtable_walker walker = {
.cb = finalize_host_mappings_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pkvm_pgtable.mm_ops,
};
int i, ret;
......@@ -241,19 +252,20 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
ret = finalize_host_mappings();
if (ret)
goto out;
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.get_page = hpool_get_page,
.put_page = hpool_put_page,
.page_count = hyp_page_count,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
ret = finalize_host_mappings();
if (ret)
goto out;
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
......
......@@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
return prot;
}
static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
{
/*
* Tolerate KVM recreating the exact same mapping, or changing software
* bits if the existing mapping was valid.
*/
if (old == new)
return false;
if (!kvm_pte_valid(old))
return true;
return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
}
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
{
......@@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
data->phys += granule;
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
if (hyp_pte_needs_update(old, new))
smp_store_release(ptep, new);
if (old == new)
return true;
if (!kvm_pte_valid(old))
data->mm_ops->get_page(ptep);
else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
return false;
data->phys += granule;
smp_store_release(ptep, new);
return true;
}
......@@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
return -ENOMEM;
kvm_set_table_pte(ptep, childp, mm_ops);
mm_ops->get_page(ptep);
return 0;
}
......@@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
return ret;
}
struct hyp_unmap_data {
u64 unmapped;
struct kvm_pgtable_mm_ops *mm_ops;
};
static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
kvm_pte_t pte = *ptep, *childp = NULL;
u64 granule = kvm_granule_size(level);
struct hyp_unmap_data *data = arg;
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
if (!kvm_pte_valid(pte))
return -EINVAL;
if (kvm_pte_table(pte, level)) {
childp = kvm_pte_follow(pte, mm_ops);
if (mm_ops->page_count(childp) != 1)
return 0;
kvm_clear_pte(ptep);
dsb(ishst);
__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
} else {
if (end - addr < granule)
return -EINVAL;
kvm_clear_pte(ptep);
dsb(ishst);
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
data->unmapped += granule;
}
dsb(ish);
isb();
mm_ops->put_page(ptep);
if (childp)
mm_ops->put_page(childp);
return 0;
}
u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
struct hyp_unmap_data unmap_data = {
.mm_ops = pgt->mm_ops,
};
struct kvm_pgtable_walker walker = {
.cb = hyp_unmap_walker,
.arg = &unmap_data,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
};
if (!pgt->mm_ops->page_count)
return 0;
kvm_pgtable_walk(pgt, addr, size, &walker);
return unmap_data.unmapped;
}
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
......@@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
kvm_pte_t pte = *ptep;
if (!kvm_pte_valid(pte))
return 0;
mm_ops->put_page(ptep);
if (kvm_pte_table(pte, level))
mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
return 0;
}
......@@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
struct kvm_pgtable_walker walker = {
.cb = hyp_free_walker,
.flags = KVM_PGTABLE_WALK_TABLE_POST,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pgt->mm_ops,
};
......
......@@ -284,14 +284,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
}
}
static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
struct hyp_shared_pfn {
u64 pfn;
int count;
struct rb_node node;
};
static DEFINE_MUTEX(hyp_shared_pfns_lock);
static struct rb_root hyp_shared_pfns = RB_ROOT;
static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
struct rb_node **parent)
{
phys_addr_t addr;
struct hyp_shared_pfn *this;
*node = &hyp_shared_pfns.rb_node;
*parent = NULL;
while (**node) {
this = container_of(**node, struct hyp_shared_pfn, node);
*parent = **node;
if (this->pfn < pfn)
*node = &((**node)->rb_left);
else if (this->pfn > pfn)
*node = &((**node)->rb_right);
else
return this;
}
return NULL;
}
static int share_pfn_hyp(u64 pfn)
{
struct rb_node **node, *parent;
struct hyp_shared_pfn *this;
int ret = 0;
mutex_lock(&hyp_shared_pfns_lock);
this = find_shared_pfn(pfn, &node, &parent);
if (this) {
this->count++;
goto unlock;
}
this = kzalloc(sizeof(*this), GFP_KERNEL);
if (!this) {
ret = -ENOMEM;
goto unlock;
}
this->pfn = pfn;
this->count = 1;
rb_link_node(&this->node, parent, node);
rb_insert_color(&this->node, &hyp_shared_pfns);
ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
unlock:
mutex_unlock(&hyp_shared_pfns_lock);
return ret;
}
static int unshare_pfn_hyp(u64 pfn)
{
struct rb_node **node, *parent;
struct hyp_shared_pfn *this;
int ret = 0;
mutex_lock(&hyp_shared_pfns_lock);
this = find_shared_pfn(pfn, &node, &parent);
if (WARN_ON(!this)) {
ret = -ENOENT;
goto unlock;
}
this->count--;
if (this->count)
goto unlock;
rb_erase(&this->node, &hyp_shared_pfns);
kfree(this);
ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
unlock:
mutex_unlock(&hyp_shared_pfns_lock);
return ret;
}
int kvm_share_hyp(void *from, void *to)
{
phys_addr_t start, end, cur;
u64 pfn;
int ret;
for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
__phys_to_pfn(addr));
if (is_kernel_in_hyp_mode())
return 0;
/*
* The share hcall maps things in the 'fixed-offset' region of the hyp
* VA space, so we can only share physically contiguous data-structures
* for now.
*/
if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
return -EINVAL;
if (kvm_host_owns_hyp_mappings())
return create_hyp_mappings(from, to, PAGE_HYP);
start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
end = PAGE_ALIGN(__pa(to));
for (cur = start; cur < end; cur += PAGE_SIZE) {
pfn = __phys_to_pfn(cur);
ret = share_pfn_hyp(pfn);
if (ret)
return ret;
}
......@@ -299,6 +402,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
return 0;
}
void kvm_unshare_hyp(void *from, void *to)
{
phys_addr_t start, end, cur;
u64 pfn;
if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
return;
start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
end = PAGE_ALIGN(__pa(to));
for (cur = start; cur < end; cur += PAGE_SIZE) {
pfn = __phys_to_pfn(cur);
WARN_ON(unshare_pfn_hyp(pfn));
}
}
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
......@@ -319,12 +438,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
if (is_kernel_in_hyp_mode())
return 0;
if (!kvm_host_owns_hyp_mappings()) {
if (WARN_ON(prot != PAGE_HYP))
return -EPERM;
return pkvm_share_hyp(kvm_kaddr_to_phys(from),
kvm_kaddr_to_phys(to));
}
if (!kvm_host_owns_hyp_mappings())
return -EPERM;
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
......
......@@ -113,7 +113,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
if (!buf)
return -ENOMEM;
ret = create_hyp_mappings(buf, buf + reg_sz, PAGE_HYP);
ret = kvm_share_hyp(buf, buf + reg_sz);
if (ret) {
kfree(buf);
return ret;
......@@ -150,7 +150,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
kfree(vcpu->arch.sve_state);
void *sve_state = vcpu->arch.sve_state;
kvm_vcpu_unshare_task_fp(vcpu);
kvm_unshare_hyp(vcpu, vcpu + 1);
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
kfree(sve_state);
}
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册