diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 4ae19247837b4dd87f8d8c34bfe39867bb47041b..eea2e2b0acaaf651924021d2f2aad02bace2313c 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -238,6 +238,26 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc); +/** + * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to + * track ownership. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Base intermediate physical address to annotate. + * @size: Size of the annotated range. + * @mc: Cache of pre-allocated and zeroed memory from which to allocate + * page-table pages. + * @owner_id: Unique identifier for the owner of the page. + * + * By default, all page-tables are owned by identifier 0. This function can be + * used to mark portions of the IPA space as owned by other entities. When a + * stage 2 is used with identity-mappings, these annotations allow to use the + * page-table data structure as a simple rmap. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, + void *mc, u8 owner_id); + /** * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0a674010afb60d5b2afe3db109f11233747a237a..eefb226e1d1ebcd208172ff80ba907a56a339809 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -48,6 +48,9 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) +#define KVM_MAX_OWNER_ID 1 + struct kvm_pgtable_walk_data { struct kvm_pgtable *pgt; struct kvm_pgtable_walker *walker; @@ -67,6 +70,13 @@ static u64 kvm_granule_size(u32 level) return BIT(kvm_granule_shift(level)); } +#define KVM_PHYS_INVALID (-1ULL) + +static bool kvm_phys_is_valid(u64 phys) +{ + return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX)); +} + static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) { u64 granule = kvm_granule_size(level); @@ -81,7 +91,10 @@ static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) if (granule > (end - addr)) return false; - return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule); + if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule)) + return false; + + return IS_ALIGNED(addr, granule); } static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) @@ -186,6 +199,11 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) return pte; } +static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) +{ + return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); +} + static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag) @@ -441,6 +459,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) struct stage2_map_data { u64 phys; kvm_pte_t attr; + u8 owner_id; kvm_pte_t *anchor; kvm_pte_t *childp; @@ -507,6 +526,39 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot, return 0; } +static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new) +{ + if (!kvm_pte_valid(old) || !kvm_pte_valid(new)) + return true; + + return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)); +} + +static bool stage2_pte_is_counted(kvm_pte_t pte) +{ + /* + * The refcount tracks valid entries as well as invalid entries if they + * encode ownership of a page to another entity than the page-table + * owner, whose id is 0. + */ + return !!pte; +} + +static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, + u32 level, struct kvm_pgtable_mm_ops *mm_ops) +{ + /* + * Clear the existing PTE, and perform break-before-make with + * TLB maintenance if it was valid. + */ + if (kvm_pte_valid(*ptep)) { + kvm_clear_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); + } + + mm_ops->put_page(ptep); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) @@ -518,29 +570,29 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return -E2BIG; - new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (kvm_pte_valid(old)) { + if (kvm_phys_is_valid(phys)) + new = kvm_init_valid_leaf_pte(phys, data->attr, level); + else + new = kvm_init_invalid_leaf_owner(data->owner_id); + + if (stage2_pte_is_counted(old)) { /* * Skip updating the PTE if we are trying to recreate the exact * same mapping or only change the access permissions. Instead, * the vCPU will exit one more time from guest if still needed * and then go through the path of relaxing permissions. */ - if (!((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS))) + if (!stage2_pte_needs_update(old, new)) return -EAGAIN; - /* - * There's an existing different valid leaf entry, so perform - * break-before-make. - */ - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); - mm_ops->put_page(ptep); + stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); } smp_store_release(ptep, new); - mm_ops->get_page(ptep); - data->phys += granule; + if (stage2_pte_is_counted(new)) + mm_ops->get_page(ptep); + if (kvm_phys_is_valid(phys)) + data->phys += granule; return 0; } @@ -575,7 +627,7 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, int ret; if (data->anchor) { - if (kvm_pte_valid(pte)) + if (stage2_pte_is_counted(pte)) mm_ops->put_page(ptep); return 0; @@ -600,11 +652,8 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * a table. Accesses beyond 'end' that fall within the new table * will be mapped lazily. */ - if (kvm_pte_valid(pte)) { - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); - mm_ops->put_page(ptep); - } + if (stage2_pte_is_counted(pte)) + stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); kvm_set_table_pte(ptep, childp, mm_ops); mm_ops->get_page(ptep); @@ -702,6 +751,32 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, + void *mc, u8 owner_id) +{ + int ret; + struct stage2_map_data map_data = { + .phys = KVM_PHYS_INVALID, + .mmu = pgt->mmu, + .memcache = mc, + .mm_ops = pgt->mm_ops, + .owner_id = owner_id, + }; + struct kvm_pgtable_walker walker = { + .cb = stage2_map_walker, + .flags = KVM_PGTABLE_WALK_TABLE_PRE | + KVM_PGTABLE_WALK_LEAF | + KVM_PGTABLE_WALK_TABLE_POST, + .arg = &map_data, + }; + + if (owner_id > KVM_MAX_OWNER_ID) + return -EINVAL; + + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + return ret; +} + static void stage2_flush_dcache(void *addr, u64 size) { if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) @@ -726,8 +801,13 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, kvm_pte_t pte = *ptep, *childp = NULL; bool need_flush = false; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(pte)) { + if (stage2_pte_is_counted(pte)) { + kvm_clear_pte(ptep); + mm_ops->put_page(ptep); + } return 0; + } if (kvm_pte_table(pte, level)) { childp = kvm_pte_follow(pte, mm_ops); @@ -743,9 +823,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * block entry and rely on the remaining portions being faulted * back lazily. */ - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); - mm_ops->put_page(ptep); + stage2_put_pte(ptep, mmu, addr, level, mm_ops); if (need_flush) { stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), @@ -949,7 +1027,7 @@ static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable_mm_ops *mm_ops = arg; kvm_pte_t pte = *ptep; - if (!kvm_pte_valid(pte)) + if (!stage2_pte_is_counted(pte)) return 0; mm_ops->put_page(ptep);