提交 8a5de182 编写于 作者: L Linus Torvalds

Merge tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm

Pull second batch of changes for KVM/{arm,arm64} from Marc Zyngier:
 "The most obvious thing is the sizeable MMU changes to support 48bit
  VAs on arm64.

  Summary:

   - support for 48bit IPA and VA (EL2)
   - a number of fixes for devices mapped into guests
   - yet another VGIC fix for BE
   - a fix for CPU hotplug
   - a few compile fixes (disabled VGIC, strict mm checks)"

[ I'm pulling directly from Marc at the request of Paolo Bonzini, whose
  backpack was stolen at Düsseldorf airport and will do new keys and
  rebuild his web of trust.    - Linus ]

* tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm:
  arm/arm64: KVM: Fix BE accesses to GICv2 EISR and ELRSR regs
  arm: kvm: STRICT_MM_TYPECHECKS fix for user_mem_abort
  arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE
  arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2
  arm/arm64: KVM: map MMIO regions at creation time
  arm64: kvm: define PAGE_S2_DEVICE as read-only by default
  ARM: kvm: define PAGE_S2_DEVICE as read-only by default
  arm/arm64: KVM: add 'writable' parameter to kvm_phys_addr_ioremap
  arm/arm64: KVM: fix potential NULL dereference in user_mem_abort()
  arm/arm64: KVM: use __GFP_ZERO not memset() to get zeroed pages
  ARM: KVM: fix vgic-disabled build
  arm: kvm: fix CPU hotplug
......@@ -37,6 +37,11 @@
*/
#define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE)
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
*/
#define KVM_MMU_CACHE_MIN_PAGES 2
#ifndef __ASSEMBLY__
#include <asm/cacheflush.h>
......@@ -50,7 +55,7 @@ void free_hyp_pgds(void);
int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size);
phys_addr_t pa, unsigned long size, bool writable);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
......@@ -83,6 +88,11 @@ static inline void kvm_clean_pgd(pgd_t *pgd)
clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
}
static inline void kvm_clean_pmd(pmd_t *pmd)
{
clean_dcache_area(pmd, PTRS_PER_PMD * sizeof(pmd_t));
}
static inline void kvm_clean_pmd_entry(pmd_t *pmd)
{
clean_pmd_entry(pmd);
......@@ -123,10 +133,23 @@ static inline bool kvm_page_empty(void *ptr)
}
#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(pudp) (0)
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(kvm, pudp) (0)
#define KVM_PREALLOC_LEVEL 0
static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
{
return 0;
}
static inline void kvm_free_hwpgd(struct kvm *kvm) { }
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
return kvm->arch.pgd;
}
struct kvm;
......
......@@ -100,7 +100,7 @@ extern pgprot_t pgprot_s2_device;
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP)
#define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
......
......@@ -409,7 +409,7 @@ static void update_vttbr(struct kvm *kvm)
kvm_next_vmid++;
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);
pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
kvm->arch.vttbr = pgd_phys | vmid;
......@@ -808,6 +808,7 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_STARTING:
case CPU_STARTING_FROZEN:
if (__hyp_get_vectors() == hyp_default_vectors)
cpu_init_hyp_mode(NULL);
break;
}
......
......@@ -433,10 +433,17 @@ ARM_BE8(rev r10, r10 )
str r3, [r11, #VGIC_V2_CPU_HCR]
str r4, [r11, #VGIC_V2_CPU_VMCR]
str r5, [r11, #VGIC_V2_CPU_MISR]
#ifdef CONFIG_CPU_ENDIAN_BE8
str r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
str r7, [r11, #VGIC_V2_CPU_EISR]
str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
str r9, [r11, #VGIC_V2_CPU_ELRSR]
#else
str r6, [r11, #VGIC_V2_CPU_EISR]
str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
str r8, [r11, #VGIC_V2_CPU_ELRSR]
str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
#endif
str r10, [r11, #VGIC_V2_CPU_APR]
/* Clear GICH_HCR */
......
......@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
......@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
}
} while (pte++, addr += PAGE_SIZE, addr != end);
if (kvm_pte_table_empty(start_pte))
if (kvm_pte_table_empty(kvm, start_pte))
clear_pmd_entry(kvm, pmd, start_addr);
}
......@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
}
} while (pmd++, addr = next, addr != end);
if (kvm_pmd_table_empty(start_pmd))
if (kvm_pmd_table_empty(kvm, start_pmd))
clear_pud_entry(kvm, pud, start_addr);
}
......@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
}
} while (pud++, addr = next, addr != end);
if (kvm_pud_table_empty(start_pud))
if (kvm_pud_table_empty(kvm, start_pud))
clear_pgd_entry(kvm, pgd, start_addr);
}
......@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
if (boot_hyp_pgd) {
unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
free_pages((unsigned long)boot_hyp_pgd, pgd_order);
free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
boot_hyp_pgd = NULL;
}
......@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
free_pages((unsigned long)hyp_pgd, pgd_order);
free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
hyp_pgd = NULL;
}
......@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
return 0;
}
static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
unsigned long end, unsigned long pfn,
pgprot_t prot)
{
pud_t *pud;
pmd_t *pmd;
unsigned long addr, next;
int ret;
addr = start;
do {
pud = pud_offset(pgd, addr);
if (pud_none_or_clear_bad(pud)) {
pmd = pmd_alloc_one(NULL, addr);
if (!pmd) {
kvm_err("Cannot allocate Hyp pmd\n");
return -ENOMEM;
}
pud_populate(NULL, pud, pmd);
get_page(virt_to_page(pud));
kvm_flush_dcache_to_poc(pud, sizeof(*pud));
}
next = pud_addr_end(addr, end);
ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
if (ret)
return ret;
pfn += (next - addr) >> PAGE_SHIFT;
} while (addr = next, addr != end);
return 0;
}
static int __create_hyp_mappings(pgd_t *pgdp,
unsigned long start, unsigned long end,
unsigned long pfn, pgprot_t prot)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
unsigned long addr, next;
int err = 0;
......@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
end = PAGE_ALIGN(end);
do {
pgd = pgdp + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none_or_clear_bad(pud)) {
pmd = pmd_alloc_one(NULL, addr);
if (!pmd) {
kvm_err("Cannot allocate Hyp pmd\n");
if (pgd_none(*pgd)) {
pud = pud_alloc_one(NULL, addr);
if (!pud) {
kvm_err("Cannot allocate Hyp pud\n");
err = -ENOMEM;
goto out;
}
pud_populate(NULL, pud, pmd);
get_page(virt_to_page(pud));
kvm_flush_dcache_to_poc(pud, sizeof(*pud));
pgd_populate(NULL, pgd, pud);
get_page(virt_to_page(pgd));
kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
}
next = pgd_addr_end(addr, end);
err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
if (err)
goto out;
pfn += (next - addr) >> PAGE_SHIFT;
......@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
int ret;
pgd_t *pgd;
if (kvm->arch.pgd != NULL) {
......@@ -528,15 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
return -EINVAL;
}
pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
if (KVM_PREALLOC_LEVEL > 0) {
/*
* Allocate fake pgd for the page table manipulation macros to
* work. This is not used by the hardware and we have no
* alignment requirement for this allocation.
*/
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
GFP_KERNEL | __GFP_ZERO);
} else {
/*
* Allocate actual first-level Stage-2 page table used by the
* hardware for Stage-2 page table walks.
*/
pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
}
if (!pgd)
return -ENOMEM;
memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
ret = kvm_prealloc_hwpgd(kvm, pgd);
if (ret)
goto out_err;
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
out_err:
if (KVM_PREALLOC_LEVEL > 0)
kfree(pgd);
else
free_pages((unsigned long)pgd, S2_PGD_ORDER);
return ret;
}
/**
......@@ -572,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
kvm_free_hwpgd(kvm);
if (KVM_PREALLOC_LEVEL > 0)
kfree(kvm->arch.pgd);
else
free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
kvm->arch.pgd = NULL;
}
static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pgd = kvm->arch.pgd + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (WARN_ON(pgd_none(*pgd))) {
if (!cache)
return NULL;
pud = mmu_memory_cache_alloc(cache);
pgd_populate(NULL, pgd, pud);
get_page(virt_to_page(pgd));
}
return pud_offset(pgd, addr);
}
static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
pud_t *pud;
pmd_t *pmd;
pud = stage2_get_pud(kvm, cache, addr);
if (pud_none(*pud)) {
if (!cache)
return NULL;
......@@ -630,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
pmd_t *pmd;
pte_t *pte, old_pte;
/* Create stage-2 page table mapping - Level 1 */
/* Create stage-2 page table mapping - Levels 0 and 1 */
pmd = stage2_get_pmd(kvm, cache, addr);
if (!pmd) {
/*
......@@ -675,7 +751,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* @size: The size of the mapping
*/
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size)
phys_addr_t pa, unsigned long size, bool writable)
{
phys_addr_t addr, end;
int ret = 0;
......@@ -688,7 +764,11 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
ret = mmu_topup_memory_cache(&cache, 2, 2);
if (writable)
kvm_set_s2pte_writable(&pte);
ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
......@@ -777,6 +857,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
/* Let's check if we will get back a huge page backed by hugetlbfs */
down_read(&current->mm->mmap_sem);
vma = find_vma_intersection(current->mm, hva, hva + 1);
if (unlikely(!vma)) {
kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
up_read(&current->mm->mmap_sem);
return -EFAULT;
}
if (is_vm_hugetlb_page(vma)) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
......@@ -797,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
up_read(&current->mm->mmap_sem);
/* We need minimum second+third level pages */
ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
if (ret)
return ret;
......@@ -843,7 +930,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
mem_type == PAGE_S2_DEVICE);
pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
}
......@@ -916,6 +1003,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
/* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
......@@ -1072,8 +1162,8 @@ int kvm_mmu_init(void)
(unsigned long)phys_base);
}
hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
if (!hyp_pgd || !boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
......@@ -1126,13 +1216,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
{
gpa_t gpa = old->base_gfn << PAGE_SHIFT;
phys_addr_t size = old->npages << PAGE_SHIFT;
if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
spin_lock(&kvm->mmu_lock);
unmap_stage2_range(kvm, gpa, size);
spin_unlock(&kvm->mmu_lock);
}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
......@@ -1140,7 +1223,77 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
hva_t hva = mem->userspace_addr;
hva_t reg_end = hva + mem->memory_size;
bool writable = !(mem->flags & KVM_MEM_READONLY);
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
return 0;
/*
* Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space.
*/
if (memslot->base_gfn + memslot->npages >=
(KVM_PHYS_SIZE >> PAGE_SHIFT))
return -EFAULT;
/*
* A memory region could potentially cover multiple VMAs, and any holes
* between them, so iterate over all of them to find out if we can map
* any of them right now.
*
* +--------------------------------------------+
* +---------------+----------------+ +----------------+
* | : VMA 1 | VMA 2 | | VMA 3 : |
* +---------------+----------------+ +----------------+
* | memory region |
* +--------------------------------------------+
*/
do {
struct vm_area_struct *vma = find_vma(current->mm, hva);
hva_t vm_start, vm_end;
if (!vma || vma->vm_start >= reg_end)
break;
/*
* Mapping a read-only VMA is only allowed if the
* memory region is configured as read-only.
*/
if (writable && !(vma->vm_flags & VM_WRITE)) {
ret = -EPERM;
break;
}
/*
* Take the intersection of this VMA with the memory region
*/
vm_start = max(hva, vma->vm_start);
vm_end = min(reg_end, vma->vm_end);
if (vma->vm_flags & VM_PFNMAP) {
gpa_t gpa = mem->guest_phys_addr +
(vm_start - mem->userspace_addr);
phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
vm_start - vma->vm_start;
ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
vm_end - vm_start,
writable);
if (ret)
break;
}
hva = vm_end;
} while (hva < reg_end);
if (ret) {
spin_lock(&kvm->mmu_lock);
unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
spin_unlock(&kvm->mmu_lock);
}
return ret;
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
......@@ -1165,4 +1318,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
unmap_stage2_range(kvm, gpa, size);
spin_unlock(&kvm->mmu_lock);
}
......@@ -41,6 +41,18 @@
*/
#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK)
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
* levels in addition to the PGD and potentially the PUD which are
* pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
* tables use one level of tables less than the kernel.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif
#ifdef __ASSEMBLY__
/*
......@@ -53,6 +65,7 @@
#else
#include <asm/pgalloc.h>
#include <asm/cachetype.h>
#include <asm/cacheflush.h>
......@@ -65,10 +78,6 @@
#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
/* Make sure we get the right size, and thus the right alignment */
#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
int create_hyp_mappings(void *from, void *to);
int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
void free_boot_hyp_pgd(void);
......@@ -77,7 +86,7 @@ void free_hyp_pgds(void);
int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size);
phys_addr_t pa, unsigned long size, bool writable);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
......@@ -93,6 +102,7 @@ void kvm_clear_hyp_idmap(void);
#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
static inline void kvm_clean_pgd(pgd_t *pgd) {}
static inline void kvm_clean_pmd(pmd_t *pmd) {}
static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
static inline void kvm_clean_pte(pte_t *pte) {}
static inline void kvm_clean_pte_entry(pte_t *pte) {}
......@@ -111,19 +121,116 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
/*
* In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
* the entire IPA input range with a single pgd entry, and we would only need
* one pgd entry. Note that in this case, the pgd is actually not used by
* the MMU for Stage-2 translations, but is merely a fake pgd used as a data
* structure for the kernel pgtable macros to work.
*/
#if PGDIR_SHIFT > KVM_PHYS_SHIFT
#define PTRS_PER_S2_PGD_SHIFT 0
#else
#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT)
#endif
#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
/*
* If we are concatenating first level stage-2 page tables, we would have less
* than or equal to 16 pointers in the fake PGD, because that's what the
* architecture allows. In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS)
* represents the first level for the host, and we add 1 to go to the next
* level (which uses contatenation) for the stage-2 tables.
*/
#if PTRS_PER_S2_PGD <= 16
#define KVM_PREALLOC_LEVEL (4 - CONFIG_ARM64_PGTABLE_LEVELS + 1)
#else
#define KVM_PREALLOC_LEVEL (0)
#endif
/**
* kvm_prealloc_hwpgd - allocate inital table for VTTBR
* @kvm: The KVM struct pointer for the VM.
* @pgd: The kernel pseudo pgd
*
* When the kernel uses more levels of page tables than the guest, we allocate
* a fake PGD and pre-populate it to point to the next-level page table, which
* will be the real initial page table pointed to by the VTTBR.
*
* When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
* the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
* allocate 2 consecutive PUD pages.
*/
static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
{
unsigned int i;
unsigned long hwpgd;
if (KVM_PREALLOC_LEVEL == 0)
return 0;
hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
if (!hwpgd)
return -ENOMEM;
for (i = 0; i < PTRS_PER_S2_PGD; i++) {
if (KVM_PREALLOC_LEVEL == 1)
pgd_populate(NULL, pgd + i,
(pud_t *)hwpgd + i * PTRS_PER_PUD);
else if (KVM_PREALLOC_LEVEL == 2)
pud_populate(NULL, pud_offset(pgd, 0) + i,
(pmd_t *)hwpgd + i * PTRS_PER_PMD);
}
return 0;
}
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
pgd_t *pgd = kvm->arch.pgd;
pud_t *pud;
if (KVM_PREALLOC_LEVEL == 0)
return pgd;
pud = pud_offset(pgd, 0);
if (KVM_PREALLOC_LEVEL == 1)
return pud;
BUG_ON(KVM_PREALLOC_LEVEL != 2);
return pmd_offset(pud, 0);
}
static inline void kvm_free_hwpgd(struct kvm *kvm)
{
if (KVM_PREALLOC_LEVEL > 0) {
unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
}
}
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
#ifndef CONFIG_ARM64_64K_PAGES
#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#ifdef __PAGETABLE_PMD_FOLDED
#define kvm_pmd_table_empty(kvm, pmdp) (0)
#else
#define kvm_pmd_table_empty(kvm, pmdp) \
(kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
#endif
#ifdef __PAGETABLE_PUD_FOLDED
#define kvm_pud_table_empty(kvm, pudp) (0)
#else
#define kvm_pmd_table_empty(pmdp) (0)
#define kvm_pud_table_empty(kvm, pudp) \
(kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
#endif
#define kvm_pud_table_empty(pudp) (0)
struct kvm;
......
......@@ -79,7 +79,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
......
......@@ -67,10 +67,14 @@ CPU_BE( rev w11, w11 )
str w4, [x3, #VGIC_V2_CPU_HCR]
str w5, [x3, #VGIC_V2_CPU_VMCR]
str w6, [x3, #VGIC_V2_CPU_MISR]
str w7, [x3, #VGIC_V2_CPU_EISR]
str w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
str w9, [x3, #VGIC_V2_CPU_ELRSR]
str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] )
CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] )
CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] )
CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] )
CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] )
CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] )
str w11, [x3, #VGIC_V2_CPU_APR]
/* Clear GICH_HCR */
......
......@@ -219,8 +219,8 @@ struct vgic_v2_cpu_if {
u32 vgic_hcr;
u32 vgic_vmcr;
u32 vgic_misr; /* Saved only */
u32 vgic_eisr[2]; /* Saved only */
u32 vgic_elrsr[2]; /* Saved only */
u64 vgic_eisr; /* Saved only */
u64 vgic_elrsr; /* Saved only */
u32 vgic_apr;
u32 vgic_lr[VGIC_V2_MAX_LRS];
};
......@@ -331,6 +331,14 @@ static inline int kvm_vgic_create(struct kvm *kvm)
return 0;
}
static inline void kvm_vgic_destroy(struct kvm *kvm)
{
}
static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
{
}
static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
return 0;
......
......@@ -71,35 +71,17 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
struct vgic_lr lr_desc)
{
if (!(lr_desc.state & LR_STATE_MASK))
__set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
}
static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
{
u64 val;
#if BITS_PER_LONG == 64
val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
val <<= 32;
val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
#else
val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
#endif
return val;
return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
}
static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
{
u64 val;
#if BITS_PER_LONG == 64
val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
val <<= 32;
val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
#else
val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
#endif
return val;
return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
}
static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
......
......@@ -145,6 +145,20 @@ static void vgic_free_bitmap(struct vgic_bitmap *b)
b->shared = NULL;
}
/*
* Call this function to convert a u64 value to an unsigned long * bitmask
* in a way that works on both 32-bit and 64-bit LE and BE platforms.
*
* Warning: Calling this function may modify *val.
*/
static unsigned long *u64_to_bitmask(u64 *val)
{
#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
*val = (*val >> 32) | (*val << 32);
#endif
return (unsigned long *)val;
}
static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
int cpuid, u32 offset)
{
......@@ -1442,7 +1456,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
* active bit.
*/
u64 eisr = vgic_get_eisr(vcpu);
unsigned long *eisr_ptr = (unsigned long *)&eisr;
unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
int lr;
for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
......@@ -1505,7 +1519,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
level_pending = vgic_process_maintenance(vcpu);
elrsr = vgic_get_elrsr(vcpu);
elrsr_ptr = (unsigned long *)&elrsr;
elrsr_ptr = u64_to_bitmask(&elrsr);
/* Clear mappings for empty LRs */
for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
......@@ -1899,7 +1913,8 @@ int kvm_vgic_init(struct kvm *kvm)
}
ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
true);
if (ret) {
kvm_err("Unable to remap VGIC CPU to VCPU\n");
goto out;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册