提交 71d03935 编写于 作者: P Paolo Bonzini

Merge tag 'kvmarm-fixes-6.2-1' of...

Merge tag 'kvmarm-fixes-6.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-master

KVM/arm64 fixes for 6.2, take #1

- Fix the PMCR_EL0 reset value after the PMU rework

- Correctly handle S2 fault triggered by a S1 page table walk
  by not always classifying it as a write, as this breaks on
  R/O memslots

- Document why we cannot exit with KVM_EXIT_MMIO when taking
  a write fault from a S1 PTW on a R/O memslot

- Put the Apple M2 on the naughty step for not being able to
  correctly implement the vgic SEIS feature, just liek the M1
  before it

- Reviewer updates: Alex is stepping down, replaced by Zenghui
...@@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an ...@@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an
mmap() that affects the region will be made visible immediately. Another mmap() that affects the region will be made visible immediately. Another
example is madvise(MADV_DROP). example is madvise(MADV_DROP).
Note: On arm64, a write generated by the page-table walker (to update
the Access and Dirty flags, for example) never results in a
KVM_EXIT_MMIO exit when the slot has the KVM_MEM_READONLY flag. This
is because KVM cannot provide the data that would be written by the
page-table walker, making it impossible to emulate the access.
Instead, an abort (data abort if the cause of the page-table update
was a load or a store, instruction abort if it was an instruction
fetch) is injected in the guest.
4.36 KVM_SET_TSS_ADDR 4.36 KVM_SET_TSS_ADDR
--------------------- ---------------------
......
...@@ -11356,9 +11356,9 @@ F: virt/kvm/* ...@@ -11356,9 +11356,9 @@ F: virt/kvm/*
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M: Marc Zyngier <maz@kernel.org> M: Marc Zyngier <maz@kernel.org>
R: James Morse <james.morse@arm.com> R: James Morse <james.morse@arm.com>
R: Alexandru Elisei <alexandru.elisei@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com> R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Oliver Upton <oliver.upton@linux.dev> R: Oliver Upton <oliver.upton@linux.dev>
R: Zenghui Yu <yuzenghui@huawei.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.linux.dev L: kvmarm@lists.linux.dev
L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)
......
...@@ -124,6 +124,8 @@ ...@@ -124,6 +124,8 @@
#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 #define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 #define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define APPLE_CPU_PART_M2_BLIZZARD 0x032
#define APPLE_CPU_PART_M2_AVALANCHE 0x033
#define AMPERE_CPU_PART_AMPERE1 0xAC3 #define AMPERE_CPU_PART_AMPERE1 0xAC3
...@@ -177,6 +179,8 @@ ...@@ -177,6 +179,8 @@
#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) #define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
......
...@@ -114,6 +114,15 @@ ...@@ -114,6 +114,15 @@
#define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04) #define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C) #define ESR_ELx_FSC_PERM (0x0C)
#define ESR_ELx_FSC_SEA_TTW0 (0x14)
#define ESR_ELx_FSC_SEA_TTW1 (0x15)
#define ESR_ELx_FSC_SEA_TTW2 (0x16)
#define ESR_ELx_FSC_SEA_TTW3 (0x17)
#define ESR_ELx_FSC_SECC (0x18)
#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
/* ISS field definitions for Data Aborts */ /* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24) #define ESR_ELx_ISV_SHIFT (24)
......
...@@ -319,21 +319,6 @@ ...@@ -319,21 +319,6 @@
BIT(18) | \ BIT(18) | \
GENMASK(16, 15)) GENMASK(16, 15))
/* For compatibility with fault code shared with 32-bit */
#define FSC_FAULT ESR_ELx_FSC_FAULT
#define FSC_ACCESS ESR_ELx_FSC_ACCESS
#define FSC_PERM ESR_ELx_FSC_PERM
#define FSC_SEA ESR_ELx_FSC_EXTABT
#define FSC_SEA_TTW0 (0x14)
#define FSC_SEA_TTW1 (0x15)
#define FSC_SEA_TTW2 (0x16)
#define FSC_SEA_TTW3 (0x17)
#define FSC_SECC (0x18)
#define FSC_SECC_TTW0 (0x1c)
#define FSC_SECC_TTW1 (0x1d)
#define FSC_SECC_TTW2 (0x1e)
#define FSC_SECC_TTW3 (0x1f)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf)) #define HPFAR_MASK (~UL(0xf))
/* /*
......
...@@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *v ...@@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *v
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{ {
switch (kvm_vcpu_trap_get_fault(vcpu)) { switch (kvm_vcpu_trap_get_fault(vcpu)) {
case FSC_SEA: case ESR_ELx_FSC_EXTABT:
case FSC_SEA_TTW0: case ESR_ELx_FSC_SEA_TTW0:
case FSC_SEA_TTW1: case ESR_ELx_FSC_SEA_TTW1:
case FSC_SEA_TTW2: case ESR_ELx_FSC_SEA_TTW2:
case FSC_SEA_TTW3: case ESR_ELx_FSC_SEA_TTW3:
case FSC_SECC: case ESR_ELx_FSC_SECC:
case FSC_SECC_TTW0: case ESR_ELx_FSC_SECC_TTW0:
case FSC_SECC_TTW1: case ESR_ELx_FSC_SECC_TTW1:
case FSC_SECC_TTW2: case ESR_ELx_FSC_SECC_TTW2:
case FSC_SECC_TTW3: case ESR_ELx_FSC_SECC_TTW3:
return true; return true;
default: default:
return false; return false;
...@@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) ...@@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{ {
if (kvm_vcpu_abt_iss1tw(vcpu)) if (kvm_vcpu_abt_iss1tw(vcpu)) {
return true; /*
* Only a permission fault on a S1PTW should be
* considered as a write. Otherwise, page tables baked
* in a read-only memslot will result in an exception
* being delivered in the guest.
*
* The drawback is that we end-up faulting twice if the
* guest is using any of HW AF/DB: a translation fault
* to map the page containing the PT (read only at
* first), then a permission fault to allow the flags
* to be set.
*/
switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
case ESR_ELx_FSC_PERM:
return true;
default:
return false;
}
}
if (kvm_vcpu_trap_is_iabt(vcpu)) if (kvm_vcpu_trap_is_iabt(vcpu))
return false; return false;
......
...@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) ...@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
*/ */
if (!(esr & ESR_ELx_S1PTW) && if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) || (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar)) if (!__translate_far_to_hpfar(far, &hpfar))
return false; return false;
} else { } else {
......
...@@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) ...@@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid; bool valid;
valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
kvm_vcpu_dabt_isvalid(vcpu) && kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) && !kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu); !kvm_vcpu_abt_iss1tw(vcpu);
......
...@@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, ...@@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault); VM_BUG_ON(write_fault && exec_fault);
if (fault_status == FSC_PERM && !write_fault && !exec_fault) { if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n"); kvm_err("Unexpected L2 read permission error\n");
return -EFAULT; return -EFAULT;
} }
...@@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, ...@@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* only exception to this is when dirty logging is enabled at runtime * only exception to this is when dirty logging is enabled at runtime
* and a write fault needs to collapse a block entry into a table. * and a write fault needs to collapse a block entry into a table.
*/ */
if (fault_status != FSC_PERM || (logging_active && write_fault)) { if (fault_status != ESR_ELx_FSC_PERM ||
(logging_active && write_fault)) {
ret = kvm_mmu_topup_memory_cache(memcache, ret = kvm_mmu_topup_memory_cache(memcache,
kvm_mmu_cache_min_pages(kvm)); kvm_mmu_cache_min_pages(kvm));
if (ret) if (ret)
...@@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, ...@@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* backed by a THP and thus use block mapping if possible. * backed by a THP and thus use block mapping if possible.
*/ */
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE) if (fault_status == ESR_ELx_FSC_PERM &&
fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule; vma_pagesize = fault_granule;
else else
vma_pagesize = transparent_hugepage_adjust(kvm, memslot, vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
...@@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, ...@@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
&fault_ipa); &fault_ipa);
} }
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */ /* Check the VMM hasn't introduced a new disallowed VMA */
if (kvm_vma_mte_allowed(vma)) { if (kvm_vma_mte_allowed(vma)) {
sanitise_mte_tags(kvm, pfn, vma_pagesize); sanitise_mte_tags(kvm, pfn, vma_pagesize);
...@@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, ...@@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* permissions only if vma_pagesize equals fault_granule. Otherwise, * permissions only if vma_pagesize equals fault_granule. Otherwise,
* kvm_pgtable_stage2_map() should be called to change block size. * kvm_pgtable_stage2_map() should be called to change block size.
*/ */
if (fault_status == FSC_PERM && vma_pagesize == fault_granule) if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
else else
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
...@@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) ...@@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
if (fault_status == FSC_FAULT) { if (fault_status == ESR_ELx_FSC_FAULT) {
/* Beyond sanitised PARange (which is the IPA limit) */ /* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu); kvm_inject_size_fault(vcpu);
...@@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) ...@@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
kvm_vcpu_get_hfar(vcpu), fault_ipa); kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */ /* Check the stage-2 fault is trans. fault or write fault */
if (fault_status != FSC_FAULT && fault_status != FSC_PERM && if (fault_status != ESR_ELx_FSC_FAULT &&
fault_status != FSC_ACCESS) { fault_status != ESR_ELx_FSC_PERM &&
fault_status != ESR_ELx_FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu), kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
...@@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) ...@@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
/* Userspace should not be able to register out-of-bounds IPAs */ /* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
if (fault_status == FSC_ACCESS) { if (fault_status == ESR_ELx_FSC_ACCESS) {
handle_access_fault(vcpu, fault_ipa); handle_access_fault(vcpu, fault_ipa);
ret = 1; ret = 1;
goto out_unlock; goto out_unlock;
......
...@@ -646,7 +646,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) ...@@ -646,7 +646,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return; return;
/* Only preserve PMCR_EL0.N, and reset the rest to 0 */ /* Only preserve PMCR_EL0.N, and reset the rest to 0 */
pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
if (!kvm_supports_32bit_el0()) if (!kvm_supports_32bit_el0())
pmcr |= ARMV8_PMU_PMCR_LC; pmcr |= ARMV8_PMU_PMCR_LC;
......
...@@ -616,6 +616,8 @@ static const struct midr_range broken_seis[] = { ...@@ -616,6 +616,8 @@ static const struct midr_range broken_seis[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
{}, {},
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册