Commit 6aef0fdd authored by Catalin Marinas

Merge branch 'kpti' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Support for unmapping the kernel when running in userspace (aka
"KAISER").

* 'kpti' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: kaslr: Put kernel vectors address in separate data page
  arm64: mm: Introduce TTBR_ASID_MASK for getting at the ASID in the TTBR
  perf: arm_spe: Fail device probe when arm64_kernel_unmapped_at_el0()
  arm64: Kconfig: Add CONFIG_UNMAP_KERNEL_AT_EL0
  arm64: entry: Add fake CPU feature for unmapping the kernel at EL0
  arm64: tls: Avoid unconditional zeroing of tpidrro_el0 for native tasks
  arm64: erratum: Work around Falkor erratum #E1003 in trampoline code
  arm64: entry: Hook up entry trampoline to exception vectors
  arm64: entry: Explicitly pass exception level to kernel_ventry macro
  arm64: mm: Map entry trampoline into trampoline and kernel page tables
  arm64: entry: Add exception trampoline page for exceptions from EL0
  arm64: mm: Invalidate both kernel and user ASIDs when performing TLBI
  arm64: mm: Add arm64_kernel_unmapped_at_el0 helper
  arm64: mm: Allocate ASIDs in pairs
  arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN
  arm64: mm: Rename post_ttbr0_update_workaround
  arm64: mm: Remove pre_ttbr0_update_workaround for Falkor erratum #E1003
  arm64: mm: Move ASID from TTBR0 to TTBR1
  arm64: mm: Temporarily disable ARM64_SW_TTBR0_PAN
  arm64: mm: Use non-global mappings for kernel space
......@@ -522,20 +522,13 @@ config CAVIUM_ERRATUM_30115
config QCOM_FALKOR_ERRATUM_1003
bool "Falkor E1003: Incorrect translation due to ASID change"
default y
select ARM64_PAN if ARM64_SW_TTBR0_PAN
help
On Falkor v1, an incorrect ASID may be cached in the TLB when ASID
and BADDR are changed together in TTBRx_EL1. The workaround for this
issue is to use a reserved ASID in cpu_do_switch_mm() before
switching to the new ASID. Saying Y here selects ARM64_PAN if
ARM64_SW_TTBR0_PAN is selected. This is done because implementing and
maintaining the E1003 workaround in the software PAN emulation code
would be an unnecessary complication. The affected Falkor v1 CPU
implements ARMv8.1 hardware PAN support and using hardware PAN
support versus software PAN emulation is mutually exclusive at
runtime.
If unsure, say Y.
and BADDR are changed together in TTBRx_EL1. Since we keep the ASID
in TTBR1_EL1, this situation only occurs in the entry trampoline and
then only for entries in the walk cache, since the leaf translation
is unchanged. Work around the erratum by invalidating the walk cache
entries for the trampoline before entering the kernel proper.
config QCOM_FALKOR_ERRATUM_1009
bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
......@@ -840,6 +833,19 @@ config FORCE_MAX_ZONEORDER
However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
4M allocations matching the default size used by generic code.
config UNMAP_KERNEL_AT_EL0
bool "Unmap kernel when running in userspace (aka \"KAISER\")"
default y
help
Some attacks against KASLR make use of the timing difference between
a permission fault which could arise from a page table entry that is
present in the TLB, and a translation fault which always requires a
page table walk. This option defends against these attacks by unmapping
the kernel whilst running in userspace, therefore forcing translation
faults for all of kernel space.
If unsure, say Y.
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
......
......@@ -4,6 +4,7 @@
#include <asm/alternative.h>
#include <asm/kernel-pgtable.h>
#include <asm/mmu.h>
#include <asm/sysreg.h>
#include <asm/assembler.h>
......@@ -16,11 +17,20 @@
add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
isb
sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE
bic \tmp1, \tmp1, #TTBR_ASID_MASK
msr ttbr1_el1, \tmp1 // set reserved ASID
isb
.endm
.macro __uaccess_ttbr0_enable, tmp1
.macro __uaccess_ttbr0_enable, tmp1, tmp2
get_thread_info \tmp1
ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
mrs \tmp2, ttbr1_el1
extr \tmp2, \tmp2, \tmp1, #48
ror \tmp2, \tmp2, #16
msr ttbr1_el1, \tmp2 // set the active ASID
isb
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
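The extr/ror pair in the new __uaccess_ttbr0_enable is dense: it splices the ASID field (bits 63:48, per TTBR_ASID_MASK) out of the saved TTBR0 value into the current TTBR1_EL1, so the active ASID is restored before the user page table is written back into TTBR0. A minimal standalone C sketch of that bit manipulation follows; it is an illustration only, and the TTBR values are made up:

/* Sketch of the extr/ror sequence above, checked in plain C.
 * The TTBR values are illustrative only. */
#include <assert.h>
#include <stdio.h>

#define TTBR_ASID_MASK	(0xffffULL << 48)

static unsigned long long extr_ror(unsigned long long ttbr1, unsigned long long ttbr0)
{
	/* extr \tmp2, \tmp2, \tmp1, #48: bits 63:16 <- ttbr1[47:0], bits 15:0 <- ttbr0[63:48] */
	unsigned long long t = (ttbr1 << 16) | (ttbr0 >> 48);
	/* ror \tmp2, \tmp2, #16 */
	return (t >> 16) | (t << 48);
}

int main(void)
{
	unsigned long long ttbr1  = 0x40f0d000ULL | (0x0042ULL << 48);	/* example value */
	unsigned long long ttbr0  = 0x41a2c000ULL | (0x0043ULL << 48);	/* example value */
	unsigned long long merged = extr_ror(ttbr1, ttbr0);

	/* Result: TTBR1's baddr with the ASID taken from the saved TTBR0. */
	assert(merged == ((ttbr1 & ~TTBR_ASID_MASK) | (ttbr0 & TTBR_ASID_MASK)));
	printf("merged ttbr1 = %#018llx\n", merged);
	return 0;
}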
......@@ -31,18 +41,18 @@ alternative_if_not ARM64_HAS_PAN
alternative_else_nop_endif
.endm
.macro uaccess_ttbr0_enable, tmp1, tmp2
.macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
alternative_if_not ARM64_HAS_PAN
save_and_disable_irq \tmp2 // avoid preemption
__uaccess_ttbr0_enable \tmp1
restore_irq \tmp2
save_and_disable_irq \tmp3 // avoid preemption
__uaccess_ttbr0_enable \tmp1, \tmp2
restore_irq \tmp3
alternative_else_nop_endif
.endm
#else
.macro uaccess_ttbr0_disable, tmp1
.endm
.macro uaccess_ttbr0_enable, tmp1, tmp2
.macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
.endm
#endif
......@@ -56,8 +66,8 @@ alternative_if ARM64_ALT_PAN_NOT_UAO
alternative_else_nop_endif
.endm
.macro uaccess_enable_not_uao, tmp1, tmp2
uaccess_ttbr0_enable \tmp1, \tmp2
.macro uaccess_enable_not_uao, tmp1, tmp2, tmp3
uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(0)
alternative_else_nop_endif
......
......@@ -26,7 +26,6 @@
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
......@@ -478,31 +477,9 @@ alternative_endif
.endm
/*
* Errata workaround prior to TTBR0_EL1 update
*
* val: TTBR value with new BADDR, preserved
* tmp0: temporary register, clobbered
* tmp1: other temporary register, clobbered
*/
.macro pre_ttbr0_update_workaround, val, tmp0, tmp1
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
mrs \tmp0, ttbr0_el1
mov \tmp1, #FALKOR_RESERVED_ASID
bfi \tmp0, \tmp1, #48, #16 // reserved ASID + old BADDR
msr ttbr0_el1, \tmp0
isb
bfi \tmp0, \val, #0, #48 // reserved ASID + new BADDR
msr ttbr0_el1, \tmp0
isb
alternative_else_nop_endif
#endif
.endm
/*
* Errata workaround post TTBR0_EL1 update.
* Errata workaround post TTBRx_EL1 update.
*/
.macro post_ttbr0_update_workaround
.macro post_ttbr_update_workaround
#ifdef CONFIG_CAVIUM_ERRATUM_27456
alternative_if ARM64_WORKAROUND_CAVIUM_27456
ic iallu
......
......@@ -41,7 +41,8 @@
#define ARM64_WORKAROUND_CAVIUM_30115 20
#define ARM64_HAS_DCPOP 21
#define ARM64_SVE 22
#define ARM64_UNMAP_KERNEL_AT_EL0 23
#define ARM64_NCAPS 23
#define ARM64_NCAPS 24
#endif /* __ASM_CPUCAPS_H */
......@@ -58,6 +58,11 @@ enum fixed_addresses {
FIX_APEI_GHES_NMI,
#endif /* CONFIG_ACPI_APEI_GHES */
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
FIX_ENTRY_TRAMP_DATA,
FIX_ENTRY_TRAMP_TEXT,
#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
__end_of_permanent_fixed_addresses,
/*
......
......@@ -78,8 +78,16 @@
/*
* Initial memory map attributes.
*/
#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG)
#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG)
#else
#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS
#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS
#endif
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
......
......@@ -17,6 +17,10 @@
#define __ASM_MMU_H
#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */
#define USER_ASID_FLAG (UL(1) << 48)
#define TTBR_ASID_MASK (UL(0xffff) << 48)
#ifndef __ASSEMBLY__
typedef struct {
atomic64_t id;
......@@ -31,6 +35,12 @@ typedef struct {
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
static inline bool arm64_kernel_unmapped_at_el0(void)
{
return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) &&
cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
}
extern void paging_init(void);
extern void bootmem_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
......@@ -41,4 +51,5 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
extern void mark_linear_text_alias_ro(void);
#endif /* !__ASSEMBLY__ */
#endif
......@@ -19,8 +19,6 @@
#ifndef __ASM_MMU_CONTEXT_H
#define __ASM_MMU_CONTEXT_H
#define FALKOR_RESERVED_ASID 1
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
......@@ -57,6 +55,13 @@ static inline void cpu_set_reserved_ttbr0(void)
isb();
}
static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
{
BUG_ON(pgd == swapper_pg_dir);
cpu_set_reserved_ttbr0();
cpu_do_switch_mm(virt_to_phys(pgd),mm);
}
/*
* TCR.T0SZ value to use when the ID map is active. Usually equals
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
......
......@@ -272,6 +272,7 @@
#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT)
#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT)
#define TCR_A1 (UL(1) << 22)
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
#define TCR_HA (UL(1) << 39)
......
......@@ -34,8 +34,16 @@
#include <asm/pgtable-types.h>
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG)
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG)
#else
#define PROT_DEFAULT _PROT_DEFAULT
#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
......@@ -48,6 +56,7 @@
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG)
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
......@@ -55,15 +64,15 @@
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
......
......@@ -680,6 +680,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
/*
* Encode and decode a swap entry:
......
......@@ -35,12 +35,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
#include <asm/memory.h>
#define cpu_switch_mm(pgd,mm) \
do { \
BUG_ON(pgd == swapper_pg_dir); \
cpu_do_switch_mm(virt_to_phys(pgd),mm); \
} while (0)
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* __ASM_PROCFNS_H */
......@@ -23,6 +23,7 @@
#include <linux/sched.h>
#include <asm/cputype.h>
#include <asm/mmu.h>
/*
* Raw TLBI operations.
......@@ -54,6 +55,11 @@
#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
#define __tlbi_user(op, arg) do { \
if (arm64_kernel_unmapped_at_el0()) \
__tlbi(op, (arg) | USER_ASID_FLAG); \
} while (0)
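With kernel and user ASIDs allocated as a pair, a TLBI that targets a user address space has to be issued once per ASID. __tlbi_user above covers the second copy by setting bit 48 of the operand (USER_ASID_FLAG), which is the bottom bit of the ASID field. A standalone sketch of the operand encoding, mirroring the addr = uaddr >> 12 | (ASID(mm) << 48) form used by __flush_tlb_pgtable further down; the address and ASID values are illustrative only:

/* Sketch: a TLBI VA operand carries the page number in the low bits and the
 * ASID in bits 63:48; the user variant differs only in bit 48. */
#include <stdio.h>

#define USER_ASID_FLAG	(1ULL << 48)

int main(void)
{
	unsigned long long uaddr = 0x0000aaaabcde7000ULL;	/* example user VA */
	unsigned long long asid  = 0x42;			/* example (even) kernel ASID */
	unsigned long long op    = (uaddr >> 12) | (asid << 48);

	printf("kernel-ASID operand: %#018llx\n", op);
	printf("user-ASID   operand: %#018llx\n", op | USER_ASID_FLAG);
	return 0;
}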
/*
* TLB Management
* ==============
......@@ -115,6 +121,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
dsb(ishst);
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
dsb(ish);
}
......@@ -125,6 +132,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
dsb(ishst);
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
dsb(ish);
}
......@@ -151,10 +159,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
if (last_level)
if (last_level) {
__tlbi(vale1is, addr);
else
__tlbi_user(vale1is, addr);
} else {
__tlbi(vae1is, addr);
__tlbi_user(vae1is, addr);
}
}
dsb(ish);
}
......@@ -194,6 +205,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,
unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
__tlbi(vae1is, addr);
__tlbi_user(vae1is, addr);
dsb(ish);
}
......
......@@ -107,15 +107,19 @@ static inline void __uaccess_ttbr0_disable(void)
{
unsigned long ttbr;
ttbr = read_sysreg(ttbr1_el1);
/* reserved_ttbr0 placed at the end of swapper_pg_dir */
ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE;
write_sysreg(ttbr, ttbr0_el1);
write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1);
isb();
/* Set reserved ASID */
ttbr &= ~TTBR_ASID_MASK;
write_sysreg(ttbr, ttbr1_el1);
isb();
}
static inline void __uaccess_ttbr0_enable(void)
{
unsigned long flags;
unsigned long flags, ttbr0, ttbr1;
/*
* Disable interrupts to avoid preemption between reading the 'ttbr0'
......@@ -123,7 +127,16 @@ static inline void __uaccess_ttbr0_enable(void)
* roll-over and an update of 'ttbr0'.
*/
local_irq_save(flags);
write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
ttbr0 = current_thread_info()->ttbr0;
/* Restore active ASID */
ttbr1 = read_sysreg(ttbr1_el1);
ttbr1 |= ttbr0 & TTBR_ASID_MASK;
write_sysreg(ttbr1, ttbr1_el1);
isb();
/* Restore user page table */
write_sysreg(ttbr0, ttbr0_el1);
isb();
local_irq_restore(flags);
}
......
......@@ -24,6 +24,7 @@
#include <linux/kvm_host.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
......@@ -148,11 +149,14 @@ int main(void)
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id));
DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state));
BLANK();
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val));
BLANK();
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
DEFINE(TRAMP_VALIAS, TRAMP_VALIAS);
#endif
return 0;
}
......@@ -845,6 +845,40 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
ID_AA64PFR0_FP_SHIFT) < 0;
}
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
int __unused)
{
/* Forced on command line? */
if (__kpti_forced) {
pr_info_once("kernel page table isolation forced %s by command line option\n",
__kpti_forced > 0 ? "ON" : "OFF");
return __kpti_forced > 0;
}
/* Useful for KASLR robustness */
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return true;
return false;
}
static int __init parse_kpti(char *str)
{
bool enabled;
int ret = strtobool(str, &enabled);
if (ret)
return ret;
__kpti_forced = enabled ? 1 : -1;
return 0;
}
__setup("kpti=", parse_kpti);
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
......@@ -931,6 +965,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.def_scope = SCOPE_SYSTEM,
.matches = hyp_offset_low,
},
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
{
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
.def_scope = SCOPE_SYSTEM,
.matches = unmap_kernel_at_el0,
},
#endif
{
/* FP/SIMD is not implemented */
.capability = ARM64_HAS_NO_FPSIMD,
......
......@@ -28,6 +28,8 @@
#include <asm/errno.h>
#include <asm/esr.h>
#include <asm/irq.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
......@@ -69,8 +71,21 @@
#define BAD_FIQ 2
#define BAD_ERROR 3
.macro kernel_ventry label
.macro kernel_ventry, el, label, regsize = 64
.align 7
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
alternative_if ARM64_UNMAP_KERNEL_AT_EL0
.if \el == 0
.if \regsize == 64
mrs x30, tpidrro_el0
msr tpidrro_el0, xzr
.else
mov x30, xzr
.endif
.endif
alternative_else_nop_endif
#endif
sub sp, sp, #S_FRAME_SIZE
#ifdef CONFIG_VMAP_STACK
/*
......@@ -82,7 +97,7 @@
tbnz x0, #THREAD_SHIFT, 0f
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
b \label
b el\()\el\()_\label
0:
/*
......@@ -114,7 +129,12 @@
sub sp, sp, x0
mrs x0, tpidrro_el0
#endif
b \label
b el\()\el\()_\label
.endm
.macro tramp_alias, dst, sym
mov_q \dst, TRAMP_VALIAS
add \dst, \dst, #(\sym - .entry.tramp.text)
.endm
.macro kernel_entry, el, regsize = 64
......@@ -184,8 +204,8 @@ alternative_if ARM64_HAS_PAN
alternative_else_nop_endif
.if \el != 0
mrs x21, ttbr0_el1
tst x21, #0xffff << 48 // Check for the reserved ASID
mrs x21, ttbr1_el1
tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
b.eq 1f // TTBR0 access already disabled
and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
......@@ -248,7 +268,7 @@ alternative_else_nop_endif
tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
.endif
__uaccess_ttbr0_enable x0
__uaccess_ttbr0_enable x0, x1
.if \el == 0
/*
......@@ -257,7 +277,7 @@ alternative_else_nop_endif
* Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
* corruption).
*/
post_ttbr0_update_workaround
post_ttbr_update_workaround
.endif
1:
.if \el != 0
......@@ -269,18 +289,20 @@ alternative_else_nop_endif
.if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
tst x22, #PSR_MODE32_BIT // native task?
b.eq 3f
#ifdef CONFIG_ARM64_ERRATUM_845719
alternative_if ARM64_WORKAROUND_845719
tbz x22, #4, 1f
#ifdef CONFIG_PID_IN_CONTEXTIDR
mrs x29, contextidr_el1
msr contextidr_el1, x29
#else
msr contextidr_el1, xzr
#endif
1:
alternative_else_nop_endif
#endif
3:
.endif
msr elr_el1, x21 // set up the return data
......@@ -302,7 +324,21 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
eret // return to kernel
.if \el == 0
alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
bne 4f
msr far_el1, x30
tramp_alias x30, tramp_exit_native
br x30
4:
tramp_alias x30, tramp_exit_compat
br x30
#endif
.else
eret
.endif
.endm
.macro irq_stack_entry
......@@ -367,31 +403,31 @@ tsk .req x28 // current thread_info
.align 11
ENTRY(vectors)
kernel_ventry el1_sync_invalid // Synchronous EL1t
kernel_ventry el1_irq_invalid // IRQ EL1t
kernel_ventry el1_fiq_invalid // FIQ EL1t
kernel_ventry el1_error_invalid // Error EL1t
kernel_ventry 1, sync_invalid // Synchronous EL1t
kernel_ventry 1, irq_invalid // IRQ EL1t
kernel_ventry 1, fiq_invalid // FIQ EL1t
kernel_ventry 1, error_invalid // Error EL1t
kernel_ventry el1_sync // Synchronous EL1h
kernel_ventry el1_irq // IRQ EL1h
kernel_ventry el1_fiq_invalid // FIQ EL1h
kernel_ventry el1_error // Error EL1h
kernel_ventry 1, sync // Synchronous EL1h
kernel_ventry 1, irq // IRQ EL1h
kernel_ventry 1, fiq_invalid // FIQ EL1h
kernel_ventry 1, error // Error EL1h
kernel_ventry el0_sync // Synchronous 64-bit EL0
kernel_ventry el0_irq // IRQ 64-bit EL0
kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0
kernel_ventry el0_error // Error 64-bit EL0
kernel_ventry 0, sync // Synchronous 64-bit EL0
kernel_ventry 0, irq // IRQ 64-bit EL0
kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
kernel_ventry 0, error // Error 64-bit EL0
#ifdef CONFIG_COMPAT
kernel_ventry el0_sync_compat // Synchronous 32-bit EL0
kernel_ventry el0_irq_compat // IRQ 32-bit EL0
kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0
kernel_ventry el0_error_compat // Error 32-bit EL0
kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
#else
kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0
kernel_ventry el0_irq_invalid // IRQ 32-bit EL0
kernel_ventry el0_fiq_invalid // FIQ 32-bit EL0
kernel_ventry el0_error_invalid // Error 32-bit EL0
kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
#endif
END(vectors)
......@@ -943,6 +979,116 @@ __ni_sys_trace:
.popsection // .entry.text
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
/*
* Exception vectors trampoline.
*/
.pushsection ".entry.tramp.text", "ax"
.macro tramp_map_kernel, tmp
mrs \tmp, ttbr1_el1
sub \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
bic \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
/* ASID already in \tmp[63:48] */
movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
/* 2MB boundary containing the vectors, so we nobble the walk cache */
movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
isb
tlbi vae1, \tmp
dsb nsh
alternative_else_nop_endif
#endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */
.endm
.macro tramp_unmap_kernel, tmp
mrs \tmp, ttbr1_el1
add \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
orr \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
/*
* We avoid running the post_ttbr_update_workaround here because the
* user and kernel ASIDs don't have conflicting mappings, so any
* "blessing" as described in:
*
* http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com
*
* will not hurt correctness. Whilst this may partially defeat the
* point of using split ASIDs in the first place, it avoids
* the hit of invalidating the entire I-cache on every return to
* userspace.
*/
.endm
.macro tramp_ventry, regsize = 64
.align 7
1:
.if \regsize == 64
msr tpidrro_el0, x30 // Restored in kernel_ventry
.endif
tramp_map_kernel x30
#ifdef CONFIG_RANDOMIZE_BASE
adr x30, tramp_vectors + PAGE_SIZE
alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
ldr x30, [x30]
#else
ldr x30, =vectors
#endif
prfm plil1strm, [x30, #(1b - tramp_vectors)]
msr vbar_el1, x30
add x30, x30, #(1b - tramp_vectors)
isb
br x30
.endm
.macro tramp_exit, regsize = 64
adr x30, tramp_vectors
msr vbar_el1, x30
tramp_unmap_kernel x30
.if \regsize == 64
mrs x30, far_el1
.endif
eret
.endm
.align 11
ENTRY(tramp_vectors)
.space 0x400
tramp_ventry
tramp_ventry
tramp_ventry
tramp_ventry
tramp_ventry 32
tramp_ventry 32
tramp_ventry 32
tramp_ventry 32
END(tramp_vectors)
ENTRY(tramp_exit_native)
tramp_exit
END(tramp_exit_native)
ENTRY(tramp_exit_compat)
tramp_exit 32
END(tramp_exit_compat)
.ltorg
.popsection // .entry.tramp.text
#ifdef CONFIG_RANDOMIZE_BASE
.pushsection ".rodata", "a"
.align PAGE_SHIFT
.globl __entry_tramp_data_start
__entry_tramp_data_start:
.quad vectors
.popsection // .rodata
#endif /* CONFIG_RANDOMIZE_BASE */
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
/*
* Special system call wrappers.
*/
......
......@@ -370,16 +370,14 @@ void tls_preserve_current_state(void)
static void tls_thread_switch(struct task_struct *next)
{
unsigned long tpidr, tpidrro;
tls_preserve_current_state();
tpidr = *task_user_tls(next);
tpidrro = is_compat_thread(task_thread_info(next)) ?
next->thread.tp_value : 0;
if (is_compat_thread(task_thread_info(next)))
write_sysreg(next->thread.tp_value, tpidrro_el0);
else if (!arm64_kernel_unmapped_at_el0())
write_sysreg(0, tpidrro_el0);
write_sysreg(tpidr, tpidr_el0);
write_sysreg(tpidrro, tpidrro_el0);
write_sysreg(*task_user_tls(next), tpidr_el0);
}
/* Restore the UAO state depending on next's addr_limit */
......
......@@ -57,6 +57,17 @@ jiffies = jiffies_64;
#define HIBERNATE_TEXT
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \
*(.entry.tramp.text) \
. = ALIGN(PAGE_SIZE); \
VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
#else
#define TRAMP_TEXT
#endif
/*
* The size of the PE/COFF section that covers the kernel image, which
* runs from stext to _edata, must be a round multiple of the PE/COFF
......@@ -113,6 +124,7 @@ SECTIONS
HYPERVISOR_TEXT
IDMAP_TEXT
HIBERNATE_TEXT
TRAMP_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
......@@ -214,6 +226,11 @@ SECTIONS
. += RESERVED_TTBR0_SIZE;
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
tramp_pg_dir = .;
. += PAGE_SIZE;
#endif
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
......@@ -234,7 +251,10 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
<= SZ_4K, "Hibernate exit text too big or misaligned")
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
"Entry trampoline text too big")
#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
......
......@@ -30,7 +30,7 @@
* Alignment fixed up by hardware.
*/
ENTRY(__clear_user)
uaccess_enable_not_uao x2, x3
uaccess_enable_not_uao x2, x3, x4
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
......
......@@ -64,7 +64,7 @@
end .req x5
ENTRY(__arch_copy_from_user)
uaccess_enable_not_uao x3, x4
uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
uaccess_disable_not_uao x3
......
......@@ -65,7 +65,7 @@
end .req x5
ENTRY(raw_copy_in_user)
uaccess_enable_not_uao x3, x4
uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
uaccess_disable_not_uao x3
......
......@@ -63,7 +63,7 @@
end .req x5
ENTRY(__arch_copy_to_user)
uaccess_enable_not_uao x3, x4
uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
uaccess_disable_not_uao x3
......
......@@ -49,7 +49,7 @@ ENTRY(flush_icache_range)
* - end - virtual end address of region
*/
ENTRY(__flush_cache_user_range)
uaccess_ttbr0_enable x2, x3
uaccess_ttbr0_enable x2, x3, x4
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
......
......@@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending;
#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
#define ASID_FIRST_VERSION (1UL << asid_bits)
#define NUM_USER_ASIDS ASID_FIRST_VERSION
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK)
#else
#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
#define asid2idx(asid) ((asid) & ~ASID_MASK)
#define idx2asid(idx) asid2idx(idx)
#endif
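The asid2idx()/idx2asid() macros above implement the pairing: each allocator bitmap index now represents two hardware ASIDs, an even one used while the kernel is mapped and its odd sibling used by the entry trampoline for userspace (bit 0 of the ASID, i.e. USER_ASID_FLAG once shifted into the TTBR). A standalone sketch of the arithmetic, assuming 16-bit hardware ASIDs:

/* Sketch of the even/odd ASID pairing, assuming 16-bit hardware ASIDs. */
#include <stdio.h>

#define ASID_BITS		16
#define ASID_MASK		(~((1ULL << ASID_BITS) - 1))
#define ASID_FIRST_VERSION	(1ULL << ASID_BITS)
#define NUM_USER_ASIDS		(ASID_FIRST_VERSION >> 1)
#define asid2idx(asid)		(((asid) & ~ASID_MASK) >> 1)
#define idx2asid(idx)		(((idx) << 1) & ~ASID_MASK)

int main(void)
{
	printf("%llu usable pairs\n", NUM_USER_ASIDS - 1);	/* index 0 is never handed out */
	for (unsigned long long idx = 1; idx <= 3; idx++) {
		unsigned long long kernel_asid = idx2asid(idx);
		unsigned long long user_asid   = kernel_asid | 1;	/* bottom ASID bit = user */

		printf("idx %llu -> kernel ASID %llu, user ASID %llu (asid2idx back to %llu)\n",
		       idx, kernel_asid, user_asid, asid2idx(kernel_asid));
	}
	return 0;
}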
/* Get the ASIDBits supported by the current CPU */
static u32 get_cpu_asid_bits(void)
......@@ -79,13 +88,6 @@ void verify_cpu_asid_bits(void)
}
}
static void set_reserved_asid_bits(void)
{
if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) &&
cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
__set_bit(FALKOR_RESERVED_ASID, asid_map);
}
static void flush_context(unsigned int cpu)
{
int i;
......@@ -94,8 +96,6 @@ static void flush_context(unsigned int cpu)
/* Update the list of reserved ASIDs and the ASID bitmap. */
bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
set_reserved_asid_bits();
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
......@@ -107,7 +107,7 @@ static void flush_context(unsigned int cpu)
*/
if (asid == 0)
asid = per_cpu(reserved_asids, i);
__set_bit(asid & ~ASID_MASK, asid_map);
__set_bit(asid2idx(asid), asid_map);
per_cpu(reserved_asids, i) = asid;
}
......@@ -162,16 +162,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
* We had a valid ASID in a previous life, so try to re-use
* it if possible.
*/
asid &= ~ASID_MASK;
if (!__test_and_set_bit(asid, asid_map))
if (!__test_and_set_bit(asid2idx(asid), asid_map))
return newasid;
}
/*
* Allocate a free ASID. If we can't find one, take a note of the
* currently active ASIDs and mark the TLBs as requiring flushes.
* We always count from ASID #1, as we use ASID #0 when setting a
* reserved TTBR0 for the init_mm.
* currently active ASIDs and mark the TLBs as requiring flushes. We
* always count from ASID #2 (index 1), as we use ASID #0 when setting
* a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
* pairs.
*/
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
......@@ -188,7 +188,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
return asid | generation;
return idx2asid(asid) | generation;
}
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
......@@ -254,8 +254,6 @@ static int asids_init(void)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);
set_reserved_asid_bits();
pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
return 0;
}
......
......@@ -525,6 +525,37 @@ static int __init parse_rodata(char *arg)
}
early_param("rodata", parse_rodata);
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __init map_entry_trampoline(void)
{
extern char __entry_tramp_text_start[];
pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
/* The trampoline is always mapped and can therefore be global */
pgprot_val(prot) &= ~PTE_NG;
/* Map only the text into the trampoline page table */
memset(tramp_pg_dir, 0, PGD_SIZE);
__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
prot, pgd_pgtable_alloc, 0);
/* Map both the text and data into the kernel page table */
__set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
extern char __entry_tramp_data_start[];
__set_fixmap(FIX_ENTRY_TRAMP_DATA,
__pa_symbol(__entry_tramp_data_start),
PAGE_KERNEL_RO);
}
return 0;
}
core_initcall(map_entry_trampoline);
#endif
/*
* Create fine-grained mappings for the kernel.
*/
......
......@@ -138,12 +138,14 @@ ENDPROC(cpu_do_resume)
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
pre_ttbr0_update_workaround x0, x2, x3
mrs x2, ttbr1_el1
mmid x1, x1 // get mm->context.id
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
bfi x2, x1, #48, #16 // set the ASID
msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set)
isb
post_ttbr0_update_workaround
msr ttbr0_el1, x0 // now update TTBR0
isb
post_ttbr_update_workaround
ret
ENDPROC(cpu_do_switch_mm)
......@@ -224,7 +226,7 @@ ENTRY(__cpu_setup)
* both user and kernel.
*/
ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1
tcr_set_idmap_t0sz x10, x9
/*
......
......@@ -101,7 +101,7 @@ ENTRY(privcmd_call)
* need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
* is enabled (it implies that hardware UAO and PAN disabled).
*/
uaccess_ttbr0_enable x6, x7
uaccess_ttbr0_enable x6, x7, x8
hvc XEN_IMM
/*
......
......@@ -1164,6 +1164,15 @@ static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
struct arm_spe_pmu *spe_pmu;
struct device *dev = &pdev->dev;
/*
* If kernelspace is unmapped when running at EL0, then the SPE
* buffer will fault and prematurely terminate the AUX session.
*/
if (arm64_kernel_unmapped_at_el0()) {
dev_warn_once(dev, "profiling buffer inaccessible. Try passing \"kpti=off\" on the kernel command line\n");
return -EPERM;
}
spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
if (!spe_pmu) {
dev_err(dev, "failed to allocate spe_pmu\n");
......