diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fef3dc6e21b9f5293543161ed4bf0599c1cf19d7..968fa13cc25b3aa4873810429e72e704c78d3524 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -798,6 +798,35 @@ config RELOCATABLE relocation pass at runtime even if the kernel is loaded at the same address it was linked at. +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + select ARM64_MODULE_PLTS + select RELOCATABLE + help + Randomizes the virtual address at which the kernel image is + loaded, as a security feature that deters exploit attempts + relying on knowledge of the location of kernel internals. + + It is the bootloader's job to provide entropy, by passing a + random u64 value in /chosen/kaslr-seed at kernel entry. + + If unsure, say N. + +config RANDOMIZE_MODULE_REGION_FULL + bool "Randomize the module region independently from the core kernel" + depends on RANDOMIZE_BASE + default y + help + Randomizes the location of the module region without considering the + location of the core kernel. This way, it is impossible for modules + to leak information about the location of core kernel data structures + but it does imply that function calls between modules and the core + kernel will need to be resolved via veneers in the module PLT. + + When this option is not set, the module region will be randomized over + a limited range that contains the [_stext, _etext] interval of the + core kernel, so branch relocations are always in range. + endmenu menu "Boot options" diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index eb798156cf56ad03e11b23e92b3a3bcaa1f7f99d..5f8667a99e41133384b980c89db37c77cc19abae 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -53,7 +53,7 @@ #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) -#define MODULES_VSIZE (SZ_64M) +#define MODULES_VSIZE (SZ_128M) #define PCI_IO_END (PAGE_OFFSET - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) @@ -139,6 +139,9 @@ extern phys_addr_t memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) +/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +extern u64 kimage_vaddr; + /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 8652fb6133045a2d2bbd333e93344b5274efe15d..e12af6754634b3d2aa031ae23ce25228dc766cfb 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -31,4 +31,10 @@ struct mod_arch_specific { u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, Elf64_Sym *sym); +#ifdef CONFIG_RANDOMIZE_BASE +extern u64 module_alloc_base; +#else +#define module_alloc_base ((u64)_etext - MODULES_VSIZE) +#endif + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 9ca2a48ba326bdbce03366f8c248570d6ad38f1c..3793003e16a2467c4b2bcc710e3e776c895ab03d 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -44,6 +44,7 @@ arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o +arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4e69412a7323b54b45217b2719e2e3cf761567af..319f896c6e744a3ee47f4fede17a75bee4d7d9e2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -210,6 +210,7 @@ section_table: ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode + mov x23, xzr // KASLR offset, defaults to 0 adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag bl __create_page_tables // x25=TTBR0, x26=TTBR1 @@ -313,7 +314,7 @@ ENDPROC(preserve_boot_args) __create_page_tables: adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir - mov x27, lr + mov x28, lr /* * Invalidate the idmap and swapper page tables to avoid potential @@ -392,6 +393,7 @@ __create_page_tables: */ mov x0, x26 // swapper_pg_dir ldr x5, =KIMAGE_VADDR + add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 ldr w6, kernel_img_size add x6, x6, x5 @@ -408,8 +410,7 @@ __create_page_tables: dmb sy bl __inval_cache_range - mov lr, x27 - ret + ret x28 ENDPROC(__create_page_tables) kernel_img_size: @@ -421,6 +422,7 @@ kernel_img_size: */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: + mov x28, lr // preserve LR adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb @@ -449,19 +451,26 @@ __mmap_switched: ldr x13, [x9, #-8] cmp w12, #R_AARCH64_RELATIVE b.ne 1f - str x13, [x11] + add x13, x13, x23 // relocate + str x13, [x11, x23] b 0b 1: cmp w12, #R_AARCH64_ABS64 b.ne 0b add x12, x12, x12, lsl #1 // symtab offset: 24x top word add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx ldr x15, [x12, #8] // Elf64_Sym::st_value + cmp w14, #-0xf // SHN_ABS (0xfff1) ? + add x14, x15, x23 // relocate + csel x15, x14, x15, ne add x15, x13, x15 - str x15, [x11] + str x15, [x11, x23] b 0b -2: +2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr + dc cvac, x8 // value visible to secondaries + dsb sy // with MMU off #endif adr_l sp, initial_sp, x4 @@ -470,13 +479,23 @@ __mmap_switched: msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer - ldr x4, =KIMAGE_VADDR // Save the offset between + ldr_l x4, kimage_vaddr // Save the offset between sub x4, x4, x24 // the kernel virtual and str_l x4, kimage_voffset, x5 // physical mappings mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init +#endif +#ifdef CONFIG_RANDOMIZE_BASE + cbnz x23, 0f // already running randomized? + mov x0, x21 // pass FDT address in x0 + bl kaslr_early_init // parse FDT for KASLR options + cbz x0, 0f // KASLR disabled? just proceed + mov x23, x0 // record KASLR offset + ret x28 // we must enable KASLR, return + // to __enable_mmu() +0: #endif b start_kernel ENDPROC(__mmap_switched) @@ -486,6 +505,10 @@ ENDPROC(__mmap_switched) * hotplug and needs to have the same protections as the text region */ .section ".text","ax" + +ENTRY(kimage_vaddr) + .quad _text - TEXT_OFFSET + /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. @@ -651,7 +674,7 @@ ENTRY(secondary_startup) adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x8, =KIMAGE_VADDR + ldr x8, kimage_vaddr ldr w9, 0f sub x27, x8, w9, sxtw // address to jump to after enabling the MMU b __enable_mmu @@ -684,6 +707,7 @@ ENDPROC(__secondary_switched) */ .section ".idmap.text", "ax" __enable_mmu: + mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED @@ -701,6 +725,25 @@ __enable_mmu: ic iallu dsb nsh isb +#ifdef CONFIG_RANDOMIZE_BASE + mov x19, x0 // preserve new SCTLR_EL1 value + blr x27 + + /* + * If we return here, we have a KASLR displacement in x23 which we need + * to take into account by discarding the current kernel mapping and + * creating a new one. + */ + msr sctlr_el1, x18 // disable the MMU + isb + bl __create_page_tables // recreate kernel mapping + + msr sctlr_el1, x19 // re-enable the MMU + isb + ic ialluis // flush instructions fetched + isb // via old mapping + add x27, x27, x23 // relocated __mmap_switched +#endif br x27 ENDPROC(__enable_mmu) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c new file mode 100644 index 0000000000000000000000000000000000000000..8b32a1f8f09f3f7fccc04f6f1ae7463d5fe109a1 --- /dev/null +++ b/arch/arm64/kernel/kaslr.c @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2016 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +u64 __read_mostly module_alloc_base; + +static __init u64 get_kaslr_seed(void *fdt) +{ + int node, len; + u64 *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +static __init const u8 *get_cmdline(void *fdt) +{ + static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + int node; + const u8 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + return prop; + } +out: + return default_cmdline; +} + +extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, + pgprot_t prot); + +/* + * This routine will be executed with the kernel mapped at its default virtual + * address, and if it returns successfully, the kernel will be remapped, and + * start_kernel() will be executed from a randomized virtual offset. The + * relocation will result in all absolute references (e.g., static variables + * containing function pointers) to be reinitialized, and zero-initialized + * .bss variables will be reset to 0. + */ +u64 __init kaslr_early_init(u64 dt_phys) +{ + void *fdt; + u64 seed, offset, mask, module_range; + const u8 *cmdline, *str; + int size; + + /* + * Set a reasonable default for module_alloc_base in case + * we end up running with module randomization disabled. + */ + module_alloc_base = (u64)_etext - MODULES_VSIZE; + + /* + * Try to map the FDT early. If this fails, we simply bail, + * and proceed with KASLR disabled. We will make another + * attempt at mapping the FDT in setup_machine() + */ + early_fixmap_init(); + fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + if (!fdt) + return 0; + + /* + * Retrieve (and wipe) the seed from the FDT + */ + seed = get_kaslr_seed(fdt); + if (!seed) + return 0; + + /* + * Check if 'nokaslr' appears on the command line, and + * return 0 if that is the case. + */ + cmdline = get_cmdline(fdt); + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + return 0; + + /* + * OK, so we are proceeding with KASLR enabled. Calculate a suitable + * kernel image offset from the seed. Let's place the kernel in the + * lower half of the VMALLOC area (VA_BITS - 2). + * Even if we could randomize at page granularity for 16k and 64k pages, + * let's always round to 2 MB so we don't interfere with the ability to + * map using contiguous PTEs + */ + mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); + offset = seed & mask; + + /* + * The kernel Image should not extend across a 1GB/32MB/512MB alignment + * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this + * happens, increase the KASLR offset by the size of the kernel image. + */ + if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + offset = (offset + (u64)(_end - _text)) & mask; + + if (IS_ENABLED(CONFIG_KASAN)) + /* + * KASAN does not expect the module region to intersect the + * vmalloc region, since shadow memory is allocated for each + * module at load time, whereas the vmalloc region is shadowed + * by KASAN zero pages. So keep modules out of the vmalloc + * region if KASAN is enabled. + */ + return offset; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + /* + * Randomize the module region independently from the core + * kernel. This prevents modules from leaking any information + * about the address of the kernel itself, but results in + * branches between modules and the core kernel that are + * resolved via PLTs. (Branches between modules will be + * resolved normally.) + */ + module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE; + module_alloc_base = VMALLOC_START; + } else { + /* + * Randomize the module region by setting module_alloc_base to + * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, + * _stext) . This guarantees that the resulting region still + * covers [_stext, _etext], and that all relative branches can + * be resolved without veneers. + */ + module_range = MODULES_VSIZE - (u64)(_etext - _stext); + module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; + } + + /* use the lower 21 bits to randomize the base of the module region */ + module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; + module_alloc_base &= PAGE_MASK; + + return offset; +} diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index a9dde97f5ca58a6b6ccb248c3b7ff0cfe43d3556..7f316982ce00186262728518f3a03f7871fb7dd7 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -34,7 +34,8 @@ void *module_alloc(unsigned long size) { void *p; - p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, + module_alloc_base + MODULES_VSIZE, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index cfed56f0ad26d64714b137f124ac8112b12331b1..42371f69def3a3afc68a7f1a28d0e819e83c7837 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -388,3 +388,32 @@ static int __init topology_init(void) return 0; } subsys_initcall(topology_init); + +/* + * Dump out kernel offset information on panic. + */ +static int dump_kernel_offset(struct notifier_block *self, unsigned long v, + void *p) +{ + u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { + pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", + kaslr_offset, KIMAGE_VADDR); + } else { + pr_emerg("Kernel Offset: disabled\n"); + } + return 0; +} + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 7f10cc91fa8a442ca485ba22d40c02296574ef78..56e19d150c21b98ab3e59ec83f378d1f33b2a119 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -129,12 +129,16 @@ static void __init clear_pgds(unsigned long start, void __init kasan_init(void) { u64 kimg_shadow_start, kimg_shadow_end; + u64 mod_shadow_start, mod_shadow_end; struct memblock_region *reg; int i; kimg_shadow_start = (u64)kasan_mem_to_shadow(_text); kimg_shadow_end = (u64)kasan_mem_to_shadow(_end); + mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); + mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); + /* * We are going to perform proper setup of shadow memory. * At first we should unmap early shadow (clear_pgds() call bellow). @@ -158,13 +162,20 @@ void __init kasan_init(void) * with PMD table mappings at the edges of the shadow region for the * kernel image. */ - if (ARM64_SWAPPER_USES_SECTION_MAPS) + if (ARM64_SWAPPER_USES_SECTION_MAPS) { + kimg_shadow_start = round_down(kimg_shadow_start, + SWAPPER_BLOCK_SIZE); kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); + } kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, - kasan_mem_to_shadow((void *)MODULES_VADDR)); + (void *)mod_shadow_start); kasan_populate_zero_shadow((void *)kimg_shadow_end, - kasan_mem_to_shadow((void *)PAGE_OFFSET)); + kasan_mem_to_shadow((void *)PAGE_OFFSET)); + + if (kimg_shadow_start > mod_shadow_end) + kasan_populate_zero_shadow((void *)mod_shadow_end, + (void *)kimg_shadow_start); for_each_memblock(memory, reg) { void *start = (void *)__phys_to_virt(reg->base); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 32c4a378e4d65dacf0a4ffb564c830ccc13a90c9..00d166465ff46930323c35002b88ec3581f2bc60 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -678,7 +678,8 @@ void __init early_fixmap_init(void) unsigned long addr = FIXADDR_START; pgd = pgd_offset_k(addr); - if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) { + if (CONFIG_PGTABLE_LEVELS > 3 && + !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) { /* * We only end up here if the kernel mapping and the fixmap * share the top level pgd entry, which should only happen on @@ -735,11 +736,10 @@ void __set_fixmap(enum fixed_addresses idx, } } -void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); - pgprot_t prot = PAGE_KERNEL_RO; - int size, offset; + int offset; void *dt_virt; /* @@ -778,16 +778,27 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) if (fdt_check_header(dt_virt) != 0) return NULL; - size = fdt_totalsize(dt_virt); - if (size > MAX_FDT_SIZE) + *size = fdt_totalsize(dt_virt); + if (*size > MAX_FDT_SIZE) return NULL; - if (offset + size > SWAPPER_BLOCK_SIZE) + if (offset + *size > SWAPPER_BLOCK_SIZE) create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); + round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot); - memblock_reserve(dt_phys, size); + return dt_virt; +} +void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +{ + void *dt_virt; + int size; + + dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); + if (!dt_virt) + return NULL; + + memblock_reserve(dt_phys, size); return dt_virt; }