diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S index 47a5003c2e286650be7f633bf7b9c5a2250434af..62d94696a19c71366d287b7ac286606b43e8bee7 100644 --- a/arch/riscv/boot/loader.lds.S +++ b/arch/riscv/boot/loader.lds.S @@ -1,13 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include OUTPUT_ARCH(riscv) ENTRY(_start) SECTIONS { - . = PAGE_OFFSET; + . = KERNEL_LINK_ADDR; .payload : { *(.payload) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index adc9d26f3d752dd02e434f0be04958a871459a0c..f64b61296c0ce27f2ac8743c55ce4bff66b5ef98 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -90,15 +90,37 @@ typedef struct page *pgtable_t; #ifdef CONFIG_MMU extern unsigned long va_pa_offset; +#ifdef CONFIG_64BIT +extern unsigned long va_kernel_pa_offset; +#endif extern unsigned long pfn_base; #define ARCH_PFN_OFFSET (pfn_base) #else #define va_pa_offset 0 +#ifdef CONFIG_64BIT +#define va_kernel_pa_offset 0 +#endif #define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #endif /* CONFIG_MMU */ -#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset)) -#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset) +#ifdef CONFIG_64BIT +extern unsigned long kernel_virt_addr; + +#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_pa_offset)) +#define kernel_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_kernel_pa_offset)) +#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) + +#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset) +#define kernel_mapping_va_to_pa(x) ((unsigned long)(x) - va_kernel_pa_offset) +#define __va_to_pa_nodebug(x) ({ \ + unsigned long _x = x; \ + (_x < kernel_virt_addr) ? \ + linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \ + }) +#else +#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset)) +#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset) +#endif #ifdef CONFIG_DEBUG_VIRTUAL extern phys_addr_t __virt_to_phys(unsigned long x); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index ebf817c1bdf4b3d955875670494d1828bd042d26..5afda75cc2c3379362b972e3aaea2ef9890cf5ac 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -11,23 +11,38 @@ #include -#ifndef __ASSEMBLY__ +#ifndef CONFIG_MMU +#define KERNEL_LINK_ADDR PAGE_OFFSET +#else -/* Page Upper Directory not used in RISC-V */ -#include -#include -#include -#include +#define ADDRESS_SPACE_END (UL(-1)) -#ifdef CONFIG_MMU +#ifdef CONFIG_64BIT +/* Leave 2GB for kernel and BPF at the end of the address space */ +#define KERNEL_LINK_ADDR (ADDRESS_SPACE_END - SZ_2G + 1) +#else +#define KERNEL_LINK_ADDR PAGE_OFFSET +#endif #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) #define BPF_JIT_REGION_SIZE (SZ_128M) +#ifdef CONFIG_64BIT +/* KASLR should leave at least 128MB for BPF after the kernel */ +#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end) +#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) +#else #define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) #define BPF_JIT_REGION_END (VMALLOC_END) +#endif + +/* Modules always live before the kernel */ +#ifdef CONFIG_64BIT +#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) +#define MODULES_END (PFN_ALIGN((unsigned long)&_start)) +#endif /* * Roughly size the vmemmap space to be large enough to fit enough @@ -57,9 +72,16 @@ #define FIXADDR_SIZE PGDIR_SIZE #endif #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - #endif +#ifndef __ASSEMBLY__ + +/* Page Upper Directory not used in RISC-V */ +#include +#include +#include +#include + #ifdef CONFIG_64BIT #include #else @@ -484,6 +506,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) /* FIXME */ +extern char _start[]; extern void *dtb_early_va; extern uintptr_t dtb_early_pa; void setup_bootmem(void); diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index 6887b3d9f3712bb5f4558b5c40b91b9559ad7335..a9c56776fa0e74d614e0ddcc40291c7c6ad80850 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -17,6 +17,7 @@ int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); int set_memory_rw_nx(unsigned long addr, int numpages); void protect_kernel_text_data(void); +void protect_kernel_linear_mapping_text_rodata(void); #else static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index f5a9bad86e5831890441e9c7c216d406664b2540..6cb05f22e52aaf97b844b4f409e1f8d98abe5614 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -69,7 +69,8 @@ pe_head_start: #ifdef CONFIG_MMU relocate: /* Relocate return address */ - li a1, PAGE_OFFSET + la a1, kernel_virt_addr + REG_L a1, 0(a1) la a2, _start sub a1, a1, a2 add ra, ra, a1 diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 104fba889cf767cb159840954de99f985f7f0929..ce153771e5e9b6591abdd52b91c2902e035ca572 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -408,12 +408,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } #if defined(CONFIG_MMU) && defined(CONFIG_64BIT) -#define VMALLOC_MODULE_START \ - max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START) void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START, - VMALLOC_END, GFP_KERNEL, + return __vmalloc_node_range(size, 1, MODULES_VADDR, + MODULES_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index e85bacff1b5075ee3b704f7a65e40d102d0a9de3..d208abc0b47322ed9260d49517b3a8b903c07c90 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -263,8 +263,13 @@ void __init setup_arch(char **cmdline_p) sbi_init(); - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { protect_kernel_text_data(); +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) + protect_kernel_linear_mapping_text_rodata(); +#endif + } + #ifdef CONFIG_SWIOTLB swiotlb_init(1); #endif diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 7e61bc1dc36e2ea10b833566d8ebccb5586fbb91..56677137c85b118bd0641e2b13f0914fec5cd63e 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -4,7 +4,8 @@ * Copyright (C) 2017 SiFive */ -#define LOAD_OFFSET PAGE_OFFSET +#include +#define LOAD_OFFSET KERNEL_LINK_ADDR #include #include #include diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 8f17519208c756b7a08bf089ec53004c8a5b0c89..1b14d523a95c3358af325ade921e12107836b0a1 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -231,6 +231,19 @@ asmlinkage void do_page_fault(struct pt_regs *regs) return; } +#ifdef CONFIG_64BIT + /* + * Modules in 64bit kernels lie in their own virtual region which is not + * in the vmalloc region, but dealing with page faults in this region + * or the vmalloc region amounts to doing the same thing: checking that + * the mapping exists in init_mm.pgd and updating user page table, so + * just use vmalloc_fault. + */ + if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) { + vmalloc_fault(regs, code, addr); + return; + } +#endif /* Enable interrupts if they were enabled in the parent context. */ if (likely(regs->status & SR_PIE)) local_irq_enable(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7f5036fbee8c522fbfbd8e63a02fac2662174140..dc9b988e0778a0594031a517487facbf804ee114 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -25,6 +25,9 @@ #include "../kernel/head.h" +unsigned long kernel_virt_addr = KERNEL_LINK_ADDR; +EXPORT_SYMBOL(kernel_virt_addr); + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -88,6 +91,10 @@ static void print_vm_layout(void) (unsigned long)VMALLOC_END); print_mlm("lowmem", (unsigned long)PAGE_OFFSET, (unsigned long)high_memory); +#ifdef CONFIG_64BIT + print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR, + (unsigned long)ADDRESS_SPACE_END); +#endif } #else static void print_vm_layout(void) { } @@ -116,8 +123,13 @@ void __init setup_bootmem(void) /* The maximal physical memory size is -PAGE_OFFSET. */ memblock_enforce_memory_limit(-PAGE_OFFSET); - /* Reserve from the start of the kernel to the end of the kernel */ - memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); + /* + * Reserve from the start of the kernel to the end of the kernel + * and make sure we align the reservation on PMD_SIZE since we will + * map the kernel in the linear mapping as read-only: we do not want + * any allocation to happen between _end and the next pmd aligned page. + */ + memblock_reserve(vmlinux_start, (vmlinux_end - vmlinux_start + PMD_SIZE - 1) & PMD_MASK); /* * memblock allocator is not aware of the fact that last 4K bytes of @@ -152,8 +164,14 @@ void __init setup_bootmem(void) #ifdef CONFIG_MMU static struct pt_alloc_ops pt_ops; +/* Offset between linear mapping virtual address and kernel load address */ unsigned long va_pa_offset; EXPORT_SYMBOL(va_pa_offset); +#ifdef CONFIG_64BIT +/* Offset between kernel mapping virtual address and kernel load address */ +unsigned long va_kernel_pa_offset; +EXPORT_SYMBOL(va_kernel_pa_offset); +#endif unsigned long pfn_base; EXPORT_SYMBOL(pfn_base); @@ -257,7 +275,7 @@ static pmd_t *get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT); + BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT); return (uintptr_t)early_pmd; } @@ -372,17 +390,34 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #endif +uintptr_t load_pa, load_sz; + +static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size) +{ + uintptr_t va, end_va; + + end_va = kernel_virt_addr + load_sz; + for (va = kernel_virt_addr; va < end_va; va += map_size) + create_pgd_mapping(pgdir, va, + load_pa + (va - kernel_virt_addr), + map_size, PAGE_KERNEL_EXEC); +} + asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t va, pa, end_va; - uintptr_t load_pa = (uintptr_t)(&_start); - uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; + uintptr_t pa; uintptr_t map_size; #ifndef __PAGETABLE_PMD_FOLDED pmd_t fix_bmap_spmd, fix_bmap_epmd; #endif + load_pa = (uintptr_t)(&_start); + load_sz = (uintptr_t)(&_end) - load_pa; va_pa_offset = PAGE_OFFSET - load_pa; +#ifdef CONFIG_64BIT + va_kernel_pa_offset = kernel_virt_addr - load_pa; +#endif + pfn_base = PFN_DOWN(load_pa); /* @@ -410,26 +445,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_pmd_mapping(fixmap_pmd, FIXADDR_START, (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); /* Setup trampoline PGD and PMD */ - create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr, (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); - create_pmd_mapping(trampoline_pmd, PAGE_OFFSET, + create_pmd_mapping(trampoline_pmd, kernel_virt_addr, load_pa, PMD_SIZE, PAGE_KERNEL_EXEC); #else /* Setup trampoline PGD */ - create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr, load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC); #endif /* - * Setup early PGD covering entire kernel which will allows + * Setup early PGD covering entire kernel which will allow * us to reach paging_init(). We map all memory banks later * in setup_vm_final() below. */ - end_va = PAGE_OFFSET + load_sz; - for (va = PAGE_OFFSET; va < end_va; va += map_size) - create_pgd_mapping(early_pg_dir, va, - load_pa + (va - PAGE_OFFSET), - map_size, PAGE_KERNEL_EXEC); + create_kernel_page_table(early_pg_dir, map_size); #ifndef __PAGETABLE_PMD_FOLDED /* Setup early PMD for DTB */ @@ -444,7 +475,16 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ +#ifdef CONFIG_64BIT + /* + * __va can't be used since it would return a linear mapping address + * whereas dtb_early_va will be used before setup_vm_final installs + * the linear mapping. + */ + dtb_early_va = kernel_mapping_pa_to_va(dtb_pa); +#else dtb_early_va = __va(dtb_pa); +#endif /* CONFIG_64BIT */ #endif /* CONFIG_BUILTIN_DTB */ #else #ifndef CONFIG_BUILTIN_DTB @@ -456,7 +496,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ +#ifdef CONFIG_64BIT + dtb_early_va = kernel_mapping_pa_to_va(dtb_pa); +#else dtb_early_va = __va(dtb_pa); +#endif /* CONFIG_64BIT */ #endif /* CONFIG_BUILTIN_DTB */ #endif dtb_early_pa = dtb_pa; @@ -492,6 +536,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) #endif } +#ifdef CONFIG_64BIT +void protect_kernel_linear_mapping_text_rodata(void) +{ + unsigned long text_start = (unsigned long)lm_alias(_start); + unsigned long init_text_start = (unsigned long)lm_alias(__init_text_begin); + unsigned long rodata_start = (unsigned long)lm_alias(__start_rodata); + unsigned long data_start = (unsigned long)lm_alias(_data); + + set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT); + set_memory_nx(text_start, (init_text_start - text_start) >> PAGE_SHIFT); + + set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); +} +#endif + static void __init setup_vm_final(void) { uintptr_t va, map_size; @@ -513,7 +573,7 @@ static void __init setup_vm_final(void) __pa_symbol(fixmap_pgd_next), PGDIR_SIZE, PAGE_TABLE); - /* Map all memory banks */ + /* Map all memory banks in the linear mapping */ for_each_mem_range(i, &start, &end) { if (start >= end) break; @@ -525,10 +585,22 @@ static void __init setup_vm_final(void) for (pa = start; pa < end; pa += map_size) { va = (uintptr_t)__va(pa); create_pgd_mapping(swapper_pg_dir, va, pa, - map_size, PAGE_KERNEL_EXEC); + map_size, +#ifdef CONFIG_64BIT + PAGE_KERNEL +#else + PAGE_KERNEL_EXEC +#endif + ); + } } +#ifdef CONFIG_64BIT + /* Map the kernel */ + create_kernel_page_table(swapper_pg_dir, PMD_SIZE); +#endif + /* Clear fixmap PTE and PMD mappings */ clear_fixmap(FIX_PTE); clear_fixmap(FIX_PMD); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 2c39f0386673e371a738428a73dea9ddfccb9e6b..28f4d52cf17e48df32993538cd0fc70677316b01 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -171,6 +171,10 @@ void __init kasan_init(void) phys_addr_t _start, _end; u64 i; + /* + * Populate all kernel virtual address space with kasan_early_shadow_page + * except for the linear mapping and the modules/kernel/BPF mapping. + */ kasan_populate_early_shadow((void *)KASAN_SHADOW_START, (void *)kasan_mem_to_shadow((void *) VMEMMAP_END)); @@ -183,6 +187,7 @@ void __init kasan_init(void) (void *)kasan_mem_to_shadow((void *)VMALLOC_START), (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + /* Populate the linear mapping */ for_each_mem_range(i, &_start, &_end) { void *start = (void *)__va(_start); void *end = (void *)__va(_end); @@ -193,6 +198,10 @@ void __init kasan_init(void) kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end)); }; + /* Populate kernel, BPF, modules mapping */ + kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR), + kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END)); + for (i = 0; i < PTRS_PER_PTE; i++) set_pte(&kasan_early_shadow_pte[i], mk_pte(virt_to_page(kasan_early_shadow_page), diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index e8e4dcd39fede8d9726ae01892d3f71cf18d5173..35703d5ef5fd66605a5349c4fa39c7b566e19de8 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -23,7 +23,7 @@ EXPORT_SYMBOL(__virt_to_phys); phys_addr_t __phys_addr_symbol(unsigned long x) { - unsigned long kernel_start = (unsigned long)PAGE_OFFSET; + unsigned long kernel_start = (unsigned long)kernel_virt_addr; unsigned long kernel_end = (unsigned long)_end; /*