// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Regents of the University of California
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 * Copyright (C) 2020 FORTH-ICS/CARV
 *  Nick Kossifidis <mick@ics.forth.gr>
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/initrd.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
#include <linux/sizes.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/libfdt.h>
#include <linux/set_memory.h>
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>

#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/soc.h>
#include <asm/io.h>
#include <asm/ptdump.h>
#include <asm/numa.h>

#include "../kernel/head.h"

struct kernel_mapping kernel_map __ro_after_init;
EXPORT_SYMBOL(kernel_map);
#ifdef CONFIG_XIP_KERNEL
#define kernel_map	(*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
#endif

#ifdef CONFIG_64BIT
u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
#else
u64 satp_mode __ro_after_init = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);

bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
EXPORT_SYMBOL(pgtable_l5_enabled);

phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);

unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
							__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);

extern char _start[];
#define DTB_EARLY_BASE_VA      PGDIR_SIZE
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;

static phys_addr_t dma32_phys_limit __initdata;

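/*
 * Set up the zone PFN limits (DMA32 and NORMAL) from dma32_phys_limit and
 * max_low_pfn, then hand them to the core mm via free_area_init().
 */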
static void __init zone_sizes_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };

#ifdef CONFIG_ZONE_DMA32
	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
#endif
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;

	free_area_init(max_zone_pfns);
}

#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)

#define LOG2_SZ_1K  ilog2(SZ_1K)
#define LOG2_SZ_1M  ilog2(SZ_1M)
#define LOG2_SZ_1G  ilog2(SZ_1G)
#define LOG2_SZ_1T  ilog2(SZ_1T)

static inline void print_mlk(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld kB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1K));
}

static inline void print_mlm(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld MB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1M));
}

static inline void print_mlg(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld GB)\n", name, b, t,
		   (((t) - (b)) >> LOG2_SZ_1G));
}

#ifdef CONFIG_64BIT
static inline void print_mlt(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld TB)\n", name, b, t,
		   (((t) - (b)) >> LOG2_SZ_1T));
}
#else
#define print_mlt(n, b, t) do {} while (0)
#endif

static inline void print_ml(char *name, unsigned long b, unsigned long t)
{
	unsigned long diff = t - b;

	if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10)
		print_mlt(name, b, t);
	else if ((diff >> LOG2_SZ_1G) >= 10)
		print_mlg(name, b, t);
	else if ((diff >> LOG2_SZ_1M) >= 10)
		print_mlm(name, b, t);
	else
		print_mlk(name, b, t);
}

static void __init print_vm_layout(void)
{
	pr_notice("Virtual kernel memory layout:\n");
	print_ml("fixmap", (unsigned long)FIXADDR_START,
		(unsigned long)FIXADDR_TOP);
	print_ml("pci io", (unsigned long)PCI_IO_START,
		(unsigned long)PCI_IO_END);
	print_ml("vmemmap", (unsigned long)VMEMMAP_START,
		(unsigned long)VMEMMAP_END);
	print_ml("vmalloc", (unsigned long)VMALLOC_START,
		(unsigned long)VMALLOC_END);
#ifdef CONFIG_64BIT
	print_ml("modules", (unsigned long)MODULES_VADDR,
		(unsigned long)MODULES_END);
#endif
	print_ml("lowmem", (unsigned long)PAGE_OFFSET,
		(unsigned long)high_memory);
	if (IS_ENABLED(CONFIG_64BIT)) {
#ifdef CONFIG_KASAN
		print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif

		print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
			 (unsigned long)ADDRESS_SPACE_END);
	}
}
#else
static void print_vm_layout(void) { }
#endif /* CONFIG_DEBUG_VM */

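/*
 * Late memory init: initialize swiotlb, release all memblock-managed memory
 * to the buddy allocator and print the resulting virtual memory layout.
 */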
void __init mem_init(void)
{
#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif /* CONFIG_FLATMEM */

	swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE);
	memblock_free_all();

	print_vm_layout();
}

/* Limit the memory size via the "mem=" kernel command-line parameter. */
static phys_addr_t memory_limit;

static int __init early_mem(char *p)
{
	u64 size;

	if (!p)
		return 1;

	size = memparse(p, &p) & PAGE_MASK;
	memory_limit = min_t(u64, size, memory_limit);

	pr_notice("Memory limited to %lldMB\n", (u64)memory_limit >> 20);

	return 0;
}
early_param("mem", early_mem);

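/*
 * Early memblock-based boot memory setup: reserve the kernel image, initrd
 * and DTB, apply any "mem=" limit, and initialize min/max_low_pfn,
 * high_memory and the DMA32/CMA reservation limits.
 */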
static void __init setup_bootmem(void)
{
	phys_addr_t vmlinux_end = __pa_symbol(&_end);
	phys_addr_t max_mapped_addr;
	phys_addr_t phys_ram_end, vmlinux_start;

	if (IS_ENABLED(CONFIG_XIP_KERNEL))
		vmlinux_start = __pa_symbol(&_sdata);
	else
		vmlinux_start = __pa_symbol(&_start);

	memblock_enforce_memory_limit(memory_limit);

	/*
	 * Make sure we align the reservation on PMD_SIZE since we will
	 * map the kernel in the linear mapping as read-only: we do not want
	 * any allocation to happen between _end and the next pmd aligned page.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
		vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
	/*
	 * Reserve from the start of the kernel to the end of the kernel
	 */
	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);

	phys_ram_end = memblock_end_of_DRAM();
	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
		phys_ram_base = memblock_start_of_DRAM();
	/*
	 * The memblock allocator is not aware that the last 4K bytes of
	 * addressable memory cannot be mapped because of the IS_ERR_VALUE
	 * macro. Make sure the last 4K bytes are not usable by memblock if
	 * the end of DRAM is equal to the maximum addressable memory. For
	 * the 64-bit kernel, this can't happen here since the end of the
	 * virtual address space is occupied by the kernel mapping; there,
	 * this check must be done as soon as the kernel mapping base address
	 * is determined.
	 */
	if (!IS_ENABLED(CONFIG_64BIT)) {
		max_mapped_addr = __pa(~(ulong)0);
		if (max_mapped_addr == (phys_ram_end - 1))
			memblock_set_current_limit(max_mapped_addr - 4096);
	}

	min_low_pfn = PFN_UP(phys_ram_base);
	max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
	high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));

	dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
	set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);

	reserve_initrd_mem();
	/*
	 * If the DTB is built in, there is no need to reserve its memblock.
	 * Otherwise, do reserve it but avoid using
	 * early_init_fdt_reserve_self() since __pa() does
	 * not work for DTB pointers that are fixmap addresses.
	 */
	if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) {
		/*
		 * In case the DTB is not located in a memory region we won't
		 * be able to locate it later on via the linear mapping and
		 * get a segfault when accessing it via __va(dtb_early_pa).
		 * To avoid this situation copy DTB to a memory region.
		 * Note that memblock_phys_alloc will also reserve DTB region.
		 */
		if (!memblock_is_memory(dtb_early_pa)) {
			size_t fdt_size = fdt_totalsize(dtb_early_va);
			phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE);
			void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size);

			memcpy(new_dtb_early_va, dtb_early_va, fdt_size);
			early_memunmap(new_dtb_early_va, fdt_size);
			_dtb_early_pa = new_dtb_early_pa;
		} else
			memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
	}

	early_init_fdt_scan_reserved_mem();
	dma_contiguous_reserve(dma32_phys_limit);
	if (IS_ENABLED(CONFIG_64BIT))
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
	memblock_allow_resize();
}

#ifdef CONFIG_MMU
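/*
 * Page-table allocation helpers used by the create_*_mapping() functions
 * below. The function pointers are switched as boot progresses, see
 * pt_ops_set_early(), pt_ops_set_fixmap() and pt_ops_set_late().
 */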
struct pt_alloc_ops pt_ops __initdata;

unsigned long riscv_pfn_base __ro_after_init;
EXPORT_SYMBOL(riscv_pfn_base);

pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;

pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define pt_ops			(*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define riscv_pfn_base         (*(unsigned long  *)XIP_FIXUP(&riscv_pfn_base))
#define trampoline_pg_dir      ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte             ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir           ((pgd_t *)XIP_FIXUP(early_pg_dir))
#endif /* CONFIG_XIP_KERNEL */

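/*
 * Architecture mapping from vm_flags protection combinations to page
 * protections, consumed by the generic vm_get_page_prot() declared through
 * DECLARE_VM_GET_PAGE_PROT below.
 */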
static const pgprot_t protection_map[16] = {
	[VM_NONE]					= PAGE_NONE,
	[VM_READ]					= PAGE_READ,
	[VM_WRITE]					= PAGE_COPY,
	[VM_WRITE | VM_READ]				= PAGE_COPY,
	[VM_EXEC]					= PAGE_EXEC,
	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
	[VM_EXEC | VM_WRITE]				= PAGE_COPY_EXEC,
	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_COPY_READ_EXEC,
	[VM_SHARED]					= PAGE_NONE,
	[VM_SHARED | VM_READ]				= PAGE_READ,
	[VM_SHARED | VM_WRITE]				= PAGE_SHARED,
	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_SHARED,
	[VM_SHARED | VM_EXEC]				= PAGE_EXEC,
	[VM_SHARED | VM_EXEC | VM_READ]			= PAGE_READ_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_SHARED_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_SHARED_EXEC
};
DECLARE_VM_GET_PAGE_PROT

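/*
 * Install or clear a fixmap entry: write the PTE for the given fixmap index
 * and flush the local TLB for that address.
 */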
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *ptep;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	ptep = &fixmap_pte[pte_index(addr)];

	if (pgprot_val(prot))
		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
	else
		pte_clear(&init_mm, addr, ptep);
	local_flush_tlb_page(addr);
}

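/*
 * The get_*_virt_*() and alloc_*_*() helpers below come in three flavours
 * matching the boot stages: "early" (MMU off, physical addresses used
 * directly), "fixmap" (MMU on but no linear mapping yet, tables accessed
 * through the fixmap) and "late" (normal page allocator available).
 */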
static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
{
	return (pte_t *)((uintptr_t)pa);
}

static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PTE);
	return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
}

static inline pte_t *__init get_pte_virt_late(phys_addr_t pa)
{
	return (pte_t *) __va(pa);
}

static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
{
	/*
	 * We only create PMD or PGD early mappings so we
	 * should never reach here with MMU disabled.
	 */
	BUG();
}

static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __init alloc_pte_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)));

	return __pa(vaddr);
}

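/*
 * Install a single PTE; only PAGE_SIZE mappings are handled at this level and
 * an already present entry is left untouched.
 */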
static void __init create_pte_mapping(pte_t *ptep,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	uintptr_t pte_idx = pte_index(va);

	BUG_ON(sz != PAGE_SIZE);

	if (pte_none(ptep[pte_idx]))
		ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
}

#ifndef __PAGETABLE_PMD_FOLDED

static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
#define fixmap_pmd     ((pmd_t *)XIP_FIXUP(fixmap_pmd))
#define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */

static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
#define fixmap_p4d     ((p4d_t *)XIP_FIXUP(fixmap_p4d))
#define early_p4d      ((p4d_t *)XIP_FIXUP(early_p4d))
#endif /* CONFIG_XIP_KERNEL */

static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
#define fixmap_pud     ((pud_t *)XIP_FIXUP(fixmap_pud))
#define early_pud      ((pud_t *)XIP_FIXUP(early_pud))
#endif /* CONFIG_XIP_KERNEL */

static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
	/* Before MMU is enabled */
	return (pmd_t *)((uintptr_t)pa);
}

static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PMD);
	return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
}

static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
{
	return (pmd_t *) __va(pa);
}

static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
	BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);

	return (uintptr_t)early_pmd;
}

static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __init alloc_pmd_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr)));

	return __pa(vaddr);
}

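/*
 * Install a PMD-sized leaf mapping when sz == PMD_SIZE, otherwise allocate
 * (or look up) the next-level PTE table via pt_ops and recurse into
 * create_pte_mapping().
 */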
static void __init create_pmd_mapping(pmd_t *pmdp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pte_t *ptep;
	phys_addr_t pte_phys;
	uintptr_t pmd_idx = pmd_index(va);

	if (sz == PMD_SIZE) {
		if (pmd_none(pmdp[pmd_idx]))
			pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
		return;
	}

	if (pmd_none(pmdp[pmd_idx])) {
		pte_phys = pt_ops.alloc_pte(va);
		pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
		ptep = pt_ops.get_pte_virt(pte_phys);
		memset(ptep, 0, PAGE_SIZE);
	} else {
		pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
		ptep = pt_ops.get_pte_virt(pte_phys);
	}

	create_pte_mapping(ptep, va, pa, sz, prot);
}

static pud_t *__init get_pud_virt_early(phys_addr_t pa)
{
	return (pud_t *)((uintptr_t)pa);
}

static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PUD);
	return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
}

static pud_t *__init get_pud_virt_late(phys_addr_t pa)
{
	return (pud_t *)__va(pa);
}

static phys_addr_t __init alloc_pud_early(uintptr_t va)
{
	/* Only one PUD is available for early mapping */
	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);

	return (uintptr_t)early_pud;
}

static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t alloc_pud_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr);
	return __pa(vaddr);
}

static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
{
	return (p4d_t *)((uintptr_t)pa);
}

static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_P4D);
	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
}

static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
{
	return (p4d_t *)__va(pa);
}

static phys_addr_t __init alloc_p4d_early(uintptr_t va)
{
	/* Only one P4D is available for early mapping */
	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);

	return (uintptr_t)early_p4d;
}

static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t alloc_p4d_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr);
	return __pa(vaddr);
}

static void __init create_pud_mapping(pud_t *pudp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pmd_t *nextp;
	phys_addr_t next_phys;
	uintptr_t pud_index = pud_index(va);

	if (sz == PUD_SIZE) {
		if (pud_val(pudp[pud_index]) == 0)
			pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
		return;
	}

	if (pud_val(pudp[pud_index]) == 0) {
		next_phys = pt_ops.alloc_pmd(va);
		pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = pt_ops.get_pmd_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
		nextp = pt_ops.get_pmd_virt(next_phys);
	}

	create_pmd_mapping(nextp, va, pa, sz, prot);
}

static void __init create_p4d_mapping(p4d_t *p4dp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pud_t *nextp;
	phys_addr_t next_phys;
	uintptr_t p4d_index = p4d_index(va);

	if (sz == P4D_SIZE) {
		if (p4d_val(p4dp[p4d_index]) == 0)
			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
		return;
	}

	if (p4d_val(p4dp[p4d_index]) == 0) {
		next_phys = pt_ops.alloc_pud(va);
		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = pt_ops.get_pud_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
		nextp = pt_ops.get_pud_virt(next_phys);
	}

	create_pud_mapping(nextp, va, pa, sz, prot);
}

#define pgd_next_t		p4d_t
#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
				(pgtable_l5_enabled ?			\
		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
				(pgtable_l4_enabled ?			\
		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
#define early_dtb_pgd_next	(pgtable_l5_enabled ?			\
		(uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ?	\
		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
#else
#define pgd_next_t		pte_t
#define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa)	pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next		((uintptr_t)fixmap_pte)
#define early_dtb_pgd_next	((uintptr_t)early_dtb_pmd)
#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#endif /* __PAGETABLE_PMD_FOLDED */

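/*
 * Top-level mapping helper: install a PGDIR-sized leaf when possible,
 * otherwise descend through the p4d/pud/pmd/pte helpers selected by the
 * enabled paging levels.
 */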
void __init create_pgd_mapping(pgd_t *pgdp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pgd_next_t *nextp;
	phys_addr_t next_phys;
	uintptr_t pgd_idx = pgd_index(va);

	if (sz == PGDIR_SIZE) {
		if (pgd_val(pgdp[pgd_idx]) == 0)
			pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
		return;
	}

	if (pgd_val(pgdp[pgd_idx]) == 0) {
		next_phys = alloc_pgd_next(va);
		pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = get_pgd_next_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
		nextp = get_pgd_next_virt(next_phys);
	}

	create_pgd_next_mapping(nextp, va, pa, sz, prot);
}

static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
	/* Upgrade to PMD_SIZE mappings whenever possible */
	if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
		return PAGE_SIZE;

	return PMD_SIZE;
}

#ifdef CONFIG_XIP_KERNEL
#define phys_ram_base  (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
extern char _xiprom[], _exiprom[], __data_loc;

/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
	void *from = (void *)(&__data_loc);
	void *to = (void *)CONFIG_PHYS_RAM_BASE;
	size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));

	memcpy(to, from, sz);
}
#endif

#ifdef CONFIG_STRICT_KERNEL_RWX
static __init pgprot_t pgprot_from_va(uintptr_t va)
{
	if (is_va_kernel_text(va))
		return PAGE_KERNEL_READ_EXEC;

	/*
	 * In 64-bit kernel, the kernel mapping is outside the linear mapping so
	 * we must protect its linear mapping alias from being executed and
	 * written.
	 * And rodata section is marked readonly in mark_rodata_ro.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && is_va_kernel_lm_alias_text(va))
		return PAGE_KERNEL_READ;

	return PAGE_KERNEL;
}

void mark_rodata_ro(void)
{
	set_kernel_memory(__start_rodata, _data, set_memory_ro);
	if (IS_ENABLED(CONFIG_64BIT))
		set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
				  set_memory_ro);

	debug_checkwx();
}
#else
static __init pgprot_t pgprot_from_va(uintptr_t va)
{
	if (IS_ENABLED(CONFIG_64BIT) && !is_kernel_mapping(va))
		return PAGE_KERNEL;

	return PAGE_KERNEL_EXEC;
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
static void __init disable_pgtable_l5(void)
{
	pgtable_l5_enabled = false;
	kernel_map.page_offset = PAGE_OFFSET_L4;
	satp_mode = SATP_MODE_48;
}

static void __init disable_pgtable_l4(void)
{
	pgtable_l4_enabled = false;
	kernel_map.page_offset = PAGE_OFFSET_L3;
	satp_mode = SATP_MODE_39;
}

/*
 * There is a simple way to determine if 4-level is supported by the
 * underlying hardware: establish 1:1 mapping in 4-level page table mode
 * then read SATP to see if the configuration was taken into account
 * meaning sv48 is supported.
 */
static __init void set_satp_mode(void)
{
	u64 identity_satp, hw_satp;
	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
	bool check_l4 = false;

	create_p4d_mapping(early_p4d,
			set_satp_mode_pmd, (uintptr_t)early_pud,
			P4D_SIZE, PAGE_TABLE);
	create_pud_mapping(early_pud,
			   set_satp_mode_pmd, (uintptr_t)early_pmd,
			   PUD_SIZE, PAGE_TABLE);
	/* Handle the case where set_satp_mode straddles 2 PMDs */
	create_pmd_mapping(early_pmd,
			   set_satp_mode_pmd, set_satp_mode_pmd,
			   PMD_SIZE, PAGE_KERNEL_EXEC);
	create_pmd_mapping(early_pmd,
			   set_satp_mode_pmd + PMD_SIZE,
			   set_satp_mode_pmd + PMD_SIZE,
			   PMD_SIZE, PAGE_KERNEL_EXEC);
retry:
	create_pgd_mapping(early_pg_dir,
			   set_satp_mode_pmd,
			   check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
			   PGDIR_SIZE, PAGE_TABLE);

	identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;

	local_flush_tlb_all();
	csr_write(CSR_SATP, identity_satp);
	hw_satp = csr_swap(CSR_SATP, 0ULL);
	local_flush_tlb_all();

	if (hw_satp != identity_satp) {
		if (!check_l4) {
			disable_pgtable_l5();
			check_l4 = true;
			memset(early_pg_dir, 0, PAGE_SIZE);
			goto retry;
		}
		disable_pgtable_l4();
	}

	memset(early_pg_dir, 0, PAGE_SIZE);
	memset(early_p4d, 0, PAGE_SIZE);
	memset(early_pud, 0, PAGE_SIZE);
	memset(early_pmd, 0, PAGE_SIZE);
}
#endif

/*
 * setup_vm() is called from head.S with MMU-off.
 *
 * Following requirements should be honoured for setup_vm() to work
 * correctly:
 * 1) It should use PC-relative addressing for accessing kernel symbols.
 *    To achieve this we always use GCC cmodel=medany.
 * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
 *    so disable compiler instrumentation when FTRACE is enabled.
 *
 * Currently, the above requirements are honoured by using custom CFLAGS
 * for init.o in mm/Makefile.
 */

#ifndef __riscv_cmodel_medany
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif

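/*
 * Map the kernel image with PMD-sized pages. The XIP variant maps the
 * flash-resident text executable and the data from RAM, while the regular
 * variant maps the whole image, executable during early boot and with its
 * final protections (pgprot_from_va()) afterwards.
 */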
#ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir,
					    __always_unused bool early)
{
	uintptr_t va, end_va;

	/* Map the flash resident part */
	end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.xiprom + (va - kernel_map.virt_addr),
				   PMD_SIZE, PAGE_KERNEL_EXEC);

	/* Map the data in RAM */
	end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
	for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
				   PMD_SIZE, PAGE_KERNEL);
}
#else
static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
{
	uintptr_t va, end_va;

	end_va = kernel_map.virt_addr + kernel_map.size;
	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.phys_addr + (va - kernel_map.virt_addr),
				   PMD_SIZE,
				   early ?
					PAGE_KERNEL_EXEC : pgprot_from_va(va));
}
#endif

/*
 * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
 * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
 * entry.
 */
static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
{
#ifndef CONFIG_BUILTIN_DTB
	uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);

	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
			   IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
			   PGDIR_SIZE,
			   IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);

	if (pgtable_l5_enabled)
		create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
				   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);

	if (pgtable_l4_enabled)
		create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
				   (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);

	if (IS_ENABLED(CONFIG_64BIT)) {
		create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
				   pa, PMD_SIZE, PAGE_KERNEL);
		create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
				   pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
	}

	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
#else
	/*
	 * For 64-bit kernel, __va can't be used since it would return a linear
	 * mapping address whereas dtb_early_va will be used before
	 * setup_vm_final installs the linear mapping. For 32-bit kernel, as the
	 * kernel is mapped in the linear mapping, that makes no difference.
	 */
	dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
#endif

	dtb_early_pa = dtb_pa;
}

/*
 * MMU is not enabled, the page tables are allocated directly using
 * early_pmd/pud/p4d and the address returned is the physical one.
 */
static void __init pt_ops_set_early(void)
{
	pt_ops.alloc_pte = alloc_pte_early;
	pt_ops.get_pte_virt = get_pte_virt_early;
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = alloc_pmd_early;
	pt_ops.get_pmd_virt = get_pmd_virt_early;
	pt_ops.alloc_pud = alloc_pud_early;
	pt_ops.get_pud_virt = get_pud_virt_early;
	pt_ops.alloc_p4d = alloc_p4d_early;
	pt_ops.get_p4d_virt = get_p4d_virt_early;
#endif
}

/*
 * MMU is enabled but page table setup is not complete yet.
 * fixmap page table alloc functions must be used as a means to temporarily
 * map the allocated physical pages since the linear mapping does not exist yet.
 *
 * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
 * but it will be used as described above.
 */
static void __init pt_ops_set_fixmap(void)
{
	pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
	pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
	pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
	pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
	pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
	pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap);
	pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap);
#endif
}

/*
 * MMU is enabled and page table setup is complete, so from now, we can use
 * generic page allocation functions to setup page table.
 */
static void __init pt_ops_set_late(void)
{
	pt_ops.alloc_pte = alloc_pte_late;
	pt_ops.get_pte_virt = get_pte_virt_late;
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = alloc_pmd_late;
	pt_ops.get_pmd_virt = get_pmd_virt_late;
	pt_ops.alloc_pud = alloc_pud_late;
	pt_ops.get_pud_virt = get_pud_virt_late;
	pt_ops.alloc_p4d = alloc_p4d_late;
	pt_ops.get_p4d_virt = get_p4d_virt_late;
#endif
}

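/*
 * Early virtual memory setup, called from head.S with the MMU off: fill in
 * kernel_map, probe the satp mode on 64-bit, and build the early, trampoline
 * and fixmap page tables plus the early FDT mapping.
 */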
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
	pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;

	kernel_map.virt_addr = KERNEL_LINK_ADDR;
	kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);

#ifdef CONFIG_XIP_KERNEL
	kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);

	phys_ram_base = CONFIG_PHYS_RAM_BASE;
	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);

	kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
#else
	kernel_map.phys_addr = (uintptr_t)(&_start);
	kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif

#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
	set_satp_mode();
#endif

	kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
	kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;

	riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);

	/*
	 * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
	 * kernel, whereas for 64-bit kernel, the end of the virtual address
	 * space is occupied by the modules/BPF/kernel mappings which reduces
	 * the available size of the linear mapping.
	 */
	memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);

	/* Sanity check alignment and size */
	BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
	BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);

#ifdef CONFIG_64BIT
	/*
	 * The last 4K bytes of the addressable memory can not be mapped because
	 * of IS_ERR_VALUE macro.
	 */
	BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif

	apply_early_boot_alternatives();
	pt_ops_set_early();

	/* Setup early PGD for fixmap */
	create_pgd_mapping(early_pg_dir, FIXADDR_START,
			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);

#ifndef __PAGETABLE_PMD_FOLDED
	/* Setup fixmap P4D and PUD */
	if (pgtable_l5_enabled)
		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
	/* Setup fixmap PUD and PMD */
	if (pgtable_l4_enabled)
		create_pud_mapping(fixmap_pud, FIXADDR_START,
				   (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
	create_pmd_mapping(fixmap_pmd, FIXADDR_START,
			   (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
	/* Setup trampoline PGD and PMD */
	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
	if (pgtable_l5_enabled)
		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
	if (pgtable_l4_enabled)
		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
#ifdef CONFIG_XIP_KERNEL
	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
			   kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
#else
	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
			   kernel_map.phys_addr, PMD_SIZE, PAGE_KERNEL_EXEC);
#endif
#else
	/* Setup trampoline PGD */
	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
			   kernel_map.phys_addr, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif

	/*
	 * Setup early PGD covering entire kernel which will allow
	 * us to reach paging_init(). We map all memory banks later
	 * in setup_vm_final() below.
	 */
	create_kernel_page_table(early_pg_dir, true);

	/* Setup early mapping for FDT early scan */
	create_fdt_early_page_table(early_pg_dir, dtb_pa);

	/*
	 * The boot-time fixmap can only handle PMD_SIZE mappings, so the
	 * boot-ioremap range cannot span multiple PMDs.
	 */
	BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

#ifndef __PAGETABLE_PMD_FOLDED
	/*
	 * Early ioremap fixmap is already created as it lies within first 2MB
	 * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
	 * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
	 * the user if not.
	 */
	fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
	fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
	if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
		WARN_ON(1);
		pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
			pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
	}
#endif

	pt_ops_set_fixmap();
}

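/*
 * Build the final page tables in swapper_pg_dir: the fixmap, the linear
 * mapping of all memblock memory and the kernel mapping, then switch SATP
 * away from the early page table.
 */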
static void __init setup_vm_final(void)
{
	uintptr_t va, map_size;
	phys_addr_t pa, start, end;
	u64 i;

	/* Setup swapper PGD for fixmap */
	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
			   __pa_symbol(fixmap_pgd_next),
			   PGDIR_SIZE, PAGE_TABLE);

	/* Map all memory banks in the linear mapping */
	for_each_mem_range(i, &start, &end) {
		if (start >= end)
			break;
		if (start <= __pa(PAGE_OFFSET) &&
		    __pa(PAGE_OFFSET) < end)
			start = __pa(PAGE_OFFSET);
		if (end >= __pa(PAGE_OFFSET) + memory_limit)
			end = __pa(PAGE_OFFSET) + memory_limit;

		map_size = best_map_size(start, end - start);
		for (pa = start; pa < end; pa += map_size) {
			va = (uintptr_t)__va(pa);

			create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
					   pgprot_from_va(va));
		}
	}

	/* Map the kernel */
	if (IS_ENABLED(CONFIG_64BIT))
		create_kernel_page_table(swapper_pg_dir, false);

#ifdef CONFIG_KASAN
	kasan_swapper_init();
#endif

	/* Clear fixmap PTE and PMD mappings */
	clear_fixmap(FIX_PTE);
	clear_fixmap(FIX_PMD);
	clear_fixmap(FIX_PUD);
	clear_fixmap(FIX_P4D);

	/* Move to swapper page table */
	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
	local_flush_tlb_all();

	pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
	dtb_early_va = (void *)dtb_pa;
	dtb_early_pa = dtb_pa;
}

static inline void setup_vm_final(void)
{
}
#endif /* CONFIG_MMU */

/*
 * reserve_crashkernel() - reserves memory for crash kernel
 *
 * This function reserves memory area given in "crashkernel=" kernel command
 * line parameter. The memory reserved is used by dump capture kernel when
 * primary kernel is crashing.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long crash_base = 0;
	unsigned long long crash_size = 0;
	unsigned long search_start = memblock_start_of_DRAM();
	unsigned long search_end = memblock_end_of_DRAM();

	int ret = 0;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;
	/*
	 * Don't reserve a region for a crash kernel on a crash kernel
	 * since it doesn't make much sense and we have limited memory
	 * resources.
	 */
	if (is_kdump_kernel()) {
		pr_info("crashkernel: ignoring reservation request\n");
		return;
	}

	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&crash_size, &crash_base);
	if (ret || !crash_size)
		return;

	crash_size = PAGE_ALIGN(crash_size);

	if (crash_base) {
		search_start = crash_base;
		search_end = crash_base + crash_size;
	}

	/*
	 * The current RISC-V boot protocol requires 2MB alignment for
	 * RV64 and 4MB alignment for RV32 (the hugepage size).
	 *
	 * Try to allocate from 32-bit addressable physical memory so that
	 * swiotlb can work on the crash kernel.
	 */
	crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
					       search_start,
					       min(search_end, (unsigned long) SZ_4G));
	if (crash_base == 0) {
		/* Try again without restricting the region to 32-bit addressable memory */
		crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
						search_start, search_end);
		if (crash_base == 0) {
			pr_warn("crashkernel: couldn't allocate %lldKB\n",
				crash_size >> 10);
			return;
		}
	}

	pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
		crash_base, crash_base + crash_size, crash_size >> 20);

	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
}

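/* Finalize boot memory and switch to the final page tables. */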
void __init paging_init(void)
{
	setup_bootmem();
	setup_vm_final();
}

void __init misc_mem_init(void)
{
	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
	arch_numa_init();
	sparse_init();
	zone_sizes_init();
	reserve_crashkernel();
	memblock_dump_all();
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
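/* The vmemmap is populated with base (4K) pages only. */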
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node, NULL);
}
#endif