// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Regents of the University of California
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 * Copyright (C) 2020 FORTH-ICS/CARV
 *  Nick Kossifidis <mick@ics.forth.gr>
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/initrd.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
#include <linux/sizes.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/libfdt.h>
#include <linux/set_memory.h>
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>

#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/soc.h>
#include <asm/io.h>
#include <asm/ptdump.h>
#include <asm/numa.h>

#include "../kernel/head.h"

struct kernel_mapping kernel_map __ro_after_init;
EXPORT_SYMBOL(kernel_map);
#ifdef CONFIG_XIP_KERNEL
#define kernel_map	(*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
#endif

#ifdef CONFIG_64BIT
u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
#else
u64 satp_mode __ro_after_init = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);

bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
EXPORT_SYMBOL(pgtable_l5_enabled);

phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);

unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
							__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);

extern char _start[];
#define DTB_EARLY_BASE_VA      PGDIR_SIZE
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;

static phys_addr_t dma32_phys_limit __initdata;

static void __init zone_sizes_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };

#ifdef CONFIG_ZONE_DMA32
	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
#endif
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;

	free_area_init(max_zone_pfns);
}

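/*
 * With CONFIG_DEBUG_VM, dump the virtual memory layout at boot.  The
 * print_ml() helper below picks the unit (kB/MB/GB/TB) that best fits the
 * size of each region.
 */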
#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)

#define LOG2_SZ_1K  ilog2(SZ_1K)
#define LOG2_SZ_1M  ilog2(SZ_1M)
#define LOG2_SZ_1G  ilog2(SZ_1G)
#define LOG2_SZ_1T  ilog2(SZ_1T)

static inline void print_mlk(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld kB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1K));
}

static inline void print_mlm(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld MB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1M));
}

static inline void print_mlg(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld GB)\n", name, b, t,
		   (((t) - (b)) >> LOG2_SZ_1G));
}

#ifdef CONFIG_64BIT
static inline void print_mlt(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld TB)\n", name, b, t,
		   (((t) - (b)) >> LOG2_SZ_1T));
}
#else
#define print_mlt(n, b, t) do {} while (0)
#endif

static inline void print_ml(char *name, unsigned long b, unsigned long t)
{
	unsigned long diff = t - b;

	if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10)
		print_mlt(name, b, t);
	else if ((diff >> LOG2_SZ_1G) >= 10)
		print_mlg(name, b, t);
	else if ((diff >> LOG2_SZ_1M) >= 10)
		print_mlm(name, b, t);
	else
		print_mlk(name, b, t);
}

static void __init print_vm_layout(void)
{
	pr_notice("Virtual kernel memory layout:\n");
	print_ml("fixmap", (unsigned long)FIXADDR_START,
		(unsigned long)FIXADDR_TOP);
	print_ml("pci io", (unsigned long)PCI_IO_START,
		(unsigned long)PCI_IO_END);
	print_ml("vmemmap", (unsigned long)VMEMMAP_START,
		(unsigned long)VMEMMAP_END);
	print_ml("vmalloc", (unsigned long)VMALLOC_START,
		(unsigned long)VMALLOC_END);
	print_ml("lowmem", (unsigned long)PAGE_OFFSET,
		(unsigned long)high_memory);
	if (IS_ENABLED(CONFIG_64BIT)) {
#ifdef CONFIG_KASAN
		print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif

		print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
			 (unsigned long)ADDRESS_SPACE_END);
	}
}
#else
static void print_vm_layout(void) { }
#endif /* CONFIG_DEBUG_VM */

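/*
 * mem_init() only force-enables the swiotlb bounce buffers when some memory
 * lies above the 32-bit DMA limit, then hands all memblock memory over to
 * the buddy allocator.
 */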
void __init mem_init(void)
{
#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif /* CONFIG_FLATMEM */

	swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE);
	memblock_free_all();

	print_vm_layout();
}

/* Limit the memory size via mem. */
static phys_addr_t memory_limit;

static int __init early_mem(char *p)
{
	u64 size;

	if (!p)
		return 1;

	size = memparse(p, &p) & PAGE_MASK;
	memory_limit = min_t(u64, size, memory_limit);

	pr_notice("Memory limited to %lldMB\n", (u64)memory_limit >> 20);

	return 0;
}
early_param("mem", early_mem);

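/*
 * setup_bootmem() finalizes the early memblock view of RAM: it reserves the
 * kernel image, applies the "mem=" limit, clamps 32-bit kernels below the
 * last mappable page, and reserves the initrd, DTB and CMA regions.
 */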
static void __init setup_bootmem(void)
{
	phys_addr_t vmlinux_end = __pa_symbol(&_end);
	phys_addr_t max_mapped_addr;
	phys_addr_t phys_ram_end, vmlinux_start;

	if (IS_ENABLED(CONFIG_XIP_KERNEL))
		vmlinux_start = __pa_symbol(&_sdata);
	else
		vmlinux_start = __pa_symbol(&_start);

	memblock_enforce_memory_limit(memory_limit);

	/*
	 * Make sure we align the reservation on PMD_SIZE since we will
	 * map the kernel in the linear mapping as read-only: we do not want
	 * any allocation to happen between _end and the next pmd aligned page.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
		vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
	/*
	 * Reserve from the start of the kernel to the end of the kernel
	 */
	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);

	phys_ram_end = memblock_end_of_DRAM();
	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
		phys_ram_base = memblock_start_of_DRAM();
	/*
	 * memblock allocator is not aware of the fact that last 4K bytes of
	 * the addressable memory can not be mapped because of IS_ERR_VALUE
	 * macro. Make sure that last 4k bytes are not usable by memblock
	 * if end of dram is equal to maximum addressable memory.  For 64-bit
	 * kernel, this problem can't happen here as the end of the virtual
	 * address space is occupied by the kernel mapping then this check must
	 * be done as soon as the kernel mapping base address is determined.
	 */
	if (!IS_ENABLED(CONFIG_64BIT)) {
		max_mapped_addr = __pa(~(ulong)0);
		if (max_mapped_addr == (phys_ram_end - 1))
			memblock_set_current_limit(max_mapped_addr - 4096);
	}
	min_low_pfn = PFN_UP(phys_ram_base);
	max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
	high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
	dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
	set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
	reserve_initrd_mem();
	/*
	 * If DTB is built in, no need to reserve its memblock.
	 * Otherwise, do reserve it but avoid using
	 * early_init_fdt_reserve_self() since __pa() does
	 * not work for DTB pointers that are fixmap addresses
	 */
	if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) {
		/*
		 * In case the DTB is not located in a memory region we won't
		 * be able to locate it later on via the linear mapping and
		 * get a segfault when accessing it via __va(dtb_early_pa).
		 * To avoid this situation copy DTB to a memory region.
		 * Note that memblock_phys_alloc will also reserve DTB region.
		 */
		if (!memblock_is_memory(dtb_early_pa)) {
			size_t fdt_size = fdt_totalsize(dtb_early_va);
			phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE);
			void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size);

			memcpy(new_dtb_early_va, dtb_early_va, fdt_size);
			early_memunmap(new_dtb_early_va, fdt_size);
			_dtb_early_pa = new_dtb_early_pa;
		} else
			memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
	}
	early_init_fdt_scan_reserved_mem();
	dma_contiguous_reserve(dma32_phys_limit);
	if (IS_ENABLED(CONFIG_64BIT))
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
	memblock_allow_resize();
}
#ifdef CONFIG_MMU
struct pt_alloc_ops pt_ops __initdata;
unsigned long riscv_pfn_base __ro_after_init;
EXPORT_SYMBOL(riscv_pfn_base);
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define pt_ops			(*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define riscv_pfn_base         (*(unsigned long  *)XIP_FIXUP(&riscv_pfn_base))
#define trampoline_pg_dir      ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte             ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir           ((pgd_t *)XIP_FIXUP(early_pg_dir))
#endif /* CONFIG_XIP_KERNEL */

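/*
 * Install or tear down a fixmap entry: a non-zero protection value maps
 * @phys at the fixed virtual address, a zero value clears the slot.  The
 * local TLB entry is flushed either way.
 */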
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *ptep;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	ptep = &fixmap_pte[pte_index(addr)];

	if (pgprot_val(prot))
		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
	else
		pte_clear(&init_mm, addr, ptep);
	local_flush_tlb_page(addr);
}

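/*
 * Three flavours of page-table helpers are provided below and selected at
 * run time through pt_ops: "early" (MMU off, physical addresses used
 * directly), "fixmap" (MMU on but no linear mapping yet, tables touched
 * through fixmap slots) and "late" (normal __va()/page allocations).
 */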
static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
{
	return (pte_t *)((uintptr_t)pa);
}

static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PTE);
	return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
}

static inline pte_t *__init get_pte_virt_late(phys_addr_t pa)
{
	return (pte_t *) __va(pa);
}

static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
{
	/*
	 * We only create PMD or PGD early mappings so we
	 * should never reach here with MMU disabled.
	 */
	BUG();
}
static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __init alloc_pte_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)));

	return __pa(vaddr);
}

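/* Fill the leaf PTE for @va if it is still empty; only 4K mappings here. */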
static void __init create_pte_mapping(pte_t *ptep,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	uintptr_t pte_idx = pte_index(va);

	BUG_ON(sz != PAGE_SIZE);

	if (pte_none(ptep[pte_idx]))
		ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
}

#ifndef __PAGETABLE_PMD_FOLDED

static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
#define fixmap_pmd     ((pmd_t *)XIP_FIXUP(fixmap_pmd))
#define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */

static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
#define fixmap_p4d     ((p4d_t *)XIP_FIXUP(fixmap_p4d))
#define early_p4d      ((p4d_t *)XIP_FIXUP(early_p4d))
#endif /* CONFIG_XIP_KERNEL */

static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
#define fixmap_pud     ((pud_t *)XIP_FIXUP(fixmap_pud))
#define early_pud      ((pud_t *)XIP_FIXUP(early_pud))
#endif /* CONFIG_XIP_KERNEL */

static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
	/* Before MMU is enabled */
	return (pmd_t *)((uintptr_t)pa);
}

static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PMD);
	return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
}
static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
{
	return (pmd_t *) __va(pa);
}
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
	BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
	return (uintptr_t)early_pmd;
}

static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __init alloc_pmd_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr)));

	return __pa(vaddr);
}

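/*
 * Map @va at the PMD level: either install a PMD-sized leaf directly or
 * allocate (via pt_ops) and descend into a PTE table.
 */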
static void __init create_pmd_mapping(pmd_t *pmdp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pte_t *ptep;
	phys_addr_t pte_phys;
	uintptr_t pmd_idx = pmd_index(va);

	if (sz == PMD_SIZE) {
		if (pmd_none(pmdp[pmd_idx]))
			pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
		return;
	}

	if (pmd_none(pmdp[pmd_idx])) {
		pte_phys = pt_ops.alloc_pte(va);
		pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
		ptep = pt_ops.get_pte_virt(pte_phys);
		memset(ptep, 0, PAGE_SIZE);
	} else {
		pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
		ptep = pt_ops.get_pte_virt(pte_phys);
	}

	create_pte_mapping(ptep, va, pa, sz, prot);
}

static pud_t *__init get_pud_virt_early(phys_addr_t pa)
{
	return (pud_t *)((uintptr_t)pa);
}

static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PUD);
	return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
}

static pud_t *__init get_pud_virt_late(phys_addr_t pa)
{
	return (pud_t *)__va(pa);
}

static phys_addr_t __init alloc_pud_early(uintptr_t va)
{
	/* Only one PUD is available for early mapping */
	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);

	return (uintptr_t)early_pud;
}

static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t alloc_pud_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr);
	return __pa(vaddr);
}

static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
{
	return (p4d_t *)((uintptr_t)pa);
}

static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_P4D);
	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
}

static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
{
	return (p4d_t *)__va(pa);
}

static phys_addr_t __init alloc_p4d_early(uintptr_t va)
{
	/* Only one P4D is available for early mapping */
	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);

	return (uintptr_t)early_p4d;
}

static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t alloc_p4d_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr);
	return __pa(vaddr);
}

static void __init create_pud_mapping(pud_t *pudp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pmd_t *nextp;
	phys_addr_t next_phys;
	uintptr_t pud_index = pud_index(va);

	if (sz == PUD_SIZE) {
		if (pud_val(pudp[pud_index]) == 0)
			pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
		return;
	}

	if (pud_val(pudp[pud_index]) == 0) {
		next_phys = pt_ops.alloc_pmd(va);
		pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = pt_ops.get_pmd_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
		nextp = pt_ops.get_pmd_virt(next_phys);
	}

	create_pmd_mapping(nextp, va, pa, sz, prot);
}

static void __init create_p4d_mapping(p4d_t *p4dp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pud_t *nextp;
	phys_addr_t next_phys;
	uintptr_t p4d_index = p4d_index(va);

	if (sz == P4D_SIZE) {
		if (p4d_val(p4dp[p4d_index]) == 0)
			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
		return;
	}

	if (p4d_val(p4dp[p4d_index]) == 0) {
		next_phys = pt_ops.alloc_pud(va);
		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = pt_ops.get_pud_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
		nextp = pt_ops.get_pud_virt(next_phys);
	}

	create_pud_mapping(nextp, va, pa, sz, prot);
}

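/*
 * The pgd_next_* macros route the generic create_pgd_mapping() code to the
 * p4d, pud or pmd level depending on how many page-table levels
 * (pgtable_l5_enabled/pgtable_l4_enabled) were detected at boot.
 */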
#define pgd_next_t		p4d_t
#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
				(pgtable_l5_enabled ?			\
		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
				(pgtable_l4_enabled ?			\
		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
#define early_dtb_pgd_next	(pgtable_l5_enabled ?			\
		(uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ?	\
		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
#else
#define pgd_next_t		pte_t
#define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa)	pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next		((uintptr_t)fixmap_pte)
#define early_dtb_pgd_next	((uintptr_t)early_dtb_pmd)
#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#endif /* __PAGETABLE_PMD_FOLDED */

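/*
 * Generic top-level mapping helper: installs a PGDIR-sized leaf when
 * possible, otherwise allocates the next-level table and recurses through
 * the pgd_next_* dispatch macros above.
 */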
void __init create_pgd_mapping(pgd_t *pgdp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pgd_next_t *nextp;
	phys_addr_t next_phys;
	uintptr_t pgd_idx = pgd_index(va);

	if (sz == PGDIR_SIZE) {
		if (pgd_val(pgdp[pgd_idx]) == 0)
			pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
		return;
	}

	if (pgd_val(pgdp[pgd_idx]) == 0) {
		next_phys = alloc_pgd_next(va);
		pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = get_pgd_next_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
		nextp = get_pgd_next_virt(next_phys);
	}

	create_pgd_next_mapping(nextp, va, pa, sz, prot);
}

static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
	/* Upgrade to PMD_SIZE mappings whenever possible */
	if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
		return PAGE_SIZE;
	return PMD_SIZE;
}

#ifdef CONFIG_XIP_KERNEL
#define phys_ram_base  (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
extern char _xiprom[], _exiprom[], __data_loc;

/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
	void *from = (void *)(&__data_loc);
	void *to = (void *)CONFIG_PHYS_RAM_BASE;
	size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));

	memcpy(to, from, sz);
}
#endif

#ifdef CONFIG_STRICT_KERNEL_RWX
static __init pgprot_t pgprot_from_va(uintptr_t va)
{
	if (is_va_kernel_text(va))
		return PAGE_KERNEL_READ_EXEC;

	/*
	 * In 64-bit kernel, the kernel mapping is outside the linear mapping so
	 * we must protect its linear mapping alias from being executed and
	 * written.
	 * And rodata section is marked readonly in mark_rodata_ro.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && is_va_kernel_lm_alias_text(va))
		return PAGE_KERNEL_READ;

	return PAGE_KERNEL;
}

void mark_rodata_ro(void)
{
	set_kernel_memory(__start_rodata, _data, set_memory_ro);
	if (IS_ENABLED(CONFIG_64BIT))
		set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
				  set_memory_ro);

	debug_checkwx();
}
#else
static __init pgprot_t pgprot_from_va(uintptr_t va)
{
	if (IS_ENABLED(CONFIG_64BIT) && !is_kernel_mapping(va))
		return PAGE_KERNEL;

	return PAGE_KERNEL_EXEC;
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
static void __init disable_pgtable_l5(void)
{
	pgtable_l5_enabled = false;
	kernel_map.page_offset = PAGE_OFFSET_L4;
	satp_mode = SATP_MODE_48;
}

static void __init disable_pgtable_l4(void)
{
	pgtable_l4_enabled = false;
	kernel_map.page_offset = PAGE_OFFSET_L3;
	satp_mode = SATP_MODE_39;
}

/*
 * There is a simple way to determine if 4-level is supported by the
 * underlying hardware: establish 1:1 mapping in 4-level page table mode
 * then read SATP to see if the configuration was taken into account
 * meaning sv48 is supported.
 */
static __init void set_satp_mode(void)
{
	u64 identity_satp, hw_satp;
	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
	bool check_l4 = false;

	create_p4d_mapping(early_p4d,
			set_satp_mode_pmd, (uintptr_t)early_pud,
			P4D_SIZE, PAGE_TABLE);
	create_pud_mapping(early_pud,
			   set_satp_mode_pmd, (uintptr_t)early_pmd,
			   PUD_SIZE, PAGE_TABLE);
	/* Handle the case where set_satp_mode straddles 2 PMDs */
	create_pmd_mapping(early_pmd,
			   set_satp_mode_pmd, set_satp_mode_pmd,
			   PMD_SIZE, PAGE_KERNEL_EXEC);
	create_pmd_mapping(early_pmd,
			   set_satp_mode_pmd + PMD_SIZE,
			   set_satp_mode_pmd + PMD_SIZE,
			   PMD_SIZE, PAGE_KERNEL_EXEC);
retry:
	create_pgd_mapping(early_pg_dir,
			   set_satp_mode_pmd,
			   check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
			   PGDIR_SIZE, PAGE_TABLE);

	identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;

	local_flush_tlb_all();
	csr_write(CSR_SATP, identity_satp);
	hw_satp = csr_swap(CSR_SATP, 0ULL);
	local_flush_tlb_all();

	if (hw_satp != identity_satp) {
		if (!check_l4) {
			disable_pgtable_l5();
			check_l4 = true;
			memset(early_pg_dir, 0, PAGE_SIZE);
			goto retry;
		}
		disable_pgtable_l4();
	}

	memset(early_pg_dir, 0, PAGE_SIZE);
	memset(early_p4d, 0, PAGE_SIZE);
	memset(early_pud, 0, PAGE_SIZE);
	memset(early_pmd, 0, PAGE_SIZE);
}
#endif

/*
 * setup_vm() is called from head.S with MMU-off.
 *
 * Following requirements should be honoured for setup_vm() to work
 * correctly:
 * 1) It should use PC-relative addressing for accessing kernel symbols.
 *    To achieve this we always use GCC cmodel=medany.
 * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
 *    so disable compiler instrumentation when FTRACE is enabled.
 *
 * Currently, the above requirements are honoured by using custom CFLAGS
 * for init.o in mm/Makefile.
 */

#ifndef __riscv_cmodel_medany
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif

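/*
 * create_kernel_page_table() maps the kernel image with PMD-sized pages.
 * The XIP variant maps the flash-resident text separately from the data
 * that was copied to RAM.
 */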
#ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir,
					    __always_unused bool early)
{
	uintptr_t va, end_va;

	/* Map the flash resident part */
	end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.xiprom + (va - kernel_map.virt_addr),
				   PMD_SIZE, PAGE_KERNEL_EXEC);

	/* Map the data in RAM */
	end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
	for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
				   PMD_SIZE, PAGE_KERNEL);
}
#else
static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
{
	uintptr_t va, end_va;

	end_va = kernel_map.virt_addr + kernel_map.size;
	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.phys_addr + (va - kernel_map.virt_addr),
				   PMD_SIZE,
				   early ?
					PAGE_KERNEL_EXEC : pgprot_from_va(va));
}
#endif

/*
 * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
 * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
 * entry.
 */
static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
{
#ifndef CONFIG_BUILTIN_DTB
	uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);

	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
			   IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
			   PGDIR_SIZE,
			   IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);

	if (pgtable_l5_enabled)
		create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
				   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);

	if (pgtable_l4_enabled)
		create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
				   (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);

	if (IS_ENABLED(CONFIG_64BIT)) {
		create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
				   pa, PMD_SIZE, PAGE_KERNEL);
		create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
				   pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
	}

	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
#else
	/*
	 * For 64-bit kernel, __va can't be used since it would return a linear
	 * mapping address whereas dtb_early_va will be used before
	 * setup_vm_final installs the linear mapping. For 32-bit kernel, as the
	 * kernel is mapped in the linear mapping, that makes no difference.
	 */
	dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
#endif

	dtb_early_pa = dtb_pa;
}

/*
 * MMU is not enabled, the page tables are allocated directly using
 * early_pmd/pud/p4d and the address returned is the physical one.
 */
static void __init pt_ops_set_early(void)
{
	pt_ops.alloc_pte = alloc_pte_early;
	pt_ops.get_pte_virt = get_pte_virt_early;
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = alloc_pmd_early;
	pt_ops.get_pmd_virt = get_pmd_virt_early;
	pt_ops.alloc_pud = alloc_pud_early;
	pt_ops.get_pud_virt = get_pud_virt_early;
	pt_ops.alloc_p4d = alloc_p4d_early;
	pt_ops.get_p4d_virt = get_p4d_virt_early;
#endif
}

/*
 * MMU is enabled but page table setup is not complete yet.
 * fixmap page table alloc functions must be used as a means to temporarily
 * map the allocated physical pages since the linear mapping does not exist yet.
 *
 * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
 * but it will be used as described above.
 */
static void __init pt_ops_set_fixmap(void)
{
	pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
	pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
	pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
	pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
	pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
	pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap);
	pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap);
#endif
}

/*
 * MMU is enabled and page table setup is complete, so from now, we can use
 * generic page allocation functions to setup page table.
 */
static void __init pt_ops_set_late(void)
{
	pt_ops.alloc_pte = alloc_pte_late;
	pt_ops.get_pte_virt = get_pte_virt_late;
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = alloc_pmd_late;
	pt_ops.get_pmd_virt = get_pmd_virt_late;
	pt_ops.alloc_pud = alloc_pud_late;
	pt_ops.get_pud_virt = get_pud_virt_late;
	pt_ops.alloc_p4d = alloc_p4d_late;
	pt_ops.get_p4d_virt = get_p4d_virt_late;
#endif
}

asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
	pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;

	kernel_map.virt_addr = KERNEL_LINK_ADDR;
	kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);

#ifdef CONFIG_XIP_KERNEL
	kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);

	phys_ram_base = CONFIG_PHYS_RAM_BASE;
	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);

	kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
#else
	kernel_map.phys_addr = (uintptr_t)(&_start);
	kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif

#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
	set_satp_mode();
#endif

	kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
	kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;

	riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);

	/*
	 * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
	 * kernel, whereas for 64-bit kernel, the end of the virtual address
	 * space is occupied by the modules/BPF/kernel mappings which reduces
	 * the available size of the linear mapping.
	 */
	memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);

	/* Sanity check alignment and size */
	BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
	BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);

#ifdef CONFIG_64BIT
	/*
	 * The last 4K bytes of the addressable memory can not be mapped because
	 * of IS_ERR_VALUE macro.
	 */
	BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif

	apply_early_boot_alternatives();
	pt_ops_set_early();

	/* Setup early PGD for fixmap */
	create_pgd_mapping(early_pg_dir, FIXADDR_START,
			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);

#ifndef __PAGETABLE_PMD_FOLDED
	/* Setup fixmap P4D and PUD */
	if (pgtable_l5_enabled)
		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
	/* Setup fixmap PUD and PMD */
	if (pgtable_l4_enabled)
		create_pud_mapping(fixmap_pud, FIXADDR_START,
				   (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
	create_pmd_mapping(fixmap_pmd, FIXADDR_START,
			   (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
	/* Setup trampoline PGD and PMD */
	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
	if (pgtable_l5_enabled)
		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
	if (pgtable_l4_enabled)
		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
#ifdef CONFIG_XIP_KERNEL
	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
			   kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
#else
	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
			   kernel_map.phys_addr, PMD_SIZE, PAGE_KERNEL_EXEC);
#endif
#else
	/* Setup trampoline PGD */
	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
			   kernel_map.phys_addr, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif

	/*
	 * Setup early PGD covering entire kernel which will allow
	 * us to reach paging_init(). We map all memory banks later
	 * in setup_vm_final() below.
	 */
	create_kernel_page_table(early_pg_dir, true);

	/* Setup early mapping for FDT early scan */
	create_fdt_early_page_table(early_pg_dir, dtb_pa);

	/*
	 * The boot-time fixmap can only handle PMD_SIZE mappings. Thus, the boot-ioremap
	 * range can not span multiple pmds.
	 */
	BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

#ifndef __PAGETABLE_PMD_FOLDED
	/*
	 * Early ioremap fixmap is already created as it lies within first 2MB
	 * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
	 * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
	 * the user if not.
	 */
	fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
	fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
	if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
		WARN_ON(1);
		pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
			pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
	}
#endif

	pt_ops_set_fixmap();
}

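/*
 * setup_vm_final() rebuilds the swapper page tables with final permissions:
 * the fixmap, the linear mapping of every memblock range and (on 64-bit)
 * the kernel mapping, then switches SATP over to swapper_pg_dir.
 */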
static void __init setup_vm_final(void)
{
	uintptr_t va, map_size;
	phys_addr_t pa, start, end;
	u64 i;

	/* Setup swapper PGD for fixmap */
	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
			   __pa_symbol(fixmap_pgd_next),
			   PGDIR_SIZE, PAGE_TABLE);

	/* Map all memory banks in the linear mapping */
	for_each_mem_range(i, &start, &end) {
		if (start >= end)
			break;
		if (start <= __pa(PAGE_OFFSET) &&
		    __pa(PAGE_OFFSET) < end)
			start = __pa(PAGE_OFFSET);
		if (end >= __pa(PAGE_OFFSET) + memory_limit)
			end = __pa(PAGE_OFFSET) + memory_limit;

		map_size = best_map_size(start, end - start);
		for (pa = start; pa < end; pa += map_size) {
			va = (uintptr_t)__va(pa);

			create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
					   pgprot_from_va(va));
		}
	}

	/* Map the kernel */
	if (IS_ENABLED(CONFIG_64BIT))
		create_kernel_page_table(swapper_pg_dir, false);

#ifdef CONFIG_KASAN
	kasan_swapper_init();
#endif

	/* Clear fixmap PTE and PMD mappings */
	clear_fixmap(FIX_PTE);
	clear_fixmap(FIX_PMD);
	clear_fixmap(FIX_PUD);
	clear_fixmap(FIX_P4D);

	/* Move to swapper page table */
	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
	local_flush_tlb_all();

	pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
	dtb_early_va = (void *)dtb_pa;
	dtb_early_pa = dtb_pa;
}

static inline void setup_vm_final(void)
{
}
#endif /* CONFIG_MMU */

/*
 * reserve_crashkernel() - reserves memory for crash kernel
 *
 * This function reserves memory area given in "crashkernel=" kernel command
 * line parameter. The memory reserved is used by dump capture kernel when
 * primary kernel is crashing.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long crash_base = 0;
	unsigned long long crash_size = 0;
	unsigned long search_start = memblock_start_of_DRAM();
	unsigned long search_end = memblock_end_of_DRAM();

	int ret = 0;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;
	/*
	 * Don't reserve a region for a crash kernel on a crash kernel
	 * since it doesn't make much sense and we have limited memory
	 * resources.
	 */
	if (is_kdump_kernel()) {
		pr_info("crashkernel: ignoring reservation request\n");
		return;
	}

	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&crash_size, &crash_base);
	if (ret || !crash_size)
		return;

	crash_size = PAGE_ALIGN(crash_size);

	if (crash_base) {
		search_start = crash_base;
		search_end = crash_base + crash_size;
	}

	/*
	 * Current riscv boot protocol requires 2MB alignment for
	 * RV64 and 4MB alignment for RV32 (hugepage size)
	 *
	 * Try to alloc from 32bit addressable physical memory so that
	 * swiotlb can work on the crash kernel.
	 */
	crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
					       search_start,
					       min(search_end, (unsigned long) SZ_4G));
	if (crash_base == 0) {
		/* Try again without restricting region to 32bit addressable memory */
		crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
						search_start, search_end);
		if (crash_base == 0) {
			pr_warn("crashkernel: couldn't allocate %lldKB\n",
				crash_size >> 10);
			return;
		}
	}

	pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
		crash_base, crash_base + crash_size, crash_size >> 20);

	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
}

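/*
 * paging_init() finishes the bootmem setup and installs the final page
 * tables; misc_mem_init() handles memtest, NUMA, sparsemem, zone sizes and
 * the crashkernel reservation.
 */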
void __init paging_init(void)
{
	setup_bootmem();
	setup_vm_final();
}

void __init misc_mem_init(void)
{
	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
	arch_numa_init();
	sparse_init();
	zone_sizes_init();
	reserve_crashkernel();
	memblock_dump_all();
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
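/* The vmemmap is populated with base pages; no huge-page optimization here. */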
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node, NULL);
}
#endif