init.c 14.7 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
P
Palmer Dabbelt 已提交
2 3
/*
 * Copyright (C) 2012 Regents of the University of California
4
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
P
Palmer Dabbelt 已提交
5 6 7 8 9
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memblock.h>
M
Mike Rapoport 已提交
10
#include <linux/initrd.h>
P
Palmer Dabbelt 已提交
11
#include <linux/swap.h>
C
Christoph Hellwig 已提交
12
#include <linux/sizes.h>
13
#include <linux/of_fdt.h>
14
#include <linux/libfdt.h>
Z
Zong Li 已提交
15
#include <linux/set_memory.h>
P
Palmer Dabbelt 已提交
16

17
#include <asm/fixmap.h>
P
Palmer Dabbelt 已提交
18 19
#include <asm/tlbflush.h>
#include <asm/sections.h>
20
#include <asm/soc.h>
P
Palmer Dabbelt 已提交
21
#include <asm/io.h>
Z
Zong Li 已提交
22
#include <asm/ptdump.h>
P
Palmer Dabbelt 已提交
23

24 25
#include "../kernel/head.h"

26 27 28 29
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
							__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);

30
extern char _start[];
C
Christoph Hellwig 已提交
31
void *dtb_early_va;
32

P
Palmer Dabbelt 已提交
33 34
static void __init zone_sizes_init(void)
{
C
Christoph Hellwig 已提交
35
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
P
Palmer Dabbelt 已提交
36

37
#ifdef CONFIG_ZONE_DMA32
38 39
	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G,
			(unsigned long) PFN_PHYS(max_low_pfn)));
40
#endif
C
Christoph Hellwig 已提交
41 42
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;

43
	free_area_init(max_zone_pfns);
P
Palmer Dabbelt 已提交
44 45
}

C
Christoph Hellwig 已提交
46
static void setup_zero_page(void)
P
Palmer Dabbelt 已提交
47 48 49 50
{
	memset((void *)empty_zero_page, 0, PAGE_SIZE);
}

51
#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)
Y
Yash Shah 已提交
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
static inline void print_mlk(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld kB)\n", name, b, t,
		  (((t) - (b)) >> 10));
}

static inline void print_mlm(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld MB)\n", name, b, t,
		  (((t) - (b)) >> 20));
}

static void print_vm_layout(void)
{
	pr_notice("Virtual kernel memory layout:\n");
	print_mlk("fixmap", (unsigned long)FIXADDR_START,
		  (unsigned long)FIXADDR_TOP);
	print_mlm("pci io", (unsigned long)PCI_IO_START,
		  (unsigned long)PCI_IO_END);
	print_mlm("vmemmap", (unsigned long)VMEMMAP_START,
		  (unsigned long)VMEMMAP_END);
	print_mlm("vmalloc", (unsigned long)VMALLOC_START,
		  (unsigned long)VMALLOC_END);
	print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
		  (unsigned long)high_memory);
}
#else
static void print_vm_layout(void) { }
#endif /* CONFIG_DEBUG_VM */

P
Palmer Dabbelt 已提交
82 83 84 85 86 87 88
void __init mem_init(void)
{
#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif /* CONFIG_FLATMEM */

	high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
89
	memblock_free_all();
P
Palmer Dabbelt 已提交
90 91

	mem_init_print_info(NULL);
Y
Yash Shah 已提交
92
	print_vm_layout();
P
Palmer Dabbelt 已提交
93 94 95
}

#ifdef CONFIG_BLK_DEV_INITRD
96 97
static void __init setup_initrd(void)
{
98
	phys_addr_t start;
99 100
	unsigned long size;

101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
	/* Ignore the virtul address computed during device tree parsing */
	initrd_start = initrd_end = 0;

	if (!phys_initrd_size)
		return;
	/*
	 * Round the memory region to page boundaries as per free_initrd_mem()
	 * This allows us to detect whether the pages overlapping the initrd
	 * are in use, but more importantly, reserves the entire set of pages
	 * as we don't want these pages allocated for other purposes.
	 */
	start = round_down(phys_initrd_start, PAGE_SIZE);
	size = phys_initrd_size + (phys_initrd_start - start);
	size = round_up(size, PAGE_SIZE);

	if (!memblock_is_region_memory(start, size)) {
		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
		       (u64)start, size);
119 120
		goto disable;
	}
121 122 123 124

	if (memblock_is_region_reserved(start, size)) {
		pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
		       (u64)start, size);
125 126 127
		goto disable;
	}

128 129 130 131
	memblock_reserve(start, size);
	/* Now convert initrd to virtual addresses */
	initrd_start = (unsigned long)__va(phys_initrd_start);
	initrd_end = initrd_start + phys_initrd_size;
132 133 134 135 136 137 138 139 140 141
	initrd_below_start_ok = 1;

	pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
		(void *)(initrd_start), size);
	return;
disable:
	pr_cont(" - disabling initrd\n");
	initrd_start = 0;
	initrd_end = 0;
}
P
Palmer Dabbelt 已提交
142
#endif /* CONFIG_BLK_DEV_INITRD */
143

144 145
static phys_addr_t dtb_early_pa __initdata;

146 147 148 149
void __init setup_bootmem(void)
{
	struct memblock_region *reg;
	phys_addr_t mem_size = 0;
150 151
	phys_addr_t vmlinux_end = __pa_symbol(&_end);
	phys_addr_t vmlinux_start = __pa_symbol(&_start);
152 153 154 155 156

	/* Find the memory region containing the kernel */
	for_each_memblock(memory, reg) {
		phys_addr_t end = reg->base + reg->size;

157
		if (reg->base <= vmlinux_start && vmlinux_end <= end) {
158
			mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET);
159 160 161 162 163 164 165 166

			/*
			 * Remove memblock from the end of usable area to the
			 * end of region
			 */
			if (reg->base + mem_size < end)
				memblock_remove(reg->base + mem_size,
						end - reg->base - mem_size);
167 168 169 170
		}
	}
	BUG_ON(mem_size == 0);

171 172 173
	/* Reserve from the start of the kernel to the end of the kernel */
	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);

174 175
	max_pfn = PFN_DOWN(memblock_end_of_DRAM());
	max_low_pfn = max_pfn;
176
	set_max_mapnr(max_low_pfn);
177 178 179 180 181

#ifdef CONFIG_BLK_DEV_INITRD
	setup_initrd();
#endif /* CONFIG_BLK_DEV_INITRD */

182 183 184 185 186 187
	/*
	 * Avoid using early_init_fdt_reserve_self() since __pa() does
	 * not work for DTB pointers that are fixmap addresses
	 */
	memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));

188 189 190 191 192 193 194 195 196 197 198 199 200
	early_init_fdt_scan_reserved_mem();
	memblock_allow_resize();
	memblock_dump_all();

	for_each_memblock(memory, reg) {
		unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
		unsigned long end_pfn = memblock_region_memory_end_pfn(reg);

		memblock_set_node(PFN_PHYS(start_pfn),
				  PFN_PHYS(end_pfn - start_pfn),
				  &memblock.memory, 0);
	}
}
201

C
Christoph Hellwig 已提交
202
#ifdef CONFIG_MMU
203 204 205 206 207
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);

208
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
209 210 211
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
static bool mmu_enabled;
212

213
#define MAX_EARLY_MAPPING_SIZE	SZ_128M
214

215
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233

void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *ptep;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	ptep = &fixmap_pte[pte_index(addr)];

	if (pgprot_val(prot)) {
		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
	} else {
		pte_clear(&init_mm, addr, ptep);
		local_flush_tlb_page(addr);
	}
}

234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
static pte_t *__init get_pte_virt(phys_addr_t pa)
{
	if (mmu_enabled) {
		clear_fixmap(FIX_PTE);
		return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
	} else {
		return (pte_t *)((uintptr_t)pa);
	}
}

static phys_addr_t __init alloc_pte(uintptr_t va)
{
	/*
	 * We only create PMD or PGD early mappings so we
	 * should never reach here with MMU disabled.
	 */
	BUG_ON(!mmu_enabled);

	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static void __init create_pte_mapping(pte_t *ptep,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
259
	uintptr_t pte_idx = pte_index(va);
260 261 262

	BUG_ON(sz != PAGE_SIZE);

263 264
	if (pte_none(ptep[pte_idx]))
		ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
}

#ifndef __PAGETABLE_PMD_FOLDED

pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;

#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE
#define NUM_EARLY_PMDS		1UL
#else
#define NUM_EARLY_PMDS		(1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)
#endif
pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);

static pmd_t *__init get_pmd_virt(phys_addr_t pa)
{
	if (mmu_enabled) {
		clear_fixmap(FIX_PMD);
		return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
	} else {
		return (pmd_t *)((uintptr_t)pa);
	}
}

static phys_addr_t __init alloc_pmd(uintptr_t va)
{
	uintptr_t pmd_num;

	if (mmu_enabled)
		return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);

	pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
	BUG_ON(pmd_num >= NUM_EARLY_PMDS);
	return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
}

static void __init create_pmd_mapping(pmd_t *pmdp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pte_t *ptep;
	phys_addr_t pte_phys;
307
	uintptr_t pmd_idx = pmd_index(va);
308 309

	if (sz == PMD_SIZE) {
310 311
		if (pmd_none(pmdp[pmd_idx]))
			pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
312 313 314
		return;
	}

315
	if (pmd_none(pmdp[pmd_idx])) {
316
		pte_phys = alloc_pte(va);
317
		pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
318 319 320
		ptep = get_pte_virt(pte_phys);
		memset(ptep, 0, PAGE_SIZE);
	} else {
321
		pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
		ptep = get_pte_virt(pte_phys);
	}

	create_pte_mapping(ptep, va, pa, sz, prot);
}

#define pgd_next_t		pmd_t
#define alloc_pgd_next(__va)	alloc_pmd(__va)
#define get_pgd_next_virt(__pa)	get_pmd_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
	create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next		fixmap_pmd
#else
#define pgd_next_t		pte_t
#define alloc_pgd_next(__va)	alloc_pte(__va)
#define get_pgd_next_virt(__pa)	get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next		fixmap_pte
#endif

static void __init create_pgd_mapping(pgd_t *pgdp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pgd_next_t *nextp;
	phys_addr_t next_phys;
349
	uintptr_t pgd_idx = pgd_index(va);
350 351

	if (sz == PGDIR_SIZE) {
352 353
		if (pgd_val(pgdp[pgd_idx]) == 0)
			pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
354 355 356
		return;
	}

357
	if (pgd_val(pgdp[pgd_idx]) == 0) {
358
		next_phys = alloc_pgd_next(va);
359
		pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
360 361 362
		nextp = get_pgd_next_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
363
		next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
364 365 366 367 368 369 370 371
		nextp = get_pgd_next_virt(next_phys);
	}

	create_pgd_next_mapping(nextp, va, pa, sz, prot);
}

static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
372 373 374
	/* Upgrade to PMD_SIZE mappings whenever possible */
	if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
		return PAGE_SIZE;
375

376
	return PMD_SIZE;
377 378
}

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
/*
 * setup_vm() is called from head.S with MMU-off.
 *
 * Following requirements should be honoured for setup_vm() to work
 * correctly:
 * 1) It should use PC-relative addressing for accessing kernel symbols.
 *    To achieve this we always use GCC cmodel=medany.
 * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
 *    so disable compiler instrumentation when FTRACE is enabled.
 *
 * Currently, the above requirements are honoured by using custom CFLAGS
 * for init.o in mm/Makefile.
 */

#ifndef __riscv_cmodel_medany
394
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
395 396
#endif

397
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
398
{
399 400 401 402 403 404 405
	uintptr_t va, end_va;
	uintptr_t load_pa = (uintptr_t)(&_start);
	uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
	uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);

	va_pa_offset = PAGE_OFFSET - load_pa;
	pfn_base = PFN_DOWN(load_pa);
406

407 408 409 410 411
	/*
	 * Enforce boot alignment requirements of RV32 and
	 * RV64 by only allowing PMD or PGD mappings.
	 */
	BUG_ON(map_size == PAGE_SIZE);
412 413 414

	/* Sanity check alignment and size */
	BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
415 416 417 418 419 420
	BUG_ON((load_pa % map_size) != 0);
	BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);

	/* Setup early PGD for fixmap */
	create_pgd_mapping(early_pg_dir, FIXADDR_START,
			   (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
421 422

#ifndef __PAGETABLE_PMD_FOLDED
423 424 425 426 427 428 429 430 431 432 433 434 435
	/* Setup fixmap PMD */
	create_pmd_mapping(fixmap_pmd, FIXADDR_START,
			   (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
	/* Setup trampoline PGD and PMD */
	create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
			   (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
	create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
			   load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
#else
	/* Setup trampoline PGD */
	create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
			   load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif
436

437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456
	/*
	 * Setup early PGD covering entire kernel which will allows
	 * us to reach paging_init(). We map all memory banks later
	 * in setup_vm_final() below.
	 */
	end_va = PAGE_OFFSET + load_sz;
	for (va = PAGE_OFFSET; va < end_va; va += map_size)
		create_pgd_mapping(early_pg_dir, va,
				   load_pa + (va - PAGE_OFFSET),
				   map_size, PAGE_KERNEL_EXEC);

	/* Create fixed mapping for early FDT parsing */
	end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
	for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
		create_pte_mapping(fixmap_pte, va,
				   dtb_pa + (va - __fix_to_virt(FIX_FDT)),
				   PAGE_SIZE, PAGE_KERNEL);

	/* Save pointer to DTB for early FDT parsing */
	dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
457 458
	/* Save physical address for memblock reservation */
	dtb_early_pa = dtb_pa;
459
}
460

461 462 463 464 465 466 467 468
static void __init setup_vm_final(void)
{
	uintptr_t va, map_size;
	phys_addr_t pa, start, end;
	struct memblock_region *reg;

	/* Set mmu_enabled flag */
	mmu_enabled = true;
469

470 471
	/* Setup swapper PGD for fixmap */
	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
472
			   __pa_symbol(fixmap_pgd_next),
473
			   PGDIR_SIZE, PAGE_TABLE);
474

475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
	/* Map all memory banks */
	for_each_memblock(memory, reg) {
		start = reg->base;
		end = start + reg->size;

		if (start >= end)
			break;
		if (memblock_is_nomap(reg))
			continue;
		if (start <= __pa(PAGE_OFFSET) &&
		    __pa(PAGE_OFFSET) < end)
			start = __pa(PAGE_OFFSET);

		map_size = best_map_size(start, end - start);
		for (pa = start; pa < end; pa += map_size) {
			va = (uintptr_t)__va(pa);
			create_pgd_mapping(swapper_pg_dir, va, pa,
					   map_size, PAGE_KERNEL_EXEC);
		}
494
	}
495

496 497 498 499 500
	/* Clear fixmap PTE and PMD mappings */
	clear_fixmap(FIX_PTE);
	clear_fixmap(FIX_PMD);

	/* Move to swapper page table */
501
	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
502 503
	local_flush_tlb_all();
}
C
Christoph Hellwig 已提交
504 505 506
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
507 508 509 510 511 512 513
#ifdef CONFIG_BUILTIN_DTB
	dtb_early_va = soc_lookup_builtin_dtb();
	if (!dtb_early_va) {
		/* Fallback to first available DTS */
		dtb_early_va = (void *) __dtb_start;
	}
#else
C
Christoph Hellwig 已提交
514
	dtb_early_va = (void *)dtb_pa;
515
#endif
C
Christoph Hellwig 已提交
516 517 518 519 520 521
}

static inline void setup_vm_final(void)
{
}
#endif /* CONFIG_MMU */
522

Z
Zong Li 已提交
523 524 525 526 527 528 529 530 531 532 533 534 535
#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_rodata_ro(void)
{
	unsigned long text_start = (unsigned long)_text;
	unsigned long text_end = (unsigned long)_etext;
	unsigned long rodata_start = (unsigned long)__start_rodata;
	unsigned long data_start = (unsigned long)_data;
	unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));

	set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
	set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
	set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
	set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
Z
Zong Li 已提交
536 537

	debug_checkwx();
Z
Zong Li 已提交
538 539 540
}
#endif

541 542 543
void __init paging_init(void)
{
	setup_vm_final();
L
Logan Gunthorpe 已提交
544 545
	memblocks_present();
	sparse_init();
546 547
	setup_zero_page();
	zone_sizes_init();
548
}
L
Logan Gunthorpe 已提交
549

550
#ifdef CONFIG_SPARSEMEM_VMEMMAP
L
Logan Gunthorpe 已提交
551 552 553 554 555 556
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node);
}
#endif