sparse-vmemmap.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
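/*
 * With CONFIG_SPARSEMEM_VMEMMAP the generic helpers in
 * <asm-generic/memory_model.h> reduce to pure pointer arithmetic:
 *
 *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
 *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 */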
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */
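/*
 * Note that the _raw memblock variant returns the memory uninitialized;
 * callers that need zeroed pages (the page-table levels below) go
 * through vmemmap_alloc_block_zero() instead.
 */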
static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_alloc_try_nid_raw(size, align, goal,
					       MEMBLOCK_ALLOC_ACCESSIBLE, node);
}

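/*
 * Once slab is up, allocate from the buddy allocator directly.
 * __GFP_RETRY_MAYFAIL avoids invoking the OOM killer on failure and
 * __GFP_NOWARN suppresses the page allocator's own splat; the
 * warn-once below emits a single diagnostic instead.
 */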
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, fallback to bootmem. */
	if (slab_is_available()) {
		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
		int order = get_order(size);
		static bool warned;
		struct page *page;

		page = alloc_pages_node(node, gfp_mask, order);
		if (page)
			return page_address(page);

		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap);

/* Callers must request a uniform size during the early stage. */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
					 struct vmem_altmap *altmap)
{
	void *ptr;

	if (altmap)
		return altmap_alloc_block_buf(size, altmap);

	ptr = sparse_buffer_alloc(size);
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
	return ptr;
}

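/*
 * A struct vmem_altmap describes pfns that a device (e.g. ZONE_DEVICE
 * pmem) set aside to hold its own memmap. The two helpers below do the
 * bookkeeping: base_pfn + reserve + alloc + align is the next unused
 * pfn, and free - (alloc + align) is the number of pfns still
 * available.
 */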
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}

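/*
 * Carve nr_pfns pages out of the altmap. find_first_bit() on nr_pfns
 * yields its lowest set bit, i.e. the largest power of two dividing
 * the request, and the start pfn is padded up to that boundary so the
 * range comes back naturally aligned.
 */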
static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}

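/* Warn when the memmap for a range landed on a distant NUMA node. */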
void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(*pte);
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}

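/*
 * Map a single vmemmap page at addr, backing it from the altmap when
 * one is supplied and from the regular allocators otherwise.
 */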
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte)) {
		pte_t entry;
		void *p;

		p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
		if (!p)
			return NULL;
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

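/* Page-table pages must start out clear, so zero the fresh block. */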
static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *p = vmemmap_alloc_block(size, node);

	if (!p)
		return NULL;
	memset(p, 0, size);

	return p;
}

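/*
 * One helper per page-table level: look up the entry for addr and, if
 * it is empty, install a newly zeroed lower-level table.
 */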
pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		p4d_populate(&init_mm, p4d, p);
	}
	return p4d;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

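/*
 * Populate the vmemmap for [start, end) with base (PAGE_SIZE) pages,
 * instantiating any missing page-table level along the way.
 * Architectures that do not map the vmemmap with huge pages can use
 * this directly as their vmemmap_populate() implementation.
 */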
int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr = start;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;
		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;
		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;
		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;
		pte = vmemmap_pte_populate(pmd, addr, node, altmap);
		if (!pte)
			return -ENOMEM;
		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
	}

	return 0;
}

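/*
 * Round the request out to whole subsections and hand the resulting
 * virtual range to the architecture's vmemmap_populate(). Returns the
 * first struct page of the (aligned) range, or NULL on failure.
 */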
struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	unsigned long start;
	unsigned long end;

	/*
	 * The minimum granularity of memmap extensions is
	 * PAGES_PER_SUBSECTION as allocations are tracked in the
	 * 'subsection_map' bitmap of the section.
	 */
	end = ALIGN(pfn + nr_pages, PAGES_PER_SUBSECTION);
	pfn &= PAGE_SUBSECTION_MASK;
	nr_pages = end - pfn;

	start = (unsigned long) pfn_to_page(pfn);
	end = start + nr_pages * sizeof(struct page);

	if (vmemmap_populate(start, end, nid, altmap))
		return NULL;

	return pfn_to_page(pfn);
}