// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
23
#include <linux/memblock.h>
24
#include <linux/memremap.h>
25
#include <linux/highmem.h>
26
#include <linux/slab.h>
27 28
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
29
#include <linux/sched.h>
30 31 32 33 34 35 36 37
#include <asm/dma.h>
#include <asm/pgalloc.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */
38

39
static void * __ref __earlyonly_bootmem_alloc(int node,
40 41 42 43
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
44
	return memblock_alloc_try_nid_raw(size, align, goal,
45
					       MEMBLOCK_ALLOC_ACCESSIBLE, node);
46 47
}

48 49 50 51
/*
 * Allocate @size bytes for the virtual memory map on behalf of @node.
 *
 * While the slab allocator is not yet available the request goes to
 * __earlyonly_bootmem_alloc(), aligned to @size and with a goal of
 * __pa(MAX_DMA_ADDRESS).  Afterwards whole pages of order
 * get_order(@size) are taken from the page allocator.
 *
 * Returns the address of the block, or NULL on failure.  Only the first
 * page allocator failure is reported through warn_alloc().
 */
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
	static bool warned;
	struct page *page;
	int order;

	/* Main allocator not up yet: fall back to bootmem. */
	if (!slab_is_available())
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));

	order = get_order(size);
	page = alloc_pages_node(node, gfp_mask, order);
	if (page)
		return page_address(page);

	/* __GFP_NOWARN is stripped so that this one report is emitted. */
	if (!warned) {
		warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
			   "vmemmap alloc failure: order:%u", order);
		warned = true;
	}

	return NULL;
}

72
/* need to make sure size is all the same during early stage */
73
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
74
{
75
	void *ptr = sparse_buffer_alloc(size);
76

77 78
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
79 80 81
	return ptr;
}

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
/*
 * Return the next pfn the altmap would hand out: the base pfn advanced
 * past the reserved pfns and past everything already consumed by
 * allocations and alignment padding.
 */
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	unsigned long consumed = altmap->alloc + altmap->align;

	return altmap->base_pfn + altmap->reserve + consumed;
}

/*
 * Return how many pfns of the altmap's free budget remain once the pfns
 * used for allocations and alignment padding are subtracted, or 0 when
 * the budget is already used up.
 */
static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long used = altmap->alloc + altmap->align;

	/* Guard against underflow of the unsigned subtraction below. */
	if (altmap->free <= used)
		return 0;

	return altmap->free - used;
}

/**
98 99 100
 * altmap_alloc_block_buf - allocate pages from the device page map
 * @altmap:	device page map
 * @size:	size (in bytes) of the allocation
101
 *
102
 * Allocations are aligned to the size of the request.
103
 */
104
void * __meminit altmap_alloc_block_buf(unsigned long size,
105 106
		struct vmem_altmap *altmap)
{
107
	unsigned long pfn, nr_pfns, nr_align;
108 109 110 111 112 113 114

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

115
	pfn = vmem_altmap_next_pfn(altmap);
116
	nr_pfns = size >> PAGE_SHIFT;
117 118 119 120 121 122 123 124 125
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

126 127
	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
128
	return __va(__pfn_to_phys(pfn));
129 130
}

131 132 133 134 135 136
/*
 * Warn when the page mapped by @pte lives on a node that is further than
 * LOCAL_DISTANCE away from @node.  @start and @end only delimit the
 * address range printed in the warning (@end is exclusive).
 */
void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long backing_pfn = pte_pfn(*pte);
	int backing_nid = early_pfn_to_nid(backing_pfn);

	if (node_distance(backing_nid, node) <= LOCAL_DISTANCE)
		return;

	pr_warn("[%lx-%lx] potential offnode page_structs\n",
		start, end - 1);
}

142
/*
 * Make sure the kernel pte for @addr under @pmd is populated.
 *
 * An empty pte gets a fresh PAGE_SIZE block from
 * vmemmap_alloc_block_buf() on @node, mapped with PAGE_KERNEL.
 * Returns the pte, or NULL if the backing block could not be allocated.
 */
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	pte_t entry;
	void *backing;

	/* Already mapped: nothing to do. */
	if (!pte_none(*pte))
		return pte;

	backing = vmemmap_alloc_block_buf(PAGE_SIZE, node);
	if (!backing)
		return NULL;

	entry = pfn_pte(__pa(backing) >> PAGE_SHIFT, PAGE_KERNEL);
	set_pte_at(&init_mm, addr, pte, entry);

	return pte;
}

156 157 158 159 160 161 162 163 164 165 166
/*
 * Same as vmemmap_alloc_block(), but the returned block is cleared to
 * zero.  Returns NULL when the underlying allocation fails.
 */
static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *block = vmemmap_alloc_block(size, node);

	if (block)
		memset(block, 0, size);

	return block;
}

167
/*
 * Make sure the pmd entry for @addr under @pud is populated.
 *
 * An empty entry receives a zeroed PAGE_SIZE block allocated on @node,
 * installed with pmd_populate_kernel().  Returns the pmd, or NULL if the
 * allocation failed.
 */
pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	void *table;

	if (!pmd_none(*pmd))
		return pmd;

	table = vmemmap_alloc_block_zero(PAGE_SIZE, node);
	if (!table)
		return NULL;

	pmd_populate_kernel(&init_mm, pmd, table);

	return pmd;
}

179
/*
 * Make sure the pud entry for @addr under @p4d is populated.
 *
 * An empty entry receives a zeroed PAGE_SIZE block allocated on @node,
 * installed with pud_populate().  Returns the pud, or NULL if the
 * allocation failed.
 */
pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	void *table;

	if (!pud_none(*pud))
		return pud;

	table = vmemmap_alloc_block_zero(PAGE_SIZE, node);
	if (!table)
		return NULL;

	pud_populate(&init_mm, pud, table);

	return pud;
}
190

191 192 193 194
/*
 * Make sure the p4d entry for @addr under @pgd is populated.
 *
 * An empty entry receives a zeroed PAGE_SIZE block allocated on @node,
 * installed with p4d_populate().  Returns the p4d, or NULL if the
 * allocation failed.
 */
p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	void *table;

	if (!p4d_none(*p4d))
		return p4d;

	table = vmemmap_alloc_block_zero(PAGE_SIZE, node);
	if (!table)
		return NULL;

	p4d_populate(&init_mm, p4d, table);

	return p4d;
}

203 204 205 206
/*
 * Make sure the kernel pgd entry for @addr is populated.
 *
 * An empty entry receives a zeroed PAGE_SIZE block allocated on @node,
 * installed with pgd_populate().  Returns the pgd, or NULL if the
 * allocation failed.
 */
pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	void *table;

	if (!pgd_none(*pgd))
		return pgd;

	table = vmemmap_alloc_block_zero(PAGE_SIZE, node);
	if (!table)
		return NULL;

	pgd_populate(&init_mm, pgd, table);

	return pgd;
}

215 216
/*
 * Populate the range [@start, @end) one PAGE_SIZE step at a time,
 * walking pgd -> p4d -> pud -> pmd -> pte and filling in every level
 * that is still empty with memory allocated on @node.  Each pte is then
 * passed to vmemmap_verify().
 *
 * Returns 0 on success or -ENOMEM as soon as any level cannot be
 * populated; levels populated before the failure are left in place.
 */
int __meminit vmemmap_populate_basepages(unsigned long start,
					 unsigned long end, int node)
{
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		pgd_t *pgd;
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;

		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;

		pte = vmemmap_pte_populate(pmd, addr, node);
		if (!pte)
			return -ENOMEM;

		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
	}

	return 0;
}

247 248
/*
 * Populate the memmap covering @nr_pages pages starting at @pfn.
 *
 * The pfn range is first widened to PAGES_PER_SUBSECTION boundaries,
 * then the virtual range of the matching struct page entries is handed
 * to vmemmap_populate() together with @nid and @altmap.
 *
 * Returns the struct page of the (rounded down) first pfn, or NULL if
 * vmemmap_populate() reports a failure.
 */
struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	unsigned long last_pfn, first_pfn;
	unsigned long start, end;

	/*
	 * The minimum granularity of memmap extensions is
	 * PAGES_PER_SUBSECTION as allocations are tracked in the
	 * 'subsection_map' bitmap of the section.
	 */
	last_pfn = ALIGN(pfn + nr_pages, PAGES_PER_SUBSECTION);
	first_pfn = pfn & PAGE_SUBSECTION_MASK;

	start = (unsigned long) pfn_to_page(first_pfn);
	end = start + (last_pfn - first_pfn) * sizeof(struct page);

	if (vmemmap_populate(start, end, nid, altmap))
		return NULL;

	return pfn_to_page(first_pfn);
}