sparse-vmemmap.c 6.6 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2 3 4
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
24
#include <linux/memremap.h>
25
#include <linux/highmem.h>
26
#include <linux/slab.h>
27 28
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
29
#include <linux/sched.h>
30 31 32 33 34 35 36 37 38
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */
39

40
/*
 * Boot-time allocation path: hand the request to
 * memblock_alloc_try_nid_raw() for @node, passing @goal and
 * BOOTMEM_ALLOC_ACCESSIBLE as its two address arguments.
 * Returns whatever that call returns.
 */
static void * __ref __earlyonly_bootmem_alloc(int node, unsigned long size,
					      unsigned long align,
					      unsigned long goal)
{
	void *block;

	block = memblock_alloc_try_nid_raw(size, align, goal,
					   BOOTMEM_ALLOC_ACCESSIBLE, node);
	return block;
}

49 50 51 52
/*
 * Allocate @size bytes for the vmemmap on @node.
 *
 * While slab is not yet available the request goes to the boot-time
 * allocator, using @size as the alignment as well. Afterwards pages are
 * taken from the page allocator; a failure there is reported through
 * warn_alloc() only the first time it happens.
 *
 * Returns the address of the block, or NULL on failure.
 */
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	static bool warned;
	struct page *page;
	gfp_t gfp_mask;
	int order;

	/* Main allocator not up yet: fall back to bootmem. */
	if (!slab_is_available())
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));

	gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
	order = get_order(size);

	page = alloc_pages_node(node, gfp_mask, order);
	if (!page) {
		/* The allocator was told not to warn; do it once ourselves. */
		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	}

	return page_address(page);
}

73
/* need to make sure size is all the same during early stage */
74
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
75
{
76
	void *ptr = sparse_buffer_alloc(size);
77

78 79
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
80 81 82
	return ptr;
}

83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
/*
 * Next pfn to hand out from @altmap: the base pfn advanced past the
 * reserved, already allocated and alignment-padding pfns.
 */
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	unsigned long next = altmap->base_pfn;

	next += altmap->reserve;
	next += altmap->alloc;
	next += altmap->align;

	return next;
}

/*
 * Number of pfns still available in @altmap: ->free minus what has been
 * consumed by allocations and alignment padding, never below zero.
 */
static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long used = altmap->alloc + altmap->align;

	/* Clamp instead of letting the subtraction wrap around. */
	return (altmap->free > used) ? altmap->free - used : 0;
}

/**
99 100 101
 * altmap_alloc_block_buf - allocate pages from the device page map
 * @altmap:	device page map
 * @size:	size (in bytes) of the allocation
102
 *
103
 * Allocations are aligned to the size of the request.
104
 */
105
void * __meminit altmap_alloc_block_buf(unsigned long size,
106 107
		struct vmem_altmap *altmap)
{
108
	unsigned long pfn, nr_pfns, nr_align;
109 110 111 112 113 114 115

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

116
	pfn = vmem_altmap_next_pfn(altmap);
117
	nr_pfns = size >> PAGE_SHIFT;
118 119 120 121 122 123 124 125 126
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

127 128
	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
129
	return __va(__pfn_to_phys(pfn));
130 131
}

132 133 134 135 136 137
/*
 * Warn if the page mapped by @pte lives on a node whose distance from
 * @node exceeds LOCAL_DISTANCE. @start and @end are only used to report
 * the affected range as [start, end - 1].
 */
void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	int actual_node = early_pfn_to_nid(pte_pfn(*pte));

	if (node_distance(actual_node, node) <= LOCAL_DISTANCE)
		return;

	pr_warn("[%lx-%lx] potential offnode page_structs\n",
		start, end - 1);
}

143
/*
 * Return the kernel PTE for @addr under @pmd. If the PTE is empty, a
 * PAGE_SIZE block is allocated on @node and mapped with PAGE_KERNEL first.
 * Returns NULL if that allocation fails.
 */
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	pte_t entry;
	void *block;

	/* Already mapped: nothing to do. */
	if (!pte_none(*pte))
		return pte;

	block = vmemmap_alloc_block_buf(PAGE_SIZE, node);
	if (!block)
		return NULL;

	entry = pfn_pte(__pa(block) >> PAGE_SHIFT, PAGE_KERNEL);
	set_pte_at(&init_mm, addr, pte, entry);

	return pte;
}

157 158 159 160 161 162 163 164 165 166 167
/*
 * Same as vmemmap_alloc_block(), but the returned block is cleared to
 * zero. Returns NULL if the allocation fails.
 */
static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *block = vmemmap_alloc_block(size, node);

	if (block)
		memset(block, 0, size);

	return block;
}

168
/*
 * Return the PMD entry for @addr under @pud. An empty entry is first
 * populated with a zeroed PAGE_SIZE block allocated on @node.
 * Returns NULL if that allocation fails.
 */
pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	void *table;

	if (!pmd_none(*pmd))
		return pmd;

	table = vmemmap_alloc_block_zero(PAGE_SIZE, node);
	if (!table)
		return NULL;

	pmd_populate_kernel(&init_mm, pmd, table);
	return pmd;
}

180
/*
 * Return the PUD entry for @addr under @p4d. An empty entry is first
 * populated with a zeroed PAGE_SIZE block allocated on @node.
 * Returns NULL if that allocation fails.
 */
pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	void *table;

	if (!pud_none(*pud))
		return pud;

	table = vmemmap_alloc_block_zero(PAGE_SIZE, node);
	if (!table)
		return NULL;

	pud_populate(&init_mm, pud, table);
	return pud;
}
191

192 193 194 195
/*
 * Return the P4D entry for @addr under @pgd. An empty entry is first
 * populated with a zeroed PAGE_SIZE block allocated on @node.
 * Returns NULL if that allocation fails.
 */
p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	void *table;

	if (!p4d_none(*p4d))
		return p4d;

	table = vmemmap_alloc_block_zero(PAGE_SIZE, node);
	if (!table)
		return NULL;

	p4d_populate(&init_mm, p4d, table);
	return p4d;
}

204 205 206 207
/*
 * Return the kernel PGD entry for @addr. An empty entry is first
 * populated with a zeroed PAGE_SIZE block allocated on @node.
 * Returns NULL if that allocation fails.
 */
pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	void *table;

	if (!pgd_none(*pgd))
		return pgd;

	table = vmemmap_alloc_block_zero(PAGE_SIZE, node);
	if (!table)
		return NULL;

	pgd_populate(&init_mm, pgd, table);
	return pgd;
}

216 217
/*
 * Populate the page tables for [@start, @end) one PAGE_SIZE step at a
 * time, allocating any missing table level and backing page on @node,
 * and run vmemmap_verify() on every PTE.
 *
 * Returns 0 on success, or -ENOMEM as soon as any level fails to populate.
 */
int __meminit vmemmap_populate_basepages(unsigned long start,
					 unsigned long end, int node)
{
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		pgd_t *pgd;
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		/* Walk down one level at a time, filling holes as we go. */
		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;

		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;

		pte = vmemmap_pte_populate(pmd, addr, node);
		if (!pte)
			return -ENOMEM;

		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
	}

	return 0;
}

248 249
/*
 * Populate the vmemmap range that holds the struct pages of section
 * @pnum by calling vmemmap_populate() with @nid and @altmap.
 *
 * Returns the first struct page of the section, or NULL if
 * vmemmap_populate() reports a failure.
 */
struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
		struct vmem_altmap *altmap)
{
	struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION);
	unsigned long start = (unsigned long)map;
	unsigned long end = (unsigned long)(map + PAGES_PER_SECTION);

	return vmemmap_populate(start, end, nid, altmap) ? NULL : map;
}