vmem.c
/*
 *    Copyright IBM Corp. 2006
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>

static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
	struct list_head list;
	unsigned long start;
	unsigned long size;
};

static LIST_HEAD(mem_segs);

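/*
 * Allocate pages for page table structures. Early during boot, before the
 * slab allocator is available, this falls back to the bootmem allocator;
 * the __ref annotation permits the reference to init-time allocation code.
 */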
static void __ref *vmem_alloc_pages(unsigned int order)
{
	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
}

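/*
 * Region and segment tables on s390 have 2048 eight-byte entries (16KB),
 * hence the order-2 (four page) allocations and the PAGE_SIZE * 4
 * clear_table() calls below.
 */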
static inline pud_t *vmem_pud_alloc(void)
{
	pud_t *pud = NULL;

#ifdef CONFIG_64BIT
	pud = vmem_alloc_pages(2);
	if (!pud)
		return NULL;
	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
	return pud;
}

static inline pmd_t *vmem_pmd_alloc(void)
{
	pmd_t *pmd = NULL;

#ifdef CONFIG_64BIT
	pmd = vmem_alloc_pages(2);
	if (!pmd)
		return NULL;
	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
	return pmd;
}

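/*
 * A page table holds PTRS_PER_PTE (256) entries, i.e. only 2KB. Once the
 * slab allocator is up, page tables come from the page table fragment
 * allocator via page_table_alloc(); before that, from bootmem.
 */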
static pte_t __ref *vmem_pte_alloc(unsigned long address)
{
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
					  PTRS_PER_PTE * sizeof(pte_t));
	if (!pte)
		return NULL;
	clear_table((unsigned long *) pte, _PAGE_INVALID,
		    PTRS_PER_PTE * sizeof(pte_t));
	return pte;
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	int ret = -ENOMEM;

	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}
		pu_dir = pud_offset(pg_dir, address);
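		/*
		 * With EDAT2 a properly aligned 2GB range can be mapped by a
		 * single large region third table entry instead of page tables.
		 */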
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
			pud_val(*pu_dir) = __pa(address) |
				_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
				(ro ? _REGION_ENTRY_PROTECT : 0);
			address += PUD_SIZE;
			continue;
		}
#endif
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}
		pm_dir = pmd_offset(pu_dir, address);
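		/*
		 * With EDAT1 a properly aligned 1MB range can be mapped by a
		 * single large segment table entry instead of a page table.
		 */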
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
			pmd_val(*pm_dir) = __pa(address) |
				_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
				_SEGMENT_ENTRY_YOUNG |
				(ro ? _SEGMENT_ENTRY_PROTECT : 0);
			address += PMD_SIZE;
			continue;
		}
#endif
		if (pmd_none(*pm_dir)) {
			pt_dir = vmem_pte_alloc(address);
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		pte_val(*pt_dir) = __pa(address) |
			pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
		address += PAGE_SIZE;
	}
	ret = 0;
out:
	return ret;
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t  pte;

	pte_val(pte) = _PAGE_INVALID;
	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			address += PGDIR_SIZE;
			continue;
		}
		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			address += PUD_SIZE;
			continue;
		}
		if (pud_large(*pu_dir)) {
			pud_clear(pu_dir);
			address += PUD_SIZE;
			continue;
		}
		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
			address += PMD_SIZE;
			continue;
		}
		if (pmd_large(*pm_dir)) {
			pmd_clear(pm_dir);
			address += PMD_SIZE;
			continue;
		}
		pt_dir = pte_offset_kernel(pm_dir, address);
		*pt_dir = pte;
		address += PAGE_SIZE;
	}
	flush_tlb_kernel_range(start, end);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	int ret = -ENOMEM;

	for (address = start; address < end;) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}

		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}

		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
#ifdef CONFIG_64BIT
			/*
			 * Use 1MB frames for vmemmap if available. We always
			 * use large frames even if they are only partially
			 * used. Otherwise we would also need page tables,
			 * since vmemmap_populate gets called for each section
			 * separately.
			 */
			if (MACHINE_HAS_EDAT1) {
				void *new_page;

				new_page = vmemmap_alloc_block(PMD_SIZE, node);
				if (!new_page)
					goto out;
				pmd_val(*pm_dir) = __pa(new_page) |
					_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
					_SEGMENT_ENTRY_CO;
				address = (address + PMD_SIZE) & PMD_MASK;
				continue;
			}
#endif
			pt_dir = vmem_pte_alloc(address);
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		} else if (pmd_large(*pm_dir)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		pt_dir = pte_offset_kernel(pm_dir, address);
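		/* Back this piece of the virtual mem_map with a fresh page. */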
		if (pte_none(*pt_dir)) {
			unsigned long new_page;

			new_page = __pa(vmem_alloc_pages(0));
			if (!new_page)
				goto out;
			pte_val(*pt_dir) = new_page | pgprot_val(PAGE_KERNEL);
		}
		address += PAGE_SIZE;
	}
	memset((void *)start, 0, end - start);
	ret = 0;
out:
	return ret;
}

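/*
 * Note: the backing established by vmemmap_populate() is never torn
 * down again; vmemmap_free() is intentionally a no-op.
 */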
void vmemmap_free(unsigned long start, unsigned long end)
{
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
	struct memory_segment *tmp;

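	/* Reject segments beyond the identity mapping limit or that wrap. */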
	if (seg->start + seg->size > VMEM_MAX_PHYS ||
	    seg->start + seg->size < seg->start)
		return -ERANGE;

	list_for_each_entry(tmp, &mem_segs, list) {
		if (seg->start >= tmp->start + tmp->size)
			continue;
		if (seg->start + seg->size <= tmp->start)
			continue;
		return -ENOSPC;
	}
	list_add(&seg->list, &mem_segs);
	return 0;
}

/*
 * Remove memory segment from the segment list.
 */
static void remove_memory_segment(struct memory_segment *seg)
{
	list_del(&seg->list);
}

static void __remove_shared_memory(struct memory_segment *seg)
{
	remove_memory_segment(seg);
	vmem_remove_range(seg->start, seg->size);
}

int vmem_remove_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);

	ret = -ENOENT;
	list_for_each_entry(seg, &mem_segs, list) {
		if (seg->start == start && seg->size == size)
			break;
	}

	if (seg->start != start || seg->size != size)
		goto out;

	ret = 0;
	__remove_shared_memory(seg);
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);
	ret = -ENOMEM;
	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
	if (!seg)
		goto out;
	seg->start = start;
	seg->size = size;

	ret = insert_memory_segment(seg);
	if (ret)
		goto out_free;

	ret = vmem_add_mem(start, size, 0);
	if (ret)
		goto out_remove;
	goto out;

out_remove:
	__remove_shared_memory(seg);
out_free:
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}
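
/*
 * Usage sketch (illustrative only, the actual caller lives elsewhere in
 * arch/s390): a memory hotplug handler would pair these interfaces
 * roughly as follows:
 *
 *	if (vmem_add_mapping(start, size))
 *		goto out_err;
 *	...
 *	vmem_remove_mapping(start, size);
 */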

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for the vmemmap so that
 * additional memory segments can be hotplugged later.
 */
void __init vmem_map_init(void)
{
	unsigned long ro_start, ro_end;
	struct memblock_region *reg;
	phys_addr_t start, end;

	ro_start = PFN_ALIGN((unsigned long)&_stext);
	ro_end = (unsigned long)&_eshared & PAGE_MASK;
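	/*
	 * The range [_stext, _eshared) is mapped read-only; split each
	 * memblock region against it as needed.
	 */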
	for_each_memblock(memory, reg) {
		start = reg->base;
		end = reg->base + reg->size;
		if (start >= ro_end || end <= ro_start)
			vmem_add_mem(start, end - start, 0);
		else if (start >= ro_start && end <= ro_end)
			vmem_add_mem(start, end - start, 1);
		else if (start >= ro_start) {
			vmem_add_mem(start, ro_end - start, 1);
			vmem_add_mem(ro_end, end - ro_end, 0);
		} else if (end < ro_end) {
			vmem_add_mem(start, ro_start - start, 0);
			vmem_add_mem(ro_start, end - ro_start, 1);
		} else {
			vmem_add_mem(start, ro_start - start, 0);
			vmem_add_mem(ro_start, ro_end - ro_start, 1);
			vmem_add_mem(ro_end, end - ro_end, 0);
		}
	}
}

/*
 * Convert memblock.memory to a memory segment list so there is a single
 * list that contains all memory segments.
 */
static int __init vmem_convert_memory_chunk(void)
{
	struct memblock_region *reg;
	struct memory_segment *seg;

	mutex_lock(&vmem_mutex);
	for_each_memblock(memory, reg) {
		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
		if (!seg)
			panic("Out of memory...\n");
		seg->start = reg->base;
		seg->size = reg->size;
		insert_memory_segment(seg);
	}
	mutex_unlock(&vmem_mutex);
	return 0;
}

core_initcall(vmem_convert_memory_chunk);