// SPDX-License-Identifier: GPL-2.0
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff
 */

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
							unsigned long addr,
							unsigned long len,
							unsigned long pgoff,
							unsigned long flags)
{
	struct hstate *h = hstate_file(filp);
	unsigned long task_size = TASK_SIZE;
	struct vm_unmapped_area_info info;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	info.flags = 0;
	info.length = len;
	info.low_limit = TASK_UNMAPPED_BASE;
	info.high_limit = min(task_size, VA_EXCLUDE_START);
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
		VM_BUG_ON(addr != -ENOMEM);
		info.low_limit = VA_EXCLUDE_END;
		info.high_limit = task_size;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
				  const unsigned long len,
				  const unsigned long pgoff,
				  const unsigned long flags)
{
	struct hstate *h = hstate_file(filp);
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;
	struct vm_unmapped_area_info info;

	/* This should only ever run for 32-bit processes.  */
	BUG_ON(!test_thread_flag(TIF_32BIT));

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (addr & ~PAGE_MASK) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = STACK_TOP32;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long task_size = TASK_SIZE;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > task_size)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, huge_page_size(h));
		vma = find_vma(mm, addr);
		if (task_size - len >= addr &&
		    (!vma || addr + len <= vm_start_gap(vma)))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}

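/*
 * Encode a huge page size into a TTE.  sun4v TTEs carry the size in the
 * _PAGE_SZALL_4V field (plus the PMD/PUD huge bit); sun4u TTEs need no
 * adjustment here.
 */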
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	return entry;
}

static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	unsigned long hugepage_size = _PAGE_SZ4MB_4V;

	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;

	switch (shift) {
	case HPAGE_16GB_SHIFT:
		hugepage_size = _PAGE_SZ16GB_4V;
		pte_val(entry) |= _PAGE_PUD_HUGE;
		break;
	case HPAGE_2GB_SHIFT:
		hugepage_size = _PAGE_SZ2GB_4V;
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_256MB_SHIFT:
		hugepage_size = _PAGE_SZ256MB_4V;
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_SHIFT:
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_64K_SHIFT:
		hugepage_size = _PAGE_SZ64K_4V;
		break;
	default:
		WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
	}

	pte_val(entry) = pte_val(entry) | hugepage_size;
	return entry;
}

static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	if (tlb_type == hypervisor)
		return sun4v_hugepage_shift_to_tte(entry, shift);
	else
		return sun4u_hugepage_shift_to_tte(entry, shift);
}

pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
			 struct page *page, int writeable)
{
	unsigned int shift = huge_page_shift(hstate_vma(vma));
	pte_t pte;

	pte = hugepage_shift_to_tte(entry, shift);

#ifdef CONFIG_SPARC64
	/* If this vma has ADI enabled on it, turn on TTE.mcd
	 */
	if (vma->vm_flags & VM_SPARC_ADI)
		return pte_mkmcd(pte);
	else
		return pte_mknotmcd(pte);
#else
	return pte;
#endif
}

static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
{
	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
	unsigned int shift;

	switch (tte_szbits) {
	case _PAGE_SZ16GB_4V:
		shift = HPAGE_16GB_SHIFT;
		break;
	case _PAGE_SZ2GB_4V:
		shift = HPAGE_2GB_SHIFT;
		break;
	case _PAGE_SZ256MB_4V:
		shift = HPAGE_256MB_SHIFT;
		break;
	case _PAGE_SZ4MB_4V:
		shift = REAL_HPAGE_SHIFT;
		break;
	case _PAGE_SZ64K_4V:
		shift = HPAGE_64K_SHIFT;
		break;
	default:
		shift = PAGE_SHIFT;
		break;
	}
	return shift;
}

static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
{
	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
	unsigned int shift;

	switch (tte_szbits) {
	case _PAGE_SZ256MB_4U:
		shift = HPAGE_256MB_SHIFT;
		break;
	case _PAGE_SZ4MB_4U:
		shift = REAL_HPAGE_SHIFT;
		break;
	case _PAGE_SZ64K_4U:
		shift = HPAGE_64K_SHIFT;
		break;
	default:
		shift = PAGE_SHIFT;
		break;
	}
	return shift;
}

static unsigned int huge_tte_to_shift(pte_t entry)
{
	unsigned long shift;

	if (tlb_type == hypervisor)
		shift = sun4v_huge_tte_to_shift(entry);
	else
		shift = sun4u_huge_tte_to_shift(entry);

	if (shift == PAGE_SHIFT)
		WARN_ONCE(1, "huge_tte_to_shift: invalid hugepage tte=0x%lx\n",
			  pte_val(entry));

	return shift;
}

static unsigned long huge_tte_to_size(pte_t pte)
{
	unsigned long size = 1UL << huge_tte_to_shift(pte);

	if (size == REAL_HPAGE_SIZE)
		size = HPAGE_SIZE;
	return size;
}

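/*
 * Allocate the page-table slot that will hold a huge mapping: a PUD entry
 * for sizes >= PUD_SIZE, a PMD entry for sizes >= PMD_SIZE, otherwise a
 * normal PTE.
 */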
pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_offset(pgd, addr);
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;
	if (sz >= PUD_SIZE)
		return (pte_t *)pud;
	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;
	if (sz >= PMD_SIZE)
		return (pte_t *)pmd;
	return pte_alloc_map(mm, pmd, addr);
}

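/*
 * Walk the page tables for a huge mapping and return the PUD, PMD or PTE
 * slot that covers @addr, or NULL if nothing is mapped there.
 */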
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return NULL;
	if (is_hugetlb_pud(*pud))
		return (pte_t *)pud;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;
	if (is_hugetlb_pmd(*pmd))
		return (pte_t *)pmd;
	return pte_offset_map(pmd, addr);
}

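/*
 * Install a huge mapping: the TTE is replicated into every page-table slot
 * the huge page covers at its level, and the TLB batch is primed for each
 * REAL_HPAGE_SIZE half when the 8MB HPAGE_SIZE page size is used.
 */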
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	unsigned int nptes, orig_shift, shift;
	unsigned long i, size;
	pte_t orig;

	size = huge_tte_to_size(entry);

	shift = PAGE_SHIFT;
	if (size >= PUD_SIZE)
		shift = PUD_SHIFT;
	else if (size >= PMD_SIZE)
		shift = PMD_SHIFT;
	else
		shift = PAGE_SHIFT;

	nptes = size >> shift;

	if (!pte_present(*ptep) && pte_present(entry))
		mm->context.hugetlb_pte_count += nptes;

	addr &= ~(size - 1);
	orig = *ptep;
	orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(pte_val(entry) + (i << shift));

	maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
				    orig_shift);
}

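/*
 * Tear down a huge mapping: every replicated slot is cleared and the
 * original TTE is handed to the TLB batching code (twice in the 8MB
 * HPAGE_SIZE case, once per REAL_HPAGE_SIZE half).
 */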
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned int i, nptes, orig_shift, shift;
	unsigned long size;
	pte_t entry;

	entry = *ptep;
	size = huge_tte_to_size(entry);

	shift = PAGE_SHIFT;
	if (size >= PUD_SIZE)
		shift = PUD_SHIFT;
	else if (size >= PMD_SIZE)
		shift = PMD_SHIFT;
	else
		shift = PAGE_SHIFT;

	nptes = size >> shift;
	orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);

	if (pte_present(entry))
		mm->context.hugetlb_pte_count -= nptes;

	addr &= ~(size - 1);
	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(0UL);

	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
				    orig_shift);

	return entry;
}

int pmd_huge(pmd_t pmd)
{
	return !pmd_none(pmd) &&
		(pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}

int pud_huge(pud_t pud)
{
	return !pud_none(pud) &&
		(pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}

static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
			   unsigned long addr)
{
	pgtable_t token = pmd_pgtable(*pmd);

	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);
	mm_dec_nr_ptes(tlb->mm);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		if (is_hugetlb_pmd(*pmd))
			pmd_clear(pmd);
		else
			hugetlb_free_pte_range(tlb, pmd, addr);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
	mm_dec_nr_pmds(tlb->mm);
}

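/*
 * Tear down the PUD level under a hugetlb VMA: huge PUDs are cleared
 * directly, anything else is handed down to the PMD level.
 */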
static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		if (is_hugetlb_pud(*pud))
			pud_clear(pud);
		else
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(p4d, start);
	p4d_clear(p4d);
	pud_free_tlb(tlb, pud, start);
	mm_dec_nr_puds(tlb->mm);
}

void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	p4d_t *p4d;
	unsigned long next;

	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	if (addr > end - 1)
		return;

	pgd = pgd_offset(tlb->mm, addr);
	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d))
			continue;
		hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
	} while (p4d++, addr = next, addr != end);
}