/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

int pmd_huge(pmd_t pmd)
{
	/* A non-empty pmd with the table bit clear is a block (huge) entry. */
	return pmd_val(pmd) && ((pmd_val(pmd) & PMD_TABLE_BIT) == 0);
}

int pud_huge(pud_t pud)
{
#ifdef __PAGETABLE_PMD_FOLDED
	/* With the pmd level folded there are no pud block mappings. */
	return 0;
#else
	/* A non-empty pud with the table bit clear is a block (huge) entry. */
	return pud_val(pud) && ((pud_val(pud) & PUD_TABLE_BIT) == 0);
#endif
}

44 45 46 47 48 49 50 51 52 53
/*
 * Select all bits except the pfn
 */
static inline pgprot_t pte_pgprot(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}

54
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
55
			   pte_t *ptep, size_t *pgsize)
56
{
57 58 59
	pgd_t *pgdp = pgd_offset(mm, addr);
	pud_t *pudp;
	pmd_t *pmdp;
60 61

	*pgsize = PAGE_SIZE;
62 63 64
	pudp = pud_offset(pgdp, addr);
	pmdp = pmd_offset(pudp, addr);
	if ((pte_t *)pmdp == ptep) {
65 66 67 68 69 70
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	int contig_ptes = 0;

	*pgsize = size;

	switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE:
		contig_ptes = 1;
		break;
	case CONT_PMD_SIZE:
		*pgsize = PMD_SIZE;
		contig_ptes = CONT_PMDS;
		break;
	case CONT_PTE_SIZE:
		*pgsize = PAGE_SIZE;
		contig_ptes = CONT_PTES;
		break;
	}

	return contig_ptes;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 *
 * Returns the first entry of the set, with the dirty and young bits
 * of every cleared entry folded in, so no HW-set state is lost.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	if (valid) {
		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
		/* Complete the break: no stale TLB entries may survive. */
		flush_tlb_range(&vma, saddr, addr);
	}
	return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	/* Clear every entry in the set, then flush the whole range once. */
	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}

/*
 * Install @pte as a huge mapping at @addr. A non-contiguous entry is
 * written directly; a contiguous set is first broken (cleared and
 * flushed) and then every entry is rewritten, per the Break-Before-Make
 * requirement for the contiguous bit.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, pte_t pte)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	/*
	 * Code needs to be expanded to handle huge swap and migration
	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
	 */
	WARN_ON(!pte_present(pte));

	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn = pte_pfn(pte);
	/* pfn step between consecutive entries of the set */
	dpfn = pgsize >> PAGE_SHIFT;
	hugeprot = pte_pgprot(pte);

	/* Break step: clear the whole set and flush before remapping. */
	clear_flush(mm, addr, ptep, pgsize, ncontig);

	/* Make step: write each entry with its own pfn, same attributes. */
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

192 193 194 195 196 197 198 199 200 201 202 203
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
			  pte_t *ptep, pte_t pte, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, ptep++)
		set_pte(ptep, pte);
}

/*
 * Allocate (or locate) the page-table slot used to map a huge page of
 * size @sz at @addr, allocating intermediate table levels as needed.
 * Returns NULL if an intermediate level cannot be allocated or @sz is
 * not a recognised huge page size.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep = NULL;

	pgdp = pgd_offset(mm, addr);
	pudp = pud_alloc(mm, pgdp, addr);
	if (!pudp)
		return NULL;

	if (sz == PUD_SIZE) {
		/* PUD-sized block: the pud slot itself acts as the pte. */
		ptep = (pte_t *)pudp;
	} else if (sz == (PAGE_SIZE * CONT_PTES)) {
		/* Contiguous PTEs: need a real pte level below a pmd. */
		pmdp = pmd_alloc(mm, pudp, addr);

		WARN_ON(addr & (sz - 1));
		/*
		 * Note that if this code were ever ported to the
		 * 32-bit arm platform then it will cause trouble in
		 * the case where CONFIG_HIGHPTE is set, since there
		 * will be no pte_unmap() to correspond with this
		 * pte_alloc_map().
		 */
		ptep = pte_alloc_map(mm, pmdp, addr);
	} else if (sz == PMD_SIZE) {
		/* Try to share the pmd table when the pud slot is empty. */
		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
		    pud_none(READ_ONCE(*pudp)))
			ptep = huge_pmd_share(mm, addr, pudp);
		else
			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
	} else if (sz == (PMD_SIZE * CONT_PMDS)) {
		/* Contiguous PMDs: the first pmd slot represents the set. */
		pmdp = pmd_alloc(mm, pudp, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmdp;
	}

	return ptep;
}

/*
 * Walk the page tables and return a pointer to the entry mapping a
 * huge page of size @sz at @addr, or NULL if there is none. Block and
 * non-present (swap/migration) entries at the pud/pmd level are
 * returned so callers can inspect them.
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	/* Snapshot the pud once so the checks below are consistent. */
	pudp = pud_offset(pgdp, addr);
	pud = READ_ONCE(*pudp);
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_huge(pud) || !pud_present(pud))
		return (pte_t *)pudp;
	/* table; check the next level */

	/* Look at the first pmd of a contiguous-PMD set. */
	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_huge(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	/* Contiguous PTEs: return the first pte of the set. */
	if (sz == CONT_PTE_SIZE)
		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}

/*
 * Decorate @entry for the huge page size of @vma: contiguous-PTE and
 * contiguous-PMD sizes get the contiguous bit; PUD/PMD block sizes
 * pass through unchanged; anything else is reported and passed through.
 */
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
			 struct page *page, int writable)
{
	size_t pagesize = huge_page_size(hstate_vma(vma));

	if (pagesize == CONT_PTE_SIZE)
		return pte_mkcont(entry);

	if (pagesize == CONT_PMD_SIZE)
		return pmd_pte(pmd_mkcont(pte_pmd(entry)));

	if (pagesize != PUD_SIZE && pagesize != PMD_SIZE)
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);

	return entry;
}

299 300 301 302 303 304 305 306 307 308 309 310
void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);
}

311 312 313
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
314
	int ncontig;
315 316 317 318
	size_t pgsize;
	pte_t orig_pte = huge_ptep_get(ptep);

	if (!pte_cont(orig_pte))
319
		return ptep_get_and_clear(mm, addr, ptep);
320 321 322

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);

323
	return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
324 325 326 327 328 329
}

/*
 * Update the access flags of a huge mapping. A contiguous set requires
 * a Break-Before-Make cycle: clear and flush all entries, merge any
 * HW-set dirty/young state into the new pte, then rewrite the set.
 * Returns 1 if the installed entry differs from @pte's original value.
 */
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i, changed = 0;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	/* Break step: clear the whole set and flush the TLB. */
	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
	if (!pte_same(orig_pte, pte))
		changed = 1;

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	/* Make step: rewrite each entry with its own pfn. */
	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return changed;
}

/*
 * Write-protect a huge mapping. A contiguous set needs the full
 * Break-Before-Make cycle: clear and flush, wrprotect the merged pte,
 * then rewrite every entry.
 */
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	/* Break step; dirty/young state is folded into the returned pte. */
	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	/* Make step: rewrite each entry read-only, with its own pfn. */
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

/*
 * Clear a huge mapping and flush the TLB for it, handling both single
 * block entries and contiguous sets.
 */
void huge_ptep_clear_flush(struct vm_area_struct *vma,
			   unsigned long addr, pte_t *ptep)
{
	size_t pgsize;
	int ncontig;

	if (pte_cont(READ_ONCE(*ptep))) {
		ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
		clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
	} else {
		ptep_clear_flush(vma, addr, ptep);
	}
}

S
Steve Capper 已提交
402 403 404
static __init int setup_hugepagesz(char *opt)
{
	unsigned long ps = memparse(opt, &opt);
405

406 407 408 409 410 411 412 413 414
	switch (ps) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE * CONT_PMDS:
	case PMD_SIZE:
	case PAGE_SIZE * CONT_PTES:
		hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT);
		return 1;
S
Steve Capper 已提交
415
	}
416 417 418 419

	hugetlb_bad_size();
	pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
	return 0;
S
Steve Capper 已提交
420 421
}
__setup("hugepagesz=", setup_hugepagesz);

#ifdef CONFIG_ARM64_64K_PAGES
/* Register the contiguous-PTE hstate by default on 64K-granule kernels. */
static __init int add_default_hugepagesz(void)
{
	if (!size_to_hstate(CONT_PTES * PAGE_SIZE))
		hugetlb_add_hstate(CONT_PTE_SHIFT);
	return 0;
}
arch_initcall(add_default_hugepagesz);
#endif