// SPDX-License-Identifier: GPL-2.0-only
/*
 * This kernel test validates architecture page table helpers and
 * accessors and helps in verifying their continued compliance with
 * expected generic MM semantics.
 *
 * Copyright (C) 2019 ARM Ltd.
 *
 * Author: Anshuman Khandual <anshuman.khandual@arm.com>
 */
#define pr_fmt(fmt) "debug_vm_pgtable: [%-25s]: " fmt, __func__

#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kernel.h>
#include <linux/kconfig.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/mm_types.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include <linux/printk.h>
#include <linux/pgtable.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/start_kernel.h>
#include <linux/sched/mm.h>
#include <linux/io.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

/*
 * Please refer to Documentation/vm/arch_pgtable_helpers.rst for the semantics
 * expectations that are being validated here. All future changes here or in
 * the documentation need to be kept in sync.
 */
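
/*
 * The pxx_basic_tests() construct entries locally from a protection value
 * and a pfn and never modify the page tables, while the clear, populate and
 * advanced tests below operate on the entries tracked in struct
 * pgtable_debug_args and are therefore called with the relevant page table
 * lock held from debug_vm_pgtable().
 */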

#define VMFLAGS	(VM_READ|VM_WRITE|VM_EXEC)

/*
 * On the s390 platform, the lower 4 bits are used to identify the page table
 * entry type. But these bits might affect the ability to clear entries with
 * pxx_clear() because of how dynamic page table folding works on s390. So
 * while loading up the entries, do not change the lower 4 bits. This does
 * not affect any other platform. Also avoid bit 62 on ppc64, which is used
 * to mark a pte entry.
 */
#define S390_SKIP_MASK		GENMASK(3, 0)
#if __BITS_PER_LONG == 64
#define PPC64_SKIP_MASK		GENMASK(62, 62)
#else
#define PPC64_SKIP_MASK		0x0
#endif
#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
#define RANDOM_NZVALUE	GENMASK(7, 0)
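
/*
 * RANDOM_ORVALUE is OR'ed into an entry before the pxx_clear() tests so that
 * clearing is verified against a garbage value rather than an already empty
 * entry, while RANDOM_NZVALUE fills the on-stack pgd/p4d values used for the
 * pxx_same() checks.
 */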

struct pgtable_debug_args {
	struct mm_struct	*mm;
	struct vm_area_struct	*vma;

	pgd_t			*pgdp;
	p4d_t			*p4dp;
	pud_t			*pudp;
	pmd_t			*pmdp;
	pte_t			*ptep;

	p4d_t			*start_p4dp;
	pud_t			*start_pudp;
	pmd_t			*start_pmdp;
	pgtable_t		start_ptep;

	unsigned long		vaddr;
	pgprot_t		page_prot;
	pgprot_t		page_prot_none;

	bool			is_contiguous_page;
	unsigned long		pud_pfn;
	unsigned long		pmd_pfn;
	unsigned long		pte_pfn;

	unsigned long		fixed_pgd_pfn;
	unsigned long		fixed_p4d_pfn;
	unsigned long		fixed_pud_pfn;
	unsigned long		fixed_pmd_pfn;
	unsigned long		fixed_pte_pfn;
};
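
/*
 * The pud_pfn/pmd_pfn/pte_pfn fields above refer to pages that init_args()
 * actually allocates, so tests may access their contents; they are left as
 * ULONG_MAX when the allocation fails, in which case the dependent tests
 * bail out early. The fixed_*_pfn values are only derived from a kernel
 * text symbol and are used to construct entries, never to access memory.
 */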

static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
{
	pgprot_t prot = protection_map[idx];
	pte_t pte = pfn_pte(args->fixed_pte_pfn, prot);
	unsigned long val = idx, *ptr = &val;

	pr_debug("Validating PTE basic (%pGv)\n", ptr);

	/*
	 * This test needs to be executed after the given page table entry
	 * is created with pfn_pte() to make sure that protection_map[idx]
	 * does not have the dirty bit enabled from the beginning. This is
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * the dirty bit being set.
	 */
	WARN_ON(pte_dirty(pte_wrprotect(pte)));

	WARN_ON(!pte_same(pte, pte));
	WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
	WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
	WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte))));
	WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
	WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
	WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
	WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
	WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
}

static void __init pte_advanced_tests(struct pgtable_debug_args *args)
{
	pte_t pte;

	/*
	 * Architectures optimize set_pte_at() by avoiding a TLB flush.
	 * This requires that set_pte_at() is never used to update an
	 * existing pte entry. Clear the pte before calling set_pte_at().
	 */
	if (args->pte_pfn == ULONG_MAX)
		return;

	pr_debug("Validating PTE advanced\n");
	pte = pfn_pte(args->pte_pfn, args->page_prot);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(pte_write(pte));
	ptep_get_and_clear(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(!pte_none(pte));

	pte = pfn_pte(args->pte_pfn, args->page_prot);
	pte = pte_wrprotect(pte);
	pte = pte_mkclean(pte);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	pte = pte_mkwrite(pte);
	pte = pte_mkdirty(pte);
	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
	pte = ptep_get(args->ptep);
	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
	pte = ptep_get(args->ptep);
	WARN_ON(!pte_none(pte));

	pte = pfn_pte(args->pte_pfn, args->page_prot);
	pte = pte_mkyoung(pte);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(pte_young(pte));
}
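
/*
 * Saved write is a pte/pmd state that preserves the original write
 * permission when a mapping is made PROT_NONE for NUMA hinting faults,
 * which is why these tests only run with CONFIG_NUMA_BALANCING. Most
 * architectures simply alias the regular write helpers here; ppc64
 * provides its own implementation.
 */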

static void __init pte_savedwrite_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	pr_debug("Validating PTE saved write\n");
	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
	WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
{
	pgprot_t prot = protection_map[idx];
	unsigned long val = idx, *ptr = &val;
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD basic (%pGv)\n", ptr);
	pmd = pfn_pmd(args->fixed_pmd_pfn, prot);

	/*
	 * This test needs to be executed after the given page table entry
	 * is created with pfn_pmd() to make sure that protection_map[idx]
	 * does not have the dirty bit enabled from the beginning. This is
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * the dirty bit being set.
	 */
	WARN_ON(pmd_dirty(pmd_wrprotect(pmd)));


	WARN_ON(!pmd_same(pmd, pmd));
	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
	WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd))));
	WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
	WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
	WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
	WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
	WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
	/*
	 * A huge page does not point to the next level page table
	 * page. Hence this must qualify as pmd_bad().
	 */
	WARN_ON(!pmd_bad(pmd_mkhuge(pmd)));
}
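
/*
 * pmd_advanced_tests() deposits a preallocated pte page table with
 * pgtable_trans_huge_deposit() before installing huge pmds and withdraws it
 * again at the end, mirroring what the THP fault path does so that a page
 * table is available if the huge mapping is later split.
 */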

static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;
	unsigned long vaddr = args->vaddr;

	if (!has_transparent_hugepage())
		return;

	if (args->pmd_pfn == ULONG_MAX)
		return;

	pr_debug("Validating PMD advanced\n");
	/* Align the address wrt HPAGE_PMD_SIZE */
	vaddr &= HPAGE_PMD_MASK;

	pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep);

	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(pmd_write(pmd));
	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));

	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
	pmd = pmd_wrprotect(pmd);
	pmd = pmd_mkclean(pmd);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	pmd = pmd_mkwrite(pmd);
	pmd = pmd_mkdirty(pmd);
	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
	pmdp_huge_get_and_clear_full(args->vma, vaddr, args->pmdp, 1);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));

	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
	pmd = pmd_mkyoung(pmd);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(pmd_young(pmd));

	/* Clear the pmd entry and withdraw the deposited page table */
	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
	pgtable_trans_huge_withdraw(args->mm, args->pmdp);
}

static void __init pmd_leaf_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD leaf\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);

	/*
	 * PMD based THP is a leaf entry.
	 */
	pmd = pmd_mkhuge(pmd);
	WARN_ON(!pmd_leaf(pmd));
}

static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD saved write\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none);
	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx)
{
	pgprot_t prot = protection_map[idx];
	unsigned long val = idx, *ptr = &val;
	pud_t pud;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PUD basic (%pGv)\n", ptr);
	pud = pfn_pud(args->fixed_pud_pfn, prot);

	/*
	 * This test needs to be executed after the given page table entry
	 * is created with pfn_pud() to make sure that protection_map[idx]
	 * does not have the dirty bit enabled from the beginning. This is
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * the dirty bit being set.
	 */
	WARN_ON(pud_dirty(pud_wrprotect(pud)));

	WARN_ON(!pud_same(pud, pud));
	WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud))));
	WARN_ON(!pud_dirty(pud_mkdirty(pud_mkclean(pud))));
	WARN_ON(pud_dirty(pud_mkclean(pud_mkdirty(pud))));
	WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud))));
	WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud))));
	WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud))));
	WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud))));
	WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud))));

	if (mm_pmd_folded(args->mm))
		return;

	/*
	 * A huge page does not point to the next level page table
	 * page. Hence this must qualify as pud_bad().
	 */
	WARN_ON(!pud_bad(pud_mkhuge(pud)));
}

static void __init pud_advanced_tests(struct pgtable_debug_args *args)
{
	unsigned long vaddr = args->vaddr;
	pud_t pud;

	if (!has_transparent_hugepage())
		return;

	if (args->pud_pfn == ULONG_MAX)
		return;

	pr_debug("Validating PUD advanced\n");
	/* Align the address wrt HPAGE_PUD_SIZE */
	vaddr &= HPAGE_PUD_MASK;

	pud = pfn_pud(args->pud_pfn, args->page_prot);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_write(pud));

#ifndef __PAGETABLE_PMD_FOLDED
	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */
	pud = pfn_pud(args->pud_pfn, args->page_prot);
	pud = pud_wrprotect(pud);
	pud = pud_mkclean(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	pud = pud_mkwrite(pud);
	pud = pud_mkdirty(pud);
	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));

#ifndef __PAGETABLE_PMD_FOLDED
	pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */

	pud = pfn_pud(args->pud_pfn, args->page_prot);
	pud = pud_mkyoung(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_young(pud));

	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
}

static void __init pud_leaf_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PUD leaf\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
	/*
	 * PUD based THP is a leaf entry.
	 */
	pud = pud_mkhuge(pud);
	WARN_ON(!pud_leaf(pud));
}
#else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { }
static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
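/*
 * pmd_set_huge()/pud_set_huge() install huge leaf entries in the kernel
 * page tables for huge vmap/ioremap mappings, and arch_vmap_pxx_supported()
 * reports whether the architecture supports that for the given protection.
 */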
static void __init pmd_huge_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!arch_vmap_pmd_supported(args->page_prot))
		return;

	pr_debug("Validating PMD huge\n");
	/*
	 * The x86 implementation of pmd_set_huge() verifies that the
	 * given PMD is not a populated non-leaf entry.
	 */
	WRITE_ONCE(*args->pmdp, __pmd(0));
	WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot));
	WARN_ON(!pmd_clear_huge(args->pmdp));
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));
}

static void __init pud_huge_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (!arch_vmap_pud_supported(args->page_prot))
		return;

	pr_debug("Validating PUD huge\n");
	/*
	 * The x86 implementation of pud_set_huge() verifies that the
	 * given PUD is not a populated non-leaf entry.
	 */
	WRITE_ONCE(*args->pudp, __pud(0));
	WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot));
	WARN_ON(!pud_clear_huge(args->pudp));
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
}
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static void __init pmd_huge_tests(struct pgtable_debug_args *args) { }
static void __init pud_huge_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */

static void __init p4d_basic_tests(struct pgtable_debug_args *args)
{
	p4d_t p4d;

	pr_debug("Validating P4D basic\n");
	memset(&p4d, RANDOM_NZVALUE, sizeof(p4d_t));
	WARN_ON(!p4d_same(p4d, p4d));
}

static void __init pgd_basic_tests(struct pgtable_debug_args *args)
{
	pgd_t pgd;

	pr_debug("Validating PGD basic\n");
	memset(&pgd, RANDOM_NZVALUE, sizeof(pgd_t));
	WARN_ON(!pgd_same(pgd, pgd));
}

#ifndef __PAGETABLE_PUD_FOLDED
static void __init pud_clear_tests(struct pgtable_debug_args *args)
{
	pud_t pud = READ_ONCE(*args->pudp);

	if (mm_pmd_folded(args->mm))
		return;

	pr_debug("Validating PUD clear\n");
	pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->pudp, pud);
	pud_clear(args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
}

static void __init pud_populate_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (mm_pmd_folded(args->mm))
		return;

	pr_debug("Validating PUD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence it must not qualify as pud_bad().
	 */
	pud_populate(args->mm, args->pudp, args->start_pmdp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_bad(pud));
}
#else  /* !__PAGETABLE_PUD_FOLDED */
static void __init pud_clear_tests(struct pgtable_debug_args *args) { }
static void __init pud_populate_tests(struct pgtable_debug_args *args) { }
#endif /* PAGETABLE_PUD_FOLDED */

#ifndef __PAGETABLE_P4D_FOLDED
static void __init p4d_clear_tests(struct pgtable_debug_args *args)
{
	p4d_t p4d = READ_ONCE(*args->p4dp);

	if (mm_pud_folded(args->mm))
		return;

	pr_debug("Validating P4D clear\n");
	p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->p4dp, p4d);
	p4d_clear(args->p4dp);
	p4d = READ_ONCE(*args->p4dp);
	WARN_ON(!p4d_none(p4d));
}

static void __init p4d_populate_tests(struct pgtable_debug_args *args)
{
	p4d_t p4d;

	if (mm_pud_folded(args->mm))
		return;

	pr_debug("Validating P4D populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence it must not qualify as p4d_bad().
	 */
	pud_clear(args->pudp);
	p4d_clear(args->p4dp);
	p4d_populate(args->mm, args->p4dp, args->start_pudp);
	p4d = READ_ONCE(*args->p4dp);
	WARN_ON(p4d_bad(p4d));
}

static void __init pgd_clear_tests(struct pgtable_debug_args *args)
{
	pgd_t pgd = READ_ONCE(*(args->pgdp));

	if (mm_p4d_folded(args->mm))
		return;

	pr_debug("Validating PGD clear\n");
	pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->pgdp, pgd);
	pgd_clear(args->pgdp);
	pgd = READ_ONCE(*args->pgdp);
	WARN_ON(!pgd_none(pgd));
}

static void __init pgd_populate_tests(struct pgtable_debug_args *args)
{
	pgd_t pgd;

	if (mm_p4d_folded(args->mm))
		return;

	pr_debug("Validating PGD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence it must not qualify as pgd_bad().
	 */
	p4d_clear(args->p4dp);
	pgd_clear(args->pgdp);
	pgd_populate(args->mm, args->pgdp, args->start_p4dp);
	pgd = READ_ONCE(*args->pgdp);
	WARN_ON(pgd_bad(pgd));
}
#else  /* !__PAGETABLE_P4D_FOLDED */
static void __init p4d_clear_tests(struct pgtable_debug_args *args) { }
static void __init pgd_clear_tests(struct pgtable_debug_args *args) { }
static void __init p4d_populate_tests(struct pgtable_debug_args *args) { }
static void __init pgd_populate_tests(struct pgtable_debug_args *args) { }
#endif /* PAGETABLE_P4D_FOLDED */

static void __init pte_clear_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);

	if (args->pte_pfn == ULONG_MAX)
		return;

	pr_debug("Validating PTE clear\n");
#ifndef CONFIG_RISCV
	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
#endif
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	barrier();
	pte_clear(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(!pte_none(pte));
}

static void __init pmd_clear_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd = READ_ONCE(*args->pmdp);

	pr_debug("Validating PMD clear\n");
	pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->pmdp, pmd);
	pmd_clear(args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));
}

static void __init pmd_populate_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	pr_debug("Validating PMD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence it must not qualify as pmd_bad().
	 */
	pmd_populate(args->mm, args->pmdp, args->start_ptep);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(pmd_bad(pmd));
}

static void __init pte_special_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
		return;

	pr_debug("Validating PTE special\n");
	WARN_ON(!pte_special(pte_mkspecial(pte)));
}
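
/*
 * PROT_NONE entries created for NUMA hinting faults must report both
 * pxx_protnone() and pxx_present(), since the underlying mapping still
 * exists and only its access permissions have been removed.
 */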

static void __init pte_protnone_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	pr_debug("Validating PTE protnone\n");
	WARN_ON(!pte_protnone(pte));
	WARN_ON(!pte_present(pte));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_protnone_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD protnone\n");
	pmd = pmd_mkhuge(pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none));
	WARN_ON(!pmd_protnone(pmd));
	WARN_ON(!pmd_present(pmd));
}
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_protnone_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
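/*
 * pxx_mkdevmap() tags entries that map ZONE_DEVICE pages (e.g. DAX), so
 * that page table walkers such as GUP can recognize them via pxx_devmap().
 */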
static void __init pte_devmap_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

	pr_debug("Validating PTE devmap\n");
	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_devmap_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD devmap\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static void __init pud_devmap_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PUD devmap\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
	WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
}
#else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else  /* CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#else
static void __init pte_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
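
/*
 * Soft dirty bits back the /proc/<pid>/clear_refs and pagemap interface for
 * tracking which pages a task has written since the bits were last cleared;
 * the swap variants preserve that information for swapped out entries.
 */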

static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	pr_debug("Validating PTE soft dirty\n");
	WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte)));
	WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
}

static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	pr_debug("Validating PTE swap soft dirty\n");
	WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte)));
	WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte)));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD soft dirty\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
	WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
}

static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
		return;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD swap soft dirty\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
	WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
}
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { }
static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
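
/*
 * Converting a pte to an arch specific swap entry and back again must be
 * lossless, which is verified here by checking that the pfn survives the
 * round trip.
 */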

static void __init pte_swap_tests(struct pgtable_debug_args *args)
{
	swp_entry_t swp;
	pte_t pte;

	pr_debug("Validating PTE swap\n");
	pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
	swp = __pte_to_swp_entry(pte);
	pte = __swp_entry_to_pte(swp);
	WARN_ON(args->fixed_pte_pfn != pte_pfn(pte));
}

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
static void __init pmd_swap_tests(struct pgtable_debug_args *args)
{
	swp_entry_t swp;
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD swap\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	swp = __pmd_to_swp_entry(pmd);
	pmd = __swp_entry_to_pmd(swp);
	WARN_ON(args->fixed_pmd_pfn != pmd_pfn(pmd));
}
#else  /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
static void __init pmd_swap_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */

static void __init swap_migration_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	swp_entry_t swp;

	if (!IS_ENABLED(CONFIG_MIGRATION))
		return;

	/*
	 * swap_migration_tests() requires a dedicated page as it needs to
	 * be locked before creating a migration entry from it. Locking the
	 * page that actually maps kernel text ('start_kernel') can be really
	 * problematic. Let's use the allocated page explicitly for this
	 * purpose.
	 */
	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
	if (!page)
		return;

	pr_debug("Validating swap migration\n");

	/*
	 * make_*_migration_entry() expects the given page to be
	 * locked, otherwise it stumbles upon a BUG_ON().
	 */
	__SetPageLocked(page);
	swp = make_writable_migration_entry(page_to_pfn(page));
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(!is_writable_migration_entry(swp));

	swp = make_readable_migration_entry(swp_offset(swp));
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(is_writable_migration_entry(swp));

	swp = make_readable_migration_entry(page_to_pfn(page));
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(is_writable_migration_entry(swp));
	__ClearPageLocked(page);
}

#ifdef CONFIG_HUGETLB_PAGE
static void __init hugetlb_basic_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	pte_t pte;

	pr_debug("Validating HugeTLB basic\n");
	/*
	 * Accessing the page associated with the pfn is safe here,
	 * as it was previously derived from a real kernel symbol.
	 */
	page = pfn_to_page(args->fixed_pmd_pfn);
	pte = mk_huge_pte(page, args->page_prot);

	WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte)));
	WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte))));
	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));

#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
	pte = pfn_pte(args->fixed_pmd_pfn, args->page_prot);

	WARN_ON(!pte_huge(pte_mkhuge(pte)));
#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
}
#else  /* !CONFIG_HUGETLB_PAGE */
static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_thp_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD based THP\n");
	/*
	 * pmd_trans_huge() and pmd_present() must return true after
	 * MMU invalidation with pmd_mkinvalid(). This behavior is an
	 * optimization for transparent huge pages. pmd_trans_huge() must
	 * be true if pmd_page() returns a valid THP, to avoid taking the
	 * pmd_lock when others walk over non-transhuge pmds (i.e. there
	 * are no THPs allocated). Especially when splitting a THP and
	 * removing the present bit from the pmd, pmd_trans_huge() still
	 * needs to return true. pmd_present() should be true whenever
	 * pmd_trans_huge() returns true.
	 */
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
	WARN_ON(!pmd_trans_huge(pmd_mkinvalid(pmd_mkhuge(pmd))));
	WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd))));
#endif /* __HAVE_ARCH_PMDP_INVALIDATE */
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static void __init pud_thp_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PUD based THP\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
	WARN_ON(!pud_trans_huge(pud_mkhuge(pud)));

	/*
	 * pud_mkinvalid() has been dropped for now. Enable back
	 * these tests when it comes back with a modified pud_present().
	 *
	 * WARN_ON(!pud_trans_huge(pud_mkinvalid(pud_mkhuge(pud))));
	 * WARN_ON(!pud_present(pud_mkinvalid(pud_mkhuge(pud))));
	 */
}
#else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_thp_tests(struct pgtable_debug_args *args) { }
static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static unsigned long __init get_random_vaddr(void)
{
	unsigned long random_vaddr, random_pages, total_user_pages;

	total_user_pages = (TASK_SIZE - FIRST_USER_ADDRESS) / PAGE_SIZE;

	random_pages = get_random_long() % total_user_pages;
	random_vaddr = FIRST_USER_ADDRESS + random_pages * PAGE_SIZE;

	return random_vaddr;
}

static void __init destroy_args(struct pgtable_debug_args *args)
{
	struct page *page = NULL;

	/* Free (huge) page */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
	    has_transparent_hugepage() &&
	    args->pud_pfn != ULONG_MAX) {
		if (args->is_contiguous_page) {
			free_contig_range(args->pud_pfn,
					  (1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
		} else {
			page = pfn_to_page(args->pud_pfn);
			__free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT);
		}

		args->pud_pfn = ULONG_MAX;
		args->pmd_pfn = ULONG_MAX;
		args->pte_pfn = ULONG_MAX;
	}

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    has_transparent_hugepage() &&
	    args->pmd_pfn != ULONG_MAX) {
		if (args->is_contiguous_page) {
			free_contig_range(args->pmd_pfn, (1 << HPAGE_PMD_ORDER));
		} else {
			page = pfn_to_page(args->pmd_pfn);
			__free_pages(page, HPAGE_PMD_ORDER);
		}

		args->pmd_pfn = ULONG_MAX;
		args->pte_pfn = ULONG_MAX;
	}

	if (args->pte_pfn != ULONG_MAX) {
		page = pfn_to_page(args->pte_pfn);
		__free_pages(page, 0);

		args->pte_pfn = ULONG_MAX;
	}

	/* Free page table entries */
	if (args->start_ptep) {
		pte_free(args->mm, args->start_ptep);
		mm_dec_nr_ptes(args->mm);
	}

	if (args->start_pmdp) {
		pmd_free(args->mm, args->start_pmdp);
		mm_dec_nr_pmds(args->mm);
	}

	if (args->start_pudp) {
		pud_free(args->mm, args->start_pudp);
		mm_dec_nr_puds(args->mm);
	}

	if (args->start_p4dp)
		p4d_free(args->mm, args->start_p4dp);

	/* Free vma and mm struct */
	if (args->vma)
		vm_area_free(args->vma);

	if (args->mm)
		mmdrop(args->mm);
}

static struct page * __init
debug_vm_pgtable_alloc_huge_page(struct pgtable_debug_args *args, int order)
{
	struct page *page = NULL;

#ifdef CONFIG_CONTIG_ALLOC
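	/*
	 * Orders at or above MAX_ORDER cannot be served by the buddy
	 * allocator, so fall back to alloc_contig_pages() when contiguous
	 * allocation support is available.
	 */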
	if (order >= MAX_ORDER) {
		page = alloc_contig_pages((1 << order), GFP_KERNEL,
					  first_online_node, NULL);
		if (page) {
			args->is_contiguous_page = true;
			return page;
		}
	}
#endif

	if (order < MAX_ORDER)
		page = alloc_pages(GFP_KERNEL, order);

	return page;
}

static int __init init_args(struct pgtable_debug_args *args)
{
	struct page *page = NULL;
	phys_addr_t phys;
	int ret = 0;

	/*
	 * Initialize the debugging data.
	 *
	 * __P000 (or even __S000) will help create page table entries with
	 * PROT_NONE permission as required for pxx_protnone_tests().
	 */
	memset(args, 0, sizeof(*args));
	args->vaddr              = get_random_vaddr();
	args->page_prot          = vm_get_page_prot(VMFLAGS);
	args->page_prot_none     = __P000;
	args->is_contiguous_page = false;
	args->pud_pfn            = ULONG_MAX;
	args->pmd_pfn            = ULONG_MAX;
	args->pte_pfn            = ULONG_MAX;
	args->fixed_pgd_pfn      = ULONG_MAX;
	args->fixed_p4d_pfn      = ULONG_MAX;
	args->fixed_pud_pfn      = ULONG_MAX;
	args->fixed_pmd_pfn      = ULONG_MAX;
	args->fixed_pte_pfn      = ULONG_MAX;

	/* Allocate mm and vma */
	args->mm = mm_alloc();
	if (!args->mm) {
		pr_err("Failed to allocate mm struct\n");
		ret = -ENOMEM;
		goto error;
	}

	args->vma = vm_area_alloc(args->mm);
	if (!args->vma) {
		pr_err("Failed to allocate vma\n");
		ret = -ENOMEM;
		goto error;
	}

	/*
	 * Allocate page table entries. They will be modified in the tests.
	 * Let's save the page table entries so that they can be released
	 * when the tests are completed.
	 */
	args->pgdp = pgd_offset(args->mm, args->vaddr);
	args->p4dp = p4d_alloc(args->mm, args->pgdp, args->vaddr);
	if (!args->p4dp) {
		pr_err("Failed to allocate p4d entries\n");
		ret = -ENOMEM;
		goto error;
	}
	args->start_p4dp = p4d_offset(args->pgdp, 0UL);
	WARN_ON(!args->start_p4dp);

	args->pudp = pud_alloc(args->mm, args->p4dp, args->vaddr);
	if (!args->pudp) {
		pr_err("Failed to allocate pud entries\n");
		ret = -ENOMEM;
		goto error;
	}
	args->start_pudp = pud_offset(args->p4dp, 0UL);
	WARN_ON(!args->start_pudp);

	args->pmdp = pmd_alloc(args->mm, args->pudp, args->vaddr);
	if (!args->pmdp) {
		pr_err("Failed to allocate pmd entries\n");
		ret = -ENOMEM;
		goto error;
	}
	args->start_pmdp = pmd_offset(args->pudp, 0UL);
	WARN_ON(!args->start_pmdp);

	if (pte_alloc(args->mm, args->pmdp)) {
		pr_err("Failed to allocate pte entries\n");
		ret = -ENOMEM;
		goto error;
	}
	args->start_ptep = pmd_pgtable(READ_ONCE(*args->pmdp));
	WARN_ON(!args->start_ptep);

	/*
	 * The PFN for mapping at the PTE level is determined from a standard
	 * kernel text symbol, while pfns for higher page table levels are
	 * derived by masking the lower bits of this real pfn. These derived
	 * pfns might not exist on the platform, but that does not really
	 * matter as the pfn_pxx() helpers will still create appropriate
	 * entries for the test. This avoids having to allocate large memory
	 * blocks for mappings at the higher page table levels in some tests.
	 */
	phys = __pa_symbol(&start_kernel);
	args->fixed_pgd_pfn = __phys_to_pfn(phys & PGDIR_MASK);
	args->fixed_p4d_pfn = __phys_to_pfn(phys & P4D_MASK);
	args->fixed_pud_pfn = __phys_to_pfn(phys & PUD_MASK);
	args->fixed_pmd_pfn = __phys_to_pfn(phys & PMD_MASK);
	args->fixed_pte_pfn = __phys_to_pfn(phys & PAGE_MASK);
	WARN_ON(!pfn_valid(args->fixed_pte_pfn));

	/*
	 * Allocate (huge) pages because some of the tests need to access
	 * the data in the pages. The corresponding tests will be skipped
	 * if we fail to allocate (huge) pages.
	 */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
	    has_transparent_hugepage()) {
		page = debug_vm_pgtable_alloc_huge_page(args,
				HPAGE_PUD_SHIFT - PAGE_SHIFT);
		if (page) {
			args->pud_pfn = page_to_pfn(page);
			args->pmd_pfn = args->pud_pfn;
			args->pte_pfn = args->pud_pfn;
			return 0;
		}
	}

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    has_transparent_hugepage()) {
		page = debug_vm_pgtable_alloc_huge_page(args, HPAGE_PMD_ORDER);
		if (page) {
			args->pmd_pfn = page_to_pfn(page);
			args->pte_pfn = args->pmd_pfn;
			return 0;
		}
	}

	page = alloc_pages(GFP_KERNEL, 0);
	if (page)
		args->pte_pfn = page_to_pfn(page);

	return 0;

error:
	destroy_args(args);
	return ret;
}

static int __init debug_vm_pgtable(void)
{
	struct pgtable_debug_args args;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp, *saved_pmdp, pmd;
	pgtable_t saved_ptep;
	unsigned long vaddr;
	spinlock_t *ptl = NULL;
	int idx, ret;

	pr_info("Validating architecture page table helpers\n");
	ret = init_args(&args);
	if (ret)
		return ret;

	vaddr = get_random_vaddr();
	mm = mm_alloc();
	if (!mm) {
		pr_err("mm_struct allocation failed\n");
		return 1;
	}

	vma = vm_area_alloc(mm);
	if (!vma) {
		pr_err("vma allocation failed\n");
		return 1;
	}

	pgdp = pgd_offset(mm, vaddr);
	p4dp = p4d_alloc(mm, pgdp, vaddr);
	pudp = pud_alloc(mm, p4dp, vaddr);
	pmdp = pmd_alloc(mm, pudp, vaddr);
	/*
	 * Allocate pgtable_t
	 */
	if (pte_alloc(mm, pmdp)) {
		pr_err("pgtable allocation failed\n");
		return 1;
	}

	/*
	 * Save all the page table page addresses as the page table
	 * entries will be used for testing with random or garbage
	 * values. These saved addresses will be used for freeing
	 * page table pages.
	 */
	pmd = READ_ONCE(*pmdp);
	saved_pmdp = pmd_offset(pudp, 0UL);
	saved_ptep = pmd_pgtable(pmd);

	/*
	 * Iterate over the protection_map[] to make sure that all
	 * the basic page table transformation validations just hold
	 * true irrespective of the starting protection value for a
	 * given page table entry.
	 */
	for (idx = 0; idx < ARRAY_SIZE(protection_map); idx++) {
		pte_basic_tests(&args, idx);
		pmd_basic_tests(&args, idx);
		pud_basic_tests(&args, idx);
	}

	/*
	 * Both the P4D and PGD level tests are very basic and do not
	 * involve creating page table entries from the protection
	 * value and the given pfn. Hence keep them out of the above
	 * iteration for now to save some test execution time.
	 */
	p4d_basic_tests(&args);
	pgd_basic_tests(&args);

	pmd_leaf_tests(&args);
	pud_leaf_tests(&args);

	pte_savedwrite_tests(&args);
	pmd_savedwrite_tests(&args);

	pte_special_tests(&args);
	pte_protnone_tests(&args);
	pmd_protnone_tests(&args);

	pte_devmap_tests(&args);
	pmd_devmap_tests(&args);
	pud_devmap_tests(&args);

	pte_soft_dirty_tests(&args);
	pmd_soft_dirty_tests(&args);
	pte_swap_soft_dirty_tests(&args);
	pmd_swap_soft_dirty_tests(&args);

	pte_swap_tests(&args);
	pmd_swap_tests(&args);

	swap_migration_tests(&args);

	pmd_thp_tests(&args);
	pud_thp_tests(&args);

	hugetlb_basic_tests(&args);

	/*
	 * Page table modifying tests. They need to hold the
	 * proper page table lock.
	 */

	args.ptep = pte_offset_map_lock(args.mm, args.pmdp, args.vaddr, &ptl);
	pte_clear_tests(&args);
	pte_advanced_tests(&args);
	pte_unmap_unlock(args.ptep, ptl);

	ptl = pmd_lock(args.mm, args.pmdp);
	pmd_clear_tests(&args);
	pmd_advanced_tests(&args);
	pmd_huge_tests(&args);
	pmd_populate_tests(&args);
	spin_unlock(ptl);

	ptl = pud_lock(args.mm, args.pudp);
	pud_clear_tests(&args);
	pud_advanced_tests(&args);
	pud_huge_tests(&args);
	pud_populate_tests(&args);
	spin_unlock(ptl);

	spin_lock(&(args.mm->page_table_lock));
	p4d_clear_tests(&args);
	pgd_clear_tests(&args);
	p4d_populate_tests(&args);
	pgd_populate_tests(&args);
	spin_unlock(&(args.mm->page_table_lock));

	p4d_free(mm, p4d_offset(pgdp, 0UL));
	pud_free(mm, pud_offset(p4dp, 0UL));
	pmd_free(mm, saved_pmdp);
	pte_free(mm, saved_ptep);

	vm_area_free(vma);
	mm_dec_nr_puds(mm);
	mm_dec_nr_pmds(mm);
	mm_dec_nr_ptes(mm);
	mmdrop(mm);

	destroy_args(&args);
	return 0;
}
late_initcall(debug_vm_pgtable);