debug_vm_pgtable.c 28.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
// SPDX-License-Identifier: GPL-2.0-only
/*
 * This kernel test validates architecture page table helpers and
 * accessors and helps in verifying their continued compliance with
 * expected generic MM semantics.
 *
 * Copyright (C) 2019 ARM Ltd.
 *
 * Author: Anshuman Khandual <anshuman.khandual@arm.com>
 */
11
#define pr_fmt(fmt) "debug_vm_pgtable: [%-25s]: " fmt, __func__
12 13 14 15 16 17 18 19 20 21 22 23

#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kernel.h>
#include <linux/kconfig.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/mm_types.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include <linux/printk.h>
24
#include <linux/pgtable.h>
25 26 27 28 29 30
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/start_kernel.h>
#include <linux/sched/mm.h>
31
#include <linux/io.h>
32
#include <asm/pgalloc.h>
33
#include <asm/tlbflush.h>
34

35 36 37 38 39 40
/*
 * Please refer to Documentation/vm/arch_pgtable_helpers.rst for the semantics
 * expectations that are being validated here. All future changes in here
 * or the documentation need to be in sync.
 */

41 42 43 44 45 46 47
/* VMA access flags from which the page protection of the test entries is derived */
#define VMFLAGS	(VM_READ | VM_WRITE | VM_EXEC)

/*
 * On s390 platform, the lower 4 bits are used to identify given page table
 * entry type. But these bits might affect the ability to clear entries with
 * pxx_clear() because of how dynamic page table folding works on s390. So
 * while loading up the entries do not change the lower 4 bits. This does not
 * have any effect on other platforms. Also avoid the 62nd bit on ppc64 that
 * is used to mark a pte entry.
 */
51 52 53 54 55 56 57 58
/*
 * Bits that must be left alone when garbage is OR-ed into a page table
 * entry: the low four bits (s390) and, on 64 bit builds, bit 62 (ppc64).
 */
#define S390_SKIP_MASK		GENMASK(3, 0)
#if __BITS_PER_LONG == 64
#define PPC64_SKIP_MASK		GENMASK(62, 62)
#else
#define PPC64_SKIP_MASK		0x0
#endif
#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
/* All bits of a long set, except the ones in ARCH_SKIP_MASK */
#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
/* Non-zero byte pattern used to memset() the P4D/PGD test values */
#define RANDOM_NZVALUE	GENMASK(7, 0)

/*
 * Validate the PTE value helpers on an entry built from @pfn and @prot.
 * Every check chains two opposing helpers and warns when the attribute
 * reported afterwards does not follow the helper that was applied last.
 */
static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	pr_debug("Validating PTE basic\n");
	/* An entry must compare equal to itself */
	WARN_ON(!pte_same(pte, pte));
	/* The "set" helper applied last must win ... */
	WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
	WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
	WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte))));
	/* ... and so must the "clear" helper applied last */
	WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
	WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
	WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
}

75 76 77 78 79 80 81
/*
 * Validate the PTE helpers that operate on an entry installed in a page
 * table slot. @ptep is the slot being exercised, @vaddr the address passed
 * to the helpers, and the test entries are built from @pfn and @prot.
 * The first two scenarios end by clearing the slot and checking pte_none().
 */
static void __init pte_advanced_tests(struct mm_struct *mm,
				      struct vm_area_struct *vma, pte_t *ptep,
				      unsigned long pfn, unsigned long vaddr,
				      pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	/*
	 * Architectures optimize set_pte_at by avoiding TLB flush.
	 * This requires set_pte_at to be not used to update an
	 * existing pte entry. Clear pte before we do set_pte_at
	 */

	pr_debug("Validating PTE advanced\n");
	/* Write protecting the installed entry must drop write permission */
	pte = pfn_pte(pfn, prot);
	set_pte_at(mm, vaddr, ptep, pte);
	ptep_set_wrprotect(mm, vaddr, ptep);
	pte = ptep_get(ptep);
	WARN_ON(pte_write(pte));
	ptep_get_and_clear(mm, vaddr, ptep);
	pte = ptep_get(ptep);
	WARN_ON(!pte_none(pte));

	/* Upgrading a clean read-only entry must make it writable and dirty */
	pte = pfn_pte(pfn, prot);
	pte = pte_wrprotect(pte);
	pte = pte_mkclean(pte);
	set_pte_at(mm, vaddr, ptep, pte);
	pte = pte_mkwrite(pte);
	pte = pte_mkdirty(pte);
	ptep_set_access_flags(vma, vaddr, ptep, pte, 1);
	pte = ptep_get(ptep);
	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
	ptep_get_and_clear_full(mm, vaddr, ptep, 1);
	pte = ptep_get(ptep);
	WARN_ON(!pte_none(pte));

	/* Test-and-clear must leave the installed entry not young */
	pte = pfn_pte(pfn, prot);
	pte = pte_mkyoung(pte);
	set_pte_at(mm, vaddr, ptep, pte);
	ptep_test_and_clear_young(vma, vaddr, ptep);
	pte = ptep_get(ptep);
	WARN_ON(pte_young(pte));
}

/*
 * Validate pte_mk_savedwrite()/pte_clear_savedwrite() against
 * pte_savedwrite() on an entry built from @pfn and @prot.
 * Skipped unless CONFIG_NUMA_BALANCING is enabled.
 */
static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	pr_debug("Validating PTE saved write\n");
	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
	WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
}
130

131 132 133 134 135
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Validate the PMD value helpers on an entry built from @pfn and @prot.
 * Every check chains two opposing helpers and warns when the attribute
 * reported afterwards does not follow the helper that was applied last.
 * Skipped when has_transparent_hugepage() reports no support.
 */
static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd = pfn_pmd(pfn, prot);

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD basic\n");
	WARN_ON(!pmd_same(pmd, pmd));
	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
	WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd))));
	WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
	WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
	WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
	/*
	 * A huge page does not point to next level page table
	 * entry. Hence this must qualify as pmd_bad().
	 */
	WARN_ON(!pmd_bad(pmd_mkhuge(pmd)));
}

154 155 156
/*
 * Validate the PMD helpers that operate on an entry installed in a page
 * table slot. @pmdp is the slot being exercised, @vaddr the address passed
 * to the helpers, and the test entries are built from @pfn and @prot.
 * @pgtable is deposited against @pmdp for the duration of the test and
 * withdrawn again before returning.
 * Skipped when has_transparent_hugepage() reports no support.
 */
static void __init pmd_advanced_tests(struct mm_struct *mm,
				      struct vm_area_struct *vma, pmd_t *pmdp,
				      unsigned long pfn, unsigned long vaddr,
				      pgprot_t prot, pgtable_t pgtable)
{
	pmd_t pmd = pfn_pmd(pfn, prot);

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD advanced\n");
	/*
	 * Align the address down wrt HPAGE_PMD_SIZE. Aligning up would
	 * move vaddr into the range of the next PMD entry, so it would
	 * no longer correspond to the pmdp (and aligned down pfn) that
	 * the caller handed in.
	 */
	vaddr &= HPAGE_PMD_MASK;

	pgtable_trans_huge_deposit(mm, pmdp, pgtable);

	/* Write protecting the installed entry must drop write permission */
	pmd = pfn_pmd(pfn, prot);
	set_pmd_at(mm, vaddr, pmdp, pmd);
	pmdp_set_wrprotect(mm, vaddr, pmdp);
	pmd = READ_ONCE(*pmdp);
	WARN_ON(pmd_write(pmd));
	pmdp_huge_get_and_clear(mm, vaddr, pmdp);
	pmd = READ_ONCE(*pmdp);
	WARN_ON(!pmd_none(pmd));

	/* Upgrading a clean read-only entry must make it writable and dirty */
	pmd = pfn_pmd(pfn, prot);
	pmd = pmd_wrprotect(pmd);
	pmd = pmd_mkclean(pmd);
	set_pmd_at(mm, vaddr, pmdp, pmd);
	pmd = pmd_mkwrite(pmd);
	pmd = pmd_mkdirty(pmd);
	pmdp_set_access_flags(vma, vaddr, pmdp, pmd, 1);
	pmd = READ_ONCE(*pmdp);
	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
	pmdp_huge_get_and_clear_full(vma, vaddr, pmdp, 1);
	pmd = READ_ONCE(*pmdp);
	WARN_ON(!pmd_none(pmd));

	/* Test-and-clear must leave the installed huge entry not young */
	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
	pmd = pmd_mkyoung(pmd);
	set_pmd_at(mm, vaddr, pmdp, pmd);
	pmdp_test_and_clear_young(vma, vaddr, pmdp);
	pmd = READ_ONCE(*pmdp);
	WARN_ON(pmd_young(pmd));

	/*  Clear the pte entries  */
	pmdp_huge_get_and_clear(mm, vaddr, pmdp);
	pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
}

/*
 * Validate that a PMD entry built from @pfn and @prot and marked huge
 * with pmd_mkhuge() is reported as a leaf by pmd_leaf().
 */
static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd = pfn_pmd(pfn, prot);

	pr_debug("Validating PMD leaf\n");
	/*
	 * PMD based THP is a leaf entry.
	 */
	pmd = pmd_mkhuge(pmd);
	WARN_ON(!pmd_leaf(pmd));
}

216
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
217 218 219 220
/*
 * Validate pmd_set_huge()/pmd_clear_huge() on the slot @pmdp, using the
 * physical address of @pfn and protection @prot. The slot must read back
 * as pmd_none() once the huge mapping has been cleared.
 * Skipped when arch_ioremap_pmd_supported() reports no support.
 */
static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd;

	if (!arch_ioremap_pmd_supported())
		return;

	pr_debug("Validating PMD huge\n");
	/*
	 * X86 defined pmd_set_huge() verifies that the given
	 * PMD is not a populated non-leaf entry.
	 */
	WRITE_ONCE(*pmdp, __pmd(0));
	WARN_ON(!pmd_set_huge(pmdp, __pfn_to_phys(pfn), prot));
	WARN_ON(!pmd_clear_huge(pmdp));
	pmd = READ_ONCE(*pmdp);
	WARN_ON(!pmd_none(pmd));
}
235 236 237
#else  /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
/* No huge vmap support: nothing to validate at PMD level */
static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) { }
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
238 239 240 241 242

/*
 * Validate pmd_mk_savedwrite()/pmd_clear_savedwrite() against
 * pmd_savedwrite() on an entry built from @pfn and @prot.
 * Skipped unless CONFIG_NUMA_BALANCING is enabled.
 */
static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd = pfn_pmd(pfn, prot);

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	pr_debug("Validating PMD saved write\n");
	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
}

251 252 253 254 255
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
/*
 * Validate the PUD value helpers on an entry built from @pfn and @prot.
 * Every check chains two opposing helpers and warns when the attribute
 * reported afterwards does not follow the helper that was applied last.
 * Skipped when has_transparent_hugepage() reports no support.
 */
static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot)
{
	pud_t pud = pfn_pud(pfn, prot);

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PUD basic\n");
	WARN_ON(!pud_same(pud, pud));
	WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud))));
	WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud))));
	WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud))));
	WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud))));

	/*
	 * This function has no mm_struct to hand to mm_pmd_folded(), so
	 * the folded PMD case is excluded at build time, the same way
	 * pud_advanced_tests() does it.
	 * NOTE(review): an architecture that decides PMD folding at run
	 * time would need the mm passed in instead - confirm none of
	 * those enables PUD based THP.
	 */
#ifndef __PAGETABLE_PMD_FOLDED
	/*
	 * A huge page does not point to next level page table
	 * entry. Hence this must qualify as pud_bad().
	 */
	WARN_ON(!pud_bad(pud_mkhuge(pud)));
#endif /* __PAGETABLE_PMD_FOLDED */
}
275 276 277 278 279 280 281 282 283 284 285

/*
 * Validate the PUD helpers that operate on an entry installed in a page
 * table slot. @pudp is the slot being exercised, @vaddr the address passed
 * to the helpers, and the test entries are built from @pfn and @prot.
 * The intermediate "clear and expect pud_none()" steps are compiled out
 * when __PAGETABLE_PMD_FOLDED is defined.
 * Skipped when has_transparent_hugepage() reports no support.
 */
static void __init pud_advanced_tests(struct mm_struct *mm,
				      struct vm_area_struct *vma, pud_t *pudp,
				      unsigned long pfn, unsigned long vaddr,
				      pgprot_t prot)
{
	pud_t pud = pfn_pud(pfn, prot);

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PUD advanced\n");
	/*
	 * Align the address down wrt HPAGE_PUD_SIZE. Aligning up would
	 * move vaddr into the range of the next PUD entry, so it would
	 * no longer correspond to the pudp (and aligned down pfn) that
	 * the caller handed in.
	 */
	vaddr &= HPAGE_PUD_MASK;

	/* Write protecting the installed entry must drop write permission */
	set_pud_at(mm, vaddr, pudp, pud);
	pudp_set_wrprotect(mm, vaddr, pudp);
	pud = READ_ONCE(*pudp);
	WARN_ON(pud_write(pud));

#ifndef __PAGETABLE_PMD_FOLDED
	pudp_huge_get_and_clear(mm, vaddr, pudp);
	pud = READ_ONCE(*pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */
	/* Upgrading a clean read-only entry must make it writable and dirty */
	pud = pfn_pud(pfn, prot);
	pud = pud_wrprotect(pud);
	pud = pud_mkclean(pud);
	set_pud_at(mm, vaddr, pudp, pud);
	pud = pud_mkwrite(pud);
	pud = pud_mkdirty(pud);
	pudp_set_access_flags(vma, vaddr, pudp, pud, 1);
	pud = READ_ONCE(*pudp);
	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));

#ifndef __PAGETABLE_PMD_FOLDED
	pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1);
	pud = READ_ONCE(*pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */

	/* Test-and-clear must leave the installed entry not young */
	pud = pfn_pud(pfn, prot);
	pud = pud_mkyoung(pud);
	set_pud_at(mm, vaddr, pudp, pud);
	pudp_test_and_clear_young(vma, vaddr, pudp);
	pud = READ_ONCE(*pudp);
	WARN_ON(pud_young(pud));

	pudp_huge_get_and_clear(mm, vaddr, pudp);
}

/*
 * Validate that a PUD entry built from @pfn and @prot and marked huge
 * with pud_mkhuge() is reported as a leaf by pud_leaf().
 */
static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
{
	pud_t pud = pfn_pud(pfn, prot);

	pr_debug("Validating PUD leaf\n");
	/*
	 * PUD based THP is a leaf entry.
	 */
	pud = pud_mkhuge(pud);
	WARN_ON(!pud_leaf(pud));
}

338
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
339 340 341 342
/*
 * Validate pud_set_huge()/pud_clear_huge() on the slot @pudp, using the
 * physical address of @pfn and protection @prot. The slot must read back
 * as pud_none() once the huge mapping has been cleared.
 * Skipped when arch_ioremap_pud_supported() reports no support.
 */
static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
{
	pud_t pud;

	if (!arch_ioremap_pud_supported())
		return;

	pr_debug("Validating PUD huge\n");
	/*
	 * X86 defined pud_set_huge() verifies that the given
	 * PUD is not a populated non-leaf entry.
	 */
	WRITE_ONCE(*pudp, __pud(0));
	WARN_ON(!pud_set_huge(pudp, __pfn_to_phys(pfn), prot));
	WARN_ON(!pud_clear_huge(pudp));
	pud = READ_ONCE(*pudp);
	WARN_ON(!pud_none(pud));
}
357 358 359 360
#else  /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
/* No huge vmap support: nothing to validate at PUD level */
static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) { }
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */

361 362
#else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
363 364 365 366 367 368 369 370 371 372
static void __init pud_advanced_tests(struct mm_struct *mm,
				      struct vm_area_struct *vma, pud_t *pudp,
				      unsigned long pfn, unsigned long vaddr,
				      pgprot_t prot)
{
}
static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
{
}
373 374 375 376
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
377 378 379
static void __init pmd_advanced_tests(struct mm_struct *mm,
				      struct vm_area_struct *vma, pmd_t *pmdp,
				      unsigned long pfn, unsigned long vaddr,
380
				      pgprot_t prot, pgtable_t pgtable)
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
{
}
static void __init pud_advanced_tests(struct mm_struct *mm,
				      struct vm_area_struct *vma, pud_t *pudp,
				      unsigned long pfn, unsigned long vaddr,
				      pgprot_t prot)
{
}
static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
{
}
static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
{
}
static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) { }
398 399 400 401 402 403
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Validate that a P4D value filled with the RANDOM_NZVALUE byte pattern
 * compares equal to itself with p4d_same(). @pfn and @prot are not used.
 */
static void __init p4d_basic_tests(unsigned long pfn, pgprot_t prot)
{
	p4d_t p4d;

	pr_debug("Validating P4D basic\n");
	memset(&p4d, RANDOM_NZVALUE, sizeof(p4d_t));
	WARN_ON(!p4d_same(p4d, p4d));
}

/*
 * Validate that a PGD value filled with the RANDOM_NZVALUE byte pattern
 * compares equal to itself with pgd_same(). @pfn and @prot are not used.
 */
static void __init pgd_basic_tests(unsigned long pfn, pgprot_t prot)
{
	pgd_t pgd;

	pr_debug("Validating PGD basic\n");
	memset(&pgd, RANDOM_NZVALUE, sizeof(pgd_t));
	WARN_ON(!pgd_same(pgd, pgd));
}

#ifndef __PAGETABLE_PUD_FOLDED
/*
 * Validate that pud_clear() leaves the slot @pudp in a pud_none() state
 * after RANDOM_ORVALUE has been OR-ed into its current contents.
 * Skipped when mm_pmd_folded() is true for @mm.
 */
static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp)
{
	pud_t pud = READ_ONCE(*pudp);

	if (mm_pmd_folded(mm))
		return;

	pr_debug("Validating PUD clear\n");
	pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
	WRITE_ONCE(*pudp, pud);
	pud_clear(pudp);
	pud = READ_ONCE(*pudp);
	WARN_ON(!pud_none(pud));
}

/*
 * Validate that the slot @pudp does not qualify as pud_bad() after
 * pud_populate() has pointed it at the PMD table @pmdp.
 * Skipped when mm_pmd_folded() is true for @mm.
 */
static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp,
				      pmd_t *pmdp)
{
	pud_t pud;

	if (mm_pmd_folded(mm))
		return;

	pr_debug("Validating PUD populate\n");
	/*
	 * This entry points to next level page table page.
	 * Hence this must not qualify as pud_bad().
	 */
	pud_populate(mm, pudp, pmdp);
	pud = READ_ONCE(*pudp);
	WARN_ON(pud_bad(pud));
}
#else  /* __PAGETABLE_PUD_FOLDED */
/* Empty PUD clear/populate tests for builds with a folded PUD level */
static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp) { }
static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp,
				      pmd_t *pmdp)
{
}
#endif /* __PAGETABLE_PUD_FOLDED */

#ifndef __PAGETABLE_P4D_FOLDED
/*
 * Validate that p4d_clear() leaves the slot @p4dp in a p4d_none() state
 * after RANDOM_ORVALUE has been OR-ed into its current contents.
 * Skipped when mm_pud_folded() is true for @mm.
 */
static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp)
{
	p4d_t p4d = READ_ONCE(*p4dp);

	if (mm_pud_folded(mm))
		return;

	pr_debug("Validating P4D clear\n");
	p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
	WRITE_ONCE(*p4dp, p4d);
	p4d_clear(p4dp);
	p4d = READ_ONCE(*p4dp);
	WARN_ON(!p4d_none(p4d));
}

/*
 * Validate that the slot @p4dp does not qualify as p4d_bad() after
 * p4d_populate() has pointed it at the PUD table @pudp. Both @pudp and
 * @p4dp are cleared before the populate step.
 * Skipped when mm_pud_folded() is true for @mm.
 */
static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp,
				      pud_t *pudp)
{
	p4d_t p4d;

	if (mm_pud_folded(mm))
		return;

	pr_debug("Validating P4D populate\n");
	/*
	 * This entry points to next level page table page.
	 * Hence this must not qualify as p4d_bad().
	 */
	pud_clear(pudp);
	p4d_clear(p4dp);
	p4d_populate(mm, p4dp, pudp);
	p4d = READ_ONCE(*p4dp);
	WARN_ON(p4d_bad(p4d));
}

/*
 * Validate that pgd_clear() leaves the slot @pgdp in a pgd_none() state
 * after RANDOM_ORVALUE has been OR-ed into its current contents.
 * Skipped when mm_p4d_folded() is true for @mm.
 */
static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp)
{
	pgd_t pgd = READ_ONCE(*pgdp);

	if (mm_p4d_folded(mm))
		return;

	pr_debug("Validating PGD clear\n");
	pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
	WRITE_ONCE(*pgdp, pgd);
	pgd_clear(pgdp);
	pgd = READ_ONCE(*pgdp);
	WARN_ON(!pgd_none(pgd));
}

/*
 * Validate that the slot @pgdp does not qualify as pgd_bad() after
 * pgd_populate() has pointed it at the P4D table @p4dp. Both @p4dp and
 * @pgdp are cleared before the populate step.
 * Skipped when mm_p4d_folded() is true for @mm.
 */
static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
				      p4d_t *p4dp)
{
	pgd_t pgd;

	if (mm_p4d_folded(mm))
		return;

	pr_debug("Validating PGD populate\n");
	/*
	 * This entry points to next level page table page.
	 * Hence this must not qualify as pgd_bad().
	 */
	p4d_clear(p4dp);
	pgd_clear(pgdp);
	pgd_populate(mm, pgdp, p4dp);
	pgd = READ_ONCE(*pgdp);
	WARN_ON(pgd_bad(pgd));
}
#else  /* __PAGETABLE_P4D_FOLDED */
/* Empty P4D/PGD clear and populate tests for builds with a folded P4D level */
static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp) { }
static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp) { }
static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp,
				      pud_t *pudp)
{
}
static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
				      p4d_t *p4dp)
{
}
#endif /* __PAGETABLE_P4D_FOLDED */

/*
 * Validate that pte_clear() leaves the slot @ptep in a pte_none() state
 * after an entry built from @pfn and @prot has been installed for @vaddr.
 * Except on CONFIG_RISCV builds, RANDOM_ORVALUE is OR-ed into the entry
 * before it is installed.
 */
static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
				   unsigned long pfn, unsigned long vaddr,
				   pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	pr_debug("Validating PTE clear\n");
#ifndef CONFIG_RISCV
	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
#endif
	set_pte_at(mm, vaddr, ptep, pte);
	barrier();
	pte_clear(mm, vaddr, ptep);
	pte = ptep_get(ptep);
	WARN_ON(!pte_none(pte));
}

/*
 * Validate that pmd_clear() leaves the slot @pmdp in a pmd_none() state
 * after RANDOM_ORVALUE has been OR-ed into its current contents.
 * @mm is not used.
 */
static void __init pmd_clear_tests(struct mm_struct *mm, pmd_t *pmdp)
{
	pmd_t pmd = READ_ONCE(*pmdp);

	pr_debug("Validating PMD clear\n");
	pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
	WRITE_ONCE(*pmdp, pmd);
	pmd_clear(pmdp);
	pmd = READ_ONCE(*pmdp);
	WARN_ON(!pmd_none(pmd));
}

/*
 * Validate that the slot @pmdp does not qualify as pmd_bad() after
 * pmd_populate() has pointed it at the page table page @pgtable.
 */
static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
				      pgtable_t pgtable)
{
	pmd_t pmd;

	pr_debug("Validating PMD populate\n");
	/*
	 * This entry points to next level page table page.
	 * Hence this must not qualify as pmd_bad().
	 */
	pmd_populate(mm, pmdp, pgtable);
	pmd = READ_ONCE(*pmdp);
	WARN_ON(pmd_bad(pmd));
}

586 587 588 589 590 591 592
/*
 * Validate that pte_special() holds for an entry built from @pfn and
 * @prot once pte_mkspecial() has been applied to it.
 * Skipped unless CONFIG_ARCH_HAS_PTE_SPECIAL is enabled.
 */
static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
		return;

	pr_debug("Validating PTE special\n");
	WARN_ON(!pte_special(pte_mkspecial(pte)));
}

/*
 * Validate that an entry built from @pfn and @prot is reported by both
 * pte_protnone() and pte_present(). The caller in this file passes a
 * PROT_NONE style protection as @prot.
 * Skipped unless CONFIG_NUMA_BALANCING is enabled.
 */
static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	pr_debug("Validating PTE protnone\n");
	WARN_ON(!pte_protnone(pte));
	WARN_ON(!pte_present(pte));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Validate that a huge entry built from @pfn and @prot is reported by
 * both pmd_protnone() and pmd_present(). The caller in this file passes
 * a PROT_NONE style protection as @prot.
 * Skipped unless CONFIG_NUMA_BALANCING is enabled.
 */
static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd = pmd_mkhuge(pfn_pmd(pfn, prot));

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	pr_debug("Validating PMD protnone\n");
	WARN_ON(!pmd_protnone(pmd));
	WARN_ON(!pmd_present(pmd));
}
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
/* Empty PMD protnone test for builds without CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
/*
 * Validate that pte_devmap() holds for an entry built from @pfn and
 * @prot once pte_mkdevmap() has been applied to it.
 */
static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	pr_debug("Validating PTE devmap\n");
	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Validate that pmd_devmap() holds for an entry built from @pfn and
 * @prot once pmd_mkdevmap() has been applied to it.
 */
static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd = pfn_pmd(pfn, prot);

	pr_debug("Validating PMD devmap\n");
	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
/*
 * Validate that pud_devmap() holds for an entry built from @pfn and
 * @prot once pud_mkdevmap() has been applied to it.
 */
static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot)
{
	pud_t pud = pfn_pud(pfn, prot);

	pr_debug("Validating PUD devmap\n");
	WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
}
#else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
/* Empty PUD devmap test for builds without PUD based THP support */
static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot)
{
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
/* Empty PMD and PUD devmap tests for builds without CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#else  /* !CONFIG_ARCH_HAS_PTE_DEVMAP */
/* Empty devmap tests at every level for builds without CONFIG_ARCH_HAS_PTE_DEVMAP */
static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
#endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */

/*
 * Validate pte_mksoft_dirty()/pte_clear_soft_dirty() against
 * pte_soft_dirty() on an entry built from @pfn and @prot.
 * Skipped unless CONFIG_MEM_SOFT_DIRTY is enabled.
 */
static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	pr_debug("Validating PTE soft dirty\n");
	WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte)));
	WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
}

/*
 * Validate pte_swp_mksoft_dirty()/pte_swp_clear_soft_dirty() against
 * pte_swp_soft_dirty() on an entry built from @pfn and @prot.
 * Skipped unless CONFIG_MEM_SOFT_DIRTY is enabled.
 */
static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
{
	pte_t pte = pfn_pte(pfn, prot);

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	pr_debug("Validating PTE swap soft dirty\n");
	WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte)));
	WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte)));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Validate pmd_mksoft_dirty()/pmd_clear_soft_dirty() against
 * pmd_soft_dirty() on an entry built from @pfn and @prot.
 * Skipped unless CONFIG_MEM_SOFT_DIRTY is enabled.
 */
static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd = pfn_pmd(pfn, prot);

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	pr_debug("Validating PMD soft dirty\n");
	WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
	WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
}

/*
 * Validate pmd_swp_mksoft_dirty()/pmd_swp_clear_soft_dirty() against
 * pmd_swp_soft_dirty() on an entry built from @pfn and @prot.
 * Skipped unless both CONFIG_MEM_SOFT_DIRTY and
 * CONFIG_ARCH_ENABLE_THP_MIGRATION are enabled.
 */
static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd = pfn_pmd(pfn, prot);

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
		return;

	pr_debug("Validating PMD swap soft dirty\n");
	WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
	WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
}
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
/* Empty PMD soft dirty tests for builds without CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Validate that converting an entry built from @pfn and @prot to a swap
 * entry and back with __pte_to_swp_entry()/__swp_entry_to_pte() yields
 * an entry whose pte_pfn() is still @pfn.
 */
static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot)
{
	swp_entry_t swp;
	pte_t pte;

	pr_debug("Validating PTE swap\n");
	pte = pfn_pte(pfn, prot);
	swp = __pte_to_swp_entry(pte);
	pte = __swp_entry_to_pte(swp);
	WARN_ON(pfn != pte_pfn(pte));
}

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
/*
 * Validate that converting an entry built from @pfn and @prot to a swap
 * entry and back with __pmd_to_swp_entry()/__swp_entry_to_pmd() yields
 * an entry whose pmd_pfn() is still @pfn.
 */
static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
{
	swp_entry_t swp;
	pmd_t pmd;

	pr_debug("Validating PMD swap\n");
	pmd = pfn_pmd(pfn, prot);
	swp = __pmd_to_swp_entry(pmd);
	pmd = __swp_entry_to_pmd(swp);
	WARN_ON(pfn != pmd_pfn(pmd));
}
#else  /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
/* Empty PMD swap test for builds without CONFIG_ARCH_ENABLE_THP_MIGRATION */
static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
{
}
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */

/*
 * Validate the migration swap entry helpers: a write migration entry, the
 * same entry downgraded with make_migration_entry_read(), and an entry
 * created read-only in the first place. A freshly allocated page is used
 * and freed again before returning.
 * Skipped unless CONFIG_MIGRATION is enabled; bails out with an error
 * message when the page cannot be allocated.
 */
static void __init swap_migration_tests(void)
{
	struct page *page;
	swp_entry_t swp;

	if (!IS_ENABLED(CONFIG_MIGRATION))
		return;

	pr_debug("Validating swap migration\n");
	/*
	 * swap_migration_tests() requires a dedicated page as it needs to
	 * be locked before creating a migration entry from it. Locking the
	 * page that actually maps kernel text ('start_kernel') can be real
	 * problematic. Lets allocate a dedicated page explicitly for this
	 * purpose that will be freed subsequently.
	 */
	page = alloc_page(GFP_KERNEL);
	if (!page) {
		pr_err("page allocation failed\n");
		return;
	}

	/*
	 * make_migration_entry() expects given page to be
	 * locked, otherwise it stumbles upon a BUG_ON().
	 */
	__SetPageLocked(page);
	swp = make_migration_entry(page, 1);
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(!is_write_migration_entry(swp));

	make_migration_entry_read(&swp);
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(is_write_migration_entry(swp));

	swp = make_migration_entry(page, 0);
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(is_write_migration_entry(swp));
	__ClearPageLocked(page);
	__free_page(page);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * Validate the HugeTLB PTE value helpers on an entry created with
 * mk_huge_pte() from the page behind @pfn and protection @prot. With
 * CONFIG_ARCH_WANT_GENERAL_HUGETLB, pte_mkhuge()/pte_huge() are checked
 * as well on a regular entry built from @pfn and @prot.
 */
static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
{
	struct page *page;
	pte_t pte;

	pr_debug("Validating HugeTLB basic\n");
	/*
	 * Accessing the page associated with the pfn is safe here,
	 * as it was previously derived from a real kernel symbol.
	 */
	page = pfn_to_page(pfn);
	pte = mk_huge_pte(page, prot);

	WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte)));
	WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte))));
	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));

#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
	pte = pfn_pte(pfn, prot);

	WARN_ON(!pte_huge(pte_mkhuge(pte)));
#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
}
#else  /* !CONFIG_HUGETLB_PAGE */
/* Empty HugeTLB test for builds without CONFIG_HUGETLB_PAGE */
static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
{
}
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Validate pmd_trans_huge() on a huge entry built from @pfn and @prot and,
 * unless __HAVE_ARCH_PMDP_INVALIDATE is defined, that pmd_trans_huge()
 * and pmd_present() still hold after pmd_mkinvalid().
 * Skipped when has_transparent_hugepage() reports no support.
 */
static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
{
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD based THP\n");
	/*
	 * pmd_trans_huge() and pmd_present() must return positive after
	 * MMU invalidation with pmd_mkinvalid(). This behavior is an
	 * optimization for transparent huge page. pmd_trans_huge() must
	 * be true if pmd_page() returns a valid THP to avoid taking the
	 * pmd_lock when others walk over non transhuge pmds (i.e. there
	 * are no THP allocated). Especially when splitting a THP and
	 * removing the present bit from the pmd, pmd_trans_huge() still
	 * needs to return true. pmd_present() should be true whenever
	 * pmd_trans_huge() returns true.
	 */
	pmd = pfn_pmd(pfn, prot);
	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
	WARN_ON(!pmd_trans_huge(pmd_mkinvalid(pmd_mkhuge(pmd))));
	WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd))));
#endif /* __HAVE_ARCH_PMDP_INVALIDATE */
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
/*
 * Validate pud_trans_huge() on a huge entry built from @pfn and @prot.
 * Skipped when has_transparent_hugepage() reports no support.
 */
static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot)
{
	pud_t pud;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PUD based THP\n");
	pud = pfn_pud(pfn, prot);
	WARN_ON(!pud_trans_huge(pud_mkhuge(pud)));

	/*
	 * pud_mkinvalid() has been dropped for now. Enable back
	 * these tests when it comes back with a modified pud_present().
	 *
	 * WARN_ON(!pud_trans_huge(pud_mkinvalid(pud_mkhuge(pud))));
	 * WARN_ON(!pud_present(pud_mkinvalid(pud_mkhuge(pud))));
	 */
}
#else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
/* Empty PUD THP test for builds without PUD based THP support */
static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot)
{
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
/* Empty PMD and PUD THP tests for builds without CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
{
}

static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

875 876 877 878 879 880 881 882 883 884 885 886 887 888
/*
 * Pick a random address that lies a whole number of pages above
 * FIRST_USER_ADDRESS and below TASK_SIZE.
 *
 * Return: the chosen address.
 */
static unsigned long __init get_random_vaddr(void)
{
	/* Number of PAGE_SIZE sized slots between the two limits */
	unsigned long nr_slots = (TASK_SIZE - FIRST_USER_ADDRESS) / PAGE_SIZE;
	unsigned long slot = get_random_long() % nr_slots;

	return FIRST_USER_ADDRESS + slot * PAGE_SIZE;
}

/*
 * Entry point of the test, run once as a late initcall. It allocates a
 * scratch mm and vma, builds one page table path for a random user
 * address, runs the value-only helper tests followed by the tests that
 * modify page table entries (under the matching page table lock), and
 * finally frees the page table pages, the vma and the mm again.
 *
 * Return: 0 once all tests have run, 1 when the mm or the vma could not
 * be allocated.
 */
static int __init debug_vm_pgtable(void)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	pgd_t *pgdp;
	p4d_t *p4dp, *saved_p4dp;
	pud_t *pudp, *saved_pudp;
	pmd_t *pmdp, *saved_pmdp, pmd;
	pte_t *ptep;
	pgtable_t saved_ptep;
	pgprot_t prot, protnone;
	phys_addr_t paddr;
	unsigned long vaddr, pte_aligned, pmd_aligned;
	unsigned long pud_aligned, p4d_aligned, pgd_aligned;
	spinlock_t *ptl = NULL;

	pr_info("Validating architecture page table helpers\n");
	prot = vm_get_page_prot(VMFLAGS);
	vaddr = get_random_vaddr();
	mm = mm_alloc();
	if (!mm) {
		pr_err("mm_struct allocation failed\n");
		return 1;
	}

	/*
	 * __P000 (or even __S000) will help create page table entries with
	 * PROT_NONE permission as required for pxx_protnone_tests().
	 */
	protnone = __P000;

	vma = vm_area_alloc(mm);
	if (!vma) {
		pr_err("vma allocation failed\n");
		/* Do not leak the mm allocated above */
		mmdrop(mm);
		return 1;
	}

	/*
	 * PFN for mapping at PTE level is determined from a standard kernel
	 * text symbol. But pfns for higher page table levels are derived by
	 * masking lower bits of this real pfn. These derived pfns might not
	 * exist on the platform but that does not really matter as pfn_pxx()
	 * helpers will still create appropriate entries for the test. This
	 * helps avoid large memory block allocations to be used for mapping
	 * at higher page table levels.
	 */
	paddr = __pa_symbol(&start_kernel);

	pte_aligned = (paddr & PAGE_MASK) >> PAGE_SHIFT;
	pmd_aligned = (paddr & PMD_MASK) >> PAGE_SHIFT;
	pud_aligned = (paddr & PUD_MASK) >> PAGE_SHIFT;
	p4d_aligned = (paddr & P4D_MASK) >> PAGE_SHIFT;
	pgd_aligned = (paddr & PGDIR_MASK) >> PAGE_SHIFT;
	WARN_ON(!pfn_valid(pte_aligned));

	pgdp = pgd_offset(mm, vaddr);
	p4dp = p4d_alloc(mm, pgdp, vaddr);
	pudp = pud_alloc(mm, p4dp, vaddr);
	pmdp = pmd_alloc(mm, pudp, vaddr);
	ptep = pte_alloc_map(mm, pmdp, vaddr);

	/*
	 * Save all the page table page addresses as the page table
	 * entries will be used for testing with random or garbage
	 * values. These saved addresses will be used for freeing
	 * page table pages.
	 */
	pmd = READ_ONCE(*pmdp);
	saved_p4dp = p4d_offset(pgdp, 0UL);
	saved_pudp = pud_offset(p4dp, 0UL);
	saved_pmdp = pmd_offset(pudp, 0UL);
	saved_ptep = pmd_pgtable(pmd);

	pte_basic_tests(pte_aligned, prot);
	pmd_basic_tests(pmd_aligned, prot);
	pud_basic_tests(pud_aligned, prot);
	p4d_basic_tests(p4d_aligned, prot);
	pgd_basic_tests(pgd_aligned, prot);

	pmd_leaf_tests(pmd_aligned, prot);
	pud_leaf_tests(pud_aligned, prot);

	pte_savedwrite_tests(pte_aligned, protnone);
	pmd_savedwrite_tests(pmd_aligned, protnone);

	pte_special_tests(pte_aligned, prot);
	pte_protnone_tests(pte_aligned, protnone);
	pmd_protnone_tests(pmd_aligned, protnone);

	pte_devmap_tests(pte_aligned, prot);
	pmd_devmap_tests(pmd_aligned, prot);
	pud_devmap_tests(pud_aligned, prot);

	pte_soft_dirty_tests(pte_aligned, prot);
	pmd_soft_dirty_tests(pmd_aligned, prot);
	pte_swap_soft_dirty_tests(pte_aligned, prot);
	pmd_swap_soft_dirty_tests(pmd_aligned, prot);

	pte_swap_tests(pte_aligned, prot);
	pmd_swap_tests(pmd_aligned, prot);

	swap_migration_tests();

	pmd_thp_tests(pmd_aligned, prot);
	pud_thp_tests(pud_aligned, prot);

	hugetlb_basic_tests(pte_aligned, prot);

	/*
	 * Page table modifying tests. They need to hold
	 * proper page table lock.
	 */

	ptl = pte_lockptr(mm, pmdp);
	spin_lock(ptl);
	pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot);
	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
	pte_unmap_unlock(ptep, ptl);

	ptl = pmd_lock(mm, pmdp);
	pmd_clear_tests(mm, pmdp);
	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
	pmd_huge_tests(pmdp, pmd_aligned, prot);
	pmd_populate_tests(mm, pmdp, saved_ptep);
	spin_unlock(ptl);

	ptl = pud_lock(mm, pudp);
	pud_clear_tests(mm, pudp);
	pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
	pud_huge_tests(pudp, pud_aligned, prot);
	pud_populate_tests(mm, pudp, saved_pmdp);
	spin_unlock(ptl);

	spin_lock(&mm->page_table_lock);
	p4d_clear_tests(mm, p4dp);
	pgd_clear_tests(mm, pgdp);
	p4d_populate_tests(mm, p4dp, saved_pudp);
	pgd_populate_tests(mm, pgdp, saved_p4dp);
	spin_unlock(&mm->page_table_lock);

	/* Release the page table pages through the addresses saved above */
	p4d_free(mm, saved_p4dp);
	pud_free(mm, saved_pudp);
	pmd_free(mm, saved_pmdp);
	pte_free(mm, saved_ptep);

	vm_area_free(vma);
	mm_dec_nr_puds(mm);
	mm_dec_nr_pmds(mm);
	mm_dec_nr_ptes(mm);
	mmdrop(mm);
	return 0;
}
late_initcall(debug_vm_pgtable);