diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 510bdfbc40734ff0a3e8cc03d8b38f357d4e0d98..625407e4d3b0a77314181af9fd891448e765c8a0 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -212,7 +212,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Look up the Linux PTE for the backing page */ pte_size = psize; pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); - if (pte_present(pte) && !pte_numa(pte)) { + if (pte_present(pte) && !pte_protnone(pte)) { if (writing && !pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6154b0a2b06331f0c29efe56b210baa6f90d43c7..f38327b95f761b76094ad6a30af4c3de39b07666 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -398,8 +398,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * processors use the same I/D cache coherency mechanism * as embedded. */ - if (error_code & DSISR_PROTFAULT) - goto bad_area; #endif /* CONFIG_PPC_STD_MMU */ /* @@ -423,9 +421,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, flags |= FAULT_FLAG_WRITE; /* a read */ } else { - /* protection fault */ - if (error_code & 0x08000000) - goto bad_area; if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c90e602677c94f38e084957a78ab386cba89b037..83dfcb55ffef69b2f5921212ccdaf6d18b49da52 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -172,9 +172,14 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { -#ifdef CONFIG_DEBUG_VM - WARN_ON(pte_val(*ptep) & _PAGE_PRESENT); -#endif + /* + * When handling numa faults, we already have the pte marked + * _PAGE_PRESENT, but we can be sure that it is not in hpte. + * Hence we can use set_pte_at for them. + */ + VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_USER)); + /* Note: mm->context.id might not yet have been assigned as * this context might not have been activated yet when this * is called. diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 4fe5f64cc1793e693693dc0ca92420ce99dc6209..91bb8836825a6f516a0000525b9f44b69799d42b 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -718,7 +718,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { #ifdef CONFIG_DEBUG_VM - WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT); + WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_USER)); assert_spin_locked(&mm->page_table_lock); WARN_ON(!pmd_trans_huge(pmd)); #endif diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 89df70e0caa6b95427bf5996a601510d34e5265b..81bf3d2af3eb396e6c1703a7b7f71e5af5b4c14d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -84,7 +84,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, struct page *page; /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_numa(pte)) { + if (pte_protnone(pte)) { pte_unmap(ptep); return 0; } @@ -178,7 +178,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, * slowpath for accounting purposes and so that they * can be serialised against THP migration. */ - if (pmd_numa(pmd)) + if (pmd_protnone(pmd)) return 0; if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) return 0; diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 0d11c3dcd3a18ed93ca7bd8f4cc971988d9d0ed2..9cd8b21dddbe66b57febfdeb53987e51c03d6dbd 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -67,7 +67,7 @@ enum mpol_rebind_step { #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ -#define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */ +#define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ diff --git a/mm/gup.c b/mm/gup.c index c2da1163986aa52badc10c0eabca39540e458aee..51bf0b06ca7bd76a53f6370d60baf93d9e7996f8 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -64,7 +64,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, migration_entry_wait(mm, pmd, address); goto retry; } - if ((flags & FOLL_NUMA) && pte_numa(pte)) + if ((flags & FOLL_NUMA) && pte_protnone(pte)) goto no_page; if ((flags & FOLL_WRITE) && !pte_write(pte)) { pte_unmap_unlock(ptep, ptl); @@ -184,7 +184,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma, return page; return no_page_table(vma, flags); } - if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) + if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) return no_page_table(vma, flags); if (pmd_trans_huge(*pmd)) { if (flags & FOLL_SPLIT) { @@ -906,10 +906,10 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, /* * Similar to the PMD case below, NUMA hinting must take slow - * path + * path using the pte_protnone check. */ if (!pte_present(pte) || pte_special(pte) || - pte_numa(pte) || (write && !pte_write(pte))) + pte_protnone(pte) || (write && !pte_write(pte))) goto pte_unmap; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); @@ -1104,7 +1104,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, * slowpath for accounting purposes and so that they * can be serialised against THP migration. */ - if (pmd_numa(pmd)) + if (pmd_protnone(pmd)) return 0; if (!gup_huge_pmd(pmd, pmdp, addr, next, write, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c6921362c5fc9bbfc6573b2ba0508e9ed6409ef7..915941c451698f0cd72623311cf174332a0a6986 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1211,7 +1211,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, return ERR_PTR(-EFAULT); /* Full NUMA hinting faults to serialise migration in fault paths */ - if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) + if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) goto out; page = pmd_page(*pmd); @@ -1342,7 +1342,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* * Migrate the THP to the requested node, returns with page unlocked - * and pmd_numa cleared. + * and access rights restored. */ spin_unlock(ptl); migrated = migrate_misplaced_transhuge_page(mm, vma, @@ -1357,7 +1357,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, BUG_ON(!PageLocked(page)); pmd = pmd_mknonnuma(pmd); set_pmd_at(mm, haddr, pmdp, pmd); - VM_BUG_ON(pmd_numa(*pmdp)); + VM_BUG_ON(pmd_protnone(*pmdp)); update_mmu_cache_pmd(vma, addr, pmdp); unlock_page(page); out_unlock: @@ -1483,7 +1483,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ret = 1; if (!prot_numa) { entry = pmdp_get_and_clear_notify(mm, addr, pmd); - if (pmd_numa(entry)) + if (pmd_protnone(entry)) entry = pmd_mknonnuma(entry); entry = pmd_modify(entry, newprot); ret = HPAGE_PMD_NR; @@ -1499,7 +1499,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, * local vs remote hits on the zero page. */ if (!is_huge_zero_page(page) && - !pmd_numa(*pmd)) { + !pmd_protnone(*pmd)) { pmdp_set_numa(mm, addr, pmd); ret = HPAGE_PMD_NR; } @@ -1767,9 +1767,9 @@ static int __split_huge_page_map(struct page *page, pte_t *pte, entry; BUG_ON(PageCompound(page+i)); /* - * Note that pmd_numa is not transferred deliberately - * to avoid any possibility that pte_numa leaks to - * a PROT_NONE VMA by accident. + * Note that NUMA hinting access restrictions are not + * transferred to avoid any possibility of altering + * permissions across VMAs. */ entry = mk_pte(page + i, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); diff --git a/mm/memory.c b/mm/memory.c index bbe6a73a899d2bbfe173b3ad4821075390859b68..92e6a6299e8682c20e33daf13ddd0d604e9e7696 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3124,7 +3124,7 @@ static int handle_pte_fault(struct mm_struct *mm, pte, pmd, flags, entry); } - if (pte_numa(entry)) + if (pte_protnone(entry)) return do_numa_page(mm, vma, address, entry, pte, pmd); ptl = pte_lockptr(mm, pmd); @@ -3202,7 +3202,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (pmd_trans_splitting(orig_pmd)) return 0; - if (pmd_numa(orig_pmd)) + if (pmd_protnone(orig_pmd)) return do_huge_pmd_numa_page(mm, vma, address, orig_pmd, pmd); diff --git a/mm/mprotect.c b/mm/mprotect.c index 33121662f08b502ee8ae6bcee2f0ad579436cf9e..44ffa698484d58cf6e4be777b7c3229355d78ce6 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -75,36 +75,18 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, oldpte = *pte; if (pte_present(oldpte)) { pte_t ptent; - bool updated = false; - if (!prot_numa) { - ptent = ptep_modify_prot_start(mm, addr, pte); - if (pte_numa(ptent)) - ptent = pte_mknonnuma(ptent); - ptent = pte_modify(ptent, newprot); - /* - * Avoid taking write faults for pages we - * know to be dirty. - */ - if (dirty_accountable && pte_dirty(ptent) && - (pte_soft_dirty(ptent) || - !(vma->vm_flags & VM_SOFTDIRTY))) - ptent = pte_mkwrite(ptent); - ptep_modify_prot_commit(mm, addr, pte, ptent); - updated = true; - } else { - struct page *page; - - page = vm_normal_page(vma, addr, oldpte); - if (page && !PageKsm(page)) { - if (!pte_numa(oldpte)) { - ptep_set_numa(mm, addr, pte); - updated = true; - } - } + ptent = ptep_modify_prot_start(mm, addr, pte); + ptent = pte_modify(ptent, newprot); + + /* Avoid taking write faults for known dirty pages */ + if (dirty_accountable && pte_dirty(ptent) && + (pte_soft_dirty(ptent) || + !(vma->vm_flags & VM_SOFTDIRTY))) { + ptent = pte_mkwrite(ptent); } - if (updated) - pages++; + ptep_modify_prot_commit(mm, addr, pte, ptent); + pages++; } else if (IS_ENABLED(CONFIG_MIGRATION)) { swp_entry_t entry = pte_to_swp_entry(oldpte); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index dfb79e028ecbf225c626d971179954a4918b646e..4b8ad760dde32aa22bd4c7c06a4eb0bd5b373d29 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -193,7 +193,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_t entry = *pmdp; - if (pmd_numa(entry)) + if (pmd_protnone(entry)) entry = pmd_mknonnuma(entry); set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry)); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);