提交 9fd6dad1 编写于 作者: P Paolo Bonzini

mm: provide a saner PTE walking API for modules

Currently, the follow_pfn function is exported for modules but
follow_pte is not.  However, follow_pfn is very easy to misuse,
because it does not provide protections (so most of its callers
assume the page is writable!) and because it returns after having
already unlocked the page table lock.

Provide instead a simplified version of follow_pte that does
not have the pmdpp and range arguments.  The older version
survives as follow_invalidate_pte() for use by fs/dax.c.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
上级 897218ff
...@@ -170,7 +170,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, ...@@ -170,7 +170,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE)) if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap; goto out_unlock_mmap;
ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
if (ret) if (ret)
goto out_unlock_mmap; goto out_unlock_mmap;
...@@ -311,7 +311,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, ...@@ -311,7 +311,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE)) if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap; goto out_unlock_mmap;
ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
if (ret) if (ret)
goto out_unlock_mmap; goto out_unlock_mmap;
......
...@@ -810,11 +810,12 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index, ...@@ -810,11 +810,12 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
address = pgoff_address(index, vma); address = pgoff_address(index, vma);
/* /*
* Note because we provide range to follow_pte it will call * follow_invalidate_pte() will use the range to call
* mmu_notifier_invalidate_range_start() on our behalf before * mmu_notifier_invalidate_range_start() on our behalf before
* taking any lock. * taking any lock.
*/ */
if (follow_pte(vma->vm_mm, address, &range, &ptep, &pmdp, &ptl)) if (follow_invalidate_pte(vma->vm_mm, address, &range, &ptep,
&pmdp, &ptl))
continue; continue;
/* /*
......
...@@ -1658,9 +1658,11 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, ...@@ -1658,9 +1658,11 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling); unsigned long end, unsigned long floor, unsigned long ceiling);
int int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
int follow_invalidate_pte(struct mm_struct *mm, unsigned long address,
struct mmu_notifier_range *range, pte_t **ptepp,
pmd_t **pmdpp, spinlock_t **ptlp);
int follow_pte(struct mm_struct *mm, unsigned long address, int follow_pte(struct mm_struct *mm, unsigned long address,
struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, pte_t **ptepp, spinlock_t **ptlp);
spinlock_t **ptlp);
int follow_pfn(struct vm_area_struct *vma, unsigned long address, int follow_pfn(struct vm_area_struct *vma, unsigned long address,
unsigned long *pfn); unsigned long *pfn);
int follow_phys(struct vm_area_struct *vma, unsigned long address, int follow_phys(struct vm_area_struct *vma, unsigned long address,
......
...@@ -4709,9 +4709,9 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) ...@@ -4709,9 +4709,9 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
} }
#endif /* __PAGETABLE_PMD_FOLDED */ #endif /* __PAGETABLE_PMD_FOLDED */
int follow_pte(struct mm_struct *mm, unsigned long address, int follow_invalidate_pte(struct mm_struct *mm, unsigned long address,
struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, struct mmu_notifier_range *range, pte_t **ptepp,
spinlock_t **ptlp) pmd_t **pmdpp, spinlock_t **ptlp)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d; p4d_t *p4d;
...@@ -4776,6 +4776,34 @@ int follow_pte(struct mm_struct *mm, unsigned long address, ...@@ -4776,6 +4776,34 @@ int follow_pte(struct mm_struct *mm, unsigned long address,
return -EINVAL; return -EINVAL;
} }
/**
 * follow_pte - find and lock the PTE mapping a user virtual address
 * @mm: the mm_struct of the address space to search
 * @address: user virtual address to resolve
 * @ptepp: output; receives a pointer to the PTE that was found
 * @ptlp: output; receives the location of the lock protecting that PTE
 *
 * Simplified front end to follow_invalidate_pte(): no MMU notifier range
 * and no PMD output pointer are passed (both arguments are NULL).
 *
 * When the call succeeds, @ptepp points at the PTE and the matching lock
 * has been taken, with its location stored in @ptlp.  The PTE contents are
 * stable only until @ptlp is released; any use after that point must be
 * protected against invalidation with MMU notifiers.
 *
 * Only IO mappings and raw PFN mappings are allowed.  The mmap semaphore
 * should be taken for read.
 *
 * KVM uses this function.  It is arguably less bad than ``follow_pfn``,
 * but it is not a good general-purpose API.
 *
 * Return: zero on success, a negative value otherwise.
 */
int follow_pte(struct mm_struct *mm, unsigned long address,
	       pte_t **ptepp, spinlock_t **ptlp)
{
	int ret;

	/* Plain PTE walk: no notifier range and no PMD result requested. */
	ret = follow_invalidate_pte(mm, address, NULL, ptepp, NULL, ptlp);

	return ret;
}
EXPORT_SYMBOL_GPL(follow_pte);
/** /**
* follow_pfn - look up PFN at a user virtual address * follow_pfn - look up PFN at a user virtual address
* @vma: memory mapping * @vma: memory mapping
...@@ -4784,6 +4812,9 @@ int follow_pte(struct mm_struct *mm, unsigned long address, ...@@ -4784,6 +4812,9 @@ int follow_pte(struct mm_struct *mm, unsigned long address,
* *
* Only IO mappings and raw PFN mappings are allowed. * Only IO mappings and raw PFN mappings are allowed.
* *
* This function does not allow the caller to read the permissions
* of the PTE. Do not use it.
*
* Return: zero and the pfn at @pfn on success, -ve otherwise. * Return: zero and the pfn at @pfn on success, -ve otherwise.
*/ */
int follow_pfn(struct vm_area_struct *vma, unsigned long address, int follow_pfn(struct vm_area_struct *vma, unsigned long address,
...@@ -4796,7 +4827,7 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address, ...@@ -4796,7 +4827,7 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address,
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
return ret; return ret;
ret = follow_pte(vma->vm_mm, address, NULL, &ptep, NULL, &ptl); ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
if (ret) if (ret)
return ret; return ret;
*pfn = pte_pfn(*ptep); *pfn = pte_pfn(*ptep);
...@@ -4817,7 +4848,7 @@ int follow_phys(struct vm_area_struct *vma, ...@@ -4817,7 +4848,7 @@ int follow_phys(struct vm_area_struct *vma,
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
goto out; goto out;
if (follow_pte(vma->vm_mm, address, NULL, &ptep, NULL, &ptl)) if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
goto out; goto out;
pte = *ptep; pte = *ptep;
......
...@@ -1911,7 +1911,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, ...@@ -1911,7 +1911,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
spinlock_t *ptl; spinlock_t *ptl;
int r; int r;
r = follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl); r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
if (r) { if (r) {
/* /*
* get_user_pages fails for VM_IO and VM_PFNMAP vmas and does * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
...@@ -1926,7 +1926,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, ...@@ -1926,7 +1926,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
if (r) if (r)
return r; return r;
r = follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl); r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
if (r) if (r)
return r; return r;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册