diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index de6cdfa51694cb09b53fc91cd6e5136adfc80c2b..2383d81ca2d6d66282dea3201bc02cd41e91b428 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -384,7 +384,7 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 const struct hstate *hugetlb_get_hstate(void);
 struct page *hugetlb_alloc_hugepage(int nid, int flag);
 int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr,
-			pgprot_t prot, struct page *hpage);
+				pgprot_t prot, struct page *hpage);
 #else
 static inline const struct hstate *hugetlb_get_hstate(void)
 {
@@ -402,6 +402,8 @@ static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm,
 	return -EPERM;
 }
 #endif
+int hugetlb_insert_hugepage(struct vm_area_struct *vma, unsigned long addr,
+			    struct page *hpage, pgprot_t prot);
 
 /* arch callback */
 int __init __alloc_bootmem_huge_page(struct hstate *h);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8aa492fb7d538bbee57315299f722f917efddeb3..e4a20206c3f397dc542f0660dae92d26c3391825 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -230,6 +230,10 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
 #define VM_PA32BIT	0x400000000	/* Physical address is within 4G */
 
+#ifdef CONFIG_ASCEND_SHARE_POOL
+#define VM_HUGE_SPECIAL	0x800000000	/* Special hugepage flag used by share pool */
+#endif
+
 #ifdef CONFIG_COHERENT_DEVICE
 #define VM_CDM		0x100000000	/* Contains coherent device memory */
 #endif
@@ -247,11 +251,13 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_HIGH_ARCH_BIT_2	34	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_3	35	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_4	36	/* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5	37	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_0	BIT(VM_HIGH_ARCH_BIT_0)
 #define VM_HIGH_ARCH_1	BIT(VM_HIGH_ARCH_BIT_1)
 #define VM_HIGH_ARCH_2	BIT(VM_HIGH_ARCH_BIT_2)
 #define VM_HIGH_ARCH_3	BIT(VM_HIGH_ARCH_BIT_3)
 #define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_HIGH_ARCH_5	BIT(VM_HIGH_ARCH_BIT_5)
 #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
@@ -267,6 +273,12 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 #endif /* CONFIG_ARCH_HAS_PKEYS */
 
+#if defined(CONFIG_ASCEND_SHARE_POOL)
+# define VM_SHARE_POOL	VM_HIGH_ARCH_5
+#else
+# define VM_SHARE_POOL	VM_NONE
+#endif
+
 #if defined(CONFIG_X86)
 # define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
 #elif defined(CONFIG_PPC)
@@ -620,7 +632,7 @@ int region_intersects(resource_size_t offset, size_t size, unsigned long flags,
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
 unsigned long vmalloc_to_pfn(const void *addr);
-
+struct page *vmalloc_to_hugepage(const void *addr);
 /*
  * Determine if an address is within the vmalloc range
  *
@@ -2407,10 +2419,14 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
 	struct list_head *uf);
+
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
 extern int do_madvise(unsigned long start, size_t len_in, int behavior);
-
+extern unsigned long __do_mmap(struct mm_struct *mm, struct file *file,
+	unsigned long addr, unsigned long len, unsigned long prot,
+	unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff,
+	unsigned long *populate, struct list_head *uf);
 static inline unsigned long
 do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
@@ -2428,14 +2444,21 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
 	/* Ignore errors */
 	(void) __mm_populate(addr, len, 1);
 }
+extern int do_mm_populate(struct mm_struct *mm, unsigned long addr, unsigned long len,
+			  int ignore_errors);
 #else
 static inline void mm_populate(unsigned long addr, unsigned long len) {}
+static inline int do_mm_populate(struct mm_struct *mm, unsigned long addr,
+				 unsigned long len, int ignore_errors)
+{
+	return 0;	/* nothing to populate without an MMU */
+}
 #endif
 
 /* These take the mm semaphore themselves */
 extern int __must_check vm_brk(unsigned long, unsigned long);
 extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
+extern int do_vm_munmap(struct mm_struct *mm, unsigned long start, size_t len);
 extern unsigned long do_vm_mmap(struct mm_struct *mm, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long pgoff);
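Note: the mm.h hunks above only reserve VM_HIGH_ARCH_BIT_5 and map it to VM_SHARE_POOL (falling back to VM_NONE when CONFIG_ASCEND_SHARE_POOL is off). The helper that tests the flag, sp_check_vm_share_pool(), is called by later hunks but is not defined in this diff; a minimal sketch of such a check, assuming it does nothing more than test the new VMA flag, could look like this:

	#include <linux/mm.h>

	/*
	 * Hypothetical sketch only: the real sp_check_vm_share_pool() lives in
	 * the share pool code, which is not part of this diff.  Because
	 * VM_SHARE_POOL is VM_NONE (0) when the feature is disabled, the test
	 * compiles away in that configuration.
	 */
	static inline bool sp_check_vm_share_pool(vm_flags_t vm_flags)
	{
		return !!(vm_flags & VM_SHARE_POOL);
	}
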
diff --git a/mm/gup.c b/mm/gup.c
index 5801d4bd523a6252d29b9915c5966ffc43b50cb0..6372fb45e2dca96bde6ee1c18b3ae0dfcdcef17d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include <linux/share_pool.h>
 #include
 #include
 
@@ -1228,6 +1229,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long nr_pages = (end - start) / PAGE_SIZE;
 	int gup_flags;
+	struct task_struct *tsk;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(end & ~PAGE_MASK);
@@ -1253,24 +1255,22 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
 		gup_flags |= FOLL_FORCE;
 
+	tsk = sp_get_task(mm);
 	/*
 	 * We made sure addr is within a VMA, so the following will
 	 * not result in a stack expansion that recurses back here.
 	 */
-	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
+	return __get_user_pages(tsk, mm, start, nr_pages, gup_flags,
 				NULL, NULL, nonblocking);
 }
 
 /*
- * __mm_populate - populate and/or mlock pages within a range of address space.
- *
- * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
- * flags. VMAs must be already marked with the desired vm_flags, and
- * mmap_sem must not be held.
+ * do_mm_populate - populate and/or mlock pages within a range of
+ * address space for the specified mm_struct.
  */
-int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
+int do_mm_populate(struct mm_struct *mm, unsigned long start, unsigned long len,
+		   int ignore_errors)
 {
-	struct mm_struct *mm = current->mm;
 	unsigned long end, nstart, nend;
 	struct vm_area_struct *vma = NULL;
 	int locked = 0;
@@ -1321,6 +1321,18 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 	return ret;	/* 0 or negative error code */
 }
 
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
+{
+	return do_mm_populate(current->mm, start, len, ignore_errors);
+}
+
 /**
  * get_dump_page() - pin user page in memory while writing it to core dump
  * @addr: user address
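Note: with the gup.c changes, __mm_populate() is now a thin wrapper around do_mm_populate(), which takes an explicit mm instead of assuming current->mm. A usage sketch for a caller that already holds a reference on another process's mm (the helper name is hypothetical; as with __mm_populate(), mmap_sem must not be held on entry):

	#include <linux/mm.h>

	/* Sketch: prefault a range in a target mm, e.g. right after creating a
	 * mapping on its behalf; 0 means errors are reported, not ignored. */
	static int sp_prefault_range(struct mm_struct *mm, unsigned long addr,
				     unsigned long len)
	{
		return do_mm_populate(mm, addr, len, 0);
	}
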
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 87f0f2bd6410b0f0c0dc95d49163ec0afa0f1918..7d57d6a943c250c3f46c1c272a16158f08381b3d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -26,6 +26,7 @@
 #include
 #include
 #include
+#include <linux/share_pool.h>
 #include
 #include
 
@@ -4010,6 +4011,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		}
 
 		page = alloc_huge_page(vma, haddr, 0);
+		if (IS_ERR(page) && sp_check_vm_share_pool(vma->vm_flags)) {
+			page = alloc_huge_page_node(hstate_file(vma->vm_file),
+						    numa_mem_id());
+			if (!page)
+				page = ERR_PTR(-ENOMEM);
+		}
 		if (IS_ERR(page)) {
 			/*
 			 * Returning error will result in faulting task being
@@ -5359,6 +5366,41 @@ int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
 }
 EXPORT_SYMBOL_GPL(hugetlb_insert_hugepage_pte_by_pa);
 
+int hugetlb_insert_hugepage(struct vm_area_struct *vma, unsigned long addr,
+			    struct page *hpage, pgprot_t prot)
+{
+	struct hstate *h = hstate_vma(vma);
+	int anon_rmap = 0;
+	spinlock_t *ptl;
+	pte_t *ptep;
+	pte_t pte;
+	struct mm_struct *mm = vma->vm_mm;
+
+	ptep = hugetlb_huge_pte_alloc(mm, addr, huge_page_size(h));
+	if (!ptep)
+		return -ENXIO;
+
+	get_page(hpage);
+
+	ptl = huge_pte_lock(h, mm, ptep);
+	if (anon_rmap) {
+		ClearPagePrivate(hpage);
+		hugepage_add_new_anon_rmap(hpage, vma, addr);
+	} else {
+		page_dup_rmap(hpage, true);
+	}
+
+	pte = make_huge_pte(vma, hpage, ((vma->vm_flags & VM_WRITE)
+					 && (vma->vm_flags & VM_SHARED)));
+	set_huge_pte_at(mm, addr, ptep, pte);
+
+	hugetlb_count_add(pages_per_huge_page(h), mm);
+
+	spin_unlock(ptl);
+
+	return 0;
+}
+
 #ifdef CONFIG_ASCEND_CHARGE_MIGRATE_HUGEPAGES
 
 static int __init ascend_enable_charge_migrate_hugepages(char *s)
diff --git a/mm/memory.c b/mm/memory.c
index 56e57897d565f55d7d3344512a6b02ee8ae8c2d1..6530d76a40af8fceb193695613460bd82fdb3113 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -70,6 +70,7 @@
 #include
 #include
 #include
+#include <linux/share_pool.h>
 #include
 #include
 
@@ -1540,7 +1541,11 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 		BUG_ON(vma->vm_flags & VM_PFNMAP);
 		vma->vm_flags |= VM_MIXEDMAP;
 	}
-	return insert_page(vma, addr, page, vma->vm_page_prot);
+
+	if (sp_check_hugepage(page))
+		return hugetlb_insert_hugepage(vma, addr, page, vma->vm_page_prot);
+	else
+		return insert_page(vma, addr, page, vma->vm_page_prot);
 }
 EXPORT_SYMBOL(vm_insert_page);
 
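Note: the memory.c hunk makes vm_insert_page() route huge pages, as identified by sp_check_hugepage(), through the new hugetlb_insert_hugepage() instead of insert_page(). A sketch of a driver-style mapping path that relies on this behaviour, reusing hugetlb_alloc_hugepage() as already declared in hugetlb.h above (the helper name and the node choice are illustrative only):

	#include <linux/mm.h>
	#include <linux/hugetlb.h>
	#include <linux/topology.h>

	/* Sketch: allocate one huge page and hand it to vm_insert_page(); with
	 * this series the page is detected as huge and inserted through
	 * hugetlb_insert_hugepage(). */
	static int sp_map_one_hugepage(struct vm_area_struct *vma, unsigned long addr)
	{
		struct page *hpage = hugetlb_alloc_hugepage(numa_mem_id(), 0);

		if (!hpage)
			return -ENOMEM;
		return vm_insert_page(vma, addr, hpage);
	}
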
diff --git a/mm/mmap.c b/mm/mmap.c
index f7f1fd3b5fa394d23e4724cd90b0d4636ff9aa28..9c9a4a98abb21e0188c51d164d01c8a058f3c5a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -47,6 +47,7 @@
 #include
 #include
 #include
+#include <linux/share_pool.h>
 #include
 #include
 
@@ -178,6 +179,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
+	sp_area_drop(vma);
 	vm_area_free(vma);
 	return next;
 }
@@ -1119,6 +1121,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	if (vm_flags & VM_SPECIAL)
 		return NULL;
 
+	/* don't merge this kind of vma, as an sp_area can't be merged */
+	if (sp_check_vm_share_pool(vm_flags))
+		return NULL;
+
 	if (prev)
 		next = prev->vm_next;
 	else
@@ -1373,12 +1379,17 @@ int unregister_mmap_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(unregister_mmap_notifier);
 #endif
 
-static inline unsigned long
-__do_mmap(struct file *file, unsigned long addr, unsigned long len,
-	  unsigned long prot, unsigned long flags, vm_flags_t vm_flags,
-	  unsigned long pgoff, unsigned long *populate, struct list_head *uf)
+static unsigned long __mmap_region(struct mm_struct *mm,
+				   struct file *file, unsigned long addr,
+				   unsigned long len, vm_flags_t vm_flags,
+				   unsigned long pgoff, struct list_head *uf);
+
+inline unsigned long
+__do_mmap(struct mm_struct *mm, struct file *file, unsigned long addr,
+	  unsigned long len, unsigned long prot, unsigned long flags,
+	  vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
+	  struct list_head *uf)
 {
-	struct mm_struct *mm = current->mm;
 	int pkey = 0;
 
 	*populate = 0;
@@ -1403,6 +1414,10 @@ __do_mmap(struct file *file, unsigned long addr, unsigned long len,
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
+	/* MAP_DVPP cannot be used together with MAP_SHARE_POOL */
+	if ((flags & MAP_DVPP) && sp_mmap_check(flags))
+		return -EINVAL;
+
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
 	if (!len)
@@ -1567,7 +1582,7 @@ __do_mmap(struct file *file, unsigned long addr, unsigned long len,
 	if (flags & MAP_CHECKNODE)
 		set_vm_checknode(&vm_flags, flags);
 
-	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
+	addr = __mmap_region(mm, file, addr, len, vm_flags, pgoff, uf);
 	if (!IS_ERR_VALUE(addr) &&
 	    ((vm_flags & VM_LOCKED) ||
 	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -1737,12 +1752,11 @@ do_user_swap(struct mm_struct *mm, unsigned long addr_start, unsigned long len,
 }
 
 static inline unsigned long
-do_uswap_mmap(struct file *file, unsigned long addr, unsigned long len,
-	      unsigned long prot, unsigned long flags, vm_flags_t vm_flags,
-	      unsigned long pgoff, unsigned long *populate,
-	      struct list_head *uf)
+do_uswap_mmap(struct mm_struct *mm, struct file *file, unsigned long addr,
+	      unsigned long len, unsigned long prot, unsigned long flags,
+	      vm_flags_t vm_flags, unsigned long pgoff,
+	      unsigned long *populate, struct list_head *uf)
 {
-	struct mm_struct *mm = current->mm;
 	unsigned long old_addr = addr;
 	struct page **pages = NULL;
 	unsigned long ret;
@@ -1758,7 +1772,7 @@ do_uswap_mmap(struct file *file, unsigned long addr, unsigned long len,
 	/* mark the vma as special to avoid merging with other vmas */
 	vm_flags |= VM_SPECIAL;
 
-	addr = __do_mmap(file, addr, len, prot, flags, vm_flags, pgoff,
+	addr = __do_mmap(mm, file, addr, len, prot, flags, vm_flags, pgoff,
 			 populate, uf);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
@@ -1788,10 +1802,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 {
 #ifdef CONFIG_USERSWAP
 	if (enable_userswap && (flags & MAP_REPLACE))
-		return do_uswap_mmap(file, addr, len, prot, flags, vm_flags,
-				     pgoff, populate, uf);
+		return do_uswap_mmap(current->mm, file, addr, len, prot, flags,
+				     vm_flags, pgoff, populate, uf);
 #endif
-	return __do_mmap(file, addr, len, prot, flags, vm_flags,
+	return __do_mmap(current->mm, file, addr, len, prot, flags, vm_flags,
 			 pgoff, populate, uf);
 }
 
@@ -1939,11 +1953,11 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
 }
 
-unsigned long mmap_region(struct file *file, unsigned long addr,
-		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-		struct list_head *uf)
+static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
+				   unsigned long addr, unsigned long len,
+				   vm_flags_t vm_flags, unsigned long pgoff,
+				   struct list_head *uf)
 {
-	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
 	int error;
 	struct rb_node **rb_link, *rb_parent;
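Note: after the hunk above, __mmap_region() and __do_mmap() operate on an explicit mm, while mmap_region() and do_mmap() remain current->mm wrappers (the wrapper for mmap_region() follows in the next hunk). A sketch of how the mm-aware __do_mmap() might be driven against a target address space; the helper name is hypothetical, and the caller is assumed to hold a reference on mm and to have done any checks that vm_mmap_pgoff() would normally perform:

	#include <linux/mm.h>
	#include <linux/userfaultfd_k.h>

	/* Sketch: create a mapping in a target mm and populate it if requested,
	 * mirroring what vm_mmap_pgoff() plus mm_populate() do for current. */
	static unsigned long sp_mmap_into(struct mm_struct *mm, struct file *file,
					  unsigned long addr, unsigned long len,
					  unsigned long prot, unsigned long flags)
	{
		unsigned long populate = 0;
		unsigned long ret;
		LIST_HEAD(uf);

		if (down_write_killable(&mm->mmap_sem))
			return -EINTR;
		ret = __do_mmap(mm, file, addr, len, prot, flags, 0, 0,
				&populate, &uf);
		up_write(&mm->mmap_sem);
		userfaultfd_unmap_complete(mm, &uf);

		if (!IS_ERR_VALUE(ret) && populate)
			do_mm_populate(mm, ret, populate, 0);
		return ret;
	}
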
@@ -2105,6 +2119,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	return error;
 }
 
+unsigned long mmap_region(struct file *file, unsigned long addr,
+			  unsigned long len, vm_flags_t vm_flags,
+			  unsigned long pgoff, struct list_head *uf)
+{
+	return __mmap_region(current->mm, file, addr, len, vm_flags, pgoff, uf);
+}
+
 unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
 	/*
@@ -2356,6 +2377,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	if (enable_mmap_dvpp)
 		dvpp_mmap_get_area(&info, flags);
 
+	sp_area_work_around(&info);
+
 	return vm_unmapped_area(&info);
 }
 #endif
@@ -2406,6 +2429,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	if (enable_mmap_dvpp)
 		dvpp_mmap_get_area(&info, flags);
 
+	sp_area_work_around(&info);
+
 	addr = vm_unmapped_area(&info);
 
 	/*
@@ -2423,6 +2448,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		if (enable_mmap_dvpp)
 			dvpp_mmap_get_area(&info, flags);
 
+		sp_area_work_around(&info);
+
 		addr = vm_unmapped_area(&info);
 	}
 
@@ -3094,6 +3121,24 @@ int vm_munmap(unsigned long start, size_t len)
 }
 EXPORT_SYMBOL(vm_munmap);
 
+int do_vm_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+{
+	int ret;
+	LIST_HEAD(uf);
+
+	if (mm == NULL)
+		return -EINVAL;
+
+	if (down_write_killable(&mm->mmap_sem))
+		return -EINTR;
+
+	ret = do_munmap(mm, start, len, &uf);
+	up_write(&mm->mmap_sem);
+	userfaultfd_unmap_complete(mm, &uf);
+	return ret;
+}
+EXPORT_SYMBOL(do_vm_munmap);
+
 /*
  * Must acquire an additional reference to the mm struct to prevent the
  * mm struct of other process from being released.
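Note: do_vm_munmap() is the teardown counterpart, giving vm_munmap() semantics against an explicit mm. A sketch of undoing a mapping in another task's address space, with the usual get_task_mm()/mmput() reference handling (the helper name is hypothetical):

	#include <linux/mm.h>
	#include <linux/sched/mm.h>

	/* Sketch: unmap a range from another task's address space. */
	static int sp_unmap_from_task(struct task_struct *tsk, unsigned long addr,
				      size_t len)
	{
		struct mm_struct *mm = get_task_mm(tsk);
		int ret;

		if (!mm)
			return -ESRCH;
		ret = do_vm_munmap(mm, addr, len);
		mmput(mm);
		return ret;
	}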