diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 870c29d8b8e4ae3d997ed7323600b9fa04121fc3..bd658f44e133b1cf80a475229b47685181cf69bd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -382,8 +382,10 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 const struct hstate *hugetlb_get_hstate(void);
 struct page *hugetlb_alloc_hugepage(int nid);
 int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr,
-		pgprot_t prot, struct page *hpage);
+				pgprot_t prot, struct page *hpage);
 #endif
+int hugetlb_insert_hugepage(struct vm_area_struct *vma, unsigned long addr,
+			    struct page *hpage, pgprot_t prot);
 
 /* arch callback */
 int __init __alloc_bootmem_huge_page(struct hstate *h);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0ff3de89f89727ffb80de6a9eb10a76277510563..35c2225c6e57e294dcd59f44196aca4d56254f70 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -230,6 +230,10 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
 #define VM_PA32BIT	0x400000000	/* Physical address is within 4G */
 
+#ifdef CONFIG_ASCEND_SHARE_POOL
+#define VM_HUGE_SPECIAL	0x800000000	/* Special hugepage flag used by share pool */
+#endif
+
 #ifdef CONFIG_COHERENT_DEVICE
 #define VM_CDM		0x100000000	/* Contains coherent device memory */
 #endif
@@ -240,11 +244,13 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_HIGH_ARCH_BIT_2	34	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_3	35	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_4	36	/* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5	37	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_0	BIT(VM_HIGH_ARCH_BIT_0)
 #define VM_HIGH_ARCH_1	BIT(VM_HIGH_ARCH_BIT_1)
 #define VM_HIGH_ARCH_2	BIT(VM_HIGH_ARCH_BIT_2)
 #define VM_HIGH_ARCH_3	BIT(VM_HIGH_ARCH_BIT_3)
 #define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_HIGH_ARCH_5	BIT(VM_HIGH_ARCH_BIT_5)
 #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
@@ -260,6 +266,12 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 #endif /* CONFIG_ARCH_HAS_PKEYS */
 
+#if defined(CONFIG_ASCEND_SHARE_POOL)
+# define VM_SHARE_POOL	VM_HIGH_ARCH_5
+#else
+# define VM_SHARE_POOL	VM_NONE
+#endif
+
 #if defined(CONFIG_X86)
 # define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
 #elif defined(CONFIG_PPC)
@@ -559,7 +571,7 @@ int region_intersects(resource_size_t offset, size_t size, unsigned long flags,
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
 unsigned long vmalloc_to_pfn(const void *addr);
-
+struct page *vmalloc_to_hugepage(const void *addr);
 /*
  * Determine if an address is within the vmalloc range
  *
@@ -2344,6 +2356,10 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
 	struct list_head *uf);
+extern unsigned long __do_mmap(struct mm_struct *mm, struct file *file,
+	unsigned long addr, unsigned long len, unsigned long prot,
+	unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff,
+	unsigned long *populate, struct list_head *uf);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
 
@@ -2364,14 +2380,21 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
 	/* Ignore errors */
 	(void) __mm_populate(addr, len, 1);
 }
+extern int do_mm_populate(struct mm_struct *mm, unsigned long addr, unsigned long len,
+		int ignore_errors);
 #else
 static inline void mm_populate(unsigned long addr, unsigned long len) {}
+int do_mm_populate(struct mm_struct *mm, unsigned long addr, unsigned long len,
+		int ignore_errors)
+{
+}
 #endif
 
 /* These take the mm semaphore themselves */
 extern int __must_check vm_brk(unsigned long, unsigned long);
 extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
+extern int do_vm_munmap(struct task_struct *tsk, unsigned long start, size_t len);
 extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
 	unsigned long, unsigned long,
 	unsigned long, unsigned long);
diff --git a/mm/gup.c b/mm/gup.c
index 3fc585282f24631ae5f21902f2da90b19844d22c..707c374d4f6b2c1c988ed03a5460cdf112976728 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include <linux/share_pool.h>
 #include
 #include
@@ -1355,6 +1356,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long nr_pages = (end - start) / PAGE_SIZE;
 	int gup_flags;
+	struct task_struct *tsk;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(end & ~PAGE_MASK);
@@ -1380,24 +1382,22 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
 		gup_flags |= FOLL_FORCE;
 
+	tsk = sp_get_task(mm);
 	/*
 	 * We made sure addr is within a VMA, so the following will
 	 * not result in a stack expansion that recurses back here.
 	 */
-	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
+	return __get_user_pages(tsk, mm, start, nr_pages, gup_flags,
 				NULL, NULL, nonblocking);
 }
 
 /*
- * __mm_populate - populate and/or mlock pages within a range of address space.
- *
- * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
- * flags. VMAs must be already marked with the desired vm_flags, and
- * mmap_sem must not be held.
+ * do_mm_populate - populate and/or mlock pages within a range of
+ * address space for the specified mm_struct.
  */
-int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
+int do_mm_populate(struct mm_struct *mm, unsigned long start, unsigned long len,
+		int ignore_errors)
 {
-	struct mm_struct *mm = current->mm;
 	unsigned long end, nstart, nend;
 	struct vm_area_struct *vma = NULL;
 	int locked = 0;
@@ -1448,6 +1448,18 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 	return ret;	/* 0 or negative error code */
 }
 
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
+{
+	return do_mm_populate(current->mm, start, len, ignore_errors);
+}
+
 /**
  * get_dump_page() - pin user page in memory while writing it to core dump
  * @addr: user address
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 225dcf7536ae396e6913e87389159082c4d9050e..9d2035632aed813b4c418c95d68dde50cbc797e8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <linux/share_pool.h>
 #include
 #include
@@ -3961,6 +3962,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	}
 
 	page = alloc_huge_page(vma, haddr, 0);
+	if (IS_ERR(page) && sp_check_vm_share_pool(vma->vm_flags)) {
+		page = alloc_huge_page_node(hstate_file(vma->vm_file),
+					    numa_mem_id());
+		if (!page)
+			page = ERR_PTR(-ENOMEM);
+	}
 	if (IS_ERR(page)) {
 		/*
 		 * Returning error will result in faulting task being
@@ -5265,6 +5272,41 @@ int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
 }
 EXPORT_SYMBOL_GPL(hugetlb_insert_hugepage_pte_by_pa);
 
+int hugetlb_insert_hugepage(struct vm_area_struct *vma, unsigned long addr,
+			    struct page *hpage, pgprot_t prot)
+{
+	struct hstate *h = hstate_vma(vma);
+	int anon_rmap = 0;
+	spinlock_t *ptl;
+	pte_t *ptep;
+	pte_t pte;
+	struct mm_struct *mm = vma->vm_mm;
+
+	ptep = hugetlb_huge_pte_alloc(mm, addr, huge_page_size(h));
+	if (!ptep)
+		return -ENXIO;
+
+	get_page(hpage);
+
+	ptl = huge_pte_lock(h, mm, ptep);
+	if (anon_rmap) {
+		ClearPagePrivate(hpage);
+		hugepage_add_new_anon_rmap(hpage, vma, addr);
+	} else {
+		page_dup_rmap(hpage, true);
+	}
+
+	pte = make_huge_pte(vma, hpage, ((vma->vm_flags & VM_WRITE)
+				&& (vma->vm_flags & VM_SHARED)));
+	set_huge_pte_at(mm, addr, ptep, pte);
+
+	hugetlb_count_add(pages_per_huge_page(h), mm);
+
+	spin_unlock(ptl);
+
+	return 0;
+}
+
 #ifdef CONFIG_ASCEND_CHARGE_MIGRATE_HUGEPAGES
 
 static int __init ascend_enable_charge_migrate_hugepages(char *s)
diff --git a/mm/memory.c b/mm/memory.c
index 7503203c8436c1cc45fc8b97bdc22a40f291206e..e369f3961ad214b428a71fd3d288c9c5398367c9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -70,6 +70,7 @@
 #include
 #include
 #include
+#include <linux/share_pool.h>
 #include
 #include
@@ -1529,7 +1530,11 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 		BUG_ON(vma->vm_flags & VM_PFNMAP);
 		vma->vm_flags |= VM_MIXEDMAP;
 	}
-	return insert_page(vma, addr, page, vma->vm_page_prot);
+
+	if (sp_check_hugepage(page))
+		return hugetlb_insert_hugepage(vma, addr, page, vma->vm_page_prot);
+	else
+		return insert_page(vma, addr, page, vma->vm_page_prot);
 }
 EXPORT_SYMBOL(vm_insert_page);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 00702331afc15a2ae6a36b59705d19f43c3bbcf9..e1069c42ec8e7275e90f045a31d88104ea080216 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -46,6 +46,7 @@
 #include
 #include
 #include
+#include <linux/share_pool.h>
 #include
 #include
@@ -178,6 +179,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
+	sp_area_drop(vma);
 	vm_area_free(vma);
 	return next;
 }
@@ -1119,6 +1121,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	if (vm_flags & VM_SPECIAL)
 		return NULL;
 
+	/* don't merge this kind of vma as sp_area couldn't be merged */
+	if (sp_check_vm_share_pool(vm_flags))
+		return NULL;
+
 	if (prev)
 		next = prev->vm_next;
 	else
@@ -1373,16 +1379,20 @@ int unregister_mmap_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(unregister_mmap_notifier);
 #endif
 
+static unsigned long __mmap_region(struct mm_struct *mm,
+				   struct file *file, unsigned long addr,
+				   unsigned long len, vm_flags_t vm_flags,
+				   unsigned long pgoff, struct list_head *uf);
+
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
-unsigned long do_mmap(struct file *file, unsigned long addr,
-			unsigned long len, unsigned long prot,
-			unsigned long flags, vm_flags_t vm_flags,
-			unsigned long pgoff, unsigned long *populate,
-			struct list_head *uf)
+unsigned long __do_mmap(struct mm_struct *mm, struct file *file,
+			unsigned long addr, unsigned long len,
+			unsigned long prot, unsigned long flags,
+			vm_flags_t vm_flags, unsigned long pgoff,
+			unsigned long *populate, struct list_head *uf)
 {
-	struct mm_struct *mm = current->mm;
 	int pkey = 0;
 
 	*populate = 0;
@@ -1407,6 +1417,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
+	/* the MAP_DVPP couldn't work with MAP_SHARE_POOL */
+	if ((flags & MAP_DVPP) && sp_mmap_check(flags))
+		return -EINVAL;
+
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
 	if (!len)
@@ -1568,7 +1582,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (is_set_cdmmask())
 		vm_flags |= ((numanode << CHECKNODE_BITS) & CHECKNODE_MASK);
 
-	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
+	addr = __mmap_region(mm, file, addr, len, vm_flags, pgoff, uf);
 	if (!IS_ERR_VALUE(addr) &&
 	    ((vm_flags & VM_LOCKED) ||
 	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -1576,6 +1590,17 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 		*populate = len;
 	return addr;
 }
 
+/*
+ * The caller must hold down_write(&current->mm->mmap_sem).
+ */
+unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags, vm_flags_t vm_flags,
+	unsigned long pgoff, unsigned long *populate, struct list_head *uf)
+{
+	return __do_mmap(current->mm, file, addr, len, prot, flags, vm_flags, pgoff, populate, uf);
+}
+
+
 unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			      unsigned long prot, unsigned long flags,
 			      unsigned long fd, unsigned long pgoff)
@@ -1716,11 +1741,11 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
 }
 
-unsigned long mmap_region(struct file *file, unsigned long addr,
-		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-		struct list_head *uf)
+static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
+				   unsigned long addr, unsigned long len,
+				   vm_flags_t vm_flags, unsigned long pgoff,
+				   struct list_head *uf)
 {
-	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
 	int error;
 	struct rb_node **rb_link, *rb_parent;
@@ -1882,6 +1907,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	return error;
 }
 
+unsigned long mmap_region(struct file *file, unsigned long addr,
+			  unsigned long len, vm_flags_t vm_flags,
+			  unsigned long pgoff, struct list_head *uf)
+{
+	return __mmap_region(current->mm, file, addr, len, vm_flags, pgoff, uf);
+}
+
 unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
 	/*
@@ -2133,6 +2165,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	if (enable_mmap_dvpp)
 		dvpp_mmap_get_area(&info, flags);
 
+	sp_area_work_around(&info);
+
 	return vm_unmapped_area(&info);
 }
 #endif
@@ -2183,6 +2217,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	if (enable_mmap_dvpp)
 		dvpp_mmap_get_area(&info, flags);
 
+	sp_area_work_around(&info);
+
 	addr = vm_unmapped_area(&info);
 
 	/*
@@ -2200,6 +2236,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		if (enable_mmap_dvpp)
 			dvpp_mmap_get_area(&info, flags);
 
+		sp_area_work_around(&info);
+
 		addr = vm_unmapped_area(&info);
 	}
 
@@ -2871,6 +2909,22 @@ int vm_munmap(unsigned long start, size_t len)
 }
 EXPORT_SYMBOL(vm_munmap);
 
+int do_vm_munmap(struct task_struct *tsk, unsigned long start, size_t len)
+{
+	int ret;
+	struct mm_struct *mm = tsk->mm;
+	LIST_HEAD(uf);
+
+	if (down_write_killable(&mm->mmap_sem))
+		return -EINTR;
+
+	ret = do_munmap(mm, start, len, &uf);
+	up_write(&mm->mmap_sem);
+	userfaultfd_unmap_complete(mm, &uf);
+	return ret;
+}
+EXPORT_SYMBOL(do_vm_munmap);
+
 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
 	profile_munmap(addr);
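
The hunks above only introduce and export the mm-targeted helpers; no share-pool caller is part of this patch. As a rough illustration (not from the patch), the new __do_mmap(), do_mm_populate() and do_vm_munmap() could be driven along the following lines to map and pre-fault a region in a target task's address space instead of current->mm. The function name sp_example_map_and_populate() and the prot/flag choices are hypothetical; only the helper signatures come from the diff above.

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/userfaultfd_k.h>

/*
 * Illustrative only: mirror the vm_mmap_pgoff() + mm_populate() sequence,
 * but against the mm of a target task rather than current->mm.
 */
static unsigned long sp_example_map_and_populate(struct task_struct *tsk,
						 struct file *file,
						 unsigned long len)
{
	struct mm_struct *mm = tsk->mm;
	unsigned long populate = 0;
	unsigned long addr;
	LIST_HEAD(uf);

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	/* Same contract as do_mmap(), but the mm is passed in explicitly. */
	addr = __do_mmap(mm, file, 0, PAGE_ALIGN(len),
			 PROT_READ | PROT_WRITE,
			 MAP_SHARED | MAP_POPULATE,
			 0, 0, &populate, &uf);

	up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);

	if (IS_ERR_VALUE(addr))
		return addr;

	/* do_mm_populate() takes mmap_sem itself, so call it unlocked. */
	if (populate)
		do_mm_populate(mm, addr, populate, 0);

	return addr;
}

Teardown of such a mapping would go through do_vm_munmap(tsk, addr, len), which takes the target task's mmap_sem and routes to do_munmap() exactly as vm_munmap() does for current.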