diff --git a/fs/Kconfig b/fs/Kconfig
index aa097ca64ef6ab9d61413dea9faea5cbc3d9d89a..cde0ec856dfd6e3ca3082c8651e1bfeaad119179 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -270,6 +270,18 @@ config DYNAMIC_HUGETLB
 	  pages automatically. The tasks in the memcg prefer to alloc dynamic
 	  hugepage.
 
+config ENHANCED_HUGETLB_MMAP
+	bool "enhanced hugetlb mmap"
+	default n
+	depends on HUGETLBFS
+	help
+	  Add private file-backed mmap support for hugetlb.
+	  This feature adds vm_actual_file to the vma to record the original
+	  file and copies its contents into hugetlb pages at page fault time.
+	  Procfs and perf record report the file name of vm_actual_file.
+	  Hugetlb helps reduce the TLB miss rate, and this feature aims to
+	  extend its usage.
+
 config MEMFD_CREATE
 	def_bool TMPFS || HUGETLBFS
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7b8a513d9f69e6b66f8f4203fb64cdf15c84034d..391b967fcfbfdb69d2f4d8c3a48d6dff1742c353 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -280,6 +280,11 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	dev_t dev = 0;
 	const char *name = NULL;
 
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+	if (vma->vm_actual_file)
+		file = vma->vm_actual_file;
+#endif
+
 	if (file) {
 		struct inode *inode = file_inode(vma->vm_file);
 		dev = inode->i_sb->s_dev;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1c22e294f083be2503b76eedf4dd77f13278daa4..9de02b116185054b47a7709afeb669b8259a3334 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -376,7 +376,11 @@ struct vm_area_struct {
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 
+#if defined(CONFIG_ENHANCED_HUGETLB_MMAP) && !defined(__GENKSYMS__)
+	KABI_USE(1, struct file *vm_actual_file);
+#else
 	KABI_RESERVE(1)
+#endif
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index e75b65364dcefb3648e6099f39a135f065977d0e..2a396d81aca696429f041514ab1859eadb05ff99 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -31,6 +31,7 @@
 #define MAP_FIXED_NOREPLACE	0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
 #define MAP_REPLACE	0x1000000
 
+#define MAP_FILE_HUGETLB	0x2000000	/* hugetlb private file map support */
 #define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be
 					 * uninitialized */
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 68dc8a8e7990a97c675b547f593056630e46b59d..bbc770d8cbdb595c774854fe22d1158fad9ac6dc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8119,6 +8119,13 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		flags |= MAP_LOCKED;
 	if (is_vm_hugetlb_page(vma))
 		flags |= MAP_HUGETLB;
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+	if (vma->vm_actual_file) {
+		/* perf ignores hugetlb vmas, so drop this flag */
+		flags &= ~MAP_HUGETLB;
+		file = vma->vm_actual_file;
+	}
+#endif
 
 	if (file) {
 		struct inode *inode;
diff --git a/kernel/fork.c b/kernel/fork.c
index 0fb86b65ae60ca5e1fc9fdc757fa4151ec98d7e3..c8ec029e158a645a5c8b4ca4a9f0cde184a294dd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -571,6 +571,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			i_mmap_unlock_write(mapping);
 		}
 
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+		if (tmp->vm_actual_file)
+			get_file(tmp->vm_actual_file);
+#endif
+
 		/*
 		 * Clear hugetlb-related page reserves for children. This only
 		 * affects MAP_PRIVATE mappings. Faults generated by the child
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c5168c7f282af0e6890cde514a4aa9e6a0c46d17..817ae73d40bd669c4d891536c5619191c5e6b61e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4536,6 +4536,20 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	i_mmap_unlock_write(mapping);
 }
 
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+static int read_actual_file(struct page *page, struct vm_area_struct *vma,
+			    loff_t *off, size_t size)
+{
+	void *kaddr;
+	unsigned long read_size = 0;
+
+	kaddr = kmap(page);
+	read_size = kernel_read(vma->vm_actual_file, kaddr, size, off);
+	kunmap(page);
+	return IS_ERR_VALUE(read_size) ? read_size : 0;
+}
+#endif
+
 /*
  * Hugetlb_cow() should be called with page lock of the original hugepage held.
  * Called with hugetlb_instantiation_mutex held and pte_page locked so we
@@ -4837,6 +4851,17 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 			goto out;
 		}
 		clear_huge_page(page, address, pages_per_huge_page(h));
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+		if (vma->vm_actual_file) {
+			loff_t off = haddr - vma->vm_start +
+				     (vma->vm_pgoff << PAGE_SHIFT);
+			size_t page_size = huge_page_size(h);
+
+			ret = read_actual_file(page, vma, &off, page_size);
+			if (ret)
+				goto out;
+		}
+#endif
 		__SetPageUptodate(page);
 		new_page = true;
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 5489d70db84e35018de8b7c0f8cf22d1c3bea459..515d668e130170ff5656c76ac7d162dc34a091e7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -188,6 +188,10 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+	if (vma->vm_actual_file)
+		fput(vma->vm_actual_file);
+#endif
 	mpol_put(vma_policy(vma));
 	sp_area_drop(vma);
 	vm_area_free(vma);
@@ -1849,6 +1853,17 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			return -EBADF;
 		if (is_file_hugepages(file)) {
 			len = ALIGN(len, huge_page_size(hstate_file(file)));
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+		/*
+		 * glibc can use this flag to load shared libraries into
+		 * hugetlb pages, similar to the exec_hugetlb feature.
+		 */
+		} else if (unlikely(flags & MAP_FILE_HUGETLB)) {
+			if (!(flags & MAP_PRIVATE)) {
+				retval = -EINVAL;
+				goto out_fput;
+			}
+#endif
 		} else if (unlikely(flags & MAP_HUGETLB)) {
 			retval = -EINVAL;
 			goto out_fput;
@@ -3047,6 +3062,11 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (new->vm_file)
 		get_file(new->vm_file);
 
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+	if (new->vm_actual_file)
+		get_file(new->vm_actual_file);
+#endif
+
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
 
diff --git a/mm/util.c b/mm/util.c
index 67b350f4ffdc5f9e145ed27babea34c4b8b77821..05efa0b50be765957998cda99dc732cbba3362f7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -496,6 +496,31 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
 }
 EXPORT_SYMBOL_GPL(account_locked_vm);
 
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+static struct file *prepare_hugetlb_mmap(unsigned long flags, unsigned long size)
+{
+	int page_size_log = (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK;
+	struct user_struct *user = NULL;
+
+	return hugetlb_file_setup(HUGETLB_ANON_FILE, size, VM_NORESERVE, &user,
+				  HUGETLB_ANONHUGE_INODE, page_size_log);
+}
+
+static unsigned long finish_hugetlb_mmap(unsigned long addr, struct file *actual_file,
+					 struct file *huge_file)
+{
+	struct vm_area_struct *vma;
+
+	fput(huge_file);
+	vma = find_vma(current->mm, addr);
+	if (!vma)
+		return -EINVAL;
+	vma->vm_actual_file = get_file(actual_file);
+
+	return addr;
+}
+#endif
+
 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long pgoff)
@@ -504,13 +529,28 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	unsigned long populate;
 	LIST_HEAD(uf);
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+	struct file *actual_file = NULL;
+#endif
 
 	ret = security_mmap_file(file, prot, flag);
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+	if (flag & MAP_FILE_HUGETLB) {
+		actual_file = file;
+		file = prepare_hugetlb_mmap(flag, len + (pgoff << PAGE_SHIFT));
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+	}
+#endif
 	if (!ret) {
 		if (mmap_write_lock_killable(mm))
 			return -EINTR;
 		ret = do_mmap(file, addr, len, prot, flag, pgoff,
 			      &populate, &uf);
+#ifdef CONFIG_ENHANCED_HUGETLB_MMAP
+		if (!IS_ERR_VALUE(ret) && (flag & MAP_FILE_HUGETLB))
+			ret = finish_hugetlb_mmap(ret, actual_file, file);
+#endif
 		mmap_write_unlock(mm);
 		userfaultfd_unmap_complete(mm, &uf);
 		if (populate)
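
For reference, a minimal user-space sketch (not part of the patch) of how MAP_FILE_HUGETLB might be exercised once CONFIG_ENHANCED_HUGETLB_MMAP is enabled. It assumes free hugetlb pages are available and a 2 MB default huge page size; MAP_FILE_HUGETLB is defined locally in case the installed uapi headers do not carry it yet, and the fallback path /usr/lib64/libc.so.6 is only an example.

/* Hypothetical test program for MAP_FILE_HUGETLB; build with: gcc -o hugemap hugemap.c */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#ifndef MAP_FILE_HUGETLB
#define MAP_FILE_HUGETLB 0x2000000	/* value added by this patch's uapi change */
#endif

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/usr/lib64/libc.so.6"; /* example only */
	size_t huge_sz = 2UL << 20;	/* assumed default huge page size */
	struct stat st;
	size_t len;
	char *p;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0) {
		perror(path);
		return 1;
	}

	/* Round the length up to a huge page boundary; the kernel clears each
	 * huge page before copying the file contents in, so the tail reads as 0. */
	len = ((size_t)st.st_size + huge_sz - 1) & ~(huge_sz - 1);

	/* Private read-only mapping backed by hugetlb pages; the original file
	 * is remembered in vm_actual_file and read in at page fault time. */
	p = mmap(NULL, len, PROT_READ, MAP_PRIVATE | MAP_FILE_HUGETLB, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_FILE_HUGETLB)");
		return 1;
	}

	/* Touching the mapping triggers hugetlb_no_page() -> read_actual_file(). */
	printf("first byte of %s: %#x\n", path, (unsigned char)p[0]);

	munmap(p, len);
	close(fd);
	return 0;
}

With the patch applied, /proc/<pid>/maps and perf record should report the original file name for this vma rather than the anonymous hugetlb inode.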