diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ef2c25b71736144487539d8c19c95a176e23ec16..2ac900f0228093c4cd28c832010059c0aabac215 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -120,6 +120,45 @@ static void huge_pagevec_release(struct pagevec *pvec)
 	pagevec_reinit(pvec);
 }
 
+/*
+ * Reserve @nr huge pages on the NUMA node whose id is stored in the upper
+ * bits of vma->vm_flags (vm_flags >> CHECKNODE_BITS).
+ *
+ * resv_huge_pages_node[nid] counts pages reserved here and not yet
+ * dequeued; the request is refused if it would push that count past
+ * free_huge_pages_node[nid].
+ *
+ * Accounting uses the hstate backing @vma, not default_hstate, so a mount
+ * with a non-default huge page size is charged to its own counters.
+ *
+ * NOTE(review): nothing here undoes the reservation; confirm the caller
+ * releases it if a later step of mmap fails.
+ *
+ * Return: 0 on success, -EINVAL for an out-of-range node id, -ENOMEM if
+ * the node cannot cover @nr more pages.
+ */
+static int hugetlb_checknode(struct vm_area_struct *vma, long nr)
+{
+	struct hstate *h = hstate_vma(vma);
+	unsigned long nid = vma->vm_flags >> CHECKNODE_BITS;
+	int ret = 0;
+
+	/* nid indexes per-node arrays, so bound it before touching them. */
+	if (nid >= MAX_NUMNODES)
+		return -EINVAL;
+
+	spin_lock(&hugetlb_lock);
+
+	/* resv is unsigned, so this test alone also rejects nr > free. */
+	if (h->resv_huge_pages_node[nid] + nr > h->free_huge_pages_node[nid])
+		ret = -ENOMEM;
+	else
+		h->resv_huge_pages_node[nid] += nr;
+
+	spin_unlock(&hugetlb_lock);
+	return ret;
+}
+
 /*
  * Mask used when checking the page offset value passed in via system
  * calls. This value will be converted to a loff_t which is signed.
@@ -172,6 +211,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	inode_lock(inode);
 	file_accessed(file);
 
+	/* cdmmask non-empty: reserve on the node encoded in vm_flags first. */
+	if (is_set_cdmmask()) {
+		ret = hugetlb_checknode(vma, len >> huge_page_shift(h));
+		if (ret < 0)
+			goto out;
+	}
+
 	ret = -ENOMEM;
 	if (hugetlb_reserve_pages(inode,
 				vma->vm_pgoff >> huge_page_order(h),
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f9ac17a4d3682a08e7f74f0fd2fde8a7dd302647..32f2837a60751c75c19ec7240ba72284924669a8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -15,6 +15,13 @@ struct ctl_table;
 struct user_struct;
 struct mmu_gather;
 
+/*
+ * A NUMA node id is carried in vm_flags from bit CHECKNODE_BITS upward;
+ * CHECKNODE_MASK selects those bits.
+ */
+#define CHECKNODE_BITS 48
+#define CHECKNODE_MASK (~((_AC(1, UL) << CHECKNODE_BITS) - 1))
+
 #ifndef is_hugepd
 /*
  * Some architectures requires a hugepage directory format that is
@@ -350,6 +357,8 @@ struct hstate {
 	unsigned int nr_huge_pages_node[MAX_NUMNODES];
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+	/* per-node count of pages reserved via hugetlb_checknode() */
+	unsigned int resv_huge_pages_node[MAX_NUMNODES];
 #ifdef CONFIG_CGROUP_HUGETLB
 	/* cgroup control files */
 	struct cftype cgroup_files[5];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7a59dba0024eae11a27f98f2df72c0d9d3622497..2749c4232d49acaf282c13585ec1427d369ab2ec 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -70,6 +70,16 @@ extern const int mmap_rnd_compat_bits_max;
 extern int mmap_rnd_compat_bits __read_mostly;
 #endif
 
+#ifdef CONFIG_COHERENT_DEVICE
+/* True when at least one node is set in cdmmask. */
+static inline bool is_set_cdmmask(void)
+{
+	return !nodes_empty(cdmmask);
+}
+#else
+#define is_set_cdmmask() (0)
+#endif
+
 #include
 #include
 #include
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index cbd67b316a84070ebf017cd0b57d5c3dd902a023..157bffb45099accff4b719716896d5f7d90f977e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -969,6 +969,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
 		SetPagePrivate(page);
 		h->resv_huge_pages--;
+		if (is_set_cdmmask()) {
+			unsigned long nid = vma->vm_flags >> CHECKNODE_BITS;
+
+			/* Never index past the array or wrap the counter. */
+			if (nid < MAX_NUMNODES && h->resv_huge_pages_node[nid])
+				h->resv_huge_pages_node[nid]--;
+		}
 	}
 
 	mpol_cond_put(mpol);
diff --git a/mm/mmap.c b/mm/mmap.c
index c1034012aeaa08be4ed8181fde74cc0f4b03e8f1..e1a4d3fa713ec2a3c499baeb83965c2604be794a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -69,6 +69,14 @@ const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
 int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 #endif
 
+/*
+ * Node id handed from ksys_mmap_pgoff() to do_mmap().
+ * NOTE(review): file-scope and written with no locking visible here, so
+ * concurrent mmap() callers look able to overwrite each other's value, and
+ * do_mmap() callers that bypass ksys_mmap_pgoff() would see a stale one -
+ * confirm, or pass the id through the call chain instead.
+ */
+static unsigned long numanode;
 static bool ignore_rlimit_data;
 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
@@ -1531,6 +1539,13 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			vm_flags |= VM_NORESERVE;
 	}
 
+	/*
+	 * Store the NUMA node id in the top bits of vm_flags; hugetlbfs
+	 * file mmap reads it back to check that node's huge pages.
+	 */
+	if (is_set_cdmmask())
+		vm_flags |= ((numanode << CHECKNODE_BITS) & CHECKNODE_MASK);
+
 	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
 	if (!IS_ERR_VALUE(addr) &&
 	    ((vm_flags & VM_LOCKED) ||
@@ -1546,6 +1561,15 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 	struct file *file = NULL;
 	unsigned long retval;
 
+	/*
+	 * Take the node id from the MAP_HUGE_SHIFT/MAP_HUGE_MASK field of
+	 * flags, then clear that field.
+	 */
+	if (is_set_cdmmask()) {
+		numanode = (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK;
+		flags &= ~(MAP_HUGE_MASK << MAP_HUGE_SHIFT);
+	}
+
 	if (!(flags & MAP_ANONYMOUS)) {
 		audit_mmap_fd(fd, flags);
 		file = fget(fd);