From 6174ecb523613c8ed8dcdc889d46f4c02f65b9e4 Mon Sep 17 00:00:00 2001
From: zhong jiang
Date: Fri, 15 Feb 2019 10:32:33 +0800
Subject: [PATCH] pagecache: add sysctl interface to limit pagecache

euleros inclusion
category: feature
feature: pagecache limit

Add proc sysctl interfaces to limit the size of the page cache and to
reclaim page cache memory, both when the configured limit is exceeded
and periodically from a per-CPU deferrable work item.

Signed-off-by: zhong jiang
Reviewed-by: Jing xiangfeng
Signed-off-by: zhong jiang
Signed-off-by: Yang Yingliang
---
 include/linux/pagemap.h | 18 +--------
 include/linux/swap.h    | 19 +++++++++
 kernel/sysctl.c         | 18 +++++++++
 mm/filemap.c            | 20 ++++++++++
 mm/page_alloc.c         | 68 ++++++++++++++++++++++++++++++++
 mm/vmscan.c             | 86 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 213 insertions(+), 16 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b1bd2186e6d2..65245ce3557f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -613,6 +613,8 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
 	return 0;
 }
 
+int add_to_page_cache(struct page *page, struct address_space *mapping,
+				pgoff_t index, gfp_t gfp_mask);
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
@@ -623,22 +625,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
 void delete_from_page_cache_batch(struct address_space *mapping,
 				  struct pagevec *pvec);
 
-/*
- * Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run __SetPageLocked() against it.
- */
-static inline int add_to_page_cache(struct page *page,
-		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
-{
-	int error;
-
-	__SetPageLocked(page);
-	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
-	if (unlikely(error))
-		__ClearPageLocked(page);
-	return error;
-}
-
 static inline unsigned long dir_pages(struct inode *inode)
 {
 	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 77221c16733a..d7046787c40d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -367,6 +367,25 @@ extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern unsigned long vm_total_pages;
 
+extern unsigned long vm_cache_limit_ratio;
+extern unsigned long vm_cache_limit_ratio_min;
+extern unsigned long vm_cache_limit_ratio_max;
+extern unsigned long vm_cache_limit_mbytes;
+extern unsigned long vm_cache_limit_mbytes_min;
+extern unsigned long vm_cache_limit_mbytes_max;
+extern int vm_cache_reclaim_s;
+extern int vm_cache_reclaim_s_min;
+extern int vm_cache_reclaim_s_max;
+extern int vm_cache_reclaim_weight;
+extern int vm_cache_reclaim_weight_min;
+extern int vm_cache_reclaim_weight_max;
+extern unsigned long page_cache_over_limit(void);
+extern void shrink_page_cache(gfp_t mask);
+extern int cache_limit_ratio_sysctl_handler(struct ctl_table *table, int write,
+	void __user *buffer, size_t *length, loff_t *ppos);
+extern int cache_limit_mbytes_sysctl_handler(struct ctl_table *table, int write,
+	void __user *buffer, size_t *length, loff_t *ppos);
+
 #ifdef CONFIG_NUMA
 extern int node_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 292e19af18d7..739da03342d2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1364,6 +1364,24 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.procname	= "cache_reclaim_s",
+		.data		= &vm_cache_reclaim_s,
+		.maxlen		= sizeof(vm_cache_reclaim_s),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &vm_cache_reclaim_s_min,
+		.extra2		= &vm_cache_reclaim_s_max,
+	},
+	{
+		.procname	= "cache_reclaim_weight",
+		.data		= &vm_cache_reclaim_weight,
+		.maxlen		= sizeof(vm_cache_reclaim_weight),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &vm_cache_reclaim_weight_min,
+		.extra2		= &vm_cache_reclaim_weight_max,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	{
 		.procname	= "nr_hugepages",
diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4..ac4b66869cca 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -895,6 +895,26 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 }
 EXPORT_SYMBOL(add_to_page_cache_locked);
 
+/*
+ * Like add_to_page_cache_locked, but used to add newly allocated pages:
+ * the page is new, so we can just run __SetPageLocked() against it.
+ */
+int add_to_page_cache(struct page *page,
+		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
+{
+	int error;
+
+	if (vm_cache_limit_mbytes && page_cache_over_limit())
+		shrink_page_cache(gfp_mask);
+	__SetPageLocked(page);
+	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
+	if (unlikely(error))
+		__ClearPageLocked(page);
+
+	return error;
+}
+EXPORT_SYMBOL(add_to_page_cache);
+
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 				pgoff_t offset, gfp_t gfp_mask)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a9de1dbb9a6c..19bf37971989 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8149,3 +8149,71 @@ bool set_hwpoison_free_buddy_page(struct page *page)
 	return hwpoisoned;
 }
 #endif
+
+unsigned long page_cache_over_limit(void)
+{
+	unsigned long lru_file, limit;
+
+	limit = vm_cache_limit_mbytes * ((1024 * 1024UL) / PAGE_SIZE);
+	lru_file = global_node_page_state(NR_ACTIVE_FILE) +
+		   global_node_page_state(NR_INACTIVE_FILE);
+	if (lru_file > limit)
+		return lru_file - limit;
+
+	return 0;
+}
+
+int cache_limit_ratio_sysctl_handler(struct ctl_table *table, int write,
+	void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+
+	/* totalram_pages may change after early boot */
+	vm_cache_limit_mbytes_max = totalram_pages >> (20 - PAGE_SHIFT);
+
+	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+	if (ret)
+		return ret;
+	if (write) {
+		vm_cache_limit_mbytes = totalram_pages
+			* vm_cache_limit_ratio / 100
+			* PAGE_SIZE / (1024 * 1024UL);
+		if (vm_cache_limit_ratio)
+			pr_warn("page cache limit set to %lu%%\n",
+				vm_cache_limit_ratio);
+		else
+			pr_warn("page cache limit off\n");
+		while (vm_cache_limit_mbytes && page_cache_over_limit())
+			shrink_page_cache(GFP_KERNEL);
+	}
+
+	return 0;
+}
+
+int cache_limit_mbytes_sysctl_handler(struct ctl_table *table, int write,
+	void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+
+	vm_cache_limit_mbytes_max = totalram_pages >> (20 - PAGE_SHIFT);
+
+	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+	if (ret)
+		return ret;
+	if (write) {
+		vm_cache_limit_ratio = (vm_cache_limit_mbytes
+			* ((1024 * 1024UL) / PAGE_SIZE)
+			+ totalram_pages / 200)
+			* 100 / totalram_pages;
+		if (vm_cache_limit_mbytes)
+			pr_warn("page cache limit set to %luMB\n",
+				vm_cache_limit_mbytes);
+		else
+			pr_warn("page cache limit off\n");
+
+		while (vm_cache_limit_mbytes && page_cache_over_limit())
+			shrink_page_cache(GFP_KERNEL);
+	}
+
+	return 0;
+}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 961401c46334..993bcb02709a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -166,6 +166,20 @@ int vm_swappiness = 60;
  */
 unsigned long vm_total_pages;
 
+unsigned long vm_cache_limit_ratio;
+unsigned long vm_cache_limit_ratio_min;
+unsigned long vm_cache_limit_ratio_max;
+unsigned long vm_cache_limit_mbytes __read_mostly;
+unsigned long vm_cache_limit_mbytes_min;
+unsigned long vm_cache_limit_mbytes_max;
+int vm_cache_reclaim_s __read_mostly;
+int vm_cache_reclaim_s_min;
+int vm_cache_reclaim_s_max;
+int vm_cache_reclaim_weight __read_mostly;
+int vm_cache_reclaim_weight_min;
+int vm_cache_reclaim_weight_max;
+static DEFINE_PER_CPU(struct delayed_work, vmscan_work);
+
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
@@ -3513,6 +3527,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 
 	count_vm_event(PAGEOUTRUN);
 
+	if (vm_cache_limit_mbytes && page_cache_over_limit())
+		shrink_page_cache(GFP_KERNEL);
+
 	do {
 		unsigned long nr_reclaimed = sc.nr_reclaimed;
 		bool raise_priority = true;
@@ -3895,6 +3912,74 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 }
 #endif /* CONFIG_HIBERNATION */
 
+static unsigned long __shrink_page_cache(gfp_t mask)
+{
+	struct scan_control sc = {
+		.gfp_mask = current_gfp_context(mask),
+		.reclaim_idx = gfp_zone(mask),
+		.may_writepage = !laptop_mode,
+		.nr_to_reclaim = SWAP_CLUSTER_MAX *
+			(unsigned long)vm_cache_reclaim_weight,
+		.may_unmap = 1,
+		.may_swap = 1,
+		.order = 0,
+		.priority = DEF_PRIORITY,
+		.target_mem_cgroup = NULL,
+		.nodemask = NULL,
+	};
+
+	struct zonelist *zonelist = node_zonelist(numa_node_id(), mask);
+
+	return do_try_to_free_pages(zonelist, &sc);
+}
+
+void shrink_page_cache(gfp_t mask)
+{
+	/* We reclaim the highmem zone too, it is useful for 32bit arch */
+	__shrink_page_cache(mask | __GFP_HIGHMEM);
+}
+
+static void shrink_page_cache_work(struct work_struct *w)
+{
+	struct delayed_work *work = to_delayed_work(w);
+
+	if (vm_cache_reclaim_s == 0) {
+		schedule_delayed_work(work, round_jiffies_relative(120 * HZ));
+		return;
+	}
+
+	shrink_page_cache(GFP_KERNEL);
+	schedule_delayed_work(work,
+		round_jiffies_relative((unsigned long)vm_cache_reclaim_s * HZ));
+}
+
+static void shrink_page_cache_init(void)
+{
+	int cpu;
+
+	vm_cache_limit_ratio = 0;
+	vm_cache_limit_ratio_min = 0;
+	vm_cache_limit_ratio_max = 100;
+	vm_cache_limit_mbytes = 0;
+	vm_cache_limit_mbytes_min = 0;
+	vm_cache_limit_mbytes_max = totalram_pages >> (20 - PAGE_SHIFT);
+	vm_cache_reclaim_s = 0;
+	vm_cache_reclaim_s_min = 0;
+	vm_cache_reclaim_s_max = 43200;
+	vm_cache_reclaim_weight = 1;
+	vm_cache_reclaim_weight_min = 1;
+	vm_cache_reclaim_weight_max = 100;
+
+	for_each_online_cpu(cpu) {
+		struct delayed_work *work = &per_cpu(vmscan_work, cpu);
+
+		INIT_DEFERRABLE_WORK(work, shrink_page_cache_work);
+		schedule_delayed_work_on(cpu, work,
+			__round_jiffies_relative(
+				(unsigned long)vm_cache_reclaim_s * HZ, cpu));
+	}
+}
+
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
    not required for correctness.  So if the last cpu in a node goes
    away, we get changed to run anywhere: as the first one comes back,
@@ -3964,6 +4049,7 @@ static int __init kswapd_init(void)
 					"mm/vmscan:online", kswapd_cpu_online,
 					NULL);
 	WARN_ON(ret < 0);
+	shrink_page_cache_init();
 	return 0;
 }
-- 
GitLab
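
As a usage illustration (not part of the patch itself), the sketch below drives
the two knobs that this patch registers in vm_table, /proc/sys/vm/cache_reclaim_s
and /proc/sys/vm/cache_reclaim_weight, from a userspace C program. The file
paths follow the .procname entries above; the values written are arbitrary
examples, not recommendations.

/*
 * Illustrative userspace sketch: enable the periodic page cache reclaim
 * added by the patch.  Assumes a kernel with the patch applied, so the
 * two files below exist under /proc/sys/vm.
 */
#include <stdio.h>
#include <stdlib.h>

static void write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(EXIT_FAILURE);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	/* run one background reclaim pass every 300 seconds */
	write_sysctl("/proc/sys/vm/cache_reclaim_s", "300");
	/* each pass targets 4 * SWAP_CLUSTER_MAX pages */
	write_sysctl("/proc/sys/vm/cache_reclaim_weight", "4");
	return 0;
}

Note that a per-CPU worker that was idling because cache_reclaim_s was 0 only
re-reads the interval on its next 120-second wakeup (see
shrink_page_cache_work()), so a newly written period takes effect within
roughly two minutes.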