From bbaee3afa992f8f15357a742d20175e6b9d1d725 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Sat, 17 Aug 2019 08:04:03 +0800 Subject: [PATCH] alinux: mm: memcontrol: make distance between wmark_low and wmark_high configurable Introduce a new interface, wmark_scale_factor, which defines the distance between wmark_high and wmark_low. The unit is in fractions of 10,000. The default value of 50 means the distance between wmark_high and wmark_low is 0.5% of the max limit of the cgroup. The maximum value is 1000, or 10% of the max limit. The distance between wmark_low and wmark_high has an impact on how hard memcg kswapd reclaims. Reviewed-by: Gavin Shan Reviewed-by: Xunlei Pang Signed-off-by: Yang Shi --- Documentation/admin-guide/cgroup-v2.rst | 9 ++++ Documentation/cgroup-v1/memory.txt | 4 ++ include/linux/memcontrol.h | 1 + mm/memcontrol.c | 62 ++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 2c86a6cffd88..f0e3e0663d49 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1116,6 +1116,15 @@ PAGE_SIZE multiple when read back. Memory usage low water mark, which means the available memory is ok. For details, please refer to the above wmark_ratio section. + memory.wmark_scale_factor + A read-write single value file which exists on non-root cgroups. + The default is 50. + + The gap between wmark_low and wmark_high. The unit is in fractions + of 10,000. The default value of 50 means the distance between wmark_high + and wmark_low is 0.5% of the max limit of the cgroup. The maximum value + is 1000, or 10% of max limit. + memory.oom.group A read-write single value file which exists on non-root cgroups. The default value is "0". 
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index 51af42b1e007..8f5ef7d09205 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt
@@ -93,6 +93,10 @@ Brief summary of control files.
 				 read-only)
  memory.wmark_high		 # high limit (memory usge high water mark,
 				 read-only)
+ memory.wmark_scale_factor	 # the gap between wmark_low and wmark_high,
+				 in units of 1/10000 of max limit, default is
+				 50 or 0.5% of max limit. Valid range is 1 to
+				 1000 (10% of max limit).
 
 1. History
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 52fa4797f20f..8feaa0abf1a4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -292,6 +292,7 @@ struct mem_cgroup {
 
 	unsigned int wmark_ratio;
 	struct work_struct wmark_work;
+	unsigned int wmark_scale_factor;
 
 #ifdef CONFIG_MEMCG_KMEM
 	/* Index in the kmem_cache->memcg_params.memcg_caches array */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6aaaaa3ae090..6d6605182b72 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2957,10 +2957,19 @@ static void setup_memcg_wmark(struct mem_cgroup *memcg)
 	unsigned long max = memcg->high > memcg->memory.max ?
 			    memcg->memory.max : memcg->high;
 	unsigned int wmark_ratio = memcg->wmark_ratio;
+	unsigned int wmark_scale_factor = memcg->wmark_scale_factor;
+	unsigned long gap;
 
 	if (wmark_ratio) {
 		high_wmark = (max * wmark_ratio) / 100;
-		low_wmark = high_wmark - (high_wmark >> 8);
+
+		/*
+		 * Set the memcg watermark distance according to the
+		 * scale factor in proportion to max limit.  The gap can
+		 * exceed high_wmark (small wmark_ratio), so clamp at 0.
+		 */
+		gap = mult_frac(max, wmark_scale_factor, 10000);
+		low_wmark = high_wmark > gap ? high_wmark - gap : 0;
 
 		page_counter_set_wmark_low(&memcg->memory, low_wmark);
 		page_counter_set_wmark_high(&memcg->memory, high_wmark);
@@ -3800,6 +3809,42 @@ static ssize_t memory_wmark_ratio_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
+/* Show the wmark_low/wmark_high gap, in 1/10000ths of the max limit. */
+static int memory_wmark_scale_factor_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	/* The field is unsigned int: print it with %u, not %d. */
+	seq_printf(m, "%u\n", READ_ONCE(memcg->wmark_scale_factor));
+
+	return 0;
+}
+
+/*
+ * Set the wmark_low/wmark_high gap.  Accepts 1..1000, in 1/10000ths of
+ * the max limit; returns nbytes on success or a negative errno.
+ */
+static ssize_t memory_wmark_scale_factor_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned int wmark_scale_factor;	/* matches kstrtouint()'s type */
+	int ret;
+
+	/* strstrip() never returns NULL, so no NULL check is needed. */
+	ret = kstrtouint(strstrip(buf), 0, &wmark_scale_factor);
+	if (ret)
+		return ret;
+
+	if (wmark_scale_factor < 1 || wmark_scale_factor > 1000)
+		return -EINVAL;
+
+	xchg(&memcg->wmark_scale_factor, wmark_scale_factor);
+	setup_memcg_wmark(memcg);
+
+	return nbytes;
+}
+
 static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
 {
 	struct mem_cgroup_threshold_ary *t;
@@ -4525,6 +4570,12 @@ static struct cftype mem_cgroup_legacy_files[] = {
 		.private = MEMFILE_PRIVATE(_MEM, WMARK_LOW_LIMIT),
 		.read_u64 = mem_cgroup_read_u64,
 	},
+	{
+		.name = "wmark_scale_factor",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_wmark_scale_factor_show,
+		.write = memory_wmark_scale_factor_write,
+	},
 	{
 		.name = "force_empty",
 		.write = mem_cgroup_force_empty_write,
@@ -4826,6 +4877,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		memcg->swappiness = mem_cgroup_swappiness(parent);
 		memcg->oom_kill_disable = parent->oom_kill_disable;
 		memcg->wmark_ratio = parent->wmark_ratio;
+		/* Default gap is 0.5% max limit */
+		memcg->wmark_scale_factor = parent->wmark_scale_factor ?
+						: 50;
 	}
 	if (parent && parent->use_hierarchy) {
 		memcg->use_hierarchy = true;
@@ -6075,6 +6129,12 @@ static struct cftype memory_files[] = {
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = memory_wmark_low_show,
 	},
+	{
+		.name = "wmark_scale_factor",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_wmark_scale_factor_show,
+		.write = memory_wmark_scale_factor_write,
+	},
 	{
 		.name = "events",
 		.flags = CFTYPE_NOT_ON_ROOT,
-- 
GitLab