From 6221288e5e97c3a3feca3bb1cc30d9661d36e459 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Sun, 1 Sep 2019 19:10:49 +0800 Subject: [PATCH] alinux: memcg: Account throttled time due to memory.wmark_min_adj Accessing the original memory.stat turned out to be a heavy operation which has caused many real product problems. Introduce a new cgroup file, memory.exstat, which stands for "extra/extended memory.stat" and contains dedicated statistics from Alibaba Cloud Kernel. memory.exstat is supposed to provide hierarchical statistics. Export its first item, "wmark_min_throttled_ms"; more items (direct reclaim, direct compaction, etc.) will be added later. Reviewed-by: Yang Shi Reviewed-by: Gavin Shan Signed-off-by: Xunlei Pang --- include/linux/memcontrol.h | 13 +++++++++++++ mm/memcontrol.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a8e2e9d6b61b..84f91faed4de 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -49,6 +49,16 @@ enum memcg_stat_item { MEMCG_NR_STAT, }; +enum memcg_exstat_item { + MEMCG_WMARK_MIN, + MEMCG_NR_EXSTAT, +}; + +/* Only care about 64bit using "long" */ +struct mem_cgroup_exstat_cpu { + unsigned long item[MEMCG_NR_EXSTAT]; +}; + enum memcg_memory_event { MEMCG_LOW, MEMCG_HIGH, @@ -298,6 +308,9 @@ struct mem_cgroup { bool tcpmem_active; int tcpmem_pressure; + /* memory.exstat */ + struct mem_cgroup_exstat_cpu __percpu *exstat_cpu; + int wmark_min_adj; /* user-set value */ int wmark_min_eadj; /* value in effect */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 64a6d7449cee..a6ee0c17e466 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3990,6 +3990,21 @@ static int memcg_stat_show(struct seq_file *m, void *v) return 0; } +static int memcg_exstat_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + u64 wmark_min = 0; + int cpu; + + for_each_possible_cpu(cpu) { + wmark_min += 
+ per_cpu_ptr(memcg->exstat_cpu, cpu)->item[MEMCG_WMARK_MIN]; + } + seq_printf(m, "wmark_min_throttled_ms %llu\n", wmark_min); + + return 0; +} + static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -4178,6 +4193,7 @@ void mem_cgroup_wmark_min_throttle(void) { unsigned int msec = current->wmark_min_throttle_ms; unsigned long pflags; + struct mem_cgroup *memcg, *iter; if (likely(!msec)) return; @@ -4185,6 +4201,12 @@ void mem_cgroup_wmark_min_throttle(void) msleep_interruptible(msec); psi_memstall_leave(&pflags); current->wmark_min_throttle_ms = 0; + + /* Account throttled time hierarchically, ignore premature sleep */ + memcg = get_mem_cgroup_from_mm(current->mm); + for (iter = memcg; iter; iter = parent_mem_cgroup(iter)) + __this_cpu_add(iter->exstat_cpu->item[MEMCG_WMARK_MIN], msec); + css_put(&memcg->css); } #define WMARK_MIN_THROTTLE_MS 100UL @@ -5084,6 +5106,10 @@ static struct cftype mem_cgroup_legacy_files[] = { .name = "stat", .seq_show = memcg_stat_show, }, + { + .name = "exstat", + .seq_show = memcg_exstat_show, + }, { .name = "wmark_ratio", .flags = CFTYPE_NOT_ON_ROOT, @@ -5378,6 +5404,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->stat_cpu); + free_percpu(memcg->exstat_cpu); kfree(memcg); } @@ -5410,6 +5437,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (!memcg->stat_cpu) goto fail; + memcg->exstat_cpu = alloc_percpu(struct mem_cgroup_exstat_cpu); + if (!memcg->exstat_cpu) + goto fail; + for_each_node(node) if (alloc_mem_cgroup_per_node_info(memcg, node)) goto fail; -- GitLab