diff --git a/Documentation/alibaba/interfaces.rst b/Documentation/alibaba/interfaces.rst
index 67ef70c65d7492f2c324b354abbf60d681406e53..b957e493065c3c31a72ece9b0b8aed2911758936 100644
--- a/Documentation/alibaba/interfaces.rst
+++ b/Documentation/alibaba/interfaces.rst
@@ -85,6 +85,10 @@ memory.exstat
 "wmark_min_throttled_ms" field is the total throttled time in milliseconds
 due to positive memory.wmark_min_adj under global memory pressure.
 
+"wmark_reclaim_work_ms" field is the total background async page reclaim
+(a.k.a. memcg kswap) work time in milliseconds, currently including
+sleep/resched time, due to memory usage exceeding wmark_high.
+
 zombie memcgs reaper
 ====================
 After memcg was deleted, page caches still reference to this memcg
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9b99df80959050bf79558ac943c2d282326fd4d2..8d0950810f851c91b1902198c1a3dee17adaa4f4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,6 +54,7 @@ enum memcg_stat_item {
 
 enum memcg_exstat_item {
 	MEMCG_WMARK_MIN,
+	MEMCG_WMARK_RECLAIM,
 	MEMCG_NR_EXSTAT,
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 66037ae2edc74db70c6fa8ed3d5185bab87415a8..e663acde03848fbb174d20b61a366c4e3c1d72e9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2313,6 +2313,8 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 static void reclaim_wmark(struct mem_cgroup *memcg)
 {
 	long nr_pages;
+	struct mem_cgroup *iter;
+	u64 start, duration;
 
 	if (is_wmark_ok(memcg, false))
 		return;
@@ -2324,7 +2326,21 @@ static void reclaim_wmark(struct mem_cgroup *memcg)
 
 	nr_pages = max(SWAP_CLUSTER_MAX, (unsigned long)nr_pages);
 
+	/*
+	 * Ideally we would record only the actual CPU time spent on
+	 * reclaim_wmark work, excluding any sleep/resched time. For now we
+	 * simply record the whole duration of the work as an
+	 * overhead-accuracy trade-off.
+	 */
+	start = ktime_get_ns();
 	try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, true);
+	duration = ktime_get_ns() - start;
+
+	css_get(&memcg->css);
+	for (iter = memcg; iter; iter = parent_mem_cgroup(iter))
+		this_cpu_add(iter->exstat_cpu->item[MEMCG_WMARK_RECLAIM],
+			     duration);
+	css_put(&memcg->css);
 }
 
 static void wmark_work_func(struct work_struct *work)
@@ -4167,17 +4183,26 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static u64 memcg_exstat_gather(struct mem_cgroup *memcg,
+			       enum memcg_exstat_item idx)
+{
+	u64 sum = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		sum += per_cpu_ptr(memcg->exstat_cpu, cpu)->item[idx];
+
+	return sum;
+}
+
 static int memcg_exstat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
-	u64 wmark_min = 0;
-	int cpu;
 
-	for_each_possible_cpu(cpu) {
-		wmark_min +=
-		per_cpu_ptr(memcg->exstat_cpu, cpu)->item[MEMCG_WMARK_MIN];
-	}
-	seq_printf(m, "wmark_min_throttled_ms %llu\n", wmark_min);
+	seq_printf(m, "wmark_min_throttled_ms %llu\n",
+		   memcg_exstat_gather(memcg, MEMCG_WMARK_MIN));
+	seq_printf(m, "wmark_reclaim_work_ms %llu\n",
+		   memcg_exstat_gather(memcg, MEMCG_WMARK_RECLAIM) / 1000000);
 	return 0;
 }
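
The patch charges each reclaim duration with a per-CPU add on the memcg and
every ancestor, and only folds the per-CPU slots together when memory.exstat
is read. The toy userspace model below is a sketch of that scheme, not kernel
code: struct toy_memcg, NCPUS and the helper names are made up for
illustration, with a plain array standing in for the exstat_cpu percpu data.

#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

struct toy_memcg {
	struct toy_memcg *parent;
	uint64_t reclaim_ns[NCPUS];	/* stands in for exstat_cpu->item[] */
};

/* write path: one cheap add per level, mirroring the parent walk with
 * this_cpu_add() in reclaim_wmark() */
static void account_reclaim(struct toy_memcg *memcg, int cpu, uint64_t ns)
{
	struct toy_memcg *iter;

	for (iter = memcg; iter; iter = iter->parent)
		iter->reclaim_ns[cpu] += ns;
}

/* read path: fold the per-CPU slots only at read time, like
 * memcg_exstat_gather(), then convert ns to ms as memcg_exstat_show() does */
static uint64_t gather_reclaim_ms(struct toy_memcg *memcg)
{
	uint64_t sum = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		sum += memcg->reclaim_ns[cpu];
	return sum / 1000000;
}

int main(void)
{
	struct toy_memcg root = { 0 };
	struct toy_memcg child = { .parent = &root };

	account_reclaim(&child, 0, 3000000);	/* 3 ms of reclaim on "CPU" 0 */
	account_reclaim(&child, 1, 2000000);	/* 2 ms on "CPU" 1 */

	printf("child: %llu ms, root: %llu ms\n",
	       (unsigned long long)gather_reclaim_ms(&child),
	       (unsigned long long)gather_reclaim_ms(&root));
	return 0;
}

The point of the split is that the hot reclaim path never takes a shared
counter lock; all cross-CPU work is pushed to the comparatively rare read of
memory.exstat.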
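
A minimal sketch of how userspace might consume the new field: it samples
"wmark_reclaim_work_ms" from memory.exstat twice and prints the delta, i.e.
how much time background reclaim spent during the interval. The field names
come from the patch; the /sys/fs/cgroup/memory mount point, the default file
path and the 10-second interval are assumptions, not part of the patch.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* parse one "name value" line from memory.exstat */
static uint64_t read_exstat_field(const char *path, const char *field)
{
	char line[256];
	uint64_t val = 0;
	FILE *fp = fopen(path, "r");

	if (!fp) {
		perror(path);
		exit(1);
	}
	while (fgets(line, sizeof(line), fp)) {
		if (!strncmp(line, field, strlen(field))) {
			val = strtoull(line + strlen(field), NULL, 10);
			break;
		}
	}
	fclose(fp);
	return val;
}

int main(int argc, char **argv)
{
	/* e.g. ./a.out /sys/fs/cgroup/memory/mygroup/memory.exstat */
	const char *path = argc > 1 ? argv[1] :
			"/sys/fs/cgroup/memory/memory.exstat";
	uint64_t before, after;

	before = read_exstat_field(path, "wmark_reclaim_work_ms");
	sleep(10);
	after = read_exstat_field(path, "wmark_reclaim_work_ms");

	printf("background reclaim ran for %" PRIu64 " ms in the last 10s\n",
	       after - before);
	return 0;
}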