提交 3ec93e1a 编写于 作者: X Xu Yu

alinux: mm, memcg: record latency of memcg wmark reclaim

The memcg background async page reclaim, a.k.a. memcg kswapd, is
currently implemented with a dedicated unbound workqueue.

However, memcg kswapd may run too frequently, resulting in high
overhead, page cache thrashing, frequent dirty page writeback, etc., due
to improper memcg memory.wmark_ratio, unreasonable memcg memory capacity,
or even abnormal memcg memory usage.

We need to find out the problematic memcg(s) where memcg kswapd
introduces significant overhead.

This records the latency of each run of the memcg kswapd work, and then
aggregates it into the per-memcg exstat.
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
上级 87bac306
...@@ -85,6 +85,10 @@ memory.exstat ...@@ -85,6 +85,10 @@ memory.exstat
"wmark_min_throttled_ms" field is the total throttled time in milliseconds "wmark_min_throttled_ms" field is the total throttled time in milliseconds
due to positive memory.wmark_min_adj under global memory pressure. due to positive memory.wmark_min_adj under global memory pressure.
"wmark_reclaim_work_ms" field is the total background async page reclaim
(a.k.a. memcg kswapd) work time in milliseconds, currently including
sleep/resched time, due to excessive usage of memory over wmark_high.
zombie memcgs reaper zombie memcgs reaper
==================== ====================
After memcg was deleted, page caches still reference to this memcg After memcg was deleted, page caches still reference to this memcg
......
...@@ -54,6 +54,7 @@ enum memcg_stat_item { ...@@ -54,6 +54,7 @@ enum memcg_stat_item {
enum memcg_exstat_item { enum memcg_exstat_item {
MEMCG_WMARK_MIN, MEMCG_WMARK_MIN,
MEMCG_WMARK_RECLAIM,
MEMCG_NR_EXSTAT, MEMCG_NR_EXSTAT,
}; };
......
...@@ -2313,6 +2313,8 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu) ...@@ -2313,6 +2313,8 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
static void reclaim_wmark(struct mem_cgroup *memcg) static void reclaim_wmark(struct mem_cgroup *memcg)
{ {
long nr_pages; long nr_pages;
struct mem_cgroup *iter;
u64 start, duration;
if (is_wmark_ok(memcg, false)) if (is_wmark_ok(memcg, false))
return; return;
...@@ -2324,7 +2326,21 @@ static void reclaim_wmark(struct mem_cgroup *memcg) ...@@ -2324,7 +2326,21 @@ static void reclaim_wmark(struct mem_cgroup *memcg)
nr_pages = max(SWAP_CLUSTER_MAX, (unsigned long)nr_pages); nr_pages = max(SWAP_CLUSTER_MAX, (unsigned long)nr_pages);
/*
* Typically, we would like to record the actual cpu% of reclaim_wmark
* work, excluding any sleep/resched time. However, currently we just
* simply record the whole duration of reclaim_wmark work for the
* overhead-accuracy trade-off.
*/
start = ktime_get_ns();
try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, true); try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, true);
duration = ktime_get_ns() - start;
css_get(&memcg->css);
for (iter = memcg; iter; iter = parent_mem_cgroup(iter))
this_cpu_add(iter->exstat_cpu->item[MEMCG_WMARK_RECLAIM],
duration);
css_put(&memcg->css);
} }
static void wmark_work_func(struct work_struct *work) static void wmark_work_func(struct work_struct *work)
...@@ -4167,17 +4183,26 @@ static int memcg_stat_show(struct seq_file *m, void *v) ...@@ -4167,17 +4183,26 @@ static int memcg_stat_show(struct seq_file *m, void *v)
return 0; return 0;
} }
/*
 * memcg_exstat_gather - sum one extended-stat counter of @memcg over all CPUs.
 * @memcg: memory cgroup whose per-CPU exstat counters are read.
 * @idx: which memcg_exstat_item to total.
 *
 * Returns the raw total in whatever unit the counter is accumulated in;
 * any unit conversion is left to the caller.
 *
 * Walk every possible CPU rather than only the online ones: the counters
 * are bumped with this_cpu_add(), so a CPU that has since gone offline may
 * still hold part of the total. Skipping it would make the reported value
 * drop whenever a CPU is unplugged.
 * NOTE(review): this assumes no CPU-hotplug callback folds the exstat_cpu
 * counters of a dead CPU elsewhere -- confirm against
 * memcg_hotplug_cpu_dead().
 */
static u64 memcg_exstat_gather(struct mem_cgroup *memcg,
			       enum memcg_exstat_item idx)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(memcg->exstat_cpu, cpu)->item[idx];

	return sum;
}
static int memcg_exstat_show(struct seq_file *m, void *v) static int memcg_exstat_show(struct seq_file *m, void *v)
{ {
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
u64 wmark_min = 0;
int cpu;
for_each_possible_cpu(cpu) { seq_printf(m, "wmark_min_throttled_ms %llu\n",
wmark_min += memcg_exstat_gather(memcg, MEMCG_WMARK_MIN));
per_cpu_ptr(memcg->exstat_cpu, cpu)->item[MEMCG_WMARK_MIN]; seq_printf(m, "wmark_reclaim_work_ms %llu\n",
} memcg_exstat_gather(memcg, MEMCG_WMARK_RECLAIM) / 1000000);
seq_printf(m, "wmark_min_throttled_ms %llu\n", wmark_min);
return 0; return 0;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册