提交 83058e75 编写于 作者: X Xu Yu

alinux: mm, memcg: record latency of direct reclaim in every memcg

to #26424368

Probe and calculate the latency of global direct reclaim and memcg
direct reclaim, respectively, and then group into the latency histogram
in struct mem_cgroup. In addition, the total latency is accumulated each
time the histogram is updated.

Note that the latency in each memcg is aggregated from all child memcgs.

Usage:

$ cat memory.direct_reclaim_global_latency
0-1ms:  228
1-5ms:  283
5-10ms:         0
10-100ms:       0
100-500ms:      0
500-1000ms:     0
>=1000ms:       0
total(ms):      539

Each line is the count of global direct reclaim within the appropriate
latency range.

To clear the latency histogram:

$ echo 0 > memory.direct_reclaim_global_latency
$ cat memory.direct_reclaim_global_latency
0-1ms:  0
1-5ms:  0
5-10ms:         0
10-100ms:       0
100-500ms:      0
500-1000ms:     0
>=1000ms:       0
total(ms):      0

The usage of memory.direct_reclaim_memcg_latency is the same as
memory.direct_reclaim_global_latency.
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
上级 a5f32c14
......@@ -88,6 +88,29 @@ struct mem_cgroup_reclaim_cookie {
struct alloc_context;
/* Kinds of memory latency statistics; one histogram is kept per kind. */
enum mem_lat_stat_item {
GLOBAL_DIRECT_RECLAIM, /* global direct reclaim latency */
MEMCG_DIRECT_RECLAIM, /* memcg direct reclaim latency */
MEM_LAT_NR_STAT, /* number of stat kinds; must stay last */
};
/* Memory latency histogram distribution, in milliseconds */
enum mem_lat_count_t {
MEM_LAT_0_1, /* [0ms, 1ms) */
MEM_LAT_1_5, /* [1ms, 5ms) */
MEM_LAT_5_10, /* [5ms, 10ms) */
MEM_LAT_10_100, /* [10ms, 100ms) */
MEM_LAT_100_500, /* [100ms, 500ms) */
MEM_LAT_500_1000, /* [500ms, 1000ms) */
MEM_LAT_1000_INF, /* [1000ms, +inf) */
MEM_LAT_TOTAL, /* accumulated latency, stored in nanoseconds */
MEM_LAT_NR_COUNT, /* number of counters; must stay last */
};
/* Per-cpu latency counters: one histogram row per stat kind. */
struct mem_cgroup_lat_stat_cpu {
unsigned long item[MEM_LAT_NR_STAT][MEM_LAT_NR_COUNT];
};
#ifdef CONFIG_MEMCG
#define MEM_CGROUP_ID_SHIFT 16
......@@ -361,6 +384,9 @@ struct mem_cgroup {
unsigned long offline_jiffies;
/* memory latency stat */
struct mem_cgroup_lat_stat_cpu __percpu *lat_stat_cpu;
ALI_HOTFIX_RESERVE(1)
ALI_HOTFIX_RESERVE(2)
ALI_HOTFIX_RESERVE(3)
......@@ -905,6 +931,8 @@ static inline bool is_wmark_ok(struct mem_cgroup *memcg, bool high)
int memcg_get_wmark_min_adj(struct task_struct *curr);
void memcg_check_wmark_min_adj(struct task_struct *curr,
struct alloc_context *ac);
extern void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration);
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
......@@ -1255,6 +1283,11 @@ static inline void memcg_check_wmark_min_adj(struct task_struct *curr,
struct alloc_context *ac)
{
}
/* No-op stub when memory cgroups are compiled out (!CONFIG_MEMCG). */
static inline void memcg_lat_stat_update(enum mem_lat_stat_item sidx,
u64 duration)
{
}
#endif /* CONFIG_MEMCG */
/* idx can be of type enum memcg_stat_item or node_stat_item */
......
......@@ -4511,6 +4511,152 @@ void memcg_check_wmark_min_adj(struct task_struct *curr,
}
}
static void smp_global_direct_reclaim_write(void *info)
{
struct mem_cgroup *memcg = (struct mem_cgroup *)info;
int idx = GLOBAL_DIRECT_RECLAIM;
int i;
for (i = MEM_LAT_0_1; i < MEM_LAT_NR_COUNT; i++)
this_cpu_write(memcg->lat_stat_cpu->item[idx][i], 0);
}
static void smp_memcg_direct_reclaim_write(void *info)
{
struct mem_cgroup *memcg = (struct mem_cgroup *)info;
int idx = MEMCG_DIRECT_RECLAIM;
int i;
for (i = MEM_LAT_0_1; i < MEM_LAT_NR_COUNT; i++)
this_cpu_write(memcg->lat_stat_cpu->item[idx][i], 0);
}
smp_call_func_t smp_memcg_lat_write_funcs[] = {
smp_global_direct_reclaim_write,
smp_memcg_direct_reclaim_write,
};
/*
 * Clear the latency histogram of kind @idx for @css's memcg on every CPU.
 *
 * Use on_each_cpu() rather than calling the handler locally and then
 * smp_call_function(): the latter pair runs this_cpu ops in preemptible
 * context, and is racy if the task migrates between the two steps (the
 * new current CPU would be skipped by smp_call_function()).
 *
 * Returns 0 on success.
 */
static int memcg_lat_stat_write(struct cgroup_subsys_state *css,
				enum mem_lat_stat_item idx)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	on_each_cpu(smp_memcg_lat_write_funcs[idx], memcg, 1);

	return 0;
}
/*
 * cftype write handler for memory.direct_reclaim_global_latency.
 * Writing "0" clears the histogram; any other value is rejected.
 */
static int global_direct_reclaim_latency_write(struct cgroup_subsys_state *css,
					       struct cftype *cft, u64 val)
{
	if (val)
		return -EINVAL;

	return memcg_lat_stat_write(css, GLOBAL_DIRECT_RECLAIM);
}
/*
 * cftype write handler for memory.direct_reclaim_memcg_latency.
 * Writing "0" clears the histogram; any other value is rejected.
 */
static int memcg_direct_reclaim_latency_write(struct cgroup_subsys_state *css,
					      struct cftype *cft, u64 val)
{
	if (val)
		return -EINVAL;

	return memcg_lat_stat_write(css, MEMCG_DIRECT_RECLAIM);
}
/*
 * Sum one histogram counter (@sidx, @cidx) of @memcg over all CPUs.
 *
 * Iterate possible CPUs, not just online ones: per-cpu counters persist
 * across CPU hot-unplug, so iterating only online CPUs would make the
 * contribution of an offlined CPU silently disappear from the report
 * (and reappear if it comes back online).
 */
static u64 memcg_lat_stat_gather(struct mem_cgroup *memcg,
				 enum mem_lat_stat_item sidx,
				 enum mem_lat_count_t cidx)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(memcg->lat_stat_cpu, cpu)->item[sidx][cidx];

	return sum;
}
static void memcg_lat_stat_show(struct seq_file *m, enum mem_lat_stat_item idx)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
seq_printf(m, "0-1ms: \t%llu\n",
memcg_lat_stat_gather(memcg, idx, MEM_LAT_0_1));
seq_printf(m, "1-5ms: \t%llu\n",
memcg_lat_stat_gather(memcg, idx, MEM_LAT_1_5));
seq_printf(m, "5-10ms: \t%llu\n",
memcg_lat_stat_gather(memcg, idx, MEM_LAT_5_10));
seq_printf(m, "10-100ms: \t%llu\n",
memcg_lat_stat_gather(memcg, idx, MEM_LAT_10_100));
seq_printf(m, "100-500ms: \t%llu\n",
memcg_lat_stat_gather(memcg, idx, MEM_LAT_100_500));
seq_printf(m, "500-1000ms: \t%llu\n",
memcg_lat_stat_gather(memcg, idx, MEM_LAT_500_1000));
seq_printf(m, ">=1000ms: \t%llu\n",
memcg_lat_stat_gather(memcg, idx, MEM_LAT_1000_INF));
seq_printf(m, "total(ms): \t%llu\n",
memcg_lat_stat_gather(memcg, idx, MEM_LAT_TOTAL) / 1000000);
}
/* seq_show handler for memory.direct_reclaim_global_latency. */
static int global_direct_reclaim_latency_show(struct seq_file *m, void *v)
{
	memcg_lat_stat_show(m, GLOBAL_DIRECT_RECLAIM);

	return 0;
}
/* seq_show handler for memory.direct_reclaim_memcg_latency. */
static int memcg_direct_reclaim_latency_show(struct seq_file *m, void *v)
{
	memcg_lat_stat_show(m, MEMCG_DIRECT_RECLAIM);

	return 0;
}
/*
 * Map a latency @duration (in nanoseconds) to its histogram bucket.
 *
 * static: the function is file-local (it is not declared in the header;
 * only memcg_lat_stat_update() is exported), so give it internal linkage.
 */
static enum mem_lat_count_t get_mem_lat_count_idx(u64 duration)
{
	/* Exclusive upper bound of each bucket, in milliseconds. */
	static const u64 bucket_max_ms[MEM_LAT_1000_INF] = {
		1, 5, 10, 100, 500, 1000,
	};
	enum mem_lat_count_t idx;

	duration = duration / 1000000;	/* ns -> ms */

	/* Everything >= 1000ms falls into the open-ended last bucket. */
	for (idx = MEM_LAT_0_1; idx < MEM_LAT_1000_INF; idx++)
		if (duration < bucket_max_ms[idx])
			break;

	return idx;
}
/*
 * Account one reclaim episode of @duration nanoseconds, of kind @sidx,
 * to the current task's memcg and every ancestor up to the root, so each
 * level of the hierarchy aggregates the latency of its children.
 */
void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration)
{
struct mem_cgroup *memcg, *iter;
enum mem_lat_count_t cidx;
if (mem_cgroup_disabled())
return;
cidx = get_mem_lat_count_idx(duration);
/* Takes a css reference on the returned memcg; dropped below. */
memcg = get_mem_cgroup_from_mm(current->mm);
/* Walk up the hierarchy; parent_mem_cgroup() returns NULL at the root. */
for (iter = memcg; iter; iter = parent_mem_cgroup(iter)) {
this_cpu_inc(iter->lat_stat_cpu->item[sidx][cidx]);
/* MEM_LAT_TOTAL accumulates raw nanoseconds; shown as ms on read. */
this_cpu_add(iter->lat_stat_cpu->item[sidx][MEM_LAT_TOTAL],
duration);
}
css_put(&memcg->css);
}
static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
{
struct mem_cgroup_threshold_ary *t;
......@@ -5410,6 +5556,16 @@ static struct cftype mem_cgroup_legacy_files[] = {
.seq_show = memory_wmark_min_adj_show,
.write = memory_wmark_min_adj_write,
},
{
.name = "direct_reclaim_global_latency",
.write_u64 = global_direct_reclaim_latency_write,
.seq_show = global_direct_reclaim_latency_show,
},
{
.name = "direct_reclaim_memcg_latency",
.write_u64 = memcg_direct_reclaim_latency_write,
.seq_show = memcg_direct_reclaim_latency_show,
},
{
.name = "force_empty",
.write = mem_cgroup_force_empty_write,
......@@ -5692,6 +5848,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->stat_cpu);
free_percpu(memcg->exstat_cpu);
free_percpu(memcg->lat_stat_cpu);
kfree(memcg);
}
......@@ -5728,6 +5885,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (!memcg->exstat_cpu)
goto fail;
memcg->lat_stat_cpu = alloc_percpu(struct mem_cgroup_lat_stat_cpu);
if (!memcg->lat_stat_cpu)
goto fail;
for_each_node(node)
if (alloc_mem_cgroup_per_node_info(memcg, node))
goto fail;
......
......@@ -3931,12 +3931,14 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
int progress;
unsigned int noreclaim_flag;
unsigned long pflags;
u64 start;
cond_resched();
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
psi_memstall_enter(&pflags);
start = ktime_get_ns();
fs_reclaim_acquire(gfp_mask);
noreclaim_flag = memalloc_noreclaim_save();
reclaim_state.reclaimed_slab = 0;
......@@ -3948,6 +3950,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
current->reclaim_state = NULL;
memalloc_noreclaim_restore(noreclaim_flag);
fs_reclaim_release(gfp_mask);
memcg_lat_stat_update(GLOBAL_DIRECT_RECLAIM, (ktime_get_ns() - start));
psi_memstall_leave(&pflags);
cond_resched();
......
......@@ -3454,6 +3454,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long pflags;
int nid;
unsigned int noreclaim_flag;
u64 start;
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) |
......@@ -3481,11 +3482,13 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
sc.reclaim_idx);
psi_memstall_enter(&pflags);
start = ktime_get_ns();
noreclaim_flag = memalloc_noreclaim_save();
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
memalloc_noreclaim_restore(noreclaim_flag);
memcg_lat_stat_update(MEMCG_DIRECT_RECLAIM, (ktime_get_ns() - start));
psi_memstall_leave(&pflags);
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册