提交 6dbaddaa 编写于 作者: Y Yihao Wu

alinux: sched: Add cgroup's scheduling latency histograms

to #28739709

This patch adds cpuacct.cgroup_wait_latency interface. It exports the
histogram of the sched entity's schedule latency. Unlike wait_latency,
the sched entity is a cgroup rather than task.

This is useful when tasks are not directly clustered under one cgroup.
For example:

cgroup1 --- cgroupA --- task1
        --- cgroupB --- task2
cgroup2 --- cgroupC --- task3
        --- cgroupD --- task4

This is a common cgroup hierarchy used by many applications. With
cgroup_wait_latency, we can just read from cgroup1 to know aggregated
wait latency information of task1 and task2.

The interface output format is identical to cpuacct.wait_latency.
Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
Acked-by: Michael Wang <yun.wang@linux.alibaba.com>
上级 a055ee2c
...@@ -37,6 +37,7 @@ enum sched_lat_stat_item { ...@@ -37,6 +37,7 @@ enum sched_lat_stat_item {
SCHED_LAT_WAIT, SCHED_LAT_WAIT,
SCHED_LAT_BLOCK, SCHED_LAT_BLOCK,
SCHED_LAT_IOBLOCK, SCHED_LAT_IOBLOCK,
SCHED_LAT_CGROUP_WAIT,
SCHED_LAT_NR_STAT SCHED_LAT_NR_STAT
}; };
...@@ -118,6 +119,12 @@ struct cpuacct { ...@@ -118,6 +119,12 @@ struct cpuacct {
ALI_HOTFIX_RESERVE(4) ALI_HOTFIX_RESERVE(4)
}; };
/* Resolve a cgroup to its cpuacct controller state via the cpuacct css. */
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css;

	css = global_cgroup_css(cgrp, cpuacct_cgrp_id);
	return container_of(css, struct cpuacct, css);
}
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{ {
return css ? container_of(css, struct cpuacct, css) : NULL; return css ? container_of(css, struct cpuacct, css) : NULL;
...@@ -255,22 +262,29 @@ void task_ca_update_block(struct task_struct *tsk, u64 runtime) ...@@ -255,22 +262,29 @@ void task_ca_update_block(struct task_struct *tsk, u64 runtime)
rcu_read_unlock(); rcu_read_unlock();
} }
void cpuacct_update_latency(struct task_struct *tsk, u64 delta) void cpuacct_update_latency(struct sched_entity *se, u64 delta)
{ {
enum sched_lat_count_t idx; int idx;
enum sched_lat_stat_item s;
struct cpuacct *ca; struct cpuacct *ca;
unsigned int msecs; unsigned int msecs;
struct task_group *tg;
if (static_branch_likely(&cpuacct_no_sched_lat)) if (static_branch_likely(&cpuacct_no_sched_lat))
return; return;
rcu_read_lock(); rcu_read_lock();
ca = task_ca(tsk); tg = se->cfs_rq->tg;
ca = cgroup_ca(tg->css.cgroup);
if (entity_is_task(se))
s = SCHED_LAT_WAIT;
else
s = SCHED_LAT_CGROUP_WAIT;
msecs = delta >> 20; /* Proximately to speed up */ msecs = delta >> 20; /* Proximately to speed up */
idx = get_sched_lat_count_idx(msecs); idx = get_sched_lat_count_idx(msecs);
this_cpu_inc(ca->lat_stat_cpu->item[SCHED_LAT_WAIT][idx]); this_cpu_inc(ca->lat_stat_cpu->item[s][idx]);
this_cpu_add(ca->lat_stat_cpu->item[SCHED_LAT_WAIT][SCHED_LAT_TOTAL], this_cpu_add(ca->lat_stat_cpu->item[s][SCHED_LAT_TOTAL], delta);
delta);
rcu_read_unlock(); rcu_read_unlock();
} }
#endif #endif
...@@ -806,11 +820,13 @@ static void smp_write_##name(void *info) \ ...@@ -806,11 +820,13 @@ static void smp_write_##name(void *info) \
} \ } \
SCHED_LAT_STAT_SMP_WRITE(sched_wait_latency, SCHED_LAT_WAIT); SCHED_LAT_STAT_SMP_WRITE(sched_wait_latency, SCHED_LAT_WAIT);
SCHED_LAT_STAT_SMP_WRITE(sched_wait_cgroup_latency, SCHED_LAT_CGROUP_WAIT);
SCHED_LAT_STAT_SMP_WRITE(sched_block_latency, SCHED_LAT_BLOCK); SCHED_LAT_STAT_SMP_WRITE(sched_block_latency, SCHED_LAT_BLOCK);
SCHED_LAT_STAT_SMP_WRITE(sched_ioblock_latency, SCHED_LAT_IOBLOCK); SCHED_LAT_STAT_SMP_WRITE(sched_ioblock_latency, SCHED_LAT_IOBLOCK);
smp_call_func_t smp_sched_lat_write_funcs[] = { smp_call_func_t smp_sched_lat_write_funcs[] = {
smp_write_sched_wait_latency, smp_write_sched_wait_latency,
smp_write_sched_wait_cgroup_latency,
smp_write_sched_block_latency, smp_write_sched_block_latency,
smp_write_sched_ioblock_latency smp_write_sched_ioblock_latency
}; };
...@@ -922,6 +938,12 @@ static struct cftype files[] = { ...@@ -922,6 +938,12 @@ static struct cftype files[] = {
.write_u64 = sched_lat_stat_write, .write_u64 = sched_lat_stat_write,
.seq_show = sched_lat_stat_show .seq_show = sched_lat_stat_show
}, },
{
.name = "cgroup_wait_latency",
.private = SCHED_LAT_CGROUP_WAIT,
.write_u64 = sched_lat_stat_write,
.seq_show = sched_lat_stat_show
},
{ {
.name = "block_latency", .name = "block_latency",
.private = SCHED_LAT_BLOCK, .private = SCHED_LAT_BLOCK,
......
...@@ -890,8 +890,8 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) ...@@ -890,8 +890,8 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
return; return;
} }
trace_sched_stat_wait(p, delta); trace_sched_stat_wait(p, delta);
cpuacct_update_latency(p, delta);
} }
cpuacct_update_latency(se, delta);
__schedstat_set(se->statistics.wait_max, __schedstat_set(se->statistics.wait_max,
max(schedstat_val(se->statistics.wait_max), delta)); max(schedstat_val(se->statistics.wait_max), delta));
......
...@@ -2282,7 +2282,7 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned ...@@ -2282,7 +2282,7 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
extern u64 get_idle_time(int cpu); extern u64 get_idle_time(int cpu);
extern u64 get_iowait_time(int cpu); extern u64 get_iowait_time(int cpu);
extern void task_ca_increase_nr_migrations(struct task_struct *tsk); extern void task_ca_increase_nr_migrations(struct task_struct *tsk);
void cpuacct_update_latency(struct task_struct *tsk, u64 delta); void cpuacct_update_latency(struct sched_entity *se, u64 delta);
void task_ca_update_block(struct task_struct *tsk, u64 runtime); void task_ca_update_block(struct task_struct *tsk, u64 runtime);
#else #else
static inline void task_ca_increase_nr_migrations(struct task_struct *tsk) { } static inline void task_ca_increase_nr_migrations(struct task_struct *tsk) { }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册