提交 6dbaddaa 编写于 作者: Y Yihao Wu

alinux: sched: Add cgroup's scheduling latency histograms

to #28739709

This patch adds cpuacct.cgroup_wait_latency interface. It exports the
histogram of the sched entity's schedule latency. Unlike wait_latency,
the sched entity is a cgroup rather than task.

This is useful when tasks are not directly clustered under one cgroup.
For example:

cgroup1 --- cgroupA --- task1
        --- cgroupB --- task2
cgroup2 --- cgroupC --- task3
        --- cgroupD --- task4

This is a common cgroup hierarchy used by many applications. With
cgroup_wait_latency, we can just read from cgroup1 to know aggregated
wait latency information of task1 and task2.

The interface output format is identical to cpuacct.wait_latency.
Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
Acked-by: Michael Wang <yun.wang@linux.alibaba.com>
上级 a055ee2c
...@@ -37,6 +37,7 @@ enum sched_lat_stat_item { ...@@ -37,6 +37,7 @@ enum sched_lat_stat_item {
SCHED_LAT_WAIT, SCHED_LAT_WAIT,
SCHED_LAT_BLOCK, SCHED_LAT_BLOCK,
SCHED_LAT_IOBLOCK, SCHED_LAT_IOBLOCK,
SCHED_LAT_CGROUP_WAIT,
SCHED_LAT_NR_STAT SCHED_LAT_NR_STAT
}; };
...@@ -118,6 +119,12 @@ struct cpuacct { ...@@ -118,6 +119,12 @@ struct cpuacct {
ALI_HOTFIX_RESERVE(4) ALI_HOTFIX_RESERVE(4)
}; };
/* Resolve a cgroup to its cpuacct controller state via the cpuacct css. */
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css;

	css = global_cgroup_css(cgrp, cpuacct_cgrp_id);
	return container_of(css, struct cpuacct, css);
}
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{ {
return css ? container_of(css, struct cpuacct, css) : NULL; return css ? container_of(css, struct cpuacct, css) : NULL;
...@@ -255,22 +262,29 @@ void task_ca_update_block(struct task_struct *tsk, u64 runtime) ...@@ -255,22 +262,29 @@ void task_ca_update_block(struct task_struct *tsk, u64 runtime)
rcu_read_unlock(); rcu_read_unlock();
} }
void cpuacct_update_latency(struct task_struct *tsk, u64 delta) void cpuacct_update_latency(struct sched_entity *se, u64 delta)
{ {
enum sched_lat_count_t idx; int idx;
enum sched_lat_stat_item s;
struct cpuacct *ca; struct cpuacct *ca;
unsigned int msecs; unsigned int msecs;
struct task_group *tg;
if (static_branch_likely(&cpuacct_no_sched_lat)) if (static_branch_likely(&cpuacct_no_sched_lat))
return; return;
rcu_read_lock(); rcu_read_lock();
ca = task_ca(tsk); tg = se->cfs_rq->tg;
ca = cgroup_ca(tg->css.cgroup);
if (entity_is_task(se))
s = SCHED_LAT_WAIT;
else
s = SCHED_LAT_CGROUP_WAIT;
msecs = delta >> 20; /* Proximately to speed up */ msecs = delta >> 20; /* Proximately to speed up */
idx = get_sched_lat_count_idx(msecs); idx = get_sched_lat_count_idx(msecs);
this_cpu_inc(ca->lat_stat_cpu->item[SCHED_LAT_WAIT][idx]); this_cpu_inc(ca->lat_stat_cpu->item[s][idx]);
this_cpu_add(ca->lat_stat_cpu->item[SCHED_LAT_WAIT][SCHED_LAT_TOTAL], this_cpu_add(ca->lat_stat_cpu->item[s][SCHED_LAT_TOTAL], delta);
delta);
rcu_read_unlock(); rcu_read_unlock();
} }
#endif #endif
...@@ -806,11 +820,13 @@ static void smp_write_##name(void *info) \ ...@@ -806,11 +820,13 @@ static void smp_write_##name(void *info) \
} \ } \
SCHED_LAT_STAT_SMP_WRITE(sched_wait_latency, SCHED_LAT_WAIT); SCHED_LAT_STAT_SMP_WRITE(sched_wait_latency, SCHED_LAT_WAIT);
SCHED_LAT_STAT_SMP_WRITE(sched_wait_cgroup_latency, SCHED_LAT_CGROUP_WAIT);
SCHED_LAT_STAT_SMP_WRITE(sched_block_latency, SCHED_LAT_BLOCK); SCHED_LAT_STAT_SMP_WRITE(sched_block_latency, SCHED_LAT_BLOCK);
SCHED_LAT_STAT_SMP_WRITE(sched_ioblock_latency, SCHED_LAT_IOBLOCK); SCHED_LAT_STAT_SMP_WRITE(sched_ioblock_latency, SCHED_LAT_IOBLOCK);
smp_call_func_t smp_sched_lat_write_funcs[] = { smp_call_func_t smp_sched_lat_write_funcs[] = {
smp_write_sched_wait_latency, smp_write_sched_wait_latency,
smp_write_sched_wait_cgroup_latency,
smp_write_sched_block_latency, smp_write_sched_block_latency,
smp_write_sched_ioblock_latency smp_write_sched_ioblock_latency
}; };
...@@ -922,6 +938,12 @@ static struct cftype files[] = { ...@@ -922,6 +938,12 @@ static struct cftype files[] = {
.write_u64 = sched_lat_stat_write, .write_u64 = sched_lat_stat_write,
.seq_show = sched_lat_stat_show .seq_show = sched_lat_stat_show
}, },
{
.name = "cgroup_wait_latency",
.private = SCHED_LAT_CGROUP_WAIT,
.write_u64 = sched_lat_stat_write,
.seq_show = sched_lat_stat_show
},
{ {
.name = "block_latency", .name = "block_latency",
.private = SCHED_LAT_BLOCK, .private = SCHED_LAT_BLOCK,
......
...@@ -890,8 +890,8 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) ...@@ -890,8 +890,8 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
return; return;
} }
trace_sched_stat_wait(p, delta); trace_sched_stat_wait(p, delta);
cpuacct_update_latency(p, delta);
} }
cpuacct_update_latency(se, delta);
__schedstat_set(se->statistics.wait_max, __schedstat_set(se->statistics.wait_max,
max(schedstat_val(se->statistics.wait_max), delta)); max(schedstat_val(se->statistics.wait_max), delta));
......
...@@ -2282,7 +2282,7 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned ...@@ -2282,7 +2282,7 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
extern u64 get_idle_time(int cpu); extern u64 get_idle_time(int cpu);
extern u64 get_iowait_time(int cpu); extern u64 get_iowait_time(int cpu);
extern void task_ca_increase_nr_migrations(struct task_struct *tsk); extern void task_ca_increase_nr_migrations(struct task_struct *tsk);
void cpuacct_update_latency(struct task_struct *tsk, u64 delta); void cpuacct_update_latency(struct sched_entity *se, u64 delta);
void task_ca_update_block(struct task_struct *tsk, u64 runtime); void task_ca_update_block(struct task_struct *tsk, u64 runtime);
#else #else
static inline void task_ca_increase_nr_migrations(struct task_struct *tsk) { } static inline void task_ca_increase_nr_migrations(struct task_struct *tsk) { }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册