sched/cpufreq: Move the cfs_rq_util_change() call to cpufreq_update_util()

to #28739709

commit bef69dd87828ef5d8ecdab8d857cd3a33cf98675 upstream

update_cfs_rq_load_avg() calls cfs_rq_util_change() every time PELT decays, which might be inefficient when the cpufreq driver has rate limitation.

When a task is attached on a CPU, we have this call path:

update_load_avg()
  update_cfs_rq_load_avg()
    cfs_rq_util_change --> trigger frequency update
  attach_entity_load_avg()
    cfs_rq_util_change --> trigger frequency update

The 1st frequency update will not take into account the utilization of the newly attached task and the 2nd one might be discarded because of rate limitation of the cpufreq driver.

update_cfs_rq_load_avg() is only called by update_blocked_averages() and update_load_avg() so we can move the call to cfs_rq_util_change/cpufreq_update_util() into these two functions.

It's also interesting to note that update_load_avg() already calls cfs_rq_util_change() directly for the !SMP case.

This change will also ensure that cpufreq_update_util() is called even when there is no more CFS rq in the leaf_cfs_rq_list to update, but only IRQ, RT or DL PELT signals.

[ mingo: Minor updates. ]

Reported-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: juri.lelli@redhat.com
Cc: linux-pm@vger.kernel.org
Cc: mgorman@suse.de
Cc: rostedt@goodmis.org
Cc: sargun@sargun.me
Cc: srinivas.pandruvada@linux.intel.com
Cc: tj@kernel.org
Cc: xiexiuqi@huawei.com
Cc: xiezhipeng1@huawei.com
Fixes: 039ae8b ("sched/fair: Fix O(nr_cgroups) in the load balancing path")
Link: https://lkml.kernel.org/r/1574083279-799-1-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
Acked-by: Michael Wang <yun.wang@linux.alibaba.com>

sched/cpufreq: Move the cfs_rq_util_change() call to cpufreq_update_util()
to #28739709

commit bef69dd87828ef5d8ecdab8d857cd3a33cf98675 upstream

update_cfs_rq_load_avg() calls cfs_rq_util_change() every time PELT decays, which might be inefficient when the cpufreq driver has rate limitation.

When a task is attached on a CPU, we have this call path:

update_load_avg()
  update_cfs_rq_load_avg()
    cfs_rq_util_change --> trigger frequency update
  attach_entity_load_avg()
    cfs_rq_util_change --> trigger frequency update

The 1st frequency update will not take into account the utilization of the newly attached task and the 2nd one might be discarded because of rate limitation of the cpufreq driver.

update_cfs_rq_load_avg() is only called by update_blocked_averages() and update_load_avg() so we can move the call to cfs_rq_util_change/cpufreq_update_util() into these two functions.

It's also interesting to note that update_load_avg() already calls cfs_rq_util_change() directly for the !SMP case.

This change will also ensure that cpufreq_update_util() is called even when there is no more CFS rq in the leaf_cfs_rq_list to update, but only IRQ, RT or DL PELT signals.

[ mingo: Minor updates. ]

Reported-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: juri.lelli@redhat.com
Cc: linux-pm@vger.kernel.org
Cc: mgorman@suse.de
Cc: rostedt@goodmis.org
Cc: sargun@sargun.me
Cc: srinivas.pandruvada@linux.intel.com
Cc: tj@kernel.org
Cc: xiexiuqi@huawei.com
Cc: xiezhipeng1@huawei.com
Fixes: 039ae8b ("sched/fair: Fix O(nr_cgroups) in the load balancing path")
Link: https://lkml.kernel.org/r/1574083279-799-1-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
Acked-by: Michael Wang <yun.wang@linux.alibaba.com>
8edf7c76 · Vincent Guittot · Yihao Wu · a843cbc3 · 8edf7c76
显示空白变更内容
内联并排

Showing 1 changed file with 70 additions and 49 deletions

kernel/sched/fair.c kernel/sched/fair.c +70 -49

未找到文件。
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3506,9 +3506,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 	cfs_rq->load_last_update_time_copy = sa->last_update_time;
 #endif
-	if (decayed)
-		cfs_rq_util_change(cfs_rq, 0);
 	return decayed;
 }
@@ -3616,8 +3613,12 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 		attach_entity_load_avg(cfs_rq, se, SCHED_CPUFREQ_MIGRATION);
 		update_tg_load_avg(cfs_rq, 0);
-	} else if (decayed && (flags & UPDATE_TG))
+	} else if (decayed) {
+		cfs_rq_util_change(cfs_rq, 0);
+		if (flags & UPDATE_TG)
 			update_tg_load_avg(cfs_rq, 0);
+	}
 }
 #ifndef CONFIG_64BIT
@@ -7201,6 +7202,28 @@ static inline bool others_have_blocked(struct rq *rq)
 	return false;
 }
+static bool __update_blocked_others(struct rq *rq, bool *done)
+{
+	const struct sched_class *curr_class;
+	u64 now = rq_clock_task(rq);
+	bool decayed;
+	/*
+	 * update_load_avg() can call cpufreq_update_util(). Make sure that RT,
+	 * DL and IRQ signals have been updated before updating CFS.
+	 *
+	 * The three update results are combined with a non-short-circuiting
+	 * '|', so every update function is called on each invocation, and
+	 * the combined value is returned to the caller.
+	 *
+	 * *done is only ever cleared here (when others_have_blocked(rq) is
+	 * true), never set; the caller is expected to initialise it.
+	 */
+	curr_class = rq->curr->sched_class;
+	decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
+		  update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
+		  update_irq_load_avg(rq, 0);
+	if (others_have_blocked(rq))
+		*done = false;
+	return decayed;
+}
 #ifdef CONFIG_FAIR_GROUP_SCHED
 DEFINE_STATIC_KEY_TRUE(sched_blocked_averages);
@@ -7249,19 +7272,14 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
 	return true;
 }
-static void update_blocked_averages(int cpu)
+static bool __update_blocked_fair(struct rq *rq, bool *done)
 {
-	struct rq *rq = cpu_rq(cpu);
 	struct cfs_rq *cfs_rq, *pos;
-	const struct sched_class *curr_class;
+	bool decayed = false;
-	struct rq_flags rf;
+	int cpu = cpu_of(rq);
-	bool done = true;
 	if (!static_branch_unlikely(&sched_blocked_averages))
-		return;
+		return false;
-	rq_lock_irqsave(rq, &rf);
-	update_rq_clock(rq);
 	/*
 	 * Iterates the task_group tree in a bottom up fashion, see
@@ -7270,9 +7288,13 @@ static void update_blocked_averages(int cpu)
 	for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
 		struct sched_entity *se;
-		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq)) {
 			update_tg_load_avg(cfs_rq, 0);
+			if (cfs_rq == &rq->cfs)
+				decayed = true;
+		}
 		/* Propagate pending load changes to the parent, if any: */
 		se = cfs_rq->tg->se[cpu];
 		if (se && !skip_blocked_update(se))
@@ -7287,23 +7309,10 @@ static void update_blocked_averages(int cpu)
 		/* Don't need periodic decay once load/util_avg are null */
 		if (cfs_rq_has_blocked(cfs_rq))
-			done = false;
+			*done = false;
 	}
-	curr_class = rq->curr->sched_class;
+	return decayed;
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
-	update_irq_load_avg(rq, 0);
-	/* Don't need periodic decay once load/util_avg are null */
-	if (others_have_blocked(rq))
-		done = false;
-#ifdef CONFIG_NO_HZ_COMMON
-	rq->last_blocked_load_update_tick = jiffies;
-	if (done)
-		rq->has_blocked_load = 0;
-#endif
-	rq_unlock_irqrestore(rq, &rf);
 }
 /*
@@ -7395,30 +7404,19 @@ static unsigned long task_h_load_static(struct task_struct *p)
 			cfs_rq->load.weight + 1);
 }
 #else
-static inline void update_blocked_averages(int cpu)
+/*
+ * !CONFIG_FAIR_GROUP_SCHED variant: only the root cfs_rq (&rq->cfs) is
+ * updated. Returns the result of update_cfs_rq_load_avg() for that cfs_rq,
+ * and clears *done when cfs_rq_has_blocked() is still true for it.
+ */
+static bool __update_blocked_fair(struct rq *rq, bool *done)
 {
+	struct cfs_rq *cfs_rq = &rq->cfs;
+	bool decayed;
 	if (!static_key_true(&sched_blocked_averages))
-		return;
+		/* Function now returns bool: report "nothing decayed". */
+		return false;
-	struct rq *rq = cpu_rq(cpu);
+	decayed = update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
-	struct cfs_rq *cfs_rq = &rq->cfs;
+	if (cfs_rq_has_blocked(cfs_rq))
-	const struct sched_class *curr_class;
+		*done = false;
-	struct rq_flags rf;
-	rq_lock_irqsave(rq, &rf);
-	update_rq_clock(rq);
-	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
-	curr_class = rq->curr->sched_class;
+	return decayed;
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
-	update_irq_load_avg(rq, 0);
-#ifdef CONFIG_NO_HZ_COMMON
-	rq->last_blocked_load_update_tick = jiffies;
-	if (!cfs_rq_has_blocked(cfs_rq) && !others_have_blocked(rq))
-		rq->has_blocked_load = 0;
-#endif
-	rq_unlock_irqrestore(rq, &rf);
 }
 static unsigned long task_h_load(struct task_struct *p)
@@ -7432,6 +7430,29 @@ static unsigned long task_h_load_static(struct task_struct *p)
 }
 #endif
+static void update_blocked_averages(int cpu)
+{
+	bool decayed = false, done = true;	/* done stays true unless a helper clears it */
+	struct rq *rq = cpu_rq(cpu);
+	struct rq_flags rf;
+	rq_lock_irqsave(rq, &rf);
+	update_rq_clock(rq);
+	decayed |= __update_blocked_others(rq, &done);	/* RT, DL and IRQ signals before CFS */
+	decayed |= __update_blocked_fair(rq, &done);
+#ifdef CONFIG_NO_HZ_COMMON
+	rq->last_blocked_load_update_tick = jiffies;
+	if (done)
+		rq->has_blocked_load = 0;
+#endif
+	if (decayed)	/* one cpufreq notification covering both helpers' results */
+		cpufreq_update_util(rq, 0);
+	rq_unlock_irqrestore(rq, &rf);
+}
 /********** Helpers for find_busiest_group ************************/
 enum group_type {