sched/fair: Calculate runnable_weight slightly differently

Our runnable_weight currently looks like this runnable_weight = shares * runnable_load_avg / load_avg The goal is to scale the runnable weight for the group based on its runnable to load_avg ratio. The problem with this is it biases us towards tasks that never go to sleep. Tasks that go to sleep are going to have their runnable_load_avg decayed pretty hard, which will drastically reduce the runnable weight of groups with interactive tasks. To solve this imbalance we tweak this slightly, so in the ideal case it is still the above, but in the interactive case it is runnable_weight = shares * runnable_weight / load_weight which will make the weight distribution fairer between interactive and non-interactive groups. Signed-off-by: N Josef Bacik <jbacik@fb.com> Signed-off-by: N Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: kernel-team@fb.com Cc: linux-kernel@vger.kernel.org Cc: riel@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/1501773219-18774-2-git-send-email-jbacik@fb.comSigned-off-by: N Ingo Molnar <mingo@kernel.org>

sched/fair: Calculate runnable_weight slightly differently
Our runnable_weight currently looks like this runnable_weight = shares * runnable_load_avg / load_avg The goal is to scale the runnable weight for the group based on its runnable to load_avg ratio. The problem with this is it biases us towards tasks that never go to sleep. Tasks that go to sleep are going to have their runnable_load_avg decayed pretty hard, which will drastically reduce the runnable weight of groups with interactive tasks. To solve this imbalance we tweak this slightly, so in the ideal case it is still the above, but in the interactive case it is runnable_weight = shares * runnable_weight / load_weight which will make the weight distribution fairer between interactive and non-interactive groups. Signed-off-by: N Josef Bacik <jbacik@fb.com> Signed-off-by: N Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: kernel-team@fb.com Cc: linux-kernel@vger.kernel.org Cc: riel@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/1501773219-18774-2-git-send-email-jbacik@fb.comSigned-off-by: N Ingo Molnar <mingo@kernel.org>
2c8e4dce · Josef Bacik · Ingo Molnar · 9a2dd585 · 2c8e4dce
隐藏空白更改
内联并排

Showing with 33 addition and 12 deletion

kernel/sched/fair.c kernel/sched/fair.c +33 -12

未找到文件。
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2883,7 +2883,7 @@ void reweight_task(struct task_struct *p, int prio)
 *
 * hence icky!
 */
-static long calc_cfs_shares(struct cfs_rq *cfs_rq)
+static long calc_group_shares(struct cfs_rq *cfs_rq)
 {
 	long tg_weight, tg_shares, load, shares;
 	struct task_group *tg = cfs_rq->tg;
@@ -2920,6 +2920,36 @@ static long calc_cfs_shares(struct cfs_rq *cfs_rq)
 	 */
 	return clamp_t(long, shares, MIN_SHARES, tg_shares);
 }
+
+/*
+ * The runnable shares of this group are calculated as such
+ *
+ *          max(cfs_rq->avg.runnable_load_avg, cfs_rq->runnable_weight)
+ * shares * ------------------------------------------------------------
+ *               max(cfs_rq->avg.load_avg, cfs_rq->load.weight)
+ *
+ * We do this to keep the shares in line with expected load on the cfs_rq.
+ * Consider a cfs_rq that has several tasks wake up on this cfs_rq for the first
+ * time, it's runnable_load_avg is not going to be representative of the actual
+ * load this cfs_rq will now experience, which will bias us agaisnt this cfs_rq.
+ * The weight on the cfs_rq is the immediate effect of having new tasks
+ * enqueue'd onto it which should be used to calculate the new runnable shares.
+ * At the same time we need the actual load_avg to be the lower bounds for the
+ * calculation, to handle when our weight drops quickly from having entities
+ * dequeued.
+ */
+static long calc_group_runnable(struct cfs_rq *cfs_rq, long shares)
+{
+	long load_avg = max(cfs_rq->avg.load_avg,
+			    scale_load_down(cfs_rq->load.weight));
+	long runnable = max(cfs_rq->avg.runnable_load_avg,
+			    scale_load_down(cfs_rq->runnable_weight));
+
+	runnable *= shares;
+	if (load_avg)
+		runnable /= load_avg;
+	return clamp_t(long, runnable, MIN_SHARES, shares);
+}
 # endif /* CONFIG_SMP */

 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
@@ -2945,17 +2975,8 @@ static void update_cfs_group(struct sched_entity *se)
 	if (likely(se->load.weight == shares))
 		return;
 #else
-	shares = calc_cfs_shares(gcfs_rq);
-	/*
-	 * The hierarchical runnable load metric is the proportional part
-	 * of this group's runnable_load_avg / load_avg.
-	 *
-	 * Note: we need to deal with very sporadic 'runnable > load' cases
-	 * due to numerical instability.
-	 */
-	runnable = shares * gcfs_rq->avg.runnable_load_avg;
-	if (runnable)
-		runnable /= max(gcfs_rq->avg.load_avg, gcfs_rq->avg.runnable_load_avg);
+	shares   = calc_group_shares(gcfs_rq);
+	runnable = calc_group_runnable(gcfs_rq, shares);
 #endif

 	reweight_entity(cfs_rq_of(se), se, shares, runnable);