From fbea24f5894e86a18b21bbee485e2a04cb233b3b Mon Sep 17 00:00:00 2001
From: Zhang Qiao <zhangqiao22@huawei.com>
Date: Fri, 16 Dec 2022 02:07:13 +0000
Subject: [PATCH] sched/qos: Don't unthrottle cfs_rq when cfs_rq is throttled
 by qos

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I64OUS
CVE: NA

-------------------------------

When a cfs_rq throttled by qos, mark cfs_rq->throttled as 1,
and cfs bw will unthrottled this cfs_rq by mistake, it cause
a list_del_valid warning.
So add macro QOS_THROTTLED(=2), when a cfs_rq is throttled by
qos, we mark the cfs_rq->throttled as QOS_THROTTLED, will check
the value of cfs_rq->throttled before unthrottle a cfs_rq.

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Reviewed-by: zheng zucheng <zhengzucheng@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
---
 kernel/sched/fair.c | 59 ++++++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a34ca843bf0a..0dba06ce0677 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -103,6 +103,13 @@ int __weak arch_asym_cpu_priority(int cpu)
 #endif
 
 #ifdef CONFIG_QOS_SCHED
+
+/*
+ * To distinguish cfs bw, use QOS_THROTTLED mark cfs_rq->throttled
+ * when qos throttled(and cfs bw throttle mark cfs_rq->throttled as 1).
+ */
+#define QOS_THROTTLED	2
+
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, qos_throttled_cfs_rq);
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct hrtimer, qos_overload_timer);
 static DEFINE_PER_CPU(int, qos_cpu_overload);
@@ -4649,6 +4656,14 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
 
+#ifdef CONFIG_QOS_SCHED
+	/*
+	 * if this cfs_rq throttled by qos, not need unthrottle it.
+	 */
+	if (cfs_rq->throttled == QOS_THROTTLED)
+		return;
+#endif
+
 	cfs_rq->throttled = 0;
 
 	update_rq_clock(rq);
@@ -7066,37 +7081,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 #ifdef CONFIG_QOS_SCHED
 static void start_qos_hrtimer(int cpu);
 
-static int qos_tg_unthrottle_up(struct task_group *tg, void *data)
-{
-	struct rq *rq = data;
-	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
-
-	cfs_rq->throttle_count--;
-
-	return 0;
-}
-
-static int qos_tg_throttle_down(struct task_group *tg, void *data)
-{
-	struct rq *rq = data;
-	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
-
-	cfs_rq->throttle_count++;
-
-	return 0;
-}
-
 static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta, dequeue = 1;
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
 	/* freeze hierarchy runnable averages while throttled */
 	rcu_read_lock();
-	walk_tg_tree_from(cfs_rq->tg, qos_tg_throttle_down, tg_nop, (void *)rq);
+	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
 	rcu_read_unlock();
 
 	task_delta = cfs_rq->h_nr_running;
@@ -7118,12 +7114,14 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 	if (!se) {
 		sub_nr_running(rq, task_delta);
+		if (prev_nr >= 2 && prev_nr - task_delta < 2)
+			overload_clear(rq);
 	}
 
 	if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
 		start_qos_hrtimer(cpu_of(rq));
 
-	cfs_rq->throttled = 1;
+	cfs_rq->throttled = QOS_THROTTLED;
 
 	list_add(&cfs_rq->qos_throttled_list,
 		 &per_cpu(qos_throttled_cfs_rq, cpu_of(rq)));
@@ -7135,8 +7133,11 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	struct sched_entity *se;
 	int enqueue = 1;
 	long task_delta, idle_task_delta;
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
+	if (cfs_rq->throttled != QOS_THROTTLED)
+		return;
 
 	cfs_rq->throttled = 0;
 
@@ -7145,7 +7146,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* update hierarchical throttle state */
 	rcu_read_lock();
-	walk_tg_tree_from(cfs_rq->tg, tg_nop, qos_tg_unthrottle_up, (void *)rq);
+	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
 	rcu_read_unlock();
 
 	if (!cfs_rq->load.weight)
@@ -7167,12 +7168,20 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 			break;
 	}
 
-	assert_list_leaf_cfs_rq(rq);
-
 	if (!se) {
 		add_nr_running(rq, task_delta);
+		if (prev_nr < 2 && prev_nr + task_delta >= 2)
+			overload_set(rq);
+	}
+
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		list_add_leaf_cfs_rq(cfs_rq);
 	}
 
+	assert_list_leaf_cfs_rq(rq);
+
 	/* Determine whether we need to wake up potentially idle CPU: */
 	if (rq->curr == rq->idle && rq->cfs.nr_running)
 		resched_curr(rq);
-- 
GitLab