Commit b0d97ae0 authored by Zhang Qiao, committed by Yang Yingliang

sched: Throttle qos cfs_rq when current cpu is running online task

hulk inclusion
category: feature
bugzilla: 51828, https://gitee.com/openeuler/kernel/issues/I4K96G
CVE: NA

--------------------------------

In a co-location scenario, we usually deploy online and offline
task groups on the same server.

The online tasks are more important than the offline tasks. To
prevent offline tasks from affecting online tasks, we throttle the
offline task groups when online task groups are running on the
same cpu, and unthrottle the offline task groups when the cpu is
about to enter the idle state.
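
The toy C program below is illustrative only and is not part of the patch:
it condenses the policy just described into a single predicate.
should_throttle(), struct toy_group and the cpu_sched_idle flag are
stand-ins for the real kernel structures in the diff further down.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the policy: qos_level < 0 marks an offline group. */
struct toy_group {
	int qos_level;
};

/*
 * Throttle an offline group only while the cpu still has online
 * (non-SCHED_IDLE) work; once the cpu is about to go idle, the offline
 * group is allowed to run again and use the spare cycles.
 */
static bool should_throttle(const struct toy_group *tg, bool cpu_sched_idle)
{
	return tg->qos_level < 0 && !cpu_sched_idle;
}

int main(void)
{
	struct toy_group offline = { .qos_level = -1 };
	struct toy_group online  = { .qos_level = 0 };

	printf("offline group, online work present -> throttle: %d\n",
	       should_throttle(&offline, false));	/* 1: throttled */
	printf("offline group, cpu about to idle   -> throttle: %d\n",
	       should_throttle(&offline, true));	/* 0: left runnable */
	printf("online group                       -> throttle: %d\n",
	       should_throttle(&online, false));	/* 0: never throttled */
	return 0;
}
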
Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent 2666666a
@@ -6946,8 +6946,10 @@ static int cpu_qos_write(struct cgroup_subsys_state *css,
	if (qos_level == -1) {
		policy = SCHED_IDLE;
		cfs_bandwidth_usage_inc();
	} else {
		policy = SCHED_NORMAL;
		cfs_bandwidth_usage_dec();
	}

	tg->qos_level = qos_level;
......
@@ -99,6 +99,10 @@ int __weak arch_asym_cpu_priority(int cpu)
}
#endif
#ifdef CONFIG_QOS_SCHED
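/* Per-cpu list of qos cfs_rq's that are currently throttled on this cpu. */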
static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, qos_throttled_cfs_rq);
#endif
#ifdef CONFIG_CFS_BANDWIDTH
/*
* Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -6868,6 +6872,128 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
		set_last_buddy(se);
}
#ifdef CONFIG_QOS_SCHED
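/*
 * Throttle an offline group's cfs_rq: dequeue its sched_entity at each
 * level of the hierarchy, freeze its runnable averages and put it on this
 * cpu's qos_throttled_cfs_rq list.
 */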
static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct sched_entity *se;
	long task_delta, idle_task_delta, dequeue = 1;

	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];

	/* freeze hierarchy runnable averages while throttled */
	rcu_read_lock();
	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
	rcu_read_unlock();

	task_delta = cfs_rq->h_nr_running;
	idle_task_delta = cfs_rq->idle_h_nr_running;
	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
		/* throttled entity or throttle-on-deactivate */
		if (!se->on_rq)
			break;

		if (dequeue)
			dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
		qcfs_rq->h_nr_running -= task_delta;
		qcfs_rq->idle_h_nr_running -= idle_task_delta;

		if (qcfs_rq->load.weight)
			dequeue = 0;
	}

	if (!se) {
		sub_nr_running(rq, task_delta);
	}

	cfs_rq->throttled = 1;
	cfs_rq->throttled_clock = rq_clock(rq);
	list_add(&cfs_rq->throttled_list, &per_cpu(qos_throttled_cfs_rq, cpu_of(rq)));
}
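
/*
 * Reverse of throttle_qos_cfs_rq(): account the throttled time, re-enqueue
 * the group's entities and reschedule the cpu if it is currently idle.
 */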
static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
	struct sched_entity *se;
	int enqueue = 1;
	long task_delta, idle_task_delta;

	se = cfs_rq->tg->se[cpu_of(rq)];

	cfs_rq->throttled = 0;

	update_rq_clock(rq);

	cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
	list_del_init(&cfs_rq->throttled_list);

	/* update hierarchical throttle state */
	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);

	if (!cfs_rq->load.weight)
		return;

	task_delta = cfs_rq->h_nr_running;
	idle_task_delta = cfs_rq->idle_h_nr_running;
	for_each_sched_entity(se) {
		if (se->on_rq)
			enqueue = 0;

		cfs_rq = cfs_rq_of(se);
		if (enqueue)
			enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
		cfs_rq->h_nr_running += task_delta;
		cfs_rq->idle_h_nr_running += idle_task_delta;

		if (cfs_rq_throttled(cfs_rq))
			break;
	}

	assert_list_leaf_cfs_rq(rq);

	if (!se) {
		add_nr_running(rq, task_delta);
	}

	/* Determine whether we need to wake up potentially idle CPU: */
	if (rq->curr == rq->idle && rq->cfs.nr_running)
		resched_curr(rq);
}
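
/*
 * Unthrottle every qos cfs_rq that was throttled on @cpu; returns the
 * number of cfs_rq's that were unthrottled.
 */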
static int unthrottle_qos_cfs_rqs(int cpu)
{
	struct cfs_rq *cfs_rq, *tmp_rq;
	int res = 0;

	list_for_each_entry_safe(cfs_rq, tmp_rq, &per_cpu(qos_throttled_cfs_rq, cpu),
				 throttled_list) {
		if (cfs_rq_throttled(cfs_rq)) {
			unthrottle_qos_cfs_rq(cfs_rq);
			res++;
		}
	}

	return res;
}
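
/*
 * Returns true (after throttling @cfs_rq) when the picked group is an
 * offline one (tg->qos_level < 0) and this cpu still has online,
 * non-SCHED_IDLE tasks to run.
 */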
static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	if (!cfs_bandwidth_used())
		return false;

	if (cfs_rq && cfs_rq->tg->qos_level < 0 &&
	    !sched_idle_cpu(cpu_of(rq_of(cfs_rq)))) {
		throttle_qos_cfs_rq(cfs_rq);
		return true;
	}

	return false;
#endif
static struct task_struct *
pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
@@ -6926,6 +7052,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
		se = pick_next_entity(cfs_rq, curr);
		cfs_rq = group_cfs_rq(se);
#ifdef CONFIG_QOS_SCHED
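		/*
		 * If the picked group is an offline one and online tasks are
		 * runnable on this cpu, throttle it and redo the pick from
		 * the root cfs_rq.
		 */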
		if (check_qos_cfs_rq(cfs_rq)) {
			cfs_rq = &rq->cfs;
			BUG_ON(cfs_rq->nr_running == 0);
		}
#endif
	} while (cfs_rq);

	p = task_of(se);
@@ -7015,6 +7147,12 @@ done: __maybe_unused;
	if (new_tasks > 0)
		goto again;

#ifdef CONFIG_QOS_SCHED
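	/*
	 * Nothing else to run: give any qos-throttled offline group on this
	 * cpu a chance to run again before going idle.
	 */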
	if (unthrottle_qos_cfs_rqs(cpu_of(rq))) {
		goto again;
	}
#endif

	return NULL;
}
@@ -10688,6 +10826,14 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
__init void init_sched_fair_class(void)
{
#ifdef CONFIG_QOS_SCHED
	int i;

	for_each_possible_cpu(i) {
		INIT_LIST_HEAD(&per_cpu(qos_throttled_cfs_rq, i));
	}
#endif
#ifdef CONFIG_SMP
	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
......