From 95938a35c5562afa7af7252821e44132391a3db8 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Mon, 15 Oct 2007 17:00:14 +0200
Subject: [PATCH] sched: prevent wakeup over-scheduling

Prevent wakeup over-scheduling.  Once a task has been preempted by a
task of the same or lower priority, it becomes ineligible for repeated
preemption by such tasks until it has been ticked or has slept.
Instead, the task is marked for preemption at the next tick.  Tasks of
higher priority still preempt immediately.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  1 +
 kernel/sched.c        |  4 +++-
 kernel/sched_fair.c   | 14 +++++++++++---
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04233c8974d9..8be5b57768c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -912,6 +912,7 @@ struct sched_entity {
 	struct load_weight	load;		/* for load-balancing */
 	struct rb_node		run_node;
 	unsigned int		on_rq;
+	int			peer_preempt;
 
 	u64			exec_start;
 	u64			sum_exec_runtime;
diff --git a/kernel/sched.c b/kernel/sched.c
index 0bd8f2c0fb40..e8051bd59acb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -445,6 +445,7 @@ enum {
 	SCHED_FEAT_TREE_AVG             = 4,
 	SCHED_FEAT_APPROX_AVG           = 8,
 	SCHED_FEAT_WAKEUP_PREEMPT	= 16,
+	SCHED_FEAT_PREEMPT_RESTRICT	= 32,
 };
 
 const_debug unsigned int sysctl_sched_features =
@@ -452,7 +453,8 @@ const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_START_DEBIT		*1 |
 		SCHED_FEAT_TREE_AVG		*0 |
 		SCHED_FEAT_APPROX_AVG		*0 |
-		SCHED_FEAT_WAKEUP_PREEMPT	*1;
+		SCHED_FEAT_WAKEUP_PREEMPT	*1 |
+		SCHED_FEAT_PREEMPT_RESTRICT	*1;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3843ec71aad5..f819f943fb86 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -526,6 +526,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
+		se->peer_preempt = 0;
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
@@ -553,8 +554,10 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 
 	ideal_runtime = sched_slice(cfs_rq, curr);
 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-	if (delta_exec > ideal_runtime)
+	if (delta_exec > ideal_runtime ||
+			(sched_feat(PREEMPT_RESTRICT) && curr->peer_preempt))
 		resched_task(rq_of(cfs_rq)->curr);
+	curr->peer_preempt = 0;
 }
 
 static void
@@ -839,8 +842,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		if (unlikely(se->load.weight != NICE_0_LOAD))
 			gran = calc_delta_fair(gran, &se->load);
 
-		if (delta > gran)
-			resched_task(curr);
+		if (delta > gran) {
+			int now = !sched_feat(PREEMPT_RESTRICT);
+
+			if (now || p->prio < curr->prio || !se->peer_preempt++)
+				resched_task(curr);
+		}
 	}
 }
 
@@ -1034,6 +1041,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	check_spread(cfs_rq, curr);
 	__enqueue_entity(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
+	se->peer_preempt = 0;
 	resched_task(rq->curr);
 }
 
-- 
GitLab