Unverified commit f3014d9a, authored by openeuler-ci-bot, committed by Gitee

!640 sched/fair: Introduce QOS_SMT_EXPELL priority reversion mechanism

Merge Pull Request from: @NNNNicole 
 
Here is a typical case in which SMT expelling can occasionally cause priority inversion: assume there are two SMT siblings, cA and cB, with online tasks running on cA and offline tasks on cB. With SMT expelling, the online tasks drive the offline tasks off so that they occupy all SMT siblings exclusively. This, in turn, can starve an offline task and keep it from releasing resources that other, higher-priority tasks are waiting for.

Hence, this patch introduces another mechanism to alleviate the situation. For offline tasks, a metric tracking the maximum expelling duration is set up, with a default value of 5 seconds. If such an expelled offline task exists, all offline tasks are allowed to run into a short sleep (msleep) loop in the kernel before they return to user mode; further, if the two SMT siblings (such as cA and cB) are idle or have no online tasks to run, these offline tasks continue to run in user mode at the next schedule.

kernel options:
CONFIG_QOS_SCHED_SMT_EXPELLER=y 
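
For illustration only, here is a minimal standalone sketch of the pick-time decision this series adds to pick_next_task_fair(): an expelled CPU whose runnable tasks are all offline refuses to pick one and arms the overload timer, and once that timer (sysctl_overload_detect_period, 5000 ms by default) has fired and set qos_cpu_overload, offline tasks may run again. The names qos_smt_expelled, qos_cpu_overload, QOS_LEVEL_ONLINE/OFFLINE and the 5000 ms default come from the diff below; the struct cpu_state harness and main() are hypothetical userspace scaffolding, not kernel code.

```c
#include <stdbool.h>
#include <stdio.h>

enum qos_level { QOS_LEVEL_OFFLINE = -1, QOS_LEVEL_ONLINE = 0 };

struct cpu_state {
	enum qos_level sibling_status;  /* models per_cpu(qos_smt_status, sibling) */
	bool only_offline_runnable;     /* models qos_sched_idle_cpu(): all runnable tasks are offline */
	bool qos_cpu_overload;          /* set once the overload hrtimer fires */
	bool qos_timer_active;          /* models qos_timer_is_activated() */
};

/* Models qos_smt_expelled(): the sibling runs online work and we only have offline tasks. */
static bool qos_smt_expelled(const struct cpu_state *cs)
{
	return cs->sibling_status == QOS_LEVEL_ONLINE && cs->only_offline_runnable;
}

/*
 * Models the pick_next_task_fair() hunk: refuse to pick an offline task and
 * arm the timer, unless the overload timer has already expired (qos_cpu_overload set).
 */
static bool may_pick_offline_task(struct cpu_state *cs)
{
	if (qos_smt_expelled(cs) && !cs->qos_cpu_overload) {
		if (!cs->qos_timer_active)
			cs->qos_timer_active = true;  /* start_qos_hrtimer(), 5000 ms default */
		return false;                         /* pick_next_task_fair() returns NULL */
	}
	return true;
}

int main(void)
{
	struct cpu_state cs = {
		.sibling_status = QOS_LEVEL_ONLINE,
		.only_offline_runnable = true,
	};

	printf("before timeout: may pick offline task? %d\n", may_pick_offline_task(&cs));
	cs.qos_cpu_overload = true;  /* qos_overload_timer_handler() ran after 5 s */
	printf("after timeout:  may pick offline task? %d\n", may_pick_offline_task(&cs));
	return 0;
}
```

The series also adds a qos_smt_expell_switch static key and a nosmtexpell boot parameter (see the __setup() hunk below) that disable the expelling logic entirely.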
 
Link:https://gitee.com/openeuler/kernel/pulls/640 

Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com> 
Reviewed-by: Liu Chao <liuchao173@huawei.com> 
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com> 
@@ -140,7 +140,7 @@ CONFIG_CGROUP_V1_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
-# CONFIG_QOS_SCHED_SMT_EXPELLER is not set
+CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_FAIR_GROUP_SCHED=y
 CONFIG_QOS_SCHED_PRIO_LB=y
 CONFIG_CFS_BANDWIDTH=y
......
@@ -159,7 +159,7 @@ CONFIG_CGROUP_V1_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
-# CONFIG_QOS_SCHED_SMT_EXPELLER is not set
+CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_FAIR_GROUP_SCHED=y
 CONFIG_QOS_SCHED_PRIO_LB=y
 CONFIG_CFS_BANDWIDTH=y
......
@@ -137,6 +137,7 @@ static DEFINE_PER_CPU(int, qos_cpu_overload);
 unsigned int sysctl_overload_detect_period = 5000;  /* in ms */
 unsigned int sysctl_offline_wait_interval = 100;  /* in ms */
 static int unthrottle_qos_cfs_rqs(int cpu);
+static bool qos_smt_expelled(int this_cpu);
 #endif
 #ifdef CONFIG_QOS_SCHED_PRIO_LB
@@ -4886,6 +4887,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
         struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
         struct sched_entity *se;
         long task_delta, idle_task_delta, dequeue = 1;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        long qos_idle_delta;
+#endif
         raw_spin_lock(&cfs_b->lock);
         /* This will start the period timer if necessary */
@@ -4917,6 +4921,10 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
         task_delta = cfs_rq->h_nr_running;
         idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
         for_each_sched_entity(se) {
                 struct cfs_rq *qcfs_rq = cfs_rq_of(se);
                 /* throttled entity or throttle-on-deactivate */
@@ -4932,6 +4940,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
                 qcfs_rq->h_nr_running -= task_delta;
                 qcfs_rq->idle_h_nr_running -= idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
+#endif
                 if (qcfs_rq->load.weight)
                         dequeue = 0;
@@ -4959,6 +4970,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
         struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
         struct sched_entity *se;
         long task_delta, idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        long qos_idle_delta;
+#endif
         se = cfs_rq->tg->se[cpu_of(rq)];
@@ -4987,6 +5001,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
         task_delta = cfs_rq->h_nr_running;
         idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
         for_each_sched_entity(se) {
                 if (se->on_rq)
                         break;
@@ -4995,6 +5012,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
                 cfs_rq->h_nr_running += task_delta;
                 cfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
                 /* end evaluation on encountering a throttled cfs_rq */
                 if (cfs_rq_throttled(cfs_rq))
@@ -5009,7 +5029,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
                 cfs_rq->h_nr_running += task_delta;
                 cfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
                 /* end evaluation on encountering a throttled cfs_rq */
                 if (cfs_rq_throttled(cfs_rq))
@@ -5643,6 +5665,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         struct cfs_rq *cfs_rq;
         struct sched_entity *se = &p->se;
         int idle_h_nr_running = task_has_idle_policy(p);
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        int qos_idle_h_nr_running = task_has_qos_idle_policy(p);
+#endif
         int task_new = !(flags & ENQUEUE_WAKEUP);
         unsigned int prev_nr = rq->cfs.h_nr_running;
@@ -5670,6 +5695,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                 cfs_rq->h_nr_running++;
                 cfs_rq->idle_h_nr_running += idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
+#endif
                 /* end evaluation on encountering a throttled cfs_rq */
                 if (cfs_rq_throttled(cfs_rq))
@@ -5687,6 +5715,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                 cfs_rq->h_nr_running++;
                 cfs_rq->idle_h_nr_running += idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
+#endif
                 /* end evaluation on encountering a throttled cfs_rq */
                 if (cfs_rq_throttled(cfs_rq))
@@ -5761,6 +5792,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         struct sched_entity *se = &p->se;
         int task_sleep = flags & DEQUEUE_SLEEP;
         int idle_h_nr_running = task_has_idle_policy(p);
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        int qos_idle_h_nr_running = task_has_qos_idle_policy(p);
+#endif
         unsigned int prev_nr = rq->cfs.h_nr_running;
         bool was_sched_idle = sched_idle_rq(rq);
@@ -5772,6 +5806,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                 cfs_rq->h_nr_running--;
                 cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
+#endif
                 /* end evaluation on encountering a throttled cfs_rq */
                 if (cfs_rq_throttled(cfs_rq))
@@ -5801,6 +5838,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                 cfs_rq->h_nr_running--;
                 cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
+#endif
                 /* end evaluation on encountering a throttled cfs_rq */
                 if (cfs_rq_throttled(cfs_rq))
@@ -7465,6 +7505,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 }
 #ifdef CONFIG_QOS_SCHED
+static inline bool qos_timer_is_activated(int cpu)
+{
+        return hrtimer_active(per_cpu_ptr(&qos_overload_timer, cpu));
+}
+
+static inline void cancel_qos_timer(int cpu)
+{
+        hrtimer_cancel(per_cpu_ptr(&qos_overload_timer, cpu));
+}
 static inline bool is_offline_task(struct task_struct *p)
 {
@@ -7479,7 +7528,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
         struct sched_entity *se;
         unsigned int prev_nr = cfs_rq->h_nr_running;
         long task_delta, idle_task_delta, dequeue = 1;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        long qos_idle_delta;
+#endif
         se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
         /* freeze hierarchy runnable averages while throttled */
@@ -7489,6 +7540,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
         task_delta = cfs_rq->h_nr_running;
         idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
         for_each_sched_entity(se) {
                 struct cfs_rq *qcfs_rq = cfs_rq_of(se);
                 /* throttled entity or throttle-on-deactivate */
@@ -7504,6 +7558,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
                 qcfs_rq->h_nr_running -= task_delta;
                 qcfs_rq->idle_h_nr_running -= idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
+#endif
                 if (qcfs_rq->load.weight)
                         dequeue = 0;
@@ -7516,7 +7573,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
         }
-        if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
+        if (!qos_timer_is_activated(cpu_of(rq)))
                 start_qos_hrtimer(cpu_of(rq));
         cfs_rq->throttled = QOS_THROTTLED;
@@ -7531,6 +7588,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
         struct sched_entity *se;
         unsigned int prev_nr = cfs_rq->h_nr_running;
         long task_delta, idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        long qos_idle_delta;
+#endif
         se = cfs_rq->tg->se[cpu_of(rq)];
@@ -7552,6 +7612,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
         task_delta = cfs_rq->h_nr_running;
         idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
         for_each_sched_entity(se) {
                 if (se->on_rq)
                         break;
@@ -7561,6 +7624,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
                 cfs_rq->h_nr_running += task_delta;
                 cfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
                 if (cfs_rq_throttled(cfs_rq))
                         goto unthrottle_throttle;
@@ -7574,7 +7640,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
                 cfs_rq->h_nr_running += task_delta;
                 cfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+                cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
                 /* end evaluation on encountering a throttled cfs_rq */
                 if (cfs_rq_throttled(cfs_rq))
                         goto unthrottle_throttle;
@@ -7605,10 +7673,6 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
         }
         assert_list_leaf_cfs_rq(rq);
-
-        /* Determine whether we need to wake up potentially idle CPU: */
-        if (rq->curr == rq->idle && rq->cfs.nr_running)
-                resched_curr(rq);
 }
 static int __unthrottle_qos_cfs_rqs(int cpu)
@@ -7630,10 +7694,10 @@ static int __unthrottle_qos_cfs_rqs(int cpu)
 static int unthrottle_qos_cfs_rqs(int cpu)
 {
         int res;
         res = __unthrottle_qos_cfs_rqs(cpu);
-        if (res)
-                hrtimer_cancel(&(per_cpu(qos_overload_timer, cpu)));
+        if (qos_timer_is_activated(cpu) && !qos_smt_expelled(cpu))
+                cancel_qos_timer(cpu);
         return res;
 }
@@ -7686,8 +7750,13 @@ static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer)
         struct rq *rq = this_rq();
         rq_lock_irqsave(rq, &rf);
-        if (__unthrottle_qos_cfs_rqs(smp_processor_id()))
-                __this_cpu_write(qos_cpu_overload, 1);
+        __unthrottle_qos_cfs_rqs(smp_processor_id());
+        __this_cpu_write(qos_cpu_overload, 1);
+
+        /* Determine whether we need to wake up potentially idle CPU. */
+        if (rq->curr == rq->idle && rq->cfs.nr_running)
+                resched_curr(rq);
+
         rq_unlock_irqrestore(rq, &rf);
         return HRTIMER_NORESTART;
@@ -7727,9 +7796,24 @@ static void qos_schedule_throttle(struct task_struct *p)
         }
 }
+#ifndef CONFIG_QOS_SCHED_SMT_EXPELLER
+static bool qos_smt_expelled(int this_cpu)
+{
+        return false;
+}
+#endif
 #endif
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+DEFINE_STATIC_KEY_TRUE(qos_smt_expell_switch);
+
+static int __init qos_sched_smt_noexpell_setup(char *__unused)
+{
+        static_branch_disable(&qos_smt_expell_switch);
+        return 1;
+}
+__setup("nosmtexpell", qos_sched_smt_noexpell_setup);
+
 static bool qos_smt_check_siblings_status(int this_cpu)
 {
         int cpu;
@@ -7748,14 +7832,25 @@ static bool qos_smt_check_siblings_status(int this_cpu)
         return false;
 }
+static bool qos_sched_idle_cpu(int this_cpu)
+{
+        struct rq *rq = cpu_rq(this_cpu);
+
+        return unlikely(rq->nr_running == rq->cfs.qos_idle_h_nr_running &&
+                        rq->nr_running);
+}
+
 static bool qos_smt_expelled(int this_cpu)
 {
+        if (!static_branch_likely(&qos_smt_expell_switch))
+                return false;
+
         /*
          * The qos_smt_status of siblings cpu is online, and current cpu only has
          * offline tasks enqueued, there is not suitable task,
          * so pick_next_task_fair return null.
          */
-        if (qos_smt_check_siblings_status(this_cpu) && sched_idle_cpu(this_cpu))
+        if (qos_smt_check_siblings_status(this_cpu) && qos_sched_idle_cpu(this_cpu))
                 return true;
         return false;
@@ -7806,15 +7901,29 @@ static void qos_smt_send_ipi(int this_cpu)
 static void qos_smt_expel(int this_cpu, struct task_struct *p)
 {
+        if (!static_branch_likely(&qos_smt_expell_switch))
+                return;
+
         if (qos_smt_update_status(p))
                 qos_smt_send_ipi(this_cpu);
 }
+static inline bool qos_smt_enabled(void)
+{
+        if (!static_branch_likely(&qos_smt_expell_switch))
+                return false;
+
+        if (!sched_smt_active())
+                return false;
+
+        return true;
+}
+
 static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
 {
         int cpu;
-        if (!sched_smt_active())
+        if (!qos_smt_enabled())
                 return false;
         for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
@@ -7835,7 +7944,7 @@ static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
                 }
                 if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_OFFLINE &&
-                    rq->curr == rq->idle && sched_idle_cpu(this_cpu)) {
+                    rq->curr == rq->idle && qos_sched_idle_cpu(this_cpu)) {
                         trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu));
                         return true;
                 }
@@ -7906,8 +8015,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 again:
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
-        if (qos_smt_expelled(this_cpu)) {
+        if (qos_smt_expelled(this_cpu) && !__this_cpu_read(qos_cpu_overload)) {
                 __this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE);
+
+                if (!qos_timer_is_activated(this_cpu))
+                        start_qos_hrtimer(this_cpu);
+
                 schedstat_inc(rq->curr->se.statistics.nr_qos_smt_expelled);
                 trace_sched_qos_smt_expelled(rq->curr, per_cpu(qos_smt_status, this_cpu));
                 return NULL;
@@ -8119,7 +8232,8 @@ done: __maybe_unused;
                 goto again;
         }
-        __this_cpu_write(qos_cpu_overload, 0);
+        if (!qos_smt_expelled(cpu_of(rq)))
+                __this_cpu_write(qos_cpu_overload, 0);
 #endif
         /*
          * rq is about to be idle, check if we need to update the
......
@@ -650,6 +650,12 @@ struct cfs_rq {
         unsigned int            forceidle_seq;
         KABI_FILL_HOLE(unsigned int kabi_hole)
         u64                     min_vruntime_fi;
+#elif defined CONFIG_QOS_SCHED_SMT_EXPELLER && !defined(__GENKSYMS__)
+        union {
+                unsigned int    qos_idle_h_nr_running;          /* qos_level:-1 */
+                unsigned long   qos_idle_h_nr_running_padding;
+        };
+        KABI_FILL_HOLE(unsigned long kabi_hole)
 #else
         KABI_RESERVE(3)
         KABI_RESERVE(4)
@@ -3028,5 +3034,19 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 }
 #endif
+
+#ifdef CONFIG_QOS_SCHED
+static inline int qos_idle_policy(int policy)
+{
+        return policy == QOS_LEVEL_OFFLINE;
+}
+#endif
+
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+static inline int task_has_qos_idle_policy(struct task_struct *p)
+{
+        return qos_idle_policy(task_group(p)->qos_level) && p->policy == SCHED_IDLE;
+}
+#endif
 void swake_up_all_locked(struct swait_queue_head *q);
 void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);