diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 02f5281eb0184aca38bfb34a818746d2ae84bb8d..1eb5b7a052f1daaff0543a10fc05277c7e4f577b 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -140,7 +140,7 @@ CONFIG_CGROUP_V1_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
-# CONFIG_QOS_SCHED_SMT_EXPELLER is not set
+CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_FAIR_GROUP_SCHED=y
 CONFIG_QOS_SCHED_PRIO_LB=y
 CONFIG_CFS_BANDWIDTH=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index 56f5f9845f9cf021f8c957c6e83f47efb6d8ef6b..11323cdc33014d7be4ffb98b86e8fce77704a594 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -159,7 +159,7 @@ CONFIG_CGROUP_V1_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
-# CONFIG_QOS_SCHED_SMT_EXPELLER is not set
+CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_FAIR_GROUP_SCHED=y
 CONFIG_QOS_SCHED_PRIO_LB=y
 CONFIG_CFS_BANDWIDTH=y
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a28eb51b3c724b24b2328a09176f89eac4b421c3..32ef2dab67080f9cba417574454fafa81ca92ad1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -137,6 +137,7 @@ static DEFINE_PER_CPU(int, qos_cpu_overload);
 unsigned int sysctl_overload_detect_period = 5000; /* in ms */
 unsigned int sysctl_offline_wait_interval = 100; /* in ms */
 static int unthrottle_qos_cfs_rqs(int cpu);
+static bool qos_smt_expelled(int this_cpu);
 #endif
 
 #ifdef CONFIG_QOS_SCHED_PRIO_LB
@@ -4886,6 +4887,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta, dequeue = 1;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	long qos_idle_delta;
+#endif
 
 	raw_spin_lock(&cfs_b->lock);
 	/* This will start the period timer if necessary */
@@ -4917,6 +4921,10 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	task_delta = cfs_rq->h_nr_running;
 	idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
+
 	for_each_sched_entity(se) {
 		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 		/* throttled entity or throttle-on-deactivate */
@@ -4932,6 +4940,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
 		qcfs_rq->h_nr_running -= task_delta;
 		qcfs_rq->idle_h_nr_running -= idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
+#endif
 
 		if (qcfs_rq->load.weight)
 			dequeue = 0;
@@ -4959,6 +4970,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	long qos_idle_delta;
+#endif
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
 
@@ -4987,6 +5001,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	task_delta = cfs_rq->h_nr_running;
 	idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
@@ -4995,6 +5012,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 		cfs_rq->h_nr_running += task_delta;
 		cfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
@@ -5009,7 +5029,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 		cfs_rq->h_nr_running += task_delta;
 		cfs_rq->idle_h_nr_running += idle_task_delta;
-
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
@@ -5643,6 +5665,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 	int idle_h_nr_running = task_has_idle_policy(p);
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	int qos_idle_h_nr_running = task_has_qos_idle_policy(p);
+#endif
 	int task_new = !(flags & ENQUEUE_WAKEUP);
 	unsigned int prev_nr = rq->cfs.h_nr_running;
 
@@ -5670,6 +5695,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		cfs_rq->h_nr_running++;
 		cfs_rq->idle_h_nr_running += idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
+#endif
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
@@ -5687,6 +5715,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		cfs_rq->h_nr_running++;
 		cfs_rq->idle_h_nr_running += idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
+#endif
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
@@ -5761,6 +5792,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 	int task_sleep = flags & DEQUEUE_SLEEP;
 	int idle_h_nr_running = task_has_idle_policy(p);
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	int qos_idle_h_nr_running = task_has_qos_idle_policy(p);
+#endif
 	unsigned int prev_nr = rq->cfs.h_nr_running;
 	bool was_sched_idle = sched_idle_rq(rq);
 
@@ -5772,6 +5806,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		cfs_rq->h_nr_running--;
 		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
+#endif
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
@@ -5801,6 +5838,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		cfs_rq->h_nr_running--;
 		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
+#endif
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
@@ -7465,6 +7505,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 }
 
 #ifdef CONFIG_QOS_SCHED
+static inline bool qos_timer_is_activated(int cpu)
+{
+	return hrtimer_active(per_cpu_ptr(&qos_overload_timer, cpu));
+}
+
+static inline void cancel_qos_timer(int cpu)
+{
+	hrtimer_cancel(per_cpu_ptr(&qos_overload_timer, cpu));
+}
 
 static inline bool is_offline_task(struct task_struct *p)
 {
@@ -7479,7 +7528,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	struct sched_entity *se;
 	unsigned int prev_nr = cfs_rq->h_nr_running;
 	long task_delta, idle_task_delta, dequeue = 1;
-
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	long qos_idle_delta;
+#endif
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
 	/* freeze hierarchy runnable averages while throttled */
@@ -7489,6 +7540,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 	task_delta = cfs_rq->h_nr_running;
 	idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
 	for_each_sched_entity(se) {
 		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 		/* throttled entity or throttle-on-deactivate */
@@ -7504,6 +7558,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 		qcfs_rq->h_nr_running -= task_delta;
 		qcfs_rq->idle_h_nr_running -= idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
+#endif
 
 		if (qcfs_rq->load.weight)
 			dequeue = 0;
@@ -7516,7 +7573,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	}
 
-	if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
+	if (!qos_timer_is_activated(cpu_of(rq)))
 		start_qos_hrtimer(cpu_of(rq));
 
 	cfs_rq->throttled = QOS_THROTTLED;
 
@@ -7531,6 +7588,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	struct sched_entity *se;
 	unsigned int prev_nr = cfs_rq->h_nr_running;
 	long task_delta, idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	long qos_idle_delta;
+#endif
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
 
@@ -7552,6 +7612,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 	task_delta = cfs_rq->h_nr_running;
 	idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
@@ -7561,6 +7624,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 		cfs_rq->h_nr_running += task_delta;
 		cfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
 
 		if (cfs_rq_throttled(cfs_rq))
 			goto unthrottle_throttle;
@@ -7574,7 +7640,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 		cfs_rq->h_nr_running += task_delta;
 		cfs_rq->idle_h_nr_running += idle_task_delta;
-
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
 			goto unthrottle_throttle;
@@ -7605,10 +7673,6 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	}
 
 	assert_list_leaf_cfs_rq(rq);
-
-	/* Determine whether we need to wake up potentially idle CPU: */
-	if (rq->curr == rq->idle && rq->cfs.nr_running)
-		resched_curr(rq);
 }
 
 static int __unthrottle_qos_cfs_rqs(int cpu)
@@ -7630,10 +7694,10 @@ static int unthrottle_qos_cfs_rqs(int cpu)
 {
 	int res;
-
 	res = __unthrottle_qos_cfs_rqs(cpu);
-	if (res)
-		hrtimer_cancel(&(per_cpu(qos_overload_timer, cpu)));
+
+	if (qos_timer_is_activated(cpu) && !qos_smt_expelled(cpu))
+		cancel_qos_timer(cpu);
 
 	return res;
 }
 
@@ -7686,8 +7750,13 @@ static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer)
 	struct rq *rq = this_rq();
 
 	rq_lock_irqsave(rq, &rf);
-	if (__unthrottle_qos_cfs_rqs(smp_processor_id()))
-		__this_cpu_write(qos_cpu_overload, 1);
+	__unthrottle_qos_cfs_rqs(smp_processor_id());
+	__this_cpu_write(qos_cpu_overload, 1);
+
+	/* Determine whether we need to wake up potentially idle CPU. */
+	if (rq->curr == rq->idle && rq->cfs.nr_running)
+		resched_curr(rq);
+
 	rq_unlock_irqrestore(rq, &rf);
 
 	return HRTIMER_NORESTART;
@@ -7727,9 +7796,24 @@ static void qos_schedule_throttle(struct task_struct *p)
 	}
 }
 
+#ifndef CONFIG_QOS_SCHED_SMT_EXPELLER
+static bool qos_smt_expelled(int this_cpu)
+{
+	return false;
+}
+#endif
 #endif
 
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+DEFINE_STATIC_KEY_TRUE(qos_smt_expell_switch);
+
+static int __init qos_sched_smt_noexpell_setup(char *__unused)
+{
+	static_branch_disable(&qos_smt_expell_switch);
+	return 1;
+}
+__setup("nosmtexpell", qos_sched_smt_noexpell_setup);
+
 static bool qos_smt_check_siblings_status(int this_cpu)
 {
 	int cpu;
@@ -7748,14 +7832,25 @@ static bool qos_smt_check_siblings_status(int this_cpu)
 	return false;
 }
 
+static bool qos_sched_idle_cpu(int this_cpu)
+{
+	struct rq *rq = cpu_rq(this_cpu);
+
+	return unlikely(rq->nr_running == rq->cfs.qos_idle_h_nr_running &&
+			rq->nr_running);
+}
+
 static bool qos_smt_expelled(int this_cpu)
 {
+	if (!static_branch_likely(&qos_smt_expell_switch))
+		return false;
+
 	/*
 	 * The qos_smt_status of siblings cpu is online, and current cpu only has
 	 * offline tasks enqueued, there is not suitable task,
 	 * so pick_next_task_fair return null.
 	 */
-	if (qos_smt_check_siblings_status(this_cpu) && sched_idle_cpu(this_cpu))
+	if (qos_smt_check_siblings_status(this_cpu) && qos_sched_idle_cpu(this_cpu))
 		return true;
 
 	return false;
@@ -7806,15 +7901,29 @@ static void qos_smt_send_ipi(int this_cpu)
 
 static void qos_smt_expel(int this_cpu, struct task_struct *p)
 {
+	if (!static_branch_likely(&qos_smt_expell_switch))
+		return;
+
 	if (qos_smt_update_status(p))
 		qos_smt_send_ipi(this_cpu);
 }
 
+static inline bool qos_smt_enabled(void)
+{
+	if (!static_branch_likely(&qos_smt_expell_switch))
+		return false;
+
+	if (!sched_smt_active())
+		return false;
+
+	return true;
+}
+
 static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
 {
 	int cpu;
 
-	if (!sched_smt_active())
+	if (!qos_smt_enabled())
 		return false;
 
 	for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
@@ -7835,7 +7944,7 @@ static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
 		}
 
 		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_OFFLINE &&
-		    rq->curr == rq->idle && sched_idle_cpu(this_cpu)) {
+		    rq->curr == rq->idle && qos_sched_idle_cpu(this_cpu)) {
 			trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu));
 			return true;
 		}
@@ -7906,8 +8015,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 
 again:
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
-	if (qos_smt_expelled(this_cpu)) {
+	if (qos_smt_expelled(this_cpu) && !__this_cpu_read(qos_cpu_overload)) {
 		__this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE);
+
+		if (!qos_timer_is_activated(this_cpu))
+			start_qos_hrtimer(this_cpu);
+
 		schedstat_inc(rq->curr->se.statistics.nr_qos_smt_expelled);
 		trace_sched_qos_smt_expelled(rq->curr, per_cpu(qos_smt_status, this_cpu));
 		return NULL;
@@ -8119,7 +8232,8 @@ done: __maybe_unused;
 		goto again;
 	}
 
-	__this_cpu_write(qos_cpu_overload, 0);
+	if (!qos_smt_expelled(cpu_of(rq)))
+		__this_cpu_write(qos_cpu_overload, 0);
 #endif
 	/*
 	 * rq is about to be idle, check if we need to update the
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6bb11d71caff750ac0672ecb340aecc544f097fa..3bf1e7e3f9958861d4a022720e6fbc4392f07b18 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,6 +650,12 @@ struct cfs_rq {
 	unsigned int forceidle_seq;
 	KABI_FILL_HOLE(unsigned int kabi_hole)
 	u64 min_vruntime_fi;
+#elif defined CONFIG_QOS_SCHED_SMT_EXPELLER && !defined(__GENKSYMS__)
+	union {
+		unsigned int qos_idle_h_nr_running; /* qos_level:-1 */
+		unsigned long qos_idle_h_nr_running_padding;
+	};
+	KABI_FILL_HOLE(unsigned long kabi_hole)
 #else
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
@@ -3028,5 +3034,19 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 }
 #endif
 
+#ifdef CONFIG_QOS_SCHED
+static inline int qos_idle_policy(int policy)
+{
+	return policy == QOS_LEVEL_OFFLINE;
+}
+#endif
+
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+static inline int task_has_qos_idle_policy(struct task_struct *p)
+{
+	return qos_idle_policy(task_group(p)->qos_level) && p->policy == SCHED_IDLE;
+}
+#endif
+
 void swake_up_all_locked(struct swait_queue_head *q);
 void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);