diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 4461fcfd7592da92fc72024bf9ca46ade9524224..7701377126be064ebdb2686985c8c0443666455f 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -140,6 +140,7 @@ CONFIG_CGROUP_WRITEBACK=y
 CONFIG_CGROUP_V1_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
+CONFIG_QOS_SCHED_MULTILEVEL=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
 CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_FAIR_GROUP_SCHED=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index dfc9088ac7dc69f9998c4ad4c82de2be10694ec7..62a8e5c1af11c1f05b659574dd71f2ceceb6dd25 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -158,6 +158,7 @@ CONFIG_CGROUP_WRITEBACK=y
 CONFIG_CGROUP_V1_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
+CONFIG_QOS_SCHED_MULTILEVEL=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
 CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_FAIR_GROUP_SCHED=y
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 933ffee18b4bfacc32496d99ace9aeeeb8423636..4d6bbc0934c9ed8534d291d586192aaa6e4ec587 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -83,6 +83,10 @@ extern unsigned int sysctl_overload_detect_period;
 extern unsigned int sysctl_offline_wait_interval;
 #endif
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+extern unsigned int sysctl_qos_level_weights[];
+#endif
+
 #ifdef CONFIG_QOS_SCHED_PRIO_LB
 extern unsigned int sysctl_sched_prio_load_balance_enabled;
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index 52115d61b88fc605c7cf6d20147f86a09bc2b3ee..b7fbf5b9bdf28eed86b774ad474aa1953b9ca2d8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -977,6 +977,15 @@ config QOS_SCHED
        default n
 
+config QOS_SCHED_MULTILEVEL
+       bool "Multiple qos level task scheduling"
+       depends on QOS_SCHED
+       default n
+       help
+         This feature enables multiple QoS levels for task scheduling.
+         It expands qos_level to [-2, 2] to distinguish tasks that are
+         expected to run with extremely high or low priority.
+
 config QOS_SCHED_SMT_EXPELLER
        bool "Qos smt expeller"
        depends on SCHED_SMT
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ee6e459611f7a0692e6bf7937db048880ffffdf5..454bca0c9c6bd7bf951151243ced900c2592fcee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6437,7 +6437,7 @@ static int __sched_setscheduler(struct task_struct *p,
         * other than SCHED_IDLE, the online task preemption and cpu resource
         * isolation will be invalid, so return -EINVAL in this case.
*/ - if (unlikely(task_group(p)->qos_level == -1 && !idle_policy(policy))) { + if (unlikely(is_offline_level(task_group(p)->qos_level) && !idle_policy(policy))) { retval = -EINVAL; goto unlock; } @@ -8562,7 +8562,7 @@ static void sched_change_qos_group(struct task_struct *tsk, struct task_group *t */ if (!(tsk->flags & PF_EXITING) && !task_group_is_autogroup(tg) && - (tg->qos_level == -1)) { + (is_offline_level(tg->qos_level))) { attr.sched_priority = 0; attr.sched_policy = SCHED_IDLE; __setscheduler_params(tsk, &attr); @@ -8590,7 +8590,7 @@ void sched_move_offline_task(struct task_struct *p) { struct offline_args *args; - if (unlikely(task_group(p)->qos_level != -1)) + if (unlikely(!is_offline_level(task_group(p)->qos_level))) return; args = kmalloc(sizeof(struct offline_args), GFP_ATOMIC); @@ -9463,7 +9463,7 @@ static int tg_change_scheduler(struct task_group *tg, void *data) struct cgroup_subsys_state *css = &tg->css; tg->qos_level = qos_level; - if (qos_level == -1) + if (is_offline_level(qos_level)) policy = SCHED_IDLE; else policy = SCHED_NORMAL; @@ -9485,19 +9485,27 @@ static int cpu_qos_write(struct cgroup_subsys_state *css, if (!tg->se[0]) return -EINVAL; +#ifdef CONFIG_QOS_SCHED_MULTILEVEL + if (qos_level > QOS_LEVEL_HIGH_EX || qos_level < QOS_LEVEL_OFFLINE_EX) +#else if (qos_level != -1 && qos_level != 0) +#endif return -EINVAL; if (tg->qos_level == qos_level) goto done; +#ifdef CONFIG_QOS_SCHED_MULTILEVEL + if (!is_normal_level(tg->qos_level)) +#else if (tg->qos_level == -1 && qos_level == 0) +#endif return -EINVAL; cpus_read_lock(); - if (qos_level == -1) + if (is_offline_level(qos_level)) cfs_bandwidth_usage_inc(); - else + else if (is_offline_level(tg->qos_level) && !is_offline_level(qos_level)) cfs_bandwidth_usage_dec(); cpus_read_unlock(); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0b168eacd8bf6f165653a483bd2e2b39dd7e8113..c7b560ffb75edd63b29dfc5b9bc8075dd8e637b8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -140,6 +140,23 @@ static int unthrottle_qos_cfs_rqs(int cpu); static bool qos_smt_expelled(int this_cpu); #endif +#ifdef CONFIG_QOS_SCHED_MULTILEVEL +#define QOS_LEVEL_WEIGHT_OFFLINE_EX 1 +#define QOS_LEVEL_WEIGHT_OFFLINE 10 +#define QOS_LEVEL_WEIGHT_ONLINE 100 +#define QOS_LEVEL_WEIGHT_HIGH 1000 +#define QOS_LEVEL_WEIGHT_HIGH_EX 10000 + +unsigned int sysctl_qos_level_weights[5] = { + QOS_LEVEL_WEIGHT_OFFLINE_EX, + QOS_LEVEL_WEIGHT_OFFLINE, + QOS_LEVEL_WEIGHT_ONLINE, + QOS_LEVEL_WEIGHT_HIGH, + QOS_LEVEL_WEIGHT_HIGH_EX, +}; +static long qos_reweight(long shares, struct task_group *tg); +#endif + #ifdef CONFIG_QOS_SCHED_PRIO_LB unsigned int sysctl_sched_prio_load_balance_enabled; #endif @@ -2987,7 +3004,7 @@ adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *), { struct task_group *tg = task_group(task_of(se)); - if (sysctl_sched_prio_load_balance_enabled && tg->qos_level == -1) + if (sysctl_sched_prio_load_balance_enabled && is_offline_level(tg->qos_level)) (*list_op)(&se->group_node, &rq->cfs_offline_tasks); else (*list_op)(&se->group_node, &rq->cfs_tasks); @@ -3217,6 +3234,9 @@ static long calc_group_shares(struct cfs_rq *cfs_rq) struct task_group *tg = cfs_rq->tg; tg_shares = READ_ONCE(tg->shares); +#ifdef CONFIG_QOS_SCHED_MULTILEVEL + tg_shares = qos_reweight(tg_shares, tg); +#endif load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg); @@ -3265,6 +3285,9 @@ static void update_cfs_group(struct sched_entity *se) #ifndef CONFIG_SMP shares = READ_ONCE(gcfs_rq->tg->shares); +#ifdef 
+       shares = qos_reweight(shares, gcfs_rq->tg);
+#endif
 
        if (likely(se->load.weight == shares))
                return;
@@ -4494,6 +4517,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
                update_min_vruntime(cfs_rq);
 }
 
+
 /*
  * Preempt the current task with a newly woken task if needed:
  */
@@ -7548,7 +7572,7 @@ static inline void cancel_qos_timer(int cpu)
 
 static inline bool is_offline_task(struct task_struct *p)
 {
-       return task_group(p)->qos_level == QOS_LEVEL_OFFLINE;
+       return task_group(p)->qos_level < QOS_LEVEL_ONLINE;
 }
 
 static void start_qos_hrtimer(int cpu);
@@ -7739,7 +7763,7 @@ static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
                return false;
        }
 
-       if (unlikely(cfs_rq && cfs_rq->tg->qos_level < 0 &&
+       if (unlikely(cfs_rq && is_offline_level(cfs_rq->tg->qos_level) &&
                     !sched_idle_cpu(smp_processor_id()) &&
                     cfs_rq->h_nr_running == cfs_rq->idle_h_nr_running)) {
                throttle_qos_cfs_rq(cfs_rq);
@@ -7755,7 +7779,7 @@ static inline void unthrottle_qos_sched_group(struct cfs_rq *cfs_rq)
        struct rq_flags rf;
 
        rq_lock_irqsave(rq, &rf);
-       if (cfs_rq->tg->qos_level == -1 && cfs_rq_throttled(cfs_rq))
+       if (is_offline_level(cfs_rq->tg->qos_level) && cfs_rq_throttled(cfs_rq))
                unthrottle_qos_cfs_rq(cfs_rq);
        rq_unlock_irqrestore(rq, &rf);
 }
@@ -7768,7 +7792,7 @@ void sched_qos_offline_wait(void)
                rcu_read_lock();
                qos_level = task_group(current)->qos_level;
                rcu_read_unlock();
-               if (qos_level != -1 || fatal_signal_pending(current))
+               if (!is_offline_level(qos_level) || fatal_signal_pending(current))
                        break;
 
                schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval));
@@ -7835,6 +7859,39 @@ static bool qos_smt_expelled(int this_cpu)
 #endif
 #endif
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+static long qos_reweight(long shares, struct task_group *tg)
+{
+       long qos_weight = 100;
+       long div = 100;
+       long scale_shares;
+
+       switch (tg->qos_level) {
+       case QOS_LEVEL_OFFLINE_EX:
+               qos_weight = sysctl_qos_level_weights[0];
+               break;
+       case QOS_LEVEL_OFFLINE:
+               qos_weight = sysctl_qos_level_weights[1];
+               break;
+       case QOS_LEVEL_ONLINE:
+               qos_weight = sysctl_qos_level_weights[2];
+               break;
+       case QOS_LEVEL_HIGH:
+               qos_weight = sysctl_qos_level_weights[3];
+               break;
+       case QOS_LEVEL_HIGH_EX:
+               qos_weight = sysctl_qos_level_weights[4];
+               break;
+       }
+       if (qos_weight > LONG_MAX / shares)
+               scale_shares = LONG_MAX / div;
+       else
+               scale_shares = shares * qos_weight / div;
+       scale_shares = clamp_t(long, scale_shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
+       return scale_shares;
+}
+#endif
+
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
 DEFINE_STATIC_KEY_TRUE(qos_smt_expell_switch);
@@ -7891,7 +7948,7 @@ static bool qos_smt_update_status(struct task_struct *p)
 {
        int status = QOS_LEVEL_OFFLINE;
 
-       if (p != NULL && task_group(p)->qos_level >= QOS_LEVEL_ONLINE)
+       if (p != NULL && !is_offline_level(task_group(p)->qos_level))
                status = QOS_LEVEL_ONLINE;
 
        if (__this_cpu_read(qos_smt_status) == status)
@@ -7969,7 +8026,7 @@ static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
         * and current cpu only has SCHED_IDLE tasks enqueued.
         */
        if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE &&
-           task_group(current)->qos_level < QOS_LEVEL_ONLINE) {
+           is_offline_level(task_group(current)->qos_level)) {
                trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu));
                return true;
        }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 10b0d7e52a9893c2277b79d02808da3339a39333..27fd1240ac85baf58766664306c9f698187f5a19 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1182,11 +1182,20 @@ static inline int cpu_of(struct rq *rq)
 }
 
 #ifdef CONFIG_QOS_SCHED
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
 enum task_qos_level {
+       QOS_LEVEL_OFFLINE_EX = -2,
        QOS_LEVEL_OFFLINE = -1,
        QOS_LEVEL_ONLINE = 0,
-       QOS_LEVEL_MAX
+       QOS_LEVEL_HIGH = 1,
+       QOS_LEVEL_HIGH_EX = 2
 };
+#else
+enum task_qos_level {
+       QOS_LEVEL_OFFLINE = -1,
+       QOS_LEVEL_ONLINE = 0,
+};
+#endif
 void init_qos_hrtimer(int cpu);
 #endif
@@ -3036,7 +3045,22 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 #ifdef CONFIG_QOS_SCHED
 static inline int qos_idle_policy(int policy)
 {
-       return policy == QOS_LEVEL_OFFLINE;
+       return policy <= QOS_LEVEL_OFFLINE;
+}
+
+static inline int is_high_level(long qos_level)
+{
+       return qos_level > QOS_LEVEL_ONLINE;
+}
+
+static inline int is_normal_level(long qos_level)
+{
+       return qos_level == QOS_LEVEL_ONLINE;
+}
+
+static inline int is_offline_level(long qos_level)
+{
+       return qos_level < QOS_LEVEL_ONLINE;
 }
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 60079815aafdeb795a7dc18c0dc46d493749bf8b..e22228e55afcdda84eff8422f60cf1bf72ec22d1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2718,6 +2718,15 @@ static struct ctl_table kern_table[] = {
                .extra2         = &one_thousand,
        },
 #endif
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+       {
+               .procname       = "qos_level_weights",
+               .data           = &sysctl_qos_level_weights,
+               .maxlen         = 5*sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#endif
 #ifdef CONFIG_QOS_SCHED_PRIO_LB
        {
                .procname       = "sched_prio_load_balance_enabled",
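
Note on the scaling math: qos_reweight() multiplies a group's shares by the per-level weight and divides by 100, so with the default weights (1/10/100/1000/10000) online groups keep their shares while offline groups are scaled down and high-priority groups are scaled up by powers of ten. The standalone program below is a minimal userspace sketch of that arithmetic, not kernel code; MIN_SHARES, MAX_SHARES, and the scale_load() shift are assumed to be the usual kernel defaults (2, 1 << 18, and 10 bits on 64-bit), since this patch does not define them.

#include <limits.h>
#include <stdio.h>

#define MIN_SHARES 2L
#define MAX_SHARES (1L << 18)
#define SCALE_SHIFT 10                 /* assumed scale_load() shift on 64-bit */

/* mirrors the default sysctl_qos_level_weights[] in this patch */
static const long weights[5] = { 1, 10, 100, 1000, 10000 };

static long scale_load(long w)
{
        return w << SCALE_SHIFT;
}

static long clamp_long(long v, long lo, long hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

/* same arithmetic as qos_reweight(): weight/100 ratio with overflow guard */
static long qos_reweight(long shares, int qos_level)
{
        long qos_weight = weights[qos_level + 2];   /* levels -2..2 -> index 0..4 */
        long div = 100;
        long scale_shares;

        if (qos_weight > LONG_MAX / shares)         /* shares is always positive */
                scale_shares = LONG_MAX / div;
        else
                scale_shares = shares * qos_weight / div;
        return clamp_long(scale_shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
}

int main(void)
{
        long shares = scale_load(1024);             /* default cpu.shares, scaled */
        int level;

        for (level = -2; level <= 2; level++)
                printf("qos_level %+d -> effective shares %ld\n",
                       level, qos_reweight(shares, level));
        return 0;
}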
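The new ctl_table entry exposes all five weights through a single proc_dointvec handler, so they can be read and rewritten as a whitespace-separated quintuple. A hedged usage sketch follows, assuming the knob lands at /proc/sys/kernel/qos_level_weights (the conventional path for a kern_table entry); the doubling of the last field is just an illustrative tweak.

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/qos_level_weights", "r+");
        int w[5];

        if (!f) {
                perror("qos_level_weights");
                return 1;
        }
        if (fscanf(f, "%d %d %d %d %d", &w[0], &w[1], &w[2], &w[3], &w[4]) != 5) {
                fclose(f);
                return 1;
        }
        printf("current: %d %d %d %d %d\n", w[0], w[1], w[2], w[3], w[4]);

        /* e.g. double the weight of QOS_LEVEL_HIGH_EX relative to the others */
        rewind(f);
        fprintf(f, "%d %d %d %d %d\n", w[0], w[1], w[2], w[3], w[4] * 2);
        fclose(f);
        return 0;
}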
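On the cgroup side, cpu_qos_write() now accepts any level in [-2, 2] when CONFIG_QOS_SCHED_MULTILEVEL is set (still only -1 and 0 otherwise) and rejects transitions out of a non-normal level. A minimal sketch of driving it from userspace; the control file name cpu.qos_level and the cgroup v1 mount point are assumptions, since the cftype definition is outside this diff.

#include <stdio.h>

/* hypothetical helper: write a qos level into a group's cpu.qos_level file */
static int set_qos_level(const char *cgroup, int level)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), "%s/cpu.qos_level", cgroup);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fprintf(f, "%d\n", level);
        return fclose(f);   /* -EINVAL for rejected levels surfaces on write/close */
}

int main(void)
{
        /* assumed cgroup v1 cpu hierarchy path for an offline batch group */
        return set_qos_level("/sys/fs/cgroup/cpu/batch", -2) ? 1 : 0;
}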