Unverified commit c4fb2bc6, authored by openeuler-ci-bot, committed by Gitee

!795 sched/fair: Introduce multiple qos level

Merge Pull Request from: @zhaowenhui8 
 
Expand qos_level from {-1, 0} to [-2, 2] to distinguish tasks expected to run
at extremely high or low priority. Use qos_level_weight to reweight the shares
when calculating a group's weight. Meanwhile, set an offline task's scheduling
policy to SCHED_IDLE so that it can be preempted at check_preempt_wakeup().

Kernel option:
CONFIG_QOS_SCHED_MULTILEVEL
 
Link: https://gitee.com/openeuler/kernel/pulls/795

Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
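
For orientation before the diff: a group's level is written through the cpu
cgroup attribute handled by cpu_qos_write() below. The sketch that follows is
hypothetical usage, not part of this patch; the mount point /sys/fs/cgroup/cpu
and the attribute name cpu.qos_level are assumptions inferred from the handler
name, so adjust them to your deployment.

/*
 * Hypothetical usage sketch (assumed paths, see note above): demote the
 * cgroup "batch" to the new extremely-low level QOS_LEVEL_OFFLINE_EX.
 */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/fs/cgroup/cpu/batch/cpu.qos_level", "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        fprintf(f, "%d\n", -2); /* valid range is now [-2, 2] */
        fclose(f);
        return 0;
}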
@@ -140,6 +140,7 @@ CONFIG_CGROUP_WRITEBACK=y
 CONFIG_CGROUP_V1_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
+CONFIG_QOS_SCHED_MULTILEVEL=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
 CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_FAIR_GROUP_SCHED=y
......
@@ -158,6 +158,7 @@ CONFIG_CGROUP_WRITEBACK=y
 CONFIG_CGROUP_V1_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
+CONFIG_QOS_SCHED_MULTILEVEL=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
 CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_FAIR_GROUP_SCHED=y
......
@@ -83,6 +83,10 @@ extern unsigned int sysctl_overload_detect_period;
 extern unsigned int sysctl_offline_wait_interval;
 #endif
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+extern unsigned int sysctl_qos_level_weights[];
+#endif
+
 #ifdef CONFIG_QOS_SCHED_PRIO_LB
 extern unsigned int sysctl_sched_prio_load_balance_enabled;
 #endif
......
@@ -977,6 +977,15 @@ config QOS_SCHED
         default n
 
+config QOS_SCHED_MULTILEVEL
+        bool "Multiple qos level task scheduling"
+        depends on QOS_SCHED
+        default n
+        help
+          This feature enables multiple QoS levels for task scheduling.
+          It expands qos_level to [-2, 2] to distinguish tasks expected
+          to run at extremely high or low priority.
+
 config QOS_SCHED_SMT_EXPELLER
         bool "Qos smt expeller"
         depends on SCHED_SMT
......
@@ -6437,7 +6437,7 @@ static int __sched_setscheduler(struct task_struct *p,
          * other than SCHED_IDLE, the online task preemption and cpu resource
          * isolation will be invalid, so return -EINVAL in this case.
          */
-        if (unlikely(task_group(p)->qos_level == -1 && !idle_policy(policy))) {
+        if (unlikely(is_offline_level(task_group(p)->qos_level) && !idle_policy(policy))) {
                 retval = -EINVAL;
                 goto unlock;
         }
@@ -8562,7 +8562,7 @@ static void sched_change_qos_group(struct task_struct *tsk, struct task_group *t
          */
         if (!(tsk->flags & PF_EXITING) &&
             !task_group_is_autogroup(tg) &&
-            (tg->qos_level == -1)) {
+            (is_offline_level(tg->qos_level))) {
                 attr.sched_priority = 0;
                 attr.sched_policy = SCHED_IDLE;
                 __setscheduler_params(tsk, &attr);
@@ -8590,7 +8590,7 @@ void sched_move_offline_task(struct task_struct *p)
 {
         struct offline_args *args;
 
-        if (unlikely(task_group(p)->qos_level != -1))
+        if (unlikely(!is_offline_level(task_group(p)->qos_level)))
                 return;
 
         args = kmalloc(sizeof(struct offline_args), GFP_ATOMIC);
@@ -9463,7 +9463,7 @@ static int tg_change_scheduler(struct task_group *tg, void *data)
         struct cgroup_subsys_state *css = &tg->css;
 
         tg->qos_level = qos_level;
-        if (qos_level == -1)
+        if (is_offline_level(qos_level))
                 policy = SCHED_IDLE;
         else
                 policy = SCHED_NORMAL;
@@ -9485,19 +9485,27 @@ static int cpu_qos_write(struct cgroup_subsys_state *css,
         if (!tg->se[0])
                 return -EINVAL;
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+        if (qos_level > QOS_LEVEL_HIGH_EX || qos_level < QOS_LEVEL_OFFLINE_EX)
+#else
         if (qos_level != -1 && qos_level != 0)
+#endif
                 return -EINVAL;
 
         if (tg->qos_level == qos_level)
                 goto done;
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+        if (!is_normal_level(tg->qos_level))
+#else
         if (tg->qos_level == -1 && qos_level == 0)
+#endif
                 return -EINVAL;
 
         cpus_read_lock();
-        if (qos_level == -1)
+        if (is_offline_level(qos_level))
                 cfs_bandwidth_usage_inc();
-        else
+        else if (is_offline_level(tg->qos_level) && !is_offline_level(qos_level))
                 cfs_bandwidth_usage_dec();
         cpus_read_unlock();
......
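
Note on the hunk above: with CONFIG_QOS_SCHED_MULTILEVEL the accepted range
widens from {-1, 0} to [QOS_LEVEL_OFFLINE_EX, QOS_LEVEL_HIGH_EX], but any group
whose current level is no longer the normal online level 0 is rejected by
!is_normal_level(tg->qos_level), preserving the old one-way rule that a demoted
group cannot be changed again. cfs_bandwidth_usage_inc() is called only when
entering an offline level, and cfs_bandwidth_usage_dec() only on an
offline-to-online transition, keeping the usage counter balanced.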
@@ -140,6 +140,23 @@ static int unthrottle_qos_cfs_rqs(int cpu);
 static bool qos_smt_expelled(int this_cpu);
 #endif
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+#define QOS_LEVEL_WEIGHT_OFFLINE_EX 1
+#define QOS_LEVEL_WEIGHT_OFFLINE 10
+#define QOS_LEVEL_WEIGHT_ONLINE 100
+#define QOS_LEVEL_WEIGHT_HIGH 1000
+#define QOS_LEVEL_WEIGHT_HIGH_EX 10000
+
+unsigned int sysctl_qos_level_weights[5] = {
+        QOS_LEVEL_WEIGHT_OFFLINE_EX,
+        QOS_LEVEL_WEIGHT_OFFLINE,
+        QOS_LEVEL_WEIGHT_ONLINE,
+        QOS_LEVEL_WEIGHT_HIGH,
+        QOS_LEVEL_WEIGHT_HIGH_EX,
+};
+
+static long qos_reweight(long shares, struct task_group *tg);
+#endif
 
 #ifdef CONFIG_QOS_SCHED_PRIO_LB
 unsigned int sysctl_sched_prio_load_balance_enabled;
 #endif
@@ -2987,7 +3004,7 @@ adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *),
 {
         struct task_group *tg = task_group(task_of(se));
 
-        if (sysctl_sched_prio_load_balance_enabled && tg->qos_level == -1)
+        if (sysctl_sched_prio_load_balance_enabled && is_offline_level(tg->qos_level))
                 (*list_op)(&se->group_node, &rq->cfs_offline_tasks);
         else
                 (*list_op)(&se->group_node, &rq->cfs_tasks);
@@ -3217,6 +3234,9 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
         struct task_group *tg = cfs_rq->tg;
 
         tg_shares = READ_ONCE(tg->shares);
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+        tg_shares = qos_reweight(tg_shares, tg);
+#endif
 
         load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
@@ -3265,6 +3285,9 @@ static void update_cfs_group(struct sched_entity *se)
 #ifndef CONFIG_SMP
         shares = READ_ONCE(gcfs_rq->tg->shares);
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+        shares = qos_reweight(shares, gcfs_rq->tg);
+#endif
 
         if (likely(se->load.weight == shares))
                 return;
@@ -4494,6 +4517,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         update_min_vruntime(cfs_rq);
 }
 
+
 /*
  * Preempt the current task with a newly woken task if needed:
  */
@@ -7548,7 +7572,7 @@ static inline void cancel_qos_timer(int cpu)
 static inline bool is_offline_task(struct task_struct *p)
 {
-        return task_group(p)->qos_level == QOS_LEVEL_OFFLINE;
+        return task_group(p)->qos_level < QOS_LEVEL_ONLINE;
 }
 
 static void start_qos_hrtimer(int cpu);
@@ -7739,7 +7763,7 @@ static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
                 return false;
         }
 
-        if (unlikely(cfs_rq && cfs_rq->tg->qos_level < 0 &&
+        if (unlikely(cfs_rq && is_offline_level(cfs_rq->tg->qos_level) &&
                      !sched_idle_cpu(smp_processor_id()) &&
                      cfs_rq->h_nr_running == cfs_rq->idle_h_nr_running)) {
                 throttle_qos_cfs_rq(cfs_rq);
@@ -7755,7 +7779,7 @@ static inline void unthrottle_qos_sched_group(struct cfs_rq *cfs_rq)
         struct rq_flags rf;
 
         rq_lock_irqsave(rq, &rf);
-        if (cfs_rq->tg->qos_level == -1 && cfs_rq_throttled(cfs_rq))
+        if (is_offline_level(cfs_rq->tg->qos_level) && cfs_rq_throttled(cfs_rq))
                 unthrottle_qos_cfs_rq(cfs_rq);
         rq_unlock_irqrestore(rq, &rf);
 }
@@ -7768,7 +7792,7 @@ void sched_qos_offline_wait(void)
                 rcu_read_lock();
                 qos_level = task_group(current)->qos_level;
                 rcu_read_unlock();
-                if (qos_level != -1 || fatal_signal_pending(current))
+                if (!is_offline_level(qos_level) || fatal_signal_pending(current))
                         break;
 
                 schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval));
@@ -7835,6 +7859,39 @@ static bool qos_smt_expelled(int this_cpu)
 #endif
 #endif
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+static long qos_reweight(long shares, struct task_group *tg)
+{
+        long qos_weight = 100;
+        long div = 100;
+        long scale_shares;
+
+        switch (tg->qos_level) {
+        case QOS_LEVEL_OFFLINE_EX:
+                qos_weight = sysctl_qos_level_weights[0];
+                break;
+        case QOS_LEVEL_OFFLINE:
+                qos_weight = sysctl_qos_level_weights[1];
+                break;
+        case QOS_LEVEL_ONLINE:
+                qos_weight = sysctl_qos_level_weights[2];
+                break;
+        case QOS_LEVEL_HIGH:
+                qos_weight = sysctl_qos_level_weights[3];
+                break;
+        case QOS_LEVEL_HIGH_EX:
+                qos_weight = sysctl_qos_level_weights[4];
+                break;
+        }
+        if (qos_weight > LONG_MAX / shares)
+                scale_shares = LONG_MAX / div;
+        else
+                scale_shares = shares * qos_weight / div;
+        scale_shares = clamp_t(long, scale_shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
+
+        return scale_shares;
+}
+#endif
+
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
 DEFINE_STATIC_KEY_TRUE(qos_smt_expell_switch);
@@ -7891,7 +7948,7 @@ static bool qos_smt_update_status(struct task_struct *p)
 {
         int status = QOS_LEVEL_OFFLINE;
 
-        if (p != NULL && task_group(p)->qos_level >= QOS_LEVEL_ONLINE)
+        if (p != NULL && !is_offline_level(task_group(p)->qos_level))
                 status = QOS_LEVEL_ONLINE;
 
         if (__this_cpu_read(qos_smt_status) == status)
@@ -7969,7 +8026,7 @@ static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
          * and current cpu only has SCHED_IDLE tasks enqueued.
          */
         if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE &&
-            task_group(current)->qos_level < QOS_LEVEL_ONLINE) {
+            is_offline_level(task_group(current)->qos_level)) {
                 trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu));
                 return true;
         }
......
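
To make qos_reweight() above concrete, here is a minimal userspace model of
its arithmetic under the default weight table; the kernel's overflow guard,
scale_load() and the clamp to [MIN_SHARES, MAX_SHARES] are deliberately
elided, so it shows only the shares * weight / 100 scaling:

/*
 * Userspace model of qos_reweight() with the default weights
 * {1, 10, 100, 1000, 10000} and div = 100 (kernel clamp elided).
 */
#include <stdio.h>

static const long default_weights[5] = { 1, 10, 100, 1000, 10000 };

static long model_reweight(long shares, int qos_level)
{
        long w = default_weights[qos_level + 2]; /* map [-2, 2] to [0, 4] */

        return shares * w / 100;
}

int main(void)
{
        int lvl;

        for (lvl = -2; lvl <= 2; lvl++)
                printf("qos_level %+d: 1024 shares -> %ld\n",
                       lvl, model_reweight(1024, lvl));
        return 0;
}

With 1024 shares this prints 10, 102, 1024, 10240 and 102400 for levels -2
through 2, which is the spread that calc_group_shares()/update_cfs_group()
then feed into the group-weight calculation.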
@@ -1182,11 +1182,20 @@ static inline int cpu_of(struct rq *rq)
 }
 
 #ifdef CONFIG_QOS_SCHED
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
 enum task_qos_level {
+        QOS_LEVEL_OFFLINE_EX = -2,
         QOS_LEVEL_OFFLINE = -1,
         QOS_LEVEL_ONLINE = 0,
-        QOS_LEVEL_MAX
+        QOS_LEVEL_HIGH = 1,
+        QOS_LEVEL_HIGH_EX = 2
 };
+#else
+enum task_qos_level {
+        QOS_LEVEL_OFFLINE = -1,
+        QOS_LEVEL_ONLINE = 0,
+};
+#endif
 
 void init_qos_hrtimer(int cpu);
 #endif
@@ -3036,7 +3045,22 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 #ifdef CONFIG_QOS_SCHED
 static inline int qos_idle_policy(int policy)
 {
-        return policy == QOS_LEVEL_OFFLINE;
+        return policy <= QOS_LEVEL_OFFLINE;
+}
+
+static inline int is_high_level(long qos_level)
+{
+        return qos_level > QOS_LEVEL_ONLINE;
+}
+
+static inline int is_normal_level(long qos_level)
+{
+        return qos_level == QOS_LEVEL_ONLINE;
+}
+
+static inline int is_offline_level(long qos_level)
+{
+        return qos_level < QOS_LEVEL_ONLINE;
 }
 #endif
......
@@ -2718,6 +2718,15 @@ static struct ctl_table kern_table[] = {
                 .extra2         = &one_thousand,
         },
 #endif
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+        {
+                .procname       = "qos_level_weights",
+                .data           = &sysctl_qos_level_weights,
+                .maxlen         = 5*sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = proc_dointvec,
+        },
+#endif
 #ifdef CONFIG_QOS_SCHED_PRIO_LB
         {
                 .procname       = "sched_prio_load_balance_enabled",
......
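
Note on the hunk above: the five weights become tunable at runtime through
this sysctl entry, which lands in kern_table and therefore presumably appears
as /proc/sys/kernel/qos_level_weights. The array order matches qos_reweight():
index 0 holds the weight for QOS_LEVEL_OFFLINE_EX (-2) through index 4 for
QOS_LEVEL_HIGH_EX (2). proc_dointvec applies no range validation, so
out-of-range values are accepted as written.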