Unverified commit c4fb2bc6 authored by openeuler-ci-bot and committed by Gitee

!795 sched/fair: Introduce multiple qos level

Merge Pull Request from: @zhaowenhui8 
 
Expand qos_level from {-1, 0} to [-2, 2] to distinguish tasks that are expected
to run at extremely high or low priority levels. qos_level_weight is used to
reweight the shares when calculating a group's weight. Meanwhile, an offline
task's scheduling policy is set to SCHED_IDLE so that it can be preempted at
check_preempt_wakeup.

kernel option:
CONFIG_QOS_SCHED_MULTILEVEL 
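
For intuition, here is a minimal user-space sketch (illustration only, not part of this patch) mirroring the qos_reweight() helper introduced in kernel/sched/fair.c below. It applies the default sysctl_qos_level_weights table {1, 10, 100, 1000, 10000} to a group's configured shares; the kernel's scale_load()/MIN_SHARES/MAX_SHARES clamping and overflow guard are omitted, and plain longs stand in for scaled load weights.

/*
 * Illustration of the multilevel qos reweighting: a group at the ONLINE
 * level (weight 100) keeps its configured shares unchanged, while the
 * other levels are scaled down or up by factors of 10 relative to it.
 */
#include <stdio.h>

static const long default_weights[5] = { 1, 10, 100, 1000, 10000 };
static const char *level_names[5] = {
	"OFFLINE_EX (-2)", "OFFLINE (-1)", "ONLINE (0)",
	"HIGH (+1)", "HIGH_EX (+2)"
};

/* shares * qos_weight / 100, as in qos_reweight() */
static long qos_reweight_example(long shares, int level_index)
{
	return shares * default_weights[level_index] / 100;
}

int main(void)
{
	long shares = 1024;	/* a group's configured cpu.shares */
	int i;

	for (i = 0; i < 5; i++)
		printf("%-16s effective shares: %ld\n",
		       level_names[i], qos_reweight_example(shares, i));
	return 0;
}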
 
Link: https://gitee.com/openeuler/kernel/pulls/795 

Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
......@@ -140,6 +140,7 @@ CONFIG_CGROUP_WRITEBACK=y
CONFIG_CGROUP_V1_WRITEBACK=y
CONFIG_CGROUP_SCHED=y
CONFIG_QOS_SCHED=y
CONFIG_QOS_SCHED_MULTILEVEL=y
CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
CONFIG_QOS_SCHED_SMT_EXPELLER=y
CONFIG_FAIR_GROUP_SCHED=y
......
......@@ -158,6 +158,7 @@ CONFIG_CGROUP_WRITEBACK=y
CONFIG_CGROUP_V1_WRITEBACK=y
CONFIG_CGROUP_SCHED=y
CONFIG_QOS_SCHED=y
CONFIG_QOS_SCHED_MULTILEVEL=y
CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
CONFIG_QOS_SCHED_SMT_EXPELLER=y
CONFIG_FAIR_GROUP_SCHED=y
......
......@@ -83,6 +83,10 @@ extern unsigned int sysctl_overload_detect_period;
extern unsigned int sysctl_offline_wait_interval;
#endif
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
extern unsigned int sysctl_qos_level_weights[];
#endif
#ifdef CONFIG_QOS_SCHED_PRIO_LB
extern unsigned int sysctl_sched_prio_load_balance_enabled;
#endif
......
......@@ -977,6 +977,15 @@ config QOS_SCHED
default n
config QOS_SCHED_MULTILEVEL
bool "Multiple qos level task scheduling"
depends on QOS_SCHED
default n
help
This feature enables multiple qos levels for task scheduling.
It expands qos_level to [-2, 2] to distinguish tasks expected
to run at extremely high or low priority levels.
config QOS_SCHED_SMT_EXPELLER
bool "Qos smt expeller"
depends on SCHED_SMT
......
......@@ -6437,7 +6437,7 @@ static int __sched_setscheduler(struct task_struct *p,
* other than SCHED_IDLE, the online task preemption and cpu resource
* isolation will be invalid, so return -EINVAL in this case.
*/
if (unlikely(task_group(p)->qos_level == -1 && !idle_policy(policy))) {
if (unlikely(is_offline_level(task_group(p)->qos_level) && !idle_policy(policy))) {
retval = -EINVAL;
goto unlock;
}
......@@ -8562,7 +8562,7 @@ static void sched_change_qos_group(struct task_struct *tsk, struct task_group *t
*/
if (!(tsk->flags & PF_EXITING) &&
!task_group_is_autogroup(tg) &&
(tg->qos_level == -1)) {
(is_offline_level(tg->qos_level))) {
attr.sched_priority = 0;
attr.sched_policy = SCHED_IDLE;
__setscheduler_params(tsk, &attr);
......@@ -8590,7 +8590,7 @@ void sched_move_offline_task(struct task_struct *p)
{
struct offline_args *args;
if (unlikely(task_group(p)->qos_level != -1))
if (unlikely(!is_offline_level(task_group(p)->qos_level)))
return;
args = kmalloc(sizeof(struct offline_args), GFP_ATOMIC);
......@@ -9463,7 +9463,7 @@ static int tg_change_scheduler(struct task_group *tg, void *data)
struct cgroup_subsys_state *css = &tg->css;
tg->qos_level = qos_level;
if (qos_level == -1)
if (is_offline_level(qos_level))
policy = SCHED_IDLE;
else
policy = SCHED_NORMAL;
......@@ -9485,19 +9485,27 @@ static int cpu_qos_write(struct cgroup_subsys_state *css,
if (!tg->se[0])
return -EINVAL;
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
if (qos_level > QOS_LEVEL_HIGH_EX || qos_level < QOS_LEVEL_OFFLINE_EX)
#else
if (qos_level != -1 && qos_level != 0)
#endif
return -EINVAL;
if (tg->qos_level == qos_level)
goto done;
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
if (!is_normal_level(tg->qos_level))
#else
if (tg->qos_level == -1 && qos_level == 0)
#endif
return -EINVAL;
cpus_read_lock();
if (qos_level == -1)
if (is_offline_level(qos_level))
cfs_bandwidth_usage_inc();
else
else if (is_offline_level(tg->qos_level) && !is_offline_level(qos_level))
cfs_bandwidth_usage_dec();
cpus_read_unlock();
......
......@@ -140,6 +140,23 @@ static int unthrottle_qos_cfs_rqs(int cpu);
static bool qos_smt_expelled(int this_cpu);
#endif
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
#define QOS_LEVEL_WEIGHT_OFFLINE_EX 1
#define QOS_LEVEL_WEIGHT_OFFLINE 10
#define QOS_LEVEL_WEIGHT_ONLINE 100
#define QOS_LEVEL_WEIGHT_HIGH 1000
#define QOS_LEVEL_WEIGHT_HIGH_EX 10000
unsigned int sysctl_qos_level_weights[5] = {
QOS_LEVEL_WEIGHT_OFFLINE_EX,
QOS_LEVEL_WEIGHT_OFFLINE,
QOS_LEVEL_WEIGHT_ONLINE,
QOS_LEVEL_WEIGHT_HIGH,
QOS_LEVEL_WEIGHT_HIGH_EX,
};
static long qos_reweight(long shares, struct task_group *tg);
#endif
#ifdef CONFIG_QOS_SCHED_PRIO_LB
unsigned int sysctl_sched_prio_load_balance_enabled;
#endif
......@@ -2987,7 +3004,7 @@ adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *),
{
struct task_group *tg = task_group(task_of(se));
if (sysctl_sched_prio_load_balance_enabled && tg->qos_level == -1)
if (sysctl_sched_prio_load_balance_enabled && is_offline_level(tg->qos_level))
(*list_op)(&se->group_node, &rq->cfs_offline_tasks);
else
(*list_op)(&se->group_node, &rq->cfs_tasks);
......@@ -3217,6 +3234,9 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
struct task_group *tg = cfs_rq->tg;
tg_shares = READ_ONCE(tg->shares);
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
tg_shares = qos_reweight(tg_shares, tg);
#endif
load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
......@@ -3265,6 +3285,9 @@ static void update_cfs_group(struct sched_entity *se)
#ifndef CONFIG_SMP
shares = READ_ONCE(gcfs_rq->tg->shares);
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
shares = qos_reweight(shares, gcfs_rq->tg);
#endif
if (likely(se->load.weight == shares))
return;
......@@ -4494,6 +4517,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_min_vruntime(cfs_rq);
}
/*
* Preempt the current task with a newly woken task if needed:
*/
......@@ -7548,7 +7572,7 @@ static inline void cancel_qos_timer(int cpu)
static inline bool is_offline_task(struct task_struct *p)
{
return task_group(p)->qos_level == QOS_LEVEL_OFFLINE;
return task_group(p)->qos_level < QOS_LEVEL_ONLINE;
}
static void start_qos_hrtimer(int cpu);
......@@ -7739,7 +7763,7 @@ static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
return false;
}
if (unlikely(cfs_rq && cfs_rq->tg->qos_level < 0 &&
if (unlikely(cfs_rq && is_offline_level(cfs_rq->tg->qos_level) &&
!sched_idle_cpu(smp_processor_id()) &&
cfs_rq->h_nr_running == cfs_rq->idle_h_nr_running)) {
throttle_qos_cfs_rq(cfs_rq);
......@@ -7755,7 +7779,7 @@ static inline void unthrottle_qos_sched_group(struct cfs_rq *cfs_rq)
struct rq_flags rf;
rq_lock_irqsave(rq, &rf);
if (cfs_rq->tg->qos_level == -1 && cfs_rq_throttled(cfs_rq))
if (is_offline_level(cfs_rq->tg->qos_level) && cfs_rq_throttled(cfs_rq))
unthrottle_qos_cfs_rq(cfs_rq);
rq_unlock_irqrestore(rq, &rf);
}
......@@ -7768,7 +7792,7 @@ void sched_qos_offline_wait(void)
rcu_read_lock();
qos_level = task_group(current)->qos_level;
rcu_read_unlock();
if (qos_level != -1 || fatal_signal_pending(current))
if (!is_offline_level(qos_level) || fatal_signal_pending(current))
break;
schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval));
......@@ -7835,6 +7859,39 @@ static bool qos_smt_expelled(int this_cpu)
#endif
#endif
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
static long qos_reweight(long shares, struct task_group *tg)
{
long qos_weight = 100;
long div = 100;
long scale_shares;
switch (tg->qos_level) {
case QOS_LEVEL_OFFLINE_EX:
qos_weight = sysctl_qos_level_weights[0];
break;
case QOS_LEVEL_OFFLINE:
qos_weight = sysctl_qos_level_weights[1];
break;
case QOS_LEVEL_ONLINE:
qos_weight = sysctl_qos_level_weights[2];
break;
case QOS_LEVEL_HIGH:
qos_weight = sysctl_qos_level_weights[3];
break;
case QOS_LEVEL_HIGH_EX:
qos_weight = sysctl_qos_level_weights[4];
break;
}
if (qos_weight > LONG_MAX / shares)
scale_shares = LONG_MAX / div;
else
scale_shares = shares * qos_weight / div;
scale_shares = clamp_t(long, scale_shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
return scale_shares;
}
#endif
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
DEFINE_STATIC_KEY_TRUE(qos_smt_expell_switch);
......@@ -7891,7 +7948,7 @@ static bool qos_smt_update_status(struct task_struct *p)
{
int status = QOS_LEVEL_OFFLINE;
if (p != NULL && task_group(p)->qos_level >= QOS_LEVEL_ONLINE)
if (p != NULL && !is_offline_level(task_group(p)->qos_level))
status = QOS_LEVEL_ONLINE;
if (__this_cpu_read(qos_smt_status) == status)
......@@ -7969,7 +8026,7 @@ static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
* and current cpu only has SCHED_IDLE tasks enqueued.
*/
if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE &&
task_group(current)->qos_level < QOS_LEVEL_ONLINE) {
is_offline_level(task_group(current)->qos_level)) {
trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu));
return true;
}
......
......@@ -1182,11 +1182,20 @@ static inline int cpu_of(struct rq *rq)
}
#ifdef CONFIG_QOS_SCHED
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
enum task_qos_level {
QOS_LEVEL_OFFLINE_EX = -2,
QOS_LEVEL_OFFLINE = -1,
QOS_LEVEL_ONLINE = 0,
QOS_LEVEL_MAX
QOS_LEVEL_HIGH = 1,
QOS_LEVEL_HIGH_EX = 2
};
#else
enum task_qos_level {
QOS_LEVEL_OFFLINE = -1,
QOS_LEVEL_ONLINE = 0,
};
#endif
void init_qos_hrtimer(int cpu);
#endif
......@@ -3036,7 +3045,22 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
#ifdef CONFIG_QOS_SCHED
static inline int qos_idle_policy(int policy)
{
return policy == QOS_LEVEL_OFFLINE;
return policy <= QOS_LEVEL_OFFLINE;
}
static inline int is_high_level(long qos_level)
{
return qos_level > QOS_LEVEL_ONLINE;
}
static inline int is_normal_level(long qos_level)
{
return qos_level == QOS_LEVEL_ONLINE;
}
static inline int is_offline_level(long qos_level)
{
return qos_level < QOS_LEVEL_ONLINE;
}
#endif
......
......@@ -2718,6 +2718,15 @@ static struct ctl_table kern_table[] = {
.extra2 = &one_thousand,
},
#endif
#ifdef CONFIG_QOS_SCHED_MULTILEVEL
{
.procname = "qos_level_weights",
.data = &sysctl_qos_level_weights,
.maxlen = 5*sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_QOS_SCHED_PRIO_LB
{
.procname = "sched_prio_load_balance_enabled",
......
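
As a closing illustration (again not part of the patch), the sketch below walks the expanded range [-2, 2] through classification helpers with the same semantics as those added to kernel/sched/sched.h above, and shows the scheduling policy tg_change_scheduler() would pick for each level: both offline levels are forced to SCHED_IDLE, while the two high levels stay SCHED_NORMAL and are differentiated only through the share reweighting.

#include <stdio.h>

/* Same semantics as the helpers added to kernel/sched/sched.h (QOS_LEVEL_ONLINE == 0). */
static int is_offline_level(long qos_level) { return qos_level < 0; }
static int is_normal_level(long qos_level)  { return qos_level == 0; }
static int is_high_level(long qos_level)    { return qos_level > 0; }

int main(void)
{
	long level;

	for (level = -2; level <= 2; level++)
		printf("qos_level %+ld: %s, policy %s\n", level,
		       is_offline_level(level) ? "offline" :
		       is_normal_level(level) ? "online" : "high",
		       is_offline_level(level) ? "SCHED_IDLE" : "SCHED_NORMAL");
	return 0;
}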