Commit 9e7b35d6 authored by Yihao Wu

alinux: sched: Introduce per-cgroup iowait accounting

to #26424323

We account iowait when the cgroup's se is idle and there are blocked
tasks in the hierarchy under se->my_q.

To achieve this, we also add cg_nr_iowait to track the hierarchical
number of tasks blocked in iowait. It is updated when a blocked task
wakes up or when a task blocks.
Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
Signed-off-by: Shanpei Chen <shanpeic@linux.alibaba.com>
Acked-by: Michael Wang <yun.wang@linux.alibaba.com>
Parent c7552980
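For orientation, here is a minimal sketch of the accounting rule the patch implements (group_iowait() is a hypothetical helper name, not part of the patch): a nonzero cg_iowait_start marks an iowait window that is still open, and a reader adds that open part on top of cg_iowait_sum, mirroring the logic in __cpuacct_get_usage_result() below.

/* Sketch only; group_iowait() is a hypothetical helper, field semantics
 * follow the patch: cg_iowait_start != 0 means the clock is running.
 */
static u64 group_iowait(u64 cg_iowait_sum, u64 cg_iowait_start, u64 now)
{
	return cg_iowait_start ? cg_iowait_sum + (now - cg_iowait_start)
			       : cg_iowait_sum;
}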
......@@ -468,9 +468,13 @@ struct sched_entity {
u64 cg_idle_start;
u64 cg_idle_sum;
u64 cg_init_time;
u64 cg_nr_iowait;
u64 cg_iowait_sum;
u64 cg_iowait_start;
u64 cg_ineffective_sum;
u64 cg_ineffective_start;
seqcount_t idle_seqcount;
spinlock_t iowait_lock;
u64 nr_migrations;
......
......@@ -2046,6 +2046,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
if (p->in_iowait) {
delayacct_blkio_end(p);
atomic_dec(&task_rq(p)->nr_iowait);
update_nr_iowait(p, -1);
}
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
......@@ -2060,6 +2061,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
if (p->in_iowait) {
delayacct_blkio_end(p);
atomic_dec(&task_rq(p)->nr_iowait);
update_nr_iowait(p, -1);
}
#endif /* CONFIG_SMP */
......@@ -3436,6 +3438,7 @@ static void __sched notrace __schedule(bool preempt)
if (prev->in_iowait) {
atomic_inc(&rq->nr_iowait);
update_nr_iowait(prev, 1);
delayacct_blkio_start();
}
}
......
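The core.c hooks above come as a balanced pair; the sketch below only restates the two call sites side by side (identifiers as in the hunks) to show that the per-cgroup counter moves in lockstep with rq->nr_iowait.

/* Sketch: the two call sites that keep cg_nr_iowait balanced. */
update_nr_iowait(prev, 1);	/* __schedule(): prev blocks with in_iowait set */
update_nr_iowait(p, -1);	/* try_to_wake_up(): p leaves iowait */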
......@@ -447,6 +447,7 @@ static unsigned long ca_uninterruptible(struct cpuacct *ca, int cpu)
void cgroup_idle_start(struct sched_entity *se)
{
unsigned long flags;
u64 clock;
if (!schedstat_enabled())
......@@ -457,12 +458,18 @@ void cgroup_idle_start(struct sched_entity *se)
write_seqcount_begin(&se->idle_seqcount);
__schedstat_set(se->cg_idle_start, clock);
write_seqcount_end(&se->idle_seqcount);
spin_lock_irqsave(&se->iowait_lock, flags);
if (schedstat_val(se->cg_nr_iowait))
__schedstat_set(se->cg_iowait_start, clock);
spin_unlock_irqrestore(&se->iowait_lock, flags);
}
void cgroup_idle_end(struct sched_entity *se)
{
unsigned long flags;
u64 clock;
u64 idle_start;
u64 idle_start, iowait_start;
if (!schedstat_enabled())
return;
......@@ -474,6 +481,14 @@ void cgroup_idle_end(struct sched_entity *se)
__schedstat_add(se->cg_idle_sum, clock - idle_start);
__schedstat_set(se->cg_idle_start, 0);
write_seqcount_end(&se->idle_seqcount);
spin_lock_irqsave(&se->iowait_lock, flags);
if (schedstat_val(se->cg_nr_iowait)) {
iowait_start = schedstat_val(se->cg_iowait_start);
__schedstat_add(se->cg_iowait_sum, clock - iowait_start);
__schedstat_set(se->cg_iowait_start, 0);
}
spin_unlock_irqrestore(&se->iowait_lock, flags);
}
void cpuacct_cpuset_changed(struct cgroup *cgrp, struct cpumask *deleted,
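The write_seqcount_begin()/end() pairs in cgroup_idle_start()/cgroup_idle_end() let the cpuacct reader sample cg_idle_start without taking rq->lock; a sketch of the standard read side (the next hunk shows its beginning) would be:

	unsigned int seq;
	u64 idle_start;

	do {
		seq = read_seqcount_begin(&se->idle_seqcount);
		idle_start = schedstat_val(se->cg_idle_start);
	} while (read_seqcount_retry(&se->idle_seqcount, seq));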
......@@ -561,8 +576,9 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
res->softirq = kcpustat->cpustat[CPUTIME_SOFTIRQ];
if (se && schedstat_enabled()) {
unsigned int seq;
unsigned long flags;
u64 idle_start, ineff, ineff_start, elapse, complement;
u64 clock;
u64 clock, iowait_start;
do {
seq = read_seqcount_begin(&se->idle_seqcount);
......@@ -578,6 +594,13 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
if (ineff_start)
__schedstat_add(ineff, clock - ineff_start);
spin_lock_irqsave(&se->iowait_lock, flags);
res->iowait = schedstat_val(se->cg_iowait_sum);
iowait_start = schedstat_val(se->cg_iowait_start);
if (iowait_start)
__schedstat_add(res->iowait, clock - iowait_start);
spin_unlock_irqrestore(&se->iowait_lock, flags);
res->steal = 0;
elapse = clock - schedstat_val(se->cg_init_time);
......@@ -585,6 +608,7 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
if (elapse > complement)
res->steal = elapse - complement;
res->idle -= res->iowait;
} else {
res->idle = res->iowait = res->steal = 0;
}
......
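Note the reporting convention established by the last hunk: iowait time is carved out of idle time, so the two figures never overlap. A small worked example with made-up numbers:

/* Made-up numbers: the group's se was dequeued for 10 ms, and during
 * 3 ms of that window some task below it was blocked in iowait.
 */
u64 cg_idle_sum   = 10 * NSEC_PER_MSEC;		/* all time off the rq    */
u64 cg_iowait_sum =  3 * NSEC_PER_MSEC;		/* subset spent in iowait */
u64 idle   = cg_idle_sum - cg_iowait_sum;	/* reported idle:   7 ms  */
u64 iowait = cg_iowait_sum;			/* reported iowait: 3 ms  */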
......@@ -10032,6 +10032,44 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
}
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SCHED_SLI
static void update_nr_iowait_fair(struct task_struct *p, long inc)
{
unsigned long flags;
struct sched_entity *se = p->se.parent;
u64 clock;
if (!schedstat_enabled())
return;
clock = __rq_clock_broken(cpu_rq(p->cpu));
for_each_sched_entity(se) {
/*
* Avoid taking rq->lock from the try_to_wake_up() hot path, at the
* price of slightly inconsistent accounting across the cgroup
* hierarchy, which we can tolerate.
* Accessing se->on_rq, however, does require holding rq->lock. We
* already hold it here, because when inc == 1 the callers are
* __schedule() and task_move_group_fair().
*/
spin_lock_irqsave(&se->iowait_lock, flags);
if (!se->on_rq && !schedstat_val(se->cg_nr_iowait) && inc > 0)
__schedstat_set(se->cg_iowait_start, clock);
if (schedstat_val(se->cg_iowait_start) > 0 &&
schedstat_val(se->cg_nr_iowait) + inc == 0) {
__schedstat_add(se->cg_iowait_sum, clock -
schedstat_val(se->cg_iowait_start));
__schedstat_set(se->cg_iowait_start, 0);
}
__schedstat_add(se->cg_nr_iowait, inc);
spin_unlock_irqrestore(&se->iowait_lock, flags);
}
}
#else
static void update_nr_iowait_fair(struct task_struct *p, long inc) {}
#endif
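Per level of the hierarchy, the walk above preserves a single invariant; a hypothetical helper (not in the patch) states it explicitly:

/* Hypothetical helper, not part of the patch: the group's iowait clock
 * (cg_iowait_start != 0) should run exactly while the se is dequeued
 * and at least one task below it is blocked in iowait.
 */
static bool cg_iowait_clock_should_run(struct sched_entity *se)
{
	return !se->on_rq && schedstat_val(se->cg_nr_iowait) > 0;
}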
static void task_set_group_fair(struct task_struct *p)
{
struct sched_entity *se = &p->se;
......@@ -10042,6 +10080,8 @@ static void task_set_group_fair(struct task_struct *p)
static void task_move_group_fair(struct task_struct *p)
{
if (p->in_iowait)
update_nr_iowait_fair(p, -1);
detach_task_cfs_rq(p);
set_task_rq(p, task_cpu(p));
......@@ -10050,6 +10090,8 @@ static void task_move_group_fair(struct task_struct *p)
p->se.avg.last_update_time = 0;
#endif
attach_task_cfs_rq(p);
if (p->in_iowait)
update_nr_iowait_fair(p, 1);
}
static void task_change_group_fair(struct task_struct *p, int type)
......@@ -10196,6 +10238,7 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
update_load_set(&se->load, NICE_0_LOAD);
se->parent = parent;
seqcount_init(&se->idle_seqcount);
spin_lock_init(&se->iowait_lock);
se->cg_idle_start = se->cg_init_time = cpu_clock(cpu);
}
......@@ -10322,6 +10365,7 @@ const struct sched_class fair_sched_class = {
#ifdef CONFIG_SCHED_SLI
.update_nr_uninterruptible = update_nr_uninterruptible_fair,
.update_nr_iowait = update_nr_iowait_fair,
#endif
};
......
......@@ -1677,6 +1677,7 @@ struct sched_class {
#endif
void (*update_nr_uninterruptible)(struct task_struct *p, long inc);
void (*update_nr_iowait)(struct task_struct *p, long inc);
};
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
......@@ -1689,6 +1690,12 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
curr->sched_class->set_curr_task(rq);
}
static inline void update_nr_iowait(struct task_struct *p, long inc)
{
if (p->sched_class->update_nr_iowait)
p->sched_class->update_nr_iowait(p, inc);
}
#ifdef CONFIG_SMP
#define sched_class_highest (&stop_sched_class)
#else
......