Commit 9e7b35d6 authored by Yihao Wu

alinux: sched: Introduce per-cgroup iowait accounting

to #26424323

We account iowait when the cgroup's se is idle and there are blocked
tasks in the hierarchy below se->my_q.

To achieve this, we also add cg_nr_iowait to track the hierarchical
number of blocked tasks. It is updated when a blocked task wakes up or
when a task blocks.
Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
Signed-off-by: Shanpei Chen <shanpeic@linux.alibaba.com>
Acked-by: Michael Wang <yun.wang@linux.alibaba.com>
Parent c7552980
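
To make the accounting rule in the commit message concrete before reading the diff, here is a minimal user-space sketch of the same state machine, with the schedstat wrappers, per-entity locking, and cgroup-hierarchy walk stripped out. The struct and function names (cg_iowait, cg_update_nr_iowait, cg_idle_switch) are illustrative stand-ins for the sched_entity fields the patch adds, not kernel API.

```c
/*
 * Minimal model of per-cgroup iowait accounting: one instance stands in
 * for one cgroup's per-CPU sched_entity. A group accrues iowait while it
 * is idle (dequeued) and at least one of its tasks is blocked on I/O.
 */
#include <stdint.h>
#include <stdbool.h>

struct cg_iowait {
	long     nr_iowait;     /* tasks below this group blocked on I/O */
	uint64_t iowait_start;  /* start of the currently open interval  */
	uint64_t iowait_sum;    /* accumulated iowait time               */
	bool     on_rq;         /* is the group entity runnable?         */
};

/* A task below the group blocks on I/O (inc = 1) or wakes up (inc = -1). */
static void cg_update_nr_iowait(struct cg_iowait *cg, long inc, uint64_t clock)
{
	/* Group already idle and this is the first I/O sleeper: start the clock. */
	if (!cg->on_rq && cg->nr_iowait == 0 && inc > 0)
		cg->iowait_start = clock;

	/* Last I/O sleeper is waking up: close the open interval. */
	if (cg->iowait_start && cg->nr_iowait + inc == 0) {
		cg->iowait_sum += clock - cg->iowait_start;
		cg->iowait_start = 0;
	}

	cg->nr_iowait += inc;
}

/* The group entity goes idle (is dequeued) or starts running again. */
static void cg_idle_switch(struct cg_iowait *cg, bool idle, uint64_t clock)
{
	cg->on_rq = !idle;

	if (idle && cg->nr_iowait) {
		/* Idle with I/O sleepers: an iowait interval begins. */
		cg->iowait_start = clock;
	} else if (!idle && cg->iowait_start) {
		/* Running again: close the interval that was open while idle. */
		cg->iowait_sum += clock - cg->iowait_start;
		cg->iowait_start = 0;
	}
}

int main(void)
{
	struct cg_iowait cg = { .on_rq = true };

	cg_update_nr_iowait(&cg, 1, 100);   /* a task blocks on I/O at t=100   */
	cg_idle_switch(&cg, true, 120);     /* group entity is dequeued at 120 */
	cg_update_nr_iowait(&cg, -1, 300);  /* the task wakes up at t=300      */
	/* The idle-with-I/O-sleeper time, 120..300, is accounted as iowait. */
	return cg.iowait_sum == 180 ? 0 : 1;
}
```
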
@@ -468,9 +468,13 @@ struct sched_entity {
 	u64 cg_idle_start;
 	u64 cg_idle_sum;
 	u64 cg_init_time;
+	u64 cg_nr_iowait;
+	u64 cg_iowait_sum;
+	u64 cg_iowait_start;
 	u64 cg_ineffective_sum;
 	u64 cg_ineffective_start;
 	seqcount_t idle_seqcount;
+	spinlock_t iowait_lock;
 	u64 nr_migrations;
......
@@ -2046,6 +2046,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	if (p->in_iowait) {
 		delayacct_blkio_end(p);
 		atomic_dec(&task_rq(p)->nr_iowait);
+		update_nr_iowait(p, -1);
 	}
 	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
@@ -2060,6 +2061,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	if (p->in_iowait) {
 		delayacct_blkio_end(p);
 		atomic_dec(&task_rq(p)->nr_iowait);
+		update_nr_iowait(p, -1);
 	}
 #endif /* CONFIG_SMP */
@@ -3436,6 +3438,7 @@ static void __sched notrace __schedule(bool preempt)
 			if (prev->in_iowait) {
 				atomic_inc(&rq->nr_iowait);
+				update_nr_iowait(prev, 1);
 				delayacct_blkio_start();
 			}
 		}
......
@@ -447,6 +447,7 @@ static unsigned long ca_uninterruptible(struct cpuacct *ca, int cpu)
 void cgroup_idle_start(struct sched_entity *se)
 {
+	unsigned long flags;
 	u64 clock;
 	if (!schedstat_enabled())
@@ -457,12 +458,18 @@ void cgroup_idle_start(struct sched_entity *se)
 	write_seqcount_begin(&se->idle_seqcount);
 	__schedstat_set(se->cg_idle_start, clock);
 	write_seqcount_end(&se->idle_seqcount);
+	spin_lock_irqsave(&se->iowait_lock, flags);
+	if (schedstat_val(se->cg_nr_iowait))
+		__schedstat_set(se->cg_iowait_start, clock);
+	spin_unlock_irqrestore(&se->iowait_lock, flags);
 }
 void cgroup_idle_end(struct sched_entity *se)
 {
+	unsigned long flags;
 	u64 clock;
-	u64 idle_start;
+	u64 idle_start, iowait_start;
 	if (!schedstat_enabled())
 		return;
@@ -474,6 +481,14 @@ void cgroup_idle_end(struct sched_entity *se)
 	__schedstat_add(se->cg_idle_sum, clock - idle_start);
 	__schedstat_set(se->cg_idle_start, 0);
 	write_seqcount_end(&se->idle_seqcount);
+	spin_lock_irqsave(&se->iowait_lock, flags);
+	if (schedstat_val(se->cg_nr_iowait)) {
+		iowait_start = schedstat_val(se->cg_iowait_start);
+		__schedstat_add(se->cg_iowait_sum, clock - iowait_start);
+		__schedstat_set(se->cg_iowait_start, 0);
+	}
+	spin_unlock_irqrestore(&se->iowait_lock, flags);
 }
 void cpuacct_cpuset_changed(struct cgroup *cgrp, struct cpumask *deleted,
@@ -561,8 +576,9 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
 	res->softirq = kcpustat->cpustat[CPUTIME_SOFTIRQ];
 	if (se && schedstat_enabled()) {
 		unsigned int seq;
+		unsigned long flags;
 		u64 idle_start, ineff, ineff_start, elapse, complement;
-		u64 clock;
+		u64 clock, iowait_start;
 		do {
 			seq = read_seqcount_begin(&se->idle_seqcount);
@@ -578,6 +594,13 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
 		if (ineff_start)
 			__schedstat_add(ineff, clock - ineff_start);
+		spin_lock_irqsave(&se->iowait_lock, flags);
+		res->iowait = schedstat_val(se->cg_iowait_sum);
+		iowait_start = schedstat_val(se->cg_iowait_start);
+		if (iowait_start)
+			__schedstat_add(res->iowait, clock - iowait_start);
+		spin_unlock_irqrestore(&se->iowait_lock, flags);
 		res->steal = 0;
 		elapse = clock - schedstat_val(se->cg_init_time);
@@ -585,6 +608,7 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
 		if (elapse > complement)
 			res->steal = elapse - complement;
+		res->idle -= res->iowait;
 	} else {
 		res->idle = res->iowait = res->steal = 0;
 	}
......
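
The reader side in __cpuacct_get_usage_result above folds any still-open interval into the sum, then carves iowait out of idle so the two buckets never overlap. Continuing the illustrative user-space sketch from after the commit header (same caveats: simplified names, no locking):

```c
/* Continuing the sketch above: what a reader would report for the group. */
static uint64_t cg_iowait_read(const struct cg_iowait *cg, uint64_t clock)
{
	uint64_t iowait = cg->iowait_sum;

	/* Fold in the interval that is still running, if any. */
	if (cg->iowait_start)
		iowait += clock - cg->iowait_start;
	return iowait;
}

/* Reported idle excludes the time already attributed to iowait. */
static uint64_t cg_idle_read(uint64_t raw_idle, uint64_t iowait)
{
	return raw_idle - iowait;
}
```
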
@@ -10032,6 +10032,44 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 }
 #ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_SCHED_SLI
+static void update_nr_iowait_fair(struct task_struct *p, long inc)
+{
+	unsigned long flags;
+	struct sched_entity *se = p->se.parent;
+	u64 clock;
+	if (!schedstat_enabled())
+		return;
+	clock = __rq_clock_broken(cpu_rq(p->cpu));
+	for_each_sched_entity(se) {
+		/*
+		 * Avoid taking rq->lock from the try_to_wake_up hot path,
+		 * at the price of poor consistency across the cgroup
+		 * hierarchy, which we can tolerate.
+		 * Accessing se->on_rq does require rq->lock, and we already
+		 * hold it here: when inc == 1 the callers are __schedule()
+		 * and task_move_group_fair().
+		 */
+		spin_lock_irqsave(&se->iowait_lock, flags);
+		if (!se->on_rq && !schedstat_val(se->cg_nr_iowait) && inc > 0)
+			__schedstat_set(se->cg_iowait_start, clock);
+		if (schedstat_val(se->cg_iowait_start) > 0 &&
+		    schedstat_val(se->cg_nr_iowait) + inc == 0) {
+			__schedstat_add(se->cg_iowait_sum, clock -
+					schedstat_val(se->cg_iowait_start));
+			__schedstat_set(se->cg_iowait_start, 0);
+		}
+		__schedstat_add(se->cg_nr_iowait, inc);
+		spin_unlock_irqrestore(&se->iowait_lock, flags);
+	}
+}
+#else
+static void update_nr_iowait_fair(struct task_struct *p, long inc) {}
+#endif
 static void task_set_group_fair(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
@@ -10042,6 +10080,8 @@ static void task_set_group_fair(struct task_struct *p)
 static void task_move_group_fair(struct task_struct *p)
 {
+	if (p->in_iowait)
+		update_nr_iowait_fair(p, -1);
 	detach_task_cfs_rq(p);
 	set_task_rq(p, task_cpu(p));
@@ -10050,6 +10090,8 @@ static void task_move_group_fair(struct task_struct *p)
 	p->se.avg.last_update_time = 0;
 #endif
 	attach_task_cfs_rq(p);
+	if (p->in_iowait)
+		update_nr_iowait_fair(p, 1);
 }
 static void task_change_group_fair(struct task_struct *p, int type)
@@ -10196,6 +10238,7 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 	update_load_set(&se->load, NICE_0_LOAD);
 	se->parent = parent;
 	seqcount_init(&se->idle_seqcount);
+	spin_lock_init(&se->iowait_lock);
 	se->cg_idle_start = se->cg_init_time = cpu_clock(cpu);
 }
@@ -10322,6 +10365,7 @@ const struct sched_class fair_sched_class = {
 #ifdef CONFIG_SCHED_SLI
 	.update_nr_uninterruptible = update_nr_uninterruptible_fair,
+	.update_nr_iowait = update_nr_iowait_fair,
 #endif
 };
......
@@ -1677,6 +1677,7 @@ struct sched_class {
 #endif
 	void (*update_nr_uninterruptible)(struct task_struct *p, long inc);
+	void (*update_nr_iowait)(struct task_struct *p, long inc);
 };
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
@@ -1689,6 +1690,12 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
 	curr->sched_class->set_curr_task(rq);
 }
+static inline void update_nr_iowait(struct task_struct *p, long inc)
+{
+	if (p->sched_class->update_nr_iowait)
+		p->sched_class->update_nr_iowait(p, inc);
+}
 #ifdef CONFIG_SMP
 #define sched_class_highest (&stop_sched_class)
 #else
......
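
For completeness, a hypothetical consumer of the new statistic. The patch itself only computes res->iowait inside cpuacct; the file path and field layout below are assumptions about how the SLI-enabled cpuacct controller exposes the value (a cpuacct.proc_stat-style per-cgroup file on Alibaba Cloud Linux kernels), not something shown in this diff.

```c
/*
 * Hypothetical reader: dump the per-cgroup /proc/stat-style file where
 * the iowait value computed in __cpuacct_get_usage_result is expected
 * to appear. Path and format are assumptions, not part of this patch.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/fs/cgroup/cpuacct/mygroup/cpuacct.proc_stat";
	char line[512];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* iowait is one field of the cpu lines */
	fclose(f);
	return 0;
}
```
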