Commit c7552980 authored by Yihao Wu

alinux: sched: Introduce per-cgroup steal accounting

to #26424323

From the previous patch, we know there are 4 possible states.
Since the steal state's transitions are complex, we choose to
account for its complement instead:

        steal = elapse - idle - sum_exec_raw - ineffective

where elapse is the time since the cgroup was created, sum_exec_raw is
the running time including IRQ time, and ineffective is the total time
during which the cpuset bound to the cpuacct does not allow the cgroup
to run on this cpu.
Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
Signed-off-by: Shanpei Chen <shanpeic@linux.alibaba.com>
Acked-by: Michael Wang <yun.wang@linux.alibaba.com>
Parent 61e58859
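For reference, the sketch below shows how the formula above maps onto the new fields. It is a hypothetical helper written for illustration only (not part of the patch); it mirrors the computation added to __cpuacct_get_usage_result() further down and assumes the kernel's u64 type from <linux/types.h>:

static inline u64 cg_steal_sketch(u64 clock, u64 cg_init_time,
				  u64 idle, u64 sum_exec_raw,
				  u64 ineffective)
{
	/* Time elapsed on this cpu since the cgroup was created. */
	u64 elapse = clock - cg_init_time;
	/* Everything accounted as something other than steal. */
	u64 complement = idle + sum_exec_raw + ineffective;

	/* Whatever is left of the elapsed time is reported as steal. */
	return elapse > complement ? elapse - complement : 0;
}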
@@ -462,8 +462,14 @@ struct sched_entity {
u64 vruntime;
u64 prev_sum_exec_runtime;
/* irq time is included */
u64 exec_start_raw;
u64 sum_exec_raw;
u64 cg_idle_start;
u64 cg_idle_sum;
u64 cg_init_time;
u64 cg_ineffective_sum;
u64 cg_ineffective_start;
seqcount_t idle_seqcount;
u64 nr_migrations;
@@ -499,6 +499,10 @@ void cpuacct_cpuset_changed(struct cgroup *cgrp, struct cpumask *deleted,
for_each_cpu(cpu, added) {
se = tg->se[cpu];
cgroup_idle_start(se);
__schedstat_add(se->cg_ineffective_sum,
__rq_clock_broken(cpu_rq(cpu)) -
se->cg_ineffective_start);
__schedstat_set(se->cg_ineffective_start, 0);
}
}
@@ -507,6 +511,9 @@ void cpuacct_cpuset_changed(struct cgroup *cgrp, struct cpumask *deleted,
for_each_cpu(cpu, deleted) {
se = tg->se[cpu];
cgroup_idle_end(se);
/* Use __rq_clock_broken to avoid warning */
__schedstat_set(se->cg_ineffective_start,
__rq_clock_broken(cpu_rq(cpu)));
}
}
@@ -554,8 +561,8 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
res->softirq = kcpustat->cpustat[CPUTIME_SOFTIRQ];
if (se && schedstat_enabled()) {
unsigned int seq;
u64 idle_start;
u64 clock = cpu_clock(cpu);
u64 idle_start, ineff, ineff_start, elapse, complement;
u64 clock;
do {
seq = read_seqcount_begin(&se->idle_seqcount);
@@ -566,7 +573,18 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
res->idle += clock - idle_start;
} while (read_seqcount_retry(&se->idle_seqcount, seq));
ineff = schedstat_val(se->cg_ineffective_sum);
ineff_start = schedstat_val(se->cg_ineffective_start);
if (ineff_start)
__schedstat_add(ineff, clock - ineff_start);
res->steal = 0;
elapse = clock - schedstat_val(se->cg_init_time);
complement = res->idle + se->sum_exec_raw + ineff;
if (elapse > complement)
res->steal = elapse - complement;
} else {
res->idle = res->iowait = res->steal = 0;
}
@@ -796,6 +796,15 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
}
#endif /* CONFIG_SMP */
static inline void
update_exec_raw(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
u64 now = rq_clock(rq_of(cfs_rq));
curr->sum_exec_raw += now - curr->exec_start_raw;
curr->exec_start_raw = now;
}
/*
* Update the current task's runtime statistics.
*/
@@ -832,6 +841,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
}
account_cfs_rq_runtime(cfs_rq, delta_exec);
update_exec_raw(cfs_rq, curr);
}
static void update_curr_fair(struct rq *rq)
@@ -1013,6 +1023,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
* We are starting a new run period:
*/
se->exec_start = rq_clock_task(rq_of(cfs_rq));
se->exec_start_raw = rq_clock_task(rq_of(cfs_rq));
}
/**************************************************
@@ -10185,7 +10196,7 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
update_load_set(&se->load, NICE_0_LOAD);
se->parent = parent;
seqcount_init(&se->idle_seqcount);
se->cg_idle_start = cpu_clock(cpu);
se->cg_idle_start = se->cg_init_time = cpu_clock(cpu);
}
static DEFINE_MUTEX(shares_mutex);