diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 535eda7857cfd5d16545edd40ce0c3cae0fb8f10..7e832b24847dd3f7328bcb3d4cf34196dfbf5f82 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -45,14 +45,16 @@ static u64 get_iowait_time(int cpu)
 
 #else
 
-static u64 get_idle_time(int cpu)
+u64 get_idle_time(int cpu)
 {
 	u64 idle, idle_usecs = -1ULL;
 
 	if (cpu_online(cpu))
 		idle_usecs = get_cpu_idle_time_us(cpu, NULL);
 
-	if (idle_usecs == -1ULL)
+	if (idle_usecs == -1ULL && use_sched_idle_time)
+		return sched_get_idle_time(cpu);
+	else if (idle_usecs == -1ULL)
 		/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
 		idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
 	else
@@ -95,9 +97,19 @@ static int show_stat(struct seq_file *p, void *v)
 	getboottime64(&boottime);
 
 	for_each_possible_cpu(i) {
-		user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
+		if (use_sched_idle_time && cpu_online(i)) {
+			u64 u = 0, s = 0;
+
+			sched_idle_time_adjust(i, &u, &s);
+
+			user += u;
+			system += s;
+		} else {
+			user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
+			system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
+		}
+
 		nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
-		system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
 		idle += get_idle_time(i);
 		iowait += get_iowait_time(i);
 		irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
@@ -131,9 +143,13 @@ static int show_stat(struct seq_file *p, void *v)
 
 	for_each_online_cpu(i) {
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
-		user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
+		if (use_sched_idle_time) {
+			sched_idle_time_adjust(i, &user, &system);
+		} else {
+			user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
+			system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
+		}
 		nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
-		system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
 		idle = get_idle_time(i);
 		iowait = get_iowait_time(i);
 		irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 53f883f5a2fd1d29d1e2131b89260304a0be8df0..1ebbeec02051abf3284840296925df2d9eed2b54 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -186,4 +186,9 @@ static inline void prev_cputime_init(struct prev_cputime *prev)
 
 extern unsigned long long task_sched_runtime(struct task_struct *task);
 
+extern int use_sched_idle_time;
+extern int sched_idle_time_adjust(int cpu, u64 *utime, u64 *stime);
+extern unsigned long long sched_get_idle_time(int cpu);
+extern u64 get_idle_time(int cpu);
+
 #endif /* _LINUX_SCHED_CPUTIME_H */
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index b36f4cf38111579a6af935d855315a8a0769d509..631d8579c257b329a4b160b1e50976e69024584e 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -22,9 +22,11 @@ static inline void nohz_balance_enter_idle(int cpu) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_COMMON
+extern unsigned long tick_nohz_active;
 void calc_load_nohz_start(void);
 void calc_load_nohz_stop(void);
 #else
+#define tick_nohz_active (0)
 static inline void calc_load_nohz_start(void) { }
 static inline void calc_load_nohz_stop(void) { }
 #endif /* CONFIG_NO_HZ_COMMON */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2db1d0600a28e122d843f015d2dc02be38eed785..5f1e8362c97a3aef65cb1c029675b4c53a63005d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5913,6 +5913,17 @@ static struct kmem_cache *task_group_cache __read_mostly;
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
+static __init void rq_cputime_init(void)
+{
+	int cpu;
+	struct rq_cputime *rq_cputime;
+
+	for_each_possible_cpu(cpu) {
+		rq_cputime = &per_cpu(rq_cputimes, cpu);
+		raw_spin_lock_init(&rq_cputime->lock);
+	}
+}
+
 void __init sched_init(void)
 {
 	int i, j;
@@ -6074,6 +6085,9 @@ void __init sched_init(void)
 
 	init_schedstats();
 
+	if (use_sched_idle_time)
+		rq_cputime_init();
+
 	scheduler_running = 1;
 }
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 0796f938c4f0df3988bd1afbb30e5df8ce549f5e..ded9d62b1fe4cad831d89f624cf6c4b584660d13 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -567,6 +567,74 @@ static u64 scale_stime(u64 stime, u64 rtime, u64 total)
 	return scaled;
 }
 
+int use_sched_idle_time __read_mostly;
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq_cputime, rq_cputimes);
+
+static int __init sched_idle_time_setup(char *str)
+{
+	use_sched_idle_time = 1;
+
+	return 0;
+}
+early_param("use-sched-idle-time", sched_idle_time_setup);
+
+int sched_idle_time_adjust(int cpu, u64 *utime, u64 *stime)
+{
+	struct rq_cputime *rq_cputime = &per_cpu(rq_cputimes, cpu);
+	struct cputime *prev = &rq_cputime->cpu_prev_time;
+	struct cputime *last = &rq_cputime->cpu_last_time;
+	u64 ut, st, delta, delta_ut, delta_st;
+
+	raw_spin_lock(&rq_cputime->lock);
+
+	delta = cpu_clock(cpu) - get_idle_time(cpu)
+			- (prev->utime + prev->stime);
+
+	ut = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	st = kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+
+	delta_ut = ut - last->utime;
+	delta_st = st - last->stime;
+
+	if (unlikely((s64)delta <= 0))
+		goto out;
+
+	if (delta_st == 0) {
+		prev->utime += delta;
+	} else if (delta_ut == 0) {
+		prev->stime += delta;
+	} else {
+		delta_st = scale_stime(delta_st, delta, delta_ut + delta_st);
+
+		if (unlikely(delta_st > delta))
+			delta_st = delta;
+
+		prev->stime += delta_st;
+		prev->utime += delta - delta_st;
+	}
+
+out:
+	last->utime = ut;
+	last->stime = st;
+
+	*utime = prev->utime;
+	*stime = prev->stime;
+
+	raw_spin_unlock(&rq_cputime->lock);
+
+	return 0;
+}
+
+unsigned long long sched_get_idle_time(int cpu)
+{
+	struct rq_cputime *rt = &per_cpu(rq_cputimes, cpu);
+
+	if (is_idle_task(curr_task(cpu)))
+		return rt->sum_idle_time + cpu_clock(cpu) - rt->last_entry_idle;
+	else
+		return rt->sum_idle_time;
+}
+
 /*
  * Adjust tick based cputime random precision against scheduler runtime
  * accounting.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b63172288f7ba9022437a50def079aac2870d4bc..bc001966315f701be9eaeddc4fc181452641955a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -919,6 +919,21 @@ struct rq {
 #endif
 };
 
+struct cputime {
+	u64 utime;
+	u64 stime;
+};
+
+struct rq_cputime {
+	raw_spinlock_t lock;
+	unsigned long long sum_idle_time;
+	unsigned long long last_entry_idle;
+	struct cputime cpu_prev_time;
+	struct cputime cpu_last_time;
+};
+
+DECLARE_PER_CPU(struct rq_cputime, rq_cputimes);
+
 static inline int cpu_of(struct rq *rq)
 {
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 8aea199a39b4a61c6f53f4d44b876ec1798dab3e..27b9980a0e9a31442db6b9817b5b22b7860b9547 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/sched/cputime.h>
 
 #ifdef CONFIG_SCHEDSTATS
 
@@ -153,6 +154,20 @@ __sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct
 static inline void
 sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
 {
+	if (use_sched_idle_time && !tick_nohz_active) {
+		struct rq *rq = task_rq(prev);
+		struct rq_cputime *rq_cputime = this_cpu_ptr(&rq_cputimes);
+		unsigned long long now = cpu_clock(cpu_of(rq)), delta = 0;
+
+		if (prev == rq->idle) {
+			delta = now - rq_cputime->last_entry_idle;
+			rq_cputime->sum_idle_time += delta;
+		}
+
+		if (next == rq->idle)
+			rq_cputime->last_entry_idle = now;
+	}
+
 	if (unlikely(sched_info_on()))
 		__sched_info_switch(rq, prev, next);
 }
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index e277284c2831c9c1dae2219c1a135e1f5dc8945d..9ff03c4e7e92130e9881728d2bbc52789638ee43 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -149,14 +149,12 @@ static inline void tick_nohz_init(void) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_COMMON
-extern unsigned long tick_nohz_active;
 extern void timers_update_nohz(void);
 # ifdef CONFIG_SMP
 extern struct static_key_false timers_migration_enabled;
 # endif
 #else /* CONFIG_NO_HZ_COMMON */
 static inline void timers_update_nohz(void) { }
-#define tick_nohz_active (0)
 #endif
 
 DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
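
Not part of the patch itself: a standalone userspace sketch of the proportional split that sched_idle_time_adjust() performs above. The non-idle wall-clock delta is distributed between user and system time according to the tick-based deltas, mirroring the proportionality that scale_stime() provides in kernel/sched/cputime.c. The helper names demo_scale_stime()/demo_adjust() and the plain 64-bit arithmetic (no multiply-overflow guard, unlike the kernel's scale_stime()) are assumptions made purely for illustration.

/*
 * Userspace illustration (assumed, simplified): split a wall-clock delta of
 * non-idle time between user and system in proportion to the tick-based
 * user/system deltas, as sched_idle_time_adjust() does in the patch.
 */
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's scale_stime(); can overflow for huge inputs. */
static uint64_t demo_scale_stime(uint64_t stime, uint64_t rtime, uint64_t total)
{
	if (total == 0)
		return rtime;
	return stime * rtime / total;
}

/* Distribute delta ns of non-idle wall-clock time onto *utime/*stime. */
static void demo_adjust(uint64_t delta, uint64_t delta_ut, uint64_t delta_st,
			uint64_t *utime, uint64_t *stime)
{
	if (delta_st == 0) {
		*utime += delta;
	} else if (delta_ut == 0) {
		*stime += delta;
	} else {
		uint64_t st = demo_scale_stime(delta_st, delta, delta_ut + delta_st);

		if (st > delta)
			st = delta;
		*stime += st;
		*utime += delta - st;
	}
}

int main(void)
{
	/* Hypothetical numbers: 10 ms of wall clock; ticks saw 3 user + 1 system. */
	uint64_t utime = 0, stime = 0;

	demo_adjust(10000000ULL, 3, 1, &utime, &stime);
	printf("user: %llu ns, system: %llu ns\n",
	       (unsigned long long)utime, (unsigned long long)stime);
	return 0;
}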