diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 12bf44f083f53355ca785a663bd76422dafcdfc1..e8ffce898bf913c7c12943b0a40785354dde6834 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -53,7 +53,9 @@ static inline int kstat_irqs(int irq)
 }
 
 extern void account_user_time(struct task_struct *, cputime_t);
+extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
+extern void account_system_time_scaled(struct task_struct *, cputime_t);
 extern void account_steal_time(struct task_struct *, cputime_t);
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 269b234609b866853564afc4b4f4c12cfc500ae7..7accc04e23ab0310166d72ebf32623427a6c5a71 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -991,7 +991,7 @@ struct task_struct {
 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
 
 	unsigned int rt_priority;
-	cputime_t utime, stime;
+	cputime_t utime, stime, utimescaled, stimescaled;
 	cputime_t gtime;
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index dce1ed2049727a9b7a3576abc0c075440bc121e6..92bfd1c153fb9048919d5c4e180456ae9205ee5f 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
  */
 
 
-#define TASKSTATS_VERSION	5
+#define TASKSTATS_VERSION	6
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -85,9 +85,12 @@ struct taskstats {
 	 * On some architectures, value will adjust for cpu time stolen
 	 * from the kernel in involuntary waits due to virtualization.
 	 * Value is cumulative, in nanoseconds, without a corresponding count
-	 * and wraps around to zero silently on overflow
+	 * and wraps around to zero silently on overflow.  The
+	 * _scaled_ version accounts for cpus which can scale the
+	 * number of instructions executed each cycle.
 	 */
 	__u64	cpu_run_real_total;
+	__u64	cpu_scaled_run_real_total;
 
 	/* cpu "virtual" running time
 	 * Uses time intervals seen by the kernel i.e. no adjustment
@@ -142,6 +145,10 @@ struct taskstats {
 	__u64	write_char;		/* bytes written */
 	__u64	read_syscalls;		/* read syscalls */
 	__u64	write_syscalls;		/* write syscalls */
+
+	/* time accounting for SMT machines */
+	__u64	ac_utimescaled;		/* utime scaled on frequency etc */
+	__u64	ac_stimescaled;		/* stime scaled on frequency etc */
 	/* Extended accounting fields end */
 
 #define TASKSTATS_HAS_IO_ACCOUNTING
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 09e9574eeb26f0273054e1111224521e0a072c67..10e43fd8b721a7c28f44dbb60515efa9d0d4042d 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -115,6 +115,12 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	tmp += timespec_to_ns(&ts);
 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
 
+	tmp = (s64)d->cpu_scaled_run_real_total;
+	cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
+	tmp += timespec_to_ns(&ts);
+	d->cpu_scaled_run_real_total =
+		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
+
 	/*
 	 * No locking available for sched_info (and too expensive to add one)
 	 * Mitigate by taking snapshot of values
diff --git a/kernel/fork.c b/kernel/fork.c
index 1232aac6a1cd9051e3da1102b42648ad1e283f60..2ce28f165e31a1e671e014b10a852e53cd1c535b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1059,6 +1059,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->utime = cputime_zero;
 	p->stime = cputime_zero;
 	p->gtime = cputime_zero;
+	p->utimescaled = cputime_zero;
+	p->stimescaled = cputime_zero;
 
 #ifdef CONFIG_TASK_XACCT
 	p->rchar = 0;		/* I/O counter: bytes read */
diff --git a/kernel/sched.c b/kernel/sched.c
index 92721d1534b85f1e2f3b192b041677b50b79b9e9..12534421d7b5f4c46559a3a3c6aede140d2f5f3c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3333,6 +3333,16 @@ void account_guest_time(struct task_struct *p, cputime_t cputime)
 	cpustat->guest = cputime64_add(cpustat->guest, tmp);
 }
 
+/*
+ * Account scaled user cpu time to a process.
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in user space since the last update
+ */
+void account_user_time_scaled(struct task_struct *p, cputime_t cputime)
+{
+	p->utimescaled = cputime_add(p->utimescaled, cputime);
+}
+
 /*
  * Account system cpu time to a process.
  * @p: the process that the cpu time gets accounted to
@@ -3370,6 +3380,16 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	acct_update_integrals(p);
 }
 
+/*
+ * Account scaled system cpu time to a process.
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in kernel space since the last update
+ */
+void account_system_time_scaled(struct task_struct *p, cputime_t cputime)
+{
+	p->stimescaled = cputime_add(p->stimescaled, cputime);
+}
+
 /*
  * Account for involuntary wait time.
  * @p: the process from which the cpu time has been stolen
diff --git a/kernel/timer.c b/kernel/timer.c
index 0735f0aa3afb68da6455b125d13ce86ab1c128ca..8521d10fbb27543602bcd71ae503ce44d21d73f3 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -826,10 +826,13 @@ void update_process_times(int user_tick)
 	int cpu = smp_processor_id();
 
 	/* Note: this timer irq context must be accounted for as well. */
-	if (user_tick)
+	if (user_tick) {
 		account_user_time(p, jiffies_to_cputime(1));
-	else
+		account_user_time_scaled(p, jiffies_to_cputime(1));
+	} else {
 		account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1));
+		account_system_time_scaled(p, jiffies_to_cputime(1));
+	}
 	run_local_timers();
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_tick);
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index c122131a122f54121f239ece78f830efbaf90d69..4ab1b584961b922afacc61f80750d1f0396ac0ec 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -62,6 +62,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
 	rcu_read_unlock();
 	stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC;
 	stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
+	stats->ac_utimescaled =
+		cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC;
+	stats->ac_stimescaled =
+		cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC;
 	stats->ac_minflt = tsk->min_flt;
 	stats->ac_majflt = tsk->maj_flt;
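
For illustration, here is a minimal sketch (not part of the patch) of how an architecture whose cpus execute a variable number of instructions per cycle might drive the new hooks from its own tick path instead of the generic one in kernel/timer.c. read_speed_percent() is an invented helper standing in for whatever speed ratio the hardware reports (on POWER, for instance, one could be derived from the PURR/SPURR registers), and the arithmetic assumes cputime_t is a plain integer type, as on most architectures:

#include <linux/hardirq.h>
#include <linux/kernel_stat.h>
#include <linux/sched.h>

/* hypothetical, arch-specific: fraction of full speed achieved, 100 = full */
static unsigned int read_speed_percent(void);

static void arch_account_tick(struct task_struct *p, int user_tick)
{
	cputime_t tick = jiffies_to_cputime(1);
	/* scale the tick by how much work the cpu actually got done */
	cputime_t scaled = tick * read_speed_percent() / 100;

	if (user_tick) {
		account_user_time(p, tick);
		account_user_time_scaled(p, scaled);
	} else {
		account_system_time(p, HARDIRQ_OFFSET, tick);
		account_system_time_scaled(p, scaled);
	}
}

On architectures with no such measurement, the generic path in update_process_times() above simply accounts the unscaled tick to both fields, so utimescaled/stimescaled degrade gracefully to utime/stime.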