提交 abf917cd 编写于 作者: F Frederic Weisbecker

cputime: Generic on-demand virtual cputime accounting

If we want to stop the tick beyond idle, we need to be
able to account the cputime without using the tick.

Virtual based cputime accounting solves that problem by
hooking into kernel/user boundaries.

However, implementing CONFIG_VIRT_CPU_ACCOUNTING requires
low level hooks and involves more overhead. But we already
have a generic context tracking subsystem that is required
for RCU needs by archs which plan to shut down the tick
outside idle.

This patch implements a generic virtual based cputime
accounting that relies on these generic kernel/user hooks.

There are some upsides of doing this:

- This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING
if context tracking is already built (already necessary for RCU in full
tickless mode).

- We can rely on the generic context tracking subsystem to dynamically
(de)activate the hooks, so that we can switch anytime between virtual
and tick based accounting. This way we don't have the overhead
of the virtual accounting when the tick is running periodically.

And one downside:

- There is probably more overhead than a native virtual based cputime
accounting. But this relies on hooks that are already set anyway.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
上级 ae8dda5c
...@@ -11,19 +11,19 @@ ...@@ -11,19 +11,19 @@
* as published by the Free Software Foundation; either version * as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version. * 2 of the License, or (at your option) any later version.
* *
* If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec. * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
* Otherwise we measure cpu time in jiffies using the generic definitions. * Otherwise we measure cpu time in jiffies using the generic definitions.
*/ */
#ifndef __IA64_CPUTIME_H #ifndef __IA64_CPUTIME_H
#define __IA64_CPUTIME_H #define __IA64_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
# include <asm-generic/cputime.h> # include <asm-generic/cputime.h>
#else #else
# include <asm/processor.h> # include <asm/processor.h>
# include <asm-generic/cputime_nsecs.h> # include <asm-generic/cputime_nsecs.h>
extern void arch_vtime_task_switch(struct task_struct *tsk); extern void arch_vtime_task_switch(struct task_struct *tsk);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __IA64_CPUTIME_H */ #endif /* __IA64_CPUTIME_H */
...@@ -31,7 +31,7 @@ struct thread_info { ...@@ -31,7 +31,7 @@ struct thread_info {
mm_segment_t addr_limit; /* user-level address space limit */ mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
struct restart_block restart_block; struct restart_block restart_block;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
__u64 ac_stamp; __u64 ac_stamp;
__u64 ac_leave; __u64 ac_leave;
__u64 ac_stime; __u64 ac_stime;
...@@ -69,7 +69,7 @@ struct thread_info { ...@@ -69,7 +69,7 @@ struct thread_info {
#define task_stack_page(tsk) ((void *)(tsk)) #define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS #define __HAVE_THREAD_FUNCTIONS
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define setup_thread_stack(p, org) \ #define setup_thread_stack(p, org) \
*task_thread_info(p) = *task_thread_info(org); \ *task_thread_info(p) = *task_thread_info(org); \
task_thread_info(p)->ac_stime = 0; \ task_thread_info(p)->ac_stime = 0; \
......
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* read ar.itc in advance, and use it before leaving bank 0 */ /* read ar.itc in advance, and use it before leaving bank 0 */
#define XEN_ACCOUNT_GET_STAMP \ #define XEN_ACCOUNT_GET_STAMP \
MOV_FROM_ITC(pUStk, p6, r20, r2); MOV_FROM_ITC(pUStk, p6, r20, r2);
......
...@@ -41,7 +41,7 @@ void foo(void) ...@@ -41,7 +41,7 @@ void foo(void)
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime)); DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
......
...@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall) ...@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
#endif #endif
.global __paravirt_work_processed_syscall; .global __paravirt_work_processed_syscall;
__paravirt_work_processed_syscall: __paravirt_work_processed_syscall:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
adds r2=PT(LOADRS)+16,r12 adds r2=PT(LOADRS)+16,r12
MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
...@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall: ...@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
ld8 r29=[r2],16 // M0|1 load cr.ipsr ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip ld8 r28=[r3],16 // M0|1 load cr.iip
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;; ;;
ld8 r30=[r2],16 // M0|1 load cr.ifs ld8 r30=[r2],16 // M0|1 load cr.ifs
...@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall: ...@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
ld8.fill r1=[r3],16 // M0|1 load r1 ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A (pUStk) mov r17=1 // A
;; ;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) st1 [r15]=r17 // M2|3 (pUStk) st1 [r15]=r17 // M2|3
#else #else
(pUStk) st1 [r14]=r17 // M2|3 (pUStk) st1 [r14]=r17 // M2|3
...@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall: ...@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
COVER // B add current frame into dirty partition & set cr.ifs COVER // B add current frame into dirty partition & set cr.ifs
;; ;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
mov r19=ar.bsp // M2 get new backing store pointer mov r19=ar.bsp // M2 get new backing store pointer
st8 [r14]=r22 // M save time at leave st8 [r14]=r22 // M save time at leave
mov f10=f0 // F clear f10 mov f10=f0 // F clear f10
...@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ...@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
adds r16=PT(CR_IPSR)+16,r12 adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12 adds r17=PT(CR_IIP)+16,r12
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
.pred.rel.mutex pUStk,pKStk .pred.rel.mutex pUStk,pKStk
MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled
MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave
...@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ...@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
;; ;;
ld8.fill r12=[r16],16 ld8.fill r12=[r16],16
ld8.fill r13=[r17],16 ld8.fill r13=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 (pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
#else #else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
...@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ...@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
;; ;;
ld8 r20=[r16],16 // ar.fpsr ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16 ld8.fill r15=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
#endif #endif
;; ;;
...@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ...@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
ld8.fill r2=[r17] ld8.fill r2=[r17]
(pUStk) mov r17=1 (pUStk) mov r17=1
;; ;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
// mib : mov add br -> mib : ld8 add br // mib : mov add br -> mib : ld8 add br
// bbb_ : br nop cover;; mbb_ : mov br cover;; // bbb_ : br nop cover;; mbb_ : mov br cover;;
......
...@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down) ...@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
nop.i 0 nop.i 0
;; ;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting
#else #else
nop.m 0 nop.m 0
...@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down) ...@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B br.call.sptk.many b7=ia64_syscall_setup // B
;; ;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mov.m r30=ar.itc is called in advance // mov.m r30=ar.itc is called in advance
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
......
...@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock) ...@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
sched_clock = ia64_native_sched_clock sched_clock = ia64_native_sched_clock
#endif #endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
GLOBAL_ENTRY(cycle_to_cputime) GLOBAL_ENTRY(cycle_to_cputime)
alloc r16=ar.pfs,1,0,0,0 alloc r16=ar.pfs,1,0,0,0
addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
...@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime) ...@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp br.ret.sptk.many rp
END(cycle_to_cputime) END(cycle_to_cputime)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_IA64_BRL_EMU #ifdef CONFIG_IA64_BRL_EMU
......
...@@ -784,7 +784,7 @@ ENTRY(break_fault) ...@@ -784,7 +784,7 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle (p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot (p9) adds r8=1,r8 // A increment ei to next slot
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
;; ;;
mov b6=r30 // I0 setup syscall handler branch reg early mov b6=r30 // I0 setup syscall handler branch reg early
#else #else
...@@ -801,7 +801,7 @@ ENTRY(break_fault) ...@@ -801,7 +801,7 @@ ENTRY(break_fault)
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting
#else #else
mov b6=r30 // I0 setup syscall handler branch reg early mov b6=r30 // I0 setup syscall handler branch reg early
...@@ -817,7 +817,7 @@ ENTRY(break_fault) ...@@ -817,7 +817,7 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B br.call.sptk.many b7=ia64_syscall_setup // B
1: 1:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mov.m r30=ar.itc is called in advance, and r13 is current // mov.m r30=ar.itc is called in advance, and r13 is current
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
...@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup) ...@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
DBG_FAULT(16) DBG_FAULT(16)
FAULT(16) FAULT(16)
#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
/* /*
* There is no particular reason for this code to be here, other than * There is no particular reason for this code to be here, other than
* that there happens to be space here that would go unused otherwise. * that there happens to be space here that would go unused otherwise.
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include "entry.h" #include "entry.h"
#include "paravirt_inst.h" #include "paravirt_inst.h"
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* read ar.itc in advance, and use it before leaving bank 0 */ /* read ar.itc in advance, and use it before leaving bank 0 */
#define ACCOUNT_GET_STAMP \ #define ACCOUNT_GET_STAMP \
(pUStk) mov.m r20=ar.itc; (pUStk) mov.m r20=ar.itc;
......
...@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = { ...@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
}; };
static struct clocksource *itc_clocksource; static struct clocksource *itc_clocksource;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
...@@ -142,7 +142,7 @@ void vtime_account_idle(struct task_struct *tsk) ...@@ -142,7 +142,7 @@ void vtime_account_idle(struct task_struct *tsk)
account_idle_time(vtime_delta(tsk)); account_idle_time(vtime_delta(tsk));
} }
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static irqreturn_t static irqreturn_t
timer_interrupt (int irq, void *dev_id) timer_interrupt (int irq, void *dev_id)
......
CONFIG_PPC64=y CONFIG_PPC64=y
CONFIG_PPC_BOOK3E_64=y CONFIG_PPC_BOOK3E_64=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set # CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y CONFIG_SMP=y
CONFIG_NR_CPUS=256 CONFIG_NR_CPUS=256
CONFIG_EXPERIMENTAL=y CONFIG_EXPERIMENTAL=y
......
CONFIG_PPC64=y CONFIG_PPC64=y
CONFIG_PPC_BOOK3E_64=y CONFIG_PPC_BOOK3E_64=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set # CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y CONFIG_SMP=y
CONFIG_NR_CPUS=2 CONFIG_NR_CPUS=2
CONFIG_EXPERIMENTAL=y CONFIG_EXPERIMENTAL=y
......
CONFIG_PPC64=y CONFIG_PPC64=y
CONFIG_ALTIVEC=y CONFIG_ALTIVEC=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set # CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y CONFIG_SMP=y
CONFIG_NR_CPUS=2 CONFIG_NR_CPUS=2
CONFIG_EXPERIMENTAL=y CONFIG_EXPERIMENTAL=y
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* as published by the Free Software Foundation; either version * as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version. * 2 of the License, or (at your option) any later version.
* *
* If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
* the same units as the timebase. Otherwise we measure cpu time * the same units as the timebase. Otherwise we measure cpu time
* in jiffies using the generic definitions. * in jiffies using the generic definitions.
*/ */
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#ifndef __POWERPC_CPUTIME_H #ifndef __POWERPC_CPUTIME_H
#define __POWERPC_CPUTIME_H #define __POWERPC_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm-generic/cputime.h> #include <asm-generic/cputime.h>
#ifdef __KERNEL__ #ifdef __KERNEL__
static inline void setup_cputime_one_jiffy(void) { } static inline void setup_cputime_one_jiffy(void) { }
...@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk) ...@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
static inline void arch_vtime_task_switch(struct task_struct *tsk) { } static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __POWERPC_CPUTIME_H */ #endif /* __POWERPC_CPUTIME_H */
...@@ -145,7 +145,7 @@ struct dtl_entry { ...@@ -145,7 +145,7 @@ struct dtl_entry {
extern struct kmem_cache *dtl_cache; extern struct kmem_cache *dtl_cache;
/* /*
* When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
* reading from the dispatch trace log. If other code wants to consume * reading from the dispatch trace log. If other code wants to consume
* DTL entries, it can set this pointer to a function that will get * DTL entries, it can set this pointer to a function that will get
* called once for each DTL entry that gets processed. * called once for each DTL entry that gets processed.
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* user_time and system_time fields in the paca. * user_time and system_time fields in the paca.
*/ */
#ifndef CONFIG_VIRT_CPU_ACCOUNTING #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define ACCOUNT_CPU_USER_ENTRY(ra, rb) #define ACCOUNT_CPU_USER_ENTRY(ra, rb)
#define ACCOUNT_CPU_USER_EXIT(ra, rb) #define ACCOUNT_CPU_USER_EXIT(ra, rb)
#define ACCOUNT_STOLEN_TIME #define ACCOUNT_STOLEN_TIME
...@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) ...@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#endif /* CONFIG_PPC_SPLPAR */ #endif /* CONFIG_PPC_SPLPAR */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/* /*
* Macros for storing registers into and loading registers from * Macros for storing registers into and loading registers from
......
...@@ -94,7 +94,7 @@ system_call_common: ...@@ -94,7 +94,7 @@ system_call_common:
addi r9,r1,STACK_FRAME_OVERHEAD addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2) ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */ std r11,-16(r9) /* "regshere" marker */
#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION BEGIN_FW_FTR_SECTION
beq 33f beq 33f
/* if from user, see if there are any DTL entries to process */ /* if from user, see if there are any DTL entries to process */
...@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION ...@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
addi r9,r1,STACK_FRAME_OVERHEAD addi r9,r1,STACK_FRAME_OVERHEAD
33: 33:
END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
/* /*
* A syscall should always be called with interrupts enabled * A syscall should always be called with interrupts enabled
......
...@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); ...@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq; unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq); EXPORT_SYMBOL_GPL(ppc_tb_freq);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* /*
* Factors for converting from cputime_t (timebase ticks) to * Factors for converting from cputime_t (timebase ticks) to
* jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
...@@ -377,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk) ...@@ -377,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk)
account_user_time(tsk, utime, utimescaled); account_user_time(tsk, utime, utimescaled);
} }
#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#define calc_cputime_factors() #define calc_cputime_factors()
#endif #endif
......
...@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7; ...@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
*/ */
static int dtl_buf_entries = N_DISPATCH_LOG; static int dtl_buf_entries = N_DISPATCH_LOG;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct dtl_ring { struct dtl_ring {
u64 write_index; u64 write_index;
struct dtl_entry *write_ptr; struct dtl_entry *write_ptr;
...@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl) ...@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
return per_cpu(dtl_rings, dtl->cpu).write_index; return per_cpu(dtl_rings, dtl->cpu).write_index;
} }
#else /* CONFIG_VIRT_CPU_ACCOUNTING */ #else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int dtl_start(struct dtl *dtl) static int dtl_start(struct dtl *dtl)
{ {
...@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl) ...@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
{ {
return lppaca_of(dtl->cpu).dtl_idx; return lppaca_of(dtl->cpu).dtl_idx;
} }
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int dtl_enable(struct dtl *dtl) static int dtl_enable(struct dtl *dtl)
{ {
......
...@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = { ...@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
struct kmem_cache *dtl_cache; struct kmem_cache *dtl_cache;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* /*
* Allocate space for the dispatch trace log for all possible cpus * Allocate space for the dispatch trace log for all possible cpus
* and register the buffers with the hypervisor. This is used for * and register the buffers with the hypervisor. This is used for
...@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void) ...@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
return 0; return 0;
} }
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline int alloc_dispatch_logs(void) static inline int alloc_dispatch_logs(void)
{ {
return 0; return 0;
} }
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int alloc_dispatch_log_kmem_cache(void) static int alloc_dispatch_log_kmem_cache(void)
{ {
......
...@@ -4,6 +4,12 @@ ...@@ -4,6 +4,12 @@
#include <linux/time.h> #include <linux/time.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <asm-generic/cputime_jiffies.h> #ifndef CONFIG_VIRT_CPU_ACCOUNTING
# include <asm-generic/cputime_jiffies.h>
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# include <asm-generic/cputime_nsecs.h>
#endif
#endif #endif
...@@ -26,6 +26,7 @@ typedef u64 __nocast cputime64_t; ...@@ -26,6 +26,7 @@ typedef u64 __nocast cputime64_t;
*/ */
#define cputime_to_jiffies(__ct) \ #define cputime_to_jiffies(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__jif) \ #define jiffies_to_cputime(__jif) \
(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
#define cputime64_to_jiffies64(__ct) \ #define cputime64_to_jiffies64(__ct) \
...@@ -33,6 +34,13 @@ typedef u64 __nocast cputime64_t; ...@@ -33,6 +34,13 @@ typedef u64 __nocast cputime64_t;
#define jiffies64_to_cputime64(__jif) \ #define jiffies64_to_cputime64(__jif) \
(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
/*
* Convert cputime <-> nanoseconds
*/
#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs))
/* /*
* Convert cputime <-> microseconds * Convert cputime <-> microseconds
*/ */
......
...@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t) ...@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
extern void account_steal_time(cputime_t); extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t); extern void account_idle_time(cputime_t);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user) static inline void account_process_tick(struct task_struct *tsk, int user)
{ {
vtime_account_user(tsk); vtime_account_user(tsk);
......
...@@ -14,9 +14,25 @@ extern void vtime_account(struct task_struct *tsk); ...@@ -14,9 +14,25 @@ extern void vtime_account(struct task_struct *tsk);
static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system(struct task_struct *tsk) { }
static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
static inline void vtime_account_user(struct task_struct *tsk) { }
static inline void vtime_account(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { }
#endif #endif
/*
 * Hooks for the kernel/user boundary. With CONFIG_VIRT_CPU_ACCOUNTING_GEN
 * they flush the pending cputime; otherwise they compile to empty stubs.
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
/* No arch-specific work is done on task switch in this configuration. */
static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
/* Entering user mode: account the time spent so far as system time. */
static inline void vtime_user_enter(struct task_struct *tsk)
{
vtime_account_system(tsk);
}
/* Leaving user mode: account the time spent so far as user time. */
static inline void vtime_user_exit(struct task_struct *tsk)
{
vtime_account_user(tsk);
}
#else
/* Without CONFIG_VIRT_CPU_ACCOUNTING_GEN these hooks do nothing. */
static inline void vtime_user_enter(struct task_struct *tsk) { }
static inline void vtime_user_exit(struct task_struct *tsk) { }
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING
extern void irqtime_account_irq(struct task_struct *tsk); extern void irqtime_account_irq(struct task_struct *tsk);
#else #else
......
...@@ -326,6 +326,9 @@ source "kernel/time/Kconfig" ...@@ -326,6 +326,9 @@ source "kernel/time/Kconfig"
menu "CPU/Task time and stats accounting" menu "CPU/Task time and stats accounting"
config VIRT_CPU_ACCOUNTING
bool
choice choice
prompt "Cputime accounting" prompt "Cputime accounting"
default TICK_CPU_ACCOUNTING if !PPC64 default TICK_CPU_ACCOUNTING if !PPC64
...@@ -342,9 +345,10 @@ config TICK_CPU_ACCOUNTING ...@@ -342,9 +345,10 @@ config TICK_CPU_ACCOUNTING
If unsure, say Y. If unsure, say Y.
config VIRT_CPU_ACCOUNTING config VIRT_CPU_ACCOUNTING_NATIVE
bool "Deterministic task and CPU time accounting" bool "Deterministic task and CPU time accounting"
depends on HAVE_VIRT_CPU_ACCOUNTING depends on HAVE_VIRT_CPU_ACCOUNTING
select VIRT_CPU_ACCOUNTING
help help
Select this option to enable more accurate task and CPU time Select this option to enable more accurate task and CPU time
accounting. This is done by reading a CPU counter on each accounting. This is done by reading a CPU counter on each
...@@ -354,6 +358,23 @@ config VIRT_CPU_ACCOUNTING ...@@ -354,6 +358,23 @@ config VIRT_CPU_ACCOUNTING
this also enables accounting of stolen time on logically-partitioned this also enables accounting of stolen time on logically-partitioned
systems. systems.
config VIRT_CPU_ACCOUNTING_GEN
bool "Full dynticks CPU time accounting"
depends on HAVE_CONTEXT_TRACKING && 64BIT
select VIRT_CPU_ACCOUNTING
select CONTEXT_TRACKING
help
Select this option to enable task and CPU time accounting on full
dynticks systems. This accounting is implemented by watching every
kernel-user boundary using the context tracking subsystem.
The accounting is thus performed at the expense of some significant
overhead.
For now this is only useful if you are working on the full
dynticks subsystem development.
If unsure, say N.
config IRQ_TIME_ACCOUNTING config IRQ_TIME_ACCOUNTING
bool "Fine granularity task level IRQ time accounting" bool "Fine granularity task level IRQ time accounting"
depends on HAVE_IRQ_TIME_ACCOUNTING depends on HAVE_IRQ_TIME_ACCOUNTING
......
...@@ -30,8 +30,9 @@ void user_enter(void) ...@@ -30,8 +30,9 @@ void user_enter(void)
local_irq_save(flags); local_irq_save(flags);
if (__this_cpu_read(context_tracking.active) && if (__this_cpu_read(context_tracking.active) &&
__this_cpu_read(context_tracking.state) != IN_USER) { __this_cpu_read(context_tracking.state) != IN_USER) {
__this_cpu_write(context_tracking.state, IN_USER); vtime_user_enter(current);
rcu_user_enter(); rcu_user_enter();
__this_cpu_write(context_tracking.state, IN_USER);
} }
local_irq_restore(flags); local_irq_restore(flags);
} }
...@@ -53,8 +54,9 @@ void user_exit(void) ...@@ -53,8 +54,9 @@ void user_exit(void)
local_irq_save(flags); local_irq_save(flags);
if (__this_cpu_read(context_tracking.state) == IN_USER) { if (__this_cpu_read(context_tracking.state) == IN_USER) {
__this_cpu_write(context_tracking.state, IN_KERNEL);
rcu_user_exit(); rcu_user_exit();
vtime_user_exit(current);
__this_cpu_write(context_tracking.state, IN_KERNEL);
} }
local_irq_restore(flags); local_irq_restore(flags);
} }
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/tsacct_kern.h> #include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/static_key.h> #include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h" #include "sched.h"
...@@ -479,7 +480,9 @@ void vtime_task_switch(struct task_struct *prev) ...@@ -479,7 +480,9 @@ void vtime_task_switch(struct task_struct *prev)
else else
vtime_account_system(prev); vtime_account_system(prev);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
vtime_account_user(prev); vtime_account_user(prev);
#endif
arch_vtime_task_switch(prev); arch_vtime_task_switch(prev);
} }
#endif #endif
...@@ -495,10 +498,24 @@ void vtime_task_switch(struct task_struct *prev) ...@@ -495,10 +498,24 @@ void vtime_task_switch(struct task_struct *prev)
#ifndef __ARCH_HAS_VTIME_ACCOUNT #ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk) void vtime_account(struct task_struct *tsk)
{ {
if (in_interrupt() || !is_idle_task(tsk)) if (!in_interrupt()) {
vtime_account_system(tsk); /*
else * If we interrupted user, context_tracking_in_user()
vtime_account_idle(tsk); * is 1 because the context tracking don't hook
* on irq entry/exit. This way we know if
* we need to flush user time on kernel entry.
*/
if (context_tracking_in_user()) {
vtime_account_user(tsk);
return;
}
if (is_idle_task(tsk)) {
vtime_account_idle(tsk);
return;
}
}
vtime_account_system(tsk);
} }
EXPORT_SYMBOL_GPL(vtime_account); EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */ #endif /* __ARCH_HAS_VTIME_ACCOUNT */
...@@ -583,3 +600,39 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime ...@@ -583,3 +600,39 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
} }
#endif #endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static DEFINE_PER_CPU(unsigned long long, cputime_snap);
/*
 * Return, as a cputime_t, the difference between the current sched_clock()
 * reading and this CPU's cputime_snap, then move the snapshot forward by
 * that same amount so the next call only reports newer time.
 */
static cputime_t get_vtime_delta(void)
{
	unsigned long long elapsed = sched_clock() - __this_cpu_read(cputime_snap);

	/* Consume the interval: snapshot += elapsed. */
	__this_cpu_add(cputime_snap, elapsed);

	/* CHECKME: always safe to convert nsecs to cputime? */
	return nsecs_to_cputime(elapsed);
}
/*
 * Charge the time elapsed since the last per-CPU snapshot to @tsk as
 * system time. irq_count() is passed through as the hardirq offset and
 * the scaled value is derived from the same delta.
 */
void vtime_account_system(struct task_struct *tsk)
{
	cputime_t elapsed = get_vtime_delta();

	account_system_time(tsk, irq_count(), elapsed,
			    cputime_to_scaled(elapsed));
}
/*
 * Charge the time elapsed since the last per-CPU snapshot to @tsk as
 * user time, along with its scaled counterpart.
 */
void vtime_account_user(struct task_struct *tsk)
{
	cputime_t elapsed = get_vtime_delta();

	account_user_time(tsk, elapsed, cputime_to_scaled(elapsed));
}
/*
 * Account the time elapsed since the last per-CPU snapshot as idle time.
 * @tsk is not used here; it is part of the common vtime hook signature.
 */
void vtime_account_idle(struct task_struct *tsk)
{
	cputime_t elapsed = get_vtime_delta();

	account_idle_time(elapsed);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册