提交 4e3da467 编写于 作者: I Ingo Molnar

Merge branch 'sched/cputime' of...

Merge branch 'sched/cputime' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into sched/core

Pull cputime changes from Frederic Weisbecker:

  * Generalize exception handling

  * Fix race in context tracking state restore on return from exception
    and irq exit kernel preemption

  * Fix cputime scaling in full dynticks accounting dynamic off-case

  * Fix default Kconfig value
Signed-off-by: NIngo Molnar <mingo@kernel.org>
#ifndef _ASM_X86_CONTEXT_TRACKING_H
#define _ASM_X86_CONTEXT_TRACKING_H
#ifndef __ASSEMBLY__
#include <linux/context_tracking.h>
#include <asm/ptrace.h>
static inline void exception_enter(struct pt_regs *regs)
{
user_exit();
}
static inline void exception_exit(struct pt_regs *regs)
{
#ifdef CONFIG_CONTEXT_TRACKING
if (user_mode(regs))
user_enter();
#endif
}
#else /* __ASSEMBLY__ */
#ifdef CONFIG_CONTEXT_TRACKING
# define SCHEDULE_USER call schedule_user
#else
# define SCHEDULE_USER call schedule
#endif
#endif /* !__ASSEMBLY__ */
#endif
......@@ -20,6 +20,7 @@
* Authors: Anthony Liguori <aliguori@us.ibm.com>
*/
#include <linux/context_tracking.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
......@@ -43,7 +44,6 @@
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/kvm_guest.h>
#include <asm/context_tracking.h>
static int kvmapf = 1;
......@@ -254,16 +254,18 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
dotraplinkage void __kprobes
do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
{
enum ctx_state prev_state;
switch (kvm_read_and_reset_pf_reason()) {
default:
do_page_fault(regs, error_code);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
exception_enter(regs);
prev_state = exception_enter();
exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
exception_exit(regs);
exception_exit(prev_state);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
......
......@@ -12,6 +12,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/context_tracking.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/spinlock.h>
......@@ -55,8 +56,6 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/context_tracking.h>
#include <asm/mach_traps.h>
#ifdef CONFIG_X86_64
......@@ -176,34 +175,38 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
#define DO_ERROR(trapnr, signr, str, name) \
dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
exception_enter(regs); \
enum ctx_state prev_state; \
\
prev_state = exception_enter(); \
if (notify_die(DIE_TRAP, str, regs, error_code, \
trapnr, signr) == NOTIFY_STOP) { \
exception_exit(regs); \
exception_exit(prev_state); \
return; \
} \
conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
exception_exit(regs); \
exception_exit(prev_state); \
}
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
siginfo_t info; \
enum ctx_state prev_state; \
\
info.si_signo = signr; \
info.si_errno = 0; \
info.si_code = sicode; \
info.si_addr = (void __user *)siaddr; \
exception_enter(regs); \
prev_state = exception_enter(); \
if (notify_die(DIE_TRAP, str, regs, error_code, \
trapnr, signr) == NOTIFY_STOP) { \
exception_exit(regs); \
exception_exit(prev_state); \
return; \
} \
conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, &info); \
exception_exit(regs); \
exception_exit(prev_state); \
}
DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
......@@ -226,14 +229,16 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
/* Runs on IST stack */
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
enum ctx_state prev_state;
prev_state = exception_enter();
if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
preempt_conditional_sti(regs);
do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
preempt_conditional_cli(regs);
}
exception_exit(regs);
exception_exit(prev_state);
}
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
......@@ -241,7 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
static const char str[] = "double fault";
struct task_struct *tsk = current;
exception_enter(regs);
exception_enter();
/* Return not checked because double check cannot be ignored */
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
......@@ -261,8 +266,9 @@ dotraplinkage void __kprobes
do_general_protection(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk;
enum ctx_state prev_state;
exception_enter(regs);
prev_state = exception_enter();
conditional_sti(regs);
#ifdef CONFIG_X86_32
......@@ -300,12 +306,14 @@ do_general_protection(struct pt_regs *regs, long error_code)
force_sig(SIGSEGV, tsk);
exit:
exception_exit(regs);
exception_exit(prev_state);
}
/* May run on IST stack. */
dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
{
enum ctx_state prev_state;
#ifdef CONFIG_DYNAMIC_FTRACE
/*
* ftrace must be first, everything else may cause a recursive crash.
......@@ -315,7 +323,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
ftrace_int3_handler(regs))
return;
#endif
exception_enter(regs);
prev_state = exception_enter();
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
......@@ -336,7 +344,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
preempt_conditional_cli(regs);
debug_stack_usage_dec();
exit:
exception_exit(regs);
exception_exit(prev_state);
}
#ifdef CONFIG_X86_64
......@@ -393,11 +401,12 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
enum ctx_state prev_state;
int user_icebp = 0;
unsigned long dr6;
int si_code;
exception_enter(regs);
prev_state = exception_enter();
get_debugreg(dr6, 6);
......@@ -467,7 +476,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_dec();
exit:
exception_exit(regs);
exception_exit(prev_state);
}
/*
......@@ -561,17 +570,21 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
enum ctx_state prev_state;
prev_state = exception_enter();
math_error(regs, error_code, X86_TRAP_MF);
exception_exit(regs);
exception_exit(prev_state);
}
dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
enum ctx_state prev_state;
prev_state = exception_enter();
math_error(regs, error_code, X86_TRAP_XF);
exception_exit(regs);
exception_exit(prev_state);
}
dotraplinkage void
......@@ -639,7 +652,9 @@ EXPORT_SYMBOL_GPL(math_state_restore);
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
enum ctx_state prev_state;
prev_state = exception_enter();
BUG_ON(use_eager_fpu());
#ifdef CONFIG_MATH_EMULATION
......@@ -650,7 +665,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
info.regs = regs;
math_emulate(&info);
exception_exit(regs);
exception_exit(prev_state);
return;
}
#endif
......@@ -658,15 +673,16 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
conditional_sti(regs);
#endif
exception_exit(regs);
exception_exit(prev_state);
}
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
siginfo_t info;
enum ctx_state prev_state;
exception_enter(regs);
prev_state = exception_enter();
local_irq_enable();
info.si_signo = SIGILL;
......@@ -678,7 +694,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
&info);
}
exception_exit(regs);
exception_exit(prev_state);
}
#endif
......
......@@ -13,12 +13,12 @@
#include <linux/perf_event.h> /* perf_sw_event */
#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */
#include <asm/context_tracking.h> /* exception_enter(), ... */
/*
* Page fault error code bits:
......@@ -1222,7 +1222,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
dotraplinkage void __kprobes
do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
exception_enter(regs);
enum ctx_state prev_state;
prev_state = exception_enter();
__do_page_fault(regs, error_code);
exception_exit(regs);
exception_exit(prev_state);
}
#ifndef _LINUX_CONTEXT_TRACKING_H
#define _LINUX_CONTEXT_TRACKING_H
#ifdef CONFIG_CONTEXT_TRACKING
#include <linux/sched.h>
#include <linux/percpu.h>
#include <asm/ptrace.h>
struct context_tracking {
/*
......@@ -13,12 +13,13 @@ struct context_tracking {
* may be further optimized using static keys.
*/
bool active;
enum {
enum ctx_state {
IN_KERNEL = 0,
IN_USER,
} state;
};
#ifdef CONFIG_CONTEXT_TRACKING
DECLARE_PER_CPU(struct context_tracking, context_tracking);
static inline bool context_tracking_in_user(void)
......@@ -33,12 +34,31 @@ static inline bool context_tracking_active(void)
extern void user_enter(void);
extern void user_exit(void);
static inline enum ctx_state exception_enter(void)
{
enum ctx_state prev_ctx;
prev_ctx = this_cpu_read(context_tracking.state);
user_exit();
return prev_ctx;
}
static inline void exception_exit(enum ctx_state prev_ctx)
{
if (prev_ctx == IN_USER)
user_enter();
}
extern void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next);
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next) { }
#endif /* !CONFIG_CONTEXT_TRACKING */
......
......@@ -558,7 +558,7 @@ struct signal_struct {
cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
#endif
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
......@@ -1158,7 +1158,7 @@ struct task_struct {
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
......
......@@ -509,6 +509,7 @@ config RCU_USER_QS
config CONTEXT_TRACKING_FORCE
bool "Force context tracking"
depends on CONTEXT_TRACKING
default CONTEXT_TRACKING
help
Probe on user/kernel boundaries by default in order to
test the features that rely on it such as userspace RCU extended
......
......@@ -1230,7 +1230,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
......
......@@ -3082,11 +3082,13 @@ EXPORT_SYMBOL(preempt_schedule);
asmlinkage void __sched preempt_schedule_irq(void)
{
struct thread_info *ti = current_thread_info();
enum ctx_state prev_state;
/* Catch callers which need to be fixed */
BUG_ON(ti->preempt_count || !irqs_disabled());
user_exit();
prev_state = exception_enter();
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
......@@ -3100,6 +3102,8 @@ asmlinkage void __sched preempt_schedule_irq(void)
*/
barrier();
} while (need_resched());
exception_exit(prev_state);
}
#endif /* CONFIG_PREEMPT */
......
......@@ -388,82 +388,10 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
* @user_tick: indicates if the tick is a user or a system tick
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
if (vtime_accounting_enabled())
return;
if (sched_clock_irqtime) {
irqtime_account_process_tick(p, user_tick, rq);
return;
}
if (steal_account_process_tick())
return;
if (user_tick)
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
one_jiffy_scaled);
else
account_idle_time(cputime_one_jiffy);
}
/*
* Account multiple ticks of steal time.
* @p: the process from which the cpu time has been stolen
* @ticks: number of stolen ticks
*/
void account_steal_ticks(unsigned long ticks)
{
account_steal_time(jiffies_to_cputime(ticks));
}
/*
* Account multiple ticks of idle time.
* @ticks: number of stolen ticks
*/
void account_idle_ticks(unsigned long ticks)
{
if (sched_clock_irqtime) {
irqtime_account_idle_ticks(ticks);
return;
}
account_idle_time(jiffies_to_cputime(ticks));
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
*ut = p->utime;
*st = p->stime;
}
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
struct task_cputime cputime;
thread_group_cputime(p, &cputime);
*ut = cputime.utime;
*st = cputime.stime;
}
#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_task_switch(struct task_struct *prev)
......@@ -518,8 +446,80 @@ void vtime_account_irq_enter(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
*ut = p->utime;
*st = p->stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
struct task_cputime cputime;
thread_group_cputime(p, &cputime);
*ut = cputime.utime;
*st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
* @user_tick: indicates if the tick is a user or a system tick
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
if (vtime_accounting_enabled())
return;
if (sched_clock_irqtime) {
irqtime_account_process_tick(p, user_tick, rq);
return;
}
if (steal_account_process_tick())
return;
if (user_tick)
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
one_jiffy_scaled);
else
account_idle_time(cputime_one_jiffy);
}
/*
* Account multiple ticks of steal time.
* @p: the process from which the cpu time has been stolen
* @ticks: number of stolen ticks
*/
void account_steal_ticks(unsigned long ticks)
{
account_steal_time(jiffies_to_cputime(ticks));
}
/*
* Account multiple ticks of idle time.
* @ticks: number of stolen ticks
*/
void account_idle_ticks(unsigned long ticks)
{
if (sched_clock_irqtime) {
irqtime_account_idle_ticks(ticks);
return;
}
account_idle_time(jiffies_to_cputime(ticks));
}
static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
{
......@@ -545,6 +545,12 @@ static void cputime_adjust(struct task_cputime *curr,
{
cputime_t rtime, stime, total;
if (vtime_accounting_enabled()) {
*ut = curr->utime;
*st = curr->stime;
return;
}
stime = curr->stime;
total = stime + curr->utime;
......@@ -597,7 +603,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
thread_group_cputime(p, &cputime);
cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static unsigned long long vtime_delta(struct task_struct *tsk)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册