diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 23a8d6cc8d30116d2da48449df9bc20ed7982236..4d534582514e014b5fdb3fc5e0b9db7e52c7b306 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "cpuidle.h" @@ -69,18 +70,20 @@ int cpuidle_play_dead(void) * cpuidle_find_deepest_state - Find deepest state meeting specific conditions. * @drv: cpuidle driver for the given CPU. * @dev: cpuidle device for the given CPU. + * @freeze: Whether or not the state should be suitable for suspend-to-idle. */ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, bool freeze) { unsigned int latency_req = 0; - int i, ret = CPUIDLE_DRIVER_STATE_START - 1; + int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1; for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; - if (s->disabled || su->disable || s->exit_latency <= latency_req) + if (s->disabled || su->disable || s->exit_latency <= latency_req + || (freeze && !s->enter_freeze)) continue; latency_req = s->exit_latency; @@ -89,10 +92,31 @@ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, return ret; } +static void enter_freeze_proper(struct cpuidle_driver *drv, + struct cpuidle_device *dev, int index) +{ + tick_freeze(); + /* + * The state used here cannot be a "coupled" one, because the "coupled" + * cpuidle mechanism enables interrupts and doing that with timekeeping + * suspended is generally unsafe. + */ + drv->states[index].enter_freeze(dev, drv, index); + WARN_ON(!irqs_disabled()); + /* + * timekeeping_resume() that will be called by tick_unfreeze() for the + * last CPU executing it calls functions containing RCU read-side + * critical sections, so tell RCU about that. + */ + RCU_NONIDLE(tick_unfreeze()); +} + /** * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle. * - * Find the deepest state available and enter it. + * If there are states with the ->enter_freeze callback, find the deepest of + * them and enter it with frozen tick. Otherwise, find the deepest state + * available and enter it normally. */ void cpuidle_enter_freeze(void) { @@ -100,7 +124,22 @@ void cpuidle_enter_freeze(void) struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int index; - index = cpuidle_find_deepest_state(drv, dev); + /* + * Find the deepest state with ->enter_freeze present, which guarantees + * that interrupts won't be enabled when it exits and allows the tick to + * be frozen safely. + */ + index = cpuidle_find_deepest_state(drv, dev, true); + if (index >= 0) { + enter_freeze_proper(drv, dev, index); + return; + } + + /* + * It is not safe to freeze the tick, find the deepest state available + * at all and try to enter it normally. + */ + index = cpuidle_find_deepest_state(drv, dev, false); if (index >= 0) cpuidle_enter(drv, dev, index); else diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index f63aabf4ee90b13becf4df90c47344faecfb449b..f551a9299ac987c5121a6f58383a953a8704d729 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -50,6 +50,15 @@ struct cpuidle_state { int index); int (*enter_dead) (struct cpuidle_device *dev, int index); + + /* + * CPUs execute ->enter_freeze with the local tick or entire timekeeping + * suspended, so it must not re-enable interrupts at any point (even + * temporarily) or attempt to change states of clock event devices. + */ + void (*enter_freeze) (struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index); }; /* Idle State Flags */ diff --git a/include/linux/tick.h b/include/linux/tick.h index eda850ca757a8998edb64c71295fb4ce05dfc17b..9c085dc12ae92626e3d6ae831df435a82f188c08 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -79,6 +79,9 @@ extern void __init tick_init(void); extern int tick_is_oneshot_available(void); extern struct tick_device *tick_get_device(int cpu); +extern void tick_freeze(void); +extern void tick_unfreeze(void); + # ifdef CONFIG_HIGH_RES_TIMERS extern int tick_init_highres(void); extern int tick_program_event(ktime_t expires, int force); @@ -119,6 +122,8 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } +static inline void tick_freeze(void) { } +static inline void tick_unfreeze(void) { } static inline void tick_cancel_sched_timer(int cpu) { } static inline void tick_clock_notify(void) { } static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } @@ -226,5 +231,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk) __tick_nohz_task_switch(tsk); } - #endif diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 7efeedf53ebd1aa45f3286ad50da3e24c46f391a..f7c515595b42b2bf9794a8f3f4ee1f9a2c17df89 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -394,6 +394,56 @@ void tick_resume(void) } } +static DEFINE_RAW_SPINLOCK(tick_freeze_lock); +static unsigned int tick_freeze_depth; + +/** + * tick_freeze - Suspend the local tick and (possibly) timekeeping. + * + * Check if this is the last online CPU executing the function and if so, + * suspend timekeeping. Otherwise suspend the local tick. + * + * Call with interrupts disabled. Must be balanced with %tick_unfreeze(). + * Interrupts must not be enabled before the subsequent %tick_unfreeze(). + */ +void tick_freeze(void) +{ + raw_spin_lock(&tick_freeze_lock); + + tick_freeze_depth++; + if (tick_freeze_depth == num_online_cpus()) { + timekeeping_suspend(); + } else { + tick_suspend(); + tick_suspend_broadcast(); + } + + raw_spin_unlock(&tick_freeze_lock); +} + +/** + * tick_unfreeze - Resume the local tick and (possibly) timekeeping. + * + * Check if this is the first CPU executing the function and if so, resume + * timekeeping. Otherwise resume the local tick. + * + * Call with interrupts disabled. Must be balanced with %tick_freeze(). + * Interrupts must not be enabled after the preceding %tick_freeze(). + */ +void tick_unfreeze(void) +{ + raw_spin_lock(&tick_freeze_lock); + + if (tick_freeze_depth == num_online_cpus()) + timekeeping_resume(); + else + tick_resume(); + + tick_freeze_depth--; + + raw_spin_unlock(&tick_freeze_lock); +} + /** * tick_init - initialize the tick control */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index aef5dc722abf94f1bf2e6a4d4d8beeaf88bf3872..91db94136c1062571ba0d0f1bfd1ed687770af3e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1197,7 +1197,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) * xtime/wall_to_monotonic/jiffies/etc are * still managed by arch specific suspend/resume code. */ -static void timekeeping_resume(void) +void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock = tk->tkr.clock; @@ -1278,7 +1278,7 @@ static void timekeeping_resume(void) hrtimers_resume(); } -static int timekeeping_suspend(void) +int timekeeping_suspend(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index adc1fc98bde3b53da2a17fb436c83a9cf8d2e1fa..1d91416055d5e9f05e5b6d482bc8ab490acdeeb2 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -16,5 +16,7 @@ extern int timekeeping_inject_offset(struct timespec *ts); extern s32 timekeeping_get_tai_offset(void); extern void timekeeping_set_tai_offset(s32 tai_offset); extern void timekeeping_clocktai(struct timespec *ts); +extern int timekeeping_suspend(void); +extern void timekeeping_resume(void); #endif