提交 27185016 编写于 作者: T Thomas Gleixner

nohz: Track last do_timer() cpu

The previous patch which limits the sleep time to the maximum
deferment time of the time keeping clocksource has some limitations on
SMP machines: if all CPUs are idle then for all CPUs the maximum sleep
time is limited.

Solve this by keeping track of which cpu had the do_timer() duty
assigned last and limit the sleep time only for this cpu.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Cc: Jon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>
上级 98962465
...@@ -43,6 +43,7 @@ enum tick_nohz_mode { ...@@ -43,6 +43,7 @@ enum tick_nohz_mode {
* @idle_exittime: Time when the idle state was left * @idle_exittime: Time when the idle state was left
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
* @sleep_length: Duration of the current idle sleep * @sleep_length: Duration of the current idle sleep
* @do_timer_last: CPU was the last one doing do_timer before going idle
*/ */
struct tick_sched { struct tick_sched {
struct hrtimer sched_timer; struct hrtimer sched_timer;
...@@ -64,6 +65,7 @@ struct tick_sched { ...@@ -64,6 +65,7 @@ struct tick_sched {
unsigned long last_jiffies; unsigned long last_jiffies;
unsigned long next_jiffies; unsigned long next_jiffies;
ktime_t idle_expires; ktime_t idle_expires;
int do_timer_last;
}; };
extern void __init tick_init(void); extern void __init tick_init(void);
......
...@@ -263,17 +263,7 @@ void tick_nohz_stop_sched_tick(int inidle) ...@@ -263,17 +263,7 @@ void tick_nohz_stop_sched_tick(int inidle)
seq = read_seqbegin(&xtime_lock); seq = read_seqbegin(&xtime_lock);
last_update = last_jiffies_update; last_update = last_jiffies_update;
last_jiffies = jiffies; last_jiffies = jiffies;
/*
* On SMP we really should only care for the CPU which
* has the do_timer duty assigned. All other CPUs can
* sleep as long as they want.
*/
if (cpu == tick_do_timer_cpu ||
tick_do_timer_cpu == TICK_DO_TIMER_NONE)
time_delta = timekeeping_max_deferment(); time_delta = timekeeping_max_deferment();
else
time_delta = KTIME_MAX;
} while (read_seqretry(&xtime_lock, seq)); } while (read_seqretry(&xtime_lock, seq));
if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
...@@ -295,6 +285,29 @@ void tick_nohz_stop_sched_tick(int inidle) ...@@ -295,6 +285,29 @@ void tick_nohz_stop_sched_tick(int inidle)
/* Schedule the tick, if we are at least one jiffie off */ /* Schedule the tick, if we are at least one jiffie off */
if ((long)delta_jiffies >= 1) { if ((long)delta_jiffies >= 1) {
/*
* If this cpu is the one which updates jiffies, then
* give up the assignment and let it be taken by the
* cpu which runs the tick timer next, which might be
* this cpu as well. If we don't drop this here the
* jiffies might be stale and do_timer() never
* invoked. Keep track of the fact that it was the one
* which had the do_timer() duty last. If this cpu is
* the one which had the do_timer() duty last, we
* limit the sleep time to the timekeeping
* max_deferment value which we retrieved
* above. Otherwise we can sleep as long as we want.
*/
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->do_timer_last = 1;
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
time_delta = KTIME_MAX;
ts->do_timer_last = 0;
} else if (!ts->do_timer_last) {
time_delta = KTIME_MAX;
}
/* /*
* calculate the expiry time for the next timer wheel * calculate the expiry time for the next timer wheel
* timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
...@@ -312,21 +325,12 @@ void tick_nohz_stop_sched_tick(int inidle) ...@@ -312,21 +325,12 @@ void tick_nohz_stop_sched_tick(int inidle)
*/ */
time_delta = min_t(u64, time_delta, time_delta = min_t(u64, time_delta,
tick_period.tv64 * delta_jiffies); tick_period.tv64 * delta_jiffies);
expires = ktime_add_ns(last_update, time_delta);
} else {
expires.tv64 = KTIME_MAX;
} }
/* if (time_delta < KTIME_MAX)
* If this cpu is the one which updates jiffies, then expires = ktime_add_ns(last_update, time_delta);
* give up the assignment and let it be taken by the else
* cpu which runs the tick timer next, which might be expires.tv64 = KTIME_MAX;
* this cpu as well. If we don't drop this here the
* jiffies might be stale and do_timer() never
* invoked.
*/
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
if (delta_jiffies > 1) if (delta_jiffies > 1)
cpumask_set_cpu(cpu, nohz_cpu_mask); cpumask_set_cpu(cpu, nohz_cpu_mask);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册