/*
 * Generic entry point for the idle threads
 */
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>

#include <asm/tlb.h>

#include <trace/events/power.h>

static int __read_mostly cpu_idle_force_poll;

void cpu_idle_poll_ctrl(bool enable)
{
	if (enable) {
		cpu_idle_force_poll++;
	} else {
		cpu_idle_force_poll--;
		WARN_ON_ONCE(cpu_idle_force_poll < 0);
	}
}
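
/*
 * cpu_idle_poll_ctrl() acts as a nestable enable count: every enable
 * must be balanced by exactly one disable, and the WARN_ON_ONCE()
 * above fires if the count ever goes negative.  A hypothetical caller
 * that needs minimal wakeup latency around a short critical window
 * could bracket it like this:
 *
 *	cpu_idle_poll_ctrl(true);
 *	... latency-critical window ...
 *	cpu_idle_poll_ctrl(false);
 */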

#ifdef CONFIG_GENERIC_IDLE_POLL_SETUP
static int __init cpu_idle_poll_setup(char *__unused)
{
	cpu_idle_force_poll = 1;
	return 1;
}
__setup("nohlt", cpu_idle_poll_setup);

static int __init cpu_idle_nopoll_setup(char *__unused)
{
	cpu_idle_force_poll = 0;
	return 1;
}
__setup("hlt", cpu_idle_nopoll_setup);
#endif
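
/*
 * With CONFIG_GENERIC_IDLE_POLL_SETUP, booting with "nohlt" forces the
 * polling idle loop and "hlt" restores the default behaviour; both
 * simply set or clear cpu_idle_force_poll at boot time.
 */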

static inline int cpu_idle_poll(void)
{
	rcu_idle_enter();
	trace_cpu_idle_rcuidle(0, smp_processor_id());
	local_irq_enable();
	while (!tif_need_resched())
		cpu_relax();
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
	rcu_idle_exit();
	return 1;
}

/* Weak implementations for optional arch specific functions */
void __weak arch_cpu_idle_prepare(void) { }
void __weak arch_cpu_idle_enter(void) { }
void __weak arch_cpu_idle_exit(void) { }
void __weak arch_cpu_idle_dead(void) { }
void __weak arch_cpu_idle(void)
{
	cpu_idle_force_poll = 1;
	local_irq_enable();
}
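
/*
 * The weak arch_cpu_idle() above cannot actually halt the CPU, so it
 * degrades to polling: it forces cpu_idle_force_poll and re-enables
 * interrupts.  Architectures are expected to override it with a real
 * low-power wait (typically a halt/wfi-style instruction) and to
 * return with interrupts enabled.
 */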

/**
 * cpuidle_idle_call - the main idle function
 *
 * NOTE: no locks or semaphores should be used here
 *
 * On archs that support TIF_POLLING_NRFLAG, this function is called
 * with polling set, and it returns with polling set.  If it ever
 * stops polling, it must clear the polling bit.
 */
static void cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;
	bool broadcast;

	/*
	 * Check if the idle task must be rescheduled. If it is the
	 * case, exit the function after re-enabling the local irq.
	 */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	/*
	 * During the idle period, stop measuring the latencies of
	 * irqs-disabled critical sections.
	 */
	stop_critical_timings();

	/*
	 * Tell the RCU framework we are entering an idle section,
	 * so no more rcu read side critical sections and one more
	 * step to the grace period
	 */
	rcu_idle_enter();

	/*
	 * Ask the cpuidle framework to choose a convenient idle state.
	 * Fall back to the default arch idle method on errors.
	 */
	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
use_default:
		/*
		 * We can't use the cpuidle framework, let's use the default
		 * idle routine.
		 */
		if (current_clr_polling_and_test())
			local_irq_enable();
		else
			arch_cpu_idle();

		goto exit_idle;
	}

	/*
	 * The idle task must be rescheduled, so it is pointless to enter
	 * an idle state: record a zero residency and get out of this
	 * function.
	 */
	if (current_clr_polling_and_test()) {
		dev->last_residency = 0;
		entered_state = next_state;
		local_irq_enable();
		goto exit_idle;
	}

	broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);

	/*
	 * Tell the time framework to switch to a broadcast timer
	 * because our local timer will be shut down. If a local timer
	 * is used from another CPU as a broadcast timer, this call may
	 * fail if it is not available.
	 */
	if (broadcast &&
	    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
		goto use_default;

	trace_cpu_idle_rcuidle(next_state, dev->cpu);

	/*
	 * Enter the idle state previously returned by the governor decision.
	 * This function will block until an interrupt occurs and will take
	 * care of re-enabling the local interrupts
	 */
	entered_state = cpuidle_enter(drv, dev, next_state);

	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);

	if (broadcast)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);

	/*
	 * Give the governor an opportunity to reflect on the outcome
	 */
	cpuidle_reflect(dev, entered_state);

exit_idle:
	__current_set_polling();

	/*
	 * It is up to the idle functions to re-enable local interrupts.
	 */
	if (WARN_ON_ONCE(irqs_disabled()))
		local_irq_enable();

	rcu_idle_exit();
	start_critical_timings();
}

/*
 * Generic idle loop implementation
 *
 * Called with polling cleared.
 */
static void cpu_idle_loop(void)
{
	while (1) {
		/*
		 * If the arch has a polling bit, we maintain an invariant:
		 *
		 * Our polling bit is clear if we're not scheduled (i.e. if
		 * rq->curr != rq->idle).  This means that, if rq->idle has
		 * the polling bit set, then setting need_resched is
		 * guaranteed to cause the cpu to reschedule.
		 */

		__current_set_polling();
		tick_nohz_idle_enter();

		while (!need_resched()) {
			check_pgt_cache();
			rmb();

			if (cpu_is_offline(smp_processor_id()))
				arch_cpu_idle_dead();

			local_irq_disable();
			arch_cpu_idle_enter();

			/*
			 * In poll mode we re-enable interrupts and spin.
			 *
			 * Also, if we detected in the wakeup-from-idle
			 * path that the tick broadcast device expired
			 * for us, we don't want to go deep idle, as we
			 * know that the IPI is going to arrive right
			 * away.
			 */
			if (cpu_idle_force_poll || tick_check_broadcast_expired())
				cpu_idle_poll();
			else
				cpuidle_idle_call();

			arch_cpu_idle_exit();
		}

		/*
		 * Since we fell out of the loop above, we know
		 * TIF_NEED_RESCHED must be set, propagate it into
		 * PREEMPT_NEED_RESCHED.
		 *
		 * This is required because for polling idle loops we will
		 * not have had an IPI to fold the state for us.
		 */
		preempt_set_need_resched();
		tick_nohz_idle_exit();
		__current_clr_polling();

		/*
		 * We promise to reschedule if need_resched is set while
		 * polling is set.  That means that clearing polling
		 * needs to be visible before rescheduling.
		 */
		smp_mb__after_atomic();

		schedule_preempt_disabled();
	}
}

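/*
 * Per-CPU entry point into the idle loop; it never returns.  It is
 * typically reached at the end of the boot sequence on the boot CPU
 * and from the architecture's secondary-CPU bring-up path on the
 * others.
 */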
void cpu_startup_entry(enum cpuhp_state state)
{
	/*
	 * This #ifdef needs to die, but it's too late in the cycle to
	 * make this generic (arm and sh have never invoked the canary
	 * init for the non-boot CPUs!). Will be fixed in 3.11
	 */
#ifdef CONFIG_X86
	/*
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us. The boot CPU already has it initialized but no harm
	 * in doing it again. This is a good place for updating it, as
	 * we won't ever return from this function (so the invalid
	 * canaries already on the stack won't ever trigger).
	 */
	boot_init_stack_canary();
#endif
	arch_cpu_idle_prepare();
	cpu_idle_loop();
}