/*
 * Generic entry point for the idle threads
 */
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>

#include <asm/tlb.h>

#include <trace/events/power.h>

#include "sched.h"

static int __read_mostly cpu_idle_force_poll;

void cpu_idle_poll_ctrl(bool enable)
{
	if (enable) {
		cpu_idle_force_poll++;
	} else {
		cpu_idle_force_poll--;
		WARN_ON_ONCE(cpu_idle_force_poll < 0);
	}
}
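
/*
 * Example (informal sketch, not a caller in this file): code that must
 * keep the CPU out of low-power idle states around a latency-critical
 * section can bracket it with the helper above. The counter nests, so
 * independent callers compose correctly:
 *
 *	cpu_idle_poll_ctrl(true);
 *	... latency-critical work ...
 *	cpu_idle_poll_ctrl(false);
 */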

#ifdef CONFIG_GENERIC_IDLE_POLL_SETUP
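/*
 * Early boot parameters: "nohlt" forces the polling idle loop below,
 * "hlt" restores the default behaviour.
 */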
static int __init cpu_idle_poll_setup(char *__unused)
{
	cpu_idle_force_poll = 1;
	return 1;
}
__setup("nohlt", cpu_idle_poll_setup);

static int __init cpu_idle_nopoll_setup(char *__unused)
{
	cpu_idle_force_poll = 0;
	return 1;
}
__setup("hlt", cpu_idle_nopoll_setup);
#endif

static inline int cpu_idle_poll(void)
{
	rcu_idle_enter();
	trace_cpu_idle_rcuidle(0, smp_processor_id());
	local_irq_enable();
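	/* Spin with interrupts enabled until a reschedule is pending. */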
	while (!tif_need_resched())
		cpu_relax();
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
	rcu_idle_exit();
	return 1;
}

/* Weak implementations for optional arch specific functions */
void __weak arch_cpu_idle_prepare(void) { }
void __weak arch_cpu_idle_enter(void) { }
void __weak arch_cpu_idle_exit(void) { }
void __weak arch_cpu_idle_dead(void) { }
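/*
 * Default idle routine for architectures that do not provide their own:
 * there is no generic way to enter a low-power state here, so force the
 * polling loop and re-enable interrupts.
 */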
void __weak arch_cpu_idle(void)
{
	cpu_idle_force_poll = 1;
	local_irq_enable();
}

/**
 * cpuidle_idle_call - the main idle function
 *
 * NOTE: no locks or semaphores should be used here
 *
 * On archs that support TIF_POLLING_NRFLAG, this function is called
 * with polling set, and it returns with polling set.  If it ever stops
 * polling, it must clear the polling bit.
 */
static void cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;
	unsigned int broadcast;

	/*
	 * Check if the idle task must be rescheduled. If it is the
	 * case, exit the function after re-enabling the local irq.
	 */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	/*
	 * During the idle period, stop measuring the latencies of
	 * irqs-disabled critical sections.
	 */
	stop_critical_timings();

	/*
	 * Tell the RCU framework we are entering an idle section, so no
	 * more RCU read-side critical sections may run and the grace
	 * period machinery gets one step closer to completion.
	 */
	rcu_idle_enter();

	/*
	 * Ask the cpuidle framework to choose a convenient idle state.
	 * Fall back to the default arch idle method on errors.
	 */
	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
use_default:
		/*
		 * We can't use the cpuidle framework; let's use the default
		 * idle routine.
		 */
		if (current_clr_polling_and_test())
			local_irq_enable();
		else
			arch_cpu_idle();

		goto exit_idle;
	}

	/*
	 * The idle task must be rescheduled, so it is pointless to go
	 * idle: record that no idle residency was accrued and get out
	 * of this function.
	 */
	if (current_clr_polling_and_test()) {
		dev->last_residency = 0;
		entered_state = next_state;
		local_irq_enable();
		goto exit_idle;
	}

	broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP;

	/*
	 * Tell the time framework to switch to a broadcast timer because
	 * our local timer will be shut down. If a local timer is used
	 * from another CPU as a broadcast timer, this call may fail if
	 * it is not available.
	 */
	if (broadcast &&
	    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
		goto use_default;

	/* Take note of the planned idle state. */
	idle_set_state(this_rq(), &drv->states[next_state]);

	/*
	 * Enter the idle state previously chosen by the governor. This
	 * function will block until an interrupt occurs and will take
	 * care of re-enabling local interrupts.
	 */
	entered_state = cpuidle_enter(drv, dev, next_state);

	/* The cpu is no longer idle or about to enter idle. */
	idle_set_state(this_rq(), NULL);

	if (broadcast)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);

	/*
	 * Give the governor an opportunity to reflect on the outcome
	 */
	cpuidle_reflect(dev, entered_state);

exit_idle:
	__current_set_polling();

	/*
	 * It is up to the idle functions to reenable local interrupts
	 */
	if (WARN_ON_ONCE(irqs_disabled()))
		local_irq_enable();

	rcu_idle_exit();
	start_critical_timings();
}
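
/*
 * Informal summary of cpuidle_idle_call() above: bail out early if a
 * reschedule is already pending; otherwise ask the governor for a state.
 * On failure fall back to the default arch idle routine, else optionally
 * switch to the broadcast timer, enter the state through cpuidle_enter()
 * and let the governor reflect on the time actually spent idle.
 */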

/*
 * Generic idle loop implementation
 *
 * Called with polling cleared.
 */
static void cpu_idle_loop(void)
{
	while (1) {
		/*
		 * If the arch has a polling bit, we maintain an invariant:
		 *
		 * Our polling bit is clear if we're not scheduled (i.e. if
		 * rq->curr != rq->idle).  This means that, if rq->idle has
		 * the polling bit set, then setting need_resched is
		 * guaranteed to cause the cpu to reschedule.
		 */

		__current_set_polling();
		tick_nohz_idle_enter();

		while (!need_resched()) {
			check_pgt_cache();
			rmb();

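			/*
			 * If this CPU has gone offline, let the architecture
			 * park it; on most architectures arch_cpu_idle_dead()
			 * does not return (the weak default above is a no-op).
			 */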
			if (cpu_is_offline(smp_processor_id()))
				arch_cpu_idle_dead();

			local_irq_disable();
			arch_cpu_idle_enter();

			/*
			 * In poll mode we re-enable interrupts and spin.
			 *
			 * Also, if we detected in the wakeup-from-idle
			 * path that the tick broadcast device expired
			 * for us, we don't want to go into deep idle, as
			 * we know that the IPI is going to arrive right
			 * away.
			 */
			if (cpu_idle_force_poll || tick_check_broadcast_expired())
				cpu_idle_poll();
			else
				cpuidle_idle_call();

			arch_cpu_idle_exit();
		}

		/*
		 * Since we fell out of the loop above, we know
		 * TIF_NEED_RESCHED must be set, propagate it into
		 * PREEMPT_NEED_RESCHED.
		 *
		 * This is required because for polling idle loops we will
		 * not have had an IPI to fold the state for us.
		 */
		preempt_set_need_resched();
		tick_nohz_idle_exit();
		__current_clr_polling();

		/*
		 * We promise to call sched_ttwu_pending and reschedule
		 * if need_resched is set while polling is set.  That
		 * means that clearing polling needs to be visible
		 * before doing these things.
		 */
		smp_mb__after_atomic();

		sched_ttwu_pending();
		schedule_preempt_disabled();
	}
}

void cpu_startup_entry(enum cpuhp_state state)
{
	/*
	 * This #ifdef needs to die, but it's too late in the cycle to
	 * make this generic (arm and sh have never invoked the canary
	 * init for the non-boot CPUs!). Will be fixed in 3.11
	 */
#ifdef CONFIG_X86
	/*
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us. The boot CPU already has it initialized but no harm
	 * in doing it again. This is a good place for updating it, as
	 * we won't ever return from this function (so the invalid
	 * canaries already on the stack won't ever trigger).
	 */
	boot_init_stack_canary();
#endif
	arch_cpu_idle_prepare();
	cpu_idle_loop();
}
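
/*
 * Informal usage sketch: both the boot CPU (from rest_init()) and the
 * secondary CPUs (from each architecture's secondary startup path) end
 * their bringup by calling into this function and never return:
 *
 *	cpu_startup_entry(CPUHP_ONLINE);
 */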