/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *  			- Added processor hotplug support
 *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *  			- Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/module.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/sched.h>       /* need_resched() */
#include <linux/clockchips.h>
#include <linux/cpuidle.h>
#include <linux/syscore_ops.h>
#include <acpi/processor.h>

/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 * asm/acpi.h is not an option, as it would require more include magic. Also
 * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

#define PREFIX "ACPI: "

#define ACPI_PROCESSOR_CLASS            "processor"
#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");

static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0000);
static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);
static int bm_check_disable __read_mostly;
module_param(bm_check_disable, uint, 0000);

static unsigned int latency_factor __read_mostly = 2;
module_param(latency_factor, uint, 0644);

static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device);

static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX],
								acpi_cstate);

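/*
 * Idle state handling is disabled entirely when the user boots with
 * idle=poll or idle=halt on the kernel command line.
 */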
static int disabled_by_idle_boot_param(void)
{
	return boot_option_idle_override == IDLE_POLL ||
		boot_option_idle_override == IDLE_HALT;
}

/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
static int set_max_cstate(const struct dmi_system_id *id)
{
	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
		return 0;

	printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
	       " Override with \"processor.max_cstate=%d\"\n", id->ident,
	       (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);

	max_cstate = (long)id->driver_data;

	return 0;
}

static struct dmi_system_id processor_power_dmi_table[] = {
	{ set_max_cstate, "Clevo 5600D", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
	 (void *)2},
	{ set_max_cstate, "Pavilion zv5000", {
	  DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
	  DMI_MATCH(DMI_PRODUCT_NAME,"Pavilion zv5000 (DS502A#ABA)")},
	 (void *)1},
	{ set_max_cstate, "Asus L8400B", {
	  DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
	  DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")},
	 (void *)1},
	{},
};


/*
 * Callers should disable interrupts before the call and enable
 * interrupts after return.
 */
static void acpi_safe_halt(void)
{
	if (!tif_need_resched()) {
		safe_halt();
		local_irq_disable();
	}
}

#ifdef ARCH_APICTIMER_STOPS_ON_C3

/*
 * Some BIOS implementations switch to C3 in the published C2 state.
 * This seems to be a common problem on AMD boxen, but other vendors
 * are affected too. We pick the most conservative approach: we assume
 * that the local APIC stops in both C2 and C3.
 */
static void lapic_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cx)
{
	struct acpi_processor_power *pwr = &pr->power;
	u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;

	if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
		return;

	if (amd_e400_c1e_detected)
		type = ACPI_STATE_C1;

	/*
	 * Check if one of the previous states already marked the lapic
	 * unstable
	 */
	if (pwr->timer_broadcast_on_state < state)
		return;

	if (cx->type >= type)
		pr->power.timer_broadcast_on_state = state;
}

static void __lapic_timer_propagate_broadcast(void *arg)
{
	struct acpi_processor *pr = (struct acpi_processor *) arg;
	unsigned long reason;

	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;

	clockevents_notify(reason, &pr->id);
}

static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
{
	smp_call_function_single(pr->id, __lapic_timer_propagate_broadcast,
				 (void *)pr, 1);
}

/* Power(C) State timer broadcast control */
static void lapic_timer_state_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
	int state = cx - pr->power.states;

	if (state >= pr->power.timer_broadcast_on_state) {
		unsigned long reason;

		reason = broadcast ?  CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
		clockevents_notify(reason, &pr->id);
	}
}

#else

static void lapic_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cstate) { }
static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
static void lapic_timer_state_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
}

#endif

#ifdef CONFIG_PM_SLEEP
static u32 saved_bm_rld;

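/*
 * Preserve BM_RLD across suspend/resume: save it at suspend time and,
 * if it comes back up different, write the saved value back on resume.
 */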
static int acpi_processor_suspend(void)
{
	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld);
	return 0;
}

static void acpi_processor_resume(void)
{
	u32 resumed_bm_rld;

	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld);
	if (resumed_bm_rld == saved_bm_rld)
		return;

	acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld);
}

static struct syscore_ops acpi_processor_syscore_ops = {
	.suspend = acpi_processor_suspend,
	.resume = acpi_processor_resume,
};

void acpi_processor_syscore_init(void)
{
	register_syscore_ops(&acpi_processor_syscore_ops);
}

void acpi_processor_syscore_exit(void)
{
	unregister_syscore_ops(&acpi_processor_syscore_ops);
}
#endif /* CONFIG_PM_SLEEP */

#if defined(CONFIG_X86)
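/*
 * Check whether the TSC keeps counting in the given C-state; if it may
 * halt there, mark the TSC unstable so it is not trusted for timekeeping.
 */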
static void tsc_check_state(int state)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		/*
		 * AMD Fam10h TSC will tick in all
		 * C/P/S0/S1 states when this bit is set.
		 */
		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			return;

		/*FALL THROUGH*/
	default:
		/* TSC could halt in idle, so notify users */
		if (state > ACPI_STATE_C1)
			mark_tsc_unstable("TSC halts in idle");
	}
}
#else
static void tsc_check_state(int state) { return; }
#endif

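/*
 * Legacy discovery path: derive the C2/C3 addresses from the
 * processor's P_BLK and their worst-case latencies from the FADT,
 * invalidating any state whose latency exceeds the ACPI maximum.
 */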
static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
{

	if (!pr->pblk)
		return -ENODEV;

	/* if info is obtained from pblk/fadt, type equals state */
	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system.
	 */
	if ((num_online_cpus() > 1) &&
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		return -ENODEV;
#endif

	/* determine C2 and C3 address from pblk */
	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

	/* determine latencies from FADT */
	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.c2_latency;
	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.c3_latency;

	/*
	 * FADT specified C2 latency must be less than or equal to
	 * 100 microseconds.
	 */
	if (acpi_gbl_FADT.c2_latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			"C2 latency too large [%d]\n", acpi_gbl_FADT.c2_latency));
		/* invalidate C2 */
		pr->power.states[ACPI_STATE_C2].address = 0;
	}

	/*
	 * FADT supplied C3 latency must be less than or equal to
	 * 1000 microseconds.
	 */
	if (acpi_gbl_FADT.c3_latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			"C3 latency too large [%d]\n", acpi_gbl_FADT.c3_latency));
		/* invalidate C3 */
		pr->power.states[ACPI_STATE_C3].address = 0;
	}

	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			  "lvl2[0x%08x] lvl3[0x%08x]\n",
			  pr->power.states[ACPI_STATE_C2].address,
			  pr->power.states[ACPI_STATE_C3].address));

	return 0;
}

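/*
 * Ensure a usable C1 state always exists: every processor must support
 * C1, so synthesize a HALT-based entry when none was reported, and mark
 * the C0 array filler valid.
 */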
static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
	if (!pr->power.states[ACPI_STATE_C1].valid) {
		/* set the first C-State to C1 */
		/* all processors need to support C1 */
		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
		pr->power.states[ACPI_STATE_C1].valid = 1;
		pr->power.states[ACPI_STATE_C1].entry_method = ACPI_CSTATE_HALT;
	}
	/* the C0 state only exists as a filler in our array */
	pr->power.states[ACPI_STATE_C0].valid = 1;
	return 0;
}

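/*
 * Build the C-state table from the _CST package: validate the
 * advertised count, then walk the four-element state packages, keeping
 * only entries with SYSTEM_IO or FIXED_HARDWARE register types.
 */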
static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
	acpi_status status = 0;
	u64 count;
	int current_count;
	int i;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *cst;


	if (nocst)
		return -ENODEV;

	current_count = 0;

	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
		return -ENODEV;
	}

	cst = buffer.pointer;

	/* There must be at least 2 elements */
	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
		status = -EFAULT;
		goto end;
	}

	count = cst->package.elements[0].integer.value;

	/* Validate number of power states. */
	if (count < 1 || count != cst->package.count - 1) {
		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
		status = -EFAULT;
		goto end;
	}

	/* Tell driver that at least _CST is supported. */
	pr->flags.has_cst = 1;

	for (i = 1; i <= count; i++) {
		union acpi_object *element;
		union acpi_object *obj;
		struct acpi_power_register *reg;
		struct acpi_processor_cx cx;

		memset(&cx, 0, sizeof(cx));

		element = &(cst->package.elements[i]);
		if (element->type != ACPI_TYPE_PACKAGE)
			continue;

		if (element->package.count != 4)
			continue;

		obj = &(element->package.elements[0]);

		if (obj->type != ACPI_TYPE_BUFFER)
			continue;

		reg = (struct acpi_power_register *)obj->buffer.pointer;

		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
			continue;

		/* There should be an easy way to extract an integer... */
		obj = &(element->package.elements[1]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.type = obj->integer.value;
		/*
		 * Some buggy BIOSes won't list C1 in _CST -
		 * Let acpi_processor_get_power_info_default() handle them later
		 */
		if (i == 1 && cx.type != ACPI_STATE_C1)
			current_count++;

		cx.address = reg->address;
		cx.index = current_count + 1;

		cx.entry_method = ACPI_CSTATE_SYSTEMIO;
		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			if (acpi_processor_ffh_cstate_probe
					(pr->id, &cx, reg) == 0) {
				cx.entry_method = ACPI_CSTATE_FFH;
			} else if (cx.type == ACPI_STATE_C1) {
				/*
				 * C1 is a special case where FIXED_HARDWARE
				 * can be handled in non-MWAIT way as well.
				 * In that case, save this _CST entry info.
				 * Otherwise, ignore this info and continue.
				 */
				cx.entry_method = ACPI_CSTATE_HALT;
				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
			} else {
				continue;
			}
			if (cx.type == ACPI_STATE_C1 &&
			    (boot_option_idle_override == IDLE_NOMWAIT)) {
				/*
				 * In most cases the C1 space_id obtained from
				 * _CST object is FIXED_HARDWARE access mode.
				 * But when the option of idle=halt is added,
				 * the entry_method type should be changed from
				 * CSTATE_FFH to CSTATE_HALT.
				 * When the option of idle=nomwait is added,
				 * the C1 entry_method type should be
				 * CSTATE_HALT.
				 */
				cx.entry_method = ACPI_CSTATE_HALT;
				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
			}
		} else {
			snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
				 cx.address);
		}

		if (cx.type == ACPI_STATE_C1) {
			cx.valid = 1;
		}

		obj = &(element->package.elements[2]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.latency = obj->integer.value;

		obj = &(element->package.elements[3]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		current_count++;
		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

		/*
		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
		 */
		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
			printk(KERN_WARNING
			       "Limiting number of power states to max (%d)\n",
			       ACPI_PROCESSOR_MAX_POWER);
			printk(KERN_WARNING
			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
			break;
		}
	}

	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
			  current_count));

	/* Validate number of power states discovered */
	if (current_count < 2)
		status = -EFAULT;

end:
	kfree(buffer.pointer);

	return status;
}

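/*
 * Validate C3 on this processor: honor the PIIX4 Type-F DMA erratum,
 * require bus-master control (or a _CST-provided C3, or WBINVD support
 * when bm_check is unused), and set BM_RLD so bus-master activity can
 * wake the CPU from C3.
 */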
static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
					   struct acpi_processor_cx *cx)
{
	static int bm_check_flag = -1;
	static int bm_control_flag = -1;


	if (!cx->address)
		return;

	/*
	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
	 * DMA transfers are used by any ISA device to avoid livelock.
	 * Note that we could disable Type-F DMA (as recommended by
	 * the erratum), but this is known to disrupt certain ISA
	 * devices thus we take the conservative approach.
	 */
	else if (errata.piix4.fdma) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "C3 not supported on PIIX4 with Type-F DMA\n"));
		return;
	}

	/* All the logic here assumes flags.bm_check is same across all CPUs */
	if (bm_check_flag == -1) {
		/* Determine whether bm_check is needed based on CPU  */
		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
		bm_check_flag = pr->flags.bm_check;
		bm_control_flag = pr->flags.bm_control;
	} else {
		pr->flags.bm_check = bm_check_flag;
		pr->flags.bm_control = bm_control_flag;
	}

	if (pr->flags.bm_check) {
		if (!pr->flags.bm_control) {
			if (pr->flags.has_cst != 1) {
				/* bus mastering control is necessary */
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					"C3 support requires BM control\n"));
				return;
			} else {
				/* Here we enter C3 without bus mastering */
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					"C3 support without BM control\n"));
			}
		}
	} else {
		/*
		 * WBINVD should be set in the FADT for the C3 state to be
		 * supported when bm_check is not required.
		 */
		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "Cache invalidation should work properly"
					  " for C3 to be enabled on SMP systems\n"));
			return;
		}
	}

	/*
	 * Otherwise we've met all of our C3 requirements.
	 * Normalize the C3 latency to expedite policy.  Enable
	 * checking of bus mastering status (bm_check) so we can
	 * use this in our C3 policy.
	 */
	cx->valid = 1;

	/*
	 * On older chipsets, BM_RLD needs to be set
	 * in order for Bus Master activity to wake the
	 * system from C3.  Newer chipsets handle DMA
	 * during C3 automatically and BM_RLD is a NOP.
	 * In either case, the proper way to
	 * handle BM_RLD is to set it and leave it set.
	 */
	acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);

	return;
}

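/*
 * Walk the discovered states and validate each one: C1 is always
 * usable, C2 just needs an address, and C3 goes through the full check
 * above.  Records lapic-timer and TSC behavior along the way and
 * returns the number of working states.
 */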
static int acpi_processor_power_verify(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int working = 0;

	pr->power.timer_broadcast_on_state = INT_MAX;

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
		struct acpi_processor_cx *cx = &pr->power.states[i];

		switch (cx->type) {
		case ACPI_STATE_C1:
			cx->valid = 1;
			break;

		case ACPI_STATE_C2:
			if (!cx->address)
				break;
			cx->valid = 1;
			break;

		case ACPI_STATE_C3:
			acpi_processor_power_verify_c3(pr, cx);
			break;
		}
		if (!cx->valid)
			continue;

		lapic_timer_check_state(i, pr, cx);
		tsc_check_state(cx->type);
		working++;
	}

	lapic_timer_propagate_broadcast(pr);

	return (working);
}

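/*
 * Build the complete C-state table for a processor: prefer _CST, fall
 * back to FADT/P_BLK data, always provide a C1 default, then verify the
 * result and flag the CPU as power-manageable if C2 or deeper is usable.
 */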
static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
	unsigned int i;
	int result;


	/* NOTE: the idle thread may not be running while calling
	 * this function */

	/* Zero initialize all the C-states info. */
	memset(pr->power.states, 0, sizeof(pr->power.states));

	result = acpi_processor_get_power_info_cst(pr);
	if (result == -ENODEV)
		result = acpi_processor_get_power_info_fadt(pr);

	if (result)
		return result;

	acpi_processor_get_power_info_default(pr);

	pr->power.count = acpi_processor_power_verify(pr);

	/*
	 * if one state of type C2 or C3 is available, mark this
	 * CPU as being "idle manageable"
	 */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		if (pr->power.states[i].valid) {
			pr->power.count = i;
			if (pr->power.states[i].type >= ACPI_STATE_C2)
				pr->flags.power = 1;
		}
	}

	return 0;
}

/**
 * acpi_idle_bm_check - checks if bus master activity was detected
 */
static int acpi_idle_bm_check(void)
{
	u32 bm_status = 0;

	if (bm_check_disable)
		return 0;

	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
	if (bm_status)
		acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
	/*
	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
	 * the true state of bus mastering activity; forcing us to
	 * manually check the BMIDEA bit of each IDE channel.
	 */
	else if (errata.piix4.bmisx) {
		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
			bm_status = 1;
	}
	return bm_status;
}

/**
 * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
 * @cx: cstate data
 *
 * Caller disables interrupts before the call and enables them after return.
 */
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
	/* Don't trace irqs off for idle */
	stop_critical_timings();
	if (cx->entry_method == ACPI_CSTATE_FFH) {
		/* Call into architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cx);
	} else if (cx->entry_method == ACPI_CSTATE_HALT) {
		acpi_safe_halt();
	} else {
		/* IO port based C-state */
		inb(cx->address);
		/* Dummy wait op - must do something useless after P_LVL2 read
		   because chipsets cannot guarantee that STPCLK# signal
		   gets asserted in time to freeze execution properly. */
		inl(acpi_gbl_FADT.xpm_timer_block.address);
	}
	start_critical_timings();
}

/**
 * acpi_idle_enter_c1 - enters an ACPI C1 state-type
 * @dev: the target CPU
 * @drv: cpuidle driver containing cpuidle state info
 * @index: index of target state
 *
 * This is equivalent to the HALT instruction.
 */
static int acpi_idle_enter_c1(struct cpuidle_device *dev,
		struct cpuidle_driver *drv, int index)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);

	pr = __this_cpu_read(processors);

	if (unlikely(!pr))
		return -EINVAL;

	if (cx->entry_method == ACPI_CSTATE_FFH) {
		if (current_set_polling_and_test())
			return -EINVAL;
	}

	lapic_timer_state_broadcast(pr, cx, 1);
	acpi_idle_do_entry(cx);

	lapic_timer_state_broadcast(pr, cx, 0);

	return index;
}


/**
 * acpi_idle_play_dead - enters an ACPI state for long-term idle (i.e. off-lining)
 * @dev: the target CPU
 * @index: the index of suggested state
 */
static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
{
	struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);

	ACPI_FLUSH_CPU_CACHE();

	while (1) {

		if (cx->entry_method == ACPI_CSTATE_HALT)
			safe_halt();
		else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
			inb(cx->address);
			/* See comment in acpi_idle_do_entry() */
			inl(acpi_gbl_FADT.xpm_timer_block.address);
		} else
			return -ENODEV;
	}

	/* Never reached */
	return 0;
}

/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
 * @drv: cpuidle driver with cpuidle state information
 * @index: the index of suggested state
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
		struct cpuidle_driver *drv, int index)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);

	pr = __this_cpu_read(processors);

	if (unlikely(!pr))
		return -EINVAL;

	if (cx->entry_method == ACPI_CSTATE_FFH) {
		if (current_set_polling_and_test())
			return -EINVAL;
	}

	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	lapic_timer_state_broadcast(pr, cx, 1);

	if (cx->type == ACPI_STATE_C3)
		ACPI_FLUSH_CPU_CACHE();

	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	acpi_idle_do_entry(cx);

	sched_clock_idle_wakeup_event(0);

	lapic_timer_state_broadcast(pr, cx, 0);
	return index;
}

static int c3_cpu_count;
static DEFINE_RAW_SPINLOCK(c3_lock);

/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
 * @drv: cpuidle driver containing state data
 * @index: the index of suggested state
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
		struct cpuidle_driver *drv, int index)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);

	pr = __this_cpu_read(processors);

	if (unlikely(!pr))
		return -EINVAL;

	if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
		if (drv->safe_state_index >= 0) {
			return drv->states[drv->safe_state_index].enter(dev,
						drv, drv->safe_state_index);
		} else {
			acpi_safe_halt();
			return -EBUSY;
		}
	}

	if (cx->entry_method == ACPI_CSTATE_FFH) {
		if (current_set_polling_and_test())
			return -EINVAL;
	}

	acpi_unlazy_tlb(smp_processor_id());

	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	lapic_timer_state_broadcast(pr, cx, 1);

	/*
	 * disable bus master
	 * bm_check implies we need ARB_DIS
	 * !bm_check implies we need cache flush
	 * bm_control implies whether we can do ARB_DIS
	 *
	 * That leaves a case where bm_check is set and bm_control is
	 * not set. In that case we cannot do much, we enter C3
	 * without doing anything.
	 */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		raw_spin_lock(&c3_lock);
		c3_cpu_count++;
		/* Disable bus master arbitration when all CPUs are in C3 */
		if (c3_cpu_count == num_online_cpus())
			acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
		raw_spin_unlock(&c3_lock);
	} else if (!pr->flags.bm_check) {
		ACPI_FLUSH_CPU_CACHE();
	}

	acpi_idle_do_entry(cx);

	/* Re-enable bus master arbitration */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		raw_spin_lock(&c3_lock);
		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
		c3_cpu_count--;
		raw_spin_unlock(&c3_lock);
	}

	sched_clock_idle_wakeup_event(0);

	lapic_timer_state_broadcast(pr, cx, 0);
	return index;
}

struct cpuidle_driver acpi_idle_driver = {
	.name =		"acpi_idle",
	.owner =	THIS_MODULE,
};

/**
 * acpi_processor_setup_cpuidle_cx - prepares and configures CPUIDLE
 * device i.e. per-cpu data
 *
 * @pr: the ACPI processor
 * @dev : the cpuidle device
 */
static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr,
					   struct cpuidle_device *dev)
{
	int i, count = CPUIDLE_DRIVER_STATE_START;
	struct acpi_processor_cx *cx;

	if (!pr->flags.power_setup_done)
		return -EINVAL;

	if (pr->flags.power == 0) {
		return -EINVAL;
	}

	if (!dev)
		return -EINVAL;

	dev->cpu = pr->id;

	if (max_cstate == 0)
		max_cstate = 1;

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
		cx = &pr->power.states[i];

		if (!cx->valid)
			continue;

#ifdef CONFIG_HOTPLUG_CPU
		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
		    !pr->flags.has_cst &&
		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
			continue;
#endif
		per_cpu(acpi_cstate[count], dev->cpu) = cx;

		count++;
		if (count == CPUIDLE_STATE_MAX)
			break;
	}

	dev->state_count = count;

	if (!count)
		return -EINVAL;

	return 0;
}

/**
 * acpi_processor_setup_cpuidle_states - prepares and configures cpuidle
 * global state data i.e. idle routines
 *
 * @pr: the ACPI processor
 */
static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
{
	int i, count = CPUIDLE_DRIVER_STATE_START;
	struct acpi_processor_cx *cx;
	struct cpuidle_state *state;
	struct cpuidle_driver *drv = &acpi_idle_driver;

	if (!pr->flags.power_setup_done)
		return -EINVAL;

	if (pr->flags.power == 0)
		return -EINVAL;

	drv->safe_state_index = -1;
	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
		drv->states[i].name[0] = '\0';
		drv->states[i].desc[0] = '\0';
	}

	if (max_cstate == 0)
		max_cstate = 1;

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
		cx = &pr->power.states[i];

		if (!cx->valid)
			continue;

#ifdef CONFIG_HOTPLUG_CPU
		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
		    !pr->flags.has_cst &&
		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
			continue;
#endif

		state = &drv->states[count];
		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
		strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		state->target_residency = cx->latency * latency_factor;

		state->flags = 0;
		switch (cx->type) {
			case ACPI_STATE_C1:
			if (cx->entry_method == ACPI_CSTATE_FFH)
				state->flags |= CPUIDLE_FLAG_TIME_VALID;

			state->enter = acpi_idle_enter_c1;
			state->enter_dead = acpi_idle_play_dead;
			drv->safe_state_index = count;
			break;

			case ACPI_STATE_C2:
			state->flags |= CPUIDLE_FLAG_TIME_VALID;
			state->enter = acpi_idle_enter_simple;
			state->enter_dead = acpi_idle_play_dead;
			drv->safe_state_index = count;
			break;

			case ACPI_STATE_C3:
			state->flags |= CPUIDLE_FLAG_TIME_VALID;
			state->enter = pr->flags.bm_check ?
					acpi_idle_enter_bm :
					acpi_idle_enter_simple;
			break;
		}

		count++;
		if (count == CPUIDLE_STATE_MAX)
			break;
	}

	drv->state_count = count;

	if (!count)
		return -EINVAL;

	return 0;
}

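/*
 * Re-evaluate the C-states of a hot-plugged processor and re-enable its
 * cpuidle device, with the device disabled while the state data is
 * rebuilt.
 */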
int acpi_processor_hotplug(struct acpi_processor *pr)
{
	int ret = 0;
	struct cpuidle_device *dev;

	if (disabled_by_idle_boot_param())
		return 0;

	if (nocst)
		return -ENODEV;

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	dev = per_cpu(acpi_cpuidle_device, pr->id);
	cpuidle_pause_and_lock();
	cpuidle_disable_device(dev);
	acpi_processor_get_power_info(pr);
	if (pr->flags.power) {
		acpi_processor_setup_cpuidle_cx(pr, dev);
		ret = cpuidle_enable_device(dev);
	}
	cpuidle_resume_and_unlock();

	return ret;
}

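/*
 * Called when the platform signals that _CST has changed: rebuild the
 * global state table and every per-cpu cpuidle device, keeping all
 * devices disabled while the update is in progress.
 */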
int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
	int cpu;
	struct acpi_processor *_pr;
	struct cpuidle_device *dev;

	if (disabled_by_idle_boot_param())
		return 0;

	if (nocst)
		return -ENODEV;

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	/*
	 * FIXME:  Design the ACPI notification to make it once per
	 * system instead of once per-cpu.  This condition is a hack
	 * to make the code that updates C-States be called once.
	 */

	if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) {

		cpuidle_pause_and_lock();
		/* Protect against cpu-hotplug */
		get_online_cpus();

		/* Disable all cpuidle devices */
		for_each_online_cpu(cpu) {
			_pr = per_cpu(processors, cpu);
			if (!_pr || !_pr->flags.power_setup_done)
				continue;
			dev = per_cpu(acpi_cpuidle_device, cpu);
			cpuidle_disable_device(dev);
		}

		/* Populate Updated C-state information */
		acpi_processor_get_power_info(pr);
		acpi_processor_setup_cpuidle_states(pr);

		/* Enable all cpuidle devices */
		for_each_online_cpu(cpu) {
			_pr = per_cpu(processors, cpu);
			if (!_pr || !_pr->flags.power_setup_done)
				continue;
			acpi_processor_get_power_info(_pr);
			if (_pr->flags.power) {
				dev = per_cpu(acpi_cpuidle_device, cpu);
				acpi_processor_setup_cpuidle_cx(_pr, dev);
				cpuidle_enable_device(dev);
			}
		}
		put_online_cpus();
		cpuidle_resume_and_unlock();
	}

	return 0;
}

static int acpi_processor_registered;

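/*
 * Set up idle handling for one processor: apply the DMI cstate limits
 * on the first call, notify the BIOS of _CST support through the FADT
 * SMI command, gather the C-state data, and register the cpuidle
 * driver (once) plus this CPU's cpuidle device.
 */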
int acpi_processor_power_init(struct acpi_processor *pr)
{
	acpi_status status = 0;
	int retval;
	struct cpuidle_device *dev;
	static int first_run;

	if (disabled_by_idle_boot_param())
		return 0;

	if (!first_run) {
		dmi_check_system(processor_power_dmi_table);
		max_cstate = acpi_processor_cstate_check(max_cstate);
		if (max_cstate < ACPI_C_STATES_MAX)
			printk(KERN_NOTICE
			       "ACPI: processor limited to max C-state %d\n",
			       max_cstate);
		first_run++;
	}

	if (acpi_gbl_FADT.cst_control && !nocst) {
		status =
		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
		if (ACPI_FAILURE(status)) {
			ACPI_EXCEPTION((AE_INFO, status,
					"Notifying BIOS of _CST ability failed"));
		}
	}

	acpi_processor_get_power_info(pr);
	pr->flags.power_setup_done = 1;

	/*
	 * Install the idle handler if processor power management is supported.
	 * Note that the previously set idle handler will be used on
	 * platforms that only support C1.
	 */
	if (pr->flags.power) {
		/* Register acpi_idle_driver if not already registered */
		if (!acpi_processor_registered) {
			acpi_processor_setup_cpuidle_states(pr);
			retval = cpuidle_register_driver(&acpi_idle_driver);
			if (retval)
				return retval;
			printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n",
					acpi_idle_driver.name);
		}

		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
		if (!dev)
			return -ENOMEM;
		per_cpu(acpi_cpuidle_device, pr->id) = dev;

		acpi_processor_setup_cpuidle_cx(pr, dev);

		/* Register per-cpu cpuidle_device. Cpuidle driver
		 * must already be registered before registering device
		 */
		retval = cpuidle_register_device(dev);
		if (retval) {
			if (acpi_processor_registered == 0)
				cpuidle_unregister_driver(&acpi_idle_driver);
			return retval;
		}
		acpi_processor_registered++;
	}
	return 0;
}

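/*
 * Tear down this CPU's cpuidle device and, once the last device is
 * gone, unregister the cpuidle driver as well.
 */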
int acpi_processor_power_exit(struct acpi_processor *pr)
{
	struct cpuidle_device *dev = per_cpu(acpi_cpuidle_device, pr->id);

	if (disabled_by_idle_boot_param())
		return 0;

	if (pr->flags.power) {
		cpuidle_unregister_device(dev);
		acpi_processor_registered--;
		if (acpi_processor_registered == 0)
			cpuidle_unregister_driver(&acpi_idle_driver);
	}

	pr->flags.power_setup_done = 0;
	return 0;
}