processor_idle.c 30.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6
 *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
L
Linus Torvalds 已提交
7 8
 *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *  			- Added processor hotplug support
9 10
 *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *  			- Added support for C3 on SMP
L
Linus Torvalds 已提交
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/module.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
34
#include <linux/sched.h>       /* need_resched() */
35
#include <linux/clockchips.h>
36
#include <linux/cpuidle.h>
37
#include <linux/syscore_ops.h>
38
#include <acpi/processor.h>
L
Linus Torvalds 已提交
39

40 41 42 43 44 45 46 47 48 49
/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 * asm/acpi.h is not an option, as it would require more include magic. Also
 * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

50 51
#define PREFIX "ACPI: "

L
Linus Torvalds 已提交
52 53
#define ACPI_PROCESSOR_CLASS            "processor"
#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
54
ACPI_MODULE_NAME("processor_idle");
L
Linus Torvalds 已提交
55

56 57
static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0000);
58
static unsigned int nocst __read_mostly;
L
Linus Torvalds 已提交
59
module_param(nocst, uint, 0000);
60 61
static int bm_check_disable __read_mostly;
module_param(bm_check_disable, uint, 0000);
L
Linus Torvalds 已提交
62

63
static unsigned int latency_factor __read_mostly = 2;
64
module_param(latency_factor, uint, 0644);
L
Linus Torvalds 已提交
65

66 67
static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device);

68 69
static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX],
								acpi_cstate);
70

71 72 73 74 75 76
/*
 * Returns 1 when the idle boot override selects polling or plain HALT,
 * 0 otherwise.
 */
static int disabled_by_idle_boot_param(void)
{
	switch (boot_option_idle_override) {
	case IDLE_POLL:
	case IDLE_HALT:
		return 1;
	default:
		return 0;
	}
}

L
Linus Torvalds 已提交
77 78 79 80 81 82
/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
83
static int set_max_cstate(const struct dmi_system_id *id)
L
Linus Torvalds 已提交
84 85 86 87
{
	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
		return 0;

88
	printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
L
Len Brown 已提交
89 90
	       " Override with \"processor.max_cstate=%d\"\n", id->ident,
	       (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);
L
Linus Torvalds 已提交
91

92
	max_cstate = (long)id->driver_data;
L
Linus Torvalds 已提交
93 94 95 96

	return 0;
}

97
static struct dmi_system_id processor_power_dmi_table[] = {
98 99 100
	{ set_max_cstate, "Clevo 5600D", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
L
Len Brown 已提交
101
	 (void *)2},
102 103 104 105 106 107 108 109
	{ set_max_cstate, "Pavilion zv5000", {
	  DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
	  DMI_MATCH(DMI_PRODUCT_NAME,"Pavilion zv5000 (DS502A#ABA)")},
	 (void *)1},
	{ set_max_cstate, "Asus L8400B", {
	  DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
	  DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")},
	 (void *)1},
L
Linus Torvalds 已提交
110 111 112
	{},
};

113

114 115 116 117
/*
 * Halt the CPU unless a reschedule is already pending.
 *
 * Callers should disable interrupts before the call and enable
 * interrupts after return.
 */
static void acpi_safe_halt(void)
{
	if (tif_need_resched())
		return;

	safe_halt();
	/* Hand back to the caller with interrupts disabled. */
	local_irq_disable();
}

126 127 128 129
#ifdef ARCH_APICTIMER_STOPS_ON_C3

/*
 * Some BIOS implementations switch to C3 in the published C2 state.
130 131 132
 * This seems to be a common problem on AMD boxen, but other vendors
 * are affected too. We pick the most conservative approach: we assume
 * that the local APIC stops in both C2 and C3.
133
 */
134
static void lapic_timer_check_state(int state, struct acpi_processor *pr,
135 136 137
				   struct acpi_processor_cx *cx)
{
	struct acpi_processor_power *pwr = &pr->power;
138
	u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;
139

140 141 142
	if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
		return;

143
	if (amd_e400_c1e_detected)
144 145
		type = ACPI_STATE_C1;

146 147 148 149 150 151 152
	/*
	 * Check, if one of the previous states already marked the lapic
	 * unstable
	 */
	if (pwr->timer_broadcast_on_state < state)
		return;

153
	if (cx->type >= type)
154
		pr->power.timer_broadcast_on_state = state;
155 156
}

157
static void __lapic_timer_propagate_broadcast(void *arg)
158
{
159
	struct acpi_processor *pr = (struct acpi_processor *) arg;
160 161 162 163 164 165 166 167
	unsigned long reason;

	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;

	clockevents_notify(reason, &pr->id);
}

168 169 170 171 172 173
/* Run __lapic_timer_propagate_broadcast() on @pr's CPU and wait for it. */
static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
{
	const int wait = 1;

	smp_call_function_single(pr->id, __lapic_timer_propagate_broadcast,
				 pr, wait);
}

174
/* Power(C) State timer broadcast control */
175
static void lapic_timer_state_broadcast(struct acpi_processor *pr,
176 177 178 179 180 181 182 183 184 185 186 187
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
	int state = cx - pr->power.states;

	if (state >= pr->power.timer_broadcast_on_state) {
		unsigned long reason;

		reason = broadcast ?  CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
		clockevents_notify(reason, &pr->id);
	}
188 189 190 191
}

#else

192
/* Empty variants used when ARCH_APICTIMER_STOPS_ON_C3 is not defined. */
static void lapic_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cstate)
{
}

static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
{
}

static void lapic_timer_state_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
}
200 201 202

#endif

203
#ifdef CONFIG_PM_SLEEP
204 205
static u32 saved_bm_rld;

206
static int acpi_processor_suspend(void)
207 208
{
	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld);
209
	return 0;
210
}
211

212
static void acpi_processor_resume(void)
213
{
214
	u32 resumed_bm_rld = 0;
215 216

	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld);
217 218
	if (resumed_bm_rld == saved_bm_rld)
		return;
219

220
	acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld);
221
}
222

223 224 225 226 227 228
/* Suspend/resume hooks that preserve the BM_RLD bit. */
static struct syscore_ops acpi_processor_syscore_ops = {
	.resume = acpi_processor_resume,
	.suspend = acpi_processor_suspend,
};

/* Register the BM_RLD save/restore hooks with the syscore framework. */
void acpi_processor_syscore_init(void)
{
	register_syscore_ops(&acpi_processor_syscore_ops);
}

/* Remove the hooks registered by acpi_processor_syscore_init(). */
void acpi_processor_syscore_exit(void)
{
	unregister_syscore_ops(&acpi_processor_syscore_ops);
}
237
#endif /* CONFIG_PM_SLEEP */
238

J
John Stultz 已提交
239
#if defined(CONFIG_X86)
/*
 * Mark the TSC unstable when a C-state deeper than C1 (@state is the
 * C-state type) is usable, unless an AMD or Intel boot CPU advertises
 * X86_FEATURE_NONSTOP_TSC.
 */
static void tsc_check_state(int state)
{
	/*
	 * AMD Fam10h TSC will tick in all
	 * C/P/S0/S1 states when this bit is set.
	 */
	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
	     boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
	    boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		return;

	/* TSC could halt in idle, so notify users */
	if (state > ACPI_STATE_C1)
		mark_tsc_unstable("TSC halts in idle");
}
#else
static void tsc_check_state(int state) { }
#endif

L
Len Brown 已提交
263
static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
L
Linus Torvalds 已提交
264 265 266
{

	if (!pr->pblk)
267
		return -ENODEV;
L
Linus Torvalds 已提交
268 269 270 271 272

	/* if info is obtained from pblk/fadt, type equals state */
	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

273 274 275
#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
276
	 * an SMP system.
277
	 */
278
	if ((num_online_cpus() > 1) &&
279
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
280
		return -ENODEV;
281 282
#endif

L
Linus Torvalds 已提交
283 284 285 286 287
	/* determine C2 and C3 address from pblk */
	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

	/* determine latencies from FADT */
288 289
	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.c2_latency;
	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.c3_latency;
L
Linus Torvalds 已提交
290

291 292 293 294
	/*
	 * FADT specified C2 latency must be less than or equal to
	 * 100 microseconds.
	 */
295
	if (acpi_gbl_FADT.c2_latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
296
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
297
			"C2 latency too large [%d]\n", acpi_gbl_FADT.c2_latency));
298 299 300 301
		/* invalidate C2 */
		pr->power.states[ACPI_STATE_C2].address = 0;
	}

L
Len Brown 已提交
302 303 304 305
	/*
	 * FADT supplied C3 latency must be less than or equal to
	 * 1000 microseconds.
	 */
306
	if (acpi_gbl_FADT.c3_latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
L
Len Brown 已提交
307
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
308
			"C3 latency too large [%d]\n", acpi_gbl_FADT.c3_latency));
L
Len Brown 已提交
309 310 311 312
		/* invalidate C3 */
		pr->power.states[ACPI_STATE_C3].address = 0;
	}

L
Linus Torvalds 已提交
313 314 315 316 317
	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			  "lvl2[0x%08x] lvl3[0x%08x]\n",
			  pr->power.states[ACPI_STATE_C2].address,
			  pr->power.states[ACPI_STATE_C3].address));

318
	return 0;
L
Linus Torvalds 已提交
319 320
}

321
static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
322
{
323 324 325 326 327
	if (!pr->power.states[ACPI_STATE_C1].valid) {
		/* set the first C-State to C1 */
		/* all processors need to support C1 */
		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
		pr->power.states[ACPI_STATE_C1].valid = 1;
328
		pr->power.states[ACPI_STATE_C1].entry_method = ACPI_CSTATE_HALT;
329 330
	}
	/* the C0 state only exists as a filler in our array */
331
	pr->power.states[ACPI_STATE_C0].valid = 1;
332
	return 0;
333 334
}

L
Len Brown 已提交
335
static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
L
Linus Torvalds 已提交
336
{
337
	acpi_status status;
L
Lin Ming 已提交
338
	u64 count;
339
	int current_count;
340
	int i, ret = 0;
L
Len Brown 已提交
341 342
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *cst;
L
Linus Torvalds 已提交
343 344 345


	if (nocst)
346
		return -ENODEV;
L
Linus Torvalds 已提交
347

348
	current_count = 0;
L
Linus Torvalds 已提交
349 350 351 352

	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
353
		return -ENODEV;
L
Len Brown 已提交
354
	}
L
Linus Torvalds 已提交
355

356
	cst = buffer.pointer;
L
Linus Torvalds 已提交
357 358 359

	/* There must be at least 2 elements */
	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
360
		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
361
		ret = -EFAULT;
L
Linus Torvalds 已提交
362 363 364 365 366 367 368
		goto end;
	}

	count = cst->package.elements[0].integer.value;

	/* Validate number of power states. */
	if (count < 1 || count != cst->package.count - 1) {
369
		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
370
		ret = -EFAULT;
L
Linus Torvalds 已提交
371 372 373 374 375 376 377 378 379 380 381 382 383 384
		goto end;
	}

	/* Tell driver that at least _CST is supported. */
	pr->flags.has_cst = 1;

	for (i = 1; i <= count; i++) {
		union acpi_object *element;
		union acpi_object *obj;
		struct acpi_power_register *reg;
		struct acpi_processor_cx cx;

		memset(&cx, 0, sizeof(cx));

385
		element = &(cst->package.elements[i]);
L
Linus Torvalds 已提交
386 387 388 389 390 391
		if (element->type != ACPI_TYPE_PACKAGE)
			continue;

		if (element->package.count != 4)
			continue;

392
		obj = &(element->package.elements[0]);
L
Linus Torvalds 已提交
393 394 395 396

		if (obj->type != ACPI_TYPE_BUFFER)
			continue;

L
Len Brown 已提交
397
		reg = (struct acpi_power_register *)obj->buffer.pointer;
L
Linus Torvalds 已提交
398 399

		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
L
Len Brown 已提交
400
		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
L
Linus Torvalds 已提交
401 402 403
			continue;

		/* There should be an easy way to extract an integer... */
404
		obj = &(element->package.elements[1]);
L
Linus Torvalds 已提交
405 406 407 408
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.type = obj->integer.value;
409 410 411 412 413 414 415 416 417 418
		/*
		 * Some buggy BIOSes won't list C1 in _CST -
		 * Let acpi_processor_get_power_info_default() handle them later
		 */
		if (i == 1 && cx.type != ACPI_STATE_C1)
			current_count++;

		cx.address = reg->address;
		cx.index = current_count + 1;

419
		cx.entry_method = ACPI_CSTATE_SYSTEMIO;
420 421 422
		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			if (acpi_processor_ffh_cstate_probe
					(pr->id, &cx, reg) == 0) {
423 424
				cx.entry_method = ACPI_CSTATE_FFH;
			} else if (cx.type == ACPI_STATE_C1) {
425 426 427 428 429 430
				/*
				 * C1 is a special case where FIXED_HARDWARE
				 * can be handled in non-MWAIT way as well.
				 * In that case, save this _CST entry info.
				 * Otherwise, ignore this info and continue.
				 */
431
				cx.entry_method = ACPI_CSTATE_HALT;
432
				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
433
			} else {
434 435
				continue;
			}
436
			if (cx.type == ACPI_STATE_C1 &&
437
			    (boot_option_idle_override == IDLE_NOMWAIT)) {
Z
Zhao Yakui 已提交
438 439 440 441 442 443
				/*
				 * In most cases the C1 space_id obtained from
				 * _CST object is FIXED_HARDWARE access mode.
				 * But when the option of idle=halt is added,
				 * the entry_method type should be changed from
				 * CSTATE_FFH to CSTATE_HALT.
444 445 446
				 * When the option of idle=nomwait is added,
				 * the C1 entry_method type should be
				 * CSTATE_HALT.
Z
Zhao Yakui 已提交
447 448 449 450
				 */
				cx.entry_method = ACPI_CSTATE_HALT;
				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
			}
451 452 453
		} else {
			snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
				 cx.address);
454
		}
L
Linus Torvalds 已提交
455

456 457 458
		if (cx.type == ACPI_STATE_C1) {
			cx.valid = 1;
		}
459

460
		obj = &(element->package.elements[2]);
L
Linus Torvalds 已提交
461 462 463 464 465
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.latency = obj->integer.value;

466
		obj = &(element->package.elements[3]);
L
Linus Torvalds 已提交
467 468 469
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
		current_count++;
		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

		/*
		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
		 */
		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
			printk(KERN_WARNING
			       "Limiting number of power states to max (%d)\n",
			       ACPI_PROCESSOR_MAX_POWER);
			printk(KERN_WARNING
			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
			break;
		}
L
Linus Torvalds 已提交
485 486
	}

L
Len Brown 已提交
487
	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
488
			  current_count));
L
Linus Torvalds 已提交
489 490

	/* Validate number of power states discovered */
491
	if (current_count < 2)
492
		ret = -EFAULT;
L
Linus Torvalds 已提交
493

L
Len Brown 已提交
494
      end:
495
	kfree(buffer.pointer);
L
Linus Torvalds 已提交
496

497
	return ret;
L
Linus Torvalds 已提交
498 499
}

L
Len Brown 已提交
500 501
static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
					   struct acpi_processor_cx *cx)
L
Linus Torvalds 已提交
502
{
503 504
	static int bm_check_flag = -1;
	static int bm_control_flag = -1;
505

L
Linus Torvalds 已提交
506 507

	if (!cx->address)
508
		return;
L
Linus Torvalds 已提交
509 510 511 512 513 514 515 516 517 518

	/*
	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
	 * DMA transfers are used by any ISA device to avoid livelock.
	 * Note that we could disable Type-F DMA (as recommended by
	 * the erratum), but this is known to disrupt certain ISA
	 * devices thus we take the conservative approach.
	 */
	else if (errata.piix4.fdma) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
L
Len Brown 已提交
519
				  "C3 not supported on PIIX4 with Type-F DMA\n"));
520
		return;
L
Linus Torvalds 已提交
521 522
	}

523
	/* All the logic here assumes flags.bm_check is same across all CPUs */
524
	if (bm_check_flag == -1) {
525 526 527
		/* Determine whether bm_check is needed based on CPU  */
		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
		bm_check_flag = pr->flags.bm_check;
528
		bm_control_flag = pr->flags.bm_control;
529 530
	} else {
		pr->flags.bm_check = bm_check_flag;
531
		pr->flags.bm_control = bm_control_flag;
532 533 534 535
	}

	if (pr->flags.bm_check) {
		if (!pr->flags.bm_control) {
536 537 538 539 540 541 542 543 544 545
			if (pr->flags.has_cst != 1) {
				/* bus mastering control is necessary */
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					"C3 support requires BM control\n"));
				return;
			} else {
				/* Here we enter C3 without bus mastering */
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					"C3 support without BM control\n"));
			}
546 547 548 549 550 551
		}
	} else {
		/*
		 * WBINVD should be set in fadt, for C3 state to be
		 * supported on when bm_check is not required.
		 */
552
		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
553
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
L
Len Brown 已提交
554 555
					  "Cache invalidation should work properly"
					  " for C3 to be enabled on SMP systems\n"));
556
			return;
557 558 559
		}
	}

L
Linus Torvalds 已提交
560 561 562 563 564 565 566
	/*
	 * Otherwise we've met all of our C3 requirements.
	 * Normalize the C3 latency to expidite policy.  Enable
	 * checking of bus mastering status (bm_check) so we can
	 * use this in our C3 policy
	 */
	cx->valid = 1;
567

568 569 570 571 572 573 574 575
	/*
	 * On older chipsets, BM_RLD needs to be set
	 * in order for Bus Master activity to wake the
	 * system from C3.  Newer chipsets handle DMA
	 * during C3 automatically and BM_RLD is a NOP.
	 * In either case, the proper way to
	 * handle BM_RLD is to set it and leave it set.
	 */
576
	acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
L
Linus Torvalds 已提交
577

578
	return;
L
Linus Torvalds 已提交
579 580 581 582 583 584
}

static int acpi_processor_power_verify(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int working = 0;
585

586
	pr->power.timer_broadcast_on_state = INT_MAX;
587

588
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
L
Linus Torvalds 已提交
589 590 591 592 593 594 595 596
		struct acpi_processor_cx *cx = &pr->power.states[i];

		switch (cx->type) {
		case ACPI_STATE_C1:
			cx->valid = 1;
			break;

		case ACPI_STATE_C2:
597 598
			if (!cx->address)
				break;
A
Al Stone 已提交
599
			cx->valid = 1;
L
Linus Torvalds 已提交
600 601 602 603 604 605
			break;

		case ACPI_STATE_C3:
			acpi_processor_power_verify_c3(pr, cx);
			break;
		}
606 607
		if (!cx->valid)
			continue;
L
Linus Torvalds 已提交
608

609 610 611
		lapic_timer_check_state(i, pr, cx);
		tsc_check_state(cx->type);
		working++;
L
Linus Torvalds 已提交
612
	}
613

614
	lapic_timer_propagate_broadcast(pr);
L
Linus Torvalds 已提交
615 616 617 618

	return (working);
}

L
Len Brown 已提交
619
static int acpi_processor_get_power_info(struct acpi_processor *pr)
L
Linus Torvalds 已提交
620 621 622 623 624 625 626 627
{
	unsigned int i;
	int result;


	/* NOTE: the idle thread may not be running while calling
	 * this function */

628 629 630
	/* Zero initialize all the C-states info. */
	memset(pr->power.states, 0, sizeof(pr->power.states));

L
Linus Torvalds 已提交
631
	result = acpi_processor_get_power_info_cst(pr);
632
	if (result == -ENODEV)
633
		result = acpi_processor_get_power_info_fadt(pr);
634

635 636 637 638 639
	if (result)
		return result;

	acpi_processor_get_power_info_default(pr);

640
	pr->power.count = acpi_processor_power_verify(pr);
L
Linus Torvalds 已提交
641 642 643 644 645 646

	/*
	 * if one state of type C2 or C3 is available, mark this
	 * CPU as being "idle manageable"
	 */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
647
		if (pr->power.states[i].valid) {
L
Linus Torvalds 已提交
648
			pr->power.count = i;
649 650
			if (pr->power.states[i].type >= ACPI_STATE_C2)
				pr->flags.power = 1;
651
		}
L
Linus Torvalds 已提交
652 653
	}

654
	return 0;
L
Linus Torvalds 已提交
655 656
}

657 658 659 660 661 662 663
/**
 * acpi_idle_bm_check - checks if bus master activity was detected
 */
static int acpi_idle_bm_check(void)
{
	u32 bm_status = 0;

664 665 666
	if (bm_check_disable)
		return 0;

667
	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
668
	if (bm_status)
669
		acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685
	/*
	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
	 * the true state of bus mastering activity; forcing us to
	 * manually check the BMIDEA bit of each IDE channel.
	 */
	else if (errata.piix4.bmisx) {
		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
			bm_status = 1;
	}
	return bm_status;
}

/**
 * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
 * @cx: cstate data
686 687
 *
 * Caller disables interrupt before call and enables interrupt after return.
688 689 690
 */
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
691 692
	/* Don't trace irqs off for idle */
	stop_critical_timings();
693
	if (cx->entry_method == ACPI_CSTATE_FFH) {
694 695
		/* Call into architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cx);
696 697
	} else if (cx->entry_method == ACPI_CSTATE_HALT) {
		acpi_safe_halt();
698 699 700 701 702 703
	} else {
		/* IO port based C-state */
		inb(cx->address);
		/* Dummy wait op - must do something useless after P_LVL2 read
		   because chipsets cannot guarantee that STPCLK# signal
		   gets asserted in time to freeze execution properly. */
704
		inl(acpi_gbl_FADT.xpm_timer_block.address);
705
	}
706
	start_critical_timings();
707 708 709 710 711
}

/**
 * acpi_idle_enter_c1 - enters an ACPI C1 state-type
 * @dev: the target CPU
712
 * @drv: cpuidle driver containing cpuidle state info
713
 * @index: index of target state
714 715 716 717
 *
 * This is equivalent to the HALT instruction.
 */
static int acpi_idle_enter_c1(struct cpuidle_device *dev,
718
		struct cpuidle_driver *drv, int index)
719 720
{
	struct acpi_processor *pr;
721
	struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
722

723
	pr = __this_cpu_read(processors);
724 725

	if (unlikely(!pr))
726
		return -EINVAL;
727

728
	lapic_timer_state_broadcast(pr, cx, 1);
729
	acpi_idle_do_entry(cx);
730

731
	lapic_timer_state_broadcast(pr, cx, 0);
732

733
	return index;
734 735
}

736 737 738 739 740 741 742 743

/**
 * acpi_idle_play_dead - enters an ACPI state for long-term idle (i.e. off-lining)
 * @dev: the target CPU
 * @index: the index of suggested state
 */
static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
{
744
	struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
745 746 747 748 749 750

	ACPI_FLUSH_CPU_CACHE();

	while (1) {

		if (cx->entry_method == ACPI_CSTATE_HALT)
751
			safe_halt();
752 753 754 755 756 757 758 759 760 761 762 763
		else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
			inb(cx->address);
			/* See comment in acpi_idle_do_entry() */
			inl(acpi_gbl_FADT.xpm_timer_block.address);
		} else
			return -ENODEV;
	}

	/* Never reached */
	return 0;
}

764 765 766
/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
767
 * @drv: cpuidle driver with cpuidle state information
768
 * @index: the index of suggested state
769 770
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
771
		struct cpuidle_driver *drv, int index)
772 773
{
	struct acpi_processor *pr;
774
	struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
775

776
	pr = __this_cpu_read(processors);
777 778

	if (unlikely(!pr))
779
		return -EINVAL;
780

781 782 783 784 785 786 787
#ifdef CONFIG_HOTPLUG_CPU
	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst &&
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		return acpi_idle_enter_c1(dev, drv, CPUIDLE_DRIVER_STATE_START);
#endif

788 789 790 791
	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
792
	lapic_timer_state_broadcast(pr, cx, 1);
793

794 795 796
	if (cx->type == ACPI_STATE_C3)
		ACPI_FLUSH_CPU_CACHE();

797 798
	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
799 800
	acpi_idle_do_entry(cx);

801
	sched_clock_idle_wakeup_event(0);
802

803
	lapic_timer_state_broadcast(pr, cx, 0);
804
	return index;
805 806 807
}

static int c3_cpu_count;
808
static DEFINE_RAW_SPINLOCK(c3_lock);
809 810 811 812

/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
813
 * @drv: cpuidle driver containing state data
814
 * @index: the index of suggested state
815 816 817 818
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
819
		struct cpuidle_driver *drv, int index)
820 821
{
	struct acpi_processor *pr;
822
	struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
823

824
	pr = __this_cpu_read(processors);
825 826

	if (unlikely(!pr))
827
		return -EINVAL;
828

829 830 831 832 833 834 835
#ifdef CONFIG_HOTPLUG_CPU
	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst &&
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		return acpi_idle_enter_c1(dev, drv, CPUIDLE_DRIVER_STATE_START);
#endif

836
	if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
837 838 839
		if (drv->safe_state_index >= 0) {
			return drv->states[drv->safe_state_index].enter(dev,
						drv, drv->safe_state_index);
840
		} else {
841
			acpi_safe_halt();
842
			return -EBUSY;
843 844 845
		}
	}

846 847
	acpi_unlazy_tlb(smp_processor_id());

848 849
	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
850 851 852 853
	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
854
	lapic_timer_state_broadcast(pr, cx, 1);
855

856 857 858 859 860 861 862 863 864 865 866
	/*
	 * disable bus master
	 * bm_check implies we need ARB_DIS
	 * !bm_check implies we need cache flush
	 * bm_control implies whether we can do ARB_DIS
	 *
	 * That leaves a case where bm_check is set and bm_control is
	 * not set. In that case we cannot do much, we enter C3
	 * without doing anything.
	 */
	if (pr->flags.bm_check && pr->flags.bm_control) {
867
		raw_spin_lock(&c3_lock);
868 869 870
		c3_cpu_count++;
		/* Disable bus master arbitration when all CPUs are in C3 */
		if (c3_cpu_count == num_online_cpus())
871
			acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
872
		raw_spin_unlock(&c3_lock);
873 874 875
	} else if (!pr->flags.bm_check) {
		ACPI_FLUSH_CPU_CACHE();
	}
876

877
	acpi_idle_do_entry(cx);
878

879 880
	/* Re-enable bus master arbitration */
	if (pr->flags.bm_check && pr->flags.bm_control) {
881
		raw_spin_lock(&c3_lock);
882
		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
883
		c3_cpu_count--;
884
		raw_spin_unlock(&c3_lock);
885
	}
886

887
	sched_clock_idle_wakeup_event(0);
888

889
	lapic_timer_state_broadcast(pr, cx, 0);
890
	return index;
891 892 893 894 895 896 897 898
}

/*
 * The cpuidle driver instance for ACPI idle; its states[] array is filled
 * in by acpi_processor_setup_cpuidle_states().
 */
struct cpuidle_driver acpi_idle_driver = {
	.owner =	THIS_MODULE,
	.name =		"acpi_idle",
};

/**
899 900 901
 * acpi_processor_setup_cpuidle_cx - prepares and configures CPUIDLE
 * device i.e. per-cpu data
 *
902
 * @pr: the ACPI processor
903
 * @dev : the cpuidle device
904
 */
905 906
static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr,
					   struct cpuidle_device *dev)
907
{
908
	int i, count = CPUIDLE_DRIVER_STATE_START;
909 910 911 912 913 914 915 916 917
	struct acpi_processor_cx *cx;

	if (!pr->flags.power_setup_done)
		return -EINVAL;

	if (pr->flags.power == 0) {
		return -EINVAL;
	}

918 919 920
	if (!dev)
		return -EINVAL;

921
	dev->cpu = pr->id;
922

923 924 925
	if (max_cstate == 0)
		max_cstate = 1;

926 927 928 929 930 931
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
		cx = &pr->power.states[i];

		if (!cx->valid)
			continue;

932
		per_cpu(acpi_cstate[count], dev->cpu) = cx;
933

934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
		count++;
		if (count == CPUIDLE_STATE_MAX)
			break;
	}

	if (!count)
		return -EINVAL;

	return 0;
}

/**
 * acpi_processor_setup_cpuidle states- prepares and configures cpuidle
 * global state data i.e. idle routines
 *
 * @pr: the ACPI processor
 */
static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
{
	int i, count = CPUIDLE_DRIVER_STATE_START;
	struct acpi_processor_cx *cx;
	struct cpuidle_state *state;
	struct cpuidle_driver *drv = &acpi_idle_driver;

	if (!pr->flags.power_setup_done)
		return -EINVAL;

	if (pr->flags.power == 0)
		return -EINVAL;

	drv->safe_state_index = -1;
965
	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
966 967
		drv->states[i].name[0] = '\0';
		drv->states[i].desc[0] = '\0';
968 969
	}

970 971 972
	if (max_cstate == 0)
		max_cstate = 1;

973 974 975 976 977 978
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
		cx = &pr->power.states[i];

		if (!cx->valid)
			continue;

979
		state = &drv->states[count];
980
		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
981
		strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
982
		state->exit_latency = cx->latency;
983
		state->target_residency = cx->latency * latency_factor;
984 985 986 987

		state->flags = 0;
		switch (cx->type) {
			case ACPI_STATE_C1:
988

989
			state->enter = acpi_idle_enter_c1;
990
			state->enter_dead = acpi_idle_play_dead;
991
			drv->safe_state_index = count;
992 993 994 995
			break;

			case ACPI_STATE_C2:
			state->enter = acpi_idle_enter_simple;
996
			state->enter_dead = acpi_idle_play_dead;
997
			drv->safe_state_index = count;
998 999 1000 1001 1002 1003 1004 1005 1006 1007
			break;

			case ACPI_STATE_C3:
			state->enter = pr->flags.bm_check ?
					acpi_idle_enter_bm :
					acpi_idle_enter_simple;
			break;
		}

		count++;
1008 1009
		if (count == CPUIDLE_STATE_MAX)
			break;
1010 1011
	}

1012
	drv->state_count = count;
1013 1014 1015 1016 1017 1018 1019

	if (!count)
		return -EINVAL;

	return 0;
}

1020
int acpi_processor_hotplug(struct acpi_processor *pr)
1021
{
1022
	int ret = 0;
1023
	struct cpuidle_device *dev;
1024

1025
	if (disabled_by_idle_boot_param())
1026 1027
		return 0;

1028
	if (nocst)
1029 1030 1031 1032 1033
		return -ENODEV;

	if (!pr->flags.power_setup_done)
		return -ENODEV;

1034
	dev = per_cpu(acpi_cpuidle_device, pr->id);
1035
	cpuidle_pause_and_lock();
1036
	cpuidle_disable_device(dev);
1037
	acpi_processor_get_power_info(pr);
1038
	if (pr->flags.power) {
1039
		acpi_processor_setup_cpuidle_cx(pr, dev);
1040
		ret = cpuidle_enable_device(dev);
1041
	}
1042 1043 1044 1045 1046
	cpuidle_resume_and_unlock();

	return ret;
}

1047 1048 1049 1050
int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
	int cpu;
	struct acpi_processor *_pr;
1051
	struct cpuidle_device *dev;
1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067

	if (disabled_by_idle_boot_param())
		return 0;

	if (nocst)
		return -ENODEV;

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	/*
	 * FIXME:  Design the ACPI notification to make it once per
	 * system instead of once per-cpu.  This condition is a hack
	 * to make the code that updates C-States be called once.
	 */

1068
	if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) {
1069 1070 1071

		/* Protect against cpu-hotplug */
		get_online_cpus();
1072
		cpuidle_pause_and_lock();
1073 1074 1075 1076 1077 1078

		/* Disable all cpuidle devices */
		for_each_online_cpu(cpu) {
			_pr = per_cpu(processors, cpu);
			if (!_pr || !_pr->flags.power_setup_done)
				continue;
1079 1080
			dev = per_cpu(acpi_cpuidle_device, cpu);
			cpuidle_disable_device(dev);
1081 1082 1083
		}

		/* Populate Updated C-state information */
1084
		acpi_processor_get_power_info(pr);
1085 1086 1087 1088 1089 1090 1091 1092 1093
		acpi_processor_setup_cpuidle_states(pr);

		/* Enable all cpuidle devices */
		for_each_online_cpu(cpu) {
			_pr = per_cpu(processors, cpu);
			if (!_pr || !_pr->flags.power_setup_done)
				continue;
			acpi_processor_get_power_info(_pr);
			if (_pr->flags.power) {
1094
				dev = per_cpu(acpi_cpuidle_device, cpu);
1095
				acpi_processor_setup_cpuidle_cx(_pr, dev);
1096
				cpuidle_enable_device(dev);
1097 1098 1099
			}
		}
		cpuidle_resume_and_unlock();
1100
		put_online_cpus();
1101 1102 1103 1104 1105 1106 1107
	}

	return 0;
}

/*
 * Count of processors whose cpuidle device has been registered.
 * acpi_processor_power_init() registers acpi_idle_driver while this is zero
 * and increments it after a successful device registration;
 * acpi_processor_power_exit() decrements it and unregisters the driver once
 * it drops back to zero.
 */
static int acpi_processor_registered;

1108
int acpi_processor_power_init(struct acpi_processor *pr)
L
Linus Torvalds 已提交
1109
{
1110
	acpi_status status;
1111
	int retval;
1112
	struct cpuidle_device *dev;
1113
	static int first_run;
L
Linus Torvalds 已提交
1114

1115
	if (disabled_by_idle_boot_param())
1116
		return 0;
L
Linus Torvalds 已提交
1117 1118 1119

	if (!first_run) {
		dmi_check_system(processor_power_dmi_table);
1120
		max_cstate = acpi_processor_cstate_check(max_cstate);
L
Linus Torvalds 已提交
1121
		if (max_cstate < ACPI_C_STATES_MAX)
L
Len Brown 已提交
1122 1123 1124
			printk(KERN_NOTICE
			       "ACPI: processor limited to max C-state %d\n",
			       max_cstate);
L
Linus Torvalds 已提交
1125 1126 1127
		first_run++;
	}

1128
	if (acpi_gbl_FADT.cst_control && !nocst) {
L
Len Brown 已提交
1129
		status =
1130
		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
L
Linus Torvalds 已提交
1131
		if (ACPI_FAILURE(status)) {
1132 1133
			ACPI_EXCEPTION((AE_INFO, status,
					"Notifying BIOS of _CST ability failed"));
L
Linus Torvalds 已提交
1134 1135 1136 1137
		}
	}

	acpi_processor_get_power_info(pr);
1138
	pr->flags.power_setup_done = 1;
L
Linus Torvalds 已提交
1139 1140 1141 1142 1143 1144

	/*
	 * Install the idle handler if processor power management is supported.
	 * Note that we use previously set idle handler will be used on
	 * platforms that only support C1.
	 */
1145
	if (pr->flags.power) {
1146 1147 1148 1149 1150 1151 1152 1153 1154
		/* Register acpi_idle_driver if not already registered */
		if (!acpi_processor_registered) {
			acpi_processor_setup_cpuidle_states(pr);
			retval = cpuidle_register_driver(&acpi_idle_driver);
			if (retval)
				return retval;
			printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n",
					acpi_idle_driver.name);
		}
1155 1156 1157 1158 1159 1160

		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
		if (!dev)
			return -ENOMEM;
		per_cpu(acpi_cpuidle_device, pr->id) = dev;

1161
		acpi_processor_setup_cpuidle_cx(pr, dev);
1162

1163 1164 1165
		/* Register per-cpu cpuidle_device. Cpuidle driver
		 * must already be registered before registering device
		 */
1166
		retval = cpuidle_register_device(dev);
1167 1168 1169 1170 1171 1172
		if (retval) {
			if (acpi_processor_registered == 0)
				cpuidle_unregister_driver(&acpi_idle_driver);
			return retval;
		}
		acpi_processor_registered++;
L
Linus Torvalds 已提交
1173
	}
1174
	return 0;
L
Linus Torvalds 已提交
1175 1176
}

1177
int acpi_processor_power_exit(struct acpi_processor *pr)
L
Linus Torvalds 已提交
1178
{
1179 1180
	struct cpuidle_device *dev = per_cpu(acpi_cpuidle_device, pr->id);

1181
	if (disabled_by_idle_boot_param())
1182 1183
		return 0;

1184
	if (pr->flags.power) {
1185
		cpuidle_unregister_device(dev);
1186 1187 1188 1189
		acpi_processor_registered--;
		if (acpi_processor_registered == 0)
			cpuidle_unregister_driver(&acpi_idle_driver);
	}
L
Linus Torvalds 已提交
1190

1191
	pr->flags.power_setup_done = 0;
1192
	return 0;
L
Linus Torvalds 已提交
1193
}