/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>

#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

#include <acpi/processor.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include "mperf.h"

MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

#define PFX "acpi-cpufreq: "

enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

#define INTEL_MSR_RANGE		(0xffff)
#define AMD_MSR_RANGE		(0x7)

#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

struct acpi_cpufreq_data {
	struct acpi_processor_performance *acpi_data;
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
};

static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);

/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance __percpu *acpi_perf_data;

static struct cpufreq_driver acpi_cpufreq_driver;

static unsigned int acpi_pstate_strict;
static bool boost_enabled, boost_supported;
static struct msr __percpu *msrs;

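/*
 * boost_state() - return true if core boosting (Intel Turbo Boost or
 * AMD Core Performance Boost) is currently enabled on @cpu, i.e. the
 * vendor specific boost-disable bit in MSR_IA32_MISC_ENABLE/MSR_K7_HWCR
 * is clear.
 */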
static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

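/*
 * boost_set_msrs() - set or clear the boost-disable bit on every CPU in
 * @cpumask via a read-modify-write of the vendor specific MSR.  @enable
 * means "allow boosting", so the disable bit is cleared.
 */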
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 cpu;
	u32 msr_addr;
	u64 msr_mask;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		msr_addr = MSR_IA32_MISC_ENABLE;
		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
		break;
	case X86_VENDOR_AMD:
		msr_addr = MSR_K7_HWCR;
		msr_mask = MSR_K7_HWCR_CPB_DIS;
		break;
	default:
		return;
	}

	rdmsr_on_cpus(cpumask, msr_addr, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *reg = per_cpu_ptr(msrs, cpu);
		if (enable)
			reg->q &= ~msr_mask;
		else
			reg->q |= msr_mask;
	}

	wrmsr_on_cpus(cpumask, msr_addr, msrs);
}

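/*
 * sysfs store handler for the global "boost" file: accepts 0 or 1 and,
 * if the state changes, propagates it to all online CPUs through
 * boost_set_msrs().
 */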
static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr,
				  const char *buf, size_t count)
{
	int ret;
	unsigned long val = 0;

	if (!boost_supported)
		return -EINVAL;

	ret = kstrtoul(buf, 10, &val);
	if (ret || (val > 1))
		return -EINVAL;

	if ((val && boost_enabled) || (!val && !boost_enabled))
		return count;

	get_online_cpus();

	boost_set_msrs(val, cpu_online_mask);

	put_online_cpus();

	boost_enabled = val;
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

	return count;
}

static ssize_t show_global_boost(struct kobject *kobj,
				 struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", boost_enabled);
}

static struct global_attr global_boost = __ATTR(boost, 0644,
						show_global_boost,
						store_global_boost);

static int check_est_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_EST);
}

static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
}

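/*
 * extract_io()/extract_msr() translate a raw P-state status value (read
 * from an I/O port or from the PERF_STATUS MSR) into a frequency in kHz
 * by looking it up in the driver's frequency table.
 */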
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf;
	int i;

	perf = data->acpi_data;

	for (i = 0; i < perf->state_count; i++) {
		if (value == perf->states[i].status)
			return data->freq_table[i].frequency;
	}
	return 0;
}

static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	int i;
	struct acpi_processor_performance *perf;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		msr &= AMD_MSR_RANGE;
	else
		msr &= INTEL_MSR_RANGE;

	perf = data->acpi_data;

	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
		if (msr == perf->states[data->freq_table[i].index].status)
			return data->freq_table[i].frequency;
	}
	return data->freq_table[0].frequency;
}

static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		return extract_msr(val, data);
	case SYSTEM_IO_CAPABLE:
		return extract_io(val, data);
	default:
		return 0;
	}
}

struct msr_addr {
	u32 reg;
};

struct io_addr {
	u16 port;
	u8 bit_width;
};

struct drv_cmd {
	unsigned int type;
	const struct cpumask *mask;
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
	u32 val;
};

/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;
	u32 h;

	switch (cmd->type) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
	case SYSTEM_IO_CAPABLE:
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
		break;
	default:
		break;
	}
}

/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;
	u32 lo, hi;

	switch (cmd->type) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
	case SYSTEM_IO_CAPABLE:
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
		break;
	default:
		break;
	}
}

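/*
 * drv_read()/drv_write() issue the actual register access on the CPUs in
 * cmd->mask using the smp_call_function helpers, because the P-state MSRs
 * and I/O ports have to be accessed from the target CPU itself.
 */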
static void drv_read(struct drv_cmd *cmd)
{
	int err;
	cmd->val = 0;

	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
}

static void drv_write(struct drv_cmd *cmd)
{
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
	put_cpu();
}

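/*
 * get_cur_val() - build a status-read command for the first CPU in @mask
 * and return the raw P-state status value (MSR or I/O based, depending
 * on the detected cpu_feature).
 */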
static u32 get_cur_val(const struct cpumask *mask)
{
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;

	if (unlikely(cpumask_empty(mask)))
		return 0;

	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_STATUS;
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

	cmd.mask = mask;
	drv_read(&cmd);

	pr_debug("get_cur_val = %u\n", cmd.val);

	return cmd.val;
}

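/*
 * get_cur_freq_on_cpu() - ->get() callback: report the current frequency
 * of @cpu in kHz.  If it does not match the cached P-state (e.g. the BIOS
 * changed it behind our back), force a write on the next ->target() call.
 */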
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
	unsigned int freq;
	unsigned int cached_freq;

	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);

	if (unlikely(data == NULL ||
		     data->acpi_data == NULL || data->freq_table == NULL)) {
		return 0;
	}

	cached_freq = data->freq_table[data->acpi_data->state].frequency;
	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

	pr_debug("cur freq = %u\n", freq);

	return freq;
}

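/*
 * check_freqs() - used in acpi_pstate_strict mode: poll the hardware up to
 * 100 times, 10us apart, until the CPUs in @mask report the requested
 * frequency.  Returns 1 on success, 0 on timeout.
 */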
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int cur_freq;
	unsigned int i;

	for (i = 0; i < 100; i++) {
		cur_freq = extract_freq(get_cur_val(mask), data);
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

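/*
 * acpi_cpufreq_target() - ->target() callback: look up the table entry for
 * @target_freq, program the new P-state through the MSR or I/O control
 * register on the affected CPUs and send the PRE/POSTCHANGE notifications.
 */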
static int acpi_cpufreq_target(struct cpufreq_policy *policy,
			       unsigned int target_freq, unsigned int relation)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
	struct acpi_processor_performance *perf;
	struct cpufreq_freqs freqs;
	struct drv_cmd cmd;
	unsigned int next_state = 0; /* Index into freq_table */
	unsigned int next_perf_state = 0; /* Index into perf table */
	unsigned int i;
	int result = 0;

	pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);

	if (unlikely(data == NULL ||
	     data->acpi_data == NULL || data->freq_table == NULL)) {
		return -ENODEV;
	}

	perf = data->acpi_data;
	result = cpufreq_frequency_table_target(policy,
						data->freq_table,
						target_freq,
						relation, &next_state);
	if (unlikely(result)) {
		result = -ENODEV;
		goto out;
	}

	next_perf_state = data->freq_table[next_state].index;
	if (perf->state == next_perf_state) {
		if (unlikely(data->resume)) {
			pr_debug("Called after resume, resetting to P%d\n",
				next_perf_state);
			data->resume = 0;
		} else {
			pr_debug("Already at target state (P%d)\n",
				next_perf_state);
			goto out;
		}
	}

	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
		result = -ENODEV;
		goto out;
	}

	/* cpufreq holds the hotplug lock, so we are safe from here on */
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
		cmd.mask = policy->cpus;
	else
		cmd.mask = cpumask_of(policy->cpu);

	freqs.old = perf->states[perf->state].core_frequency * 1000;
	freqs.new = data->freq_table[next_state].frequency;
	for_each_cpu(i, policy->cpus) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	}

	drv_write(&cmd);

	if (acpi_pstate_strict) {
		if (!check_freqs(cmd.mask, freqs.new, data)) {
			pr_debug("acpi_cpufreq_target failed (%d)\n",
				policy->cpu);
			result = -EAGAIN;
			goto out;
		}
	}

	for_each_cpu(i, policy->cpus) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
	}
	perf->state = next_perf_state;

out:
	return result;
}

static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	pr_debug("acpi_cpufreq_verify\n");

	return cpufreq_frequency_table_verify(policy, data->freq_table);
}

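/*
 * acpi_cpufreq_guess_freq() - used when the current speed cannot be read
 * back (I/O port based platforms): pick the table state closest to the
 * measured cpu_khz, or assume P0 if cpu_khz is not known.
 */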
static unsigned long
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
{
	struct acpi_processor_performance *perf = data->acpi_data;

L
509 510 511 512
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
513
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
514

515
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
516
			freq = freqn;
517
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
518
			if ((2 * cpu_khz) > (freqn + freq)) {
519
				perf->state = i;
520
				return freq;
L
Linus Torvalds 已提交
521 522
			}
		}
523
		perf->state = perf->state_count-1;
524
		return freqn;
525
	} else {
L
Linus Torvalds 已提交
526
		/* assume CPU is at P0... */
527 528 529
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
530 531
}

532 533 534 535 536 537 538 539 540 541 542
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		boost_set_msrs(boost_enabled, cpumask);
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings.  We can
 * evaluate _PDC and _PSD here and find out the processor dependencies
 * before the actual per-CPU init that happens later...
 */
static int __init acpi_cpufreq_early_init(void)
{
	unsigned int i;
	pr_debug("acpi_cpufreq_early_init\n");

	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
		pr_debug("Memory allocation error for acpi_perf_data.\n");
		return -ENOMEM;
	}
	for_each_possible_cpu(i) {
		if (!zalloc_cpumask_var_node(
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}

	/* Do initialization in ACPI core */
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
}

#ifdef CONFIG_SMP
/*
 * Some BIOSes do SW_ANY coordination internally, either setting it up
 * in hardware or handling it in firmware, without informing the OS.
 * Left undetected, this makes the CPU run at a speed different from the
 * one the OS asked for. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

static int sw_any_bug_found(const struct dmi_system_id *d)
{
	bios_with_sw_any_bug = 1;
	return 0;
}

static const struct dmi_system_id sw_any_bug_dmi_table[] = {
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
	 * Both Processor Cores to Lock Up. */
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
			return -ENODEV;
		    }
		}
	return 0;
}
#endif

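/*
 * acpi_cpufreq_cpu_init() - per-policy setup: register the CPU with the
 * ACPI processor driver, work out whether P-states are MSR or I/O
 * controlled, build the frequency table and fill in the policy's limits
 * and transition latency.
 */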
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
	struct acpi_processor_performance *perf;
#ifdef CONFIG_SMP
	static int blacklisted;
#endif

	pr_debug("acpi_cpufreq_cpu_init\n");

#ifdef CONFIG_SMP
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
#endif

	data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
	per_cpu(acfreq_data, cpu) = data;

	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

	result = acpi_processor_register_performance(data->acpi_data, cpu);
	if (result)
		goto err_free;

	perf = data->acpi_data;
	policy->shared_type = perf->shared_type;

	/*
	 * Will let policy->cpus know about dependency only when software
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
	}
	cpumask_copy(policy->related_cpus, perf->shared_cpu_map);

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
	if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
	}

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
		cpumask_copy(policy->related_cpus, cpu_sibling_mask(cpu));
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
#endif

	/* capability check */
	if (perf->state_count <= 1) {
		pr_debug("No P-States\n");
		result = -ENODEV;
		goto err_unreg;
	}

	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		pr_debug("SYSTEM IO addr space\n");
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		pr_debug("HARDWARE addr space\n");
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
		}
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
	default:
		pr_debug("Unknown addr space %d\n",
			(u32) (perf->control_register.space_id));
		result = -ENODEV;
		goto err_unreg;
	}

	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
		    (perf->state_count+1), GFP_KERNEL);
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
	for (i = 0; i < perf->state_count; i++) {
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
	}

	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
	}

	/* table init */
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
		    data->freq_table[valid_states-1].frequency / 1000)
			continue;

		data->freq_table[valid_states].index = i;
		data->freq_table[valid_states].frequency =
		    perf->states[i].core_frequency * 1000;
		valid_states++;
	}
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
	perf->state = 0;

	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
	if (result)
		goto err_freqfree;

	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		/* Current speed is unknown and not detectable by IO port */
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
		policy->cur = get_cur_freq_on_cpu(cpu);
		break;
	default:
		break;
	}

	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

	/* Check for APERF/MPERF support in hardware */
	if (boot_cpu_has(X86_FEATURE_APERFMPERF))
		acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;

	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
	for (i = 0; i < perf->state_count; i++)
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
			(i == perf->state ? '*' : ' '), i,
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);

	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);

	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;

	return result;

err_freqfree:
	kfree(data->freq_table);
err_unreg:
	acpi_processor_unregister_performance(perf, cpu);
err_free:
	kfree(data);
	per_cpu(acfreq_data, cpu) = NULL;

	return result;
}

static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	pr_debug("acpi_cpufreq_cpu_exit\n");

	if (data) {
		cpufreq_frequency_table_put_attr(policy->cpu);
		per_cpu(acfreq_data, policy->cpu) = NULL;
		acpi_processor_unregister_performance(data->acpi_data,
						      policy->cpu);
		kfree(data->freq_table);
		kfree(data);
	}

	return 0;
}

static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	pr_debug("acpi_cpufreq_resume\n");

	data->resume = 1;

	return 0;
}

static struct freq_attr *acpi_cpufreq_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
	.verify		= acpi_cpufreq_verify,
	.target		= acpi_cpufreq_target,
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.owner		= THIS_MODULE,
	.attr		= acpi_cpufreq_attr,
};

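/*
 * acpi_cpufreq_boost_init() - if the CPU advertises boost support (CPB or
 * IDA), bring the per-CPU boost-disable MSRs into a consistent state,
 * register the hotplug notifier and expose the global "boost" sysfs file
 * (read-only on unsupported systems).
 */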
static void __init acpi_cpufreq_boost_init(void)
{
	if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
		msrs = msrs_alloc();

		if (!msrs)
			return;

		boost_supported = true;
		boost_enabled = boost_state(0);

		get_online_cpus();

		/* Force all MSRs to the same value */
		boost_set_msrs(boost_enabled, cpu_online_mask);

		register_cpu_notifier(&boost_nb);

		put_online_cpus();
	} else
		global_boost.attr.mode = 0444;

	/* We create the boost file in any case, though for systems without
	 * hardware support it will be read-only and hardwired to return 0.
	 */
	if (sysfs_create_file(cpufreq_global_kobject, &(global_boost.attr)))
		pr_warn(PFX "could not register global boost sysfs file\n");
	else
		pr_debug("registered global boost sysfs file\n");
}

static void __exit acpi_cpufreq_boost_exit(void)
{
	sysfs_remove_file(cpufreq_global_kobject, &(global_boost.attr));

	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return 0;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret)
		free_acpi_perf_data();
	else
		acpi_cpufreq_boost_init();

	return ret;
}

static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	free_acpi_perf_data();
}

module_param(acpi_pstate_strict, uint, 0644);
MODULE_PARM_DESC(acpi_pstate_strict,
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

MODULE_ALIAS("acpi");