/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54
#define PFX "acpi-cpufreq: "

55 56 57
/*
 * Register access method selected for a CPU in acpi_cpufreq_cpu_init()
 * and stored in acpi_cpufreq_data.cpu_feature.
 */
enum {
	/* Nothing selected; this is the value of a zeroed cpu_feature. */
	UNDEFINED_CAPABLE = 0,
	/* Fixed-hardware register space on a CPU with X86_FEATURE_EST. */
	SYSTEM_INTEL_MSR_CAPABLE,
	/* Fixed-hardware register space on a CPU with X86_FEATURE_HW_PSTATE. */
	SYSTEM_AMD_MSR_CAPABLE,
	/* Control/status registers live in system I/O port space. */
	SYSTEM_IO_CAPABLE,
};

/* Bits of the performance MSR value that are compared/written on Intel. */
#define INTEL_MSR_RANGE		(0xffff)
/* Bits of the performance MSR value that are compared on AMD. */
#define AMD_MSR_RANGE		(0x7)

/* Boost-disable bit in MSR_K7_HWCR; set means boosting is off. */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
71
	unsigned int acpi_perf_cpu;
72
	cpumask_var_t freqdomain_cpus;
73 74
	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
L
Linus Torvalds 已提交
75 76
};

77
/* acpi_perf_data is a pointer to percpu data. */
78
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
79

80 81 82 83 84
/* Return the ACPI performance data slot belonging to @data's CPU. */
static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
{
	unsigned int owner = data->acpi_perf_cpu;

	return per_cpu_ptr(acpi_perf_data, owner);
}

L
Linus Torvalds 已提交
85 86
static struct cpufreq_driver acpi_cpufreq_driver;

87
static unsigned int acpi_pstate_strict;
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * Set or clear the vendor-specific boost-disable bit on every CPU in
 * @cpumask.
 *
 * @enable:  true clears the disable bit, false sets it.
 * @cpumask: CPUs whose MSR is read, modified and written back.
 *
 * Does nothing on vendors other than Intel and AMD. Uses the file-scope
 * percpu buffer 'msrs' as scratch space.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 target;
	u32 addr;
	u64 disable_bit;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		addr = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		addr = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	/* Fetch the current MSR contents of all CPUs into the buffer. */
	rdmsr_on_cpus(cpumask, addr, msrs);

	for_each_cpu(target, cpumask) {
		struct msr *slot = per_cpu_ptr(msrs, target);

		slot->q = enable ? (slot->q & ~disable_bit)
				 : (slot->q | disable_bit);
	}

	/* Push the modified values back to the CPUs. */
	wrmsr_on_cpus(cpumask, addr, msrs);
}

140
static int set_boost(int val)
141 142 143 144 145 146
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

147
	return 0;
148 149
}

150 151
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
152
	struct acpi_cpufreq_data *data = policy->driver_data;
153

154 155 156
	if (unlikely(!data))
		return -ENODEV;

157 158 159 160 161
	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

162
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
163 164
/*
 * sysfs store handler for the legacy "cpb" attribute.
 *
 * Parses @buf as a decimal 0 or 1 and switches boosting off or on for all
 * online CPUs via set_boost().
 *
 * The driver's boost_enabled flag is updated as well: show_cpb() reports
 * that flag and boost_notify() uses it to program CPUs coming online, so
 * leaving it untouched would make both disagree with the hardware state.
 * The flag is written before the MSRs so that a CPU brought online around
 * the update is programmed with the new value rather than the old one.
 *
 * Returns @count on success, -EINVAL if boost control is not available or
 * the input is not 0/1, or the error reported by set_boost().
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	unsigned int val = 0;
	bool prev;
	int ret;

	if (!acpi_cpufreq_driver.set_boost)
		return -EINVAL;

	ret = kstrtouint(buf, 10, &val);
	if (ret || val > 1)
		return -EINVAL;

	prev = acpi_cpufreq_driver.boost_enabled;
	acpi_cpufreq_driver.boost_enabled = val;

	ret = set_boost(val);
	if (ret) {
		/* Hardware was not changed; restore the reported state. */
		acpi_cpufreq_driver.boost_enabled = prev;
		return ret;
	}

	return count;
}

181 182
static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
183
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
184 185
}

186
cpufreq_freq_attr_rw(cpb);
187 188
#endif

189 190
static int check_est_cpu(unsigned int cpuid)
{
191
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
192

193
	return cpu_has(cpu, X86_FEATURE_EST);
194 195
}

196 197 198 199 200 201 202
/* Return non-zero if CPU @cpuid advertises X86_FEATURE_HW_PSTATE. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *info;

	info = &cpu_data(cpuid);
	return cpu_has(info, X86_FEATURE_HW_PSTATE);
}

203
/*
 * Translate a status value read from the I/O status port into a
 * frequency.
 *
 * @value: raw status value.
 * @data:  driver data of the policy.
 *
 * The frequency table may contain fewer entries than there are ACPI
 * performance states, because acpi_cpufreq_cpu_init() skips states whose
 * frequency is not lower than the previous valid one. Table entries must
 * therefore be mapped to performance states through ->driver_data (as
 * extract_msr() does) rather than by using the performance-state index
 * as a table index.
 *
 * Returns the frequency of the first table entry whose performance state
 * has a matching status, or 0 if none matches.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *pos;

	cpufreq_for_each_entry(pos, data->freq_table) {
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;
	}

	return 0;
}

217 218
/*
 * Translate a performance MSR value into a frequency.
 *
 * @msr is first reduced to the vendor-specific status bits, then compared
 * with the status of the performance state behind each frequency table
 * entry. Falls back to the first table entry when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *entry;
	u32 range;

	range = (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ?
		AMD_MSR_RANGE : INTEL_MSR_RANGE;
	msr &= range;

	cpufreq_for_each_entry(entry, data->freq_table) {
		if (perf->states[entry->driver_data].status == msr)
			return entry->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * Convert a raw register value into a frequency using the decoder that
 * matches the policy's access method. Returns 0 for an unknown method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	unsigned int method = data->cpu_feature;

	if (method == SYSTEM_INTEL_MSR_CAPABLE ||
	    method == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(val, data);

	if (method == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

248
static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
249 250
{
	u32 val, dummy;
251

252 253 254 255
	rdmsr(MSR_IA32_PERF_CTL, val, dummy);
	return val;
}

256
/*
 * Read-modify-write MSR_IA32_PERF_CTL on the executing CPU: only the
 * bits covered by INTEL_MSR_RANGE are replaced with those of @val.
 * The register descriptor argument is ignored.
 */
static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
{
	u32 low, high;

	rdmsr(MSR_IA32_PERF_CTL, low, high);

	low &= ~INTEL_MSR_RANGE;
	low |= val & INTEL_MSR_RANGE;

	wrmsr(MSR_IA32_PERF_CTL, low, high);
}

265
static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
266 267 268 269 270 271 272
{
	u32 val, dummy;

	rdmsr(MSR_AMD_PERF_CTL, val, dummy);
	return val;
}

273
/*
 * Write @val to MSR_AMD_PERF_CTL on the executing CPU with the second
 * word set to zero; the register descriptor argument is ignored.
 */
static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
{
	const u32 high = 0;

	wrmsr(MSR_AMD_PERF_CTL, val, high);
}

278
static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
279 280 281 282 283 284 285
{
	u32 val;

	acpi_os_read_port(reg->address, &val, reg->bit_width);
	return val;
}

286
/* Write @val to the port described by @reg (address and bit width). */
static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
{
	acpi_os_write_port(reg->address, val,
			   reg->bit_width);
}
290 291

struct drv_cmd {
292
	struct acpi_pct_register *reg;
293
	u32 val;
294 295 296 297
	union {
		void (*write)(struct acpi_pct_register *reg, u32 val);
		u32 (*read)(struct acpi_pct_register *reg);
	} func;
298 299
};

300 301
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
302
{
303
	struct drv_cmd *cmd = _cmd;
304

305
	cmd->val = cmd->func.read(cmd->reg);
306
}
L
Linus Torvalds 已提交
307

308
static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
309
{
310 311 312 313 314 315
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd cmd = {
		.reg = &perf->control_register,
		.func.read = data->cpu_freq_read,
	};
	int err;
316

317 318 319
	err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
	return cmd.val;
320
}
L
Linus Torvalds 已提交
321

322 323
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
324
{
325
	struct drv_cmd *cmd = _cmd;
326

327
	cmd->func.write(cmd->reg, cmd->val);
328 329
}

330 331
/*
 * Write @val to the performance control register on every CPU in @mask
 * using the policy's write helper.
 *
 * The local CPU is handled by a direct call when it is part of @mask; the
 * cross-call then covers the CPUs in @mask, waiting for completion. The
 * get_cpu()/put_cpu() pair brackets both steps.
 */
static void drv_write(struct acpi_cpufreq_data *data,
		      const struct cpumask *mask, u32 val)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd request = {
		.reg = &perf->control_register,
		.val = val,
		.func.write = data->cpu_freq_write,
	};
	int self = get_cpu();

	if (cpumask_test_cpu(self, mask))
		do_drv_write(&request);

	smp_call_function_many(mask, do_drv_write, &request, 1);

	put_cpu();
}
L
Linus Torvalds 已提交
348

349
/*
 * Return the raw control register value read on one of the CPUs in
 * @mask, or 0 when @mask is empty.
 */
static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
{
	u32 raw;

	if (unlikely(cpumask_empty(mask)))
		return 0;

	raw = drv_read(data, mask);
	pr_debug("get_cur_val = %u\n", raw);

	return raw;
}
L
Linus Torvalds 已提交
362

363 364
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
365 366
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
367
	unsigned int freq;
368
	unsigned int cached_freq;
369

370
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
371

372
	policy = cpufreq_cpu_get_raw(cpu);
373 374 375 376
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
377
	if (unlikely(!data || !data->freq_table))
378
		return 0;
L
Linus Torvalds 已提交
379

380
	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
381
	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
382 383 384 385 386 387 388 389
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

390
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
391

392
	return freq;
L
Linus Torvalds 已提交
393 394
}

395
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
396
				struct acpi_cpufreq_data *data)
397
{
398 399
	unsigned int cur_freq;
	unsigned int i;
L
Linus Torvalds 已提交
400

401
	for (i = 0; i < 100; i++) {
402
		cur_freq = extract_freq(get_cur_val(mask, data), data);
403 404 405 406 407 408 409 410
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
411
			       unsigned int index)
L
Linus Torvalds 已提交
412
{
413
	struct acpi_cpufreq_data *data = policy->driver_data;
414
	struct acpi_processor_performance *perf;
415
	const struct cpumask *mask;
416
	unsigned int next_perf_state = 0; /* Index into perf table */
417
	int result = 0;
418

419
	if (unlikely(data == NULL || data->freq_table == NULL)) {
420 421
		return -ENODEV;
	}
L
Linus Torvalds 已提交
422

423
	perf = to_perf_data(data);
424
	next_perf_state = data->freq_table[index].driver_data;
425
	if (perf->state == next_perf_state) {
426
		if (unlikely(data->resume)) {
427
			pr_debug("Called after resume, resetting to P%d\n",
428
				next_perf_state);
429 430
			data->resume = 0;
		} else {
431
			pr_debug("Already at target state (P%d)\n",
432
				next_perf_state);
433
			return 0;
434
		}
435 436
	}

437 438 439 440 441 442
	/*
	 * The core won't allow CPUs to go away until the governor has been
	 * stopped, so we can rely on the stability of policy->cpus.
	 */
	mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
		cpumask_of(policy->cpu) : policy->cpus;
443

444
	drv_write(data, mask, perf->states[next_perf_state].control);
445

446
	if (acpi_pstate_strict) {
447
		if (!check_freqs(mask, data->freq_table[index].frequency,
448
					data)) {
449
			pr_debug("acpi_cpufreq_target failed (%d)\n",
450
				policy->cpu);
451
			result = -EAGAIN;
452 453 454
		}
	}

455 456
	if (!result)
		perf->state = next_perf_state;
457 458

	return result;
L
Linus Torvalds 已提交
459 460
}

461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497
/*
 * ->fast_switch() callback: select the lowest table frequency that is
 * still >= @target_freq (or the first entry if none is) and program it by
 * calling the policy's write helper directly, without a cross-CPU call.
 *
 * Returns the frequency of the selected table entry. No register write
 * happens when the cached performance state already matches, unless
 * data->resume requests a forced write.
 *
 * NOTE(review): unlike the other callbacks in this file this function is
 * not declared static - confirm whether external linkage is intended.
 */
unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
				      unsigned int target_freq)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf;
	struct cpufreq_frequency_table *entry;
	unsigned int next_perf_state, next_freq, freq;

	/*
	 * Find the closest frequency above target_freq.
	 *
	 * The table is sorted in the reverse order with respect to the
	 * frequency and all of the entries are valid (see the initialization).
	 */
	entry = data->freq_table;
	/*
	 * Advance while the following entry still satisfies target_freq;
	 * the walk stops on the first entry that is too low or on the
	 * terminator, so stepping back once yields the selected entry.
	 */
	do {
		entry++;
		freq = entry->frequency;
	} while (freq >= target_freq && freq != CPUFREQ_TABLE_END);
	entry--;
	next_freq = entry->frequency;
	next_perf_state = entry->driver_data;

	perf = to_perf_data(data);
	if (perf->state == next_perf_state) {
		/* Already there: only write if a forced write is pending. */
		if (unlikely(data->resume))
			data->resume = 0;
		else
			return next_freq;
	}

	data->cpu_freq_write(&perf->control_register,
			     perf->states[next_perf_state].control);
	perf->state = next_perf_state;
	return next_freq;
}

L
Linus Torvalds 已提交
498
static unsigned long
499
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
500
{
501
	struct acpi_processor_performance *perf;
502

503
	perf = to_perf_data(data);
L
Linus Torvalds 已提交
504 505 506 507
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
508
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
509

510
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
511
			freq = freqn;
512
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
513
			if ((2 * cpu_khz) > (freqn + freq)) {
514
				perf->state = i;
515
				return freq;
L
Linus Torvalds 已提交
516 517
			}
		}
518
		perf->state = perf->state_count-1;
519
		return freqn;
520
	} else {
L
Linus Torvalds 已提交
521
		/* assume CPU is at P0... */
522 523 524
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
525 526
}

527 528 529 530 531 532 533 534 535 536 537
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
554 555 556 557
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
558
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


/*
 * Hotplug notifier block; registered in acpi_cpufreq_boost_init() and
 * unregistered in acpi_cpufreq_boost_exit().
 */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

578 579 580 581 582 583 584 585
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
586
static int __init acpi_cpufreq_early_init(void)
587
{
588
	unsigned int i;
589
	pr_debug("acpi_cpufreq_early_init\n");
590

591 592
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
593
		pr_debug("Memory allocation error for acpi_perf_data.\n");
594
		return -ENOMEM;
595
	}
596
	for_each_possible_cpu(i) {
597
		if (!zalloc_cpumask_var_node(
598 599
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
600 601 602 603 604 605

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
606 607

	/* Do initialization in ACPI core */
608 609
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
610 611
}

612
#ifdef CONFIG_SMP
613 614 615 616 617 618 619 620
/*
 * Some BIOSes do SW_ANY coordination internally, either by setting it up
 * in hardware or by doing it in BIOS firmware, and do not inform the OS
 * about it. If this goes undetected, the CPU may end up running at a
 * different speed than the OS intended. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

621
static int sw_any_bug_found(const struct dmi_system_id *d)
622 623 624 625 626
{
	bios_with_sw_any_bug = 1;
	return 0;
}

627
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
628 629 630 631 632 633 634 635 636 637 638
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
639 640 641

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
642 643
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
644 645
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
646
	 * Both Processor Cores to Lock Up. */
647 648 649
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
650 651 652 653 654
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
655
			return -ENODEV;
656
		    }
657 658 659
		}
	return 0;
}
660
#endif
661

662
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
663
{
664 665 666 667 668
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
669
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
670
	struct acpi_processor_performance *perf;
671 672 673
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
674

675
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
676

677
#ifdef CONFIG_SMP
678 679 680 681 682
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
683 684
#endif

685
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
686
	if (!data)
687
		return -ENOMEM;
L
Linus Torvalds 已提交
688

689 690 691 692 693
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

694
	perf = per_cpu_ptr(acpi_perf_data, cpu);
695
	data->acpi_perf_cpu = cpu;
696
	policy->driver_data = data;
L
Linus Torvalds 已提交
697

698
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
699
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
700

701
	result = acpi_processor_register_performance(perf, cpu);
L
Linus Torvalds 已提交
702
	if (result)
703
		goto err_free_mask;
L
Linus Torvalds 已提交
704

705
	policy->shared_type = perf->shared_type;
706

707
	/*
708
	 * Will let policy->cpus know about dependency only when software
709 710 711
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
712
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
713
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
714
	}
715
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
716 717 718

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
719
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
720
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
721
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
722
	}
723 724 725 726

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
727 728
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
729 730 731
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
732
#endif
733

L
Linus Torvalds 已提交
734
	/* capability check */
735
	if (perf->state_count <= 1) {
736
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
737 738 739
		result = -ENODEV;
		goto err_unreg;
	}
740

741 742 743 744 745 746
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
747
	case ACPI_ADR_SPACE_SYSTEM_IO:
748 749 750 751 752 753
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
754
		pr_debug("SYSTEM IO addr space\n");
755
		data->cpu_feature = SYSTEM_IO_CAPABLE;
756 757
		data->cpu_freq_read = cpu_freq_read_io;
		data->cpu_freq_write = cpu_freq_write_io;
758
		break;
759
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
760
		pr_debug("HARDWARE addr space\n");
761 762
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
763 764
			data->cpu_freq_read = cpu_freq_read_intel;
			data->cpu_freq_write = cpu_freq_write_intel;
765
			break;
766
		}
767 768
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
769 770
			data->cpu_freq_read = cpu_freq_read_amd;
			data->cpu_freq_write = cpu_freq_write_amd;
771 772 773 774
			break;
		}
		result = -ENODEV;
		goto err_unreg;
775
	default:
776
		pr_debug("Unknown addr space %d\n",
777
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
778 779 780 781
		result = -ENODEV;
		goto err_unreg;
	}

782
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
783
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
784 785 786 787 788 789 790
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
791
	for (i = 0; i < perf->state_count; i++) {
792 793 794 795
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
796 797
	}

798 799 800 801
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
802 803
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
804 805
	}

L
Linus Torvalds 已提交
806
	/* table init */
807 808
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
809
		    data->freq_table[valid_states-1].frequency / 1000)
810 811
			continue;

812
		data->freq_table[valid_states].driver_data = i;
813
		data->freq_table[valid_states].frequency =
814
		    perf->states[i].core_frequency * 1000;
815
		valid_states++;
L
Linus Torvalds 已提交
816
	}
817
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
818
	perf->state = 0;
L
Linus Torvalds 已提交
819

820
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
821
	if (result)
L
Linus Torvalds 已提交
822 823
		goto err_freqfree;

824 825 826
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

827
	switch (perf->control_register.space_id) {
828
	case ACPI_ADR_SPACE_SYSTEM_IO:
829 830 831 832 833 834
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
835 836
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
837
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
838
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
839
		break;
840
	default:
841 842 843
		break;
	}

L
Linus Torvalds 已提交
844 845 846
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

847
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
848
	for (i = 0; i < perf->state_count; i++)
849
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
850
			(i == perf->state ? '*' : ' '), i,
851 852 853
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
854

855 856 857 858 859
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
860

861 862 863
	policy->fast_switch_possible = !acpi_pstate_strict &&
		!(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);

864
	return result;
L
Linus Torvalds 已提交
865

866
err_freqfree:
L
Linus Torvalds 已提交
867
	kfree(data->freq_table);
868
err_unreg:
869
	acpi_processor_unregister_performance(cpu);
870 871
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
872
err_free:
L
Linus Torvalds 已提交
873
	kfree(data);
874
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
875

876
	return result;
L
Linus Torvalds 已提交
877 878
}

879
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
880
{
881
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
882

883
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
884 885

	if (data) {
886
		policy->fast_switch_possible = false;
887
		policy->driver_data = NULL;
888
		acpi_processor_unregister_performance(data->acpi_perf_cpu);
889
		free_cpumask_var(data->freqdomain_cpus);
890
		kfree(data->freq_table);
L
Linus Torvalds 已提交
891 892 893
		kfree(data);
	}

894
	return 0;
L
Linus Torvalds 已提交
895 896
}

897
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
898
{
899
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
900

901
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
902 903 904

	data->resume = 1;

905
	return 0;
L
Linus Torvalds 已提交
906 907
}

908
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
909
	&cpufreq_freq_attr_scaling_available_freqs,
910
	&freqdomain_cpus,
911 912 913
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
L
Linus Torvalds 已提交
914 915 916 917
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
918
	.verify		= cpufreq_generic_frequency_table_verify,
919
	.target_index	= acpi_cpufreq_target,
920
	.fast_switch	= acpi_cpufreq_fast_switch,
921 922 923 924 925 926
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
L
Linus Torvalds 已提交
927 928
};

929 930 931 932 933 934 935 936
/*
 * Set up boost control when the boot CPU has X86_FEATURE_CPB or
 * X86_FEATURE_IDA.
 *
 * Allocates the MSR scratch buffer, installs set_boost() in the driver,
 * takes the initial boost state from CPU 0, applies that state to all
 * online CPUs and registers the hotplug notifier. Silently does nothing
 * if the feature is absent or the buffer cannot be allocated.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	msrs = msrs_alloc();
	if (!msrs)
		return;

	acpi_cpufreq_driver.set_boost = set_boost;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	cpu_notifier_register_begin();

	/* Force all MSRs to the same value */
	boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
		       cpu_online_mask);

	__register_cpu_notifier(&boost_nb);

	cpu_notifier_register_done();
}

952
static void acpi_cpufreq_boost_exit(void)
953 954 955 956 957 958 959 960 961
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

962
/*
 * Module initialisation.
 *
 * Returns -ENODEV when ACPI is disabled, -EEXIST when another cpufreq
 * driver is already registered, the error from the early init, or the
 * result of cpufreq_register_driver(). If registration fails, the
 * performance data and the boost setup are released again.
 */
static int __init acpi_cpufreq_init(void)
{
	int err;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	err = acpi_cpufreq_early_init();
	if (err)
		return err;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **slot = acpi_cpufreq_attr;

		pr_debug("CPB unsupported, do not expose it\n");

		/* Find the "cpb" entry and terminate the list there. */
		while (*slot && *slot != &cpb)
			slot++;
		if (*slot)
			*slot = NULL;
	}
#endif
	acpi_cpufreq_boost_init();

	err = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (!err)
		return 0;

	free_acpi_perf_data();
	acpi_cpufreq_boost_exit();

	return err;
}

1008
/*
 * Module exit: tear down boost control, unregister the driver and free
 * the percpu performance data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	/* Boost notifier and MSR buffer go first. */
	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	/* Released last, after the driver has been unregistered. */
	free_acpi_perf_data();
}

1019
module_param(acpi_pstate_strict, uint, 0644);
1020
MODULE_PARM_DESC(acpi_pstate_strict,
1021 1022
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
1023 1024 1025 1026

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

1027 1028 1029 1030 1031 1032 1033
/*
 * x86 CPU feature match table exported via MODULE_DEVICE_TABLE():
 * matches CPUs with X86_FEATURE_ACPI or X86_FEATURE_HW_PSTATE.
 */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1034 1035 1036 1037 1038 1039 1040
/*
 * ACPI processor object/device IDs exported via MODULE_DEVICE_TABLE();
 * the table is not referenced elsewhere in the code visible here.
 */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1041
MODULE_ALIAS("acpi");