/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54
#define PFX "acpi-cpufreq: "

55 56 57
/*
 * Access method used for the P-state control/status registers of a CPU.
 * The value is stored in acpi_cpufreq_data.cpu_feature and drv_cmd.type.
 */
enum {
	/* Value of a zero-initialised cpu_feature; no access method selected */
	UNDEFINED_CAPABLE = 0,
	/* P-states are driven through MSR_IA32_PERF_CTL */
	SYSTEM_INTEL_MSR_CAPABLE,
	/* P-states are driven through MSR_AMD_PERF_CTL */
	SYSTEM_AMD_MSR_CAPABLE,
	/* P-states are driven through the ACPI-described I/O port */
	SYSTEM_IO_CAPABLE,
};

/* Bits of the Intel perf MSR value that this driver reads and rewrites */
#define INTEL_MSR_RANGE		(0xffff)
/* Bits of the AMD perf MSR value that this driver compares against */
#define AMD_MSR_RANGE		(0x7)

/* Boost-disable bit tested and toggled in MSR_K7_HWCR */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
71
	unsigned int acpi_perf_cpu;
72
	cpumask_var_t freqdomain_cpus;
L
Linus Torvalds 已提交
73 74
};

75
/* acpi_perf_data is a pointer to percpu data. */
76
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
77

78 79 80 81 82
/* Return the per-cpu ACPI performance data that backs @data. */
static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
{
	unsigned int owner_cpu = data->acpi_perf_cpu;

	return per_cpu_ptr(acpi_perf_data, owner_cpu);
}

L
Linus Torvalds 已提交
83 84
static struct cpufreq_driver acpi_cpufreq_driver;

85
static unsigned int acpi_pstate_strict;
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * boost_set_msrs - enable or disable boosting on every CPU in @cpumask
 * @enable:  true clears the boost-disable bit, false sets it
 * @cpumask: CPUs whose MSR is rewritten
 *
 * Does a read-modify-write of the vendor-specific MSR using the per-cpu
 * "msrs" buffer. Does nothing for vendors other than Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 cpu;
	u32 msr_addr;
	u64 msr_mask;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		msr_addr = MSR_IA32_MISC_ENABLE;
		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		msr_addr = MSR_K7_HWCR;
		msr_mask = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	rdmsr_on_cpus(cpumask, msr_addr, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *reg = per_cpu_ptr(msrs, cpu);

		/* The MSR bit means "disabled", hence the inverted logic */
		reg->q = enable ? (reg->q & ~msr_mask) : (reg->q | msr_mask);
	}

	wrmsr_on_cpus(cpumask, msr_addr, msrs);
}

138
static int _store_boost(int val)
139 140 141 142 143 144
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

145
	return 0;
146 147
}

148 149
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
150
	struct acpi_cpufreq_data *data = policy->driver_data;
151

152 153 154
	if (unlikely(!data))
		return -ENODEV;

155 156 157 158 159
	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

160
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
/*
 * store_boost - parse a "0"/"1" string and apply it as the boost setting
 *
 * Returns @count on success, -EINVAL when boosting is not supported or
 * @buf does not parse as the decimal value 0 or 1.
 */
static ssize_t store_boost(const char *buf, size_t count)
{
	unsigned long requested = 0;

	if (!acpi_cpufreq_driver.boost_supported)
		return -EINVAL;

	if (kstrtoul(buf, 10, &requested) || requested > 1)
		return -EINVAL;

	_store_boost((int) requested);

	return count;
}

178 179 180
/*
 * sysfs "cpb" store handler. @policy is not used: the write is forwarded
 * to store_boost(), which acts on all online CPUs.
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	ssize_t consumed = store_boost(buf, count);

	return consumed;
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
186
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
187 188
}

189
cpufreq_freq_attr_rw(cpb);
190 191
#endif

192 193
static int check_est_cpu(unsigned int cpuid)
{
194
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
195

196
	return cpu_has(cpu, X86_FEATURE_EST);
197 198
}

199 200 201 202 203 204 205
/* Return non-zero when CPU @cpuid advertises X86_FEATURE_HW_PSTATE. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	return cpu_has(&cpu_data(cpuid), X86_FEATURE_HW_PSTATE);
}

206
/*
 * extract_io - translate an I/O status value into a frequency
 * @value: raw value read from the status port
 * @data:  driver data holding the frequency table
 *
 * Returns the frequency of the table entry whose ACPI P-state reports
 * @value as its status, or 0 when no entry matches.
 *
 * The frequency table can have fewer entries than the ACPI state list
 * (table setup skips states that do not lower the frequency), so the
 * perf-state index must not be used to index the table directly. Each
 * table entry records its ACPI state in ->driver_data; map through that,
 * exactly as extract_msr() does.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *pos;

	cpufreq_for_each_entry(pos, data->freq_table)
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;

	return 0;
}

220 221
/*
 * extract_msr - translate a perf MSR value into a frequency
 * @msr:  low 32 bits read from the perf MSR
 * @data: driver data holding the frequency table
 *
 * Masks @msr with the vendor-specific range and returns the frequency of
 * the table entry whose ACPI P-state status equals the masked value.
 * Falls back to the first table entry when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *entry;
	u32 range;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		range = AMD_MSR_RANGE;
	else
		range = INTEL_MSR_RANGE;
	msr &= range;

	cpufreq_for_each_entry(entry, data->freq_table) {
		if (perf->states[entry->driver_data].status == msr)
			return entry->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * extract_freq - translate a raw register value into a frequency
 *
 * Dispatches on the access method recorded in @data: MSR-based methods go
 * through extract_msr(), the I/O method through extract_io(). Returns 0
 * for any other access method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	unsigned int feature = data->cpu_feature;

	if (feature == SYSTEM_INTEL_MSR_CAPABLE ||
	    feature == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(val, data);

	if (feature == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

/* Target of an MSR-based drv_cmd: the MSR number to read or write. */
struct msr_addr {
	u32 reg;
};

255 256 257 258 259 260
/* Target of an I/O-port based drv_cmd: port number and access width. */
struct io_addr {
	u16 port;
	u8 bit_width;
};

struct drv_cmd {
261
	unsigned int type;
262
	const struct cpumask *mask;
263 264 265 266
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
267 268 269
	u32 val;
};

270 271
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
272
{
273
	struct drv_cmd *cmd = _cmd;
274 275 276
	u32 h;

	switch (cmd->type) {
277
	case SYSTEM_INTEL_MSR_CAPABLE:
278
	case SYSTEM_AMD_MSR_CAPABLE:
279 280
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
281
	case SYSTEM_IO_CAPABLE:
282 283 284
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
285
		break;
286
	default:
287 288
		break;
	}
289
}
L
Linus Torvalds 已提交
290

291 292
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
293
{
294
	struct drv_cmd *cmd = _cmd;
295
	u32 lo, hi;
296 297

	switch (cmd->type) {
298
	case SYSTEM_INTEL_MSR_CAPABLE:
299 300 301
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
302
		break;
303 304 305
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
306
	case SYSTEM_IO_CAPABLE:
307 308 309
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
310
		break;
311
	default:
312 313
		break;
	}
314
}
L
Linus Torvalds 已提交
315

316
static void drv_read(struct drv_cmd *cmd)
317
{
318
	int err;
319 320
	cmd->val = 0;

321 322
	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
323 324 325 326
}

static void drv_write(struct drv_cmd *cmd)
{
327 328 329 330 331
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
332
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
333
	put_cpu();
334
}
L
Linus Torvalds 已提交
335

336 337
static u32
get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
338
{
339 340
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
341

342
	if (unlikely(cpumask_empty(mask)))
343
		return 0;
L
Linus Torvalds 已提交
344

345
	switch (data->cpu_feature) {
346 347
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
348
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
349
		break;
350 351
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
352
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
353
		break;
354 355
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
356
		perf = to_perf_data(data);
357 358 359 360 361 362 363
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

364
	cmd.mask = mask;
365
	drv_read(&cmd);
L
Linus Torvalds 已提交
366

367
	pr_debug("get_cur_val = %u\n", cmd.val);
368 369 370

	return cmd.val;
}
L
Linus Torvalds 已提交
371

372 373
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
374 375
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
376
	unsigned int freq;
377
	unsigned int cached_freq;
378

379
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
380

381
	policy = cpufreq_cpu_get_raw(cpu);
382 383 384 385
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
386
	if (unlikely(!data || !data->freq_table))
387
		return 0;
L
Linus Torvalds 已提交
388

389
	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
390
	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
391 392 393 394 395 396 397 398
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

399
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
400

401
	return freq;
L
Linus Torvalds 已提交
402 403
}

404
/*
 * check_freqs - poll until the hardware reports frequency @freq
 *
 * Reads the current frequency up to 100 times, delaying 10us after each
 * mismatch. Returns 1 as soon as a read matches @freq, 0 otherwise.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int attempts = 100;

	while (attempts--) {
		if (extract_freq(get_cur_val(mask, data), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
420
			       unsigned int index)
L
Linus Torvalds 已提交
421
{
422
	struct acpi_cpufreq_data *data = policy->driver_data;
423 424
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
425
	unsigned int next_perf_state = 0; /* Index into perf table */
426
	int result = 0;
427

428
	if (unlikely(data == NULL || data->freq_table == NULL)) {
429 430
		return -ENODEV;
	}
L
Linus Torvalds 已提交
431

432
	perf = to_perf_data(data);
433
	next_perf_state = data->freq_table[index].driver_data;
434
	if (perf->state == next_perf_state) {
435
		if (unlikely(data->resume)) {
436
			pr_debug("Called after resume, resetting to P%d\n",
437
				next_perf_state);
438 439
			data->resume = 0;
		} else {
440
			pr_debug("Already at target state (P%d)\n",
441
				next_perf_state);
442
			goto out;
443
		}
444 445
	}

446 447 448 449
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
450
		cmd.val = (u32) perf->states[next_perf_state].control;
451
		break;
452 453 454 455 456
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
457 458 459 460 461 462 463
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
464 465
		result = -ENODEV;
		goto out;
466
	}
467

468
	/* cpufreq holds the hotplug lock, so we are safe from here on */
469
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
470
		cmd.mask = policy->cpus;
471
	else
472
		cmd.mask = cpumask_of(policy->cpu);
473

474
	drv_write(&cmd);
475

476
	if (acpi_pstate_strict) {
477 478
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
479
			pr_debug("acpi_cpufreq_target failed (%d)\n",
480
				policy->cpu);
481
			result = -EAGAIN;
482 483 484
		}
	}

485 486
	if (!result)
		perf->state = next_perf_state;
487

488
out:
489
	return result;
L
Linus Torvalds 已提交
490 491 492
}

static unsigned long
493
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
494
{
495
	struct acpi_processor_performance *perf;
496

497
	perf = to_perf_data(data);
L
Linus Torvalds 已提交
498 499 500 501
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
502
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
503

504
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
505
			freq = freqn;
506
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
507
			if ((2 * cpu_khz) > (freqn + freq)) {
508
				perf->state = i;
509
				return freq;
L
Linus Torvalds 已提交
510 511
			}
		}
512
		perf->state = perf->state_count-1;
513
		return freqn;
514
	} else {
L
Linus Torvalds 已提交
515
		/* assume CPU is at P0... */
516 517 518
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
519 520
}

521 522 523 524 525 526 527 528 529 530 531
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
550
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


/* Hotplug notifier registered by acpi_cpufreq_boost_init(). */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

570 571 572 573 574 575 576 577
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
578
static int __init acpi_cpufreq_early_init(void)
579
{
580
	unsigned int i;
581
	pr_debug("acpi_cpufreq_early_init\n");
582

583 584
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
585
		pr_debug("Memory allocation error for acpi_perf_data.\n");
586
		return -ENOMEM;
587
	}
588
	for_each_possible_cpu(i) {
589
		if (!zalloc_cpumask_var_node(
590 591
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
592 593 594 595 596 597

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
598 599

	/* Do initialization in ACPI core */
600 601
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
602 603
}

604
#ifdef CONFIG_SMP
605 606 607 608 609 610 611 612
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

613
static int sw_any_bug_found(const struct dmi_system_id *d)
614 615 616 617 618
{
	bios_with_sw_any_bug = 1;
	return 0;
}

619
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
620 621 622 623 624 625 626 627 628 629 630
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
631 632 633

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
634 635
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
636 637
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
638
	 * Both Processor Cores to Lock Up. */
639 640 641
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
642 643 644 645 646
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
647
			return -ENODEV;
648
		    }
649 650 651
		}
	return 0;
}
652
#endif
653

654
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
655
{
656 657 658 659 660
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
661
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
662
	struct acpi_processor_performance *perf;
663 664 665
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
666

667
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
668

669
#ifdef CONFIG_SMP
670 671 672 673 674
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
675 676
#endif

677
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
678
	if (!data)
679
		return -ENOMEM;
L
Linus Torvalds 已提交
680

681 682 683 684 685
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

686
	perf = per_cpu_ptr(acpi_perf_data, cpu);
687
	data->acpi_perf_cpu = cpu;
688
	policy->driver_data = data;
L
Linus Torvalds 已提交
689

690
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
691
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
692

693
	result = acpi_processor_register_performance(perf, cpu);
L
Linus Torvalds 已提交
694
	if (result)
695
		goto err_free_mask;
L
Linus Torvalds 已提交
696

697
	policy->shared_type = perf->shared_type;
698

699
	/*
700
	 * Will let policy->cpus know about dependency only when software
701 702 703
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
704
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
705
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
706
	}
707
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
708 709 710

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
711
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
712
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
713
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
714
	}
715 716 717 718

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
719 720
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
721 722 723
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
724
#endif
725

L
Linus Torvalds 已提交
726
	/* capability check */
727
	if (perf->state_count <= 1) {
728
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
729 730 731
		result = -ENODEV;
		goto err_unreg;
	}
732

733 734 735 736 737 738
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
739
	case ACPI_ADR_SPACE_SYSTEM_IO:
740 741 742 743 744 745
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
746
		pr_debug("SYSTEM IO addr space\n");
747 748
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
749
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
750
		pr_debug("HARDWARE addr space\n");
751 752 753
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
754
		}
755 756 757 758 759 760
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
761
	default:
762
		pr_debug("Unknown addr space %d\n",
763
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
764 765 766 767
		result = -ENODEV;
		goto err_unreg;
	}

768
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
769
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
770 771 772 773 774 775 776
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
777
	for (i = 0; i < perf->state_count; i++) {
778 779 780 781
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
782 783
	}

784 785 786 787
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
788 789
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
790 791
	}

L
Linus Torvalds 已提交
792
	/* table init */
793 794
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
795
		    data->freq_table[valid_states-1].frequency / 1000)
796 797
			continue;

798
		data->freq_table[valid_states].driver_data = i;
799
		data->freq_table[valid_states].frequency =
800
		    perf->states[i].core_frequency * 1000;
801
		valid_states++;
L
Linus Torvalds 已提交
802
	}
803
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
804
	perf->state = 0;
L
Linus Torvalds 已提交
805

806
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
807
	if (result)
L
Linus Torvalds 已提交
808 809
		goto err_freqfree;

810 811 812
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

813
	switch (perf->control_register.space_id) {
814
	case ACPI_ADR_SPACE_SYSTEM_IO:
815 816 817 818 819 820
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
821 822
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
823
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
824
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
825
		break;
826
	default:
827 828 829
		break;
	}

L
Linus Torvalds 已提交
830 831 832
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

833
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
834
	for (i = 0; i < perf->state_count; i++)
835
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
836
			(i == perf->state ? '*' : ' '), i,
837 838 839
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
840

841 842 843 844 845
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
846

847
	return result;
L
Linus Torvalds 已提交
848

849
err_freqfree:
L
Linus Torvalds 已提交
850
	kfree(data->freq_table);
851
err_unreg:
852
	acpi_processor_unregister_performance(cpu);
853 854
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
855
err_free:
L
Linus Torvalds 已提交
856
	kfree(data);
857
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
858

859
	return result;
L
Linus Torvalds 已提交
860 861
}

862
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
863
{
864
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
865

866
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
867 868

	if (data) {
869
		policy->driver_data = NULL;
870
		acpi_processor_unregister_performance(data->acpi_perf_cpu);
871
		free_cpumask_var(data->freqdomain_cpus);
872
		kfree(data->freq_table);
L
Linus Torvalds 已提交
873 874 875
		kfree(data);
	}

876
	return 0;
L
Linus Torvalds 已提交
877 878
}

879
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
880
{
881
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
882

883
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
884 885 886

	data->resume = 1;

887
	return 0;
L
Linus Torvalds 已提交
888 889
}

890
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
891
	&cpufreq_freq_attr_scaling_available_freqs,
892
	&freqdomain_cpus,
893 894 895
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
L
Linus Torvalds 已提交
896 897 898 899
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
900
	.verify		= cpufreq_generic_frequency_table_verify,
901
	.target_index	= acpi_cpufreq_target,
902 903 904 905 906 907
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
908
	.set_boost      = _store_boost,
L
Linus Torvalds 已提交
909 910
};

911 912 913 914 915 916 917 918
/*
 * acpi_cpufreq_boost_init - set up boost support
 *
 * Does nothing unless the boot CPU has X86_FEATURE_CPB or
 * X86_FEATURE_IDA. Otherwise allocates the per-cpu MSR buffer, records
 * the boost state of CPU 0 as the global state, applies that state to all
 * online CPUs and registers the hotplug notifier. An allocation failure
 * silently leaves boost support disabled.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	msrs = msrs_alloc();
	if (!msrs)
		return;

	acpi_cpufreq_driver.boost_supported = true;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	cpu_notifier_register_begin();

	/* Force all MSRs to the same value */
	boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
		       cpu_online_mask);

	__register_cpu_notifier(&boost_nb);

	cpu_notifier_register_done();
}

934
static void acpi_cpufreq_boost_exit(void)
935 936 937 938 939 940 941 942 943
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

944
/*
 * acpi_cpufreq_init - module entry point
 *
 * Returns -ENODEV when ACPI is disabled, -EEXIST when another cpufreq
 * driver is already registered, or the error from early init / driver
 * registration. On a registration failure the per-cpu performance data
 * and the boost state are torn down again.
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **attr = acpi_cpufreq_attr;

		pr_debug("CPB unsupported, do not expose it\n");

		/* Cut the attribute list off at the "cpb" entry, if present */
		while (*attr && *attr != &cpb)
			attr++;
		if (*attr)
			*attr = NULL;
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (!ret)
		return 0;

	free_acpi_perf_data();
	acpi_cpufreq_boost_exit();

	return ret;
}

990
/*
 * acpi_cpufreq_exit - module exit point
 *
 * Tears down boost support, unregisters the cpufreq driver and finally
 * frees the per-cpu ACPI performance data.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();
	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_acpi_perf_data();
}

1001
module_param(acpi_pstate_strict, uint, 0644);
1002
MODULE_PARM_DESC(acpi_pstate_strict,
1003 1004
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
1005 1006 1007 1008

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

1009 1010 1011 1012 1013 1014 1015
/*
 * CPU feature match table exported through MODULE_DEVICE_TABLE; lists the
 * X86_FEATURE_ACPI and X86_FEATURE_HW_PSTATE feature bits.
 */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1016 1017 1018 1019 1020 1021 1022
/*
 * ACPI device match table exported through MODULE_DEVICE_TABLE; lists the
 * processor object and processor device HIDs.
 */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1023
MODULE_ALIAS("acpi");