/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54
/* Prefix used by this driver's pr_info_once() message. */
#define PFX "acpi-cpufreq: "

/*
 * Register access method for a CPU's P-state control/status registers.
 * Stored in acpi_cpufreq_data.cpu_feature and drv_cmd.type.
 */
enum {
	UNDEFINED_CAPABLE		= 0,
	SYSTEM_INTEL_MSR_CAPABLE	= 1,
	SYSTEM_AMD_MSR_CAPABLE		= 2,
	SYSTEM_IO_CAPABLE		= 3,
};

/* Masks applied to a perf MSR value before it is compared or written. */
#define INTEL_MSR_RANGE		(0xffff)
#define AMD_MSR_RANGE		(0x7)

/* Bit in MSR_K7_HWCR that, when set, disables boosting (see boost_state()). */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
71
	unsigned int acpi_perf_cpu;
72
	cpumask_var_t freqdomain_cpus;
L
Linus Torvalds 已提交
73 74
};

75
/* acpi_perf_data is a pointer to percpu data. */
76
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
77

78 79 80 81 82
static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
{
	return per_cpu_ptr(acpi_perf_data, data->acpi_perf_cpu);
}

L
Linus Torvalds 已提交
83 84
static struct cpufreq_driver acpi_cpufreq_driver;

85
static unsigned int acpi_pstate_strict;
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * Enable or disable boosting on every CPU in @cpumask by clearing or
 * setting the vendor-specific boost-disable bit.  Does nothing on
 * vendors other than Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 reg_addr;
	u64 disable_bit;
	u32 cpu;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		reg_addr = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		reg_addr = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	/* Read-modify-write through the per-CPU msrs buffer. */
	rdmsr_on_cpus(cpumask, reg_addr, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *cur = per_cpu_ptr(msrs, cpu);

		cur->q = enable ? (cur->q & ~disable_bit)
				: (cur->q | disable_bit);
	}

	wrmsr_on_cpus(cpumask, reg_addr, msrs);
}

138
static int _store_boost(int val)
139 140 141 142 143 144
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

145
	return 0;
146 147
}

148 149
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
150
	struct acpi_cpufreq_data *data = policy->driver_data;
151 152 153 154 155 156

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

157
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/*
 * Parse a "0"/"1" boost request from @buf and apply it to all online CPUs.
 *
 * Returns @count on success, -EINVAL if boosting is unsupported or the
 * input is not 0 or 1, or the error reported by _store_boost().
 */
static ssize_t store_boost(const char *buf, size_t count)
{
	unsigned long val = 0;
	int ret;

	if (!acpi_cpufreq_driver.boost_supported)
		return -EINVAL;

	ret = kstrtoul(buf, 10, &val);
	if (ret || (val > 1))
		return -EINVAL;

	ret = _store_boost((int) val);
	if (ret)
		return ret;

	/*
	 * Keep the cached state in step with the MSRs: show_cpb() and the
	 * CPU hotplug notifier both read boost_enabled, and would otherwise
	 * report or re-apply the previous setting.
	 */
	acpi_cpufreq_driver.boost_enabled = val;

	return count;
}

175 176 177
/* sysfs store handler for "cpb"; @policy is unused, the effect is global. */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	ssize_t consumed = store_boost(buf, count);

	return consumed;
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
183
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
184 185
}

186
cpufreq_freq_attr_rw(cpb);
187 188
#endif

189 190
static int check_est_cpu(unsigned int cpuid)
{
191
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
192

193
	return cpu_has(cpu, X86_FEATURE_EST);
194 195
}

196 197 198 199 200 201 202
/* Return non-zero if CPU @cpuid advertises X86_FEATURE_HW_PSTATE. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *info = &cpu_data(cpuid);
	int has_hw_pstate = cpu_has(info, X86_FEATURE_HW_PSTATE);

	return has_hw_pstate;
}

203
/*
 * Translate a status value read from the I/O port into a frequency.
 *
 * The frequency table may contain fewer entries than there are ACPI
 * P-states (acpi_cpufreq_cpu_init() skips some), so a table index is not
 * a P-state index.  Walk the table and map each entry back to its P-state
 * through ->driver_data, as extract_msr() does.
 *
 * Returns the matching frequency, or 0 if no table entry matches.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *pos;

	cpufreq_for_each_entry(pos, data->freq_table)
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;

	return 0;
}

217 218
/*
 * Translate a perf MSR value into a frequency.  The value is masked with
 * the vendor-specific range and compared with the status of each P-state
 * referenced by the frequency table.  Falls back to the first table entry
 * when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *entry;
	u32 mask, pstate;

	mask = (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ?
		AMD_MSR_RANGE : INTEL_MSR_RANGE;
	pstate = msr & mask;

	cpufreq_for_each_entry(entry, data->freq_table) {
		if (perf->states[entry->driver_data].status == pstate)
			return entry->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * Convert raw register value @val into a frequency using the decoder
 * that matches the policy's access method; 0 for an unknown method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	unsigned int feature = data->cpu_feature;

	if (feature == SYSTEM_INTEL_MSR_CAPABLE ||
	    feature == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(val, data);

	if (feature == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

struct msr_addr {
	u32 reg;
};

252 253 254 255 256 257
struct io_addr {
	u16 port;
	u8 bit_width;
};

struct drv_cmd {
258
	unsigned int type;
259
	const struct cpumask *mask;
260 261 262 263
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
264 265 266
	u32 val;
};

267 268
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
269
{
270
	struct drv_cmd *cmd = _cmd;
271 272 273
	u32 h;

	switch (cmd->type) {
274
	case SYSTEM_INTEL_MSR_CAPABLE:
275
	case SYSTEM_AMD_MSR_CAPABLE:
276 277
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
278
	case SYSTEM_IO_CAPABLE:
279 280 281
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
282
		break;
283
	default:
284 285
		break;
	}
286
}
L
Linus Torvalds 已提交
287

288 289
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
290
{
291
	struct drv_cmd *cmd = _cmd;
292
	u32 lo, hi;
293 294

	switch (cmd->type) {
295
	case SYSTEM_INTEL_MSR_CAPABLE:
296 297 298
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
299
		break;
300 301 302
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
303
	case SYSTEM_IO_CAPABLE:
304 305 306
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
307
		break;
308
	default:
309 310
		break;
	}
311
}
L
Linus Torvalds 已提交
312

313
static void drv_read(struct drv_cmd *cmd)
314
{
315
	int err;
316 317
	cmd->val = 0;

318 319
	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
320 321 322 323
}

static void drv_write(struct drv_cmd *cmd)
{
324 325 326 327 328
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
329
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
330
	put_cpu();
331
}
L
Linus Torvalds 已提交
332

333 334
static u32
get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
335
{
336 337
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
338

339
	if (unlikely(cpumask_empty(mask)))
340
		return 0;
L
Linus Torvalds 已提交
341

342
	switch (data->cpu_feature) {
343 344
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
345
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
346
		break;
347 348
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
349
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
350
		break;
351 352
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
353
		perf = to_perf_data(data);
354 355 356 357 358 359 360
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

361
	cmd.mask = mask;
362
	drv_read(&cmd);
L
Linus Torvalds 已提交
363

364
	pr_debug("get_cur_val = %u\n", cmd.val);
365 366 367

	return cmd.val;
}
L
Linus Torvalds 已提交
368

369 370
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
371 372
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
373
	unsigned int freq;
374
	unsigned int cached_freq;
375

376
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
377

378
	policy = cpufreq_cpu_get_raw(cpu);
379 380 381 382
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
383
	if (unlikely(!data || !data->freq_table))
384
		return 0;
L
Linus Torvalds 已提交
385

386
	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
387
	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
388 389 390 391 392 393 394 395
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

396
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
397

398
	return freq;
L
Linus Torvalds 已提交
399 400
}

401
/*
 * Poll up to 100 times, 10us apart, until the frequency read back from
 * @mask equals @freq.  Returns 1 on a match, 0 if it never matched.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int tries = 100;

	while (tries--) {
		if (extract_freq(get_cur_val(mask, data), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
417
			       unsigned int index)
L
Linus Torvalds 已提交
418
{
419
	struct acpi_cpufreq_data *data = policy->driver_data;
420 421
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
422
	unsigned int next_perf_state = 0; /* Index into perf table */
423
	int result = 0;
424

425
	if (unlikely(data == NULL || data->freq_table == NULL)) {
426 427
		return -ENODEV;
	}
L
Linus Torvalds 已提交
428

429
	perf = to_perf_data(data);
430
	next_perf_state = data->freq_table[index].driver_data;
431
	if (perf->state == next_perf_state) {
432
		if (unlikely(data->resume)) {
433
			pr_debug("Called after resume, resetting to P%d\n",
434
				next_perf_state);
435 436
			data->resume = 0;
		} else {
437
			pr_debug("Already at target state (P%d)\n",
438
				next_perf_state);
439
			goto out;
440
		}
441 442
	}

443 444 445 446
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
447
		cmd.val = (u32) perf->states[next_perf_state].control;
448
		break;
449 450 451 452 453
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
454 455 456 457 458 459 460
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
461 462
		result = -ENODEV;
		goto out;
463
	}
464

465
	/* cpufreq holds the hotplug lock, so we are safe from here on */
466
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
467
		cmd.mask = policy->cpus;
468
	else
469
		cmd.mask = cpumask_of(policy->cpu);
470

471
	drv_write(&cmd);
472

473
	if (acpi_pstate_strict) {
474 475
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
476
			pr_debug("acpi_cpufreq_target failed (%d)\n",
477
				policy->cpu);
478
			result = -EAGAIN;
479 480 481
		}
	}

482 483
	if (!result)
		perf->state = next_perf_state;
484

485
out:
486
	return result;
L
Linus Torvalds 已提交
487 488 489
}

static unsigned long
490
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
491
{
492
	struct acpi_processor_performance *perf;
493

494
	perf = to_perf_data(data);
L
Linus Torvalds 已提交
495 496 497 498
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
499
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
500

501
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
502
			freq = freqn;
503
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
504
			if ((2 * cpu_khz) > (freqn + freq)) {
505
				perf->state = i;
506
				return freq;
L
Linus Torvalds 已提交
507 508
			}
		}
509
		perf->state = perf->state_count-1;
510
		return freqn;
511
	} else {
L
Linus Torvalds 已提交
512
		/* assume CPU is at P0... */
513 514 515
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
516 517
}

518 519 520 521 522 523 524 525 526 527 528
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
547
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


/* Hotplug notifier block; registered in acpi_cpufreq_boost_init(). */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

567 568 569 570 571 572 573 574
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
575
static int __init acpi_cpufreq_early_init(void)
576
{
577
	unsigned int i;
578
	pr_debug("acpi_cpufreq_early_init\n");
579

580 581
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
582
		pr_debug("Memory allocation error for acpi_perf_data.\n");
583
		return -ENOMEM;
584
	}
585
	for_each_possible_cpu(i) {
586
		if (!zalloc_cpumask_var_node(
587 588
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
589 590 591 592 593 594

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
595 596

	/* Do initialization in ACPI core */
597 598
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
599 600
}

601
#ifdef CONFIG_SMP
602 603 604 605 606 607 608 609
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

610
static int sw_any_bug_found(const struct dmi_system_id *d)
611 612 613 614 615
{
	bios_with_sw_any_bug = 1;
	return 0;
}

616
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
617 618 619 620 621 622 623 624 625 626 627
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
628 629 630

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
631 632
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
633 634
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
635
	 * Both Processor Cores to Lock Up. */
636 637 638
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
639 640 641 642 643
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
644
			return -ENODEV;
645
		    }
646 647 648
		}
	return 0;
}
649
#endif
650

651
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
652
{
653 654 655 656 657
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
658
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
659
	struct acpi_processor_performance *perf;
660 661 662
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
663

664
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
665

666
#ifdef CONFIG_SMP
667 668 669 670 671
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
672 673
#endif

674
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
675
	if (!data)
676
		return -ENOMEM;
L
Linus Torvalds 已提交
677

678 679 680 681 682
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

683
	perf = per_cpu_ptr(acpi_perf_data, cpu);
684
	data->acpi_perf_cpu = cpu;
685
	policy->driver_data = data;
L
Linus Torvalds 已提交
686

687
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
688
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
689

690
	result = acpi_processor_register_performance(perf, cpu);
L
Linus Torvalds 已提交
691
	if (result)
692
		goto err_free_mask;
L
Linus Torvalds 已提交
693

694
	policy->shared_type = perf->shared_type;
695

696
	/*
697
	 * Will let policy->cpus know about dependency only when software
698 699 700
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
701
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
702
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
703
	}
704
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
705 706 707

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
708
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
709
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
710
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
711
	}
712 713 714 715

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
716 717
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
718 719 720
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
721
#endif
722

L
Linus Torvalds 已提交
723
	/* capability check */
724
	if (perf->state_count <= 1) {
725
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
726 727 728
		result = -ENODEV;
		goto err_unreg;
	}
729

730 731 732 733 734 735
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
736
	case ACPI_ADR_SPACE_SYSTEM_IO:
737 738 739 740 741 742
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
743
		pr_debug("SYSTEM IO addr space\n");
744 745
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
746
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
747
		pr_debug("HARDWARE addr space\n");
748 749 750
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
751
		}
752 753 754 755 756 757
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
758
	default:
759
		pr_debug("Unknown addr space %d\n",
760
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
761 762 763 764
		result = -ENODEV;
		goto err_unreg;
	}

765
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
766
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
767 768 769 770 771 772 773
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
774
	for (i = 0; i < perf->state_count; i++) {
775 776 777 778
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
779 780
	}

781 782 783 784
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
785 786
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
787 788
	}

L
Linus Torvalds 已提交
789
	/* table init */
790 791
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
792
		    data->freq_table[valid_states-1].frequency / 1000)
793 794
			continue;

795
		data->freq_table[valid_states].driver_data = i;
796
		data->freq_table[valid_states].frequency =
797
		    perf->states[i].core_frequency * 1000;
798
		valid_states++;
L
Linus Torvalds 已提交
799
	}
800
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
801
	perf->state = 0;
L
Linus Torvalds 已提交
802

803
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
804
	if (result)
L
Linus Torvalds 已提交
805 806
		goto err_freqfree;

807 808 809
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

810
	switch (perf->control_register.space_id) {
811
	case ACPI_ADR_SPACE_SYSTEM_IO:
812 813 814 815 816 817
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
818 819
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
820
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
821
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
822
		break;
823
	default:
824 825 826
		break;
	}

L
Linus Torvalds 已提交
827 828 829
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

830
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
831
	for (i = 0; i < perf->state_count; i++)
832
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
833
			(i == perf->state ? '*' : ' '), i,
834 835 836
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
837

838 839 840 841 842
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
843

844
	return result;
L
Linus Torvalds 已提交
845

846
err_freqfree:
L
Linus Torvalds 已提交
847
	kfree(data->freq_table);
848
err_unreg:
849
	acpi_processor_unregister_performance(cpu);
850 851
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
852
err_free:
L
Linus Torvalds 已提交
853
	kfree(data);
854
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
855

856
	return result;
L
Linus Torvalds 已提交
857 858
}

859
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
860
{
861
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
862

863
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
864 865

	if (data) {
866
		policy->driver_data = NULL;
867
		acpi_processor_unregister_performance(data->acpi_perf_cpu);
868
		free_cpumask_var(data->freqdomain_cpus);
869
		kfree(data->freq_table);
L
Linus Torvalds 已提交
870 871 872
		kfree(data);
	}

873
	return 0;
L
Linus Torvalds 已提交
874 875
}

876
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
877
{
878
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
879

880
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
881 882 883

	data->resume = 1;

884
	return 0;
L
Linus Torvalds 已提交
885 886
}

887
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
888
	&cpufreq_freq_attr_scaling_available_freqs,
889
	&freqdomain_cpus,
890 891 892
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
L
Linus Torvalds 已提交
893 894 895 896
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
897
	.verify		= cpufreq_generic_frequency_table_verify,
898
	.target_index	= acpi_cpufreq_target,
899 900 901 902 903 904
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
905
	.set_boost      = _store_boost,
L
Linus Torvalds 已提交
906 907
};

908 909 910 911 912 913 914 915
/*
 * Set up boost support when the boot CPU has CPB or IDA: allocate the MSR
 * buffer, record the current boost state, bring all online CPUs to that
 * state and register the hotplug notifier.  Silently does nothing if the
 * feature is absent or the buffer cannot be allocated.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	msrs = msrs_alloc();
	if (!msrs)
		return;

	acpi_cpufreq_driver.boost_supported = true;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	cpu_notifier_register_begin();

	/* Force all MSRs to the same value */
	boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
		       cpu_online_mask);

	__register_cpu_notifier(&boost_nb);

	cpu_notifier_register_done();
}

931
static void acpi_cpufreq_boost_exit(void)
932 933 934 935 936 937 938 939 940
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

941
/*
 * Module entry point.  Returns -ENODEV when ACPI is disabled, -EEXIST
 * when another cpufreq driver is already registered, otherwise the result
 * of early init / driver registration.
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **slot = acpi_cpufreq_attr;

		pr_debug("CPB unsupported, do not expose it\n");

		/* Find the cpb entry and terminate the list there. */
		while (*slot && *slot != &cpb)
			slot++;
		if (*slot)
			*slot = NULL;
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (!ret)
		return 0;

	/* Registration failed: release what was set up above. */
	free_acpi_perf_data();
	acpi_cpufreq_boost_exit();

	return ret;
}

987
/*
 * Module exit: tear down boost support, unregister the driver and free
 * the per-CPU ACPI performance data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();
	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_acpi_perf_data();
}

998
module_param(acpi_pstate_strict, uint, 0644);
999
MODULE_PARM_DESC(acpi_pstate_strict,
1000 1001
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
1002 1003 1004 1005

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

1006 1007 1008 1009 1010 1011 1012
/*
 * x86 CPU feature match table (ACPI or HW_PSTATE), exported as a module
 * device table - presumably so the module can be autoloaded on matching
 * CPUs; nothing else in this file reads it.
 */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1013 1014 1015 1016 1017 1018 1019
/*
 * ACPI processor object/device HIDs, exported as a module device table -
 * presumably for module autoloading; nothing else in this file reads it.
 */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1020
MODULE_ALIAS("acpi");