/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>

#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

#include <acpi/processor.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>

MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

#define PFX "acpi-cpufreq: "

enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

#define INTEL_MSR_RANGE		(0xffff)
#define AMD_MSR_RANGE		(0x7)

#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

struct acpi_cpufreq_data {
	struct acpi_processor_performance *acpi_data;
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
	cpumask_var_t freqdomain_cpus;
};

static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);

/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance __percpu *acpi_perf_data;

static struct cpufreq_driver acpi_cpufreq_driver;

static unsigned int acpi_pstate_strict;
static struct msr __percpu *msrs;

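/*
 * Return true if boost (Intel Turbo Boost / AMD Core Performance Boost) is
 * currently enabled on @cpu, judged from the vendor's boost-disable MSR bit.
 */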
static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

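/*
 * Clear (enable == true) or set (enable == false) the vendor-specific
 * boost-disable MSR bit on every CPU in @cpumask.
 */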
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 cpu;
	u32 msr_addr;
	u64 msr_mask;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		msr_addr = MSR_IA32_MISC_ENABLE;
		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
		break;
	case X86_VENDOR_AMD:
		msr_addr = MSR_K7_HWCR;
		msr_mask = MSR_K7_HWCR_CPB_DIS;
		break;
	default:
		return;
	}

	rdmsr_on_cpus(cpumask, msr_addr, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *reg = per_cpu_ptr(msrs, cpu);
		if (enable)
			reg->q &= ~msr_mask;
		else
			reg->q |= msr_mask;
	}

	wrmsr_on_cpus(cpumask, msr_addr, msrs);
}

static int _store_boost(int val)
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

	return 0;
}

static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
static ssize_t store_boost(const char *buf, size_t count)
{
	int ret;
	unsigned long val = 0;

	if (!acpi_cpufreq_driver.boost_supported)
		return -EINVAL;

	ret = kstrtoul(buf, 10, &val);
	if (ret || (val > 1))
		return -EINVAL;

	_store_boost((int) val);

	return count;
}

static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	return store_boost(buf, count);
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
}

cpufreq_freq_attr_rw(cpb);
#endif

static int check_est_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_EST);
}

static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
}

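/*
 * Translate a raw P-state status value, as read from the ACPI I/O port,
 * into a frequency from the driver's frequency table (0 if nothing matches).
 */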
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf;
	int i;

	perf = data->acpi_data;

	for (i = 0; i < perf->state_count; i++) {
		if (value == perf->states[i].status)
			return data->freq_table[i].frequency;
	}
	return 0;
}

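/*
 * Translate a raw status MSR value into a table frequency; AMD and Intel
 * encode the P-state in different bit ranges, hence the two masks.
 */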
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct cpufreq_frequency_table *pos;
	struct acpi_processor_performance *perf;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		msr &= AMD_MSR_RANGE;
	else
		msr &= INTEL_MSR_RANGE;

	perf = data->acpi_data;

	cpufreq_for_each_entry(pos, data->freq_table)
		if (msr == perf->states[pos->driver_data].status)
			return pos->frequency;
	return data->freq_table[0].frequency;
}

static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		return extract_msr(val, data);
	case SYSTEM_IO_CAPABLE:
		return extract_io(val, data);
	default:
		return 0;
	}
}

struct msr_addr {
	u32 reg;
};

struct io_addr {
	u16 port;
	u8 bit_width;
};

struct drv_cmd {
	unsigned int type;
	const struct cpumask *mask;
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
	u32 val;
};

/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;
	u32 h;

	switch (cmd->type) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
	case SYSTEM_IO_CAPABLE:
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
		break;
	default:
		break;
	}
}

/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;
	u32 lo, hi;

	switch (cmd->type) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
	case SYSTEM_IO_CAPABLE:
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
		break;
	default:
		break;
	}
}

static void drv_read(struct drv_cmd *cmd)
{
	int err;
	cmd->val = 0;

	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
}

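/*
 * Run do_drv_write() on every CPU in cmd->mask; the local CPU is handled
 * directly, since smp_call_function_many() skips the calling CPU.
 */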
static void drv_write(struct drv_cmd *cmd)
{
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
	put_cpu();
}

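/*
 * Read the current raw P-state control value (PERF_CTL MSR or ACPI I/O
 * port) for the first CPU in @mask.
 */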
static u32 get_cur_val(const struct cpumask *mask)
{
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;

	if (unlikely(cpumask_empty(mask)))
		return 0;

	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

	cmd.mask = mask;
	drv_read(&cmd);

	pr_debug("get_cur_val = %u\n", cmd.val);

	return cmd.val;
}

static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
	unsigned int freq;
	unsigned int cached_freq;

	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);

	if (unlikely(data == NULL ||
		     data->acpi_data == NULL || data->freq_table == NULL)) {
		return 0;
	}

	cached_freq = data->freq_table[data->acpi_data->state].frequency;
	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

	pr_debug("cur freq = %u\n", freq);

	return freq;
}

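/*
 * Poll the CPUs in @mask for up to ~1 ms (100 * 10 us) and return 1 once
 * they report the requested frequency, 0 otherwise.
 */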
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int cur_freq;
	unsigned int i;

	for (i = 0; i < 100; i++) {
		cur_freq = extract_freq(get_cur_val(mask), data);
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
			       unsigned int index)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
	unsigned int next_perf_state = 0; /* Index into perf table */
	int result = 0;

	if (unlikely(data == NULL ||
	     data->acpi_data == NULL || data->freq_table == NULL)) {
		return -ENODEV;
	}

	perf = data->acpi_data;
	next_perf_state = data->freq_table[index].driver_data;
	if (perf->state == next_perf_state) {
		if (unlikely(data->resume)) {
			pr_debug("Called after resume, resetting to P%d\n",
				next_perf_state);
			data->resume = 0;
		} else {
			pr_debug("Already at target state (P%d)\n",
				next_perf_state);
			goto out;
		}
	}

	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
		result = -ENODEV;
		goto out;
	}

	/* cpufreq holds the hotplug lock, so we are safe from here on */
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
		cmd.mask = policy->cpus;
	else
		cmd.mask = cpumask_of(policy->cpu);

	drv_write(&cmd);

	if (acpi_pstate_strict) {
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
			pr_debug("acpi_cpufreq_target failed (%d)\n",
				policy->cpu);
			result = -EAGAIN;
		}
	}

	if (!result)
		perf->state = next_perf_state;

out:
	return result;
}

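/*
 * Estimate the current frequency from cpu_khz when it cannot be read back
 * from hardware (I/O port based platforms); also sets perf->state to the
 * closest matching P-state.
 */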
static unsigned long
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
{
	struct acpi_processor_performance *perf = data->acpi_data;

	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
		unsigned long freqn = perf->states[0].core_frequency * 1000;

		for (i = 0; i < (perf->state_count-1); i++) {
			freq = freqn;
			freqn = perf->states[i+1].core_frequency * 1000;
			if ((2 * cpu_khz) > (freqn + freq)) {
				perf->state = i;
				return freq;
			}
		}
		perf->state = perf->state_count-1;
		return freqn;
	} else {
		/* assume CPU is at P0... */
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
}

static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

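/*
 * CPU hotplug notifier keeping the per-CPU boost-disable MSR bit in sync
 * with the global boost setting as CPUs come and go.
 */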
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
static int __init acpi_cpufreq_early_init(void)
{
	unsigned int i;
	pr_debug("acpi_cpufreq_early_init\n");

	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
		pr_debug("Memory allocation error for acpi_perf_data.\n");
		return -ENOMEM;
	}
	for_each_possible_cpu(i) {
		if (!zalloc_cpumask_var_node(
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}

	/* Do initialization in ACPI core */
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
}

#ifdef CONFIG_SMP
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

static int sw_any_bug_found(const struct dmi_system_id *d)
{
	bios_with_sw_any_bug = 1;
	return 0;
}

static const struct dmi_system_id sw_any_bug_dmi_table[] = {
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
	 * Both Processor Cores to Lock Up. */
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
			return -ENODEV;
		}
	}
	return 0;
}
#endif

static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
	struct acpi_processor_performance *perf;
#ifdef CONFIG_SMP
	static int blacklisted;
#endif

	pr_debug("acpi_cpufreq_cpu_init\n");

#ifdef CONFIG_SMP
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
#endif

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
	per_cpu(acfreq_data, cpu) = data;

	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

	result = acpi_processor_register_performance(data->acpi_data, cpu);
	if (result)
		goto err_free_mask;

	perf = data->acpi_data;
	policy->shared_type = perf->shared_type;

	/*
	 * Will let policy->cpus know about dependency only when software
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
	}
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
	}

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
		cpumask_copy(data->freqdomain_cpus, cpu_sibling_mask(cpu));
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
#endif

	/* capability check */
	if (perf->state_count <= 1) {
		pr_debug("No P-States\n");
		result = -ENODEV;
		goto err_unreg;
	}

	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
		pr_debug("SYSTEM IO addr space\n");
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		pr_debug("HARDWARE addr space\n");
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
		}
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
	default:
		pr_debug("Unknown addr space %d\n",
			(u32) (perf->control_register.space_id));
		result = -ENODEV;
		goto err_unreg;
	}

	data->freq_table = kzalloc(sizeof(*data->freq_table) *
		    (perf->state_count+1), GFP_KERNEL);
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
	for (i = 0; i < perf->state_count; i++) {
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
	}

	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
	}

	/* table init */
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
		    data->freq_table[valid_states-1].frequency / 1000)
			continue;

		data->freq_table[valid_states].driver_data = i;
		data->freq_table[valid_states].frequency =
		    perf->states[i].core_frequency * 1000;
		valid_states++;
	}
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
	perf->state = 0;

	result = cpufreq_table_validate_and_show(policy, data->freq_table);
	if (result)
		goto err_freqfree;

	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
		break;
	default:
		break;
	}

	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
	for (i = 0; i < perf->state_count; i++)
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
			(i == perf->state ? '*' : ' '), i,
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);

	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;

	return result;

err_freqfree:
	kfree(data->freq_table);
err_unreg:
	acpi_processor_unregister_performance(perf, cpu);
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
err_free:
	kfree(data);
	per_cpu(acfreq_data, cpu) = NULL;

	return result;
}

static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	pr_debug("acpi_cpufreq_cpu_exit\n");

	if (data) {
		per_cpu(acfreq_data, policy->cpu) = NULL;
		acpi_processor_unregister_performance(data->acpi_data,
						      policy->cpu);
		free_cpumask_var(data->freqdomain_cpus);
		kfree(data->freq_table);
		kfree(data);
	}

	return 0;
}

static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	pr_debug("acpi_cpufreq_resume\n");

	data->resume = 1;

	return 0;
}

static struct freq_attr *acpi_cpufreq_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	&freqdomain_cpus,
	NULL,	/* this is a placeholder for cpb, do not remove */
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
	.verify		= cpufreq_generic_frequency_table_verify,
	.target_index	= acpi_cpufreq_target,
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
	.set_boost      = _store_boost,
};

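/*
 * Advertise boost support when the CPU has CPB (AMD) or IDA (Intel dynamic
 * acceleration / turbo) and force all boost MSRs to a consistent state.
 */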
static void __init acpi_cpufreq_boost_init(void)
{
	if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
		msrs = msrs_alloc();

		if (!msrs)
			return;

		acpi_cpufreq_driver.boost_supported = true;
		acpi_cpufreq_driver.boost_enabled = boost_state(0);

		cpu_notifier_register_begin();

		/* Force all MSRs to the same value */
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
			       cpu_online_mask);

		__register_cpu_notifier(&boost_nb);

		cpu_notifier_register_done();
	}
}

static void acpi_cpufreq_boost_exit(void)
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (check_amd_hwpstate_cpu(0)) {
		struct freq_attr **iter;

		pr_debug("adding sysfs entry for cpb\n");

		for (iter = acpi_cpufreq_attr; *iter != NULL; iter++)
			;

		/* make sure there is a terminator behind it */
		if (iter[1] == NULL)
			*iter = &cpb;
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret) {
		free_acpi_perf_data();
		acpi_cpufreq_boost_exit();
	}
	return ret;
}

static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	free_acpi_perf_data();
}

module_param(acpi_pstate_strict, uint, 0644);
MODULE_PARM_DESC(acpi_pstate_strict,
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

MODULE_ALIAS("acpi");