/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54
#define PFX "acpi-cpufreq: "

55 56 57
/*
 * Register access method used for P-state control; stored in
 * acpi_cpufreq_data.cpu_feature and drv_cmd.type.
 */
enum {
	UNDEFINED_CAPABLE		= 0,
	SYSTEM_INTEL_MSR_CAPABLE	= 1,
	SYSTEM_AMD_MSR_CAPABLE		= 2,
	SYSTEM_IO_CAPABLE		= 3,
};

#define INTEL_MSR_RANGE		(0xffff)
63
#define AMD_MSR_RANGE		(0x7)
64

65 66
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70 71
	struct acpi_processor_performance *acpi_data;
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
72
	cpumask_var_t freqdomain_cpus;
L
Linus Torvalds 已提交
73 74
};

75
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
76

77
/* acpi_perf_data is a pointer to percpu data. */
78
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
79 80 81

static struct cpufreq_driver acpi_cpufreq_driver;

82
static unsigned int acpi_pstate_strict;
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * boost_set_msrs - set or clear the boost-disable bit on a set of CPUs
 * @enable:  true clears the disable bit (boost on), false sets it
 * @cpumask: CPUs whose MSR is rewritten
 *
 * The MSR is read on every CPU of @cpumask into the global msrs buffer,
 * modified there and written back. Does nothing on vendors other than
 * Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 reg;
	u64 disable_bit;
	u32 cpu;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		reg = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		reg = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	rdmsr_on_cpus(cpumask, reg, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *val = per_cpu_ptr(msrs, cpu);

		val->q = enable ? (val->q & ~disable_bit)
				: (val->q | disable_bit);
	}

	wrmsr_on_cpus(cpumask, reg, msrs);
}

135
static int _store_boost(int val)
136 137 138 139 140 141
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

142
	return 0;
143 144
}

145 146 147 148 149 150 151 152 153
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

154
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
/*
 * store_boost - parse a "0"/"1" string and switch boosting accordingly
 * @buf:   user supplied text
 * @count: length of @buf, returned unchanged on success
 *
 * Returns @count on success, -EINVAL when boosting is not supported or the
 * input is not the decimal number 0 or 1.
 */
static ssize_t store_boost(const char *buf, size_t count)
{
	int ret;
	unsigned long val = 0;

	if (!acpi_cpufreq_driver.boost_supported)
		return -EINVAL;

	ret = kstrtoul(buf, 10, &val);
	if (ret || (val > 1))
		return -EINVAL;

	_store_boost((int) val);

	/*
	 * Keep the cached state in step with the MSRs: show_cpb() reports
	 * this field and boost_notify() replays it on CPUs coming online,
	 * so leaving it stale would make both disagree with the hardware.
	 * NOTE(review): written without any lock here - confirm this is
	 * acceptable alongside the cpufreq core's own updates.
	 */
	acpi_cpufreq_driver.boost_enabled = val;

	return count;
}

172 173 174
/*
 * store_cpb - sysfs store handler of the legacy "cpb" attribute
 *
 * @policy is not used; the request is forwarded to store_boost().
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	ssize_t consumed = store_boost(buf, count);

	return consumed;
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
180
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
181 182
}

183
cpufreq_freq_attr_rw(cpb);
184 185
#endif

186 187
static int check_est_cpu(unsigned int cpuid)
{
188
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
189

190
	return cpu_has(cpu, X86_FEATURE_EST);
191 192
}

193 194 195 196 197 198 199
/* Non-zero when CPU @cpuid has the X86_FEATURE_HW_PSTATE capability bit set. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	return cpu_has(&cpu_data(cpuid), X86_FEATURE_HW_PSTATE);
}

200
/*
 * extract_io - translate an I/O port status value into a frequency
 * @value: value read from the port
 * @data:  driver data holding the ACPI states and the frequency table
 *
 * Returns the frequency table entry at the position of the first ACPI state
 * whose status equals @value, or 0 when no state matches.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	int idx = 0;

	while (idx < perf->state_count) {
		if (perf->states[idx].status == value)
			return data->freq_table[idx].frequency;
		idx++;
	}

	return 0;
}

214 215
/*
 * extract_msr - translate a PERF_CTL MSR value into a frequency
 * @msr:  low 32 bits read from the MSR
 * @data: driver data holding the ACPI states and the frequency table
 *
 * The value is masked with the vendor specific range and compared with the
 * status of the ACPI state each table entry refers to. When nothing
 * matches, the frequency of the first table entry is returned.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	struct cpufreq_frequency_table *entry;

	msr &= (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ?
			AMD_MSR_RANGE : INTEL_MSR_RANGE;

	cpufreq_for_each_entry(entry, data->freq_table) {
		if (perf->states[entry->driver_data].status == msr)
			return entry->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * extract_freq - decode a raw register value according to the access method
 *
 * Dispatches to extract_msr() for both MSR based methods and to
 * extract_io() for port I/O; any other method yields 0.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	unsigned int method = data->cpu_feature;

	if (method == SYSTEM_INTEL_MSR_CAPABLE ||
	    method == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(val, data);

	if (method == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

/* Address of a P-state control register reached through an MSR. */
struct msr_addr {
	u32 reg;	/* MSR number passed to rdmsr()/wrmsr() */
};

249 250 251 252 253 254
/* Address of a P-state control register reached through port I/O. */
struct io_addr {
	u16 port;	/* port handed to acpi_os_read_port()/acpi_os_write_port() */
	u8 bit_width;	/* access width handed to the same helpers */
};

struct drv_cmd {
255
	unsigned int type;
256
	const struct cpumask *mask;
257 258 259 260
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
261 262 263
	u32 val;
};

264 265
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
266
{
267
	struct drv_cmd *cmd = _cmd;
268 269 270
	u32 h;

	switch (cmd->type) {
271
	case SYSTEM_INTEL_MSR_CAPABLE:
272
	case SYSTEM_AMD_MSR_CAPABLE:
273 274
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
275
	case SYSTEM_IO_CAPABLE:
276 277 278
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
279
		break;
280
	default:
281 282
		break;
	}
283
}
L
Linus Torvalds 已提交
284

285 286
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
287
{
288
	struct drv_cmd *cmd = _cmd;
289
	u32 lo, hi;
290 291

	switch (cmd->type) {
292
	case SYSTEM_INTEL_MSR_CAPABLE:
293 294 295
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
296
		break;
297 298 299
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
300
	case SYSTEM_IO_CAPABLE:
301 302 303
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
304
		break;
305
	default:
306 307
		break;
	}
308
}
L
Linus Torvalds 已提交
309

310
static void drv_read(struct drv_cmd *cmd)
311
{
312
	int err;
313 314
	cmd->val = 0;

315 316
	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
317 318 319 320
}

static void drv_write(struct drv_cmd *cmd)
{
321 322 323 324 325
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
326
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
327
	put_cpu();
328
}
L
Linus Torvalds 已提交
329

330
static u32 get_cur_val(const struct cpumask *mask)
331
{
332 333
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
334

335
	if (unlikely(cpumask_empty(mask)))
336
		return 0;
L
Linus Torvalds 已提交
337

338
	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
339 340
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
341
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
342
		break;
343 344
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
345
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
346
		break;
347 348
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
349
		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
350 351 352 353 354 355 356
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

357
	cmd.mask = mask;
358
	drv_read(&cmd);
L
Linus Torvalds 已提交
359

360
	pr_debug("get_cur_val = %u\n", cmd.val);
361 362 363

	return cmd.val;
}
L
Linus Torvalds 已提交
364

365 366
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
367
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
368
	unsigned int freq;
369
	unsigned int cached_freq;
370

371
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
372 373

	if (unlikely(data == NULL ||
374
		     data->acpi_data == NULL || data->freq_table == NULL)) {
375
		return 0;
L
Linus Torvalds 已提交
376 377
	}

378
	cached_freq = data->freq_table[data->acpi_data->state].frequency;
379
	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
380 381 382 383 384 385 386 387
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

388
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
389

390
	return freq;
L
Linus Torvalds 已提交
391 392
}

393
/*
 * check_freqs - poll until the hardware reports the requested frequency
 * @mask: CPUs to read from
 * @freq: frequency expected after the transition
 * @data: driver data used to decode the register value
 *
 * Reads the register up to 100 times, waiting 10us after each mismatch.
 * Returns 1 as soon as @freq is observed, 0 if it never is.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int tries_left = 100;

	while (tries_left--) {
		if (extract_freq(get_cur_val(mask), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
409
			       unsigned int index)
L
Linus Torvalds 已提交
410
{
411
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
412 413
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
414
	unsigned int next_perf_state = 0; /* Index into perf table */
415
	int result = 0;
416 417

	if (unlikely(data == NULL ||
418
	     data->acpi_data == NULL || data->freq_table == NULL)) {
419 420
		return -ENODEV;
	}
L
Linus Torvalds 已提交
421

422
	perf = data->acpi_data;
423
	next_perf_state = data->freq_table[index].driver_data;
424
	if (perf->state == next_perf_state) {
425
		if (unlikely(data->resume)) {
426
			pr_debug("Called after resume, resetting to P%d\n",
427
				next_perf_state);
428 429
			data->resume = 0;
		} else {
430
			pr_debug("Already at target state (P%d)\n",
431
				next_perf_state);
432
			goto out;
433
		}
434 435
	}

436 437 438 439
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
440
		cmd.val = (u32) perf->states[next_perf_state].control;
441
		break;
442 443 444 445 446
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
447 448 449 450 451 452 453
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
454 455
		result = -ENODEV;
		goto out;
456
	}
457

458
	/* cpufreq holds the hotplug lock, so we are safe from here on */
459
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
460
		cmd.mask = policy->cpus;
461
	else
462
		cmd.mask = cpumask_of(policy->cpu);
463

464
	drv_write(&cmd);
465

466
	if (acpi_pstate_strict) {
467 468
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
469
			pr_debug("acpi_cpufreq_target failed (%d)\n",
470
				policy->cpu);
471
			result = -EAGAIN;
472 473 474
		}
	}

475 476
	if (!result)
		perf->state = next_perf_state;
477

478
out:
479
	return result;
L
Linus Torvalds 已提交
480 481 482
}

static unsigned long
483
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
484
{
485
	struct acpi_processor_performance *perf = data->acpi_data;
486

L
Linus Torvalds 已提交
487 488 489 490
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
491
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
492

493
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
494
			freq = freqn;
495
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
496
			if ((2 * cpu_khz) > (freqn + freq)) {
497
				perf->state = i;
498
				return freq;
L
Linus Torvalds 已提交
499 500
			}
		}
501
		perf->state = perf->state_count-1;
502
		return freqn;
503
	} else {
L
Linus Torvalds 已提交
504
		/* assume CPU is at P0... */
505 506 507
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
508 509
}

510 511 512 513 514 515 516 517 518 519 520
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
539
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


/* Hotplug notifier registered in acpi_cpufreq_boost_init(). */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

559 560 561 562 563 564 565 566
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
567
static int __init acpi_cpufreq_early_init(void)
568
{
569
	unsigned int i;
570
	pr_debug("acpi_cpufreq_early_init\n");
571

572 573
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
574
		pr_debug("Memory allocation error for acpi_perf_data.\n");
575
		return -ENOMEM;
576
	}
577
	for_each_possible_cpu(i) {
578
		if (!zalloc_cpumask_var_node(
579 580
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
581 582 583 584 585 586

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
587 588

	/* Do initialization in ACPI core */
589 590
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
591 592
}

593
#ifdef CONFIG_SMP
594 595 596 597 598 599 600 601
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

602
static int sw_any_bug_found(const struct dmi_system_id *d)
603 604 605 606 607
{
	bios_with_sw_any_bug = 1;
	return 0;
}

608
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
609 610 611 612 613 614 615 616 617 618 619
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
620 621 622

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
623 624
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
625 626
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
627
	 * Both Processor Cores to Lock Up. */
628 629 630
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
631 632 633 634 635
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
636
			return -ENODEV;
637
		    }
638 639 640
		}
	return 0;
}
641
#endif
642

643
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
644
{
645 646 647 648 649
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
650
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
651
	struct acpi_processor_performance *perf;
652 653 654
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
655

656
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
657

658
#ifdef CONFIG_SMP
659 660 661 662 663
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
664 665
#endif

666
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
667
	if (!data)
668
		return -ENOMEM;
L
Linus Torvalds 已提交
669

670 671 672 673 674
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

675
	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
676
	per_cpu(acfreq_data, cpu) = data;
L
Linus Torvalds 已提交
677

678
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
679
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
680

681
	result = acpi_processor_register_performance(data->acpi_data, cpu);
L
Linus Torvalds 已提交
682
	if (result)
683
		goto err_free_mask;
L
Linus Torvalds 已提交
684

685 686
	perf = data->acpi_data;
	policy->shared_type = perf->shared_type;
687

688
	/*
689
	 * Will let policy->cpus know about dependency only when software
690 691 692
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
693
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
694
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
695
	}
696
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
697 698 699

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
700
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
701
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
702
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
703
	}
704 705 706 707

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
708 709
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
710 711 712
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
713
#endif
714

L
Linus Torvalds 已提交
715
	/* capability check */
716
	if (perf->state_count <= 1) {
717
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
718 719 720
		result = -ENODEV;
		goto err_unreg;
	}
721

722 723 724 725 726 727
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
728
	case ACPI_ADR_SPACE_SYSTEM_IO:
729 730 731 732 733 734
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
735
		pr_debug("SYSTEM IO addr space\n");
736 737
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
738
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
739
		pr_debug("HARDWARE addr space\n");
740 741 742
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
743
		}
744 745 746 747 748 749
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
750
	default:
751
		pr_debug("Unknown addr space %d\n",
752
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
753 754 755 756
		result = -ENODEV;
		goto err_unreg;
	}

757
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
758
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
759 760 761 762 763 764 765
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
766
	for (i = 0; i < perf->state_count; i++) {
767 768 769 770
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
771 772
	}

773 774 775 776
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
777 778
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
779 780
	}

L
Linus Torvalds 已提交
781
	/* table init */
782 783
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
784
		    data->freq_table[valid_states-1].frequency / 1000)
785 786
			continue;

787
		data->freq_table[valid_states].driver_data = i;
788
		data->freq_table[valid_states].frequency =
789
		    perf->states[i].core_frequency * 1000;
790
		valid_states++;
L
Linus Torvalds 已提交
791
	}
792
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
793
	perf->state = 0;
L
Linus Torvalds 已提交
794

795
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
796
	if (result)
L
Linus Torvalds 已提交
797 798
		goto err_freqfree;

799 800 801
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

802
	switch (perf->control_register.space_id) {
803
	case ACPI_ADR_SPACE_SYSTEM_IO:
804 805 806 807 808 809
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
810 811
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
812
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
813
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
814
		break;
815
	default:
816 817 818
		break;
	}

L
Linus Torvalds 已提交
819 820 821
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

822
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
823
	for (i = 0; i < perf->state_count; i++)
824
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
825
			(i == perf->state ? '*' : ' '), i,
826 827 828
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
829

830 831 832 833 834
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
835

836
	return result;
L
Linus Torvalds 已提交
837

838
err_freqfree:
L
Linus Torvalds 已提交
839
	kfree(data->freq_table);
840
err_unreg:
841
	acpi_processor_unregister_performance(perf, cpu);
842 843
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
844
err_free:
L
Linus Torvalds 已提交
845
	kfree(data);
846
	per_cpu(acfreq_data, cpu) = NULL;
L
Linus Torvalds 已提交
847

848
	return result;
L
Linus Torvalds 已提交
849 850
}

851
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
852
{
853
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
L
Linus Torvalds 已提交
854

855
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
856 857

	if (data) {
858
		per_cpu(acfreq_data, policy->cpu) = NULL;
859 860
		acpi_processor_unregister_performance(data->acpi_data,
						      policy->cpu);
861
		free_cpumask_var(data->freqdomain_cpus);
862
		kfree(data->freq_table);
L
Linus Torvalds 已提交
863 864 865
		kfree(data);
	}

866
	return 0;
L
Linus Torvalds 已提交
867 868
}

869
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
870
{
871
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
L
Linus Torvalds 已提交
872

873
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
874 875 876

	data->resume = 1;

877
	return 0;
L
Linus Torvalds 已提交
878 879
}

880
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
881
	&cpufreq_freq_attr_scaling_available_freqs,
882
	&freqdomain_cpus,
883
	NULL,	/* this is a placeholder for cpb, do not remove */
L
Linus Torvalds 已提交
884 885 886 887
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
888
	.verify		= cpufreq_generic_frequency_table_verify,
889
	.target_index	= acpi_cpufreq_target,
890 891 892 893 894 895
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
896
	.set_boost      = _store_boost,
L
Linus Torvalds 已提交
897 898
};

899 900 901 902 903 904 905 906
/*
 * acpi_cpufreq_boost_init - enable boost handling when the CPU offers it
 *
 * When the boot CPU has X86_FEATURE_CPB or X86_FEATURE_IDA, allocates the
 * msrs buffer, records the boost state of CPU 0 in the driver, applies
 * that state to all online CPUs and registers the hotplug notifier.
 * Silently does nothing when the feature is absent or allocation fails.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	msrs = msrs_alloc();
	if (!msrs)
		return;

	acpi_cpufreq_driver.boost_supported = true;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	cpu_notifier_register_begin();

	/* Force all MSRs to the same value */
	boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpu_online_mask);
	__register_cpu_notifier(&boost_nb);

	cpu_notifier_register_done();
}

922
static void acpi_cpufreq_boost_exit(void)
923 924 925 926 927 928 929 930 931
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

932
/*
 * acpi_cpufreq_init - module entry point
 *
 * Returns -ENODEV when ACPI is disabled, -EEXIST when another cpufreq
 * driver is already registered, the error of acpi_cpufreq_early_init(), or
 * the result of cpufreq_register_driver(). When registration fails the
 * per-CPU ACPI data and the boost state are released again.
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (check_amd_hwpstate_cpu(0)) {
		struct freq_attr **slot = acpi_cpufreq_attr;

		pr_debug("adding sysfs entry for cpb\n");

		/* advance to the first free (NULL) slot */
		while (*slot != NULL)
			slot++;

		/* make sure there is a terminator behind it */
		if (slot[1] == NULL)
			*slot = &cpb;
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (!ret)
		return 0;

	free_acpi_perf_data();
	acpi_cpufreq_boost_exit();

	return ret;
}

979
/*
 * acpi_cpufreq_exit - module exit point
 *
 * Tears down boost handling, unregisters the driver and finally frees the
 * per-CPU ACPI performance data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();
	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_acpi_perf_data();
}

990
module_param(acpi_pstate_strict, uint, 0644);
991
MODULE_PARM_DESC(acpi_pstate_strict,
992 993
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
994 995 996 997

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

998 999 1000 1001 1002 1003 1004
/* CPU feature matches exported through the x86cpu module device table. */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1005 1006 1007 1008 1009 1010 1011
/* ACPI processor IDs exported through the acpi module device table. */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1012
MODULE_ALIAS("acpi");