/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54
#define PFX "acpi-cpufreq: "

55 56 57
/*
 * Register access method recorded in acpi_cpufreq_data.cpu_feature and in
 * drv_cmd.type: MSR based (Intel or AMD flavour) or I/O port based.
 */
enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

/* Bits of the perf MSR value that carry the P-state on Intel CPUs. */
#define INTEL_MSR_RANGE		(0xffff)
/* Bits of the perf MSR value that carry the P-state on AMD CPUs. */
#define AMD_MSR_RANGE		(0x7)

/* Bit in MSR_K7_HWCR that, when set, disables core boosting. */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
71
	unsigned int acpi_perf_cpu;
72
	cpumask_var_t freqdomain_cpus;
L
Linus Torvalds 已提交
73 74
};

75
/* acpi_perf_data is a pointer to percpu data. */
76
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
77

78 79 80 81 82
/* Look up the per-CPU ACPI performance data that @data is bound to. */
static inline struct acpi_processor_performance *
to_perf_data(struct acpi_cpufreq_data *data)
{
	unsigned int owner_cpu = data->acpi_perf_cpu;

	return per_cpu_ptr(acpi_perf_data, owner_cpu);
}

L
Linus Torvalds 已提交
83 84
static struct cpufreq_driver acpi_cpufreq_driver;

85
static unsigned int acpi_pstate_strict;
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * boost_set_msrs - enable or disable boosting on a set of CPUs
 * @enable:  true clears the boost-disable bit, false sets it
 * @cpumask: CPUs to update
 *
 * Read-modify-write of the vendor specific MSR on every CPU in @cpumask,
 * using the per-CPU 'msrs' buffer as scratch space.  Does nothing for
 * vendors other than Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 which_cpu;
	u32 addr;
	u64 disable_bit;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		addr = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		addr = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	rdmsr_on_cpus(cpumask, addr, msrs);

	for_each_cpu(which_cpu, cpumask) {
		struct msr *scratch = per_cpu_ptr(msrs, which_cpu);

		if (!enable)
			scratch->q |= disable_bit;
		else
			scratch->q &= ~disable_bit;
	}

	wrmsr_on_cpus(cpumask, addr, msrs);
}

138
static int _store_boost(int val)
139 140 141 142 143 144
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

145
	return 0;
146 147
}

148 149
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
150
	struct acpi_cpufreq_data *data = policy->driver_data;
151 152 153 154 155 156

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

157
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/*
 * store_boost - parse a "0"/"1" string and apply it as the boost setting
 * @buf:   user supplied text
 * @count: length of @buf, returned unchanged on success
 *
 * Returns @count on success, -EINVAL if boosting is unsupported or the
 * input is not the decimal number 0 or 1.
 *
 * NOTE(review): this path does not update
 * acpi_cpufreq_driver.boost_enabled, which is what show_cpb() reports -
 * confirm whether that is intended.
 */
static ssize_t store_boost(const char *buf, size_t count)
{
	unsigned long requested = 0;

	if (!acpi_cpufreq_driver.boost_supported)
		return -EINVAL;

	if (kstrtoul(buf, 10, &requested) || requested > 1)
		return -EINVAL;

	_store_boost((int) requested);

	return count;
}

175 176 177
/*
 * sysfs store handler for the "cpb" attribute.  @policy is not used; the
 * request is handed to store_boost() unchanged.
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	ssize_t consumed = store_boost(buf, count);

	return consumed;
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
183
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
184 185
}

186
cpufreq_freq_attr_rw(cpb);
187 188
#endif

189 190
static int check_est_cpu(unsigned int cpuid)
{
191
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
192

193
	return cpu_has(cpu, X86_FEATURE_EST);
194 195
}

196 197 198 199 200 201 202
/* Non-zero when CPU @cpuid advertises X86_FEATURE_HW_PSTATE. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *info = &cpu_data(cpuid);
	int has_hw_pstate = cpu_has(info, X86_FEATURE_HW_PSTATE);

	return has_hw_pstate;
}

203
/*
 * extract_io - translate a raw status-port value into a frequency
 * @value: value read from the P-state I/O port
 * @data:  driver data of the policy
 *
 * For every frequency table entry, @value is compared with the status of
 * the ACPI performance state that the entry was built from (stored in
 * ->driver_data).  The table must not be indexed by ACPI state number:
 * acpi_cpufreq_cpu_init() leaves out states that are not strictly slower
 * than the previously accepted one, so table index and state number can
 * differ.  This mirrors the lookup done by extract_msr().
 *
 * Returns the matching table frequency, or 0 if nothing matches.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *pos;

	cpufreq_for_each_entry(pos, data->freq_table)
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;

	return 0;
}

217 218
/*
 * extract_msr - translate a raw perf MSR value into a frequency
 * @msr:  low word read from the perf MSR
 * @data: driver data of the policy
 *
 * The value is masked with the vendor specific range and compared with the
 * status of the ACPI state behind each frequency table entry.  Falls back
 * to the first table entry when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *entry;
	u32 range;

	range = (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ?
		AMD_MSR_RANGE : INTEL_MSR_RANGE;
	msr &= range;

	cpufreq_for_each_entry(entry, data->freq_table) {
		if (msr == perf->states[entry->driver_data].status)
			return entry->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * extract_freq - translate a raw register value into a frequency
 *
 * Dispatches on data->cpu_feature to the MSR or I/O port decoder.
 * Returns 0 for an unknown access method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	unsigned int method = data->cpu_feature;

	if (method == SYSTEM_INTEL_MSR_CAPABLE ||
	    method == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(val, data);

	if (method == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

struct msr_addr {
	u32 reg;
};

252 253 254 255 256 257
struct io_addr {
	u16 port;
	u8 bit_width;
};

struct drv_cmd {
258
	unsigned int type;
259
	const struct cpumask *mask;
260 261 262 263
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
264 265 266
	u32 val;
};

267 268
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
269
{
270
	struct drv_cmd *cmd = _cmd;
271 272 273
	u32 h;

	switch (cmd->type) {
274
	case SYSTEM_INTEL_MSR_CAPABLE:
275
	case SYSTEM_AMD_MSR_CAPABLE:
276 277
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
278
	case SYSTEM_IO_CAPABLE:
279 280 281
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
282
		break;
283
	default:
284 285
		break;
	}
286
}
L
Linus Torvalds 已提交
287

288 289
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
290
{
291
	struct drv_cmd *cmd = _cmd;
292
	u32 lo, hi;
293 294

	switch (cmd->type) {
295
	case SYSTEM_INTEL_MSR_CAPABLE:
296 297 298
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
299
		break;
300 301 302
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
303
	case SYSTEM_IO_CAPABLE:
304 305 306
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
307
		break;
308
	default:
309 310
		break;
	}
311
}
L
Linus Torvalds 已提交
312

313
static void drv_read(struct drv_cmd *cmd)
314
{
315
	int err;
316 317
	cmd->val = 0;

318 319
	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
320 321 322 323
}

static void drv_write(struct drv_cmd *cmd)
{
324 325 326 327 328
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
329
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
330
	put_cpu();
331
}
L
Linus Torvalds 已提交
332

333 334
static u32
get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
335
{
336 337
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
338

339
	if (unlikely(cpumask_empty(mask)))
340
		return 0;
L
Linus Torvalds 已提交
341

342
	switch (data->cpu_feature) {
343 344
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
345
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
346
		break;
347 348
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
349
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
350
		break;
351 352
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
353
		perf = to_perf_data(data);
354 355 356 357 358 359 360
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

361
	cmd.mask = mask;
362
	drv_read(&cmd);
L
Linus Torvalds 已提交
363

364
	pr_debug("get_cur_val = %u\n", cmd.val);
365 366 367

	return cmd.val;
}
L
Linus Torvalds 已提交
368

369 370
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
371 372
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
373
	unsigned int freq;
374
	unsigned int cached_freq;
375

376
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
377

378 379 380 381 382 383
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
	cpufreq_cpu_put(policy);
384
	if (unlikely(!data || !data->freq_table))
385
		return 0;
L
Linus Torvalds 已提交
386

387
	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
388
	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
389 390 391 392 393 394 395 396
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

397
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
398

399
	return freq;
L
Linus Torvalds 已提交
400 401
}

402
/*
 * check_freqs - poll until the hardware reports frequency @freq
 *
 * Reads the current frequency up to 100 times, waiting 10 us after each
 * mismatch.  Returns 1 as soon as it equals @freq, 0 if it never does.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int attempts_left;

	for (attempts_left = 100; attempts_left; attempts_left--) {
		if (extract_freq(get_cur_val(mask, data), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
418
			       unsigned int index)
L
Linus Torvalds 已提交
419
{
420
	struct acpi_cpufreq_data *data = policy->driver_data;
421 422
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
423
	unsigned int next_perf_state = 0; /* Index into perf table */
424
	int result = 0;
425

426
	if (unlikely(data == NULL || data->freq_table == NULL)) {
427 428
		return -ENODEV;
	}
L
Linus Torvalds 已提交
429

430
	perf = to_perf_data(data);
431
	next_perf_state = data->freq_table[index].driver_data;
432
	if (perf->state == next_perf_state) {
433
		if (unlikely(data->resume)) {
434
			pr_debug("Called after resume, resetting to P%d\n",
435
				next_perf_state);
436 437
			data->resume = 0;
		} else {
438
			pr_debug("Already at target state (P%d)\n",
439
				next_perf_state);
440
			goto out;
441
		}
442 443
	}

444 445 446 447
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
448
		cmd.val = (u32) perf->states[next_perf_state].control;
449
		break;
450 451 452 453 454
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
455 456 457 458 459 460 461
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
462 463
		result = -ENODEV;
		goto out;
464
	}
465

466
	/* cpufreq holds the hotplug lock, so we are safe from here on */
467
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
468
		cmd.mask = policy->cpus;
469
	else
470
		cmd.mask = cpumask_of(policy->cpu);
471

472
	drv_write(&cmd);
473

474
	if (acpi_pstate_strict) {
475 476
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
477
			pr_debug("acpi_cpufreq_target failed (%d)\n",
478
				policy->cpu);
479
			result = -EAGAIN;
480 481 482
		}
	}

483 484
	if (!result)
		perf->state = next_perf_state;
485

486
out:
487
	return result;
L
Linus Torvalds 已提交
488 489 490
}

static unsigned long
491
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
492
{
493
	struct acpi_processor_performance *perf;
494

495
	perf = to_perf_data(data);
L
Linus Torvalds 已提交
496 497 498 499
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
500
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
501

502
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
503
			freq = freqn;
504
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
505
			if ((2 * cpu_khz) > (freqn + freq)) {
506
				perf->state = i;
507
				return freq;
L
Linus Torvalds 已提交
508 509
			}
		}
510
		perf->state = perf->state_count-1;
511
		return freqn;
512
	} else {
L
Linus Torvalds 已提交
513
		/* assume CPU is at P0... */
514 515 516
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
517 518
}

519 520 521 522 523 524 525 526 527 528 529
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
548
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

568 569 570 571 572 573 574 575
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
576
static int __init acpi_cpufreq_early_init(void)
577
{
578
	unsigned int i;
579
	pr_debug("acpi_cpufreq_early_init\n");
580

581 582
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
583
		pr_debug("Memory allocation error for acpi_perf_data.\n");
584
		return -ENOMEM;
585
	}
586
	for_each_possible_cpu(i) {
587
		if (!zalloc_cpumask_var_node(
588 589
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
590 591 592 593 594 595

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
596 597

	/* Do initialization in ACPI core */
598 599
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
600 601
}

602
#ifdef CONFIG_SMP
603 604 605 606 607 608 609 610
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

611
static int sw_any_bug_found(const struct dmi_system_id *d)
612 613 614 615 616
{
	bios_with_sw_any_bug = 1;
	return 0;
}

617
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
618 619 620 621 622 623 624 625 626 627 628
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
629 630 631

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
632 633
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
634 635
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
636
	 * Both Processor Cores to Lock Up. */
637 638 639
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
640 641 642 643 644
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
645
			return -ENODEV;
646
		    }
647 648 649
		}
	return 0;
}
650
#endif
651

652
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
653
{
654 655 656 657 658
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
659
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
660
	struct acpi_processor_performance *perf;
661 662 663
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
664

665
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
666

667
#ifdef CONFIG_SMP
668 669 670 671 672
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
673 674
#endif

675
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
676
	if (!data)
677
		return -ENOMEM;
L
Linus Torvalds 已提交
678

679 680 681 682 683
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

684
	perf = per_cpu_ptr(acpi_perf_data, cpu);
685
	data->acpi_perf_cpu = cpu;
686
	policy->driver_data = data;
L
Linus Torvalds 已提交
687

688
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
689
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
690

691
	result = acpi_processor_register_performance(perf, cpu);
L
Linus Torvalds 已提交
692
	if (result)
693
		goto err_free_mask;
L
Linus Torvalds 已提交
694

695
	policy->shared_type = perf->shared_type;
696

697
	/*
698
	 * Will let policy->cpus know about dependency only when software
699 700 701
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
702
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
703
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
704
	}
705
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
706 707 708

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
709
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
710
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
711
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
712
	}
713 714 715 716

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
717 718
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
719 720 721
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
722
#endif
723

L
Linus Torvalds 已提交
724
	/* capability check */
725
	if (perf->state_count <= 1) {
726
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
727 728 729
		result = -ENODEV;
		goto err_unreg;
	}
730

731 732 733 734 735 736
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
737
	case ACPI_ADR_SPACE_SYSTEM_IO:
738 739 740 741 742 743
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
744
		pr_debug("SYSTEM IO addr space\n");
745 746
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
747
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
748
		pr_debug("HARDWARE addr space\n");
749 750 751
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
752
		}
753 754 755 756 757 758
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
759
	default:
760
		pr_debug("Unknown addr space %d\n",
761
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
762 763 764 765
		result = -ENODEV;
		goto err_unreg;
	}

766
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
767
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
768 769 770 771 772 773 774
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
775
	for (i = 0; i < perf->state_count; i++) {
776 777 778 779
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
780 781
	}

782 783 784 785
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
786 787
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
788 789
	}

L
Linus Torvalds 已提交
790
	/* table init */
791 792
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
793
		    data->freq_table[valid_states-1].frequency / 1000)
794 795
			continue;

796
		data->freq_table[valid_states].driver_data = i;
797
		data->freq_table[valid_states].frequency =
798
		    perf->states[i].core_frequency * 1000;
799
		valid_states++;
L
Linus Torvalds 已提交
800
	}
801
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
802
	perf->state = 0;
L
Linus Torvalds 已提交
803

804
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
805
	if (result)
L
Linus Torvalds 已提交
806 807
		goto err_freqfree;

808 809 810
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

811
	switch (perf->control_register.space_id) {
812
	case ACPI_ADR_SPACE_SYSTEM_IO:
813 814 815 816 817 818
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
819 820
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
821
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
822
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
823
		break;
824
	default:
825 826 827
		break;
	}

L
Linus Torvalds 已提交
828 829 830
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

831
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
832
	for (i = 0; i < perf->state_count; i++)
833
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
834
			(i == perf->state ? '*' : ' '), i,
835 836 837
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
838

839 840 841 842 843
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
844

845
	return result;
L
Linus Torvalds 已提交
846

847
err_freqfree:
L
Linus Torvalds 已提交
848
	kfree(data->freq_table);
849
err_unreg:
850
	acpi_processor_unregister_performance(cpu);
851 852
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
853
err_free:
L
Linus Torvalds 已提交
854
	kfree(data);
855
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
856

857
	return result;
L
Linus Torvalds 已提交
858 859
}

860
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
861
{
862
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
863

864
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
865 866

	if (data) {
867
		policy->driver_data = NULL;
868
		acpi_processor_unregister_performance(data->acpi_perf_cpu);
869
		free_cpumask_var(data->freqdomain_cpus);
870
		kfree(data->freq_table);
L
Linus Torvalds 已提交
871 872 873
		kfree(data);
	}

874
	return 0;
L
Linus Torvalds 已提交
875 876
}

877
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
878
{
879
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
880

881
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
882 883 884

	data->resume = 1;

885
	return 0;
L
Linus Torvalds 已提交
886 887
}

888
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
889
	&cpufreq_freq_attr_scaling_available_freqs,
890
	&freqdomain_cpus,
891
	NULL,	/* this is a placeholder for cpb, do not remove */
L
Linus Torvalds 已提交
892 893 894 895
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
896
	.verify		= cpufreq_generic_frequency_table_verify,
897
	.target_index	= acpi_cpufreq_target,
898 899 900 901 902 903
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
904
	.set_boost      = _store_boost,
L
Linus Torvalds 已提交
905 906
};

907 908 909 910 911 912 913 914
/*
 * acpi_cpufreq_boost_init - set up boost support
 *
 * Only acts when the boot CPU has X86_FEATURE_CPB or X86_FEATURE_IDA and
 * the MSR scratch buffer can be allocated.  Takes the boost state of CPU 0
 * as the global state, applies it to all online CPUs and registers the
 * hotplug notifier.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	msrs = msrs_alloc();
	if (!msrs)
		return;

	acpi_cpufreq_driver.boost_supported = true;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	cpu_notifier_register_begin();

	/* Force all MSRs to the same value */
	boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpu_online_mask);

	__register_cpu_notifier(&boost_nb);

	cpu_notifier_register_done();
}

930
static void acpi_cpufreq_boost_exit(void)
931 932 933 934 935 936 937 938 939
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

940
/*
 * acpi_cpufreq_init - module entry point
 *
 * Returns -ENODEV when ACPI is disabled, -EEXIST when another cpufreq
 * driver is already registered, otherwise the result of the early init or
 * of cpufreq_register_driver().  If registration fails, the per-CPU ACPI
 * data and the boost setup are released again.
 */
static int __init acpi_cpufreq_init(void)
{
	int err;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	err = acpi_cpufreq_early_init();
	if (err)
		return err;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (check_amd_hwpstate_cpu(0)) {
		struct freq_attr **slot = acpi_cpufreq_attr;

		pr_debug("adding sysfs entry for cpb\n");

		/* advance to the first free (NULL) slot */
		while (*slot != NULL)
			slot++;

		/* make sure there is a terminator behind it */
		if (slot[1] == NULL)
			*slot = &cpb;
	}
#endif
	acpi_cpufreq_boost_init();

	err = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (!err)
		return 0;

	free_acpi_perf_data();
	acpi_cpufreq_boost_exit();

	return err;
}

987
/*
 * acpi_cpufreq_exit - module exit point
 *
 * Tears down boost support, unregisters the cpufreq driver and frees the
 * per-CPU ACPI performance data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();
	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_acpi_perf_data();
}

998
module_param(acpi_pstate_strict, uint, 0644);
999
MODULE_PARM_DESC(acpi_pstate_strict,
1000 1001
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
1002 1003 1004 1005

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

1006 1007 1008 1009 1010 1011 1012
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1013 1014 1015 1016 1017 1018 1019
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1020
MODULE_ALIAS("acpi");