acpi-cpufreq.c 24.4 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

/* Prefix prepended to log messages emitted by this driver. */
#define PFX "acpi-cpufreq: "

55 56 57
/*
 * Access method used to read and write the P-state registers of a CPU.
 * Stored in acpi_cpufreq_data.cpu_feature and in drv_cmd.type.
 */
enum {
	UNDEFINED_CAPABLE = 0,		/* value of a zero-initialised cpu_feature */
	SYSTEM_INTEL_MSR_CAPABLE,	/* MSR access; write merges INTEL_MSR_RANGE bits */
	SYSTEM_AMD_MSR_CAPABLE,		/* MSR access; write stores the value directly */
	SYSTEM_IO_CAPABLE,		/* access through an I/O port */
};

/* Bits of the Intel MSR value that are compared and rewritten by this driver. */
#define INTEL_MSR_RANGE		(0xffff)
/* Bits of the AMD MSR value that are compared against the ACPI status value. */
#define AMD_MSR_RANGE		(0x7)

/* Bit in MSR_K7_HWCR that, when set, disables boosting (see boost_state()). */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70 71
	struct acpi_processor_performance *acpi_data;
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
72
	unsigned int acpi_perf_cpu;
73
	cpumask_var_t freqdomain_cpus;
L
Linus Torvalds 已提交
74 75
};

76
/* acpi_perf_data is a pointer to percpu data. */
77
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
78 79 80

static struct cpufreq_driver acpi_cpufreq_driver;

81
static unsigned int acpi_pstate_strict;
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * Enable or disable boosting on every CPU in @cpumask.
 *
 * The vendor-specific MSR is read on all CPUs of the mask into the per-cpu
 * msrs buffers, the "boost disable" bit is cleared (@enable true) or set
 * (@enable false) in each buffer, and the buffers are written back.
 * Nothing is done on vendors other than Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 msr_addr;
	u64 msr_mask;
	u32 cpu;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		msr_addr = MSR_IA32_MISC_ENABLE;
		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		msr_addr = MSR_K7_HWCR;
		msr_mask = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	rdmsr_on_cpus(cpumask, msr_addr, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *reg = per_cpu_ptr(msrs, cpu);

		/* The MSR bit means "disabled", so enabling clears it. */
		reg->q = enable ? (reg->q & ~msr_mask) : (reg->q | msr_mask);
	}

	wrmsr_on_cpus(cpumask, msr_addr, msrs);
}

134
static int _store_boost(int val)
135 136 137 138 139 140
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

141
	return 0;
142 143
}

144 145
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
146
	struct acpi_cpufreq_data *data = policy->driver_data;
147 148 149 150 151 152

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

153
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
/*
 * Parse a boost request from @buf and apply it system-wide.
 *
 * Accepts only the decimal values 0 and 1.  Returns @count on success,
 * or -EINVAL when boosting is unsupported or the input is not 0/1.
 */
static ssize_t store_boost(const char *buf, size_t count)
{
	unsigned long requested = 0;

	if (!acpi_cpufreq_driver.boost_supported)
		return -EINVAL;

	if (kstrtoul(buf, 10, &requested) != 0 || requested > 1)
		return -EINVAL;

	_store_boost((int)requested);

	return count;
}

171 172 173
/*
 * sysfs store handler for the "cpb" attribute.  @policy is not used: the
 * request is forwarded to store_boost(), which acts on all online CPUs.
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	return store_boost(buf, count);
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
179
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
180 181
}

182
cpufreq_freq_attr_rw(cpb);
183 184
#endif

185 186
static int check_est_cpu(unsigned int cpuid)
{
187
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
188

189
	return cpu_has(cpu, X86_FEATURE_EST);
190 191
}

192 193 194 195 196 197 198
/* Return non-zero when CPU @cpuid has the X86_FEATURE_HW_PSTATE capability bit. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	return cpu_has(&cpu_data(cpuid), X86_FEATURE_HW_PSTATE);
}

199
/*
 * Translate a value read from the I/O port into a frequency.
 *
 * @value: raw value read from the port
 * @data:  driver data holding the ACPI states and the frequency table
 *
 * Returns the frequency of the table entry whose ACPI state has a matching
 * status value, or 0 when no entry matches.
 *
 * The frequency table can be shorter than the ACPI state list because
 * table init skips some states, so a table entry must be mapped to its
 * ACPI state through ->driver_data (as extract_msr() does) rather than by
 * using the same index for both arrays.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	struct cpufreq_frequency_table *pos;

	cpufreq_for_each_entry(pos, data->freq_table)
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;
	return 0;
}

213 214
/*
 * Translate a value read from the P-state MSR into a frequency.
 *
 * @msr is first masked with AMD_MSR_RANGE on AMD and INTEL_MSR_RANGE
 * otherwise, then compared against the status value of the ACPI state
 * behind each frequency table entry.  Returns the matching entry's
 * frequency, or the first table entry's frequency when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct cpufreq_frequency_table *pos;
	struct acpi_processor_performance *perf;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		msr &= AMD_MSR_RANGE;
	else
		msr &= INTEL_MSR_RANGE;

	perf = data->acpi_data;

	/* ->driver_data holds the index of the ACPI state for this entry. */
	cpufreq_for_each_entry(pos, data->freq_table)
		if (msr == perf->states[pos->driver_data].status)
			return pos->frequency;
	return data->freq_table[0].frequency;
}

/*
 * Translate raw register value @val into a frequency using the decoder
 * that matches data->cpu_feature.  Returns 0 for an unknown access method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		return extract_msr(val, data);
	case SYSTEM_IO_CAPABLE:
		return extract_io(val, data);
	default:
		return 0;
	}
}

/* Register address for a drv_cmd that is carried out through an MSR. */
struct msr_addr {
	u32 reg;	/* MSR number passed to rdmsr()/wrmsr() */
};

248 249 250 251 252 253
/* Register address for a drv_cmd that is carried out through an I/O port. */
struct io_addr {
	u16 port;	/* port number passed to acpi_os_{read,write}_port() */
	u8 bit_width;	/* access width passed to acpi_os_{read,write}_port() */
};

struct drv_cmd {
254
	unsigned int type;
255
	const struct cpumask *mask;
256 257 258 259
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
260 261 262
	u32 val;
};

263 264
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
265
{
266
	struct drv_cmd *cmd = _cmd;
267 268 269
	u32 h;

	switch (cmd->type) {
270
	case SYSTEM_INTEL_MSR_CAPABLE:
271
	case SYSTEM_AMD_MSR_CAPABLE:
272 273
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
274
	case SYSTEM_IO_CAPABLE:
275 276 277
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
278
		break;
279
	default:
280 281
		break;
	}
282
}
L
Linus Torvalds 已提交
283

284 285
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
286
{
287
	struct drv_cmd *cmd = _cmd;
288
	u32 lo, hi;
289 290

	switch (cmd->type) {
291
	case SYSTEM_INTEL_MSR_CAPABLE:
292 293 294
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
295
		break;
296 297 298
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
299
	case SYSTEM_IO_CAPABLE:
300 301 302
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
303
		break;
304
	default:
305 306
		break;
	}
307
}
L
Linus Torvalds 已提交
308

309
static void drv_read(struct drv_cmd *cmd)
310
{
311
	int err;
312 313
	cmd->val = 0;

314 315
	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
316 317 318 319
}

static void drv_write(struct drv_cmd *cmd)
{
320 321 322 323 324
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
325
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
326
	put_cpu();
327
}
L
Linus Torvalds 已提交
328

329 330
static u32
get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
331
{
332 333
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
334

335
	if (unlikely(cpumask_empty(mask)))
336
		return 0;
L
Linus Torvalds 已提交
337

338
	switch (data->cpu_feature) {
339 340
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
341
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
342
		break;
343 344
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
345
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
346
		break;
347 348
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
349
		perf = data->acpi_data;
350 351 352 353 354 355 356
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

357
	cmd.mask = mask;
358
	drv_read(&cmd);
L
Linus Torvalds 已提交
359

360
	pr_debug("get_cur_val = %u\n", cmd.val);
361 362 363

	return cmd.val;
}
L
Linus Torvalds 已提交
364

365 366
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
367 368
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
369
	unsigned int freq;
370
	unsigned int cached_freq;
371

372
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
373

374 375 376 377 378 379 380
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
	cpufreq_cpu_put(policy);
	if (unlikely(!data || !data->acpi_data || !data->freq_table))
381
		return 0;
L
Linus Torvalds 已提交
382

383
	cached_freq = data->freq_table[data->acpi_data->state].frequency;
384
	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
385 386 387 388 389 390 391 392
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

393
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
394

395
	return freq;
L
Linus Torvalds 已提交
396 397
}

398
/*
 * Poll the hardware until it reports frequency @freq.
 *
 * The current value is read from a CPU in @mask up to 100 times, with a
 * 10 microsecond delay after each mismatch.  Returns 1 as soon as the
 * decoded frequency equals @freq, 0 if it never does.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int cur_freq;
	unsigned int i;

	for (i = 0; i < 100; i++) {
		cur_freq = extract_freq(get_cur_val(mask, data), data);
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
414
			       unsigned int index)
L
Linus Torvalds 已提交
415
{
416
	struct acpi_cpufreq_data *data = policy->driver_data;
417 418
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
419
	unsigned int next_perf_state = 0; /* Index into perf table */
420
	int result = 0;
421 422

	if (unlikely(data == NULL ||
423
	     data->acpi_data == NULL || data->freq_table == NULL)) {
424 425
		return -ENODEV;
	}
L
Linus Torvalds 已提交
426

427
	perf = data->acpi_data;
428
	next_perf_state = data->freq_table[index].driver_data;
429
	if (perf->state == next_perf_state) {
430
		if (unlikely(data->resume)) {
431
			pr_debug("Called after resume, resetting to P%d\n",
432
				next_perf_state);
433 434
			data->resume = 0;
		} else {
435
			pr_debug("Already at target state (P%d)\n",
436
				next_perf_state);
437
			goto out;
438
		}
439 440
	}

441 442 443 444
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
445
		cmd.val = (u32) perf->states[next_perf_state].control;
446
		break;
447 448 449 450 451
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
452 453 454 455 456 457 458
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
459 460
		result = -ENODEV;
		goto out;
461
	}
462

463
	/* cpufreq holds the hotplug lock, so we are safe from here on */
464
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
465
		cmd.mask = policy->cpus;
466
	else
467
		cmd.mask = cpumask_of(policy->cpu);
468

469
	drv_write(&cmd);
470

471
	if (acpi_pstate_strict) {
472 473
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
474
			pr_debug("acpi_cpufreq_target failed (%d)\n",
475
				policy->cpu);
476
			result = -EAGAIN;
477 478 479
		}
	}

480 481
	if (!result)
		perf->state = next_perf_state;
482

483
out:
484
	return result;
L
Linus Torvalds 已提交
485 486 487
}

static unsigned long
488
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
489
{
490
	struct acpi_processor_performance *perf = data->acpi_data;
491

L
Linus Torvalds 已提交
492 493 494 495
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
496
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
497

498
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
499
			freq = freqn;
500
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
501
			if ((2 * cpu_khz) > (freqn + freq)) {
502
				perf->state = i;
503
				return freq;
L
Linus Torvalds 已提交
504 505
			}
		}
506
		perf->state = perf->state_count-1;
507
		return freqn;
508
	} else {
L
Linus Torvalds 已提交
509
		/* assume CPU is at P0... */
510 511 512
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
513 514
}

515 516 517 518 519 520 521 522 523 524 525
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
544
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


/* Notifier block that routes CPU notifier events to boost_notify(). */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

564 565 566 567 568 569 570 571
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
572
static int __init acpi_cpufreq_early_init(void)
573
{
574
	unsigned int i;
575
	pr_debug("acpi_cpufreq_early_init\n");
576

577 578
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
579
		pr_debug("Memory allocation error for acpi_perf_data.\n");
580
		return -ENOMEM;
581
	}
582
	for_each_possible_cpu(i) {
583
		if (!zalloc_cpumask_var_node(
584 585
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
586 587 588 589 590 591

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
592 593

	/* Do initialization in ACPI core */
594 595
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
596 597
}

598
#ifdef CONFIG_SMP
599 600 601 602 603 604 605 606
/*
 * Some BIOSes perform SW_ANY coordination internally, either by setting
 * it up in hardware or by doing it in BIOS firmware, without informing
 * the OS about it. If this goes undetected, the CPU can end up running at
 * a different speed than the OS intended. Detect it and handle it cleanly.
 *
 * Set to 1 by sw_any_bug_found() when the DMI table below matches.
 */
static int bios_with_sw_any_bug;

607
static int sw_any_bug_found(const struct dmi_system_id *d)
608 609 610 611 612
{
	bios_with_sw_any_bug = 1;
	return 0;
}

613
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
614 615 616 617 618 619 620 621 622 623 624
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
625 626 627

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
628 629
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
630 631
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
632
	 * Both Processor Cores to Lock Up. */
633 634 635
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
636 637 638 639 640
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
641
			return -ENODEV;
642
		    }
643 644 645
		}
	return 0;
}
646
#endif
647

648
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
649
{
650 651 652 653 654
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
655
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
656
	struct acpi_processor_performance *perf;
657 658 659
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
660

661
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
662

663
#ifdef CONFIG_SMP
664 665 666 667 668
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
669 670
#endif

671
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
672
	if (!data)
673
		return -ENOMEM;
L
Linus Torvalds 已提交
674

675 676 677 678 679
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

680
	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
681
	data->acpi_perf_cpu = cpu;
682
	policy->driver_data = data;
L
Linus Torvalds 已提交
683

684
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
685
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
686

687
	result = acpi_processor_register_performance(data->acpi_data, cpu);
L
Linus Torvalds 已提交
688
	if (result)
689
		goto err_free_mask;
L
Linus Torvalds 已提交
690

691 692
	perf = data->acpi_data;
	policy->shared_type = perf->shared_type;
693

694
	/*
695
	 * Will let policy->cpus know about dependency only when software
696 697 698
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
699
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
700
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
701
	}
702
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
703 704 705

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
706
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
707
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
708
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
709
	}
710 711 712 713

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
714 715
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
716 717 718
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
719
#endif
720

L
Linus Torvalds 已提交
721
	/* capability check */
722
	if (perf->state_count <= 1) {
723
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
724 725 726
		result = -ENODEV;
		goto err_unreg;
	}
727

728 729 730 731 732 733
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
734
	case ACPI_ADR_SPACE_SYSTEM_IO:
735 736 737 738 739 740
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
741
		pr_debug("SYSTEM IO addr space\n");
742 743
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
744
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
745
		pr_debug("HARDWARE addr space\n");
746 747 748
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
749
		}
750 751 752 753 754 755
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
756
	default:
757
		pr_debug("Unknown addr space %d\n",
758
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
759 760 761 762
		result = -ENODEV;
		goto err_unreg;
	}

763
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
764
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
765 766 767 768 769 770 771
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
772
	for (i = 0; i < perf->state_count; i++) {
773 774 775 776
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
777 778
	}

779 780 781 782
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
783 784
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
785 786
	}

L
Linus Torvalds 已提交
787
	/* table init */
788 789
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
790
		    data->freq_table[valid_states-1].frequency / 1000)
791 792
			continue;

793
		data->freq_table[valid_states].driver_data = i;
794
		data->freq_table[valid_states].frequency =
795
		    perf->states[i].core_frequency * 1000;
796
		valid_states++;
L
Linus Torvalds 已提交
797
	}
798
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
799
	perf->state = 0;
L
Linus Torvalds 已提交
800

801
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
802
	if (result)
L
Linus Torvalds 已提交
803 804
		goto err_freqfree;

805 806 807
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

808
	switch (perf->control_register.space_id) {
809
	case ACPI_ADR_SPACE_SYSTEM_IO:
810 811 812 813 814 815
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
816 817
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
818
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
819
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
820
		break;
821
	default:
822 823 824
		break;
	}

L
Linus Torvalds 已提交
825 826 827
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

828
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
829
	for (i = 0; i < perf->state_count; i++)
830
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
831
			(i == perf->state ? '*' : ' '), i,
832 833 834
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
835

836 837 838 839 840
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
841

842
	return result;
L
Linus Torvalds 已提交
843

844
err_freqfree:
L
Linus Torvalds 已提交
845
	kfree(data->freq_table);
846
err_unreg:
847
	acpi_processor_unregister_performance(perf, cpu);
848 849
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
850
err_free:
L
Linus Torvalds 已提交
851
	kfree(data);
852
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
853

854
	return result;
L
Linus Torvalds 已提交
855 856
}

857
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
858
{
859
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
860

861
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
862 863

	if (data) {
864
		policy->driver_data = NULL;
865
		acpi_processor_unregister_performance(data->acpi_data,
866
						      data->acpi_perf_cpu);
867
		free_cpumask_var(data->freqdomain_cpus);
868
		kfree(data->freq_table);
L
Linus Torvalds 已提交
869 870 871
		kfree(data);
	}

872
	return 0;
L
Linus Torvalds 已提交
873 874
}

875
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
876
{
877
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
878

879
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
880 881 882

	data->resume = 1;

883
	return 0;
L
Linus Torvalds 已提交
884 885
}

886
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
887
	&cpufreq_freq_attr_scaling_available_freqs,
888
	&freqdomain_cpus,
889
	NULL,	/* this is a placeholder for cpb, do not remove */
L
Linus Torvalds 已提交
890 891 892 893
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
894
	.verify		= cpufreq_generic_frequency_table_verify,
895
	.target_index	= acpi_cpufreq_target,
896 897 898 899 900 901
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
902
	.set_boost      = _store_boost,
L
Linus Torvalds 已提交
903 904
};

905 906 907 908 909 910 911 912
/*
 * Set up boost support when the boot CPU has X86_FEATURE_CPB or
 * X86_FEATURE_IDA.
 *
 * Allocates the per-cpu MSR buffers, records the boost state of CPU 0 as
 * the driver-wide state, forces that state onto all online CPUs and
 * registers the CPU notifier.  If the buffers cannot be allocated, boost
 * support is silently left disabled.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
		msrs = msrs_alloc();

		if (!msrs)
			return;

		acpi_cpufreq_driver.boost_supported = true;
		acpi_cpufreq_driver.boost_enabled = boost_state(0);

		cpu_notifier_register_begin();

		/* Force all MSRs to the same value */
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
			       cpu_online_mask);

		__register_cpu_notifier(&boost_nb);

		cpu_notifier_register_done();
	}
}

928
static void acpi_cpufreq_boost_exit(void)
929 930 931 932 933 934 935 936 937
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

938
/*
 * Module entry point.
 *
 * Returns -ENODEV when ACPI is disabled, -EEXIST when another cpufreq
 * driver is already registered, the error from early init, or the result
 * of cpufreq_register_driver().  If registration fails, the per-cpu ACPI
 * data and the boost setup are torn down again.
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (check_amd_hwpstate_cpu(0)) {
		struct freq_attr **iter;

		pr_debug("adding sysfs entry for cpb\n");

		/* Advance to the first NULL slot of the attribute list. */
		for (iter = acpi_cpufreq_attr; *iter != NULL; iter++)
			;

		/* make sure there is a terminator behind it */
		if (iter[1] == NULL)
			*iter = &cpb;
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret) {
		free_acpi_perf_data();
		acpi_cpufreq_boost_exit();
	}
	return ret;
}

985
/*
 * Module exit: tear down boost support, unregister the cpufreq driver
 * and free the per-cpu ACPI performance data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	free_acpi_perf_data();
}

996
module_param(acpi_pstate_strict, uint, 0644);
997
MODULE_PARM_DESC(acpi_pstate_strict,
998 999
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
1000 1001 1002 1003

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

1004 1005 1006 1007 1008 1009 1010
/*
 * CPU feature match table published through MODULE_DEVICE_TABLE(x86cpu):
 * CPUs with X86_FEATURE_ACPI or X86_FEATURE_HW_PSTATE.  The empty entry
 * terminates the table.
 */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1011 1012 1013 1014 1015 1016 1017
/*
 * ACPI device match table published through MODULE_DEVICE_TABLE(acpi):
 * the ACPI processor object and processor device HIDs.  The empty entry
 * terminates the table.
 */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1018
/* Additionally make the module known under the alias "acpi". */
MODULE_ALIAS("acpi");