acpi-cpufreq.c 24.2 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2
 * acpi-cpufreq.c - ACPI Processor P-States Driver
L
Linus Torvalds 已提交
3 4 5 6
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
7
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
L
Linus Torvalds 已提交
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54
/* Prefix used in front of this driver's log messages. */
#define PFX "acpi-cpufreq: "

/*
 * Register access method recorded in acpi_cpufreq_data.cpu_feature and
 * drv_cmd.type.
 */
enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

/* Masks applied to MSR values before they are compared or written. */
#define INTEL_MSR_RANGE		(0xffff)
#define AMD_MSR_RANGE		(0x7)

/* Boost-disable bit toggled in MSR_K7_HWCR by the boost helpers. */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
71
	unsigned int acpi_perf_cpu;
72
	cpumask_var_t freqdomain_cpus;
L
Linus Torvalds 已提交
73 74
};

75
/* acpi_perf_data is a pointer to percpu data. */
76
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
77

78 79 80 81 82
/* Return the per-CPU ACPI performance record that @data refers to. */
static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
{
	unsigned int perf_cpu = data->acpi_perf_cpu;

	return per_cpu_ptr(acpi_perf_data, perf_cpu);
}

L
Linus Torvalds 已提交
83 84
static struct cpufreq_driver acpi_cpufreq_driver;

85
static unsigned int acpi_pstate_strict;
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * Enable or disable boosting on every CPU in @cpumask.
 *
 * Reads the vendor's boost MSR on all CPUs of the mask into the per-CPU
 * "msrs" buffers, clears (enable) or sets (disable) the boost-disable bit
 * in each copy, then writes the values back. Does nothing on vendors other
 * than Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 cpu;
	u32 reg;
	u64 disable_bit;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		reg = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		reg = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	rdmsr_on_cpus(cpumask, reg, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *val = per_cpu_ptr(msrs, cpu);

		if (!enable)
			val->q |= disable_bit;
		else
			val->q &= ~disable_bit;
	}

	wrmsr_on_cpus(cpumask, reg, msrs);
}

138
static int set_boost(int val)
139 140 141 142 143 144
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

145
	return 0;
146 147
}

148 149
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
150
	struct acpi_cpufreq_data *data = policy->driver_data;
151

152 153 154
	if (unlikely(!data))
		return -ENODEV;

155 156 157 158 159
	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

160
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
161 162
/*
 * sysfs write handler for "cpb": accepts decimal "0" or "1" and applies it
 * through set_boost().
 *
 * Returns @count on success, -EINVAL when the driver has no set_boost
 * callback installed or the input is not 0/1.
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	unsigned int enable = 0;

	if (!acpi_cpufreq_driver.set_boost)
		return -EINVAL;

	if (kstrtouint(buf, 10, &enable) || enable > 1)
		return -EINVAL;

	set_boost(enable);

	return count;
}

179 180
static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
181
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
182 183
}

184
cpufreq_freq_attr_rw(cpb);
185 186
#endif

187 188
static int check_est_cpu(unsigned int cpuid)
{
189
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
190

191
	return cpu_has(cpu, X86_FEATURE_EST);
192 193
}

194 195 196 197 198 199 200
/* Non-zero when CPU @cpuid advertises X86_FEATURE_HW_PSTATE. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	return cpu_has(&cpu_data(cpuid), X86_FEATURE_HW_PSTATE);
}

201
/*
 * Translate a status value read from the I/O port into a frequency.
 *
 * @value: raw value read from the port
 * @data:  driver data holding the frequency table
 *
 * Walks the frequency table and compares @value with the status of the
 * ACPI perf state each entry refers to (entry->driver_data). The table is
 * not indexed by perf-state number: table initialisation may skip states,
 * so table position and perf-state index can differ. This mirrors the
 * lookup done in extract_msr().
 *
 * Returns the matching table frequency, or 0 when no entry matches.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *pos;

	cpufreq_for_each_entry(pos, data->freq_table)
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;

	return 0;
}

215 216
/*
 * Translate a P-state MSR value into a frequency.
 *
 * The value is first masked with the vendor's range (AMD_MSR_RANGE on AMD,
 * INTEL_MSR_RANGE otherwise) and then compared with the status of the perf
 * state behind each frequency-table entry. Falls back to the first table
 * entry's frequency when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *entry;
	u32 range = INTEL_MSR_RANGE;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		range = AMD_MSR_RANGE;
	msr &= range;

	cpufreq_for_each_entry(entry, data->freq_table) {
		if (perf->states[entry->driver_data].status == msr)
			return entry->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * Convert a raw register value into a frequency using the decoder that
 * matches the policy's access method. Returns 0 for an unknown method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	unsigned int feature = data->cpu_feature;

	if (feature == SYSTEM_INTEL_MSR_CAPABLE ||
	    feature == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(val, data);

	if (feature == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

struct msr_addr {
	u32 reg;
};

250 251 252 253 254 255
struct io_addr {
	u16 port;
	u8 bit_width;
};

struct drv_cmd {
256
	unsigned int type;
257
	const struct cpumask *mask;
258 259 260 261
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
262 263 264
	u32 val;
};

265 266
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
267
{
268
	struct drv_cmd *cmd = _cmd;
269 270 271
	u32 h;

	switch (cmd->type) {
272
	case SYSTEM_INTEL_MSR_CAPABLE:
273
	case SYSTEM_AMD_MSR_CAPABLE:
274 275
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
276
	case SYSTEM_IO_CAPABLE:
277 278 279
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
280
		break;
281
	default:
282 283
		break;
	}
284
}
L
Linus Torvalds 已提交
285

286 287
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
288
{
289
	struct drv_cmd *cmd = _cmd;
290
	u32 lo, hi;
291 292

	switch (cmd->type) {
293
	case SYSTEM_INTEL_MSR_CAPABLE:
294 295 296
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
297
		break;
298 299 300
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
301
	case SYSTEM_IO_CAPABLE:
302 303 304
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
305
		break;
306
	default:
307 308
		break;
	}
309
}
L
Linus Torvalds 已提交
310

311
static void drv_read(struct drv_cmd *cmd)
312
{
313
	int err;
314 315
	cmd->val = 0;

316 317
	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
318 319 320 321
}

static void drv_write(struct drv_cmd *cmd)
{
322 323 324 325 326
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
327
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
328
	put_cpu();
329
}
L
Linus Torvalds 已提交
330

331 332
static u32
get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
333
{
334 335
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
336

337
	if (unlikely(cpumask_empty(mask)))
338
		return 0;
L
Linus Torvalds 已提交
339

340
	switch (data->cpu_feature) {
341 342
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
343
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
344
		break;
345 346
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
347
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
348
		break;
349 350
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
351
		perf = to_perf_data(data);
352 353 354 355 356 357 358
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

359
	cmd.mask = mask;
360
	drv_read(&cmd);
L
Linus Torvalds 已提交
361

362
	pr_debug("get_cur_val = %u\n", cmd.val);
363 364 365

	return cmd.val;
}
L
Linus Torvalds 已提交
366

367 368
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
369 370
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
371
	unsigned int freq;
372
	unsigned int cached_freq;
373

374
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
375

376
	policy = cpufreq_cpu_get_raw(cpu);
377 378 379 380
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
381
	if (unlikely(!data || !data->freq_table))
382
		return 0;
L
Linus Torvalds 已提交
383

384
	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
385
	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
386 387 388 389 390 391 392 393
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

394
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
395

396
	return freq;
L
Linus Torvalds 已提交
397 398
}

399
/*
 * Poll until the hardware reports @freq.
 *
 * Reads and decodes the current value up to 100 times with a 10 us delay
 * after each mismatch. Returns 1 as soon as the decoded frequency equals
 * @freq, 0 if it never does.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int attempts_left = 100;

	while (attempts_left--) {
		if (extract_freq(get_cur_val(mask, data), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
415
			       unsigned int index)
L
Linus Torvalds 已提交
416
{
417
	struct acpi_cpufreq_data *data = policy->driver_data;
418 419
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
420
	unsigned int next_perf_state = 0; /* Index into perf table */
421
	int result = 0;
422

423
	if (unlikely(data == NULL || data->freq_table == NULL)) {
424 425
		return -ENODEV;
	}
L
Linus Torvalds 已提交
426

427
	perf = to_perf_data(data);
428
	next_perf_state = data->freq_table[index].driver_data;
429
	if (perf->state == next_perf_state) {
430
		if (unlikely(data->resume)) {
431
			pr_debug("Called after resume, resetting to P%d\n",
432
				next_perf_state);
433 434
			data->resume = 0;
		} else {
435
			pr_debug("Already at target state (P%d)\n",
436
				next_perf_state);
437
			goto out;
438
		}
439 440
	}

441 442 443 444
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
445
		cmd.val = (u32) perf->states[next_perf_state].control;
446
		break;
447 448 449 450 451
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
452 453 454 455 456 457 458
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
459 460
		result = -ENODEV;
		goto out;
461
	}
462

463
	/* cpufreq holds the hotplug lock, so we are safe from here on */
464
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
465
		cmd.mask = policy->cpus;
466
	else
467
		cmd.mask = cpumask_of(policy->cpu);
468

469
	drv_write(&cmd);
470

471
	if (acpi_pstate_strict) {
472 473
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
474
			pr_debug("acpi_cpufreq_target failed (%d)\n",
475
				policy->cpu);
476
			result = -EAGAIN;
477 478 479
		}
	}

480 481
	if (!result)
		perf->state = next_perf_state;
482

483
out:
484
	return result;
L
Linus Torvalds 已提交
485 486 487
}

static unsigned long
488
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
489
{
490
	struct acpi_processor_performance *perf;
491

492
	perf = to_perf_data(data);
L
Linus Torvalds 已提交
493 494 495 496
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
497
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
498

499
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
500
			freq = freqn;
501
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
502
			if ((2 * cpu_khz) > (freqn + freq)) {
503
				perf->state = i;
504
				return freq;
L
Linus Torvalds 已提交
505 506
			}
		}
507
		perf->state = perf->state_count-1;
508
		return freqn;
509
	} else {
L
Linus Torvalds 已提交
510
		/* assume CPU is at P0... */
511 512 513
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
514 515
}

516 517 518 519 520 521 522 523 524 525 526
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
545
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

565 566 567 568 569 570 571 572
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
573
static int __init acpi_cpufreq_early_init(void)
574
{
575
	unsigned int i;
576
	pr_debug("acpi_cpufreq_early_init\n");
577

578 579
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
580
		pr_debug("Memory allocation error for acpi_perf_data.\n");
581
		return -ENOMEM;
582
	}
583
	for_each_possible_cpu(i) {
584
		if (!zalloc_cpumask_var_node(
585 586
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
587 588 589 590 591 592

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
593 594

	/* Do initialization in ACPI core */
595 596
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
597 598
}

599
#ifdef CONFIG_SMP
600 601 602 603 604 605 606 607
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

608
static int sw_any_bug_found(const struct dmi_system_id *d)
609 610 611 612 613
{
	bios_with_sw_any_bug = 1;
	return 0;
}

614
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
615 616 617 618 619 620 621 622 623 624 625
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
626 627 628

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
629 630
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
631 632
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
633
	 * Both Processor Cores to Lock Up. */
634 635 636
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
637 638 639 640 641
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
642
			return -ENODEV;
643
		    }
644 645 646
		}
	return 0;
}
647
#endif
648

649
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
650
{
651 652 653 654 655
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
656
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
657
	struct acpi_processor_performance *perf;
658 659 660
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
661

662
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
663

664
#ifdef CONFIG_SMP
665 666 667 668 669
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
670 671
#endif

672
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
673
	if (!data)
674
		return -ENOMEM;
L
Linus Torvalds 已提交
675

676 677 678 679 680
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

681
	perf = per_cpu_ptr(acpi_perf_data, cpu);
682
	data->acpi_perf_cpu = cpu;
683
	policy->driver_data = data;
L
Linus Torvalds 已提交
684

685
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
686
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
687

688
	result = acpi_processor_register_performance(perf, cpu);
L
Linus Torvalds 已提交
689
	if (result)
690
		goto err_free_mask;
L
Linus Torvalds 已提交
691

692
	policy->shared_type = perf->shared_type;
693

694
	/*
695
	 * Will let policy->cpus know about dependency only when software
696 697 698
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
699
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
700
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
701
	}
702
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
703 704 705

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
706
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
707
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
708
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
709
	}
710 711 712 713

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
714 715
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
716 717 718
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
719
#endif
720

L
Linus Torvalds 已提交
721
	/* capability check */
722
	if (perf->state_count <= 1) {
723
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
724 725 726
		result = -ENODEV;
		goto err_unreg;
	}
727

728 729 730 731 732 733
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
734
	case ACPI_ADR_SPACE_SYSTEM_IO:
735 736 737 738 739 740
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
741
		pr_debug("SYSTEM IO addr space\n");
742 743
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
744
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
745
		pr_debug("HARDWARE addr space\n");
746 747 748
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
749
		}
750 751 752 753 754 755
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
756
	default:
757
		pr_debug("Unknown addr space %d\n",
758
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
759 760 761 762
		result = -ENODEV;
		goto err_unreg;
	}

763
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
764
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
765 766 767 768 769 770 771
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
772
	for (i = 0; i < perf->state_count; i++) {
773 774 775 776
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
777 778
	}

779 780 781 782
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
783 784
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
785 786
	}

L
Linus Torvalds 已提交
787
	/* table init */
788 789
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
790
		    data->freq_table[valid_states-1].frequency / 1000)
791 792
			continue;

793
		data->freq_table[valid_states].driver_data = i;
794
		data->freq_table[valid_states].frequency =
795
		    perf->states[i].core_frequency * 1000;
796
		valid_states++;
L
Linus Torvalds 已提交
797
	}
798
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
799
	perf->state = 0;
L
Linus Torvalds 已提交
800

801
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
802
	if (result)
L
Linus Torvalds 已提交
803 804
		goto err_freqfree;

805 806 807
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

808
	switch (perf->control_register.space_id) {
809
	case ACPI_ADR_SPACE_SYSTEM_IO:
810 811 812 813 814 815
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
816 817
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
818
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
819
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
820
		break;
821
	default:
822 823 824
		break;
	}

L
Linus Torvalds 已提交
825 826 827
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

828
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
829
	for (i = 0; i < perf->state_count; i++)
830
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
831
			(i == perf->state ? '*' : ' '), i,
832 833 834
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
835

836 837 838 839 840
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
841

842
	return result;
L
Linus Torvalds 已提交
843

844
err_freqfree:
L
Linus Torvalds 已提交
845
	kfree(data->freq_table);
846
err_unreg:
847
	acpi_processor_unregister_performance(cpu);
848 849
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
850
err_free:
L
Linus Torvalds 已提交
851
	kfree(data);
852
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
853

854
	return result;
L
Linus Torvalds 已提交
855 856
}

857
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
858
{
859
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
860

861
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
862 863

	if (data) {
864
		policy->driver_data = NULL;
865
		acpi_processor_unregister_performance(data->acpi_perf_cpu);
866
		free_cpumask_var(data->freqdomain_cpus);
867
		kfree(data->freq_table);
L
Linus Torvalds 已提交
868 869 870
		kfree(data);
	}

871
	return 0;
L
Linus Torvalds 已提交
872 873
}

874
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
875
{
876
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
877

878
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
879 880 881

	data->resume = 1;

882
	return 0;
L
Linus Torvalds 已提交
883 884
}

885
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
886
	&cpufreq_freq_attr_scaling_available_freqs,
887
	&freqdomain_cpus,
888 889 890
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
L
Linus Torvalds 已提交
891 892 893 894
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
895
	.verify		= cpufreq_generic_frequency_table_verify,
896
	.target_index	= acpi_cpufreq_target,
897 898 899 900 901 902
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
L
Linus Torvalds 已提交
903 904
};

905 906 907 908 909 910 911 912
/*
 * Set up boost support when the boot CPU has X86_FEATURE_CPB or
 * X86_FEATURE_IDA: allocate the MSR buffers, install set_boost(), take the
 * initial boost state from CPU 0, apply it to all online CPUs and register
 * the hotplug notifier. Silently does nothing if the feature is missing or
 * the buffers cannot be allocated.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	msrs = msrs_alloc();
	if (!msrs)
		return;

	acpi_cpufreq_driver.set_boost = set_boost;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	cpu_notifier_register_begin();

	/* Force all MSRs to the same value */
	boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
		       cpu_online_mask);

	__register_cpu_notifier(&boost_nb);

	cpu_notifier_register_done();
}

928
static void acpi_cpufreq_boost_exit(void)
929 930 931 932 933 934 935 936 937
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

938
/*
 * Module init: allocate the ACPI performance data, set up boost support
 * and register the driver with the cpufreq core.
 *
 * Returns -ENODEV when ACPI is disabled, -EEXIST when another cpufreq
 * driver is already registered, or the error of the failing step. If
 * registration fails, the performance data and boost state are torn down
 * again.
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **slot = acpi_cpufreq_attr;

		pr_debug("CPB unsupported, do not expose it\n");

		/* Terminate the attribute list at the cpb entry, if present. */
		while (*slot && *slot != &cpb)
			slot++;
		if (*slot)
			*slot = NULL;
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (!ret)
		return 0;

	free_acpi_perf_data();
	acpi_cpufreq_boost_exit();

	return ret;
}

984
/*
 * Module exit: tear down boost support, unregister the driver from the
 * cpufreq core and free the ACPI performance data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();
	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_acpi_perf_data();
}

995
module_param(acpi_pstate_strict, uint, 0644);
996
MODULE_PARM_DESC(acpi_pstate_strict,
997 998
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
999 1000 1001 1002

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

1003 1004 1005 1006 1007 1008 1009
/*
 * x86 CPU feature match table (ACPI or HW_PSTATE), published through
 * MODULE_DEVICE_TABLE(x86cpu, ...). It is not referenced elsewhere in the
 * visible code; presumably it exists for module autoloading - TODO confirm.
 */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1010 1011 1012 1013 1014 1015 1016
/*
 * ACPI processor object/device IDs, published through
 * MODULE_DEVICE_TABLE(acpi, ...). It is not referenced elsewhere in the
 * visible code; presumably it exists for module autoloading - TODO confirm.
 */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1017
MODULE_ALIAS("acpi");