/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54
#define PFX "acpi-cpufreq: "

55 56 57
/*
 * Register access method used for P-state control on a CPU.  The value is
 * stored in acpi_cpufreq_data.cpu_feature and drv_cmd.type.
 */
enum {
	UNDEFINED_CAPABLE		= 0,	/* not determined (kzalloc default) */
	SYSTEM_INTEL_MSR_CAPABLE	= 1,	/* MSR_IA32_PERF_CTL */
	SYSTEM_AMD_MSR_CAPABLE		= 2,	/* MSR_AMD_PERF_CTL */
	SYSTEM_IO_CAPABLE		= 3,	/* ACPI system I/O port */
};

#define INTEL_MSR_RANGE		(0xffff)
63
#define AMD_MSR_RANGE		(0x7)
64

65 66
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
71
	unsigned int acpi_perf_cpu;
72
	cpumask_var_t freqdomain_cpus;
L
Linus Torvalds 已提交
73 74
};

75
/* acpi_perf_data is a pointer to percpu data. */
76
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
77

78 79 80 81 82
/* Return the per-cpu ACPI performance data associated with @data. */
static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
{
	unsigned int owner_cpu = data->acpi_perf_cpu;

	return per_cpu_ptr(acpi_perf_data, owner_cpu);
}

L
Linus Torvalds 已提交
83 84
static struct cpufreq_driver acpi_cpufreq_driver;

85
static unsigned int acpi_pstate_strict;
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * Enable or disable boosting on every CPU in @cpumask.
 *
 * The vendor specific MSR is read on all CPUs of the mask into the per-cpu
 * 'msrs' buffer, the "boost disable" bit is cleared (@enable) or set
 * (!@enable) in each copy, and the result is written back.  Does nothing on
 * vendors other than Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u64 disable_bit;
	u32 reg_addr;
	u32 cpu;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		reg_addr = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		reg_addr = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	rdmsr_on_cpus(cpumask, reg_addr, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *reg = per_cpu_ptr(msrs, cpu);

		reg->q = enable ? (reg->q & ~disable_bit)
				: (reg->q | disable_bit);
	}

	wrmsr_on_cpus(cpumask, reg_addr, msrs);
}

138
static int set_boost(int val)
139 140 141 142 143 144
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

145
	return 0;
146 147
}

148 149
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
150
	struct acpi_cpufreq_data *data = policy->driver_data;
151

152 153 154
	if (unlikely(!data))
		return -ENODEV;

155 156 157 158 159
	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

160
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
161 162
/*
 * sysfs store handler for the legacy "cpb" attribute.
 *
 * Accepts "0" or "1" and applies that boost state to all online CPUs via
 * set_boost().  Returns @count on success, -EINVAL if boosting is not
 * supported by the driver or the input is not 0/1, or the error reported
 * by set_boost().
 *
 * On success the driver's cached boost_enabled flag is updated as well:
 * show_cpb() reports that flag and boost_notify() uses it to program CPUs
 * that come online later, so leaving it stale would make both disagree
 * with the state just written to the hardware.
 * NOTE(review): the flag is written here without any lock; confirm this is
 * acceptable with respect to other writers of boost_enabled.
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	unsigned int val = 0;
	int ret;

	if (!acpi_cpufreq_driver.set_boost)
		return -EINVAL;

	ret = kstrtouint(buf, 10, &val);
	if (ret || val > 1)
		return -EINVAL;

	ret = set_boost(val);
	if (ret)
		return ret;

	acpi_cpufreq_driver.boost_enabled = val;

	return count;
}

179 180
static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
181
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
182 183
}

184
cpufreq_freq_attr_rw(cpb);
185 186
#endif

187 188
static int check_est_cpu(unsigned int cpuid)
{
189
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
190

191
	return cpu_has(cpu, X86_FEATURE_EST);
192 193
}

194 195 196 197 198 199 200
/* Non-zero if CPU @cpuid advertises X86_FEATURE_HW_PSTATE. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *info;

	info = &cpu_data(cpuid);
	return cpu_has(info, X86_FEATURE_HW_PSTATE);
}

201
/*
 * Translate a status value read from the I/O port into a frequency.
 *
 * @value: raw value read from the status/control port
 * @data:  driver data of the policy
 *
 * Returns the frequency of the first frequency-table entry whose ACPI
 * performance state has a matching status value, or 0 if none matches.
 *
 * The frequency table is walked and mapped back to the performance state
 * through ->driver_data (as extract_msr() does) instead of indexing the
 * table with the performance-state index: table construction may skip
 * states, so the two index spaces are not guaranteed to line up.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *pos;

	cpufreq_for_each_entry(pos, data->freq_table)
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;

	return 0;
}

215 216
/*
 * Translate a raw PERF_CTL MSR value into a frequency.
 *
 * The value is masked with the vendor specific range and compared with the
 * status of the performance state behind each frequency-table entry.
 * Falls back to the first table entry when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf;
	struct cpufreq_frequency_table *entry;
	u32 range;

	range = (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ?
		AMD_MSR_RANGE : INTEL_MSR_RANGE;
	msr &= range;

	perf = to_perf_data(data);

	cpufreq_for_each_entry(entry, data->freq_table) {
		if (perf->states[entry->driver_data].status == msr)
			return entry->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * Convert a raw register value into a frequency using the decoder that
 * matches the policy's access method.  Returns 0 for an unknown method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	unsigned int feature = data->cpu_feature;

	if (feature == SYSTEM_INTEL_MSR_CAPABLE ||
	    feature == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(val, data);

	if (feature == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

/* Target of an MSR based command. */
struct msr_addr {
	u32 reg;	/* MSR number */
};

250 251 252 253 254 255
/* Target of an I/O port based command. */
struct io_addr {
	u16 port;	/* port address */
	u8 bit_width;	/* access width passed to acpi_os_{read,write}_port() */
};

struct drv_cmd {
256
	unsigned int type;
257
	const struct cpumask *mask;
258 259 260 261
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
262 263 264
	u32 val;
};

265 266
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
267
{
268
	struct drv_cmd *cmd = _cmd;
269 270 271
	u32 h;

	switch (cmd->type) {
272
	case SYSTEM_INTEL_MSR_CAPABLE:
273
	case SYSTEM_AMD_MSR_CAPABLE:
274 275
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
276
	case SYSTEM_IO_CAPABLE:
277 278 279
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
280
		break;
281
	default:
282 283
		break;
	}
284
}
L
Linus Torvalds 已提交
285

286 287
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
288
{
289
	struct drv_cmd *cmd = _cmd;
290
	u32 lo, hi;
291 292

	switch (cmd->type) {
293
	case SYSTEM_INTEL_MSR_CAPABLE:
294 295 296
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
297
		break;
298 299 300
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
301
	case SYSTEM_IO_CAPABLE:
302 303 304
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
305
		break;
306
	default:
307 308
		break;
	}
309
}
L
Linus Torvalds 已提交
310

311
static void drv_read(struct drv_cmd *cmd)
312
{
313
	int err;
314 315
	cmd->val = 0;

316 317
	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
318 319 320 321
}

static void drv_write(struct drv_cmd *cmd)
{
322 323 324 325 326
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
327
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
328
	put_cpu();
329
}
L
Linus Torvalds 已提交
330

331 332
static u32
get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
333
{
334 335
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
336

337
	if (unlikely(cpumask_empty(mask)))
338
		return 0;
L
Linus Torvalds 已提交
339

340
	switch (data->cpu_feature) {
341 342
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
343
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
344
		break;
345 346
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
347
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
348
		break;
349 350
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
351
		perf = to_perf_data(data);
352 353 354 355 356 357 358
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

359
	cmd.mask = mask;
360
	drv_read(&cmd);
L
Linus Torvalds 已提交
361

362
	pr_debug("get_cur_val = %u\n", cmd.val);
363 364 365

	return cmd.val;
}
L
Linus Torvalds 已提交
366

367 368
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
369 370
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
371
	unsigned int freq;
372
	unsigned int cached_freq;
373

374
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
375

376
	policy = cpufreq_cpu_get_raw(cpu);
377 378 379 380
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
381
	if (unlikely(!data || !data->freq_table))
382
		return 0;
L
Linus Torvalds 已提交
383

384
	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
385
	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
386 387 388 389 390 391 392 393
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

394
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
395

396
	return freq;
L
Linus Torvalds 已提交
397 398
}

399
/*
 * Poll the hardware until it reports @freq.
 *
 * Reads the current value up to 100 times with a 10us delay after each
 * mismatch.  Returns 1 once the decoded frequency equals @freq, 0 if it
 * never does.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int attempts_left = 100;

	while (attempts_left--) {
		unsigned int now = extract_freq(get_cur_val(mask, data), data);

		if (now == freq)
			return 1;
		udelay(10);
	}

	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
415
			       unsigned int index)
L
Linus Torvalds 已提交
416
{
417
	struct acpi_cpufreq_data *data = policy->driver_data;
418 419
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
420
	unsigned int next_perf_state = 0; /* Index into perf table */
421
	int result = 0;
422

423
	if (unlikely(data == NULL || data->freq_table == NULL)) {
424 425
		return -ENODEV;
	}
L
Linus Torvalds 已提交
426

427
	perf = to_perf_data(data);
428
	next_perf_state = data->freq_table[index].driver_data;
429
	if (perf->state == next_perf_state) {
430
		if (unlikely(data->resume)) {
431
			pr_debug("Called after resume, resetting to P%d\n",
432
				next_perf_state);
433 434
			data->resume = 0;
		} else {
435
			pr_debug("Already at target state (P%d)\n",
436
				next_perf_state);
437
			return 0;
438
		}
439 440
	}

441 442 443 444
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
445
		cmd.val = (u32) perf->states[next_perf_state].control;
446
		break;
447 448 449 450 451
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
452 453 454 455 456 457 458
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
459
		return -ENODEV;
460
	}
461

462
	/* cpufreq holds the hotplug lock, so we are safe from here on */
463
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
464
		cmd.mask = policy->cpus;
465
	else
466
		cmd.mask = cpumask_of(policy->cpu);
467

468
	drv_write(&cmd);
469

470
	if (acpi_pstate_strict) {
471 472
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
473
			pr_debug("acpi_cpufreq_target failed (%d)\n",
474
				policy->cpu);
475
			result = -EAGAIN;
476 477 478
		}
	}

479 480
	if (!result)
		perf->state = next_perf_state;
481 482

	return result;
L
Linus Torvalds 已提交
483 484 485
}

static unsigned long
486
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
487
{
488
	struct acpi_processor_performance *perf;
489

490
	perf = to_perf_data(data);
L
Linus Torvalds 已提交
491 492 493 494
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
495
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
496

497
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
498
			freq = freqn;
499
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
500
			if ((2 * cpu_khz) > (freqn + freq)) {
501
				perf->state = i;
502
				return freq;
L
Linus Torvalds 已提交
503 504
			}
		}
505
		perf->state = perf->state_count-1;
506
		return freqn;
507
	} else {
L
Linus Torvalds 已提交
508
		/* assume CPU is at P0... */
509 510 511
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
512 513
}

514 515 516 517 518 519 520 521 522 523 524
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
543
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


/* Hotplug notifier block dispatching to boost_notify(). */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

563 564 565 566 567 568 569 570
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
571
static int __init acpi_cpufreq_early_init(void)
572
{
573
	unsigned int i;
574
	pr_debug("acpi_cpufreq_early_init\n");
575

576 577
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
578
		pr_debug("Memory allocation error for acpi_perf_data.\n");
579
		return -ENOMEM;
580
	}
581
	for_each_possible_cpu(i) {
582
		if (!zalloc_cpumask_var_node(
583 584
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
585 586 587 588 589 590

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
591 592

	/* Do initialization in ACPI core */
593 594
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
595 596
}

597
#ifdef CONFIG_SMP
598 599 600 601 602 603 604 605
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

606
static int sw_any_bug_found(const struct dmi_system_id *d)
607 608 609 610 611
{
	bios_with_sw_any_bug = 1;
	return 0;
}

612
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
613 614 615 616 617 618 619 620 621 622 623
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
624 625 626

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
627 628
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
629 630
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
631
	 * Both Processor Cores to Lock Up. */
632 633 634
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
635 636 637 638 639
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
640
			return -ENODEV;
641
		    }
642 643 644
		}
	return 0;
}
645
#endif
646

647
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
648
{
649 650 651 652 653
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
654
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
655
	struct acpi_processor_performance *perf;
656 657 658
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
659

660
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
661

662
#ifdef CONFIG_SMP
663 664 665 666 667
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
668 669
#endif

670
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
671
	if (!data)
672
		return -ENOMEM;
L
Linus Torvalds 已提交
673

674 675 676 677 678
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

679
	perf = per_cpu_ptr(acpi_perf_data, cpu);
680
	data->acpi_perf_cpu = cpu;
681
	policy->driver_data = data;
L
Linus Torvalds 已提交
682

683
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
684
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
685

686
	result = acpi_processor_register_performance(perf, cpu);
L
Linus Torvalds 已提交
687
	if (result)
688
		goto err_free_mask;
L
Linus Torvalds 已提交
689

690
	policy->shared_type = perf->shared_type;
691

692
	/*
693
	 * Will let policy->cpus know about dependency only when software
694 695 696
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
697
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
698
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
699
	}
700
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
701 702 703

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
704
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
705
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
706
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
707
	}
708 709 710 711

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
712 713
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
714 715 716
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
717
#endif
718

L
Linus Torvalds 已提交
719
	/* capability check */
720
	if (perf->state_count <= 1) {
721
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
722 723 724
		result = -ENODEV;
		goto err_unreg;
	}
725

726 727 728 729 730 731
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
732
	case ACPI_ADR_SPACE_SYSTEM_IO:
733 734 735 736 737 738
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
739
		pr_debug("SYSTEM IO addr space\n");
740 741
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
742
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
743
		pr_debug("HARDWARE addr space\n");
744 745 746
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
747
		}
748 749 750 751 752 753
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
754
	default:
755
		pr_debug("Unknown addr space %d\n",
756
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
757 758 759 760
		result = -ENODEV;
		goto err_unreg;
	}

761
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
762
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
763 764 765 766 767 768 769
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
770
	for (i = 0; i < perf->state_count; i++) {
771 772 773 774
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
775 776
	}

777 778 779 780
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
781 782
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
783 784
	}

L
Linus Torvalds 已提交
785
	/* table init */
786 787
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
788
		    data->freq_table[valid_states-1].frequency / 1000)
789 790
			continue;

791
		data->freq_table[valid_states].driver_data = i;
792
		data->freq_table[valid_states].frequency =
793
		    perf->states[i].core_frequency * 1000;
794
		valid_states++;
L
Linus Torvalds 已提交
795
	}
796
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
797
	perf->state = 0;
L
Linus Torvalds 已提交
798

799
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
800
	if (result)
L
Linus Torvalds 已提交
801 802
		goto err_freqfree;

803 804 805
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

806
	switch (perf->control_register.space_id) {
807
	case ACPI_ADR_SPACE_SYSTEM_IO:
808 809 810 811 812 813
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
814 815
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
816
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
817
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
818
		break;
819
	default:
820 821 822
		break;
	}

L
Linus Torvalds 已提交
823 824 825
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

826
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
827
	for (i = 0; i < perf->state_count; i++)
828
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
829
			(i == perf->state ? '*' : ' '), i,
830 831 832
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
833

834 835 836 837 838
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
839

840
	return result;
L
Linus Torvalds 已提交
841

842
err_freqfree:
L
Linus Torvalds 已提交
843
	kfree(data->freq_table);
844
err_unreg:
845
	acpi_processor_unregister_performance(cpu);
846 847
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
848
err_free:
L
Linus Torvalds 已提交
849
	kfree(data);
850
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
851

852
	return result;
L
Linus Torvalds 已提交
853 854
}

855
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
856
{
857
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
858

859
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
860 861

	if (data) {
862
		policy->driver_data = NULL;
863
		acpi_processor_unregister_performance(data->acpi_perf_cpu);
864
		free_cpumask_var(data->freqdomain_cpus);
865
		kfree(data->freq_table);
L
Linus Torvalds 已提交
866 867 868
		kfree(data);
	}

869
	return 0;
L
Linus Torvalds 已提交
870 871
}

872
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
873
{
874
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
875

876
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
877 878 879

	data->resume = 1;

880
	return 0;
L
Linus Torvalds 已提交
881 882
}

883
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
884
	&cpufreq_freq_attr_scaling_available_freqs,
885
	&freqdomain_cpus,
886 887 888
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
L
Linus Torvalds 已提交
889 890 891 892
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
893
	.verify		= cpufreq_generic_frequency_table_verify,
894
	.target_index	= acpi_cpufreq_target,
895 896 897 898 899 900
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
L
Linus Torvalds 已提交
901 902
};

903 904 905 906 907 908 909 910
/*
 * Set up boost support when the boot CPU has X86_FEATURE_CPB or
 * X86_FEATURE_IDA.
 *
 * Allocates the per-cpu MSR buffer, publishes set_boost() and the initial
 * boost state (taken from CPU 0) in the driver structure, brings all online
 * CPUs to that state and registers the hotplug notifier.  Silently leaves
 * boost support disabled if the MSR buffer cannot be allocated.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	msrs = msrs_alloc();
	if (!msrs)
		return;

	acpi_cpufreq_driver.set_boost = set_boost;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	cpu_notifier_register_begin();

	/* Force all MSRs to the same value */
	boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
		       cpu_online_mask);

	__register_cpu_notifier(&boost_nb);

	cpu_notifier_register_done();
}

926
static void acpi_cpufreq_boost_exit(void)
927 928 929 930 931 932 933 934 935
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

936
/*
 * Module entry point.
 *
 * Returns -ENODEV when ACPI is disabled, -EEXIST when another cpufreq
 * driver is already registered, or the error from early init / driver
 * registration.  If registration fails, the per-cpu performance data and
 * the boost setup are torn down again.
 */
static int __init acpi_cpufreq_init(void)
{
	int err;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	err = acpi_cpufreq_early_init();
	if (err)
		return err;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **slot = acpi_cpufreq_attr;

		pr_debug("CPB unsupported, do not expose it\n");

		/* Truncate the attribute list at the cpb entry. */
		while (*slot) {
			if (*slot == &cpb) {
				*slot = NULL;
				break;
			}
			slot++;
		}
	}
#endif
	acpi_cpufreq_boost_init();

	err = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (err) {
		free_acpi_perf_data();
		acpi_cpufreq_boost_exit();
	}

	return err;
}

982
/*
 * Module exit: tear down boost support, unregister the cpufreq driver and
 * free the per-cpu performance data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	/* Drops the hotplug notifier and the MSR buffer. */
	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	/* Allocated by acpi_cpufreq_early_init(). */
	free_acpi_perf_data();
}

993
module_param(acpi_pstate_strict, uint, 0644);
994
MODULE_PARM_DESC(acpi_pstate_strict,
995 996
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
997 998 999 1000

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

1001 1002 1003 1004 1005 1006 1007
/*
 * x86 CPU feature match table (ACPI or hardware P-state feature bit),
 * terminated by an empty entry.
 */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1008 1009 1010 1011 1012 1013 1014
/*
 * ACPI device ID table listing the processor object and processor device
 * HIDs, terminated by an empty entry.
 */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1015
MODULE_ALIAS("acpi");