/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

/* Prefix for log messages emitted by this driver. */
#define PFX "acpi-cpufreq: "

55 56 57
/*
 * Register access method recorded in acpi_cpufreq_data.cpu_feature.
 * The value selects how a raw register value is decoded into a frequency.
 */
enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

/* Bits of the PERF_CTL value that are compared/written on Intel CPUs. */
#define INTEL_MSR_RANGE		(0xffff)
/* Bits of the PERF_CTL value that are compared on AMD CPUs. */
#define AMD_MSR_RANGE		(0x7)

/* Boost-disable bit in MSR_K7_HWCR (set = Core Performance Boost off). */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
71
	unsigned int acpi_perf_cpu;
72
	cpumask_var_t freqdomain_cpus;
73 74
	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
L
Linus Torvalds 已提交
75 76
};

77
/* acpi_perf_data is a pointer to percpu data. */
78
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
79

80 81 82 83 84
/* Return the per-cpu ACPI performance entry that backs @data. */
static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
{
	unsigned int owner_cpu = data->acpi_perf_cpu;

	return per_cpu_ptr(acpi_perf_data, owner_cpu);
}

L
Linus Torvalds 已提交
85 86
static struct cpufreq_driver acpi_cpufreq_driver;

87
static unsigned int acpi_pstate_strict;
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
static struct msr __percpu *msrs;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

/*
 * Enable or disable boosting on every CPU in @cpumask by clearing or
 * setting the vendor-specific boost-disable bit. Does nothing on vendors
 * other than Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 cpu, msr_addr;
	u64 disable_bit;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		msr_addr = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		msr_addr = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	/* Read-modify-write through the shared per-cpu msrs buffer. */
	rdmsr_on_cpus(cpumask, msr_addr, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *entry = per_cpu_ptr(msrs, cpu);

		entry->q = enable ? (entry->q & ~disable_bit)
				  : (entry->q | disable_bit);
	}

	wrmsr_on_cpus(cpumask, msr_addr, msrs);
}

140
static int set_boost(int val)
141 142 143 144 145 146
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

147
	return 0;
148 149
}

150 151
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
152
	struct acpi_cpufreq_data *data = policy->driver_data;
153

154 155 156
	if (unlikely(!data))
		return -ENODEV;

157 158 159 160 161
	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

162
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
163 164
/*
 * sysfs store handler for the legacy "cpb" attribute.
 * Accepts "0" or "1" and applies it to all online CPUs via set_boost().
 * Returns @count on success, -EINVAL if boost is unsupported or the input
 * is not 0 or 1.
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	int ret;
	unsigned int val = 0;

	if (!acpi_cpufreq_driver.set_boost)
		return -EINVAL;

	ret = kstrtouint(buf, 10, &val);
	if (ret || val > 1)
		return -EINVAL;

	set_boost(val);

	return count;
}

181 182
static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
183
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
184 185
}

186
cpufreq_freq_attr_rw(cpb);
187 188
#endif

189 190
static int check_est_cpu(unsigned int cpuid)
{
191
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
192

193
	return cpu_has(cpu, X86_FEATURE_EST);
194 195
}

196 197 198 199 200 201 202
/* Return non-zero if CPU @cpuid advertises X86_FEATURE_HW_PSTATE. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *info = &cpu_data(cpuid);

	return cpu_has(info, X86_FEATURE_HW_PSTATE);
}

203
/*
 * Map a status value read from an I/O port to a frequency.
 * Returns the frequency-table entry at the index of the first P-state
 * whose status equals @value, or 0 if no P-state matches.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf;
	int i;

	perf = to_perf_data(data);

	for (i = 0; i < perf->state_count; i++) {
		if (value == perf->states[i].status)
			return data->freq_table[i].frequency;
	}
	return 0;
}

217 218
/*
 * Map a raw PERF_CTL MSR value to a frequency.
 * @msr is masked with the vendor-specific range, then compared with the
 * status of the P-state behind each frequency-table entry. Falls back to
 * the first table entry when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct cpufreq_frequency_table *pos;
	struct acpi_processor_performance *perf;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		msr &= AMD_MSR_RANGE;
	else
		msr &= INTEL_MSR_RANGE;

	perf = to_perf_data(data);

	cpufreq_for_each_entry(pos, data->freq_table)
		if (msr == perf->states[pos->driver_data].status)
			return pos->frequency;
	return data->freq_table[0].frequency;
}

/*
 * Decode raw register value @val into a frequency, using the decoder that
 * matches data->cpu_feature. Returns 0 for an unknown access method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		return extract_msr(val, data);
	case SYSTEM_IO_CAPABLE:
		return extract_io(val, data);
	default:
		return 0;
	}
}

248
static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
249 250
{
	u32 val, dummy;
251

252 253 254 255
	rdmsr(MSR_IA32_PERF_CTL, val, dummy);
	return val;
}

256
/*
 * Write @val into the INTEL_MSR_RANGE bits of MSR_IA32_PERF_CTL on the
 * current CPU, preserving every other bit of the register.
 */
static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
{
	u32 lo, hi;

	rdmsr(MSR_IA32_PERF_CTL, lo, hi);
	lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
	wrmsr(MSR_IA32_PERF_CTL, lo, hi);
}

265
static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
266 267 268 269 270 271 272
{
	u32 val, dummy;

	rdmsr(MSR_AMD_PERF_CTL, val, dummy);
	return val;
}

273
/* Write @val to MSR_AMD_PERF_CTL on the current CPU (high half zero). */
static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
{
	wrmsr(MSR_AMD_PERF_CTL, val, 0);
}

278
static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
279 280 281 282 283 284 285
{
	u32 val;

	acpi_os_read_port(reg->address, &val, reg->bit_width);
	return val;
}

286
/* Write @val to the I/O port described by @reg. */
static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
{
	acpi_os_write_port(reg->address, val, reg->bit_width);
}
290 291

struct drv_cmd {
292
	struct acpi_pct_register *reg;
293
	u32 val;
294 295 296 297
	union {
		void (*write)(struct acpi_pct_register *reg, u32 val);
		u32 (*read)(struct acpi_pct_register *reg);
	} func;
298 299
};

300 301
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
302
{
303
	struct drv_cmd *cmd = _cmd;
304

305
	cmd->val = cmd->func.read(cmd->reg);
306
}
L
Linus Torvalds 已提交
307

308
static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
309
{
310 311 312 313 314 315
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd cmd = {
		.reg = &perf->control_register,
		.func.read = data->cpu_freq_read,
	};
	int err;
316

317 318 319
	err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
	return cmd.val;
320
}
L
Linus Torvalds 已提交
321

322 323
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
324
{
325
	struct drv_cmd *cmd = _cmd;
326

327
	cmd->func.write(cmd->reg, cmd->val);
328 329
}

330 331
/*
 * Write @val to the performance control register on every CPU in @mask.
 * The local CPU is handled by a direct call when it is part of @mask;
 * smp_call_function_many() takes care of the other CPUs and waits for
 * them to finish.
 */
static void drv_write(struct acpi_cpufreq_data *data,
		      const struct cpumask *mask, u32 val)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd cmd = {
		.reg = &perf->control_register,
		.val = val,
		.func.write = data->cpu_freq_write,
	};
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, mask))
		do_drv_write(&cmd);

	smp_call_function_many(mask, do_drv_write, &cmd, 1);
	put_cpu();
}
L
Linus Torvalds 已提交
348

349
/*
 * Return the raw performance register value read from a CPU in @mask,
 * or 0 if @mask is empty.
 */
static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
{
	u32 val;

	if (unlikely(cpumask_empty(mask)))
		return 0;

	val = drv_read(data, mask);

	pr_debug("get_cur_val = %u\n", val);

	return val;
}
L
Linus Torvalds 已提交
362

363 364
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
365 366
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
367
	unsigned int freq;
368
	unsigned int cached_freq;
369

370
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
371

372
	policy = cpufreq_cpu_get_raw(cpu);
373 374 375 376
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
377
	if (unlikely(!data || !data->freq_table))
378
		return 0;
L
Linus Torvalds 已提交
379

380
	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
381
	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
382 383 384 385 386 387 388 389
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

390
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
391

392
	return freq;
L
Linus Torvalds 已提交
393 394
}

395
/*
 * Poll the hardware until it reports frequency @freq.
 * Tries up to 100 times with a 10us delay between attempts.
 * Returns 1 once the frequency matches, 0 if it never does.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int cur_freq;
	unsigned int i;

	for (i = 0; i < 100; i++) {
		cur_freq = extract_freq(get_cur_val(mask, data), data);
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
411
			       unsigned int index)
L
Linus Torvalds 已提交
412
{
413
	struct acpi_cpufreq_data *data = policy->driver_data;
414
	struct acpi_processor_performance *perf;
415
	const struct cpumask *mask;
416
	unsigned int next_perf_state = 0; /* Index into perf table */
417
	int result = 0;
418

419
	if (unlikely(data == NULL || data->freq_table == NULL)) {
420 421
		return -ENODEV;
	}
L
Linus Torvalds 已提交
422

423
	perf = to_perf_data(data);
424
	next_perf_state = data->freq_table[index].driver_data;
425
	if (perf->state == next_perf_state) {
426
		if (unlikely(data->resume)) {
427
			pr_debug("Called after resume, resetting to P%d\n",
428
				next_perf_state);
429 430
			data->resume = 0;
		} else {
431
			pr_debug("Already at target state (P%d)\n",
432
				next_perf_state);
433
			return 0;
434
		}
435 436
	}

437 438 439 440 441 442
	/*
	 * The core won't allow CPUs to go away until the governor has been
	 * stopped, so we can rely on the stability of policy->cpus.
	 */
	mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
		cpumask_of(policy->cpu) : policy->cpus;
443

444
	drv_write(data, mask, perf->states[next_perf_state].control);
445

446
	if (acpi_pstate_strict) {
447
		if (!check_freqs(mask, data->freq_table[index].frequency,
448
					data)) {
449
			pr_debug("acpi_cpufreq_target failed (%d)\n",
450
				policy->cpu);
451
			result = -EAGAIN;
452 453 454
		}
	}

455 456
	if (!result)
		perf->state = next_perf_state;
457 458

	return result;
L
Linus Torvalds 已提交
459 460 461
}

static unsigned long
462
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
463
{
464
	struct acpi_processor_performance *perf;
465

466
	perf = to_perf_data(data);
L
Linus Torvalds 已提交
467 468 469 470
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
471
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
472

473
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
474
			freq = freqn;
475
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
476
			if ((2 * cpu_khz) > (freqn + freq)) {
477
				perf->state = i;
478
				return freq;
L
Linus Torvalds 已提交
479 480
			}
		}
481
		perf->state = perf->state_count-1;
482
		return freqn;
483
	} else {
L
Linus Torvalds 已提交
484
		/* assume CPU is at P0... */
485 486 487
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
488 489
}

490 491 492 493 494 495 496 497 498 499 500
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
517 518 519 520
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
521
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


/* Hotplug notifier block; registered in acpi_cpufreq_boost_init(). */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

541 542 543 544 545 546 547 548
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
549
static int __init acpi_cpufreq_early_init(void)
550
{
551
	unsigned int i;
552
	pr_debug("acpi_cpufreq_early_init\n");
553

554 555
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
556
		pr_debug("Memory allocation error for acpi_perf_data.\n");
557
		return -ENOMEM;
558
	}
559
	for_each_possible_cpu(i) {
560
		if (!zalloc_cpumask_var_node(
561 562
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
563 564 565 566 567 568

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
569 570

	/* Do initialization in ACPI core */
571 572
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
573 574
}

575
#ifdef CONFIG_SMP
576 577 578 579 580 581 582 583
/*
 * Some BIOSes do SW_ANY coordination internally, either by setting it up
 * in hardware or by doing it in BIOS firmware, and do not inform the OS
 * about it. If this goes undetected, the CPU can end up running at a
 * different speed than the OS intended. Detect it and handle it cleanly.
 *
 * Set to 1 by sw_any_bug_found() when a DMI entry matches.
 */
static int bios_with_sw_any_bug;

584
static int sw_any_bug_found(const struct dmi_system_id *d)
585 586 587 588 589
{
	bios_with_sw_any_bug = 1;
	return 0;
}

590
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
591 592 593 594 595 596 597 598 599 600 601
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
602 603 604

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
605 606
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
607 608
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
609
	 * Both Processor Cores to Lock Up. */
610 611 612
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
613 614 615 616 617
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
618
			return -ENODEV;
619
		    }
620 621 622
		}
	return 0;
}
623
#endif
624

625
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
626
{
627 628 629 630 631
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
632
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
633
	struct acpi_processor_performance *perf;
634 635 636
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
637

638
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
639

640
#ifdef CONFIG_SMP
641 642 643 644 645
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
646 647
#endif

648
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
649
	if (!data)
650
		return -ENOMEM;
L
Linus Torvalds 已提交
651

652 653 654 655 656
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

657
	perf = per_cpu_ptr(acpi_perf_data, cpu);
658
	data->acpi_perf_cpu = cpu;
659
	policy->driver_data = data;
L
Linus Torvalds 已提交
660

661
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
662
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
663

664
	result = acpi_processor_register_performance(perf, cpu);
L
Linus Torvalds 已提交
665
	if (result)
666
		goto err_free_mask;
L
Linus Torvalds 已提交
667

668
	policy->shared_type = perf->shared_type;
669

670
	/*
671
	 * Will let policy->cpus know about dependency only when software
672 673 674
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
675
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
676
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
677
	}
678
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
679 680 681

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
682
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
683
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
684
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
685
	}
686 687 688 689

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
690 691
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
692 693 694
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
695
#endif
696

L
Linus Torvalds 已提交
697
	/* capability check */
698
	if (perf->state_count <= 1) {
699
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
700 701 702
		result = -ENODEV;
		goto err_unreg;
	}
703

704 705 706 707 708 709
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
710
	case ACPI_ADR_SPACE_SYSTEM_IO:
711 712 713 714 715 716
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
717
		pr_debug("SYSTEM IO addr space\n");
718
		data->cpu_feature = SYSTEM_IO_CAPABLE;
719 720
		data->cpu_freq_read = cpu_freq_read_io;
		data->cpu_freq_write = cpu_freq_write_io;
721
		break;
722
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
723
		pr_debug("HARDWARE addr space\n");
724 725
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
726 727
			data->cpu_freq_read = cpu_freq_read_intel;
			data->cpu_freq_write = cpu_freq_write_intel;
728
			break;
729
		}
730 731
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
732 733
			data->cpu_freq_read = cpu_freq_read_amd;
			data->cpu_freq_write = cpu_freq_write_amd;
734 735 736 737
			break;
		}
		result = -ENODEV;
		goto err_unreg;
738
	default:
739
		pr_debug("Unknown addr space %d\n",
740
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
741 742 743 744
		result = -ENODEV;
		goto err_unreg;
	}

745
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
746
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
747 748 749 750 751 752 753
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
754
	for (i = 0; i < perf->state_count; i++) {
755 756 757 758
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
759 760
	}

761 762 763 764
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
765 766
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
767 768
	}

L
Linus Torvalds 已提交
769
	/* table init */
770 771
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
772
		    data->freq_table[valid_states-1].frequency / 1000)
773 774
			continue;

775
		data->freq_table[valid_states].driver_data = i;
776
		data->freq_table[valid_states].frequency =
777
		    perf->states[i].core_frequency * 1000;
778
		valid_states++;
L
Linus Torvalds 已提交
779
	}
780
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
781
	perf->state = 0;
L
Linus Torvalds 已提交
782

783
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
784
	if (result)
L
Linus Torvalds 已提交
785 786
		goto err_freqfree;

787 788 789
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

790
	switch (perf->control_register.space_id) {
791
	case ACPI_ADR_SPACE_SYSTEM_IO:
792 793 794 795 796 797
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
798 799
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
800
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
801
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
802
		break;
803
	default:
804 805 806
		break;
	}

L
Linus Torvalds 已提交
807 808 809
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

810
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
811
	for (i = 0; i < perf->state_count; i++)
812
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
813
			(i == perf->state ? '*' : ' '), i,
814 815 816
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
817

818 819 820 821 822
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
823

824
	return result;
L
Linus Torvalds 已提交
825

826
err_freqfree:
L
Linus Torvalds 已提交
827
	kfree(data->freq_table);
828
err_unreg:
829
	acpi_processor_unregister_performance(cpu);
830 831
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
832
err_free:
L
Linus Torvalds 已提交
833
	kfree(data);
834
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
835

836
	return result;
L
Linus Torvalds 已提交
837 838
}

839
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
840
{
841
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
842

843
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
844 845

	if (data) {
846
		policy->driver_data = NULL;
847
		acpi_processor_unregister_performance(data->acpi_perf_cpu);
848
		free_cpumask_var(data->freqdomain_cpus);
849
		kfree(data->freq_table);
L
Linus Torvalds 已提交
850 851 852
		kfree(data);
	}

853
	return 0;
L
Linus Torvalds 已提交
854 855
}

856
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
857
{
858
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
859

860
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
861 862 863

	data->resume = 1;

864
	return 0;
L
Linus Torvalds 已提交
865 866
}

867
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
868
	&cpufreq_freq_attr_scaling_available_freqs,
869
	&freqdomain_cpus,
870 871 872
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
L
Linus Torvalds 已提交
873 874 875 876
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
877
	.verify		= cpufreq_generic_frequency_table_verify,
878
	.target_index	= acpi_cpufreq_target,
879 880 881 882 883 884
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
L
Linus Torvalds 已提交
885 886
};

887 888 889 890 891 892 893 894
/*
 * Enable boost control if the boot CPU has X86_FEATURE_CPB or
 * X86_FEATURE_IDA. Takes the initial boost state from CPU 0, forces all
 * online CPUs to that state, and registers the hotplug notifier.
 * If the MSR buffer cannot be allocated, boost control stays disabled.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
		msrs = msrs_alloc();

		if (!msrs)
			return;

		acpi_cpufreq_driver.set_boost = set_boost;
		acpi_cpufreq_driver.boost_enabled = boost_state(0);

		cpu_notifier_register_begin();

		/* Force all MSRs to the same value */
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
			       cpu_online_mask);

		__register_cpu_notifier(&boost_nb);

		cpu_notifier_register_done();
	}
}

910
static void acpi_cpufreq_boost_exit(void)
911 912 913 914 915 916 917 918 919
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

920
/*
 * Module init: register the acpi-cpufreq driver.
 *
 * Returns -ENODEV if ACPI is disabled, -EEXIST if another cpufreq driver
 * is already registered, or the error from early init or driver
 * registration. On registration failure the per-cpu data and the boost
 * state are torn down again.
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **attr;

		pr_debug("CPB unsupported, do not expose it\n");

		/* Terminate the attribute list at the cpb entry. */
		for (attr = acpi_cpufreq_attr; *attr; attr++)
			if (*attr == &cpb) {
				*attr = NULL;
				break;
			}
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret) {
		free_acpi_perf_data();
		acpi_cpufreq_boost_exit();
	}
	return ret;
}

966
/*
 * Module exit: tear down boost control, unregister the driver and free
 * the per-cpu performance data.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	free_acpi_perf_data();
}

977
module_param(acpi_pstate_strict, uint, 0644);
978
MODULE_PARM_DESC(acpi_pstate_strict,
979 980
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
981 982 983 984

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

985 986 987 988 989 990 991
/* CPU feature table exported for module autoloading (x86cpu alias). */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

992 993 994 995 996 997 998
/* ACPI processor IDs exported for module autoloading (acpi alias). */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
999
MODULE_ALIAS("acpi");