/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

28 29
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

L
Linus Torvalds 已提交
30 31 32
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
33 34
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
35
#include <linux/cpufreq.h>
36
#include <linux/compiler.h>
37
#include <linux/dmi.h>
38
#include <linux/slab.h>
L
Linus Torvalds 已提交
39 40

#include <linux/acpi.h>
41 42 43 44
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
45 46
#include <acpi/processor.h>

47
#include <asm/msr.h>
48 49 50
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
51 52 53 54
/* Module metadata: authors, one-line description and license. */
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

55 56 57
/*
 * Register access method chosen for a policy in acpi_cpufreq_cpu_init()
 * and stored in acpi_cpufreq_data.cpu_feature.
 */
enum {
	UNDEFINED_CAPABLE,		/* nothing selected (zero-initialised) */
	SYSTEM_INTEL_MSR_CAPABLE,	/* uses the cpu_freq_*_intel helpers */
	SYSTEM_AMD_MSR_CAPABLE,		/* uses the cpu_freq_*_amd helpers */
	SYSTEM_IO_CAPABLE,		/* uses the cpu_freq_*_io helpers */
};

/* Masks applied to MSR values before they are compared or written. */
#define INTEL_MSR_RANGE		0xffff
#define AMD_MSR_RANGE		0x7

/* Bit of MSR_K7_HWCR that is set to disable boost, cleared to enable it. */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69
	unsigned int resume;
	unsigned int cpu_feature;
70
	unsigned int acpi_perf_cpu;
71
	cpumask_var_t freqdomain_cpus;
72 73
	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
L
Linus Torvalds 已提交
74 75
};

76
/* acpi_perf_data is a pointer to percpu data. */
77
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
78

79 80 81 82 83
/* Return the per-CPU performance data of the CPU recorded in @data. */
static inline struct acpi_processor_performance *
to_perf_data(struct acpi_cpufreq_data *data)
{
	unsigned int owner_cpu = data->acpi_perf_cpu;

	return per_cpu_ptr(acpi_perf_data, owner_cpu);
}

L
Linus Torvalds 已提交
84 85
/* Forward declaration; the initialised definition is further down. */
static struct cpufreq_driver acpi_cpufreq_driver;

86
/*
 * Module parameter. When non-zero, acpi_cpufreq_target() verifies each
 * frequency change via check_freqs() and fast switching is not offered.
 */
static unsigned int acpi_pstate_strict;
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

106
/*
 * Enable or disable boost on the CPU this runs on by clearing or setting
 * the vendor-specific boost-disable bit.
 *
 * Returns 0 on success, -EINVAL for vendors other than Intel and AMD.
 */
static int boost_set_msr(bool enable)
{
	u32 addr;
	u64 disable_bit, cur;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		addr = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		addr = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return -EINVAL;
	}

	rdmsrl(addr, cur);
	/* the bit is a *disable* flag: clear it to enable boost */
	cur = enable ? (cur & ~disable_bit) : (cur | disable_bit);
	wrmsrl(addr, cur);

	return 0;
}

static void boost_set_msr_each(void *p_en)
{
	bool enable = (bool) p_en;

	boost_set_msr(enable);
140 141
}

142
static int set_boost(int val)
143 144
{
	get_online_cpus();
145
	on_each_cpu(boost_set_msr_each, (void *)(long)val, 1);
146 147 148
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

149
	return 0;
150 151
}

152 153
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
154
	struct acpi_cpufreq_data *data = policy->driver_data;
155

156 157 158
	if (unlikely(!data))
		return -ENODEV;

159 160 161 162 163
	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

164
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
165 166
/*
 * sysfs store handler for the legacy "cpb" attribute.
 *
 * Accepts "0" or "1" and applies that boost state to all online CPUs.
 * Returns @count on success, -EINVAL if boost is not supported or the
 * input is not 0/1, or the error reported by set_boost().
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	unsigned int val = 0;
	int ret;

	if (!acpi_cpufreq_driver.set_boost)
		return -EINVAL;

	ret = kstrtouint(buf, 10, &val);
	if (ret || val > 1)
		return -EINVAL;

	ret = set_boost(val);
	if (ret)
		return ret;

	/*
	 * Keep the cached state in step with the hardware: show_cpb() and
	 * cpufreq_boost_online() both read boost_enabled, so leaving it
	 * stale would misreport the state and let a CPU coming online
	 * undo the setting for itself.
	 */
	acpi_cpufreq_driver.boost_enabled = val;

	return count;
}

183 184
static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
185
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
186 187
}

188
cpufreq_freq_attr_rw(cpb);
189 190
#endif

191 192
static int check_est_cpu(unsigned int cpuid)
{
193
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
194

195
	return cpu_has(cpu, X86_FEATURE_EST);
196 197
}

198 199 200 201 202 203 204
/* Non-zero if CPU @cpuid has the X86_FEATURE_HW_PSTATE capability flag. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	return cpu_has(&cpu_data(cpuid), X86_FEATURE_HW_PSTATE);
}

205
/*
 * Translate a status value read from the I/O port into a frequency.
 *
 * The frequency table can hold fewer entries than there are ACPI states
 * (acpi_cpufreq_cpu_init() skips some), and each entry records its ACPI
 * state index in ->driver_data. The table is therefore walked and mapped
 * through ->driver_data instead of being indexed by the ACPI state
 * number, mirroring extract_msr().
 *
 * Returns the matching table frequency, or 0 if no entry matches @value.
 */
static unsigned extract_io(struct cpufreq_policy *policy, u32 value)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct cpufreq_frequency_table *pos;

	cpufreq_for_each_entry(pos, policy->freq_table)
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;

	return 0;
}

220
/*
 * Translate a raw MSR value into a frequency from the policy's table.
 *
 * @msr is masked with the vendor-specific range and compared with the
 * status value of each table entry's ACPI state. If nothing matches, the
 * frequency of the first table entry is returned.
 */
static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
{
	struct acpi_processor_performance *perf =
		to_perf_data(policy->driver_data);
	struct cpufreq_frequency_table *entry;
	u32 range;

	range = (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ?
		AMD_MSR_RANGE : INTEL_MSR_RANGE;
	msr &= range;

	cpufreq_for_each_entry(entry, policy->freq_table) {
		if (perf->states[entry->driver_data].status == msr)
			return entry->frequency;
	}

	return policy->freq_table[0].frequency;
}

239
/*
 * Convert a raw register value into a frequency using the decoder that
 * matches the policy's access method; 0 for an unknown method.
 */
static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	unsigned int method = data->cpu_feature;

	if (method == SYSTEM_INTEL_MSR_CAPABLE ||
	    method == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(policy, val);

	if (method == SYSTEM_IO_CAPABLE)
		return extract_io(policy, val);

	return 0;
}

254
static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
255 256
{
	u32 val, dummy;
257

258 259 260 261
	rdmsr(MSR_IA32_PERF_CTL, val, dummy);
	return val;
}

262
/*
 * Read-modify-write MSR_IA32_PERF_CTL on this CPU: only the bits covered
 * by INTEL_MSR_RANGE are replaced with @val, everything else is kept.
 */
static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
{
	u32 low, high;

	rdmsr(MSR_IA32_PERF_CTL, low, high);

	low &= ~INTEL_MSR_RANGE;
	low |= val & INTEL_MSR_RANGE;

	wrmsr(MSR_IA32_PERF_CTL, low, high);
}

271
static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
272 273 274 275 276 277 278
{
	u32 val, dummy;

	rdmsr(MSR_AMD_PERF_CTL, val, dummy);
	return val;
}

279
/* Write @val to the low half of MSR_AMD_PERF_CTL; the high half is 0. */
static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
{
	const u32 high = 0;

	wrmsr(MSR_AMD_PERF_CTL, val, high);
}

284
static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
285 286 287 288 289 290 291
{
	u32 val;

	acpi_os_read_port(reg->address, &val, reg->bit_width);
	return val;
}

292
/* Write @val to the port described by @reg (address and bit width). */
static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
{
	u32 width = reg->bit_width;

	acpi_os_write_port(reg->address, val, width);
}
296 297

struct drv_cmd {
298
	struct acpi_pct_register *reg;
299
	u32 val;
300 301 302 303
	union {
		void (*write)(struct acpi_pct_register *reg, u32 val);
		u32 (*read)(struct acpi_pct_register *reg);
	} func;
304 305
};

306 307
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
308
{
309
	struct drv_cmd *cmd = _cmd;
310

311
	cmd->val = cmd->func.read(cmd->reg);
312
}
L
Linus Torvalds 已提交
313

314
static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
315
{
316 317 318 319 320 321
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd cmd = {
		.reg = &perf->control_register,
		.func.read = data->cpu_freq_read,
	};
	int err;
322

323 324 325
	err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
	return cmd.val;
326
}
L
Linus Torvalds 已提交
327

328 329
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
330
{
331
	struct drv_cmd *cmd = _cmd;
332

333
	cmd->func.write(cmd->reg, cmd->val);
334 335
}

336 337
/*
 * Write @val to the policy's control register on the CPUs in @mask,
 * using the write accessor stored in @data.
 */
static void drv_write(struct acpi_cpufreq_data *data,
		      const struct cpumask *mask, u32 val)
{
	struct drv_cmd cmd = {
		.reg = &to_perf_data(data)->control_register,
		.val = val,
		.func.write = data->cpu_freq_write,
	};
	int local_cpu = get_cpu();

	/* the local CPU is handled by a direct call, the rest by cross-call */
	if (cpumask_test_cpu(local_cpu, mask))
		do_drv_write(&cmd);

	smp_call_function_many(mask, do_drv_write, &cmd, 1);

	put_cpu();
}
L
Linus Torvalds 已提交
354

355
/*
 * Return the raw register value read on a CPU of @mask, or 0 when @mask
 * is empty.
 */
static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
{
	u32 raw;

	if (unlikely(cpumask_empty(mask)))
		return 0;

	raw = drv_read(data, mask);
	pr_debug("get_cur_val = %u\n", raw);

	return raw;
}
L
Linus Torvalds 已提交
368

369 370
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
371 372
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
373
	unsigned int freq;
374
	unsigned int cached_freq;
375

376
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
377

378
	policy = cpufreq_cpu_get_raw(cpu);
379 380 381 382
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
383
	if (unlikely(!data || !policy->freq_table))
384
		return 0;
L
Linus Torvalds 已提交
385

386 387
	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
388 389 390 391 392 393 394 395
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

396
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
397

398
	return freq;
L
Linus Torvalds 已提交
399 400
}

401 402
static unsigned int check_freqs(struct cpufreq_policy *policy,
				const struct cpumask *mask, unsigned int freq)
403
{
404
	struct acpi_cpufreq_data *data = policy->driver_data;
405 406
	unsigned int cur_freq;
	unsigned int i;
L
Linus Torvalds 已提交
407

408
	for (i = 0; i < 100; i++) {
409
		cur_freq = extract_freq(policy, get_cur_val(mask, data));
410 411 412 413 414 415 416 417
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
418
			       unsigned int index)
L
Linus Torvalds 已提交
419
{
420
	struct acpi_cpufreq_data *data = policy->driver_data;
421
	struct acpi_processor_performance *perf;
422
	const struct cpumask *mask;
423
	unsigned int next_perf_state = 0; /* Index into perf table */
424
	int result = 0;
425

426
	if (unlikely(!data)) {
427 428
		return -ENODEV;
	}
L
Linus Torvalds 已提交
429

430
	perf = to_perf_data(data);
431
	next_perf_state = policy->freq_table[index].driver_data;
432
	if (perf->state == next_perf_state) {
433
		if (unlikely(data->resume)) {
434
			pr_debug("Called after resume, resetting to P%d\n",
435
				next_perf_state);
436 437
			data->resume = 0;
		} else {
438
			pr_debug("Already at target state (P%d)\n",
439
				next_perf_state);
440
			return 0;
441
		}
442 443
	}

444 445 446 447 448 449
	/*
	 * The core won't allow CPUs to go away until the governor has been
	 * stopped, so we can rely on the stability of policy->cpus.
	 */
	mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
		cpumask_of(policy->cpu) : policy->cpus;
450

451
	drv_write(data, mask, perf->states[next_perf_state].control);
452

453
	if (acpi_pstate_strict) {
454 455
		if (!check_freqs(policy, mask,
				 policy->freq_table[index].frequency)) {
456
			pr_debug("acpi_cpufreq_target failed (%d)\n",
457
				policy->cpu);
458
			result = -EAGAIN;
459 460 461
		}
	}

462 463
	if (!result)
		perf->state = next_perf_state;
464 465

	return result;
L
Linus Torvalds 已提交
466 467
}

468 469 470 471 472 473
/*
 * ->fast_switch() callback: pick a table entry for @target_freq and write
 * its control value directly on the calling CPU (no cross-calls).
 *
 * The table index comes from the policy's cached resolution when
 * @target_freq equals policy->cached_target_freq, otherwise from
 * cpufreq_table_find_index_dl(). Nothing is written when the ACPI state
 * already matches, unless the resume flag forces a write.
 *
 * Returns the frequency of the selected table entry.
 *
 * File-local: the function is only referenced from acpi_cpufreq_driver
 * and has no prototype, hence static like the other callbacks.
 */
static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
					     unsigned int target_freq)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf;
	struct cpufreq_frequency_table *entry;
	unsigned int next_perf_state, next_freq, index;

	/*
	 * Find the closest frequency above target_freq.
	 */
	if (policy->cached_target_freq == target_freq)
		index = policy->cached_resolved_idx;
	else
		index = cpufreq_table_find_index_dl(policy, target_freq);

	entry = &policy->freq_table[index];
	next_freq = entry->frequency;
	next_perf_state = entry->driver_data;

	perf = to_perf_data(data);
	if (perf->state == next_perf_state) {
		if (unlikely(data->resume))
			data->resume = 0;
		else
			return next_freq;
	}

	data->cpu_freq_write(&perf->control_register,
			     perf->states[next_perf_state].control);
	perf->state = next_perf_state;
	return next_freq;
}

L
Linus Torvalds 已提交
502
static unsigned long
503
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
504
{
505
	struct acpi_processor_performance *perf;
506

507
	perf = to_perf_data(data);
L
Linus Torvalds 已提交
508 509 510 511
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
512
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
513

514
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
515
			freq = freqn;
516
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
517
			if ((2 * cpu_khz) > (freqn + freq)) {
518
				perf->state = i;
519
				return freq;
L
Linus Torvalds 已提交
520 521
			}
		}
522
		perf->state = perf->state_count-1;
523
		return freqn;
524
	} else {
L
Linus Torvalds 已提交
525
		/* assume CPU is at P0... */
526 527 528
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
529 530
}

531 532 533 534 535 536 537 538 539 540 541
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

542
static int cpufreq_boost_online(unsigned int cpu)
543 544
{
	/*
545 546
	 * On the CPU_UP path we simply keep the boost-disable flag
	 * in sync with the current global state.
547
	 */
548
	return boost_set_msr(acpi_cpufreq_driver.boost_enabled);
549
}
550

551 552 553 554 555 556
static int cpufreq_boost_down_prep(unsigned int cpu)
{
	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting.
	 */
557
	return boost_set_msr(1);
558 559
}

560 561 562 563 564 565 566 567
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
568
static int __init acpi_cpufreq_early_init(void)
569
{
570
	unsigned int i;
571
	pr_debug("acpi_cpufreq_early_init\n");
572

573 574
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
575
		pr_debug("Memory allocation error for acpi_perf_data.\n");
576
		return -ENOMEM;
577
	}
578
	for_each_possible_cpu(i) {
579
		if (!zalloc_cpumask_var_node(
580 581
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
582 583 584 585 586 587

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
588 589

	/* Do initialization in ACPI core */
590 591
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
592 593
}

594
#ifdef CONFIG_SMP
595 596 597 598 599 600 601 602
/*
 * Some BIOSes do SW_ANY coordination internally, either by setting it up
 * in hardware or by doing it in BIOS firmware, without informing the OS
 * about it. If this goes undetected, the CPU ends up running at a
 * different speed than the OS intended. Detect it and handle it cleanly.
 *
 * Set to 1 by sw_any_bug_found() when the DMI table below matches.
 */
static int bios_with_sw_any_bug;

603
static int sw_any_bug_found(const struct dmi_system_id *d)
604 605 606 607 608
{
	bios_with_sw_any_bug = 1;
	return 0;
}

609
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
610 611 612 613 614 615 616 617 618 619 620
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
621 622 623

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
624 625
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
626 627
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
628
	 * Both Processor Cores to Lock Up. */
629 630 631
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
632
		    (c->x86_mask == 8)) {
633
			pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
634
			return -ENODEV;
635
		    }
636 637 638
		}
	return 0;
}
639
#endif
640

641
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
642
{
643 644 645 646 647
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
648
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
649
	struct acpi_processor_performance *perf;
650
	struct cpufreq_frequency_table *freq_table;
651 652 653
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
654

655
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
656

657
#ifdef CONFIG_SMP
658 659 660 661 662
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
663 664
#endif

665
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
666
	if (!data)
667
		return -ENOMEM;
L
Linus Torvalds 已提交
668

669 670 671 672 673
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

674
	perf = per_cpu_ptr(acpi_perf_data, cpu);
675
	data->acpi_perf_cpu = cpu;
676
	policy->driver_data = data;
L
Linus Torvalds 已提交
677

678
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
679
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
680

681
	result = acpi_processor_register_performance(perf, cpu);
L
Linus Torvalds 已提交
682
	if (result)
683
		goto err_free_mask;
L
Linus Torvalds 已提交
684

685
	policy->shared_type = perf->shared_type;
686

687
	/*
688
	 * Will let policy->cpus know about dependency only when software
689 690 691
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
692
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
693
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
694
	}
695
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
696 697 698

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
699
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
700
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
701
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
702
	}
703 704 705 706

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
707 708
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
709
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
710
		pr_info_once("overriding BIOS provided _PSD data\n");
711
	}
712
#endif
713

L
Linus Torvalds 已提交
714
	/* capability check */
715
	if (perf->state_count <= 1) {
716
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
717 718 719
		result = -ENODEV;
		goto err_unreg;
	}
720

721 722 723 724 725 726
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
727
	case ACPI_ADR_SPACE_SYSTEM_IO:
728 729 730 731 732 733
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
734
		pr_debug("SYSTEM IO addr space\n");
735
		data->cpu_feature = SYSTEM_IO_CAPABLE;
736 737
		data->cpu_freq_read = cpu_freq_read_io;
		data->cpu_freq_write = cpu_freq_write_io;
738
		break;
739
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
740
		pr_debug("HARDWARE addr space\n");
741 742
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
743 744
			data->cpu_freq_read = cpu_freq_read_intel;
			data->cpu_freq_write = cpu_freq_write_intel;
745
			break;
746
		}
747 748
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
749 750
			data->cpu_freq_read = cpu_freq_read_amd;
			data->cpu_freq_write = cpu_freq_write_amd;
751 752 753 754
			break;
		}
		result = -ENODEV;
		goto err_unreg;
755
	default:
756
		pr_debug("Unknown addr space %d\n",
757
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
758 759 760 761
		result = -ENODEV;
		goto err_unreg;
	}

762
	freq_table = kzalloc(sizeof(*freq_table) *
763
		    (perf->state_count+1), GFP_KERNEL);
764
	if (!freq_table) {
L
Linus Torvalds 已提交
765 766 767 768 769 770
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
771
	for (i = 0; i < perf->state_count; i++) {
772 773 774 775
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
776 777
	}

778 779 780 781
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
782
		pr_info_once("P-state transition latency capped at 20 uS\n");
783 784
	}

L
Linus Torvalds 已提交
785
	/* table init */
786 787
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
788
		    freq_table[valid_states-1].frequency / 1000)
789 790
			continue;

791 792
		freq_table[valid_states].driver_data = i;
		freq_table[valid_states].frequency =
793
		    perf->states[i].core_frequency * 1000;
794
		valid_states++;
L
Linus Torvalds 已提交
795
	}
796
	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
797
	perf->state = 0;
L
Linus Torvalds 已提交
798

799
	result = cpufreq_table_validate_and_show(policy, freq_table);
800
	if (result)
L
Linus Torvalds 已提交
801 802
		goto err_freqfree;

803
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
804
		pr_warn(FW_WARN "P-state 0 is not max freq\n");
805

806
	switch (perf->control_register.space_id) {
807
	case ACPI_ADR_SPACE_SYSTEM_IO:
808 809 810 811 812 813
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
814 815
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
816
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
817
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
818
		break;
819
	default:
820 821 822
		break;
	}

L
Linus Torvalds 已提交
823 824 825
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

826
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
827
	for (i = 0; i < perf->state_count; i++)
828
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
829
			(i == perf->state ? '*' : ' '), i,
830 831 832
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
833

834 835 836 837 838
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
839

840 841 842
	policy->fast_switch_possible = !acpi_pstate_strict &&
		!(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);

843
	return result;
L
Linus Torvalds 已提交
844

845
err_freqfree:
846
	kfree(freq_table);
847
err_unreg:
848
	acpi_processor_unregister_performance(cpu);
849 850
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
851
err_free:
L
Linus Torvalds 已提交
852
	kfree(data);
853
	policy->driver_data = NULL;
L
Linus Torvalds 已提交
854

855
	return result;
L
Linus Torvalds 已提交
856 857
}

858
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
859
{
860
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
861

862
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
863

864 865 866 867
	policy->fast_switch_possible = false;
	policy->driver_data = NULL;
	acpi_processor_unregister_performance(data->acpi_perf_cpu);
	free_cpumask_var(data->freqdomain_cpus);
868
	kfree(policy->freq_table);
869
	kfree(data);
L
Linus Torvalds 已提交
870

871
	return 0;
L
Linus Torvalds 已提交
872 873
}

874
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
875
{
876
	struct acpi_cpufreq_data *data = policy->driver_data;
L
Linus Torvalds 已提交
877

878
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
879 880 881

	data->resume = 1;

882
	return 0;
L
Linus Torvalds 已提交
883 884
}

885
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
886
	&cpufreq_freq_attr_scaling_available_freqs,
887
	&freqdomain_cpus,
888 889 890
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
L
Linus Torvalds 已提交
891 892 893 894
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
895
	.verify		= cpufreq_generic_frequency_table_verify,
896
	.target_index	= acpi_cpufreq_target,
897
	.fast_switch	= acpi_cpufreq_fast_switch,
898 899 900 901 902 903
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
L
Linus Torvalds 已提交
904 905
};

906 907
/*
 * Hotplug state returned by cpuhp_setup_state() in
 * acpi_cpufreq_boost_init(); stays 0 if no callbacks were registered.
 */
static enum cpuhp_state acpi_cpufreq_online;

908 909
/*
 * Enable boost support when the boot CPU has X86_FEATURE_CPB or
 * X86_FEATURE_IDA: hook up ->set_boost, seed ->boost_enabled from CPU 0
 * and register the hotplug callbacks.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	int state;

	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	acpi_cpufreq_driver.set_boost = set_boost;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	/*
	 * This calls the online callback on all online cpu and forces all
	 * MSRs to the same value.
	 */
	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpufreq/acpi:online",
				  cpufreq_boost_online,
				  cpufreq_boost_down_prep);
	if (state < 0) {
		pr_err("acpi_cpufreq: failed to register hotplug callbacks\n");
		return;
	}

	acpi_cpufreq_online = state;
}

931
static void acpi_cpufreq_boost_exit(void)
932
{
933
	if (acpi_cpufreq_online > 0)
934
		cpuhp_remove_state_nocalls(acpi_cpufreq_online);
935 936
}

937
/*
 * Module init: allocate the per-CPU ACPI data, set up boost support and
 * register the cpufreq driver.
 *
 * Returns 0 on success, -ENODEV if ACPI is disabled, -EEXIST if another
 * cpufreq driver is already registered, or the error from early init or
 * driver registration.
 */
static int __init acpi_cpufreq_init(void)
{
	int err;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	err = acpi_cpufreq_early_init();
	if (err)
		return err;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **slot = acpi_cpufreq_attr;

		pr_debug("CPB unsupported, do not expose it\n");

		/* stop at the cpb entry or at the terminating NULL */
		while (*slot && *slot != &cpb)
			slot++;
		if (*slot)
			*slot = NULL;
	}
#endif
	acpi_cpufreq_boost_init();

	err = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (!err)
		return 0;

	free_acpi_perf_data();
	acpi_cpufreq_boost_exit();

	return err;
}

983
/*
 * Module exit: remove the boost hotplug callbacks, unregister the
 * driver and free the per-CPU ACPI data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();
	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_acpi_perf_data();
}

994
module_param(acpi_pstate_strict, uint, 0644);
995
MODULE_PARM_DESC(acpi_pstate_strict,
996 997
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
998 999 1000 1001

/* Run acpi_cpufreq_init() as a late initcall; acpi_cpufreq_exit() on exit. */
late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

1002 1003 1004 1005 1006 1007 1008
/*
 * x86cpu module device table: matches CPUs with the ACPI or HW_PSTATE
 * feature flag; the empty entry terminates the list.
 */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1009 1010 1011 1012 1013 1014 1015
/*
 * ACPI module device table: matches the processor object and processor
 * device HIDs; the empty entry terminates the list.
 */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1016
/* Additional module alias "acpi". */
MODULE_ALIAS("acpi");