acpi-cpufreq.c 24.4 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
36
#include <linux/slab.h>
L
Linus Torvalds 已提交
37 38

#include <linux/acpi.h>
39 40 41 42
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

L
Linus Torvalds 已提交
43 44
#include <acpi/processor.h>

45
#include <asm/msr.h>
46 47 48
#include <asm/processor.h>
#include <asm/cpufeature.h>

L
Linus Torvalds 已提交
49 50 51 52
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54
#define PFX "acpi-cpufreq: "

55 56 57
/*
 * How the P-state control/status registers of a CPU are accessed.
 * Stored in acpi_cpufreq_data.cpu_feature and in drv_cmd.type.
 */
enum {
	/* no access method selected (value of a zeroed structure) */
	UNDEFINED_CAPABLE = 0,
	/* Intel: MSR access through MSR_IA32_PERF_CTL */
	SYSTEM_INTEL_MSR_CAPABLE,
	/* AMD: MSR access through MSR_AMD_PERF_CTL */
	SYSTEM_AMD_MSR_CAPABLE,
	/* I/O port access through the ACPI control register */
	SYSTEM_IO_CAPABLE,
};

#define INTEL_MSR_RANGE		(0xffff)
63
#define AMD_MSR_RANGE		(0x7)
64

65 66
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

67
struct acpi_cpufreq_data {
68 69 70 71
	struct acpi_processor_performance *acpi_data;
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
72
	cpumask_var_t freqdomain_cpus;
L
Linus Torvalds 已提交
73 74
};

75
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
76

77
/* acpi_perf_data is a pointer to percpu data. */
78
static struct acpi_processor_performance __percpu *acpi_perf_data;
L
Linus Torvalds 已提交
79 80 81

static struct cpufreq_driver acpi_cpufreq_driver;

82
static unsigned int acpi_pstate_strict;
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
static struct msr __percpu *msrs;

/*
 * boost_state - report whether boosting is currently enabled on @cpu
 *
 * Reads the vendor specific MSR on @cpu and returns true when the
 * "boost disable" bit is clear.  Returns false for vendors other than
 * Intel and AMD.
 */
static bool boost_state(unsigned int cpu)
{
	u32 reg_addr;
	u64 disable_bit;
	u32 lo, hi;
	u64 msr;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		reg_addr = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		reg_addr = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return false;
	}

	rdmsr_on_cpu(cpu, reg_addr, &lo, &hi);
	msr = lo | ((u64)hi << 32);

	return !(msr & disable_bit);
}

/*
 * boost_set_msrs - enable or disable boosting on a set of CPUs
 * @enable:  true clears the vendor's "boost disable" bit, false sets it
 * @cpumask: CPUs whose MSR is rewritten
 *
 * The MSR of every CPU in @cpumask is read into the per-CPU 'msrs'
 * buffer, modified there, and written back.  Does nothing for vendors
 * other than Intel and AMD.
 */
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 target;
	u32 reg_addr;
	u64 disable_bit;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
		reg_addr = MSR_IA32_MISC_ENABLE;
		disable_bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		reg_addr = MSR_K7_HWCR;
		disable_bit = MSR_K7_HWCR_CPB_DIS;
	} else {
		return;
	}

	rdmsr_on_cpus(cpumask, reg_addr, msrs);

	for_each_cpu(target, cpumask) {
		struct msr *slot = per_cpu_ptr(msrs, target);

		slot->q = enable ? (slot->q & ~disable_bit)
				 : (slot->q | disable_bit);
	}

	wrmsr_on_cpus(cpumask, reg_addr, msrs);
}

135
static int _store_boost(int val)
136 137 138 139 140 141
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

142
	return 0;
143 144
}

145 146 147 148 149 150 151 152 153
static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

154
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
/*
 * store_boost - parse a "0"/"1" string and switch boosting accordingly
 * @buf:   user supplied string
 * @count: length of @buf, returned on success
 *
 * Returns @count on success, -EINVAL when boosting is unsupported or the
 * input is not 0 or 1, or the error reported by _store_boost().
 *
 * On success the driver's boost_enabled flag is updated as well, because
 * show_cpb() reports it and boost_notify() uses it to program CPUs that
 * come online later.
 */
static ssize_t store_boost(const char *buf, size_t count)
{
	int ret;
	unsigned long val = 0;

	if (!acpi_cpufreq_driver.boost_supported)
		return -EINVAL;

	ret = kstrtoul(buf, 10, &val);
	if (ret || (val > 1))
		return -EINVAL;

	ret = _store_boost((int) val);
	if (ret)
		return ret;

	/* keep the cached state in sync with what was just programmed */
	acpi_cpufreq_driver.boost_enabled = val;

	return count;
}

172 173 174
/*
 * store_cpb - sysfs store handler for the legacy "cpb" attribute
 *
 * @policy is not used; the request is forwarded to store_boost().
 */
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	ssize_t written = store_boost(buf, count);

	return written;
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
180
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
181 182
}

183
cpufreq_freq_attr_rw(cpb);
184 185
#endif

186 187
static int check_est_cpu(unsigned int cpuid)
{
188
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
189

190
	return cpu_has(cpu, X86_FEATURE_EST);
191 192
}

193 194 195 196 197 198 199
/* Returns non-zero when CPU @cpuid advertises X86_FEATURE_HW_PSTATE. */
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	return cpu_has(&cpu_data(cpuid), X86_FEATURE_HW_PSTATE);
}

200
/*
 * extract_io - translate an I/O port status value into a frequency
 * @value: value read from the status/control port
 * @data:  driver data holding the ACPI states and the frequency table
 *
 * The frequency table only contains the P-states accepted at init time
 * and records the originating P-state index in ->driver_data, so the
 * table is walked and each entry is mapped back to its ACPI state
 * (the same way extract_msr() does it) instead of indexing the table
 * with a raw P-state index.
 *
 * Returns the matching frequency in kHz, or 0 when no table entry has
 * the given status value.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	struct cpufreq_frequency_table *pos;

	for (pos = data->freq_table;
	     pos->frequency != CPUFREQ_TABLE_END; pos++) {
		if (value == perf->states[pos->driver_data].status)
			return pos->frequency;
	}
	return 0;
}

214 215 216
/*
 * extract_msr - translate a PERF_CTL MSR value into a frequency
 * @msr:  low 32 bits read from the MSR
 * @data: driver data holding the ACPI states and the frequency table
 *
 * The value is masked with the vendor specific range and compared with
 * the status of the ACPI state behind every frequency table entry.
 * Falls back to the first table entry when nothing matches.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	struct cpufreq_frequency_table *entry;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		msr &= AMD_MSR_RANGE;
	else
		msr &= INTEL_MSR_RANGE;

	for (entry = data->freq_table;
	     entry->frequency != CPUFREQ_TABLE_END; entry++) {
		if (msr == perf->states[entry->driver_data].status)
			return entry->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * extract_freq - translate a raw register value into a frequency
 *
 * Dispatches on the access method stored in @data->cpu_feature.
 * Returns 0 for an unknown access method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	const unsigned int method = data->cpu_feature;

	if (method == SYSTEM_INTEL_MSR_CAPABLE ||
	    method == SYSTEM_AMD_MSR_CAPABLE)
		return extract_msr(val, data);

	if (method == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

/* Address of a register accessed through rdmsr()/wrmsr(). */
struct msr_addr {
	u32 reg;	/* MSR number */
};

250 251 252 253 254 255
/* Address of a register accessed through acpi_os_{read,write}_port(). */
struct io_addr {
	u16 port;	/* I/O port number */
	u8 bit_width;	/* access width passed to the port helpers */
};

struct drv_cmd {
256
	unsigned int type;
257
	const struct cpumask *mask;
258 259 260 261
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
262 263 264
	u32 val;
};

265 266
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
L
Linus Torvalds 已提交
267
{
268
	struct drv_cmd *cmd = _cmd;
269 270 271
	u32 h;

	switch (cmd->type) {
272
	case SYSTEM_INTEL_MSR_CAPABLE:
273
	case SYSTEM_AMD_MSR_CAPABLE:
274 275
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
276
	case SYSTEM_IO_CAPABLE:
277 278 279
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
280
		break;
281
	default:
282 283
		break;
	}
284
}
L
Linus Torvalds 已提交
285

286 287
/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
288
{
289
	struct drv_cmd *cmd = _cmd;
290
	u32 lo, hi;
291 292

	switch (cmd->type) {
293
	case SYSTEM_INTEL_MSR_CAPABLE:
294 295 296
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
297
		break;
298 299 300
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
301
	case SYSTEM_IO_CAPABLE:
302 303 304
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
305
		break;
306
	default:
307 308
		break;
	}
309
}
L
Linus Torvalds 已提交
310

311
static void drv_read(struct drv_cmd *cmd)
312
{
313
	int err;
314 315
	cmd->val = 0;

316 317
	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
318 319 320 321
}

static void drv_write(struct drv_cmd *cmd)
{
322 323 324 325 326
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
327
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
328
	put_cpu();
329
}
L
Linus Torvalds 已提交
330

331
static u32 get_cur_val(const struct cpumask *mask)
332
{
333 334
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
335

336
	if (unlikely(cpumask_empty(mask)))
337
		return 0;
L
Linus Torvalds 已提交
338

339
	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
340 341
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
342
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
343
		break;
344 345
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
346
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
347
		break;
348 349
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
350
		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
351 352 353 354 355 356 357
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

358
	cmd.mask = mask;
359
	drv_read(&cmd);
L
Linus Torvalds 已提交
360

361
	pr_debug("get_cur_val = %u\n", cmd.val);
362 363 364

	return cmd.val;
}
L
Linus Torvalds 已提交
365

366 367
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
368
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
369
	unsigned int freq;
370
	unsigned int cached_freq;
371

372
	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
373 374

	if (unlikely(data == NULL ||
375
		     data->acpi_data == NULL || data->freq_table == NULL)) {
376
		return 0;
L
Linus Torvalds 已提交
377 378
	}

379
	cached_freq = data->freq_table[data->acpi_data->state].frequency;
380
	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
381 382 383 384 385 386 387 388
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

389
	pr_debug("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
390

391
	return freq;
L
Linus Torvalds 已提交
392 393
}

394
/*
 * check_freqs - poll until the hardware reports the requested frequency
 * @mask: CPUs to read the register from
 * @freq: expected frequency in kHz
 * @data: driver data used to decode the register value
 *
 * Reads the register up to 100 times, 10 us apart.  Returns 1 as soon
 * as the decoded frequency equals @freq, 0 when all attempts failed.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int attempts = 100;

	while (attempts--) {
		if (extract_freq(get_cur_val(mask), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
410
			       unsigned int index)
L
Linus Torvalds 已提交
411
{
412
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
413 414
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
415
	unsigned int next_perf_state = 0; /* Index into perf table */
416
	int result = 0;
417 418

	if (unlikely(data == NULL ||
419
	     data->acpi_data == NULL || data->freq_table == NULL)) {
420 421
		return -ENODEV;
	}
L
Linus Torvalds 已提交
422

423
	perf = data->acpi_data;
424
	next_perf_state = data->freq_table[index].driver_data;
425
	if (perf->state == next_perf_state) {
426
		if (unlikely(data->resume)) {
427
			pr_debug("Called after resume, resetting to P%d\n",
428
				next_perf_state);
429 430
			data->resume = 0;
		} else {
431
			pr_debug("Already at target state (P%d)\n",
432
				next_perf_state);
433
			goto out;
434
		}
435 436
	}

437 438 439 440
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
441
		cmd.val = (u32) perf->states[next_perf_state].control;
442
		break;
443 444 445 446 447
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
448 449 450 451 452 453 454
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
455 456
		result = -ENODEV;
		goto out;
457
	}
458

459
	/* cpufreq holds the hotplug lock, so we are safe from here on */
460
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
461
		cmd.mask = policy->cpus;
462
	else
463
		cmd.mask = cpumask_of(policy->cpu);
464

465
	drv_write(&cmd);
466

467
	if (acpi_pstate_strict) {
468 469
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
470
			pr_debug("acpi_cpufreq_target failed (%d)\n",
471
				policy->cpu);
472
			result = -EAGAIN;
473 474 475
		}
	}

476 477
	if (!result)
		perf->state = next_perf_state;
478

479
out:
480
	return result;
L
Linus Torvalds 已提交
481 482 483
}

static unsigned long
484
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
485
{
486
	struct acpi_processor_performance *perf = data->acpi_data;
487

L
Linus Torvalds 已提交
488 489 490 491
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
492
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
493

494
		for (i = 0; i < (perf->state_count-1); i++) {
L
Linus Torvalds 已提交
495
			freq = freqn;
496
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
497
			if ((2 * cpu_khz) > (freqn + freq)) {
498
				perf->state = i;
499
				return freq;
L
Linus Torvalds 已提交
500 501
			}
		}
502
		perf->state = perf->state_count-1;
503
		return freqn;
504
	} else {
L
Linus Torvalds 已提交
505
		/* assume CPU is at P0... */
506 507 508
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
509 510
}

511 512 513 514 515 516 517 518 519 520 521
static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
540
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


/* CPU hotplug notifier block; registered in acpi_cpufreq_boost_init(). */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

560 561 562 563 564 565 566 567
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
568
static int __init acpi_cpufreq_early_init(void)
569
{
570
	unsigned int i;
571
	pr_debug("acpi_cpufreq_early_init\n");
572

573 574
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
575
		pr_debug("Memory allocation error for acpi_perf_data.\n");
576
		return -ENOMEM;
577
	}
578
	for_each_possible_cpu(i) {
579
		if (!zalloc_cpumask_var_node(
580 581
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {
582 583 584 585 586 587

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}
588 589

	/* Do initialization in ACPI core */
590 591
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
592 593
}

594
#ifdef CONFIG_SMP
595 596 597 598 599 600 601 602
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

603
static int sw_any_bug_found(const struct dmi_system_id *d)
604 605 606 607 608
{
	bios_with_sw_any_bug = 1;
	return 0;
}

609
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
610 611 612 613 614 615 616 617 618 619 620
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
621 622 623

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
624 625
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
626 627
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
628
	 * Both Processor Cores to Lock Up. */
629 630 631
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
632 633 634 635 636
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
637
			return -ENODEV;
638
		    }
639 640 641
		}
	return 0;
}
642
#endif
643

644
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
645
{
646 647 648 649 650
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
651
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
652
	struct acpi_processor_performance *perf;
653 654 655
#ifdef CONFIG_SMP
	static int blacklisted;
#endif
L
Linus Torvalds 已提交
656

657
	pr_debug("acpi_cpufreq_cpu_init\n");
L
Linus Torvalds 已提交
658

659
#ifdef CONFIG_SMP
660 661 662 663 664
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
665 666
#endif

667
	data = kzalloc(sizeof(*data), GFP_KERNEL);
L
Linus Torvalds 已提交
668
	if (!data)
669
		return -ENOMEM;
L
Linus Torvalds 已提交
670

671 672 673 674 675
	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

676
	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
677
	per_cpu(acfreq_data, cpu) = data;
L
Linus Torvalds 已提交
678

679
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
680
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
681

682
	result = acpi_processor_register_performance(data->acpi_data, cpu);
L
Linus Torvalds 已提交
683
	if (result)
684
		goto err_free_mask;
L
Linus Torvalds 已提交
685

686 687
	perf = data->acpi_data;
	policy->shared_type = perf->shared_type;
688

689
	/*
690
	 * Will let policy->cpus know about dependency only when software
691 692 693
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
694
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
695
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
696
	}
697
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
698 699 700

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
701
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
702
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
703
		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
704
	}
705 706 707 708

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
709
		cpumask_copy(data->freqdomain_cpus, cpu_sibling_mask(cpu));
710 711 712
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
713
#endif
714

L
Linus Torvalds 已提交
715
	/* capability check */
716
	if (perf->state_count <= 1) {
717
		pr_debug("No P-States\n");
L
Linus Torvalds 已提交
718 719 720
		result = -ENODEV;
		goto err_unreg;
	}
721

722 723 724 725 726 727
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
728
	case ACPI_ADR_SPACE_SYSTEM_IO:
729 730 731 732 733 734
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
735
		pr_debug("SYSTEM IO addr space\n");
736 737
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
738
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
739
		pr_debug("HARDWARE addr space\n");
740 741 742
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
743
		}
744 745 746 747 748 749
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
750
	default:
751
		pr_debug("Unknown addr space %d\n",
752
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
753 754 755 756
		result = -ENODEV;
		goto err_unreg;
	}

757
	data->freq_table = kzalloc(sizeof(*data->freq_table) *
758
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
759 760 761 762 763 764 765
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
766
	for (i = 0; i < perf->state_count; i++) {
767 768 769 770
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
771 772
	}

773 774 775 776
	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
777 778
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
779 780
	}

L
Linus Torvalds 已提交
781
	/* table init */
782 783
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
784
		    data->freq_table[valid_states-1].frequency / 1000)
785 786
			continue;

787
		data->freq_table[valid_states].driver_data = i;
788
		data->freq_table[valid_states].frequency =
789
		    perf->states[i].core_frequency * 1000;
790
		valid_states++;
L
Linus Torvalds 已提交
791
	}
792
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
793
	perf->state = 0;
L
Linus Torvalds 已提交
794

795
	result = cpufreq_table_validate_and_show(policy, data->freq_table);
796
	if (result)
L
Linus Torvalds 已提交
797 798
		goto err_freqfree;

799 800 801
	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

802
	switch (perf->control_register.space_id) {
803
	case ACPI_ADR_SPACE_SYSTEM_IO:
804 805 806 807 808 809
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
810 811
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
812
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
813
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
814
		break;
815
	default:
816 817 818
		break;
	}

L
Linus Torvalds 已提交
819 820 821
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

822
	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
823
	for (i = 0; i < perf->state_count; i++)
824
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
825
			(i == perf->state ? '*' : ' '), i,
826 827 828
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
829

830 831 832 833 834
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
835

836
	return result;
L
Linus Torvalds 已提交
837

838
err_freqfree:
L
Linus Torvalds 已提交
839
	kfree(data->freq_table);
840
err_unreg:
841
	acpi_processor_unregister_performance(perf, cpu);
842 843
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
844
err_free:
L
Linus Torvalds 已提交
845
	kfree(data);
846
	per_cpu(acfreq_data, cpu) = NULL;
L
Linus Torvalds 已提交
847

848
	return result;
L
Linus Torvalds 已提交
849 850
}

851
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
852
{
853
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
L
Linus Torvalds 已提交
854

855
	pr_debug("acpi_cpufreq_cpu_exit\n");
L
Linus Torvalds 已提交
856 857

	if (data) {
858
		per_cpu(acfreq_data, policy->cpu) = NULL;
859 860
		acpi_processor_unregister_performance(data->acpi_data,
						      policy->cpu);
861
		free_cpumask_var(data->freqdomain_cpus);
862
		kfree(data->freq_table);
L
Linus Torvalds 已提交
863 864 865
		kfree(data);
	}

866
	return 0;
L
Linus Torvalds 已提交
867 868
}

869
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
870
{
871
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
L
Linus Torvalds 已提交
872

873
	pr_debug("acpi_cpufreq_resume\n");
L
Linus Torvalds 已提交
874 875 876

	data->resume = 1;

877
	return 0;
L
Linus Torvalds 已提交
878 879
}

880
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
881
	&cpufreq_freq_attr_scaling_available_freqs,
882
	&freqdomain_cpus,
883
	NULL,	/* this is a placeholder for cpb, do not remove */
L
Linus Torvalds 已提交
884 885 886 887
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
888
	.verify		= cpufreq_generic_frequency_table_verify,
889
	.target_index	= acpi_cpufreq_target,
890 891 892 893 894 895
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
896
	.set_boost      = _store_boost,
L
Linus Torvalds 已提交
897 898
};

899 900 901 902 903 904 905 906
/*
 * acpi_cpufreq_boost_init - set up boost support when the CPU has it
 *
 * Does nothing unless the boot CPU advertises X86_FEATURE_CPB or
 * X86_FEATURE_IDA and the MSR scratch buffer can be allocated.
 * Otherwise the current state is taken from CPU 0, applied to all
 * online CPUs and the hotplug notifier is registered.
 */
static void __init acpi_cpufreq_boost_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_CPB) && !boot_cpu_has(X86_FEATURE_IDA))
		return;

	msrs = msrs_alloc();
	if (!msrs)
		return;

	acpi_cpufreq_driver.boost_supported = true;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	cpu_notifier_register_begin();

	/* Force all MSRs to the same value */
	boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
		       cpu_online_mask);

	__register_cpu_notifier(&boost_nb);

	cpu_notifier_register_done();
}

922
static void acpi_cpufreq_boost_exit(void)
923 924 925 926 927 928 929 930 931
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

932
/*
 * acpi_cpufreq_init - module entry point
 *
 * Returns 0 on success, -ENODEV when ACPI is disabled, -EEXIST when
 * another cpufreq driver is already registered, or the error from
 * acpi_cpufreq_early_init() / cpufreq_register_driver().
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (check_amd_hwpstate_cpu(0)) {
		struct freq_attr **slot = acpi_cpufreq_attr;

		pr_debug("adding sysfs entry for cpb\n");

		/* advance to the first free (NULL) slot */
		while (*slot != NULL)
			slot++;

		/* make sure there is a terminator behind it */
		if (slot[1] == NULL)
			*slot = &cpb;
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (!ret)
		return 0;

	/* registration failed: release what was set up above */
	free_acpi_perf_data();
	acpi_cpufreq_boost_exit();

	return ret;
}

979
/*
 * acpi_cpufreq_exit - module exit point
 *
 * Tears down boost support, unregisters the cpufreq driver and frees
 * the per-CPU ACPI performance data, in that order.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();
	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_acpi_perf_data();
}

990
module_param(acpi_pstate_strict, uint, 0644);
991
MODULE_PARM_DESC(acpi_pstate_strict,
992 993
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
994 995 996 997

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

998 999 1000 1001 1002 1003 1004
/*
 * x86 CPU feature match table (ACPI or HW_PSTATE capable CPUs), exported
 * through MODULE_DEVICE_TABLE(x86cpu, ...) below; the empty entry
 * terminates the list.
 */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

1005 1006 1007 1008 1009 1010 1011
/*
 * ACPI processor object/device IDs, exported through
 * MODULE_DEVICE_TABLE(acpi, ...) below; the empty entry terminates the
 * list.
 */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

L
Linus Torvalds 已提交
1012
MODULE_ALIAS("acpi");