/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $)
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
L
Linus Torvalds 已提交
36 37 38 39

#include <linux/acpi.h>
#include <acpi/processor.h>

40
#include <asm/io.h>
41
#include <asm/msr.h>
42 43 44 45 46
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/delay.h>
#include <asm/uaccess.h>

L
Linus Torvalds 已提交
47 48 49 50 51 52
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)

MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54 55 56 57 58 59
/* How the P-state registers of a CPU are accessed (acpi_cpufreq_data.cpu_feature). */
enum {
	UNDEFINED_CAPABLE = 0,		/* no access method selected */
	SYSTEM_INTEL_MSR_CAPABLE,	/* registers are MSRs (rdmsr/wrmsr) */
	SYSTEM_IO_CAPABLE,		/* registers are ACPI system I/O ports */
};

#define INTEL_MSR_RANGE		(0xffff)
60
#define CPUID_6_ECX_APERFMPERF_CAPABILITY	(0x1)
61

62
/* Per-CPU driver state; allocated in acpi_cpufreq_cpu_init(). */
struct acpi_cpufreq_data {
	/* ACPI performance (P-state) description of this CPU */
	struct acpi_processor_performance *acpi_data;
	/* cpufreq table built from the usable ACPI P-states */
	struct cpufreq_frequency_table *freq_table;
	/* core frequency of P0 multiplied by 1000 */
	unsigned int max_freq;
	/* non-zero: the next ->target() call must write the registers */
	unsigned int resume;
	/* one of the *_CAPABLE register access methods */
	unsigned int cpu_feature;
};

70 71
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);

72 73
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
L
Linus Torvalds 已提交
74 75 76

static struct cpufreq_driver acpi_cpufreq_driver;

77 78
static unsigned int acpi_pstate_strict;

79 80
static int check_est_cpu(unsigned int cpuid)
{
81
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
82 83

	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
84
	    !cpu_has(cpu, X86_FEATURE_EST))
85 86 87 88 89 90
		return 0;

	return 1;
}

/*
 * Translate a value read from the I/O status port into a frequency.
 *
 * @value: raw value read from the port
 * @data:  driver state of the CPU the value was read on
 *
 * Returns the frequency of the matching freq_table entry, or 0 when no
 * entry matches.
 *
 * freq_table is compacted when acpi_cpufreq_cpu_init() skips P-states, so
 * its positions do not line up with ACPI state numbers.  Walk the table
 * and go through .index to find the ACPI state of each entry, the same
 * way extract_msr() does, instead of indexing the table by state number.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	int i;

	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
		if (value == perf->states[data->freq_table[i].index].status)
			return data->freq_table[i].frequency;
	}
	return 0;
}

104 105 106
/*
 * Translate a value read from the performance status MSR into a frequency.
 *
 * Only the bits covered by INTEL_MSR_RANGE are compared against the status
 * value of each P-state referenced by freq_table.  When nothing matches,
 * the frequency of the first table entry is returned.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	struct cpufreq_frequency_table *pos;

	msr &= INTEL_MSR_RANGE;

	for (pos = data->freq_table;
	     pos->frequency != CPUFREQ_TABLE_END; pos++) {
		if (msr == perf->states[pos->index].status)
			return pos->frequency;
	}

	return data->freq_table[0].frequency;
}

/*
 * Convert a raw register value into a frequency using the decoder that
 * matches the CPU's access method.  Returns 0 for an unknown method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	if (data->cpu_feature == SYSTEM_INTEL_MSR_CAPABLE)
		return extract_msr(val, data);

	if (data->cpu_feature == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

struct msr_addr {
	u32 reg;
};

135 136 137 138 139
struct io_addr {
	u16 port;
	u8 bit_width;
};

140 141 142 143 144
typedef union {
	struct msr_addr msr;
	struct io_addr io;
} drv_addr_union;

145
struct drv_cmd {
146
	unsigned int type;
147
	cpumask_t mask;
148
	drv_addr_union addr;
149 150 151 152
	u32 val;
};

static void do_drv_read(struct drv_cmd *cmd)
L
Linus Torvalds 已提交
153
{
154 155 156
	u32 h;

	switch (cmd->type) {
157
	case SYSTEM_INTEL_MSR_CAPABLE:
158 159
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
160
	case SYSTEM_IO_CAPABLE:
161 162 163
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
164
		break;
165
	default:
166 167
		break;
	}
168
}
L
Linus Torvalds 已提交
169

170 171
static void do_drv_write(struct drv_cmd *cmd)
{
172
	u32 lo, hi;
173 174

	switch (cmd->type) {
175
	case SYSTEM_INTEL_MSR_CAPABLE:
176 177 178
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
179
		break;
180
	case SYSTEM_IO_CAPABLE:
181 182 183
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
184
		break;
185
	default:
186 187
		break;
	}
188
}
L
Linus Torvalds 已提交
189

190
static void drv_read(struct drv_cmd *cmd)
191
{
192
	cpumask_t saved_mask = current->cpus_allowed;
193 194
	cmd->val = 0;

195
	set_cpus_allowed_ptr(current, &cmd->mask);
196
	do_drv_read(cmd);
197
	set_cpus_allowed_ptr(current, &saved_mask);
198 199 200 201
}

static void drv_write(struct drv_cmd *cmd)
{
202 203
	cpumask_t saved_mask = current->cpus_allowed;
	unsigned int i;
204 205

	for_each_cpu_mask(i, cmd->mask) {
206
		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
207
		do_drv_write(cmd);
L
Linus Torvalds 已提交
208 209
	}

210
	set_cpus_allowed_ptr(current, &saved_mask);
211 212
	return;
}
L
Linus Torvalds 已提交
213

214
static u32 get_cur_val(const cpumask_t *mask)
215
{
216 217
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
L
Linus Torvalds 已提交
218

219
	if (unlikely(cpus_empty(*mask)))
220
		return 0;
L
Linus Torvalds 已提交
221

222
	switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) {
223 224 225 226 227 228
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
229
		perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data;
230 231 232 233 234 235 236
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

237
	cmd.mask = *mask;
L
Linus Torvalds 已提交
238

239
	drv_read(&cmd);
L
Linus Torvalds 已提交
240

241 242 243 244
	dprintk("get_cur_val = %u\n", cmd.val);

	return cmd.val;
}
L
Linus Torvalds 已提交
245

246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
/*
 * Return the measured active (C0) frequency on this CPU since last call
 * to this function.
 * Input: cpu number
 * Return: Average CPU frequency in terms of max frequency (zero on error)
 *
 * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
 * over a period of time, while CPU is in C0 state.
 * IA32_MPERF counts at the rate of max advertised frequency
 * IA32_APERF counts at the rate of actual CPU frequency
 * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
 * no meaning should be associated with absolute values of these MSRs.
 */
static unsigned int get_measured_perf(unsigned int cpu)
{
	union {
		struct {
			u32 lo;
			u32 hi;
		} split;
		u64 whole;
	} aperf_cur, mperf_cur;

	cpumask_t saved_mask;
	unsigned int perf_percent;
	unsigned int retval;

	saved_mask = current->cpus_allowed;
274
	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
	if (get_cpu() != cpu) {
		/* We were not able to run on requested processor */
		put_cpu();
		return 0;
	}

	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);

	wrmsr(MSR_IA32_APERF, 0,0);
	wrmsr(MSR_IA32_MPERF, 0,0);

#ifdef __i386__
	/*
	 * We dont want to do 64 bit divide with 32 bit kernel
	 * Get an approximate value. Return failure in case we cannot get
	 * an approximate value.
	 */
	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
		int shift_count;
		u32 h;

		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
		shift_count = fls(h);

		aperf_cur.whole >>= shift_count;
		mperf_cur.whole >>= shift_count;
	}

	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
		int shift_count = 7;
		aperf_cur.split.lo >>= shift_count;
		mperf_cur.split.lo >>= shift_count;
	}

310
	if (aperf_cur.split.lo && mperf_cur.split.lo)
311
		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
312
	else
313 314 315 316 317 318 319 320 321
		perf_percent = 0;

#else
	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
		int shift_count = 7;
		aperf_cur.whole >>= shift_count;
		mperf_cur.whole >>= shift_count;
	}

322
	if (aperf_cur.whole && mperf_cur.whole)
323
		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
324
	else
325 326 327 328
		perf_percent = 0;

#endif

329
	retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
330 331

	put_cpu();
332
	set_cpus_allowed_ptr(current, &saved_mask);
333 334 335 336 337

	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
	return retval;
}

338 339
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
340
	struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
341
	unsigned int freq;
342
	unsigned int cached_freq;
343 344 345 346

	dprintk("get_cur_freq_on_cpu (%d)\n", cpu);

	if (unlikely(data == NULL ||
347
		     data->acpi_data == NULL || data->freq_table == NULL)) {
348
		return 0;
L
Linus Torvalds 已提交
349 350
	}

351
	cached_freq = data->freq_table[data->acpi_data->state].frequency;
352
	freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
353 354 355 356 357 358 359 360
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

361
	dprintk("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
362

363
	return freq;
L
Linus Torvalds 已提交
364 365
}

366
/*
 * Poll the hardware until it reports @freq.
 *
 * Makes up to 100 attempts with a udelay(10) after each miss.
 * Returns 1 as soon as the frequency matches, 0 if it never does.
 */
static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int attempts_left;

	for (attempts_left = 100; attempts_left; attempts_left--) {
		if (extract_freq(get_cur_val(mask), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
382
			       unsigned int target_freq, unsigned int relation)
L
Linus Torvalds 已提交
383
{
384
	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
385 386 387 388
	struct acpi_processor_performance *perf;
	struct cpufreq_freqs freqs;
	cpumask_t online_policy_cpus;
	struct drv_cmd cmd;
389 390
	unsigned int next_state = 0; /* Index into freq_table */
	unsigned int next_perf_state = 0; /* Index into perf table */
391 392
	unsigned int i;
	int result = 0;
393 394 395 396

	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);

	if (unlikely(data == NULL ||
397
	     data->acpi_data == NULL || data->freq_table == NULL)) {
398 399
		return -ENODEV;
	}
L
Linus Torvalds 已提交
400

401
	perf = data->acpi_data;
L
Linus Torvalds 已提交
402
	result = cpufreq_frequency_table_target(policy,
403 404 405
						data->freq_table,
						target_freq,
						relation, &next_state);
406
	if (unlikely(result))
407
		return -ENODEV;
408

409
#ifdef CONFIG_HOTPLUG_CPU
410 411
	/* cpufreq holds the hotplug lock, so we are safe from here on */
	cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
412 413 414
#else
	online_policy_cpus = policy->cpus;
#endif
L
Linus Torvalds 已提交
415

416
	next_perf_state = data->freq_table[next_state].index;
417
	if (perf->state == next_perf_state) {
418
		if (unlikely(data->resume)) {
419 420
			dprintk("Called after resume, resetting to P%d\n",
				next_perf_state);
421 422
			data->resume = 0;
		} else {
423 424
			dprintk("Already at target state (P%d)\n",
				next_perf_state);
425 426
			return 0;
		}
427 428
	}

429 430 431 432
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
433
		cmd.val = (u32) perf->states[next_perf_state].control;
434 435 436 437 438 439 440 441 442 443
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
		return -ENODEV;
	}
444

445
	cpus_clear(cmd.mask);
446

447 448 449 450
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
		cmd.mask = online_policy_cpus;
	else
		cpu_set(policy->cpu, cmd.mask);
451

452 453
	freqs.old = perf->states[perf->state].core_frequency * 1000;
	freqs.new = data->freq_table[next_state].frequency;
454 455 456
	for_each_cpu_mask(i, cmd.mask) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
457
	}
L
Linus Torvalds 已提交
458

459
	drv_write(&cmd);
460

461
	if (acpi_pstate_strict) {
462
		if (!check_freqs(&cmd.mask, freqs.new, data)) {
463
			dprintk("acpi_cpufreq_target failed (%d)\n",
464
				policy->cpu);
465
			return -EAGAIN;
466 467 468
		}
	}

469 470 471 472 473 474 475
	for_each_cpu_mask(i, cmd.mask) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
	}
	perf->state = next_perf_state;

	return result;
L
Linus Torvalds 已提交
476 477
}

478
static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
479
{
480
	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
L
Linus Torvalds 已提交
481 482 483

	dprintk("acpi_cpufreq_verify\n");

484
	return cpufreq_frequency_table_verify(policy, data->freq_table);
L
Linus Torvalds 已提交
485 486 487
}

static unsigned long
488
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
489
{
490
	struct acpi_processor_performance *perf = data->acpi_data;
491

L
Linus Torvalds 已提交
492 493 494 495
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
496
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
497

498
		for (i=0; i<(perf->state_count-1); i++) {
L
Linus Torvalds 已提交
499
			freq = freqn;
500
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
501
			if ((2 * cpu_khz) > (freqn + freq)) {
502
				perf->state = i;
503
				return freq;
L
Linus Torvalds 已提交
504 505
			}
		}
506
		perf->state = perf->state_count-1;
507
		return freqn;
508
	} else {
L
Linus Torvalds 已提交
509
		/* assume CPU is at P0... */
510 511 512
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
513 514
}

515 516 517 518 519 520 521 522
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
523
static int __init acpi_cpufreq_early_init(void)
524 525 526
{
	dprintk("acpi_cpufreq_early_init\n");

527 528 529 530
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
		dprintk("Memory allocation error for acpi_perf_data.\n");
		return -ENOMEM;
531 532 533
	}

	/* Do initialization in ACPI core */
534 535
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
536 537
}

538
#ifdef CONFIG_SMP
539 540 541 542 543 544 545 546
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

/* DMI match callback: remember that the running system has the quirk. */
static int sw_any_bug_found(const struct dmi_system_id *d)
{
	bios_with_sw_any_bug = 1;

	return 0;
}

553
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
554 555 556 557 558 559 560 561 562 563 564
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
565
#endif
566

567
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
568
{
569 570 571 572 573
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
574
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
575
	struct acpi_processor_performance *perf;
L
Linus Torvalds 已提交
576 577 578

	dprintk("acpi_cpufreq_cpu_init\n");

579
	data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
L
Linus Torvalds 已提交
580
	if (!data)
581
		return -ENOMEM;
L
Linus Torvalds 已提交
582

583
	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
584
	per_cpu(drv_data, cpu) = data;
L
Linus Torvalds 已提交
585

586
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
587
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
588

589
	result = acpi_processor_register_performance(data->acpi_data, cpu);
L
Linus Torvalds 已提交
590 591 592
	if (result)
		goto err_free;

593 594
	perf = data->acpi_data;
	policy->shared_type = perf->shared_type;
595

596
	/*
597
	 * Will let policy->cpus know about dependency only when software
598 599 600
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
601
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
602
		policy->cpus = perf->shared_cpu_map;
603
	}
604
	policy->related_cpus = perf->shared_cpu_map;
605 606 607 608 609

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
610
		policy->cpus = per_cpu(cpu_core_map, cpu);
611 612
	}
#endif
613

L
Linus Torvalds 已提交
614
	/* capability check */
615
	if (perf->state_count <= 1) {
L
Linus Torvalds 已提交
616 617 618 619
		dprintk("No P-States\n");
		result = -ENODEV;
		goto err_unreg;
	}
620

621 622 623 624 625 626
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
627
	case ACPI_ADR_SPACE_SYSTEM_IO:
628
		dprintk("SYSTEM IO addr space\n");
629 630
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
631
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
632 633 634 635 636 637
		dprintk("HARDWARE addr space\n");
		if (!check_est_cpu(cpu)) {
			result = -ENODEV;
			goto err_unreg;
		}
		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
638
		break;
639
	default:
640
		dprintk("Unknown addr space %d\n",
641
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
642 643 644 645
		result = -ENODEV;
		goto err_unreg;
	}

646 647
	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
648 649 650 651 652 653 654
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
655
	for (i=0; i<perf->state_count; i++) {
656 657 658 659
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
660 661
	}

662
	data->max_freq = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
663
	/* table init */
664
	for (i=0; i<perf->state_count; i++) {
665 666
		if (i>0 && perf->states[i].core_frequency >=
		    data->freq_table[valid_states-1].frequency / 1000)
667 668 669 670
			continue;

		data->freq_table[valid_states].index = i;
		data->freq_table[valid_states].frequency =
671
		    perf->states[i].core_frequency * 1000;
672
		valid_states++;
L
Linus Torvalds 已提交
673
	}
674
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
675
	perf->state = 0;
L
Linus Torvalds 已提交
676 677

	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
678
	if (result)
L
Linus Torvalds 已提交
679 680
		goto err_freqfree;

681
	switch (perf->control_register.space_id) {
682
	case ACPI_ADR_SPACE_SYSTEM_IO:
683 684 685
		/* Current speed is unknown and not detectable by IO port */
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
686
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
687
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
688
		policy->cur = get_cur_freq_on_cpu(cpu);
689
		break;
690
	default:
691 692 693
		break;
	}

L
Linus Torvalds 已提交
694 695 696
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

697 698 699 700
	/* Check for APERF/MPERF support in hardware */
	if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
		unsigned int ecx;
		ecx = cpuid_ecx(6);
701
		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
702 703 704
			acpi_cpufreq_driver.getavg = get_measured_perf;
	}

705
	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
706
	for (i = 0; i < perf->state_count; i++)
L
Linus Torvalds 已提交
707
		dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
708
			(i == perf->state ? '*' : ' '), i,
709 710 711
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
712 713

	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
714

715 716 717 718 719
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
720

721
	return result;
L
Linus Torvalds 已提交
722

723
err_freqfree:
L
Linus Torvalds 已提交
724
	kfree(data->freq_table);
725
err_unreg:
726
	acpi_processor_unregister_performance(perf, cpu);
727
err_free:
L
Linus Torvalds 已提交
728
	kfree(data);
729
	per_cpu(drv_data, cpu) = NULL;
L
Linus Torvalds 已提交
730

731
	return result;
L
Linus Torvalds 已提交
732 733
}

734
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
735
{
736
	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
L
Linus Torvalds 已提交
737 738 739 740 741

	dprintk("acpi_cpufreq_cpu_exit\n");

	if (data) {
		cpufreq_frequency_table_put_attr(policy->cpu);
742
		per_cpu(drv_data, policy->cpu) = NULL;
743 744
		acpi_processor_unregister_performance(data->acpi_data,
						      policy->cpu);
L
Linus Torvalds 已提交
745 746 747
		kfree(data);
	}

748
	return 0;
L
Linus Torvalds 已提交
749 750
}

751
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
752
{
753
	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
L
Linus Torvalds 已提交
754 755 756 757 758

	dprintk("acpi_cpufreq_resume\n");

	data->resume = 1;

759
	return 0;
L
Linus Torvalds 已提交
760 761
}

762
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
763 764 765 766 767
	&cpufreq_freq_attr_scaling_available_freqs,
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
768 769 770 771 772 773 774 775
	.verify = acpi_cpufreq_verify,
	.target = acpi_cpufreq_target,
	.init = acpi_cpufreq_cpu_init,
	.exit = acpi_cpufreq_cpu_exit,
	.resume = acpi_cpufreq_resume,
	.name = "acpi-cpufreq",
	.owner = THIS_MODULE,
	.attr = acpi_cpufreq_attr,
L
Linus Torvalds 已提交
776 777
};

778
/*
 * Module entry point: allocate the per-CPU ACPI performance data and
 * register the driver with the cpufreq core.
 *
 * Returns 0 on success or a negative errno.  When registration fails the
 * per-CPU data allocated by acpi_cpufreq_early_init() is released again,
 * since the module exit handler will not run in that case.
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	dprintk("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret)
		free_percpu(acpi_perf_data);

	return ret;
}

791
/*
 * Module exit point: unregister the driver, then release the per-CPU
 * ACPI performance data.
 */
static void __exit acpi_cpufreq_exit(void)
{
	dprintk("acpi_cpufreq_exit\n");

	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_percpu(acpi_perf_data);
}

802
module_param(acpi_pstate_strict, uint, 0644);
803
MODULE_PARM_DESC(acpi_pstate_strict,
804 805
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
806 807 808 809 810

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

MODULE_ALIAS("acpi");