acpi-cpufreq.c 19.4 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2
 * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $)
L
Linus Torvalds 已提交
3 4 5 6
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
7
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
L
Linus Torvalds 已提交
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
31 32
#include <linux/smp.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
33
#include <linux/cpufreq.h>
34
#include <linux/compiler.h>
35
#include <linux/dmi.h>
L
Linus Torvalds 已提交
36 37 38 39

#include <linux/acpi.h>
#include <acpi/processor.h>

40
#include <asm/io.h>
41
#include <asm/msr.h>
42 43 44 45 46
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/delay.h>
#include <asm/uaccess.h>

L
Linus Torvalds 已提交
47 48 49 50 51 52
/*
 * Driver-local debug print helper: forwards its arguments to
 * cpufreq_debug_printk() with the CPUFREQ_DEBUG_DRIVER type and the
 * "acpi-cpufreq" tag.
 */
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)

/* Module metadata. */
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

53 54 55 56 57 58 59
/*
 * Register access method stored in acpi_cpufreq_data.cpu_feature and
 * drv_cmd.type.
 */
enum {
	/* No access method selected (the value of a zeroed cpu_feature) */
	UNDEFINED_CAPABLE = 0,
	/* P-state registers are accessed with rdmsr/wrmsr */
	SYSTEM_INTEL_MSR_CAPABLE,
	/* P-state registers are accessed through an ACPI I/O port */
	SYSTEM_IO_CAPABLE,
};

/* Mask of the low 16 MSR bits that this driver reads and writes. */
#define INTEL_MSR_RANGE		(0xffff)
60
/* Bit tested in cpuid_ecx(6) before get_measured_perf() is installed. */
#define CPUID_6_ECX_APERFMPERF_CAPABILITY	(0x1)
61

62
/*
 * Driver state for one CPU; instances are stored in drv_data[].
 */
struct acpi_cpufreq_data {
	/* ACPI performance (P-state) data of this CPU */
	struct acpi_processor_performance *acpi_data;
	/* cpufreq table built from the ACPI states, CPUFREQ_TABLE_END terminated */
	struct cpufreq_frequency_table *freq_table;
	/* core_frequency of ACPI state 0, multiplied by 1000 */
	unsigned int max_freq;
	/* when set, the next ->target() call writes even if the state matches */
	unsigned int resume;
	/* SYSTEM_INTEL_MSR_CAPABLE or SYSTEM_IO_CAPABLE */
	unsigned int cpu_feature;
};

70
/*
 * Driver data indexed by CPU number; an entry is set by
 * acpi_cpufreq_cpu_init() and reset to NULL by acpi_cpufreq_cpu_exit().
 */
static struct acpi_cpufreq_data *drv_data[NR_CPUS];
71 72
/*
 * acpi_perf_data is a pointer to percpu data. It is allocated in
 * acpi_cpufreq_early_init() and released in acpi_cpufreq_exit().
 */
static struct acpi_processor_performance *acpi_perf_data;
L
Linus Torvalds 已提交
73 74 75

/* Forward declaration; the driver structure is defined further below. */
static struct cpufreq_driver acpi_cpufreq_driver;

76 77
/*
 * Module parameter: when non-zero, acpi_cpufreq_target() reads the
 * frequency back after each write and fails if it did not change.
 */
static unsigned int acpi_pstate_strict;

78 79 80 81 82
static int check_est_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data[cpuid];

	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
83
	    !cpu_has(cpu, X86_FEATURE_EST))
84 85 86 87 88 89
		return 0;

	return 1;
}

/*
 * Translate a value read from the P-state I/O port into a frequency.
 *
 * @value: raw value read from the port
 * @data:  driver data of the CPU the value was read on
 *
 * data->freq_table is a compacted copy of the ACPI state list: states
 * that are skipped during table setup have no entry, so a freq_table
 * position is not an ACPI state index. Walk the table and map each
 * entry back to its ACPI state through .index (as extract_msr() does)
 * instead of indexing freq_table with the ACPI state number.
 *
 * Returns the matching table frequency, or 0 if no table entry has a
 * status equal to @value.
 */
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	int i;

	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
		if (value == perf->states[data->freq_table[i].index].status)
			return data->freq_table[i].frequency;
	}
	return 0;
}

103 104 105
/*
 * Translate a raw MSR value into a frequency.
 *
 * Only the bits selected by INTEL_MSR_RANGE are compared against the
 * status of the ACPI state each table entry refers to. If nothing
 * matches, the first table entry's frequency is returned.
 */
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf = data->acpi_data;
	struct cpufreq_frequency_table *entry;
	u32 status = msr & INTEL_MSR_RANGE;

	for (entry = data->freq_table;
	     entry->frequency != CPUFREQ_TABLE_END; entry++) {
		if (status == perf->states[entry->index].status)
			return entry->frequency;
	}
	return data->freq_table[0].frequency;
}

/*
 * Convert a raw register value into a frequency using the decoder that
 * matches data->cpu_feature. Returns 0 for an unknown access method.
 */
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	if (data->cpu_feature == SYSTEM_INTEL_MSR_CAPABLE)
		return extract_msr(val, data);

	if (data->cpu_feature == SYSTEM_IO_CAPABLE)
		return extract_io(val, data);

	return 0;
}

/* Address of a register accessed with rdmsr/wrmsr. */
struct msr_addr {
	u32 reg;	/* MSR number */
};

134 135 136 137 138
/* Address of a register accessed through acpi_os_{read,write}_port(). */
struct io_addr {
	u16 port;	/* I/O port address */
	u8 bit_width;	/* access width passed to the port helpers */
};

139 140 141 142 143
/* Register address; which member is valid depends on drv_cmd.type. */
typedef union {
	struct msr_addr msr;
	struct io_addr io;
} drv_addr_union;

144
struct drv_cmd {
145
	unsigned int type;
146
	cpumask_t mask;
147
	drv_addr_union addr;
148 149 150 151
	u32 val;
};

static void do_drv_read(struct drv_cmd *cmd)
L
Linus Torvalds 已提交
152
{
153 154 155
	u32 h;

	switch (cmd->type) {
156
	case SYSTEM_INTEL_MSR_CAPABLE:
157 158
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
159
	case SYSTEM_IO_CAPABLE:
160 161 162
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
163
		break;
164
	default:
165 166
		break;
	}
167
}
L
Linus Torvalds 已提交
168

169 170
static void do_drv_write(struct drv_cmd *cmd)
{
171
	u32 lo, hi;
172 173

	switch (cmd->type) {
174
	case SYSTEM_INTEL_MSR_CAPABLE:
175 176 177
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
178
		break;
179
	case SYSTEM_IO_CAPABLE:
180 181 182
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
183
		break;
184
	default:
185 186
		break;
	}
187
}
L
Linus Torvalds 已提交
188

189
static void drv_read(struct drv_cmd *cmd)
190
{
191
	cpumask_t saved_mask = current->cpus_allowed;
192 193 194 195 196 197 198 199 200
	cmd->val = 0;

	set_cpus_allowed(current, cmd->mask);
	do_drv_read(cmd);
	set_cpus_allowed(current, saved_mask);
}

static void drv_write(struct drv_cmd *cmd)
{
201 202
	cpumask_t saved_mask = current->cpus_allowed;
	unsigned int i;
203 204 205 206

	for_each_cpu_mask(i, cmd->mask) {
		set_cpus_allowed(current, cpumask_of_cpu(i));
		do_drv_write(cmd);
L
Linus Torvalds 已提交
207 208
	}

209 210 211
	set_cpus_allowed(current, saved_mask);
	return;
}
L
Linus Torvalds 已提交
212

213 214
/*
 * Read the raw P-state register value for the CPUs in mask.
 *
 * The access method and register are taken from the driver data of the
 * first CPU in mask: MSR_IA32_PERF_STATUS for MSR access, the ACPI
 * control register port for I/O access.
 *
 * Returns the value read, or 0 if mask is empty or the access method
 * is unknown.
 */
static u32 get_cur_val(cpumask_t mask)
{
	struct acpi_cpufreq_data *data;
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;

	if (unlikely(cpus_empty(mask)))
		return 0;

	data = drv_data[first_cpu(mask)];

	if (data->cpu_feature == SYSTEM_INTEL_MSR_CAPABLE) {
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
	} else if (data->cpu_feature == SYSTEM_IO_CAPABLE) {
		cmd.type = SYSTEM_IO_CAPABLE;
		perf = data->acpi_data;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
	} else {
		return 0;
	}

	cmd.mask = mask;

	drv_read(&cmd);

	dprintk("get_cur_val = %u\n", cmd.val);

	return cmd.val;
}
L
Linus Torvalds 已提交
244

245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
/*
 * Return the measured active (C0) frequency on this CPU since last call
 * to this function.
 * Input: cpu number
 * Return: Average CPU frequency in terms of max frequency (zero on error)
 *
 * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
 * over a period of time, while CPU is in C0 state.
 * IA32_MPERF counts at the rate of max advertised frequency
 * IA32_APERF counts at the rate of actual CPU frequency
 * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
 * no meaning should be associated with absolute values of these MSRs.
 *
 * Both counters are reset to zero after being read. The calling task's
 * CPU affinity is changed for the duration of the measurement and is
 * restored on every return path.
 */
static unsigned int get_measured_perf(unsigned int cpu)
{
	union {
		struct {
			u32 lo;
			u32 hi;
		} split;
		u64 whole;
	} aperf_cur, mperf_cur;

	cpumask_t saved_mask;
	unsigned int perf_percent;
	unsigned int retval;

	saved_mask = current->cpus_allowed;
	set_cpus_allowed(current, cpumask_of_cpu(cpu));
	if (get_cpu() != cpu) {
		/* We were not able to run on requested processor */
		put_cpu();
		/* Do not leave the caller pinned to the requested CPU */
		set_cpus_allowed(current, saved_mask);
		return 0;
	}

	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);

	/* Restart both counters so the next call measures a fresh interval */
	wrmsr(MSR_IA32_APERF, 0,0);
	wrmsr(MSR_IA32_MPERF, 0,0);

#ifdef __i386__
	/*
	 * We dont want to do 64 bit divide with 32 bit kernel
	 * Get an approximate value. Return failure in case we cannot get
	 * an approximate value.
	 */
	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
		int shift_count;
		u32 h;

		/* Shift both values until the larger one fits in 32 bits */
		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
		shift_count = fls(h);

		aperf_cur.whole >>= shift_count;
		mperf_cur.whole >>= shift_count;
	}

	/* Scale down if multiplying aperf by 100 would overflow */
	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
		int shift_count = 7;
		aperf_cur.split.lo >>= shift_count;
		mperf_cur.split.lo >>= shift_count;
	}

	if (aperf_cur.split.lo && mperf_cur.split.lo)
		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
	else
		perf_percent = 0;

#else
	/* Scale down if multiplying aperf by 100 would overflow */
	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
		int shift_count = 7;
		aperf_cur.whole >>= shift_count;
		mperf_cur.whole >>= shift_count;
	}

	if (aperf_cur.whole && mperf_cur.whole)
		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
	else
		perf_percent = 0;

#endif

	retval = drv_data[cpu]->max_freq * perf_percent / 100;

	put_cpu();
	set_cpus_allowed(current, saved_mask);

	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
	return retval;
}

337 338
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
339 340
	struct acpi_cpufreq_data *data = drv_data[cpu];
	unsigned int freq;
341 342 343 344

	dprintk("get_cur_freq_on_cpu (%d)\n", cpu);

	if (unlikely(data == NULL ||
345
		     data->acpi_data == NULL || data->freq_table == NULL)) {
346
		return 0;
L
Linus Torvalds 已提交
347 348
	}

349 350
	freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
	dprintk("cur freq = %u\n", freq);
L
Linus Torvalds 已提交
351

352
	return freq;
L
Linus Torvalds 已提交
353 354
}

355
/*
 * Poll the hardware until it reports the expected frequency.
 *
 * Reads the current frequency up to 100 times, waiting 10us after each
 * mismatch. Returns 1 as soon as the frequency equals freq, 0 if it
 * never does.
 */
static unsigned int check_freqs(cpumask_t mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int tries_left = 100;

	while (tries_left--) {
		if (extract_freq(get_cur_val(mask), data) == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
371
			       unsigned int target_freq, unsigned int relation)
L
Linus Torvalds 已提交
372
{
373 374 375 376 377
	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
	struct acpi_processor_performance *perf;
	struct cpufreq_freqs freqs;
	cpumask_t online_policy_cpus;
	struct drv_cmd cmd;
378 379
	unsigned int next_state = 0; /* Index into freq_table */
	unsigned int next_perf_state = 0; /* Index into perf table */
380 381
	unsigned int i;
	int result = 0;
382 383 384 385

	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);

	if (unlikely(data == NULL ||
386
	     data->acpi_data == NULL || data->freq_table == NULL)) {
387 388
		return -ENODEV;
	}
L
Linus Torvalds 已提交
389

390
	perf = data->acpi_data;
L
Linus Torvalds 已提交
391
	result = cpufreq_frequency_table_target(policy,
392 393 394
						data->freq_table,
						target_freq,
						relation, &next_state);
395
	if (unlikely(result))
396
		return -ENODEV;
397

398
#ifdef CONFIG_HOTPLUG_CPU
399 400
	/* cpufreq holds the hotplug lock, so we are safe from here on */
	cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
401 402 403
#else
	online_policy_cpus = policy->cpus;
#endif
L
Linus Torvalds 已提交
404

405
	next_perf_state = data->freq_table[next_state].index;
406
	if (perf->state == next_perf_state) {
407
		if (unlikely(data->resume)) {
408 409
			dprintk("Called after resume, resetting to P%d\n",
				next_perf_state);
410 411
			data->resume = 0;
		} else {
412 413
			dprintk("Already at target state (P%d)\n",
				next_perf_state);
414 415
			return 0;
		}
416 417
	}

418 419 420 421
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
422
		cmd.val = (u32) perf->states[next_perf_state].control;
423 424 425 426 427 428 429 430 431 432
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
		return -ENODEV;
	}
433

434
	cpus_clear(cmd.mask);
435

436 437 438 439
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
		cmd.mask = online_policy_cpus;
	else
		cpu_set(policy->cpu, cmd.mask);
440

441 442
	freqs.old = perf->states[perf->state].core_frequency * 1000;
	freqs.new = data->freq_table[next_state].frequency;
443 444 445
	for_each_cpu_mask(i, cmd.mask) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
446
	}
L
Linus Torvalds 已提交
447

448
	drv_write(&cmd);
449

450 451 452
	if (acpi_pstate_strict) {
		if (!check_freqs(cmd.mask, freqs.new, data)) {
			dprintk("acpi_cpufreq_target failed (%d)\n",
453
				policy->cpu);
454
			return -EAGAIN;
455 456 457
		}
	}

458 459 460 461 462 463 464
	for_each_cpu_mask(i, cmd.mask) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
	}
	perf->state = next_perf_state;

	return result;
L
Linus Torvalds 已提交
465 466
}

467
static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
468
{
469
	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
L
Linus Torvalds 已提交
470 471 472

	dprintk("acpi_cpufreq_verify\n");

473
	return cpufreq_frequency_table_verify(policy, data->freq_table);
L
Linus Torvalds 已提交
474 475 476
}

static unsigned long
477
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
L
Linus Torvalds 已提交
478
{
479
	struct acpi_processor_performance *perf = data->acpi_data;
480

L
Linus Torvalds 已提交
481 482 483 484
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
485
		unsigned long freqn = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
486

487
		for (i=0; i<(perf->state_count-1); i++) {
L
Linus Torvalds 已提交
488
			freq = freqn;
489
			freqn = perf->states[i+1].core_frequency * 1000;
L
Linus Torvalds 已提交
490
			if ((2 * cpu_khz) > (freqn + freq)) {
491
				perf->state = i;
492
				return freq;
L
Linus Torvalds 已提交
493 494
			}
		}
495
		perf->state = perf->state_count-1;
496
		return freqn;
497
	} else {
L
Linus Torvalds 已提交
498
		/* assume CPU is at P0... */
499 500 501
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
L
Linus Torvalds 已提交
502 503
}

504 505 506 507 508 509 510 511
/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
512
static int __init acpi_cpufreq_early_init(void)
513 514 515
{
	dprintk("acpi_cpufreq_early_init\n");

516 517 518 519
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
		dprintk("Memory allocation error for acpi_perf_data.\n");
		return -ENOMEM;
520 521 522
	}

	/* Do initialization in ACPI core */
523 524
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
525 526
}

527
#ifdef CONFIG_SMP
528 529 530 531 532 533 534 535
/*
 * Some BIOSes do SW_ANY coordination internally, either by setting it up
 * in hardware or by doing it in BIOS firmware, and do not tell the OS
 * about it. If not detected, this has the side effect of making the CPU
 * run at a different speed than the OS intended. Detect it and handle it
 * cleanly.
 *
 * Set to 1 by sw_any_bug_found() when a DMI table entry matches.
 */
static int bios_with_sw_any_bug;

536
static int sw_any_bug_found(const struct dmi_system_id *d)
537 538 539 540 541
{
	bios_with_sw_any_bug = 1;
	return 0;
}

542
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
543 544 545 546 547 548 549 550 551 552 553
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
554
#endif
555

556
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
557
{
558 559 560 561 562 563 564
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
	struct acpi_processor_performance *perf;
L
Linus Torvalds 已提交
565 566 567

	dprintk("acpi_cpufreq_cpu_init\n");

568
	data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
L
Linus Torvalds 已提交
569
	if (!data)
570
		return -ENOMEM;
L
Linus Torvalds 已提交
571

572
	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
573
	drv_data[cpu] = data;
L
Linus Torvalds 已提交
574

575
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
576
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
L
Linus Torvalds 已提交
577

578
	result = acpi_processor_register_performance(data->acpi_data, cpu);
L
Linus Torvalds 已提交
579 580 581
	if (result)
		goto err_free;

582 583
	perf = data->acpi_data;
	policy->shared_type = perf->shared_type;
584

585
	/*
586
	 * Will let policy->cpus know about dependency only when software
587 588 589
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
590
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
591
		policy->cpus = perf->shared_cpu_map;
592 593 594 595 596 597
	}

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
598
		policy->cpus = per_cpu(cpu_core_map, cpu);
599 600
	}
#endif
601

L
Linus Torvalds 已提交
602
	/* capability check */
603
	if (perf->state_count <= 1) {
L
Linus Torvalds 已提交
604 605 606 607
		dprintk("No P-States\n");
		result = -ENODEV;
		goto err_unreg;
	}
608

609 610 611 612 613 614
	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
615
	case ACPI_ADR_SPACE_SYSTEM_IO:
616
		dprintk("SYSTEM IO addr space\n");
617 618
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
619
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
620 621 622 623 624 625
		dprintk("HARDWARE addr space\n");
		if (!check_est_cpu(cpu)) {
			result = -ENODEV;
			goto err_unreg;
		}
		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
626
		break;
627
	default:
628
		dprintk("Unknown addr space %d\n",
629
			(u32) (perf->control_register.space_id));
L
Linus Torvalds 已提交
630 631 632 633
		result = -ENODEV;
		goto err_unreg;
	}

634 635
	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
		    (perf->state_count+1), GFP_KERNEL);
L
Linus Torvalds 已提交
636 637 638 639 640 641 642
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
643
	for (i=0; i<perf->state_count; i++) {
644 645 646 647
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
L
Linus Torvalds 已提交
648 649
	}

650
	data->max_freq = perf->states[0].core_frequency * 1000;
L
Linus Torvalds 已提交
651
	/* table init */
652
	for (i=0; i<perf->state_count; i++) {
653 654
		if (i>0 && perf->states[i].core_frequency >=
		    data->freq_table[valid_states-1].frequency / 1000)
655 656 657 658
			continue;

		data->freq_table[valid_states].index = i;
		data->freq_table[valid_states].frequency =
659
		    perf->states[i].core_frequency * 1000;
660
		valid_states++;
L
Linus Torvalds 已提交
661
	}
662
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
663
	perf->state = 0;
L
Linus Torvalds 已提交
664 665

	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
666
	if (result)
L
Linus Torvalds 已提交
667 668
		goto err_freqfree;

669
	switch (perf->control_register.space_id) {
670
	case ACPI_ADR_SPACE_SYSTEM_IO:
671 672 673
		/* Current speed is unknown and not detectable by IO port */
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
674
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
675
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
676
		policy->cur = get_cur_freq_on_cpu(cpu);
677
		break;
678
	default:
679 680 681
		break;
	}

L
Linus Torvalds 已提交
682 683 684
	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

685 686 687 688
	/* Check for APERF/MPERF support in hardware */
	if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
		unsigned int ecx;
		ecx = cpuid_ecx(6);
689
		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
690 691 692
			acpi_cpufreq_driver.getavg = get_measured_perf;
	}

693
	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
694
	for (i = 0; i < perf->state_count; i++)
L
Linus Torvalds 已提交
695
		dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
696
			(i == perf->state ? '*' : ' '), i,
697 698 699
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);
L
Linus Torvalds 已提交
700 701

	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
702

703 704 705 706 707
	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;
708

709
	return result;
L
Linus Torvalds 已提交
710

711
err_freqfree:
L
Linus Torvalds 已提交
712
	kfree(data->freq_table);
713
err_unreg:
714
	acpi_processor_unregister_performance(perf, cpu);
715
err_free:
L
Linus Torvalds 已提交
716
	kfree(data);
717
	drv_data[cpu] = NULL;
L
Linus Torvalds 已提交
718

719
	return result;
L
Linus Torvalds 已提交
720 721
}

722
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
723
{
724
	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
L
Linus Torvalds 已提交
725 726 727 728 729

	dprintk("acpi_cpufreq_cpu_exit\n");

	if (data) {
		cpufreq_frequency_table_put_attr(policy->cpu);
730
		drv_data[policy->cpu] = NULL;
731 732
		acpi_processor_unregister_performance(data->acpi_data,
						      policy->cpu);
L
Linus Torvalds 已提交
733 734 735
		kfree(data);
	}

736
	return 0;
L
Linus Torvalds 已提交
737 738
}

739
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
740
{
741
	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
L
Linus Torvalds 已提交
742 743 744 745 746

	dprintk("acpi_cpufreq_resume\n");

	data->resume = 1;

747
	return 0;
L
Linus Torvalds 已提交
748 749
}

750
static struct freq_attr *acpi_cpufreq_attr[] = {
L
Linus Torvalds 已提交
751 752 753 754 755
	&cpufreq_freq_attr_scaling_available_freqs,
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
756 757 758 759 760 761 762 763
	.verify = acpi_cpufreq_verify,
	.target = acpi_cpufreq_target,
	.init = acpi_cpufreq_cpu_init,
	.exit = acpi_cpufreq_cpu_exit,
	.resume = acpi_cpufreq_resume,
	.name = "acpi-cpufreq",
	.owner = THIS_MODULE,
	.attr = acpi_cpufreq_attr,
L
Linus Torvalds 已提交
764 765
};

766
/*
 * Module init: allocate the percpu ACPI performance data and register
 * the cpufreq driver.
 *
 * Returns 0 on success, or the error from acpi_cpufreq_early_init() or
 * cpufreq_register_driver(). If driver registration fails the percpu
 * data allocated by the early init is released again.
 */
static int __init acpi_cpufreq_init(void)
{
	int ret;

	dprintk("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret)
		/* acpi_cpufreq_exit() will not run; free here to avoid a leak */
		free_percpu(acpi_perf_data);

	return ret;
}

779
/*
 * Module exit: unregister the cpufreq driver, then free the percpu
 * ACPI performance data.
 */
static void __exit acpi_cpufreq_exit(void)
{
	dprintk("acpi_cpufreq_exit\n");

	cpufreq_unregister_driver(&acpi_cpufreq_driver);
	free_percpu(acpi_perf_data);
}

790
module_param(acpi_pstate_strict, uint, 0644);
791
MODULE_PARM_DESC(acpi_pstate_strict,
792 793
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
L
Linus Torvalds 已提交
794 795 796 797 798

/* Register the module entry and exit points and the "acpi" alias. */
late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

MODULE_ALIAS("acpi");