/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
28
#include <linux/acpi.h>
29
#include <linux/vmalloc.h>
30 31 32 33 34
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
35
#include <asm/cpufeature.h>
36

37 38 39 40
/*
 * MSR addresses read via rdmsrl() by the atom_* helpers in this file to
 * obtain P-state ratios and voltage IDs (normal and turbo ranges).
 */
#define ATOM_RATIOS		0x66a
#define ATOM_VIDS		0x66b
#define ATOM_TURBO_RATIOS	0x66c
#define ATOM_TURBO_VIDS		0x66d
41

42
/*
 * Fixed-point helpers: values carry FRAC_BITS fractional bits.
 * int_tofp() widens to 64 bits before shifting so that large integer
 * inputs are not truncated; fp_toint() drops the fractional bits.
 */
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)
45

46 47 48 49 50
/*
 * Multiply two fixed-point values.  The product is formed in 64 bits and
 * shifted right by FRAC_BITS to restore the scale before being returned
 * as int32_t.
 */
static inline int32_t mul_fp(int32_t x, int32_t y)
{
	int64_t product = (int64_t)x * (int64_t)y;

	return product >> FRAC_BITS;
}

51
/*
 * Fixed-point division: x is shifted left by FRAC_BITS and then divided
 * by y using div64_s64(); the quotient is returned as int32_t.
 * The caller must not pass y == 0.
 */
static inline int32_t div_fp(s64 x, s64 y)
{
	return div64_s64((int64_t)x << FRAC_BITS, y);
}

56 57 58 59 60 61 62 63 64 65 66
/*
 * Convert a fixed-point value to an integer, rounding up whenever any
 * of the FRAC_BITS fractional bits is set.
 */
static inline int ceiling_fp(int32_t x)
{
	const int frac_mask = (1 << FRAC_BITS) - 1;
	int whole = fp_toint(x);

	return (x & frac_mask) ? whole + 1 : whole;
}

67
struct sample {
68
	int32_t core_pct_busy;
69
	int32_t busy_scaled;
70 71
	u64 aperf;
	u64 mperf;
72
	u64 tsc;
73
	int freq;
74
	u64 time;
75 76 77 78 79 80
};

/*
 * P-state limits of a CPU as returned by the pstate_funcs callbacks,
 * plus the P-state most recently requested.
 */
struct pstate_data {
	int	current_pstate;		/* last value passed to pstate_funcs.set() */
	int	min_pstate;		/* from pstate_funcs.get_min() */
	int	max_pstate;		/* from pstate_funcs.get_max() */
	int	max_pstate_physical;	/* from pstate_funcs.get_max_physical() */
	int	scaling;		/* multiplier turning a P-state into a frequency */
	int	turbo_pstate;		/* from pstate_funcs.get_turbo() */
};

86
struct vid_data {
87 88 89
	int min;
	int max;
	int turbo;
90 91 92
	int32_t ratio;
};

93 94 95 96 97 98 99
struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
100
	int32_t last_err;
101 102 103 104 105
};

struct cpudata {
	int cpu;

106
	struct update_util_data update_util;
107 108

	struct pstate_data pstate;
109
	struct vid_data vid;
110 111
	struct _pid pid;

112
	u64	last_sample_time;
113 114
	u64	prev_aperf;
	u64	prev_mperf;
115
	u64	prev_tsc;
116
	u64	prev_cummulative_iowait;
117
	struct sample sample;
118 119 120 121 122
};

/* Array of per-CPU state pointers, indexed by CPU number; entries may be NULL. */
static struct cpudata **all_cpu_data;
struct pstate_adjust_policy {
	int sample_rate_ms;
123
	s64 sample_rate_ns;
124 125 126 127 128 129 130
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};

131 132
struct pstate_funcs {
	int (*get_max)(void);
133
	int (*get_max_physical)(void);
134 135
	int (*get_min)(void);
	int (*get_turbo)(void);
136
	int (*get_scaling)(void);
137 138
	void (*set)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
139
	int32_t (*get_target_pstate)(struct cpudata *);
140 141
};

142 143 144
/* Default PID tunables and callbacks for one family of CPU models. */
struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
};

147
/* Forward declarations: referenced by the cpu_defaults tables below. */
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
149

150 151
/* Active PID tunables and model callbacks. */
static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
/* Non-zero when HWP is in use; the PID path and debugfs knobs are skipped then. */
static int hwp_active;
153

154 155
struct perf_limits {
	int no_turbo;
156
	int turbo_disabled;
157 158 159 160
	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;
161 162
	int max_policy_pct;
	int max_sysfs_pct;
163 164
	int min_policy_pct;
	int min_sysfs_pct;
165 166
};

167 168 169 170 171 172 173 174 175 176 177 178 179 180
/* Limits used for the performance policy: min and max both pinned at 100%. */
static struct perf_limits performance_limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.min_perf_pct = 100,
	.min_perf = int_tofp(1),
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
};

static struct perf_limits powersave_limits = {
181
	.no_turbo = 0,
182
	.turbo_disabled = 0,
183 184 185 186
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
187 188
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
189 190
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
191 192
};

193 194 195 196 197 198
/*
 * Currently active limits.  Starts at the performance limits when the
 * kernel's default governor is "performance", otherwise at powersave.
 */
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
static struct perf_limits *limits = &powersave_limits;
#else
static struct perf_limits *limits = &performance_limits;
#endif

199
static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
200
			     int deadband, int integral) {
201 202 203
	pid->setpoint = setpoint;
	pid->deadband  = deadband;
	pid->integral  = int_tofp(integral);
204
	pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

222
/*
 * Run one step of the PID controller.
 *
 * @pid:  controller state (updated in place)
 * @busy: measured busy value in fixed point
 *
 * Returns the correction as a plain integer, rounded to nearest, or 0
 * when the error lies within the deadband (in which case the controller
 * state is left untouched).
 */
static signed int pid_calc(struct _pid *pid, int32_t busy)
{
	signed int result;
	int32_t pterm, dterm, fp_error;
	int32_t integral_limit;

	fp_error = int_tofp(pid->setpoint) - busy;

	if (abs(fp_error) <= int_tofp(pid->deadband))
		return 0;

	pterm = mul_fp(pid->p_gain, fp_error);

	pid->integral += fp_error;

	/*
	 * We limit the integral here so that it will never
	 * get higher than 30.  This prevents it from becoming
	 * too large an input over long periods of time and allows
	 * it to get factored out sooner.
	 *
	 * The value of 30 was chosen through experimentation.
	 */
	integral_limit = int_tofp(30);
	if (pid->integral > integral_limit)
		pid->integral = integral_limit;
	if (pid->integral < -integral_limit)
		pid->integral = -integral_limit;

	dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
	pid->last_err = fp_error;

	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
	/* add one half in fixed point so the conversion rounds to nearest */
	result = result + (1 << (FRAC_BITS-1));
	return (signed int)fp_toint(result);
}

/*
 * Reload the gains of @cpu's PID controller from pid_params and reset
 * its state, assuming an initial busy value of 100 and a zero integral.
 */
static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
}

static inline void intel_pstate_reset_all_pid(void)
{
	unsigned int cpu;
271

272 273 274 275 276 277
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu])
			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
	}
}

278 279 280 281 282 283 284
static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
285
	limits->turbo_disabled =
286 287 288 289
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

D
Dirk Brandewie 已提交
290 291
/*
 * Program the min/max performance fields of MSR_HWP_REQUEST on every
 * online CPU from limits->min_perf_pct / limits->max_perf_pct, scaled
 * onto the [lowest, highest] range from MSR_HWP_CAPABILITIES.  With
 * no_turbo set, max is additionally capped at the guaranteed level.
 *
 * NOTE(review): the capabilities MSR is read once on the calling CPU and
 * applied to all CPUs - confirm this is valid for the supported parts.
 */
static void intel_pstate_hwp_set(void)
{
	int min, hw_min, max, hw_max, cpu, range, adj_range;
	u64 value, cap;

	rdmsrl(MSR_HWP_CAPABILITIES, cap);
	hw_min = HWP_LOWEST_PERF(cap);
	hw_max = HWP_HIGHEST_PERF(cap);
	range = hw_max - hw_min;

	get_online_cpus();

	for_each_online_cpu(cpu) {
		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
		adj_range = limits->min_perf_pct * range / 100;
		min = hw_min + adj_range;
		value &= ~HWP_MIN_PERF(~0L);
		value |= HWP_MIN_PERF(min);

		adj_range = limits->max_perf_pct * range / 100;
		max = hw_min + adj_range;
		if (limits->no_turbo) {
			hw_max = HWP_GUARANTEED_PERF(cap);
			if (hw_max < max)
				max = hw_max;
		}

		value &= ~HWP_MAX_PERF(~0L);
		value |= HWP_MAX_PERF(max);
		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
	}

	put_online_cpus();
}

325 326 327 328 329 330 331
/************************** debugfs begin ************************/
/*
 * debugfs write handler for the PID tunables.
 *
 * @data: pointer to one of the int fields of pid_params (see pid_files[])
 * @val:  new value; stored truncated to 32 bits
 *
 * The sampling path compares elapsed time against sample_rate_ns, so it
 * is recomputed from sample_rate_ms on every write; otherwise a write to
 * "sample_rate_ms" would have no effect.  All PID controllers are then
 * reset so the new parameters take effect.  Always returns 0.
 */
static int pid_param_set(void *data, u64 val)
{
	*(u32 *)data = val;
	pid_params.sample_rate_ns =
		(s64)pid_params.sample_rate_ms * NSEC_PER_MSEC;
	intel_pstate_reset_all_pid();
	return 0;
}
332

333 334 335 336 337
/*
 * debugfs read handler for the PID tunables: reports the 32-bit value
 * that @data points to through @val.  Always returns 0.
 */
static int pid_param_get(void *data, u64 *val)
{
	u32 *param = (u32 *)data;

	*val = *param;
	return 0;
}
338
/* File operations for the PID debugfs files; values are formatted with "%llu". */
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
339 340 341 342 343 344 345

/* Associates a debugfs file name with the tunable it exposes. */
struct pid_param {
	char *name;	/* debugfs file name; NULL terminates pid_files[] */
	void *value;	/* address handed to pid_param_get()/pid_param_set() */
};

static struct pid_param pid_files[] = {
346 347 348 349 350 351
	{"sample_rate_ms", &pid_params.sample_rate_ms},
	{"d_gain_pct", &pid_params.d_gain_pct},
	{"i_gain_pct", &pid_params.i_gain_pct},
	{"deadband", &pid_params.deadband},
	{"setpoint", &pid_params.setpoint},
	{"p_gain_pct", &pid_params.p_gain_pct},
352 353 354
	{NULL, NULL}
};

355
/*
 * Create the "pstate_snb" debugfs directory with one file per entry of
 * pid_files[].  Does nothing when HWP is active or when the directory
 * cannot be created.
 */
static void __init intel_pstate_debug_expose_params(void)
{
	struct dentry *debugfs_parent;
	int i = 0;

	if (hwp_active)
		return;
	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
	if (IS_ERR_OR_NULL(debugfs_parent))
		return;
	while (pid_files[i].name) {
		debugfs_create_file(pid_files[i].name, 0660,
				    debugfs_parent, pid_files[i].value,
				    &fops_pid_param);
		i++;
	}
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
/*
 * Generate show_<file_name>(), a sysfs show handler that prints the
 * field <object> of the active limits.
 */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits->object);		\
	}

383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
/*
 * sysfs "turbo_pct": percentage of CPU 0's P-state range (min..turbo,
 * inclusive) that lies above the non-turbo maximum.
 */
static ssize_t show_turbo_pct(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu = all_cpu_data[0];
	int lowest = cpu->pstate.min_pstate;
	int total = cpu->pstate.turbo_pstate - lowest + 1;
	int no_turbo = cpu->pstate.max_pstate - lowest + 1;
	uint32_t turbo_fp;
	int turbo_pct;

	/* fraction of the range that is reachable without turbo */
	turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));

	return sprintf(buf, "%u\n", turbo_pct);
}

399 400 401 402 403 404 405 406 407 408 409
/* sysfs "num_pstates": count of P-states from min to turbo (inclusive) on CPU 0. */
static ssize_t show_num_pstates(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu = all_cpu_data[0];
	int total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;

	return sprintf(buf, "%u\n", total);
}

410 411 412 413 414 415
static ssize_t show_no_turbo(struct kobject *kobj,
			     struct attribute *attr, char *buf)
{
	ssize_t ret;

	update_turbo_state();
416 417
	if (limits->turbo_disabled)
		ret = sprintf(buf, "%u\n", limits->turbo_disabled);
418
	else
419
		ret = sprintf(buf, "%u\n", limits->no_turbo);
420 421 422 423

	return ret;
}

424
/*
 * sysfs "no_turbo" write handler.
 *
 * Returns -EINVAL when @buf does not hold an unsigned integer, -EPERM
 * when turbo is disabled or unavailable, otherwise @count.  The input is
 * clamped to 0/1 and pushed to the hardware when HWP is active.
 */
static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	update_turbo_state();
	if (limits->turbo_disabled) {
		pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
		return -EPERM;
	}

	limits->no_turbo = clamp_t(int, input, 0, 1);

	if (hwp_active)
		intel_pstate_hwp_set();

	return count;
}

/*
 * sysfs "max_perf_pct" write handler.
 *
 * The input is clamped to 0..100 and stored as max_sysfs_pct.  The
 * effective max_perf_pct is then limited to max_policy_pct from above
 * and to min_policy_pct / min_perf_pct from below, and max_perf is
 * recomputed as the matching fixed-point fraction.
 * Returns -EINVAL on unparsable input, otherwise @count.
 */
static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);
	limits->max_perf_pct = max(limits->min_perf_pct,
				   limits->max_perf_pct);
	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
				  int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();
	return count;
}

/*
 * sysfs "min_perf_pct" write handler.
 *
 * The input is clamped to 0..100 and stored as min_sysfs_pct.  The
 * effective min_perf_pct is then limited to min_policy_pct from below
 * and to max_policy_pct / max_perf_pct from above, and min_perf is
 * recomputed as the matching fixed-point fraction.
 * Returns -EINVAL on unparsable input, otherwise @count.
 */
static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->min_perf_pct = min(limits->max_perf_pct,
				   limits->min_perf_pct);
	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
				  int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();
	return count;
}

/* Generated show handlers for the two percentage limits. */
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

/* Attribute objects: three writable knobs and two read-only values. */
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);
506 507 508 509 510

static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
511
	&turbo_pct.attr,
512
	&num_pstates.attr,
513 514 515 516 517 518 519
	NULL
};

/* Attribute group registered by intel_pstate_sysfs_expose_params(). */
static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};

520
/*
 * Create the "intel_pstate" kobject below the cpu subsystem root and
 * attach the attribute group to it.  Either step failing triggers
 * BUG_ON().
 */
static void __init intel_pstate_sysfs_expose_params(void)
{
	struct kobject *intel_pstate_kobject;
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!intel_pstate_kobject);
	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
	BUG_ON(rc);
}
/************************** sysfs end ************************/
D
Dirk Brandewie 已提交
532

533
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
D
Dirk Brandewie 已提交
534
{
535
	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
D
Dirk Brandewie 已提交
536 537
}

538
static int atom_get_min_pstate(void)
539 540
{
	u64 value;
541

542
	rdmsrl(ATOM_RATIOS, value);
D
Dirk Brandewie 已提交
543
	return (value >> 8) & 0x7F;
544 545
}

546
static int atom_get_max_pstate(void)
547 548
{
	u64 value;
549

550
	rdmsrl(ATOM_RATIOS, value);
D
Dirk Brandewie 已提交
551
	return (value >> 16) & 0x7F;
552
}
553

554
static int atom_get_turbo_pstate(void)
555 556
{
	u64 value;
557

558
	rdmsrl(ATOM_TURBO_RATIOS, value);
D
Dirk Brandewie 已提交
559
	return value & 0x7F;
560 561
}

562
static void atom_set_pstate(struct cpudata *cpudata, int pstate)
563 564 565 566 567
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

568
	val = (u64)pstate << 8;
569
	if (limits->no_turbo && !limits->turbo_disabled)
570 571 572 573 574 575 576
		val |= (u64)1 << 32;

	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
577
	vid = ceiling_fp(vid_fp);
578

579 580 581
	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

582 583
	val |= vid;

584
	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
585 586
}

587
static int silvermont_get_scaling(void)
588 589 590
{
	u64 value;
	int i;
591 592 593
	/* Defined in Table 35-6 from SDM (Sept 2015) */
	static int silvermont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000};
594 595

	rdmsrl(MSR_FSB_FREQ, value);
596 597
	i = value & 0x7;
	WARN_ON(i > 4);
598

599 600
	return silvermont_freq_table[i];
}
601

602 603 604 605 606 607 608 609 610 611 612 613 614 615
/*
 * Return the P-state scaling factor selected by the low four bits of
 * MSR_FSB_FREQ.
 *
 * The field can encode values 0..15 but the table only has nine entries.
 * An unknown encoding triggers a warning and falls back to 100000 (the
 * value core_get_scaling() uses) instead of reading past the end of the
 * table.
 */
static int airmont_get_scaling(void)
{
	u64 value;
	int i;
	/* Defined in Table 35-10 from SDM (Sept 2015) */
	static const int airmont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000,
		93300, 90000, 88900, 87500};

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0xF;
	if (WARN_ON(i >= ARRAY_SIZE(airmont_freq_table)))
		return 100000;

	return airmont_freq_table[i];
}

618
static void atom_get_vid(struct cpudata *cpudata)
619 620 621
{
	u64 value;

622
	rdmsrl(ATOM_VIDS, value);
D
Dirk Brandewie 已提交
623 624
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
625 626 627 628
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
			cpudata->pstate.min_pstate));
629

630
	rdmsrl(ATOM_TURBO_VIDS, value);
631
	cpudata->vid.turbo = value & 0x7f;
632 633
}

634
static int core_get_min_pstate(void)
635 636
{
	u64 value;
637

638
	rdmsrl(MSR_PLATFORM_INFO, value);
639 640 641
	return (value >> 40) & 0xFF;
}

642
static int core_get_max_pstate_physical(void)
643 644
{
	u64 value;
645

646
	rdmsrl(MSR_PLATFORM_INFO, value);
647 648 649
	return (value >> 8) & 0xFF;
}

650
static int core_get_max_pstate(void)
651
{
652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
	u64 tar;
	u64 plat_info;
	int max_pstate;
	int err;

	rdmsrl(MSR_PLATFORM_INFO, plat_info);
	max_pstate = (plat_info >> 8) & 0xFF;

	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
	if (!err) {
		/* Do some sanity checking for safety */
		if (plat_info & 0x600000000) {
			u64 tdp_ctrl;
			u64 tdp_ratio;
			int tdp_msr;

			err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
			if (err)
				goto skip_tar;

			tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
			if (err)
				goto skip_tar;

			if (tdp_ratio - 1 == tar) {
				max_pstate = tar;
				pr_debug("max_pstate=TAC %x\n", max_pstate);
			} else {
				goto skip_tar;
			}
		}
	}
685

686 687
skip_tar:
	return max_pstate;
688 689
}

690
static int core_get_turbo_pstate(void)
691 692 693
{
	u64 value;
	int nont, ret;
694

695
	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
696
	nont = core_get_max_pstate();
697
	ret = (value) & 255;
698 699 700 701 702
	if (ret <= nont)
		ret = nont;
	return ret;
}

703 704 705 706 707
/* Fixed P-state scaling factor used for the Core family. */
static inline int core_get_scaling(void)
{
	const int scaling = 100000;

	return scaling;
}

708
static void core_set_pstate(struct cpudata *cpudata, int pstate)
709 710 711
{
	u64 val;

712
	val = (u64)pstate << 8;
713
	if (limits->no_turbo && !limits->turbo_disabled)
714 715
		val |= (u64)1 << 32;

716
	wrmsrl(MSR_IA32_PERF_CTL, val);
717 718
}

719 720 721 722 723 724 725 726 727 728 729 730 731
/*
 * Turbo P-state for KNL: bits 15:8 of MSR_NHM_TURBO_RATIO_LIMIT, but
 * never less than the non-turbo maximum from core_get_max_pstate().
 */
static int knl_get_turbo_pstate(void)
{
	u64 limit_msr;
	int floor, turbo;

	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, limit_msr);
	floor = core_get_max_pstate();
	turbo = (limit_msr >> 8) & 0xFF;

	return turbo > floor ? turbo : floor;
}

732 733 734 735 736 737 738 739 740 741 742
static struct cpu_defaults core_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
743
		.get_max_physical = core_get_max_pstate_physical,
744 745
		.get_min = core_get_min_pstate,
		.get_turbo = core_get_turbo_pstate,
746
		.get_scaling = core_get_scaling,
747
		.set = core_set_pstate,
748
		.get_target_pstate = get_target_pstate_use_performance,
749 750 751
	},
};

752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
/*
 * Defaults for Silvermont: PI controller driven by CPU load, with VID
 * handling through the atom_* helpers.
 */
static struct cpu_defaults silvermont_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 60,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = atom_get_max_pstate,
		.get_max_physical = atom_get_max_pstate,
		.get_min = atom_get_min_pstate,
		.get_turbo = atom_get_turbo_pstate,
		.set = atom_set_pstate,
		.get_scaling = silvermont_get_scaling,
		.get_vid = atom_get_vid,
		.get_target_pstate = get_target_pstate_use_cpu_load,
	},
};

static struct cpu_defaults airmont_params = {
774 775 776
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
777
		.setpoint = 60,
778 779 780 781 782
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
783 784 785 786 787
		.get_max = atom_get_max_pstate,
		.get_max_physical = atom_get_max_pstate,
		.get_min = atom_get_min_pstate,
		.get_turbo = atom_get_turbo_pstate,
		.set = atom_set_pstate,
788
		.get_scaling = airmont_get_scaling,
789
		.get_vid = atom_get_vid,
790
		.get_target_pstate = get_target_pstate_use_cpu_load,
791 792 793
	},
};

794 795 796 797 798 799 800 801 802 803 804
static struct cpu_defaults knl_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
805
		.get_max_physical = core_get_max_pstate_physical,
806 807
		.get_min = core_get_min_pstate,
		.get_turbo = knl_get_turbo_pstate,
808
		.get_scaling = core_get_scaling,
809
		.set = core_set_pstate,
810
		.get_target_pstate = get_target_pstate_use_performance,
811 812 813
	},
};

814 815 816
static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
	int max_perf = cpu->pstate.turbo_pstate;
817
	int max_perf_adj;
818
	int min_perf;
819

820
	if (limits->no_turbo || limits->turbo_disabled)
821 822
		max_perf = cpu->pstate.max_pstate;

823 824 825 826 827
	/*
	 * performance can be limited by user through sysfs, by cpufreq
	 * policy, or by cpu specific default values determined through
	 * experimentation.
	 */
828 829 830
	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
	*max = clamp_t(int, max_perf_adj,
			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
831

832 833
	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
834 835
}

836
/*
 * Request @pstate for @cpu.
 *
 * With @force set, the turbo state is refreshed, @pstate is clamped to
 * the currently allowed range and nothing is written when the result
 * equals the current P-state.  Without @force the value is written as
 * given.  A cpu_frequency trace event is emitted before the write.
 */
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
{
	int max_perf, min_perf;

	if (force) {
		update_turbo_state();

		intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

		pstate = clamp_t(int, pstate, min_perf, max_perf);

		if (pstate == cpu->pstate.current_pstate)
			return;
	}
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);

	cpu->pstate.current_pstate = pstate;

	pstate_funcs.set(cpu, pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
859 860
	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate = pstate_funcs.get_max();
861
	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
862
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
863
	cpu->pstate.scaling = pstate_funcs.get_scaling();
864

865 866
	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);
867
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
868 869
}

870
static inline void intel_pstate_calc_busy(struct cpudata *cpu)
871
{
872
	struct sample *sample = &cpu->sample;
873
	int64_t core_pct;
874

875
	core_pct = int_tofp(sample->aperf) * int_tofp(100);
876
	core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
877

878
	sample->freq = fp_toint(
879
		mul_fp(int_tofp(
880 881
			cpu->pstate.max_pstate_physical *
			cpu->pstate.scaling / 100),
882
			core_pct));
883

884
	sample->core_pct_busy = (int32_t)core_pct;
885 886
}

887
/*
 * Take a new sample for @cpu at timestamp @time.
 *
 * APERF, MPERF and the TSC are read with interrupts disabled so the
 * three values belong together.  If MPERF or the TSC did not advance
 * since the previous sample, nothing is recorded.  Otherwise the deltas
 * are stored in cpu->sample, the busy figures are recomputed and the raw
 * counter values are remembered for the next call.
 */
static inline void intel_pstate_sample(struct cpudata *cpu, u64 time)
{
	u64 aperf, mperf;
	unsigned long flags;
	u64 tsc;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();
	if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) {
		local_irq_restore(flags);
		return;
	}
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = time;
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc =  tsc;
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
	cpu->sample.tsc -= cpu->prev_tsc;

	intel_pstate_calc_busy(cpu);

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
	cpu->prev_tsc = tsc;
}

919 920 921
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
922 923 924
	u64 cummulative_iowait, delta_iowait_us;
	u64 delta_iowait_mperf;
	u64 mperf, now;
925 926
	int32_t cpu_load;

927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942
	cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);

	/*
	 * Convert iowait time into number of IO cycles spent at max_freq.
	 * IO is considered as busy only for the cpu_load algorithm. For
	 * performance this is not needed since we always try to reach the
	 * maximum P-State, so we are already boosting the IOs.
	 */
	delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
	delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
		cpu->pstate.max_pstate, MSEC_PER_SEC);

	mperf = cpu->sample.mperf + delta_iowait_mperf;
	cpu->prev_cummulative_iowait = cummulative_iowait;


943 944 945 946 947 948
	/*
	 * The load can be estimated as the ratio of the mperf counter
	 * running at a constant frequency during active periods
	 * (C0) and the time stamp counter running at the same frequency
	 * also during C-states.
	 */
949
	cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
950 951 952 953 954
	cpu->sample.busy_scaled = cpu_load;

	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
}

955
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
956
{
957
	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
958
	u64 duration_ns;
959

960 961 962 963 964 965 966 967 968 969 970
	/*
	 * core_busy is the ratio of actual performance to max
	 * max_pstate is the max non turbo pstate available
	 * current_pstate was the pstate that was requested during
	 * 	the last sample period.
	 *
	 * We normalize core_busy, which was our actual percent
	 * performance to what we requested during the last sample
	 * period. The result will be a percentage of busy at a
	 * specified pstate.
	 */
971
	core_busy = cpu->sample.core_pct_busy;
972
	max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
973
	current_pstate = int_tofp(cpu->pstate.current_pstate);
974
	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
975

976
	/*
977 978 979 980
	 * Since our utilization update callback will not run unless we are
	 * in C0, check if the actual elapsed time is significantly greater (3x)
	 * than our sample interval.  If it is, then we were idle for a long
	 * enough period of time to adjust our busyness.
981
	 */
982 983 984 985 986
	duration_ns = cpu->sample.time - cpu->last_sample_time;
	if ((s64)duration_ns > pid_params.sample_rate_ns * 3
	    && cpu->last_sample_time > 0) {
		sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
				      int_tofp(duration_ns));
987 988 989
		core_busy = mul_fp(core_busy, sample_ratio);
	}

990 991
	cpu->sample.busy_scaled = core_busy;
	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
992 993 994 995
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
996
	int from, target_pstate;
997 998 999
	struct sample *sample;

	from = cpu->pstate.current_pstate;
1000

1001
	target_pstate = pstate_funcs.get_target_pstate(cpu);
1002

1003
	intel_pstate_set_pstate(cpu, target_pstate, true);
1004 1005 1006

	sample = &cpu->sample;
	trace_pstate_sample(fp_toint(sample->core_pct_busy),
1007
		fp_toint(sample->busy_scaled),
1008 1009 1010 1011 1012 1013
		from,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
		sample->freq);
1014 1015
}

1016 1017
/*
 * Utilization update callback installed in cpudata.update_util.
 *
 * Once at least pid_params.sample_rate_ns has elapsed since the last
 * recorded sample, a new sample is taken and, unless HWP is active, the
 * P-state is adjusted.  @util and @max are not used.
 */
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
				     unsigned long util, unsigned long max)
{
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
	u64 delta_ns = time - cpu->sample.time;

	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
		intel_pstate_sample(cpu, time);
		if (!hwp_active)
			intel_pstate_adjust_busy_pstate(cpu);
	}
}

/*
 * Build an x86_cpu_id entry for an Intel family 6 @model that has
 * X86_FEATURE_APERFMPERF, carrying the address of @policy as driver data.
 */
#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }
1032 1033

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
1034 1035
	ICPU(0x2a, core_params),
	ICPU(0x2d, core_params),
1036
	ICPU(0x37, silvermont_params),
1037 1038
	ICPU(0x3a, core_params),
	ICPU(0x3c, core_params),
1039
	ICPU(0x3d, core_params),
1040 1041 1042 1043
	ICPU(0x3e, core_params),
	ICPU(0x3f, core_params),
	ICPU(0x45, core_params),
	ICPU(0x46, core_params),
1044
	ICPU(0x47, core_params),
1045
	ICPU(0x4c, airmont_params),
1046
	ICPU(0x4e, core_params),
1047
	ICPU(0x4f, core_params),
1048
	ICPU(0x5e, core_params),
1049
	ICPU(0x56, core_params),
1050
	ICPU(0x57, knl_params),
1051 1052 1053 1054
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

D
Dirk Brandewie 已提交
1055 1056 1057 1058 1059
/*
 * Separate model list; its consumer is outside this part of the file.
 * NOTE(review): presumably models with out-of-band P-state control -
 * confirm against the user of this table.
 */
static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
	ICPU(0x56, core_params),
	{}
};

1060 1061 1062 1063
static int intel_pstate_init_cpu(unsigned int cpunum)
{
	struct cpudata *cpu;

1064 1065 1066
	if (!all_cpu_data[cpunum])
		all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
					       GFP_KERNEL);
1067 1068 1069 1070 1071 1072
	if (!all_cpu_data[cpunum])
		return -ENOMEM;

	cpu = all_cpu_data[cpunum];

	cpu->cpu = cpunum;
1073

1074
	if (hwp_active) {
1075
		intel_pstate_hwp_enable(cpu);
1076 1077 1078
		pid_params.sample_rate_ms = 50;
		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
	}
1079

1080
	intel_pstate_get_cpu_pstates(cpu);
1081

1082
	intel_pstate_busy_pid_reset(cpu);
1083
	intel_pstate_sample(cpu, 0);
1084

1085 1086
	cpu->update_util.func = intel_pstate_update_util;
	cpufreq_set_update_util_data(cpunum, &cpu->update_util);
1087

1088
	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100

	return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
	struct sample *sample;
	struct cpudata *cpu;

	cpu = all_cpu_data[cpu_num];
	if (!cpu)
		return 0;
1101
	sample = &cpu->sample;
1102 1103 1104 1105 1106
	return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
1107 1108 1109
	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

1110 1111
	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
	    policy->max >= policy->cpuinfo.max_freq) {
1112 1113
		pr_debug("intel_pstate: set performance\n");
		limits = &performance_limits;
1114 1115
		if (hwp_active)
			intel_pstate_hwp_set();
1116
		return 0;
1117
	}
D
Dirk Brandewie 已提交
1118

1119 1120 1121 1122
	pr_debug("intel_pstate: set powersave\n");
	limits = &powersave_limits;
	limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
	limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
1123 1124
	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
					      policy->cpuinfo.max_freq);
1125
	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
1126 1127

	/* Normalize user input to [min_policy_pct, max_policy_pct] */
1128 1129 1130 1131 1132 1133 1134 1135
	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);
1136
	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
1137 1138

	/* Make sure min_perf_pct <= max_perf_pct */
1139
	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
1140

1141 1142 1143 1144
	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
				  int_tofp(100));
	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
				  int_tofp(100));
1145

D
Dirk Brandewie 已提交
1146 1147 1148
	if (hwp_active)
		intel_pstate_hwp_set();

1149 1150 1151 1152 1153
	return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
1154
	cpufreq_verify_within_cpu_limits(policy);
1155

1156
	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
1157
	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
1158 1159 1160 1161 1162
		return -EINVAL;

	return 0;
}

1163
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
1164
{
1165 1166
	int cpu_num = policy->cpu;
	struct cpudata *cpu = all_cpu_data[cpu_num];
1167

1168
	pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
1169

1170 1171 1172
	cpufreq_set_update_util_data(cpu_num, NULL);
	synchronize_rcu();

D
Dirk Brandewie 已提交
1173 1174 1175
	if (hwp_active)
		return;

1176
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
1177 1178
}

1179
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
1180 1181
{
	struct cpudata *cpu;
1182
	int rc;
1183 1184 1185 1186 1187 1188 1189

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

1190
	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
1191 1192 1193 1194
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

1195 1196
	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1197 1198

	/* cpuinfo and default policy values */
1199 1200 1201
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->cpuinfo.max_freq =
		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	cpumask_set_cpu(policy->cpu, policy->cpus);

	return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
1214
	.stop_cpu	= intel_pstate_stop_cpu,
1215 1216 1217
	.name		= "intel_pstate",
};

1218
static int __initdata no_load;
D
Dirk Brandewie 已提交
1219
static int __initdata no_hwp;
1220
static int __initdata hwp_only;
1221
static unsigned int force_load;
1222

1223 1224
static int intel_pstate_msrs_not_valid(void)
{
1225
	if (!pstate_funcs.get_max() ||
1226 1227
	    !pstate_funcs.get_min() ||
	    !pstate_funcs.get_turbo())
1228 1229 1230 1231
		return -ENODEV;

	return 0;
}
1232

1233
static void copy_pid_params(struct pstate_adjust_policy *policy)
1234 1235
{
	pid_params.sample_rate_ms = policy->sample_rate_ms;
1236
	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
1237 1238 1239 1240 1241 1242 1243
	pid_params.p_gain_pct = policy->p_gain_pct;
	pid_params.i_gain_pct = policy->i_gain_pct;
	pid_params.d_gain_pct = policy->d_gain_pct;
	pid_params.deadband = policy->deadband;
	pid_params.setpoint = policy->setpoint;
}

1244
static void copy_cpu_funcs(struct pstate_funcs *funcs)
1245 1246
{
	pstate_funcs.get_max   = funcs->get_max;
1247
	pstate_funcs.get_max_physical = funcs->get_max_physical;
1248 1249
	pstate_funcs.get_min   = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
1250
	pstate_funcs.get_scaling = funcs->get_scaling;
1251
	pstate_funcs.set       = funcs->set;
1252
	pstate_funcs.get_vid   = funcs->get_vid;
1253 1254
	pstate_funcs.get_target_pstate = funcs->get_target_pstate;

1255 1256
}

1257
#if IS_ENABLED(CONFIG_ACPI)
1258
#include <acpi/processor.h>
1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288

/*
 * intel_pstate_no_acpi_pss - check whether no CPU exposes a usable _PSS
 *
 * Evaluates "_PSS" on every possible CPU that has an acpi_processor entry.
 * CPUs without an entry, or whose evaluation fails, are skipped.
 *
 * Return: false as soon as one CPU returns a package object for _PSS,
 * true when none does.
 */
static bool intel_pstate_no_acpi_pss(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, cpu);
		union acpi_object *obj;
		bool is_package;

		if (!pr)
			continue;

		if (ACPI_FAILURE(acpi_evaluate_object(pr->handle, "_PSS",
						      NULL, &out)))
			continue;

		/* The result buffer is released on every path. */
		obj = out.pointer;
		is_package = obj && obj->type == ACPI_TYPE_PACKAGE;
		kfree(obj);

		if (is_package)
			return false;
	}

	return true;
}

1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308
/*
 * intel_pstate_has_acpi_ppc - check whether any CPU has a _PPC method
 *
 * Return: true when at least one possible CPU with an acpi_processor entry
 * has a "_PPC" method, false otherwise.
 */
static bool intel_pstate_has_acpi_ppc(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (pr && acpi_has_method(pr->handle, "_PPC"))
			return true;
	}

	return false;
}

/* Values for hw_vendor_info.oem_pwr_table: which ACPI object is probed. */
enum {
	PSS = 0,	/* decide via intel_pstate_no_acpi_pss() */
	PPC = 1,	/* decide via intel_pstate_has_acpi_ppc() */
};

1309 1310 1311 1312
/*
 * One vendor_info[] entry. Field order matters: the table uses positional
 * initializers.
 */
struct hw_vendor_info {
	u16 valid;	/* non-zero for real entries, zero terminates the table */
	char oem_id[ACPI_OEM_ID_SIZE];	/* compared with the FADT oem_id */
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];	/* compared with the FADT oem_table_id */
	int oem_pwr_table;	/* PSS or PPC */
};

/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] = {
1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328
	{1, "HP    ", "ProLiant", PSS},
	{1, "ORACLE", "X4-2    ", PPC},
	{1, "ORACLE", "X4-2L   ", PPC},
	{1, "ORACLE", "X4-2B   ", PPC},
	{1, "ORACLE", "X3-2    ", PPC},
	{1, "ORACLE", "X3-2L   ", PPC},
	{1, "ORACLE", "X3-2B   ", PPC},
	{1, "ORACLE", "X4470M2 ", PPC},
	{1, "ORACLE", "X4270M3 ", PPC},
	{1, "ORACLE", "X4270M2 ", PPC},
	{1, "ORACLE", "X4170M2 ", PPC},
1329 1330 1331 1332
	{1, "ORACLE", "X4170 M3", PPC},
	{1, "ORACLE", "X4275 M3", PPC},
	{1, "ORACLE", "X6-2    ", PPC},
	{1, "ORACLE", "Sudbury ", PPC},
1333 1334 1335 1336 1337 1338 1339
	{0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	struct acpi_table_header hdr;
	struct hw_vendor_info *v_info;
D
Dirk Brandewie 已提交
1340 1341 1342 1343 1344 1345 1346 1347 1348
	const struct x86_cpu_id *id;
	u64 misc_pwr;

	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
	if (id) {
		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
		if ( misc_pwr & (1 << 8))
			return true;
	}
1349

1350 1351
	if (acpi_disabled ||
	    ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
1352 1353 1354
		return false;

	for (v_info = vendor_info; v_info->valid; v_info++) {
1355
		if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
1356 1357 1358 1359 1360 1361
			!strncmp(hdr.oem_table_id, v_info->oem_table_id,
						ACPI_OEM_TABLE_ID_SIZE))
			switch (v_info->oem_pwr_table) {
			case PSS:
				return intel_pstate_no_acpi_pss();
			case PPC:
1362 1363
				return intel_pstate_has_acpi_ppc() &&
					(!force_load);
1364
			}
1365 1366 1367 1368 1369 1370
	}

	return false;
}
#else /* CONFIG_ACPI not enabled */
/* Without ACPI there is no firmware power management to defer to. */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	return false;
}

/* Without ACPI no _PPC method can be present. */
static inline bool intel_pstate_has_acpi_ppc(void)
{
	return false;
}
1372 1373
#endif /* CONFIG_ACPI */

1374 1375
/*
 * intel_pstate_init - driver entry point, registered as a device initcall
 *
 * Bails out early when loading is disabled, the CPU model is not supported,
 * the platform firmware owns power management, or the P-state accessors
 * report zero. Otherwise it installs the model defaults, allocates
 * all_cpu_data, optionally activates HWP and registers the cpufreq driver.
 *
 * Return: 0 on success; -ENODEV when the driver declines to load; -ENOMEM
 * when all_cpu_data cannot be allocated; or the error code returned by
 * cpufreq_register_driver().
 */
static int __init intel_pstate_init(void)
{
	int cpu, rc = 0;
	const struct x86_cpu_id *id;
	struct cpu_defaults *cpu_def;

	if (no_load)
		return -ENODEV;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	cpu_def = (struct cpu_defaults *)id->driver_data;

	copy_pid_params(&cpu_def->pid_policy);
	copy_cpu_funcs(&cpu_def->funcs);

	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

	pr_info("Intel P-state driver initializing.\n");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
		pr_info("intel_pstate: HWP enabled\n");
		hwp_active++;
	}

	/* "hwp_only" was requested but HWP could not be activated. */
	if (!hwp_active && hwp_only) {
		rc = -ENODEV;
		goto out;
	}

	rc = cpufreq_register_driver(&intel_pstate_driver);
	if (rc)
		goto out;

	intel_pstate_debug_expose_params();
	intel_pstate_sysfs_expose_params();

	return rc;
out:
	/* Undo any per-CPU setup, then drop the pointer table itself. */
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			cpufreq_set_update_util_data(cpu, NULL);
			synchronize_rcu();
			kfree(all_cpu_data[cpu]);
		}
	}

	put_online_cpus();
	vfree(all_cpu_data);
	/* Report the real failure reason instead of a blanket -ENODEV. */
	return rc;
}
device_initcall(intel_pstate_init);

1440 1441 1442 1443 1444 1445 1446
/*
 * intel_pstate_setup - parse the "intel_pstate=" early parameter
 * @str: parameter value; may be NULL.
 *
 * Recognized values are "disable", "no_hwp", "force" and "hwp_only"; each
 * sets the corresponding file-level flag. Any other value is ignored.
 *
 * Return: -EINVAL when @str is NULL, 0 otherwise.
 */
static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	/* The options are mutually exclusive strings, so at most one matches. */
	if (!strcmp(str, "disable")) {
		no_load = 1;
	} else if (!strcmp(str, "no_hwp")) {
		pr_info("intel_pstate: HWP disabled\n");
		no_hwp = 1;
	} else if (!strcmp(str, "force")) {
		force_load = 1;
	} else if (!strcmp(str, "hwp_only")) {
		hwp_only = 1;
	}

	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

1459 1460 1461
MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
MODULE_LICENSE("GPL");