/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>

/* MSRs read by the atom_get_*() helpers for P-state ratios and voltage IDs. */
#define ATOM_RATIOS		0x66a
#define ATOM_VIDS		0x66b
#define ATOM_TURBO_RATIOS	0x66c
#define ATOM_TURBO_VIDS		0x66d

/* Number of fractional bits in the driver's fixed-point representation. */
#define FRAC_BITS 8
/* Convert an integer to fixed point (result is widened to int64_t). */
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
/* Convert fixed point back to an integer by dropping the fractional bits. */
#define fp_toint(X) ((X) >> FRAC_BITS)

46 47 48 49 50
static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

51
static inline int32_t div_fp(s64 x, s64 y)
52
{
53
	return div64_s64((int64_t)x << FRAC_BITS, y);
54 55
}

56 57 58 59 60 61 62 63 64 65 66
static inline int ceiling_fp(int32_t x)
{
	int mask, ret;

	ret = fp_toint(x);
	mask = (1 << FRAC_BITS) - 1;
	if (x & mask)
		ret += 1;
	return ret;
}

67
struct sample {
68
	int32_t core_pct_busy;
69
	int32_t busy_scaled;
70 71
	u64 aperf;
	u64 mperf;
72
	u64 tsc;
73
	int freq;
74
	u64 time;
75 76 77 78 79 80
};

/*
 * Per-CPU P-state limits; all fields except current_pstate are populated
 * from the pstate_funcs callbacks in intel_pstate_get_cpu_pstates().
 */
struct pstate_data {
	int	current_pstate;		/* last P-state recorded for this CPU */
	int	min_pstate;		/* from pstate_funcs.get_min() */
	int	max_pstate;		/* from pstate_funcs.get_max() */
	int	max_pstate_physical;	/* from pstate_funcs.get_max_physical() */
	int	scaling;		/* from pstate_funcs.get_scaling() */
	int	turbo_pstate;		/* from pstate_funcs.get_turbo() */
};

86
struct vid_data {
87 88 89
	int min;
	int max;
	int turbo;
90 91 92
	int32_t ratio;
};

93 94 95 96 97 98 99
struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
100
	int32_t last_err;
101 102 103 104 105
};

struct cpudata {
	int cpu;

106
	struct update_util_data update_util;
107 108

	struct pstate_data pstate;
109
	struct vid_data vid;
110 111
	struct _pid pid;

112
	u64	last_sample_time;
113 114
	u64	prev_aperf;
	u64	prev_mperf;
115
	u64	prev_tsc;
116
	u64	prev_cummulative_iowait;
117
	struct sample sample;
118 119 120 121 122
};

static struct cpudata **all_cpu_data;
struct pstate_adjust_policy {
	int sample_rate_ms;
123
	s64 sample_rate_ns;
124 125 126 127 128 129 130
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};

131 132
struct pstate_funcs {
	int (*get_max)(void);
133
	int (*get_max_physical)(void);
134 135
	int (*get_min)(void);
	int (*get_turbo)(void);
136
	int (*get_scaling)(void);
137
	u64 (*get_val)(struct cpudata*, int pstate);
138
	void (*get_vid)(struct cpudata *);
139
	int32_t (*get_target_pstate)(struct cpudata *);
140 141
};

142 143 144
struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
145 146
};

147
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
148
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
149

150 151
static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
D
Dirk Brandewie 已提交
152
static int hwp_active;
153

154 155
struct perf_limits {
	int no_turbo;
156
	int turbo_disabled;
157 158 159 160
	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;
161 162
	int max_policy_pct;
	int max_sysfs_pct;
163 164
	int min_policy_pct;
	int min_sysfs_pct;
165 166
};

167 168 169 170 171 172 173 174 175 176 177 178 179 180
static struct perf_limits performance_limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 100,
	.min_perf = int_tofp(1),
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
};

static struct perf_limits powersave_limits = {
181
	.no_turbo = 0,
182
	.turbo_disabled = 0,
183 184 185 186
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
187 188
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
189 190
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
191 192
};

193 194 195 196 197 198
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
static struct perf_limits *limits = &performance_limits;
#else
static struct perf_limits *limits = &powersave_limits;
#endif

199
static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
200
			     int deadband, int integral) {
201 202
	pid->setpoint = int_tofp(setpoint);
	pid->deadband  = int_tofp(deadband);
203
	pid->integral  = int_tofp(integral);
204
	pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

222
static signed int pid_calc(struct _pid *pid, int32_t busy)
223
{
224
	signed int result;
225 226 227
	int32_t pterm, dterm, fp_error;
	int32_t integral_limit;

228
	fp_error = pid->setpoint - busy;
229

230
	if (abs(fp_error) <= pid->deadband)
231 232 233 234 235 236
		return 0;

	pterm = mul_fp(pid->p_gain, fp_error);

	pid->integral += fp_error;

237 238 239 240 241 242 243 244
	/*
	 * We limit the integral here so that it will never
	 * get higher than 30.  This prevents it from becoming
	 * too large an input over long periods of time and allows
	 * it to get factored out sooner.
	 *
	 * The value of 30 was chosen through experimentation.
	 */
245 246 247 248 249 250
	integral_limit = int_tofp(30);
	if (pid->integral > integral_limit)
		pid->integral = integral_limit;
	if (pid->integral < -integral_limit)
		pid->integral = -integral_limit;

251 252
	dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
	pid->last_err = fp_error;
253 254

	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
255
	result = result + (1 << (FRAC_BITS-1));
256 257 258 259 260
	return (signed int)fp_toint(result);
}

/*
 * Load the gains from pid_params into @cpu's PID controller and reset its
 * state, seeding the controller with a busy value of 100.
 */
static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
}

static inline void intel_pstate_reset_all_pid(void)
{
	unsigned int cpu;
271

272 273 274 275 276 277
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu])
			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
	}
}

278 279 280 281 282 283 284
static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
285
	limits->turbo_disabled =
286 287 288 289
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

290
static void intel_pstate_hwp_set(const struct cpumask *cpumask)
D
Dirk Brandewie 已提交
291
{
292 293 294 295 296 297 298
	int min, hw_min, max, hw_max, cpu, range, adj_range;
	u64 value, cap;

	rdmsrl(MSR_HWP_CAPABILITIES, cap);
	hw_min = HWP_LOWEST_PERF(cap);
	hw_max = HWP_HIGHEST_PERF(cap);
	range = hw_max - hw_min;
D
Dirk Brandewie 已提交
299

300
	for_each_cpu(cpu, cpumask) {
D
Dirk Brandewie 已提交
301
		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
302
		adj_range = limits->min_perf_pct * range / 100;
303
		min = hw_min + adj_range;
D
Dirk Brandewie 已提交
304 305 306
		value &= ~HWP_MIN_PERF(~0L);
		value |= HWP_MIN_PERF(min);

307
		adj_range = limits->max_perf_pct * range / 100;
308
		max = hw_min + adj_range;
309
		if (limits->no_turbo) {
310 311 312
			hw_max = HWP_GUARANTEED_PERF(cap);
			if (hw_max < max)
				max = hw_max;
D
Dirk Brandewie 已提交
313 314 315 316 317 318
		}

		value &= ~HWP_MAX_PERF(~0L);
		value |= HWP_MAX_PERF(max);
		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
	}
319
}
D
Dirk Brandewie 已提交
320

321 322 323 324
static void intel_pstate_hwp_set_online_cpus(void)
{
	get_online_cpus();
	intel_pstate_hwp_set(cpu_online_mask);
D
Dirk Brandewie 已提交
325 326 327
	put_online_cpus();
}

328 329 330 331 332 333 334
/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
	*(u32 *)data = val;
	intel_pstate_reset_all_pid();
	return 0;
}
335

336 337 338 339 340
static int pid_param_get(void *data, u64 *val)
{
	*val = *(u32 *)data;
	return 0;
}
341
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
342 343 344 345 346 347 348

/* One debugfs tunable: file name and the variable it reads and writes. */
struct pid_param {
	char *name;
	void *value;
};

static struct pid_param pid_files[] = {
349 350 351 352 353 354
	{"sample_rate_ms", &pid_params.sample_rate_ms},
	{"d_gain_pct", &pid_params.d_gain_pct},
	{"i_gain_pct", &pid_params.i_gain_pct},
	{"deadband", &pid_params.deadband},
	{"setpoint", &pid_params.setpoint},
	{"p_gain_pct", &pid_params.p_gain_pct},
355 356 357
	{NULL, NULL}
};

358
static void __init intel_pstate_debug_expose_params(void)
359
{
360
	struct dentry *debugfs_parent;
361 362
	int i = 0;

D
Dirk Brandewie 已提交
363 364
	if (hwp_active)
		return;
365 366 367 368 369
	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
	if (IS_ERR_OR_NULL(debugfs_parent))
		return;
	while (pid_files[i].name) {
		debugfs_create_file(pid_files[i].name, 0660,
370 371
				    debugfs_parent, pid_files[i].value,
				    &fops_pid_param);
372 373 374 375 376 377 378 379 380 381 382
		i++;
	}
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
/* Generate a sysfs show_<file_name>() handler printing limits-><object>. */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits->object);		\
	}

386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
static ssize_t show_turbo_pct(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total, no_turbo, turbo_pct;
	uint32_t turbo_fp;

	cpu = all_cpu_data[0];

	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
	turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
	return sprintf(buf, "%u\n", turbo_pct);
}

402 403 404 405 406 407 408 409 410 411 412
static ssize_t show_num_pstates(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total;

	cpu = all_cpu_data[0];
	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	return sprintf(buf, "%u\n", total);
}

413 414 415 416 417 418
static ssize_t show_no_turbo(struct kobject *kobj,
			     struct attribute *attr, char *buf)
{
	ssize_t ret;

	update_turbo_state();
419 420
	if (limits->turbo_disabled)
		ret = sprintf(buf, "%u\n", limits->turbo_disabled);
421
	else
422
		ret = sprintf(buf, "%u\n", limits->no_turbo);
423 424 425 426

	return ret;
}

427
static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
428
			      const char *buf, size_t count)
429 430 431
{
	unsigned int input;
	int ret;
432

433 434 435
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
436 437

	update_turbo_state();
438
	if (limits->turbo_disabled) {
439
		pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
440
		return -EPERM;
441
	}
D
Dirk Brandewie 已提交
442

443
	limits->no_turbo = clamp_t(int, input, 0, 1);
444

D
Dirk Brandewie 已提交
445
	if (hwp_active)
446
		intel_pstate_hwp_set_online_cpus();
D
Dirk Brandewie 已提交
447

448 449 450 451
	return count;
}

/*
 * sysfs: set the user-requested maximum performance percentage.
 *
 * The input is clamped to 0..100, then the effective max_perf_pct is derived
 * so that it does not exceed max_policy_pct and does not fall below
 * min_policy_pct or min_perf_pct.  Returns @count, or -EINVAL on bad input.
 */
static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);
	limits->max_perf_pct = max(limits->min_perf_pct,
				   limits->max_perf_pct);
	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
				  int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set_online_cpus();
	return count;
}

/*
 * sysfs: set the user-requested minimum performance percentage.
 *
 * The input is clamped to 0..100, then the effective min_perf_pct is derived
 * so that it does not fall below min_policy_pct and does not exceed
 * max_policy_pct or max_perf_pct.  Returns @count, or -EINVAL on bad input.
 */
static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->min_perf_pct = min(limits->max_perf_pct,
				   limits->min_perf_pct);
	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
				  int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set_online_cpus();
	return count;
}

/* Generated show handlers for the two percentage limits. */
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

/* Attribute objects referenced from intel_pstate_attributes[]. */
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);

static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
514
	&turbo_pct.attr,
515
	&num_pstates.attr,
516 517 518 519 520 521 522
	NULL
};

/* Attribute group registered by intel_pstate_sysfs_expose_params(). */
static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};

523
static void __init intel_pstate_sysfs_expose_params(void)
524
{
525
	struct kobject *intel_pstate_kobject;
526 527 528 529 530
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!intel_pstate_kobject);
531
	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
532 533 534
	BUG_ON(rc);
}
/************************** sysfs end ************************/
D
Dirk Brandewie 已提交
535

536
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
D
Dirk Brandewie 已提交
537
{
538 539 540
	/* First disable HWP notification interrupt as we don't process them */
	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);

541
	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
D
Dirk Brandewie 已提交
542 543
}

544
static int atom_get_min_pstate(void)
545 546
{
	u64 value;
547

548
	rdmsrl(ATOM_RATIOS, value);
D
Dirk Brandewie 已提交
549
	return (value >> 8) & 0x7F;
550 551
}

552
static int atom_get_max_pstate(void)
553 554
{
	u64 value;
555

556
	rdmsrl(ATOM_RATIOS, value);
D
Dirk Brandewie 已提交
557
	return (value >> 16) & 0x7F;
558
}
559

560
static int atom_get_turbo_pstate(void)
561 562
{
	u64 value;
563

564
	rdmsrl(ATOM_TURBO_RATIOS, value);
D
Dirk Brandewie 已提交
565
	return value & 0x7F;
566 567
}

568
static u64 atom_get_val(struct cpudata *cpudata, int pstate)
569 570 571 572 573
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

574
	val = (u64)pstate << 8;
575
	if (limits->no_turbo && !limits->turbo_disabled)
576 577 578 579 580 581 582
		val |= (u64)1 << 32;

	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
583
	vid = ceiling_fp(vid_fp);
584

585 586 587
	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

588
	return val | vid;
589 590
}

591
static int silvermont_get_scaling(void)
592 593 594
{
	u64 value;
	int i;
595 596 597
	/* Defined in Table 35-6 from SDM (Sept 2015) */
	static int silvermont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000};
598 599

	rdmsrl(MSR_FSB_FREQ, value);
600 601
	i = value & 0x7;
	WARN_ON(i > 4);
602

603 604
	return silvermont_freq_table[i];
}
605

606 607 608 609 610 611 612 613 614 615 616 617 618 619
static int airmont_get_scaling(void)
{
	u64 value;
	int i;
	/* Defined in Table 35-10 from SDM (Sept 2015) */
	static int airmont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000,
		93300, 90000, 88900, 87500};

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0xF;
	WARN_ON(i > 8);

	return airmont_freq_table[i];
620 621
}

622
static void atom_get_vid(struct cpudata *cpudata)
623 624 625
{
	u64 value;

626
	rdmsrl(ATOM_VIDS, value);
D
Dirk Brandewie 已提交
627 628
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
629 630 631 632
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
			cpudata->pstate.min_pstate));
633

634
	rdmsrl(ATOM_TURBO_VIDS, value);
635
	cpudata->vid.turbo = value & 0x7f;
636 637
}

638
static int core_get_min_pstate(void)
639 640
{
	u64 value;
641

642
	rdmsrl(MSR_PLATFORM_INFO, value);
643 644 645
	return (value >> 40) & 0xFF;
}

646
static int core_get_max_pstate_physical(void)
647 648
{
	u64 value;
649

650
	rdmsrl(MSR_PLATFORM_INFO, value);
651 652 653
	return (value >> 8) & 0xFF;
}

654
static int core_get_max_pstate(void)
655
{
656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688
	u64 tar;
	u64 plat_info;
	int max_pstate;
	int err;

	rdmsrl(MSR_PLATFORM_INFO, plat_info);
	max_pstate = (plat_info >> 8) & 0xFF;

	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
	if (!err) {
		/* Do some sanity checking for safety */
		if (plat_info & 0x600000000) {
			u64 tdp_ctrl;
			u64 tdp_ratio;
			int tdp_msr;

			err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
			if (err)
				goto skip_tar;

			tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
			if (err)
				goto skip_tar;

			if (tdp_ratio - 1 == tar) {
				max_pstate = tar;
				pr_debug("max_pstate=TAC %x\n", max_pstate);
			} else {
				goto skip_tar;
			}
		}
	}
689

690 691
skip_tar:
	return max_pstate;
692 693
}

694
static int core_get_turbo_pstate(void)
695 696 697
{
	u64 value;
	int nont, ret;
698

699
	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
700
	nont = core_get_max_pstate();
701
	ret = (value) & 255;
702 703 704 705 706
	if (ret <= nont)
		ret = nont;
	return ret;
}

/* Core parts use a fixed scaling factor of 100000 per P-state step. */
static inline int core_get_scaling(void)
{
	return 100000;
}

712
static u64 core_get_val(struct cpudata *cpudata, int pstate)
713 714 715
{
	u64 val;

716
	val = (u64)pstate << 8;
717
	if (limits->no_turbo && !limits->turbo_disabled)
718 719
		val |= (u64)1 << 32;

720
	return val;
721 722
}

723 724 725 726 727 728 729 730 731 732 733 734 735
static int knl_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (((value) >> 8) & 0xFF);
	if (ret <= nont)
		ret = nont;
	return ret;
}

736 737 738 739 740 741 742 743 744 745 746
static struct cpu_defaults core_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
747
		.get_max_physical = core_get_max_pstate_physical,
748 749
		.get_min = core_get_min_pstate,
		.get_turbo = core_get_turbo_pstate,
750
		.get_scaling = core_get_scaling,
751
		.get_val = core_get_val,
752
		.get_target_pstate = get_target_pstate_use_performance,
753 754 755
	},
};

756 757 758 759 760 761 762 763 764 765 766 767 768 769
static struct cpu_defaults silvermont_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 60,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = atom_get_max_pstate,
		.get_max_physical = atom_get_max_pstate,
		.get_min = atom_get_min_pstate,
		.get_turbo = atom_get_turbo_pstate,
770
		.get_val = atom_get_val,
771 772
		.get_scaling = silvermont_get_scaling,
		.get_vid = atom_get_vid,
773
		.get_target_pstate = get_target_pstate_use_cpu_load,
774 775 776 777
	},
};

static struct cpu_defaults airmont_params = {
778 779 780
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
781
		.setpoint = 60,
782 783 784 785 786
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
787 788 789 790
		.get_max = atom_get_max_pstate,
		.get_max_physical = atom_get_max_pstate,
		.get_min = atom_get_min_pstate,
		.get_turbo = atom_get_turbo_pstate,
791
		.get_val = atom_get_val,
792
		.get_scaling = airmont_get_scaling,
793
		.get_vid = atom_get_vid,
794
		.get_target_pstate = get_target_pstate_use_cpu_load,
795 796 797
	},
};

798 799 800 801 802 803 804 805 806 807 808
static struct cpu_defaults knl_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
809
		.get_max_physical = core_get_max_pstate_physical,
810 811
		.get_min = core_get_min_pstate,
		.get_turbo = knl_get_turbo_pstate,
812
		.get_scaling = core_get_scaling,
813
		.get_val = core_get_val,
814
		.get_target_pstate = get_target_pstate_use_performance,
815 816 817
	},
};

818 819 820
static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
	int max_perf = cpu->pstate.turbo_pstate;
821
	int max_perf_adj;
822
	int min_perf;
823

824
	if (limits->no_turbo || limits->turbo_disabled)
825 826
		max_perf = cpu->pstate.max_pstate;

827 828 829 830 831
	/*
	 * performance can be limited by user through sysfs, by cpufreq
	 * policy, or by cpu specific default values determined through
	 * experimentation.
	 */
832
	max_perf_adj = fp_toint(max_perf * limits->max_perf);
833 834
	*max = clamp_t(int, max_perf_adj,
			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
835

836
	min_perf = fp_toint(max_perf * limits->min_perf);
837
	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
838 839
}

840
static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
841
{
842
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
843
	cpu->pstate.current_pstate = pstate;
844
}
845

846 847 848 849 850 851 852 853 854 855 856 857
static void intel_pstate_set_min_pstate(struct cpudata *cpu)
{
	int pstate = cpu->pstate.min_pstate;

	intel_pstate_record_pstate(cpu, pstate);
	/*
	 * Generally, there is no guarantee that this code will always run on
	 * the CPU being updated, so force the register update to run on the
	 * right CPU.
	 */
	wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
		      pstate_funcs.get_val(cpu, pstate));
858 859 860 861
}

/*
 * Populate @cpu's P-state limits from the model callbacks, read the VID
 * data when the model provides get_vid, and start at the minimum P-state.
 */
static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate = pstate_funcs.get_max();
	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
	cpu->pstate.scaling = pstate_funcs.get_scaling();

	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);

	intel_pstate_set_min_pstate(cpu);
}

874
static inline void intel_pstate_calc_busy(struct cpudata *cpu)
875
{
876
	struct sample *sample = &cpu->sample;
877
	int64_t core_pct;
878

879
	core_pct = int_tofp(sample->aperf) * int_tofp(100);
880
	core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
881

882
	sample->core_pct_busy = (int32_t)core_pct;
883 884
}

885
static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
886 887
{
	u64 aperf, mperf;
888
	unsigned long flags;
889
	u64 tsc;
890

891
	local_irq_save(flags);
892 893
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
894
	tsc = rdtsc();
895
	if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
896
		local_irq_restore(flags);
897
		return false;
898
	}
899
	local_irq_restore(flags);
900

901
	cpu->last_sample_time = cpu->sample.time;
902
	cpu->sample.time = time;
903 904
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
905
	cpu->sample.tsc =  tsc;
906 907
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
908
	cpu->sample.tsc -= cpu->prev_tsc;
909

910 911
	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
912
	cpu->prev_tsc = tsc;
913 914 915 916 917 918 919 920
	/*
	 * First time this function is invoked in a given cycle, all of the
	 * previous sample data fields are equal to zero or stale and they must
	 * be populated with meaningful numbers for things to work, so assume
	 * that sample.time will always be reset before setting the utilization
	 * update hook and make the caller skip the sample then.
	 */
	return !!cpu->last_sample_time;
921 922
}

923 924 925 926 927 928
static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
	return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf *
		cpu->pstate.scaling, cpu->sample.mperf);
}

929 930 931
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
932 933 934
	u64 cummulative_iowait, delta_iowait_us;
	u64 delta_iowait_mperf;
	u64 mperf, now;
935 936
	int32_t cpu_load;

937 938 939 940 941 942 943 944 945 946 947 948 949 950 951
	cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);

	/*
	 * Convert iowait time into number of IO cycles spent at max_freq.
	 * IO is considered as busy only for the cpu_load algorithm. For
	 * performance this is not needed since we always try to reach the
	 * maximum P-State, so we are already boosting the IOs.
	 */
	delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
	delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
		cpu->pstate.max_pstate, MSEC_PER_SEC);

	mperf = cpu->sample.mperf + delta_iowait_mperf;
	cpu->prev_cummulative_iowait = cummulative_iowait;

952 953 954 955 956 957
	/*
	 * The load can be estimated as the ratio of the mperf counter
	 * running at a constant frequency during active periods
	 * (C0) and the time stamp counter running at the same frequency
	 * also during C-states.
	 */
958
	cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
959 960 961 962 963
	cpu->sample.busy_scaled = cpu_load;

	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
}

964
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
965
{
966
	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
967
	u64 duration_ns;
968

969 970
	intel_pstate_calc_busy(cpu);

971 972 973 974 975 976 977 978 979 980 981
	/*
	 * core_busy is the ratio of actual performance to max
	 * max_pstate is the max non turbo pstate available
	 * current_pstate was the pstate that was requested during
	 * 	the last sample period.
	 *
	 * We normalize core_busy, which was our actual percent
	 * performance to what we requested during the last sample
	 * period. The result will be a percentage of busy at a
	 * specified pstate.
	 */
982
	core_busy = cpu->sample.core_pct_busy;
983
	max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
984
	current_pstate = int_tofp(cpu->pstate.current_pstate);
985
	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
986

987
	/*
988 989 990 991
	 * Since our utilization update callback will not run unless we are
	 * in C0, check if the actual elapsed time is significantly greater (3x)
	 * than our sample interval.  If it is, then we were idle for a long
	 * enough period of time to adjust our busyness.
992
	 */
993
	duration_ns = cpu->sample.time - cpu->last_sample_time;
994
	if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
995 996
		sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
				      int_tofp(duration_ns));
997 998 999
		core_busy = mul_fp(core_busy, sample_ratio);
	}

1000 1001
	cpu->sample.busy_scaled = core_busy;
	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
1002 1003
}

1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
{
	int max_perf, min_perf;

	update_turbo_state();

	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
	pstate = clamp_t(int, pstate, min_perf, max_perf);
	if (pstate == cpu->pstate.current_pstate)
		return;

	intel_pstate_record_pstate(cpu, pstate);
	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}

1019 1020
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
1021
	int from, target_pstate;
1022 1023 1024
	struct sample *sample;

	from = cpu->pstate.current_pstate;
1025

1026
	target_pstate = pstate_funcs.get_target_pstate(cpu);
1027

1028
	intel_pstate_update_pstate(cpu, target_pstate);
1029 1030 1031

	sample = &cpu->sample;
	trace_pstate_sample(fp_toint(sample->core_pct_busy),
1032
		fp_toint(sample->busy_scaled),
1033 1034 1035 1036 1037
		from,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
1038
		get_avg_frequency(cpu));
1039 1040
}

1041 1042
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
				     unsigned long util, unsigned long max)
1043
{
1044 1045
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
	u64 delta_ns = time - cpu->sample.time;
1046

1047
	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
1048 1049 1050
		bool sample_taken = intel_pstate_sample(cpu, time);

		if (sample_taken && !hwp_active)
1051 1052
			intel_pstate_adjust_busy_pstate(cpu);
	}
1053 1054 1055
}

/*
 * x86_cpu_id entry for an Intel family 6 @model with APERF/MPERF support;
 * the address of @policy is stored as the entry's driver data.
 */
#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
1060 1061
	ICPU(0x2a, core_params),
	ICPU(0x2d, core_params),
1062
	ICPU(0x37, silvermont_params),
1063 1064
	ICPU(0x3a, core_params),
	ICPU(0x3c, core_params),
1065
	ICPU(0x3d, core_params),
1066 1067 1068 1069
	ICPU(0x3e, core_params),
	ICPU(0x3f, core_params),
	ICPU(0x45, core_params),
	ICPU(0x46, core_params),
1070
	ICPU(0x47, core_params),
1071
	ICPU(0x4c, airmont_params),
1072
	ICPU(0x4e, core_params),
1073
	ICPU(0x4f, core_params),
1074
	ICPU(0x5e, core_params),
1075
	ICPU(0x56, core_params),
1076
	ICPU(0x57, knl_params),
1077 1078 1079 1080
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

D
Dirk Brandewie 已提交
1081 1082 1083 1084 1085
static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
	ICPU(0x56, core_params),
	{}
};

1086 1087 1088 1089
static int intel_pstate_init_cpu(unsigned int cpunum)
{
	struct cpudata *cpu;

1090 1091 1092
	if (!all_cpu_data[cpunum])
		all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
					       GFP_KERNEL);
1093 1094 1095 1096 1097 1098
	if (!all_cpu_data[cpunum])
		return -ENOMEM;

	cpu = all_cpu_data[cpunum];

	cpu->cpu = cpunum;
1099

1100
	if (hwp_active) {
1101
		intel_pstate_hwp_enable(cpu);
1102 1103 1104
		pid_params.sample_rate_ms = 50;
		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
	}
1105

1106
	intel_pstate_get_cpu_pstates(cpu);
1107

1108 1109
	intel_pstate_busy_pid_reset(cpu);

1110
	cpu->update_util.func = intel_pstate_update_util;
1111

1112
	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124

	return 0;
}

/*
 * cpufreq ->get() callback.
 *
 * Returns the value computed by get_avg_frequency() for @cpu_num, or 0 if
 * no cpudata has been set up for that CPU.
 */
static unsigned int intel_pstate_get(unsigned int cpu_num)
{
	struct cpudata *cpu = all_cpu_data[cpu_num];

	if (!cpu)
		return 0;

	return get_avg_frequency(cpu);
}

1129
static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
1130
{
1131 1132 1133 1134 1135
	struct cpudata *cpu = all_cpu_data[cpu_num];

	/* Prevent intel_pstate_update_util() from using stale data. */
	cpu->sample.time = 0;
	cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
1136 1137 1138 1139 1140 1141 1142 1143
}

/*
 * Unregister the utilization update hook for @cpu by setting its hook data
 * to NULL, then call synchronize_sched().
 * NOTE(review): the synchronize_sched() presumably waits for any callback
 * still running on another CPU to finish before the caller proceeds --
 * confirm against the cpufreq update-util API.
 */
static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
	cpufreq_set_update_util_data(cpu, NULL);
	synchronize_sched();
}

1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157
/*
 * Reset @limits to the values used for the "performance" policy: turbo
 * flags cleared, effective minimum and maximum both at 100%, and the
 * policy/sysfs bounds back at their widest (0..100).
 */
static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
	/* Turbo flags */
	limits->no_turbo = 0;
	limits->turbo_disabled = 0;

	/* Upper bounds */
	limits->max_policy_pct = 100;
	limits->max_sysfs_pct = 100;
	limits->max_perf_pct = 100;
	limits->max_perf = int_tofp(1);

	/* Lower bounds: requests reset to 0, effective minimum pinned to 100% */
	limits->min_policy_pct = 0;
	limits->min_sysfs_pct = 0;
	limits->min_perf_pct = 100;
	limits->min_perf = int_tofp(1);
}

1158 1159
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
1160 1161 1162
	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

1163 1164
	intel_pstate_clear_update_util_hook(policy->cpu);

1165
	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
1166
		limits = &performance_limits;
1167 1168 1169 1170 1171 1172 1173 1174
		if (policy->max >= policy->cpuinfo.max_freq) {
			pr_debug("intel_pstate: set performance\n");
			intel_pstate_set_performance_limits(limits);
			goto out;
		}
	} else {
		pr_debug("intel_pstate: set powersave\n");
		limits = &powersave_limits;
1175
	}
D
Dirk Brandewie 已提交
1176

1177 1178
	limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
	limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
1179 1180
	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
					      policy->cpuinfo.max_freq);
1181
	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
1182 1183

	/* Normalize user input to [min_policy_pct, max_policy_pct] */
1184 1185 1186 1187 1188 1189 1190 1191
	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);
1192
	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
1193 1194

	/* Make sure min_perf_pct <= max_perf_pct */
1195
	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
1196

1197 1198 1199 1200
	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
				  int_tofp(100));
	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
				  int_tofp(100));
1201

1202 1203 1204
 out:
	intel_pstate_set_update_util_hook(policy->cpu);

D
Dirk Brandewie 已提交
1205
	if (hwp_active)
1206
		intel_pstate_hwp_set(policy->cpus);
D
Dirk Brandewie 已提交
1207

1208 1209 1210 1211 1212
	return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
1213
	cpufreq_verify_within_cpu_limits(policy);
1214

1215
	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
1216
	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
1217 1218 1219 1220 1221
		return -EINVAL;

	return 0;
}

1222
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
1223
{
1224 1225
	int cpu_num = policy->cpu;
	struct cpudata *cpu = all_cpu_data[cpu_num];
1226

1227
	pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
1228

1229
	intel_pstate_clear_update_util_hook(cpu_num);
1230

D
Dirk Brandewie 已提交
1231 1232 1233
	if (hwp_active)
		return;

1234
	intel_pstate_set_min_pstate(cpu);
1235 1236
}

1237
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
1238 1239
{
	struct cpudata *cpu;
1240
	int rc;
1241 1242 1243 1244 1245 1246 1247

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

1248
	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
1249 1250 1251 1252
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

1253 1254
	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1255 1256

	/* cpuinfo and default policy values */
1257 1258 1259
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->cpuinfo.max_freq =
		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	cpumask_set_cpu(policy->cpu, policy->cpus);

	return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
1272
	.stop_cpu	= intel_pstate_stop_cpu,
1273 1274 1275
	.name		= "intel_pstate",
};

1276
static int __initdata no_load;
D
Dirk Brandewie 已提交
1277
static int __initdata no_hwp;
1278
static int __initdata hwp_only;
1279
static unsigned int force_load;
1280

1281 1282
static int intel_pstate_msrs_not_valid(void)
{
1283
	if (!pstate_funcs.get_max() ||
1284 1285
	    !pstate_funcs.get_min() ||
	    !pstate_funcs.get_turbo())
1286 1287 1288 1289
		return -ENODEV;

	return 0;
}
1290

1291
static void copy_pid_params(struct pstate_adjust_policy *policy)
1292 1293
{
	pid_params.sample_rate_ms = policy->sample_rate_ms;
1294
	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
1295 1296 1297 1298 1299 1300 1301
	pid_params.p_gain_pct = policy->p_gain_pct;
	pid_params.i_gain_pct = policy->i_gain_pct;
	pid_params.d_gain_pct = policy->d_gain_pct;
	pid_params.deadband = policy->deadband;
	pid_params.setpoint = policy->setpoint;
}

1302
static void copy_cpu_funcs(struct pstate_funcs *funcs)
1303 1304
{
	pstate_funcs.get_max   = funcs->get_max;
1305
	pstate_funcs.get_max_physical = funcs->get_max_physical;
1306 1307
	pstate_funcs.get_min   = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
1308
	pstate_funcs.get_scaling = funcs->get_scaling;
1309
	pstate_funcs.get_val   = funcs->get_val;
1310
	pstate_funcs.get_vid   = funcs->get_vid;
1311 1312
	pstate_funcs.get_target_pstate = funcs->get_target_pstate;

1313 1314
}

1315
#if IS_ENABLED(CONFIG_ACPI)
1316
#include <acpi/processor.h>
1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346

/*
 * Check whether no possible CPU provides an ACPI _PSS package.
 *
 * Returns false as soon as one CPU's _PSS evaluates to a package object,
 * true if none does.  CPUs without an acpi_processor, or whose _PSS
 * evaluation fails, are skipped.
 */
static bool intel_pstate_no_acpi_pss(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, cpu);
		union acpi_object *pss;
		bool is_package;

		if (!pr)
			continue;

		if (ACPI_FAILURE(acpi_evaluate_object(pr->handle, "_PSS",
						      NULL, &buffer)))
			continue;

		/* Inspect the result, then release the returned buffer. */
		pss = buffer.pointer;
		is_package = pss && pss->type == ACPI_TYPE_PACKAGE;
		kfree(pss);

		if (is_package)
			return false;
	}

	return true;
}

1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366
/*
 * Returns true if at least one possible CPU with an acpi_processor has an
 * ACPI _PPC method, false otherwise.
 */
static bool intel_pstate_has_acpi_ppc(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (pr && acpi_has_method(pr->handle, "_PPC"))
			return true;
	}

	return false;
}

/*
 * Values for hw_vendor_info.oem_pwr_table.  They select which ACPI check
 * intel_pstate_platform_pwr_mgmt_exists() applies to a matching platform:
 * PSS tests for the absence of _PSS, PPC for the presence of _PPC.
 */
enum {
	PSS,
	PPC,
};

1367 1368 1369 1370
/*
 * One platform entry matched against the OEM fields of the ACPI FADT
 * header.  Field order matters: vendor_info[] uses positional initializers.
 */
struct hw_vendor_info {
	u16  valid;		/* non-zero for real entries; 0 terminates the table */
	char oem_id[ACPI_OEM_ID_SIZE];
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
	int  oem_pwr_table;	/* PSS or PPC */
};

/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] = {
1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386
	{1, "HP    ", "ProLiant", PSS},
	{1, "ORACLE", "X4-2    ", PPC},
	{1, "ORACLE", "X4-2L   ", PPC},
	{1, "ORACLE", "X4-2B   ", PPC},
	{1, "ORACLE", "X3-2    ", PPC},
	{1, "ORACLE", "X3-2L   ", PPC},
	{1, "ORACLE", "X3-2B   ", PPC},
	{1, "ORACLE", "X4470M2 ", PPC},
	{1, "ORACLE", "X4270M3 ", PPC},
	{1, "ORACLE", "X4270M2 ", PPC},
	{1, "ORACLE", "X4170M2 ", PPC},
1387 1388 1389 1390
	{1, "ORACLE", "X4170 M3", PPC},
	{1, "ORACLE", "X4275 M3", PPC},
	{1, "ORACLE", "X6-2    ", PPC},
	{1, "ORACLE", "Sudbury ", PPC},
1391 1392 1393 1394 1395 1396 1397
	{0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	struct acpi_table_header hdr;
	struct hw_vendor_info *v_info;
D
Dirk Brandewie 已提交
1398 1399 1400 1401 1402 1403 1404 1405 1406
	const struct x86_cpu_id *id;
	u64 misc_pwr;

	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
	if (id) {
		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
		if ( misc_pwr & (1 << 8))
			return true;
	}
1407

1408 1409
	if (acpi_disabled ||
	    ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
1410 1411 1412
		return false;

	for (v_info = vendor_info; v_info->valid; v_info++) {
1413
		if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
1414 1415 1416 1417 1418 1419
			!strncmp(hdr.oem_table_id, v_info->oem_table_id,
						ACPI_OEM_TABLE_ID_SIZE))
			switch (v_info->oem_pwr_table) {
			case PSS:
				return intel_pstate_no_acpi_pss();
			case PPC:
1420 1421
				return intel_pstate_has_acpi_ppc() &&
					(!force_load);
1422
			}
1423 1424 1425 1426 1427 1428
	}

	return false;
}
#else /* CONFIG_ACPI not enabled */
/* Without ACPI there is no firmware power management to detect. */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	return false;
}

/* Without ACPI there is no _PPC method to find. */
static inline bool intel_pstate_has_acpi_ppc(void)
{
	return false;
}
1430 1431
#endif /* CONFIG_ACPI */

1432 1433 1434 1435 1436
/*
 * Matches any family-6 Intel CPU that has X86_FEATURE_HWP.  Used by
 * intel_pstate_init() to decide whether to activate HWP.
 */
static const struct x86_cpu_id hwp_support_ids[] __initconst = {
	{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
	{}
};

1437 1438
/*
 * Driver entry point (device_initcall).
 *
 * Picks the P-state accessors (HWP-capable CPUs use core_params and skip
 * the model table and the MSR sanity check), refuses to load when the
 * platform firmware manages power itself, then allocates the per-CPU
 * pointer array and registers the cpufreq driver.
 *
 * Returns 0 on success; -ENODEV when disabled by boot parameter, the CPU is
 * unsupported, the MSR values are invalid, firmware owns power management,
 * or "hwp_only" was given without HWP; -ENOMEM if the per-CPU array cannot
 * be allocated; or the error returned by cpufreq_register_driver().
 */
static int __init intel_pstate_init(void)
{
	int cpu, rc;
	const struct x86_cpu_id *id;
	struct cpu_defaults *cpu_def;

	if (no_load)
		return -ENODEV;

	if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
		copy_cpu_funcs(&core_params.funcs);
		hwp_active++;
		goto hwp_cpu_matched;
	}

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	cpu_def = (struct cpu_defaults *)id->driver_data;

	copy_pid_params(&cpu_def->pid_policy);
	copy_cpu_funcs(&cpu_def->funcs);

	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

hwp_cpu_matched:
	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	pr_info("Intel P-state driver initializing.\n");

	/*
	 * "hwp_only" without HWP: bail out before allocating anything, so
	 * there is nothing to undo.
	 */
	if (!hwp_active && hwp_only)
		return -ENODEV;

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	rc = cpufreq_register_driver(&intel_pstate_driver);
	if (rc)
		goto out;

	intel_pstate_debug_expose_params();
	intel_pstate_sysfs_expose_params();

	if (hwp_active)
		pr_info("intel_pstate: HWP enabled\n");

	return rc;
out:
	/* Registration failed: release any per-CPU data already set up. */
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			intel_pstate_clear_update_util_hook(cpu);
			kfree(all_cpu_data[cpu]);
		}
	}

	put_online_cpus();
	vfree(all_cpu_data);
	/* Report the actual registration error instead of a blanket -ENODEV. */
	return rc;
}
device_initcall(intel_pstate_init);

1507 1508 1509 1510 1511 1512 1513
/*
 * Parse the "intel_pstate=" early boot parameter.
 *
 * Recognized values: "disable", "no_hwp", "force" and "hwp_only"; anything
 * else is silently ignored.
 *
 * Returns 0, or -EINVAL if no value string was supplied.
 */
static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable")) {
		no_load = 1;
	} else if (!strcmp(str, "no_hwp")) {
		pr_info("intel_pstate: HWP disabled\n");
		no_hwp = 1;
	} else if (!strcmp(str, "force")) {
		force_load = 1;
	} else if (!strcmp(str, "hwp_only")) {
		hwp_only = 1;
	}

	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

1526 1527 1528
MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
MODULE_LICENSE("GPL");