/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>

#define BYT_RATIOS		0x66a
#define BYT_VIDS		0x66b
#define BYT_TURBO_RATIOS	0x66c
#define BYT_TURBO_VIDS		0x66d

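/*
 * Fixed-point helpers: values are signed integers carrying FRAC_BITS (8)
 * fractional bits, so 1.0 is stored as 256.  int_tofp()/fp_toint() convert
 * to and from this format, and mul_fp()/div_fp() widen to 64 bits
 * internally to avoid overflow.  For example, the fraction 75% is
 * div_fp(int_tofp(75), int_tofp(100)) == 192, i.e. 0.75 * 256.
 */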
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(int32_t x, int32_t y)
{
	return div_s64((int64_t)x << FRAC_BITS, y);
}

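/*
 * struct sample - one measurement interval: aperf/mperf hold the APERF and
 * MPERF MSR deltas accumulated since the previous sample, core_pct_busy is
 * the fixed-point APERF/MPERF ratio expressed as a percentage of maximum
 * non-turbo performance (it can exceed 100 in turbo), and freq is the
 * resulting effective frequency in kHz.
 */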
struct sample {
	int32_t core_pct_busy;
	u64 aperf;
	u64 mperf;
	int freq;
	ktime_t time;
};

struct pstate_data {
	int	current_pstate;
	int	min_pstate;
	int	max_pstate;
	int	turbo_pstate;
};

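/*
 * struct vid_data - voltage identifier range used on Baytrail, where the
 * driver programs the voltage along with the P state: min, max and turbo
 * bound the VID range, and ratio is the fixed-point slope used to
 * interpolate a VID for each P state between min_pstate and max_pstate.
 */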
struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;
};

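/*
 * struct _pid - PID controller state: setpoint and deadband are in percent,
 * the gains are stored as fixed-point fractions (percent / 100), and
 * integral accumulates the fixed-point error term, clamped in pid_calc()
 * to limit windup.
 */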
struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
	int32_t last_err;
};

struct cpudata {
	int cpu;

	struct timer_list timer;

	struct pstate_data pstate;
	struct vid_data vid;
	struct _pid pid;

	ktime_t last_sample_time;
	u64	prev_aperf;
	u64	prev_mperf;
	struct sample sample;
};

static struct cpudata **all_cpu_data;
struct pstate_adjust_policy {
	int sample_rate_ms;
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};

struct pstate_funcs {
	int (*get_max)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	void (*set)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
};

struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
};

static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;

struct perf_limits {
	int no_turbo;
	int turbo_disabled;
	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;
	int max_policy_pct;
	int max_sysfs_pct;
};

static struct perf_limits limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
};

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
			     int deadband, int integral)
{
	pid->setpoint = setpoint;
	pid->deadband  = deadband;
	pid->integral  = int_tofp(integral);
	pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

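/*
 * pid_calc() computes the control output for a scaled busy value:
 * error = setpoint - busy in fixed point, errors inside the deadband are
 * ignored, the integral term is clamped to +/-30 to limit windup, and the
 * fixed-point result is rounded (by adding 1 << (FRAC_BITS - 1)) before
 * being truncated to an integer P-state delta.
 */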
static signed int pid_calc(struct _pid *pid, int32_t busy)
{
	signed int result;
	int32_t pterm, dterm, fp_error;
	int32_t integral_limit;

	fp_error = int_tofp(pid->setpoint) - busy;

	if (abs(fp_error) <= int_tofp(pid->deadband))
		return 0;

	pterm = mul_fp(pid->p_gain, fp_error);

	pid->integral += fp_error;

	/* limit the integral term */
	integral_limit = int_tofp(30);
	if (pid->integral > integral_limit)
		pid->integral = integral_limit;
	if (pid->integral < -integral_limit)
		pid->integral = -integral_limit;

	dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
	pid->last_err = fp_error;

	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
	result = result + (1 << (FRAC_BITS-1));
	return (signed int)fp_toint(result);
}

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
}

static inline void intel_pstate_reset_all_pid(void)
{
	unsigned int cpu;

	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu])
			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
	}
}

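/*
 * Turbo is treated as unavailable when the BIOS has set the turbo-disable
 * bit in MSR_IA32_MISC_ENABLE or when the part reports identical maximum
 * and turbo ratios, i.e. there is no turbo range to exploit.
 */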
static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
	limits.turbo_disabled =
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

/************************** debugfs begin ************************/
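/*
 * The PID tunables below are exposed read/write in the "pstate_snb"
 * debugfs directory (typically /sys/kernel/debug/pstate_snb/ when debugfs
 * is mounted in the usual place); writing any of them resets the PID
 * state on all online CPUs.
 */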
static int pid_param_set(void *data, u64 val)
{
	*(u32 *)data = val;
	intel_pstate_reset_all_pid();
	return 0;
}

static int pid_param_get(void *data, u64 *val)
{
	*val = *(u32 *)data;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");

struct pid_param {
	char *name;
	void *value;
};

static struct pid_param pid_files[] = {
	{"sample_rate_ms", &pid_params.sample_rate_ms},
	{"d_gain_pct", &pid_params.d_gain_pct},
	{"i_gain_pct", &pid_params.i_gain_pct},
	{"deadband", &pid_params.deadband},
	{"setpoint", &pid_params.setpoint},
	{"p_gain_pct", &pid_params.p_gain_pct},
	{NULL, NULL}
};

static void __init intel_pstate_debug_expose_params(void)
{
	struct dentry *debugfs_parent;
	int i = 0;

	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
	if (IS_ERR_OR_NULL(debugfs_parent))
		return;
	while (pid_files[i].name) {
		debugfs_create_file(pid_files[i].name, 0660,
				    debugfs_parent, pid_files[i].value,
				    &fops_pid_param);
		i++;
	}
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
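/*
 * The global limits are exposed under the cpu subsystem kobject, i.e. as
 * /sys/devices/system/cpu/intel_pstate/{no_turbo,max_perf_pct,min_perf_pct}.
 */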
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}

static ssize_t show_no_turbo(struct kobject *kobj,
			     struct attribute *attr, char *buf)
{
	ssize_t ret;

	update_turbo_state();
	if (limits.turbo_disabled)
		ret = sprintf(buf, "%u\n", limits.turbo_disabled);
	else
		ret = sprintf(buf, "%u\n", limits.no_turbo);

	return ret;
}

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	update_turbo_state();
	if (limits.turbo_disabled) {
		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
		return -EPERM;
	}
	limits.no_turbo = clamp_t(int, input, 0, 1);

	return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits.max_sysfs_pct = clamp_t(int, input, 0, 100);
	limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

	return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	limits.min_perf_pct = clamp_t(int, input, 0, 100);
	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

	return count;
}

show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);

static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
	NULL
};

static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};

static void __init intel_pstate_sysfs_expose_params(void)
{
	struct kobject *intel_pstate_kobject;
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!intel_pstate_kobject);
	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
	BUG_ON(rc);
}

/************************** sysfs end ************************/
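
/*
 * Baytrail MSR layout as used below: BYT_RATIOS holds the minimum ratio in
 * bits 14:8 and the maximum (guaranteed) ratio in bits 22:16, while
 * BYT_TURBO_RATIOS holds the turbo ratio in bits 6:0.
 */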
static int byt_get_min_pstate(void)
{
	u64 value;

	rdmsrl(BYT_RATIOS, value);
	return (value >> 8) & 0x7F;
}

static int byt_get_max_pstate(void)
{
	u64 value;

	rdmsrl(BYT_RATIOS, value);
	return (value >> 16) & 0x7F;
}

static int byt_get_turbo_pstate(void)
{
	u64 value;

	rdmsrl(BYT_TURBO_RATIOS, value);
	return value & 0x7F;
}

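/*
 * On Baytrail a P-state change programs the voltage as well: the target
 * ratio goes into bits 15:8 of MSR_IA32_PERF_CTL, bit 32 disengages turbo,
 * and the VID is interpolated linearly between vid.min and vid.max, with
 * vid.turbo used verbatim for ratios above the guaranteed maximum.
 */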
static void byt_set_pstate(struct cpudata *cpudata, int pstate)
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

	val = pstate << 8;
	if (limits.no_turbo && !limits.turbo_disabled)
		val |= (u64)1 << 32;

	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
	vid = fp_toint(vid_fp);

	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

	val |= vid;

	wrmsrl(MSR_IA32_PERF_CTL, val);
}

static void byt_get_vid(struct cpudata *cpudata)
{
	u64 value;

	rdmsrl(BYT_VIDS, value);
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
			cpudata->pstate.min_pstate));

	rdmsrl(BYT_TURBO_VIDS, value);
	cpudata->vid.turbo = value & 0x7f;
}

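/*
 * Core MSR layout as used below: MSR_PLATFORM_INFO holds the maximum
 * non-turbo ratio in bits 15:8 and the minimum ratio in bits 47:40, while
 * MSR_NHM_TURBO_RATIO_LIMIT holds the 1-core turbo ratio in bits 7:0.
 */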
static int core_get_min_pstate(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 40) & 0xFF;
}

static int core_get_max_pstate(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 8) & 0xFF;
}

static int core_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (value) & 255;
	if (ret <= nont)
		ret = nont;
	return ret;
}

static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
	u64 val;

	val = pstate << 8;
	if (limits.no_turbo && !limits.turbo_disabled)
		val |= (u64)1 << 32;

	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}

static struct cpu_defaults core_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_min = core_get_min_pstate,
		.get_turbo = core_get_turbo_pstate,
		.set = core_set_pstate,
	},
};

static struct cpu_defaults byt_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = byt_get_max_pstate,
		.get_min = byt_get_min_pstate,
		.get_turbo = byt_get_turbo_pstate,
		.set = byt_set_pstate,
		.get_vid = byt_get_vid,
	},
};

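/*
 * Compute the currently permitted P-state range: the turbo range is
 * excluded when turbo is off or unavailable, and the user's minimum and
 * maximum percentage limits are applied as fixed-point fractions of the
 * highest permitted performance level.
 */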
static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
	int max_perf = cpu->pstate.turbo_pstate;
	int max_perf_adj;
	int min_perf;

	if (limits.no_turbo || limits.turbo_disabled)
		max_perf = cpu->pstate.max_pstate;

	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
	*max = clamp_t(int, max_perf_adj,
			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
	int max_perf, min_perf;

	update_turbo_state();

	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

	pstate = clamp_t(int, pstate, min_perf, max_perf);

	if (pstate == cpu->pstate.current_pstate)
		return;

	trace_cpu_frequency(pstate * 100000, cpu->cpu);

	cpu->pstate.current_pstate = pstate;

	pstate_funcs.set(cpu, pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate = pstate_funcs.get_max();
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();

	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}

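/*
 * Derive the busy figures for the last interval: core_pct_busy is
 * 100 * APERF/MPERF in fixed point, and freq scales the maximum non-turbo
 * frequency (max_pstate * 100 MHz, kept in kHz) by that ratio.
 */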
static inline void intel_pstate_calc_busy(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
	int64_t core_pct;

	core_pct = int_tofp(sample->aperf) * int_tofp(100);
	core_pct = div64_u64(core_pct, int_tofp(sample->mperf));

	sample->freq = fp_toint(
		mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));

	sample->core_pct_busy = (int32_t)core_pct;
}

static inline void intel_pstate_sample(struct cpudata *cpu)
{
	u64 aperf, mperf;
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = ktime_get();
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;

	intel_pstate_calc_busy(cpu);

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
	int delay;

	delay = msecs_to_jiffies(pid_params.sample_rate_ms);
	mod_timer_pinned(&cpu->timer, jiffies + delay);
}

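/*
 * Normalize the measured busy value to the current P state: core_pct_busy
 * is relative to the maximum non-turbo ratio, so it is scaled by
 * max_pstate/current_pstate to express how busy the core was at its
 * current speed.  If the deferrable timer fired much later than the
 * nominal sample time (a mostly idle CPU), the result is scaled down
 * further so a stale sample does not drive the P state up.
 */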
static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
{
	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
	u32 duration_us;
	u32 sample_time;

	core_busy = cpu->sample.core_pct_busy;
	max_pstate = int_tofp(cpu->pstate.max_pstate);
	current_pstate = int_tofp(cpu->pstate.current_pstate);
	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

	sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
	duration_us = (u32) ktime_us_delta(cpu->sample.time,
					   cpu->last_sample_time);
	if (duration_us > sample_time * 3) {
		sample_ratio = div_fp(int_tofp(sample_time),
				      int_tofp(duration_us));
		core_busy = mul_fp(core_busy, sample_ratio);
	}

	return core_busy;
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
	int32_t busy_scaled;
	struct _pid *pid;
	signed int ctl;

	pid = &cpu->pid;
	busy_scaled = intel_pstate_get_scaled_busy(cpu);

	ctl = pid_calc(pid, busy_scaled);

	/* Negative values of ctl increase the pstate and vice versa */
	intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl);
}

static void intel_pstate_timer_func(unsigned long __data)
{
	struct cpudata *cpu = (struct cpudata *) __data;
	struct sample *sample;

	intel_pstate_sample(cpu);

	sample = &cpu->sample;

	intel_pstate_adjust_busy_pstate(cpu);

	trace_pstate_sample(fp_toint(sample->core_pct_busy),
			fp_toint(intel_pstate_get_scaled_busy(cpu)),
			cpu->pstate.current_pstate,
			sample->mperf,
			sample->aperf,
			sample->freq);

	intel_pstate_set_sample_time(cpu);
}

#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }

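/*
 * Supported model numbers.  The byt_params entries (0x37, 0x4c) are
 * commonly identified as Silvermont/Airmont-based Atom parts; the rest
 * are Sandy Bridge and later Core/Xeon parts.
 */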
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(0x2a, core_params),
	ICPU(0x2d, core_params),
	ICPU(0x37, byt_params),
	ICPU(0x3a, core_params),
	ICPU(0x3c, core_params),
	ICPU(0x3d, core_params),
	ICPU(0x3e, core_params),
	ICPU(0x3f, core_params),
	ICPU(0x45, core_params),
	ICPU(0x46, core_params),
	ICPU(0x4c, byt_params),
	ICPU(0x4f, core_params),
	ICPU(0x56, core_params),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static int intel_pstate_init_cpu(unsigned int cpunum)
{
	struct cpudata *cpu;

	all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
	if (!all_cpu_data[cpunum])
		return -ENOMEM;

	cpu = all_cpu_data[cpunum];

	cpu->cpu = cpunum;
	intel_pstate_get_cpu_pstates(cpu);

	init_timer_deferrable(&cpu->timer);
	cpu->timer.function = intel_pstate_timer_func;
	cpu->timer.data = (unsigned long)cpu;
	cpu->timer.expires = jiffies + HZ/100;
	intel_pstate_busy_pid_reset(cpu);
	intel_pstate_sample(cpu);

	add_timer_on(&cpu->timer, cpunum);

	pr_debug("Intel pstate controlling: cpu %d\n", cpunum);

	return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
	struct sample *sample;
	struct cpudata *cpu;

	cpu = all_cpu_data[cpu_num];
	if (!cpu)
		return 0;
	sample = &cpu->sample;
	return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
		limits.min_perf_pct = 100;
		limits.min_perf = int_tofp(1);
		limits.max_policy_pct = 100;
		limits.max_perf_pct = 100;
		limits.max_perf = int_tofp(1);
		limits.no_turbo = 0;
		return 0;
	}
	limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
	limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0, 100);
	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

	limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
	limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0, 100);
	limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

	return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
		return -EINVAL;

	return 0;
}

static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
	int cpu_num = policy->cpu;
	struct cpudata *cpu = all_cpu_data[cpu_num];

	pr_info("intel_pstate CPU %d exiting\n", cpu_num);

	del_timer_sync(&all_cpu_data[cpu_num]->timer);
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
	kfree(all_cpu_data[cpu_num]);
	all_cpu_data[cpu_num] = NULL;
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int rc;

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

	if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

	policy->min = cpu->pstate.min_pstate * 100000;
	policy->max = cpu->pstate.turbo_pstate * 100000;

	/* cpuinfo and default policy values */
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
	policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	cpumask_set_cpu(policy->cpu, policy->cpus);

	return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.stop_cpu	= intel_pstate_stop_cpu,
	.name		= "intel_pstate",
};

static int __initdata no_load;

static int intel_pstate_msrs_not_valid(void)
{
	/* Check that all the MSRs we are using are valid. */
	u64 aperf, mperf, tmp;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	if (!pstate_funcs.get_max() ||
	    !pstate_funcs.get_min() ||
	    !pstate_funcs.get_turbo())
		return -ENODEV;

	rdmsrl(MSR_IA32_APERF, tmp);
	if (!(tmp - aperf))
		return -ENODEV;

	rdmsrl(MSR_IA32_MPERF, tmp);
	if (!(tmp - mperf))
		return -ENODEV;

	return 0;
}

static void copy_pid_params(struct pstate_adjust_policy *policy)
{
	pid_params.sample_rate_ms = policy->sample_rate_ms;
	pid_params.p_gain_pct = policy->p_gain_pct;
	pid_params.i_gain_pct = policy->i_gain_pct;
	pid_params.d_gain_pct = policy->d_gain_pct;
	pid_params.deadband = policy->deadband;
	pid_params.setpoint = policy->setpoint;
}

static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
	pstate_funcs.get_max   = funcs->get_max;
	pstate_funcs.get_min   = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
	pstate_funcs.set       = funcs->set;
	pstate_funcs.get_vid   = funcs->get_vid;
}

#if IS_ENABLED(CONFIG_ACPI)
#include <acpi/processor.h>

static bool intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		acpi_status status;
		union acpi_object *pss;
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
			kfree(pss);
			return false;
		}

		kfree(pss);
	}

	return true;
}

struct hw_vendor_info {
	u16  valid;
	char oem_id[ACPI_OEM_ID_SIZE];
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
};

/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] = {
	{1, "HP    ", "ProLiant"},
	{0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	struct acpi_table_header hdr;
	struct hw_vendor_info *v_info;

	if (acpi_disabled ||
	    ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
		return false;

	for (v_info = vendor_info; v_info->valid; v_info++) {
		if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
		    !strncmp(hdr.oem_table_id, v_info->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
		    intel_pstate_no_acpi_pss())
			return true;
	}

	return false;
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
#endif /* CONFIG_ACPI */

static int __init intel_pstate_init(void)
{
	int cpu, rc = 0;
	const struct x86_cpu_id *id;
	struct cpu_defaults *cpu_info;

	if (no_load)
		return -ENODEV;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	cpu_info = (struct cpu_defaults *)id->driver_data;

	copy_pid_params(&cpu_info->pid_policy);
	copy_cpu_funcs(&cpu_info->funcs);

	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

	pr_info("Intel P-state driver initializing.\n");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	rc = cpufreq_register_driver(&intel_pstate_driver);
	if (rc)
		goto out;

	intel_pstate_debug_expose_params();
	intel_pstate_sysfs_expose_params();

	return rc;
out:
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			del_timer_sync(&all_cpu_data[cpu]->timer);
			kfree(all_cpu_data[cpu]);
		}
	}

	put_online_cpus();
	vfree(all_cpu_data);
	return -ENODEV;
}
device_initcall(intel_pstate_init);

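/*
 * Passing "intel_pstate=disable" on the kernel command line prevents the
 * driver from loading.
 */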
static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable"))
		no_load = 1;
	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
MODULE_LICENSE("GPL");