/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL		(300 * HZ)

#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1

/*
 * Current thermal event state:
 */
struct _thermal_state {
	bool			new_event;
	int			event;
	u64			next_check;
	unsigned long		count;
	unsigned long		last_count;
};

struct thermal_state {
	struct _thermal_state core_throttle;
	struct _thermal_state core_power_limit;
	struct _thermal_state package_throttle;
	struct _thermal_state package_power_limit;
	struct _thermal_state core_thresh0;
	struct _thermal_state core_thresh1;
	struct _thermal_state pkg_thresh0;
	struct _thermal_state pkg_thresh1;
};

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/* Callback to handle package threshold interrupts */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/*
 * Callback support for rate control: return true if the callback does its
 * own rate control.
 */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);


static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en	= ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
#define define_therm_throt_device_one_ro(_name)				\
	static DEVICE_ATTR(_name, 0444,					\
			   therm_throt_device_show_##_name,		\
				   NULL)				\

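/*
 * Generate a sysfs show() routine that prints one event counter from the
 * per-CPU thermal state.
 */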
#define define_therm_throt_device_show_func(event, name)		\
									\
static ssize_t therm_throt_device_show_##event##_##name(		\
			struct device *dev,				\
			struct device_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret;							\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu)) {						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).event.name);	\
	} else								\
		ret = 0;						\
	preempt_enable();						\
									\
	return ret;							\
}

define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);

static struct attribute *thermal_throttle_attrs[] = {
	&dev_attr_core_throttle_count.attr,
	NULL
};

static const struct attribute_group thermal_attr_group = {
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

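/* Scope of a thermal event: per-core or per-package */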
#define CORE_LEVEL	0
#define PACKAGE_LEVEL	1

/***
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event: Whether the condition is currently asserted (boolean), since
 *             the thermal interrupt normally gets called both when the
 *             thermal event begins and once the event has ended.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 */
static void therm_throt_process(bool new_event, int event, int level)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	bool old_event;
	u64 now;
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

	now = get_jiffies_64();
	if (level == CORE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->core_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->core_power_limit;
		else
			return;
	} else if (level == PACKAGE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->package_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->package_power_limit;
		else
			return;
	} else
		return;

	old_event = state->new_event;
	state->new_event = new_event;

	if (new_event)
		state->count++;

	if (time_before64(now, state->next_check) &&
			state->count != state->last_count)
		return;

	state->next_check = now + CHECK_INTERVAL;
	state->last_count = state->count;

	/* if we just entered the thermal event */
	if (new_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
		return;
	}
	if (old_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
		return;
	}
}

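/* Rate-limit threshold notifications to one per CHECK_INTERVAL per CPU */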
static int thresh_event_valid(int level, int event)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
	u64 now = get_jiffies_64();

	if (level == PACKAGE_LEVEL)
		state = (event == 0) ? &pstate->pkg_thresh0 :
						&pstate->pkg_thresh1;
	else
		state = (event == 0) ? &pstate->core_thresh0 :
						&pstate->core_thresh1;

	if (time_before64(now, state->next_check))
		return 0;

	state->next_check = now + CHECK_INTERVAL;

	return 1;
}

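/*
 * Power limit notification (PLN) interrupts are off by default; the
 * "int_pln_enable" kernel command line parameter turns them on.
 */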
static bool int_pln_enable;
static int __init int_pln_enable_setup(char *s)
{
	int_pln_enable = true;

	return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
{
	int err;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
	if (err)
		return err;

	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_core_power_limit_count.attr,
					      thermal_attr_group.name);
	if (cpu_has(c, X86_FEATURE_PTS)) {
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_package_throttle_count.attr,
					      thermal_attr_group.name);
		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
			err = sysfs_add_file_to_group(&dev->kobj,
					&dev_attr_package_power_limit_count.attr,
					thermal_attr_group.name);
	}

	return err;
}

static void thermal_throttle_remove_dev(struct device *dev)
{
	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int thermal_throttle_online(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);

	return thermal_throttle_add_dev(dev, cpu);
}

static int thermal_throttle_offline(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);

	thermal_throttle_remove_dev(dev);
	return 0;
}

static __init int thermal_throttle_init_device(void)
{
	int ret;

	if (!atomic_read(&therm_throt_en))
		return 0;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
				thermal_throttle_online,
				thermal_throttle_offline);
	return ret < 0 ? ret : 0;
}
device_initcall(thermal_throttle_init_device);

#endif /* CONFIG_SYSFS */

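/*
 * Forward package threshold events to the platform callback; rate-limit
 * them here unless the callback implements its own rate control.
 */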
static void notify_package_thresholds(__u64 msr_val)
{
	bool notify_thres_0 = false;
	bool notify_thres_1 = false;

	if (!platform_thermal_package_notify)
		return;

	/* lower threshold check */
	if (msr_val & THERM_LOG_THRESHOLD0)
		notify_thres_0 = true;
	/* higher threshold check */
	if (msr_val & THERM_LOG_THRESHOLD1)
		notify_thres_1 = true;

	if (!notify_thres_0 && !notify_thres_1)
		return;

	if (platform_thermal_package_rate_control &&
		platform_thermal_package_rate_control()) {
		/* Rate control is implemented in callback */
		platform_thermal_package_notify(msr_val);
		return;
	}

	/* lower threshold reached */
	if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
		platform_thermal_package_notify(msr_val);
	/* higher threshold reached */
	if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
		platform_thermal_package_notify(msr_val);
}

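/* Forward core threshold events to the platform callback, rate-limited */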
static void notify_thresholds(__u64 msr_val)
{
	/* check whether the interrupt handler is defined;
	 * otherwise simply return
	 */
	if (!platform_thermal_notify)
		return;

	/* lower threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD0) &&
			thresh_event_valid(CORE_LEVEL, 0))
		platform_thermal_notify(msr_val);
	/* higher threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD1) &&
			thresh_event_valid(CORE_LEVEL, 1))
		platform_thermal_notify(msr_val);
}

/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
	__u64 msr_val;

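	/*
	 * HWP events are reported through the thermal vector as well;
	 * clear any pending HWP status before handling the thermal bits.
	 */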
	if (static_cpu_has(X86_FEATURE_HWP))
		wrmsrl_safe(MSR_HWP_STATUS, 0);

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

	/* Check for violation of core thermal thresholds */
	notify_thresholds(msr_val);

	therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
			    THERMAL_THROTTLING_EVENT,
			    CORE_LEVEL);

	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					CORE_LEVEL);

	if (this_cpu_has(X86_FEATURE_PTS)) {
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
		/* check violations of package thermal thresholds */
		notify_package_thresholds(msr_val);
		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
					THERMAL_THROTTLING_EVENT,
					PACKAGE_LEVEL);
		if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
			therm_throt_process(msr_val &
					PACKAGE_THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					PACKAGE_LEVEL);
	}
}

static void unexpected_thermal_interrupt(void)
{
	pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
		smp_processor_id());
}

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

static inline void __smp_thermal_interrupt(void)
{
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
}

asmlinkage __visible void __irq_entry
smp_thermal_interrupt(struct pt_regs *regs)
{
	entering_irq();
	__smp_thermal_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void __irq_entry
smp_trace_thermal_interrupt(struct pt_regs *regs)
{
	entering_irq();
	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
	__smp_thermal_interrupt();
	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
	exiting_ack_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
	if (!boot_cpu_has(X86_FEATURE_APIC))
		return 0;
	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
		return 0;
	return 1;
}

void __init mcheck_intel_therm_init(void)
{
	/*
	 * This function is only called on the boot CPU. Save the initial
	 * thermal LVT value on the BSP and use that value later to restore
	 * the BIOS-programmed thermal LVT entry on the APs.
	 */
	if (intel_thermal_supported(&boot_cpu_data))
		lvtthmr_init = apic_read(APIC_LVTTHMR);
}

void intel_init_thermal(struct cpuinfo_x86 *c)
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

	if (!intel_thermal_supported(c))
		return;

	/*
	 * First check if it's enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);

	h = lvtthmr_init;
	/*
	 * The initial value of the thermal LVT entries on all APs always
	 * reads 0x10000 because the APs are woken up by the BSP issuing an
	 * INIT-SIPI-SIPI sequence to them, and the LVT registers are reset
	 * to 0s except for the mask bits, which are set to 1s when the APs
	 * receive the INIT IPI. If the BIOS took over the thermal interrupt
	 * and set its delivery mode to SMI (not fixed), restore the value
	 * the BIOS programmed on the AP, based on the BSP value we saved,
	 * since the BIOS always sets the same value for all threads/cores.
	 */
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);

	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		if (system_state == SYSTEM_BOOTING)
			pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	/* early Pentium M models use different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
		wrmsr(MSR_IA32_THERM_INTERRUPT,
			(l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
	else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
		wrmsr(MSR_IA32_THERM_INTERRUPT,
			l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
	else
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

	if (cpu_has(c, X86_FEATURE_PTS)) {
		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
		if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				(l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE))
				& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
		else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE
				| PACKAGE_THERM_INT_PLN_ENABLE), h);
		else
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
	}

	smp_thermal_vector = intel_thermal_interrupt;

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

	pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
		      tm2 ? "TM2" : "TM1");

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}