therm_throt.c 15.0 KB
Newer Older
1
/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
16
#include <linux/interrupt.h>
I
Ingo Molnar 已提交
17 18
#include <linux/notifier.h>
#include <linux/jiffies.h>
19
#include <linux/kernel.h>
20
#include <linux/percpu.h>
21
#include <linux/export.h>
22 23 24
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
25
#include <linux/cpu.h>
I
Ingo Molnar 已提交
26

27 28
#include <asm/processor.h>
#include <asm/apic.h>
29
#include <asm/mce.h>
30
#include <asm/msr.h>
31
#include <asm/trace/irq_vectors.h>
32 33

/* Minimum spacing, in jiffies, between two reports of the same thermal event */
#define CHECK_INTERVAL		(300 * HZ)

/* Event kinds accepted by therm_throt_process() */
#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1

39
/*
40
 * Current thermal event state:
41
 */
42
struct _thermal_state {
43 44
	bool			new_event;
	int			event;
45
	u64			next_check;
46 47
	unsigned long		count;
	unsigned long		last_count;
48
};
I
Ingo Molnar 已提交
49

50
struct thermal_state {
51 52 53 54
	struct _thermal_state core_throttle;
	struct _thermal_state core_power_limit;
	struct _thermal_state package_throttle;
	struct _thermal_state package_power_limit;
55 56
	struct _thermal_state core_thresh0;
	struct _thermal_state core_thresh1;
57 58
	struct _thermal_state pkg_thresh0;
	struct _thermal_state pkg_thresh1;
59 60
};

61 62
/*
 * Optional hook called by notify_thresholds() with the core thermal
 * status MSR value when a core threshold is logged. NULL means no handler.
 */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/*
 * Optional hook called by notify_package_thresholds() with the package
 * thermal status MSR value when a package threshold is logged.
 */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/*
 * Optional hook reporting whether the package notify callback does its
 * own rate control. When it is set and returns true, the package callback
 * is invoked without the rate limiting done in this file.
 */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);


75 76 77
static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en	= ATOMIC_INIT(0);
78

79 80
static u32 lvtthmr_init __read_mostly;

81
#ifdef CONFIG_SYSFS
82 83 84
/*
 * Declare dev_attr_<_name>: a read-only (0444) device attribute whose
 * show method is therm_throt_device_show_<_name>() and which has no
 * store method.
 */
#define define_therm_throt_device_one_ro(_name)				\
	static DEVICE_ATTR(_name, 0444,					\
			   therm_throt_device_show_##_name, NULL)
I
Ingo Molnar 已提交
86

87
/*
 * Generate therm_throt_device_show_<event>_<name>(): a sysfs show method
 * that prints field <name> of the <event> state of CPU dev->id as "%lu\n".
 * Nothing is written (0 is returned) when that CPU is not online.
 */
#define define_therm_throt_device_show_func(event, name)		\
									\
static ssize_t therm_throt_device_show_##event##_##name(		\
			struct device *dev,				\
			struct device_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret = 0;						\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu))						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).event.name);	\
	preempt_enable();						\
									\
	return ret;							\
}

108 109
/* Show methods for the "count" field of each throttle/power-limit state */
define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_show_func(package_power_limit, count);

/* Matching read-only device attributes (dev_attr_<event>_count) */
define_therm_throt_device_one_ro(core_throttle_count);
define_therm_throt_device_one_ro(core_power_limit_count);
define_therm_throt_device_one_ro(package_throttle_count);
define_therm_throt_device_one_ro(package_power_limit_count);
119

120
static struct attribute *thermal_throttle_attrs[] = {
121
	&dev_attr_core_throttle_count.attr,
122 123 124
	NULL
};

125
static struct attribute_group thermal_attr_group = {
I
Ingo Molnar 已提交
126 127
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
128 129
};
#endif /* CONFIG_SYSFS */
130

131 132 133
/* Scope of a thermal event: a single core or the whole package */
#define CORE_LEVEL	0
#define PACKAGE_LEVEL	1

134
/***
135
 * therm_throt_process - Process thermal throttling event from interrupt
136 137 138 139
 * @curr: Whether the condition is current or not (boolean), since the
 *        thermal interrupt normally gets called both when the thermal
 *        event begins and once the event has ended.
 *
140
 * This function is called by the thermal interrupt after the
141 142 143 144 145 146 147 148 149
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
150
static int therm_throt_process(bool new_event, int event, int level)
151
{
152
	struct _thermal_state *state;
153 154
	unsigned int this_cpu = smp_processor_id();
	bool old_event;
155
	u64 now;
156
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
157 158

	now = get_jiffies_64();
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
	if (level == CORE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->core_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->core_power_limit;
		else
			 return 0;
	} else if (level == PACKAGE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->package_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->package_power_limit;
		else
			return 0;
	} else
		return 0;
175

176 177
	old_event = state->new_event;
	state->new_event = new_event;
178

179 180
	if (new_event)
		state->count++;
181

182
	if (time_before64(now, state->next_check) &&
183
			state->count != state->last_count)
184 185
		return 0;

186
	state->next_check = now + CHECK_INTERVAL;
187
	state->last_count = state->count;
188 189

	/* if we just entered the thermal event */
190 191
	if (new_event) {
		if (event == THERMAL_THROTTLING_EVENT)
192
			pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
193 194 195
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
196 197
		return 1;
	}
198 199
	if (old_event) {
		if (event == THERMAL_THROTTLING_EVENT)
200
			pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
201
				level == CORE_LEVEL ? "Core" : "Package");
202
		return 1;
203 204
	}

205
	return 0;
206
}
207

208
static int thresh_event_valid(int level, int event)
209 210 211 212 213 214
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
	u64 now = get_jiffies_64();

215 216 217 218 219 220
	if (level == PACKAGE_LEVEL)
		state = (event == 0) ? &pstate->pkg_thresh0 :
						&pstate->pkg_thresh1;
	else
		state = (event == 0) ? &pstate->core_thresh0 :
						&pstate->core_thresh1;
221 222 223 224 225

	if (time_before64(now, state->next_check))
		return 0;

	state->next_check = now + CHECK_INTERVAL;
226

227 228 229
	return 1;
}

230 231 232 233 234 235 236 237 238
/* True when "int_pln_enable" was given on the kernel command line */
static bool int_pln_enable;

/* Handler for the "int_pln_enable" boot parameter; the value is ignored */
static int __init int_pln_enable_setup(char *s)
{
	int_pln_enable = true;

	return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);

239
#ifdef CONFIG_SYSFS
I
Ingo Molnar 已提交
240
/* Add/Remove thermal_throttle interface for CPU device: */
241
static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
242
{
243
	int err;
244
	struct cpuinfo_x86 *c = &cpu_data(cpu);
245

246
	err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
247 248 249
	if (err)
		return err;

250
	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
251 252
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_core_power_limit_count.attr,
253
					      thermal_attr_group.name);
254
	if (cpu_has(c, X86_FEATURE_PTS)) {
255 256
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_package_throttle_count.attr,
257
					      thermal_attr_group.name);
258
		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
259 260
			err = sysfs_add_file_to_group(&dev->kobj,
					&dev_attr_package_power_limit_count.attr,
261
					thermal_attr_group.name);
262
	}
263 264

	return err;
265 266
}

267
static void thermal_throttle_remove_dev(struct device *dev)
268
{
269
	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
270 271 272
}

/*
 * Get notified when a cpu comes on/off. Be hotplug friendly.
 *
 * Online callback: create the sysfs interface for @cpu and return the
 * result of thermal_throttle_add_dev().
 */
static int thermal_throttle_online(unsigned int cpu)
{
	return thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
}

280
/* Offline callback: remove the sysfs interface of @cpu; always returns 0 */
static int thermal_throttle_offline(unsigned int cpu)
{
	thermal_throttle_remove_dev(get_cpu_device(cpu));

	return 0;
}
287 288 289

static __init int thermal_throttle_init_device(void)
{
290
	int ret;
291 292 293 294

	if (!atomic_read(&therm_throt_en))
		return 0;

295 296 297 298
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
				thermal_throttle_online,
				thermal_throttle_offline);
	return ret < 0 ? ret : 0;
299 300
}
device_initcall(thermal_throttle_init_device);
301

302
#endif /* CONFIG_SYSFS */
303

304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
/*
 * Forward package threshold events found in @msr_val to
 * platform_thermal_package_notify(), if such a callback is installed.
 *
 * When the platform reports that it does its own rate control the callback
 * is invoked once, unconditionally. Otherwise each logged threshold is
 * rate limited separately through thresh_event_valid(), so the callback
 * may run once per threshold.
 */
static void notify_package_thresholds(__u64 msr_val)
{
	bool low_hit, high_hit;

	if (!platform_thermal_package_notify)
		return;

	/* lower / higher threshold check */
	low_hit  = (msr_val & THERM_LOG_THRESHOLD0) != 0;
	high_hit = (msr_val & THERM_LOG_THRESHOLD1) != 0;

	if (!(low_hit || high_hit))
		return;

	if (platform_thermal_package_rate_control &&
	    platform_thermal_package_rate_control()) {
		/* Rate control is implemented in callback */
		platform_thermal_package_notify(msr_val);
		return;
	}

	/* lower threshold reached */
	if (low_hit && thresh_event_valid(PACKAGE_LEVEL, 0))
		platform_thermal_package_notify(msr_val);

	/* higher threshold reached */
	if (high_hit && thresh_event_valid(PACKAGE_LEVEL, 1))
		platform_thermal_package_notify(msr_val);
}

337 338 339 340 341 342 343 344 345
/*
 * Forward core threshold events found in @msr_val to
 * platform_thermal_notify(), if such a callback is installed. Each
 * threshold is rate limited separately through thresh_event_valid().
 */
static void notify_thresholds(__u64 msr_val)
{
	const bool low_hit  = (msr_val & THERM_LOG_THRESHOLD0) != 0;
	const bool high_hit = (msr_val & THERM_LOG_THRESHOLD1) != 0;

	/* nothing to do without an installed handler */
	if (!platform_thermal_notify)
		return;

	/* lower threshold reached */
	if (low_hit && thresh_event_valid(CORE_LEVEL, 0))
		platform_thermal_notify(msr_val);

	/* higher threshold reached */
	if (high_hit && thresh_event_valid(CORE_LEVEL, 1))
		platform_thermal_notify(msr_val);
}

355
/* Thermal transition interrupt handler */
356
static void intel_thermal_interrupt(void)
357 358 359
{
	__u64 msr_val;

360 361 362
	if (static_cpu_has(X86_FEATURE_HWP))
		wrmsrl_safe(MSR_HWP_STATUS, 0);

363
	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
364

365 366 367
	/* Check for violation of core thermal thresholds*/
	notify_thresholds(msr_val);

368
	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
369
				THERMAL_THROTTLING_EVENT,
370
				CORE_LEVEL) != 0)
371
		mce_log_therm_throt_event(msr_val);
372

373
	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
374
		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
375
					POWER_LIMIT_EVENT,
376
					CORE_LEVEL);
377

378
	if (this_cpu_has(X86_FEATURE_PTS)) {
379
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
380 381
		/* check violations of package thermal thresholds */
		notify_package_thresholds(msr_val);
382
		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
383
					THERMAL_THROTTLING_EVENT,
384
					PACKAGE_LEVEL);
385
		if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
386
			therm_throt_process(msr_val &
387 388
					PACKAGE_THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
389
					PACKAGE_LEVEL);
390
	}
391 392 393 394
}

/* Default handler: complain about a thermal interrupt nobody asked for */
static void unexpected_thermal_interrupt(void)
{
	pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
	       smp_processor_id());
}

/*
 * Handler invoked for the thermal vector. Replaced with
 * intel_thermal_interrupt() by intel_init_thermal().
 */
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

401
static inline void __smp_thermal_interrupt(void)
402 403 404
{
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
405 406
}

407
/*
 * Thermal interrupt entry point: runs the common handler between
 * entering_irq() and exiting_ack_irq(). @regs is not used here.
 */
asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
{
	entering_irq();
	__smp_thermal_interrupt();
	exiting_ack_irq();
}

414
asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
415 416 417 418 419 420 421 422
{
	entering_irq();
	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
	__smp_thermal_interrupt();
	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
	exiting_ack_irq();
}

423 424 425
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
426
	if (!boot_cpu_has(X86_FEATURE_APIC))
427 428 429 430 431 432
		return 0;
	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
		return 0;
	return 1;
}

433
/*
 * This function is only called on boot CPU. Save the init thermal
 * LVT value on BSP and use that value to restore APs' thermal LVT
 * entry BIOS programmed later.
 *
 * Nothing is saved when the boot CPU does not support thermal monitoring.
 */
void __init mcheck_intel_therm_init(void)
{
	if (!intel_thermal_supported(&boot_cpu_data))
		return;

	lvtthmr_init = apic_read(APIC_LVTTHMR);
}

H
Hidetoshi Seto 已提交
444
void intel_init_thermal(struct cpuinfo_x86 *c)
445 446 447 448 449
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

450
	if (!intel_thermal_supported(c))
451 452 453 454 455 456 457 458
		return;

	/*
	 * First check if its enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
459

460
	h = lvtthmr_init;
461 462 463 464 465
	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
	 * sequence to them and LVT registers are reset to 0s except for
	 * the mask bits which are set to 1s when APs receive INIT IPI.
466 467 468 469
	 * If BIOS takes over the thermal interrupt and sets its interrupt
	 * delivery mode to SMI (not fixed), it restores the value that the
	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
	 * is always setting the same value for all threads/cores.
470
	 */
471 472
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);
473 474


475
	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
476
		if (system_state == SYSTEM_BOOTING)
477
			pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
478 479 480
		return;
	}

481 482 483 484 485 486 487 488 489 490
	/* early Pentium M models use different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

491 492 493 494 495
	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
496
	if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
497
		wrmsr(MSR_IA32_THERM_INTERRUPT,
498 499 500
			(l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
	else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
501
		wrmsr(MSR_IA32_THERM_INTERRUPT,
502
			l | (THERM_INT_LOW_ENABLE
503 504 505 506
			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
	else
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
507

508 509
	if (cpu_has(c, X86_FEATURE_PTS)) {
		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
510
		if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
511
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
512 513 514 515 516 517
				(l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE))
				& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
		else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				l | (PACKAGE_THERM_INT_LOW_ENABLE
518 519 520 521 522 523
				| PACKAGE_THERM_INT_HIGH_ENABLE
				| PACKAGE_THERM_INT_PLN_ENABLE), h);
		else
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
524 525
	}

526
	smp_thermal_vector = intel_thermal_interrupt;
527 528 529 530 531 532 533 534

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

535 536
	pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
		      tm2 ? "TM2" : "TM1");
537 538 539 540

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}