/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
#include <linux/interrupt.h>
I
Ingo Molnar 已提交
17 18
#include <linux/notifier.h>
#include <linux/jiffies.h>
19
#include <linux/kernel.h>
20
#include <linux/percpu.h>
21
#include <linux/export.h>
22 23 24
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
25
#include <linux/cpu.h>
I
Ingo Molnar 已提交
26

27 28
#include <asm/processor.h>
#include <asm/apic.h>
29 30
#include <asm/idle.h>
#include <asm/mce.h>
31
#include <asm/msr.h>
32
#include <asm/trace/irq_vectors.h>
33 34

/* Minimum time, in jiffies, between two reports of the same thermal event */
#define CHECK_INTERVAL		(300 * HZ)

/* Event kinds accepted by therm_throt_process() */
#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1

40
/*
41
 * Current thermal event state:
42
 */
43
struct _thermal_state {
44 45
	bool			new_event;
	int			event;
46
	u64			next_check;
47 48
	unsigned long		count;
	unsigned long		last_count;
49
};
I
Ingo Molnar 已提交
50

51
struct thermal_state {
52 53 54 55
	struct _thermal_state core_throttle;
	struct _thermal_state core_power_limit;
	struct _thermal_state package_throttle;
	struct _thermal_state package_power_limit;
56 57
	struct _thermal_state core_thresh0;
	struct _thermal_state core_thresh1;
58 59
	struct _thermal_state pkg_thresh0;
	struct _thermal_state pkg_thresh1;
60 61
};

62 63
/* Optional callback invoked on core thermal threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/* Optional callback invoked on package thermal threshold interrupts */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/*
 * Optional callback reporting whether the package notify callback does its
 * own rate control; when it returns true no rate limiting is applied here.
 */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);


76 77 78
static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en	= ATOMIC_INIT(0);
79

80 81
static u32 lvtthmr_init __read_mostly;

82
#ifdef CONFIG_SYSFS
83 84 85
/*
 * Declare a read-only (0444) device attribute named _name whose show
 * handler is therm_throt_device_show_<_name>.
 */
#define define_therm_throt_device_one_ro(_name)				\
	static DEVICE_ATTR(_name, 0444,					\
			   therm_throt_device_show_##_name,		\
			   NULL)

88
/*
 * Generate the sysfs show handler therm_throt_device_show_<event>_<name>.
 * It prints the per-CPU thermal_state.<event>.<name> value of the CPU the
 * device belongs to, or returns 0 bytes when that CPU is not online.
 */
#define define_therm_throt_device_show_func(event, name)		\
									\
static ssize_t therm_throt_device_show_##event##_##name(		\
			struct device *dev,				\
			struct device_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret = 0;						\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu))						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).event.name);	\
	preempt_enable();						\
									\
	return ret;							\
}

109 110
/* One show handler and one read-only attribute per exported event counter */
define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);
120

121
static struct attribute *thermal_throttle_attrs[] = {
122
	&dev_attr_core_throttle_count.attr,
123 124 125
	NULL
};

126
static struct attribute_group thermal_attr_group = {
I
Ingo Molnar 已提交
127 128
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
129 130
};
#endif /* CONFIG_SYSFS */
131

132 133 134
/* Scope of a thermal event: a single core or the whole package */
#define CORE_LEVEL	0
#define PACKAGE_LEVEL	1

135
/***
136
 * therm_throt_process - Process thermal throttling event from interrupt
137 138 139 140
 * @curr: Whether the condition is current or not (boolean), since the
 *        thermal interrupt normally gets called both when the thermal
 *        event begins and once the event has ended.
 *
141
 * This function is called by the thermal interrupt after the
142 143 144 145 146 147 148 149 150
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
151
static int therm_throt_process(bool new_event, int event, int level)
152
{
153
	struct _thermal_state *state;
154 155
	unsigned int this_cpu = smp_processor_id();
	bool old_event;
156
	u64 now;
157
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
158 159

	now = get_jiffies_64();
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
	if (level == CORE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->core_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->core_power_limit;
		else
			 return 0;
	} else if (level == PACKAGE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->package_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->package_power_limit;
		else
			return 0;
	} else
		return 0;
176

177 178
	old_event = state->new_event;
	state->new_event = new_event;
179

180 181
	if (new_event)
		state->count++;
182

183
	if (time_before64(now, state->next_check) &&
184
			state->count != state->last_count)
185 186
		return 0;

187
	state->next_check = now + CHECK_INTERVAL;
188
	state->last_count = state->count;
189 190

	/* if we just entered the thermal event */
191 192 193 194 195 196
	if (new_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
197 198
		return 1;
	}
199 200 201 202 203
	if (old_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
204
		return 1;
205 206
	}

207
	return 0;
208
}
209

210
static int thresh_event_valid(int level, int event)
211 212 213 214 215 216
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
	u64 now = get_jiffies_64();

217 218 219 220 221 222
	if (level == PACKAGE_LEVEL)
		state = (event == 0) ? &pstate->pkg_thresh0 :
						&pstate->pkg_thresh1;
	else
		state = (event == 0) ? &pstate->core_thresh0 :
						&pstate->core_thresh1;
223 224 225 226 227

	if (time_before64(now, state->next_check))
		return 0;

	state->next_check = now + CHECK_INTERVAL;
228

229 230 231
	return 1;
}

232 233 234 235 236 237 238 239 240
/* True when "int_pln_enable" was given on the kernel command line */
static bool int_pln_enable;

/* Command line handler: turn on power limit notification handling */
static int __init int_pln_enable_setup(char *s)
{
	int_pln_enable = true;

	return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);

241
#ifdef CONFIG_SYSFS
I
Ingo Molnar 已提交
242
/* Add/Remove thermal_throttle interface for CPU device: */
243
static __cpuinit int thermal_throttle_add_dev(struct device *dev,
244
				unsigned int cpu)
245
{
246
	int err;
247
	struct cpuinfo_x86 *c = &cpu_data(cpu);
248

249
	err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
250 251 252
	if (err)
		return err;

253
	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
254 255
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_core_power_limit_count.attr,
256
					      thermal_attr_group.name);
257
	if (cpu_has(c, X86_FEATURE_PTS)) {
258 259
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_package_throttle_count.attr,
260
					      thermal_attr_group.name);
261
		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
262 263
			err = sysfs_add_file_to_group(&dev->kobj,
					&dev_attr_package_power_limit_count.attr,
264
					thermal_attr_group.name);
265
	}
266 267

	return err;
268 269
}

270
static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
271
{
272
	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
273 274
}

I
Ingo Molnar 已提交
275
/* Serializes sysfs interface creation/removal against CPU hotplug */
static DEFINE_MUTEX(therm_cpu_lock);

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
I
Ingo Molnar 已提交
279 280 281 282
static __cpuinit int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
283 284
{
	unsigned int cpu = (unsigned long)hcpu;
285
	struct device *dev;
286
	int err = 0;
287

288
	dev = get_cpu_device(cpu);
I
Ingo Molnar 已提交
289

290
	switch (action) {
291 292
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
293
		mutex_lock(&therm_cpu_lock);
294
		err = thermal_throttle_add_dev(dev, cpu);
295
		mutex_unlock(&therm_cpu_lock);
296
		WARN_ON(err);
297
		break;
298 299
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
300
	case CPU_DEAD:
301
	case CPU_DEAD_FROZEN:
302
		mutex_lock(&therm_cpu_lock);
303
		thermal_throttle_remove_dev(dev);
304
		mutex_unlock(&therm_cpu_lock);
305 306
		break;
	}
307
	return notifier_from_errno(err);
308 309
}

S
Satyam Sharma 已提交
310
/* Hotplug notifier registered by thermal_throttle_init_device() */
static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = {
	.notifier_call = thermal_throttle_cpu_callback,
};

static __init int thermal_throttle_init_device(void)
{
	unsigned int cpu = 0;
318
	int err;
319 320 321 322 323 324 325 326 327 328

	if (!atomic_read(&therm_throt_en))
		return 0;

	register_hotcpu_notifier(&thermal_throttle_cpu_notifier);

#ifdef CONFIG_HOTPLUG_CPU
	mutex_lock(&therm_cpu_lock);
#endif
	/* connect live CPUs to sysfs */
329
	for_each_online_cpu(cpu) {
330
		err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
331 332
		WARN_ON(err);
	}
333 334 335 336 337 338 339
#ifdef CONFIG_HOTPLUG_CPU
	mutex_unlock(&therm_cpu_lock);
#endif

	return 0;
}
device_initcall(thermal_throttle_init_device);
340

341
#endif /* CONFIG_SYSFS */
342

343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
/*
 * Forward package threshold log bits in @msr_val to the platform callback.
 * If the platform reports that it does its own rate control, the callback
 * is invoked once; otherwise each threshold is rate limited here through
 * thresh_event_valid().
 */
static void notify_package_thresholds(__u64 msr_val)
{
	bool low_hit, high_hit;

	if (!platform_thermal_package_notify)
		return;

	low_hit = !!(msr_val & THERM_LOG_THRESHOLD0);	/* lower threshold */
	high_hit = !!(msr_val & THERM_LOG_THRESHOLD1);	/* higher threshold */

	if (!(low_hit || high_hit))
		return;

	if (platform_thermal_package_rate_control &&
	    platform_thermal_package_rate_control()) {
		/* Rate control is implemented in callback */
		platform_thermal_package_notify(msr_val);
		return;
	}

	if (low_hit && thresh_event_valid(PACKAGE_LEVEL, 0))
		platform_thermal_package_notify(msr_val);

	if (high_hit && thresh_event_valid(PACKAGE_LEVEL, 1))
		platform_thermal_package_notify(msr_val);
}

376 377 378 379 380 381 382 383 384
/*
 * Forward core threshold log bits in @msr_val to the platform callback,
 * rate limited per threshold through thresh_event_valid(). Does nothing
 * when no callback is installed.
 */
static void notify_thresholds(__u64 msr_val)
{
	bool low_hit = !!(msr_val & THERM_LOG_THRESHOLD0);
	bool high_hit = !!(msr_val & THERM_LOG_THRESHOLD1);

	if (!platform_thermal_notify)
		return;

	/* lower threshold reached */
	if (low_hit && thresh_event_valid(CORE_LEVEL, 0))
		platform_thermal_notify(msr_val);

	/* higher threshold reached */
	if (high_hit && thresh_event_valid(CORE_LEVEL, 1))
		platform_thermal_notify(msr_val);
}

394
/* Thermal transition interrupt handler */
395
static void intel_thermal_interrupt(void)
396 397 398 399
{
	__u64 msr_val;

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
400

401 402 403
	/* Check for violation of core thermal thresholds*/
	notify_thresholds(msr_val);

404
	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
405
				THERMAL_THROTTLING_EVENT,
406
				CORE_LEVEL) != 0)
407
		mce_log_therm_throt_event(msr_val);
408

409
	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
410
		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
411
					POWER_LIMIT_EVENT,
412
					CORE_LEVEL);
413

414
	if (this_cpu_has(X86_FEATURE_PTS)) {
415
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
416 417
		/* check violations of package thermal thresholds */
		notify_package_thresholds(msr_val);
418
		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
419
					THERMAL_THROTTLING_EVENT,
420
					PACKAGE_LEVEL);
421
		if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
422
			therm_throt_process(msr_val &
423 424
					PACKAGE_THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
425
					PACKAGE_LEVEL);
426
	}
427 428 429 430
}

static void unexpected_thermal_interrupt(void)
{
431
	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
432 433 434 435 436
			smp_processor_id());
}

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

437
static inline void __smp_thermal_interrupt(void)
438 439 440
{
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
441 442 443 444 445 446 447
}

/* Thermal interrupt entry point (non-tracing variant) */
asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
	entering_irq();

	__smp_thermal_interrupt();

	exiting_ack_irq();
}

450 451 452 453 454 455 456 457 458
/* Thermal interrupt entry point that also emits entry/exit tracepoints */
asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
{
	entering_irq();

	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
	__smp_thermal_interrupt();
	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);

	exiting_ack_irq();
}

459 460 461 462 463 464 465 466 467 468
/*
 * Thermal monitoring depends on APIC, ACPI and clock modulation.
 * Returns 1 when all three are available on @c, 0 otherwise.
 */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
	return cpu_has_apic &&
	       cpu_has(c, X86_FEATURE_ACPI) &&
	       cpu_has(c, X86_FEATURE_ACC);
}

469
/*
 * Called on the boot CPU only. Remembers the thermal LVT value found on
 * the BSP so that intel_init_thermal() can later restore the entry the
 * BIOS programmed on the APs.
 */
void __init mcheck_intel_therm_init(void)
{
	if (!intel_thermal_supported(&boot_cpu_data))
		return;

	lvtthmr_init = apic_read(APIC_LVTTHMR);
}

H
Hidetoshi Seto 已提交
480
void intel_init_thermal(struct cpuinfo_x86 *c)
481 482 483 484 485
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

486
	if (!intel_thermal_supported(c))
487 488 489 490 491 492 493 494
		return;

	/*
	 * First check if its enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
495

496
	h = lvtthmr_init;
497 498 499 500 501
	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
	 * sequence to them and LVT registers are reset to 0s except for
	 * the mask bits which are set to 1s when APs receive INIT IPI.
502 503 504 505
	 * If BIOS takes over the thermal interrupt and sets its interrupt
	 * delivery mode to SMI (not fixed), it restores the value that the
	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
	 * is always setting the same value for all threads/cores.
506
	 */
507 508
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);
509 510


511 512 513 514 515 516 517 518 519 520 521 522 523 524
	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	/* Check whether a vector already exists */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
		       cpu, (h & APIC_VECTOR_MASK));
		return;
	}

525 526 527 528 529 530 531 532 533 534
	/* early Pentium M models use different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

535 536 537 538 539
	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
540
	if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
541
		wrmsr(MSR_IA32_THERM_INTERRUPT,
542 543 544
			(l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
	else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
545
		wrmsr(MSR_IA32_THERM_INTERRUPT,
546
			l | (THERM_INT_LOW_ENABLE
547 548 549 550
			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
	else
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
551

552 553
	if (cpu_has(c, X86_FEATURE_PTS)) {
		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
554
		if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
555
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
556 557 558 559 560 561
				(l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE))
				& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
		else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				l | (PACKAGE_THERM_INT_LOW_ENABLE
562 563 564 565 566 567
				| PACKAGE_THERM_INT_HIGH_ENABLE
				| PACKAGE_THERM_INT_PLN_ENABLE), h);
		else
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
568 569
	}

570
	smp_thermal_vector = intel_thermal_interrupt;
571 572 573 574 575 576 577 578

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

579 580
	printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
		       tm2 ? "TM2" : "TM1");
581 582 583 584

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}