nmi_int.c 11.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3
/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
17
#include <linux/moduleparam.h>
18
#include <linux/kdebug.h>
19
#include <linux/cpu.h>
L
Linus Torvalds 已提交
20 21 22
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
23

L
Linus Torvalds 已提交
24 25
#include "op_counter.h"
#include "op_x86_model.h"
26

27
/*
 * Model-specific operations in use.  Starts out NULL and is pointed at
 * one of the op_*_spec tables by op_nmi_init() (directly for AMD, or
 * via p4_init()/ppro_init()/arch_perfmon_init() for Intel).
 */
static struct op_x86_model_spec const *model;
28 29
/*
 * Per-CPU state:
 *  - cpu_msrs:    counter/control MSR tables; the arrays are allocated by
 *                 allocate_msrs() and released by free_msrs().
 *  - saved_lvtpc: APIC_LVTPC value read in nmi_cpu_setup() and written
 *                 back in nmi_cpu_shutdown().
 */
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
30

L
Linus Torvalds 已提交
31 32 33
/*
 * 0 == registered but off, 1 == registered and on.
 * Set to 1 at the end of nmi_setup(), cleared at the start of
 * nmi_shutdown(), and consulted by the suspend/resume hooks.
 */
static int nmi_enabled = 0;

34 35
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
36
{
37 38 39 40
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

41
	switch (val) {
42
	case DIE_NMI:
43 44 45
	case DIE_NMI_IPI:
		model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
		ret = NOTIFY_STOP;
46 47 48 49 50
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
51
}
52

53
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
54 55
{
	unsigned int const nr_ctrs = model->num_counters;
56 57 58
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
59 60 61
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
62
		if (counters[i].addr) {
63 64 65 66
			rdmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
67
	}
68

L
Linus Torvalds 已提交
69
	for (i = 0; i < nr_ctrls; ++i) {
70
		if (controls[i].addr) {
71 72 73 74
			rdmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
75 76 77
	}
}

78
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
79 80
{
	int cpu = smp_processor_id();
81
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
82 83 84 85 86 87
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
88
	for_each_possible_cpu(i) {
89 90 91 92
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
93 94 95 96 97
	}
}

static int allocate_msrs(void)
{
98
	int success = 1;
L
Linus Torvalds 已提交
99 100 101
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

102
	int i;
C
Chris Wright 已提交
103
	for_each_possible_cpu(i) {
104 105 106
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
107 108 109
			success = 0;
			break;
		}
110 111
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
112
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
113 114 115 116 117 118 119 120 121 122 123
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

124
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
125 126
{
	int cpu = smp_processor_id();
127
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
128 129 130
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(msrs);
	spin_unlock(&oprofilefs_lock);
131
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
132 133 134
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

135 136 137
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
138
	.priority = 2
139
};
L
Linus Torvalds 已提交
140 141 142

static int nmi_setup(void)
{
143
	int err = 0;
144
	int cpu;
145

L
Linus Torvalds 已提交
146 147 148
	if (!allocate_msrs())
		return -ENOMEM;

149 150
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
151
		free_msrs();
152
		return err;
L
Linus Torvalds 已提交
153
	}
154

155
	/* We need to serialize save and setup for HT because the subset
L
Linus Torvalds 已提交
156 157
	 * of msrs are distinct for save and setup operations
	 */
158 159

	/* Assume saved/restored counters are the same on all CPUs */
160
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
161
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
162
		if (cpu != 0) {
163 164
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
165 166
				sizeof(struct op_msr) * model->num_counters);

167 168
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
169 170
				sizeof(struct op_msr) * model->num_controls);
		}
171

172
	}
173 174
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
175 176 177 178
	nmi_enabled = 1;
	return 0;
}

179
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
180 181
{
	unsigned int const nr_ctrs = model->num_counters;
182 183 184
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
185 186 187
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
188
		if (controls[i].addr) {
189 190 191 192
			wrmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
193
	}
194

L
Linus Torvalds 已提交
195
	for (i = 0; i < nr_ctrs; ++i) {
196
		if (counters[i].addr) {
197 198 199 200
			wrmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
201 202 203
	}
}

204
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
205 206 207
{
	unsigned int v;
	int cpu = smp_processor_id();
208
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
209

L
Linus Torvalds 已提交
210 211 212 213 214 215 216
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
217
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
218
	apic_write(APIC_LVTERR, v);
219
	nmi_restore_registers(msrs);
L
Linus Torvalds 已提交
220 221 222 223
}

static void nmi_shutdown(void)
{
224 225
	struct op_msrs *msrs;

L
Linus Torvalds 已提交
226
	nmi_enabled = 0;
227
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
228
	unregister_die_notifier(&profile_exceptions_nb);
229
	msrs = &get_cpu_var(cpu_msrs);
230
	model->shutdown(msrs);
L
Linus Torvalds 已提交
231
	free_msrs();
232
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
233 234
}

235
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
236
{
237
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
238 239 240 241 242
	model->start(msrs);
}

static int nmi_start(void)
{
243
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
244 245
	return 0;
}
246 247

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
248
{
249
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
250 251
	model->stop(msrs);
}
252

L
Linus Torvalds 已提交
253 254
static void nmi_stop(void)
{
255
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
256 257 258 259
}

/*
 * Per-counter settings (enabled, event, count, unit_mask, kernel, user).
 * nmi_create_files() exposes each field through oprofilefs.  Not static:
 * the symbol has external linkage.
 */
struct op_counter_config counter_config[OP_MAX_COUNTER];

260
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
261 262 263 264
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
265
		struct dentry *dir;
266
		char buf[4];
267 268

		/* quick little hack to _not_ expose a counter if it is not
269 270 271 272 273 274 275
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

276
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
277
		dir = oprofilefs_mkdir(sb, root, buf);
278 279 280 281 282 283
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
284 285 286 287
	}

	return 0;
}
288

289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
#ifdef CONFIG_SMP
/*
 * CPU hotplug callback.  A CPU that came online (or whose offlining
 * failed) gets nmi_cpu_start() queued without waiting; a CPU about to go
 * down gets nmi_cpu_stop() run synchronously.  Other actions are
 * ignored.  Always returns NOTIFY_DONE.
 *
 * @b:      notifier block (unused)
 * @action: hotplug event code
 * @data:   CPU number, passed as a pointer-sized integer
 */
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;

	if (action == CPU_ONLINE || action == CPU_DOWN_FAILED)
		smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
	else if (action == CPU_DOWN_PREPARE)
		smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);

	return NOTIFY_DONE;
}

/*
 * CPU hotplug notifier; registered in op_nmi_init() and unregistered in
 * op_nmi_exit().
 */
static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};
#endif

#ifdef CONFIG_PM

/*
 * sysdev suspend hook: stop the counters on the executing CPU when
 * profiling is switched on.  Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}

/*
 * sysdev resume hook: restart the counters on the executing CPU when
 * profiling is switched on.  Always returns 0.
 */
static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_cpu_start(NULL);
	return 0;
}

/* sysdev class that carries the oprofile suspend/resume hooks */
static struct sysdev_class oprofile_sysclass = {
	.name		= "oprofile",
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* the single device instance (id 0) belonging to oprofile_sysclass */
static struct sys_device device_oprofile = {
	.id	= 0,
	.cls	= &oprofile_sysclass,
};

/*
 * Register the oprofile sysdev class and, if that succeeds, its device.
 * Returns 0 on success or the first registration error.
 */
static int __init init_sysfs(void)
{
	int error = sysdev_class_register(&oprofile_sysclass);

	if (error)
		return error;

	return sysdev_register(&device_oprofile);
}

/*
 * Undo init_sysfs(): unregister the device first, then its class
 * (the reverse of the registration order).
 */
static void exit_sysfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
/*
 * Without CONFIG_PM there is nothing to register: both helpers expand
 * to empty statements (and therefore produce no value).
 */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

360
/*
 * Select the Pentium 4 model spec.
 *
 * Models 5 and anything above 6 are rejected.  On UP builds the plain
 * P4 spec is always used; on SMP builds the choice depends on
 * smp_num_siblings (1 -> p4, 2 -> p4-ht).  Any other sibling count is
 * logged and rejected.
 *
 * @cpu_type: receives the oprofile CPU type string on success
 *
 * Returns 1 when a spec was selected, 0 otherwise.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 const p4_model = boot_cpu_data.x86_model;

	if (p4_model == 5 || p4_model > 6)
		return 0;

#ifndef CONFIG_SMP
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#else
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

390 391 392 393 394 395 396 397 398 399 400
/* set when the "cpu_type" parameter asked for architectural perfmon */
static int force_arch_perfmon;

/*
 * Setter for the "cpu_type" module parameter.  The value "archperfmon"
 * sets force_arch_perfmon and logs the fact; any other value is
 * accepted and ignored.  Always returns 0.
 */
static int force_cpu_type(const char *str, struct kernel_param *kp)
{
	if (strcmp(str, "archperfmon") != 0)
		return 0;

	force_arch_perfmon = 1;
	printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
	return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
401

402
/*
 * Select the P6-family model spec and map the CPU model number to an
 * oprofile CPU type string.
 *
 * Declines (returns 0) when architectural perfmon was forced and the
 * CPU supports it, or when the model number is not recognised; in both
 * cases *cpu_type and model are left untouched.
 *
 * @cpu_type: receives the oprofile CPU type string on success
 *
 * Returns 1 when op_ppro_spec was selected, 0 otherwise.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 const p6_model = boot_cpu_data.x86_model;
	char *name;

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	switch (p6_model) {
	case 0 ... 2:
		name = "i386/ppro";
		break;
	case 3 ... 5:
		name = "i386/pii";
		break;
	case 6 ... 8:
	case 10 ... 11:
		name = "i386/piii";
		break;
	case 9:
	case 13:
		name = "i386/p6_mobile";
		break;
	case 14:
		name = "i386/core";
		break;
	case 15:
	case 23:
		name = "i386/core_2";
		break;
	case 26:
		/* this model also needs the arch perfmon counter setup */
		arch_perfmon_setup_counters();
		name = "i386/core_i7";
		break;
	case 28:
		name = "i386/atom";
		break;
	default:
		/* Unknown */
		return 0;
	}

	*cpu_type = name;
	model = &op_ppro_spec;
	return 1;
}

446 447 448 449 450 451 452 453 454 455
/*
 * Fallback used by op_nmi_init() when no Intel family-specific init
 * produced a CPU type: select the architectural perfmon spec.
 *
 * @cpu_type: receives "i386/arch_perfmon" on success
 *
 * Returns 0 when the CPU lacks architectural perfmon, otherwise 1 after
 * selecting op_arch_perfmon_spec and running
 * arch_perfmon_setup_counters().
 */
static int __init arch_perfmon_init(char **cpu_type)
{
	if (!cpu_has_arch_perfmon)
		return 0;
	*cpu_type = "i386/arch_perfmon";
	model = &op_arch_perfmon_spec;
	arch_perfmon_setup_counters();
	return 1;
}

456
/* in order to get sysfs right */
L
Linus Torvalds 已提交
457 458
static int using_nmi;

459
int __init op_nmi_init(struct oprofile_operations *ops)
L
Linus Torvalds 已提交
460 461 462
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
463
	char *cpu_type = NULL;
464
	int ret = 0;
L
Linus Torvalds 已提交
465 466 467

	if (!cpu_has_apic)
		return -ENODEV;
468

L
Linus Torvalds 已提交
469
	switch (vendor) {
470 471
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
L
Linus Torvalds 已提交
472

473 474 475 476
		switch (family) {
		default:
			return -ENODEV;
		case 6:
477
			model = &op_amd_spec;
478 479 480
			cpu_type = "i386/athlon";
			break;
		case 0xf:
481
			model = &op_amd_spec;
482 483 484 485 486
			/* Actually it could be i386/hammer too, but give
			 user space an consistent name. */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
487
			model = &op_amd_spec;
488 489
			cpu_type = "x86-64/family10";
			break;
490
		case 0x11:
491
			model = &op_amd_spec;
492 493
			cpu_type = "x86-64/family11h";
			break;
494 495 496 497 498 499 500
		}
		break;

	case X86_VENDOR_INTEL:
		switch (family) {
			/* Pentium IV */
		case 0xf:
501
			p4_init(&cpu_type);
L
Linus Torvalds 已提交
502
			break;
503 504 505

			/* A P6-class processor */
		case 6:
506
			ppro_init(&cpu_type);
L
Linus Torvalds 已提交
507 508 509
			break;

		default:
510
			break;
511
		}
512 513 514

		if (!cpu_type && !arch_perfmon_init(&cpu_type))
			return -ENODEV;
515 516 517 518
		break;

	default:
		return -ENODEV;
L
Linus Torvalds 已提交
519 520
	}

521 522 523
#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
524 525 526 527 528 529 530 531
	/* default values, can be overwritten by model */
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

532 533 534 535 536
	if (model->init)
		ret = model->init(ops);
	if (ret)
		return ret;

537
	init_sysfs();
L
Linus Torvalds 已提交
538 539 540 541 542
	using_nmi = 1;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

543
void op_nmi_exit(void)
L
Linus Torvalds 已提交
544
{
545
	if (using_nmi) {
546
		exit_sysfs();
547 548 549 550
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
551 552
	if (model->exit)
		model->exit();
L
Linus Torvalds 已提交
553
}