/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
17
#include <linux/moduleparam.h>
18
#include <linux/kdebug.h>
19
#include <linux/cpu.h>
L
Linus Torvalds 已提交
20 21 22
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
23

L
Linus Torvalds 已提交
24 25
#include "op_counter.h"
#include "op_x86_model.h"
26

27
static struct op_x86_model_spec const *model;
28 29
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
30

L
Linus Torvalds 已提交
31 32 33
/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;

34 35
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
36
{
37 38 39 40
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

41
	switch (val) {
42
	case DIE_NMI:
43
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
44 45 46 47 48 49
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
50
}
51

52
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
53 54
{
	unsigned int const nr_ctrs = model->num_counters;
55 56 57
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
58 59 60
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
61
		if (counters[i].addr) {
62 63 64 65
			rdmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
66
	}
67

L
Linus Torvalds 已提交
68
	for (i = 0; i < nr_ctrls; ++i) {
69
		if (controls[i].addr) {
70 71 72 73
			rdmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
74 75 76
	}
}

77
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
78 79
{
	int cpu = smp_processor_id();
80
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
81 82 83 84 85 86
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
87
	for_each_possible_cpu(i) {
88 89 90 91
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
92 93 94 95 96
	}
}

static int allocate_msrs(void)
{
97
	int success = 1;
L
Linus Torvalds 已提交
98 99 100
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

101
	int i;
C
Chris Wright 已提交
102
	for_each_possible_cpu(i) {
103 104 105
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
106 107 108
			success = 0;
			break;
		}
109 110
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
111
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
112 113 114 115 116 117 118 119 120 121 122
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

123
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
124 125
{
	int cpu = smp_processor_id();
126
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
127 128 129
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(msrs);
	spin_unlock(&oprofilefs_lock);
130
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
131 132 133
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

134 135 136 137 138
/* Die-notifier block hooking profile_exceptions_notify() into the die chain. */
static struct notifier_block profile_exceptions_nb = {
	.priority	= 0,
	.next		= NULL,
	.notifier_call	= profile_exceptions_notify,
};
L
Linus Torvalds 已提交
139 140 141

static int nmi_setup(void)
{
142
	int err = 0;
143
	int cpu;
144

L
Linus Torvalds 已提交
145 146 147
	if (!allocate_msrs())
		return -ENOMEM;

148 149
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
150
		free_msrs();
151
		return err;
L
Linus Torvalds 已提交
152
	}
153

154
	/* We need to serialize save and setup for HT because the subset
L
Linus Torvalds 已提交
155 156
	 * of msrs are distinct for save and setup operations
	 */
157 158

	/* Assume saved/restored counters are the same on all CPUs */
159
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
160
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
161
		if (cpu != 0) {
162 163
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
164 165
				sizeof(struct op_msr) * model->num_counters);

166 167
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
168 169
				sizeof(struct op_msr) * model->num_controls);
		}
170

171
	}
172 173
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
174 175 176 177
	nmi_enabled = 1;
	return 0;
}

178
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
179 180
{
	unsigned int const nr_ctrs = model->num_counters;
181 182 183
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
184 185 186
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
187
		if (controls[i].addr) {
188 189 190 191
			wrmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
192
	}
193

L
Linus Torvalds 已提交
194
	for (i = 0; i < nr_ctrs; ++i) {
195
		if (counters[i].addr) {
196 197 198 199
			wrmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
200 201 202
	}
}

203
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
204 205 206
{
	unsigned int v;
	int cpu = smp_processor_id();
207
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
208

L
Linus Torvalds 已提交
209 210 211 212 213 214 215
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
216
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
217
	apic_write(APIC_LVTERR, v);
218
	nmi_restore_registers(msrs);
L
Linus Torvalds 已提交
219 220 221 222
}

static void nmi_shutdown(void)
{
223 224
	struct op_msrs *msrs;

L
Linus Torvalds 已提交
225
	nmi_enabled = 0;
226
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
227
	unregister_die_notifier(&profile_exceptions_nb);
228
	msrs = &get_cpu_var(cpu_msrs);
229
	model->shutdown(msrs);
L
Linus Torvalds 已提交
230
	free_msrs();
231
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
232 233
}

234
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
235
{
236
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
237 238 239 240 241
	model->start(msrs);
}

static int nmi_start(void)
{
242
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
243 244
	return 0;
}
245 246

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
247
{
248
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
249 250
	model->stop(msrs);
}
251

L
Linus Torvalds 已提交
252 253
static void nmi_stop(void)
{
254
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
255 256 257 258
}

/*
 * Per-counter configuration, one entry per possible counter.  The fields of
 * each entry are exported through oprofilefs by nmi_create_files().
 */
struct op_counter_config counter_config[OP_MAX_COUNTER];

259
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
260 261 262 263
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
264
		struct dentry *dir;
265
		char buf[4];
266 267

		/* quick little hack to _not_ expose a counter if it is not
268 269 270 271 272 273 274
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

275
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
276
		dir = oprofilefs_mkdir(sb, root, buf);
277 278 279 280 281 282
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
283 284 285 286
	}

	return 0;
}
287

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
#ifdef CONFIG_SMP
/*
 * CPU hotplug notifier.  @data carries the number of the affected CPU.
 *
 * CPU_ONLINE and CPU_DOWN_FAILED run nmi_cpu_start() on that CPU;
 * CPU_DOWN_PREPARE runs nmi_cpu_stop() on it.  Other actions are ignored.
 * Always returns NOTIFY_DONE.
 */
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int target = (unsigned long)data;

	if (action == CPU_DOWN_FAILED || action == CPU_ONLINE)
		smp_call_function_single(target, nmi_cpu_start, NULL, 0);
	else if (action == CPU_DOWN_PREPARE)
		smp_call_function_single(target, nmi_cpu_stop, NULL, 1);

	return NOTIFY_DONE;
}

/* Notifier block that routes CPU hotplug events to oprofile_cpu_notifier(). */
static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};
#endif

#ifdef CONFIG_PM

/*
 * sysdev suspend hook: when profiling is on (nmi_enabled == 1), stop the
 * counters on the calling CPU.  Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}

/*
 * sysdev resume hook: when profiling is on (nmi_enabled == 1), restart the
 * counters on the calling CPU.  Always returns 0.
 */
static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_cpu_start(NULL);
	return 0;
}

/* sysdev class "oprofile": wires the suspend/resume hooks defined above. */
static struct sysdev_class oprofile_sysclass = {
	.name		= "oprofile",
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* The single sysdev device (id 0) that belongs to oprofile_sysclass. */
static struct sys_device device_oprofile = {
	.id	= 0,
	.cls	= &oprofile_sysclass,
};

/*
 * Register the oprofile sysdev class and, if that succeeded, its device.
 * Returns 0 on success or the error code of the step that failed.
 */
static int __init init_sysfs(void)
{
	int rc = sysdev_class_register(&oprofile_sysclass);

	if (rc)
		return rc;

	return sysdev_register(&device_oprofile);
}

/*
 * Unregister the oprofile sysdev device and then its class, i.e. the
 * reverse of the registration order used by init_sysfs().
 */
static void exit_sysfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
/* Without CONFIG_PM there is nothing to register: both expand to no-ops. */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

359 360
/* Module parameter: when non-zero, p4_init() skips its CPU-model check. */
static int p4force;
module_param(p4force, int, 0);
361 362

/*
 * Probe for a supported Pentium 4.
 *
 * Unless p4force is set, CPU models 5 and anything above 6 are rejected.
 * On uniprocessor builds the plain P4 model is always chosen; on SMP builds
 * the choice depends on smp_num_siblings (1 -> "i386/p4", 2 ->
 * "i386/p4-ht"), and any other sibling count is reported and rejected.
 *
 * Returns 1 after setting *@cpu_type and model, 0 when unsupported (in
 * which case neither is modified).
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	int unsupported = cpu_model > 6 || cpu_model == 5;

	if (unsupported && !p4force)
		return 0;

#ifndef CONFIG_SMP
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#else
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

392 393 394
/*
 * Module parameter: when non-zero and the CPU has architectural perfmon,
 * ppro_init() declines so that the arch_perfmon model is used instead.
 */
int force_arch_perfmon;
module_param(force_arch_perfmon, int, 0);

395
/*
 * Probe for a supported P6-family CPU and map its model number to the
 * oprofile cpu_type string.
 *
 * Returns 0 without modifying *@cpu_type or model when the model number is
 * unknown, or when force_arch_perfmon is set and the CPU has architectural
 * perfmon.  Otherwise sets *@cpu_type, selects op_ppro_spec and returns 1.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	char *name;

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	if (cpu_model <= 2)
		name = "i386/ppro";		/* models 0..2 */
	else if (cpu_model <= 5)
		name = "i386/pii";		/* models 3..5 */
	else if (cpu_model <= 8 || cpu_model == 10 || cpu_model == 11)
		name = "i386/piii";		/* models 6..8, 10, 11 */
	else if (cpu_model == 9 || cpu_model == 13)
		name = "i386/p6_mobile";
	else if (cpu_model == 14)
		name = "i386/core";
	else if (cpu_model == 15 || cpu_model == 23)
		name = "i386/core_2";
	else
		return 0;			/* Unknown */

	*cpu_type = name;
	model = &op_ppro_spec;
	return 1;
}

432 433 434 435 436 437 438 439 440 441
/*
 * Select the architectural perfmon model when the CPU supports it.
 *
 * Returns 0 without touching *@cpu_type or model when cpu_has_arch_perfmon
 * is false.  Otherwise sets *@cpu_type to "i386/arch_perfmon", points model
 * at op_arch_perfmon_spec, calls arch_perfmon_setup_counters() and
 * returns 1.
 */
static int __init arch_perfmon_init(char **cpu_type)
{
	if (!cpu_has_arch_perfmon)
		return 0;
	*cpu_type = "i386/arch_perfmon";
	model = &op_arch_perfmon_spec;
	arch_perfmon_setup_counters();
	return 1;
}

442
/*
 * In order to get sysfs right: set to 1 at the end of a successful
 * op_nmi_init(), and checked by op_nmi_exit() before it tears down the
 * sysfs entries and the CPU notifier.
 */
static int using_nmi;

445
/*
 * Detect the CPU, pick the matching performance-counter model and fill in
 * @ops with the NMI-based profiling operations.
 *
 * @ops: operations table to populate; the model's init() hook, when
 *       present, may overwrite the defaults installed here.
 *
 * Returns 0 on success, -ENODEV when there is no APIC or the CPU is not
 * supported, or the error returned by the model's init() hook.  On any
 * failure nothing stays registered: the CPU hotplug notifier installed
 * before calling init() is removed again if init() fails, because
 * op_nmi_exit() only unregisters it once using_nmi has been set.
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type = NULL;
	int ret = 0;

	if (!cpu_has_apic)
		return -ENODEV;

	switch (vendor) {
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */

		switch (family) {
		default:
			return -ENODEV;
		case 6:
			model = &op_amd_spec;
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			model = &op_amd_spec;
			/* Actually it could be i386/hammer too, but give
			 * user space a consistent name.
			 */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			model = &op_amd_spec;
			cpu_type = "x86-64/family10";
			break;
		case 0x11:
			model = &op_amd_spec;
			cpu_type = "x86-64/family11h";
			break;
		}
		break;

	case X86_VENDOR_INTEL:
		switch (family) {
			/* Pentium IV */
		case 0xf:
			p4_init(&cpu_type);
			break;

			/* A P6-class processor */
		case 6:
			ppro_init(&cpu_type);
			break;

		default:
			break;
		}

		/* Fall back to architectural perfmon if no model matched. */
		if (!cpu_type && !arch_perfmon_init(&cpu_type))
			return -ENODEV;
		break;

	default:
		return -ENODEV;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	/* default values, can be overwritten by model */
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

	if (model->init)
		ret = model->init(ops);
	if (ret) {
		/* Do not leave the hotplug notifier behind on failure. */
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
		return ret;
	}

	init_sysfs();
	using_nmi = 1;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

529
void op_nmi_exit(void)
L
Linus Torvalds 已提交
530
{
531
	if (using_nmi) {
532
		exit_sysfs();
533 534 535 536
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
537 538
	if (model->exit)
		model->exit();
L
Linus Torvalds 已提交
539
}