/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
17
#include <linux/moduleparam.h>
18
#include <linux/kdebug.h>
19
#include <linux/cpu.h>
L
Linus Torvalds 已提交
20 21 22
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
23

L
Linus Torvalds 已提交
24 25
#include "op_counter.h"
#include "op_x86_model.h"
26

27
/*
 * Operations of the CPU model in use.  Assigned during op_nmi_init()
 * (directly, or through p4_init()/ppro_init()) and dereferenced by every
 * other function in this file.
 */
static struct op_x86_model_spec const *model;
28 29
/*
 * cpu_msrs:    per-CPU counter/control MSR tables; the arrays are allocated
 *              by allocate_msrs() and released by free_msrs().
 * saved_lvtpc: per-CPU copy of APIC_LVTPC taken in nmi_cpu_setup() and
 *              written back in nmi_cpu_shutdown().
 */
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
30

L
Linus Torvalds 已提交
31 32 33
/*
 * 0 == registered but off, 1 == registered and on.
 * Set at the end of nmi_setup(), cleared at the start of nmi_shutdown(),
 * and consulted by nmi_suspend()/nmi_resume().
 */
static int nmi_enabled = 0;

34 35
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
36
{
37 38 39 40
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

41
	switch (val) {
42
	case DIE_NMI:
43
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
44 45 46 47 48 49
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
50
}
51

52
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
53 54
{
	unsigned int const nr_ctrs = model->num_counters;
55 56 57
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
58 59 60
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
61
		if (counters[i].addr) {
62 63 64 65
			rdmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
66
	}
67

L
Linus Torvalds 已提交
68
	for (i = 0; i < nr_ctrls; ++i) {
69
		if (controls[i].addr) {
70 71 72 73
			rdmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
74 75 76
	}
}

77
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
78 79
{
	int cpu = smp_processor_id();
80
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
81 82 83 84 85 86
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
87
	for_each_possible_cpu(i) {
88 89 90 91
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
92 93 94 95 96
	}
}

static int allocate_msrs(void)
{
97
	int success = 1;
L
Linus Torvalds 已提交
98 99 100
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

101
	int i;
C
Chris Wright 已提交
102
	for_each_possible_cpu(i) {
103 104 105
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
106 107 108
			success = 0;
			break;
		}
109 110
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
111
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
112 113 114 115 116 117 118 119 120 121 122
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

123
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
124 125
{
	int cpu = smp_processor_id();
126
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
127 128 129
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(msrs);
	spin_unlock(&oprofilefs_lock);
130
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
131 132 133
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

134 135 136 137 138
/*
 * Notifier block that routes die-chain events to
 * profile_exceptions_notify(); registered in nmi_setup() and unregistered
 * in nmi_shutdown().
 */
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
	.priority = 0
};
L
Linus Torvalds 已提交
139 140 141

static int nmi_setup(void)
{
142
	int err = 0;
143
	int cpu;
144

L
Linus Torvalds 已提交
145 146 147
	if (!allocate_msrs())
		return -ENOMEM;

148 149
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
150
		free_msrs();
151
		return err;
L
Linus Torvalds 已提交
152
	}
153

154
	/* We need to serialize save and setup for HT because the subset
L
Linus Torvalds 已提交
155 156
	 * of msrs are distinct for save and setup operations
	 */
157 158

	/* Assume saved/restored counters are the same on all CPUs */
159
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
160
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
161
		if (cpu != 0) {
162 163
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
164 165
				sizeof(struct op_msr) * model->num_counters);

166 167
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
168 169
				sizeof(struct op_msr) * model->num_controls);
		}
170

171
	}
172 173
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
174 175 176 177
	nmi_enabled = 1;
	return 0;
}

178
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
179 180
{
	unsigned int const nr_ctrs = model->num_counters;
181 182 183
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
184 185 186
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
187
		if (controls[i].addr) {
188 189 190 191
			wrmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
192
	}
193

L
Linus Torvalds 已提交
194
	for (i = 0; i < nr_ctrs; ++i) {
195
		if (counters[i].addr) {
196 197 198 199
			wrmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
200 201 202
	}
}

203
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
204 205 206
{
	unsigned int v;
	int cpu = smp_processor_id();
207
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
208

L
Linus Torvalds 已提交
209 210 211 212 213 214 215
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
216
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
217
	apic_write(APIC_LVTERR, v);
218
	nmi_restore_registers(msrs);
L
Linus Torvalds 已提交
219 220 221 222
}

static void nmi_shutdown(void)
{
223 224
	struct op_msrs *msrs;

L
Linus Torvalds 已提交
225
	nmi_enabled = 0;
226
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
227
	unregister_die_notifier(&profile_exceptions_nb);
228
	msrs = &get_cpu_var(cpu_msrs);
229
	model->shutdown(msrs);
L
Linus Torvalds 已提交
230
	free_msrs();
231
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
232 233
}

234
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
235
{
236
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
237 238 239 240 241
	model->start(msrs);
}

static int nmi_start(void)
{
242
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
243 244
	return 0;
}
245 246

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
247
{
248
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
249 250
	model->stop(msrs);
}
251

L
Linus Torvalds 已提交
252 253
static void nmi_stop(void)
{
254
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
255 256 257 258
}

/*
 * Per-counter settings (enabled, event, count, unit_mask, kernel, user);
 * each field is exported as an oprofilefs file by nmi_create_files().
 */
struct op_counter_config counter_config[OP_MAX_COUNTER];

259
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
260 261 262 263
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
264
		struct dentry *dir;
265
		char buf[4];
266 267

		/* quick little hack to _not_ expose a counter if it is not
268 269 270 271 272 273 274
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

275
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
276
		dir = oprofilefs_mkdir(sb, root, buf);
277 278 279 280 281 282
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
283 284 285 286
	}

	return 0;
}
287

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
#ifdef CONFIG_SMP
/*
 * CPU hotplug callback.  @data carries the CPU number.  The counters are
 * started on a CPU that came online (or whose offlining failed) without
 * waiting for the call to finish, and stopped synchronously on a CPU that
 * is about to go down.  Always returns NOTIFY_DONE.
 *
 * NOTE(review): nmi_enabled is not consulted here, so the start/stop hooks
 * run even when profiling has not been set up -- confirm that is intended.
 */
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;

	if (action == CPU_DOWN_FAILED || action == CPU_ONLINE)
		smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
	else if (action == CPU_DOWN_PREPARE)
		smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);

	return NOTIFY_DONE;
}

/*
 * Hotplug notifier block for oprofile_cpu_notifier(); registered in
 * op_nmi_init() and unregistered in op_nmi_exit().
 */
static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};
#endif

#ifdef CONFIG_PM

/*
 * sysdev suspend hook: stop the counters on the current CPU if profiling
 * is on.  Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}

/*
 * sysdev resume hook: restart the counters on the current CPU if profiling
 * is on.  Always returns 0.
 */
static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_cpu_start(NULL);
	return 0;
}

/* sysdev class "oprofile"; wires nmi_suspend()/nmi_resume() as PM hooks. */
static struct sysdev_class oprofile_sysclass = {
	.name		= "oprofile",
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* The single sys_device (id 0) belonging to oprofile_sysclass. */
static struct sys_device device_oprofile = {
	.id	= 0,
	.cls	= &oprofile_sysclass,
};

/*
 * Register the oprofile sysdev class and, if that worked, its device.
 * Returns 0 on success or the first error encountered.
 *
 * NOTE(review): when sysdev_register() fails the class stays registered.
 */
static int __init init_sysfs(void)
{
	int error = sysdev_class_register(&oprofile_sysclass);

	if (error)
		return error;

	return sysdev_register(&device_oprofile);
}

/* Undo init_sysfs(): unregister the device first, then its class. */
static void exit_sysfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
/* Without CONFIG_PM there is nothing to register: both hooks are no-ops. */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

359
/*
 * Pick the Pentium 4 model spec.
 *
 * Models 5 and anything above 6 are rejected.  On UP builds the plain P4
 * spec is always used; on SMP builds the choice depends on
 * smp_num_siblings (1 -> p4, 2 -> p4-ht).
 *
 * On success *cpu_type and model are set and 1 is returned; otherwise 0.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
#ifdef CONFIG_SMP
	int siblings;
#endif

	if (cpu_model == 5 || cpu_model > 6)
		return 0;

#ifndef CONFIG_SMP
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#else
	siblings = smp_num_siblings;

	if (siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

389 390 391 392 393 394 395 396 397 398 399
/* Non-zero once the "cpu_type=archperfmon" module parameter was given. */
static int force_arch_perfmon;

/*
 * Setter for the "cpu_type" module parameter.  Only the value
 * "archperfmon" has an effect; any other string is accepted and ignored.
 * Always returns 0.
 */
static int force_cpu_type(const char *str, struct kernel_param *kp)
{
	if (strcmp(str, "archperfmon") != 0)
		return 0;

	force_arch_perfmon = 1;
	printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
	return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
400

401
/*
 * Pick the model spec and cpu_type string for a family-6 Intel CPU.
 *
 * Returns 0 without touching *cpu_type or model when architectural perfmon
 * is being forced (and is available) or when the CPU model is unknown;
 * op_nmi_init() then falls back to arch perfmon.  Otherwise sets *cpu_type
 * and model and returns 1.
 *
 * The spec is collected in a local and committed to model only at the end.
 * Previously model 26 selected op_arch_perfmon_spec inside the switch, but
 * the unconditional "model = &op_ppro_spec" after the switch overwrote it.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	struct op_x86_model_spec const *spec = &op_ppro_spec;	/* default */

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	switch (cpu_model) {
	case 0 ... 2:
		*cpu_type = "i386/ppro";
		break;
	case 3 ... 5:
		*cpu_type = "i386/pii";
		break;
	case 6 ... 8:
	case 10 ... 11:
		*cpu_type = "i386/piii";
		break;
	case 9:
	case 13:
		*cpu_type = "i386/p6_mobile";
		break;
	case 14:
		*cpu_type = "i386/core";
		break;
	case 15: case 23:
		*cpu_type = "i386/core_2";
		break;
	case 26:
		/* Core i7 is driven through the arch perfmon spec */
		spec = &op_arch_perfmon_spec;
		*cpu_type = "i386/core_i7";
		break;
	case 28:
		*cpu_type = "i386/atom";
		break;
	default:
		/* Unknown */
		return 0;
	}

	model = spec;
	return 1;
}

445
/*
 * in order to get sysfs right: set once op_nmi_init() has completed, so
 * that op_nmi_exit() only tears down what was actually registered.
 */
static int using_nmi;

448
/*
 * Detect the CPU, choose the matching model spec and fill in @ops with the
 * NMI-based profiling operations.
 *
 * @ops: operations table to populate; the model's init() hook may
 *       overwrite the defaults set here.
 *
 * Returns 0 on success, -ENODEV when there is no APIC or the CPU is not
 * supported, or the error returned by the model's init() hook.
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type = NULL;
	int ret = 0;

	if (!cpu_has_apic)
		return -ENODEV;

	switch (vendor) {
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */

		switch (family) {
		default:
			return -ENODEV;
		case 6:
			model = &op_amd_spec;
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			model = &op_amd_spec;
			/* Actually it could be i386/hammer too, but give
			 user space a consistent name. */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			model = &op_amd_spec;
			cpu_type = "x86-64/family10";
			break;
		case 0x11:
			model = &op_amd_spec;
			cpu_type = "x86-64/family11h";
			break;
		}
		break;

	case X86_VENDOR_INTEL:
		/*
		 * The helpers' return values are not needed: they set
		 * cpu_type (and model) only when they recognise the CPU.
		 */
		switch (family) {
			/* Pentium IV */
		case 0xf:
			p4_init(&cpu_type);
			break;

			/* A P6-class processor */
		case 6:
			ppro_init(&cpu_type);
			break;

		default:
			break;
		}

		if (cpu_type)
			break;

		if (!cpu_has_arch_perfmon)
			return -ENODEV;

		/* use arch perfmon as fallback */
		cpu_type = "i386/arch_perfmon";
		model = &op_arch_perfmon_spec;
		break;

	default:
		return -ENODEV;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	/* default values, can be overwritten by model */
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

	if (model->init)
		ret = model->init(ops);
	if (ret) {
		/*
		 * using_nmi stays 0 on this path, so op_nmi_exit() will not
		 * unregister the hotplug notifier; drop it here so it does
		 * not stay registered after a failed init.
		 */
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
		return ret;
	}

	init_sysfs();
	using_nmi = 1;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

539
void op_nmi_exit(void)
L
Linus Torvalds 已提交
540
{
541
	if (using_nmi) {
542
		exit_sysfs();
543 544 545 546
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
547 548
	if (model->exit)
		model->exit();
L
Linus Torvalds 已提交
549
}