nmi_int.c 11.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3
/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
17
#include <linux/moduleparam.h>
18
#include <linux/kdebug.h>
19
#include <linux/cpu.h>
L
Linus Torvalds 已提交
20 21 22
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
23

L
Linus Torvalds 已提交
24 25
#include "op_counter.h"
#include "op_x86_model.h"
26

27
static struct op_x86_model_spec const *model;
28 29
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
30

L
Linus Torvalds 已提交
31 32 33
/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;

34 35
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
36
{
37 38 39 40
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

41
	switch (val) {
42
	case DIE_NMI:
43
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
44 45 46 47 48 49
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
50
}
51

52
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
53 54
{
	unsigned int const nr_ctrs = model->num_counters;
55 56 57
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
58 59 60
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
61
		if (counters[i].addr) {
62 63 64 65
			rdmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
66
	}
67

L
Linus Torvalds 已提交
68
	for (i = 0; i < nr_ctrls; ++i) {
69
		if (controls[i].addr) {
70 71 72 73
			rdmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
74 75 76
	}
}

77
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
78 79
{
	int cpu = smp_processor_id();
80
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
81 82 83 84 85 86
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
87
	for_each_possible_cpu(i) {
88 89 90 91
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
92 93 94 95 96
	}
}

static int allocate_msrs(void)
{
97
	int success = 1;
L
Linus Torvalds 已提交
98 99 100
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

101
	int i;
C
Chris Wright 已提交
102
	for_each_possible_cpu(i) {
103 104 105
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
106 107 108
			success = 0;
			break;
		}
109 110
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
111
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
112 113 114 115 116 117 118 119 120 121 122
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

123
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
124 125
{
	int cpu = smp_processor_id();
126
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
127 128 129
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(msrs);
	spin_unlock(&oprofilefs_lock);
130
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
131 132 133
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

134 135 136 137 138
/*
 * Die-notifier block for profile_exceptions_notify(). It is registered in
 * nmi_setup() and unregistered in nmi_shutdown().
 */
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
	.priority = 0
};
L
Linus Torvalds 已提交
139 140 141

static int nmi_setup(void)
{
142
	int err = 0;
143
	int cpu;
144

L
Linus Torvalds 已提交
145 146 147
	if (!allocate_msrs())
		return -ENOMEM;

148 149
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
150
		free_msrs();
151
		return err;
L
Linus Torvalds 已提交
152
	}
153

154
	/* We need to serialize save and setup for HT because the subset
L
Linus Torvalds 已提交
155 156
	 * of msrs are distinct for save and setup operations
	 */
157 158

	/* Assume saved/restored counters are the same on all CPUs */
159
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
160
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
161
		if (cpu != 0) {
162 163
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
164 165
				sizeof(struct op_msr) * model->num_counters);

166 167
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
168 169
				sizeof(struct op_msr) * model->num_controls);
		}
170

171
	}
172 173
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
174 175 176 177
	nmi_enabled = 1;
	return 0;
}

178
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
179 180
{
	unsigned int const nr_ctrs = model->num_counters;
181 182 183
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
184 185 186
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
187
		if (controls[i].addr) {
188 189 190 191
			wrmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
192
	}
193

L
Linus Torvalds 已提交
194
	for (i = 0; i < nr_ctrs; ++i) {
195
		if (counters[i].addr) {
196 197 198 199
			wrmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
200 201 202
	}
}

203
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
204 205 206
{
	unsigned int v;
	int cpu = smp_processor_id();
207
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
208

L
Linus Torvalds 已提交
209 210 211 212 213 214 215
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
216
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
217
	apic_write(APIC_LVTERR, v);
218
	nmi_restore_registers(msrs);
L
Linus Torvalds 已提交
219 220 221 222
}

static void nmi_shutdown(void)
{
223 224
	struct op_msrs *msrs;

L
Linus Torvalds 已提交
225
	nmi_enabled = 0;
226
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
227
	unregister_die_notifier(&profile_exceptions_nb);
228
	msrs = &get_cpu_var(cpu_msrs);
229
	model->shutdown(msrs);
L
Linus Torvalds 已提交
230
	free_msrs();
231
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
232 233
}

234
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
235
{
236
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
237 238 239 240 241
	model->start(msrs);
}

static int nmi_start(void)
{
242
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
243 244
	return 0;
}
245 246

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
247
{
248
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
249 250
	model->stop(msrs);
}
251

L
Linus Torvalds 已提交
252 253
static void nmi_stop(void)
{
254
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
255 256 257 258
}

/*
 * Per-counter configuration. nmi_create_files() exposes the enabled,
 * event, count, unit_mask, kernel and user fields of entry i as oprofilefs
 * files in a directory named after the counter index i.
 */
struct op_counter_config counter_config[OP_MAX_COUNTER];

259
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
260 261 262 263
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
264
		struct dentry *dir;
265
		char buf[4];
266 267

		/* quick little hack to _not_ expose a counter if it is not
268 269 270 271 272 273 274
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

275
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
276
		dir = oprofilefs_mkdir(sb, root, buf);
277 278 279 280 281 282
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
283 284 285 286
	}

	return 0;
}
287

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
#ifdef CONFIG_SMP
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;
	switch (action) {
	case CPU_DOWN_FAILED:
	case CPU_ONLINE:
		smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
		break;
	case CPU_DOWN_PREPARE:
		smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
		break;
	}
	return NOTIFY_DONE;
}

/*
 * CPU hotplug notifier block; registered in op_nmi_init() and removed in
 * op_nmi_exit() (CONFIG_SMP only).
 */
static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};
#endif

#ifdef CONFIG_PM

/*
 * sysdev suspend hook: stop profiling on the current CPU if NMI
 * profiling is set up. Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}

/*
 * sysdev resume hook: start profiling on the current CPU again if NMI
 * profiling is set up. Always returns 0.
 */
static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_cpu_start(NULL);
	return 0;
}

/* sysdev class that routes suspend/resume to nmi_suspend()/nmi_resume(). */
static struct sysdev_class oprofile_sysclass = {
	.name		= "oprofile",
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* The single sysdev instance (id 0) belonging to oprofile_sysclass. */
static struct sys_device device_oprofile = {
	.id	= 0,
	.cls	= &oprofile_sysclass,
};

/*
 * Register the oprofile sysdev class and, if that worked, its device.
 * Returns 0 on success or the error of the registration that failed.
 */
static int __init init_sysfs(void)
{
	int error = sysdev_class_register(&oprofile_sysclass);

	if (error)
		return error;

	return sysdev_register(&device_oprofile);
}

/*
 * Undo init_sysfs(): unregister the device first, then its class (the
 * reverse of the registration order).
 */
static void exit_sysfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
/* Without CONFIG_PM there is nothing to register: both calls are no-ops. */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

359
/*
 * Select the Pentium 4 model.
 *
 * CPU models 5 and everything above 6 are rejected. Otherwise the plain
 * P4 spec is used on non-SMP builds and on SMP builds with one sibling,
 * and the HT spec with two siblings. Any other sibling count is reported
 * and rejected.
 *
 * On success *cpu_type and the global model are set and 1 is returned;
 * otherwise 0 is returned and neither is touched.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	if (cpu_model == 5 || cpu_model > 6)
		return 0;

#ifdef CONFIG_SMP
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
#else
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#endif
}

389 390 391
/*
 * Module parameter. When non-zero and the CPU has architectural perfmon,
 * ppro_init() declines to pick a model, which lets op_nmi_init() fall back
 * to arch_perfmon_init().
 */
int force_arch_perfmon;
module_param(force_arch_perfmon, int, 0);

392
/*
 * Select the P6-family model based on the boot CPU's model number.
 *
 * Returns 0 without touching *cpu_type or the global model when
 * force_arch_perfmon is set on a CPU with architectural perfmon, or when
 * the model number is unknown. Otherwise *cpu_type is set to the matching
 * name, the global model to op_ppro_spec, and 1 is returned.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	char *name;

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	switch (cpu_model) {
	case 0 ... 2:
		name = "i386/ppro";
		break;
	case 3 ... 5:
		name = "i386/pii";
		break;
	case 6 ... 8:
	case 10 ... 11:
		name = "i386/piii";
		break;
	case 9:
	case 13:
		name = "i386/p6_mobile";
		break;
	case 14:
		name = "i386/core";
		break;
	case 15:
	case 23:
		name = "i386/core_2";
		break;
	default:
		/* Unknown */
		return 0;
	}

	*cpu_type = name;
	model = &op_ppro_spec;
	return 1;
}

429 430 431 432 433 434 435 436 437 438
/*
 * Select the architectural perfmon model if the CPU supports it.
 *
 * On success *cpu_type and the global model are set, the counters are
 * configured through arch_perfmon_setup_counters() and 1 is returned.
 * Returns 0, changing nothing, when architectural perfmon is absent.
 */
static int __init arch_perfmon_init(char **cpu_type)
{
	if (cpu_has_arch_perfmon) {
		*cpu_type = "i386/arch_perfmon";
		model = &op_arch_perfmon_spec;
		arch_perfmon_setup_counters();
		return 1;
	}

	return 0;
}

439
/* in order to get sysfs right */
L
Linus Torvalds 已提交
440 441
/*
 * Set to 1 at the end of a successful op_nmi_init(); op_nmi_exit() only
 * tears down sysfs and the CPU notifier when it is set.
 */
static int using_nmi;

442
/*
 * Probe the boot CPU and set up NMI-based profiling.
 *
 * @ops: oprofile operations table; its create_files, setup, shutdown,
 *       start, stop and cpu_type members are filled in here and may be
 *       overridden by the selected model's init callback.
 *
 * The model is chosen from the CPU vendor and family: op_amd_spec for AMD
 * families 6, 0xf, 0x10 and 0x11; p4_init() or ppro_init() for Intel
 * families 0xf and 6, with arch_perfmon_init() as the fallback whenever no
 * cpu_type has been selected.
 *
 * Returns 0 on success, -ENODEV if there is no APIC or no model matches,
 * or the non-zero value returned by the model's init callback. In the
 * latter case the CPU hotplug notifier registered here is removed again:
 * using_nmi stays 0 on that path, so op_nmi_exit() would never do it.
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type = NULL;
	int ret = 0;

	if (!cpu_has_apic)
		return -ENODEV;

	switch (vendor) {
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */

		switch (family) {
		default:
			return -ENODEV;
		case 6:
			model = &op_amd_spec;
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			model = &op_amd_spec;
			/* Actually it could be i386/hammer too, but give
			 user space a consistent name. */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			model = &op_amd_spec;
			cpu_type = "x86-64/family10";
			break;
		case 0x11:
			model = &op_amd_spec;
			cpu_type = "x86-64/family11h";
			break;
		}
		break;

	case X86_VENDOR_INTEL:
		switch (family) {
			/* Pentium IV */
		case 0xf:
			p4_init(&cpu_type);
			break;

			/* A P6-class processor */
		case 6:
			ppro_init(&cpu_type);
			break;

		default:
			break;
		}

		/* No family-specific model selected: try arch perfmon. */
		if (!cpu_type && !arch_perfmon_init(&cpu_type))
			return -ENODEV;
		break;

	default:
		return -ENODEV;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	/* default values, can be overwritten by model */
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

	if (model->init)
		ret = model->init(ops);
	if (ret) {
		/* Do not leave the hotplug notifier behind on failure. */
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
		return ret;
	}

	init_sysfs();
	using_nmi = 1;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

526
void op_nmi_exit(void)
L
Linus Torvalds 已提交
527
{
528
	if (using_nmi) {
529
		exit_sysfs();
530 531 532 533
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
534 535
	if (model->exit)
		model->exit();
L
Linus Torvalds 已提交
536
}