nmi_int.c 10.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
16
#include <linux/moduleparam.h>
17
#include <linux/kdebug.h>
18
#include <linux/cpu.h>
L
Linus Torvalds 已提交
19 20 21
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
22

L
Linus Torvalds 已提交
23 24
#include "op_counter.h"
#include "op_x86_model.h"
25

26
static struct op_x86_model_spec const *model;
27 28
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
29

L
Linus Torvalds 已提交
30 31
static int nmi_start(void);
static void nmi_stop(void);
32 33
static void nmi_cpu_start(void *dummy);
static void nmi_cpu_stop(void *dummy);
L
Linus Torvalds 已提交
34 35 36 37

/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;

38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
#ifdef CONFIG_SMP
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;
	switch (action) {
	case CPU_DOWN_FAILED:
	case CPU_ONLINE:
		smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
		break;
	case CPU_DOWN_PREPARE:
		smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
		break;
	}
	return NOTIFY_DONE;
}

/* Hotplug notifier block; registered in op_nmi_init(), removed in op_nmi_exit() */
static struct notifier_block oprofile_cpu_nb = {
	.notifier_call	= oprofile_cpu_notifier,
};
#endif

L
Linus Torvalds 已提交
60 61
#ifdef CONFIG_PM

62
/*
 * sysdev suspend hook: stop the counters on the current CPU if profiling
 * is enabled.  Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}

static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled == 1)
73
		nmi_cpu_start(NULL);
L
Linus Torvalds 已提交
74 75 76 77
	return 0;
}

static struct sysdev_class oprofile_sysclass = {
78
	.name		= "oprofile",
L
Linus Torvalds 已提交
79 80 81 82 83 84 85 86 87
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* The single device instance of oprofile_sysclass */
static struct sys_device device_oprofile = {
	.cls	= &oprofile_sysclass,
	.id	= 0,
};

88
/*
 * Register the oprofile sysdev class and then its device.
 *
 * Returns 0 on success, otherwise the error reported by whichever
 * registration step failed first.
 */
static int __init init_sysfs(void)
{
	int error = sysdev_class_register(&oprofile_sysclass);

	if (error)
		return error;

	return sysdev_register(&device_oprofile);
}

98
static void exit_sysfs(void)
L
Linus Torvalds 已提交
99 100 101 102 103 104
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
105 106
/*
 * Without CONFIG_PM there is no sysdev to register, so both hooks expand
 * to empty statements.  Note these forms yield no value.
 */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
L
Linus Torvalds 已提交
107 108
#endif /* CONFIG_PM */

109 110
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
111
{
112 113 114 115
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

116
	switch (val) {
117
	case DIE_NMI:
118
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
119 120 121 122 123 124
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
125
}
126

127
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
128 129
{
	unsigned int const nr_ctrs = model->num_counters;
130 131 132
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
133 134 135
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
136
		if (counters[i].addr) {
137 138 139 140
			rdmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
141
	}
142

L
Linus Torvalds 已提交
143
	for (i = 0; i < nr_ctrls; ++i) {
144
		if (controls[i].addr) {
145 146 147 148
			rdmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
149 150 151
	}
}

152
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
153 154
{
	int cpu = smp_processor_id();
155
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
156 157 158 159 160 161
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
162
	for_each_possible_cpu(i) {
163 164 165 166
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
167 168 169 170 171 172 173 174 175 176
	}
}

static int allocate_msrs(void)
{
	int success = 1;
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

	int i;
C
Chris Wright 已提交
177
	for_each_possible_cpu(i) {
178 179 180
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
181 182 183
			success = 0;
			break;
		}
184 185 186
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
187 188 189 190 191 192 193 194 195 196 197
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

198
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
199 200
{
	int cpu = smp_processor_id();
201
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
202 203 204
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(msrs);
	spin_unlock(&oprofilefs_lock);
205
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
206 207 208
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

209 210 211 212 213
/* Die-chain notifier block; registered in nmi_setup(), removed in nmi_shutdown() */
static struct notifier_block profile_exceptions_nb = {
	.notifier_call	= profile_exceptions_notify,
	.priority	= 0,
	.next		= NULL,
};
L
Linus Torvalds 已提交
214 215 216

static int nmi_setup(void)
{
217
	int err = 0;
218
	int cpu;
219

L
Linus Torvalds 已提交
220 221 222
	if (!allocate_msrs())
		return -ENOMEM;

223 224
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
225
		free_msrs();
226
		return err;
L
Linus Torvalds 已提交
227
	}
228

L
Linus Torvalds 已提交
229 230 231
	/* We need to serialize save and setup for HT because the subset
	 * of msrs are distinct for save and setup operations
	 */
232 233

	/* Assume saved/restored counters are the same on all CPUs */
234
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
235
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
236
		if (cpu != 0) {
237 238
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
239 240
				sizeof(struct op_msr) * model->num_counters);

241 242
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
243 244 245
				sizeof(struct op_msr) * model->num_controls);
		}

246
	}
247 248
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
249 250 251 252
	nmi_enabled = 1;
	return 0;
}

253
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
254 255
{
	unsigned int const nr_ctrs = model->num_counters;
256 257 258
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
259 260 261
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
262
		if (controls[i].addr) {
263 264 265 266
			wrmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
267
	}
268

L
Linus Torvalds 已提交
269
	for (i = 0; i < nr_ctrs; ++i) {
270
		if (counters[i].addr) {
271 272 273 274
			wrmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
275 276 277
	}
}

278
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
279 280 281
{
	unsigned int v;
	int cpu = smp_processor_id();
282
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
283

L
Linus Torvalds 已提交
284 285 286 287 288 289 290
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
291
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
292 293 294 295 296 297
	apic_write(APIC_LVTERR, v);
	nmi_restore_registers(msrs);
}

static void nmi_shutdown(void)
{
298
	struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
299
	nmi_enabled = 0;
300
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
301
	unregister_die_notifier(&profile_exceptions_nb);
302
	model->shutdown(msrs);
L
Linus Torvalds 已提交
303
	free_msrs();
304
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
305 306
}

307
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
308
{
309
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
310 311 312 313 314
	model->start(msrs);
}

static int nmi_start(void)
{
315
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
316 317
	return 0;
}
318 319

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
320
{
321
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
322 323
	model->stop(msrs);
}
324

L
Linus Torvalds 已提交
325 326
static void nmi_stop(void)
{
327
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
328 329 330 331
}

/*
 * One configuration slot per counter; nmi_create_files() exposes the
 * fields of each slot (enabled, event, count, ...) through oprofilefs.
 */
struct op_counter_config counter_config[OP_MAX_COUNTER];

332
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
333 334 335 336
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
337
		struct dentry *dir;
338
		char buf[4];
339 340

		/* quick little hack to _not_ expose a counter if it is not
341 342 343 344 345 346 347
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

348
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
349
		dir = oprofilefs_mkdir(sb, root, buf);
350 351 352 353 354 355
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
356 357 358 359
	}

	return 0;
}
360

361 362
/* Module parameter: when non-zero, p4_init() skips its CPU model check */
static int p4force;
module_param(p4force, int, 0);
363 364

/*
 * Select the Pentium 4 model spec.
 *
 * @cpu_type: receives the oprofile CPU type string on success
 *
 * Returns 1 when a spec was chosen and 0 when NMI profiling cannot be used:
 * either the CPU model is rejected (unless p4force is set) or, on SMP
 * builds, the sibling count is neither 1 nor 2.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	if (!p4force) {
		if (cpu_model == 5 || cpu_model > 6)
			return 0;
	}

#ifndef CONFIG_SMP
	model = &op_p4_spec;
	*cpu_type = "i386/p4";
	return 1;
#else
	if (smp_num_siblings == 1) {
		model = &op_p4_spec;
		*cpu_type = "i386/p4";
		return 1;
	}

	if (smp_num_siblings == 2) {
		model = &op_p4_ht2_spec;
		*cpu_type = "i386/p4-ht";
		return 1;
	}
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

394
/*
 * Select the P6-family model spec.
 *
 * @cpu_type: receives the oprofile CPU type string on success
 *
 * Maps boot_cpu_data.x86_model to a CPU type name.  Returns 1 and installs
 * op_ppro_spec for a known model; returns 0, leaving @cpu_type and the
 * model untouched, for an unknown one.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	char *name;

	switch (cpu_model) {
	case 0 ... 2:
		name = "i386/ppro";
		break;
	case 3 ... 5:
		name = "i386/pii";
		break;
	case 6 ... 8:
		name = "i386/piii";
		break;
	case 9:
		name = "i386/p6_mobile";
		break;
	case 10 ... 13:
		name = "i386/p6";
		break;
	case 14:
		name = "i386/core";
		break;
	case 15:
	case 23:
	case 26:
		name = "i386/core_2";
		break;
	default:
		/* Unknown */
		return 0;
	}

	*cpu_type = name;
	model = &op_ppro_spec;
	return 1;
}

432
/* in order to get sysfs right */
L
Linus Torvalds 已提交
433 434
/* Set to 1 by op_nmi_init() on success; op_nmi_exit() only tears down when set */
static int using_nmi;

435
/*
 * Probe the boot CPU and, if it is supported, install the NMI based
 * profiling operations into @ops.
 *
 * @ops: operations table to fill in
 *
 * Returns 0 on success, or -ENODEV when there is no APIC or the
 * vendor/family/model combination is not handled.
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type;

	if (!cpu_has_apic)
		return -ENODEV;

	if (vendor == X86_VENDOR_AMD) {
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
		if (family == 6) {
			cpu_type = "i386/athlon";
		} else if (family == 0xf) {
			/* Actually it could be i386/hammer too, but give
			 * user space a consistent name.
			 */
			cpu_type = "x86-64/hammer";
		} else if (family == 0x10) {
			cpu_type = "x86-64/family10";
		} else {
			return -ENODEV;
		}
		/* every supported AMD family shares the Athlon spec */
		model = &op_athlon_spec;
	} else if (vendor == X86_VENDOR_INTEL) {
		int found;

		if (family == 0xf)		/* Pentium IV */
			found = p4_init(&cpu_type);
		else if (family == 6)		/* A P6-class processor */
			found = ppro_init(&cpu_type);
		else
			return -ENODEV;

		if (!found)
			return -ENODEV;
	} else {
		return -ENODEV;
	}

	init_sysfs();
#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	using_nmi = 1;

	ops->cpu_type = cpu_type;
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;

	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

506
void op_nmi_exit(void)
L
Linus Torvalds 已提交
507
{
508
	if (using_nmi) {
509
		exit_sysfs();
510 511 512 513
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
L
Linus Torvalds 已提交
514
}