nmi_int.c 10.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
16
#include <linux/moduleparam.h>
17
#include <linux/kdebug.h>
18
#include <linux/cpu.h>
L
Linus Torvalds 已提交
19 20 21
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
22

L
Linus Torvalds 已提交
23 24
#include "op_counter.h"
#include "op_x86_model.h"
25

26
/*
 * CPU-model specific operations used by every function in this file.
 * Chosen in op_nmi_init(), either directly or through p4_init()/ppro_init().
 */
static struct op_x86_model_spec const *model;
27 28
/*
 * Per-CPU MSR bookkeeping; the counters/controls arrays inside are
 * allocated by allocate_msrs() and released by free_msrs().
 */
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
/* APIC_LVTPC value read in nmi_cpu_setup() and written back in nmi_cpu_shutdown() */
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
29

L
Linus Torvalds 已提交
30 31
static int nmi_start(void);
static void nmi_stop(void);
32 33
static void nmi_cpu_start(void *dummy);
static void nmi_cpu_stop(void *dummy);
L
Linus Torvalds 已提交
34 35 36 37

/*
 * 0 == registered but off, 1 == registered and on.
 * Set to 1 at the end of nmi_setup() and back to 0 at the start of
 * nmi_shutdown(); the suspend/resume hooks only act while it is 1.
 */
static int nmi_enabled = 0;

38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
#ifdef CONFIG_SMP
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;
	switch (action) {
	case CPU_DOWN_FAILED:
	case CPU_ONLINE:
		smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
		break;
	case CPU_DOWN_PREPARE:
		smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
		break;
	}
	return NOTIFY_DONE;
}

/*
 * Hotplug notifier block; registered in op_nmi_init() and unregistered in
 * op_nmi_exit().
 */
static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};
#endif

L
Linus Torvalds 已提交
60 61
#ifdef CONFIG_PM

62
/*
 * sysdev suspend hook: stops the counters on the calling CPU while
 * profiling is set up (nmi_enabled == 1). Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}

static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled == 1)
73
		nmi_cpu_start(NULL);
L
Linus Torvalds 已提交
74 75 76 77
	return 0;
}

static struct sysdev_class oprofile_sysclass = {
78
	.name		= "oprofile",
L
Linus Torvalds 已提交
79 80 81 82 83 84 85 86 87
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* The single device instance (id 0) belonging to oprofile_sysclass */
static struct sys_device device_oprofile = {
	.id	= 0,
	.cls	= &oprofile_sysclass,
};

88
/*
 * Register the oprofile sysdev class and, if that worked, its device.
 *
 * Returns 0 on success, otherwise the error from whichever registration
 * failed.
 *
 * NOTE(review): when sysdev_register() fails the class stays registered,
 * and the caller in this file does not look at the return value.
 */
static int __init init_sysfs(void)
{
	int error = sysdev_class_register(&oprofile_sysclass);

	if (error)
		return error;

	return sysdev_register(&device_oprofile);
}

98
static void exit_sysfs(void)
L
Linus Torvalds 已提交
99 100 101 102 103 104
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
105 106
/* Without CONFIG_PM there are no suspend/resume hooks, so these are no-ops. */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
L
Linus Torvalds 已提交
107 108
#endif /* CONFIG_PM */

109 110
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
111
{
112 113 114 115
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

116
	switch (val) {
117
	case DIE_NMI:
118
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
119 120 121 122 123 124
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
125
}
126

127
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
128 129
{
	unsigned int const nr_ctrs = model->num_counters;
130 131 132
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
133 134 135
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
136
		if (counters[i].addr) {
137 138 139 140
			rdmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
141
	}
142

L
Linus Torvalds 已提交
143
	for (i = 0; i < nr_ctrls; ++i) {
144
		if (controls[i].addr) {
145 146 147 148
			rdmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
149 150 151
	}
}

152
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
153 154
{
	int cpu = smp_processor_id();
155
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
156 157 158 159 160 161
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
162
	for_each_possible_cpu(i) {
163 164 165 166
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
167 168 169 170 171 172 173 174 175 176
	}
}

static int allocate_msrs(void)
{
	int success = 1;
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

	int i;
C
Chris Wright 已提交
177
	for_each_possible_cpu(i) {
178 179 180
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
181 182 183
			success = 0;
			break;
		}
184 185 186
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
187 188 189 190 191 192 193 194 195 196 197
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

198
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
199 200
{
	int cpu = smp_processor_id();
201
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
202 203 204
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(msrs);
	spin_unlock(&oprofilefs_lock);
205
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
206 207 208
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

209 210 211 212 213
/*
 * Die-notifier block; registered in nmi_setup() and unregistered in
 * nmi_shutdown().
 */
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
	.priority = 0
};
L
Linus Torvalds 已提交
214 215 216

static int nmi_setup(void)
{
217
	int err = 0;
218
	int cpu;
219

L
Linus Torvalds 已提交
220 221 222
	if (!allocate_msrs())
		return -ENOMEM;

223 224
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
225
		free_msrs();
226
		return err;
L
Linus Torvalds 已提交
227
	}
228

L
Linus Torvalds 已提交
229 230 231
	/* We need to serialize save and setup for HT because the subset
	 * of msrs are distinct for save and setup operations
	 */
232 233

	/* Assume saved/restored counters are the same on all CPUs */
234
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
235
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
236
		if (cpu != 0) {
237 238
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
239 240
				sizeof(struct op_msr) * model->num_counters);

241 242
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
243 244 245
				sizeof(struct op_msr) * model->num_controls);
		}

246
	}
247 248
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
249 250 251 252
	nmi_enabled = 1;
	return 0;
}

253
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
254 255
{
	unsigned int const nr_ctrs = model->num_counters;
256 257 258
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
259 260 261
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
262
		if (controls[i].addr) {
263 264 265 266
			wrmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
267
	}
268

L
Linus Torvalds 已提交
269
	for (i = 0; i < nr_ctrs; ++i) {
270
		if (counters[i].addr) {
271 272 273 274
			wrmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
275 276 277
	}
}

278
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
279 280 281
{
	unsigned int v;
	int cpu = smp_processor_id();
282
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
283

L
Linus Torvalds 已提交
284 285 286 287 288 289 290
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
291
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
292 293 294 295 296 297
	apic_write(APIC_LVTERR, v);
	nmi_restore_registers(msrs);
}

static void nmi_shutdown(void)
{
298 299
	struct op_msrs *msrs;

L
Linus Torvalds 已提交
300
	nmi_enabled = 0;
301
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
302
	unregister_die_notifier(&profile_exceptions_nb);
303
	msrs = &get_cpu_var(cpu_msrs);
304
	model->shutdown(msrs);
L
Linus Torvalds 已提交
305
	free_msrs();
306
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
307 308
}

309
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
310
{
311
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
312 313 314 315 316
	model->start(msrs);
}

static int nmi_start(void)
{
317
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
318 319
	return 0;
}
320 321

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
322
{
323
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
324 325
	model->stop(msrs);
}
326

L
Linus Torvalds 已提交
327 328
static void nmi_stop(void)
{
329
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
330 331 332 333
}

/*
 * Per-counter settings. nmi_create_files() exposes the fields of each entry
 * as oprofilefs files. Not static - presumably read by the model-specific
 * code as well; verify before changing linkage.
 */
struct op_counter_config counter_config[OP_MAX_COUNTER];

334
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
335 336 337 338
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
339
		struct dentry *dir;
340
		char buf[4];
341 342

		/* quick little hack to _not_ expose a counter if it is not
343 344 345 346 347 348 349
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

350
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
351
		dir = oprofilefs_mkdir(sb, root, buf);
352 353 354 355 356 357
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
358 359 360 361
	}

	return 0;
}
362

363 364
/* Module parameter: when non-zero, p4_init() skips its CPU-model check. */
static int p4force;
module_param(p4force, int, 0);
365 366

/*
 * Select the Pentium 4 model spec.
 *
 * Models 5 and anything above 6 are rejected unless the p4force module
 * parameter is set. On SMP builds the spec depends on smp_num_siblings:
 * 1 selects the plain P4 spec, 2 the HT spec; any other value is refused
 * with a log message.
 *
 * @cpu_type: receives the oprofile CPU type string on success
 *
 * Returns 1 when a spec was selected, 0 otherwise.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	int unsupported = (cpu_model == 5 || cpu_model > 6);

	if (unsupported && !p4force)
		return 0;

#ifdef CONFIG_SMP
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#else
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

396
/*
 * Select the P6-family model spec and map the CPU model number to the
 * oprofile CPU type string.
 *
 * @cpu_type: receives the CPU type string on success; left untouched for
 *            an unknown model
 *
 * Returns 1 when the model number is known, 0 otherwise.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	char *name;

	switch (cpu_model) {
	case 0 ... 2:
		name = "i386/ppro";
		break;
	case 3 ... 5:
		name = "i386/pii";
		break;
	case 6 ... 8:
		name = "i386/piii";
		break;
	case 9:
		name = "i386/p6_mobile";
		break;
	case 10 ... 13:
		name = "i386/p6";
		break;
	case 14:
		name = "i386/core";
		break;
	case 15:
	case 23:
	case 26:
		name = "i386/core_2";
		break;
	default:
		/* Unknown */
		return 0;
	}

	*cpu_type = name;
	model = &op_ppro_spec;
	return 1;
}

434
/* in order to get sysfs right */
L
Linus Torvalds 已提交
435 436
/*
 * Set to 1 once op_nmi_init() has succeeded; op_nmi_exit() only tears down
 * the sysfs and hotplug hooks when it is non-zero.
 */
static int using_nmi;

437
/*
 * Probe the boot CPU and, if it is supported, wire the NMI-based profiler
 * into @ops.
 *
 * Supported: AMD families 6, 0xf and 0x10 (all using the athlon spec), and
 * Intel families 0xf (via p4_init()) and 6 (via ppro_init()).
 *
 * @ops: operations table to fill in; untouched on failure
 *
 * Returns 0 on success, -ENODEV when there is no APIC or the CPU is not
 * supported.
 *
 * NOTE(review): the result of init_sysfs() and of register_cpu_notifier()
 * is not checked.
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type;

	if (!cpu_has_apic)
		return -ENODEV;

	if (vendor == X86_VENDOR_AMD) {
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
		switch (family) {
		case 6:
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			/* Actually it could be i386/hammer too, but give
			 user space an consistent name. */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			cpu_type = "x86-64/family10";
			break;
		default:
			return -ENODEV;
		}
		/* every supported AMD family shares the same spec */
		model = &op_athlon_spec;
	} else if (vendor == X86_VENDOR_INTEL) {
		int found;

		if (family == 0xf)		/* Pentium IV */
			found = p4_init(&cpu_type);
		else if (family == 6)		/* A P6-class processor */
			found = ppro_init(&cpu_type);
		else
			found = 0;

		if (!found)
			return -ENODEV;
	} else {
		return -ENODEV;
	}

	init_sysfs();
#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	using_nmi = 1;

	ops->cpu_type = cpu_type;
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;

	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

508
void op_nmi_exit(void)
L
Linus Torvalds 已提交
509
{
510
	if (using_nmi) {
511
		exit_sysfs();
512 513 514 515
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
L
Linus Torvalds 已提交
516
}