/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
17
#include <linux/moduleparam.h>
18
#include <linux/kdebug.h>
19
#include <linux/cpu.h>
L
Linus Torvalds 已提交
20 21 22
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
23

L
Linus Torvalds 已提交
24 25
#include "op_counter.h"
#include "op_x86_model.h"
26

27
/* Active CPU model description; assigned by p4_init(), ppro_init() or op_nmi_init() */
static struct op_x86_model_spec const *model;
28 29
/* Per-CPU MSR bookkeeping; the counters/controls arrays are allocated in allocate_msrs() */
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
/* Per-CPU copy of APIC_LVTPC taken in nmi_cpu_setup() and written back in nmi_cpu_shutdown() */
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
30

L
Linus Torvalds 已提交
31 32 33
/*
 * 0 == registered but off, 1 == registered and on.
 * Set to 1 at the end of nmi_setup() and cleared at the start of
 * nmi_shutdown().
 */
static int nmi_enabled = 0;

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/* common functions */

u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
		    struct op_counter_config *counter_config)
{
	u64 val = 0;
	u16 event = (u16)counter_config->event;

	val |= ARCH_PERFMON_EVENTSEL_INT;
	val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
	val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
	val |= (counter_config->unit_mask & 0xFF) << 8;
	event &= model->event_mask ? model->event_mask : 0xFF;
	val |= event & 0xFF;
	val |= (event & 0x0F00) << 24;

	return val;
}


54 55
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
56
{
57 58 59 60
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

61
	switch (val) {
62
	case DIE_NMI:
63
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
64 65 66 67 68 69
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
70
}
71

72
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
73 74
{
	unsigned int const nr_ctrs = model->num_counters;
75 76 77
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
78 79 80
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
81 82
		if (counters[i].addr)
			rdmsrl(counters[i].addr, counters[i].saved);
L
Linus Torvalds 已提交
83
	}
84

L
Linus Torvalds 已提交
85
	for (i = 0; i < nr_ctrls; ++i) {
86 87
		if (controls[i].addr)
			rdmsrl(controls[i].addr, controls[i].saved);
L
Linus Torvalds 已提交
88 89 90
	}
}

91
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
92 93
{
	int cpu = smp_processor_id();
94
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
95 96 97 98 99 100
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
101
	for_each_possible_cpu(i) {
102 103 104 105
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
106 107 108 109 110
	}
}

static int allocate_msrs(void)
{
111
	int success = 1;
L
Linus Torvalds 已提交
112 113 114
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

115
	int i;
C
Chris Wright 已提交
116
	for_each_possible_cpu(i) {
117 118 119
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
120 121 122
			success = 0;
			break;
		}
123 124
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
125
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
126 127 128 129 130 131 132 133 134 135 136
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

137
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
138 139
{
	int cpu = smp_processor_id();
140
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
141
	spin_lock(&oprofilefs_lock);
142
	model->setup_ctrs(model, msrs);
L
Linus Torvalds 已提交
143
	spin_unlock(&oprofilefs_lock);
144
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
145 146 147
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

148 149 150 151 152
/*
 * Die-chain notifier block routing events to profile_exceptions_notify().
 * Registered in nmi_setup() and unregistered in nmi_shutdown().
 */
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
	.priority = 0
};
L
Linus Torvalds 已提交
153 154 155

static int nmi_setup(void)
{
156
	int err = 0;
157
	int cpu;
158

L
Linus Torvalds 已提交
159 160 161
	if (!allocate_msrs())
		return -ENOMEM;

162 163
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
164
		free_msrs();
165
		return err;
L
Linus Torvalds 已提交
166
	}
167

168
	/* We need to serialize save and setup for HT because the subset
L
Linus Torvalds 已提交
169 170
	 * of msrs are distinct for save and setup operations
	 */
171 172

	/* Assume saved/restored counters are the same on all CPUs */
173
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
174
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
175
		if (cpu != 0) {
176 177
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
178 179
				sizeof(struct op_msr) * model->num_counters);

180 181
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
182 183
				sizeof(struct op_msr) * model->num_controls);
		}
184

185
	}
186 187
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
188 189 190 191
	nmi_enabled = 1;
	return 0;
}

192
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
193 194
{
	unsigned int const nr_ctrs = model->num_counters;
195 196 197
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
198 199 200
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
201 202
		if (controls[i].addr)
			wrmsrl(controls[i].addr, controls[i].saved);
L
Linus Torvalds 已提交
203
	}
204

L
Linus Torvalds 已提交
205
	for (i = 0; i < nr_ctrs; ++i) {
206 207
		if (counters[i].addr)
			wrmsrl(counters[i].addr, counters[i].saved);
L
Linus Torvalds 已提交
208 209 210
	}
}

211
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
212 213 214
{
	unsigned int v;
	int cpu = smp_processor_id();
215
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
216

L
Linus Torvalds 已提交
217 218 219 220 221 222 223
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
224
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
225
	apic_write(APIC_LVTERR, v);
226
	nmi_restore_registers(msrs);
L
Linus Torvalds 已提交
227 228 229 230
}

static void nmi_shutdown(void)
{
231 232
	struct op_msrs *msrs;

L
Linus Torvalds 已提交
233
	nmi_enabled = 0;
234
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
235
	unregister_die_notifier(&profile_exceptions_nb);
236
	msrs = &get_cpu_var(cpu_msrs);
237
	model->shutdown(msrs);
L
Linus Torvalds 已提交
238
	free_msrs();
239
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
240 241
}

242
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
243
{
244
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
245 246 247 248 249
	model->start(msrs);
}

static int nmi_start(void)
{
250
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
251 252
	return 0;
}
253 254

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
255
{
256
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
257 258
	model->stop(msrs);
}
259

L
Linus Torvalds 已提交
260 261
static void nmi_stop(void)
{
262
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
263 264 265 266
}

/* Per-counter settings; nmi_create_files() exposes each field through oprofilefs */
struct op_counter_config counter_config[OP_MAX_COUNTER];

267
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
268 269 270 271
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
272
		struct dentry *dir;
273
		char buf[4];
274 275

		/* quick little hack to _not_ expose a counter if it is not
276 277 278 279 280 281 282
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

283
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
284
		dir = oprofilefs_mkdir(sb, root, buf);
285 286 287 288 289 290
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
291 292 293 294
	}

	return 0;
}
295

296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
#ifdef CONFIG_SMP
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;
	switch (action) {
	case CPU_DOWN_FAILED:
	case CPU_ONLINE:
		smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
		break;
	case CPU_DOWN_PREPARE:
		smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
		break;
	}
	return NOTIFY_DONE;
}

/* Hotplug notifier block; registered in op_nmi_init(), unregistered in op_nmi_exit() */
static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};
#endif

#ifdef CONFIG_PM

/*
 * sysdev suspend hook: stop the counters on the calling CPU when profiling
 * is set up. Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}

/*
 * sysdev resume hook: restart the counters on the calling CPU when
 * profiling is set up. Always returns 0.
 */
static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_cpu_start(NULL);
	return 0;
}

/* sysdev class that wires nmi_suspend()/nmi_resume() into power management */
static struct sysdev_class oprofile_sysclass = {
	.name		= "oprofile",
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* The single sysdev instance belonging to oprofile_sysclass */
static struct sys_device device_oprofile = {
	.id	= 0,
	.cls	= &oprofile_sysclass,
};

/*
 * Register the oprofile sysdev class and, if that worked, its device.
 * Returns 0 on success or the first registration error.
 */
static int __init init_sysfs(void)
{
	int error = sysdev_class_register(&oprofile_sysclass);

	if (error)
		return error;

	return sysdev_register(&device_oprofile);
}

/* Reverse of init_sysfs(): unregister the device first, then its class. */
static void exit_sysfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
/* Without CONFIG_PM there is no suspend/resume hook to register: no-op stubs */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

367
/*
 * Select the Pentium 4 model spec.
 *
 * Models above 6 and model 5 are rejected. On uniprocessor builds the plain
 * P4 spec is always used; on SMP builds the choice depends on
 * smp_num_siblings (1 -> p4, 2 -> p4-ht), anything else is refused.
 *
 * On success *cpu_type and model are set and 1 is returned; otherwise 0.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	if (cpu_model == 5 || cpu_model > 6)
		return 0;

#ifdef CONFIG_SMP
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
#else
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#endif
}

397 398 399 400 401 402 403 404 405 406 407
/* Non-zero once the "cpu_type" parameter was given as "archperfmon" */
static int force_arch_perfmon;

/*
 * Setter for the "cpu_type" module parameter. Only the value "archperfmon"
 * has an effect; any other string is silently accepted. Always returns 0.
 */
static int force_cpu_type(const char *str, struct kernel_param *kp)
{
	if (strcmp(str, "archperfmon") != 0)
		return 0;

	force_arch_perfmon = 1;
	printk(KERN_INFO "oprofile: forcing architectural perfmon\n");

	return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
408

409
/*
 * Select the model spec and cpu_type string for a P6-family Intel CPU.
 *
 * Returns 0 without touching model or *cpu_type when architectural perfmon
 * was forced and is available, or when the CPU model number is unknown.
 * Otherwise sets *cpu_type and model and returns 1.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	/*
	 * Chosen spec; only committed to "model" once the CPU is recognised.
	 * Assigning "model" directly inside the switch and then again after
	 * it made the per-case choice (model 26) ineffective.
	 */
	struct op_x86_model_spec const *spec = &op_ppro_spec;

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	switch (cpu_model) {
	case 0 ... 2:
		*cpu_type = "i386/ppro";
		break;
	case 3 ... 5:
		*cpu_type = "i386/pii";
		break;
	case 6 ... 8:
	case 10 ... 11:
		*cpu_type = "i386/piii";
		break;
	case 9:
	case 13:
		*cpu_type = "i386/p6_mobile";
		break;
	case 14:
		*cpu_type = "i386/core";
		break;
	case 15: case 23:
		*cpu_type = "i386/core_2";
		break;
	case 26:
		spec = &op_arch_perfmon_spec;
		*cpu_type = "i386/core_i7";
		break;
	case 28:
		*cpu_type = "i386/atom";
		break;
	default:
		/* Unknown */
		return 0;
	}

	model = spec;
	return 1;
}

453
/* in order to get sysfs right */
L
Linus Torvalds 已提交
454 455
/* Set to 1 at the end of a successful op_nmi_init(); checked by op_nmi_exit() */
static int using_nmi;

456
/**
 * op_nmi_init - probe the CPU and install the NMI-based oprofile operations
 * @ops: operations table to fill in
 *
 * Picks a model spec and cpu_type string from the CPU vendor and family,
 * registers the CPU hotplug notifier (SMP only), fills @ops with the NMI
 * callbacks and gives the model a chance to adjust them via model->init().
 *
 * Returns 0 on success, -ENODEV when there is no APIC or the CPU is not
 * supported, or the error returned by model->init().
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type = NULL;
	int ret = 0;

	if (!cpu_has_apic)
		return -ENODEV;

	switch (vendor) {
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */

		switch (family) {
		case 6:
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			/*
			 * Actually it could be i386/hammer too, but
			 * give user space a consistent name.
			 */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			cpu_type = "x86-64/family10";
			break;
		case 0x11:
			cpu_type = "x86-64/family11h";
			break;
		default:
			return -ENODEV;
		}
		model = &op_amd_spec;
		break;

	case X86_VENDOR_INTEL:
		switch (family) {
			/* Pentium IV */
		case 0xf:
			p4_init(&cpu_type);
			break;

			/* A P6-class processor */
		case 6:
			ppro_init(&cpu_type);
			break;

		default:
			break;
		}

		/* the helpers above set cpu_type only when they succeed */
		if (cpu_type)
			break;

		if (!cpu_has_arch_perfmon)
			return -ENODEV;

		/* use arch perfmon as fallback */
		cpu_type = "i386/arch_perfmon";
		model = &op_arch_perfmon_spec;
		break;

	default:
		return -ENODEV;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	/* default values, can be overwritten by model */
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

	if (model->init)
		ret = model->init(ops);
	if (ret) {
#ifdef CONFIG_SMP
		/*
		 * op_nmi_exit() only unregisters the notifier when using_nmi
		 * is set, which never happens on this path, so undo the
		 * registration here.
		 */
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
		return ret;
	}

	init_sysfs();
	using_nmi = 1;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

546
void op_nmi_exit(void)
L
Linus Torvalds 已提交
547
{
548
	if (using_nmi) {
549
		exit_sysfs();
550 551 552 553
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
554 555
	if (model->exit)
		model->exit();
L
Linus Torvalds 已提交
556
}