/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>

#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include "op_counter.h"
#include "op_x86_model.h"
static struct op_x86_model_spec const *model;
28 29
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
30

L
Linus Torvalds 已提交
31 32 33
/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/* common functions */

/*
 * Build the event-select (control) MSR value for one counter from its
 * user-supplied configuration. Always enables the counter interrupt;
 * USR/OS bits follow the config, unit mask goes to bits 8-15, and the
 * event code is split into its low byte plus the extended bits at 32-35.
 */
u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
		    struct op_counter_config *counter_config)
{
	u16 event = (u16)counter_config->event;
	u64 ctrl = ARCH_PERFMON_EVENTSEL_INT;

	if (counter_config->user)
		ctrl |= ARCH_PERFMON_EVENTSEL_USR;
	if (counter_config->kernel)
		ctrl |= ARCH_PERFMON_EVENTSEL_OS;
	ctrl |= (counter_config->unit_mask & 0xFF) << 8;
	/* clamp the event code to what the model supports (0xFF default) */
	event &= model->event_mask ? model->event_mask : 0xFF;
	ctrl |= event & 0xFF;
	ctrl |= (event & 0x0F00) << 24;

	return ctrl;
}
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
56
{
57 58 59 60
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

61
	switch (val) {
62
	case DIE_NMI:
63 64 65
	case DIE_NMI_IPI:
		model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
		ret = NOTIFY_STOP;
66 67 68 69 70
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
71
}
72

73
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
74
{
75 76
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
77 78
	unsigned int i;

79
	for (i = 0; i < model->num_counters; ++i) {
80 81
		if (counters[i].addr)
			rdmsrl(counters[i].addr, counters[i].saved);
L
Linus Torvalds 已提交
82
	}
83

84
	for (i = 0; i < model->num_controls; ++i) {
85 86
		if (controls[i].addr)
			rdmsrl(controls[i].addr, controls[i].saved);
L
Linus Torvalds 已提交
87 88 89 90 91 92
	}
}

static void free_msrs(void)
{
	int i;
93
	for_each_possible_cpu(i) {
94 95 96 97
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
98 99 100 101 102
	}
}

static int allocate_msrs(void)
{
103
	int success = 1;
L
Linus Torvalds 已提交
104 105 106
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

107
	int i;
C
Chris Wright 已提交
108
	for_each_possible_cpu(i) {
109 110 111
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
112 113 114
			success = 0;
			break;
		}
115 116
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
117
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
118 119 120 121 122 123 124 125 126 127 128
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

129
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
130 131
{
	int cpu = smp_processor_id();
132
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
133
	nmi_cpu_save_registers(msrs);
L
Linus Torvalds 已提交
134
	spin_lock(&oprofilefs_lock);
135
	model->setup_ctrs(model, msrs);
L
Linus Torvalds 已提交
136
	spin_unlock(&oprofilefs_lock);
137
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
138 139 140
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

141 142 143
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
144
	.priority = 2
145
};
L
Linus Torvalds 已提交
146 147 148

static int nmi_setup(void)
{
149
	int err = 0;
150
	int cpu;
151

L
Linus Torvalds 已提交
152 153 154
	if (!allocate_msrs())
		return -ENOMEM;

155 156
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
157
		free_msrs();
158
		return err;
L
Linus Torvalds 已提交
159
	}
160

161
	/* We need to serialize save and setup for HT because the subset
L
Linus Torvalds 已提交
162 163
	 * of msrs are distinct for save and setup operations
	 */
164 165

	/* Assume saved/restored counters are the same on all CPUs */
166
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
167
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
168
		if (cpu != 0) {
169 170
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
171 172
				sizeof(struct op_msr) * model->num_counters);

173 174
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
175 176
				sizeof(struct op_msr) * model->num_controls);
		}
177

178
	}
179
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
180 181 182 183
	nmi_enabled = 1;
	return 0;
}

184
static void nmi_cpu_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
185
{
186 187
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
188 189
	unsigned int i;

190
	for (i = 0; i < model->num_controls; ++i) {
191 192
		if (controls[i].addr)
			wrmsrl(controls[i].addr, controls[i].saved);
L
Linus Torvalds 已提交
193
	}
194

195
	for (i = 0; i < model->num_counters; ++i) {
196 197
		if (counters[i].addr)
			wrmsrl(counters[i].addr, counters[i].saved);
L
Linus Torvalds 已提交
198 199 200
	}
}

201
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
202 203 204
{
	unsigned int v;
	int cpu = smp_processor_id();
205
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
206

L
Linus Torvalds 已提交
207 208 209 210 211 212 213
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
214
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
215
	apic_write(APIC_LVTERR, v);
216
	nmi_cpu_restore_registers(msrs);
L
Linus Torvalds 已提交
217 218 219 220
}

static void nmi_shutdown(void)
{
221 222
	struct op_msrs *msrs;

L
Linus Torvalds 已提交
223
	nmi_enabled = 0;
224
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
225
	unregister_die_notifier(&profile_exceptions_nb);
226
	msrs = &get_cpu_var(cpu_msrs);
227
	model->shutdown(msrs);
L
Linus Torvalds 已提交
228
	free_msrs();
229
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
230 231
}

232
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
233
{
234
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
235 236 237 238 239
	model->start(msrs);
}

static int nmi_start(void)
{
240
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
241 242
	return 0;
}
243 244

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
245
{
246
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
247 248
	model->stop(msrs);
}
249

L
Linus Torvalds 已提交
250 251
static void nmi_stop(void)
{
252
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
253 254 255 256
}

struct op_counter_config counter_config[OP_MAX_COUNTER];

257
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
258 259 260 261
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
262
		struct dentry *dir;
263
		char buf[4];
264 265

		/* quick little hack to _not_ expose a counter if it is not
266 267 268 269 270 271 272
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

273
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
274
		dir = oprofilefs_mkdir(sb, root, buf);
275 276 277 278 279 280
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
281 282 283 284
	}

	return 0;
}
285

286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
#ifdef CONFIG_SMP
/*
 * CPU hotplug callback: start the counters on a CPU that comes online
 * (or whose offlining failed), and stop them on a CPU about to go down.
 */
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;
	switch (action) {
	case CPU_DOWN_FAILED:
	case CPU_ONLINE:
		/* wait == 0: no need to block on the start IPI */
		smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
		break;
	case CPU_DOWN_PREPARE:
		/* wait == 1: counters must be stopped before the CPU goes away */
		smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};
#endif

#ifdef CONFIG_PM

/* Suspend hook: stop profiling on the one remaining CPU if active. */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* Only one CPU left, just stop that one */
	if (nmi_enabled == 1)
		nmi_cpu_stop(NULL);
	return 0;
}

/* Resume hook: restart profiling on this CPU if it was active. */
static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled == 1)
		nmi_cpu_start(NULL);
	return 0;
}

/* sysdev class providing the suspend/resume callbacks above */
static struct sysdev_class oprofile_sysclass = {
	.name		= "oprofile",
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

static struct sys_device device_oprofile = {
	.id	= 0,
	.cls	= &oprofile_sysclass,
};

/* Register the sysdev class+device so the PM hooks are invoked. */
static int __init init_sysfs(void)
{
	int error;

	error = sysdev_class_register(&oprofile_sysclass);
	if (!error)
		error = sysdev_register(&device_oprofile);
	return error;
}

static void exit_sysfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
/* no PM support: the hooks collapse to no-ops */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

357
/*
 * Detect a supported Pentium 4 and pick the matching model driver.
 * Returns 1 with *cpu_type and model set on success, 0 if this P4
 * variant (or its HyperThreading configuration) is unsupported.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	/* only models 0-4 and 6 are handled */
	if (cpu_model > 6 || cpu_model == 5)
		return 0;

#ifndef CONFIG_SMP
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#else
	switch (smp_num_siblings) {
	case 1:
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;

	case 2:
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#endif

	/* reached only with more than two HT siblings per core */
	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

387 388 389
static int force_arch_perfmon;
static int force_cpu_type(const char *str, struct kernel_param *kp)
{
390
	if (!strcmp(str, "arch_perfmon")) {
391 392 393 394 395 396 397
		force_arch_perfmon = 1;
		printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
	}

	return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
398

399
/*
 * Detect a P6-family Intel CPU and pick the matching cpu_type string
 * and model driver. Returns 1 on success, 0 for unknown models or when
 * the user forced architectural perfmon (and the CPU supports it).
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	struct op_x86_model_spec const *spec = &op_ppro_spec;	/* default */

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	switch (cpu_model) {
	case 0 ... 2:
		*cpu_type = "i386/ppro";
		break;
	case 3 ... 5:
		*cpu_type = "i386/pii";
		break;
	case 6 ... 8:
	case 10 ... 11:
		*cpu_type = "i386/piii";
		break;
	case 9:
	case 13:
		*cpu_type = "i386/p6_mobile";
		break;
	case 14:
		*cpu_type = "i386/core";
		break;
	case 15: case 23:
		*cpu_type = "i386/core_2";
		break;
	case 26:
		/* Core i7 uses the arch perfmon driver, not op_ppro_spec */
		spec = &op_arch_perfmon_spec;
		*cpu_type = "i386/core_i7";
		break;
	case 28:
		*cpu_type = "i386/atom";
		break;
	default:
		/* Unknown */
		return 0;
	}

	model = spec;
	return 1;
}

444
/* in order to get sysfs right */
static int using_nmi;

447
/*
 * Entry point for NMI-based profiling. Identifies the CPU, selects a
 * model driver and cpu_type string, wires the oprofile operations, and
 * registers hotplug/PM hooks. Returns 0 on success or -ENODEV when the
 * CPU is unsupported (or has no local APIC).
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type = NULL;
	int ret = 0;

	if (!cpu_has_apic)
		return -ENODEV;

	switch (vendor) {
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */

		switch (family) {
		case 6:
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			/*
			 * Actually it could be i386/hammer too, but
			 * give user space an consistent name.
			 */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			cpu_type = "x86-64/family10";
			break;
		case 0x11:
			cpu_type = "x86-64/family11h";
			break;
		default:
			return -ENODEV;
		}
		model = &op_amd_spec;
		break;

	case X86_VENDOR_INTEL:
		switch (family) {
			/* Pentium IV */
		case 0xf:
			p4_init(&cpu_type);
			break;

			/* A P6-class processor */
		case 6:
			ppro_init(&cpu_type);
			break;

		default:
			break;
		}

		if (cpu_type)
			break;

		if (!cpu_has_arch_perfmon)
			return -ENODEV;

		/* use arch perfmon as fallback */
		cpu_type = "i386/arch_perfmon";
		model = &op_arch_perfmon_spec;
		break;

	default:
		return -ENODEV;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	/* default values, can be overwritten by model */
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

	if (model->init)
		ret = model->init(ops);
	if (ret)
		return ret;

	init_sysfs();
	using_nmi = 1;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

537
void op_nmi_exit(void)
L
Linus Torvalds 已提交
538
{
539
	if (using_nmi) {
540
		exit_sysfs();
541 542 543 544
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
545 546
	if (model->exit)
		model->exit();
L
Linus Torvalds 已提交
547
}