nmi_int.c 10.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
16
#include <linux/moduleparam.h>
17
#include <linux/kdebug.h>
L
Linus Torvalds 已提交
18 19 20
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
21

L
Linus Torvalds 已提交
22 23
#include "op_counter.h"
#include "op_x86_model.h"
24

25
static struct op_x86_model_spec const *model;
26 27
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
28

L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36
/* Forward declarations: the suspend/resume hooks below call these
 * before their definitions appear later in the file.
 */
static int nmi_start(void);
static void nmi_stop(void);

/* 0 == registered but off, 1 == registered and on.
 * Set to 1 at the end of nmi_setup() and cleared by nmi_shutdown().
 */
static int nmi_enabled = 0;

#ifdef CONFIG_PM

37
/*
 * sysdev suspend hook.  Stops the counters via nmi_stop(), but only
 * while nmi_enabled is 1.  Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_stop();
	return 0;
}

/*
 * sysdev resume hook.  Restarts the counters via nmi_start(), but only
 * while nmi_enabled is 1.  Always returns 0; the result of nmi_start()
 * is not propagated.
 */
static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_start();
	return 0;
}

static struct sysdev_class oprofile_sysclass = {
52
	.name		= "oprofile",
L
Linus Torvalds 已提交
53 54 55 56 57 58 59 60 61
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* The one sysdev instance (id 0) that belongs to oprofile_sysclass. */
static struct sys_device device_oprofile = {
	.cls	= &oprofile_sysclass,
	.id	= 0,
};

62
/*
 * Register the oprofile sysdev class and, if that worked, its device.
 *
 * Returns 0 on success or the first non-zero registration result.
 * If the device registration fails the class is left registered.
 */
static int __init init_sysfs(void)
{
	int error = sysdev_class_register(&oprofile_sysclass);

	if (error)
		return error;

	return sysdev_register(&device_oprofile);
}

72
static void exit_sysfs(void)
L
Linus Torvalds 已提交
73 74 75 76 77 78
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
79 80
/*
 * Without CONFIG_PM the suspend/resume hooks are not built, so both
 * helpers expand to empty statements.  In this configuration
 * init_sysfs() yields no value and cannot be used in an expression.
 */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
L
Linus Torvalds 已提交
81 82
#endif /* CONFIG_PM */

83 84
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
85
{
86 87 88 89
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

90
	switch (val) {
91
	case DIE_NMI:
92
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
93 94 95 96 97 98
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
99
}
100

101
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
102 103
{
	unsigned int const nr_ctrs = model->num_counters;
104 105 106
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
107 108 109
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
110
		if (counters[i].addr) {
111 112 113 114
			rdmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
115
	}
116

L
Linus Torvalds 已提交
117
	for (i = 0; i < nr_ctrls; ++i) {
118
		if (controls[i].addr) {
119 120 121 122
			rdmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
123 124 125
	}
}

126
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
127 128
{
	int cpu = smp_processor_id();
129
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
130 131 132 133 134 135
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
136
	for_each_possible_cpu(i) {
137 138 139 140
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
141 142 143 144 145 146 147 148 149 150
	}
}

static int allocate_msrs(void)
{
	int success = 1;
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

	int i;
C
Chris Wright 已提交
151
	for_each_possible_cpu(i) {
152 153 154
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
155 156 157
			success = 0;
			break;
		}
158 159 160
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
161 162 163 164 165 166 167 168 169 170 171
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

172
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
173 174
{
	int cpu = smp_processor_id();
175
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
176 177 178
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(msrs);
	spin_unlock(&oprofilefs_lock);
179
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
180 181 182
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

183 184 185 186 187
/* Notifier block handed to register_die_notifier() in nmi_setup() and
 * removed again in nmi_shutdown(); dispatches to
 * profile_exceptions_notify().
 */
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
	.priority = 0
};
L
Linus Torvalds 已提交
188 189 190

static int nmi_setup(void)
{
191
	int err = 0;
192
	int cpu;
193

L
Linus Torvalds 已提交
194 195 196
	if (!allocate_msrs())
		return -ENOMEM;

197 198
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
199
		free_msrs();
200
		return err;
L
Linus Torvalds 已提交
201
	}
202

L
Linus Torvalds 已提交
203 204 205
	/* We need to serialize save and setup for HT because the subset
	 * of msrs are distinct for save and setup operations
	 */
206 207

	/* Assume saved/restored counters are the same on all CPUs */
208
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
209
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
210
		if (cpu != 0) {
211 212
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
213 214
				sizeof(struct op_msr) * model->num_counters);

215 216
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
217 218 219
				sizeof(struct op_msr) * model->num_controls);
		}

220
	}
221 222
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
L
Linus Torvalds 已提交
223 224 225 226
	nmi_enabled = 1;
	return 0;
}

227
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
228 229
{
	unsigned int const nr_ctrs = model->num_counters;
230 231 232
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
233 234 235
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
236
		if (controls[i].addr) {
237 238 239 240
			wrmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
241
	}
242

L
Linus Torvalds 已提交
243
	for (i = 0; i < nr_ctrs; ++i) {
244
		if (counters[i].addr) {
245 246 247 248
			wrmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
249 250 251
	}
}

252
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
253 254 255
{
	unsigned int v;
	int cpu = smp_processor_id();
256
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
257

L
Linus Torvalds 已提交
258 259 260 261 262 263 264
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
265
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
266 267 268 269 270 271
	apic_write(APIC_LVTERR, v);
	nmi_restore_registers(msrs);
}

static void nmi_shutdown(void)
{
272
	struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
273
	nmi_enabled = 0;
274
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
275
	unregister_die_notifier(&profile_exceptions_nb);
276
	model->shutdown(msrs);
L
Linus Torvalds 已提交
277
	free_msrs();
278
	put_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
279 280
}

281
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
282
{
283
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
284 285 286 287 288
	model->start(msrs);
}

static int nmi_start(void)
{
289
	on_each_cpu(nmi_cpu_start, NULL, 1);
L
Linus Torvalds 已提交
290 291
	return 0;
}
292 293

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
294
{
295
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
296 297
	model->stop(msrs);
}
298

L
Linus Torvalds 已提交
299 300
static void nmi_stop(void)
{
301
	on_each_cpu(nmi_cpu_stop, NULL, 1);
L
Linus Torvalds 已提交
302 303 304 305
}

/* Per-counter settings.  nmi_create_files() exposes the enabled, event,
 * count, unit_mask, kernel and user fields of each entry as oprofilefs
 * files.
 */
struct op_counter_config counter_config[OP_MAX_COUNTER];

306
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
307 308 309 310
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
311
		struct dentry *dir;
312
		char buf[4];
313 314

		/* quick little hack to _not_ expose a counter if it is not
315 316 317 318 319 320 321
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

322
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
323
		dir = oprofilefs_mkdir(sb, root, buf);
324 325 326 327 328 329
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
330 331 332 333
	}

	return 0;
}
334

335 336
/* Module parameter: when non-zero, p4_init() skips its CPU model check
 * and accepts any model number.
 */
static int p4force;
module_param(p4force, int, 0);
337 338

/*
 * Select the Pentium 4 model spec.
 *
 * Models above 6, and model 5, are rejected unless p4force is set.
 * Without CONFIG_SMP the plain P4 spec is always chosen; with it, the
 * choice depends on smp_num_siblings (1: P4, 2: P4 with HT).
 *
 * On success *cpu_type and the global model are set and 1 is returned;
 * otherwise 0 is returned and neither is written.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	int model_rejected = (cpu_model > 6 || cpu_model == 5);

	if (model_rejected && !p4force)
		return 0;

#ifndef CONFIG_SMP
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#else
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#endif

	/* only reached on SMP builds with any other sibling count */
	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

368
/*
 * Select the P6-family model spec and pick the cpu_type string from
 * boot_cpu_data.x86_model:
 *
 *   0-2 "i386/ppro", 3-5 "i386/pii", 9 "i386/p6_mobile",
 *   6-8 and 10-13 "i386/piii", 14 "i386/core", 15 and 23 "i386/core_2".
 *
 * Any other model returns 0 without touching *cpu_type or model.
 * Otherwise both are set and 1 is returned.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	char *name;

	/* above 0xd only 14, 15 and 23 are known */
	if (cpu_model > 0xd && cpu_model != 14 &&
	    cpu_model != 15 && cpu_model != 23)
		return 0;

	if (cpu_model == 14)
		name = "i386/core";
	else if (cpu_model == 15 || cpu_model == 23)
		name = "i386/core_2";
	else if (cpu_model == 9)
		name = "i386/p6_mobile";
	else if (cpu_model > 5)
		name = "i386/piii";
	else if (cpu_model > 2)
		name = "i386/pii";
	else
		name = "i386/ppro";

	*cpu_type = name;
	model = &op_ppro_spec;
	return 1;
}

392
/* Set once op_nmi_init() has succeeded, so that op_nmi_exit() only
 * calls exit_sysfs() when init_sysfs() was actually invoked.
 */
static int using_nmi;

395
/*
 * Probe the boot CPU and, if it is supported, install the NMI-based
 * callbacks into @ops.
 *
 * Supported: AMD families 6, 0xf and 0x10; Intel family 0xf (through
 * p4_init()) and family 6 (through ppro_init()).  A local APIC is
 * required.
 *
 * Returns 0 on success, -ENODEV when the CPU is not supported.
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type;
	int supported;

	if (!cpu_has_apic)
		return -ENODEV;

	switch (vendor) {
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
		if (family == 6) {
			cpu_type = "i386/athlon";
		} else if (family == 0xf) {
			/* Actually it could be i386/hammer too, but give
			 user space an consistent name. */
			cpu_type = "x86-64/hammer";
		} else if (family == 0x10) {
			cpu_type = "x86-64/family10";
		} else {
			return -ENODEV;
		}
		/* every supported AMD family shares the athlon spec */
		model = &op_athlon_spec;
		break;

	case X86_VENDOR_INTEL:
		if (family == 0xf)		/* Pentium IV */
			supported = p4_init(&cpu_type);
		else if (family == 6)		/* A P6-class processor */
			supported = ppro_init(&cpu_type);
		else
			supported = 0;

		if (!supported)
			return -ENODEV;
		break;

	default:
		return -ENODEV;
	}

	/* NOTE(review): the result of init_sysfs() is not checked here */
	init_sysfs();
	using_nmi = 1;

	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

463
void op_nmi_exit(void)
L
Linus Torvalds 已提交
464 465
{
	if (using_nmi)
466
		exit_sysfs();
L
Linus Torvalds 已提交
467
}