nmi_int.c 10.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
16
#include <linux/moduleparam.h>
17
#include <linux/kdebug.h>
L
Linus Torvalds 已提交
18 19 20
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
21

L
Linus Torvalds 已提交
22 23
#include "op_counter.h"
#include "op_x86_model.h"
24

25
/*
 * Performance-counter model in use. Assigned during op_nmi_init() (directly
 * for AMD, via p4_init()/ppro_init() for Intel) and dereferenced by every
 * counter operation in this file.
 */
static struct op_x86_model_spec const *model;
26 27
/*
 * Per-CPU MSR bookkeeping: the counters/controls tables are allocated by
 * allocate_msrs() and released by free_msrs().
 */
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
/* APIC_LVTPC value read in nmi_cpu_setup() and written back in nmi_cpu_shutdown(). */
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
28

L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36
/* Forward declarations: the suspend/resume hooks below call these. */
static int nmi_start(void);
static void nmi_stop(void);

/*
 * 0 == registered but off, 1 == registered and on.
 * Set to 1 at the end of nmi_setup() and cleared in nmi_shutdown().
 */
static int nmi_enabled = 0;

#ifdef CONFIG_PM

37
/*
 * sysdev suspend hook: stop the counters when profiling is set up
 * (nmi_enabled == 1). Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_stop();
	return 0;
}

/*
 * sysdev resume hook: restart the counters when profiling is set up
 * (nmi_enabled == 1). Always returns 0; the nmi_start() result is not
 * propagated.
 */
static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled != 1)
		return 0;

	nmi_start();
	return 0;
}

static struct sysdev_class oprofile_sysclass = {
52
	.name		= "oprofile",
L
Linus Torvalds 已提交
53 54 55 56 57 58 59 60 61
	.resume		= nmi_resume,
	.suspend	= nmi_suspend,
};

/* The single sysdev instance (id 0) belonging to oprofile_sysclass. */
static struct sys_device device_oprofile = {
	.cls	= &oprofile_sysclass,
	.id	= 0,
};

62
/*
 * Register the oprofile sysdev class and, if that succeeds, its device.
 *
 * Returns 0 on success or the error code of whichever registration failed.
 * The class is left registered when only the device registration fails.
 */
static int __init init_sysfs(void)
{
	int error = sysdev_class_register(&oprofile_sysclass);

	if (error)
		return error;

	return sysdev_register(&device_oprofile);
}

72
static void exit_sysfs(void)
L
Linus Torvalds 已提交
73 74 75 76 77 78
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
79 80
/*
 * Stubs for kernels built without CONFIG_PM. They carry the same signatures
 * as the real functions so callers see one interface in both configurations
 * (the previous statement-macros had no value, unlike the int-returning
 * CONFIG_PM version).
 */
static inline int init_sysfs(void) { return 0; }
static inline void exit_sysfs(void) { }
L
Linus Torvalds 已提交
81 82
#endif /* CONFIG_PM */

83 84
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
L
Linus Torvalds 已提交
85
{
86 87 88 89
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

90
	switch (val) {
91
	case DIE_NMI:
92
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
93 94 95 96 97 98
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
L
Linus Torvalds 已提交
99
}
100

101
static void nmi_cpu_save_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
102 103
{
	unsigned int const nr_ctrs = model->num_counters;
104 105 106
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
107 108 109
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
110
		if (counters[i].addr) {
111 112 113 114
			rdmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
115
	}
116

L
Linus Torvalds 已提交
117
	for (i = 0; i < nr_ctrls; ++i) {
118
		if (controls[i].addr) {
119 120 121 122
			rdmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
123 124 125
	}
}

126
static void nmi_save_registers(void *dummy)
L
Linus Torvalds 已提交
127 128
{
	int cpu = smp_processor_id();
129
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
130 131 132 133 134 135
	nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
	int i;
136
	for_each_possible_cpu(i) {
137 138 139 140
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
L
Linus Torvalds 已提交
141 142 143 144 145 146 147 148 149 150
	}
}

static int allocate_msrs(void)
{
	int success = 1;
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

	int i;
C
Chris Wright 已提交
151
	for_each_possible_cpu(i) {
152 153 154
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
L
Linus Torvalds 已提交
155 156 157
			success = 0;
			break;
		}
158 159 160
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
								GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).controls) {
L
Linus Torvalds 已提交
161 162 163 164 165 166 167 168 169 170 171
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

172
static void nmi_cpu_setup(void *dummy)
L
Linus Torvalds 已提交
173 174
{
	int cpu = smp_processor_id();
175
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
L
Linus Torvalds 已提交
176 177 178
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(msrs);
	spin_unlock(&oprofilefs_lock);
179
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
L
Linus Torvalds 已提交
180 181 182
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

183 184 185 186 187
/*
 * Die-chain notifier used while profiling is set up. The remaining members
 * (.next, .priority) rely on static zero-initialisation.
 */
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
};
L
Linus Torvalds 已提交
188 189 190

static int nmi_setup(void)
{
191
	int err = 0;
192
	int cpu;
193

L
Linus Torvalds 已提交
194 195 196
	if (!allocate_msrs())
		return -ENOMEM;

197 198
	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
L
Linus Torvalds 已提交
199
		free_msrs();
200
		return err;
L
Linus Torvalds 已提交
201
	}
202

L
Linus Torvalds 已提交
203 204 205
	/* We need to serialize save and setup for HT because the subset
	 * of msrs are distinct for save and setup operations
	 */
206 207

	/* Assume saved/restored counters are the same on all CPUs */
208
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
209
	for_each_possible_cpu(cpu) {
C
Chris Wright 已提交
210
		if (cpu != 0) {
211 212
			memcpy(per_cpu(cpu_msrs, cpu).counters,
				per_cpu(cpu_msrs, 0).counters,
C
Chris Wright 已提交
213 214
				sizeof(struct op_msr) * model->num_counters);

215 216
			memcpy(per_cpu(cpu_msrs, cpu).controls,
				per_cpu(cpu_msrs, 0).controls,
C
Chris Wright 已提交
217 218 219
				sizeof(struct op_msr) * model->num_controls);
		}

220
	}
L
Linus Torvalds 已提交
221 222 223 224 225 226
	on_each_cpu(nmi_save_registers, NULL, 0, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
	nmi_enabled = 1;
	return 0;
}

227
static void nmi_restore_registers(struct op_msrs *msrs)
L
Linus Torvalds 已提交
228 229
{
	unsigned int const nr_ctrs = model->num_counters;
230 231 232
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
L
Linus Torvalds 已提交
233 234 235
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
236
		if (controls[i].addr) {
237 238 239 240
			wrmsr(controls[i].addr,
				controls[i].saved.low,
				controls[i].saved.high);
		}
L
Linus Torvalds 已提交
241
	}
242

L
Linus Torvalds 已提交
243
	for (i = 0; i < nr_ctrs; ++i) {
244
		if (counters[i].addr) {
245 246 247 248
			wrmsr(counters[i].addr,
				counters[i].saved.low,
				counters[i].saved.high);
		}
L
Linus Torvalds 已提交
249 250 251
	}
}

252
static void nmi_cpu_shutdown(void *dummy)
L
Linus Torvalds 已提交
253 254 255
{
	unsigned int v;
	int cpu = smp_processor_id();
256
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
257

L
Linus Torvalds 已提交
258 259 260 261 262 263 264
	/* restoring APIC_LVTPC can trigger an apic error because the delivery
	 * mode and vector nr combination can be illegal. That's by design: on
	 * power on apic lvt contain a zero vector nr which are legal only for
	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
265
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
L
Linus Torvalds 已提交
266 267 268 269 270 271
	apic_write(APIC_LVTERR, v);
	nmi_restore_registers(msrs);
}

static void nmi_shutdown(void)
{
272
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
273 274
	nmi_enabled = 0;
	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
275
	unregister_die_notifier(&profile_exceptions_nb);
276
	model->shutdown(msrs);
L
Linus Torvalds 已提交
277 278 279
	free_msrs();
}

280
static void nmi_cpu_start(void *dummy)
L
Linus Torvalds 已提交
281
{
282
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
283 284 285 286 287 288 289 290
	model->start(msrs);
}

/*
 * Start the counters on every CPU by running nmi_cpu_start() through
 * on_each_cpu(). Always returns 0; the on_each_cpu() result is not checked.
 */
static int nmi_start(void)
{
	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
	return 0;
}
291 292

static void nmi_cpu_stop(void *dummy)
L
Linus Torvalds 已提交
293
{
294
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
L
Linus Torvalds 已提交
295 296
	model->stop(msrs);
}
297

L
Linus Torvalds 已提交
298 299 300 301 302 303 304
/* Stop the counters on every CPU by running nmi_cpu_stop() through on_each_cpu(). */
static void nmi_stop(void)
{
	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
}

/*
 * Per-counter configuration, indexed by counter number. The fields are
 * exposed as oprofilefs files by nmi_create_files().
 */
struct op_counter_config counter_config[OP_MAX_COUNTER];

305
static int nmi_create_files(struct super_block *sb, struct dentry *root)
L
Linus Torvalds 已提交
306 307 308 309
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
310
		struct dentry *dir;
311
		char buf[4];
312 313

		/* quick little hack to _not_ expose a counter if it is not
314 315 316 317 318 319 320
		 * available for use.  This should protect userspace app.
		 * NOTE:  assumes 1:1 mapping here (that counters are organized
		 *        sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

321
		snprintf(buf,  sizeof(buf), "%d", i);
L
Linus Torvalds 已提交
322
		dir = oprofilefs_mkdir(sb, root, buf);
323 324 325 326 327 328
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
L
Linus Torvalds 已提交
329 330 331 332
	}

	return 0;
}
333

334 335
/*
 * Module parameter: when non-zero, p4_init() skips its CPU-model check and
 * selects a P4 model regardless of boot_cpu_data.x86_model.
 */
static int p4force;
module_param(p4force, int, 0);
336 337

/*
 * Select the Pentium 4 counter model.
 *
 * @cpu_type: receives the oprofile CPU type string on success
 *
 * Returns 1 when a model was selected (and `model` / *cpu_type were set),
 * 0 when the CPU model is rejected (model > 6 or == 5, unless p4force is set)
 * or, on SMP builds, when smp_num_siblings is neither 1 nor 2.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	int rejected_model = (cpu_model > 6 || cpu_model == 5);

	if (rejected_model && !p4force)
		return 0;

#ifdef CONFIG_SMP
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#else
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

367
/*
 * Select the P6-family counter model and the matching CPU type string.
 *
 * @cpu_type: receives the oprofile CPU type string on success
 *
 * Model 14 maps to "i386/core", 15 and 23 to "i386/core_2", 9 to
 * "i386/p6_mobile"; the remaining models up to 0xd map to piii (> 5),
 * pii (> 2) or ppro. Any other model above 0xd is rejected.
 *
 * Returns 1 on success (with `model` set to op_ppro_spec), 0 when rejected.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	switch (cpu_model) {
	case 14:
		*cpu_type = "i386/core";
		break;
	case 15:
	case 23:
		*cpu_type = "i386/core_2";
		break;
	case 9:
		*cpu_type = "i386/p6_mobile";
		break;
	default:
		if (cpu_model > 0xd)
			return 0;

		if (cpu_model > 5)
			*cpu_type = "i386/piii";
		else if (cpu_model > 2)
			*cpu_type = "i386/pii";
		else
			*cpu_type = "i386/ppro";
		break;
	}

	model = &op_ppro_spec;
	return 1;
}

391
/* in order to get sysfs right */
L
Linus Torvalds 已提交
392 393
/* Set to 1 by op_nmi_init() on success; op_nmi_exit() only calls exit_sysfs() when set. */
static int using_nmi;

394
/*
 * Probe the boot CPU and, when it is supported, install the NMI-based
 * profiling callbacks into @ops.
 *
 * @ops: oprofile operations table to fill in
 *
 * Returns 0 on success, -ENODEV when there is no APIC or the vendor/family
 * (or the model, as judged by p4_init()/ppro_init()) is not supported.
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type;

	if (!cpu_has_apic)
		return -ENODEV;

	if (vendor == X86_VENDOR_AMD) {
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
		switch (family) {
		case 6:
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			/* Actually it could be i386/hammer too, but give
			 * user space a consistent name.
			 */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			cpu_type = "x86-64/family10";
			break;
		default:
			return -ENODEV;
		}
		/* every supported AMD family shares the Athlon model */
		model = &op_athlon_spec;
	} else if (vendor == X86_VENDOR_INTEL) {
		if (family == 0xf) {
			/* Pentium IV */
			if (!p4_init(&cpu_type))
				return -ENODEV;
		} else if (family == 6) {
			/* A P6-class processor */
			if (!ppro_init(&cpu_type))
				return -ENODEV;
		} else {
			return -ENODEV;
		}
	} else {
		return -ENODEV;
	}

	/* NOTE(review): any result of init_sysfs() is not checked here */
	init_sysfs();
	using_nmi = 1;

	ops->cpu_type = cpu_type;
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;

	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

462
void op_nmi_exit(void)
L
Linus Torvalds 已提交
463 464
{
	if (using_nmi)
465
		exit_sysfs();
L
Linus Torvalds 已提交
466
}