/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */

#include <linux/oprofile.h>
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <linux/nmi.h>
#include <asm/msr.h>
#include <asm/fixmap.h>
#include <asm/apic.h>

#include "op_x86_model.h"
#include "op_counter.h"

/* Number of entries in p4_events[]. */
#define NUM_EVENTS 39

/* Register counts used when the CPU is not hyper-threaded. */
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* Register counts used when smp_num_siblings == 2. */
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/*
 * Counts currently in effect.  They start out at the non-HT values and
 * are switched to the HT2 values by setup_num_counters().
 */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;

/*
 * this has to be checked dynamically since the
 * hyper-threadedness of a chip is discovered at
 * kernel boot-time.
 *
 * When smp_num_siblings is 2, num_counters and num_controls are switched
 * to the HT2 values; otherwise (and always without CONFIG_SMP) they are
 * left untouched.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2) {
		num_counters = NUM_COUNTERS_HT2;
		num_controls = NUM_CONTROLS_HT2;
	}
#endif
}

/*
 * Step between consecutive ESCR addresses visited by
 * p4_fill_in_addresses(): 2 when smp_num_siblings is 2, otherwise 1
 * (always 1 without CONFIG_SMP).
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}


/* tables to simulate simplified hardware view of p4 registers */

/*
 * One entry of p4_counters[]: ties a counter's CTR_* bit to the MSR
 * addresses of its performance counter and of the CCCR that controls it.
 */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit identifying this counter */
	int counter_address;	/* MSR address of the counter register */
	int cccr_address;	/* MSR address of the paired CCCR */
};

/*
 * One entry of p4_events[]: the selector values that program an event,
 * plus up to two (counter, ESCR) pairings on which it can be counted.
 * An unused pairing is written as { 0, 0 } in the table.
 */
struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR       */
	} bindings[2];
};

/*
 * nb: these CTR_* defines are a duplicate of defines in
 * event/i386.p4*events.
 *
 * Each one is a single-bit mask.  Bit n corresponds to entry n of
 * p4_counters[]; pmc_setup_one_p4_counter() builds the same bit from a
 * counter index and tests it against bindings[].virt_counter.
 */


#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

90
static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
L
Linus Torvalds 已提交
91 92 93 94 95 96 97 98 99 100
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

101
#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
L
Linus Torvalds 已提交
102 103 104 105

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {
106

L
Linus Torvalds 已提交
107
	{ /* BRANCH_RETIRED */
108
		0x05, 0x06,
L
Linus Torvalds 已提交
109 110 111
		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},
112

L
Linus Torvalds 已提交
113
	{ /* MISPRED_BRANCH_RETIRED */
114
		0x04, 0x03,
L
Linus Torvalds 已提交
115 116 117
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},
118

L
Linus Torvalds 已提交
119 120
	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
121
		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
L
Linus Torvalds 已提交
122 123
		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
	},
124

L
Linus Torvalds 已提交
125
	{ /* BPU_FETCH_REQUEST */
126
		0x00, 0x03,
L
Linus Torvalds 已提交
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* LOAD_PORT_REPLAY */
150
		0x02, 0x04,
L
Linus Torvalds 已提交
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
	},

	{ /* BSQ_CACHE_REFERENCE */
174
		0x07, 0x0c,
L
Linus Torvalds 已提交
175 176 177 178 179
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
	},

	{ /* IOQ_ALLOCATION */
180
		0x06, 0x03,
L
Linus Torvalds 已提交
181 182 183 184 185
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
186
		0x06, 0x1a,
L
Linus Torvalds 已提交
187 188 189 190 191
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
192
		0x06, 0x17,
L
Linus Torvalds 已提交
193 194 195 196 197
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* BSQ_ALLOCATION */
198
		0x07, 0x05,
L
Linus Torvalds 已提交
199 200 201 202 203 204
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
205
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
L
Linus Torvalds 已提交
206 207 208 209
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
210
		0x05, 0x03,
L
Linus Torvalds 已提交
211 212 213 214 215 216 217 218 219
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},
220

L
Linus Torvalds 已提交
221
	{ /* PACKED_SP_UOP */
222
		0x01, 0x08,
L
Linus Torvalds 已提交
223 224 225
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},
226

L
Linus Torvalds 已提交
227
	{ /* PACKED_DP_UOP */
228
		0x01, 0x0c,
L
Linus Torvalds 已提交
229 230 231 232 233
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_SP_UOP */
234
		0x01, 0x0a,
L
Linus Torvalds 已提交
235 236 237 238 239 240 241 242 243 244 245
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 64BIT_MMX_UOP */
246
		0x01, 0x02,
L
Linus Torvalds 已提交
247 248 249
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},
250

L
Linus Torvalds 已提交
251
	{ /* 128BIT_MMX_UOP */
252
		0x01, 0x1a,
L
Linus Torvalds 已提交
253 254 255 256 257
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_FP_UOP */
258
		0x01, 0x04,
L
Linus Torvalds 已提交
259 260 261
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},
262

L
Linus Torvalds 已提交
263
	{ /* X87_SIMD_MOVES_UOP */
264
		0x01, 0x2e,
L
Linus Torvalds 已提交
265 266 267
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},
268

L
Linus Torvalds 已提交
269
	{ /* MACHINE_CLEAR */
270
		0x05, 0x02,
L
Linus Torvalds 已提交
271 272 273 274 275 276 277 278 279
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},
280

L
Linus Torvalds 已提交
281
	{ /* TC_MS_XFER */
282
		0x00, 0x05,
L
Linus Torvalds 已提交
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* INSTR_RETIRED */
312
		0x04, 0x02,
L
Linus Torvalds 已提交
313 314 315 316 317 318 319 320 321 322
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

323 324
	{ /* UOP_TYPE */
		0x02, 0x02,
L
Linus Torvalds 已提交
325 326 327 328 329
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
330
		0x02, 0x05,
L
Linus Torvalds 已提交
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	}
};


/* Tests bit 7 of the value passed in; p4_setup_ctrs() applies it to the
   low word of MSR_IA32_MISC_ENABLE. */
#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

/*
 * ESCR helpers.  Each ESCR_SET_* ORs a field into the caller's variable,
 * so ESCR_CLEAR() (which keeps only ESCR_RESERVED_BITS) must come first.
 * The *_0 variants are used when the stagger is 0 and the *_1 variants
 * otherwise (see pmc_setup_one_p4_counter()).
 */
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
/* Read/write the ESCR named by binding i of event ev. */
#define ESCR_READ(escr, high, ev, i) do {rdmsr((ev)->bindings[(i)].escr_address, (escr), (high)); } while (0)
#define ESCR_WRITE(escr, high, ev, i) do {wrmsr((ev)->bindings[(i)].escr_address, (escr), (high)); } while (0)

/*
 * CCCR helpers.  CCCR_CLEAR() keeps only CCCR_RESERVED_BITS; the
 * CCCR_SET_* macros then OR individual fields in.  PMI_OVF_0 is used when
 * the stagger is 0 and PMI_OVF_1 otherwise.
 */
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
/* Read/write the CCCR of p4_counters[i]; i is a VIRT_CTR() index. */
#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))

/* An entry of msrs->controls[] / msrs->counters[] is in use when its
   address is non-zero. */
#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
/* The counter is loaded with the negated count (low word) and -1 (high
   word). */
#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
/* True once bit 31 of the value read from the counter is clear. */
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))


/*
 * this assigns a "stagger" to the current CPU, which is used throughout
 * the code in this module as an extra array offset, to select the "even"
 * or "odd" part of all the divided resources.
 *
 * Returns 0 when the current CPU is the first one in its sibling map and
 * 1 otherwise; always 0 without CONFIG_SMP.
 */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();

	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#else
	return 0;
#endif
}


/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting.  The result is used
   as an index into p4_counters[]. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))

/* Per-counter reload value, set by p4_setup_ctrs() from
   counter_config[].count; 0 marks a counter as not in use. */
static unsigned long reset_value[NUM_COUNTERS_NON_HT];


static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
399
	unsigned int i;
400
	unsigned int addr, cccraddr, stag;
L
Linus Torvalds 已提交
401 402 403 404

	setup_num_counters();
	stag = get_stagger();

405
	/* initialize some registers */
406
	for (i = 0; i < num_counters; ++i)
407
		msrs->counters[i].addr = 0;
408
	for (i = 0; i < num_controls; ++i)
409
		msrs->controls[i].addr = 0;
410

411 412 413 414
	/* the counter & cccr registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
415
		if (reserve_perfctr_nmi(addr)) {
416 417 418 419 420
			msrs->counters[i].addr = addr;
			msrs->controls[i].addr = cccraddr;
		}
	}

L
Linus Torvalds 已提交
421 422 423
	/* 43 ESCR registers in three or four discontiguous group */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
424 425
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
L
Linus Torvalds 已提交
426 427 428 429 430 431 432
	}

	/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
	 * to avoid special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
433 434
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
L
Linus Torvalds 已提交
435 436 437 438
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
439 440
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
L
Linus Torvalds 已提交
441 442 443 444 445
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
446 447
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
L
Linus Torvalds 已提交
448
	}
449

L
Linus Torvalds 已提交
450
	for (addr = MSR_P4_MS_ESCR0 + stag;
451
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
452 453
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
L
Linus Torvalds 已提交
454
	}
455

L
Linus Torvalds 已提交
456
	for (addr = MSR_P4_IX_ESCR0 + stag;
457
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
458 459
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
L
Linus Torvalds 已提交
460 461 462 463
	}

	/* there are 2 remaining non-contiguously located ESCRs */

464
	if (num_counters == NUM_COUNTERS_NON_HT) {
L
Linus Torvalds 已提交
465
		/* standard non-HT CPUs handle both remaining ESCRs*/
466 467 468 469
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
L
Linus Torvalds 已提交
470 471 472 473

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
474 475
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
L
Linus Torvalds 已提交
476 477 478 479

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22st and 23nd control registers */
480 481 482 483
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
L
Linus Torvalds 已提交
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
	}
}


static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();
500

L
Linus Torvalds 已提交
501 502
	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);
503

L
Linus Torvalds 已提交
504 505
	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
506 507
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
L
Linus Torvalds 已提交
508 509 510
		       counter_config[ctr].event);
		return;
	}
511

L
Linus Torvalds 已提交
512
	ev = &(p4_events[counter_config[ctr].event - 1]);
513

L
Linus Torvalds 已提交
514 515 516 517 518 519 520 521 522 523 524 525 526 527
	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			ESCR_READ(escr, high, ev, i);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
528
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
L
Linus Torvalds 已提交
529
			ESCR_WRITE(escr, high, ev, i);
530

L
Linus Torvalds 已提交
531 532 533 534 535
			/* modify CCCR */
			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
536
			if (stag == 0)
L
Linus Torvalds 已提交
537
				CCCR_SET_PMI_OVF_0(cccr);
538
			else
L
Linus Torvalds 已提交
539 540 541 542 543 544
				CCCR_SET_PMI_OVF_1(cccr);
			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
			return;
		}
	}

545
	printk(KERN_ERR
L
Linus Torvalds 已提交
546 547 548 549 550 551 552 553 554 555 556 557 558 559
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}


static void p4_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
560
	if (!MISC_PMC_ENABLED_P(low)) {
L
Linus Torvalds 已提交
561 562 563 564 565 566
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0 ; i < num_counters ; i++) {
567
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
568
			continue;
L
Linus Torvalds 已提交
569 570 571 572 573 574 575
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear all escrs (including those outside our concern) */
576
	for (i = num_counters; i < num_controls; i++) {
577
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
578 579
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
L
Linus Torvalds 已提交
580 581 582 583
	}

	/* setup all counters */
	for (i = 0 ; i < num_counters ; ++i) {
584
		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
L
Linus Torvalds 已提交
585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
		} else {
			reset_value[i] = 0;
		}
	}
}


static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
604 605

		if (!reset_value[i])
L
Linus Torvalds 已提交
606 607
			continue;

608
		/*
L
Linus Torvalds 已提交
609 610 611 612 613 614 615 616
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
617
		 *
L
Linus Torvalds 已提交
618 619 620 621 622 623
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */
624

L
Linus Torvalds 已提交
625 626 627
		real = VIRT_CTR(stag, i);

		CCCR_READ(low, high, real);
628
		CTR_READ(ctr, high, real);
L
Linus Torvalds 已提交
629 630
		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
			oprofile_add_sample(regs, i);
631
			CTR_WRITE(reset_value[i], real);
L
Linus Torvalds 已提交
632 633
			CCCR_CLEAR_OVF(low);
			CCCR_WRITE(low, high, real);
634
			CTR_WRITE(reset_value[i], real);
L
Linus Torvalds 已提交
635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}


/*
 * Start counting on this CPU: set the enable bit in the CCCR of every
 * counter whose reset_value[] entry is non-zero.
 */
static void p4_start(struct op_msrs const * const msrs)
{
	unsigned int cccr_lo, cccr_hi;
	unsigned int const stagger = get_stagger();
	int ctr;

	for (ctr = 0; ctr < num_counters; ++ctr) {
		if (reset_value[ctr]) {
			CCCR_READ(cccr_lo, cccr_hi, VIRT_CTR(stagger, ctr));
			CCCR_SET_ENABLE(cccr_lo);
			CCCR_WRITE(cccr_lo, cccr_hi, VIRT_CTR(stagger, ctr));
		}
	}
}


static void p4_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
671 672
		if (!reset_value[i])
			continue;
L
Linus Torvalds 已提交
673 674 675 676 677 678
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_DISABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

679 680 681 682 683
static void p4_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0 ; i < num_counters ; ++i) {
684
		if (CTR_IS_RESERVED(msrs, i))
685 686
			release_perfctr_nmi(msrs->counters[i].addr);
	}
687 688
	/*
	 * some of the control registers are specially reserved in
689 690 691 692
	 * conjunction with the counter registers (hence the starting offset).
	 * This saves a few bits.
	 */
	for (i = num_counters ; i < num_controls ; ++i) {
693
		if (CTRL_IS_RESERVED(msrs, i))
694 695 696 697
			release_evntsel_nmi(msrs->controls[i].addr);
	}
}

L
Linus Torvalds 已提交
698 699 700

#ifdef CONFIG_SMP
/* Model description using the HT2 register counts; only built with
   CONFIG_SMP.  Shares all callbacks with op_p4_spec. */
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= &p4_fill_in_addresses,
	.setup_ctrs		= &p4_setup_ctrs,
	.check_ctrs		= &p4_check_ctrs,
	.start			= &p4_start,
	.stop			= &p4_stop,
	.shutdown		= &p4_shutdown
};
#endif

struct op_x86_model_spec const op_p4_spec = {
R
Robert Richter 已提交
713 714 715 716 717 718 719 720
	.num_counters		= NUM_COUNTERS_NON_HT,
	.num_controls		= NUM_CONTROLS_NON_HT,
	.fill_in_addresses	= &p4_fill_in_addresses,
	.setup_ctrs		= &p4_setup_ctrs,
	.check_ctrs		= &p4_check_ctrs,
	.start			= &p4_start,
	.stop			= &p4_stop,
	.shutdown		= &p4_shutdown
L
Linus Torvalds 已提交
721
};