perf_event.c 56.7 KB
Newer Older
I
Ingo Molnar 已提交
1
/*
2
 * Performance events x86 architecture code
I
Ingo Molnar 已提交
3
 *
4 5 6 7 8
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9
 *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
I
Ingo Molnar 已提交
10 11 12 13
 *
 *  For licencing details see kernel-base/COPYING
 */

14
#include <linux/perf_event.h>
I
Ingo Molnar 已提交
15 16 17 18
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
19
#include <linux/module.h>
I
Ingo Molnar 已提交
20 21
#include <linux/kdebug.h>
#include <linux/sched.h>
22
#include <linux/uaccess.h>
23
#include <linux/highmem.h>
24
#include <linux/cpu.h>
I
Ingo Molnar 已提交
25 26

#include <asm/apic.h>
27
#include <asm/stacktrace.h>
P
Peter Zijlstra 已提交
28
#include <asm/nmi.h>
I
Ingo Molnar 已提交
29

30
static u64 perf_event_mask __read_mostly;
31

32 33
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS	4
34 35 36 37 38

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

/* The size of a per-cpu BTS buffer in bytes: */
39
#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
40 41

/* The BTS overflow threshold in bytes from the end of the buffer: */
42
#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67


/*
 * Bits in the debugctlmsr controlling branch tracing.
 */
#define X86_DEBUGCTL_TR			(1 << 6)
#define X86_DEBUGCTL_BTS		(1 << 7)
#define X86_DEBUGCTL_BTINT		(1 << 8)
#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)

/*
 * A debug store configuration.
 *
 * We only support architectures that use 64bit fields.
 */
struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;
	u64	pebs_index;
	u64	pebs_absolute_maximum;
	u64	pebs_interrupt_threshold;
68
	u64	pebs_event_reset[MAX_PEBS_EVENTS];
69 70
};

71 72
struct cpu_hw_events {
	struct perf_event	*events[X86_PMC_IDX_MAX];
73 74
	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75
	unsigned long		interrupts;
76
	int			enabled;
77
	struct debug_store	*ds;
I
Ingo Molnar 已提交
78 79
};

80 81 82 83 84 85 86 87 88 89 90 91
struct event_constraint {
	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int		code;
};

#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }

#define for_each_event_constraint(e, c) \
	for ((e) = (c); (e)->idxmsk[0]; (e)++)


I
Ingo Molnar 已提交
92
/*
93
 * struct x86_pmu - generic x86 pmu
I
Ingo Molnar 已提交
94
 */
95
struct x86_pmu {
96 97
	const char	*name;
	int		version;
98
	int		(*handle_irq)(struct pt_regs *);
99 100
	void		(*disable_all)(void);
	void		(*enable_all)(void);
101 102
	void		(*enable)(struct hw_perf_event *, int);
	void		(*disable)(struct hw_perf_event *, int);
103 104
	unsigned	eventsel;
	unsigned	perfctr;
105 106
	u64		(*event_map)(int);
	u64		(*raw_event)(u64);
107
	int		max_events;
108 109 110 111
	int		num_events;
	int		num_events_fixed;
	int		event_bits;
	u64		event_mask;
112
	int		apic;
113
	u64		max_period;
114
	u64		intel_ctrl;
115 116
	void		(*enable_bts)(u64 config);
	void		(*disable_bts)(void);
117 118
	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
					 struct hw_perf_event *hwc);
119 120
};

121
static struct x86_pmu x86_pmu __read_mostly;
122

123
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
124 125
	.enabled = 1,
};
I
Ingo Molnar 已提交
126

127
static const struct event_constraint *event_constraints;
128

V
Vince Weaver 已提交
129 130 131 132 133 134 135
/*
 * Not sure about some of these
 */
static const u64 p6_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
136 137
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
V
Vince Weaver 已提交
138 139 140 141 142
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
};

143
static u64 p6_pmu_event_map(int hw_event)
V
Vince Weaver 已提交
144
{
145
	return p6_perfmon_event_map[hw_event];
V
Vince Weaver 已提交
146 147
}

148
/*
149
 * Event setting that is specified not to count anything.
150 151 152 153
 * We use this to effectively disable a counter.
 *
 * L2_RQSTS with 0 MESI unit mask.
 */
154
#define P6_NOP_EVENT			0x0000002EULL
155

156
static u64 p6_pmu_raw_event(u64 hw_event)
V
Vince Weaver 已提交
157 158 159 160 161
{
#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
#define P6_EVNTSEL_INV_MASK		0x00800000ULL
162
#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
V
Vince Weaver 已提交
163 164 165 166 167 168

#define P6_EVNTSEL_MASK			\
	(P6_EVNTSEL_EVENT_MASK |	\
	 P6_EVNTSEL_UNIT_MASK  |	\
	 P6_EVNTSEL_EDGE_MASK  |	\
	 P6_EVNTSEL_INV_MASK   |	\
169
	 P6_EVNTSEL_REG_MASK)
V
Vince Weaver 已提交
170

171
	return hw_event & P6_EVNTSEL_MASK;
V
Vince Weaver 已提交
172 173
}

174 175 176 177 178 179 180 181 182 183
static const struct event_constraint intel_p6_event_constraints[] =
{
	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
	EVENT_CONSTRAINT_END
};
V
Vince Weaver 已提交
184

185 186 187
/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
188
static const u64 intel_perfmon_event_map[] =
I
Ingo Molnar 已提交
189
{
190 191 192 193 194 195 196
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
I
Ingo Molnar 已提交
197 198
};

199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
static const struct event_constraint intel_core_event_constraints[] =
{
	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
	EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
	EVENT_CONSTRAINT(0x18, 0x1),	/* IDLE_DURING_DIV */
	EVENT_CONSTRAINT(0x19, 0x2),	/* DELAYED_BYPASS */
	EVENT_CONSTRAINT(0xa1, 0x1),	/* RS_UOPS_DISPATCH_CYCLES */
	EVENT_CONSTRAINT(0xcb, 0x1),	/* MEM_LOAD_RETIRED */
	EVENT_CONSTRAINT_END
};

static const struct event_constraint intel_nehalem_event_constraints[] =
{
	EVENT_CONSTRAINT(0x40, 0x3),	/* L1D_CACHE_LD */
	EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST */
	EVENT_CONSTRAINT(0x42, 0x3),	/* L1D_CACHE_LOCK */
	EVENT_CONSTRAINT(0x43, 0x3),	/* L1D_ALL_REF */
	EVENT_CONSTRAINT(0x4e, 0x3),	/* L1D_PREFETCH */
	EVENT_CONSTRAINT(0x4c, 0x3),	/* LOAD_HIT_PRE */
	EVENT_CONSTRAINT(0x51, 0x3),	/* L1D */
	EVENT_CONSTRAINT(0x52, 0x3),	/* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
	EVENT_CONSTRAINT(0x53, 0x3),	/* L1D_CACHE_LOCK_FB_HIT */
	EVENT_CONSTRAINT(0xc5, 0x3),	/* CACHE_LOCK_CYCLES */
	EVENT_CONSTRAINT_END
};

228
static u64 intel_pmu_event_map(int hw_event)
229
{
230
	return intel_perfmon_event_map[hw_event];
231
}
I
Ingo Molnar 已提交
232

233
/*
234
 * Generalized hw caching related hw_event table, filled
235
 * in on a per model basis. A value of 0 means
236 237
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
238 239 240 241 242 243 244 245 246 247
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

248
static __initconst u64 nehalem_hw_cache_event_ids
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
269
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
270 271 272 273 274 275 276 277 278 279 280
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
281
 [ C(LL  ) ] = {
282 283 284 285 286 287 288 289 290
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
	},
	[ C(OP_PREFETCH) ] = {
291 292
		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
312
		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

339
static __initconst u64 core2_hw_cache_event_ids
340 341 342 343
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
372
 [ C(LL  ) ] = {
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
428 429
};

430
static __initconst u64 atom_hw_cache_event_ids
431 432 433 434
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
435 436 437 438 439 440
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
441
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
442 443 444 445 446 447 448 449 450
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
451 452
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
453 454 455 456 457 458 459 460 461 462
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
463
 [ C(LL  ) ] = {
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
479
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
480 481 482
		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
483
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
519 520
};

521
static u64 intel_pmu_raw_event(u64 hw_event)
522
{
523 524
#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
525 526
#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
527
#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
528

529
#define CORE_EVNTSEL_MASK		\
530 531
	(CORE_EVNTSEL_EVENT_MASK |	\
	 CORE_EVNTSEL_UNIT_MASK  |	\
532 533
	 CORE_EVNTSEL_EDGE_MASK  |	\
	 CORE_EVNTSEL_INV_MASK  |	\
534
	 CORE_EVNTSEL_REG_MASK)
535

536
	return hw_event & CORE_EVNTSEL_MASK;
537 538
}

539
static __initconst u64 amd_hw_cache_event_ids
540 541 542 543 544 545
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
546 547
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
548 549
	},
	[ C(OP_WRITE) ] = {
550
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
551 552 553
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
554 555
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
556 557 558 559 560 561 562 563 564 565 566 567
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
568
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
569 570 571
		[ C(RESULT_MISS)   ] = 0,
	},
 },
572
 [ C(LL  ) ] = {
573
	[ C(OP_READ) ] = {
574 575
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
576 577
	},
	[ C(OP_WRITE) ] = {
578
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
579 580 581 582 583 584 585 586 587
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
588 589
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

630 631 632
/*
 * AMD Performance Monitor K7 and later.
 */
633
static const u64 amd_perfmon_event_map[] =
634
{
635 636 637 638 639 640
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
641 642
};

643
static u64 amd_pmu_event_map(int hw_event)
644
{
645
	return amd_perfmon_event_map[hw_event];
646 647
}

648
static u64 amd_pmu_raw_event(u64 hw_event)
649
{
650 651
#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
652 653
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
654
#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
655 656 657 658

#define K7_EVNTSEL_MASK			\
	(K7_EVNTSEL_EVENT_MASK |	\
	 K7_EVNTSEL_UNIT_MASK  |	\
659 660
	 K7_EVNTSEL_EDGE_MASK  |	\
	 K7_EVNTSEL_INV_MASK   |	\
661
	 K7_EVNTSEL_REG_MASK)
662

663
	return hw_event & K7_EVNTSEL_MASK;
664 665
}

666
/*
667 668
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
669 670
 * Returns the delta events processed.
 */
671
static u64
672 673
x86_perf_event_update(struct perf_event *event,
			struct hw_perf_event *hwc, int idx)
674
{
675
	int shift = 64 - x86_pmu.event_bits;
676 677
	u64 prev_raw_count, new_raw_count;
	s64 delta;
678

679 680 681
	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

682
	/*
683
	 * Careful: an NMI might modify the previous event value.
684 685 686
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
687
	 * count to the generic event atomically:
688 689 690
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
691
	rdmsrl(hwc->event_base + idx, new_raw_count);
692 693 694 695 696 697 698 699

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
700
	 * (event-)time and add that to the generic event.
701 702
	 *
	 * Careful, not all hw sign-extends above the physical width
703
	 * of the count.
704
	 */
705 706
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;
707

708
	atomic64_add(delta, &event->count);
709
	atomic64_sub(delta, &hwc->period_left);
710 711

	return new_raw_count;
712 713
}

714
static atomic_t active_events;
P
Peter Zijlstra 已提交
715 716 717 718
static DEFINE_MUTEX(pmc_reserve_mutex);

static bool reserve_pmc_hardware(void)
{
719
#ifdef CONFIG_X86_LOCAL_APIC
P
Peter Zijlstra 已提交
720 721 722 723 724
	int i;

	if (nmi_watchdog == NMI_LOCAL_APIC)
		disable_lapic_nmi_watchdog();

725
	for (i = 0; i < x86_pmu.num_events; i++) {
726
		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
P
Peter Zijlstra 已提交
727 728 729
			goto perfctr_fail;
	}

730
	for (i = 0; i < x86_pmu.num_events; i++) {
731
		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
P
Peter Zijlstra 已提交
732 733
			goto eventsel_fail;
	}
734
#endif
P
Peter Zijlstra 已提交
735 736 737

	return true;

738
#ifdef CONFIG_X86_LOCAL_APIC
P
Peter Zijlstra 已提交
739 740
eventsel_fail:
	for (i--; i >= 0; i--)
741
		release_evntsel_nmi(x86_pmu.eventsel + i);
P
Peter Zijlstra 已提交
742

743
	i = x86_pmu.num_events;
P
Peter Zijlstra 已提交
744 745 746

perfctr_fail:
	for (i--; i >= 0; i--)
747
		release_perfctr_nmi(x86_pmu.perfctr + i);
P
Peter Zijlstra 已提交
748 749 750 751 752

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();

	return false;
753
#endif
P
Peter Zijlstra 已提交
754 755 756 757
}

static void release_pmc_hardware(void)
{
758
#ifdef CONFIG_X86_LOCAL_APIC
P
Peter Zijlstra 已提交
759 760
	int i;

761
	for (i = 0; i < x86_pmu.num_events; i++) {
762 763
		release_perfctr_nmi(x86_pmu.perfctr + i);
		release_evntsel_nmi(x86_pmu.eventsel + i);
P
Peter Zijlstra 已提交
764 765 766 767
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();
768
#endif
P
Peter Zijlstra 已提交
769 770
}

771 772 773 774 775 776 777
static inline bool bts_available(void)
{
	return x86_pmu.enable_bts != NULL;
}

static inline void init_debug_store_on_cpu(int cpu)
{
778
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
779 780 781 782 783

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
784 785
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
786 787 788 789
}

static inline void fini_debug_store_on_cpu(int cpu)
{
790
	if (!per_cpu(cpu_hw_events, cpu).ds)
791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static void release_bts_hardware(void)
{
	int cpu;

	if (!bts_available())
		return;

	get_online_cpus();

	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
809
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
810 811 812 813

		if (!ds)
			continue;

814
		per_cpu(cpu_hw_events, cpu).ds = NULL;
815

816
		kfree((void *)(unsigned long)ds->bts_buffer_base);
817 818 819 820 821 822 823 824 825 826 827
		kfree(ds);
	}

	put_online_cpus();
}

static int reserve_bts_hardware(void)
{
	int cpu, err = 0;

	if (!bts_available())
828
		return 0;
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		struct debug_store *ds;
		void *buffer;

		err = -ENOMEM;
		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
		if (unlikely(!buffer))
			break;

		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
		if (unlikely(!ds)) {
			kfree(buffer);
			break;
		}

847
		ds->bts_buffer_base = (u64)(unsigned long)buffer;
848 849 850 851 852 853
		ds->bts_index = ds->bts_buffer_base;
		ds->bts_absolute_maximum =
			ds->bts_buffer_base + BTS_BUFFER_SIZE;
		ds->bts_interrupt_threshold =
			ds->bts_absolute_maximum - BTS_OVFL_TH;

854
		per_cpu(cpu_hw_events, cpu).ds = ds;
855 856 857 858 859 860 861 862 863 864 865 866 867 868 869
		err = 0;
	}

	if (err)
		release_bts_hardware();
	else {
		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();

	return err;
}

870
static void hw_perf_event_destroy(struct perf_event *event)
P
Peter Zijlstra 已提交
871
{
872
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
P
Peter Zijlstra 已提交
873
		release_pmc_hardware();
874
		release_bts_hardware();
P
Peter Zijlstra 已提交
875 876 877 878
		mutex_unlock(&pmc_reserve_mutex);
	}
}

879 880 881 882 883
static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

884
static inline int
885
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916
{
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;

	config = attr->config;

	cache_type = (config >>  0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >>  8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	val = hw_cache_event_ids[cache_type][cache_op][cache_result];

	if (val == 0)
		return -ENOENT;

	if (val == -1)
		return -EINVAL;

	hwc->config |= val;

	return 0;
}

917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937
static void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= X86_DEBUGCTL_TR;
	debugctlmsr |= X86_DEBUGCTL_BTS;
	debugctlmsr |= X86_DEBUGCTL_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

static void intel_pmu_disable_bts(void)
{
938
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
939 940 941 942 943 944 945 946 947 948 949 950 951 952
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

I
Ingo Molnar 已提交
953
/*
954
 * Setup the hardware configuration for a given attr_type
I
Ingo Molnar 已提交
955
 */
956
static int __hw_perf_event_init(struct perf_event *event)
I
Ingo Molnar 已提交
957
{
958 959
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
960
	u64 config;
P
Peter Zijlstra 已提交
961
	int err;
I
Ingo Molnar 已提交
962

963 964
	if (!x86_pmu_initialized())
		return -ENODEV;
I
Ingo Molnar 已提交
965

P
Peter Zijlstra 已提交
966
	err = 0;
967
	if (!atomic_inc_not_zero(&active_events)) {
P
Peter Zijlstra 已提交
968
		mutex_lock(&pmc_reserve_mutex);
969
		if (atomic_read(&active_events) == 0) {
970 971 972
			if (!reserve_pmc_hardware())
				err = -EBUSY;
			else
973
				err = reserve_bts_hardware();
974 975
		}
		if (!err)
976
			atomic_inc(&active_events);
P
Peter Zijlstra 已提交
977 978 979 980 981
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

982
	event->destroy = hw_perf_event_destroy;
983

I
Ingo Molnar 已提交
984
	/*
985
	 * Generate PMC IRQs:
I
Ingo Molnar 已提交
986 987
	 * (keep 'enabled' bit clear for now)
	 */
988
	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
I
Ingo Molnar 已提交
989

990 991
	hwc->idx = -1;

I
Ingo Molnar 已提交
992
	/*
993
	 * Count user and OS events unless requested not to.
I
Ingo Molnar 已提交
994
	 */
995
	if (!attr->exclude_user)
996
		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
997
	if (!attr->exclude_kernel)
I
Ingo Molnar 已提交
998
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
999

1000
	if (!hwc->sample_period) {
1001
		hwc->sample_period = x86_pmu.max_period;
1002
		hwc->last_period = hwc->sample_period;
1003
		atomic64_set(&hwc->period_left, hwc->sample_period);
1004 1005 1006 1007
	} else {
		/*
		 * If we have a PMU initialized but no APIC
		 * interrupts, we cannot sample hardware
1008 1009
		 * events (user-space has to fall back and
		 * sample via a hrtimer based software event):
1010 1011 1012
		 */
		if (!x86_pmu.apic)
			return -EOPNOTSUPP;
1013
	}
1014

I
Ingo Molnar 已提交
1015
	/*
1016
	 * Raw hw_event type provide the config in the hw_event structure
I
Ingo Molnar 已提交
1017
	 */
1018 1019
	if (attr->type == PERF_TYPE_RAW) {
		hwc->config |= x86_pmu.raw_event(attr->config);
1020
		return 0;
I
Ingo Molnar 已提交
1021 1022
	}

1023 1024 1025 1026 1027
	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, attr);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;
1028

1029 1030 1031
	/*
	 * The generic map:
	 */
1032 1033 1034 1035 1036 1037 1038 1039
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

1040 1041 1042 1043
	/*
	 * Branch tracing:
	 */
	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
1044 1045 1046 1047 1048 1049 1050 1051 1052
	    (hwc->sample_period == 1)) {
		/* BTS is not supported by this architecture. */
		if (!bts_available())
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
			return -EOPNOTSUPP;
	}
1053

1054
	hwc->config |= config;
P
Peter Zijlstra 已提交
1055

I
Ingo Molnar 已提交
1056 1057 1058
	return 0;
}

V
Vince Weaver 已提交
1059 1060
static void p6_pmu_disable_all(void)
{
1061
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1062
	u64 val;
V
Vince Weaver 已提交
1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	barrier();

	/* p6 only has one enable register */
	rdmsrl(MSR_P6_EVNTSEL0, val);
	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsrl(MSR_P6_EVNTSEL0, val);
}

1076
static void intel_pmu_disable_all(void)
1077
{
1078
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1079 1080 1081 1082 1083 1084 1085

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	barrier();

1086
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1087 1088 1089

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();
I
Ingo Molnar 已提交
1090
}
1091

1092
static void amd_pmu_disable_all(void)
1093
{
1094
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1095 1096 1097 1098
	int idx;

	if (!cpuc->enabled)
		return;
1099 1100

	cpuc->enabled = 0;
1101 1102
	/*
	 * ensure we write the disable before we start disabling the
1103
	 * events proper, so that amd_pmu_enable_event() does the
1104
	 * right thing.
1105
	 */
1106
	barrier();
1107

1108
	for (idx = 0; idx < x86_pmu.num_events; idx++) {
1109 1110
		u64 val;

1111
		if (!test_bit(idx, cpuc->active_mask))
1112
			continue;
1113
		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
1114 1115 1116 1117
		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1118 1119 1120
	}
}

1121
void hw_perf_disable(void)
1122
{
1123
	if (!x86_pmu_initialized())
1124 1125
		return;
	return x86_pmu.disable_all();
1126
}
I
Ingo Molnar 已提交
1127

V
Vince Weaver 已提交
1128 1129
static void p6_pmu_enable_all(void)
{
1130
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
V
Vince Weaver 已提交
1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
	unsigned long val;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	/* p6 only has one enable register */
	rdmsrl(MSR_P6_EVNTSEL0, val);
	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsrl(MSR_P6_EVNTSEL0, val);
}

1145
static void intel_pmu_enable_all(void)
1146
{
1147
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1148 1149 1150 1151 1152 1153 1154

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

1155
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1156 1157

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1158 1159
		struct perf_event *event =
			cpuc->events[X86_PMC_IDX_FIXED_BTS];
1160

1161
		if (WARN_ON_ONCE(!event))
1162 1163
			return;

1164
		intel_pmu_enable_bts(event->hw.config);
1165
	}
1166 1167
}

1168
static void amd_pmu_enable_all(void)
1169
{
1170
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1171 1172
	int idx;

1173
	if (cpuc->enabled)
1174 1175
		return;

1176 1177 1178
	cpuc->enabled = 1;
	barrier();

1179 1180
	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		struct perf_event *event = cpuc->events[idx];
1181
		u64 val;
1182

1183
		if (!test_bit(idx, cpuc->active_mask))
1184
			continue;
1185

1186
		val = event->hw.config;
1187 1188
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1189 1190 1191
	}
}

1192
void hw_perf_enable(void)
1193
{
1194
	if (!x86_pmu_initialized())
1195
		return;
1196
	x86_pmu.enable_all();
1197 1198
}

1199
static inline u64 intel_pmu_get_status(void)
1200 1201 1202
{
	u64 status;

1203
	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1204

1205
	return status;
1206 1207
}

1208
static inline void intel_pmu_ack_status(u64 ack)
1209 1210 1211 1212
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

1213
static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1214
{
V
Vince Weaver 已提交
1215
	(void)checking_wrmsrl(hwc->config_base + idx,
1216
			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1217 1218
}

1219
static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1220
{
V
Vince Weaver 已提交
1221
	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1222 1223
}

1224
static inline void
1225
intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
1226 1227 1228 1229 1230 1231 1232 1233
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, mask;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
V
Vince Weaver 已提交
1234 1235 1236 1237
	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
1238
p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
V
Vince Weaver 已提交
1239
{
1240 1241
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val = P6_NOP_EVENT;
V
Vince Weaver 已提交
1242

1243 1244
	if (cpuc->enabled)
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
V
Vince Weaver 已提交
1245 1246

	(void)checking_wrmsrl(hwc->config_base + idx, val);
1247 1248
}

1249
static inline void
1250
intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1251
{
1252 1253 1254 1255 1256
	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
		intel_pmu_disable_bts();
		return;
	}

1257 1258 1259 1260 1261
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_disable_fixed(hwc, idx);
		return;
	}

1262
	x86_pmu_disable_event(hwc, idx);
1263 1264 1265
}

static inline void
1266
amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1267
{
1268
	x86_pmu_disable_event(hwc, idx);
1269 1270
}

1271
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
I
Ingo Molnar 已提交
1272

1273 1274
/*
 * Set the next IRQ period, based on the hwc->period_left value.
1275
 * To be called with the event disabled in hw:
1276
 */
1277
static int
1278 1279
x86_perf_event_set_period(struct perf_event *event,
			     struct hw_perf_event *hwc, int idx)
I
Ingo Molnar 已提交
1280
{
1281
	s64 left = atomic64_read(&hwc->period_left);
1282 1283
	s64 period = hwc->sample_period;
	int err, ret = 0;
1284

1285 1286 1287
	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

1288
	/*
1289
	 * If we are way outside a reasonable range then just skip forward:
1290 1291 1292 1293
	 */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
1294
		hwc->last_period = period;
1295
		ret = 1;
1296 1297 1298 1299 1300
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
1301
		hwc->last_period = period;
1302
		ret = 1;
1303
	}
1304
	/*
1305
	 * Quirk: certain CPUs dont like it if just 1 hw_event is left:
1306 1307 1308
	 */
	if (unlikely(left < 2))
		left = 2;
I
Ingo Molnar 已提交
1309

1310 1311 1312
	if (left > x86_pmu.max_period)
		left = x86_pmu.max_period;

1313
	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1314 1315

	/*
1316
	 * The hw event starts counting from this event offset,
1317 1318
	 * mark it to be able to extra future deltas:
	 */
1319
	atomic64_set(&hwc->prev_count, (u64)-left);
1320

1321 1322
	err = checking_wrmsrl(hwc->event_base + idx,
			     (u64)(-left) & x86_pmu.event_mask);
1323

1324
	perf_event_update_userpage(event);
1325

1326
	return ret;
1327 1328 1329
}

static inline void
1330
intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1331 1332 1333 1334 1335 1336
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;
	int err;

	/*
1337 1338 1339
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
1340
	 */
1341 1342 1343
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
1344 1345
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;
1346 1347 1348 1349 1350 1351 1352

	/*
	 * ANY bit is supported in v3 and up
	 */
	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
		bits |= 0x4;

1353 1354 1355 1356 1357 1358 1359
	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
1360 1361
}

1362
static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
V
Vince Weaver 已提交
1363
{
1364
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1365
	u64 val;
V
Vince Weaver 已提交
1366

1367
	val = hwc->config;
V
Vince Weaver 已提交
1368
	if (cpuc->enabled)
1369 1370 1371
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;

	(void)checking_wrmsrl(hwc->config_base + idx, val);
V
Vince Weaver 已提交
1372 1373 1374
}


1375
static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1376
{
1377
	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1378
		if (!__get_cpu_var(cpu_hw_events).enabled)
1379 1380 1381 1382 1383 1384
			return;

		intel_pmu_enable_bts(hwc->config);
		return;
	}

1385 1386 1387 1388 1389
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_enable_fixed(hwc, idx);
		return;
	}

1390
	x86_pmu_enable_event(hwc, idx);
1391 1392
}

1393
static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1394
{
1395
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1396 1397

	if (cpuc->enabled)
1398
		x86_pmu_enable_event(hwc, idx);
I
Ingo Molnar 已提交
1399 1400
}

1401
static int fixed_mode_idx(struct hw_perf_event *hwc)
1402
{
1403
	unsigned int hw_event;
1404

1405
	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1406

1407
	if (unlikely((hw_event ==
1408 1409 1410 1411
		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
		     (hwc->sample_period == 1)))
		return X86_PMC_IDX_FIXED_BTS;

1412
	if (!x86_pmu.num_events_fixed)
1413 1414
		return -1;

1415 1416 1417 1418 1419 1420
	/*
	 * fixed counters do not take all possible filters
	 */
	if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
		return -1;

1421
	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1422
		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1423
	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1424
		return X86_PMC_IDX_FIXED_CPU_CYCLES;
1425
	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1426 1427
		return X86_PMC_IDX_FIXED_BUS_CYCLES;

1428 1429 1430
	return -1;
}

1431 1432 1433
/*
 * generic counter allocator: get next free counter
 */
1434 1435
static int
gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1436 1437 1438 1439 1440 1441 1442 1443 1444 1445
{
	int idx;

	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
	return idx == x86_pmu.num_events ? -1 : idx;
}

/*
 * intel-specific counter allocator: check event constraints
 */
1446 1447
static int
intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1448 1449 1450 1451
{
	const struct event_constraint *event_constraint;
	int i, code;

1452
	if (!event_constraints)
1453 1454
		goto skip;

1455
	code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
1456

1457
	for_each_event_constraint(event_constraint, event_constraints) {
1458 1459 1460 1461 1462 1463 1464 1465 1466
		if (code == event_constraint->code) {
			for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
				if (!test_and_set_bit(i, cpuc->used_mask))
					return i;
			}
			return -1;
		}
	}
skip:
1467
	return gen_get_event_idx(cpuc, hwc);
1468 1469
}

1470 1471
static int
x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
I
Ingo Molnar 已提交
1472
{
1473
	int idx;
I
Ingo Molnar 已提交
1474

1475
	idx = fixed_mode_idx(hwc);
1476
	if (idx == X86_PMC_IDX_FIXED_BTS) {
1477
		/* BTS is already occupied. */
1478
		if (test_and_set_bit(idx, cpuc->used_mask))
1479
			return -EAGAIN;
1480 1481

		hwc->config_base	= 0;
1482
		hwc->event_base		= 0;
1483 1484
		hwc->idx		= idx;
	} else if (idx >= 0) {
1485
		/*
1486 1487
		 * Try to get the fixed event, if that is already taken
		 * then try to get a generic event:
1488
		 */
1489
		if (test_and_set_bit(idx, cpuc->used_mask))
1490
			goto try_generic;
1491

1492 1493
		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
1494
		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
1495 1496
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
1497
		hwc->event_base =
1498
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
I
Ingo Molnar 已提交
1499
		hwc->idx = idx;
1500 1501
	} else {
		idx = hwc->idx;
1502
		/* Try to get the previous generic event again */
1503
		if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
1504
try_generic:
1505
			idx = x86_pmu.get_event_idx(cpuc, hwc);
1506
			if (idx == -1)
1507 1508
				return -EAGAIN;

1509
			set_bit(idx, cpuc->used_mask);
1510 1511
			hwc->idx = idx;
		}
1512 1513
		hwc->config_base = x86_pmu.eventsel;
		hwc->event_base  = x86_pmu.perfctr;
I
Ingo Molnar 已提交
1514 1515
	}

1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531
	return idx;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in event:
 */
static int x86_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = x86_schedule_event(cpuc, hwc);
	if (idx < 0)
		return idx;

1532
	perf_events_lapic_init();
1533

1534
	x86_pmu.disable(hwc, idx);
I
Ingo Molnar 已提交
1535

1536
	cpuc->events[idx] = event;
1537
	set_bit(idx, cpuc->active_mask);
1538

1539
	x86_perf_event_set_period(event, hwc, idx);
1540
	x86_pmu.enable(hwc, idx);
1541

1542
	perf_event_update_userpage(event);
1543

1544
	return 0;
I
Ingo Molnar 已提交
1545 1546
}

1547
static void x86_pmu_unthrottle(struct perf_event *event)
1548
{
1549 1550
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
1551 1552

	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1553
				cpuc->events[hwc->idx] != event))
1554 1555 1556 1557 1558
		return;

	x86_pmu.enable(hwc, hwc->idx);
}

1559
void perf_event_print_debug(void)
I
Ingo Molnar 已提交
1560
{
1561
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1562
	struct cpu_hw_events *cpuc;
1563
	unsigned long flags;
1564 1565
	int cpu, idx;

1566
	if (!x86_pmu.num_events)
1567
		return;
I
Ingo Molnar 已提交
1568

1569
	local_irq_save(flags);
I
Ingo Molnar 已提交
1570 1571

	cpu = smp_processor_id();
1572
	cpuc = &per_cpu(cpu_hw_events, cpu);
I
Ingo Molnar 已提交
1573

1574
	if (x86_pmu.version >= 2) {
1575 1576 1577 1578 1579 1580 1581 1582 1583 1584
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

		pr_info("\n");
		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
1585
	}
1586
	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
I
Ingo Molnar 已提交
1587

1588
	for (idx = 0; idx < x86_pmu.num_events; idx++) {
1589 1590
		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
I
Ingo Molnar 已提交
1591

1592
		prev_left = per_cpu(pmc_prev_left[idx], cpu);
I
Ingo Molnar 已提交
1593

1594
		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
I
Ingo Molnar 已提交
1595
			cpu, idx, pmc_ctrl);
1596
		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
I
Ingo Molnar 已提交
1597
			cpu, idx, pmc_count);
1598
		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
1599
			cpu, idx, prev_left);
I
Ingo Molnar 已提交
1600
	}
1601
	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1602 1603
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

1604
		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1605 1606
			cpu, idx, pmc_count);
	}
1607
	local_irq_restore(flags);
I
Ingo Molnar 已提交
1608 1609
}

1610
static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1611 1612 1613 1614 1615 1616 1617
{
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
1618
	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1619
	struct bts_record *at, *top;
1620 1621 1622 1623
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;
1624

1625
	if (!event)
1626 1627 1628 1629 1630
		return;

	if (!ds)
		return;

1631 1632
	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;
1633

1634 1635 1636
	if (top <= at)
		return;

1637 1638
	ds->bts_index = ds->bts_buffer_base;

1639

1640
	data.period	= event->hw.last_period;
1641
	data.addr	= 0;
1642
	data.raw	= NULL;
1643 1644 1645 1646 1647 1648 1649
	regs.ip		= 0;

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
1650
	perf_prepare_sample(&header, &data, event, &regs);
1651

1652
	if (perf_output_begin(&handle, event,
1653 1654 1655
			      header.size * (top - at), 1, 1))
		return;

1656
	for (; at < top; at++) {
1657 1658
		data.ip		= at->from;
		data.addr	= at->to;
1659

1660
		perf_output_sample(&handle, &header, &data, event);
1661 1662
	}

1663
	perf_output_end(&handle);
1664 1665

	/* There's new data available. */
1666 1667
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
1668 1669
}

1670
static void x86_pmu_disable(struct perf_event *event)
I
Ingo Molnar 已提交
1671
{
1672 1673
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
1674
	int idx = hwc->idx;
I
Ingo Molnar 已提交
1675

1676 1677 1678 1679
	/*
	 * Must be done before we disable, otherwise the nmi handler
	 * could reenable again:
	 */
1680
	clear_bit(idx, cpuc->active_mask);
1681
	x86_pmu.disable(hwc, idx);
I
Ingo Molnar 已提交
1682

1683 1684
	/*
	 * Make sure the cleared pointer becomes visible before we
1685
	 * (potentially) free the event:
1686
	 */
1687
	barrier();
I
Ingo Molnar 已提交
1688

1689
	/*
1690
	 * Drain the remaining delta count out of a event
1691 1692
	 * that we are disabling:
	 */
1693
	x86_perf_event_update(event, hwc, idx);
1694 1695

	/* Drain the remaining BTS records. */
1696 1697
	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
		intel_pmu_drain_bts_buffer(cpuc);
1698

1699
	cpuc->events[idx] = NULL;
1700
	clear_bit(idx, cpuc->used_mask);
1701

1702
	perf_event_update_userpage(event);
I
Ingo Molnar 已提交
1703 1704
}

1705
/*
1706 1707
 * Save and restart an expired event. Called by NMI contexts,
 * so it has to be careful about preempting normal event ops:
1708
 */
1709
static int intel_pmu_save_and_restart(struct perf_event *event)
I
Ingo Molnar 已提交
1710
{
1711
	struct hw_perf_event *hwc = &event->hw;
I
Ingo Molnar 已提交
1712
	int idx = hwc->idx;
1713
	int ret;
I
Ingo Molnar 已提交
1714

1715 1716
	x86_perf_event_update(event, hwc, idx);
	ret = x86_perf_event_set_period(event, hwc, idx);
1717

1718 1719
	if (event->state == PERF_EVENT_STATE_ACTIVE)
		intel_pmu_enable_event(hwc, idx);
1720 1721

	return ret;
I
Ingo Molnar 已提交
1722 1723
}

1724 1725
static void intel_pmu_reset(void)
{
1726
	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1727 1728 1729
	unsigned long flags;
	int idx;

1730
	if (!x86_pmu.num_events)
1731 1732 1733 1734 1735 1736
		return;

	local_irq_save(flags);

	printk("clearing PMU state on CPU#%d\n", smp_processor_id());

1737
	for (idx = 0; idx < x86_pmu.num_events; idx++) {
1738 1739 1740
		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
	}
1741
	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1742 1743
		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
	}
1744 1745
	if (ds)
		ds->bts_index = ds->bts_buffer_base;
1746 1747 1748 1749

	local_irq_restore(flags);
}

V
Vince Weaver 已提交
1750 1751 1752
static int p6_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
1753 1754 1755
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
V
Vince Weaver 已提交
1756 1757 1758 1759
	int idx, handled = 0;
	u64 val;

	data.addr = 0;
1760
	data.raw = NULL;
V
Vince Weaver 已提交
1761

1762
	cpuc = &__get_cpu_var(cpu_hw_events);
V
Vince Weaver 已提交
1763

1764
	for (idx = 0; idx < x86_pmu.num_events; idx++) {
V
Vince Weaver 已提交
1765 1766 1767
		if (!test_bit(idx, cpuc->active_mask))
			continue;

1768 1769
		event = cpuc->events[idx];
		hwc = &event->hw;
V
Vince Weaver 已提交
1770

1771 1772
		val = x86_perf_event_update(event, hwc, idx);
		if (val & (1ULL << (x86_pmu.event_bits - 1)))
V
Vince Weaver 已提交
1773 1774 1775
			continue;

		/*
1776
		 * event overflow
V
Vince Weaver 已提交
1777 1778
		 */
		handled		= 1;
1779
		data.period	= event->hw.last_period;
V
Vince Weaver 已提交
1780

1781
		if (!x86_perf_event_set_period(event, hwc, idx))
V
Vince Weaver 已提交
1782 1783
			continue;

1784 1785
		if (perf_event_overflow(event, 1, &data, regs))
			p6_pmu_disable_event(hwc, idx);
V
Vince Weaver 已提交
1786 1787 1788 1789 1790 1791 1792
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}
1793

I
Ingo Molnar 已提交
1794 1795 1796 1797
/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
1798
static int intel_pmu_handle_irq(struct pt_regs *regs)
I
Ingo Molnar 已提交
1799
{
1800
	struct perf_sample_data data;
1801
	struct cpu_hw_events *cpuc;
V
Vince Weaver 已提交
1802
	int bit, loops;
1803
	u64 ack, status;
1804

1805
	data.addr = 0;
1806
	data.raw = NULL;
1807

1808
	cpuc = &__get_cpu_var(cpu_hw_events);
I
Ingo Molnar 已提交
1809

1810
	perf_disable();
1811
	intel_pmu_drain_bts_buffer(cpuc);
1812
	status = intel_pmu_get_status();
1813 1814 1815 1816
	if (!status) {
		perf_enable();
		return 0;
	}
1817

1818
	loops = 0;
I
Ingo Molnar 已提交
1819
again:
1820
	if (++loops > 100) {
1821 1822
		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
		perf_event_print_debug();
1823 1824
		intel_pmu_reset();
		perf_enable();
1825 1826 1827
		return 1;
	}

1828
	inc_irq_stat(apic_perf_irqs);
I
Ingo Molnar 已提交
1829
	ack = status;
1830
	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1831
		struct perf_event *event = cpuc->events[bit];
I
Ingo Molnar 已提交
1832 1833

		clear_bit(bit, (unsigned long *) &status);
1834
		if (!test_bit(bit, cpuc->active_mask))
I
Ingo Molnar 已提交
1835 1836
			continue;

1837
		if (!intel_pmu_save_and_restart(event))
1838 1839
			continue;

1840
		data.period = event->hw.last_period;
1841

1842 1843
		if (perf_event_overflow(event, 1, &data, regs))
			intel_pmu_disable_event(&event->hw, bit);
I
Ingo Molnar 已提交
1844 1845
	}

1846
	intel_pmu_ack_status(ack);
I
Ingo Molnar 已提交
1847 1848 1849 1850

	/*
	 * Repeat if there is more work to be done:
	 */
1851
	status = intel_pmu_get_status();
I
Ingo Molnar 已提交
1852 1853
	if (status)
		goto again;
1854

1855
	perf_enable();
1856 1857

	return 1;
1858 1859
}

1860
static int amd_pmu_handle_irq(struct pt_regs *regs)
1861
{
1862
	struct perf_sample_data data;
1863 1864 1865
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
V
Vince Weaver 已提交
1866
	int idx, handled = 0;
1867 1868
	u64 val;

1869
	data.addr = 0;
1870
	data.raw = NULL;
1871

1872
	cpuc = &__get_cpu_var(cpu_hw_events);
1873

1874
	for (idx = 0; idx < x86_pmu.num_events; idx++) {
1875
		if (!test_bit(idx, cpuc->active_mask))
1876
			continue;
1877

1878 1879
		event = cpuc->events[idx];
		hwc = &event->hw;
1880

1881 1882
		val = x86_perf_event_update(event, hwc, idx);
		if (val & (1ULL << (x86_pmu.event_bits - 1)))
1883
			continue;
1884

1885
		/*
1886
		 * event overflow
1887 1888
		 */
		handled		= 1;
1889
		data.period	= event->hw.last_period;
1890

1891
		if (!x86_perf_event_set_period(event, hwc, idx))
1892 1893
			continue;

1894 1895
		if (perf_event_overflow(event, 1, &data, regs))
			amd_pmu_disable_event(hwc, idx);
1896
	}
1897

1898 1899 1900
	if (handled)
		inc_irq_stat(apic_perf_irqs);

1901 1902
	return handled;
}
1903

1904 1905 1906 1907 1908
void smp_perf_pending_interrupt(struct pt_regs *regs)
{
	irq_enter();
	ack_APIC_irq();
	inc_irq_stat(apic_pending_irqs);
1909
	perf_event_do_pending();
1910 1911 1912
	irq_exit();
}

1913
void set_perf_event_pending(void)
1914
{
1915
#ifdef CONFIG_X86_LOCAL_APIC
1916 1917 1918
	if (!x86_pmu.apic || !x86_pmu_initialized())
		return;

1919
	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1920
#endif
1921 1922
}

1923
void perf_events_lapic_init(void)
I
Ingo Molnar 已提交
1924
{
1925 1926
#ifdef CONFIG_X86_LOCAL_APIC
	if (!x86_pmu.apic || !x86_pmu_initialized())
I
Ingo Molnar 已提交
1927
		return;
1928

I
Ingo Molnar 已提交
1929
	/*
1930
	 * Always use NMI for PMU
I
Ingo Molnar 已提交
1931
	 */
1932
	apic_write(APIC_LVTPC, APIC_DM_NMI);
1933
#endif
I
Ingo Molnar 已提交
1934 1935 1936
}

static int __kprobes
1937
perf_event_nmi_handler(struct notifier_block *self,
I
Ingo Molnar 已提交
1938 1939 1940 1941
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct pt_regs *regs;
1942

1943
	if (!atomic_read(&active_events))
1944 1945
		return NOTIFY_DONE;

1946 1947 1948 1949
	switch (cmd) {
	case DIE_NMI:
	case DIE_NMI_IPI:
		break;
I
Ingo Molnar 已提交
1950

1951
	default:
I
Ingo Molnar 已提交
1952
		return NOTIFY_DONE;
1953
	}
I
Ingo Molnar 已提交
1954 1955 1956

	regs = args->regs;

1957
#ifdef CONFIG_X86_LOCAL_APIC
I
Ingo Molnar 已提交
1958
	apic_write(APIC_LVTPC, APIC_DM_NMI);
1959
#endif
1960 1961
	/*
	 * Can't rely on the handled return value to say it was our NMI, two
1962
	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
1963 1964 1965 1966
	 *
	 * If the first NMI handles both, the latter will be empty and daze
	 * the CPU.
	 */
1967
	x86_pmu.handle_irq(regs);
I
Ingo Molnar 已提交
1968

1969
	return NOTIFY_STOP;
I
Ingo Molnar 已提交
1970 1971
}

1972 1973
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
1974 1975
	.next			= NULL,
	.priority		= 1
I
Ingo Molnar 已提交
1976 1977
};

1978
static __initconst struct x86_pmu p6_pmu = {
V
Vince Weaver 已提交
1979 1980 1981 1982
	.name			= "p6",
	.handle_irq		= p6_pmu_handle_irq,
	.disable_all		= p6_pmu_disable_all,
	.enable_all		= p6_pmu_enable_all,
1983 1984
	.enable			= p6_pmu_enable_event,
	.disable		= p6_pmu_disable_event,
V
Vince Weaver 已提交
1985 1986 1987 1988 1989
	.eventsel		= MSR_P6_EVNTSEL0,
	.perfctr		= MSR_P6_PERFCTR0,
	.event_map		= p6_pmu_event_map,
	.raw_event		= p6_pmu_raw_event,
	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
1990
	.apic			= 1,
V
Vince Weaver 已提交
1991 1992
	.max_period		= (1ULL << 31) - 1,
	.version		= 0,
1993
	.num_events		= 2,
V
Vince Weaver 已提交
1994
	/*
1995
	 * Events have 40 bits implemented. However they are designed such
V
Vince Weaver 已提交
1996
	 * that bits [32-39] are sign extensions of bit 31. As such the
1997
	 * effective width of a event for P6-like PMU is 32 bits only.
V
Vince Weaver 已提交
1998 1999 2000
	 *
	 * See IA-32 Intel Architecture Software developer manual Vol 3B
	 */
2001 2002
	.event_bits		= 32,
	.event_mask		= (1ULL << 32) - 1,
2003
	.get_event_idx		= intel_get_event_idx,
V
Vince Weaver 已提交
2004 2005
};

2006
static __initconst struct x86_pmu intel_pmu = {
2007
	.name			= "Intel",
2008
	.handle_irq		= intel_pmu_handle_irq,
2009 2010
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
2011 2012
	.enable			= intel_pmu_enable_event,
	.disable		= intel_pmu_disable_event,
2013 2014
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
2015 2016
	.event_map		= intel_pmu_event_map,
	.raw_event		= intel_pmu_raw_event,
2017
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
2018
	.apic			= 1,
2019 2020 2021
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
2022
	 * the generic event period:
2023 2024
	 */
	.max_period		= (1ULL << 31) - 1,
2025 2026
	.enable_bts		= intel_pmu_enable_bts,
	.disable_bts		= intel_pmu_disable_bts,
2027
	.get_event_idx		= intel_get_event_idx,
2028 2029
};

2030
static __initconst struct x86_pmu amd_pmu = {
2031
	.name			= "AMD",
2032
	.handle_irq		= amd_pmu_handle_irq,
2033 2034
	.disable_all		= amd_pmu_disable_all,
	.enable_all		= amd_pmu_enable_all,
2035 2036
	.enable			= amd_pmu_enable_event,
	.disable		= amd_pmu_disable_event,
2037 2038
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
2039 2040
	.event_map		= amd_pmu_event_map,
	.raw_event		= amd_pmu_raw_event,
2041
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
2042 2043 2044
	.num_events		= 4,
	.event_bits		= 48,
	.event_mask		= (1ULL << 48) - 1,
2045
	.apic			= 1,
2046 2047
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
2048
	.get_event_idx		= gen_get_event_idx,
2049 2050
};

2051
static __init int p6_pmu_init(void)
V
Vince Weaver 已提交
2052 2053 2054 2055 2056 2057 2058 2059 2060
{
	switch (boot_cpu_data.x86_model) {
	case 1:
	case 3:  /* Pentium Pro */
	case 5:
	case 6:  /* Pentium II */
	case 7:
	case 8:
	case 11: /* Pentium III */
2061
		event_constraints = intel_p6_event_constraints;
V
Vince Weaver 已提交
2062 2063 2064
		break;
	case 9:
	case 13:
2065
		/* Pentium M */
2066
		event_constraints = intel_p6_event_constraints;
2067
		break;
V
Vince Weaver 已提交
2068 2069 2070 2071 2072 2073
	default:
		pr_cont("unsupported p6 CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

2074 2075
	x86_pmu = p6_pmu;

V
Vince Weaver 已提交
2076 2077 2078
	return 0;
}

2079
static __init int intel_pmu_init(void)
I
Ingo Molnar 已提交
2080
{
2081
	union cpuid10_edx edx;
I
Ingo Molnar 已提交
2082
	union cpuid10_eax eax;
2083
	unsigned int unused;
2084
	unsigned int ebx;
2085
	int version;
I
Ingo Molnar 已提交
2086

V
Vince Weaver 已提交
2087 2088 2089 2090 2091
	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
		/* check for P6 processor family */
	   if (boot_cpu_data.x86 == 6) {
		return p6_pmu_init();
	   } else {
2092
		return -ENODEV;
V
Vince Weaver 已提交
2093 2094
	   }
	}
2095

I
Ingo Molnar 已提交
2096 2097
	/*
	 * Check whether the Architectural PerfMon supports
2098
	 * Branch Misses Retired hw_event or not.
I
Ingo Molnar 已提交
2099
	 */
2100
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
I
Ingo Molnar 已提交
2101
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2102
		return -ENODEV;
I
Ingo Molnar 已提交
2103

2104 2105
	version = eax.split.version_id;
	if (version < 2)
2106
		return -ENODEV;
2107

2108 2109
	x86_pmu				= intel_pmu;
	x86_pmu.version			= version;
2110 2111 2112
	x86_pmu.num_events		= eax.split.num_events;
	x86_pmu.event_bits		= eax.split.bit_width;
	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
2113 2114

	/*
2115 2116
	 * Quirk: v2 perfmon does not report fixed-purpose events, so
	 * assume at least 3 events:
2117
	 */
2118
	x86_pmu.num_events_fixed	= max((int)edx.split.num_events_fixed, 3);
2119

2120
	/*
2121
	 * Install the hw-cache-events table:
2122 2123
	 */
	switch (boot_cpu_data.x86_model) {
2124 2125 2126 2127
	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
	case 29: /* six-core 45 nm xeon "Dunnington" */
2128
		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2129
		       sizeof(hw_cache_event_ids));
2130

2131
		pr_cont("Core2 events, ");
2132
		event_constraints = intel_core_event_constraints;
2133 2134 2135 2136
		break;
	default:
	case 26:
		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2137
		       sizeof(hw_cache_event_ids));
2138

2139
		event_constraints = intel_nehalem_event_constraints;
2140
		pr_cont("Nehalem/Corei7 events, ");
2141 2142 2143
		break;
	case 28:
		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2144
		       sizeof(hw_cache_event_ids));
2145

2146
		pr_cont("Atom events, ");
2147 2148
		break;
	}
2149
	return 0;
2150 2151
}

2152
static __init int amd_pmu_init(void)
2153
{
2154 2155 2156 2157
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

2158
	x86_pmu = amd_pmu;
2159

2160 2161 2162
	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));
2163

2164
	return 0;
2165 2166
}

2167 2168 2169 2170 2171 2172 2173 2174 2175 2176
static void __init pmu_check_apic(void)
{
	if (cpu_has_apic)
		return;

	x86_pmu.apic = 0;
	pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
	pr_info("no hardware sampling interrupt available.\n");
}

2177
void __init init_hw_perf_events(void)
2178
{
2179 2180
	int err;

2181
	pr_info("Performance Events: ");
2182

2183 2184
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
2185
		err = intel_pmu_init();
2186
		break;
2187
	case X86_VENDOR_AMD:
2188
		err = amd_pmu_init();
2189
		break;
2190 2191
	default:
		return;
2192
	}
2193
	if (err != 0) {
2194
		pr_cont("no PMU driver, software events only.\n");
2195
		return;
2196
	}
2197

2198 2199
	pmu_check_apic();

2200
	pr_cont("%s PMU driver.\n", x86_pmu.name);
2201

2202 2203 2204 2205
	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
I
Ingo Molnar 已提交
2206
	}
2207 2208
	perf_event_mask = (1 << x86_pmu.num_events) - 1;
	perf_max_events = x86_pmu.num_events;
I
Ingo Molnar 已提交
2209

2210 2211 2212 2213
	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
2214
	}
2215

2216 2217 2218
	perf_event_mask |=
		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
	x86_pmu.intel_ctrl = perf_event_mask;
I
Ingo Molnar 已提交
2219

2220 2221
	perf_events_lapic_init();
	register_die_notifier(&perf_event_nmi_notifier);
2222

I
Ingo Molnar 已提交
2223 2224 2225 2226 2227 2228 2229
	pr_info("... version:                %d\n",     x86_pmu.version);
	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
	pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
	pr_info("... event mask:             %016Lx\n", perf_event_mask);
I
Ingo Molnar 已提交
2230
}
I
Ingo Molnar 已提交
2231

2232
static inline void x86_pmu_read(struct perf_event *event)
2233
{
2234
	x86_perf_event_update(event, &event->hw, event->hw.idx);
2235 2236
}

2237 2238 2239 2240
static const struct pmu pmu = {
	.enable		= x86_pmu_enable,
	.disable	= x86_pmu_disable,
	.read		= x86_pmu_read,
2241
	.unthrottle	= x86_pmu_unthrottle,
I
Ingo Molnar 已提交
2242 2243
};

2244 2245 2246 2247 2248
static int
validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event fake_event = event->hw;

2249
	if (event->pmu && event->pmu != &pmu)
2250 2251
		return 0;

2252
	return x86_schedule_event(cpuc, &fake_event) >= 0;
2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275
}

static int validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct cpu_hw_events fake_pmu;

	memset(&fake_pmu, 0, sizeof(fake_pmu));

	if (!validate_event(&fake_pmu, leader))
		return -ENOSPC;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(&fake_pmu, sibling))
			return -ENOSPC;
	}

	if (!validate_event(&fake_pmu, event))
		return -ENOSPC;

	return 0;
}

2276
const struct pmu *hw_perf_event_init(struct perf_event *event)
I
Ingo Molnar 已提交
2277 2278 2279
{
	int err;

2280
	err = __hw_perf_event_init(event);
2281 2282 2283 2284
	if (!err) {
		if (event->group_leader != event)
			err = validate_group(event);
	}
2285
	if (err) {
2286 2287
		if (event->destroy)
			event->destroy(event);
2288
		return ERR_PTR(err);
2289
	}
I
Ingo Molnar 已提交
2290

2291
	return &pmu;
I
Ingo Molnar 已提交
2292
}
2293 2294 2295 2296 2297 2298

/*
 * callchain support
 */

static inline
2299
void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2300
{
2301
	if (entry->nr < PERF_MAX_STACK_DEPTH)
2302 2303 2304
		entry->ip[entry->nr++] = ip;
}

2305 2306
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2307
static DEFINE_PER_CPU(int, in_ignored_frame);
2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322


static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Ignore warnings */
}

static void backtrace_warning(void *data, char *msg)
{
	/* Ignore warnings */
}

static int backtrace_stack(void *data, char *name)
{
2323 2324 2325
	per_cpu(in_ignored_frame, smp_processor_id()) =
			x86_is_stack_id(NMI_STACK, name) ||
			x86_is_stack_id(DEBUG_STACK, name);
2326

2327
	return 0;
2328 2329 2330 2331 2332 2333
}

static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *entry = data;

2334
	if (per_cpu(in_ignored_frame, smp_processor_id()))
2335 2336
		return;

2337 2338 2339 2340 2341 2342 2343 2344 2345
	if (reliable)
		callchain_store(entry, addr);
}

static const struct stacktrace_ops backtrace_ops = {
	.warning		= backtrace_warning,
	.warning_symbol		= backtrace_warning_symbol,
	.stack			= backtrace_stack,
	.address		= backtrace_address,
2346
	.walk_stack		= print_context_stack_bp,
2347 2348
};

2349 2350
#include "../dumpstack.h"

2351 2352 2353
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
2354
	callchain_store(entry, PERF_CONTEXT_KERNEL);
2355
	callchain_store(entry, regs->ip);
2356

2357
	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
2358 2359
}

2360 2361 2362 2363 2364
/*
 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 */
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
2365
{
2366 2367 2368 2369 2370
	unsigned long offset, addr = (unsigned long)from;
	int type = in_nmi() ? KM_NMI : KM_IRQ0;
	unsigned long size, len = 0;
	struct page *page;
	void *map;
2371 2372
	int ret;

2373 2374 2375 2376
	do {
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;
2377

2378 2379
		offset = addr & (PAGE_SIZE - 1);
		size = min(PAGE_SIZE - offset, n - len);
2380

2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401
		map = kmap_atomic(page, type);
		memcpy(to, map+offset, size);
		kunmap_atomic(map, type);
		put_page(page);

		len  += size;
		to   += size;
		addr += size;

	} while (len < n);

	return len;
}

static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
	unsigned long bytes;

	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));

	return bytes == sizeof(*frame);
2402 2403 2404 2405 2406 2407 2408 2409
}

static void
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	struct stack_frame frame;
	const void __user *fp;

2410 2411 2412
	if (!user_mode(regs))
		regs = task_pt_regs(current);

2413
	fp = (void __user *)regs->bp;
2414

2415
	callchain_store(entry, PERF_CONTEXT_USER);
2416 2417
	callchain_store(entry, regs->ip);

2418
	while (entry->nr < PERF_MAX_STACK_DEPTH) {
2419
		frame.next_frame	     = NULL;
2420 2421 2422 2423 2424
		frame.return_address = 0;

		if (!copy_stack_frame(fp, &frame))
			break;

2425
		if ((unsigned long)fp < regs->sp)
2426 2427 2428
			break;

		callchain_store(entry, frame.return_address);
2429
		fp = frame.next_frame;
2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460
	}
}

static void
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	int is_user;

	if (!regs)
		return;

	is_user = user_mode(regs);

	if (!current || current->pid == 0)
		return;

	if (is_user && current->state != TASK_RUNNING)
		return;

	if (!is_user)
		perf_callchain_kernel(regs, entry);

	if (current->mm)
		perf_callchain_user(regs, entry);
}

struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry;

	if (in_nmi())
2461
		entry = &__get_cpu_var(pmc_nmi_entry);
2462
	else
2463
		entry = &__get_cpu_var(pmc_irq_entry);
2464 2465 2466 2467 2468 2469 2470

	entry->nr = 0;

	perf_do_callchain(regs, entry);

	return entry;
}
2471

2472
void hw_perf_event_setup_online(int cpu)
2473 2474 2475
{
	init_debug_store_on_cpu(cpu);
}