/*
 * Performance events x86 architecture code
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/cpu.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>

static u64 perf_event_mask __read_mostly;

/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS	4

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

/* The size of a per-cpu BTS buffer in bytes: */
#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)

/* The BTS overflow threshold in bytes from the end of the buffer: */
#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)


/*
 * Bits in the debugctlmsr controlling branch tracing.
 */
#define X86_DEBUGCTL_TR			(1 << 6)
#define X86_DEBUGCTL_BTS		(1 << 7)
#define X86_DEBUGCTL_BTINT		(1 << 8)
#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)

/*
 * A debug store configuration.
 *
 * We only support architectures that use 64bit fields.
 */
struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;
	u64	pebs_index;
	u64	pebs_absolute_maximum;
	u64	pebs_interrupt_threshold;
	u64	pebs_event_reset[MAX_PEBS_EVENTS];
};

struct cpu_hw_events {
	struct perf_event	*events[X86_PMC_IDX_MAX];
	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		interrupts;
	int			enabled;
	struct debug_store	*ds;
};

struct event_constraint {
	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int		code;
};

#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }

#define for_each_event_constraint(e, c) \
	for ((e) = (c); (e)->idxmsk[0]; (e)++)


/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
	const char	*name;
	int		version;
	int		(*handle_irq)(struct pt_regs *);
	void		(*disable_all)(void);
	void		(*enable_all)(void);
	void		(*enable)(struct hw_perf_event *, int);
	void		(*disable)(struct hw_perf_event *, int);
	unsigned	eventsel;
	unsigned	perfctr;
	u64		(*event_map)(int);
	u64		(*raw_event)(u64);
	int		max_events;
	int		num_events;
	int		num_events_fixed;
	int		event_bits;
	u64		event_mask;
	int		apic;
	u64		max_period;
	u64		intel_ctrl;
	void		(*enable_bts)(u64 config);
	void		(*disable_bts)(void);
	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
					 struct hw_perf_event *hwc);
};

static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
};

static const struct event_constraint *event_constraints;

/*
 * Not sure about some of these
 */
static const u64 p6_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
};

static u64 p6_pmu_event_map(int hw_event)
{
	return p6_perfmon_event_map[hw_event];
}

/*
 * Event setting that is specified not to count anything.
 * We use this to effectively disable a counter.
 *
 * L2_RQSTS with 0 MESI unit mask.
 */
#define P6_NOP_EVENT			0x0000002EULL

static u64 p6_pmu_raw_event(u64 hw_event)
{
#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
#define P6_EVNTSEL_INV_MASK		0x00800000ULL
#define P6_EVNTSEL_REG_MASK		0xFF000000ULL

#define P6_EVNTSEL_MASK			\
	(P6_EVNTSEL_EVENT_MASK |	\
	 P6_EVNTSEL_UNIT_MASK  |	\
	 P6_EVNTSEL_EDGE_MASK  |	\
	 P6_EVNTSEL_INV_MASK   |	\
	 P6_EVNTSEL_REG_MASK)

	return hw_event & P6_EVNTSEL_MASK;
}

static const struct event_constraint intel_p6_event_constraints[] =
{
	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
	EVENT_CONSTRAINT_END
};

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
static const u64 intel_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
};

static const struct event_constraint intel_core_event_constraints[] =
{
	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
	EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
	EVENT_CONSTRAINT(0x18, 0x1),	/* IDLE_DURING_DIV */
	EVENT_CONSTRAINT(0x19, 0x2),	/* DELAYED_BYPASS */
	EVENT_CONSTRAINT(0xa1, 0x1),	/* RS_UOPS_DISPATCH_CYCLES */
	EVENT_CONSTRAINT(0xcb, 0x1),	/* MEM_LOAD_RETIRED */
	EVENT_CONSTRAINT_END
};

static const struct event_constraint intel_nehalem_event_constraints[] =
{
	EVENT_CONSTRAINT(0x40, 0x3),	/* L1D_CACHE_LD */
	EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST */
	EVENT_CONSTRAINT(0x42, 0x3),	/* L1D_CACHE_LOCK */
	EVENT_CONSTRAINT(0x43, 0x3),	/* L1D_ALL_REF */
	EVENT_CONSTRAINT(0x4e, 0x3),	/* L1D_PREFETCH */
	EVENT_CONSTRAINT(0x4c, 0x3),	/* LOAD_HIT_PRE */
	EVENT_CONSTRAINT(0x51, 0x3),	/* L1D */
	EVENT_CONSTRAINT(0x52, 0x3),	/* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
	EVENT_CONSTRAINT(0x53, 0x3),	/* L1D_CACHE_LOCK_FB_HIT */
	EVENT_CONSTRAINT(0xc5, 0x3),	/* CACHE_LOCK_CYCLES */
	EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
	return intel_perfmon_event_map[hw_event];
}

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

static __initconst u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static __initconst u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static __initconst u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
451 452
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static u64 intel_pmu_raw_event(u64 hw_event)
{
#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL

#define CORE_EVNTSEL_MASK		\
	(CORE_EVNTSEL_EVENT_MASK |	\
	 CORE_EVNTSEL_UNIT_MASK  |	\
	 CORE_EVNTSEL_EDGE_MASK  |	\
	 CORE_EVNTSEL_INV_MASK  |	\
	 CORE_EVNTSEL_REG_MASK)

	return hw_event & CORE_EVNTSEL_MASK;
}

static __initconst u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

static u64 amd_pmu_raw_event(u64 hw_event)
{
#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK			\
	(K7_EVNTSEL_EVENT_MASK |	\
	 K7_EVNTSEL_UNIT_MASK  |	\
	 K7_EVNTSEL_EDGE_MASK  |	\
	 K7_EVNTSEL_INV_MASK   |	\
	 K7_EVNTSEL_REG_MASK)

	return hw_event & K7_EVNTSEL_MASK;
}

/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
static u64
x86_perf_event_update(struct perf_event *event,
			struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - x86_pmu.event_bits;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->event_base + idx, new_raw_count);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

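/*
 * Reserve the performance-counter and event-select MSRs (and quiesce the
 * lapic NMI watchdog) so that perf has exclusive use of the PMU.
 */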
static bool reserve_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	int i;

	if (nmi_watchdog == NMI_LOCAL_APIC)
		disable_lapic_nmi_watchdog();

	for (i = 0; i < x86_pmu.num_events; i++) {
		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_events; i++) {
		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
			goto eventsel_fail;
	}
#endif

	return true;

#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu.eventsel + i);

	i = x86_pmu.num_events;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu.perfctr + i);

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();

	return false;
#endif
}

static void release_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	int i;

	for (i = 0; i < x86_pmu.num_events; i++) {
		release_perfctr_nmi(x86_pmu.perfctr + i);
		release_evntsel_nmi(x86_pmu.eventsel + i);
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();
#endif
}

static inline bool bts_available(void)
{
	return x86_pmu.enable_bts != NULL;
}

static inline void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

static inline void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static void release_bts_hardware(void)
{
	int cpu;

	if (!bts_available())
		return;

	get_online_cpus();

	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

		if (!ds)
			continue;

		per_cpu(cpu_hw_events, cpu).ds = NULL;

		kfree((void *)(unsigned long)ds->bts_buffer_base);
		kfree(ds);
	}

	put_online_cpus();
}

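/*
 * Allocate a BTS buffer and debug store for each possible CPU and point
 * MSR_IA32_DS_AREA at it on every online CPU.
 */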
static int reserve_bts_hardware(void)
{
	int cpu, err = 0;

	if (!bts_available())
		return 0;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		struct debug_store *ds;
		void *buffer;

		err = -ENOMEM;
		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
		if (unlikely(!buffer))
			break;

		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
		if (unlikely(!ds)) {
			kfree(buffer);
			break;
		}

		ds->bts_buffer_base = (u64)(unsigned long)buffer;
		ds->bts_index = ds->bts_buffer_base;
		ds->bts_absolute_maximum =
			ds->bts_buffer_base + BTS_BUFFER_SIZE;
		ds->bts_interrupt_threshold =
			ds->bts_absolute_maximum - BTS_OVFL_TH;

		per_cpu(cpu_hw_events, cpu).ds = ds;
		err = 0;
	}

	if (err)
		release_bts_hardware();
	else {
		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();

	return err;
}

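/*
 * Drop the last active-events reference and give the PMC and BTS
 * hardware back; installed as event->destroy by __hw_perf_event_init().
 */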
static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		release_bts_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

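/*
 * Map a PERF_TYPE_HW_CACHE attr->config (cache type, op and result packed
 * into the low 24 bits) onto a model-specific raw event code.
 */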
static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
{
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;

	config = attr->config;

	cache_type = (config >>  0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >>  8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	val = hw_cache_event_ids[cache_type][cache_op][cache_result];

	if (val == 0)
		return -ENOENT;

	if (val == -1)
		return -EINVAL;

	hwc->config |= val;

	return 0;
}

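/*
 * Start branch tracing: set the TR/BTS/BTINT bits in DEBUGCTLMSR and
 * suppress OS resp. user branches when the event excludes them.
 */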
static void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= X86_DEBUGCTL_TR;
	debugctlmsr |= X86_DEBUGCTL_BTS;
	debugctlmsr |= X86_DEBUGCTL_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

static void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

/*
 * Setup the hardware configuration for a given attr_type
 */
static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	u64 config;
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = 0;
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			if (!reserve_pmc_hardware())
				err = -EBUSY;
			else
				err = reserve_bts_hardware();
		}
		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

	event->destroy = hw_perf_event_destroy;

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	hwc->config = ARCH_PERFMON_EVENTSEL_INT;

	hwc->idx = -1;

	/*
	 * Count user and OS events unless requested not to.
	 */
	if (!attr->exclude_user)
		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!attr->exclude_kernel)
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;

	if (!hwc->sample_period) {
		hwc->sample_period = x86_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	} else {
		/*
		 * If we have a PMU initialized but no APIC
		 * interrupts, we cannot sample hardware
		 * events (user-space has to fall back and
		 * sample via a hrtimer based software event):
		 */
		if (!x86_pmu.apic)
			return -EOPNOTSUPP;
	}

	/*
	 * Raw hw_event type provide the config in the hw_event structure
	 */
	if (attr->type == PERF_TYPE_RAW) {
		hwc->config |= x86_pmu.raw_event(attr->config);
		return 0;
	}

	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, attr);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	/*
	 * The generic map:
	 */
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

	/*
	 * Branch tracing:
	 */
	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
	    (hwc->sample_period == 1)) {
		/* BTS is not supported by this architecture. */
		if (!bts_available())
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
			return -EOPNOTSUPP;
	}

	hwc->config |= config;

	return 0;
}

static void p6_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	barrier();

	/* p6 only has one enable register */
	rdmsrl(MSR_P6_EVNTSEL0, val);
	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsrl(MSR_P6_EVNTSEL0, val);
}

static void intel_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	barrier();

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();
}

static void amd_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	/*
	 * ensure we write the disable before we start disabling the
	 * events proper, so that amd_pmu_enable_event() does the
	 * right thing.
	 */
	barrier();

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}
}

void hw_perf_disable(void)
{
	if (!x86_pmu_initialized())
		return;
	return x86_pmu.disable_all();
}

static void p6_pmu_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long val;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	/* p6 only has one enable register */
	rdmsrl(MSR_P6_EVNTSEL0, val);
	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsrl(MSR_P6_EVNTSEL0, val);
}

static void intel_pmu_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[X86_PMC_IDX_FIXED_BTS];

		if (WARN_ON_ONCE(!event))
			return;

		intel_pmu_enable_bts(event->hw.config);
	}
}

static void amd_pmu_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		struct perf_event *event = cpuc->events[idx];
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		val = event->hw.config;
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}
}

void hw_perf_enable(void)
{
	if (!x86_pmu_initialized())
		return;
	x86_pmu.enable_all();
}

static inline u64 intel_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}

static inline void intel_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	(void)checking_wrmsrl(hwc->config_base + idx,
			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
}

static inline void
intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, mask;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val = P6_NOP_EVENT;

	if (cpuc->enabled)
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;

	(void)checking_wrmsrl(hwc->config_base + idx, val);
}

static inline void
intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
		intel_pmu_disable_bts();
		return;
	}

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_disable_fixed(hwc, idx);
		return;
	}

	x86_pmu_disable_event(hwc, idx);
}

static inline void
amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	x86_pmu_disable_event(hwc, idx);
}

static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 */
static int
x86_perf_event_set_period(struct perf_event *event,
			     struct hw_perf_event *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int err, ret = 0;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	/*
	 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
	 */
	if (unlikely(left < 2))
		left = 2;

	if (left > x86_pmu.max_period)
		left = x86_pmu.max_period;

	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future deltas:
	 */
	atomic64_set(&hwc->prev_count, (u64)-left);

	err = checking_wrmsrl(hwc->event_base + idx,
			     (u64)(-left) & x86_pmu.event_mask);

	perf_event_update_userpage(event);

	return ret;
}

static inline void
intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;
	int err;

	/*
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 */
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;
	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	val = hwc->config;
	if (cpuc->enabled)
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;

	(void)checking_wrmsrl(hwc->config_base + idx, val);
}


static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
		if (!__get_cpu_var(cpu_hw_events).enabled)
			return;

		intel_pmu_enable_bts(hwc->config);
		return;
	}

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_enable_fixed(hwc, idx);
		return;
	}

	x86_pmu_enable_event(hwc, idx);
}

static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->enabled)
		x86_pmu_enable_event(hwc, idx);
}

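/*
 * Map the event onto a fixed-purpose counter index, X86_PMC_IDX_FIXED_BTS
 * for the BTS pseudo-event, or -1 if only a generic counter will do.
 */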
static int fixed_mode_idx(struct hw_perf_event *hwc)
{
	unsigned int hw_event;

	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;

	if (unlikely((hw_event ==
		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
		     (hwc->sample_period == 1)))
		return X86_PMC_IDX_FIXED_BTS;

	if (!x86_pmu.num_events_fixed)
		return -1;

	/*
	 * fixed counters do not take all possible filters
	 */
	if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
		return -1;

	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
		return X86_PMC_IDX_FIXED_CPU_CYCLES;
	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
		return X86_PMC_IDX_FIXED_BUS_CYCLES;

	return -1;
}

/*
 * generic counter allocator: get next free counter
 */
static int
gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
	int idx;

	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
	return idx == x86_pmu.num_events ? -1 : idx;
}

/*
 * intel-specific counter allocator: check event constraints
 */
static int
intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
	const struct event_constraint *event_constraint;
	int i, code;

	if (!event_constraints)
		goto skip;

	code = hwc->config & CORE_EVNTSEL_EVENT_MASK;

	for_each_event_constraint(event_constraint, event_constraints) {
		if (code == event_constraint->code) {
			for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
				if (!test_and_set_bit(i, cpuc->used_mask))
					return i;
			}
			return -1;
		}
	}
skip:
	return gen_get_event_idx(cpuc, hwc);
}

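/*
 * Assign a counter to the event: try the BTS fake counter, then a
 * fixed-purpose counter, then fall back to a free generic counter.
 */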
static int
x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
	int idx;

	idx = fixed_mode_idx(hwc);
	if (idx == X86_PMC_IDX_FIXED_BTS) {
		/* BTS is already occupied. */
		if (test_and_set_bit(idx, cpuc->used_mask))
			return -EAGAIN;

		hwc->config_base	= 0;
		hwc->event_base		= 0;
		hwc->idx		= idx;
	} else if (idx >= 0) {
		/*
		 * Try to get the fixed event, if that is already taken
		 * then try to get a generic event:
		 */
		if (test_and_set_bit(idx, cpuc->used_mask))
			goto try_generic;

		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
		hwc->event_base =
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
		hwc->idx = idx;
	} else {
		idx = hwc->idx;
		/* Try to get the previous generic event again */
		if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
			idx = x86_pmu.get_event_idx(cpuc, hwc);
			if (idx == -1)
				return -EAGAIN;

			set_bit(idx, cpuc->used_mask);
			hwc->idx = idx;
		}
		hwc->config_base = x86_pmu.eventsel;
		hwc->event_base  = x86_pmu.perfctr;
	}

	return idx;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in event:
 */
static int x86_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = x86_schedule_event(cpuc, hwc);
	if (idx < 0)
		return idx;

	perf_events_lapic_init();

	x86_pmu.disable(hwc, idx);

	cpuc->events[idx] = event;
	set_bit(idx, cpuc->active_mask);

	x86_perf_event_set_period(event, hwc, idx);
	x86_pmu.enable(hwc, idx);

	perf_event_update_userpage(event);

	return 0;
}

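/*
 * Re-enable an event that was throttled by the interrupt handler.
 */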
static void x86_pmu_unthrottle(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
				cpuc->events[hwc->idx] != event))
		return;

	x86_pmu.enable(hwc, hwc->idx);
}

void perf_event_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	struct cpu_hw_events *cpuc;
	unsigned long flags;
	int cpu, idx;

	if (!x86_pmu.num_events)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_events, cpu);

	if (x86_pmu.version >= 2) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

		pr_info("\n");
		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
	}
	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);

		prev_left = per_cpu(pmc_prev_left[idx], cpu);

		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_restore(flags);
}

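/*
 * Flush the BTS buffer: emit one sample per branch record on behalf of
 * the BTS event and reset the buffer index.
 */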
static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
{
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!event)
		return;

	if (!ds)
		return;

	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= at)
		return;

	ds->bts_index = ds->bts_buffer_base;


	data.period	= event->hw.last_period;
	data.addr	= 0;
	data.raw	= NULL;
	regs.ip		= 0;

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, event,
			      header.size * (top - at), 1, 1))
		return;

	for (; at < top; at++) {
		data.ip		= at->from;
		data.addr	= at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
}

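/*
 * Remove the event from the PMU: stop the counter, fold the remaining
 * delta into the event and release the counter slot.
 */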
static void x86_pmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/*
	 * Must be done before we disable, otherwise the nmi handler
	 * could reenable again:
	 */
	clear_bit(idx, cpuc->active_mask);
	x86_pmu.disable(hwc, idx);

	/*
	 * Make sure the cleared pointer becomes visible before we
	 * (potentially) free the event:
	 */
	barrier();

	/*
	 * Drain the remaining delta count out of a event
	 * that we are disabling:
	 */
	x86_perf_event_update(event, hwc, idx);

	/* Drain the remaining BTS records. */
	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
		intel_pmu_drain_bts_buffer(cpuc);

	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

/*
 * Save and restart an expired event. Called by NMI contexts,
 * so it has to be careful about preempting normal event ops:
 */
static int intel_pmu_save_and_restart(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	int ret;

	x86_perf_event_update(event, hwc, idx);
	ret = x86_perf_event_set_period(event, hwc, idx);

	if (event->state == PERF_EVENT_STATE_ACTIVE)
		intel_pmu_enable_event(hwc, idx);

	return ret;
}

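/*
 * Zero all generic and fixed counters and reset the BTS buffer; used to
 * recover from a runaway interrupt loop.
 */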
static void intel_pmu_reset(void)
{
	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
	unsigned long flags;
	int idx;

	if (!x86_pmu.num_events)
		return;

	local_irq_save(flags);

	printk("clearing PMU state on CPU#%d\n", smp_processor_id());

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
	}
	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
	}
	if (ds)
		ds->bts_index = ds->bts_buffer_base;

	local_irq_restore(flags);
}

static int p6_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	data.addr = 0;
	data.raw = NULL;

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

		event = cpuc->events[idx];
		hwc = &event->hw;

		val = x86_perf_event_update(event, hwc, idx);
		if (val & (1ULL << (x86_pmu.event_bits - 1)))
			continue;

		/*
		 * event overflow
		 */
		handled		= 1;
		data.period	= event->hw.last_period;

		if (!x86_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			p6_pmu_disable_event(hwc, idx);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	int bit, loops;
	u64 ack, status;

	data.addr = 0;
	data.raw = NULL;

	cpuc = &__get_cpu_var(cpu_hw_events);

	perf_disable();
	intel_pmu_drain_bts_buffer(cpuc);
	status = intel_pmu_get_status();
	if (!status) {
		perf_enable();
		return 0;
	}

	loops = 0;
again:
	if (++loops > 100) {
		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
		perf_event_print_debug();
		intel_pmu_reset();
		perf_enable();
		return 1;
	}

	inc_irq_stat(apic_perf_irqs);
	ack = status;
	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[bit];

		clear_bit(bit, (unsigned long *) &status);
		if (!test_bit(bit, cpuc->active_mask))
			continue;

		if (!intel_pmu_save_and_restart(event))
			continue;

		data.period = event->hw.last_period;

		if (perf_event_overflow(event, 1, &data, regs))
			intel_pmu_disable_event(&event->hw, bit);
	}

	intel_pmu_ack_status(ack);

	/*
	 * Repeat if there is more work to be done:
	 */
	status = intel_pmu_get_status();
	if (status)
		goto again;

	perf_enable();

	return 1;
}

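/*
 * AMD has no global overflow status register: poll each active counter
 * for overflow and restart the ones that wrapped.
 */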
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	data.addr = 0;
	data.raw = NULL;

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

		event = cpuc->events[idx];
		hwc = &event->hw;

		val = x86_perf_event_update(event, hwc, idx);
		if (val & (1ULL << (x86_pmu.event_bits - 1)))
			continue;

		/*
		 * event overflow
		 */
		handled		= 1;
		data.period	= event->hw.last_period;

		if (!x86_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			amd_pmu_disable_event(hwc, idx);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}

void smp_perf_pending_interrupt(struct pt_regs *regs)
{
	irq_enter();
	ack_APIC_irq();
	inc_irq_stat(apic_pending_irqs);
	perf_event_do_pending();
	irq_exit();
}

void set_perf_event_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	if (!x86_pmu.apic || !x86_pmu_initialized())
		return;

	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}

void perf_events_lapic_init(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	if (!x86_pmu.apic || !x86_pmu_initialized())
		return;

	/*
	 * Always use NMI for PMU
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
}

static int __kprobes
perf_event_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct pt_regs *regs;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
	case DIE_NMI_IPI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

#ifdef CONFIG_X86_LOCAL_APIC
	apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
	/*
	 * Can't rely on the handled return value to say it was our NMI, two
1955
	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
1956 1957 1958 1959
	 *
	 * If the first NMI handles both, the latter will be empty and daze
	 * the CPU.
	 */
1960
	x86_pmu.handle_irq(regs);
I
Ingo Molnar 已提交
1961

1962
	return NOTIFY_STOP;
I
Ingo Molnar 已提交
1963 1964
}

1965 1966
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
1967 1968
	.next			= NULL,
	.priority		= 1
I
Ingo Molnar 已提交
1969 1970
};

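/*
 * Vendor-specific PMU descriptions; the matching one is copied into
 * x86_pmu by the *_pmu_init() routines below.
 */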
static __initconst struct x86_pmu p6_pmu = {
	.name			= "p6",
	.handle_irq		= p6_pmu_handle_irq,
	.disable_all		= p6_pmu_disable_all,
	.enable_all		= p6_pmu_enable_all,
	.enable			= p6_pmu_enable_event,
	.disable		= p6_pmu_disable_event,
	.eventsel		= MSR_P6_EVNTSEL0,
	.perfctr		= MSR_P6_PERFCTR0,
	.event_map		= p6_pmu_event_map,
	.raw_event		= p6_pmu_raw_event,
	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
	.apic			= 1,
	.max_period		= (1ULL << 31) - 1,
	.version		= 0,
	.num_events		= 2,
	/*
	 * Events have 40 bits implemented. However they are designed such
	 * that bits [32-39] are sign extensions of bit 31. As such the
	 * effective width of an event for P6-like PMUs is 32 bits only.
	 *
	 * See the IA-32 Intel Architecture Software Developer's Manual, Vol 3B.
	 */
	.event_bits		= 32,
	.event_mask		= (1ULL << 32) - 1,
	.get_event_idx		= intel_get_event_idx,
};

static __initconst struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_event,
	.disable		= intel_pmu_disable_event,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.raw_event		= intel_pmu_raw_event,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.enable_bts		= intel_pmu_enable_bts,
	.disable_bts		= intel_pmu_disable_bts,
	.get_event_idx		= intel_get_event_idx,
};

static __initconst struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= amd_pmu_handle_irq,
	.disable_all		= amd_pmu_disable_all,
	.enable_all		= amd_pmu_enable_all,
	.enable			= amd_pmu_enable_event,
	.disable		= amd_pmu_disable_event,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.raw_event		= amd_pmu_raw_event,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_events		= 4,
	.event_bits		= 48,
	.event_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_idx		= gen_get_event_idx,
};

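/*
 * P6 family (Pentium Pro through Pentium M) setup; reached from
 * intel_pmu_init() when the CPU lacks architectural perfmon.
 */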
static __init int p6_pmu_init(void)
{
	switch (boot_cpu_data.x86_model) {
	case 1:
	case 3:  /* Pentium Pro */
	case 5:
	case 6:  /* Pentium II */
	case 7:
	case 8:
	case 11: /* Pentium III */
		event_constraints = intel_p6_event_constraints;
		break;
	case 9:
	case 13:
		/* Pentium M */
		event_constraints = intel_p6_event_constraints;
		break;
	default:
		pr_cont("unsupported p6 CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	x86_pmu = p6_pmu;

	return 0;
}

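/*
 * Probe architectural perfmon via CPUID leaf 0xA and fill in the
 * counter geometry and the model-specific event tables.
 */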
static __init int intel_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int ebx;
	int version;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
		/* check for P6 processor family */
		if (boot_cpu_data.x86 == 6)
			return p6_pmu_init();
		else
			return -ENODEV;
	}

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return -ENODEV;

	version = eax.split.version_id;
	if (version < 2)
		return -ENODEV;

	x86_pmu				= intel_pmu;
	x86_pmu.version			= version;
	x86_pmu.num_events		= eax.split.num_events;
	x86_pmu.event_bits		= eax.split.bit_width;
	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;

	/*
	 * Quirk: v2 perfmon does not report fixed-purpose events, so
	 * assume at least 3 events:
	 */
	x86_pmu.num_events_fixed	= max((int)edx.split.num_events_fixed, 3);

	/*
	 * Install the hw-cache-events table:
	 */
	switch (boot_cpu_data.x86_model) {
	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
	case 29: /* six-core 45 nm xeon "Dunnington" */
		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		pr_cont("Core2 events, ");
		event_constraints = intel_core_event_constraints;
		break;
	default:
	case 26:
		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		event_constraints = intel_nehalem_event_constraints;
		pr_cont("Nehalem/Corei7 events, ");
		break;
	case 28:
		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		pr_cont("Atom events, ");
		break;
	}
	return 0;
}

static __init int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

static void __init pmu_check_apic(void)
{
	if (cpu_has_apic)
		return;

	x86_pmu.apic = 0;
	pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
	pr_info("no hardware sampling interrupt available.\n");
}

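/*
 * Boot-time initialization: select the vendor PMU driver, clip the
 * counter counts to the generic limits and compute perf_event_mask.
 */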
void __init init_hw_perf_events(void)
{
	int err;

	pr_info("Performance Events: ");

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		err = intel_pmu_init();
		break;
	case X86_VENDOR_AMD:
		err = amd_pmu_init();
		break;
	default:
		return;
	}
	if (err != 0) {
		pr_cont("no PMU driver, software events only.\n");
		return;
	}

	pmu_check_apic();

	pr_cont("%s PMU driver.\n", x86_pmu.name);

	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
	}
	perf_event_mask = (1 << x86_pmu.num_events) - 1;
	perf_max_events = x86_pmu.num_events;

	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
	}

	perf_event_mask |=
		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
	x86_pmu.intel_ctrl = perf_event_mask;

	perf_events_lapic_init();
	register_die_notifier(&perf_event_nmi_notifier);

	pr_info("... version:                %d\n",     x86_pmu.version);
	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
	pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
	pr_info("... event mask:             %016Lx\n", perf_event_mask);
}

static inline void x86_pmu_read(struct perf_event *event)
{
	x86_perf_event_update(event, &event->hw, event->hw.idx);
}

static const struct pmu pmu = {
	.enable		= x86_pmu_enable,
	.disable	= x86_pmu_disable,
	.read		= x86_pmu_read,
	.unthrottle	= x86_pmu_unthrottle,
};

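/*
 * Group validation: dry-run schedule the leader, its siblings and the
 * new event on a scratch cpu_hw_events to check that the whole group
 * can be scheduled on the PMU at once.
 */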
static int
validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event fake_event = event->hw;

	if (event->pmu && event->pmu != &pmu)
		return 0;

	return x86_schedule_event(cpuc, &fake_event) >= 0;
}

static int validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct cpu_hw_events fake_pmu;

	memset(&fake_pmu, 0, sizeof(fake_pmu));

	if (!validate_event(&fake_pmu, leader))
		return -ENOSPC;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(&fake_pmu, sibling))
			return -ENOSPC;
	}

	if (!validate_event(&fake_pmu, event))
		return -ENOSPC;

	return 0;
}

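/*
 * Entry point used by the generic perf core to bind an event to this
 * PMU; group members are additionally checked with validate_group().
 */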
const struct pmu *hw_perf_event_init(struct perf_event *event)
{
	int err;

	err = __hw_perf_event_init(event);
	if (!err) {
		if (event->group_leader != event)
			err = validate_group(event);
	}
	if (err) {
		if (event->destroy)
			event->destroy(event);
		return ERR_PTR(err);
	}

	return &pmu;
}

/*
 * callchain support
 */

static inline
void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
static DEFINE_PER_CPU(int, in_ignored_frame);


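/*
 * stacktrace_ops callbacks used by dump_trace() for kernel callchains;
 * frames on the NMI and DEBUG exception stacks are ignored.
 */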
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Ignore warnings */
}

static void backtrace_warning(void *data, char *msg)
{
	/* Ignore warnings */
}

static int backtrace_stack(void *data, char *name)
{
	per_cpu(in_ignored_frame, smp_processor_id()) =
			x86_is_stack_id(NMI_STACK, name) ||
			x86_is_stack_id(DEBUG_STACK, name);

	return 0;
}

static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *entry = data;

	if (per_cpu(in_ignored_frame, smp_processor_id()))
		return;

	if (reliable)
		callchain_store(entry, addr);
}

static const struct stacktrace_ops backtrace_ops = {
	.warning		= backtrace_warning,
	.warning_symbol		= backtrace_warning_symbol,
	.stack			= backtrace_stack,
	.address		= backtrace_address,
	.walk_stack		= print_context_stack,
};

#include "../dumpstack.h"

static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	callchain_store(entry, PERF_CONTEXT_KERNEL);
	callchain_store(entry, regs->ip);

	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
}

/*
 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 */
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long offset, addr = (unsigned long)from;
	int type = in_nmi() ? KM_NMI : KM_IRQ0;
	unsigned long size, len = 0;
	struct page *page;
	void *map;
	int ret;

	do {
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;

		offset = addr & (PAGE_SIZE - 1);
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page, type);
		memcpy(to, map+offset, size);
		kunmap_atomic(map, type);
		put_page(page);

		len  += size;
		to   += size;
		addr += size;

	} while (len < n);

	return len;
}

static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
	unsigned long bytes;

	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));

	return bytes == sizeof(*frame);
}

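/*
 * Walk the user stack by chasing saved frame pointers; each frame is
 * fetched with the GUP-based copy_from_user_nmi() helper above, since
 * a regular copy_from_user() is not safe from NMI context.
 */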
static void
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	struct stack_frame frame;
	const void __user *fp;

	if (!user_mode(regs))
		regs = task_pt_regs(current);

	fp = (void __user *)regs->bp;

	callchain_store(entry, PERF_CONTEXT_USER);
	callchain_store(entry, regs->ip);

	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		frame.next_frame     = NULL;
		frame.return_address = 0;

		if (!copy_stack_frame(fp, &frame))
			break;

		if ((unsigned long)fp < regs->sp)
			break;

		callchain_store(entry, frame.return_address);
		fp = frame.next_frame;
	}
}

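/*
 * Decide which callchains to collect: kernel-mode samples get a kernel
 * chain, and tasks that have an mm also get a user-space chain.
 */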
static void
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	int is_user;

	if (!regs)
		return;

	is_user = user_mode(regs);

	if (!current || current->pid == 0)
		return;

	if (is_user && current->state != TASK_RUNNING)
		return;

	if (!is_user)
		perf_callchain_kernel(regs, entry);

	if (current->mm)
		perf_callchain_user(regs, entry);
}

struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry;

	if (in_nmi())
		entry = &__get_cpu_var(pmc_nmi_entry);
	else
		entry = &__get_cpu_var(pmc_irq_entry);

	entry->nr = 0;

	perf_do_callchain(regs, entry);

	return entry;
}

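/*
 * CPU hotplug notification: set up the per-cpu debug store (used for
 * BTS) on the CPU that just came online.
 */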
void hw_perf_event_setup_online(int cpu)
{
	init_debug_store_on_cpu(cpu);
}