/*
 * Performance events x86 architecture code
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 *  Copyright (C) 2009 Google, Inc., Stephane Eranian
 *
 *  For licensing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/compat.h>
#include <asm/smp.h>
#include <asm/alternative.h>

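/*
 * Debugging aid, normally compiled out: switching the "#if 0" below to
 * "#if 1" routes every wrmsrl() through trace_printk(), so each PMU MSR
 * write shows up in the ftrace buffer.
 */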
#if 0
#undef wrmsrl
#define wrmsrl(msr, val) 					\
do {								\
	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
			(unsigned long)(val));			\
	native_write_msr((msr), (u32)((u64)(val)), 		\
			(u32)((u64)(val) >> 32));		\
} while (0)
#endif

/*
 *          |   NHM/WSM    |      SNB     |
 * register -------------------------------
 *          |  HT  | no HT |  HT  | no HT |
 *-----------------------------------------
 * offcore  | core | core  | cpu  | core  |
 * lbr_sel  | core | core  | cpu  | core  |
 * ld_lat   | cpu  | core  | cpu  | core  |
 *-----------------------------------------
 *
 * Given that there is a small number of shared regs,
 * we can pre-allocate their slot in the per-cpu
 * per-core reg tables.
 */
enum extra_reg_type {
	EXTRA_REG_NONE  = -1,	/* not used */

	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */

	EXTRA_REG_MAX		/* number of entries needed */
};

struct event_constraint {
	union {
		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
		u64		idxmsk64;
	};
	u64	code;
	u64	cmask;
	int	weight;
};

struct amd_nb {
	int nb_id;  /* NorthBridge id */
	int refcnt; /* reference count */
	struct perf_event *owners[X86_PMC_IDX_MAX];
	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

struct intel_percore;

#define MAX_LBR_ENTRIES		16

struct cpu_hw_events {
	/*
	 * Generic x86 PMC bits
	 */
	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int			enabled;

	int			n_events;
	int			n_added;
	int			n_txn;
	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
	u64			tags[X86_PMC_IDX_MAX];
	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */

	unsigned int		group_flag;

	/*
	 * Intel DebugStore bits
	 */
	struct debug_store	*ds;
	u64			pebs_enabled;

	/*
	 * Intel LBR bits
	 */
	int				lbr_users;
	void				*lbr_context;
	struct perf_branch_stack	lbr_stack;
	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];

	/*
	 * manage shared (per-core, per-cpu) registers
	 * used on Intel NHM/WSM/SNB
	 */
	struct intel_shared_regs	*shared_regs;

	/*
	 * AMD specific bits
	 */
	struct amd_nb		*amd_nb;

	void			*kfree_on_online;
};

#define __EVENT_CONSTRAINT(c, n, m, w) {\
	{ .idxmsk64 = (n) },		\
	.code = (c),			\
	.cmask = (m),			\
	.weight = (w),			\
}

#define EVENT_CONSTRAINT(c, n, m)	\
	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))

/*
 * Constraint on the Event code.
 */
#define INTEL_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 *  The other filters are supported by fixed counters.
 *  The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)

/*
 * Constraint on the Event code + UMask
 */
#define INTEL_UEVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

#define EVENT_CONSTRAINT_END		\
	EVENT_CONSTRAINT(0, 0, 0)

#define for_each_event_constraint(e, c)	\
	for ((e) = (c); (e)->weight; (e)++)

/*
 * Per register state.
 */
struct er_account {
	raw_spinlock_t		lock;	/* per-core: protect structure */
	u64			config;	/* extra MSR config */
	u64			reg;	/* extra MSR number */
	atomic_t		ref;	/* reference count */
};

/*
 * Extra registers for specific events.
 *
 * Some events need large masks and require external MSRs.
 * Those extra MSRs end up being shared for all events on
 * a PMU and sometimes between PMU of sibling HT threads.
 * In either case, the kernel needs to handle conflicting
 * accesses to those extra, shared, regs. The data structure
 * to manage those registers is stored in cpu_hw_event.
 */
struct extra_reg {
	unsigned int		event;
	unsigned int		msr;
	u64			config_mask;
	u64			valid_mask;
	int			idx;  /* per_xxx->regs[] reg index */
};

#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
	.event = (e),		\
	.msr = (ms),		\
	.config_mask = (m),	\
	.valid_mask = (vm),	\
	.idx = EXTRA_REG_##i	\
	}

#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)

#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
union perf_capabilities {
	struct {
		u64	lbr_format    : 6;
		u64	pebs_trap     : 1;
		u64	pebs_arch_reg : 1;
		u64	pebs_format   : 4;
		u64	smm_freeze    : 1;
	};
	u64	capabilities;
};

/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
	/*
	 * Generic x86 PMC bits
	 */
	const char	*name;
	int		version;
	int		(*handle_irq)(struct pt_regs *);
	void		(*disable_all)(void);
	void		(*enable_all)(int added);
	void		(*enable)(struct perf_event *);
	void		(*disable)(struct perf_event *);
	int		(*hw_config)(struct perf_event *event);
	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
	unsigned	eventsel;
	unsigned	perfctr;
	u64		(*event_map)(int);
	int		max_events;
	int		num_counters;
	int		num_counters_fixed;
	int		cntval_bits;
	u64		cntval_mask;
	int		apic;
	u64		max_period;
	struct event_constraint *
			(*get_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);

	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);
	struct event_constraint *event_constraints;
	void		(*quirks)(void);
	int		perfctr_second_write;

	int		(*cpu_prepare)(int cpu);
	void		(*cpu_starting)(int cpu);
	void		(*cpu_dying)(int cpu);
	void		(*cpu_dead)(int cpu);

	/*
	 * Intel Arch Perfmon v2+
	 */
	u64			intel_ctrl;
	union perf_capabilities intel_cap;

	/*
	 * Intel DebugStore bits
	 */
	int		bts, pebs;
	int		bts_active, pebs_active;
	int		pebs_record_size;
	void		(*drain_pebs)(struct pt_regs *regs);
	struct event_constraint *pebs_constraints;

	/*
	 * Intel LBR
	 */
	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
	int		lbr_nr;			   /* hardware stack size */

	/*
	 * Extra registers for events
	 */
	struct extra_reg *extra_regs;
	unsigned int er_flags;
};

#define ERF_NO_HT_SHARING	1
#define ERF_HAS_RSP_1		2

static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
};

static int x86_perf_event_set_period(struct perf_event *event);

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
static u64 __read_mostly hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
static u64
x86_perf_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 prev_raw_count, new_raw_count;
	int idx = hwc->idx;
	s64 delta;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = local64_read(&hwc->prev_count);
	rdmsrl(hwc->event_base, new_raw_count);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

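/*
 * Map a counter index to its MSR offset. On CPUs with the PERFCTR_CORE
 * feature the event-select/counter MSR pairs are spaced two apart, hence
 * the "index << 1" patched in via alternative_io(); everywhere else the
 * offset is simply the index.
 */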
static inline int x86_pmu_addr_offset(int index)
{
	int offset;

	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
	alternative_io(ASM_NOP2,
		       "shll $1, %%eax",
		       X86_FEATURE_PERFCTR_CORE,
		       "=a" (offset),
		       "a"  (index));

	return offset;
}

static inline unsigned int x86_pmu_config_addr(int index)
{
	return x86_pmu.eventsel + x86_pmu_addr_offset(index);
}

static inline unsigned int x86_pmu_event_addr(int index)
{
	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
}

/*
 * Find and validate any extra registers to set up.
 */
static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	struct extra_reg *er;

	reg = &event->hw.extra_reg;

	if (!x86_pmu.extra_regs)
		return 0;

	for (er = x86_pmu.extra_regs; er->msr; er++) {
		if (er->event != (config & er->config_mask))
			continue;
		if (event->attr.config1 & ~er->valid_mask)
			return -EINVAL;

		reg->idx = er->idx;
		reg->config = event->attr.config1;
		reg->reg = er->msr;
		break;
	}
	return 0;
}

static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

#ifdef CONFIG_X86_LOCAL_APIC

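/*
 * Reserve the counter and event-select MSRs so they are not claimed by
 * other users of the performance-counter NMI infrastructure; on failure
 * the reservation is unwound counter by counter.
 */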
static bool reserve_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
			goto eventsel_fail;
	}

	return true;

eventsel_fail:
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu_config_addr(i));

	i = x86_pmu.num_counters;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu_event_addr(i));

	return false;
}

static void release_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		release_perfctr_nmi(x86_pmu_event_addr(i));
		release_evntsel_nmi(x86_pmu_config_addr(i));
	}
}

#else

static bool reserve_pmc_hardware(void) { return true; }
static void release_pmc_hardware(void) {}

#endif

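/*
 * Probe whether the PMU is usable: refuse counters the BIOS has already
 * claimed, and do a write/read-back test so that emulators which silently
 * ignore the MSRs are treated as having no PMU.
 */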
static bool check_hw_exists(void)
{
	u64 val, val_new = 0;
	int i, reg, ret = 0;

	/*
	 * Check to see if the BIOS enabled any of the counters, if so
	 * complain and bail.
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		reg = x86_pmu_config_addr(i);
		ret = rdmsrl_safe(reg, &val);
		if (ret)
			goto msr_fail;
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			goto bios_fail;
	}

	if (x86_pmu.num_counters_fixed) {
		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		ret = rdmsrl_safe(reg, &val);
		if (ret)
			goto msr_fail;
		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
			if (val & (0x03 << i*4))
				goto bios_fail;
		}
	}

	/*
	 * Now write a value and read it back to see if it matches,
	 * this is needed to detect certain hardware emulators (qemu/kvm)
	 * that don't trap on the MSR access and always return 0s.
	 */
	val = 0xabcdUL;
	ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
	if (ret || val != val_new)
		goto msr_fail;

	return true;

bios_fail:
	/*
	 * We still allow the PMU driver to operate:
	 */
	printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);

	return true;

msr_fail:
	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");

	return false;
}

static void reserve_ds_buffers(void);
static void release_ds_buffers(void);

static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		release_ds_buffers();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;

	config = attr->config;

	cache_type = (config >>  0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >>  8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	val = hw_cache_event_ids[cache_type][cache_op][cache_result];

	if (val == 0)
		return -ENOENT;

	if (val == -1)
		return -EINVAL;

	hwc->config |= val;
	attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
	return x86_pmu_extra_regs(val, event);
}

static int x86_setup_perfctr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	u64 config;

	if (!is_sampling_event(event)) {
		hwc->sample_period = x86_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	} else {
		/*
		 * If we have a PMU initialized but no APIC
		 * interrupts, we cannot sample hardware
		 * events (user-space has to fall back and
		 * sample via a hrtimer based software event):
		 */
		if (!x86_pmu.apic)
			return -EOPNOTSUPP;
	}

	/*
	 * Do not allow config1 (extended registers) to propagate,
	 * there's no sane user-space generalization yet:
	 */
	if (attr->type == PERF_TYPE_RAW)
		return 0;

	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, event);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	/*
	 * The generic map:
	 */
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

	/*
	 * Branch tracing:
	 */
	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
	    !attr->freq && hwc->sample_period == 1) {
		/* BTS is not supported by this architecture. */
		if (!x86_pmu.bts_active)
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (!attr->exclude_kernel)
			return -EOPNOTSUPP;
	}

	hwc->config |= config;

	return 0;
}

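/*
 * Translate the generic attr into the base hardware configuration.
 * The precise_ip check caps the requested precision at what the CPU
 * offers: one level for PEBS, one more when the LBR can be used to
 * fix up the sampled IP.
 */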
static int x86_pmu_hw_config(struct perf_event *event)
{
	if (event->attr.precise_ip) {
		int precise = 0;

		/* Support for constant skid */
		if (x86_pmu.pebs_active) {
			precise++;

			/* Support for IP fixup */
			if (x86_pmu.lbr_nr)
				precise++;
		}

		if (event->attr.precise_ip > precise)
			return -EOPNOTSUPP;
	}

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * Count user and OS events unless requested not to
	 */
	if (!event->attr.exclude_user)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!event->attr.exclude_kernel)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;

	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

	return x86_setup_perfctr(event);
}

/*
 * Setup the hardware configuration for a given attr_type
 */
static int __x86_pmu_event_init(struct perf_event *event)
{
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = 0;
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			if (!reserve_pmc_hardware())
				err = -EBUSY;
			else
				reserve_ds_buffers();
		}
		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

	event->destroy = hw_perf_event_destroy;

	event->hw.idx = -1;
	event->hw.last_cpu = -1;
	event->hw.last_tag = ~0ULL;

	/* mark unused */
	event->hw.extra_reg.idx = EXTRA_REG_NONE;

	return x86_pmu.hw_config(event);
}

static void x86_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(x86_pmu_config_addr(idx), val);
		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu_config_addr(idx), val);
	}
}

static void x86_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu_initialized())
		return;

	if (!cpuc->enabled)
		return;

	cpuc->n_added = 0;
	cpuc->enabled = 0;
	barrier();

	x86_pmu.disable_all();
}

static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
					  u64 enable_mask)
{
	if (hwc->extra_reg.reg)
		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
	wrmsrl(hwc->config_base, hwc->config | enable_mask);
}

static void x86_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
	}
}

static struct pmu pmu;

static inline int is_x86_event(struct perf_event *event)
{
	return event->pmu == &pmu;
}

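/*
 * Assign the collected events to counters: first try to keep every event
 * on the counter it already used (fastpath); otherwise redo the assignment
 * from scratch, placing the most constrained events (smallest constraint
 * weight) first.
 */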
static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int i, j, w, wmax, num = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	for (i = 0; i < n; i++) {
		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
		constraints[i] = c;
	}

	/*
	 * fastpath, try to reuse previous register
	 */
	for (i = 0; i < n; i++) {
		hwc = &cpuc->event_list[i]->hw;
		c = constraints[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	if (i == n)
		goto done;

	/*
	 * begin slow path
	 */

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	/*
	 * weight = number of possible counters
	 *
	 * 1    = most constrained, only works on one counter
	 * wmax = least constrained, works on any counter
	 *
	 * assign events to counters starting with most
	 * constrained events.
	 */
	wmax = x86_pmu.num_counters;

	/*
	 * when fixed event counters are present,
	 * wmax is incremented by 1 to account
	 * for one more choice
	 */
	if (x86_pmu.num_counters_fixed)
		wmax++;

	for (w = 1, num = n; num && w <= wmax; w++) {
		/* for each event */
		for (i = 0; num && i < n; i++) {
			c = constraints[i];
			hwc = &cpuc->event_list[i]->hw;

			if (c->weight != w)
				continue;

			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
				if (!test_bit(j, used_mask))
					break;
			}

			if (j == X86_PMC_IDX_MAX)
				break;

			__set_bit(j, used_mask);

			if (assign)
				assign[i] = j;
			num--;
		}
	}
done:
	/*
	 * scheduling failed or is just a simulation,
	 * free resources if necessary
	 */
	if (!assign || num) {
		for (i = 0; i < n; i++) {
			if (x86_pmu.put_event_constraints)
				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
		}
	}
	return num ? -ENOSPC : 0;
}

/*
 * dogrp: true if must collect siblings events (group)
 * returns total number of events and error code
 */
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;

	/* current number of events already accepted */
	n = cpuc->n_events;

	if (is_x86_event(leader)) {
		if (n >= max_count)
			return -ENOSPC;
		cpuc->event_list[n] = leader;
		n++;
	}
	if (!dogrp)
		return n;

	list_for_each_entry(event, &leader->sibling_list, group_entry) {
		if (!is_x86_event(event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -ENOSPC;

		cpuc->event_list[n] = event;
		n++;
	}
	return n;
}

static inline void x86_assign_hw_event(struct perf_event *event,
				struct cpu_hw_events *cpuc, int i)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = cpuc->assign[i];
	hwc->last_cpu = smp_processor_id();
	hwc->last_tag = ++cpuc->tags[i];

	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
		hwc->config_base = 0;
		hwc->event_base	= 0;
	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
	} else {
		hwc->config_base = x86_pmu_config_addr(hwc->idx);
		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
	}
}

static inline int match_prev_assignment(struct hw_perf_event *hwc,
					struct cpu_hw_events *cpuc,
					int i)
{
	return hwc->idx == cpuc->assign[i] &&
		hwc->last_cpu == smp_processor_id() &&
		hwc->last_tag == cpuc->tags[i];
}

static void x86_pmu_start(struct perf_event *event, int flags);
static void x86_pmu_stop(struct perf_event *event, int flags);

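/*
 * (Re)enable the PMU on this CPU. If events were added while the PMU was
 * disabled, events that moved to a different counter are stopped and
 * reprogrammed before everything is switched back on.
 */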
static void x86_pmu_enable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int i, added = cpuc->n_added;

	if (!x86_pmu_initialized())
		return;

	if (cpuc->enabled)
		return;

	if (cpuc->n_added) {
		int n_running = cpuc->n_events - cpuc->n_added;
		/*
		 * apply assignment obtained either from
		 * hw_perf_group_sched_in() or x86_pmu_enable()
		 *
		 * step1: save events moving to new counters
		 * step2: reprogram moved events into new counters
		 */
		for (i = 0; i < n_running; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;

			/*
			 * we can avoid reprogramming counter if:
			 * - assigned same counter as last time
			 * - running on same CPU as last time
			 * - no other event has used the counter since
			 */
			if (hwc->idx == -1 ||
			    match_prev_assignment(hwc, cpuc, i))
				continue;

			/*
			 * Ensure we don't accidentally enable a stopped
			 * counter simply because we rescheduled.
			 */
			if (hwc->state & PERF_HES_STOPPED)
				hwc->state |= PERF_HES_ARCH;

			x86_pmu_stop(event, PERF_EF_UPDATE);
		}

		for (i = 0; i < cpuc->n_events; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;

			if (!match_prev_assignment(hwc, cpuc, i))
				x86_assign_hw_event(event, cpuc, i);
			else if (i < n_running)
				continue;

			if (hwc->state & PERF_HES_ARCH)
				continue;

			x86_pmu_start(event, PERF_EF_RELOAD);
		}
		cpuc->n_added = 0;
		perf_events_lapic_init();
	}

	cpuc->enabled = 1;
	barrier();

	x86_pmu.enable_all(added);
}

static inline void x86_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base, hwc->config);
}

static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);

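/*
 * Note on the programming below: the counter is loaded with -(left) so
 * that it overflows and raises the PMI after "left" further increments.
 */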
/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 */
static int
x86_perf_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0, idx = hwc->idx;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	/*
	 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
	 */
	if (unlikely(left < 2))
		left = 2;

	if (left > x86_pmu.max_period)
		left = x86_pmu.max_period;

	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future deltas:
	 */
	local64_set(&hwc->prev_count, (u64)-left);

	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);

	/*
	 * Due to erratum on certain CPUs we need
	 * a second write to be sure the register
	 * is updated properly
	 */
	if (x86_pmu.perfctr_second_write) {
		wrmsrl(hwc->event_base,
			(u64)(-left) & x86_pmu.cntval_mask);
	}

	perf_event_update_userpage(event);

	return ret;
}

static void x86_pmu_enable_event(struct perf_event *event)
{
	if (__this_cpu_read(cpu_hw_events.enabled))
		__x86_pmu_enable_event(&event->hw,
				       ARCH_PERFMON_EVENTSEL_ENABLE);
}

/*
 * Add a single event to the PMU.
 *
 * The event is added to the group of enabled events
 * but only if it can be scheduled with existing events.
 */
static int x86_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc;
	int assign[X86_PMC_IDX_MAX];
	int n, n0, ret;

	hwc = &event->hw;

	perf_pmu_disable(event->pmu);
	n0 = cpuc->n_events;
	ret = n = collect_events(cpuc, event, false);
	if (ret < 0)
		goto out;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	/*
	 * If group events scheduling transaction was started,
	 * skip the schedulability test here, it will be performed
	 * at commit time (->commit_txn) as a whole
	 */
	if (cpuc->group_flag & PERF_EVENT_TXN)
		goto done_collect;

	ret = x86_pmu.schedule_events(cpuc, n, assign);
	if (ret)
		goto out;
	/*
	 * copy new assignment, now we know it is possible
	 * will be used by hw_perf_enable()
	 */
	memcpy(cpuc->assign, assign, n*sizeof(int));

done_collect:
	cpuc->n_events = n;
	cpuc->n_added += n - n0;
	cpuc->n_txn += n - n0;

	ret = 0;
out:
	perf_pmu_enable(event->pmu);
	return ret;
}

static void x86_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
		x86_perf_event_set_period(event);
	}

	event->hw.state = 0;

	cpuc->events[idx] = event;
	__set_bit(idx, cpuc->active_mask);
	__set_bit(idx, cpuc->running);
	x86_pmu.enable(event);
	perf_event_update_userpage(event);
}

void perf_event_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	u64 pebs;
	struct cpu_hw_events *cpuc;
	unsigned long flags;
	int cpu, idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_events, cpu);

	if (x86_pmu.version >= 2) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);

		pr_info("\n");
		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
		pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
	}
	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
		rdmsrl(x86_pmu_event_addr(idx), pmc_count);

		prev_left = per_cpu(pmc_prev_left[idx], cpu);

		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_restore(flags);
}

static void x86_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
		x86_pmu.disable(event);
		cpuc->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		x86_perf_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static void x86_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int i;

	/*
	 * If we're called during a txn, we don't need to do anything.
	 * The events never got scheduled and ->cancel_txn will truncate
	 * the event_list.
	 */
	if (cpuc->group_flag & PERF_EVENT_TXN)
		return;

	x86_pmu_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < cpuc->n_events; i++) {
		if (event == cpuc->event_list[i]) {

			if (x86_pmu.put_event_constraints)
				x86_pmu.put_event_constraints(cpuc, event);

			while (++i < cpuc->n_events)
				cpuc->event_list[i-1] = cpuc->event_list[i];

			--cpuc->n_events;
			break;
		}
	}
	perf_event_update_userpage(event);
}

static int x86_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	int idx, handled = 0;
	u64 val;

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);

	/*
	 * Some chipsets need to unmask the LVTPC in a particular spot
	 * inside the nmi handler.  As a result, the unmasking was pushed
	 * into all the nmi handlers.
	 *
	 * This generic handler doesn't seem to have any issues where the
	 * unmasking occurs so it was left at the top.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		if (!test_bit(idx, cpuc->active_mask)) {
			/*
			 * Though we deactivated the counter some cpus
			 * might still deliver spurious interrupts still
			 * in flight. Catch them:
			 */
			if (__test_and_clear_bit(idx, cpuc->running))
				handled++;
			continue;
		}

		event = cpuc->events[idx];

		val = x86_perf_event_update(event);
		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
			continue;

		/*
		 * event overflow
		 */
		handled++;
		data.period	= event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			x86_pmu_stop(event, 0);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}

void perf_events_lapic_init(void)
{
	if (!x86_pmu.apic || !x86_pmu_initialized())
		return;

	/*
	 * Always use NMI for PMU
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

struct pmu_nmi_state {
	unsigned int	marked;
	int		handled;
};

static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);

static int __kprobes
perf_event_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	unsigned int this_nmi;
	int handled;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;
	case DIE_NMIUNKNOWN:
		this_nmi = percpu_read(irq_stat.__nmi_count);
		if (this_nmi != __this_cpu_read(pmu_nmi.marked))
			/* let the kernel handle the unknown nmi */
			return NOTIFY_DONE;
		/*
		 * This one is a PMU back-to-back nmi. Two events
		 * trigger 'simultaneously' raising two back-to-back
		 * NMIs. If the first NMI handles both, the latter
		 * will be empty and daze the CPU. So, we drop it to
		 * avoid false-positive 'unknown nmi' messages.
		 */
		return NOTIFY_STOP;
	default:
		return NOTIFY_DONE;
	}

	handled = x86_pmu.handle_irq(args->regs);
	if (!handled)
		return NOTIFY_DONE;

	this_nmi = percpu_read(irq_stat.__nmi_count);
	if ((handled > 1) ||
		/* the next nmi could be a back-to-back nmi */
	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
	     (__this_cpu_read(pmu_nmi.handled) > 1))) {
		/*
		 * We could have two subsequent back-to-back nmis: The
		 * first handles more than one counter, the 2nd
		 * handles only one counter and the 3rd handles no
		 * counter.
		 *
		 * This is the 2nd nmi because the previous was
		 * handling more than one counter. We will mark the
		 * next (3rd) and then drop it if unhandled.
		 */
		__this_cpu_write(pmu_nmi.marked, this_nmi + 1);
		__this_cpu_write(pmu_nmi.handled, handled);
	}

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
	.next			= NULL,
	.priority		= NMI_LOCAL_LOW_PRIOR,
};

static struct event_constraint unconstrained;
static struct event_constraint emptyconstraint;

static struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct event_constraint *c;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &unconstrained;
}

#include "perf_event_amd.c"
#include "perf_event_p6.c"
#include "perf_event_p4.c"
#include "perf_event_intel_lbr.c"
#include "perf_event_intel_ds.c"
#include "perf_event_intel.c"

static int __cpuinit
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	int ret = NOTIFY_OK;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		cpuc->kfree_on_online = NULL;
		if (x86_pmu.cpu_prepare)
			ret = x86_pmu.cpu_prepare(cpu);
		break;

	case CPU_STARTING:
		if (x86_pmu.cpu_starting)
			x86_pmu.cpu_starting(cpu);
		break;

	case CPU_ONLINE:
		kfree(cpuc->kfree_on_online);
		break;

	case CPU_DYING:
		if (x86_pmu.cpu_dying)
			x86_pmu.cpu_dying(cpu);
		break;

	case CPU_UP_CANCELED:
	case CPU_DEAD:
		if (x86_pmu.cpu_dead)
			x86_pmu.cpu_dead(cpu);
		break;

	default:
		break;
	}

	return ret;
}

static void __init pmu_check_apic(void)
{
	if (cpu_has_apic)
		return;

	x86_pmu.apic = 0;
	pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
	pr_info("no hardware sampling interrupt available.\n");
}

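/*
 * Boot-time setup: pick the vendor-specific PMU implementation, clip the
 * counter counts to the supported maxima, and register the "cpu" PMU and
 * its notifiers.
 */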
static int __init init_hw_perf_events(void)
{
	struct event_constraint *c;
	int err;

	pr_info("Performance Events: ");

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		err = intel_pmu_init();
		break;
	case X86_VENDOR_AMD:
		err = amd_pmu_init();
		break;
	default:
		return 0;
	}
	if (err != 0) {
		pr_cont("no PMU driver, software events only.\n");
		return 0;
	}

	pmu_check_apic();

	/* sanity check that the hardware exists or is emulated */
	if (!check_hw_exists())
		return 0;

	pr_cont("%s PMU driver.\n", x86_pmu.name);

	if (x86_pmu.quirks)
		x86_pmu.quirks();

	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
	}
	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;

	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
	}

	x86_pmu.intel_ctrl |=
		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;

	perf_events_lapic_init();
	register_die_notifier(&perf_event_nmi_notifier);

	unconstrained = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
				   0, x86_pmu.num_counters);

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if (c->cmask != X86_RAW_EVENT_MASK)
				continue;

			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			c->weight += x86_pmu.num_counters;
		}
	}

	pr_info("... version:                %d\n",     x86_pmu.version);
	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
	perf_cpu_notifier(x86_pmu_notifier);

	return 0;
}
early_initcall(init_hw_perf_events);

static inline void x86_pmu_read(struct perf_event *event)
{
	x86_perf_event_update(event);
}

/*
 * Start group events scheduling transaction
 * Set the flag to make pmu::enable() not perform the
 * schedulability test, it will be performed at commit time
 */
static void x86_pmu_start_txn(struct pmu *pmu)
{
	perf_pmu_disable(pmu);
	__this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
	__this_cpu_write(cpu_hw_events.n_txn, 0);
}

/*
 * Stop group events scheduling transaction
 * Clear the flag and pmu::enable() will perform the
 * schedulability test.
 */
static void x86_pmu_cancel_txn(struct pmu *pmu)
{
	__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
	/*
	 * Truncate the collected events.
	 */
	__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
	__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
	perf_pmu_enable(pmu);
}

/*
 * Commit group events scheduling transaction
 * Perform the group schedulability test as a whole
 * Return 0 if success
 */
static int x86_pmu_commit_txn(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int assign[X86_PMC_IDX_MAX];
	int n, ret;

	n = cpuc->n_events;

	if (!x86_pmu_initialized())
		return -EAGAIN;

	ret = x86_pmu.schedule_events(cpuc, n, assign);
	if (ret)
		return ret;

	/*
	 * copy new assignment, now we know it is possible
	 * will be used by hw_perf_enable()
	 */
	memcpy(cpuc->assign, assign, n*sizeof(int));

	cpuc->group_flag &= ~PERF_EVENT_TXN;
	perf_pmu_enable(pmu);
	return 0;
}
/*
 * a fake_cpuc is used to validate event groups. Due to
 * the extra reg logic, we need to also allocate a fake
 * per_core and per_cpu structure. Otherwise, group events
 * using extra reg may conflict without the kernel being
 * able to catch this when the last event gets added to
 * the group.
 */
static void free_fake_cpuc(struct cpu_hw_events *cpuc)
{
	kfree(cpuc->shared_regs);
	kfree(cpuc);
}

static struct cpu_hw_events *allocate_fake_cpuc(void)
{
	struct cpu_hw_events *cpuc;
	int cpu = raw_smp_processor_id();

	cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
	if (!cpuc)
		return ERR_PTR(-ENOMEM);

	/* only needed, if we have extra_regs */
	if (x86_pmu.extra_regs) {
		cpuc->shared_regs = allocate_shared_regs(cpu);
		if (!cpuc->shared_regs)
			goto error;
	}
	return cpuc;
error:
	free_fake_cpuc(cpuc);
	return ERR_PTR(-ENOMEM);
}

/*
 * validate that we can schedule this event
 */
static int validate_event(struct perf_event *event)
{
	struct cpu_hw_events *fake_cpuc;
	struct event_constraint *c;
	int ret = 0;

	fake_cpuc = allocate_fake_cpuc();
	if (IS_ERR(fake_cpuc))
		return PTR_ERR(fake_cpuc);

	c = x86_pmu.get_event_constraints(fake_cpuc, event);

	if (!c || !c->weight)
		ret = -ENOSPC;

	if (x86_pmu.put_event_constraints)
		x86_pmu.put_event_constraints(fake_cpuc, event);

	free_fake_cpuc(fake_cpuc);

	return ret;
}

/*
 * validate a single event group
 *
 * validation includes:
 *	- check events are compatible with each other
 *	- events do not compete for the same counter
 *	- number of events <= number of counters
 *
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int validate_group(struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct cpu_hw_events *fake_cpuc;
	int ret = -ENOSPC, n;

	fake_cpuc = allocate_fake_cpuc();
	if (IS_ERR(fake_cpuc))
		return PTR_ERR(fake_cpuc);
	/*
	 * the event is not yet connected with its
	 * siblings therefore we must first collect
	 * existing siblings, then add the new event
	 * before we can simulate the scheduling
	 */
	n = collect_events(fake_cpuc, leader, true);
	if (n < 0)
		goto out;

	fake_cpuc->n_events = n;
	n = collect_events(fake_cpuc, event, false);
	if (n < 0)
		goto out;

	fake_cpuc->n_events = n;

	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);

out:
	free_fake_cpuc(fake_cpuc);
	return ret;
}

static int x86_pmu_event_init(struct perf_event *event)
{
	struct pmu *tmp;
	int err;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
		break;

	default:
		return -ENOENT;
	}

	err = __x86_pmu_event_init(event);
	if (!err) {
		/*
		 * we temporarily connect event to its pmu
		 * such that validate_group() can classify
		 * it as an x86 event using is_x86_event()
		 */
		tmp = event->pmu;
		event->pmu = &pmu;

		if (event->group_leader != event)
			err = validate_group(event);
		else
			err = validate_event(event);

		event->pmu = tmp;
	}
	if (err) {
		if (event->destroy)
			event->destroy(event);
	}

	return err;
}

static struct pmu pmu = {
	.pmu_enable	= x86_pmu_enable,
	.pmu_disable	= x86_pmu_disable,

	.event_init	= x86_pmu_event_init,

	.add		= x86_pmu_add,
	.del		= x86_pmu_del,
	.start		= x86_pmu_start,
	.stop		= x86_pmu_stop,
	.read		= x86_pmu_read,

	.start_txn	= x86_pmu_start_txn,
	.cancel_txn	= x86_pmu_cancel_txn,
	.commit_txn	= x86_pmu_commit_txn,
};

/*
 * callchain support
 */

static int backtrace_stack(void *data, char *name)
{
	return 0;
}

static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *entry = data;

	perf_callchain_store(entry, addr);
}

static const struct stacktrace_ops backtrace_ops = {
	.stack			= backtrace_stack,
	.address		= backtrace_address,
	.walk_stack		= print_context_stack_bp,
};

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* TODO: We don't support guest os callchain now */
		return;
	}

	perf_callchain_store(entry, regs->ip);

	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
}

#ifdef CONFIG_COMPAT
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	/* 32-bit process in 64-bit kernel. */
	struct stack_frame_ia32 frame;
	const void __user *fp;

	if (!test_thread_flag(TIF_IA32))
		return 0;

	fp = compat_ptr(regs->bp);
	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		unsigned long bytes;
		frame.next_frame     = 0;
		frame.return_address = 0;

		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;

		if (fp < compat_ptr(regs->sp))
			break;

		perf_callchain_store(entry, frame.return_address);
		fp = compat_ptr(frame.next_frame);
	}
	return 1;
}
#else
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
    return 0;
}
#endif

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	struct stack_frame frame;
	const void __user *fp;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* TODO: We don't support guest os callchain now */
		return;
	}

	fp = (void __user *)regs->bp;

	perf_callchain_store(entry, regs->ip);

	if (!current->mm)
		return;

	if (perf_callchain_user32(regs, entry))
		return;

	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		unsigned long bytes;
		frame.next_frame	     = NULL;
		frame.return_address = 0;

		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;

		if ((unsigned long)fp < regs->sp)
			break;

		perf_callchain_store(entry, frame.return_address);
		fp = frame.next_frame;
	}
}

unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	unsigned long ip;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
		ip = perf_guest_cbs->get_guest_ip();
	else
		ip = instruction_pointer(regs);

	return ip;
}

unsigned long perf_misc_flags(struct pt_regs *regs)
{
	int misc = 0;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		if (perf_guest_cbs->is_user_mode())
			misc |= PERF_RECORD_MISC_GUEST_USER;
		else
			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (user_mode(regs))
			misc |= PERF_RECORD_MISC_USER;
		else
			misc |= PERF_RECORD_MISC_KERNEL;
	}

	if (regs->flags & PERF_EFLAGS_EXACT)
		misc |= PERF_RECORD_MISC_EXACT_IP;

	return misc;
}