perf_event.h 37.5 KB
Newer Older
T
Thomas Gleixner 已提交
1
/*
I
Ingo Molnar 已提交
2
 * Performance events:
T
Thomas Gleixner 已提交
3
 *
I
Ingo Molnar 已提交
4
 *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
5 6
 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
T
Thomas Gleixner 已提交
7
 *
I
Ingo Molnar 已提交
8
 * Data type definitions, declarations, prototypes.
T
Thomas Gleixner 已提交
9
 *
I
Ingo Molnar 已提交
10
 *    Started by: Thomas Gleixner and Ingo Molnar
T
Thomas Gleixner 已提交
11
 *
I
Ingo Molnar 已提交
12
 * For licencing details see kernel-base/COPYING
T
Thomas Gleixner 已提交
13
 */
14 15
#ifndef _LINUX_PERF_EVENT_H
#define _LINUX_PERF_EVENT_H
T
Thomas Gleixner 已提交
16

17
#include <uapi/linux/perf_event.h>
T
Thomas Gleixner 已提交
18

I
Ingo Molnar 已提交
19
/*
20
 * Kernel-internal data types and definitions:
I
Ingo Molnar 已提交
21 22
 */

23 24
#ifdef CONFIG_PERF_EVENTS
# include <asm/perf_event.h>
25
# include <asm/local64.h>
26 27
#endif

28
struct perf_guest_info_callbacks {
29 30 31
	int				(*is_in_guest)(void);
	int				(*is_user_mode)(void);
	unsigned long			(*get_guest_ip)(void);
32 33
};

34 35 36 37
#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <asm/hw_breakpoint.h>
#endif

38 39 40 41 42
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
43
#include <linux/hrtimer.h>
P
Peter Zijlstra 已提交
44
#include <linux/fs.h>
45
#include <linux/pid_namespace.h>
46
#include <linux/workqueue.h>
47
#include <linux/ftrace.h>
48
#include <linux/cpu.h>
49
#include <linux/irq_work.h>
50
#include <linux/static_key.h>
51
#include <linux/jump_label_ratelimit.h>
A
Arun Sharma 已提交
52
#include <linux/atomic.h>
53
#include <linux/sysfs.h>
54
#include <linux/perf_regs.h>
55
#include <linux/workqueue.h>
56
#include <linux/cgroup.h>
57
#include <asm/local.h>
58

59 60
struct perf_callchain_entry {
	__u64				nr;
61
	__u64				ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
62 63
};

64 65 66
struct perf_raw_record {
	u32				size;
	void				*data;
67 68
};

69 70 71 72 73 74 75 76 77
/*
 * branch stack layout:
 *  nr: number of taken branches stored in entries[]
 *
 * Note that nr can vary from sample to sample
 * branches (to, from) are stored from most recent
 * to least recent, i.e., entries[0] contains the most
 * recent branch.
 */
78 79 80 81 82
struct perf_branch_stack {
	__u64				nr;	/* number of entries[] filled in for this sample */
	struct perf_branch_entry	entries[0]; /* stored most-recent branch first */
};

83 84
struct task_struct;

85 86 87 88 89 90 91 92 93 94
/*
 * Extra PMU register associated with an event: an additional config
 * register beyond the main counter; see hw_perf_event::extra_reg and
 * hw_perf_event::branch_reg.
 */
struct hw_perf_event_extra {
	u64		config;	/* register value */
	unsigned int	reg;	/* register address or index */
	int		alloc;	/* extra register already allocated */
	int		idx;	/* index in shared_regs->regs[] */
};

T
Thomas Gleixner 已提交
95
/**
96
 * struct hw_perf_event - performance event hardware details:
T
Thomas Gleixner 已提交
97
 */
98 99
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
100 101
	union {
		struct { /* hardware */
I
Ingo Molnar 已提交
102
			u64		config;
103
			u64		last_tag;
I
Ingo Molnar 已提交
104
			unsigned long	config_base;
105
			unsigned long	event_base;
106
			int		event_base_rdpmc;
I
Ingo Molnar 已提交
107
			int		idx;
108
			int		last_cpu;
109
			int		flags;
110

111
			struct hw_perf_event_extra extra_reg;
112
			struct hw_perf_event_extra branch_reg;
113
		};
114
		struct { /* software */
I
Ingo Molnar 已提交
115
			struct hrtimer	hrtimer;
116
		};
117 118 119 120
		struct { /* tracepoint */
			/* for tp_event->class */
			struct list_head	tp_list;
		};
121 122
		struct { /* intel_cqm */
			int			cqm_state;
123
			u32			cqm_rmid;
124
			int			is_group_event;
125 126 127 128
			struct list_head	cqm_events_entry;
			struct list_head	cqm_groups_entry;
			struct list_head	cqm_group_entry;
		};
129 130 131
		struct { /* itrace */
			int			itrace_started;
		};
132 133 134 135
		struct { /* amd_power */
			u64	pwr_acc;
			u64	ptsc;
		};
136
#ifdef CONFIG_HAVE_HW_BREAKPOINT
137
		struct { /* breakpoint */
138 139 140 141 142
			/*
			 * Crufty hack to avoid the chicken and egg
			 * problem hw_breakpoint has with context
			 * creation and event initalization.
			 */
143 144
			struct arch_hw_breakpoint	info;
			struct list_head		bp_list;
145
		};
146
#endif
147
	};
148 149 150 151
	/*
	 * If the event is a per task event, this will point to the task in
	 * question. See the comment in perf_event_alloc().
	 */
152
	struct task_struct		*target;
153

154 155 156 157 158 159 160 161 162
	/*
	 * PMU would store hardware filter configuration
	 * here.
	 */
	void				*addr_filters;

	/* Last sync'ed generation of filters */
	unsigned long			addr_filters_gen;

163 164 165 166 167 168 169
/*
 * hw_perf_event::state flags; used to track the PERF_EF_* state.
 */
#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
#define PERF_HES_ARCH		0x04

P
Peter Zijlstra 已提交
170
	int				state;
171 172 173 174 175

	/*
	 * The last observed hardware counter value, updated with a
	 * local64_cmpxchg() such that pmu::read() can be called nested.
	 */
176
	local64_t			prev_count;
177 178 179 180

	/*
	 * The period to start the next sample with.
	 */
181
	u64				sample_period;
182 183 184 185

	/*
	 * The period we started this sample with.
	 */
186
	u64				last_period;
187 188 189 190 191 192

	/*
	 * However much is left of the current period; note that this is
	 * a full 64bit value and allows for generation of periods longer
	 * than hardware might allow.
	 */
193
	local64_t			period_left;
194 195 196 197 198

	/*
	 * State for throttling the event, see __perf_event_overflow() and
	 * perf_adjust_freq_unthr_context().
	 */
199
	u64                             interrupts_seq;
200
	u64				interrupts;
201

202 203 204 205
	/*
	 * State for freq target events, see __perf_event_overflow() and
	 * perf_adjust_freq_unthr_context().
	 */
206 207
	u64				freq_time_stamp;
	u64				freq_count_stamp;
208
#endif
T
Thomas Gleixner 已提交
209 210
};

211
struct perf_event;
I
Ingo Molnar 已提交
212

213 214 215
/*
 * Common implementation detail of pmu::{start,commit,cancel}_txn
 */
216
#define PERF_PMU_TXN_ADD  0x1		/* txn to add/schedule event on PMU */
217
#define PERF_PMU_TXN_READ 0x2		/* txn to read event group from PMU */
218

219 220 221 222
/**
 * pmu::capabilities flags
 */
#define PERF_PMU_CAP_NO_INTERRUPT		0x01
223
#define PERF_PMU_CAP_NO_NMI			0x02
224
#define PERF_PMU_CAP_AUX_NO_SG			0x04
225
#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
226
#define PERF_PMU_CAP_EXCLUSIVE			0x10
227
#define PERF_PMU_CAP_ITRACE			0x20
228

I
Ingo Molnar 已提交
229
/**
230
 * struct pmu - generic performance monitoring unit
I
Ingo Molnar 已提交
231
 */
232
struct pmu {
233 234
	struct list_head		entry;

235
	struct module			*module;
P
Peter Zijlstra 已提交
236
	struct device			*dev;
237
	const struct attribute_group	**attr_groups;
238
	const char			*name;
P
Peter Zijlstra 已提交
239 240
	int				type;

241 242 243 244 245
	/*
	 * various common per-pmu feature flags
	 */
	int				capabilities;

P
Peter Zijlstra 已提交
246 247
	int * __percpu			pmu_disable_count;
	struct perf_cpu_context * __percpu pmu_cpu_context;
248
	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
P
Peter Zijlstra 已提交
249
	int				task_ctx_nr;
250
	int				hrtimer_interval_ms;
251

252 253 254
	/* number of address filters this PMU can do */
	unsigned int			nr_addr_filters;

255
	/*
P
Peter Zijlstra 已提交
256 257
	 * Fully disable/enable this PMU, can be used to protect from the PMI
	 * as well as for lazy/batch writing of the MSRs.
258
	 */
P
Peter Zijlstra 已提交
259 260
	void (*pmu_enable)		(struct pmu *pmu); /* optional */
	void (*pmu_disable)		(struct pmu *pmu); /* optional */
261

262
	/*
P
Peter Zijlstra 已提交
263
	 * Try and initialize the event for this PMU.
264 265 266 267 268 269 270 271 272 273 274 275 276
	 *
	 * Returns:
	 *  -ENOENT	-- @event is not for this PMU
	 *
	 *  -ENODEV	-- @event is for this PMU but PMU not present
	 *  -EBUSY	-- @event is for this PMU but PMU temporarily unavailable
	 *  -EINVAL	-- @event is for this PMU but @event is not valid
	 *  -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
	 *  -EACCES	-- @event is for this PMU, @event is valid, but no privileges
	 *
	 *  0		-- @event is for this PMU and valid
	 *
	 * Other error return values are allowed.
277
	 */
278 279
	int (*event_init)		(struct perf_event *event);

280 281 282 283 284 285 286
	/*
	 * Notification that the event was mapped or unmapped.  Called
	 * in the context of the mapping task.
	 */
	void (*event_mapped)		(struct perf_event *event); /*optional*/
	void (*event_unmapped)		(struct perf_event *event); /*optional*/

287 288 289 290
	/*
	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
	 * matching hw_perf_event::state flags.
	 */
P
Peter Zijlstra 已提交
291 292 293 294
#define PERF_EF_START	0x01		/* start the counter when adding    */
#define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
#define PERF_EF_UPDATE	0x04		/* update the counter when stopping */

295
	/*
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
	 * Adds/Removes a counter to/from the PMU, can be done inside a
	 * transaction, see the ->*_txn() methods.
	 *
	 * The add/del callbacks will reserve all hardware resources required
	 * to service the event, this includes any counter constraint
	 * scheduling etc.
	 *
	 * Called with IRQs disabled and the PMU disabled on the CPU the event
	 * is on.
	 *
	 * ->add() called without PERF_EF_START should result in the same state
	 *  as ->add() followed by ->stop().
	 *
	 * ->del() must always PERF_EF_UPDATE stop an event. If it calls
	 *  ->stop() that must deal with already being stopped without
	 *  PERF_EF_UPDATE.
P
Peter Zijlstra 已提交
312 313 314 315 316
	 */
	int  (*add)			(struct perf_event *event, int flags);
	void (*del)			(struct perf_event *event, int flags);

	/*
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
	 * Starts/Stops a counter present on the PMU.
	 *
	 * The PMI handler should stop the counter when perf_event_overflow()
	 * returns !0. ->start() will be used to continue.
	 *
	 * Also used to change the sample period.
	 *
	 * Called with IRQs disabled and the PMU disabled on the CPU the event
	 * is on -- will be called from NMI context with the PMU generates
	 * NMIs.
	 *
	 * ->stop() with PERF_EF_UPDATE will read the counter and update
	 *  period/count values like ->read() would.
	 *
	 * ->start() with PERF_EF_RELOAD will reprogram the the counter
	 *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
P
Peter Zijlstra 已提交
333 334 335 336 337 338
	 */
	void (*start)			(struct perf_event *event, int flags);
	void (*stop)			(struct perf_event *event, int flags);

	/*
	 * Updates the counter value of the event.
339 340 341
	 *
	 * For sampling capable PMUs this will also update the software period
	 * hw_perf_event::period_left field.
P
Peter Zijlstra 已提交
342
	 */
343
	void (*read)			(struct perf_event *event);
344 345

	/*
346 347 348
	 * Group events scheduling is treated as a transaction, add
	 * group events as a whole and perform one schedulability test.
	 * If the test fails, roll back the whole group
P
Peter Zijlstra 已提交
349 350
	 *
	 * Start the transaction, after this ->add() doesn't need to
351
	 * do schedulability tests.
352 353
	 *
	 * Optional.
354
	 */
355
	void (*start_txn)		(struct pmu *pmu, unsigned int txn_flags);
356
	/*
P
Peter Zijlstra 已提交
357
	 * If ->start_txn() disabled the ->add() schedulability test
358 359 360
	 * then ->commit_txn() is required to perform one. On success
	 * the transaction is closed. On error the transaction is kept
	 * open until ->cancel_txn() is called.
361 362
	 *
	 * Optional.
363
	 */
364
	int  (*commit_txn)		(struct pmu *pmu);
365
	/*
P
Peter Zijlstra 已提交
366
	 * Will cancel the transaction, assumes ->del() is called
L
Lucas De Marchi 已提交
367
	 * for each successful ->add() during the transaction.
368 369
	 *
	 * Optional.
370
	 */
371
	void (*cancel_txn)		(struct pmu *pmu);
372 373 374 375 376 377

	/*
	 * Will return the value for perf_event_mmap_page::index for this event,
	 * if no implementation is provided it will default to: event->hw.idx + 1.
	 */
	int (*event_idx)		(struct perf_event *event); /*optional */
378

379 380 381 382 383
	/*
	 * context-switches callback
	 */
	void (*sched_task)		(struct perf_event_context *ctx,
					bool sched_in);
384 385 386 387
	/*
	 * PMU specific data size
	 */
	size_t				task_ctx_size;
388

389 390 391 392 393

	/*
	 * Return the count value for a counter.
	 */
	u64 (*count)			(struct perf_event *event); /*optional*/
394 395 396 397 398 399 400 401 402 403 404 405

	/*
	 * Set up pmu-private data structures for an AUX area
	 */
	void *(*setup_aux)		(int cpu, void **pages,
					 int nr_pages, bool overwrite);
					/* optional */

	/*
	 * Free pmu-private AUX data structures
	 */
	void (*free_aux)		(void *aux); /* optional */
406

407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
	/*
	 * Validate address range filters: make sure the HW supports the
	 * requested configuration and number of filters; return 0 if the
	 * supplied filters are valid, -errno otherwise.
	 *
	 * Runs in the context of the ioctl()ing process and is not serialized
	 * with the rest of the PMU callbacks.
	 */
	int (*addr_filters_validate)	(struct list_head *filters);
					/* optional */

	/*
	 * Synchronize address range filter configuration:
	 * translate hw-agnostic filters into hardware configuration in
	 * event::hw::addr_filters.
	 *
	 * Runs as a part of filter sync sequence that is done in ->start()
	 * callback by calling perf_event_addr_filters_sync().
	 *
	 * May (and should) traverse event::addr_filters::list, for which its
	 * caller provides necessary serialization.
	 */
	void (*addr_filters_sync)	(struct perf_event *event);
					/* optional */

432 433 434 435
	/*
	 * Filter events for PMU-specific reasons.
	 */
	int (*filter_match)		(struct perf_event *event); /* optional */
I
Ingo Molnar 已提交
436 437
};

438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
/**
 * struct perf_addr_filter - address range filter definition
 * @entry:	event's filter list linkage
 * @inode:	object file's inode for file-based filters
 * @offset:	filter range offset
 * @size:	filter range size
 * @range:	1: range, 0: address
 * @filter:	1: filter/start, 0: stop
 *
 * This is a hardware-agnostic filter configuration as specified by the user.
 * Filters live on a perf_addr_filters_head::list (serialized by its lock);
 * the PMU validates them via pmu::addr_filters_validate() and translates
 * them into hardware state via pmu::addr_filters_sync().
 */
struct perf_addr_filter {
	struct list_head	entry;
	struct inode		*inode;
	unsigned long		offset;
	unsigned long		size;
	unsigned int		range	: 1,	/* 1: [offset, offset+size) range; 0: single address */
				filter	: 1;	/* 1: filter/start; 0: stop */
};

/**
 * struct perf_addr_filters_head - container for address range filters
 * @list:	list of filters for this event
 * @lock:	spinlock that serializes accesses to the @list and event's
 *		(and its children's) filter generations.
 *
 * A child event will use parent's @list (and therefore @lock), so they are
 * bundled together; see perf_event_addr_filters().
 * The @list holds struct perf_addr_filter entries, installed into hardware
 * through pmu::addr_filters_sync().
 */
struct perf_addr_filters_head {
	struct list_head	list;
	raw_spinlock_t		lock;	/* raw: may be taken from scheduling/PMU paths */
};

472
/**
473
 * enum perf_event_active_state - the states of an event
474
 */
475
enum perf_event_active_state {
P
Peter Zijlstra 已提交
476
	PERF_EVENT_STATE_DEAD		= -4,
477
	PERF_EVENT_STATE_EXIT		= -3,
I
Ingo Molnar 已提交
478
	PERF_EVENT_STATE_ERROR		= -2,
479 480
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
I
Ingo Molnar 已提交
481
	PERF_EVENT_STATE_ACTIVE		=  1,
482 483
};

484
struct file;
485 486
struct perf_sample_data;

487
typedef void (*perf_overflow_handler_t)(struct perf_event *,
488 489 490
					struct perf_sample_data *,
					struct pt_regs *regs);

491
enum perf_group_flag {
492
	PERF_GROUP_SOFTWARE		= 0x1,
493 494
};

495 496
#define SWEVENT_HLIST_BITS		8
#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
497 498

struct swevent_hlist {
499 500
	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
	struct rcu_head			rcu_head;
501 502
};

503 504
#define PERF_ATTACH_CONTEXT	0x01
#define PERF_ATTACH_GROUP	0x02
505
#define PERF_ATTACH_TASK	0x04
506
#define PERF_ATTACH_TASK_DATA	0x08
507

508
struct perf_cgroup;
509 510
struct ring_buffer;

T
Thomas Gleixner 已提交
511
/**
512
 * struct perf_event - performance event kernel representation:
T
Thomas Gleixner 已提交
513
 */
514 515
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
516 517 518 519 520
	/*
	 * entry onto perf_event_context::event_list;
	 *   modifications require ctx->lock
	 *   RCU safe iterations.
	 */
P
Peter Zijlstra 已提交
521
	struct list_head		event_entry;
522 523 524 525 526 527 528 529 530 531

	/*
	 * XXX: group_entry and sibling_list should be mutually exclusive;
	 * either you're a sibling on a group, or you're the group leader.
	 * Rework the code to always use the same list element.
	 *
	 * Locked for modification by both ctx->mutex and ctx->lock; holding
	 * either suffices for read.
	 */
	struct list_head		group_entry;
532
	struct list_head		sibling_list;
533 534 535 536 537 538 539 540

	/*
	 * We need storage to track the entries in perf_pmu_migrate_context; we
	 * cannot use the event_entry because of RCU and we want to keep the
	 * group intact which avoids us using the other two entries.
	 */
	struct list_head		migrate_entry;

541 542
	struct hlist_node		hlist_entry;
	struct list_head		active_entry;
I
Ingo Molnar 已提交
543
	int				nr_siblings;
544
	int				group_flags;
545
	struct perf_event		*group_leader;
P
Peter Zijlstra 已提交
546
	struct pmu			*pmu;
547
	void				*pmu_private;
548

549
	enum perf_event_active_state	state;
550
	unsigned int			attach_state;
551
	local64_t			count;
552
	atomic64_t			child_count;
553

554
	/*
555
	 * These are the total time in nanoseconds that the event
556
	 * has been enabled (i.e. eligible to run, and the task has
557
	 * been scheduled in, if this is a per-task event)
558 559 560
	 * and running (scheduled onto the CPU), respectively.
	 *
	 * They are computed from tstamp_enabled, tstamp_running and
561
	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
562 563 564 565 566 567
	 */
	u64				total_time_enabled;
	u64				total_time_running;

	/*
	 * These are timestamps used for computing total_time_enabled
568
	 * and total_time_running when the event is in INACTIVE or
569 570
	 * ACTIVE state, measured in nanoseconds from an arbitrary point
	 * in time.
571 572
	 * tstamp_enabled: the notional time when the event was enabled
	 * tstamp_running: the notional time when the event was scheduled on
573
	 * tstamp_stopped: in INACTIVE state, the notional time when the
574
	 *	event was scheduled off.
575 576 577 578 579
	 */
	u64				tstamp_enabled;
	u64				tstamp_running;
	u64				tstamp_stopped;

580 581 582 583 584 585 586 587 588 589
	/*
	 * timestamp shadows the actual context timing but it can
	 * be safely used in NMI interrupt context. It reflects the
	 * context time as it was when the event was last scheduled in.
	 *
	 * ctx_time already accounts for ctx->timestamp. Therefore to
	 * compute ctx_time for a sample, simply add perf_clock().
	 */
	u64				shadow_ctx_time;

590
	struct perf_event_attr		attr;
591
	u16				header_size;
592
	u16				id_header_size;
593
	u16				read_size;
594
	struct hw_perf_event		hw;
T
Thomas Gleixner 已提交
595

596
	struct perf_event_context	*ctx;
597
	atomic_long_t			refcount;
T
Thomas Gleixner 已提交
598

599 600
	/*
	 * These accumulate total time (in nanoseconds) that children
601
	 * events have been enabled and running, respectively.
602 603 604 605
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

T
Thomas Gleixner 已提交
606
	/*
607
	 * Protect attach/detach and child_list:
T
Thomas Gleixner 已提交
608
	 */
609 610
	struct mutex			child_mutex;
	struct list_head		child_list;
611
	struct perf_event		*parent;
T
Thomas Gleixner 已提交
612 613 614 615

	int				oncpu;
	int				cpu;

616 617 618
	struct list_head		owner_entry;
	struct task_struct		*owner;

619 620 621
	/* mmap bits */
	struct mutex			mmap_mutex;
	atomic_t			mmap_count;
P
Peter Zijlstra 已提交
622

623
	struct ring_buffer		*rb;
624
	struct list_head		rb_entry;
625 626
	unsigned long			rcu_batches;
	int				rcu_pending;
627

628
	/* poll related */
T
Thomas Gleixner 已提交
629
	wait_queue_head_t		waitq;
P
Peter Zijlstra 已提交
630
	struct fasync_struct		*fasync;
631 632 633

	/* delayed work for NMIs and such */
	int				pending_wakeup;
634
	int				pending_kill;
635
	int				pending_disable;
636
	struct irq_work			pending;
P
Peter Zijlstra 已提交
637

638 639
	atomic_t			event_limit;

640 641 642 643 644 645
	/* address range filters */
	struct perf_addr_filters_head	addr_filters;
	/* vma address array for file-based filters */
	unsigned long			*addr_filters_offs;
	unsigned long			addr_filters_gen;

646
	void (*destroy)(struct perf_event *);
P
Peter Zijlstra 已提交
647
	struct rcu_head			rcu_head;
648 649

	struct pid_namespace		*ns;
650
	u64				id;
L
Li Zefan 已提交
651

652
	u64				(*clock)(void);
653
	perf_overflow_handler_t		overflow_handler;
654
	void				*overflow_handler_context;
655

656
#ifdef CONFIG_EVENT_TRACING
657
	struct trace_event_call		*tp_event;
L
Li Zefan 已提交
658
	struct event_filter		*filter;
659 660 661
#ifdef CONFIG_FUNCTION_TRACER
	struct ftrace_ops               ftrace_ops;
#endif
662
#endif
L
Li Zefan 已提交
663

S
Stephane Eranian 已提交
664 665 666 667 668
#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp; /* cgroup event is attach to */
	int				cgrp_defer_enabled;
#endif

L
Li Zefan 已提交
669
#endif /* CONFIG_PERF_EVENTS */
T
Thomas Gleixner 已提交
670 671 672
};

/**
673
 * struct perf_event_context - event context structure
T
Thomas Gleixner 已提交
674
 *
675
 * Used as a container for task events and CPU events as well:
T
Thomas Gleixner 已提交
676
 */
677
struct perf_event_context {
P
Peter Zijlstra 已提交
678
	struct pmu			*pmu;
T
Thomas Gleixner 已提交
679
	/*
680
	 * Protect the states of the events in the list,
681
	 * nr_active, and the list:
T
Thomas Gleixner 已提交
682
	 */
683
	raw_spinlock_t			lock;
684
	/*
685
	 * Protect the list of events.  Locking either mutex or lock
686 687 688
	 * is sufficient to ensure the list doesn't change; to change
	 * the list you need to lock both the mutex and the spinlock.
	 */
I
Ingo Molnar 已提交
689
	struct mutex			mutex;
690

691
	struct list_head		active_ctx_list;
692 693
	struct list_head		pinned_groups;
	struct list_head		flexible_groups;
I
Ingo Molnar 已提交
694
	struct list_head		event_list;
695
	int				nr_events;
I
Ingo Molnar 已提交
696 697
	int				nr_active;
	int				is_active;
698
	int				nr_stat;
699
	int				nr_freq;
700
	int				rotate_disable;
I
Ingo Molnar 已提交
701 702
	atomic_t			refcount;
	struct task_struct		*task;
703 704

	/*
705
	 * Context clock, runs when context enabled.
706
	 */
I
Ingo Molnar 已提交
707 708
	u64				time;
	u64				timestamp;
709 710 711 712 713

	/*
	 * These fields let us detect when two contexts have both
	 * been cloned (inherited) from a common ancestor.
	 */
714
	struct perf_event_context	*parent_ctx;
I
Ingo Molnar 已提交
715 716 717
	u64				parent_gen;
	u64				generation;
	int				pin_count;
718
	int				nr_cgroups;	 /* cgroup evts */
719
	void				*task_ctx_data; /* pmu specific data */
720
	struct rcu_head			rcu_head;
T
Thomas Gleixner 已提交
721 722
};

723 724
/*
 * Number of contexts where an event can trigger:
725
 *	task, softirq, hardirq, nmi.
726 727 728
 */
#define PERF_NR_CONTEXTS	4

T
Thomas Gleixner 已提交
729
/**
730
 * struct perf_cpu_context - per cpu event context structure
T
Thomas Gleixner 已提交
731 732
 */
struct perf_cpu_context {
733 734
	struct perf_event_context	ctx;
	struct perf_event_context	*task_ctx;
T
Thomas Gleixner 已提交
735
	int				active_oncpu;
736
	int				exclusive;
P
Peter Zijlstra 已提交
737 738

	raw_spinlock_t			hrtimer_lock;
739 740
	struct hrtimer			hrtimer;
	ktime_t				hrtimer_interval;
P
Peter Zijlstra 已提交
741 742
	unsigned int			hrtimer_active;

743
	struct pmu			*unique_pmu;
S
Stephane Eranian 已提交
744
	struct perf_cgroup		*cgrp;
T
Thomas Gleixner 已提交
745 746
};

747
struct perf_output_handle {
I
Ingo Molnar 已提交
748
	struct perf_event		*event;
749
	struct ring_buffer		*rb;
750
	unsigned long			wakeup;
751
	unsigned long			size;
752 753 754 755
	union {
		void			*addr;
		unsigned long		head;
	};
756
	int				page;
757 758
};

759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780
#ifdef CONFIG_CGROUP_PERF

/*
 * perf_cgroup_info keeps track of time_enabled for a cgroup.
 * This is a per-cpu dynamically allocated data structure.
 */
struct perf_cgroup_info {
	u64				time;	/* accumulated enabled time for this cgroup */
	u64				timestamp; /* last point ->time was brought up to date */
};

/* Per-cgroup perf state; see perf_event::cgrp and perf_cpu_context::cgrp. */
struct perf_cgroup {
	struct cgroup_subsys_state	css;
	struct perf_cgroup_info	__percpu *info;	/* per-cpu timing info, see above */
};

/*
 * Must ensure cgroup is pinned (css_get) before calling
 * this function. In other words, we cannot call this function
 * if there is no cgroup event for the current CPU context.
 */
static inline struct perf_cgroup *
781
perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
782
{
783 784 785
	return container_of(task_css_check(task, perf_event_cgrp_id,
					   ctx ? lockdep_is_held(&ctx->lock)
					       : true),
786 787 788 789
			    struct perf_cgroup, css);
}
#endif /* CONFIG_CGROUP_PERF */

790
#ifdef CONFIG_PERF_EVENTS
791

792 793 794 795 796 797 798 799
extern void *perf_aux_output_begin(struct perf_output_handle *handle,
				   struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
				unsigned long size, bool truncated);
extern int perf_aux_output_skip(struct perf_output_handle *handle,
				unsigned long size);
extern void *perf_get_aux(struct perf_output_handle *handle);

800
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
801
extern void perf_pmu_unregister(struct pmu *pmu);
I
Ingo Molnar 已提交
802

803
extern int perf_num_counters(void);
804
extern const char *perf_pmu_name(void);
805 806 807 808
extern void __perf_event_task_sched_in(struct task_struct *prev,
				       struct task_struct *task);
extern void __perf_event_task_sched_out(struct task_struct *prev,
					struct task_struct *next);
809 810 811
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
812
extern void perf_event_delayed_put(struct task_struct *task);
813
extern struct file *perf_event_get(unsigned int fd);
814
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
815
extern void perf_event_print_debug(void);
P
Peter Zijlstra 已提交
816 817
extern void perf_pmu_disable(struct pmu *pmu);
extern void perf_pmu_enable(struct pmu *pmu);
818 819
extern void perf_sched_cb_dec(struct pmu *pmu);
extern void perf_sched_cb_inc(struct pmu *pmu);
820 821
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
822
extern int perf_event_refresh(struct perf_event *event, int refresh);
823
extern void perf_event_update_userpage(struct perf_event *event);
824 825 826 827
extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
				int cpu,
M
Matt Helsley 已提交
828
				struct task_struct *task,
829 830
				perf_overflow_handler_t callback,
				void *context);
831 832
extern void perf_pmu_migrate_context(struct pmu *pmu,
				int src_cpu, int dst_cpu);
833
extern u64 perf_event_read_local(struct perf_event *event);
834 835
extern u64 perf_event_read_value(struct perf_event *event,
				 u64 *enabled, u64 *running);
836

837

838
struct perf_sample_data {
839 840 841 842 843 844 845 846 847 848 849
	/*
	 * Fields set by perf_sample_data_init(), group so as to
	 * minimize the cachelines touched.
	 */
	u64				addr;
	struct perf_raw_record		*raw;
	struct perf_branch_stack	*br_stack;
	u64				period;
	u64				weight;
	u64				txn;
	union  perf_mem_data_src	data_src;
850

851 852 853 854 855
	/*
	 * The other fields, optionally {set,used} by
	 * perf_{prepare,output}_sample().
	 */
	u64				type;
856 857 858 859 860 861 862 863 864 865 866 867 868
	u64				ip;
	struct {
		u32	pid;
		u32	tid;
	}				tid_entry;
	u64				time;
	u64				id;
	u64				stream_id;
	struct {
		u32	cpu;
		u32	reserved;
	}				cpu_entry;
	struct perf_callchain_entry	*callchain;
869 870 871 872 873

	/*
	 * regs_user may point to task_pt_regs or to regs_user_copy, depending
	 * on arch details.
	 */
874
	struct perf_regs		regs_user;
875 876
	struct pt_regs			regs_user_copy;

877
	struct perf_regs		regs_intr;
878
	u64				stack_user_size;
879
} ____cacheline_aligned;
880

881 882 883 884 885 886 887
/* default value for data source */
#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
		    PERF_MEM_S(LVL, NA)   |\
		    PERF_MEM_S(SNOOP, NA) |\
		    PERF_MEM_S(LOCK, NA)  |\
		    PERF_MEM_S(TLB, NA))

888 889
static inline void perf_sample_data_init(struct perf_sample_data *data,
					 u64 addr, u64 period)
890
{
891
	/* remaining struct members initialized in perf_prepare_sample() */
892 893
	data->addr = addr;
	data->raw  = NULL;
894
	data->br_stack = NULL;
895
	data->period = period;
A
Andi Kleen 已提交
896
	data->weight = 0;
897
	data->data_src.val = PERF_MEM_NA;
A
Andi Kleen 已提交
898
	data->txn = 0;
899 900
}

901 902 903
extern void perf_output_sample(struct perf_output_handle *handle,
			       struct perf_event_header *header,
			       struct perf_sample_data *data,
904
			       struct perf_event *event);
905 906
extern void perf_prepare_sample(struct perf_event_header *header,
				struct perf_sample_data *data,
907
				struct perf_event *event,
908 909
				struct pt_regs *regs);

910
extern int perf_event_overflow(struct perf_event *event,
911 912
				 struct perf_sample_data *data,
				 struct pt_regs *regs);
913

914 915 916 917 918 919
extern void perf_event_output_forward(struct perf_event *event,
				     struct perf_sample_data *data,
				     struct pt_regs *regs);
extern void perf_event_output_backward(struct perf_event *event,
				       struct perf_sample_data *data,
				       struct pt_regs *regs);
920
extern void perf_event_output(struct perf_event *event,
921 922
			      struct perf_sample_data *data,
			      struct pt_regs *regs);
923

924 925 926
static inline bool
is_default_overflow_handler(struct perf_event *event)
{
927 928 929 930 931
	if (likely(event->overflow_handler == perf_event_output_forward))
		return true;
	if (unlikely(event->overflow_handler == perf_event_output_backward))
		return true;
	return false;
932 933
}

934 935 936 937 938 939 940 941 942
extern void
perf_event_header__init_id(struct perf_event_header *header,
			   struct perf_sample_data *data,
			   struct perf_event *event);
extern void
perf_event__output_id_sample(struct perf_event *event,
			     struct perf_output_handle *handle,
			     struct perf_sample_data *sample);

943 944 945
extern void
perf_log_lost_samples(struct perf_event *event, u64 lost);

946 947 948 949 950
/* An event is a sampling event iff a non-zero sample period was requested. */
static inline bool is_sampling_event(struct perf_event *event)
{
	return !!event->attr.sample_period;
}

951
/*
952
 * Return 1 for a software event, 0 for a hardware event
953
 */
954
static inline int is_software_event(struct perf_event *event)
955
{
956
	return event->pmu->task_ctx_nr == perf_sw_context;
957 958
}

959
extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
960

961
extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
962
extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
963

964
#ifndef perf_arch_fetch_caller_regs
965
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
966
#endif
967 968 969 970 971 972 973 974 975

/*
 * Take a snapshot of the regs. Skip ip and frame pointer to
 * the nth caller. We only need a few of the regs:
 * - ip for PERF_SAMPLE_IP
 * - cs for user_mode() tests
 * - bp for callchains
 * - eflags, for future purposes, just in case
 */
976
static inline void perf_fetch_caller_regs(struct pt_regs *regs)
977 978 979
{
	memset(regs, 0, sizeof(*regs));

980
	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
981 982
}

P
Peter Zijlstra 已提交
983
static __always_inline void
984
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
985
{
986 987 988 989 990
	if (static_key_false(&perf_swevent_enabled[event_id]))
		__perf_sw_event(event_id, nr, regs, addr);
}

DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
P
Peter Zijlstra 已提交
991

992 993 994 995 996 997 998 999
/*
 * 'Special' version for the scheduler, it hard assumes no recursion,
 * which is guaranteed by us not actually scheduling inside other swevents
 * because those disable preemption.
 */
static __always_inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
{
1000
	if (static_key_false(&perf_swevent_enabled[event_id])) {
1001 1002 1003 1004
		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);

		perf_fetch_caller_regs(regs);
		___perf_sw_event(event_id, nr, regs, addr);
1005 1006 1007
	}
}

/* Enabled while any event requires scheduler (context-switch) hooks. */
extern struct static_key_false perf_sched_events;

1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023
static __always_inline bool
perf_sw_migrate_enabled(void)
{
	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
		return true;
	return false;
}

static inline void perf_event_task_migrate(struct task_struct *task)
{
	if (perf_sw_migrate_enabled())
		task->sched_migrated = 1;
}

1024
static inline void perf_event_task_sched_in(struct task_struct *prev,
1025
					    struct task_struct *task)
1026
{
1027
	if (static_branch_unlikely(&perf_sched_events))
1028
		__perf_event_task_sched_in(prev, task);
1029 1030 1031 1032 1033 1034 1035 1036

	if (perf_sw_migrate_enabled() && task->sched_migrated) {
		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);

		perf_fetch_caller_regs(regs);
		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
		task->sched_migrated = 0;
	}
1037 1038 1039 1040
}

static inline void perf_event_task_sched_out(struct task_struct *prev,
					     struct task_struct *next)
1041
{
1042
	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
1043

1044
	if (static_branch_unlikely(&perf_sched_events))
1045
		__perf_event_task_sched_out(prev, next);
1046 1047
}

1048 1049 1050 1051 1052
static inline u64 __perf_event_count(struct perf_event *event)
{
	return local64_read(&event->count) + atomic64_read(&event->child_count);
}

1053
extern void perf_event_mmap(struct vm_area_struct *vma);
1054
extern struct perf_guest_info_callbacks *perf_guest_cbs;
1055 1056
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
1057

1058
extern void perf_event_exec(void);
1059
extern void perf_event_comm(struct task_struct *tsk, bool exec);
1060
extern void perf_event_fork(struct task_struct *tsk);
1061

1062 1063 1064
/* Callchains */
DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);

1065 1066
extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
1067 1068 1069 1070 1071
extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
		   bool crosstask, bool add_mark);
extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void);
1072

1073 1074
extern int sysctl_perf_event_max_stack;

1075
static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
1076
{
1077
	if (entry->nr < sysctl_perf_event_max_stack) {
1078
		entry->ip[entry->nr++] = ip;
1079 1080 1081 1082
		return 0;
	} else {
		return -1; /* no more room, stop walking the stack */
	}
1083
}
1084

1085 1086 1087
extern int sysctl_perf_event_paranoid;
extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;
1088 1089 1090
extern int sysctl_perf_cpu_time_max_percent;

extern void perf_sample_event_took(u64 sample_len_ns);
1091

P
Peter Zijlstra 已提交
1092 1093 1094
extern int perf_proc_update_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos);
1095 1096 1097 1098
extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos);

1099 1100
int perf_event_max_stack_handler(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp, loff_t *ppos);
P
Peter Zijlstra 已提交
1101

/* Paranoia thresholds: each level forbids strictly more than the one below. */
static inline bool perf_paranoid_tracepoint_raw(void)
{
	return sysctl_perf_event_paranoid > -1;
}

static inline bool perf_paranoid_cpu(void)
{
	return sysctl_perf_event_paranoid > 0;
}

static inline bool perf_paranoid_kernel(void)
{
	return sysctl_perf_event_paranoid > 1;
}

1117
extern void perf_event_init(void);
1118 1119
extern void perf_tp_event(u64 addr, u64 count, void *record,
			  int entry_size, struct pt_regs *regs,
1120 1121
			  struct hlist_head *head, int rctx,
			  struct task_struct *task);
1122
extern void perf_bp_event(struct perf_event *event, void *data);
1123

1124
#ifndef perf_misc_flags
1125 1126 1127
# define perf_misc_flags(regs) \
		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_instruction_pointer(regs)	instruction_pointer(regs)
1128 1129
#endif

1130 1131 1132
static inline bool has_branch_stack(struct perf_event *event)
{
	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
1133 1134 1135 1136 1137
}

static inline bool needs_branch_stack(struct perf_event *event)
{
	return event->attr.branch_sample_type != 0;
1138 1139
}

1140 1141 1142 1143 1144
static inline bool has_aux(struct perf_event *event)
{
	return event->pmu->setup_aux;
}

1145 1146 1147 1148 1149
static inline bool is_write_backward(struct perf_event *event)
{
	return !!event->attr.write_backward;
}

1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
static inline bool has_addr_filter(struct perf_event *event)
{
	return event->pmu->nr_addr_filters;
}

/*
 * An inherited event uses parent's filters
 */
static inline struct perf_addr_filters_head *
perf_event_addr_filters(struct perf_event *event)
{
	struct perf_addr_filters_head *ifh = &event->addr_filters;

	if (event->parent)
		ifh = &event->parent->addr_filters;

	return ifh;
}

extern void perf_event_addr_filters_sync(struct perf_event *event);

1171
extern int perf_output_begin(struct perf_output_handle *handle,
1172
			     struct perf_event *event, unsigned int size);
1173 1174 1175 1176 1177 1178 1179
extern int perf_output_begin_forward(struct perf_output_handle *handle,
				    struct perf_event *event,
				    unsigned int size);
extern int perf_output_begin_backward(struct perf_output_handle *handle,
				      struct perf_event *event,
				      unsigned int size);

1180
extern void perf_output_end(struct perf_output_handle *handle);
1181
extern unsigned int perf_output_copy(struct perf_output_handle *handle,
1182
			     const void *buf, unsigned int len);
1183 1184
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
				     unsigned int len);
1185 1186
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
1187
extern u64 perf_swevent_set_period(struct perf_event *event);
1188 1189
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
1190
extern void perf_event_disable_local(struct perf_event *event);
1191
extern void perf_event_task_tick(void);
1192
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event)				{ return NULL; }
static inline void
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
		    bool truncated)					{ }
static inline int
perf_aux_output_skip(struct perf_output_handle *handle,
		     unsigned long size)				{ return -EINVAL; }
static inline void *
perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
static inline void
perf_event_task_migrate(struct task_struct *task)			{ }
static inline void
perf_event_task_sched_in(struct task_struct *prev,
			 struct task_struct *task)			{ }
static inline void
perf_event_task_sched_out(struct task_struct *prev,
			  struct task_struct *next)			{ }
static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
static inline void perf_event_exit_task(struct task_struct *child)	{ }
static inline void perf_event_free_task(struct task_struct *task)	{ }
static inline void perf_event_delayed_put(struct task_struct *task)	{ }
static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
	return ERR_PTR(-EINVAL);
}
static inline u64 perf_event_read_local(struct perf_event *event)	{ return -EINVAL; }
static inline void perf_event_print_debug(void)				{ }
static inline int perf_event_task_disable(void)				{ return -EINVAL; }
static inline int perf_event_task_enable(void)				{ return -EINVAL; }
static inline int perf_event_refresh(struct perf_event *event, int refresh)
{
	return -EINVAL;
}

static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
static inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)			{ }
static inline void
perf_bp_event(struct perf_event *event, void *data)			{ }

static inline int perf_register_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks)				{ return 0; }
static inline int perf_unregister_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks)				{ return 0; }

static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_exec(void)				{ }
static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
static inline void perf_swevent_put_recursion_context(int rctx)		{ }
static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
static inline void perf_event_enable(struct perf_event *event)		{ }
static inline void perf_event_disable(struct perf_event *event)		{ }
static inline int __perf_event_disable(void *info)			{ return -1; }
static inline void perf_event_task_tick(void)				{ }
static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
static inline void perf_restore_debug_store(void)			{ }
#endif

#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))

/*
 * This has to have a higher priority than migration_notifier in sched/core.c.
 */
#define perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb =				\
		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
	unsigned long cpu = smp_processor_id();				\
	unsigned long flags;						\
									\
	cpu_notifier_register_begin();					\
	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
		(void *)(unsigned long)cpu);				\
	local_irq_save(flags);						\
	fn(&fn##_nb, (unsigned long)CPU_STARTING,			\
		(void *)(unsigned long)cpu);				\
	local_irq_restore(flags);					\
	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
		(void *)(unsigned long)cpu);				\
	__register_cpu_notifier(&fn##_nb);				\
	cpu_notifier_register_done();					\
} while (0)

/*
 * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
 * callback for already online CPUs.
 */
#define __perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb =				\
		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
									\
	__register_cpu_notifier(&fn##_nb);				\
} while (0)

1300 1301 1302
struct perf_pmu_events_attr {
	struct device_attribute attr;
	u64 id;
1303
	const char *event_str;
1304 1305
};

1306 1307 1308
ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
			      char *page);

/* Define a read-only sysfs event attribute rendered by @_show. */
#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
static struct perf_pmu_events_attr _var = {				\
	.attr = __ATTR(_name, 0444, _show, NULL),			\
	.id   =  _id,							\
};

/* String-valued event attribute, shown via the generic perf_event_sysfs_show(). */
#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			    \
static struct perf_pmu_events_attr _var = {				    \
	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
	.id		= 0,						    \
	.event_str	= _str,						    \
};

/* Define a sysfs "format" attribute that prints the fixed string @_format. */
#define PMU_FORMAT_ATTR(_name, _format)					\
static ssize_t								\
_name##_show(struct device *dev,					\
			       struct device_attribute *attr,		\
			       char *page)				\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
									\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)

#endif /* _LINUX_PERF_EVENT_H */