/*
 * Performance events:
 *
 *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
 *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
 *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
 *
 * Data type definitions, declarations, prototypes.
 *
 *    Started by: Thomas Gleixner and Ingo Molnar
 *
 * For licencing details see kernel-base/COPYING
 */
#ifndef _LINUX_PERF_EVENT_H
#define _LINUX_PERF_EVENT_H

#include <linux/types.h>
#include <linux/ioctl.h>
#include <asm/byteorder.h>

/*
 * User-space ABI bits:
 */

/*
 * attr.type
 */
enum perf_type_id {
	PERF_TYPE_HARDWARE			= 0,
	PERF_TYPE_SOFTWARE			= 1,
	PERF_TYPE_TRACEPOINT			= 2,
	PERF_TYPE_HW_CACHE			= 3,
	PERF_TYPE_RAW				= 4,
	PERF_TYPE_BREAKPOINT			= 5,

	PERF_TYPE_MAX,				/* non-ABI */
};

/*
 * Generalized performance event event_id types, used by the
 * attr.event_id parameter of the sys_perf_event_open()
 * syscall:
 */
enum perf_hw_id {
	/*
	 * Common hardware events, generalized by the kernel:
	 */
	PERF_COUNT_HW_CPU_CYCLES		= 0,
	PERF_COUNT_HW_INSTRUCTIONS		= 1,
	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
	PERF_COUNT_HW_CACHE_MISSES		= 3,
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
	PERF_COUNT_HW_BRANCH_MISSES		= 5,
	PERF_COUNT_HW_BUS_CYCLES		= 6,

	PERF_COUNT_HW_MAX,			/* non-ABI */
};

/*
 * Generalized hardware cache events:
 *
 *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
 *       { read, write, prefetch } x
 *       { accesses, misses }
 */
enum perf_hw_cache_id {
	PERF_COUNT_HW_CACHE_L1D			= 0,
	PERF_COUNT_HW_CACHE_L1I			= 1,
	PERF_COUNT_HW_CACHE_LL			= 2,
	PERF_COUNT_HW_CACHE_DTLB		= 3,
	PERF_COUNT_HW_CACHE_ITLB		= 4,
	PERF_COUNT_HW_CACHE_BPU			= 5,

	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
};

enum perf_hw_cache_op_id {
	PERF_COUNT_HW_CACHE_OP_READ		= 0,
	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,

	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
};

enum perf_hw_cache_op_result_id {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,

	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
};
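
/*
 * Illustrative sketch (not part of the definitions above): for
 * PERF_TYPE_HW_CACHE events, attr.config is commonly composed from the
 * three enums above as (id) | (op_id << 8) | (result_id << 16), e.g. to
 * count L1 data-cache read misses:
 *
 *   attr.type   = PERF_TYPE_HW_CACHE;
 *   attr.config = PERF_COUNT_HW_CACHE_L1D |
 *                 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *                 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 */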

/*
 * Special "software" events provided by the kernel, even if the hardware
 * does not support performance events. These events measure various
 * physical and sw events of the kernel (and allow the profiling of them as
 * well):
 */
enum perf_sw_ids {
	PERF_COUNT_SW_CPU_CLOCK			= 0,
	PERF_COUNT_SW_TASK_CLOCK		= 1,
	PERF_COUNT_SW_PAGE_FAULTS		= 2,
	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
	PERF_COUNT_SW_EMULATION_FAULTS		= 8,

	PERF_COUNT_SW_MAX,			/* non-ABI */
};

112
/*
113
 * Bits that can be set in attr.sample_type to request information
114 115
 * in the overflow packets.
 */
116
enum perf_event_sample_format {
I
Ingo Molnar 已提交
117 118 119 120
	PERF_SAMPLE_IP				= 1U << 0,
	PERF_SAMPLE_TID				= 1U << 1,
	PERF_SAMPLE_TIME			= 1U << 2,
	PERF_SAMPLE_ADDR			= 1U << 3,
121
	PERF_SAMPLE_READ			= 1U << 4,
I
Ingo Molnar 已提交
122 123 124 125
	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
	PERF_SAMPLE_ID				= 1U << 6,
	PERF_SAMPLE_CPU				= 1U << 7,
	PERF_SAMPLE_PERIOD			= 1U << 8,
126
	PERF_SAMPLE_STREAM_ID			= 1U << 9,
127
	PERF_SAMPLE_RAW				= 1U << 10,
128

129
	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
130 131
};

/*
 * The format of the data returned by read() on a perf event fd,
 * as specified by attr.read_format:
 *
 * struct read_format {
 *	{ u64		value;
 *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_RUNNING
 *	  { u64		id;           } && PERF_FORMAT_ID
 *	} && !PERF_FORMAT_GROUP
 *
 *	{ u64		nr;
 *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_RUNNING
 *	  { u64		value;
 *	    { u64	id;           } && PERF_FORMAT_ID
 *	  }		cntr[nr];
 *	} && PERF_FORMAT_GROUP
 * };
 */
enum perf_event_read_format {
	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
	PERF_FORMAT_ID				= 1U << 2,
	PERF_FORMAT_GROUP			= 1U << 3,

	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
};
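
/*
 * Minimal user-space sketch (not part of this header): reading and scaling
 * a single counter opened with PERF_FORMAT_TOTAL_TIME_ENABLED |
 * PERF_FORMAT_TOTAL_TIME_RUNNING and without PERF_FORMAT_GROUP.
 *
 *   struct { __u64 value, time_enabled, time_running; } rf;
 *
 *   if (read(fd, &rf, sizeof(rf)) == sizeof(rf) && rf.time_running)
 *           scaled = rf.value * rf.time_enabled / rf.time_running;
 */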

#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */

/*
 * Hardware event_id to monitor via a performance monitoring event:
 */
struct perf_event_attr {

	/*
	 * Major type: hardware/software/tracepoint/etc.
	 */
	__u32			type;

	/*
	 * Size of the attr structure, for fwd/bwd compat.
	 */
	__u32			size;

	/*
	 * Type specific configuration information.
	 */
	__u64			config;

	union {
		__u64		sample_period;
		__u64		sample_freq;
	};

	__u64			sample_type;
	__u64			read_format;

	__u64			disabled       :  1, /* off by default        */
				inherit	       :  1, /* children inherit it   */
				pinned	       :  1, /* must always be on PMU */
				exclusive      :  1, /* only group on PMU     */
				exclude_user   :  1, /* don't count user      */
				exclude_kernel :  1, /* ditto kernel          */
				exclude_hv     :  1, /* ditto hypervisor      */
				exclude_idle   :  1, /* don't count when idle */
				mmap           :  1, /* include mmap data     */
				comm	       :  1, /* include comm data     */
				freq           :  1, /* use freq, not period  */
				inherit_stat   :  1, /* per task counts       */
				enable_on_exec :  1, /* next exec enables     */
				task           :  1, /* trace fork/exit       */
				watermark      :  1, /* wakeup_watermark      */

				__reserved_1   : 49;

	union {
		__u32		wakeup_events;	  /* wakeup every n events */
		__u32		wakeup_watermark; /* bytes before wakeup   */
	};

	__u32			bp_type;
	__u64			bp_addr;
	__u64			bp_len;
};
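
/*
 * Minimal user-space sketch (not part of this header), assuming the libc
 * syscall(2) wrapper and __NR_perf_event_open from <asm/unistd.h>: count
 * user-space CPU cycles of the calling thread on any CPU.
 *
 *   struct perf_event_attr attr;
 *   int fd;
 *
 *   memset(&attr, 0, sizeof(attr));
 *   attr.size           = sizeof(attr);
 *   attr.type           = PERF_TYPE_HARDWARE;
 *   attr.config         = PERF_COUNT_HW_CPU_CYCLES;
 *   attr.disabled       = 1;
 *   attr.exclude_kernel = 1;
 *
 *   fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 */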

/*
 * Ioctls that can be done on a perf event fd:
 */
#define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
#define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
#define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
#define PERF_EVENT_IOC_RESET		_IO ('$', 3)
#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)

enum perf_event_ioc_flags {
	PERF_IOC_FLAG_GROUP		= 1U << 0,
};
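
/*
 * Typical user-space sequence (sketch, not part of this header): reset and
 * enable a counter opened with attr.disabled = 1 around the code of
 * interest, then read it back; run_workload() stands in for the measured
 * code.
 *
 *   ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 *   ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 *   run_workload();
 *   ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 *   read(fd, &count, sizeof(count));
 */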

/*
 * Structure of the page that can be mapped via mmap
 */
struct perf_event_mmap_page {
	__u32	version;		/* version number of this structure */
	__u32	compat_version;		/* lowest version this is compat with */

	/*
	 * Bits needed to read the hw events in user-space.
	 *
	 *   u32 seq;
	 *   s64 count;
	 *
	 *   do {
	 *     seq = pc->lock;
	 *
	 *     barrier()
	 *     if (pc->index) {
	 *       count = pmc_read(pc->index - 1);
	 *       count += pc->offset;
	 *     } else
	 *       goto regular_read;
	 *
	 *     barrier();
	 *   } while (pc->lock != seq);
	 *
	 * NOTE: for obvious reason this only works on self-monitoring
	 *       processes.
	 */
	__u32	lock;			/* seqlock for synchronization */
	__u32	index;			/* hardware event identifier */
	__s64	offset;			/* add to hardware event value */
	__u64	time_enabled;		/* time event active */
	__u64	time_running;		/* time event on cpu */

		/*
		 * Hole for extension of the self monitor capabilities
		 */

	__u64	__reserved[123];	/* align to 1k */

	/*
	 * Control data for the mmap() data buffer.
	 *
	 * User-space reading the @data_head value should issue an rmb(), on
	 * SMP capable platforms, after reading this value -- see
	 * perf_event_wakeup().
	 *
	 * When the mapping is PROT_WRITE the @data_tail value should be
	 * written by userspace to reflect the last read data. In this case
	 * the kernel will not over-write unread data.
	 */
	__u64   data_head;		/* head in the data section */
	__u64	data_tail;		/* user-space written tail */
};
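
/*
 * User-space sketch (not part of this header): map the control page plus a
 * power-of-two number of data pages, then consume records; page_size is
 * assumed to come from sysconf(_SC_PAGESIZE).
 *
 *   len  = (1 + (1 << n)) * page_size;
 *   base = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *   pc   = (struct perf_event_mmap_page *)base;
 *
 *   head = pc->data_head;
 *   rmb();
 *   ...consume records between pc->data_tail and head...
 *   pc->data_tail = head;
 */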

#define PERF_RECORD_MISC_CPUMODE_MASK		(3 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
#define PERF_RECORD_MISC_KERNEL			(1 << 0)
#define PERF_RECORD_MISC_USER			(2 << 0)
#define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)

struct perf_event_header {
	__u32	type;
	__u16	misc;
	__u16	size;
};

enum perf_event_type {

	/*
	 * The MMAP events record the PROT_EXEC mappings so that we can
	 * correlate userspace IPs to code. They have the following structure:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	u64				addr;
	 *	u64				len;
	 *	u64				pgoff;
	 *	char				filename[];
	 * };
	 */
	PERF_RECORD_MMAP			= 1,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				id;
	 *	u64				lost;
	 * };
	 */
	PERF_RECORD_LOST			= 2,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	char				comm[];
	 * };
	 */
	PERF_RECORD_COMM			= 3,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, ppid;
	 *	u32				tid, ptid;
	 *	u64				time;
	 * };
	 */
	PERF_RECORD_EXIT			= 4,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				time;
	 *	u64				id;
	 *	u64				stream_id;
	 * };
	 */
	PERF_RECORD_THROTTLE			= 5,
	PERF_RECORD_UNTHROTTLE			= 6,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, ppid;
	 *	u32				tid, ptid;
	 *	u64				time;
	 * };
	 */
	PERF_RECORD_FORK			= 7,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, tid;
	 *
	 *	struct read_format		values;
	 * };
	 */
	PERF_RECORD_READ			= 8,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
	 *	{ u64			time;     } && PERF_SAMPLE_TIME
	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
	 *	{ u64			id;	  } && PERF_SAMPLE_ID
	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
	 *
	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
	 *
	 *	{ u64			nr,
	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
	 *
	 *	#
	 *	# The RAW record below is opaque data wrt the ABI
	 *	#
	 *	# That is, the ABI doesn't make any promises wrt to
	 *	# the stability of its content, it may vary depending
	 *	# on event, hardware, kernel version and phase of
	 *	# the moon.
	 *	#
	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
	 *	#
	 *
	 *	{ u32			size;
	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
	 * };
	 */
	PERF_RECORD_SAMPLE			= 9,

	PERF_RECORD_MAX,			/* non-ABI */
};
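
/*
 * User-space sketch (not part of this header) of walking records in the
 * mmap data area; wrap-around at the end of the buffer is ignored for
 * brevity, and mask is the data-area size minus one.
 *
 *   while (tail != head) {
 *           struct perf_event_header *hdr;
 *
 *           hdr = (struct perf_event_header *)(data + (tail & mask));
 *           if (hdr->type == PERF_RECORD_SAMPLE)
 *                   ...decode fields according to attr.sample_type...
 *           tail += hdr->size;
 *   }
 */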

enum perf_callchain_context {
	PERF_CONTEXT_HV			= (__u64)-32,
	PERF_CONTEXT_KERNEL		= (__u64)-128,
	PERF_CONTEXT_USER		= (__u64)-512,

	PERF_CONTEXT_GUEST		= (__u64)-2048,
	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,

	PERF_CONTEXT_MAX		= (__u64)-4095,
};

#define PERF_FLAG_FD_NO_GROUP	(1U << 0)
#define PERF_FLAG_FD_OUTPUT	(1U << 1)

#ifdef __KERNEL__
/*
 * Kernel-internal data types and definitions:
 */

#ifdef CONFIG_PERF_EVENTS
# include <asm/perf_event.h>
#endif

#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <asm/hw_breakpoint.h>
#endif

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/pid_namespace.h>
#include <linux/workqueue.h>
#include <linux/ftrace.h>
#include <asm/atomic.h>

#define PERF_MAX_STACK_DEPTH		255

struct perf_callchain_entry {
	__u64				nr;
	__u64				ip[PERF_MAX_STACK_DEPTH];
};

struct perf_raw_record {
	u32				size;
	void				*data;
};

struct task_struct;

/**
 * struct hw_perf_event - performance event hardware details:
 */
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
	union {
		struct { /* hardware */
			u64		config;
			u64		last_tag;
			unsigned long	config_base;
			unsigned long	event_base;
			int		idx;
			int		last_cpu;
		};
		struct { /* software */
			s64		remaining;
			struct hrtimer	hrtimer;
		};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
		/* breakpoint */
		struct arch_hw_breakpoint	info;
#endif
	};
	atomic64_t			prev_count;
	u64				sample_period;
	u64				last_period;
	atomic64_t			period_left;
	u64				interrupts;

	u64				freq_time_stamp;
	u64				freq_count_stamp;
#endif
};

struct perf_event;

/**
 * struct pmu - generic performance monitoring unit
 */
struct pmu {
	int (*enable)			(struct perf_event *event);
	void (*disable)			(struct perf_event *event);
	int (*start)			(struct perf_event *event);
	void (*stop)			(struct perf_event *event);
	void (*read)			(struct perf_event *event);
	void (*unthrottle)		(struct perf_event *event);
};
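
/*
 * Sketch (illustrative placeholders, not real kernel symbols) of what a
 * minimal pmu returned from hw_perf_event_init() might look like; the
 * my_* callbacks are hypothetical.
 *
 *   static const struct pmu my_pmu = {
 *   	.enable		= my_event_enable,
 *   	.disable	= my_event_disable,
 *   	.read		= my_event_read,
 *   	.unthrottle	= my_event_unthrottle,
 *   };
 */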

/**
 * enum perf_event_active_state - the states of an event
 */
enum perf_event_active_state {
	PERF_EVENT_STATE_ERROR		= -2,
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};

struct file;

struct perf_mmap_data {
	struct rcu_head			rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC
	struct work_struct		work;
#endif
	int				data_order;
	int				nr_pages;	/* nr of data pages  */
	int				writable;	/* are we writable   */
	int				nr_locked;	/* nr pages mlocked  */

	atomic_t			poll;		/* POLL_ for wakeups */
	atomic_t			events;		/* event_id limit       */

	atomic_long_t			head;		/* write position    */
	atomic_long_t			done_head;	/* completed head    */

	atomic_t			lock;		/* concurrent writes */
	atomic_t			wakeup;		/* needs a wakeup    */
	atomic_t			lost;		/* nr records lost   */

	long				watermark;	/* wakeup watermark  */

	struct perf_event_mmap_page	*user_page;
	void				*data_pages[0];
};

struct perf_pending_entry {
	struct perf_pending_entry *next;
	void (*func)(struct perf_pending_entry *);
};

struct perf_sample_data;

typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
					struct perf_sample_data *,
					struct pt_regs *regs);

enum perf_group_flag {
	PERF_GROUP_SOFTWARE = 0x1,
};

/**
 * struct perf_event - performance event kernel representation:
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	struct list_head		group_entry;
	struct list_head		event_entry;
	struct list_head		sibling_list;
	int				nr_siblings;
	int				group_flags;
	struct perf_event		*group_leader;
	struct perf_event		*output;
	const struct pmu		*pmu;

	enum perf_event_active_state	state;
	atomic64_t			count;

	/*
	 * These are the total time in nanoseconds that the event
	 * has been enabled (i.e. eligible to run, and the task has
	 * been scheduled in, if this is a per-task event)
	 * and running (scheduled onto the CPU), respectively.
	 *
	 * They are computed from tstamp_enabled, tstamp_running and
	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
	 */
	u64				total_time_enabled;
	u64				total_time_running;

	/*
	 * These are timestamps used for computing total_time_enabled
	 * and total_time_running when the event is in INACTIVE or
	 * ACTIVE state, measured in nanoseconds from an arbitrary point
	 * in time.
	 * tstamp_enabled: the notional time when the event was enabled
	 * tstamp_running: the notional time when the event was scheduled on
	 * tstamp_stopped: in INACTIVE state, the notional time when the
	 *	event was scheduled off.
	 */
	u64				tstamp_enabled;
	u64				tstamp_running;
	u64				tstamp_stopped;

	struct perf_event_attr		attr;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	struct file			*filp;

	/*
	 * These accumulate total time (in nanoseconds) that children
	 * events have been enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * Protect attach/detach and child_list:
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap bits */
	struct mutex			mmap_mutex;
	atomic_t			mmap_count;
	struct perf_mmap_data		*data;

	/* poll related */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* delayed work for NMIs and such */
	int				pending_wakeup;
	int				pending_kill;
	int				pending_disable;
	struct perf_pending_entry	pending;

	atomic_t			event_limit;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	perf_overflow_handler_t		overflow_handler;

#ifdef CONFIG_EVENT_TRACING
	struct event_filter		*filter;
#endif

#endif /* CONFIG_PERF_EVENTS */
};

/**
 * struct perf_event_context - event context structure
 *
 * Used as a container for task events and CPU events as well:
 */
struct perf_event_context {
	/*
	 * Protect the states of the events in the list,
	 * nr_active, and the list:
	 */
	raw_spinlock_t			lock;
	/*
	 * Protect the list of events.  Locking either mutex or lock
	 * is sufficient to ensure the list doesn't change; to change
	 * the list you need to lock both the mutex and the spinlock.
	 */
	struct mutex			mutex;

	struct list_head		pinned_groups;
	struct list_head		flexible_groups;
	struct list_head		event_list;
	int				nr_events;
	int				nr_active;
	int				is_active;
	int				nr_stat;
	atomic_t			refcount;
	struct task_struct		*task;

	/*
	 * Context clock, runs when context enabled.
	 */
	u64				time;
	u64				timestamp;

	/*
	 * These fields let us detect when two contexts have both
	 * been cloned (inherited) from a common ancestor.
	 */
	struct perf_event_context	*parent_ctx;
	u64				parent_gen;
	u64				generation;
	int				pin_count;
	struct rcu_head			rcu_head;
};

/**
 * struct perf_cpu_context - per cpu event context structure
 */
struct perf_cpu_context {
	struct perf_event_context	ctx;
	struct perf_event_context	*task_ctx;
	int				active_oncpu;
	int				max_pertask;
	int				exclusive;

	/*
	 * Recursion avoidance:
	 *
	 * task, softirq, irq, nmi context
	 */
	int				recursion[4];
};

struct perf_output_handle {
	struct perf_event		*event;
	struct perf_mmap_data		*data;
	unsigned long			head;
	unsigned long			offset;
	int				nmi;
	int				sample;
	int				locked;
};

#ifdef CONFIG_PERF_EVENTS

/*
 * Set by architecture code:
 */
extern int perf_max_events;

extern const struct pmu *hw_perf_event_init(struct perf_event *event);

extern void perf_event_task_sched_in(struct task_struct *task);
extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
extern void perf_event_task_tick(struct task_struct *task);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void set_perf_event_pending(void);
extern void perf_event_do_pending(void);
extern void perf_event_print_debug(void);
extern void __perf_disable(void);
extern bool __perf_enable(void);
extern void perf_disable(void);
extern void perf_enable(void);
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
extern int hw_perf_group_sched_in(struct perf_event *group_leader,
	       struct perf_cpu_context *cpuctx,
	       struct perf_event_context *ctx);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
				int cpu,
				pid_t pid,
				perf_overflow_handler_t callback);
extern u64 perf_event_read_value(struct perf_event *event,
				 u64 *enabled, u64 *running);
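
/*
 * In-kernel sketch (illustrative; my_overflow_handler is a placeholder,
 * not a real kernel function): a per-cpu kernel counter with an overflow
 * callback, released again when done.
 *
 *   struct perf_event_attr attr = {
 *   	.type		= PERF_TYPE_SOFTWARE,
 *   	.config		= PERF_COUNT_SW_TASK_CLOCK,
 *   	.size		= sizeof(attr),
 *   	.sample_period	= 1000000,
 *   };
 *   struct perf_event *event;
 *
 *   event = perf_event_create_kernel_counter(&attr, 0, -1, my_overflow_handler);
 *   ...
 *   perf_event_release_kernel(event);
 */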

struct perf_sample_data {
	u64				type;

	u64				ip;
	struct {
		u32	pid;
		u32	tid;
	}				tid_entry;
	u64				time;
	u64				addr;
	u64				id;
	u64				stream_id;
	struct {
		u32	cpu;
		u32	reserved;
	}				cpu_entry;
	u64				period;
	struct perf_callchain_entry	*callchain;
	struct perf_raw_record		*raw;
};

static inline
void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
{
	data->addr = addr;
	data->raw  = NULL;
}

extern void perf_output_sample(struct perf_output_handle *handle,
			       struct perf_event_header *header,
			       struct perf_sample_data *data,
			       struct perf_event *event);
extern void perf_prepare_sample(struct perf_event_header *header,
				struct perf_sample_data *data,
				struct perf_event *event,
				struct pt_regs *regs);

extern int perf_event_overflow(struct perf_event *event, int nmi,
				 struct perf_sample_data *data,
				 struct pt_regs *regs);

/*
 * Return 1 for a software event, 0 for a hardware event
 */
static inline int is_software_event(struct perf_event *event)
{
	switch (event->attr.type) {
	case PERF_TYPE_SOFTWARE:
	case PERF_TYPE_TRACEPOINT:
	/* for now the breakpoint stuff also works as software event */
	case PERF_TYPE_BREAKPOINT:
		return 1;
	}
	return 0;
}

extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];

extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);

static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
{
	if (atomic_read(&perf_swevent_enabled[event_id]))
		__perf_sw_event(event_id, nr, nmi, regs, addr);
}
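
/*
 * In-kernel sketch (illustrative): emitting a software event from a code
 * path that already has a pt_regs pointer, e.g. one page-fault count per
 * fault:
 *
 *   perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 */
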
extern void
perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);

/*
 * Take a snapshot of the regs. Skip ip and frame pointer to
 * the nth caller. We only need a few of the regs:
 * - ip for PERF_SAMPLE_IP
 * - cs for user_mode() tests
 * - bp for callchains
 * - eflags, for future purposes, just in case
 */
static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
{
	unsigned long ip;

	memset(regs, 0, sizeof(*regs));

	switch (skip) {
	case 1 :
		ip = CALLER_ADDR0;
		break;
	case 2 :
		ip = CALLER_ADDR1;
		break;
	case 3 :
		ip = CALLER_ADDR2;
		break;
	case 4:
		ip = CALLER_ADDR3;
		break;
	/* No need to support further for now */
	default:
		ip = 0;
	}

	return perf_arch_fetch_caller_regs(regs, ip, skip);
}

extern void __perf_event_mmap(struct vm_area_struct *vma);

static inline void perf_event_mmap(struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_EXEC)
		__perf_event_mmap(vma);
}

extern void perf_event_comm(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);

extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);

extern int sysctl_perf_event_paranoid;
extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;

static inline bool perf_paranoid_tracepoint_raw(void)
{
	return sysctl_perf_event_paranoid > -1;
}

static inline bool perf_paranoid_cpu(void)
{
	return sysctl_perf_event_paranoid > 0;
}

static inline bool perf_paranoid_kernel(void)
{
	return sysctl_perf_event_paranoid > 1;
}

extern void perf_event_init(void);
extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
			  int entry_size, struct pt_regs *regs);
extern void perf_bp_event(struct perf_event *event, void *data);

#ifndef perf_misc_flags
#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
				 PERF_RECORD_MISC_KERNEL)
#define perf_instruction_pointer(regs)	instruction_pointer(regs)
#endif

extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_event *event, unsigned int size,
			     int nmi, int sample);
extern void perf_output_end(struct perf_output_handle *handle);
extern void perf_output_copy(struct perf_output_handle *handle,
			     const void *buf, unsigned int len);
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
#else
static inline void
perf_event_task_sched_in(struct task_struct *task)			{ }
static inline void
perf_event_task_sched_out(struct task_struct *task,
			    struct task_struct *next)			{ }
static inline void
perf_event_task_tick(struct task_struct *task)				{ }
static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
static inline void perf_event_exit_task(struct task_struct *child)	{ }
static inline void perf_event_free_task(struct task_struct *task)	{ }
static inline void perf_event_do_pending(void)				{ }
static inline void perf_event_print_debug(void)				{ }
static inline void perf_disable(void)					{ }
static inline void perf_enable(void)					{ }
static inline int perf_event_task_disable(void)				{ return -EINVAL; }
static inline int perf_event_task_enable(void)				{ return -EINVAL; }

static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi,
		     struct pt_regs *regs, u64 addr)			{ }
static inline void
perf_bp_event(struct perf_event *event, void *data)			{ }

static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_comm(struct task_struct *tsk)		{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
static inline void perf_swevent_put_recursion_context(int rctx)		{ }
static inline void perf_event_enable(struct perf_event *event)		{ }
static inline void perf_event_disable(struct perf_event *event)		{ }
#endif

#define perf_output_put(handle, x) \
	perf_output_copy((handle), &(x), sizeof(x))

/*
 * This has to have a higher priority than migration_notifier in sched.c.
 */
#define perf_cpu_notifier(fn)					\
do {								\
	static struct notifier_block fn##_nb __cpuinitdata =	\
		{ .notifier_call = fn, .priority = 20 };	\
	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,		\
		(void *)(unsigned long)smp_processor_id());	\
	fn(&fn##_nb, (unsigned long)CPU_STARTING,		\
		(void *)(unsigned long)smp_processor_id());	\
	fn(&fn##_nb, (unsigned long)CPU_ONLINE,			\
		(void *)(unsigned long)smp_processor_id());	\
	register_cpu_notifier(&fn##_nb);			\
} while (0)

#endif /* __KERNEL__ */
#endif /* _LINUX_PERF_EVENT_H */