// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>

#include "../perf_event.h"

/* Waste a full page so it can be mapped into the cpu_entry_area */
DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

#define PEBS_FIXUP_SIZE		PAGE_SIZE

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */

union intel_x86_pebs_dse {
	u64 val;
	struct {
		unsigned int ld_dse:4;
		unsigned int ld_stlb_miss:1;
		unsigned int ld_locked:1;
		unsigned int ld_data_blk:1;
		unsigned int ld_addr_blk:1;
		unsigned int ld_reserved:24;
	};
	struct {
		unsigned int st_l1d_hit:1;
		unsigned int st_reserved1:3;
		unsigned int st_stlb_miss:1;
		unsigned int st_locked:1;
		unsigned int st_reserved2:26;
	};
	struct {
		unsigned int st_lat_dse:4;
		unsigned int st_lat_stlb_miss:1;
		unsigned int st_lat_locked:1;
		unsigned int ld_reserved3:26;
	};
};


/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define LEVEL(x) P(LVLNUM, x)
#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
	OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
	OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
	OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
};

/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
	pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
	pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);

	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
	pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
	pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
	pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}

static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	dse.val = status;

	/*
	 * bit 4: TLB access
	 * 1 = store missed 2nd level TLB
	 *
	 * so it either hit the walker or the OS
	 * otherwise hit 2nd level TLB
	 */
	if (dse.st_stlb_miss)
		val |= P(TLB, MISS);
	else
		val |= P(TLB, HIT);

	/*
	 * bit 0: hit L1 data cache
	 * if not set, then all we know is that
	 * it missed L1D
	 */
	if (dse.st_l1d_hit)
		val |= P(LVL, HIT);
	else
		val |= P(LVL, MISS);

	/*
	 * bit 5: Locked prefix
	 */
	if (dse.st_locked)
		val |= P(LOCK, LOCKED);

	return val;
}

static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
	union perf_mem_data_src dse;

	dse.val = PERF_MEM_NA;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		dse.mem_op = PERF_MEM_OP_STORE;
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
		dse.mem_op = PERF_MEM_OP_LOAD;

	/*
	 * L1 info only valid for following events:
	 *
	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
	 * MEM_UOPS_RETIRED.LOCK_STORES
	 * MEM_UOPS_RETIRED.SPLIT_STORES
	 * MEM_UOPS_RETIRED.ALL_STORES
	 */
	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
		if (status & 1)
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		else
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
	}
	return dse.val;
}

static u64 load_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;

	dse.val = status;

	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.ld_dse];

	/*
	 * Nehalem models do not support TLB, Lock infos
	 */
	if (x86_pmu.pebs_no_tlb) {
		val |= P(TLB, NA) | P(LOCK, NA);
		return val;
	}
	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.ld_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);

	/*
	 * Ice Lake and earlier models do not support block infos.
	 */
	if (!x86_pmu.pebs_block) {
		val |= P(BLK, NA);
		return val;
	}
	/*
	 * bit 6: load was blocked since its data could not be forwarded
	 *        from a preceding store
	 */
	if (dse.ld_data_blk)
		val |= P(BLK, DATA);

	/*
	 * bit 7: load was blocked due to potential address conflict with
	 *        a preceding store
	 */
	if (dse.ld_addr_blk)
		val |= P(BLK, ADDR);

	if (!dse.ld_data_blk && !dse.ld_addr_blk)
		val |= P(BLK, NA);

	return val;
}

static u64 store_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;

	dse.val = status;

	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.st_lat_dse];

	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.st_lat_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.st_lat_locked)
		val |= P(LOCK, LOCKED);

	val |= P(BLK, NA);

	return val;
}

struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
};

union hsw_tsx_tuning {
	struct {
		u32 cycles_last_block     : 32,
		    hle_abort		  : 1,
		    rtm_abort		  : 1,
		    instruction_abort     : 1,
		    non_instruction_abort : 1,
		    retry		  : 1,
		    data_conflict	  : 1,
		    capacity_writes	  : 1,
		    capacity_reads	  : 1;
	};
	u64	    value;
};

#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL

/* Same as HSW, plus TSC */

struct pebs_record_skl {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
	u64 tsc;
};

void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static DEFINE_PER_CPU(void *, insn_buffer);

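/*
 * Map the buffer at @addr into the cpu_entry_area pages starting at @cea,
 * one PTE per page, and flush the TLB range so every CPU sees the update.
 */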
static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
	unsigned long start = (unsigned long)cea;
	phys_addr_t pa;
	size_t msz = 0;

	pa = virt_to_phys(addr);

	preempt_disable();
	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
		cea_set_pte(cea, pa, prot);

	/*
	 * This is a cross-CPU update of the cpu_entry_area, we must shoot down
	 * all TLB entries for it.
	 */
	flush_tlb_kernel_range(start, start + size);
	preempt_enable();
}

static void ds_clear_cea(void *cea, size_t size)
{
	unsigned long start = (unsigned long)cea;
	size_t msz = 0;

	preempt_disable();
	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
		cea_set_pte(cea, 0, PAGE_NONE);

	flush_tlb_kernel_range(start, start + size);
	preempt_enable();
}

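/* Allocate a zeroed, page-order buffer from the target CPU's NUMA node. */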
static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
{
	unsigned int order = get_order(size);
	int node = cpu_to_node(cpu);
	struct page *page;

	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
	return page ? page_address(page) : NULL;
}

static void dsfree_pages(const void *buffer, size_t size)
{
	if (buffer)
		free_pages((unsigned long)buffer, get_order(size));
}

static int alloc_pebs_buffer(int cpu)
{
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	struct debug_store *ds = hwev->ds;
	size_t bsiz = x86_pmu.pebs_buffer_size;
	int max, node = cpu_to_node(cpu);
	void *buffer, *insn_buff, *cea;

	if (!x86_pmu.pebs)
		return 0;

	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
	if (unlikely(!buffer))
		return -ENOMEM;

	/*
	 * HSW+ already provides us the eventing ip; no need to allocate this
	 * buffer then.
	 */
	if (x86_pmu.intel_cap.pebs_format < 2) {
		insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
		if (!insn_buff) {
			dsfree_pages(buffer, bsiz);
			return -ENOMEM;
		}
		per_cpu(insn_buffer, cpu) = insn_buff;
	}
	hwev->ds_pebs_vaddr = buffer;
	/* Update the cpu entry area mapping */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
	ds->pebs_buffer_base = (unsigned long) cea;
	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
	ds->pebs_index = ds->pebs_buffer_base;
	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
	return 0;
}

static void release_pebs_buffer(int cpu)
{
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	void *cea;

	if (!x86_pmu.pebs)
		return;

	kfree(per_cpu(insn_buffer, cpu));
	per_cpu(insn_buffer, cpu) = NULL;

	/* Clear the fixmap */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
	hwev->ds_pebs_vaddr = NULL;
}

static int alloc_bts_buffer(int cpu)
{
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	struct debug_store *ds = hwev->ds;
	void *buffer, *cea;
	int max;

	if (!x86_pmu.bts)
		return 0;

	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
	if (unlikely(!buffer)) {
		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
		return -ENOMEM;
	}
	hwev->ds_bts_vaddr = buffer;
	/* Update the fixmap */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
	ds->bts_buffer_base = (unsigned long) cea;
	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
	ds->bts_index = ds->bts_buffer_base;
	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
	ds->bts_absolute_maximum = ds->bts_buffer_base +
					max * BTS_RECORD_SIZE;
	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
					(max / 16) * BTS_RECORD_SIZE;
	return 0;
}

static void release_bts_buffer(int cpu)
{
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	void *cea;

	if (!x86_pmu.bts)
		return;

	/* Clear the fixmap */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
	ds_clear_cea(cea, BTS_BUFFER_SIZE);
	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
	hwev->ds_bts_vaddr = NULL;
}

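/*
 * The debug_store itself lives in the cpu_entry_area; nothing to allocate,
 * just clear it and let cpu_hw_events point at it.
 */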
static int alloc_ds_buffer(int cpu)
{
	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;

	memset(ds, 0, sizeof(*ds));
	per_cpu(cpu_hw_events, cpu).ds = ds;
	return 0;
}

static void release_ds_buffer(int cpu)
{
	per_cpu(cpu_hw_events, cpu).ds = NULL;
}

void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	for_each_possible_cpu(cpu)
		release_ds_buffer(cpu);

	for_each_possible_cpu(cpu) {
		/*
		 * Again, ignore errors from offline CPUs, they will no longer
		 * observe cpu_hw_events.ds and not program the DS_AREA when
		 * they come up.
		 */
		fini_debug_store_on_cpu(cpu);
	}

	for_each_possible_cpu(cpu) {
		release_pebs_buffer(cpu);
		release_bts_buffer(cpu);
	}
}

void reserve_ds_buffers(void)
{
	int bts_err = 0, pebs_err = 0;
	int cpu;

	x86_pmu.bts_active = 0;
	x86_pmu.pebs_active = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	if (!x86_pmu.bts)
		bts_err = 1;

	if (!x86_pmu.pebs)
		pebs_err = 1;

	for_each_possible_cpu(cpu) {
		if (alloc_ds_buffer(cpu)) {
			bts_err = 1;
			pebs_err = 1;
		}

		if (!bts_err && alloc_bts_buffer(cpu))
			bts_err = 1;

		if (!pebs_err && alloc_pebs_buffer(cpu))
			pebs_err = 1;

		if (bts_err && pebs_err)
			break;
	}

	if (bts_err) {
		for_each_possible_cpu(cpu)
			release_bts_buffer(cpu);
	}

	if (pebs_err) {
		for_each_possible_cpu(cpu)
			release_pebs_buffer(cpu);
	}

	if (bts_err && pebs_err) {
		for_each_possible_cpu(cpu)
			release_ds_buffer(cpu);
	} else {
		if (x86_pmu.bts && !bts_err)
			x86_pmu.bts_active = 1;

		if (x86_pmu.pebs && !pebs_err)
			x86_pmu.pebs_active = 1;

		for_each_possible_cpu(cpu) {
			/*
			 * Ignores wrmsr_on_cpu() errors for offline CPUs they
			 * will get this call through intel_pmu_cpu_starting().
			 */
			init_debug_store_on_cpu(cpu);
		}
	}
}

/*
 * BTS
 */

struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);

void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= DEBUGCTLMSR_TR;
	debugctlmsr |= DEBUGCTLMSR_BTS;
	if (config & ARCH_PERFMON_EVENTSEL_INT)
		debugctlmsr |= DEBUGCTLMSR_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

int intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *base, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	unsigned long skip = 0;
	struct pt_regs regs;

	if (!event)
		return 0;

	if (!x86_pmu.bts_active)
		return 0;

	base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top  = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= base)
		return 0;

	memset(&regs, 0, sizeof(regs));

	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	/*
	 * BTS leaks kernel addresses in branches across the cpl boundary,
	 * such as traps or system calls, so unless the user is asking for
	 * kernel tracing (and right now it's not possible), we'd need to
	 * filter them out. But first we need to count how many of those we
	 * have in the current batch. This is an extra O(n) pass, however,
	 * it's much faster than the other one especially considering that
	 * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
	 * alloc_bts_buffer()).
	 */
	for (at = base; at < top; at++) {
		/*
		 * Note that right now *this* BTS code only works if
		 * attr::exclude_kernel is set, but let's keep this extra
		 * check here in case that changes.
		 */
		if (event->attr.exclude_kernel &&
		    (kernel_ip(at->from) || kernel_ip(at->to)))
			skip++;
	}

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	rcu_read_lock();
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, &data, event,
			      header.size * (top - base - skip)))
		goto unlock;

	for (at = base; at < top; at++) {
		/* Filter out any records that contain kernel addresses. */
		if (event->attr.exclude_kernel &&
		    (kernel_ip(at->from) || kernel_ip(at->to)))
			continue;

		data.ip		= at->from;
		data.addr	= at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
unlock:
	rcu_read_unlock();
	return 1;
}

static inline void intel_pmu_drain_pebs_buffer(void)
{
	struct perf_sample_data data;

	x86_pmu.drain_pebs(NULL, &data);
}

/*
 * PEBS
 */
struct event_constraint intel_core2_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_glm_pebs_event_constraints[] = {
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_bdw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};


struct event_constraint intel_skl_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
	/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),		      /* MEM_TRANS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_icl_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),	/* SLOTS */

	INTEL_PLD_CONSTRAINT(0x1cd, 0xff),			/* MEM_TRANS_RETIRED.LOAD_LATENCY */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),	/* MEM_INST_RETIRED.LOAD */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),	/* MEM_INST_RETIRED.STORE */

	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */

	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),		/* MEM_INST_RETIRED.* */

	/*
	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
	 * need the full constraints from the main table.
	 */

	EVENT_CONSTRAINT_END
};

struct event_constraint intel_spr_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),

	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
	INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
	INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),

	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),

	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),

	/*
	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
	 * need the full constraints from the main table.
	 */

	EVENT_CONSTRAINT_END
};

struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;

	if (!event->attr.precise_ip)
		return NULL;

	if (x86_pmu.pebs_constraints) {
		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
			if (constraint_match(c, event->hw.config)) {
				event->hw.flags |= c->flags;
				return c;
			}
		}
	}

	/*
	 * Extended PEBS support
	 * Makes the PEBS code search the normal constraints.
	 */
	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
		return NULL;

	return &emptyconstraint;
}

/*
 * We need the sched_task callback even for per-cpu events when we use
 * the large interrupt threshold, such that we can provide PID and TID
 * to PEBS samples.
 */
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
	if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
		return false;

	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!sched_in && pebs_needs_sched_cb(cpuc))
		intel_pmu_drain_pebs_buffer();
}

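/*
 * Program the DS interrupt threshold: with only large-PEBS events, interrupt
 * near the end of the buffer (leaving room for one record per possible PEBS
 * counter); otherwise interrupt after every single record.
 */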
static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
	struct debug_store *ds = cpuc->ds;
	u64 threshold;
	int reserved;

	if (cpuc->n_pebs_via_pt)
		return;

	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
		reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
	else
		reserved = x86_pmu.max_pebs_events;

	if (cpuc->n_pebs == cpuc->n_large_pebs) {
		threshold = ds->pebs_absolute_maximum -
			reserved * cpuc->pebs_record_size;
	} else {
		threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
	}

	ds->pebs_interrupt_threshold = threshold;
}

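/*
 * Recompute the adaptive PEBS record size from the data groups currently
 * enabled in pebs_data_cfg (basic + meminfo/GPRs/XMMs/LBRs).
 */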
static void adaptive_pebs_record_size_update(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 pebs_data_cfg = cpuc->pebs_data_cfg;
	int sz = sizeof(struct pebs_basic);

	if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
		sz += sizeof(struct pebs_meminfo);
	if (pebs_data_cfg & PEBS_DATACFG_GP)
		sz += sizeof(struct pebs_gprs);
	if (pebs_data_cfg & PEBS_DATACFG_XMMS)
		sz += sizeof(struct pebs_xmm);
	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
		sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);

	cpuc->pebs_record_size = sz;
}

#define PERF_PEBS_MEMINFO_TYPE	(PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
				PERF_SAMPLE_PHYS_ADDR |			     \
				PERF_SAMPLE_WEIGHT_TYPE |		     \
				PERF_SAMPLE_TRANSACTION |		     \
				PERF_SAMPLE_DATA_PAGE_SIZE)

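/*
 * Derive the PEBS_DATACFG bits this event needs from its sample_type,
 * i.e. which adaptive record groups (meminfo, GPRs, XMMs, LBRs) to enable.
 */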
static u64 pebs_update_adaptive_cfg(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	u64 sample_type = attr->sample_type;
	u64 pebs_data_cfg = 0;
	bool gprs, tsx_weight;

	if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
	    attr->precise_ip > 1)
		return pebs_data_cfg;

	if (sample_type & PERF_PEBS_MEMINFO_TYPE)
		pebs_data_cfg |= PEBS_DATACFG_MEMINFO;

	/*
	 * We need GPRs when:
	 * + user requested them
	 * + precise_ip < 2 for the non event IP
	 * + For RTM TSX weight we need GPRs for the abort code.
	 */
	gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
	       (attr->sample_regs_intr & PEBS_GP_REGS);

	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
		     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
		      x86_pmu.rtm_abort_event);

	if (gprs || (attr->precise_ip < 2) || tsx_weight)
		pebs_data_cfg |= PEBS_DATACFG_GP;

	if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
	    (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
		pebs_data_cfg |= PEBS_DATACFG_XMMS;

	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
		/*
		 * For now always log all LBRs. Could configure this
		 * later.
		 */
		pebs_data_cfg |= PEBS_DATACFG_LBRS |
			((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
	}

	return pebs_data_cfg;
}

static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
		  struct perf_event *event, bool add)
{
	struct pmu *pmu = event->ctx->pmu;
	/*
	 * Make sure we get updated with the first PEBS
	 * event. It will trigger also during removal, but
	 * that does not hurt:
	 */
	bool update = cpuc->n_pebs == 1;

	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
		if (!needed_cb)
			perf_sched_cb_inc(pmu);
		else
			perf_sched_cb_dec(pmu);

		update = true;
	}

	/*
	 * The PEBS record doesn't shrink on pmu::del(). Doing so would require
	 * iterating all remaining PEBS events to reconstruct the config.
	 */
	if (x86_pmu.intel_cap.pebs_baseline && add) {
		u64 pebs_data_cfg;

		/* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
		if (cpuc->n_pebs == 1) {
			cpuc->pebs_data_cfg = 0;
			cpuc->pebs_record_size = sizeof(struct pebs_basic);
		}

		pebs_data_cfg = pebs_update_adaptive_cfg(event);

		/* Update pebs_record_size if new event requires more data. */
		if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
			cpuc->pebs_data_cfg |= pebs_data_cfg;
			adaptive_pebs_record_size_update();
			update = true;
		}
	}

	if (update)
		pebs_update_threshold(cpuc);
}

void intel_pmu_pebs_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	bool needed_cb = pebs_needs_sched_cb(cpuc);

	cpuc->n_pebs++;
	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
		cpuc->n_large_pebs++;
	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
		cpuc->n_pebs_via_pt++;

	pebs_update_state(needed_cb, cpuc, event, true);
}

static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!is_pebs_pt(event))
		return;

	if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
		cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
}

static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;

	if (!is_pebs_pt(event))
		return;

	if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
		cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;

	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;

	wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
}

void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	cpuc->pebs_enabled |= 1ULL << hwc->idx;

	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled |= 1ULL << 63;

	if (x86_pmu.intel_cap.pebs_baseline) {
		hwc->config |= ICL_EVENTSEL_ADAPTIVE;
		if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
			wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
			cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
		}
	}

	/*
	 * Use auto-reload if possible to save a MSR write in the PMI.
	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
	 */
	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
		unsigned int idx = hwc->idx;

		if (idx >= INTEL_PMC_IDX_FIXED)
			idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
		ds->pebs_event_reset[idx] =
			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
	} else {
		ds->pebs_event_reset[hwc->idx] = 0;
	}

	intel_pmu_pebs_via_pt_enable(event);
}

void intel_pmu_pebs_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	bool needed_cb = pebs_needs_sched_cb(cpuc);

	cpuc->n_pebs--;
	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
		cpuc->n_large_pebs--;
	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
		cpuc->n_pebs_via_pt--;

	pebs_update_state(needed_cb, cpuc, event, false);
}

void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	if (cpuc->n_pebs == cpuc->n_large_pebs &&
	    cpuc->n_pebs != cpuc->n_pebs_via_pt)
		intel_pmu_drain_pebs_buffer();

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
	    (x86_pmu.version < 5))
		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled &= ~(1ULL << 63);

	intel_pmu_pebs_via_pt_disable(event);

	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

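/*
 * With a trap-like PEBS assist the reported IP is one instruction past the
 * eventing instruction.  Starting from the last LBR branch target, decode
 * forward until the reported IP is reached and rewind regs->ip to the
 * preceding instruction.  Returns 1 if the resulting IP is exact.
 */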
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;
	int is_64bit = 0;
	void *kaddr;
	int size;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PEBS_FIXUP_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		set_linear_ip(regs, from);
		return 1;
	}

	size = ip - to;
	if (!kernel_ip(ip)) {
		int bytes;
		u8 *buf = this_cpu_read(insn_buffer);

		/* 'size' must fit our buffer, see above */
		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
		if (bytes != 0)
			return 0;

		kaddr = buf;
	} else {
		kaddr = (void *)to;
	}

	do {
		struct insn insn;

		old_to = to;

#ifdef CONFIG_X86_64
		is_64bit = kernel_ip(to) || any_64bit_mode(regs);
#endif
		insn_init(&insn, kaddr, size, is_64bit);
		insn_get_length(&insn);
		/*
		 * Make sure there was not a problem decoding the
		 * instruction and getting the length.  This is
		 * doubly important because we have an infinite
		 * loop if insn.length=0.
		 */
		if (!insn.length)
			break;

		to += insn.length;
		kaddr += insn.length;
		size -= insn.length;
	} while (to < ip);

	if (to == ip) {
		set_linear_ip(regs, old_to);
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}

static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
{
	if (tsx_tuning) {
		union hsw_tsx_tuning tsx = { .value = tsx_tuning };
		return tsx.cycles_last_block;
	}
	return 0;
}

static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
{
	u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

	/* For RTM XABORTs also log the abort code from AX */
	if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
		txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
	return txn;
}

static inline u64 get_pebs_status(void *n)
{
	if (x86_pmu.intel_cap.pebs_format < 4)
		return ((struct pebs_record_nhm *)n)->status;
	return ((struct pebs_basic *)n)->applicable_counters;
}

#define PERF_X86_EVENT_PEBS_HSW_PREC \
		(PERF_X86_EVENT_PEBS_ST_HSW | \
		 PERF_X86_EVENT_PEBS_LD_HSW | \
		 PERF_X86_EVENT_PEBS_NA_HSW)

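/*
 * Decode the raw PEBS data-source word into a perf_mem_data_src value,
 * using the decoder that matches the event's PEBS flags.
 */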
static u64 get_data_src(struct perf_event *event, u64 aux)
{
	u64 val = PERF_MEM_NA;
	int fl = event->hw.flags;
	bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

	if (fl & PERF_X86_EVENT_PEBS_LDLAT)
		val = load_latency_data(aux);
	else if (fl & PERF_X86_EVENT_PEBS_STLAT)
		val = store_latency_data(aux);
	else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
		val = precise_datala_hsw(event, aux);
	else if (fst)
		val = precise_store_data(aux);
	return val;
}

#define PERF_SAMPLE_ADDR_TYPE	(PERF_SAMPLE_ADDR |		\
				 PERF_SAMPLE_PHYS_ADDR |	\
				 PERF_SAMPLE_DATA_PAGE_SIZE)

static void setup_pebs_fixed_sample_data(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	/*
	 * We cast to the biggest pebs_record but are careful not to
	 * unconditionally access the 'extra' entries.
	 */
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct pebs_record_skl *pebs = __pebs;
	u64 sample_type;
	int fll;

	if (pebs == NULL)
		return;

	sample_type = event->attr.sample_type;
	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;

	perf_sample_data_init(data, 0, event->hw.last_period);

	data->period = event->hw.last_period;

	/*
	 * Use latency for weight (only avail with PEBS-LL)
	 */
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
		data->weight.full = pebs->lat;

	/*
	 * data.data_src encodes the data source
	 */
	if (sample_type & PERF_SAMPLE_DATA_SRC)
		data->data_src.val = get_data_src(event, pebs->dse);

	/*
	 * We must however always use iregs for the unwinder to stay sane; the
	 * record BP,SP,IP can point into thin air when the record is from a
	 * previous PMI context or an (I)RET happened between the record and
	 * PMI.
	 */
	if (sample_type & PERF_SAMPLE_CALLCHAIN)
		data->callchain = perf_callchain(event, iregs);

	/*
	 * We use the interrupt regs as a base because the PEBS record does not
	 * contain a full regs set, specifically it seems to lack segment
	 * descriptors, which get used by things like user_mode().
	 *
	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
	 */
	*regs = *iregs;

	/*
	 * Initialize regs->flags from PEBS,
	 * Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
	 * i.e., do not rely on it being zero:
	 */
	regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;

	if (sample_type & PERF_SAMPLE_REGS_INTR) {
		regs->ax = pebs->ax;
		regs->bx = pebs->bx;
		regs->cx = pebs->cx;
		regs->dx = pebs->dx;
		regs->si = pebs->si;
		regs->di = pebs->di;

		regs->bp = pebs->bp;
		regs->sp = pebs->sp;

#ifndef CONFIG_X86_32
		regs->r8 = pebs->r8;
		regs->r9 = pebs->r9;
		regs->r10 = pebs->r10;
		regs->r11 = pebs->r11;
		regs->r12 = pebs->r12;
		regs->r13 = pebs->r13;
		regs->r14 = pebs->r14;
		regs->r15 = pebs->r15;
#endif
	}

	if (event->attr.precise_ip > 1) {
		/*
		 * Haswell and later processors have an 'eventing IP'
		 * (real IP) which fixes the off-by-1 skid in hardware.
		 * Use it when precise_ip >= 2 :
		 */
		if (x86_pmu.intel_cap.pebs_format >= 2) {
			set_linear_ip(regs, pebs->real_ip);
			regs->flags |= PERF_EFLAGS_EXACT;
		} else {
			/* Otherwise, use PEBS off-by-1 IP: */
			set_linear_ip(regs, pebs->ip);

			/*
			 * With precise_ip >= 2, try to fix up the off-by-1 IP
			 * using the LBR. If successful, the fixup function
			 * corrects regs->ip and calls set_linear_ip() on regs:
			 */
			if (intel_pmu_pebs_fixup_ip(regs))
				regs->flags |= PERF_EFLAGS_EXACT;
		}
	} else {
		/*
		 * When precise_ip == 1, return the PEBS off-by-1 IP,
		 * no fixup attempted:
		 */
		set_linear_ip(regs, pebs->ip);
	}


	if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
	    x86_pmu.intel_cap.pebs_format >= 1)
		data->addr = pebs->dla;

	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
							      pebs->ax);
	}

	/*
	 * v3 supplies an accurate time stamp, so we use that
	 * for the time stamp.
	 *
	 * We can only do this for the default trace clock.
	 */
	if (x86_pmu.intel_cap.pebs_format >= 3 &&
		event->attr.use_clockid == 0)
		data->time = native_sched_clock_from_tsc(pebs->tsc);

	if (has_branch_stack(event))
		data->br_stack = &cpuc->lbr_stack;
}

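/* Copy the GP-register group of an adaptive PEBS record into pt_regs. */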
static void adaptive_pebs_save_regs(struct pt_regs *regs,
				    struct pebs_gprs *gprs)
{
	regs->ax = gprs->ax;
	regs->bx = gprs->bx;
	regs->cx = gprs->cx;
	regs->dx = gprs->dx;
	regs->si = gprs->si;
	regs->di = gprs->di;
	regs->bp = gprs->bp;
	regs->sp = gprs->sp;
#ifndef CONFIG_X86_32
	regs->r8 = gprs->r8;
	regs->r9 = gprs->r9;
	regs->r10 = gprs->r10;
	regs->r11 = gprs->r11;
	regs->r12 = gprs->r12;
	regs->r13 = gprs->r13;
	regs->r14 = gprs->r14;
	regs->r15 = gprs->r15;
#endif
}

#define PEBS_LATENCY_MASK			0xffff
#define PEBS_CACHE_LATENCY_OFFSET		32

/*
 * With adaptive PEBS the layout depends on what fields are configured.
 */

static void setup_pebs_adaptive_sample_data(struct perf_event *event,
					    struct pt_regs *iregs, void *__pebs,
					    struct perf_sample_data *data,
					    struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct pebs_basic *basic = __pebs;
	void *next_record = basic + 1;
	u64 sample_type;
	u64 format_size;
	struct pebs_meminfo *meminfo = NULL;
	struct pebs_gprs *gprs = NULL;
	struct x86_perf_regs *perf_regs;

	if (basic == NULL)
		return;

	perf_regs = container_of(regs, struct x86_perf_regs, regs);
	perf_regs->xmm_regs = NULL;

	sample_type = event->attr.sample_type;
	format_size = basic->format_size;
	perf_sample_data_init(data, 0, event->hw.last_period);
	data->period = event->hw.last_period;

	if (event->attr.use_clockid == 0)
		data->time = native_sched_clock_from_tsc(basic->tsc);

	/*
	 * We must however always use iregs for the unwinder to stay sane; the
	 * record BP,SP,IP can point into thin air when the record is from a
	 * previous PMI context or an (I)RET happened between the record and
	 * PMI.
	 */
	if (sample_type & PERF_SAMPLE_CALLCHAIN)
		data->callchain = perf_callchain(event, iregs);

	*regs = *iregs;
	/* The ip in basic is EventingIP */
	set_linear_ip(regs, basic->ip);
	regs->flags = PERF_EFLAGS_EXACT;

	/*
	 * The record for MEMINFO is in front of GP
	 * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
	 * Save the pointer here but process later.
	 */
	if (format_size & PEBS_DATACFG_MEMINFO) {
		meminfo = next_record;
		next_record = meminfo + 1;
	}

	if (format_size & PEBS_DATACFG_GP) {
		gprs = next_record;
		next_record = gprs + 1;

		if (event->attr.precise_ip < 2) {
			set_linear_ip(regs, gprs->ip);
			regs->flags &= ~PERF_EFLAGS_EXACT;
		}

		if (sample_type & PERF_SAMPLE_REGS_INTR)
			adaptive_pebs_save_regs(regs, gprs);
	}

	if (format_size & PEBS_DATACFG_MEMINFO) {
		if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
			u64 weight = meminfo->latency;

			if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
				data->weight.var2_w = weight & PEBS_LATENCY_MASK;
				weight >>= PEBS_CACHE_LATENCY_OFFSET;
			}

			/*
			 * Although meminfo::latency is defined as a u64,
			 * only the lower 32 bits include the valid data
			 * in practice on Ice Lake and earlier platforms.
			 */
			if (sample_type & PERF_SAMPLE_WEIGHT) {
				data->weight.full = weight ?:
					intel_get_tsx_weight(meminfo->tsx_tuning);
			} else {
				data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
					intel_get_tsx_weight(meminfo->tsx_tuning);
			}
		}

		if (sample_type & PERF_SAMPLE_DATA_SRC)
			data->data_src.val = get_data_src(event, meminfo->aux);

		if (sample_type & PERF_SAMPLE_ADDR_TYPE)
			data->addr = meminfo->address;

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
							  gprs ? gprs->ax : 0);
	}

	if (format_size & PEBS_DATACFG_XMMS) {
		struct pebs_xmm *xmm = next_record;

		next_record = xmm + 1;
		perf_regs->xmm_regs = xmm->xmm;
	}

	if (format_size & PEBS_DATACFG_LBRS) {
		struct lbr_entry *lbr = next_record;
		int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
					& 0xff) + 1;
		next_record = next_record + num_lbr * sizeof(struct lbr_entry);

		if (has_branch_stack(event)) {
			intel_pmu_store_pebs_lbrs(lbr);
			data->br_stack = &cpuc->lbr_stack;
		}
	}

	WARN_ONCE(next_record != __pebs + (format_size >> 48),
			"PEBS record size %llu, expected %llu, config %llx\n",
			format_size >> 48,
			(u64)(next_record - __pebs),
			basic->format_size);
}

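/*
 * Scan the PEBS buffer from @base to @top and return the first record
 * whose status says it belongs to counter @bit, or NULL if there is none.
 */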
static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	void *at;
	u64 pebs_status;

	/*
	 * fmt0 does not have a status bitfield (does not use
	 * perf_record_nhm format)
	 */
	if (x86_pmu.intel_cap.pebs_format < 1)
		return base;

	if (base == NULL)
		return NULL;

1749 1750
	for (at = base; at < top; at += cpuc->pebs_record_size) {
		unsigned long status = get_pebs_status(at);
1751

1752
		if (test_bit(bit, (unsigned long *)&status)) {
1753 1754 1755
			/* PEBS v3 has accurate status bits */
			if (x86_pmu.intel_cap.pebs_format >= 3)
				return at;
1756

1757
			if (status == (1 << bit))
1758 1759 1760
				return at;

			/* clear non-PEBS bit and re-check */
1761
			pebs_status = status & cpuc->pebs_enabled;
1762
			pebs_status &= PEBS_COUNTER_MASK;
1763 1764 1765 1766 1767 1768 1769
			if (pebs_status == (1 << bit))
				return at;
		}
	}
	return NULL;
}

void intel_pmu_auto_reload_read(struct perf_event *event)
{
	WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));

	perf_pmu_disable(event->pmu);
	intel_pmu_drain_pebs_buffer();
	perf_pmu_enable(event->pmu);
}

/*
 * Special variant of intel_pmu_save_and_restart() for auto-reload.
 */
static int
intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 period = hwc->sample_period;
	u64 prev_raw_count, new_raw_count;
	s64 new, old;

	WARN_ON(!period);

	/*
	 * drain_pebs() only happens when the PMU is disabled.
	 */
	WARN_ON(this_cpu_read(cpu_hw_events.enabled));

	prev_raw_count = local64_read(&hwc->prev_count);
	rdpmcl(hwc->event_base_rdpmc, new_raw_count);
	local64_set(&hwc->prev_count, new_raw_count);

	/*
	 * Since the counter increments a negative counter value and
	 * overflows on the sign switch, giving the interval:
	 *
	 *   [-period, 0]
	 *
	 * the difference between two consecutive reads is:
	 *
	 *   A) value2 - value1;
	 *      when no overflows have happened in between,
	 *
	 *   B) (0 - value1) + (value2 - (-period));
	 *      when one overflow happened in between,
	 *
	 *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
	 *      when @n overflows happened in between.
	 *
	 * Here A) is the obvious difference, B) is the extension to the
	 * discrete interval, where the first term is to the top of the
	 * interval and the second term is from the bottom of the next
	 * interval and C) the extension to multiple intervals, where the
	 * middle term is the whole intervals covered.
	 *
	 * An equivalent of C, by reduction, is:
	 *
	 *   value2 - value1 + n * period
	 */
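	/*
	 * Worked example (illustrative numbers): with period = 100,
	 * value1 = -30, value2 = -80 and n = 2 overflows in between:
	 *
	 *   value2 - value1 + n * period = -80 - (-30) + 200 = 150
	 *
	 * which matches C): (0 - (-30)) + 1 * 100 + (-80 - (-100)) = 150.
	 */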
	new = ((s64)(new_raw_count << shift) >> shift);
	old = ((s64)(prev_raw_count << shift) >> shift);
	local64_add(new - old + count * period, &event->count);

	local64_set(&hwc->period_left, -new);

	perf_event_update_userpage(event);

	return 0;
}

static __always_inline void
__intel_pmu_pebs_event(struct perf_event *event,
		       struct pt_regs *iregs,
		       struct perf_sample_data *data,
		       void *base, void *top,
		       int bit, int count,
		       void (*setup_sample)(struct perf_event *,
					    struct pt_regs *,
					    void *,
					    struct perf_sample_data *,
					    struct pt_regs *))
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct x86_perf_regs perf_regs;
	struct pt_regs *regs = &perf_regs.regs;
	void *at = get_next_pebs_record_by_bit(base, top, bit);
	static struct pt_regs dummy_iregs;

	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
		/*
		 * Auto-reload is currently only enabled in fixed period
		 * mode, so the reload value is always hwc->sample_period.
		 * This may need to change if auto-reload is ever enabled
		 * in freq mode.
		 */
		intel_pmu_save_and_restart_reload(event, count);
	} else if (!intel_pmu_save_and_restart(event))
		return;

	if (!iregs)
		iregs = &dummy_iregs;

	while (count > 1) {
		setup_sample(event, iregs, at, data, regs);
		perf_event_output(event, data, regs);
		at += cpuc->pebs_record_size;
		at = get_next_pebs_record_by_bit(at, top, bit);
		count--;
	}

	setup_sample(event, iregs, at, data, regs);
	if (iregs == &dummy_iregs) {
		/*
		 * The PEBS records may be drained in a non-overflow context,
		 * e.g., large PEBS + context switch. Treat the last record
		 * the same as the other PEBS records and do not invoke the
		 * generic overflow handler.
		 */
		perf_event_output(event, data, regs);
	} else {
		/*
		 * All but the last record have been processed above.
		 * The last one is left so the overflow handler can be called.
		 */
		if (perf_event_overflow(event, data, regs))
			x86_pmu_stop(event, 0);
	}
}

static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise_ip)
		return;

	n = top - at;
	if (n <= 0) {
		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
			intel_pmu_save_and_restart_reload(event, 0);
		return;
	}

	__intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
			       setup_pebs_fixed_sample_data);
}

static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
{
	struct perf_event *event;
	int bit;

	/*
	 * drain_pebs() can be called twice in a short period for an
	 * auto-reload event in pmu::read(), with no overflows having
	 * happened in between. intel_pmu_save_and_restart_reload() still
	 * needs to be called to update event->count for this case.
	 */
	for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
		event = cpuc->events[bit];
		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
			intel_pmu_save_and_restart_reload(event, 0);
	}
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event;
	void *base, *at, *top;
	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	int bit, i, size;
	u64 mask;

	if (!x86_pmu.pebs_active)
		return;

	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
	size = x86_pmu.max_pebs_events;
	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
	}

	if (unlikely(base >= top)) {
		intel_pmu_pebs_event_update_no_drain(cpuc, size);
		return;
	}

	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;
		u64 pebs_status;

		pebs_status = p->status & cpuc->pebs_enabled;
		pebs_status &= mask;

		/* PEBS v3 has more accurate status bits */
		if (x86_pmu.intel_cap.pebs_format >= 3) {
			for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
				counts[bit]++;

			continue;
		}

		/*
		 * On some CPUs the PEBS status can be zero when PEBS is
		 * racing with clearing of GLOBAL_STATUS.
		 *
		 * Normally we would drop that record, but in the
		 * case when there is only a single active PEBS event
		 * we can assume it's for that event. (x & (x - 1) clears
		 * the lowest set bit, so the check below holds only when
		 * exactly one bit is set in pebs_enabled.)
		 */
		if (!pebs_status && cpuc->pebs_enabled &&
			!(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
			pebs_status = cpuc->pebs_enabled;

		bit = find_first_bit((unsigned long *)&pebs_status,
					x86_pmu.max_pebs_events);
		if (bit >= x86_pmu.max_pebs_events)
			continue;

		/*
		 * The PEBS hardware does not deal well with events that
		 * happen close to each other and set multiple status bits,
		 * but that should be rare.
		 *
		 * If those events include one PEBS and multiple non-PEBS
		 * events, the PEBS record is unaffected and handled
		 * normally (slow path).
		 *
		 * If they include two or more PEBS events, their records
		 * may be collapsed into a single one, making it impossible
		 * to reconstruct all events that caused the record. This
		 * is called a collision, and such records are dropped.
		 */
		if (pebs_status != (1ULL << bit)) {
			for_each_set_bit(i, (unsigned long *)&pebs_status, size)
				error[i]++;
			continue;
		}

		counts[bit]++;
	}

	for_each_set_bit(bit, (unsigned long *)&mask, size) {
		if ((counts[bit] == 0) && (error[bit] == 0))
			continue;

		event = cpuc->events[bit];
		if (WARN_ON_ONCE(!event))
			continue;

		if (WARN_ON_ONCE(!event->attr.precise_ip))
			continue;

		/* log the number of dropped samples */
		if (error[bit]) {
			perf_log_lost_samples(event, error[bit]);

			if (iregs && perf_event_account_interrupt(event))
				x86_pmu_stop(event, 0);
		}

		if (counts[bit]) {
			__intel_pmu_pebs_event(event, iregs, data, base,
					       top, bit, counts[bit],
					       setup_pebs_fixed_sample_data);
		}
	}
}

static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
{
	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event;
	void *base, *at, *top;
	int bit, size;
	u64 mask;

	if (!x86_pmu.pebs_active)
		return;

	base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_basic *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

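	/*
	 * With adaptive PEBS (format 4) both the general-purpose and the
	 * fixed counters can generate PEBS records, so the mask spans both
	 * counter ranges.
	 */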
	mask = ((1ULL << x86_pmu.max_pebs_events) - 1) |
	       (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
	size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;

	if (unlikely(base >= top)) {
		intel_pmu_pebs_event_update_no_drain(cpuc, size);
		return;
	}

	for (at = base; at < top; at += cpuc->pebs_record_size) {
		u64 pebs_status;

		pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
		pebs_status &= mask;

		for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
			counts[bit]++;
	}

	for_each_set_bit(bit, (unsigned long *)&mask, size) {
		if (counts[bit] == 0)
			continue;

		event = cpuc->events[bit];
		if (WARN_ON_ONCE(!event))
			continue;

		if (WARN_ON_ONCE(!event->attr.precise_ip))
			continue;

		__intel_pmu_pebs_event(event, iregs, data, base,
				       top, bit, counts[bit],
				       setup_pebs_adaptive_sample_data);
	}
}

/*
 * BTS, PEBS probe and setup
 */

void __init intel_ds_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
	if (x86_pmu.version <= 4)
		x86_pmu.pebs_no_isolation = 1;

	if (x86_pmu.pebs) {
		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
		char *pebs_qual = "";
		int format = x86_pmu.intel_cap.pebs_format;

		if (format < 4)
			x86_pmu.intel_cap.pebs_baseline = 0;

		switch (format) {
		case 0:
			pr_cont("PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			/*
			 * Using >PAGE_SIZE buffers makes the WRMSR to
			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
			 * mysteriously hang on Core2.
			 *
			 * As a workaround, we don't do this.
			 */
			x86_pmu.pebs_buffer_size = PAGE_SIZE;
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			break;

		case 1:
			pr_cont("PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 2:
			pr_cont("PEBS fmt2%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 3:
			pr_cont("PEBS fmt3%c, ", pebs_type);
			x86_pmu.pebs_record_size =
						sizeof(struct pebs_record_skl);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
			break;

		case 4:
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
			if (x86_pmu.intel_cap.pebs_baseline) {
				x86_pmu.large_pebs_flags |=
					PERF_SAMPLE_BRANCH_STACK |
					PERF_SAMPLE_TIME;
				x86_pmu.flags |= PMU_FL_PEBS_ALL;
				pebs_qual = "-baseline";
				x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
			} else {
				/* Only basic record supported */
				x86_pmu.large_pebs_flags &=
					~(PERF_SAMPLE_ADDR |
					  PERF_SAMPLE_TIME |
					  PERF_SAMPLE_DATA_SRC |
					  PERF_SAMPLE_TRANSACTION |
					  PERF_SAMPLE_REGS_USER |
					  PERF_SAMPLE_REGS_INTR);
			}
			pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);

			if (x86_pmu.intel_cap.pebs_output_pt_available) {
				pr_cont("PEBS-via-PT, ");
				x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
			}

			break;

		default:
			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
		}
	}
}

void perf_restore_debug_store(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}