pmu-emul.c 25.0 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
2 3 4 5 6 7 8 9 10
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
11
#include <linux/perf/arm_pmu.h>
12
#include <linux/uaccess.h>
13 14
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
15
#include <kvm/arm_vgic.h>
16

17
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
18 19
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
20

21 22
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	switch (kvm->arch.pmuver) {
	case 1:			/* ARMv8.0 */
		return GENMASK(9, 0);
	case 4:			/* ARMv8.1 */
	case 5:			/* ARMv8.4 */
	case 6:			/* ARMv8.5 */
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		return 0;
	}
}

38 39 40 41 42 43 44 45 46 47 48
/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

49 50 51 52 53 54 55 56 57 58 59
static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

60
/**
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}
95 96 97 98 99 100 101
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}
102 103 104

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
105 106 107
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
108
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
109
{
110
	u64 eventsel, reg;
111

112 113 114 115 116 117
	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
118
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
119 120 121 122 123 124 125 126 127 128 129 130 131

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;
132

133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of counter register plus
149 150 151 152 153 154
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
174
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
175 176 177
		counter = lower_32_bits(counter);

	return counter;
178 179 180 181 182 183 184 185 186 187 188 189 190 191
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
192
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
193 194 195

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
196
}
197

198 199 200 201 202 203
/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
204
	pmc = kvm_pmu_get_canonical_pmc(pmc);
205 206 207 208 209 210 211
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

212 213 214 215 216 217 218 219
/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
220
	u64 counter, reg, val;
221

222 223 224 225 226 227
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

228 229 230
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
231
	} else {
232 233
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
234
	}
235

236 237 238 239 240
	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

241
	kvm_pmu_release_perf_event(pmc);
242 243
}

244 245 246 247 248 249 250 251 252 253 254 255 256 257
/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

258 259 260 261 262 263 264
/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
265
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
266
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
267
	int i;
268

269
	for_each_set_bit(i, &mask, 32)
270
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
271 272

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
273 274
}

275 276 277 278 279 280 281 282 283 284
/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

285 286
	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
287 288
}

289 290
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
291
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
292 293 294 295 296 297 298 299 300

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
301
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
302 303 304 305 306
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
307
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
308 309 310 311 312
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

313
	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
314 315 316 317 318 319 320
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];
321

322 323 324
		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);
325 326

		/* At this point, pmc must be the canonical */
327 328 329 330 331 332 333 334 335
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
336
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
337 338 339 340 341
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
342
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
343 344 345 346 347 348 349 350 351 352 353 354 355
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];
356

357 358 359
		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);
360 361

		/* At this point, pmc must be the canonical */
362 363 364 365
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}
366

367 368 369 370
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

371 372 373 374
	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
375
		reg &= kvm_pmu_valid_counter_mask(vcpu);
376
	}
377 378 379 380

	return reg;
}

381
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
382 383
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
384 385 386 387
	bool overflow;

	if (!kvm_arm_pmu_v3_ready(vcpu))
		return;
388

389
	overflow = !!kvm_pmu_overflow_status(vcpu);
390 391 392 393 394 395 396
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
397
					      pmu->irq_num, overflow, pmu);
398 399 400 401
		WARN_ON(ret);
	}
}

402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the timer bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
452
 * When the perf event overflows, set the overflow status and inform the vcpu.
453 454 455 456 457 458
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
459
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
460 461
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;
478

479
	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
480 481 482 483 484

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
		kvm_vcpu_kick(vcpu);
	}
485 486

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
487 488
}

489 490 491 492 493 494 495
/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
496
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
497 498
	int i;

499 500 501
	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

502 503 504
	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

505
	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
506 507
		u64 type, reg;

508 509
		if (!(val & BIT(i)))
			continue;
510 511 512

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
513
		type &= kvm_pmu_event_mask(vcpu->kvm);
514 515 516 517 518 519 520 521 522 523 524 525 526 527
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this even SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
528
			reg = lower_32_bits(reg);
529 530 531 532 533 534
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
535 536 537 538
		}
	}
}

539 540 541 542 543 544 545
/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
546
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
547 548 549
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
550
		kvm_pmu_enable_counter_mask(vcpu,
551
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
552
	} else {
553
		kvm_pmu_disable_counter_mask(vcpu, mask);
554 555 556 557 558 559
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
560
		for_each_set_bit(i, &mask, 32)
561 562 563 564
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

565 566
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
567 568
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
569 570 571
}

/**
572
 * kvm_pmu_create_perf_event - create a perf event for a counter
573 574 575
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
576
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
577 578
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
579
	struct kvm_pmc *pmc;
580 581
	struct perf_event *event;
	struct perf_event_attr attr;
582 583
	u64 eventsel, counter, reg, data;

584 585 586 587 588 589 590 591 592
	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
593
	data = __vcpu_sys_reg(vcpu, reg);
594 595

	kvm_pmu_stop_counter(vcpu, pmc);
596 597 598 599 600 601 602 603
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;
604

605 606 607 608 609 610
	/*
	 * If we have a filter in place and that the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
611 612
		return;

613 614 615 616
	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
617
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
618 619 620 621
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
622
	attr.config = eventsel;
623

624 625
	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

626
	if (kvm_pmu_pmc_is_chained(pmc)) {
627 628 629 630 631
		/**
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
632
		attr.sample_period = (-counter) & GENMASK(63, 0);
633
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
634

635 636 637 638 639 640 641 642 643
		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);
644

645
		event = perf_event_create_kernel_counter(&attr, -1, current,
646
						 kvm_pmu_perf_overflow, pmc);
647 648
	}

649 650 651 652 653 654 655 656
	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}
657

658 659 660 661 662 663
/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
664
 * typer register and the enable state of the odd register.
665 666 667 668
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
669 670 671 672 673 674
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
675

676 677 678 679 680 681
	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
682 683 684 685
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
686
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
687
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
688
		return;
689
	}
690
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
691 692
}

693 694 695 696 697 698 699 700 701 702 703 704 705
/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
706 707 708 709 710
	u64 reg, mask;

	mask  =  ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);
711 712 713 714

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

715
	__vcpu_sys_reg(vcpu, reg) = data & mask;
716 717

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
718 719 720
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765
static int kvm_pmu_probe_pmuver(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu;
	int pmuver = 0xf;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return 0xf;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver)
			pmuver = pmu->pmuver;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmuver;
}

766 767 768 769 770 771 772 773 774
bool kvm_arm_support_pmu_v3(void)
{
	/*
	 * Check if HW_PERF_EVENTS are supported by checking the number of
	 * hardware performance counters. This could ensure the presence of
	 * a physical PMU and CONFIG_PERF_EVENT is selected.
	 */
	return (perf_num_counters() > 0);
}
775

776
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
777
{
778 779
	if (!vcpu->arch.pmu.created)
		return 0;
780

781
	/*
782 783
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
784
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
785
	 */
786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -EINVAL;

		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		   return -EINVAL;
	}
802 803 804 805 806 807 808 809 810 811

	kvm_pmu_vcpu_reset(vcpu);
	vcpu->arch.pmu.ready = true;

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
812 813
		int ret;

814 815 816 817 818 819 820 821 822 823
		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;
824 825 826 827 828

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
829
	}
830

831
	vcpu->arch.pmu.created = true;
832 833 834
	return 0;
}

835 836 837 838 839 840
/*
 * For one VM the interrupt type must be same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
841 842 843 844 845 846 847 848
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

849
		if (irq_is_ppi(irq)) {
850 851 852 853 854 855 856 857 858 859 860 861 862
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
863 864 865 866 867 868 869
	if (!kvm_arm_support_pmu_v3() ||
	    !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

870 871 872 873 874 875
	if (!vcpu->kvm->arch.pmuver)
		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

	if (vcpu->kvm->arch.pmuver == 0xf)
		return -ENODEV;

876 877 878 879 880
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

881 882 883
		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

884 885 886
		if (get_user(irq, uaddr))
			return -EFAULT;

887
		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
888
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
889 890 891
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
892 893 894 895 896 897 898 899 900
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&vcpu->kvm->lock);

		if (!vcpu->kvm->arch.pmu_filter) {
			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
			if (!vcpu->kvm->arch.pmu_filter) {
				mutex_unlock(&vcpu->kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&vcpu->kvm->lock);

		return 0;
	}
948 949 950 951 952 953 954 955 956 957 958 959 960 961
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

962 963 964
		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983
		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
984
	case KVM_ARM_VCPU_PMU_V3_FILTER:
985 986 987 988 989 990 991
		if (kvm_arm_support_pmu_v3() &&
		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
			return 0;
	}

	return -ENXIO;
}