pmu-emul.c 23.4 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
2 3 4 5 6 7 8 9 10
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
11
#include <linux/perf/arm_pmu.h>
12
#include <linux/uaccess.h>
13 14
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
15
#include <kvm/arm_vgic.h>
16

17
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
18 19
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
20

21 22
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
/*
 * Return the mask that isolates the event number in an event type register,
 * selected by the PMU version recorded for this VM: ten bits for version 1,
 * sixteen bits for versions 4, 5 and 6. Any other version warns once and
 * yields an empty mask.
 */
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	u32 mask = 0;

	switch (kvm->arch.pmuver) {
	case 1:			/* ARMv8.0 */
		mask = GENMASK(9, 0);
		break;
	case 4:			/* ARMv8.1 */
	case 5:			/* ARMv8.4 */
	case 6:			/* ARMv8.5 */
		mask = GENMASK(15, 0);
		break;
	default:		/* Not expected; keep the empty mask */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		break;
	}

	return mask;
}

38 39 40 41 42 43 44 45 46 47 48
/**
 * kvm_pmu_idx_is_64bit - tell whether a counter is 64 bits wide
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * Only the cycle counter can be 64bit, and only while PMCR_EL0.LC is set.
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (select_idx != ARMV8_PMU_CYCLE_IDX)
		return false;

	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC) != 0;
}

49 50 51 52 53 54 55 56 57 58 59
/*
 * Recover the vcpu that owns a counter: step back by the counter's own index
 * to reach the first element of the pmc array, then walk outwards through
 * the enclosing kvm_pmu and kvm_vcpu_arch structures.
 */
static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmc *first_pmc = pmc - pmc->idx;
	struct kvm_pmu *pmu = container_of(first_pmc, struct kvm_pmu, pmc[0]);
	struct kvm_vcpu_arch *arch_state;

	arch_state = container_of(pmu, struct kvm_vcpu_arch, pmu);

	return container_of(arch_state, struct kvm_vcpu, arch);
}

60
/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 *
 * The chained bitmap holds one bit per even/odd counter pair, so the bit
 * looked up is the counter index divided by two.
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int pair = pmc->idx >> 1;

	return test_bit(pair, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 *
 * Odd indices are the high half of a counter pair, even indices the low half.
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return (select_idx & 0x1) != 0;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * For a chained pair the low (even) counter is the canonical one and holds
 * the underlying perf event; an unchained counter is its own canonical pmc.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (!kvm_pmu_pmc_is_chained(pmc))
		return pmc;

	if (!kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc;

	/* High half of a chained pair: the low half sits just before it */
	return pmc - 1;
}
95 96 97 98 99 100 101
/*
 * Return the other member of the even/odd pair @pmc belongs to: the
 * preceding counter for an odd index, the following one for an even index.
 */
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	return kvm_pmu_idx_is_high_counter(pmc->idx) ? pmc - 1 : pmc + 1;
}
102 103 104

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
105 106 107
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
108
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
109
{
110
	u64 eventsel, reg;
111

112 113 114 115 116 117
	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
118
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
119 120 121 122 123 124 125 126 127 128 129 130 131

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;
132

133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of counter register plus
149 150 151 152 153 154
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * Returns the upper 32 bits of the pair value for the high counter of a
 * chained pair, the full value for the cycle counter, and the lower 32 bits
 * for every other counter.
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmc *pmc = &vcpu->arch.pmu.pmc[select_idx];
	u64 pair_value = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		return upper_32_bits(pair_value);

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return pair_value;

	return lower_32_bits(pair_value);
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 *
 * The shadow register is adjusted by the difference between @val and the
 * value the counter currently reads as, then the backing perf event is
 * rebuilt.
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg, delta;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		reg = PMCCNTR_EL0;
	else
		reg = PMEVCNTR0_EL0 + select_idx;

	delta = (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
	__vcpu_sys_reg(vcpu, reg) += delta;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}
197

198 199 200 201 202 203
/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
204
	pmc = kvm_pmu_get_canonical_pmc(pmc);
205 206 207 208 209 210 211
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

212 213 214 215 216 217 218 219
/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
220
	u64 counter, reg, val;
221

222 223 224 225 226 227
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

228 229 230
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
231
	} else {
232 233
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
234
	}
235

236 237 238 239 240
	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

241
	kvm_pmu_release_perf_event(pmc);
242 243
}

244 245 246 247 248 249 250 251 252 253 254 255 256 257
/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

258 259 260 261 262 263 264
/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
265
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
266
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
267
	int i;
268

269
	for_each_set_bit(i, &mask, 32)
270
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
271 272

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
273 274
}

275 276 277 278 279 280 281 282 283 284
/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

285 286
	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
287 288
}

289 290
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
291
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
292 293 294 295 296 297 298 299 300

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
301
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
302 303 304 305 306
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
307
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
308 309 310 311 312
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

313
	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
314 315 316 317 318 319 320
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];
321

322 323 324
		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);
325 326

		/* At this point, pmc must be the canonical */
327 328 329 330 331 332 333 334 335
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
336
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
337 338 339 340 341
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
342
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
343 344 345 346 347 348 349 350 351 352 353 354 355
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];
356

357 358 359
		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);
360 361

		/* At this point, pmc must be the canonical */
362 363 364 365
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}
366

367 368 369 370
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

371 372 373 374
	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
375
		reg &= kvm_pmu_valid_counter_mask(vcpu);
376
	}
377 378 379 380

	return reg;
}

381
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
382 383
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
384 385 386 387
	bool overflow;

	if (!kvm_arm_pmu_v3_ready(vcpu))
		return;
388

389
	overflow = !!kvm_pmu_overflow_status(vcpu);
390 391 392 393 394 395 396
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
397
					      pmu->irq_num, overflow, pmu);
398 399 400 401
		WARN_ON(ret);
	}
}

402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
/*
 * Tell whether the PMU interrupt level differs from the level last published
 * in the run structure. Always false when the irqchip is in the kernel.
 */
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	bool published = vcpu->run->s.regs.device_irq_level & KVM_ARM_DEV_PMU;
	bool current_level = vcpu->arch.pmu.irq_level;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return current_level != published;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *sync = &vcpu->run->s.regs;

	/* Mirror the PMU line into its bit of the device irq level bitmap */
	if (vcpu->arch.pmu.irq_level)
		sync->device_irq_level |= KVM_ARM_DEV_PMU;
	else
		sync->device_irq_level &= ~KVM_ARM_DEV_PMU;
}

427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 *
 * This is a thin wrapper: all the work is done by kvm_pmu_update_state(),
 * which recomputes the interrupt level from the overflow status and only
 * acts when that level has changed.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 *
 * This is a thin wrapper: all the work is done by kvm_pmu_update_state(),
 * which recomputes the interrupt level from the overflow status and only
 * acts when that level has changed.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
452
 * When the perf event overflows, set the overflow status and inform the vcpu.
453 454 455 456 457 458
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
459
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
460 461
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;
478

479
	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
480 481 482 483 484

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
		kvm_vcpu_kick(vcpu);
	}
485 486

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
487 488
}

489 490 491 492 493 494 495
/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 *
 * Increments every enabled event counter selected by @val that is programmed
 * with the SW_INCR event. A wrap of the low 32 bits either carries into the
 * high counter of a chained pair or sets the counter's overflow bit.
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 evtype, cnt;

		if ((val & BIT(i)) == 0)
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		evtype = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		evtype &= kvm_pmu_event_mask(vcpu->kvm);
		if (evtype != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* Bump the counter, keeping it to 32 bits */
		cnt = lower_32_bits(__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = cnt;

		if (cnt) /* no overflow on the low part */
			continue;

		if (!kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
			continue;
		}

		/* Chained: carry into the high counter */
		cnt = lower_32_bits(__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = cnt;
		if (!cnt) /* mark overflow on the high counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
	}
}

539 540 541 542 543 544 545
/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 *
 * Applies the side effects of a PMCR_EL0 write:
 *  - E set: enable the valid counters currently set in PMCNTENSET_EL0;
 *    E clear: disable every valid counter.
 *  - C set: reset the cycle counter to zero.
 *  - P set: reset the event counters to zero. The cycle counter is not
 *    affected by P.
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
	} else {
		kvm_pmu_disable_counter_mask(vcpu, mask);
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		/*
		 * The valid counter mask also carries the cycle counter bit.
		 * PMCR_EL0.P must only reset the event counters, so scan just
		 * the bits below the cycle counter index.
		 */
		for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

565 566
/*
 * A counter counts only when the PMU is globally enabled (PMCR_EL0.E) and
 * the counter's own bit is set in PMCNTENSET_EL0.
 */
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return false;

	return (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)) != 0;
}

/**
572
 * kvm_pmu_create_perf_event - create a perf event for a counter
573 574 575
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
576
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
577 578
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
579
	struct kvm_pmc *pmc;
580 581
	struct perf_event *event;
	struct perf_event_attr attr;
582 583
	u64 eventsel, counter, reg, data;

584 585 586 587 588 589 590 591 592
	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
593
	data = __vcpu_sys_reg(vcpu, reg);
594 595

	kvm_pmu_stop_counter(vcpu, pmc);
596
	eventsel = data & kvm_pmu_event_mask(vcpu->kvm);;
597

598
	/* Software increment event does't need to be backed by a perf event */
599
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
600
	    pmc->idx != ARMV8_PMU_CYCLE_IDX)
601 602
		return;

603 604 605 606
	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
607
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
608 609 610 611
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
612
	attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
613
		ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
614

615 616
	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

617
	if (kvm_pmu_pmc_is_chained(pmc)) {
618 619 620 621 622
		/**
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
623
		attr.sample_period = (-counter) & GENMASK(63, 0);
624
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
625

626 627 628 629 630 631 632 633 634
		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);
635

636
		event = perf_event_create_kernel_counter(&attr, -1, current,
637
						 kvm_pmu_perf_overflow, pmc);
638 639
	}

640 641 642 643 644 645 646 647
	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}
648

649 650 651 652 653 654
/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * A pair is chained when its odd counter is programmed with the CHAIN event
 * and is enabled. When that state changes, the affected counters are stopped
 * before the pair's bit in the chained bitmap is updated.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmc *pmc = &vcpu->arch.pmu.pmc[select_idx];
	bool was_chained, want_chained;

	was_chained = kvm_pmu_pmc_is_chained(pmc);
	want_chained = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		       kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (want_chained == was_chained)
		return;

	/* The canonical counter is looked up under the old chain state */
	kvm_pmu_stop_counter(vcpu, kvm_pmu_get_canonical_pmc(pmc));

	if (want_chained) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
	} else {
		clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
	}
}

684 685 686 687 688 689 690 691 692 693 694 695 696
/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * The written value is stored in the counter's shadow type register with the
 * event number field narrowed to what the VM's PMU version supports. The
 * chain state and the backing perf event are then brought up to date.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 typer, writable;

	/* Swap the generic event field for the version-specific one */
	writable = ARMV8_PMU_EVTYPE_MASK;
	writable &= ~ARMV8_PMU_EVTYPE_EVENT;
	writable |= kvm_pmu_event_mask(vcpu->kvm);

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		typer = PMCCFILTR_EL0;
	else
		typer = PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, typer) = data & writable;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756
static int kvm_pmu_probe_pmuver(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu;
	int pmuver = 0xf;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return 0xf;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver)
			pmuver = pmu->pmuver;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmuver;
}

757 758 759 760 761 762 763 764 765
/*
 * Report whether a PMUv3 can be offered to guests, judged by whether perf
 * reports at least one hardware counter.
 */
bool kvm_arm_support_pmu_v3(void)
{
	/*
	 * Check if HW_PERF_EVENTS are supported by checking the number of
	 * hardware performance counters. This could ensure the presence of
	 * a physical PMU and CONFIG_PERF_EVENT is selected.
	 */
	if (perf_num_counters() > 0)
		return true;

	return false;
}
766

767
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
768
{
769 770
	if (!vcpu->arch.pmu.created)
		return 0;
771

772
	/*
773 774
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
775
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
776
	 */
777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -EINVAL;

		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		   return -EINVAL;
	}
793 794 795 796 797 798 799 800 801 802

	kvm_pmu_vcpu_reset(vcpu);
	vcpu->arch.pmu.ready = true;

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
803 804
		int ret;

805 806 807 808 809 810 811 812 813 814
		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;
815 816 817 818 819

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
820
	}
821

822
	vcpu->arch.pmu.created = true;
823 824 825
	return 0;
}

826 827 828 829 830 831
/*
 * For one VM the interrupt type must be same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
832 833 834 835 836 837 838 839
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

840
		if (irq_is_ppi(irq)) {
841 842 843 844 845 846 847 848 849 850 851 852 853
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
854 855 856 857 858 859 860
	if (!kvm_arm_support_pmu_v3() ||
	    !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

861 862 863 864 865 866
	if (!vcpu->kvm->arch.pmuver)
		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

	if (vcpu->kvm->arch.pmuver == 0xf)
		return -ENODEV;

867 868 869 870 871
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

872 873 874
		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

875 876 877
		if (get_user(irq, uaddr))
			return -EFAULT;

878
		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
879
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
880 881 882
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

906 907 908
		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934
		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

/*
 * Report whether a PMUv3 device attribute is available on this vcpu.
 * Returns 0 for the IRQ and INIT attributes when the PMU is supported and
 * was requested for the vcpu, -ENXIO in every other case.
 */
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	if (attr->attr != KVM_ARM_VCPU_PMU_V3_IRQ &&
	    attr->attr != KVM_ARM_VCPU_PMU_V3_INIT)
		return -ENXIO;

	if (!kvm_arm_support_pmu_v3())
		return -ENXIO;

	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
		return -ENXIO;

	return 0;
}