arch_timer.c
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_hyp.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);

static const struct kvm_irq_level default_ptimer_irq = {
	.irq	= 30,
	.level	= 1,
};

static const struct kvm_irq_level default_vtimer_irq = {
	.irq	= 27,
	.level	= 1,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return static_branch_unlikely(&userspace_irqchip_in_use) &&
		unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS);
}

static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
{
	hrtimer_cancel(hrt);
	if (work)
		cancel_work_sync(work);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *vtimer;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in vtimer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	vtimer = vcpu_vtimer(vcpu);
	if (kvm_timer_should_fire(vtimer))
		kvm_timer_update_irq(vcpu, true, vtimer);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

/*
 * Work function for handling the backup timer that we schedule when a vcpu is
 * no longer running, but had a timer programmed to fire in the future.
 */
static void kvm_timer_inject_irq_work(struct work_struct *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);

	/*
	 * If the vcpu is blocked we want to wake it up so that it will see
	 * the timer has expired when entering the guest.
	 */
	kvm_vcpu_wake_up(vcpu);
}

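/*
 * Return the number of nanoseconds until this timer's CVAL is reached, or
 * zero if the compare value has already been passed.
 */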
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	u64 cval, now;

	cval = timer_ctx->cnt_cval;
	now = kvm_phys_timer_read() - timer_ctx->cntvoff;

	if (now < cval) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 cval - now,
					 timecounter->mask,
					 &timecounter->frac);
		return ns;
	}

	return 0;
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
		(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	if (kvm_timer_irq_can_fire(vtimer))
		min_virt = kvm_timer_compute_delta(vtimer);

	if (kvm_timer_irq_can_fire(ptimer))
		min_phys = kvm_timer_compute_delta(ptimer);

	/* If none of the timers can fire, return 0 */
	if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX))
		return 0;

	return min(min_virt, min_phys);
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	schedule_work(&timer->expired);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ptimer;
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, phys_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
	ptimer = vcpu_ptimer(vcpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ptimer);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ptimer);
	return HRTIMER_NORESTART;
}

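/*
 * Compute the current output level of the timer: when the (virtual) timer is
 * loaded on the CPU we read the hardware control register, otherwise we
 * compare CVAL against the guest's view of the counter.
 */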
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	u64 cval, now;

	if (timer_ctx->loaded) {
		u32 cnt_ctl;

		/* Only the virtual timer can be loaded so far */
		cnt_ctl = read_sysreg_el0(cntv_ctl);
		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
		        (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_ctx->cnt_cval;
	now = kvm_phys_timer_read() - timer_ctx->cntvoff;

	return cval <= now;
}

bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	if (kvm_timer_should_fire(vtimer))
		return true;

	return kvm_timer_should_fire(ptimer);
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

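/*
 * Record the new output level of the emulated timer and, unless a userspace
 * irqchip is in use, propagate it to the vgic.
 */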
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	int ret;

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
				   timer_ctx->irq.level);

	if (!userspace_irqchip(vcpu->kvm)) {
		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					  timer_ctx->irq.irq,
					  timer_ctx->irq.level,
					  timer_ctx);
		WARN_ON(ret);
	}
}

/* Schedule the background timer for the emulated timer. */
static void phys_timer_emulate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future.  If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
		soft_timer_cancel(&timer->phys_timer, NULL);
		return;
	}

	soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer));
}

/*
 * Check if there was a change in the timer state, so that we should either
 * raise or lower the line level to the GIC or schedule a background timer to
 * emulate the physical timer.
 */
static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	bool level;

	if (unlikely(!timer->enabled))
		return;

	/*
	 * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
	 * of its lifecycle is offloaded to the hardware, and we therefore may
	 * not have lowered the irq.level value before having to signal a new
	 * interrupt, but have to signal an interrupt every time the level is
	 * asserted.
	 */
	level = kvm_timer_should_fire(vtimer);
	kvm_timer_update_irq(vcpu, level, vtimer);

	phys_timer_emulate(vcpu);

	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
}

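/*
 * Save the guest's virtual timer registers from the CPU and disable the
 * hardware virtual timer so it no longer fires on behalf of this vcpu.
 */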
static void vtimer_save_state(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	unsigned long flags;

	local_irq_save(flags);

	if (!vtimer->loaded)
		goto out;

	if (timer->enabled) {
		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
		vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
	}

	/* Disable the virtual timer */
	write_sysreg_el0(0, cntv_ctl);
	isb();

	vtimer->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_block, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
void kvm_timer_schedule(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	vtimer_save_state(vcpu);

	/*
	 * No need to schedule a background timer if any guest timer has
	 * already expired, because kvm_vcpu_block will return before putting
	 * the thread to sleep.
	 */
	if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
		return;

	/*
	 * If neither timer can raise interrupts (disabled or masked), then
	 * there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer))
		return;

	/*
	 * The guest timers have not yet expired, schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

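/* Reload the saved virtual timer registers onto the CPU. */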
static void vtimer_restore_state(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	unsigned long flags;

	local_irq_save(flags);

	if (vtimer->loaded)
		goto out;

	if (timer->enabled) {
		write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
		isb();
		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
	}

	vtimer->loaded = true;
out:
	local_irq_restore(flags);
}

void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	vtimer_restore_state(vcpu);

	soft_timer_cancel(&timer->bg_timer, &timer->expired);
}

static void set_cntvoff(u64 cntvoff)
{
	u32 low = lower_32_bits(cntvoff);
	u32 high = upper_32_bits(cntvoff);

	/*
	 * Since kvm_call_hyp doesn't fully support the ARM PCS especially on
	 * 32-bit systems, but rather passes register by register shifted one
	 * place (we put the function address in r0/x0), we cannot simply pass
	 * a 64-bit value as an argument, but have to split the value in two
	 * 32-bit halves.
	 */
	kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
}

static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
{
	int r;
	r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

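/*
 * With an in-kernel GIC, make the active state of the host vtimer interrupt
 * follow the state of the mapped virtual interrupt before running the guest.
 */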
static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	bool phys_active;

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
	else
		phys_active = vtimer->irq.level;
	set_vtimer_irq_phys_active(vcpu, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress.  Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

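/*
 * Prepare the timer state when a vcpu is loaded onto a physical CPU: program
 * the host interrupt state, set CNTVOFF, restore the virtual timer registers
 * and re-arm the soft timer used for physical timer emulation.
 */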
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (static_branch_likely(&has_gic_active_state))
		kvm_timer_vcpu_load_gic(vcpu);
	else
		kvm_timer_vcpu_load_nogic(vcpu);

	set_cntvoff(vtimer->cntvoff);

	vtimer_restore_state(vcpu);

	/* Set the background timer for the physical timer emulation. */
	phys_timer_emulate(vcpu);

	/* If the timer fired while we weren't running, inject it now */
	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

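/*
 * Called when a vcpu is put: save the virtual timer state back to memory and
 * cancel the soft timer used for physical timer emulation.
 */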
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	if (unlikely(!timer->enabled))
		return;

	vtimer_save_state(vcpu);

	/*
	 * Cancel the physical timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	soft_timer_cancel(&timer->phys_timer, NULL);

	/*
	 * The kernel may decide to run userspace after calling vcpu_put, so
	 * we reset cntvoff to 0 to ensure a consistent read between user
	 * accesses to the virtual counter and kernel access to the physical
	 * counter in the non-VHE case. For VHE, the virtual counter uses a fixed
	 * virtual offset of zero, so there is no need to zero the CNTVOFF_EL2
	 * register.
	 */
	if (!has_vhe())
		set_cntvoff(0);
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_vtimer_irq_phys_active(vcpu, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7.  We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	vtimer->cnt_ctl = 0;
	ptimer->cnt_ctl = 0;
	kvm_timer_update_state(vcpu);

	if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
		kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);

	return 0;
}

/* Make the updates of cntvoff for all vtimer contexts atomic */
static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
{
	int i;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu *tmp;

	mutex_lock(&kvm->lock);
	kvm_for_each_vcpu(i, tmp, kvm)
		vcpu_vtimer(tmp)->cntvoff = cntvoff;

	/*
	 * When called from the vcpu create path, the CPU being created is not
	 * included in the loop above, so we just set it here as well.
	 */
	vcpu_vtimer(vcpu)->cntvoff = cntvoff;
	mutex_unlock(&kvm->lock);
}

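/*
 * Per-vcpu timer initialisation: synchronise CNTVOFF with the other vcpus,
 * set up the background and physical-timer-emulation hrtimers and install
 * the default PPI numbers.
 */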
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	/* Synchronize cntvoff across all vtimers of a VM. */
	update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
	vcpu_ptimer(vcpu)->cntvoff = 0;

	INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	timer->bg_timer.function = kvm_bg_timer_expire;

	hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	timer->phys_timer.function = kvm_phys_timer_expire;

	vtimer->irq.irq = default_vtimer_irq.irq;
	ptimer->irq.irq = default_ptimer_irq.irq;
}

static void kvm_timer_init_interrupt(void *info)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
		break;
	case KVM_REG_ARM_TIMER_CNT:
		update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
		break;
	case KVM_REG_ARM_TIMER_CVAL:
669
		vtimer->cnt_cval = value;
		break;
	case KVM_REG_ARM_PTIMER_CTL:
		ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
		ptimer->cnt_cval = value;
		break;

	default:
		return -1;
	}

	kvm_timer_update_state(vcpu);
	return 0;
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set the ISTATUS bit if the timer has expired.
	 * Note that according to ARMv8 ARM Issue A.k, the ISTATUS bit is
	 * UNKNOWN when the ENABLE bit is 0, so we chose to set the ISTATUS
	 * bit regardless of the ENABLE bit for our implementation convenience.
	 */
	if (!kvm_timer_compute_delta(timer))
		return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT;
	else
		return timer->cnt_ctl;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		return read_timer_ctl(vtimer);
	case KVM_REG_ARM_TIMER_CNT:
		return kvm_phys_timer_read() - vtimer->cntvoff;
	case KVM_REG_ARM_TIMER_CVAL:
		return vtimer->cnt_cval;
	case KVM_REG_ARM_PTIMER_CTL:
		return read_timer_ctl(ptimer);
	case KVM_REG_ARM_PTIMER_CVAL:
		return ptimer->cnt_cval;
	case KVM_REG_ARM_PTIMER_CNT:
		return kvm_phys_timer_read();
	}
	return (u64)-1;
}

static int kvm_timer_starting_cpu(unsigned int cpu)
{
	kvm_timer_init_interrupt(NULL);
	return 0;
}

static int kvm_timer_dying_cpu(unsigned int cpu)
{
	disable_percpu_irq(host_vtimer_irq);
	return 0;
}

int kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}
	host_vtimer_irq = info->virtual_irq;

	host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
	if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
	    host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
			host_vtimer_irq);
		host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
	}

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest timer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
			  "kvm/arm/timer:starting", kvm_timer_starting_cpu,
			  kvm_timer_dying_cpu);
	return 0;
out_free_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	soft_timer_cancel(&timer->bg_timer, &timer->expired);
	soft_timer_cancel(&timer->phys_timer, NULL);
	kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
}

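/*
 * Claim the timer PPIs with the vgic and check that all vcpus agree on which
 * interrupt numbers the virtual and physical timers use.
 */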
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	int vtimer_irq, ptimer_irq;
	int i, ret;

	vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
	ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
	if (ret)
		return false;

	ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
	ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
	if (ret)
		return false;

	kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
		if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
		    vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
			return false;
	}

	return true;
}

bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();
	struct arch_timer_context *timer;

	if (vintid == vcpu_vtimer(vcpu)->irq.irq)
		timer = vcpu_vtimer(vcpu);
	else
		BUG(); /* We only map the vtimer so far */

	return kvm_timer_should_fire(timer);
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	if (!vgic_initialized(vcpu->kvm))
		return -ENODEV;

	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
				    kvm_arch_timer_get_input_level);
	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/*
 * On a VHE system, we only need to configure the trap on physical timer and
 * counter accesses in EL0 and EL1 once, not for every world switch.
 * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
 * and this makes those bits have no effect for the host kernel execution.
 */
void kvm_timer_init_vhe(void)
{
	/* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */
	u32 cnthctl_shift = 10;
	u64 val;

	/*
	 * Disallow physical timer access for the guest.
	 * Physical counter access is allowed.
	 */
	val = read_sysreg(cnthctl_el2);
	val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
	val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
	write_sysreg(val, cnthctl_el2);
}

static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
		vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
	}
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	int irq;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	if (vcpu->arch.timer_cpu.enabled)
		return -EBUSY;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
		break;
	default:
		return -ENXIO;
	}

	return 0;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer->irq.irq;
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		return 0;
	}

	return -ENXIO;
}