// SPDX-License-Identifier: GPL-2.0-only

/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/export.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/delay.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

/* 14 is the version for Xeon and Pentium 8.4.8*/
#define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
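/*
 * Bits 0-7 of the value above hold the version (0x14) and bits 16-23 hold
 * the highest LVT entry index (KVM_APIC_LVT_NUM - 1), mirroring the layout
 * of the hardware Local APIC version register.
 */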
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* The following defines are not in apicdef.h */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32

#define APIC_BROADCAST			0xFF
#define X2APIC_BROADCAST		0xFFFFFFFFul

static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000
#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

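/*
 * IRR/ISR/TMR are 256-bit registers stored as eight 32-bit chunks spaced
 * 0x10 apart; VEC_POS() and REG_POS() (defined in lapic.h) pick the bit
 * and the chunk for a vector, e.g. vector 0x31 -> bit 17 of the chunk at
 * offset +0x10.
 */
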
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
		apic_test_vector(vector, apic->regs + APIC_IRR);
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
{
	return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
}

static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
	return apic->vcpu->vcpu_id;
}

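/*
 * In xAPIC mode the 8-bit APIC ID is stored in the top byte of the APIC_ID
 * register, while in x2APIC mode KVM uses the full 32-bit vcpu_id as the
 * APIC ID, so IDs above 0xff (e.g. 300) are representable only in x2APIC
 * mode.
 */
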
bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
{
	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);

static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
{
	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
}

static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
	switch (map->mode) {
	case KVM_APIC_MODE_X2APIC: {
		u32 offset = (dest_id >> 16) * 16;
		u32 max_apic_id = map->max_apic_id;

		if (offset <= max_apic_id) {
			u8 cluster_size = min(max_apic_id - offset + 1, 16U);

			offset = array_index_nospec(offset, map->max_apic_id + 1);
			*cluster = &map->phys_map[offset];
			*mask = dest_id & (0xffff >> (16 - cluster_size));
		} else {
			*mask = 0;
		}

		return true;
		}
	case KVM_APIC_MODE_XAPIC_FLAT:
		*cluster = map->xapic_flat_map;
		*mask = dest_id & 0xff;
		return true;
	case KVM_APIC_MODE_XAPIC_CLUSTER:
		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
		*mask = dest_id & 0xf;
		return true;
	default:
		/* Not optimized. */
		return false;
	}
}
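
/*
 * A worked example for the x2APIC case above: logical dest_id 0x00020021
 * names cluster 2 (high 16 bits) with bits 0 and 5 set in the low 16-bit
 * mask, i.e. the first and sixth APICs of that cluster, whose physical
 * IDs start at 2 * 16 = 32 in phys_map.
 */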

static void kvm_apic_map_free(struct rcu_head *rcu)
{
	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);

	kvfree(map);
}

static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;
	u32 max_id = 255; /* enough space for any xAPIC ID */

	mutex_lock(&kvm->arch.apic_map_lock);

	kvm_for_each_vcpu(i, vcpu, kvm)
		if (kvm_apic_present(vcpu))
			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));

	new = kvzalloc(sizeof(struct kvm_apic_map) +
		       sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
		       GFP_KERNEL_ACCOUNT);

	if (!new)
		goto out;

	new->max_apic_id = max_id;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		struct kvm_lapic **cluster;
		u16 mask;
		u32 ldr;
		u8 xapic_id;
		u32 x2apic_id;

		if (!kvm_apic_present(vcpu))
			continue;

		xapic_id = kvm_xapic_id(apic);
		x2apic_id = kvm_x2apic_id(apic);

		/* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
		if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
				x2apic_id <= new->max_apic_id)
			new->phys_map[x2apic_id] = apic;
		/*
		 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
		 * prevent them from masking VCPUs with APIC ID <= 0xff.
		 */
		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
			new->phys_map[xapic_id] = apic;

		if (!kvm_apic_sw_enabled(apic))
			continue;

		ldr = kvm_lapic_get_reg(apic, APIC_LDR);

		if (apic_x2apic_mode(apic)) {
			new->mode |= KVM_APIC_MODE_X2APIC;
		} else if (ldr) {
			ldr = GET_APIC_LOGICAL_ID(ldr);
			if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
			else
				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
		}

		if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
			continue;

		if (mask)
			cluster[ffs(mask) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	mutex_unlock(&kvm->arch.apic_map_lock);

	if (old)
		call_rcu(&old->rcu, kvm_apic_map_free);

	kvm_make_scan_ioapic_request(kvm);
}

static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
	bool enabled = val & APIC_SPIV_APIC_ENABLED;

	kvm_lapic_set_reg(apic, APIC_SPIV, val);

	if (enabled != apic->sw_enabled) {
		apic->sw_enabled = enabled;
		if (enabled)
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);

		recalculate_apic_map(apic->vcpu->kvm);
	}
}

static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	kvm_lapic_set_reg(apic, APIC_LDR, id);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
{
	return ((id >> 4) << 16) | (1 << (id & 0xf));
}
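
/*
 * Example of the x2APIC LDR format computed above: id 0x25 belongs to
 * cluster 0x2 (id >> 4) and is bit 5 (id & 0xf) within it, giving
 * LDR = (0x2 << 16) | (1 << 5) = 0x00020020.
 */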

static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
	u32 ldr = kvm_apic_calc_x2apic_ldr(id);

	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);

	kvm_lapic_set_reg(apic, APIC_ID, id);
	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
	return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!lapic_in_kernel(vcpu))
		return;

	/*
	 * KVM emulates the 82093AA datasheet (with the in-kernel IOAPIC
	 * implementation), which doesn't have an EOI register; some buggy
	 * OSes (e.g. Windows with the Hyper-V role) disable EOI broadcast
	 * in the lapic without checking the IOAPIC version first, and
	 * level-triggered interrupts then never get EOIed in the IOAPIC.
	 */
	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
	    !ioapic_in_kernel(vcpu->kvm))
		v |= APIC_LVR_DIRECTED_EOI;
	kvm_lapic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
	LVT_MASK,	/* part LVTT mask, timer mode mask added at runtime */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
	LINT_MASK, LINT_MASK,	/* LVT0-1 */
	LVT_MASK		/* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
	int vec;
	u32 *reg;

	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		if (*reg)
			return __fls(*reg) + vec;
	}

	return -1;
}

static u8 count_vectors(void *bitmap)
{
	int vec;
	u32 *reg;
	u8 count = 0;

	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		count += hweight32(*reg);
	}

	return count;
}

bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
	u32 i, vec;
	u32 pir_val, irr_val, prev_irr_val;
	int max_updated_irr;

	max_updated_irr = -1;
	*max_irr = -1;

	for (i = vec = 0; i <= 7; i++, vec += 32) {
		pir_val = READ_ONCE(pir[i]);
		irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
		if (pir_val) {
			prev_irr_val = irr_val;
			irr_val |= xchg(&pir[i], 0);
			*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
			if (prev_irr_val != irr_val) {
				max_updated_irr =
					__fls(irr_val ^ prev_irr_val) + vec;
			}
		}
		if (irr_val)
			*max_irr = __fls(irr_val) + vec;
	}

	return ((max_updated_irr != -1) &&
		(max_updated_irr == *max_irr));
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
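
/*
 * Sketch of the contract above: each of the eight posted-interrupt request
 * (PIR) chunks is atomically drained into the matching 32-bit IRR chunk,
 * and the function returns true only when the highest newly-set vector is
 * also the overall highest IRR vector - a hint the caller uses when
 * refreshing the virtual interrupt state.
 */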

bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __kvm_apic_update_irr(pir, apic->regs, max_irr);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);

static inline int apic_search_irr(struct kvm_lapic *apic)
{
	return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that irr_pending is just a hint. It will be always
	 * true with virtual interrupt delivery enabled.
	 */
	if (!apic->irr_pending)
		return -1;

	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	struct kvm_vcpu *vcpu;

	vcpu = apic->vcpu;

	if (unlikely(vcpu->arch.apicv_active)) {
		/* need to update RVI */
		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
		kvm_x86_ops->hwapic_irr_update(vcpu,
				apic_find_highest_irr(apic));
	} else {
		apic->irr_pending = false;
		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
		if (apic_search_irr(apic) != -1)
			apic->irr_pending = true;
	}
}

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	struct kvm_vcpu *vcpu;

	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		return;

	vcpu = apic->vcpu;

	/*
	 * With APIC virtualization enabled, all caching is disabled
	 * because the processor can modify ISR under the hood.  Instead
	 * just set SVI.
	 */
	if (unlikely(vcpu->arch.apicv_active))
		kvm_x86_ops->hwapic_isr_update(vcpu, vec);
	else {
		++apic->isr_count;
		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
		/*
		 * ISR (in service register) bit is set when injecting an interrupt.
		 * The highest vector is injected. Thus the latest bit set matches
		 * the highest bit in ISR.
		 */
		apic->highest_isr_cache = vec;
	}
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that isr_count is always 1, and highest_isr_cache
	 * is always -1, with APIC virtualization enabled.
	 */
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	struct kvm_vcpu *vcpu;

	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		return;

	vcpu = apic->vcpu;

	/*
	 * We do get here for APIC virtualization enabled if the guest
	 * uses the Hyper-V APIC enlightenment.  In this case we may need
	 * to trigger a new interrupt delivery by writing the SVI field;
	 * on the other hand isr_count and highest_isr_cache are unused
	 * and must be left alone.
	 */
	if (unlikely(vcpu->arch.apicv_active))
		kvm_x86_ops->hwapic_isr_update(vcpu,
					       apic_find_highest_isr(apic));
	else {
		--apic->isr_count;
		BUG_ON(apic->isr_count < 0);
		apic->highest_isr_cache = -1;
	}
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	/* This may race with setting of irr in __apic_accept_irq() and
	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
	 * will cause vmexit immediately and the value will be recalculated
	 * on the next vmentry.
	 */
	return apic_find_highest_irr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     struct dest_map *dest_map);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
		     struct dest_map *dest_map)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode, dest_map);
}

int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
		    unsigned long ipi_bitmap_high, u32 min,
		    unsigned long icr, int op_64_bit)
{
	int i;
	struct kvm_apic_map *map;
	struct kvm_vcpu *vcpu;
	struct kvm_lapic_irq irq = {0};
	int cluster_size = op_64_bit ? 64 : 32;
	int count = 0;

	irq.vector = icr & APIC_VECTOR_MASK;
	irq.delivery_mode = icr & APIC_MODE_MASK;
	irq.level = (icr & APIC_INT_ASSERT) != 0;
	irq.trig_mode = icr & APIC_INT_LEVELTRIG;

	if (icr & APIC_DEST_MASK)
		return -KVM_EINVAL;
	if (icr & APIC_SHORT_MASK)
		return -KVM_EINVAL;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (unlikely(!map)) {
		count = -EOPNOTSUPP;
		goto out;
	}

	if (min > map->max_apic_id)
		goto out;
	/* Bits above cluster_size are masked in the caller.  */
	for_each_set_bit(i, &ipi_bitmap_low,
		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
		if (map->phys_map[min + i]) {
			vcpu = map->phys_map[min + i]->vcpu;
			count += kvm_apic_set_irq(vcpu, &irq, NULL);
		}
	}

	min += cluster_size;

	if (min > map->max_apic_id)
		goto out;

	for_each_set_bit(i, &ipi_bitmap_high,
		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
		if (map->phys_map[min + i]) {
			vcpu = map->phys_map[min + i]->vcpu;
			count += kvm_apic_set_irq(vcpu, &irq, NULL);
		}
	}

out:
	rcu_read_unlock();
	return count;
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{
	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				      sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val;

	if (pv_eoi_get_user(vcpu, &val) < 0)
		printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
	int highest_irr;

	if (apic->vcpu->arch.apicv_active)
		highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
	else
		highest_irr = apic_find_highest_irr(apic);
	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
		return -1;
	return highest_irr;
}

static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ? isr : 0;

	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	*new_ppr = ppr;
	if (old_ppr != ppr)
		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);

	return ppr < old_ppr;
}

static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 ppr;

	if (__apic_update_ppr(apic, &ppr) &&
	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
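
/*
 * PPR example: with TPR = 0x30 and highest in-service vector 0x51, the
 * ISR priority class 0x50 wins, so PPR = 0x50 and only vectors whose
 * class exceeds it (0x60 and above) pass apic_has_interrupt_for_ppr().
 */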

void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
{
	apic_update_ppr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
{
	return mda == (apic_x2apic_mode(apic) ?
			X2APIC_BROADCAST : APIC_BROADCAST);
}

static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
{
	if (kvm_apic_broadcast(apic, mda))
		return true;

	if (apic_x2apic_mode(apic))
		return mda == kvm_x2apic_id(apic);

	/*
	 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
	 * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
	 * this allows unique addressing of VCPUs with APIC ID over 0xff.
	 * The 0xff condition is needed because the xAPIC ID is writeable.
	 */
	if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
		return true;

	return mda == kvm_xapic_id(apic);
}

static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
{
	u32 logical_id;

	if (kvm_apic_broadcast(apic, mda))
		return true;

	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);

	if (apic_x2apic_mode(apic))
		return ((logical_id >> 16) == (mda >> 16))
		       && (logical_id & mda & 0xffff) != 0;

	logical_id = GET_APIC_LOGICAL_ID(logical_id);

	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		return (logical_id & mda) != 0;
	case APIC_DFR_CLUSTER:
		return ((logical_id >> 4) == (mda >> 4))
		       && (logical_id & mda & 0xf) != 0;
	default:
		return false;
	}
}

/* The KVM local APIC implementation has two quirks:
 *
 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 *    KVM doesn't do that aliasing.
 *
 *  - in-kernel IOAPIC messages have to be delivered directly to
 *    x2APIC, because the kernel does not support interrupt remapping.
 *    In order to support broadcast without interrupt remapping, x2APIC
 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 *    to X2APIC_BROADCAST.
 *
 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 * important when userspace wants to use x2APIC-format MSIs, because
 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 */
static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
		struct kvm_lapic *source, struct kvm_lapic *target)
{
	bool ipi = source != NULL;

	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
	    !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
		return X2APIC_BROADCAST;

	return dest_id;
}

bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
			   int short_hand, unsigned int dest, int dest_mode)
{
	struct kvm_lapic *target = vcpu->arch.apic;
	u32 mda = kvm_apic_mda(vcpu, dest, source, target);

	ASSERT(target);
	switch (short_hand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == APIC_DEST_PHYSICAL)
			return kvm_apic_match_physical_addr(target, mda);
		else
			return kvm_apic_match_logical_addr(target, mda);
	case APIC_DEST_SELF:
		return target == source;
	case APIC_DEST_ALLINC:
		return true;
	case APIC_DEST_ALLBUT:
		return target != source;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
		       const unsigned long *bitmap, u32 bitmap_size)
{
	u32 mod;
	int i, idx = -1;

	mod = vector % dest_vcpus;

	for (i = 0; i <= mod; i++) {
		idx = find_next_bit(bitmap, bitmap_size, idx + 1);
		BUG_ON(idx == bitmap_size);
	}

	return idx;
}

static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
{
	if (!kvm->arch.disabled_lapic_found) {
		kvm->arch.disabled_lapic_found = true;
		printk(KERN_INFO
		       "Disabled LAPIC found during irq injection\n");
	}
}

static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
		struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
{
	if (kvm->arch.x2apic_broadcast_quirk_disabled) {
		if ((irq->dest_id == APIC_BROADCAST &&
				map->mode != KVM_APIC_MODE_X2APIC))
			return true;
		if (irq->dest_id == X2APIC_BROADCAST)
			return true;
	} else {
		bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);

		if (irq->dest_id == (x2apic_ipi ?
		                     X2APIC_BROADCAST : APIC_BROADCAST))
			return true;
	}

	return false;
}
/* Return true if the interrupt can be handled by using *bitmap as index mask
 * for valid destinations in *dst array.
 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 * Note: we may have zero kvm_lapic destinations when we return true, which
 * means that the interrupt should be dropped.  In this case, *bitmap would be
 * zero and *dst undefined.
 */
static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
		struct kvm_lapic **src, struct kvm_lapic_irq *irq,
		struct kvm_apic_map *map, struct kvm_lapic ***dst,
		unsigned long *bitmap)
{
	int i, lowest;

	if (irq->shorthand == APIC_DEST_SELF && src) {
		*dst = src;
		*bitmap = 1;
		return true;
	} else if (irq->shorthand)
		return false;

	if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
		return false;

	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
		if (irq->dest_id > map->max_apic_id) {
			*bitmap = 0;
		} else {
			u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
			*dst = &map->phys_map[dest_id];
			*bitmap = 1;
		}
		return true;
	}

	*bitmap = 0;
	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
				(u16 *)bitmap))
		return false;

	if (!kvm_lowest_prio_delivery(irq))
		return true;

	if (!kvm_vector_hashing_enabled()) {
		lowest = -1;
		for_each_set_bit(i, bitmap, 16) {
			if (!(*dst)[i])
				continue;
			if (lowest < 0)
				lowest = i;
			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
						(*dst)[lowest]->vcpu) < 0)
				lowest = i;
		}
	} else {
		if (!*bitmap)
			return true;

		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
				bitmap, 16);

		if (!(*dst)[lowest]) {
			kvm_apic_disabled_lapic_found(kvm);
			*bitmap = 0;
			return true;
		}
	}

	*bitmap = (lowest >= 0) ? 1 << lowest : 0;

	return true;
}
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
{
	struct kvm_apic_map *map;
	unsigned long bitmap;
	struct kvm_lapic **dst = NULL;
	int i;
	bool ret;

	*r = -1;

	if (irq->shorthand == APIC_DEST_SELF) {
		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
		return true;
	}

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
	if (ret) {
		*r = 0;
		for_each_set_bit(i, &bitmap, 16) {
			if (!dst[i])
				continue;
			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
		}
	}

	rcu_read_unlock();
	return ret;
}

/*
 * This routine tries to handle interrupts in posted mode, here is how
 * it deals with different cases:
 * - For single-destination interrupts, handle it in posted mode
 * - Else if vector hashing is enabled and it is a lowest-priority
 *   interrupt, handle it in posted mode and use the following mechanism
 *   to find the destination vCPU.
 *	1. For lowest-priority interrupts, store all the possible
 *	   destination vCPUs in an array.
 *	2. Use "guest vector % max number of destination vCPUs" to find
 *	   the right destination vCPU in the array for the lowest-priority
 *	   interrupt.
 * - Otherwise, use remapped mode to inject the interrupt.
 */
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
			struct kvm_vcpu **dest_vcpu)
{
	struct kvm_apic_map *map;
	unsigned long bitmap;
	struct kvm_lapic **dst = NULL;
	bool ret = false;

	if (irq->shorthand)
		return false;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
			hweight16(bitmap) == 1) {
		unsigned long i = find_first_bit(&bitmap, 16);

		if (dst[i]) {
			*dest_vcpu = dst[i]->vcpu;
			ret = true;
		}
	}

	rcu_read_unlock();
	return ret;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     struct dest_map *dest_map)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
				  trig_mode, vector);
	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
		/* fall through */
	case APIC_DM_FIXED:
		if (unlikely(trig_mode && !level))
			break;

		/* FIXME add logic for vcpu on reset */
		if (unlikely(!apic_enabled(apic)))
			break;

		result = 1;

		if (dest_map) {
			__set_bit(vcpu->vcpu_id, dest_map->map);
			dest_map->vectors[vcpu->vcpu_id] = vector;
		}

		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
			if (trig_mode)
				kvm_lapic_set_vector(vector,
						     apic->regs + APIC_TMR);
			else
				kvm_lapic_clear_vector(vector,
						       apic->regs + APIC_TMR);
		}

		if (vcpu->arch.apicv_active)
			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
		else {
			kvm_lapic_set_irr(vector, apic);

			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_REMRD:
		result = 1;
		vcpu->arch.pv.pv_unhalted = 1;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_SMI:
		result = 1;
		kvm_make_request(KVM_REQ_SMI, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		if (!trig_mode || level) {
			result = 1;
			/* assumes that there are only KVM_APIC_INIT/SIPI */
			apic->pending_events = (1UL << KVM_APIC_INIT);
			/* make sure pending_events is visible before sending
			 * the request */
			smp_wmb();
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_STARTUP:
		result = 1;
		apic->sipi_vector = vector;
		/* make sure sipi_vector is visible for the receiver */
		smp_wmb();
		set_bit(KVM_APIC_SIPI, &apic->pending_events);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	int trigger_mode;

	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
	if (!kvm_ioapic_handles_vector(apic, vector))
		return;

	/* Request a KVM exit to inform the userspace IOAPIC. */
	if (irqchip_split(apic->vcpu->kvm)) {
		apic->vcpu->arch.pending_ioapic_eoi = vector;
		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
		return;
	}

	if (apic_test_vector(vector, apic->regs + APIC_TMR))
		trigger_mode = IOAPIC_LEVEL_TRIG;
	else
		trigger_mode = IOAPIC_EDGE_TRIG;

	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
	int vector = apic_find_highest_isr(apic);

	trace_kvm_eoi(apic, vector);

	/*
	 * Not every write to EOI will have a corresponding ISR bit set;
	 * one example is when the kernel checks the timer in setup_IO_APIC.
	 */
	if (vector == -1)
		return vector;

	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
		kvm_hv_synic_send_eoi(apic->vcpu, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
}

/*
 * this interface assumes a trap-like exit, which has already finished
 * desired side effect including vISR and vPPR update.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_eoi(apic, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);

static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
{
	struct kvm_lapic_irq irq;

	irq.vector = icr_low & APIC_VECTOR_MASK;
	irq.delivery_mode = icr_low & APIC_MODE_MASK;
	irq.dest_mode = icr_low & APIC_DEST_MASK;
	irq.level = (icr_low & APIC_INT_ASSERT) != 0;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;
	irq.msi_redir_hint = false;
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}

static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
	ktime_t remaining, now;
	s64 ns;
	u32 tmcct;

	ASSERT(apic != NULL);

	/* if initial count is 0, current count should also be 0 */
	if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
		apic->lapic_timer.period == 0)
		return 0;

	now = ktime_get();
	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
	if (ktime_to_ns(remaining) < 0)
		remaining = 0;

	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
	tmcct = div64_u64(ns,
			 (APIC_BUS_CYCLE_NS * apic->divide_count));

	return tmcct;
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

	switch (offset) {
	case APIC_ARBPRI:
		break;

	case APIC_TMCCT:	/* Timer CCR */
		if (apic_lvtt_tscdeadline(apic))
			return 0;

		val = apic_get_tmcct(apic);
		break;
	case APIC_PROCPRI:
		apic_update_ppr(apic);
		val = kvm_lapic_get_reg(apic, offset);
		break;
	case APIC_TASKPRI:
		report_tpr_access(apic, false);
		/* fall thru */
	default:
		val = kvm_lapic_get_reg(apic, offset);
		break;
	}

	return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}

#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
#define APIC_REGS_MASK(first, count) \
	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
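
/*
 * Each APIC register offset is a multiple of 0x10, so (reg >> 4) maps a
 * register to a unique bit in a 64-bit mask, e.g. APIC_ID (0x20) -> bit 2.
 */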

int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	u64 valid_reg_mask =
		APIC_REG_MASK(APIC_ID) |
		APIC_REG_MASK(APIC_LVR) |
		APIC_REG_MASK(APIC_TASKPRI) |
		APIC_REG_MASK(APIC_PROCPRI) |
		APIC_REG_MASK(APIC_LDR) |
		APIC_REG_MASK(APIC_DFR) |
		APIC_REG_MASK(APIC_SPIV) |
		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
		APIC_REG_MASK(APIC_ESR) |
		APIC_REG_MASK(APIC_ICR) |
		APIC_REG_MASK(APIC_ICR2) |
		APIC_REG_MASK(APIC_LVTT) |
		APIC_REG_MASK(APIC_LVTTHMR) |
		APIC_REG_MASK(APIC_LVTPC) |
		APIC_REG_MASK(APIC_LVT0) |
		APIC_REG_MASK(APIC_LVT1) |
		APIC_REG_MASK(APIC_LVTERR) |
		APIC_REG_MASK(APIC_TMICT) |
		APIC_REG_MASK(APIC_TMCCT) |
		APIC_REG_MASK(APIC_TDCR);

	/* ARBPRI is not valid on x2APIC */
	if (!apic_x2apic_mode(apic))
		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);

	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
		return 1;

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
	case 4:
		memcpy(data, (char *)&result + alignment, len);
		break;
	default:
		printk(KERN_ERR "Local APIC read with len = %x, "
		       "should be 1,2, or 4 instead\n", len);
		break;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return addr >= apic->base_address &&
		addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
		if (!kvm_check_has_quirk(vcpu->kvm,
					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
			return -EOPNOTSUPP;

		memset(data, 0xff, len);
		return 0;
	}

	kvm_lapic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
	tmp1 = tdcr & 0xf;
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);
}
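
/*
 * Worked example of the TDCR decode above: tdcr = 0xb (0b1011) gives
 * tmp1 = 0xb, tmp2 = (0x3 | 0x4) + 1 = 8, and 1 << (8 & 7) = 1,
 * i.e. "divide by 1", matching the SDM's divide-configuration encoding.
 */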

static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
{
	/*
	 * Do not allow the guest to program periodic timers with small
	 * interval, since the hrtimers are not throttled by the host
	 * scheduler.
	 */
	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
		s64 min_period = min_timer_period_us * 1000LL;

		if (apic->lapic_timer.period < min_period) {
			pr_info_ratelimited(
			    "kvm: vcpu %i: requested %lld ns "
			    "lapic timer period limited to %lld ns\n",
			    apic->vcpu->vcpu_id,
			    apic->lapic_timer.period, min_period);
			apic->lapic_timer.period = min_period;
		}
	}
}
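
/*
 * For instance, if the kvm module's min_timer_period_us parameter is 200,
 * a guest asking for a 50 us periodic timer is silently stretched to
 * 200 us (200 * 1000 ns) so it cannot flood the host with hrtimer
 * callbacks.
 */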

static void apic_update_lvtt(struct kvm_lapic *apic)
{
	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
			apic->lapic_timer.timer_mode_mask;

	if (apic->lapic_timer.timer_mode != timer_mode) {
		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
				APIC_LVT_TIMER_TSCDEADLINE)) {
			hrtimer_cancel(&apic->lapic_timer.timer);
			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
			apic->lapic_timer.period = 0;
			apic->lapic_timer.tscdeadline = 0;
		}
		apic->lapic_timer.timer_mode = timer_mode;
		limit_periodic_timer_frequency(apic);
	}
}

/*
 * On APICv, this test will cause a busy wait
 * during a higher-priority task.
 */

static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);

	if (kvm_apic_hw_enabled(apic)) {
		int vec = reg & APIC_VECTOR_MASK;
		void *bitmap = apic->regs + APIC_ISR;

		if (vcpu->arch.apicv_active)
			bitmap = apic->regs + APIC_IRR;

		if (apic_test_vector(vec, bitmap))
			return true;
	}
	return false;
}

static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
{
	u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;

	/*
	 * If the guest TSC is running at a different ratio than the host, then
	 * convert the delay to nanoseconds to achieve an accurate delay.  Note
	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
	 * always for VMX enabled hardware.
	 */
	if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
		__delay(min(guest_cycles,
			nsec_to_cycles(vcpu, timer_advance_ns)));
	} else {
		u64 delay_ns = guest_cycles * 1000000ULL;
		do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
		ndelay(min_t(u32, delay_ns, timer_advance_ns));
	}
}

static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
					      s64 advance_expire_delta)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
	u64 ns;

	/* Do not adjust for tiny fluctuations or large random spikes. */
	if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
	    abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
		return;

	/* too early */
	if (advance_expire_delta < 0) {
		ns = -advance_expire_delta * 1000000ULL;
		do_div(ns, vcpu->arch.virtual_tsc_khz);
		timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
	} else {
	/* too late */
		ns = advance_expire_delta * 1000000ULL;
		do_div(ns, vcpu->arch.virtual_tsc_khz);
		timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
	}

	if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX))
		timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
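
/*
 * Example of the conversion above: a delta of 2400 guest TSC cycles on a
 * 2.4 GHz vCPU (virtual_tsc_khz = 2400000) is 2400 * 10^6 / 2400000 =
 * 1000 ns, of which only 1/8 (LAPIC_TIMER_ADVANCE_ADJUST_STEP) is applied
 * per adjustment to damp oscillation of timer_advance_ns.
 */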

static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u64 guest_tsc, tsc_deadline;

	if (apic->lapic_timer.expired_tscdeadline == 0)
		return;

	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
	apic->lapic_timer.expired_tscdeadline = 0;
	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;

	if (guest_tsc < tsc_deadline)
		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);

	if (lapic_timer_advance_dynamic)
		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}

void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
	if (lapic_timer_int_injected(vcpu))
		__kvm_wait_lapic_expire(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);

static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;

	kvm_apic_local_deliver(apic, APIC_LVTT);
	if (apic_lvtt_tscdeadline(apic))
		ktimer->tscdeadline = 0;
	if (apic_lvtt_oneshot(apic)) {
		ktimer->tscdeadline = 0;
		ktimer->target_expiration = 0;
	}
}

static void apic_timer_expired(struct kvm_lapic *apic)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_timer *ktimer = &apic->lapic_timer;

	if (atomic_read(&apic->lapic_timer.pending))
		return;

	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
		ktimer->expired_tscdeadline = ktimer->tscdeadline;

	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
		if (apic->lapic_timer.timer_advance_ns)
			__kvm_wait_lapic_expire(vcpu);
		kvm_apic_inject_pending_timer_irqs(apic);
		return;
	}

	atomic_inc(&apic->lapic_timer.pending);
	kvm_set_pending_timer(vcpu);
}

static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;
	u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
	u64 ns = 0;
	ktime_t expire;
	struct kvm_vcpu *vcpu = apic->vcpu;
	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
	unsigned long flags;
	ktime_t now;

	if (unlikely(!tscdeadline || !this_tsc_khz))
		return;

	local_irq_save(flags);

	now = ktime_get();
	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());

	ns = (tscdeadline - guest_tsc) * 1000000ULL;
	do_div(ns, this_tsc_khz);

	if (likely(tscdeadline > guest_tsc) &&
	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
		expire = ktime_add_ns(now, ns);
		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
	} else
		apic_timer_expired(apic);

	local_irq_restore(flags);
}
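
/*
 * The conversion above mirrors adjust_lapic_timer_advance(): the distance
 * to the TSC deadline is turned into nanoseconds via
 * ns = cycles * 10^6 / virtual_tsc_khz, and the hrtimer is armed
 * timer_advance_ns early so the interrupt lands on time after vm-entry.
 */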

static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
	ktime_t now, remaining;
	u64 ns_remaining_old, ns_remaining_new;

	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
		* APIC_BUS_CYCLE_NS * apic->divide_count;
	limit_periodic_timer_frequency(apic);

	now = ktime_get();
	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
	if (ktime_to_ns(remaining) < 0)
		remaining = 0;

	ns_remaining_old = ktime_to_ns(remaining);
	ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
	                                   apic->divide_count, old_divisor);

	apic->lapic_timer.tscdeadline +=
		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
		nsec_to_cycles(apic->vcpu, ns_remaining_old);
	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
}

static bool set_target_expiration(struct kvm_lapic *apic)
{
	ktime_t now;
	u64 tscl = rdtsc();

	now = ktime_get();
	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
		* APIC_BUS_CYCLE_NS * apic->divide_count;

	if (!apic->lapic_timer.period) {
		apic->lapic_timer.tscdeadline = 0;
		return false;
	}

	limit_periodic_timer_frequency(apic);

	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
	apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);

	return true;
}

static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
	ktime_t now = ktime_get();
	u64 tscl = rdtsc();
	ktime_t delta;

	/*
	 * Synchronize both deadlines to the same time source or
	 * differences in the periods (caused by differences in the
	 * underlying clocks or numerical approximation errors) will
	 * cause the two to drift apart over time as the errors
	 * accumulate.
	 */
	apic->lapic_timer.target_expiration =
		ktime_add_ns(apic->lapic_timer.target_expiration,
				apic->lapic_timer.period);
	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
		nsec_to_cycles(apic->vcpu, delta);
}

static void start_sw_period(struct kvm_lapic *apic)
{
	if (!apic->lapic_timer.period)
		return;

	if (ktime_after(ktime_get(),
			apic->lapic_timer.target_expiration)) {
		apic_timer_expired(apic);

		if (apic_lvtt_oneshot(apic))
			return;

		advance_periodic_target_expiration(apic);
	}

	hrtimer_start(&apic->lapic_timer.timer,
		apic->lapic_timer.target_expiration,
		HRTIMER_MODE_ABS);
}

bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
	if (!lapic_in_kernel(vcpu))
		return false;

	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);

static void cancel_hv_timer(struct kvm_lapic *apic)
{
	WARN_ON(preemptible());
	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
	kvm_x86_ops->cancel_hv_timer(apic->vcpu);
	apic->lapic_timer.hv_timer_in_use = false;
}

static bool start_hv_timer(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;
	struct kvm_vcpu *vcpu = apic->vcpu;
	bool expired;

	WARN_ON(preemptible());
	if (!kvm_x86_ops->set_hv_timer)
		return false;

	if (!ktimer->tscdeadline)
		return false;

	if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
		return false;

	ktimer->hv_timer_in_use = true;
	hrtimer_cancel(&ktimer->timer);

	/*
	 * To simplify handling the periodic timer, leave the hv timer running
	 * even if the deadline timer has expired, i.e. rely on the resulting
	 * VM-Exit to recompute the periodic timer's target expiration.
	 */
	if (!apic_lvtt_period(apic)) {
		/*
		 * Cancel the hv timer if the sw timer fired while the hv timer
		 * was being programmed, or if the hv timer itself expired.
		 */
		if (atomic_read(&ktimer->pending)) {
			cancel_hv_timer(apic);
		} else if (expired) {
			apic_timer_expired(apic);
			cancel_hv_timer(apic);
		}
	}

	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);

	return true;
}

static void start_sw_timer(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;

	WARN_ON(preemptible());
	if (apic->lapic_timer.hv_timer_in_use)
		cancel_hv_timer(apic);
	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
		return;

	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
		start_sw_period(apic);
	else if (apic_lvtt_tscdeadline(apic))
		start_sw_tscdeadline(apic);
	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
}

static void restart_apic_timer(struct kvm_lapic *apic)
{
	preempt_disable();

	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
		goto out;

	if (!start_hv_timer(apic))
		start_sw_timer(apic);
out:
	preempt_enable();
}

void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	preempt_disable();
	/* If the preempt notifier has already run, it also called apic_timer_expired */
	if (!apic->lapic_timer.hv_timer_in_use)
		goto out;
	WARN_ON(swait_active(&vcpu->wq));
	cancel_hv_timer(apic);
	apic_timer_expired(apic);

	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
		advance_periodic_target_expiration(apic);
		restart_apic_timer(apic);
	}
out:
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);

void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
	restart_apic_timer(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);

void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	preempt_disable();
	/* Possibly the TSC deadline timer is not enabled yet */
	if (apic->lapic_timer.hv_timer_in_use)
		start_sw_timer(apic);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);

void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
	restart_apic_timer(apic);
}

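/*
 * Program a freshly written timer: compute the target expiration for
 * one-shot/periodic mode, then arm whichever timer backend is available.
 */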
static void start_apic_timer(struct kvm_lapic *apic)
{
	atomic_set(&apic->lapic_timer.pending, 0);

	if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
	    && !set_target_expiration(apic))
		return;

	restart_apic_timer(apic);
}

static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
	bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);

	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
		if (lvt0_in_nmi_mode)
			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
		else
			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
	}
}

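/*
 * Emulate a 32-bit write to a local APIC register, performing any side
 * effects (IPIs, timer reprogramming, LVT masking).  Returns non-zero for
 * accesses that are reserved or invalid in the current APIC mode.
 */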
int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_xapic_id(apic, val >> 24);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic)) {
			kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
			recalculate_apic_map(apic->vcpu->kvm);
		} else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_spiv(apic, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
				lvt_val = kvm_lapic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			apic_update_lvtt(apic);
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		val &= ~(1 << 12);
		apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
		kvm_lapic_set_reg(apic, APIC_ICR, val);
		break;

	case APIC_ICR2:
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		kvm_lapic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		/* fall through */
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		kvm_lapic_set_reg(apic, reg, val);

		break;

	case APIC_LVTT:
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
		kvm_lapic_set_reg(apic, APIC_LVTT, val);
		apic_update_lvtt(apic);
		break;

	case APIC_TMICT:
		if (apic_lvtt_tscdeadline(apic))
			break;

		hrtimer_cancel(&apic->lapic_timer.timer);
		kvm_lapic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR: {
		uint32_t old_divisor = apic->divide_count;

		kvm_lapic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		if (apic->divide_count != old_divisor &&
				apic->lapic_timer.period) {
			hrtimer_cancel(&apic->lapic_timer.timer);
			update_target_expiration(apic, old_divisor);
			restart_apic_timer(apic);
		}
		break;
	}
	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0)
			ret = 1;
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);

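/* MMIO front end for xAPIC register writes; see kvm_lapic_reg_write(). */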
static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
		if (!kvm_check_has_quirk(vcpu->kvm,
					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
			return -EOPNOTSUPP;

		return 0;
	}

	/*
	 * APIC registers must be aligned on a 128-bit boundary.
	 * 32/64/128-bit registers must be accessed through 32-bit loads
	 * and stores.  Refer to SDM 8.4.1.
	 */
	if (len != 4 || (offset & 0xf))
		return 0;

	val = *(u32*)data;

	kvm_lapic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
	u32 val = 0;

	/* hw has done the conditional check and inst decode */
	offset &= 0xff0;

	kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);

	/* TODO: optimize to just emulate side effect w/o one more write */
	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!vcpu->arch.apic)
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
		static_key_slow_dec_deferred(&apic_hw_disabled);

	if (!apic->sw_enabled)
		static_key_slow_dec_deferred(&apic_sw_disabled);

	if (apic->regs)
		free_page((unsigned long)apic->regs);

	kfree(apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu) ||
		!apic_lvtt_tscdeadline(apic))
		return 0;

	return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
	u64 tpr;

	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);

	return (tpr & 0xf0) >> 4;
}

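/*
 * Process a write to the IA32_APIC_BASE MSR: toggle the hw-enabled jump
 * label, handle xAPIC/x2APIC mode transitions, and latch the (fixed)
 * base address.
 */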
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
	u64 old_value = vcpu->arch.apic_base;
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic)
		value |= MSR_IA32_APICBASE_BSP;

	vcpu->arch.apic_base = value;

	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
		kvm_update_cpuid(vcpu);

	if (!apic)
		return;

	/* update jump label if enable bit changes */
	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
		if (value & MSR_IA32_APICBASE_ENABLE) {
			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
			static_key_slow_dec_deferred(&apic_hw_disabled);
		} else {
			static_key_slow_inc(&apic_hw_disabled.key);
			recalculate_apic_map(vcpu->kvm);
		}
	}

	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);

	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
		kvm_x86_ops->set_virtual_apic_mode(vcpu);

	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

	if ((value & MSR_IA32_APICBASE_ENABLE) &&
	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
		pr_warn_once("APIC base relocation is unsupported by KVM");
}

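/*
 * Bring the local APIC back to its power-up register state.  For an INIT
 * (init_event == true) the APIC base and APIC ID are left untouched;
 * everything else is reinitialized.
 */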
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int i;

	if (!apic)
		return;

	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!init_event) {
		kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
		                         MSR_IA32_APICBASE_ENABLE);
		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
	}
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
	apic_update_lvtt(apic);
	if (kvm_vcpu_is_reset_bsp(vcpu) &&
	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
		kvm_lapic_set_reg(apic, APIC_LVT0,
			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));

	kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
	apic_set_spiv(apic, 0xff);
	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
	if (!apic_x2apic_mode(apic))
		kvm_apic_set_ldr(apic, 0);
	kvm_lapic_set_reg(apic, APIC_ESR, 0);
	kvm_lapic_set_reg(apic, APIC_ICR, 0);
	kvm_lapic_set_reg(apic, APIC_ICR2, 0);
	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
	for (i = 0; i < 8; i++) {
		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	apic->irr_pending = vcpu->arch.apicv_active;
	apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (kvm_vcpu_is_bsp(vcpu))
		kvm_lapic_set_base(vcpu,
				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);
	if (vcpu->arch.apicv_active) {
		kvm_x86_ops->apicv_post_state_restore(vcpu);
		kvm_x86_ops->hwapic_irr_update(vcpu, -1);
		kvm_x86_ops->hwapic_isr_update(vcpu, -1);
	}

	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;
}

/*
 *----------------------------------------------------------------------
 * timer interface
 *----------------------------------------------------------------------
 */

static bool lapic_is_periodic(struct kvm_lapic *apic)
{
	return apic_lvtt_period(apic);
}

int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
		return atomic_read(&apic->lapic_timer.pending);

	return 0;
}

int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
	int vector, mode, trig_mode;

	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
					NULL);
	}
	return 0;
}

void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read     = apic_mmio_read,
	.write    = apic_mmio_write,
};

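/*
 * hrtimer callback for the software timer path: inject the expiry and,
 * for periodic mode, push the expiration forward by one period and
 * restart the hrtimer.
 */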
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);

	apic_timer_expired(apic);

	if (lapic_is_periodic(apic)) {
		advance_periodic_target_expiration(apic);
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}

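/*
 * Allocate and initialize the in-kernel local APIC for a new vCPU,
 * including its backing register page and hrtimer.  A timer_advance_ns
 * of -1 selects the self-tuning advance logic.
 */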
int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS_HARD);
	apic->lapic_timer.timer.function = apic_timer_fn;
	if (timer_advance_ns == -1) {
		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
		lapic_timer_advance_dynamic = true;
	} else {
		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
		lapic_timer_advance_dynamic = false;
	}

	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
	 * thinking that APIC state has changed.
	 */
	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:
	kfree(apic);
	vcpu->arch.apic = NULL;
nomem:
	return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 ppr;

	if (!kvm_apic_hw_enabled(apic))
		return -1;

	__apic_update_ppr(apic, &ppr);
	return apic_has_interrupt_for_ppr(apic, ppr);
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
	int r = 0;

	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
		r = 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
		r = 1;
	return r;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (atomic_read(&apic->lapic_timer.pending) > 0) {
		kvm_apic_inject_pending_timer_irqs(apic);
		atomic_set(&apic->lapic_timer.pending, 0);
	}
}

int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
	int vector = kvm_apic_has_interrupt(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 ppr;

	if (vector == -1)
		return -1;

	/*
	 * We get here even with APIC virtualization enabled, if doing
	 * nested virtualization and L1 runs with the "acknowledge interrupt
	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
	 * because the process would deliver it through the IDT.
	 */

	apic_clear_irr(vector, apic);
	if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
		/*
		 * For auto-EOI interrupts, there might be another pending
		 * interrupt above PPR, so check whether to raise another
		 * KVM_REQ_EVENT.
		 */
		apic_update_ppr(apic);
	} else {
		/*
		 * For normal interrupts, PPR has been raised and there cannot
		 * be a higher-priority pending interrupt---except if there was
		 * a concurrent interrupt injection, but that would have
		 * triggered KVM_REQ_EVENT already.
		 */
		apic_set_isr(vector, apic);
		__apic_update_ppr(apic, &ppr);
	}

	return vector;
}

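/*
 * Translate the APIC ID and LDR in a userspace-visible kvm_lapic_state
 * between the xAPIC register layout and x2APIC semantics, in either
 * direction depending on @set.
 */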
static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
		struct kvm_lapic_state *s, bool set)
{
	if (apic_x2apic_mode(vcpu->arch.apic)) {
		u32 *id = (u32 *)(s->regs + APIC_ID);
		u32 *ldr = (u32 *)(s->regs + APIC_LDR);

		if (vcpu->kvm->arch.x2apic_format) {
			if (*id != vcpu->vcpu_id)
				return -EINVAL;
		} else {
			if (set)
				*id >>= 24;
			else
				*id <<= 24;
		}

		/* In x2APIC mode, the LDR is fixed and based on the id */
		if (set)
			*ldr = kvm_apic_calc_x2apic_ldr(*id);
	}

	return 0;
}

int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
	return kvm_apic_state_fixup(vcpu, s, false);
}

int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int r;

	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));

	r = kvm_apic_state_fixup(vcpu, s, true);
	if (r)
		return r;
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));

	recalculate_apic_map(vcpu->kvm);
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	apic_update_lvtt(apic);
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
	update_divide_count(apic);
	start_apic_timer(apic);
	apic->irr_pending = true;
	apic->isr_count = vcpu->arch.apicv_active ?
				1 : count_vectors(apic->regs + APIC_ISR);
	apic->highest_isr_cache = -1;
	if (vcpu->arch.apicv_active) {
		kvm_x86_ops->apicv_post_state_restore(vcpu);
		kvm_x86_ops->hwapic_irr_update(vcpu,
				apic_find_highest_irr(apic));
		kvm_x86_ops->hwapic_isr_update(vcpu,
				apic_find_highest_isr(apic));
	}
	kvm_make_request(KVM_REQ_EVENT, vcpu);
	if (ioapic_in_kernel(vcpu->kvm))
		kvm_rtc_eoi_tracking_restore_one(vcpu);

	vcpu->arch.apic_arb_prio = 0;

	return 0;
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	struct hrtimer *timer;

	if (!lapic_in_kernel(vcpu) ||
		kvm_can_post_timer_interrupt(vcpu))
		return;

	timer = &vcpu->arch.apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
}

/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether guest triggered PV EOI since the
 * last entry. If yes, set EOI on guests's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;
	int vector;
	/*
	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
	 *
	 * KVM_APIC_PV_EOI_PENDING is unset:
	 * 	-> host disabled PV EOI.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
	 * 	-> host enabled PV EOI, guest did not execute EOI yet.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 * 	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from performance point of view,
	 * this makes sure pv eoi is only enabled when we know it's safe.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				  sizeof(u32)))
		return;

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
		 */
		return;
	}

	pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				sizeof(u32));
}

int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	if (vapic_addr) {
		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
					&vcpu->arch.apic->vapic_cache,
					vapic_addr, sizeof(u32)))
			return -EINVAL;
		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	} else {
		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	}

	vcpu->arch.apic->vapic_addr = vapic_addr;
	return 0;
}

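/*
 * MSR front ends for x2APIC register access.  ICR is a single 64-bit
 * register in x2APIC mode, so it is split across ICR/ICR2 here and
 * ICR2 itself is not directly addressable.
 */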
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;

	if (reg == APIC_ICR2)
		return 1;

	/* if this is ICR write vector before command */
	if (reg == APIC_ICR)
		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return kvm_lapic_reg_write(apic, reg, (u32)data);
}

int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;

	if (reg == APIC_DFR || reg == APIC_ICR2)
		return 1;

	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu))
		return 1;

	/* if this is ICR write vector before command */
	if (reg == APIC_ICR)
		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return kvm_lapic_reg_write(apic, reg, (u32)data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 low, high = 0;

	if (!lapic_in_kernel(vcpu))
		return 1;

	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

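/*
 * Handle a guest write to the PV EOI enable MSR (MSR_KVM_PV_EOI_EN):
 * validate alignment, record the value, and (re)initialize the gfn->hva
 * cache for the shared PV EOI flag when the feature is enabled.
 */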
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
	u64 addr = data & ~KVM_MSR_ENABLED;
	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
	unsigned long new_len;

	if (!IS_ALIGNED(addr, 4))
		return 1;

	vcpu->arch.pv_eoi.msr_val = data;
	if (!pv_eoi_enabled(vcpu))
		return 0;

	if (addr == ghc->gpa && len <= ghc->len)
		new_len = ghc->len;
	else
		new_len = len;

	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
}

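/*
 * Process latched INIT/SIPI events for this vCPU, observing the states
 * in which INIT is blocked (SMM, plus vendor-specific conditions checked
 * via apic_init_signal_blocked).
 */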
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u8 sipi_vector;
	unsigned long pe;

	if (!lapic_in_kernel(vcpu) || !apic->pending_events)
		return;

	/*
	 * INITs are latched while CPU is in specific states
	 * (SMM, VMX non-root mode, SVM with GIF=0).
	 * Because a CPU cannot be in these states immediately
	 * after it has processed an INIT signal (and thus in
	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
	 * and leave the INIT pending.
	 */
	if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) {
		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
		if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
		return;
	}

	pe = xchg(&apic->pending_events, 0);
	if (test_bit(KVM_APIC_INIT, &pe)) {
		kvm_vcpu_reset(vcpu, true);
		if (kvm_vcpu_is_bsp(apic->vcpu))
			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		else
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
	}
	if (test_bit(KVM_APIC_SIPI, &pe) &&
	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
		/* evaluate pending_events before reading the vector */
		smp_rmb();
		sipi_vector = apic->sipi_vector;
		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}
}

void kvm_lapic_init(void)
{
	/* do not patch jump label more than once per second */
	jump_label_rate_limit(&apic_hw_disabled, HZ);
	jump_label_rate_limit(&apic_sw_disabled, HZ);
}

void kvm_lapic_exit(void)
{
	static_key_deferred_flush(&apic_hw_disabled);
	static_key_deferred_flush(&apic_sw_disabled);
}