kvm_host.h 29.3 KB
Newer Older
1 2
#ifndef __KVM_HOST_H
#define __KVM_HOST_H
A
Avi Kivity 已提交
3 4 5 6 7 8 9

/*
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/types.h>
10
#include <linux/hardirq.h>
A
Avi Kivity 已提交
11 12 13
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
M
Markus Rechberger 已提交
14 15
#include <linux/signal.h>
#include <linux/sched.h>
16
#include <linux/bug.h>
A
Avi Kivity 已提交
17
#include <linux/mm.h>
18
#include <linux/mmu_notifier.h>
19
#include <linux/preempt.h>
20
#include <linux/msi.h>
21
#include <linux/slab.h>
22
#include <linux/rcupdate.h>
23
#include <linux/ratelimit.h>
X
Xiao Guangrong 已提交
24
#include <linux/err.h>
25
#include <linux/irqflags.h>
26
#include <linux/context_tracking.h>
A
Alexey Dobriyan 已提交
27
#include <asm/signal.h>
A
Avi Kivity 已提交
28 29

#include <linux/kvm.h>
I
Ingo Molnar 已提交
30
#include <linux/kvm_para.h>
A
Avi Kivity 已提交
31

32
#include <linux/kvm_types.h>
33

34
#include <asm/kvm_host.h>
35

A
Avi Kivity 已提交
36 37 38 39
#ifndef KVM_MMIO_SIZE
#define KVM_MMIO_SIZE 8
#endif

40 41 42 43 44 45 46
/*
 * Bits 16 ~ 31 of kvm_memory_region::flags are used internally
 * by kvm; the other bits are visible to userspace and are defined in
 * include/linux/kvm.h.
 */
#define KVM_MEMSLOT_INVALID	(1UL << 16)

47 48
/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS	2
A
Avi Kivity 已提交
49

50 51
/*
 * For a normal pfn, the highest 12 bits should be zero,
 * so we can mask bit 62 ~ bit 52 to indicate the error pfn,
 * and mask bit 63 to indicate the noslot pfn.
 */
55 56 57
#define KVM_PFN_ERR_MASK	(0x7ffULL << 52)
#define KVM_PFN_ERR_NOSLOT_MASK	(0xfffULL << 52)
#define KVM_PFN_NOSLOT		(0x1ULL << 63)
58 59 60

#define KVM_PFN_ERR_FAULT	(KVM_PFN_ERR_MASK)
#define KVM_PFN_ERR_HWPOISON	(KVM_PFN_ERR_MASK + 1)
61
#define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)
62

63 64 65 66
/*
 * error pfns indicate that the gfn is in slot but faild to
 * translate it to pfn on host.
 */
67
static inline bool is_error_pfn(pfn_t pfn)
X
Xiao Guangrong 已提交
68
{
69
	return !!(pfn & KVM_PFN_ERR_MASK);
X
Xiao Guangrong 已提交
70 71
}

72 73 74 75 76 77
/*
 * error_noslot pfns indicate that the gfn can not be
 * translated to a pfn - either it is not in a memslot,
 * or translating it to a pfn failed.
 */
static inline bool is_error_noslot_pfn(pfn_t pfn)
{
	/* Covers both the error bits (62:52) and the noslot bit (63). */
	return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
}

82 83
/* noslot pfn indicates that the gfn is not in any memslot. */
static inline bool is_noslot_pfn(pfn_t pfn)
{
	return pfn == KVM_PFN_NOSLOT;
}

88 89
#define KVM_HVA_ERR_BAD		(PAGE_OFFSET)
#define KVM_HVA_ERR_RO_BAD	(PAGE_OFFSET + PAGE_SIZE)
X
Xiao Guangrong 已提交
90 91 92

static inline bool kvm_is_error_hva(unsigned long addr)
{
93
	return addr >= PAGE_OFFSET;
X
Xiao Guangrong 已提交
94 95
}

96 97
#define KVM_ERR_PTR_BAD_PAGE	(ERR_PTR(-ENOENT))

98
/* An error "page" is an ERR_PTR-encoded errno, not a real struct page. */
static inline bool is_error_page(struct page *page)
{
	return IS_ERR(page);
}

103 104 105
/*
 * vcpu->requests bit members
 */
106
#define KVM_REQ_TLB_FLUSH          0
107
#define KVM_REQ_MIGRATE_TIMER      1
108
#define KVM_REQ_REPORT_TPR_ACCESS  2
109
#define KVM_REQ_MMU_RELOAD         3
J
Joerg Roedel 已提交
110
#define KVM_REQ_TRIPLE_FAULT       4
111
#define KVM_REQ_PENDING_TIMER      5
112
#define KVM_REQ_UNHALT             6
113
#define KVM_REQ_MMU_SYNC           7
Z
Zachary Amsden 已提交
114
#define KVM_REQ_CLOCK_UPDATE       8
115
#define KVM_REQ_KICK               9
116
#define KVM_REQ_DEACTIVATE_FPU    10
117
#define KVM_REQ_EVENT             11
118
#define KVM_REQ_APF_HALT          12
G
Glauber Costa 已提交
119
#define KVM_REQ_STEAL_UPDATE      13
A
Avi Kivity 已提交
120
#define KVM_REQ_NMI               14
121 122 123 124 125 126 127
#define KVM_REQ_PMU               15
#define KVM_REQ_PMI               16
#define KVM_REQ_WATCHDOG          17
#define KVM_REQ_MASTERCLOCK_UPDATE 18
#define KVM_REQ_MCLOCK_INPROGRESS 19
#define KVM_REQ_EPR_EXIT          20
#define KVM_REQ_SCAN_IOAPIC       21
A
Avi Kivity 已提交
128

129 130
#define KVM_USERSPACE_IRQ_SOURCE_ID		0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
131

132
struct kvm;
A
Avi Kivity 已提交
133
struct kvm_vcpu;
134
extern struct kmem_cache *kvm_vcpu_cache;
A
Avi Kivity 已提交
135

136 137 138
extern raw_spinlock_t kvm_lock;
extern struct list_head vm_list;

139 140 141 142 143 144
/* One device registration on an I/O bus: [addr, addr + len) -> dev. */
struct kvm_io_range {
	gpa_t addr;			/* guest-physical base of the range */
	int len;			/* length of the range in bytes */
	struct kvm_io_device *dev;	/* device handling accesses in range */
};

145
#define NR_IOBUS_DEVS 1000
146

147 148
struct kvm_io_bus {
	int                   dev_count;
149
	struct kvm_io_range range[];
150 151
};

M
Marcelo Tosatti 已提交
152 153 154
/* The distinct address spaces an I/O bus can serve. */
enum kvm_bus {
	KVM_MMIO_BUS,
	KVM_PIO_BUS,
	KVM_VIRTIO_CCW_NOTIFY_BUS,
	KVM_NR_BUSES
};

int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
		     int len, const void *val);
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
162
		    void *val);
163 164
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
			    int len, struct kvm_io_device *dev);
M
Marcelo Tosatti 已提交
165 166
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
			      struct kvm_io_device *dev);
167

168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
#ifdef CONFIG_KVM_ASYNC_PF
/* One outstanding asynchronous page fault being serviced for a vcpu. */
struct kvm_async_pf {
	struct work_struct work;	/* deferred work that faults the page in */
	struct list_head link;		/* NOTE(review): presumably links onto the vcpu's done list — confirm in virt/kvm/async_pf.c */
	struct list_head queue;		/* NOTE(review): presumably links onto the vcpu's queue list — confirm */
	struct kvm_vcpu *vcpu;		/* vcpu that took the fault */
	struct mm_struct *mm;		/* address space to fault the page in */
	gva_t gva;			/* faulting guest virtual address */
	unsigned long addr;		/* host virtual address to fault in */
	struct kvm_arch_async_pf arch;	/* arch-specific bookkeeping */
	struct page *page;		/* page once it has been brought in */
	bool done;			/* completion flag */
};

void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
		       struct kvm_arch_async_pf *arch);
186
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
187 188
#endif

189 190 191
/* Values for vcpu->mode, tracking what the vcpu thread is doing. */
enum {
	OUTSIDE_GUEST_MODE,
	IN_GUEST_MODE,
	EXITING_GUEST_MODE,
	READING_SHADOW_PAGE_TABLES,
};

A
Avi Kivity 已提交
196 197 198 199 200 201 202 203 204 205
/*
 * Sometimes a large or cross-page mmio needs to be broken up into separate
 * exits for userspace servicing.
 */
/* One piece of an mmio access that must be completed by userspace. */
struct kvm_mmio_fragment {
	gpa_t gpa;	/* guest-physical address of this fragment */
	void *data;	/* kernel buffer backing the fragment */
	unsigned len;	/* fragment length in bytes */
};

206 207
struct kvm_vcpu {
	struct kvm *kvm;
208
#ifdef CONFIG_PREEMPT_NOTIFIERS
209
	struct preempt_notifier preempt_notifier;
210
#endif
211
	int cpu;
212
	int vcpu_id;
213 214
	int srcu_idx;
	int mode;
215
	unsigned long requests;
J
Jan Kiszka 已提交
216
	unsigned long guest_debug;
217 218 219

	struct mutex mutex;
	struct kvm_run *run;
220

221
	int fpu_active;
222
	int guest_fpu_loaded, guest_xcr0_loaded;
223
	wait_queue_head_t wq;
224
	struct pid *pid;
225 226 227 228
	int sigset_active;
	sigset_t sigset;
	struct kvm_vcpu_stat stat;

229
#ifdef CONFIG_HAS_IOMEM
230 231 232
	int mmio_needed;
	int mmio_read_completed;
	int mmio_is_write;
A
Avi Kivity 已提交
233 234 235
	int mmio_cur_fragment;
	int mmio_nr_fragments;
	struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
236
#endif
A
Avi Kivity 已提交
237

238 239 240 241 242 243 244 245 246
#ifdef CONFIG_KVM_ASYNC_PF
	struct {
		u32 queued;
		struct list_head queue;
		struct list_head done;
		spinlock_t lock;
	} async_pf;
#endif

247 248 249 250 251 252 253 254 255 256 257 258
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
	/*
	 * Cpu relax intercept or pause loop exit optimization
	 * in_spin_loop: set when a vcpu does a pause loop exit
	 *  or cpu relax intercepted.
	 * dy_eligible: indicates whether vcpu is eligible for directed yield.
	 */
	struct {
		bool in_spin_loop;
		bool dy_eligible;
	} spin_loop;
#endif
259
	bool preempted;
260 261 262
	struct kvm_vcpu_arch arch;
};

263 264 265 266 267
/*
 * Atomically flip vcpu->mode from IN_GUEST_MODE to EXITING_GUEST_MODE.
 * Returns the mode actually observed (cmpxchg semantics), so the caller
 * can tell whether the vcpu really was in guest mode.
 */
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
	return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}

268 269 270 271 272 273
/*
 * Some of the bitops functions do not support too long bitmaps.
 * This number must be determined not to exceed such limits.
 */
#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)

A
Avi Kivity 已提交
274 275 276 277
struct kvm_memory_slot {
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long *dirty_bitmap;
278
	struct kvm_arch_memory_slot arch;
279
	unsigned long userspace_addr;
280
	u32 flags;
281
	short id;
A
Avi Kivity 已提交
282 283
};

284 285 286 287 288
/*
 * Size in bytes of a memslot's dirty bitmap: one bit per page, rounded
 * up to a whole number of unsigned longs so the bitops helpers work on it.
 */
static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
{
	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
}

289 290
struct kvm_kernel_irq_routing_entry {
	u32 gsi;
291
	u32 type;
292
	int (*set)(struct kvm_kernel_irq_routing_entry *e,
293 294
		   struct kvm *kvm, int irq_source_id, int level,
		   bool line_status);
295 296 297 298 299
	union {
		struct {
			unsigned irqchip;
			unsigned pin;
		} irqchip;
S
Sheng Yang 已提交
300
		struct msi_msg msi;
301
	};
302 303 304
	struct hlist_node link;
};

305
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
306

307
struct kvm_irq_routing_table {
308
	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
309 310 311 312 313 314 315
	struct kvm_kernel_irq_routing_entry *rt_entries;
	u32 nr_rt_entries;
	/*
	 * Array indexed by gsi. Each entry contains list of irq chips
	 * the gsi is connected to.
	 */
	struct hlist_head map[0];
316 317
};

318 319 320 321 322 323
#else

struct kvm_irq_routing_table {};

#endif

324 325 326 327
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif

328
#ifndef KVM_MEM_SLOTS_NUM
329
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
330 331
#endif

332 333 334 335 336
/*
 * Note:
 * memslots are not sorted by id anymore, please use id_to_memslot()
 * to get the memslot by its id.
 */
337
struct kvm_memslots {
338
	u64 generation;
339
	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
340
	/* The mapping table from slot id to the index in memslots[]. */
341
	short id_to_index[KVM_MEM_SLOTS_NUM];
342 343
};

A
Avi Kivity 已提交
344
struct kvm {
345
	spinlock_t mmu_lock;
346
	struct mutex slots_lock;
347
	struct mm_struct *mm; /* userspace tied to this vm */
348
	struct kvm_memslots *memslots;
349
	struct srcu_struct srcu;
350 351 352
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
	u32 bsp_vcpu_id;
#endif
R
Rusty Russell 已提交
353
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
354
	atomic_t online_vcpus;
355
	int last_boosted_vcpu;
356
	struct list_head vm_list;
357
	struct mutex lock;
M
Marcelo Tosatti 已提交
358
	struct kvm_io_bus *buses[KVM_NR_BUSES];
G
Gregory Haskins 已提交
359 360 361 362
#ifdef CONFIG_HAVE_KVM_EVENTFD
	struct {
		spinlock_t        lock;
		struct list_head  items;
363 364
		struct list_head  resampler_list;
		struct mutex      resampler_lock;
G
Gregory Haskins 已提交
365
	} irqfds;
G
Gregory Haskins 已提交
366
	struct list_head ioeventfds;
G
Gregory Haskins 已提交
367
#endif
368
	struct kvm_vm_stat stat;
369
	struct kvm_arch arch;
I
Izik Eidus 已提交
370
	atomic_t users_count;
371 372
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
373 374
	spinlock_t ring_lock;
	struct list_head coalesced_zones;
375
#endif
376

377
	struct mutex irq_lock;
378
#ifdef CONFIG_HAVE_KVM_IRQCHIP
379 380 381 382
	/*
	 * Update side is protected by irq_lock and,
	 * if configured, irqfds.lock.
	 */
A
Arnd Bergmann 已提交
383
	struct kvm_irq_routing_table __rcu *irq_routing;
384
	struct hlist_head mask_notifier_list;
385
	struct hlist_head irq_ack_notifier_list;
386 387
#endif

388
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
389 390 391 392
	struct mmu_notifier mmu_notifier;
	unsigned long mmu_notifier_seq;
	long mmu_notifier_count;
#endif
393
	long tlbs_dirty;
394
	struct list_head devices;
A
Avi Kivity 已提交
395 396
};

397 398 399 400 401 402 403 404 405
#define kvm_err(fmt, ...) \
	pr_err("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_info(fmt, ...) \
	pr_info("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_debug(fmt, ...) \
	pr_debug("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_pr_unimpl(fmt, ...) \
	pr_err_ratelimited("kvm [%i]: " fmt, \
			   task_tgid_nr(current), ## __VA_ARGS__)
406

407 408 409
/* The guest did something we don't support. */
#define vcpu_unimpl(vcpu, fmt, ...)					\
	kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
A
Avi Kivity 已提交
410

411 412 413 414 415 416 417
/*
 * Fetch vcpu i from the VM's vcpu array.  The smp_rmb() presumably pairs
 * with a barrier on the vcpu-creation side so that a reader who saw the
 * new online_vcpus count also sees the initialized array entry - confirm
 * against kvm_vm_ioctl_create_vcpu().  Callers are expected to pass
 * i < atomic_read(&kvm->online_vcpus) (see kvm_for_each_vcpu).
 */
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
	smp_rmb();
	return kvm->vcpus[i];
}

#define kvm_for_each_vcpu(idx, vcpup, kvm) \
418 419 420 421
	for (idx = 0; \
	     idx < atomic_read(&kvm->online_vcpus) && \
	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
	     idx++)
422

423 424
#define kvm_for_each_memslot(memslot, slots)	\
	for (memslot = &slots->memslots[0];	\
425 426
	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
		memslot++)
427

R
Rusty Russell 已提交
428 429 430
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);

431
int __must_check vcpu_load(struct kvm_vcpu *vcpu);
432 433
void vcpu_put(struct kvm_vcpu *vcpu);

434
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
435 436 437 438 439 440 441 442 443 444 445 446
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
/* No irq routing support: irqfd initialization is a successful no-op. */
static inline int kvm_irqfd_init(void)
{
	return 0;
}

/* No irq routing support: nothing to tear down. */
static inline void kvm_irqfd_exit(void)
{
}
#endif
447
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
448
		  struct module *module);
449
void kvm_exit(void);
A
Avi Kivity 已提交
450

I
Izik Eidus 已提交
451 452
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
453 454
void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
		     u64 last_generation);
I
Izik Eidus 已提交
455

456 457 458 459 460 461 462
/*
 * Dereference kvm->memslots under RCU.  Lockdep-checks that the caller
 * holds either kvm->srcu (read side) or kvm->slots_lock (update side).
 */
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
{
	return rcu_dereference_check(kvm->memslots,
			srcu_read_lock_held(&kvm->srcu)
			|| lockdep_is_held(&kvm->slots_lock));
}

463 464 465
static inline struct kvm_memory_slot *
id_to_memslot(struct kvm_memslots *slots, int id)
{
466 467
	int index = slots->id_to_index[id];
	struct kvm_memory_slot *slot;
468

469
	slot = &slots->memslots[index];
470

471 472
	WARN_ON(slot->id != id);
	return slot;
473 474
}

475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
/*
 * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
 * - create a new memory slot
 * - delete an existing memory slot
 * - modify an existing memory slot
 *   -- move it in the guest physical memory space
 *   -- just change its flags
 *
 * Since flags can be changed by some of these operations, the following
 * differentiation is the best we can do for __kvm_set_memory_region():
 */
enum kvm_mr_change {
	KVM_MR_CREATE,
	KVM_MR_DELETE,
	KVM_MR_MOVE,
	KVM_MR_FLAGS_ONLY,
};

493
int kvm_set_memory_region(struct kvm *kvm,
494
			  struct kvm_userspace_memory_region *mem);
495
int __kvm_set_memory_region(struct kvm *kvm,
496
			    struct kvm_userspace_memory_region *mem);
497 498 499
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont);
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
500 501 502
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				struct kvm_memory_slot *memslot,
				struct kvm_userspace_memory_region *mem,
503
				enum kvm_mr_change change);
504
void kvm_arch_commit_memory_region(struct kvm *kvm,
505
				struct kvm_userspace_memory_region *mem,
506 507
				const struct kvm_memory_slot *old,
				enum kvm_mr_change change);
508
bool kvm_largepages_enabled(void);
509
void kvm_disable_largepages(void);
510 511 512 513 514
/* flush all memory translations */
void kvm_arch_flush_shadow_all(struct kvm *kvm);
/* flush memory translations pointing to 'slot' */
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot);
515

516 517 518
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
			    int nr_pages);

A
Avi Kivity 已提交
519
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
M
Marcelo Tosatti 已提交
520
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
X
Xiao Guangrong 已提交
521
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
522 523
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
524 525 526
void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);

527
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
528 529
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
		       bool write_fault, bool *writable);
530
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
531 532
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
		      bool *writable);
533
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
534 535
pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);

536
void kvm_release_pfn_dirty(pfn_t pfn);
537 538 539 540 541
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
void kvm_set_pfn_accessed(pfn_t pfn);
void kvm_get_pfn(pfn_t pfn);

542 543
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
			int len);
544 545
int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
			  unsigned long len);
546
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
547 548
int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
			   void *data, unsigned long len);
549 550 551 552
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
			 int offset, int len);
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
		    unsigned long len);
553 554 555
int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
			   void *data, unsigned long len);
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
556
			      gpa_t gpa, unsigned long len);
557 558
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
A
Avi Kivity 已提交
559
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
560
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
J
Joerg Roedel 已提交
561
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
A
Avi Kivity 已提交
562
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
563 564
void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
			     gfn_t gfn);
A
Avi Kivity 已提交
565

566
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
567
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
568
bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
Z
Zhai, Edwin 已提交
569
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
A
Avi Kivity 已提交
570
void kvm_resched(struct kvm_vcpu *vcpu);
571 572
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
573

574
void kvm_flush_remote_tlbs(struct kvm *kvm);
575
void kvm_reload_remote_mmus(struct kvm *kvm);
576
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
577
void kvm_make_scan_ioapic_request(struct kvm *kvm);
A
Avi Kivity 已提交
578

579 580
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg);
581 582
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg);
583
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
584 585 586

int kvm_dev_ioctl_check_extension(long ext);

587 588 589 590 591
int kvm_get_dirty_log(struct kvm *kvm,
			struct kvm_dirty_log *log, int *is_dirty);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
				struct kvm_dirty_log *log);

592
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
593
				   struct kvm_userspace_memory_region *mem);
594 595
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
			bool line_status);
596 597
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg);
598

599 600 601
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);

602 603 604
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				    struct kvm_translation *tr);

605 606 607 608 609 610
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs);
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs);
611 612 613 614
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state);
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state);
J
Jan Kiszka 已提交
615 616
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg);
617 618
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);

619 620
int kvm_arch_init(void *opaque);
void kvm_arch_exit(void);
621

622 623 624 625 626 627 628
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
629
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
630
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
631
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
632

633
int kvm_arch_hardware_enable(void *garbage);
634 635 636 637
void kvm_arch_hardware_disable(void *garbage);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
638
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
639
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
640

641 642
void kvm_free_physmem(struct kvm *kvm);

643 644 645
void *kvm_kvzalloc(unsigned long size);
void kvm_kvfree(const void *addr);

646 647 648 649 650 651 652 653 654 655 656 657
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
 * Default VM allocation: a zeroed kmalloc.  Architectures can override
 * by defining __KVM_HAVE_ARCH_VM_ALLOC (see the surrounding #ifndef).
 */
static inline struct kvm *kvm_arch_alloc_vm(void)
{
	return kzalloc(sizeof(struct kvm), GFP_KERNEL);
}

/* Default VM free; must match kvm_arch_alloc_vm() above. */
static inline void kvm_arch_free_vm(struct kvm *kvm)
{
	kfree(kvm);
}
#endif

658 659
static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
{
660 661 662
#ifdef __KVM_HAVE_ARCH_WQP
	return vcpu->arch.wqp;
#else
663 664
	return &vcpu->wq;
#endif
665
}
666

667
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
668
void kvm_arch_destroy_vm(struct kvm *kvm);
669
void kvm_arch_sync_events(struct kvm *kvm);
670

671
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
672
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
673

X
Xiao Guangrong 已提交
674
bool kvm_is_mmio_pfn(pfn_t pfn);
675

B
Ben-Ami Yassour 已提交
676 677 678 679 680 681 682 683 684 685
/* Callback registration invoked when the guest ACKs an interrupt on gsi. */
struct kvm_irq_ack_notifier {
	struct hlist_node link;		/* entry on kvm->irq_ack_notifier_list */
	unsigned gsi;			/* gsi this notifier watches */
	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
};

struct kvm_assigned_dev_kernel {
	struct kvm_irq_ack_notifier ack_notifier;
	struct list_head list;
	int assigned_dev_id;
686
	int host_segnr;
B
Ben-Ami Yassour 已提交
687 688
	int host_busnr;
	int host_devfn;
S
Sheng Yang 已提交
689
	unsigned int entries_nr;
B
Ben-Ami Yassour 已提交
690
	int host_irq;
691
	bool host_irq_disabled;
692
	bool pci_2_3;
S
Sheng Yang 已提交
693
	struct msix_entry *host_msix_entries;
B
Ben-Ami Yassour 已提交
694
	int guest_irq;
695
	struct msix_entry *guest_msix_entries;
696
	unsigned long irq_requested_type;
697
	int irq_source_id;
698
	int flags;
B
Ben-Ami Yassour 已提交
699 700
	struct pci_dev *dev;
	struct kvm *kvm;
701
	spinlock_t intx_lock;
702
	spinlock_t intx_mask_lock;
703
	char irq_name[32];
704
	struct pci_saved_state *pci_saved_state;
B
Ben-Ami Yassour 已提交
705
};
706 707 708 709 710 711 712 713 714 715 716

/* Callback registration invoked when an irq line is masked or unmasked. */
struct kvm_irq_mask_notifier {
	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
	int irq;			/* irq this notifier watches */
	struct hlist_node link;		/* entry on kvm->mask_notifier_list */
};

void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
				    struct kvm_irq_mask_notifier *kimn);
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
				      struct kvm_irq_mask_notifier *kimn);
717 718
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
			     bool mask);
719

720 721
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
		bool line_status);
M
Michael S. Tsirkin 已提交
722
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
723
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
724
		int irq_source_id, int level, bool line_status);
725
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
726
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
727 728
void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian);
729 730
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian);
731 732
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
B
Ben-Ami Yassour 已提交
733

734 735 736
/* For vcpu->arch.iommu_flags */
#define KVM_IOMMU_CACHE_COHERENCY	0x1

737
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
738
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
739
void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
W
Weidong Han 已提交
740
int kvm_iommu_map_guest(struct kvm *kvm);
B
Ben-Ami Yassour 已提交
741
int kvm_iommu_unmap_guest(struct kvm *kvm);
W
Weidong Han 已提交
742 743
int kvm_assign_device(struct kvm *kvm,
		      struct kvm_assigned_dev_kernel *assigned_dev);
W
Weidong Han 已提交
744 745
int kvm_deassign_device(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *assigned_dev);
746
#else
B
Ben-Ami Yassour 已提交
747
/* No device assignment support: iommu mapping is a successful no-op. */
static inline int kvm_iommu_map_pages(struct kvm *kvm,
				      struct kvm_memory_slot *slot)
{
	return 0;
}

753 754 755 756 757
/* No device assignment support: iommu unmapping is a no-op. */
static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
					 struct kvm_memory_slot *slot)
{
}

B
Ben-Ami Yassour 已提交
758 759 760 761
/* No device assignment support: nothing to unmap, report success. */
static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
{
	return 0;
}
762
#endif
B
Ben-Ami Yassour 已提交
763

764 765 766 767 768 769 770 771 772 773
/*
 * Account the transition into guest mode.  Must be called with
 * preemption disabled; guest_enter() is run with irqs off.
 */
static inline void kvm_guest_enter(void)
{
	unsigned long flags;

	BUG_ON(preemptible());

	local_irq_save(flags);
	guest_enter();
	local_irq_restore(flags);

	/* KVM does not hold any references to rcu protected data when it
	 * switches CPU into a guest mode. In fact switching to a guest mode
	 * is very similar to exiting to userspace from rcu point of view. In
	 * addition CPU may stay in a guest mode for quite a long time (up to
	 * one time slice). Lets treat guest mode as quiescent state, just like
	 * we do with user-mode execution.
	 */
	rcu_virt_note_context_switch(smp_processor_id());
}

/* Account the transition out of guest mode; guest_exit() runs with irqs off. */
static inline void kvm_guest_exit(void)
{
	unsigned long flags;

	local_irq_save(flags);
	guest_exit();
	local_irq_restore(flags);
}

793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
/*
 * search_memslots() and __gfn_to_memslot() are here because they are
 * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
 * gfn_to_memslot() itself isn't here as an inline because that would
 * bloat other code too much.
 */
/* Linear scan for the memslot containing gfn; NULL if no slot covers it. */
static inline struct kvm_memory_slot *
search_memslots(struct kvm_memslots *slots, gfn_t gfn)
{
	struct kvm_memory_slot *memslot;

	kvm_for_each_memslot(memslot, slots)
		if (gfn >= memslot->base_gfn &&
		      gfn < memslot->base_gfn + memslot->npages)
			return memslot;

	return NULL;
}

/* Lock-free gfn->memslot lookup; caller must hold the slots alive (SRCU). */
static inline struct kvm_memory_slot *
__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
{
	return search_memslots(slots, gfn);
}

818 819 820 821 822 823
/* Host virtual address of gfn within slot; no validity checking. */
static inline unsigned long
__gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
}

824 825 826 827 828
/*
 * Slot id of the memslot containing gfn.
 * NOTE(review): dereferences gfn_to_memslot() without a NULL check -
 * callers must guarantee gfn lies in some slot, or this will oops.
 */
static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
{
	return gfn_to_memslot(kvm, gfn)->id;
}

829 830 831 832 833 834 835
/* Index of gfn within the level-sized large-page region starting at base_gfn. */
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
}

836 837
static inline gfn_t
hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
838
{
839 840 841
	gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;

	return slot->base_gfn + gfn_offset;
842 843
}

A
Avi Kivity 已提交
844 845 846 847
/* Guest frame number -> guest physical address. */
static inline gpa_t gfn_to_gpa(gfn_t gfn)
{
	return (gpa_t)gfn << PAGE_SHIFT;
}
A
Avi Kivity 已提交
848

849 850 851 852 853
/* Guest physical address -> guest frame number (drops the page offset). */
static inline gfn_t gpa_to_gfn(gpa_t gpa)
{
	return (gfn_t)(gpa >> PAGE_SHIFT);
}

B
Ben-Ami Yassour 已提交
854 855 856 857 858
/* Host page frame number -> host physical address. */
static inline hpa_t pfn_to_hpa(pfn_t pfn)
{
	return (hpa_t)pfn << PAGE_SHIFT;
}

M
Marcelo Tosatti 已提交
859
static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
860 861 862 863
{
	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
}

864 865 866 867 868
/* Whether a debugfs statistic is per-VM or per-vcpu. */
enum kvm_stat_kind {
	KVM_STAT_VM,
	KVM_STAT_VCPU,
};

869 870 871
struct kvm_stats_debugfs_item {
	const char *name;
	int offset;
872
	enum kvm_stat_kind kind;
873 874 875
	struct dentry *dentry;
};
extern struct kvm_stats_debugfs_item debugfs_entries[];
876
extern struct dentry *kvm_debugfs_dir;
877

878
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
879
static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
880
{
881
	if (unlikely(kvm->mmu_notifier_count))
882 883
		return 1;
	/*
884 885 886 887 888 889 890 891
	 * Ensure the read of mmu_notifier_count happens before the read
	 * of mmu_notifier_seq.  This interacts with the smp_wmb() in
	 * mmu_notifier_invalidate_range_end to make sure that the caller
	 * either sees the old (non-zero) value of mmu_notifier_count or
	 * the new (incremented) value of mmu_notifier_seq.
	 * PowerPC Book3s HV KVM calls this under a per-page lock
	 * rather than under kvm->mmu_lock, for scalability, so
	 * can't rely on kvm->mmu_lock to keep things ordered.
892
	 */
893
	smp_rmb();
894
	if (kvm->mmu_notifier_seq != mmu_seq)
895 896 897 898 899
		return 1;
	return 0;
}
#endif

900
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
901 902 903 904 905 906 907 908

#define KVM_MAX_IRQ_ROUTES 1024

int kvm_setup_default_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
			const struct kvm_irq_routing_entry *entries,
			unsigned nr,
			unsigned flags);
909 910 911
int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
			  struct kvm_kernel_irq_routing_entry *e,
			  const struct kvm_irq_routing_entry *ue);
912 913
void kvm_free_irq_routing(struct kvm *kvm);

914 915
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);

916 917 918 919 920 921
#else

static inline void kvm_free_irq_routing(struct kvm *kvm) {}

#endif

G
Gregory Haskins 已提交
922 923
#ifdef CONFIG_HAVE_KVM_EVENTFD

G
Gregory Haskins 已提交
924
void kvm_eventfd_init(struct kvm *kvm);
925 926 927
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);

#ifdef CONFIG_HAVE_KVM_IRQCHIP
928
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
G
Gregory Haskins 已提交
929
void kvm_irqfd_release(struct kvm *kvm);
930
void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
931 932 933 934 935 936 937 938
#else
/* No in-kernel irqchip: irqfd cannot be supported. */
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	return -EINVAL;
}

/* No in-kernel irqchip: nothing to release. */
static inline void kvm_irqfd_release(struct kvm *kvm) {}
#endif
G
Gregory Haskins 已提交
939 940 941

#else

G
Gregory Haskins 已提交
942
static inline void kvm_eventfd_init(struct kvm *kvm) {}
943

944
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
G
Gregory Haskins 已提交
945 946 947 948 949
{
	return -EINVAL;
}

static inline void kvm_irqfd_release(struct kvm *kvm) {}
950

A
Alexander Graf 已提交
951
#ifdef CONFIG_HAVE_KVM_IRQCHIP
952 953 954 955 956
static inline void kvm_irq_routing_update(struct kvm *kvm,
					  struct kvm_irq_routing_table *irq_rt)
{
	rcu_assign_pointer(kvm->irq_routing, irq_rt);
}
A
Alexander Graf 已提交
957
#endif
958

G
Gregory Haskins 已提交
959 960 961 962
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	return -ENOSYS;
}
G
Gregory Haskins 已提交
963 964 965

#endif /* CONFIG_HAVE_KVM_EVENTFD */

966
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
967 968
static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
969
	return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
970
}
971 972 973 974 975 976 977

bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);

#else

static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }

A
Avi Kivity 已提交
978
#endif
979

980
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
981 982 983 984

long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg);

985 986
void kvm_free_all_assigned_devices(struct kvm *kvm);

987 988 989 990 991 992 993 994
#else

static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
						unsigned long arg)
{
	return -ENOTTY;
}

995 996
static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}

997
#endif
998

999 1000 1001 1002 1003 1004 1005
/*
 * Post request @req to @vcpu by setting its bit in vcpu->requests.
 * set_bit() is atomic, so concurrent callers need no extra locking.
 */
static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
	set_bit(req, &vcpu->requests);
}

static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
{
1006 1007 1008 1009 1010 1011
	if (test_bit(req, &vcpu->requests)) {
		clear_bit(req, &vcpu->requests);
		return true;
	} else {
		return false;
	}
1012 1013
}

1014 1015
extern bool kvm_rebooting;

S
Scott Wood 已提交
1016 1017 1018 1019 1020 1021
struct kvm_device_ops;

struct kvm_device {
	struct kvm_device_ops *ops;
	struct kvm *kvm;
	void *private;
1022
	struct list_head vm_node;
S
Scott Wood 已提交
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
};

/* create, destroy, and name are mandatory */
struct kvm_device_ops {
	const char *name;
	int (*create)(struct kvm_device *dev, u32 type);

	/*
	 * Destroy is responsible for freeing dev.
	 *
	 * Destroy may be called before or after destructors are called
	 * on emulated I/O regions, depending on whether a reference is
	 * held by a vcpu or other kvm component that gets destroyed
	 * after the emulated I/O.
	 */
	void (*destroy)(struct kvm_device *dev);

	/* Optional attribute accessors and catch-all ioctl handler. */
	int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
		      unsigned long arg);
};

void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);

extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;

#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT

static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
	vcpu->spin_loop.in_spin_loop = val;
}
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
	vcpu->spin_loop.dy_eligible = val;
}

#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */

static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
}

static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}

1075 1076 1077 1078 1079
static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
{
	return true;
}

1080
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
1081
#endif
1082