kvm_host.h 29.3 KB
Newer Older
1 2
#ifndef __KVM_HOST_H
#define __KVM_HOST_H
A
Avi Kivity 已提交
3 4 5 6 7 8 9

/*
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/types.h>
10
#include <linux/hardirq.h>
A
Avi Kivity 已提交
11 12 13
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
M
Markus Rechberger 已提交
14 15
#include <linux/signal.h>
#include <linux/sched.h>
16
#include <linux/bug.h>
A
Avi Kivity 已提交
17
#include <linux/mm.h>
18
#include <linux/mmu_notifier.h>
19
#include <linux/preempt.h>
20
#include <linux/msi.h>
21
#include <linux/slab.h>
22
#include <linux/rcupdate.h>
23
#include <linux/ratelimit.h>
X
Xiao Guangrong 已提交
24
#include <linux/err.h>
25
#include <linux/irqflags.h>
26
#include <linux/context_tracking.h>
A
Alexey Dobriyan 已提交
27
#include <asm/signal.h>
A
Avi Kivity 已提交
28 29

#include <linux/kvm.h>
I
Ingo Molnar 已提交
30
#include <linux/kvm_para.h>
A
Avi Kivity 已提交
31

32
#include <linux/kvm_types.h>
33

34
#include <asm/kvm_host.h>
35

A
Avi Kivity 已提交
36 37 38 39
#ifndef KVM_MMIO_SIZE
#define KVM_MMIO_SIZE 8
#endif

40 41 42 43 44 45 46
/*
 * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
 * in kvm, other bits are visible for userspace which are defined in
 * include/linux/kvm.h.
 */
#define KVM_MEMSLOT_INVALID	(1UL << 16)

47 48
/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS	2
A
Avi Kivity 已提交
49

50 51
/*
 * For the normal pfn, the highest 12 bits should be zero,
 * so we can mask bit 62 ~ bit 52 to indicate the error pfn,
 * and mask bit 63 to indicate the noslot pfn.
 */
55 56 57
#define KVM_PFN_ERR_MASK	(0x7ffULL << 52)
#define KVM_PFN_ERR_NOSLOT_MASK	(0xfffULL << 52)
#define KVM_PFN_NOSLOT		(0x1ULL << 63)
58 59 60

#define KVM_PFN_ERR_FAULT	(KVM_PFN_ERR_MASK)
#define KVM_PFN_ERR_HWPOISON	(KVM_PFN_ERR_MASK + 1)
61
#define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)
62

63 64 65 66
/*
 * error pfns indicate that the gfn is in slot but faild to
 * translate it to pfn on host.
 */
67
static inline bool is_error_pfn(pfn_t pfn)
X
Xiao Guangrong 已提交
68
{
69
	return !!(pfn & KVM_PFN_ERR_MASK);
X
Xiao Guangrong 已提交
70 71
}

72 73 74 75 76 77
/*
 * error_noslot pfns indicate that the gfn can not be
 * translated to pfn - it is not in slot or failed to
 * translate it to pfn.
 *
 * Returns true when any bit of KVM_PFN_ERR_NOSLOT_MASK is set in @pfn.
 */
static inline bool is_error_noslot_pfn(pfn_t pfn)
{
	return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
}

82 83
/*
 * noslot pfn indicates that the gfn is not in slot.
 *
 * Returns true only when @pfn is exactly KVM_PFN_NOSLOT.
 */
static inline bool is_noslot_pfn(pfn_t pfn)
{
	return pfn == KVM_PFN_NOSLOT;
}

88 89
#define KVM_HVA_ERR_BAD		(PAGE_OFFSET)
#define KVM_HVA_ERR_RO_BAD	(PAGE_OFFSET + PAGE_SIZE)
X
Xiao Guangrong 已提交
90 91 92

static inline bool kvm_is_error_hva(unsigned long addr)
{
93
	return addr >= PAGE_OFFSET;
X
Xiao Guangrong 已提交
94 95
}

96 97
#define KVM_ERR_PTR_BAD_PAGE	(ERR_PTR(-ENOENT))

98
/*
 * Returns true when @page is an ERR_PTR()-encoded error value, such as
 * KVM_ERR_PTR_BAD_PAGE, rather than a usable page pointer.
 */
static inline bool is_error_page(struct page *page)
{
	return IS_ERR(page);
}

103 104 105
/*
 * vcpu->requests bit members
 */
106
#define KVM_REQ_TLB_FLUSH          0
107
#define KVM_REQ_MIGRATE_TIMER      1
108
#define KVM_REQ_REPORT_TPR_ACCESS  2
109
#define KVM_REQ_MMU_RELOAD         3
J
Joerg Roedel 已提交
110
#define KVM_REQ_TRIPLE_FAULT       4
111
#define KVM_REQ_PENDING_TIMER      5
112
#define KVM_REQ_UNHALT             6
113
#define KVM_REQ_MMU_SYNC           7
Z
Zachary Amsden 已提交
114
#define KVM_REQ_CLOCK_UPDATE       8
115
#define KVM_REQ_KICK               9
116
#define KVM_REQ_DEACTIVATE_FPU    10
117
#define KVM_REQ_EVENT             11
118
#define KVM_REQ_APF_HALT          12
G
Glauber Costa 已提交
119
#define KVM_REQ_STEAL_UPDATE      13
A
Avi Kivity 已提交
120
#define KVM_REQ_NMI               14
121 122 123 124 125 126 127
#define KVM_REQ_PMU               15
#define KVM_REQ_PMI               16
#define KVM_REQ_WATCHDOG          17
#define KVM_REQ_MASTERCLOCK_UPDATE 18
#define KVM_REQ_MCLOCK_INPROGRESS 19
#define KVM_REQ_EPR_EXIT          20
#define KVM_REQ_SCAN_IOAPIC       21
128
#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
A
Avi Kivity 已提交
129

130 131
#define KVM_USERSPACE_IRQ_SOURCE_ID		0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
132

133
struct kvm;
A
Avi Kivity 已提交
134
struct kvm_vcpu;
135
extern struct kmem_cache *kvm_vcpu_cache;
A
Avi Kivity 已提交
136

137 138 139
extern raw_spinlock_t kvm_lock;
extern struct list_head vm_list;

140 141 142 143 144 145
/*
 * Element type of the range[] array in struct kvm_io_bus.
 *
 * NOTE(review): the fields mirror the (addr, len, dev) arguments of
 * kvm_io_bus_register_dev(); presumably they record one registered
 * device and the guest address range it claims - confirm.
 */
struct kvm_io_range {
	gpa_t addr;
	int len;
	struct kvm_io_device *dev;
};

146
#define NR_IOBUS_DEVS 1000
147

148
struct kvm_io_bus {
149 150
	int dev_count;
	int ioeventfd_count;
151
	struct kvm_io_range range[];
152 153
};

M
Marcelo Tosatti 已提交
154 155 156
/*
 * Index of an I/O bus, passed as bus_idx to the kvm_io_bus_*() helpers.
 * KVM_NR_BUSES must stay last: it sizes the buses[] array in struct kvm.
 */
enum kvm_bus {
	KVM_MMIO_BUS,
	KVM_PIO_BUS,
	KVM_VIRTIO_CCW_NOTIFY_BUS,
	KVM_NR_BUSES
};

int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
		     int len, const void *val);
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
164
		    void *val);
165 166
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
			    int len, struct kvm_io_device *dev);
M
Marcelo Tosatti 已提交
167 168
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
			      struct kvm_io_device *dev);
169

170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
#ifdef CONFIG_KVM_ASYNC_PF
/*
 * Bookkeeping for one asynchronous page fault request.  Only defined
 * when CONFIG_KVM_ASYNC_PF is enabled.
 *
 * NOTE(review): link and queue are presumably the hooks for the
 * per-vcpu async_pf.done and async_pf.queue lists in struct kvm_vcpu
 * - confirm against the async_pf implementation.
 */
struct kvm_async_pf {
	struct work_struct work;
	struct list_head link;
	struct list_head queue;
	struct kvm_vcpu *vcpu;
	struct mm_struct *mm;
	gva_t gva;
	unsigned long addr;
	struct kvm_arch_async_pf arch;
	struct page *page;
	bool done;
};

void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
		       struct kvm_arch_async_pf *arch);
188
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
189 190
#endif

191 192 193
/*
 * Values for kvm_vcpu::mode.  kvm_vcpu_exiting_guest_mode() moves a vcpu
 * from IN_GUEST_MODE to EXITING_GUEST_MODE.
 */
enum {
	OUTSIDE_GUEST_MODE,
	IN_GUEST_MODE,
	EXITING_GUEST_MODE,
	READING_SHADOW_PAGE_TABLES,
};

A
Avi Kivity 已提交
198 199 200 201 202 203 204 205 206 207
/*
 * Sometimes a large or cross-page mmio needs to be broken up into separate
 * exits for userspace servicing.
 */
/*
 * One piece of a split MMIO access; struct kvm_vcpu keeps up to
 * KVM_MAX_MMIO_FRAGMENTS of these in mmio_fragments[].
 *
 * NOTE(review): gpa/data/len presumably describe the guest physical
 * address, the data buffer and the byte length of this piece - confirm.
 */
struct kvm_mmio_fragment {
	gpa_t gpa;
	void *data;
	unsigned len;
};

208 209
struct kvm_vcpu {
	struct kvm *kvm;
210
#ifdef CONFIG_PREEMPT_NOTIFIERS
211
	struct preempt_notifier preempt_notifier;
212
#endif
213
	int cpu;
214
	int vcpu_id;
215 216
	int srcu_idx;
	int mode;
217
	unsigned long requests;
J
Jan Kiszka 已提交
218
	unsigned long guest_debug;
219 220 221

	struct mutex mutex;
	struct kvm_run *run;
222

223
	int fpu_active;
224
	int guest_fpu_loaded, guest_xcr0_loaded;
225
	wait_queue_head_t wq;
226
	struct pid *pid;
227 228 229 230
	int sigset_active;
	sigset_t sigset;
	struct kvm_vcpu_stat stat;

231
#ifdef CONFIG_HAS_IOMEM
232 233 234
	int mmio_needed;
	int mmio_read_completed;
	int mmio_is_write;
A
Avi Kivity 已提交
235 236 237
	int mmio_cur_fragment;
	int mmio_nr_fragments;
	struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
238
#endif
A
Avi Kivity 已提交
239

240 241 242 243 244 245 246 247 248
#ifdef CONFIG_KVM_ASYNC_PF
	struct {
		u32 queued;
		struct list_head queue;
		struct list_head done;
		spinlock_t lock;
	} async_pf;
#endif

249 250 251 252 253 254 255 256 257 258 259 260
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
	/*
	 * Cpu relax intercept or pause loop exit optimization
	 * in_spin_loop: set when a vcpu does a pause loop exit
	 *  or cpu relax intercepted.
	 * dy_eligible: indicates whether vcpu is eligible for directed yield.
	 */
	struct {
		bool in_spin_loop;
		bool dy_eligible;
	} spin_loop;
#endif
261
	bool preempted;
262 263 264
	struct kvm_vcpu_arch arch;
};

265 266 267 268 269
/*
 * Atomically switch @vcpu from IN_GUEST_MODE to EXITING_GUEST_MODE.
 *
 * The switch only happens when the current mode is IN_GUEST_MODE.  The
 * value handed back by cmpxchg() is returned unchanged, so the caller
 * can tell which mode the vcpu was in.
 */
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
	int prev_mode;

	prev_mode = cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);

	return prev_mode;
}

270 271 272 273 274 275
/*
 * Some of the bitops functions do not support too long bitmaps.
 * This number must be determined not to exceed such limits.
 */
#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)

A
Avi Kivity 已提交
276 277 278 279
struct kvm_memory_slot {
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long *dirty_bitmap;
280
	struct kvm_arch_memory_slot arch;
281
	unsigned long userspace_addr;
282
	u32 flags;
283
	short id;
A
Avi Kivity 已提交
284 285
};

286 287 288 289 290
/*
 * Size in bytes of the dirty bitmap for @memslot: one bit per page,
 * with the bit count rounded up to a multiple of BITS_PER_LONG.
 */
static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
{
	unsigned long nr_bits = ALIGN(memslot->npages, BITS_PER_LONG);

	return nr_bits / 8;
}

291 292
struct kvm_kernel_irq_routing_entry {
	u32 gsi;
293
	u32 type;
294
	int (*set)(struct kvm_kernel_irq_routing_entry *e,
295 296
		   struct kvm *kvm, int irq_source_id, int level,
		   bool line_status);
297 298 299 300 301
	union {
		struct {
			unsigned irqchip;
			unsigned pin;
		} irqchip;
S
Sheng Yang 已提交
302
		struct msi_msg msi;
303
	};
304 305 306
	struct hlist_node link;
};

307
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
308

309
struct kvm_irq_routing_table {
310
	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
311 312 313 314 315 316 317
	struct kvm_kernel_irq_routing_entry *rt_entries;
	u32 nr_rt_entries;
	/*
	 * Array indexed by gsi. Each entry contains list of irq chips
	 * the gsi is connected to.
	 */
	struct hlist_head map[0];
318 319
};

320 321 322 323 324 325
#else

struct kvm_irq_routing_table {};

#endif

326 327 328 329
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif

330
#ifndef KVM_MEM_SLOTS_NUM
331
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
332 333
#endif

334 335 336 337 338
/*
 * Note:
 * memslots are not sorted by id anymore, please use id_to_memslot()
 * to get the memslot by its id.
 */
339
struct kvm_memslots {
340
	u64 generation;
341
	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
342
	/* The mapping table from slot id to the index in memslots[]. */
343
	short id_to_index[KVM_MEM_SLOTS_NUM];
344 345
};

A
Avi Kivity 已提交
346
struct kvm {
347
	spinlock_t mmu_lock;
348
	struct mutex slots_lock;
349
	struct mm_struct *mm; /* userspace tied to this vm */
350
	struct kvm_memslots *memslots;
351
	struct srcu_struct srcu;
352 353 354
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
	u32 bsp_vcpu_id;
#endif
R
Rusty Russell 已提交
355
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
356
	atomic_t online_vcpus;
357
	int last_boosted_vcpu;
358
	struct list_head vm_list;
359
	struct mutex lock;
M
Marcelo Tosatti 已提交
360
	struct kvm_io_bus *buses[KVM_NR_BUSES];
G
Gregory Haskins 已提交
361 362 363 364
#ifdef CONFIG_HAVE_KVM_EVENTFD
	struct {
		spinlock_t        lock;
		struct list_head  items;
365 366
		struct list_head  resampler_list;
		struct mutex      resampler_lock;
G
Gregory Haskins 已提交
367
	} irqfds;
G
Gregory Haskins 已提交
368
	struct list_head ioeventfds;
G
Gregory Haskins 已提交
369
#endif
370
	struct kvm_vm_stat stat;
371
	struct kvm_arch arch;
I
Izik Eidus 已提交
372
	atomic_t users_count;
373 374
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
375 376
	spinlock_t ring_lock;
	struct list_head coalesced_zones;
377
#endif
378

379
	struct mutex irq_lock;
380
#ifdef CONFIG_HAVE_KVM_IRQCHIP
381 382 383 384
	/*
	 * Update side is protected by irq_lock and,
	 * if configured, irqfds.lock.
	 */
A
Arnd Bergmann 已提交
385
	struct kvm_irq_routing_table __rcu *irq_routing;
386
	struct hlist_head mask_notifier_list;
387
	struct hlist_head irq_ack_notifier_list;
388 389
#endif

390
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
391 392 393 394
	struct mmu_notifier mmu_notifier;
	unsigned long mmu_notifier_seq;
	long mmu_notifier_count;
#endif
395
	long tlbs_dirty;
396
	struct list_head devices;
A
Avi Kivity 已提交
397 398
};

399 400 401 402 403 404 405 406 407
/*
 * Logging helpers.  Each one prefixes the message with "kvm [<id>]: ".
 * kvm_err/kvm_info/kvm_debug use task_pid_nr(current) as <id>, while
 * the rate-limited kvm_pr_unimpl uses task_tgid_nr(current).
 */
#define kvm_err(fmt, ...) \
	pr_err("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_info(fmt, ...) \
	pr_info("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_debug(fmt, ...) \
	pr_debug("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_pr_unimpl(fmt, ...) \
	pr_err_ratelimited("kvm [%i]: " fmt, \
			   task_tgid_nr(current), ## __VA_ARGS__)
408

409 410 411
/*
 * The guest did something we don't support.
 *
 * Logs through kvm_pr_unimpl() and additionally prefixes the message
 * with "vcpu<id> ", taken from (vcpu)->vcpu_id.
 */
#define vcpu_unimpl(vcpu, fmt, ...)					\
	kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
A
Avi Kivity 已提交
412

413 414 415 416 417 418 419
/*
 * Fetch the vcpu stored at index @i of kvm->vcpus[].
 *
 * A read barrier is issued before the array slot is loaded.
 * NOTE(review): presumably pairs with a write barrier on the path that
 * fills vcpus[] - confirm against the vcpu creation code.
 * No bounds check is done on @i.
 */
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
	struct kvm_vcpu *vcpu;

	smp_rmb();
	vcpu = kvm->vcpus[i];

	return vcpu;
}

/*
 * Iterate over the vcpus of @kvm.
 *
 * @idx:   integer lvalue used as the loop counter
 * @vcpup: struct kvm_vcpu pointer lvalue that receives each vcpu
 * @kvm:   pointer expression for the VM; parenthesized in the expansion
 *         so that arguments such as "base + 1" expand correctly
 *
 * The loop ends at kvm->online_vcpus, or earlier if kvm_get_vcpu()
 * returns NULL.
 */
#define kvm_for_each_vcpu(idx, vcpup, kvm) \
	for (idx = 0; \
	     idx < atomic_read(&(kvm)->online_vcpus) && \
	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
	     idx++)
424

425 426
/*
 * Iterate over the used entries of (slots)->memslots[].
 *
 * @memslot: struct kvm_memory_slot pointer lvalue used as the cursor
 * @slots:   pointer expression for the struct kvm_memslots; parenthesized
 *           in the expansion so that arguments such as "base + 1" expand
 *           correctly
 *
 * The walk stops at the first entry whose npages is zero, or after
 * KVM_MEM_SLOTS_NUM entries.
 */
#define kvm_for_each_memslot(memslot, slots)	\
	for ((memslot) = &(slots)->memslots[0];	\
	     (memslot) < (slots)->memslots + KVM_MEM_SLOTS_NUM && \
	     (memslot)->npages; \
	     (memslot)++)
429

R
Rusty Russell 已提交
430 431 432
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);

433
int __must_check vcpu_load(struct kvm_vcpu *vcpu);
434 435
void vcpu_put(struct kvm_vcpu *vcpu);

436
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
437 438 439 440 441 442 443 444 445 446 447 448
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
/*
 * Stub for builds without CONFIG_HAVE_KVM_IRQ_ROUTING: there is nothing
 * to initialize, so it always reports success (0).
 */
static inline int kvm_irqfd_init(void)
{
	return 0;
}

/* Stub for builds without CONFIG_HAVE_KVM_IRQ_ROUTING: does nothing. */
static inline void kvm_irqfd_exit(void)
{
}
#endif
449
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
450
		  struct module *module);
451
void kvm_exit(void);
A
Avi Kivity 已提交
452

I
Izik Eidus 已提交
453 454
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
455 456
void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
		     u64 last_generation);
I
Izik Eidus 已提交
457

458 459 460 461 462 463 464
/*
 * Return kvm->memslots through rcu_dereference_check().
 *
 * The check condition passed to rcu_dereference_check() is that the
 * caller is inside an srcu read-side section on kvm->srcu or holds
 * kvm->slots_lock.
 */
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
{
	return rcu_dereference_check(kvm->memslots,
			srcu_read_lock_held(&kvm->srcu)
			|| lockdep_is_held(&kvm->slots_lock));
}

465 466 467
static inline struct kvm_memory_slot *
id_to_memslot(struct kvm_memslots *slots, int id)
{
468 469
	int index = slots->id_to_index[id];
	struct kvm_memory_slot *slot;
470

471
	slot = &slots->memslots[index];
472

473 474
	WARN_ON(slot->id != id);
	return slot;
475 476
}

477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
/*
 * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
 * - create a new memory slot
 * - delete an existing memory slot
 * - modify an existing memory slot
 *   -- move it in the guest physical memory space
 *   -- just change its flags
 *
 * Since flags can be changed by some of these operations, the following
 * differentiation is the best we can do for __kvm_set_memory_region():
 */
enum kvm_mr_change {
	/* create a new memory slot */
	KVM_MR_CREATE,
	/* delete an existing memory slot */
	KVM_MR_DELETE,
	/* move an existing slot in the guest physical memory space */
	KVM_MR_MOVE,
	/* only the flags of an existing slot change */
	KVM_MR_FLAGS_ONLY,
};

495
int kvm_set_memory_region(struct kvm *kvm,
496
			  struct kvm_userspace_memory_region *mem);
497
int __kvm_set_memory_region(struct kvm *kvm,
498
			    struct kvm_userspace_memory_region *mem);
499 500 501
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont);
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
502 503 504
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				struct kvm_memory_slot *memslot,
				struct kvm_userspace_memory_region *mem,
505
				enum kvm_mr_change change);
506
void kvm_arch_commit_memory_region(struct kvm *kvm,
507
				struct kvm_userspace_memory_region *mem,
508 509
				const struct kvm_memory_slot *old,
				enum kvm_mr_change change);
510
bool kvm_largepages_enabled(void);
511
void kvm_disable_largepages(void);
512 513 514 515 516
/* flush all memory translations */
void kvm_arch_flush_shadow_all(struct kvm *kvm);
/* flush memory translations pointing to 'slot' */
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot);
517

518 519 520
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
			    int nr_pages);

A
Avi Kivity 已提交
521
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
M
Marcelo Tosatti 已提交
522
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
X
Xiao Guangrong 已提交
523
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
524 525
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
526 527 528
void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);

529
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
530 531
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
		       bool write_fault, bool *writable);
532
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
533 534
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
		      bool *writable);
535
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
536 537
pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);

538
void kvm_release_pfn_dirty(pfn_t pfn);
539 540 541 542 543
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
void kvm_set_pfn_accessed(pfn_t pfn);
void kvm_get_pfn(pfn_t pfn);

544 545
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
			int len);
546 547
int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
			  unsigned long len);
548
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
549 550
int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
			   void *data, unsigned long len);
551 552 553 554
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
			 int offset, int len);
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
		    unsigned long len);
555 556 557
int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
			   void *data, unsigned long len);
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
558
			      gpa_t gpa, unsigned long len);
559 560
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
A
Avi Kivity 已提交
561
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
562
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
J
Joerg Roedel 已提交
563
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
A
Avi Kivity 已提交
564
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
565 566
void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
			     gfn_t gfn);
A
Avi Kivity 已提交
567

568
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
569
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
570
bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
Z
Zhai, Edwin 已提交
571
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
A
Avi Kivity 已提交
572
void kvm_resched(struct kvm_vcpu *vcpu);
573 574
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
575

576
void kvm_flush_remote_tlbs(struct kvm *kvm);
577
void kvm_reload_remote_mmus(struct kvm *kvm);
578
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
579
void kvm_make_scan_ioapic_request(struct kvm *kvm);
A
Avi Kivity 已提交
580

581 582
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg);
583 584
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg);
585
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
586 587 588

int kvm_dev_ioctl_check_extension(long ext);

589 590 591 592 593
int kvm_get_dirty_log(struct kvm *kvm,
			struct kvm_dirty_log *log, int *is_dirty);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
				struct kvm_dirty_log *log);

594
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
595
				   struct kvm_userspace_memory_region *mem);
596 597
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
			bool line_status);
598 599
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg);
600

601 602 603
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);

604 605 606
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				    struct kvm_translation *tr);

607 608 609 610 611 612
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs);
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs);
613 614 615 616
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state);
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state);
J
Jan Kiszka 已提交
617 618
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg);
619 620
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);

621 622
int kvm_arch_init(void *opaque);
void kvm_arch_exit(void);
623

624 625 626 627 628 629 630
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
631
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
632
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
633
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
634

635
int kvm_arch_hardware_enable(void *garbage);
636 637 638 639
void kvm_arch_hardware_disable(void *garbage);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
640
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
641
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
642

643 644
void kvm_free_physmem(struct kvm *kvm);

645 646 647
void *kvm_kvzalloc(unsigned long size);
void kvm_kvfree(const void *addr);

648 649 650 651 652 653 654 655 656 657 658 659
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
 * Default VM allocator, used unless the architecture defines
 * __KVM_HAVE_ARCH_VM_ALLOC.  Returns a zeroed struct kvm from
 * kzalloc(GFP_KERNEL); the kzalloc() result is passed through as is.
 */
static inline struct kvm *kvm_arch_alloc_vm(void)
{
	struct kvm *kvm = kzalloc(sizeof(*kvm), GFP_KERNEL);

	return kvm;
}

/*
 * Default counterpart of kvm_arch_alloc_vm(): releases @kvm with kfree().
 * Only compiled when __KVM_HAVE_ARCH_VM_ALLOC is not defined.
 */
static inline void kvm_arch_free_vm(struct kvm *kvm)
{
	kfree(kvm);
}
#endif

660 661
static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
{
662 663 664
#ifdef __KVM_HAVE_ARCH_WQP
	return vcpu->arch.wqp;
#else
665 666
	return &vcpu->wq;
#endif
667
}
668

669
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
670
void kvm_arch_destroy_vm(struct kvm *kvm);
671
void kvm_arch_sync_events(struct kvm *kvm);
672

673
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
674
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
675

X
Xiao Guangrong 已提交
676
bool kvm_is_mmio_pfn(pfn_t pfn);
677

B
Ben-Ami Yassour 已提交
678 679 680 681 682 683 684 685 686 687
/*
 * Notifier passed to kvm_register_irq_ack_notifier() and
 * kvm_unregister_irq_ack_notifier().
 *
 * @irq_acked is the callback; it receives the notifier itself.
 * NOTE(review): @link presumably hooks into kvm->irq_ack_notifier_list
 * and @gsi selects the interrupt of interest - confirm.
 */
struct kvm_irq_ack_notifier {
	struct hlist_node link;
	unsigned gsi;
	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
};

struct kvm_assigned_dev_kernel {
	struct kvm_irq_ack_notifier ack_notifier;
	struct list_head list;
	int assigned_dev_id;
688
	int host_segnr;
B
Ben-Ami Yassour 已提交
689 690
	int host_busnr;
	int host_devfn;
S
Sheng Yang 已提交
691
	unsigned int entries_nr;
B
Ben-Ami Yassour 已提交
692
	int host_irq;
693
	bool host_irq_disabled;
694
	bool pci_2_3;
S
Sheng Yang 已提交
695
	struct msix_entry *host_msix_entries;
B
Ben-Ami Yassour 已提交
696
	int guest_irq;
697
	struct msix_entry *guest_msix_entries;
698
	unsigned long irq_requested_type;
699
	int irq_source_id;
700
	int flags;
B
Ben-Ami Yassour 已提交
701 702
	struct pci_dev *dev;
	struct kvm *kvm;
703
	spinlock_t intx_lock;
704
	spinlock_t intx_mask_lock;
705
	char irq_name[32];
706
	struct pci_saved_state *pci_saved_state;
B
Ben-Ami Yassour 已提交
707
};
708 709 710 711 712 713 714 715 716 717 718

/*
 * Notifier passed to kvm_register_irq_mask_notifier() and
 * kvm_unregister_irq_mask_notifier().
 *
 * @func is the callback; it receives the notifier itself and a
 * "masked" flag.
 * NOTE(review): @link presumably hooks into kvm->mask_notifier_list
 * - confirm.
 */
struct kvm_irq_mask_notifier {
	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
	int irq;
	struct hlist_node link;
};

void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
				    struct kvm_irq_mask_notifier *kimn);
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
				      struct kvm_irq_mask_notifier *kimn);
719 720
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
			     bool mask);
721

722 723
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
		bool line_status);
M
Michael S. Tsirkin 已提交
724
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
725
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
726
		int irq_source_id, int level, bool line_status);
727
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
728
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
729 730
void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian);
731 732
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian);
733 734
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
B
Ben-Ami Yassour 已提交
735

736 737 738
/* For vcpu->arch.iommu_flags */
#define KVM_IOMMU_CACHE_COHERENCY	0x1

739
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
740
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
741
void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
W
Weidong Han 已提交
742
int kvm_iommu_map_guest(struct kvm *kvm);
B
Ben-Ami Yassour 已提交
743
int kvm_iommu_unmap_guest(struct kvm *kvm);
W
Weidong Han 已提交
744 745
int kvm_assign_device(struct kvm *kvm,
		      struct kvm_assigned_dev_kernel *assigned_dev);
W
Weidong Han 已提交
746 747
int kvm_deassign_device(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *assigned_dev);
748
#else
B
Ben-Ami Yassour 已提交
749
/*
 * Stub for builds without CONFIG_KVM_DEVICE_ASSIGNMENT: nothing is
 * mapped and success (0) is always returned.
 */
static inline int kvm_iommu_map_pages(struct kvm *kvm,
				      struct kvm_memory_slot *slot)
{
	return 0;
}

755 756 757 758 759
/* Stub for builds without CONFIG_KVM_DEVICE_ASSIGNMENT: does nothing. */
static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
					 struct kvm_memory_slot *slot)
{
}

B
Ben-Ami Yassour 已提交
760 761 762 763
/*
 * Stub for builds without CONFIG_KVM_DEVICE_ASSIGNMENT: always returns
 * success (0).
 */
static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
{
	return 0;
}
764
#endif
B
Ben-Ami Yassour 已提交
765

766 767 768 769 770 771 772 773 774 775
/*
 * Bookkeeping to run right before the CPU switches into guest mode.
 *
 * Must be called with preemption disabled (enforced by BUG_ON).  Calls
 * guest_enter() with local interrupts disabled, then reports a
 * quiescent state to RCU for this CPU.
 */
static inline void kvm_guest_enter(void)
{
	unsigned long flags;

	BUG_ON(preemptible());

	local_irq_save(flags);
	guest_enter();
	local_irq_restore(flags);

	/* KVM does not hold any references to rcu protected data when it
	 * switches CPU into a guest mode. In fact switching to a guest mode
	 * is very similar to exiting to userspace from rcu point of view. In
	 * addition CPU may stay in a guest mode for quite a long time (up to
	 * one time slice). Let's treat guest mode as quiescent state, just
	 * like we do with user-mode execution.
	 */
	rcu_virt_note_context_switch(smp_processor_id());
}

/*
 * Counterpart of kvm_guest_enter(): calls guest_exit() with local
 * interrupts disabled.
 */
static inline void kvm_guest_exit(void)
{
	unsigned long flags;

	local_irq_save(flags);
	guest_exit();
	local_irq_restore(flags);
}

795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819
/*
 * search_memslots() and __gfn_to_memslot() live in this header because
 * non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c uses them.
 * gfn_to_memslot() itself is deliberately not an inline here, because
 * that would bloat other code too much.
 *
 * search_memslots() walks @slots with kvm_for_each_memslot() and returns
 * the first slot whose range [base_gfn, base_gfn + npages) contains
 * @gfn, or NULL when no slot matches.
 */
static inline struct kvm_memory_slot *
search_memslots(struct kvm_memslots *slots, gfn_t gfn)
{
	struct kvm_memory_slot *slot;

	kvm_for_each_memslot(slot, slots) {
		/* @gfn lies below this slot: try the next one. */
		if (gfn < slot->base_gfn)
			continue;
		if (gfn < slot->base_gfn + slot->npages)
			return slot;
	}

	return NULL;
}

/*
 * Thin wrapper around search_memslots(): returns the slot of @slots
 * that contains @gfn, or NULL when there is none.
 */
static inline struct kvm_memory_slot *
__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
{
	return search_memslots(slots, gfn);
}

820 821 822 823 824 825
/*
 * Compute the host virtual address that backs @gfn inside @slot:
 * slot->userspace_addr plus PAGE_SIZE for every frame between
 * slot->base_gfn and @gfn.
 *
 * No check is made that @gfn actually lies inside @slot.
 */
static inline unsigned long
__gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
	gfn_t frames_into_slot = gfn - slot->base_gfn;

	return slot->userspace_addr + frames_into_slot * PAGE_SIZE;
}

826 827 828 829 830
/*
 * Return the id of the memory slot that gfn_to_memslot() finds for @gfn.
 *
 * The lookup result is dereferenced without a check.
 * NOTE(review): callers presumably must guarantee that @gfn is backed
 * by a slot - confirm against gfn_to_memslot().
 */
static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);

	return slot->id;
}

831 832 833 834 835 836 837
/*
 * Index of @gfn relative to @base_gfn at page-table level @level: both
 * frame numbers are shifted right by KVM_HPAGE_GFN_SHIFT(level) and the
 * shifted base is subtracted from the shifted gfn.
 */
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
	gfn_t gfn_index = gfn >> KVM_HPAGE_GFN_SHIFT(level);
	gfn_t base_index = base_gfn >> KVM_HPAGE_GFN_SHIFT(level);

	return gfn_index - base_index;
}

838 839
static inline gfn_t
hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
840
{
841 842 843
	gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;

	return slot->base_gfn + gfn_offset;
844 845
}

A
Avi Kivity 已提交
846 847 848 849
/*
 * Convert frame number @gfn into an address by shifting it left by
 * PAGE_SHIFT.  The value is widened to gpa_t before the shift.
 */
static inline gpa_t gfn_to_gpa(gfn_t gfn)
{
	gpa_t gpa = gfn;

	gpa <<= PAGE_SHIFT;
	return gpa;
}
A
Avi Kivity 已提交
850

851 852 853 854 855
/*
 * Convert address @gpa into a frame number by shifting it right by
 * PAGE_SHIFT; the offset within the page is discarded.
 */
static inline gfn_t gpa_to_gfn(gpa_t gpa)
{
	gpa_t frame = gpa >> PAGE_SHIFT;

	return (gfn_t)frame;
}

B
Ben-Ami Yassour 已提交
856 857 858 859 860
/*
 * Convert frame number @pfn into an address by shifting it left by
 * PAGE_SHIFT.  The value is widened to hpa_t before the shift.
 */
static inline hpa_t pfn_to_hpa(pfn_t pfn)
{
	hpa_t hpa = pfn;

	hpa <<= PAGE_SHIFT;
	return hpa;
}

M
Marcelo Tosatti 已提交
861
static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
862 863 864 865
{
	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
}

866 867 868 869 870
/*
 * Kind of a struct kvm_stats_debugfs_item entry.
 * NOTE(review): presumably selects whether the item's offset refers to
 * per-VM or per-vcpu statistics - confirm.
 */
enum kvm_stat_kind {
	KVM_STAT_VM,
	KVM_STAT_VCPU,
};

871 872 873
struct kvm_stats_debugfs_item {
	const char *name;
	int offset;
874
	enum kvm_stat_kind kind;
875 876 877
	struct dentry *dentry;
};
extern struct kvm_stats_debugfs_item debugfs_entries[];
878
extern struct dentry *kvm_debugfs_dir;
879

880
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
881
static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
882
{
883
	if (unlikely(kvm->mmu_notifier_count))
884 885
		return 1;
	/*
886 887 888 889 890 891 892 893
	 * Ensure the read of mmu_notifier_count happens before the read
	 * of mmu_notifier_seq.  This interacts with the smp_wmb() in
	 * mmu_notifier_invalidate_range_end to make sure that the caller
	 * either sees the old (non-zero) value of mmu_notifier_count or
	 * the new (incremented) value of mmu_notifier_seq.
	 * PowerPC Book3s HV KVM calls this under a per-page lock
	 * rather than under kvm->mmu_lock, for scalability, so
	 * can't rely on kvm->mmu_lock to keep things ordered.
894
	 */
895
	smp_rmb();
896
	if (kvm->mmu_notifier_seq != mmu_seq)
897 898 899 900 901
		return 1;
	return 0;
}
#endif

902
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
903 904 905 906 907 908 909 910

#define KVM_MAX_IRQ_ROUTES 1024

int kvm_setup_default_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
			const struct kvm_irq_routing_entry *entries,
			unsigned nr,
			unsigned flags);
911 912 913
int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
			  struct kvm_kernel_irq_routing_entry *e,
			  const struct kvm_irq_routing_entry *ue);
914 915
void kvm_free_irq_routing(struct kvm *kvm);

916 917
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);

918 919 920 921 922 923
#else

static inline void kvm_free_irq_routing(struct kvm *kvm) {}

#endif

G
Gregory Haskins 已提交
924 925
#ifdef CONFIG_HAVE_KVM_EVENTFD

G
Gregory Haskins 已提交
926
void kvm_eventfd_init(struct kvm *kvm);
927 928 929
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);

#ifdef CONFIG_HAVE_KVM_IRQCHIP
930
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
G
Gregory Haskins 已提交
931
void kvm_irqfd_release(struct kvm *kvm);
932
void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
933 934 935 936 937 938 939 940
#else
/*
 * Stub for builds with CONFIG_HAVE_KVM_EVENTFD but without
 * CONFIG_HAVE_KVM_IRQCHIP: always fails with -EINVAL.
 */
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	return -EINVAL;
}

/*
 * No-op stub for builds with CONFIG_HAVE_KVM_EVENTFD but without
 * CONFIG_HAVE_KVM_IRQCHIP.
 */
static inline void kvm_irqfd_release(struct kvm *kvm) {}
#endif
G
Gregory Haskins 已提交
941 942 943

#else

G
Gregory Haskins 已提交
944
/* No-op stub used when CONFIG_HAVE_KVM_EVENTFD is not defined. */
static inline void kvm_eventfd_init(struct kvm *kvm) {}
945

946
/*
 * Stub used when CONFIG_HAVE_KVM_EVENTFD is not defined: ignores both
 * arguments and always returns -EINVAL.
 */
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	return -EINVAL;
}

/* No-op stub used when CONFIG_HAVE_KVM_EVENTFD is not defined. */
static inline void kvm_irqfd_release(struct kvm *kvm) {}
952

A
Alexander Graf 已提交
953
#ifdef CONFIG_HAVE_KVM_IRQCHIP
954 955 956 957 958
/*
 * Variant used when CONFIG_HAVE_KVM_IRQCHIP is defined but
 * CONFIG_HAVE_KVM_EVENTFD is not: stores @irq_rt into kvm->irq_routing
 * with rcu_assign_pointer() and does nothing else.
 */
static inline void kvm_irq_routing_update(struct kvm *kvm,
					  struct kvm_irq_routing_table *irq_rt)
{
	rcu_assign_pointer(kvm->irq_routing, irq_rt);
}
A
Alexander Graf 已提交
959
#endif
960

G
Gregory Haskins 已提交
961 962 963 964
/*
 * Stub used when CONFIG_HAVE_KVM_EVENTFD is not defined: ignores both
 * arguments and always returns -ENOSYS.
 */
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	return -ENOSYS;
}
G
Gregory Haskins 已提交
965 966 967

#endif /* CONFIG_HAVE_KVM_EVENTFD */

968
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
969 970
static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
971
	return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
972
}
973 974 975 976 977 978 979

bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);

#else

/*
 * Stub used when CONFIG_KVM_APIC_ARCHITECTURE is not defined: ignores
 * @vcpu and always returns true.
 */
static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }

A
Avi Kivity 已提交
980
#endif
981

982
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
983 984 985 986

long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg);

987 988
void kvm_free_all_assigned_devices(struct kvm *kvm);

989 990 991 992 993 994 995 996
#else

/*
 * Stub used when CONFIG_KVM_DEVICE_ASSIGNMENT is not defined: ignores
 * all arguments and always returns -ENOTTY.
 */
static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
						unsigned long arg)
{
	return -ENOTTY;
}

997 998
/* No-op stub used when CONFIG_KVM_DEVICE_ASSIGNMENT is not defined. */
static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}

999
#endif
1000

1001 1002 1003 1004 1005 1006 1007
/*
 * Set bit @req in vcpu->requests using set_bit().
 * The counterpart that tests and clears the bit is kvm_check_request().
 */
static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
	set_bit(req, &vcpu->requests);
}

static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
{
1008 1009 1010 1011 1012 1013
	if (test_bit(req, &vcpu->requests)) {
		clear_bit(req, &vcpu->requests);
		return true;
	} else {
		return false;
	}
1014 1015
}

1016 1017
extern bool kvm_rebooting;

S
Scott Wood 已提交
1018 1019 1020 1021 1022 1023
struct kvm_device_ops;

struct kvm_device {
	struct kvm_device_ops *ops;
	struct kvm *kvm;
	void *private;
1024
	struct list_head vm_node;
S
Scott Wood 已提交
1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052
};

/*
 * Callback table referenced by struct kvm_device::ops.
 * create, destroy, and name are mandatory
 */
struct kvm_device_ops {
	/* Name string for this set of operations. */
	const char *name;
	/* Mandatory callback; receives the device and a u32 type value. */
	int (*create)(struct kvm_device *dev, u32 type);

	/*
	 * Destroy is responsible for freeing dev.
	 *
	 * Destroy may be called before or after destructors are called
	 * on emulated I/O regions, depending on whether a reference is
	 * held by a vcpu or other kvm component that gets destroyed
	 * after the emulated I/O.
	 */
	void (*destroy)(struct kvm_device *dev);

	/*
	 * Attribute callbacks; each receives the device and a
	 * struct kvm_device_attr.
	 * NOTE(review): not listed as mandatory above, so presumably
	 * optional -- confirm against the callers.
	 */
	int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	/* ioctl callback; receives an ioctl number and an unsigned long argument. */
	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
		      unsigned long arg);
};

void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);

1053
extern struct kvm_device_ops kvm_mpic_ops;
1054
extern struct kvm_device_ops kvm_xics_ops;
1055

1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT

/*
 * Store @val in vcpu->spin_loop.in_spin_loop.
 * This variant is built when CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT is defined.
 */
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
	vcpu->spin_loop.in_spin_loop = val;
}
/*
 * Store @val in vcpu->spin_loop.dy_eligible.
 * This variant is built when CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT is defined.
 */
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
	vcpu->spin_loop.dy_eligible = val;
}

#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */

/* No-op stub used when CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT is not defined. */
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
}

/* No-op stub used when CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT is not defined. */
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}

1077 1078 1079 1080 1081
/*
 * Stub used when CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT is not defined:
 * ignores @vcpu and always returns true.
 */
static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
{
	return true;
}

1082
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
1083
#endif
1084