kvm.h 13.1 KB
Newer Older
A
Avi Kivity 已提交
1 2 3 4 5 6 7 8 9
#ifndef __KVM_H
#define __KVM_H

/*
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/types.h>
10
#include <linux/hardirq.h>
A
Avi Kivity 已提交
11 12 13
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
M
Markus Rechberger 已提交
14 15
#include <linux/signal.h>
#include <linux/sched.h>
A
Avi Kivity 已提交
16
#include <linux/mm.h>
17
#include <linux/preempt.h>
A
Alexey Dobriyan 已提交
18
#include <asm/signal.h>
A
Avi Kivity 已提交
19 20

#include <linux/kvm.h>
I
Ingo Molnar 已提交
21
#include <linux/kvm_para.h>
A
Avi Kivity 已提交
22

A
Avi Kivity 已提交
23
#define KVM_MAX_VCPUS 4
24
#define KVM_ALIAS_SLOTS 4
I
Izik Eidus 已提交
25
#define KVM_MEMORY_SLOTS 8
26 27
/* memory slots that are not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
28 29
#define KVM_PERMILLE_MMU_PAGES 20
#define KVM_MIN_ALLOC_MMU_PAGES 64
30
#define KVM_NUM_MMU_PAGES 1024
A
Avi Kivity 已提交
31 32
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
33
#define KVM_MAX_CPUID_ENTRIES 40
A
Avi Kivity 已提交
34

35 36
#define KVM_PIO_PAGE_OFFSET 1

37 38 39
/*
 * vcpu->requests bit members
 */
40
#define KVM_REQ_TLB_FLUSH          0
41

A
Avi Kivity 已提交
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/*
 * Address types:
 *
 *  gva - guest virtual address
 *  gpa - guest physical address
 *  gfn - guest frame number
 *  hva - host virtual address
 *  hpa - host physical address
 *  hfn - host frame number
 *
 * Physical addresses (gpa_t, hpa_t) are u64; virtual addresses and frame
 * numbers use unsigned long.
 */

typedef unsigned long  gva_t;
typedef u64            gpa_t;
typedef unsigned long  gfn_t;

typedef unsigned long  hva_t;
typedef u64            hpa_t;
typedef unsigned long  hfn_t;

61 62 63 64 65 66 67 68 69 70 71 72 73 74
/* Number of parent pte pointers held by one struct kvm_pte_chain. */
#define NR_PTE_CHAIN_ENTRIES 5

/*
 * One node of a chain of parent pte pointers.  Nodes are linked through
 * 'link'; struct kvm_mmu_page's parent_ptes list head is annotated as holding
 * kvm_pte_chain entries when the page is multimapped.
 */
struct kvm_pte_chain {
	u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
	struct hlist_node link;
};

/*
 * kvm_mmu_page_role, below, is defined as:
 *
 *   bits 0:3 - total guest paging levels (2-4, or zero for real mode)
 *   bits 4:7 - page table level for this shadow (1-4)
 *   bits 8:9 - page table quadrant for 2-level guests
 *   bits 10:15 - padding (pad_for_nice_hex_output)
 *   bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
 *   bits 17:19 - "access" - the user, writable, and nx bits of a huge page pde
 *
 * 'word' overlays the bitfields so the whole role can be handled as one
 * unsigned value.  The bit positions above assume the compiler allocates
 * bitfields starting from the least significant bit.
 */
union kvm_mmu_page_role {
	unsigned word;
	struct {
		unsigned glevels : 4;
		unsigned level : 4;
		unsigned quadrant : 2;
		unsigned pad_for_nice_hex_output : 6;
		unsigned metaphysical : 1;
		unsigned hugepage_access : 3;
	};
};

A
Avi Kivity 已提交
89 90
struct kvm_mmu_page {
	struct list_head link;
91 92 93 94 95 96 97 98 99
	struct hlist_node hash_link;

	/*
	 * The following two entries are used to key the shadow page in the
	 * hash table.
	 */
	gfn_t gfn;
	union kvm_mmu_page_role role;

100
	u64 *spt;
101 102
	/* hold the gfn of each spte inside spt */
	gfn_t *gfns;
A
Avi Kivity 已提交
103 104 105
	unsigned long slot_bitmap; /* One bit set per slot which has memory
				    * in this shadow page.
				    */
106
	int multimapped;         /* More than one parent_pte? */
107
	int root_count;          /* Currently serving as active root */
108 109 110 111
	union {
		u64 *parent_pte;               /* !multimapped */
		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
	};
A
Avi Kivity 已提交
112 113 114
};

struct kvm_vcpu;
115
extern struct kmem_cache *kvm_vcpu_cache;
A
Avi Kivity 已提交
116 117 118 119 120 121 122 123 124 125 126

/*
 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
 * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
 * mode.
 */
struct kvm_mmu {
	/* Per-mode operations; each takes the vcpu it acts on. */
	void (*new_cr3)(struct kvm_vcpu *vcpu);
	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
	void (*free)(struct kvm_vcpu *vcpu);
	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
	void (*prefetch_page)(struct kvm_vcpu *vcpu,
			      struct kvm_mmu_page *page);

	/* Root of the current page table hierarchy and its depth. */
	hpa_t root_hpa;
	int root_level;
	int shadow_root_level;

	u64 *pae_root;
};

136
#define KVM_NR_MEM_OBJS 40
137

H
Hollis Blanchard 已提交
138 139 140 141
/*
 * We don't want allocation failures within the mmu code, so we preallocate
 * enough memory for a single page fault in a cache.
 */
142 143 144 145 146
struct kvm_mmu_memory_cache {
	int nobjs;
	void *objects[KVM_NR_MEM_OBJS];
};

A
Avi Kivity 已提交
147 148 149 150 151 152
/*
 * Guest debugging state (embedded in each vcpu via KVM_VCPU_COMM).
 * NOTE(review): 'bp' presumably holds up to four breakpoint addresses -
 * confirm against the arch debug code.
 */
struct kvm_guest_debug {
	int enabled;
	unsigned long bp[4];
	int singlestep;
};

153 154 155 156 157 158
/*
 * Bookkeeping for a port I/O request.
 * NOTE(review): the per-field meanings are not visible in this header;
 * confirm them against the PIO emulation code before relying on them.
 */
struct kvm_pio_request {
	unsigned long count;
	int cur_count;
	struct page *guest_pages[2];
	unsigned guest_page_offset;
	int in;
	int port;
	int size;
	int string;
	int down;
	int rep;
};

166
struct kvm_vcpu_stat {
A
Avi Kivity 已提交
167 168 169 170 171 172 173 174 175 176 177
	u32 pf_fixed;
	u32 pf_guest;
	u32 tlb_flush;
	u32 invlpg;

	u32 exits;
	u32 io_exits;
	u32 mmio_exits;
	u32 signal_exits;
	u32 irq_window_exits;
	u32 halt_exits;
E
Eddie Dong 已提交
178
	u32 halt_wakeup;
A
Avi Kivity 已提交
179 180
	u32 request_irq_exits;
	u32 irq_exits;
181
	u32 host_state_reload;
182
	u32 efer_reload;
A
Avi Kivity 已提交
183
	u32 fpu_reload;
184 185
	u32 insn_emulation;
	u32 insn_emulation_fail;
A
Avi Kivity 已提交
186 187
};

188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
/*
 * An I/O device described by a table of callbacks.  Every callback receives
 * the device itself as 'this'.  The kvm_iodevice_*() inline wrappers below
 * invoke these callbacks; only 'destructor' is checked for NULL there, the
 * others are called unconditionally.
 */
struct kvm_io_device {
	void (*read)(struct kvm_io_device *this,
		     gpa_t addr,
		     int len,
		     void *val);
	void (*write)(struct kvm_io_device *this,
		      gpa_t addr,
		      int len,
		      const void *val);
	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
	void (*destructor)(struct kvm_io_device *this);

	/* Opaque pointer; not interpreted anywhere in this header. */
	void             *private;
};

/* Forward a read of 'len' bytes at 'addr' into 'val' to the device's read callback. */
static inline void kvm_iodevice_read(struct kvm_io_device *dev, gpa_t addr,
				     int len, void *val)
{
	(*dev->read)(dev, addr, len, val);
}

/* Forward a write of 'len' bytes from 'val' at 'addr' to the device's write callback. */
static inline void kvm_iodevice_write(struct kvm_io_device *dev, gpa_t addr,
				      int len, const void *val)
{
	(*dev->write)(dev, addr, len, val);
}

/* Return whatever the device's in_range callback reports for 'addr'. */
static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
{
	int claimed = (*dev->in_range)(dev, addr);

	return claimed;
}

static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
226 227
	if (dev->destructor)
		dev->destructor(dev);
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
}

/*
 * It would be nice to use something smarter than a linear search, TBD...
 * Thankfully we don't expect many devices to register (famous last words :),
 * so until then it will suffice.  At least it's abstracted so we can change
 * it in one place.
 */
struct kvm_io_bus {
	int                   dev_count;
#define NR_IOBUS_DEVS 6
	struct kvm_io_device *devs[NR_IOBUS_DEVS];
};

void kvm_io_bus_init(struct kvm_io_bus *bus);
void kvm_io_bus_destroy(struct kvm_io_bus *bus);
struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
			     struct kvm_io_device *dev);

248 249 250 251 252 253 254
/*
 * MMIO-related vcpu members, spliced into the vcpu by KVM_VCPU_COMM.
 * Expands to nothing when CONFIG_HAS_IOMEM is not defined.
 */
#ifdef CONFIG_HAS_IOMEM
#define KVM_VCPU_MMIO			\
	int mmio_needed;		\
	int mmio_read_completed;	\
	int mmio_is_write;		\
	int mmio_size;			\
	unsigned char mmio_data[8];	\
	gpa_t mmio_phys_addr;

#else
#define KVM_VCPU_MMIO

#endif
A
Avi Kivity 已提交
261

262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
/*
 * Member list shared by vcpu structures: expands to a run of member
 * declarations and ends with the KVM_VCPU_MMIO members.
 */
#define KVM_VCPU_COMM					\
	struct kvm *kvm;				\
	struct preempt_notifier preempt_notifier;	\
	int vcpu_id;					\
	struct mutex mutex;				\
	int   cpu;					\
	struct kvm_run *run;				\
	int guest_mode;					\
	unsigned long requests;				\
	struct kvm_guest_debug guest_debug;		\
	int fpu_active;					\
	int guest_fpu_loaded;				\
	wait_queue_head_t wq;				\
	int sigset_active;				\
	sigset_t sigset;				\
	struct kvm_vcpu_stat stat;			\
	KVM_VCPU_MMIO
A
Avi Kivity 已提交
279

280 281 282 283 284 285
/*
 * One memory alias (struct kvm holds KVM_ALIAS_SLOTS of these).
 * NOTE(review): presumably maps npages frames starting at base_gfn onto
 * target_gfn - confirm against unalias_gfn().
 */
struct kvm_mem_alias {
	gfn_t base_gfn;
	unsigned long npages;
	gfn_t target_gfn;
};

A
Avi Kivity 已提交
286 287 288 289
/*
 * One guest memory slot: 'npages' guest frames starting at 'base_gfn'.
 * struct kvm holds an array of these (memslots[]).
 */
struct kvm_memory_slot {
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long flags;
	unsigned long *rmap;
	unsigned long *dirty_bitmap;
	unsigned long userspace_addr;
	int user_alloc;
};

296
struct kvm_vm_stat {
A
Avi Kivity 已提交
297 298 299 300 301 302
	u32 mmu_shadow_zapped;
	u32 mmu_pte_write;
	u32 mmu_pte_updated;
	u32 mmu_pde_zapped;
	u32 mmu_flooded;
	u32 mmu_recycled;
303 304
};

A
Avi Kivity 已提交
305
struct kvm {
S
Shaohua Li 已提交
306
	struct mutex lock; /* protects everything except vcpus */
307 308
	int naliases;
	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
A
Avi Kivity 已提交
309
	int nmemslots;
310 311
	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
					KVM_PRIVATE_MEM_SLOTS];
312 313 314
	/*
	 * Hash table of struct kvm_mmu_page.
	 */
A
Avi Kivity 已提交
315
	struct list_head active_mmu_pages;
316 317 318
	unsigned int n_free_mmu_pages;
	unsigned int n_requested_mmu_pages;
	unsigned int n_alloc_mmu_pages;
319
	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
R
Rusty Russell 已提交
320
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
321
	struct list_head vm_list;
A
Avi Kivity 已提交
322
	struct file *filp;
323
	struct kvm_io_bus mmio_bus;
324
	struct kvm_io_bus pio_bus;
325
	struct kvm_pic *vpic;
E
Eddie Dong 已提交
326
	struct kvm_ioapic *vioapic;
327
	int round_robin_prev_vcpu;
328
	unsigned int tss_addr;
329
	struct page *apic_access_page;
330
	struct kvm_vm_stat stat;
A
Avi Kivity 已提交
331 332
};

333 334 335 336 337
/* Accessor for the VM's kvm_pic pointer (kvm->vpic). */
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
	struct kvm_pic *pic = kvm->vpic;

	return pic;
}

E
Eddie Dong 已提交
338 339 340 341 342
/* Accessor for the VM's kvm_ioapic pointer (kvm->vioapic). */
static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
{
	struct kvm_ioapic *ioapic = kvm->vioapic;

	return ioapic;
}

343 344
/*
 * Return nonzero when the VM has a kvm_pic attached (pic_irqchip() is
 * non-NULL), zero otherwise.
 */
static inline int irqchip_in_kernel(struct kvm *kvm)
{
	return pic_irqchip(kvm) != NULL;
}

A
Avi Kivity 已提交
348 349 350 351 352
/*
 * Limit/base pair.  The struct is packed, so 'base' follows the 16-bit
 * 'limit' directly with no padding in between.
 */
struct descriptor_table {
	u16 limit;
	unsigned long base;
} __attribute__((packed));

353 354 355 356 357 358
/*
 * The guest did something we don't support.
 *
 * Rate-limited KERN_ERR message prefixed with the current task's tgid and
 * the vcpu id; 'fmt' and any further arguments are passed on to printk.
 */
#define pr_unimpl(vcpu, fmt, ...)					\
 do {									\
	if (printk_ratelimit())						\
		printk(KERN_ERR "kvm: %i: cpu%i " fmt,			\
		       current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
 } while (0)
360

A
Avi Kivity 已提交
361 362 363
/*
 * Debug printing helpers.  kvm_printf() currently ignores its 'kvm' argument
 * and prints 'fmt' at KERN_DEBUG; vcpu_printf() forwards to it.  The macro
 * argument is parenthesized so that expressions such as 'p + 1' expand
 * correctly.
 */
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf((vcpu)->kvm, fmt)

R
Rusty Russell 已提交
364 365 366
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);

367 368 369
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);

370 371
void decache_vcpus_on_cpu(int cpu);

372

373
int kvm_init(void *opaque, unsigned int vcpu_size,
374
		  struct module *module);
375
void kvm_exit(void);
A
Avi Kivity 已提交
376

377
hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa);
A
Avi Kivity 已提交
378 379 380 381
/* Index of the most significant bit of an hpa_t, and a mask selecting it. */
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)

/* Return 1 when the top bit of 'hpa' is set, 0 otherwise. */
static inline int is_error_hpa(hpa_t hpa)
{
	return (hpa & HPA_ERR_MASK) != 0;
}
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
382
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
A
Avi Kivity 已提交
383

384
extern struct page *bad_page;
A
Avi Kivity 已提交
385

386
int is_error_page(struct page *page);
I
Izik Eidus 已提交
387
int kvm_is_error_hva(unsigned long addr);
388 389 390
int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc);
391 392 393
int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc);
394
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
A
Avi Kivity 已提交
395
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
396 397
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
398 399 400 401 402 403 404 405 406
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
			int len);
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
			 int offset, int len);
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
		    unsigned long len);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
A
Avi Kivity 已提交
407
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
408
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
A
Avi Kivity 已提交
409 410
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);

411
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
A
Avi Kivity 已提交
412
void kvm_resched(struct kvm_vcpu *vcpu);
413 414
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
415
void kvm_flush_remote_tlbs(struct kvm *kvm);
A
Avi Kivity 已提交
416

417 418
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg);
419 420 421 422
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
423 424 425

int kvm_dev_ioctl_check_extension(long ext);

426 427 428 429 430
int kvm_get_dirty_log(struct kvm *kvm,
			struct kvm_dirty_log *log, int *is_dirty);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
				struct kvm_dirty_log *log);

431 432 433 434 435 436 437
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct
				   kvm_userspace_memory_region *mem,
				   int user_alloc);
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg);
void kvm_arch_destroy_vm(struct kvm *kvm);
438

439 440 441
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);

442 443 444
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				    struct kvm_translation *tr);

445 446 447 448 449 450 451 452 453 454
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs);
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs);
int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
				    struct kvm_debug_guest *dbg);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);

455 456
int kvm_arch_init(void *opaque);
void kvm_arch_exit(void);
457

458 459 460 461 462 463 464
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
465
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
466 467 468 469 470 471 472 473

int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_arch_hardware_enable(void *garbage);
void kvm_arch_hardware_disable(void *garbage);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);

474 475 476 477
void kvm_free_physmem(struct kvm *kvm);

struct  kvm *kvm_arch_create_vm(void);
void kvm_arch_destroy_vm(struct kvm *kvm);
478

479 480
static inline void kvm_guest_enter(void)
{
481
	account_system_vtime(current);
482 483 484 485 486
	current->flags |= PF_VCPU;
}

static inline void kvm_guest_exit(void)
{
487
	account_system_vtime(current);
488 489 490
	current->flags &= ~PF_VCPU;
}

A
Avi Kivity 已提交
491 492 493 494 495 496
/*
 * Index of 'slot' within kvm->memslots[].  'slot' must point into that
 * array for the pointer subtraction to be meaningful.
 */
static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	return slot - &kvm->memslots[0];
}


497 498 499 500 501
/*
 * Kind tag stored in struct kvm_stats_debugfs_item.
 * NOTE(review): presumably selects between struct kvm_vm_stat and
 * struct kvm_vcpu_stat as the base for 'offset' - confirm in the debugfs code.
 */
enum kvm_stat_kind {
	KVM_STAT_VM = 0,
	KVM_STAT_VCPU = 1,
};

502 503 504
struct kvm_stats_debugfs_item {
	const char *name;
	int offset;
505
	enum kvm_stat_kind kind;
506 507 508 509
	struct dentry *dentry;
};
extern struct kvm_stats_debugfs_item debugfs_entries[];

A
Avi Kivity 已提交
510
#endif