#ifndef _ASM_X86_PARAVIRT_H
#define _ASM_X86_PARAVIRT_H
/* Various instructions on x86 need to be replaced for
 * para-virtualization: those hooks are defined here. */
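
/*
 * Rough sketch of how the replacement works (illustrative only): a call
 * such as write_cr3() below expands to an indirect call through the
 * pv_*_ops tables, e.g.
 *
 *	write_cr3(x)  ->  PVOP_VCALL1(pv_mmu_ops.write_cr3, x)
 *
 * and the boot-time patching machinery may then rewrite that call site
 * with the native instruction on bare metal, or with a hypervisor call
 * otherwise.
 */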

#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>

#include <asm/paravirt_types.h>

#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>

static inline int paravirt_enabled(void)
{
	return pv_info.paravirt_enabled;
}

static inline void load_sp0(struct tss_struct *tss,
			     struct thread_struct *thread)
{
	PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
}

/* The paravirtualized CPUID instruction. */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
			   unsigned int *ecx, unsigned int *edx)
{
	PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
}

/*
 * These special macros can be used to get or set a debugging register
 */
static inline unsigned long paravirt_get_debugreg(int reg)
{
	return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg)
{
	PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}
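
/*
 * Example use (sketch only): callers keep the native-looking interface,
 * e.g.
 *
 *	unsigned long dr7;
 *	get_debugreg(dr7, 7);
 *	set_debugreg(0UL, 7);
 *
 * and end up in pv_cpu_ops.get_debugreg/set_debugreg rather than raw
 * mov-from/to-%db7 instructions.
 */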

static inline void clts(void)
{
	PVOP_VCALL0(pv_cpu_ops.clts);
}

static inline unsigned long read_cr0(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
}

static inline void write_cr0(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
}

static inline unsigned long read_cr2(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
}

static inline void write_cr2(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}

static inline unsigned long read_cr3(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}

static inline void write_cr3(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

static inline unsigned long read_cr4(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
static inline unsigned long read_cr4_safe(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

static inline void write_cr4(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}

#ifdef CONFIG_X86_64
static inline unsigned long read_cr8(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
}

static inline void write_cr8(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
}
#endif

static inline void arch_safe_halt(void)
{
	PVOP_VCALL0(pv_irq_ops.safe_halt);
}

static inline void halt(void)
{
	PVOP_VCALL0(pv_irq_ops.halt);
}

static inline void wbinvd(void)
{
	PVOP_VCALL0(pv_cpu_ops.wbinvd);
}

#define get_kernel_rpl()  (pv_info.kernel_rpl)

static inline u64 paravirt_read_msr(unsigned msr, int *err)
{
	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}

static inline int paravirt_rdmsr_regs(u32 *regs)
{
	return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs);
}

static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
}

static inline int paravirt_wrmsr_regs(u32 *regs)
{
	return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs);
}

/* These should all do BUG_ON(_err), but our headers are too tangled. */
#define rdmsr(msr, val1, val2)			\
do {						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	val1 = (u32)_l;				\
	val2 = _l >> 32;			\
} while (0)

#define wrmsr(msr, val1, val2)			\
do {						\
	paravirt_write_msr(msr, val1, val2);	\
} while (0)

#define rdmsrl(msr, val)			\
do {						\
	int _err;				\
	val = paravirt_read_msr(msr, &_err);	\
} while (0)

#define wrmsrl(msr, val)	wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
#define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)

/* rdmsr with exception handling */
#define rdmsr_safe(msr, a, b)			\
({						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	(*a) = (u32)_l;				\
	(*b) = _l >> 32;			\
	_err;					\
})
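
/*
 * Example (sketch; MSR_IA32_MISC_ENABLE is just an arbitrary MSR here):
 * since rdmsr_safe() hands back the #GP error code instead of faulting,
 * callers can probe for an MSR that may not exist on a given CPU or
 * hypervisor:
 *
 *	u32 lo, hi;
 *	if (rdmsr_safe(MSR_IA32_MISC_ENABLE, &lo, &hi) == 0)
 *		pr_info("misc_enable: %08x%08x\n", hi, lo);
 */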

#define rdmsr_safe_regs(regs)	paravirt_rdmsr_regs(regs)
#define wrmsr_safe_regs(regs)	paravirt_wrmsr_regs(regs)

static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
{
	int err;

	*p = paravirt_read_msr(msr, &err);
	return err;
}
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
	u32 gprs[8] = { 0 };
	int err;

	gprs[1] = msr;
	gprs[7] = 0x9c5a203a;

	err = paravirt_rdmsr_regs(gprs);

	*p = gprs[0] | ((u64)gprs[2] << 32);

	return err;
}

static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
{
	u32 gprs[8] = { 0 };

	gprs[0] = (u32)val;
	gprs[1] = msr;
	gprs[2] = val >> 32;
	gprs[7] = 0x9c5a203a;

	return paravirt_wrmsr_regs(gprs);
}

static inline u64 paravirt_read_tsc(void)
{
	return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
}

#define rdtscl(low)				\
do {						\
	u64 _l = paravirt_read_tsc();		\
	low = (int)_l;				\
} while (0)

#define rdtscll(val) (val = paravirt_read_tsc())

static inline unsigned long long paravirt_sched_clock(void)
{
	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}

static inline unsigned long long paravirt_read_pmc(int counter)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
}

#define rdpmc(counter, low, high)		\
do {						\
	u64 _l = paravirt_read_pmc(counter);	\
	low = (u32)_l;				\
	high = _l >> 32;			\
} while (0)

static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
}

#define rdtscp(low, high, aux)				\
do {							\
	int __aux;					\
	unsigned long __val = paravirt_rdtscp(&__aux);	\
	(low) = (u32)__val;				\
	(high) = (u32)(__val >> 32);			\
	(aux) = __aux;					\
} while (0)

#define rdtscpll(val, aux)				\
do {							\
	unsigned long __aux; 				\
	val = paravirt_rdtscp(&__aux);			\
	(aux) = __aux;					\
} while (0)

static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
}

static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
}

static inline void load_TR_desc(void)
{
	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
}
static inline void load_gdt(const struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
}
static inline void load_idt(const struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
}
static inline void set_ldt(const void *addr, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
static inline void store_gdt(struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
}
static inline void store_idt(struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
}
static inline unsigned long paravirt_store_tr(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
}
#define store_tr(tr)	((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
	PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
}

#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int gs)
{
	PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
}
#endif

static inline void write_ldt_entry(struct desc_struct *dt, int entry,
				   const void *desc)
{
	PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
}

static inline void write_gdt_entry(struct desc_struct *dt, int entry,
				   void *desc, int type)
{
	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
}

static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
	PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
}
static inline void set_iopl_mask(unsigned mask)
{
	PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
}

/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
	pv_cpu_ops.io_delay();
#ifdef REALLY_SLOW_IO
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
#endif
}

#ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
				    unsigned long start_esp)
{
	PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
		    phys_apicid, start_eip, start_esp);
}
#endif

static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
	PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
}

static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
	PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
}

static inline void __flush_tlb(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
}
static inline void __flush_tlb_global(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
static inline void __flush_tlb_single(unsigned long addr)
{
	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}

static inline void flush_tlb_others(const struct cpumask *cpumask,
				    struct mm_struct *mm,
				    unsigned long va)
{
	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
}

static inline int paravirt_pgd_alloc(struct mm_struct *mm)
{
	return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
}

static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
}

static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
}
static inline void paravirt_release_pte(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
}

static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
}

static inline void paravirt_release_pmd(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
}

static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
}
static inline void paravirt_release_pud(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
}

static inline void pte_update(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}
static inline void pmd_update(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp)
{
	PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp);
}

static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
}

static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr,
				    pmd_t *pmdp)
{
	PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp);
}

static inline pte_t __pte(pteval_t val)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pteval_t,
				   pv_mmu_ops.make_pte,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pteval_t,
				   pv_mmu_ops.make_pte,
				   val);

	return (pte_t) { .pte = ret };
}

static inline pteval_t pte_val(pte_t pte)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
				   pte.pte, (u64)pte.pte >> 32);
	else
		ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
				   pte.pte);

	return ret;
}

static inline pgd_t __pgd(pgdval_t val)
{
	pgdval_t ret;

	if (sizeof(pgdval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
				   val);

	return (pgd_t) { ret };
}

static inline pgdval_t pgd_val(pgd_t pgd)
{
	pgdval_t ret;

	if (sizeof(pgdval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
				    pgd.pgd, (u64)pgd.pgd >> 32);
	else
		ret =  PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
				    pgd.pgd);

	return ret;
}

#define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
					   pte_t *ptep)
{
	pteval_t ret;

	ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
			 mm, addr, ptep);

	return (pte_t) { .pte = ret };
}

static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
	else
		PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
			    mm, addr, ptep, pte.pte);
}
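
/*
 * Usage sketch (illustrative): a protection change reads the old pte,
 * computes the new one, and commits it, so a hypervisor back end can
 * batch or trap the update as one logical transaction; roughly what a
 * page-table walk in mprotect() does:
 *
 *	oldpte = ptep_modify_prot_start(mm, addr, ptep);
 *	newpte = pte_modify(oldpte, newprot);
 *	ptep_modify_prot_commit(mm, addr, ptep, newpte);
 */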

static inline void set_pte(pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
			    pte.pte, (u64)pte.pte >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
			    pte.pte);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
	else
		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t pmd)
{
	if (sizeof(pmdval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
	else
		PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
			    native_pmd_val(pmd));
}
#endif

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	pmdval_t val = native_pmd_val(pmd);

	if (sizeof(pmdval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
}

#if PAGETABLE_LEVELS >= 3
static inline pmd_t __pmd(pmdval_t val)
{
	pmdval_t ret;

	if (sizeof(pmdval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
				   val);

	return (pmd_t) { ret };
}

static inline pmdval_t pmd_val(pmd_t pmd)
{
	pmdval_t ret;

	if (sizeof(pmdval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
				    pmd.pmd, (u64)pmd.pmd >> 32);
	else
		ret =  PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
				    pmd.pmd);

	return ret;
}

static inline void set_pud(pud_t *pudp, pud_t pud)
{
	pudval_t val = native_pud_val(pud);

	if (sizeof(pudval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
			    val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
			    val);
}
#if PAGETABLE_LEVELS == 4
static inline pud_t __pud(pudval_t val)
{
	pudval_t ret;

	if (sizeof(pudval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
				   val);

	return (pud_t) { ret };
}

static inline pudval_t pud_val(pud_t pud)
{
	pudval_t ret;

	if (sizeof(pudval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
				    pud.pud, (u64)pud.pud >> 32);
	else
		ret =  PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
				    pud.pud);

	return ret;
}

static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	pgdval_t val = native_pgd_val(pgd);

	if (sizeof(pgdval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
			    val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
			    val);
}

static inline void pgd_clear(pgd_t *pgdp)
{
	set_pgd(pgdp, __pgd(0));
}

static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));
}

#endif	/* PAGETABLE_LEVELS == 4 */

#endif	/* PAGETABLE_LEVELS >= 3 */

#ifdef CONFIG_X86_PAE
/* Special-case pte-setting operations for PAE, which can't update a
   64-bit pte atomically */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
		    pte.pte, pte.pte >> 32);
}
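
/*
 * Illustrative example (kpte and page are hypothetical): a caller that
 * must replace a live kernel mapping in one shot uses the atomic variant
 * so a PAE pte is never visible half-written:
 *
 *	set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
 */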

static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
}

static inline void pmd_clear(pmd_t *pmdp)
{
	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
}
#else  /* !CONFIG_X86_PAE */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
	set_pte(ptep, pte);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	set_pte_at(mm, addr, ptep, __pte(0));
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}
#endif	/* CONFIG_X86_PAE */

#define  __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
{
	PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
}

static inline void arch_end_context_switch(struct task_struct *next)
{
	PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
}

#define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}

void arch_flush_lazy_mmu_mode(void);

static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
				phys_addr_t phys, pgprot_t flags)
{
	pv_mmu_ops.set_fixmap(idx, phys, flags);
}

#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)

static inline int arch_spin_is_locked(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
}

static inline int arch_spin_is_contended(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
}
#define arch_spin_is_contended	arch_spin_is_contended

static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
}

static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
						  unsigned long flags)
{
	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
}

static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
}

static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
}

#endif

#ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
#define PV_RESTORE_REGS "popl %edx; popl %ecx;"

/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS		"pushl %ecx;"
#define PV_RESTORE_ALL_CALLER_REGS	"popl  %ecx;"

#define PV_FLAGS_ARG "0"
#define PV_EXTRA_CLOBBERS
#define PV_VEXTRA_CLOBBERS
#else
/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS						\
	"push %rcx;"							\
	"push %rdx;"							\
	"push %rsi;"							\
	"push %rdi;"							\
	"push %r8;"							\
	"push %r9;"							\
	"push %r10;"							\
	"push %r11;"
#define PV_RESTORE_ALL_CALLER_REGS					\
	"pop %r11;"							\
	"pop %r10;"							\
	"pop %r9;"							\
	"pop %r8;"							\
	"pop %rdi;"							\
	"pop %rsi;"							\
	"pop %rdx;"							\
	"pop %rcx;"

/* We save only some registers, since saving all of them would be too much.
 * We clobber all caller-saved registers except the argument register. */
#define PV_SAVE_REGS "pushq %%rdi;"
#define PV_RESTORE_REGS "popq %%rdi;"
#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
#define PV_FLAGS_ARG "D"
#endif

/*
 * Generate a thunk around a function which saves all caller-save
 * registers except for the return value.  This allows C functions to
 * be called from assembler code where fewer than normal registers are
 * available.  It may also help code generation around calls from C
 * code if the common case doesn't use many registers.
 *
 * When a callee is wrapped in a thunk, the caller can assume that all
 * arg regs and all scratch registers are preserved across the
 * call. The return value in rax/eax will not be saved, even for void
 * functions.
 */
#define PV_CALLEE_SAVE_REGS_THUNK(func)					\
	extern typeof(func) __raw_callee_save_##func;			\
	static void *__##func##__ __used = func;			\
									\
	asm(".pushsection .text;"					\
	    "__raw_callee_save_" #func ": "				\
	    PV_SAVE_ALL_CALLER_REGS					\
	    "call " #func ";"						\
	    PV_RESTORE_ALL_CALLER_REGS					\
	    "ret;"							\
	    ".popsection")

/* Get a reference to a callee-save function */
#define PV_CALLEE_SAVE(func)						\
	((struct paravirt_callee_save) { __raw_callee_save_##func })

/* Promise that "func" already uses the right calling convention */
#define __PV_IS_CALLEE_SAVE(func)			\
	((struct paravirt_callee_save) { func })
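
/*
 * Usage sketch (modelled on the Xen backend; names illustrative): the C
 * implementation is wrapped in a thunk, and the thunked entry point is
 * what gets installed in the ops table:
 *
 *	static unsigned long xen_save_fl(void) { ... }
 *	PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
 *
 *	pv_irq_ops.save_fl = PV_CALLEE_SAVE(xen_save_fl);
 */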

static inline notrace unsigned long arch_local_save_flags(void)
{
	return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
}

static inline notrace void arch_local_irq_restore(unsigned long f)
{
	PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
}

static inline notrace void arch_local_irq_disable(void)
{
	PVOP_VCALLEE0(pv_irq_ops.irq_disable);
}

static inline notrace void arch_local_irq_enable(void)
{
	PVOP_VCALLEE0(pv_irq_ops.irq_enable);
}

static inline notrace unsigned long arch_local_irq_save(void)
{
	unsigned long f;

	f = arch_local_save_flags();
	arch_local_irq_disable();
	return f;
}


/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#undef __PVOP_VCALL
#undef PVOP_VCALL0
#undef PVOP_CALL0
#undef PVOP_VCALL1
#undef PVOP_CALL1
#undef PVOP_VCALL2
#undef PVOP_CALL2
#undef PVOP_VCALL3
#undef PVOP_CALL3
#undef PVOP_VCALL4
#undef PVOP_CALL4

extern void default_banner(void);

#else  /* __ASSEMBLY__ */

#define _PVSITE(ptype, clobbers, ops, word, algn)	\
771:;						\
	ops;					\
772:;						\
	.pushsection .parainstructions,"a";	\
	 .align	algn;				\
	 word 771b;				\
	 .byte ptype;				\
	 .byte 772b-771b;			\
	 .short clobbers;			\
	.popsection


#define COND_PUSH(set, mask, reg)			\
	.if ((~(set)) & mask); push %reg; .endif
#define COND_POP(set, mask, reg)			\
	.if ((~(set)) & mask); pop %reg; .endif

#ifdef CONFIG_X86_64

#define PV_SAVE_REGS(set)			\
	COND_PUSH(set, CLBR_RAX, rax);		\
	COND_PUSH(set, CLBR_RCX, rcx);		\
	COND_PUSH(set, CLBR_RDX, rdx);		\
	COND_PUSH(set, CLBR_RSI, rsi);		\
	COND_PUSH(set, CLBR_RDI, rdi);		\
	COND_PUSH(set, CLBR_R8, r8);		\
	COND_PUSH(set, CLBR_R9, r9);		\
	COND_PUSH(set, CLBR_R10, r10);		\
	COND_PUSH(set, CLBR_R11, r11)
#define PV_RESTORE_REGS(set)			\
	COND_POP(set, CLBR_R11, r11);		\
	COND_POP(set, CLBR_R10, r10);		\
	COND_POP(set, CLBR_R9, r9);		\
	COND_POP(set, CLBR_R8, r8);		\
	COND_POP(set, CLBR_RDI, rdi);		\
	COND_POP(set, CLBR_RSI, rsi);		\
	COND_POP(set, CLBR_RDX, rdx);		\
	COND_POP(set, CLBR_RCX, rcx);		\
	COND_POP(set, CLBR_RAX, rax)

#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
#define PARA_INDIRECT(addr)	*addr(%rip)
#else
#define PV_SAVE_REGS(set)			\
	COND_PUSH(set, CLBR_EAX, eax);		\
	COND_PUSH(set, CLBR_EDI, edi);		\
	COND_PUSH(set, CLBR_ECX, ecx);		\
	COND_PUSH(set, CLBR_EDX, edx)
#define PV_RESTORE_REGS(set)			\
	COND_POP(set, CLBR_EDX, edx);		\
	COND_POP(set, CLBR_ECX, ecx);		\
	COND_POP(set, CLBR_EDI, edi);		\
	COND_POP(set, CLBR_EAX, eax)

#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
#define PARA_INDIRECT(addr)	*%cs:addr
#endif

#define INTERRUPT_RETURN						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))

#define DISABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)

#define ENABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)

#define USERGS_SYSRET32							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))

#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX				\
	push %ecx; push %edx;				\
	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
	pop %edx; pop %ecx

#define ENABLE_INTERRUPTS_SYSEXIT					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))


#else	/* !CONFIG_X86_32 */

/*
 * If swapgs is used while the userspace stack is still current,
 * there's no way to call a pvop.  The PV replacement *must* be
 * inlined, or the swapgs instruction must be trapped and emulated.
 */
#define SWAPGS_UNSAFE_STACK						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
		  swapgs)
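
/*
 * Illustrative use (rough sketch of the 64-bit entry code): the macro is
 * emitted directly at syscall entry, while the user stack is still live:
 *
 *	ENTRY(system_call)
 *		SWAPGS_UNSAFE_STACK
 *		...
 */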

/*
 * Note: swapgs is very special, and in practice is either going to be
 * implemented with a single "swapgs" instruction or something very
 * special.  Either way, we don't need to save any registers for
 * it.
 */
#define SWAPGS								\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)		\
		 )

#define GET_CR2_INTO_RCX				\
	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);	\
	movq %rax, %rcx;				\
	xorq %rax, %rax;

#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
		  CLBR_NONE,						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))

#define USERGS_SYSRET64							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))

#define ENABLE_INTERRUPTS_SYSEXIT32					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#endif	/* CONFIG_X86_32 */

#endif /* __ASSEMBLY__ */
#else  /* CONFIG_PARAVIRT */
# define default_banner x86_init_noop
#endif /* !CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_H */