#ifndef _ASM_X86_PARAVIRT_H
#define _ASM_X86_PARAVIRT_H
/* Various instructions on x86 need to be replaced for
 * para-virtualization: those hooks are defined here. */
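/*
 * Illustrative sketch of the pattern used throughout this header (a
 * reading aid only, not part of the interface): each wrapper turns a
 * privileged instruction into a patchable call through an ops table
 * declared in <asm/paravirt_types.h>, e.g.
 *
 *	static inline unsigned long read_cr3(void)
 *	{
 *		return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
 *	}
 *
 * A hypervisor installs its own handler in the ops structure; on native
 * hardware the call site is normally patched back to the plain
 * instruction at boot.
 */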

#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>

#include <asm/paravirt_types.h>

#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>

static inline int paravirt_enabled(void)
{
	return pv_info.paravirt_enabled;
}

static inline void load_sp0(struct tss_struct *tss,
			     struct thread_struct *thread)
{
	PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
}

/* The paravirtualized CPUID instruction. */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
			   unsigned int *ecx, unsigned int *edx)
{
	PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
}

/*
 * These special macros can be used to get or set a debugging register
 */
static inline unsigned long paravirt_get_debugreg(int reg)
{
	return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg)
{
	PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}
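/*
 * Usage sketch (illustrative only, not something this header defines):
 *
 *	unsigned long dr7;
 *
 *	get_debugreg(dr7, 7);
 *	set_debugreg(dr7 | 1, 7);
 *
 * reads DR7 through the paravirt hook and writes it back with bit 0 (the
 * local-enable bit for breakpoint 0) set.
 */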

static inline void clts(void)
{
	PVOP_VCALL0(pv_cpu_ops.clts);
}

static inline unsigned long read_cr0(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
}

static inline void write_cr0(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
}

static inline unsigned long read_cr2(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
}

static inline void write_cr2(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}

static inline unsigned long read_cr3(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}

static inline void write_cr3(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

static inline unsigned long read_cr4(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
static inline unsigned long read_cr4_safe(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

static inline void write_cr4(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}

#ifdef CONFIG_X86_64
static inline unsigned long read_cr8(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
}

static inline void write_cr8(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
}
#endif

static inline void arch_safe_halt(void)
{
	PVOP_VCALL0(pv_irq_ops.safe_halt);
}

static inline void halt(void)
{
	PVOP_VCALL0(pv_irq_ops.halt);
}

static inline void wbinvd(void)
{
	PVOP_VCALL0(pv_cpu_ops.wbinvd);
}

#define get_kernel_rpl()  (pv_info.kernel_rpl)

static inline u64 paravirt_read_msr(unsigned msr, int *err)
{
	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}

static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
}

/* These should all do BUG_ON(_err), but our headers are too tangled. */
#define rdmsr(msr, val1, val2)			\
do {						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	val1 = (u32)_l;				\
	val2 = _l >> 32;			\
} while (0)

#define wrmsr(msr, val1, val2)			\
do {						\
	paravirt_write_msr(msr, val1, val2);	\
} while (0)

#define rdmsrl(msr, val)			\
do {						\
	int _err;				\
	val = paravirt_read_msr(msr, &_err);	\
} while (0)

#define wrmsrl(msr, val)	wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
#define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)

/* rdmsr with exception handling */
#define rdmsr_safe(msr, a, b)			\
({						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	(*a) = (u32)_l;				\
	(*b) = _l >> 32;			\
	_err;					\
})

static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
{
	int err;

	*p = paravirt_read_msr(msr, &err);
	return err;
}
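/*
 * Usage sketch for the accessors above (illustrative; 0x1b is just a
 * placeholder MSR number, not defined by this header):
 *
 *	u32 lo, hi;
 *	u64 val;
 *	int err;
 *
 *	rdmsr(0x1b, lo, hi);
 *	rdmsrl(0x1b, val);
 *	wrmsrl(0x1b, val);
 *	err = rdmsr_safe(0x1b, &lo, &hi);
 *
 * The plain variants ignore faults (see the BUG_ON note above); the _safe
 * variants report a non-zero error code if the access faulted.
 */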

static inline u64 paravirt_read_tsc(void)
{
	return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
}

#define rdtscl(low)				\
do {						\
	u64 _l = paravirt_read_tsc();		\
	low = (int)_l;				\
} while (0)

#define rdtscll(val) (val = paravirt_read_tsc())

static inline unsigned long long paravirt_sched_clock(void)
{
	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}

struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;

static inline u64 paravirt_steal_clock(int cpu)
{
	return PVOP_CALL1(u64, pv_time_ops.steal_clock, cpu);
}

static inline unsigned long long paravirt_read_pmc(int counter)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
}

#define rdpmc(counter, low, high)		\
do {						\
	u64 _l = paravirt_read_pmc(counter);	\
	low = (u32)_l;				\
	high = _l >> 32;			\
} while (0)

#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))

static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
}

#define rdtscp(low, high, aux)				\
do {							\
	int __aux;					\
	unsigned long __val = paravirt_rdtscp(&__aux);	\
	(low) = (u32)__val;				\
	(high) = (u32)(__val >> 32);			\
	(aux) = __aux;					\
} while (0)

#define rdtscpll(val, aux)				\
do {							\
	unsigned long __aux; 				\
	val = paravirt_rdtscp(&__aux);			\
	(aux) = __aux;					\
} while (0)

static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
}

static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
}

static inline void load_TR_desc(void)
{
	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
}
static inline void load_gdt(const struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
}
static inline void load_idt(const struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
}
static inline void set_ldt(const void *addr, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
static inline void store_gdt(struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
}
static inline void store_idt(struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
}
static inline unsigned long paravirt_store_tr(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
}
#define store_tr(tr)	((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
	PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
}

#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int gs)
{
	PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
}
#endif

static inline void write_ldt_entry(struct desc_struct *dt, int entry,
				   const void *desc)
{
	PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
}

static inline void write_gdt_entry(struct desc_struct *dt, int entry,
				   void *desc, int type)
{
	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
}

static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
	PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
}
static inline void set_iopl_mask(unsigned mask)
{
	PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
}

/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
	pv_cpu_ops.io_delay();
#ifdef REALLY_SLOW_IO
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
#endif
}

#ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
				    unsigned long start_esp)
{
	PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
		    phys_apicid, start_eip, start_esp);
}
#endif

static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
	PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
}

static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
	PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
}

static inline void __flush_tlb(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
}
static inline void __flush_tlb_global(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
static inline void __flush_tlb_single(unsigned long addr)
{
	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}

static inline void flush_tlb_others(const struct cpumask *cpumask,
				    struct mm_struct *mm,
				    unsigned long start,
				    unsigned long end)
{
	PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
}

static inline int paravirt_pgd_alloc(struct mm_struct *mm)
{
	return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
}

static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
}

static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
}
static inline void paravirt_release_pte(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
}

static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
}

static inline void paravirt_release_pmd(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
}

static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
}
static inline void paravirt_release_pud(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
}

static inline void pte_update(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}
static inline void pmd_update(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp)
{
	PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp);
}

static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
}

static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr,
				    pmd_t *pmdp)
{
	PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp);
}

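/*
 * Page table value constructors and accessors.  When the entry type is
 * wider than a native word (32-bit PAE, where pteval_t is 64 bits), the
 * value is split into two 32-bit call arguments; otherwise it is passed
 * in a single register.  The sizeof() comparison is a compile-time
 * constant, so only one of the two branches survives.
 */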
static inline pte_t __pte(pteval_t val)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pteval_t,
				   pv_mmu_ops.make_pte,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pteval_t,
				   pv_mmu_ops.make_pte,
				   val);

	return (pte_t) { .pte = ret };
}

static inline pteval_t pte_val(pte_t pte)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
				   pte.pte, (u64)pte.pte >> 32);
	else
		ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
				   pte.pte);

	return ret;
}

static inline pgd_t __pgd(pgdval_t val)
{
	pgdval_t ret;

	if (sizeof(pgdval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
				   val);

	return (pgd_t) { ret };
}

static inline pgdval_t pgd_val(pgd_t pgd)
{
	pgdval_t ret;

	if (sizeof(pgdval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
				    pgd.pgd, (u64)pgd.pgd >> 32);
	else
		ret =  PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
				    pgd.pgd);

	return ret;
}

#define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
					   pte_t *ptep)
{
	pteval_t ret;

	ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
			 mm, addr, ptep);

	return (pte_t) { .pte = ret };
}

static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
	else
		PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
			    mm, addr, ptep, pte.pte);
}

static inline void set_pte(pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
			    pte.pte, (u64)pte.pte >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
			    pte.pte);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
	else
		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}

static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t pmd)
{
	if (sizeof(pmdval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
	else
		PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
			    native_pmd_val(pmd));
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	pmdval_t val = native_pmd_val(pmd);

	if (sizeof(pmdval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
}

#if PAGETABLE_LEVELS >= 3
static inline pmd_t __pmd(pmdval_t val)
{
	pmdval_t ret;

	if (sizeof(pmdval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
				   val);

	return (pmd_t) { ret };
}

static inline pmdval_t pmd_val(pmd_t pmd)
{
	pmdval_t ret;

	if (sizeof(pmdval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
				    pmd.pmd, (u64)pmd.pmd >> 32);
	else
		ret =  PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
				    pmd.pmd);

	return ret;
}

static inline void set_pud(pud_t *pudp, pud_t pud)
{
	pudval_t val = native_pud_val(pud);

	if (sizeof(pudval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
			    val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
			    val);
}
#if PAGETABLE_LEVELS == 4
static inline pud_t __pud(pudval_t val)
{
	pudval_t ret;

	if (sizeof(pudval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
				   val);

	return (pud_t) { ret };
}

static inline pudval_t pud_val(pud_t pud)
{
	pudval_t ret;

	if (sizeof(pudval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
				    pud.pud, (u64)pud.pud >> 32);
	else
		ret =  PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
				    pud.pud);

	return ret;
}

static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	pgdval_t val = native_pgd_val(pgd);

	if (sizeof(pgdval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
			    val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
			    val);
}

static inline void pgd_clear(pgd_t *pgdp)
{
	set_pgd(pgdp, __pgd(0));
}

static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));
}

#endif	/* PAGETABLE_LEVELS == 4 */

#endif	/* PAGETABLE_LEVELS >= 3 */

#ifdef CONFIG_X86_PAE
/* Special-case pte-setting operations for PAE, which can't update a
   64-bit pte atomically */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
		    pte.pte, pte.pte >> 32);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
}

static inline void pmd_clear(pmd_t *pmdp)
{
	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
}
#else  /* !CONFIG_X86_PAE */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
	set_pte(ptep, pte);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	set_pte_at(mm, addr, ptep, __pte(0));
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}
#endif	/* CONFIG_X86_PAE */

#define  __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
{
	PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
}

static inline void arch_end_context_switch(struct task_struct *next)
{
	PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
}

#define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}

void arch_flush_lazy_mmu_mode(void);

static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
				phys_addr_t phys, pgprot_t flags)
{
	pv_mmu_ops.set_fixmap(idx, phys, flags);
}

#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)

static inline int arch_spin_is_locked(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
}

static inline int arch_spin_is_contended(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
}
#define arch_spin_is_contended	arch_spin_is_contended

static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
}

static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
						  unsigned long flags)
{
	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
}

static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
}

static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
}

#endif

#ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
#define PV_RESTORE_REGS "popl %edx; popl %ecx;"

/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS		"pushl %ecx;"
#define PV_RESTORE_ALL_CALLER_REGS	"popl  %ecx;"

#define PV_FLAGS_ARG "0"
#define PV_EXTRA_CLOBBERS
#define PV_VEXTRA_CLOBBERS
#else
/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS						\
	"push %rcx;"							\
	"push %rdx;"							\
	"push %rsi;"							\
	"push %rdi;"							\
	"push %r8;"							\
	"push %r9;"							\
	"push %r10;"							\
	"push %r11;"
#define PV_RESTORE_ALL_CALLER_REGS					\
	"pop %r11;"							\
	"pop %r10;"							\
	"pop %r9;"							\
	"pop %r8;"							\
	"pop %rdi;"							\
	"pop %rsi;"							\
	"pop %rdx;"							\
	"pop %rcx;"

/* We save some registers, but saving all of them would be too much.  We
 * clobber all caller-saved registers but the argument parameter. */
#define PV_SAVE_REGS "pushq %%rdi;"
#define PV_RESTORE_REGS "popq %%rdi;"
#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
#define PV_FLAGS_ARG "D"
#endif

/*
 * Generate a thunk around a function which saves all caller-save
 * registers except for the return value.  This allows C functions to
 * be called from assembler code where fewer than normal registers are
 * available.  It may also help code generation around calls from C
 * code if the common case doesn't use many registers.
 *
 * When a callee is wrapped in a thunk, the caller can assume that all
 * arg regs and all scratch registers are preserved across the
 * call. The return value in rax/eax will not be saved, even for void
 * functions.
 */
#define PV_CALLEE_SAVE_REGS_THUNK(func)					\
	extern typeof(func) __raw_callee_save_##func;			\
	static void *__##func##__ __used = func;			\
									\
	asm(".pushsection .text;"					\
	    "__raw_callee_save_" #func ": "				\
	    PV_SAVE_ALL_CALLER_REGS					\
	    "call " #func ";"						\
	    PV_RESTORE_ALL_CALLER_REGS					\
	    "ret;"							\
	    ".popsection")

/* Get a reference to a callee-save function */
#define PV_CALLEE_SAVE(func)						\
	((struct paravirt_callee_save) { __raw_callee_save_##func })

/* Promise that "func" already uses the right calling convention */
#define __PV_IS_CALLEE_SAVE(func)			\
	((struct paravirt_callee_save) { func })
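
/*
 * Usage sketch (the function name is hypothetical, for illustration
 * only):
 *
 *	static unsigned long my_save_fl(void);
 *	PV_CALLEE_SAVE_REGS_THUNK(my_save_fl);
 *
 *	pv_irq_ops.save_fl = PV_CALLEE_SAVE(my_save_fl);
 *
 * The thunk preserves all caller-save registers around the C call, which
 * lets the PVOP_CALLEE* wrappers in this header declare almost nothing
 * as clobbered at their call sites.
 */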

static inline notrace unsigned long arch_local_save_flags(void)
{
	return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
}

static inline notrace void arch_local_irq_restore(unsigned long f)
{
	PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
}

static inline notrace void arch_local_irq_disable(void)
{
	PVOP_VCALLEE0(pv_irq_ops.irq_disable);
}

static inline notrace void arch_local_irq_enable(void)
{
	PVOP_VCALLEE0(pv_irq_ops.irq_enable);
}

static inline notrace unsigned long arch_local_irq_save(void)
{
	unsigned long f;

	f = arch_local_save_flags();
	arch_local_irq_disable();
	return f;
}


/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#undef __PVOP_VCALL
#undef PVOP_VCALL0
#undef PVOP_CALL0
#undef PVOP_VCALL1
#undef PVOP_CALL1
#undef PVOP_VCALL2
#undef PVOP_CALL2
#undef PVOP_VCALL3
#undef PVOP_CALL3
#undef PVOP_VCALL4
#undef PVOP_CALL4

extern void default_banner(void);

#else  /* __ASSEMBLY__ */

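/*
 * _PVSITE emits the instruction sequence "ops" and records the site in
 * the .parainstructions section: its address, the paravirt op type, the
 * length of the replaceable sequence (772b-771b) and the registers it
 * may clobber, so the boot-time patcher can later rewrite the site.
 */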
#define _PVSITE(ptype, clobbers, ops, word, algn)	\
771:;						\
	ops;					\
772:;						\
	.pushsection .parainstructions,"a";	\
	 .align	algn;				\
	 word 771b;				\
	 .byte ptype;				\
	 .byte 772b-771b;			\
	 .short clobbers;			\
	.popsection


#define COND_PUSH(set, mask, reg)			\
	.if ((~(set)) & mask); push %reg; .endif
#define COND_POP(set, mask, reg)			\
	.if ((~(set)) & mask); pop %reg; .endif

#ifdef CONFIG_X86_64

#define PV_SAVE_REGS(set)			\
	COND_PUSH(set, CLBR_RAX, rax);		\
	COND_PUSH(set, CLBR_RCX, rcx);		\
	COND_PUSH(set, CLBR_RDX, rdx);		\
	COND_PUSH(set, CLBR_RSI, rsi);		\
	COND_PUSH(set, CLBR_RDI, rdi);		\
	COND_PUSH(set, CLBR_R8, r8);		\
	COND_PUSH(set, CLBR_R9, r9);		\
	COND_PUSH(set, CLBR_R10, r10);		\
	COND_PUSH(set, CLBR_R11, r11)
#define PV_RESTORE_REGS(set)			\
	COND_POP(set, CLBR_R11, r11);		\
	COND_POP(set, CLBR_R10, r10);		\
	COND_POP(set, CLBR_R9, r9);		\
	COND_POP(set, CLBR_R8, r8);		\
	COND_POP(set, CLBR_RDI, rdi);		\
	COND_POP(set, CLBR_RSI, rsi);		\
	COND_POP(set, CLBR_RDX, rdx);		\
	COND_POP(set, CLBR_RCX, rcx);		\
	COND_POP(set, CLBR_RAX, rax)

#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
#define PARA_INDIRECT(addr)	*addr(%rip)
#else
#define PV_SAVE_REGS(set)			\
	COND_PUSH(set, CLBR_EAX, eax);		\
	COND_PUSH(set, CLBR_EDI, edi);		\
	COND_PUSH(set, CLBR_ECX, ecx);		\
	COND_PUSH(set, CLBR_EDX, edx)
#define PV_RESTORE_REGS(set)			\
	COND_POP(set, CLBR_EDX, edx);		\
	COND_POP(set, CLBR_ECX, ecx);		\
	COND_POP(set, CLBR_EDI, edi);		\
	COND_POP(set, CLBR_EAX, eax)

#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
#define PARA_INDIRECT(addr)	*%cs:addr
#endif

#define INTERRUPT_RETURN						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))

#define DISABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)

#define ENABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)

#define USERGS_SYSRET32							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))

#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX				\
	push %ecx; push %edx;				\
	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
	pop %edx; pop %ecx

#define ENABLE_INTERRUPTS_SYSEXIT					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))


#else	/* !CONFIG_X86_32 */

/*
 * If swapgs is used while the userspace stack is still current,
 * there's no way to call a pvop.  The PV replacement *must* be
 * inlined, or the swapgs instruction must be trapped and emulated.
 */
#define SWAPGS_UNSAFE_STACK						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
		  swapgs)

/*
 * Note: swapgs is very special; in practice it is either implemented
 * with a single "swapgs" instruction or with something equally
 * special.  Either way, we don't need to save any registers for it.
 */
#define SWAPGS								\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)		\
		 )

#define GET_CR2_INTO_RAX				\
	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)

#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
		  CLBR_NONE,						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))

#define USERGS_SYSRET64							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))

#define ENABLE_INTERRUPTS_SYSEXIT32					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#endif	/* CONFIG_X86_32 */

#endif /* __ASSEMBLY__ */
#else  /* CONFIG_PARAVIRT */
# define default_banner x86_init_noop
#endif /* !CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_H */