#ifndef _ASM_X86_PARAVIRT_H
#define _ASM_X86_PARAVIRT_H
/* Various instructions on x86 need to be replaced for
 * para-virtualization: those hooks are defined here. */

#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>

#include <asm/paravirt_types.h>

#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>

static inline int paravirt_enabled(void)
{
	return pv_info.paravirt_enabled;
}

static inline void load_sp0(struct tss_struct *tss,
			     struct thread_struct *thread)
{
	PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
}

/* The paravirtualized CPUID instruction. */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
			   unsigned int *ecx, unsigned int *edx)
{
	PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
}

/*
 * These special macros can be used to get or set a debugging register
 */
static inline unsigned long paravirt_get_debugreg(int reg)
{
	return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg)
{
	PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}

static inline void clts(void)
{
	PVOP_VCALL0(pv_cpu_ops.clts);
}

static inline unsigned long read_cr0(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
}

static inline void write_cr0(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
}

static inline unsigned long read_cr2(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
}

static inline void write_cr2(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}

static inline unsigned long read_cr3(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}

static inline void write_cr3(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

static inline unsigned long read_cr4(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
static inline unsigned long read_cr4_safe(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

static inline void write_cr4(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}

#ifdef CONFIG_X86_64
static inline unsigned long read_cr8(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
}

static inline void write_cr8(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
}
#endif

static inline void arch_safe_halt(void)
{
	PVOP_VCALL0(pv_irq_ops.safe_halt);
}

static inline void halt(void)
{
	PVOP_VCALL0(pv_irq_ops.halt);
}

static inline void wbinvd(void)
{
	PVOP_VCALL0(pv_cpu_ops.wbinvd);
}

#define get_kernel_rpl()  (pv_info.kernel_rpl)

static inline u64 paravirt_read_msr(unsigned msr, int *err)
{
	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}

static inline int paravirt_rdmsr_regs(u32 *regs)
{
	return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs);
}

static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
}

static inline int paravirt_wrmsr_regs(u32 *regs)
{
	return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs);
}

/* These should all do BUG_ON(_err), but our headers are too tangled. */
#define rdmsr(msr, val1, val2)			\
do {						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	val1 = (u32)_l;				\
	val2 = _l >> 32;			\
} while (0)

#define wrmsr(msr, val1, val2)			\
do {						\
	paravirt_write_msr(msr, val1, val2);	\
} while (0)

#define rdmsrl(msr, val)			\
do {						\
	int _err;				\
	val = paravirt_read_msr(msr, &_err);	\
} while (0)

#define wrmsrl(msr, val)	wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
#define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)

/* rdmsr with exception handling */
#define rdmsr_safe(msr, a, b)			\
({						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	(*a) = (u32)_l;				\
	(*b) = _l >> 32;			\
	_err;					\
})
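
/*
 * Illustrative usage (a sketch, not part of this header): rdmsr_safe()
 * hands back the exception status, so callers can probe MSRs that might
 * not exist, e.g.:
 *
 *	u32 lo, hi;
 *
 *	if (rdmsr_safe(MSR_IA32_MISC_ENABLE, &lo, &hi) == 0)
 *		;	/* use lo/hi */
 */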

#define rdmsr_safe_regs(regs)	paravirt_rdmsr_regs(regs)
#define wrmsr_safe_regs(regs)	paravirt_wrmsr_regs(regs)

static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
{
	int err;

	*p = paravirt_read_msr(msr, &err);
	return err;
}
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
	u32 gprs[8] = { 0 };
	int err;

	gprs[1] = msr;
	gprs[7] = 0x9c5a203a;

	err = paravirt_rdmsr_regs(gprs);

	*p = gprs[0] | ((u64)gprs[2] << 32);

	return err;
}

static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
{
	u32 gprs[8] = { 0 };

	gprs[0] = (u32)val;
	gprs[1] = msr;
	gprs[2] = val >> 32;
	gprs[7] = 0x9c5a203a;

	return paravirt_wrmsr_regs(gprs);
}

static inline u64 paravirt_read_tsc(void)
{
	return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
}

#define rdtscl(low)				\
do {						\
	u64 _l = paravirt_read_tsc();		\
	low = (int)_l;				\
} while (0)

#define rdtscll(val) (val = paravirt_read_tsc())

static inline unsigned long long paravirt_sched_clock(void)
{
	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}

struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;

static inline u64 paravirt_steal_clock(int cpu)
{
	return PVOP_CALL1(u64, pv_time_ops.steal_clock, cpu);
}

static inline unsigned long long paravirt_read_pmc(int counter)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
}

#define rdpmc(counter, low, high)		\
do {						\
	u64 _l = paravirt_read_pmc(counter);	\
	low = (u32)_l;				\
	high = _l >> 32;			\
} while (0)

static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
}

#define rdtscp(low, high, aux)				\
do {							\
	int __aux;					\
	unsigned long __val = paravirt_rdtscp(&__aux);	\
	(low) = (u32)__val;				\
	(high) = (u32)(__val >> 32);			\
	(aux) = __aux;					\
} while (0)

#define rdtscpll(val, aux)				\
do {							\
	unsigned long __aux; 				\
	val = paravirt_rdtscp(&__aux);			\
	(aux) = __aux;					\
} while (0)

static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
}

static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
}

static inline void load_TR_desc(void)
{
	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
}
static inline void load_gdt(const struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
}
static inline void load_idt(const struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
}
static inline void set_ldt(const void *addr, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
static inline void store_gdt(struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
}
static inline void store_idt(struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
}
static inline unsigned long paravirt_store_tr(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
}
#define store_tr(tr)	((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
	PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
}

#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int gs)
{
	PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
}
#endif

static inline void write_ldt_entry(struct desc_struct *dt, int entry,
				   const void *desc)
{
	PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
}

static inline void write_gdt_entry(struct desc_struct *dt, int entry,
				   void *desc, int type)
{
	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
}

static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
	PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
}
static inline void set_iopl_mask(unsigned mask)
{
	PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
}

/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
	pv_cpu_ops.io_delay();
#ifdef REALLY_SLOW_IO
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
#endif
}

#ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
				    unsigned long start_esp)
{
	PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
		    phys_apicid, start_eip, start_esp);
}
#endif

static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
	PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
}

static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
	PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
}

static inline void __flush_tlb(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
}
static inline void __flush_tlb_global(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
static inline void __flush_tlb_single(unsigned long addr)
{
	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}

static inline void flush_tlb_others(const struct cpumask *cpumask,
				    struct mm_struct *mm,
				    unsigned long va)
{
	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
}

static inline int paravirt_pgd_alloc(struct mm_struct *mm)
{
	return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
}

static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
}

static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
}
static inline void paravirt_release_pte(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
}

static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
}

static inline void paravirt_release_pmd(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
}

static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
}
static inline void paravirt_release_pud(unsigned long pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
}

static inline void pte_update(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}
static inline void pmd_update(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp)
{
	PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp);
}

static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
}

static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr,
				    pmd_t *pmdp)
{
	PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp);
}

static inline pte_t __pte(pteval_t val)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pteval_t,
				   pv_mmu_ops.make_pte,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pteval_t,
				   pv_mmu_ops.make_pte,
				   val);

	return (pte_t) { .pte = ret };
}

static inline pteval_t pte_val(pte_t pte)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
				   pte.pte, (u64)pte.pte >> 32);
	else
		ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
				   pte.pte);

	return ret;
}

static inline pgd_t __pgd(pgdval_t val)
{
	pgdval_t ret;

	if (sizeof(pgdval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
				   val);

	return (pgd_t) { ret };
}

static inline pgdval_t pgd_val(pgd_t pgd)
{
	pgdval_t ret;

	if (sizeof(pgdval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
				    pgd.pgd, (u64)pgd.pgd >> 32);
	else
		ret =  PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
				    pgd.pgd);

	return ret;
}

#define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
					   pte_t *ptep)
{
	pteval_t ret;

	ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
			 mm, addr, ptep);

	return (pte_t) { .pte = ret };
}

static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
	else
		PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
			    mm, addr, ptep, pte.pte);
}
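
/*
 * Illustrative usage (a sketch, mirroring callers such as mprotect): the
 * two hooks bracket a read-modify-write of a pte so a hypervisor backend
 * can treat it as one transaction:
 *
 *	pte = ptep_modify_prot_start(mm, addr, ptep);
 *	pte = pte_modify(pte, newprot);
 *	ptep_modify_prot_commit(mm, addr, ptep, pte);
 */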

static inline void set_pte(pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
			    pte.pte, (u64)pte.pte >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
			    pte.pte);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
	else
		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t pmd)
{
	if (sizeof(pmdval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
	else
		PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
			    native_pmd_val(pmd));
}
#endif

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	pmdval_t val = native_pmd_val(pmd);

	if (sizeof(pmdval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
}

#if PAGETABLE_LEVELS >= 3
static inline pmd_t __pmd(pmdval_t val)
{
	pmdval_t ret;

	if (sizeof(pmdval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
				   val);

	return (pmd_t) { ret };
}

static inline pmdval_t pmd_val(pmd_t pmd)
{
	pmdval_t ret;

	if (sizeof(pmdval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
				    pmd.pmd, (u64)pmd.pmd >> 32);
	else
		ret =  PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
				    pmd.pmd);

	return ret;
}

static inline void set_pud(pud_t *pudp, pud_t pud)
{
	pudval_t val = native_pud_val(pud);

	if (sizeof(pudval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
			    val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
			    val);
}
#if PAGETABLE_LEVELS == 4
static inline pud_t __pud(pudval_t val)
{
	pudval_t ret;

	if (sizeof(pudval_t) > sizeof(long))
		ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
				   val, (u64)val >> 32);
	else
		ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
				   val);

	return (pud_t) { ret };
}

static inline pudval_t pud_val(pud_t pud)
{
	pudval_t ret;

	if (sizeof(pudval_t) > sizeof(long))
		ret =  PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
				    pud.pud, (u64)pud.pud >> 32);
	else
		ret =  PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
				    pud.pud);

	return ret;
}

static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	pgdval_t val = native_pgd_val(pgd);

	if (sizeof(pgdval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
			    val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
			    val);
}

static inline void pgd_clear(pgd_t *pgdp)
{
	set_pgd(pgdp, __pgd(0));
}

static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));
}

#endif	/* PAGETABLE_LEVELS == 4 */

#endif	/* PAGETABLE_LEVELS >= 3 */

#ifdef CONFIG_X86_PAE
/* Special-case pte-setting operations for PAE, which can't update a
   64-bit pte atomically */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
		    pte.pte, pte.pte >> 32);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
}

static inline void pmd_clear(pmd_t *pmdp)
{
	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
}
#else  /* !CONFIG_X86_PAE */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
	set_pte(ptep, pte);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	set_pte_at(mm, addr, ptep, __pte(0));
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}
#endif	/* CONFIG_X86_PAE */

#define  __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
{
	PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
}

static inline void arch_end_context_switch(struct task_struct *next)
{
	PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
}

#define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}
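
/*
 * Illustrative usage (a sketch): callers batch a run of page-table updates
 * so a paravirt backend can coalesce the resulting hypercalls, roughly:
 *
 *	arch_enter_lazy_mmu_mode();
 *	for (; addr < end; addr += PAGE_SIZE, ptep++)
 *		set_pte_at(mm, addr, ptep, pte);
 *	arch_leave_lazy_mmu_mode();
 */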

void arch_flush_lazy_mmu_mode(void);

static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
				phys_addr_t phys, pgprot_t flags)
{
	pv_mmu_ops.set_fixmap(idx, phys, flags);
}

#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)

static inline int arch_spin_is_locked(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
}

static inline int arch_spin_is_contended(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
}
#define arch_spin_is_contended	arch_spin_is_contended

static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
}

static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
						  unsigned long flags)
{
	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
}

static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
}

static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
}

#endif

#ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
#define PV_RESTORE_REGS "popl %edx; popl %ecx;"

/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS		"pushl %ecx;"
#define PV_RESTORE_ALL_CALLER_REGS	"popl  %ecx;"

#define PV_FLAGS_ARG "0"
#define PV_EXTRA_CLOBBERS
#define PV_VEXTRA_CLOBBERS
#else
/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS						\
	"push %rcx;"							\
	"push %rdx;"							\
	"push %rsi;"							\
	"push %rdi;"							\
	"push %r8;"							\
	"push %r9;"							\
	"push %r10;"							\
	"push %r11;"
#define PV_RESTORE_ALL_CALLER_REGS					\
	"pop %r11;"							\
	"pop %r10;"							\
	"pop %r9;"							\
	"pop %r8;"							\
	"pop %rdi;"							\
	"pop %rsi;"							\
	"pop %rdx;"							\
	"pop %rcx;"

/* We save some registers, but not all of them - that would be too much. We
 * clobber all caller-saved registers except the argument register (%rdi). */
#define PV_SAVE_REGS "pushq %%rdi;"
#define PV_RESTORE_REGS "popq %%rdi;"
#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
#define PV_FLAGS_ARG "D"
#endif

/*
 * Generate a thunk around a function which saves all caller-save
 * registers except for the return value.  This allows C functions to
 * be called from assembler code where fewer than normal registers are
 * available.  It may also help code generation around calls from C
 * code if the common case doesn't use many registers.
 *
 * When a callee is wrapped in a thunk, the caller can assume that all
 * arg regs and all scratch registers are preserved across the
 * call. The return value in rax/eax will not be saved, even for void
 * functions.
 */
#define PV_CALLEE_SAVE_REGS_THUNK(func)					\
	extern typeof(func) __raw_callee_save_##func;			\
	static void *__##func##__ __used = func;			\
									\
	asm(".pushsection .text;"					\
	    "__raw_callee_save_" #func ": "				\
	    PV_SAVE_ALL_CALLER_REGS					\
	    "call " #func ";"						\
	    PV_RESTORE_ALL_CALLER_REGS					\
	    "ret;"							\
	    ".popsection")

/* Get a reference to a callee-save function */
#define PV_CALLEE_SAVE(func)						\
	((struct paravirt_callee_save) { __raw_callee_save_##func })

/* Promise that "func" already uses the right calling convention */
#define __PV_IS_CALLEE_SAVE(func)			\
	((struct paravirt_callee_save) { func })
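
/*
 * Illustrative usage (a sketch; "my_save_fl" is a made-up backend helper):
 * a pv backend wraps its C handler in a thunk and installs the resulting
 * callee-save reference as an op, roughly:
 *
 *	PV_CALLEE_SAVE_REGS_THUNK(my_save_fl);
 *	...
 *	pv_irq_ops.save_fl = PV_CALLEE_SAVE(my_save_fl);
 */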

static inline notrace unsigned long arch_local_save_flags(void)
{
	return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
}

static inline notrace void arch_local_irq_restore(unsigned long f)
{
	PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
}

static inline notrace void arch_local_irq_disable(void)
{
	PVOP_VCALLEE0(pv_irq_ops.irq_disable);
}

static inline notrace void arch_local_irq_enable(void)
{
	PVOP_VCALLEE0(pv_irq_ops.irq_enable);
}

static inline notrace unsigned long arch_local_irq_save(void)
{
	unsigned long f;

	f = arch_local_save_flags();
	arch_local_irq_disable();
	return f;
}


/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#undef __PVOP_VCALL
#undef PVOP_VCALL0
#undef PVOP_CALL0
#undef PVOP_VCALL1
#undef PVOP_CALL1
#undef PVOP_VCALL2
#undef PVOP_CALL2
#undef PVOP_VCALL3
#undef PVOP_CALL3
#undef PVOP_VCALL4
#undef PVOP_CALL4

extern void default_banner(void);

#else  /* __ASSEMBLY__ */

#define _PVSITE(ptype, clobbers, ops, word, algn)	\
771:;						\
	ops;					\
772:;						\
	.pushsection .parainstructions,"a";	\
	 .align	algn;				\
	 word 771b;				\
	 .byte ptype;				\
	 .byte 772b-771b;			\
	 .short clobbers;			\
	.popsection


#define COND_PUSH(set, mask, reg)			\
	.if ((~(set)) & mask); push %reg; .endif
#define COND_POP(set, mask, reg)			\
	.if ((~(set)) & mask); pop %reg; .endif

#ifdef CONFIG_X86_64

#define PV_SAVE_REGS(set)			\
	COND_PUSH(set, CLBR_RAX, rax);		\
	COND_PUSH(set, CLBR_RCX, rcx);		\
	COND_PUSH(set, CLBR_RDX, rdx);		\
	COND_PUSH(set, CLBR_RSI, rsi);		\
	COND_PUSH(set, CLBR_RDI, rdi);		\
	COND_PUSH(set, CLBR_R8, r8);		\
	COND_PUSH(set, CLBR_R9, r9);		\
	COND_PUSH(set, CLBR_R10, r10);		\
	COND_PUSH(set, CLBR_R11, r11)
#define PV_RESTORE_REGS(set)			\
	COND_POP(set, CLBR_R11, r11);		\
	COND_POP(set, CLBR_R10, r10);		\
	COND_POP(set, CLBR_R9, r9);		\
	COND_POP(set, CLBR_R8, r8);		\
	COND_POP(set, CLBR_RDI, rdi);		\
	COND_POP(set, CLBR_RSI, rsi);		\
	COND_POP(set, CLBR_RDX, rdx);		\
	COND_POP(set, CLBR_RCX, rcx);		\
	COND_POP(set, CLBR_RAX, rax)

#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
#define PARA_INDIRECT(addr)	*addr(%rip)
#else
#define PV_SAVE_REGS(set)			\
	COND_PUSH(set, CLBR_EAX, eax);		\
	COND_PUSH(set, CLBR_EDI, edi);		\
	COND_PUSH(set, CLBR_ECX, ecx);		\
	COND_PUSH(set, CLBR_EDX, edx)
#define PV_RESTORE_REGS(set)			\
	COND_POP(set, CLBR_EDX, edx);		\
	COND_POP(set, CLBR_ECX, ecx);		\
	COND_POP(set, CLBR_EDI, edi);		\
	COND_POP(set, CLBR_EAX, eax)

#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
#define PARA_INDIRECT(addr)	*%cs:addr
#endif

#define INTERRUPT_RETURN						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))

#define DISABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)

#define ENABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)

#define USERGS_SYSRET32							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))

#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX				\
	push %ecx; push %edx;				\
	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
	pop %edx; pop %ecx

#define ENABLE_INTERRUPTS_SYSEXIT					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))


#else	/* !CONFIG_X86_32 */

/*
 * If swapgs is used while the userspace stack is still current,
 * there's no way to call a pvop.  The PV replacement *must* be
 * inlined, or the swapgs instruction must be trapped and emulated.
 */
#define SWAPGS_UNSAFE_STACK						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
		  swapgs)

/*
 * Note: swapgs is very special, and in practice is either going to be
 * implemented with a single "swapgs" instruction or something very
 * special.  Either way, we don't need to save any registers for
 * it.
 */
#define SWAPGS								\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)		\
		 )

#define GET_CR2_INTO_RAX				\
	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)

#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
		  CLBR_NONE,						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))

#define USERGS_SYSRET64							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))

#define ENABLE_INTERRUPTS_SYSEXIT32					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#endif	/* CONFIG_X86_32 */

#endif /* __ASSEMBLY__ */
#else  /* CONFIG_PARAVIRT */
# define default_banner x86_init_noop
#endif /* !CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_H */