提交 88cbfd07 作者: Linus Torvalds

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - vDSO and asm entry improvements (Andy Lutomirski)

   - Xen paravirt entry enhancements (Boris Ostrovsky)

   - asm entry labels enhancement (Borislav Petkov)

   - and other misc changes (Thomas Gleixner, me)"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vsdo: Fix build on PARAVIRT_CLOCK=y, KVM_GUEST=n
  Revert "x86/kvm: On KVM re-enable (e.g. after suspend), update clocks"
  x86/entry/64_compat: Make labels local
  x86/platform/uv: Include clocksource.h for clocksource_touch_watchdog()
  x86/vdso: Enable vdso pvclock access on all vdso variants
  x86/vdso: Remove pvclock fixmap machinery
  x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap
  x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader
  x86/kvm: On KVM re-enable (e.g. after suspend), update clocks
  x86/entry/64: Bypass enter_from_user_mode on non-context-tracking boots
  x86/asm: Add asm macros for static keys/jump labels
  x86/asm: Error out if asm/jump_label.h is included inappropriately
  context_tracking: Switch to new static_branch API
  x86/entry, x86/paravirt: Remove the unused usergs_sysret32 PV op
  x86/paravirt: Remove the unused irq_enable_sysexit pv op
  x86/xen: Avoid fast syscall path for Xen PV guests
#include <linux/jump_label.h>
/*
x86 function call convention, 64-bit:
......@@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with
#endif /* CONFIG_X86_64 */
/*
* This does 'call enter_from_user_mode' unless we can avoid it based on
* kernel config or using the static jump infrastructure.
*/
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef HAVE_JUMP_LABEL
/*
 * Patchable jump over the call, keyed on context_tracking_enabled.
 * With def=0, STATIC_JUMP_IF_FALSE assembles its jmp variant, so as
 * built the call below is skipped (jump lands on .Lafter_call_\@).
 * Without HAVE_JUMP_LABEL no jump is emitted and the call is
 * unconditional.
 */
STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
#endif
call enter_from_user_mode
/* \@ makes the label unique per macro expansion. */
.Lafter_call_\@:
#endif
.endm
......@@ -329,7 +329,8 @@ sysenter_past_esp:
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
ENABLE_INTERRUPTS_SYSEXIT
sti
sysexit
.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
......@@ -552,11 +553,6 @@ ENTRY(native_iret)
iret
_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
ENTRY(native_irq_enable_sysexit)
sti
sysexit
END(native_irq_enable_sysexit)
#endif
ENTRY(overflow)
......
......@@ -520,9 +520,7 @@ END(irq_entries_start)
*/
TRACE_IRQS_OFF
#ifdef CONFIG_CONTEXT_TRACKING
call enter_from_user_mode
#endif
CALL_enter_from_user_mode
1:
/*
......@@ -1066,9 +1064,7 @@ ENTRY(error_entry)
* (which can take locks).
*/
TRACE_IRQS_OFF
#ifdef CONFIG_CONTEXT_TRACKING
call enter_from_user_mode
#endif
CALL_enter_from_user_mode
ret
.Lerror_entry_done:
......
......@@ -18,13 +18,6 @@
.section .entry.text, "ax"
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret32)
swapgs
sysretl
ENDPROC(native_usergs_sysret32)
#endif
/*
* 32-bit SYSENTER instruction entry.
*
......@@ -103,15 +96,15 @@ ENTRY(entry_SYSENTER_compat)
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
* NB.: sysenter_fix_flags is a label with the code under it moved
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
jnz sysenter_fix_flags
sysenter_flags_fixed:
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
/*
* User mode is traced as though IRQs are on, and SYSENTER
......@@ -126,10 +119,10 @@ sysenter_flags_fixed:
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
jmp sysret32_from_system_call
sysenter_fix_flags:
.Lsysenter_fix_flags:
pushq $X86_EFLAGS_FIXED
popfq
jmp sysenter_flags_fixed
jmp .Lsysenter_flags_fixed
ENDPROC(entry_SYSENTER_compat)
/*
......@@ -238,7 +231,8 @@ sysret32_from_system_call:
xorq %r9, %r9
xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET32
swapgs
sysretl
END(entry_SYSCALL_compat)
/*
......
......@@ -17,8 +17,10 @@
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>
#define gtod (&VVAR(vsyscall_gtod_data))
......@@ -36,12 +38,12 @@ static notrace cycle_t vread_hpet(void)
}
#endif
#ifndef BUILD_VDSO32
#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
__attribute__((visibility("hidden")));
#endif
#include <linux/kernel.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>
#ifndef BUILD_VDSO32
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
......@@ -60,75 +62,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret;
}
#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
const struct pvclock_vsyscall_time_info *pvti_base;
int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
pvti_base = (struct pvclock_vsyscall_time_info *)
__fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
return &pvti_base[offset];
}
static notrace cycle_t vread_pvclock(int *mode)
{
const struct pvclock_vsyscall_time_info *pvti;
cycle_t ret;
u64 last;
u32 version;
u8 flags;
unsigned cpu, cpu1;
/*
* Note: hypervisor must guarantee that:
* 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
* 2. that per-CPU pvclock time info is updated if the
* underlying CPU changes.
* 3. that version is increased whenever underlying CPU
* changes.
*
*/
do {
cpu = __getcpu() & VGETCPU_CPU_MASK;
/* TODO: We can put vcpu id into higher bits of pvti.version.
* This will save a couple of cycles by getting rid of
* __getcpu() calls (Gleb).
*/
pvti = get_pvti(cpu);
version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
/*
* Test we're still on the cpu as well as the version.
* We could have been migrated just after the first
* vgetcpu but before fetching the version, so we
* wouldn't notice a version change.
*/
cpu1 = __getcpu() & VGETCPU_CPU_MASK;
} while (unlikely(cpu != cpu1 ||
(pvti->pvti.version & 1) ||
pvti->pvti.version != version));
if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;
/* refer to tsc.c read_tsc() comment for rationale */
last = gtod->cycle_last;
if (likely(ret >= last))
return ret;
return last;
}
#endif
#else
......@@ -162,15 +95,77 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret;
}
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
/*
 * Return the address of the pvclock_page symbol, typed as a
 * pvclock_vsyscall_time_info. The caller reads this as vCPU 0's
 * time info.
 */
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}
static notrace cycle_t vread_pvclock(int *mode)
{
const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
u64 tsc, pvti_tsc;
u64 last, delta, pvti_system_time;
u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
/*
* Note: The kernel and hypervisor must guarantee that cpu ID
* number maps 1:1 to per-CPU pvclock time info.
*
* Because the hypervisor is entirely unaware of guest userspace
* preemption, it cannot guarantee that per-CPU pvclock time
* info is updated if the underlying CPU changes, or that the
* version is increased whenever the underlying CPU changes.
*
* On KVM, we are guaranteed that pvti updates for any vCPU are
* atomic as seen by *all* vCPUs. This is an even stronger
* guarantee than we get with a normal seqlock.
*
* On Xen, we don't appear to have that guarantee, but Xen still
* supplies a valid seqlock using the version field.
* We only do pvclock vdso timing at all if
* PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
* mean that all vCPUs have matching pvti and that the TSC is
* synced, so we can just look at vCPU 0's pvti.
*/
if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE;
return 0;
}
#endif
}
do {
version = pvti->version;
smp_rmb();
tsc = rdtsc_ordered();
pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
pvti_tsc_shift = pvti->tsc_shift;
pvti_system_time = pvti->system_time;
pvti_tsc = pvti->tsc_timestamp;
/* Make sure that the version double-check is last. */
smp_rmb();
} while (unlikely((version & 1) || version != pvti->version));
delta = tsc - pvti_tsc;
ret = pvti_system_time +
pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
pvti_tsc_shift);
/* refer to vread_tsc() comment for rationale */
last = gtod->cycle_last;
if (likely(ret >= last))
return ret;
return last;
}
#endif
notrace static cycle_t vread_tsc(void)
......
......@@ -25,7 +25,7 @@ SECTIONS
* segment.
*/
vvar_start = . - 2 * PAGE_SIZE;
vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
......@@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR
hpet_page = vvar_start + PAGE_SIZE;
pvclock_page = vvar_start + 2 * PAGE_SIZE;
. = SIZEOF_HEADERS;
......
......@@ -73,6 +73,7 @@ enum {
sym_vvar_start,
sym_vvar_page,
sym_hpet_page,
sym_pvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};
......@@ -80,6 +81,7 @@ enum {
const int special_pages[] = {
sym_vvar_page,
sym_hpet_page,
sym_pvclock_page,
};
struct vdso_sym {
......@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false
},
......
......@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
......@@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
.name = "[vvar]",
.pages = no_pages,
};
struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
......@@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
}
#endif
pvti = pvclock_pvti_cpu0_va();
if (pvti && image->sym_pvclock_page) {
ret = remap_pfn_range(vma,
text_start + image->sym_pvclock_page,
__pa(pvti) >> PAGE_SHIFT,
PAGE_SIZE,
PAGE_READONLY);
if (ret)
goto up_fail;
}
up_fail:
if (ret)
current->mm->context.vdso = NULL;
......
......@@ -19,7 +19,6 @@
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
#include <asm/pvclock.h>
#ifdef CONFIG_X86_32
#include <linux/threads.h>
#include <asm/kmap_types.h>
......@@ -72,10 +71,6 @@ enum fixed_addresses {
#ifdef CONFIG_X86_VSYSCALL_EMULATION
VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
PVCLOCK_FIXMAP_BEGIN,
PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
#endif
#endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
......
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
#include <linux/types.h>
#include <asm/nops.h>
#include <asm/asm.h>
#ifndef HAVE_JUMP_LABEL
/*
* For better or for worse, if jump labels (the gcc extension) are missing,
* then the entire static branch patching infrastructure is compiled out.
* If that happens, the code in here will malfunction. Raise a compiler
* error instead.
*
* In theory, jump labels and the static branch patching infrastructure
* could be decoupled to fix this.
*/
#error asm/jump_label.h included on a non-jump-label kernel
#endif
#define JUMP_LABEL_NOP_SIZE 5
......@@ -16,6 +22,14 @@
# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
#endif
#include <asm/asm.h>
#include <asm/nops.h>
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
#include <linux/types.h>
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
......@@ -59,5 +73,40 @@ struct jump_entry {
jump_label_t key;
};
#else /* __ASSEMBLY__ */
/*
 * STATIC_JUMP_IF_TRUE target, key, def
 *
 * Emit a 5-byte patch site labelled .Lstatic_jump_\@:
 *   - \def non-zero: a "jmp rel32" (opcode 0xe9) to \target;
 *   - \def zero:     STATIC_KEY_INIT_NOP.
 * Then append an entry (site address, \target, \key) to the
 * __jump_table section.
 *
 * NOTE(review): presumably the jump label core later rewrites the site
 * according to \key's state -- that code is not visible here.
 */
.macro STATIC_JUMP_IF_TRUE target, key, def
.Lstatic_jump_\@:
.if \def
/* Equivalent to "jmp.d32 \target" */
.byte 0xe9
/* rel32 displacement is relative to the end of the jmp instruction. */
.long \target - .Lstatic_jump_after_\@
.Lstatic_jump_after_\@:
.else
.byte STATIC_KEY_INIT_NOP
.endif
/* Record this site in the jump table: code address, target, key. */
.pushsection __jump_table, "aw"
_ASM_ALIGN
_ASM_PTR .Lstatic_jump_\@, \target, \key
.popsection
.endm
/*
 * STATIC_JUMP_IF_FALSE target, key, def
 *
 * Emit a 5-byte patch site labelled .Lstatic_jump_\@, with the opposite
 * initial encoding to STATIC_JUMP_IF_TRUE:
 *   - \def non-zero: STATIC_KEY_INIT_NOP;
 *   - \def zero:     a "jmp rel32" (opcode 0xe9) to \target.
 * Then append an entry (site address, \target, \key + 1) to the
 * __jump_table section.
 *
 * NOTE(review): the "+ 1" presumably tags the entry's branch type in the
 * low bit of the key address -- confirm against the jump label core.
 */
.macro STATIC_JUMP_IF_FALSE target, key, def
.Lstatic_jump_\@:
.if \def
.byte STATIC_KEY_INIT_NOP
.else
/* Equivalent to "jmp.d32 \target" */
.byte 0xe9
/* rel32 displacement is relative to the end of the jmp instruction. */
.long \target - .Lstatic_jump_after_\@
.Lstatic_jump_after_\@:
.endif
/* Record this site in the jump table: code address, target, key + 1. */
.pushsection __jump_table, "aw"
_ASM_ALIGN
_ASM_PTR .Lstatic_jump_\@, \target, \key + 1
.popsection
.endm
#endif /* __ASSEMBLY__ */
#endif
......@@ -928,23 +928,11 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define USERGS_SYSRET32 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
#define ENABLE_INTERRUPTS_SYSEXIT \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#else /* !CONFIG_X86_32 */
/*
......
......@@ -162,15 +162,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
#ifdef CONFIG_X86_32
/*
* Atomically enable interrupts and return to userspace. This
* is only used in 32-bit kernels. 64-bit kernels use
* usergs_sysret32 instead.
*/
void (*irq_enable_sysexit)(void);
#endif
/*
* Switch to usermode gs and return to 64-bit usermode using
* sysret. Only used in 64-bit kernels to return to 64-bit
......@@ -179,14 +170,6 @@ struct pv_cpu_ops {
*/
void (*usergs_sysret64)(void);
/*
* Switch to usermode gs and return to 32-bit usermode using
* sysret. Used to return to 32-on-64 compat processes.
* Other usermode register state, including %esp, must already
* be restored.
*/
void (*usergs_sysret32)(void);
/* Normal iret. Jump to this with the standard iret stack
frame set up. */
void (*iret)(void);
......
......@@ -4,6 +4,15 @@
#include <linux/clocksource.h>
#include <asm/pvclock-abi.h>
/*
 * pvclock_pvti_cpu0_va() - pointer to the pvclock time info, or NULL.
 *
 * With CONFIG_KVM_GUEST the function is defined out of line; otherwise
 * this inline stub always returns NULL, so callers must handle NULL.
 */
#ifdef CONFIG_KVM_GUEST
extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
#else
static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
{
return NULL;
}
#endif
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
......@@ -91,10 +100,5 @@ struct pvclock_vsyscall_time_info {
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
int size);
struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
#endif /* _ASM_X86_PVCLOCK_H */
......@@ -22,6 +22,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_hpet_page;
long sym_pvclock_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
......
......@@ -65,9 +65,6 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
#ifdef CONFIG_X86_32
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
#endif
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif
......
......@@ -23,7 +23,6 @@ int main(void)
{
#ifdef CONFIG_PARAVIRT
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK();
......
......@@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock);
static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;
/*
 * Expose the file-local hv_clock pointer to code outside this file.
 * Returns whatever hv_clock currently holds.
 * NOTE(review): presumably NULL until kvmclock setup has allocated it --
 * confirm against the initialization path.
 */
struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
{
return hv_clock;
}
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
......@@ -305,7 +310,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
int cpu;
int ret;
u8 flags;
struct pvclock_vcpu_time_info *vcpu_time;
unsigned int size;
......@@ -325,11 +329,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
return 1;
}
if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
put_cpu();
return ret;
}
put_cpu();
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
......
......@@ -162,10 +162,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
#ifdef CONFIG_X86_32
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
#endif
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
......@@ -220,8 +216,6 @@ static u64 native_steal_clock(int cpu)
/* These are in entry.S */
extern void native_iret(void);
extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
static struct resource reserve_ioports = {
......@@ -379,13 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0,
#if defined(CONFIG_X86_32)
.irq_enable_sysexit = native_irq_enable_sysexit,
#endif
#ifdef CONFIG_X86_64
#ifdef CONFIG_IA32_EMULATION
.usergs_sysret32 = native_usergs_sysret32,
#endif
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
......
......@@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
......@@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
......
......@@ -13,9 +13,7 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax");
......@@ -55,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
PATCH_SITE(pv_cpu_ops, swapgs);
PATCH_SITE(pv_mmu_ops, read_cr2);
......
......@@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
#ifdef CONFIG_X86_64
/*
* Initialize the generic pvclock vsyscall state. This will allocate
* a/some page(s) for the per-vcpu pvclock information, set up a
* fixmap mapping for the page(s)
*/
int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
int size)
{
int idx;
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}
return 0;
}
#endif
......@@ -28,6 +28,7 @@
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/clocksource.h>
#include <asm/apic.h>
#include <asm/current.h>
......
......@@ -1229,10 +1229,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.iret = xen_iret,
#ifdef CONFIG_X86_64
.usergs_sysret32 = xen_sysret32,
.usergs_sysret64 = xen_sysret64,
#else
.irq_enable_sysexit = xen_sysexit,
#endif
.load_tr_desc = paravirt_nop,
......
......@@ -34,20 +34,6 @@ check_events:
pop %eax
ret
/*
* We can't use sysexit directly, because we're not running in ring0.
* But we can easily fake it up using iret. Assuming xen_sysexit is
* jumped to with a standard stack frame, we can just strip it back to
* a standard iret frame and use iret.
*/
ENTRY(xen_sysexit)
movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
lea PT_EIP(%esp), %esp
jmp xen_iret
ENDPROC(xen_sysexit)
/*
* This is run where a normal iret would be run, with the same stack setup:
* 8: eflags
......
......@@ -68,25 +68,6 @@ ENTRY(xen_sysret64)
ENDPATCH(xen_sysret64)
RELOC(xen_sysret64, 1b+1)
ENTRY(xen_sysret32)
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER32_DS
pushq PER_CPU_VAR(rsp_scratch)
pushq %r11
pushq $__USER32_CS
pushq %rcx
pushq $0
1: jmp hypercall_iret
ENDPATCH(xen_sysret32)
RELOC(xen_sysret32, 1b+1)
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
* means we have:
......
......@@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
#ifdef CONFIG_X86_32
__visible void xen_sysexit(void);
#endif
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
__visible void xen_adjust_exception_frame(void);
......
......@@ -22,12 +22,12 @@ struct context_tracking {
};
#ifdef CONFIG_CONTEXT_TRACKING
extern struct static_key context_tracking_enabled;
extern struct static_key_false context_tracking_enabled;
DECLARE_PER_CPU(struct context_tracking, context_tracking);
static inline bool context_tracking_is_enabled(void)
{
return static_key_false(&context_tracking_enabled);
return static_branch_unlikely(&context_tracking_enabled);
}
static inline bool context_tracking_cpu_is_enabled(void)
......
......@@ -24,7 +24,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>
struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
EXPORT_SYMBOL_GPL(context_tracking_enabled);
DEFINE_PER_CPU(struct context_tracking, context_tracking);
......@@ -191,7 +191,7 @@ void __init context_tracking_cpu_set(int cpu)
if (!per_cpu(context_tracking.active, cpu)) {
per_cpu(context_tracking.active, cpu) = true;
static_key_slow_inc(&context_tracking_enabled);
static_branch_inc(&context_tracking_enabled);
}
if (initialized)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册