提交 88cbfd07 编写于 作者: L Linus Torvalds

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - vDSO and asm entry improvements (Andy Lutomirski)

   - Xen paravirt entry enhancements (Boris Ostrovsky)

   - asm entry labels enhancement (Borislav Petkov)

   - and other misc changes (Thomas Gleixner, me)"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vsdo: Fix build on PARAVIRT_CLOCK=y, KVM_GUEST=n
  Revert "x86/kvm: On KVM re-enable (e.g. after suspend), update clocks"
  x86/entry/64_compat: Make labels local
  x86/platform/uv: Include clocksource.h for clocksource_touch_watchdog()
  x86/vdso: Enable vdso pvclock access on all vdso variants
  x86/vdso: Remove pvclock fixmap machinery
  x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap
  x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader
  x86/kvm: On KVM re-enable (e.g. after suspend), update clocks
  x86/entry/64: Bypass enter_from_user_mode on non-context-tracking boots
  x86/asm: Add asm macros for static keys/jump labels
  x86/asm: Error out if asm/jump_label.h is included inappropriately
  context_tracking: Switch to new static_branch API
  x86/entry, x86/paravirt: Remove the unused usergs_sysret32 PV op
  x86/paravirt: Remove the unused irq_enable_sysexit pv op
  x86/xen: Avoid fast syscall path for Xen PV guests
#include <linux/jump_label.h>
/* /*
x86 function call convention, 64-bit: x86 function call convention, 64-bit:
...@@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
/*
* CALL_enter_from_user_mode - conditionally call enter_from_user_mode.
*
* This does 'call enter_from_user_mode' unless we can avoid it based on
* kernel config or using the static jump infrastructure:
*
*  - Without CONFIG_CONTEXT_TRACKING the macro expands to nothing.
*  - With CONFIG_CONTEXT_TRACKING but without HAVE_JUMP_LABEL the call is
*    made unconditionally.
*  - With both, a static jump keyed on context_tracking_enabled is emitted
*    in front of the call and branches past it to the local label below.
*/
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef HAVE_JUMP_LABEL
/* def=0 selects the "jmp" form of STATIC_JUMP_IF_FALSE as the initial code. */
STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
#endif
call enter_from_user_mode
/* Landing point for the static jump; the macro counter keeps it unique. */
.Lafter_call_\@:
#endif
.endm
...@@ -329,7 +329,8 @@ sysenter_past_esp: ...@@ -329,7 +329,8 @@ sysenter_past_esp:
* Return back to the vDSO, which will pop ecx and edx. * Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS). * Don't bother with DS and ES (they already contain __USER_DS).
*/ */
ENABLE_INTERRUPTS_SYSEXIT sti
sysexit
.pushsection .fixup, "ax" .pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp) 2: movl $0, PT_FS(%esp)
...@@ -552,11 +553,6 @@ ENTRY(native_iret) ...@@ -552,11 +553,6 @@ ENTRY(native_iret)
iret iret
_ASM_EXTABLE(native_iret, iret_exc) _ASM_EXTABLE(native_iret, iret_exc)
END(native_iret) END(native_iret)
/* Native irq_enable_sysexit: execute sti immediately followed by sysexit. */
ENTRY(native_irq_enable_sysexit)
sti
sysexit
END(native_irq_enable_sysexit)
#endif #endif
ENTRY(overflow) ENTRY(overflow)
......
...@@ -520,9 +520,7 @@ END(irq_entries_start) ...@@ -520,9 +520,7 @@ END(irq_entries_start)
*/ */
TRACE_IRQS_OFF TRACE_IRQS_OFF
#ifdef CONFIG_CONTEXT_TRACKING CALL_enter_from_user_mode
call enter_from_user_mode
#endif
1: 1:
/* /*
...@@ -1066,9 +1064,7 @@ ENTRY(error_entry) ...@@ -1066,9 +1064,7 @@ ENTRY(error_entry)
* (which can take locks). * (which can take locks).
*/ */
TRACE_IRQS_OFF TRACE_IRQS_OFF
#ifdef CONFIG_CONTEXT_TRACKING CALL_enter_from_user_mode
call enter_from_user_mode
#endif
ret ret
.Lerror_entry_done: .Lerror_entry_done:
......
...@@ -18,13 +18,6 @@ ...@@ -18,13 +18,6 @@
.section .entry.text, "ax" .section .entry.text, "ax"
#ifdef CONFIG_PARAVIRT
/*
* Native usergs_sysret32: execute swapgs immediately followed by sysretl.
* Only assembled when CONFIG_PARAVIRT is set.
*/
ENTRY(native_usergs_sysret32)
swapgs
sysretl
ENDPROC(native_usergs_sysret32)
#endif
/* /*
* 32-bit SYSENTER instruction entry. * 32-bit SYSENTER instruction entry.
* *
...@@ -103,15 +96,15 @@ ENTRY(entry_SYSENTER_compat) ...@@ -103,15 +96,15 @@ ENTRY(entry_SYSENTER_compat)
* This needs to happen before enabling interrupts so that * This needs to happen before enabling interrupts so that
* we don't get preempted with NT set. * we don't get preempted with NT set.
* *
* NB.: sysenter_fix_flags is a label with the code under it moved * NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the * out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily, * majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as * we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched. * not-taken and therefore its instructions won't be fetched.
*/ */
testl $X86_EFLAGS_NT, EFLAGS(%rsp) testl $X86_EFLAGS_NT, EFLAGS(%rsp)
jnz sysenter_fix_flags jnz .Lsysenter_fix_flags
sysenter_flags_fixed: .Lsysenter_flags_fixed:
/* /*
* User mode is traced as though IRQs are on, and SYSENTER * User mode is traced as though IRQs are on, and SYSENTER
...@@ -126,10 +119,10 @@ sysenter_flags_fixed: ...@@ -126,10 +119,10 @@ sysenter_flags_fixed:
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
jmp sysret32_from_system_call jmp sysret32_from_system_call
sysenter_fix_flags: .Lsysenter_fix_flags:
pushq $X86_EFLAGS_FIXED pushq $X86_EFLAGS_FIXED
popfq popfq
jmp sysenter_flags_fixed jmp .Lsysenter_flags_fixed
ENDPROC(entry_SYSENTER_compat) ENDPROC(entry_SYSENTER_compat)
/* /*
...@@ -238,7 +231,8 @@ sysret32_from_system_call: ...@@ -238,7 +231,8 @@ sysret32_from_system_call:
xorq %r9, %r9 xorq %r9, %r9
xorq %r10, %r10 xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET32 swapgs
sysretl
END(entry_SYSCALL_compat) END(entry_SYSCALL_compat)
/* /*
......
...@@ -17,8 +17,10 @@ ...@@ -17,8 +17,10 @@
#include <asm/vvar.h> #include <asm/vvar.h>
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/pvclock.h>
#include <linux/math64.h> #include <linux/math64.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/kernel.h>
#define gtod (&VVAR(vsyscall_gtod_data)) #define gtod (&VVAR(vsyscall_gtod_data))
...@@ -36,12 +38,12 @@ static notrace cycle_t vread_hpet(void) ...@@ -36,12 +38,12 @@ static notrace cycle_t vread_hpet(void)
} }
#endif #endif
#ifndef BUILD_VDSO32 #ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
__attribute__((visibility("hidden")));
#endif
#include <linux/kernel.h> #ifndef BUILD_VDSO32
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{ {
...@@ -60,75 +62,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) ...@@ -60,75 +62,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret; return ret;
} }
#ifdef CONFIG_PARAVIRT_CLOCK
/*
* Return a pointer to the pvclock_vsyscall_time_info entry for @cpu.
*
* The entries are addressed through the PVCLOCK_FIXMAP_BEGIN..END fixmap
* slots, with PAGE_SIZE/PVTI_SIZE entries per page: @cpu is split into a
* page index and an offset within that page.
*/
static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
const struct pvclock_vsyscall_time_info *pvti_base;
/* Which fixmap page holds this cpu's entry ... */
int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
/* ... and which entry within that page. */
int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
/* The computed page must lie inside the pvclock fixmap range. */
BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
pvti_base = (struct pvclock_vsyscall_time_info *)
__fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
return &pvti_base[offset];
}
/*
* Read the paravirtual clock through the per-CPU pvti entry.
*
* The entry for the current CPU is looked up with get_pvti() and read with
* __pvclock_read_cycles(). The read is retried until the CPU number is the
* same before and after the read, the entry's version is even, and the
* version matches the one returned by the read.
*
* @mode: set to VCLOCK_NONE when the flags that were read do not contain
*        PVCLOCK_TSC_STABLE_BIT; otherwise left untouched.
*
* Returns the cycle value that was read, or gtod->cycle_last if that is
* larger.
*/
static notrace cycle_t vread_pvclock(int *mode)
{
const struct pvclock_vsyscall_time_info *pvti;
cycle_t ret;
u64 last;
u32 version;
u8 flags;
unsigned cpu, cpu1;
/*
* Note: hypervisor must guarantee that:
* 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
* 2. that per-CPU pvclock time info is updated if the
* underlying CPU changes.
* 3. that version is increased whenever underlying CPU
* changes.
*
*/
do {
cpu = __getcpu() & VGETCPU_CPU_MASK;
/* TODO: We can put vcpu id into higher bits of pvti.version.
* This will save a couple of cycles by getting rid of
* __getcpu() calls (Gleb).
*/
pvti = get_pvti(cpu);
version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
/*
* Test we're still on the cpu as well as the version.
* We could have been migrated just after the first
* vgetcpu but before fetching the version, so we
* wouldn't notice a version change.
*/
cpu1 = __getcpu() & VGETCPU_CPU_MASK;
} while (unlikely(cpu != cpu1 ||
(pvti->pvti.version & 1) ||
pvti->pvti.version != version));
/* Without the stable bit, tell the caller not to use this clock mode. */
if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;
/* refer to tsc.c read_tsc() comment for rationale */
last = gtod->cycle_last;
if (likely(ret >= last))
return ret;
return last;
}
#endif
#else #else
...@@ -162,15 +95,77 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) ...@@ -162,15 +95,77 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret; return ret;
} }
#endif
#ifdef CONFIG_PARAVIRT_CLOCK #ifdef CONFIG_PARAVIRT_CLOCK
/*
 * Return the pvclock time info located at the start of pvclock_page,
 * viewed as a struct pvclock_vsyscall_time_info.
 */
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
	/* pvclock_page is declared as a bare byte; only its address matters. */
	const void *page_start = &pvclock_page;

	return page_start;
}
static notrace cycle_t vread_pvclock(int *mode) static notrace cycle_t vread_pvclock(int *mode)
{ {
const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
u64 tsc, pvti_tsc;
u64 last, delta, pvti_system_time;
u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
/*
* Note: The kernel and hypervisor must guarantee that cpu ID
* number maps 1:1 to per-CPU pvclock time info.
*
* Because the hypervisor is entirely unaware of guest userspace
* preemption, it cannot guarantee that per-CPU pvclock time
* info is updated if the underlying CPU changes or that the
* version is increased whenever the underlying CPU changes.
*
* On KVM, we are guaranteed that pvti updates for any vCPU are
* atomic as seen by *all* vCPUs. This is an even stronger
* guarantee than we get with a normal seqlock.
*
* On Xen, we don't appear to have that guarantee, but Xen still
* supplies a valid seqlock using the version field.
* We only do pvclock vdso timing at all if
* PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
* mean that all vCPUs have matching pvti and that the TSC is
* synced, so we can just look at vCPU 0's pvti.
*/
if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE; *mode = VCLOCK_NONE;
return 0; return 0;
} }
#endif
do {
version = pvti->version;
smp_rmb();
tsc = rdtsc_ordered();
pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
pvti_tsc_shift = pvti->tsc_shift;
pvti_system_time = pvti->system_time;
pvti_tsc = pvti->tsc_timestamp;
/* Make sure that the version double-check is last. */
smp_rmb();
} while (unlikely((version & 1) || version != pvti->version));
delta = tsc - pvti_tsc;
ret = pvti_system_time +
pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
pvti_tsc_shift);
/* refer to vread_tsc() comment for rationale */
last = gtod->cycle_last;
if (likely(ret >= last))
return ret;
return last;
}
#endif #endif
notrace static cycle_t vread_tsc(void) notrace static cycle_t vread_tsc(void)
......
...@@ -25,7 +25,7 @@ SECTIONS ...@@ -25,7 +25,7 @@ SECTIONS
* segment. * segment.
*/ */
vvar_start = . - 2 * PAGE_SIZE; vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start; vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */ /* Place all vvars at the offsets in asm/vvar.h. */
...@@ -36,6 +36,7 @@ SECTIONS ...@@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR #undef EMIT_VVAR
hpet_page = vvar_start + PAGE_SIZE; hpet_page = vvar_start + PAGE_SIZE;
pvclock_page = vvar_start + 2 * PAGE_SIZE;
. = SIZEOF_HEADERS; . = SIZEOF_HEADERS;
......
...@@ -73,6 +73,7 @@ enum { ...@@ -73,6 +73,7 @@ enum {
sym_vvar_start, sym_vvar_start,
sym_vvar_page, sym_vvar_page,
sym_hpet_page, sym_hpet_page,
sym_pvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START, sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END, sym_VDSO_FAKE_SECTION_TABLE_END,
}; };
...@@ -80,6 +81,7 @@ enum { ...@@ -80,6 +81,7 @@ enum {
const int special_pages[] = { const int special_pages[] = {
sym_vvar_page, sym_vvar_page,
sym_hpet_page, sym_hpet_page,
sym_pvclock_page,
}; };
struct vdso_sym { struct vdso_sym {
...@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = { ...@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true}, [sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true}, [sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true}, [sym_hpet_page] = {"hpet_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = { [sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false "VDSO_FAKE_SECTION_TABLE_START", false
}, },
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/random.h> #include <linux/random.h>
#include <linux/elf.h> #include <linux/elf.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h> #include <asm/vgtod.h>
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/vdso.h> #include <asm/vdso.h>
...@@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) ...@@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
.name = "[vvar]", .name = "[vvar]",
.pages = no_pages, .pages = no_pages,
}; };
struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) { if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack, addr = vdso_addr(current->mm->start_stack,
...@@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) ...@@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
} }
#endif #endif
pvti = pvclock_pvti_cpu0_va();
if (pvti && image->sym_pvclock_page) {
ret = remap_pfn_range(vma,
text_start + image->sym_pvclock_page,
__pa(pvti) >> PAGE_SHIFT,
PAGE_SIZE,
PAGE_READONLY);
if (ret)
goto up_fail;
}
up_fail: up_fail:
if (ret) if (ret)
current->mm->context.vdso = NULL; current->mm->context.vdso = NULL;
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include <asm/acpi.h> #include <asm/acpi.h>
#include <asm/apicdef.h> #include <asm/apicdef.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/pvclock.h>
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
#include <linux/threads.h> #include <linux/threads.h>
#include <asm/kmap_types.h> #include <asm/kmap_types.h>
...@@ -72,10 +71,6 @@ enum fixed_addresses { ...@@ -72,10 +71,6 @@ enum fixed_addresses {
#ifdef CONFIG_X86_VSYSCALL_EMULATION #ifdef CONFIG_X86_VSYSCALL_EMULATION
VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT, VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
#endif #endif
#ifdef CONFIG_PARAVIRT_CLOCK
PVCLOCK_FIXMAP_BEGIN,
PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
#endif
#endif #endif
FIX_DBGP_BASE, FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE, FIX_EARLYCON_MEM_BASE,
......
#ifndef _ASM_X86_JUMP_LABEL_H #ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H
#ifndef __ASSEMBLY__ #ifndef HAVE_JUMP_LABEL
/*
#include <linux/stringify.h> * For better or for worse, if jump labels (the gcc extension) are missing,
#include <linux/types.h> * then the entire static branch patching infrastructure is compiled out.
#include <asm/nops.h> * If that happens, the code in here will malfunction. Raise a compiler
#include <asm/asm.h> * error instead.
*
* In theory, jump labels and the static branch patching infrastructure
* could be decoupled to fix this.
*/
#error asm/jump_label.h included on a non-jump-label kernel
#endif
#define JUMP_LABEL_NOP_SIZE 5 #define JUMP_LABEL_NOP_SIZE 5
...@@ -16,6 +22,14 @@ ...@@ -16,6 +22,14 @@
# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
#endif #endif
#include <asm/asm.h>
#include <asm/nops.h>
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
#include <linux/types.h>
static __always_inline bool arch_static_branch(struct static_key *key, bool branch) static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{ {
asm_volatile_goto("1:" asm_volatile_goto("1:"
...@@ -59,5 +73,40 @@ struct jump_entry { ...@@ -59,5 +73,40 @@ struct jump_entry {
jump_label_t key; jump_label_t key;
}; };
#else /* __ASSEMBLY__ */
/*
* STATIC_JUMP_IF_TRUE - emit a patchable jump site from assembly code.
*
* target: label the jump goes to
* key:    static key symbol recorded in the jump table entry
* def:    assemble-time selector for the initial instruction: nonzero
*         emits a "jmp rel32" to the target, zero emits STATIC_KEY_INIT_NOP
*
* In both cases an entry (site address, target, key) is appended to the
* __jump_table section.
*/
.macro STATIC_JUMP_IF_TRUE target, key, def
.Lstatic_jump_\@:
.if \def
/* Equivalent to "jmp.d32 \target" */
.byte 0xe9
/* 32-bit displacement, relative to the end of this instruction */
.long \target - .Lstatic_jump_after_\@
.Lstatic_jump_after_\@:
.else
.byte STATIC_KEY_INIT_NOP
.endif
.pushsection __jump_table, "aw"
_ASM_ALIGN
_ASM_PTR .Lstatic_jump_\@, \target, \key
.popsection
.endm
/*
* STATIC_JUMP_IF_FALSE - emit a patchable jump site from assembly code,
* with the opposite initial encoding to STATIC_JUMP_IF_TRUE.
*
* target: label the jump goes to
* key:    static key symbol recorded in the jump table entry
* def:    assemble-time selector for the initial instruction: nonzero
*         emits STATIC_KEY_INIT_NOP, zero emits a "jmp rel32" to the target
*
* In both cases an entry (site address, target, key + 1) is appended to the
* __jump_table section.
* NOTE(review): the "+ 1" on the key field presumably tags the entry as the
* inverted branch type for the jump label core - confirm.
*/
.macro STATIC_JUMP_IF_FALSE target, key, def
.Lstatic_jump_\@:
.if \def
.byte STATIC_KEY_INIT_NOP
.else
/* Equivalent to "jmp.d32 \target" */
.byte 0xe9
/* 32-bit displacement, relative to the end of this instruction */
.long \target - .Lstatic_jump_after_\@
.Lstatic_jump_after_\@:
.endif
.pushsection __jump_table, "aw"
_ASM_ALIGN
_ASM_PTR .Lstatic_jump_\@, \target, \key + 1
.popsection
.endm
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif #endif
...@@ -928,23 +928,11 @@ extern void default_banner(void); ...@@ -928,23 +928,11 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define USERGS_SYSRET32 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \ #define GET_CR0_INTO_EAX \
push %ecx; push %edx; \ push %ecx; push %edx; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx pop %edx; pop %ecx
#define ENABLE_INTERRUPTS_SYSEXIT \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#else /* !CONFIG_X86_32 */ #else /* !CONFIG_X86_32 */
/* /*
......
...@@ -162,15 +162,6 @@ struct pv_cpu_ops { ...@@ -162,15 +162,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter); u64 (*read_pmc)(int counter);
#ifdef CONFIG_X86_32
/*
* Atomically enable interrupts and return to userspace. This
* is only used in 32-bit kernels. 64-bit kernels use
* usergs_sysret32 instead.
*/
void (*irq_enable_sysexit)(void);
#endif
/* /*
* Switch to usermode gs and return to 64-bit usermode using * Switch to usermode gs and return to 64-bit usermode using
* sysret. Only used in 64-bit kernels to return to 64-bit * sysret. Only used in 64-bit kernels to return to 64-bit
...@@ -179,14 +170,6 @@ struct pv_cpu_ops { ...@@ -179,14 +170,6 @@ struct pv_cpu_ops {
*/ */
void (*usergs_sysret64)(void); void (*usergs_sysret64)(void);
/*
* Switch to usermode gs and return to 32-bit usermode using
* sysret. Used to return to 32-on-64 compat processes.
* Other usermode register state, including %esp, must already
* be restored.
*/
void (*usergs_sysret32)(void);
/* Normal iret. Jump to this with the standard iret stack /* Normal iret. Jump to this with the standard iret stack
frame set up. */ frame set up. */
void (*iret)(void); void (*iret)(void);
......
...@@ -4,6 +4,15 @@ ...@@ -4,6 +4,15 @@
#include <linux/clocksource.h> #include <linux/clocksource.h>
#include <asm/pvclock-abi.h> #include <asm/pvclock-abi.h>
/*
* pvclock_pvti_cpu0_va() - get a pointer to the pvclock vsyscall time info.
*
* With CONFIG_KVM_GUEST the function is defined out of line. Without it,
* this inline stub always returns NULL, so callers must check the result
* before using it.
*/
#ifdef CONFIG_KVM_GUEST
extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
#else
static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
{
return NULL;
}
#endif
/* some helper functions for xen and kvm pv clock sources */ /* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
...@@ -91,10 +100,5 @@ struct pvclock_vsyscall_time_info { ...@@ -91,10 +100,5 @@ struct pvclock_vsyscall_time_info {
} __attribute__((__aligned__(SMP_CACHE_BYTES))); } __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
int size);
struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
#endif /* _ASM_X86_PVCLOCK_H */ #endif /* _ASM_X86_PVCLOCK_H */
...@@ -22,6 +22,7 @@ struct vdso_image { ...@@ -22,6 +22,7 @@ struct vdso_image {
long sym_vvar_page; long sym_vvar_page;
long sym_hpet_page; long sym_hpet_page;
long sym_pvclock_page;
long sym_VDSO32_NOTE_MASK; long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn; long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn; long sym___kernel_rt_sigreturn;
......
...@@ -65,9 +65,6 @@ void common(void) { ...@@ -65,9 +65,6 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret); OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
#ifdef CONFIG_X86_32
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
#endif
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif #endif
......
...@@ -23,7 +23,6 @@ int main(void) ...@@ -23,7 +23,6 @@ int main(void)
{ {
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK(); BLANK();
......
...@@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock); ...@@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock);
static struct pvclock_vsyscall_time_info *hv_clock; static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock; static struct pvclock_wall_clock wall_clock;
/*
* Return the file-local hv_clock pointer.
* NOTE(review): the function name implies hv_clock points at the entry for
* CPU 0 and may still be NULL before setup has run - confirm against the
* code that assigns hv_clock.
*/
struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
{
return hv_clock;
}
/* /*
* The wallclock is the time of day when we booted. Since then, some time may * The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for * have elapsed since the hypervisor wrote the data. So we try to account for
...@@ -305,7 +310,6 @@ int __init kvm_setup_vsyscall_timeinfo(void) ...@@ -305,7 +310,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
int cpu; int cpu;
int ret;
u8 flags; u8 flags;
struct pvclock_vcpu_time_info *vcpu_time; struct pvclock_vcpu_time_info *vcpu_time;
unsigned int size; unsigned int size;
...@@ -325,11 +329,6 @@ int __init kvm_setup_vsyscall_timeinfo(void) ...@@ -325,11 +329,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
return 1; return 1;
} }
if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
put_cpu();
return ret;
}
put_cpu(); put_cpu();
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
......
...@@ -162,10 +162,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ...@@ -162,10 +162,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len); ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
#ifdef CONFIG_X86_32
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
#endif
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */ /* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
...@@ -220,8 +216,6 @@ static u64 native_steal_clock(int cpu) ...@@ -220,8 +216,6 @@ static u64 native_steal_clock(int cpu)
/* These are in entry.S */ /* These are in entry.S */
extern void native_iret(void); extern void native_iret(void);
extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void); extern void native_usergs_sysret64(void);
static struct resource reserve_ioports = { static struct resource reserve_ioports = {
...@@ -379,13 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { ...@@ -379,13 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0, .load_sp0 = native_load_sp0,
#if defined(CONFIG_X86_32)
.irq_enable_sysexit = native_irq_enable_sysexit,
#endif
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#ifdef CONFIG_IA32_EMULATION
.usergs_sysret32 = native_usergs_sysret32,
#endif
.usergs_sysret64 = native_usergs_sysret64, .usergs_sysret64 = native_usergs_sysret64,
#endif #endif
.iret = native_iret, .iret = native_iret,
......
...@@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); ...@@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret"); DEF_NATIVE(pv_cpu_ops, iret, "iret");
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
...@@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, ...@@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl); PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret); PATCH_SITE(pv_cpu_ops, iret);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_mmu_ops, write_cr3);
......
...@@ -13,9 +13,7 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); ...@@ -13,9 +13,7 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov32, "mov %edi, %eax");
...@@ -55,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, ...@@ -55,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, usergs_sysret64);
PATCH_SITE(pv_cpu_ops, swapgs); PATCH_SITE(pv_cpu_ops, swapgs);
PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr2);
......
...@@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, ...@@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
} }
#ifdef CONFIG_X86_64
/*
* Initialize the generic pvclock vsyscall state: set up fixmap mappings
* for the page(s) holding the per-vcpu pvclock information.
*
* The pages are supplied by the caller; nothing is allocated here.
*
* @i:    start of the caller-provided pvclock time info area
* @size: size of that area in bytes; a warning is raised if it is not
*        PVCLOCK_VSYSCALL_NR_PAGES * PAGE_SIZE, but the mapping proceeds
*
* Always returns 0.
*/
int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
int size)
{
int idx;
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
/* Map one page of the area into each pvclock fixmap slot, in order. */
for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}
return 0;
}
#endif
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/nmi.h> #include <linux/nmi.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/clocksource.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/current.h> #include <asm/current.h>
......
...@@ -1229,10 +1229,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { ...@@ -1229,10 +1229,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.iret = xen_iret, .iret = xen_iret,
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
.usergs_sysret32 = xen_sysret32,
.usergs_sysret64 = xen_sysret64, .usergs_sysret64 = xen_sysret64,
#else
.irq_enable_sysexit = xen_sysexit,
#endif #endif
.load_tr_desc = paravirt_nop, .load_tr_desc = paravirt_nop,
......
...@@ -34,20 +34,6 @@ check_events: ...@@ -34,20 +34,6 @@ check_events:
pop %eax pop %eax
ret ret
/*
* We can't use sysexit directly, because we're not running in ring0.
* But we can easily fake it up using iret. Assuming xen_sysexit is
* jumped to with a standard stack frame, we can just strip it back to
* a standard iret frame and use iret.
*/
ENTRY(xen_sysexit)
movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
/* Set IF in the saved EFLAGS slot of the frame. */
orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
/* Drop everything below the saved EIP so %esp points at the iret frame. */
lea PT_EIP(%esp), %esp
jmp xen_iret
ENDPROC(xen_sysexit)
/* /*
* This is run where a normal iret would be run, with the same stack setup: * This is run where a normal iret would be run, with the same stack setup:
* 8: eflags * 8: eflags
......
...@@ -68,25 +68,6 @@ ENTRY(xen_sysret64) ...@@ -68,25 +68,6 @@ ENTRY(xen_sysret64)
ENDPATCH(xen_sysret64) ENDPATCH(xen_sysret64)
RELOC(xen_sysret64, 1b+1) RELOC(xen_sysret64, 1b+1)
/*
* xen_sysret32: switch to the kernel stack, build a frame on it by hand and
* jump to hypercall_iret.
* NOTE(review): the pushes look like an iret-style frame (ss, rsp, flags
* taken from %r11, cs, ip taken from %rcx) plus a final zero word - confirm
* against the frame layout hypercall_iret expects.
*/
ENTRY(xen_sysret32)
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
/* Stash the current stack pointer, then move to the per-cpu stack top. */
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER32_DS
pushq PER_CPU_VAR(rsp_scratch)
pushq %r11
pushq $__USER32_CS
pushq %rcx
pushq $0
1: jmp hypercall_iret
ENDPATCH(xen_sysret32)
RELOC(xen_sysret32, 1b+1)
/* /*
* Xen handles syscall callbacks much like ordinary exceptions, which * Xen handles syscall callbacks much like ordinary exceptions, which
* means we have: * means we have:
......
...@@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); ...@@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
/* These are not functions, and cannot be called normally */ /* These are not functions, and cannot be called normally */
__visible void xen_iret(void); __visible void xen_iret(void);
#ifdef CONFIG_X86_32
__visible void xen_sysexit(void);
#endif
__visible void xen_sysret32(void); __visible void xen_sysret32(void);
__visible void xen_sysret64(void); __visible void xen_sysret64(void);
__visible void xen_adjust_exception_frame(void); __visible void xen_adjust_exception_frame(void);
......
...@@ -22,12 +22,12 @@ struct context_tracking { ...@@ -22,12 +22,12 @@ struct context_tracking {
}; };
#ifdef CONFIG_CONTEXT_TRACKING #ifdef CONFIG_CONTEXT_TRACKING
extern struct static_key context_tracking_enabled; extern struct static_key_false context_tracking_enabled;
DECLARE_PER_CPU(struct context_tracking, context_tracking); DECLARE_PER_CPU(struct context_tracking, context_tracking);
static inline bool context_tracking_is_enabled(void) static inline bool context_tracking_is_enabled(void)
{ {
return static_key_false(&context_tracking_enabled); return static_branch_unlikely(&context_tracking_enabled);
} }
static inline bool context_tracking_cpu_is_enabled(void) static inline bool context_tracking_cpu_is_enabled(void)
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h> #include <trace/events/context_tracking.h>
struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE; DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
EXPORT_SYMBOL_GPL(context_tracking_enabled); EXPORT_SYMBOL_GPL(context_tracking_enabled);
DEFINE_PER_CPU(struct context_tracking, context_tracking); DEFINE_PER_CPU(struct context_tracking, context_tracking);
...@@ -191,7 +191,7 @@ void __init context_tracking_cpu_set(int cpu) ...@@ -191,7 +191,7 @@ void __init context_tracking_cpu_set(int cpu)
if (!per_cpu(context_tracking.active, cpu)) { if (!per_cpu(context_tracking.active, cpu)) {
per_cpu(context_tracking.active, cpu) = true; per_cpu(context_tracking.active, cpu) = true;
static_key_slow_inc(&context_tracking_enabled); static_branch_inc(&context_tracking_enabled);
} }
if (initialized) if (initialized)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册