提交 7453f33b 编写于 作者: L Linus Torvalds

Merge branch 'x86-xsave-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/xsave changes from Peter Anvin:
 "This is a patchset to support the XSAVES instruction required to
  support context switch of supervisor-only features in upcoming
  silicon.

  This patchset missed the 3.16 merge window, which is why it is based
  on 3.15-rc7"

* 'x86-xsave-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, xsave: Add forgotten inline annotation
  x86/xsaves: Clean up code in xstate offsets computation in xsave area
  x86/xsave: Make it clear that the XSAVE macros use (%edi)/(%rdi)
  Define kernel API to get address of each state in xsave area
  x86/xsaves: Enable xsaves/xrstors
  x86/xsaves: Call booting time xsaves and xrstors in setup_init_fpu_buf
  x86/xsaves: Save xstate to task's xsave area in __save_fpu during booting time
  x86/xsaves: Add xsaves and xrstors support for booting time
  x86/xsaves: Clear reserved bits in xsave header
  x86/xsaves: Use xsave/xrstor for saving and restoring user space context
  x86/xsaves: Use xsaves/xrstors for context switch
  x86/xsaves: Use xsaves/xrstors to save and restore xsave area
  x86/xsaves: Define a macro for handling xsave/xrstor instruction fault
  x86/xsaves: Define macros for xsave instructions
  x86/xsaves: Change compacted format xsave area header
  x86/alternative: Add alternative_input_2 to support alternative with two features and input
  x86/xsaves: Add a kernel parameter noxsaves to disable xsaves/xrstors
...@@ -2200,6 +2200,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -2200,6 +2200,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
and restore using xsave. The kernel will fallback to and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state. enabling legacy floating-point and sse state.
noxsaveopt [X86] Disables xsaveopt used in saving x86 extended
register states. The kernel will fall back to using
xsave to save the states. With this parameter,
saving the states is slower because
xsave doesn't support the modified optimization, while
xsaveopt supports it on xsaveopt-enabled systems.
noxsaves [X86] Disables xsaves and xrstors used in saving and
restoring x86 extended register state in the compacted
form of the xsave area. The kernel will fall back to using
xsaveopt and xrstor to save and restore the states
in the standard form of the xsave area. With this
parameter, the per-process xsave area might occupy more
memory on xsaves-enabled systems.
eagerfpu= [X86] eagerfpu= [X86]
on enable eager fpu restore on enable eager fpu restore
off disable eager fpu restore off disable eager fpu restore
......
...@@ -161,6 +161,20 @@ static inline int alternatives_text_reserved(void *start, void *end) ...@@ -161,6 +161,20 @@ static inline int alternatives_text_reserved(void *start, void *end)
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
: : "i" (0), ## input) : : "i" (0), ## input)
/*
 * Two-feature variant of alternative_input: emits an asm volatile statement
 * built from ALTERNATIVE_2 with input operands only (the output operand
 * list is left empty).
 *
 * If CPU has feature2, newinstr2 is used.
 * Otherwise, if CPU has feature1, newinstr1 is used.
 * Otherwise, oldinstr is used.
 *
 * The leading "i" (0) input is a placeholder operand so that the
 * variadic "## input" list always has something to follow; with the GNU
 * ", ## args" extension the comma is dropped when no inputs are passed.
 * Callers may append a clobber list after their last input operand.
 */
#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \
feature2, input...) \
asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
newinstr2, feature2) \
: : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */ /* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, feature, output, input...) \ #define alternative_io(oldinstr, newinstr, feature, output, input...) \
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
......
...@@ -508,9 +508,12 @@ static inline void user_fpu_begin(void) ...@@ -508,9 +508,12 @@ static inline void user_fpu_begin(void)
static inline void __save_fpu(struct task_struct *tsk) static inline void __save_fpu(struct task_struct *tsk)
{ {
if (use_xsave()) if (use_xsave()) {
xsave_state(&tsk->thread.fpu.state->xsave, -1); if (unlikely(system_state == SYSTEM_BOOTING))
else xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
else
xsave_state(&tsk->thread.fpu.state->xsave, -1);
} else
fpu_fxsave(&tsk->thread.fpu); fpu_fxsave(&tsk->thread.fpu);
} }
......
...@@ -385,8 +385,8 @@ struct bndcsr_struct { ...@@ -385,8 +385,8 @@ struct bndcsr_struct {
struct xsave_hdr_struct { struct xsave_hdr_struct {
u64 xstate_bv; u64 xstate_bv;
u64 reserved1[2]; u64 xcomp_bv;
u64 reserved2[5]; u64 reserved[6];
} __attribute__((packed)); } __attribute__((packed));
struct xsave_struct { struct xsave_struct {
......
...@@ -52,24 +52,170 @@ extern void xsave_init(void); ...@@ -52,24 +52,170 @@ extern void xsave_init(void);
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
extern int init_fpu(struct task_struct *child); extern int init_fpu(struct task_struct *child);
static inline int fpu_xrstor_checking(struct xsave_struct *fx) /* These macros all use (%edi)/(%rdi) as the single memory argument. */
/*
 * XSAVE-family instructions spelled out as raw .byte sequences (presumably
 * so the code assembles even when the assembler lacks the mnemonics --
 * TODO confirm). REX_PREFIX is prepended to every encoding. Each string is
 * meant to be pasted into an inline asm template, optionally after a local
 * label such as "1:".
 */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
/*
 * Common fault-handling tail for the xsave/xrstor inline asm statements.
 *
 * Expands to:
 *  - a stub in the .fixup section at local label 3 that stores -1 into the
 *    [err] operand and jumps back to local label 2;
 *  - an _ASM_EXTABLE(1b, 3b) entry pairing the nearest preceding label 1
 *    with that stub;
 *  - the start of the asm operand lists: the output list
 *    [err] "=r" (err).
 *
 * Requirements on the user, all visible from the expansion:
 *  - labels "1:" (on the instruction that may fault) and "2:" (the resume
 *    point) must already have been emitted before this fragment;
 *  - a variable named err must be in scope;
 *  - the macro must be followed by ": inputs : clobbers" to finish the asm
 *    statement. Callers in this file pass "0" (0) so err starts out as 0.
 */
#define xstate_fault ".section .fixup,\"ax\"\n" \
"3: movl $-1,%[err]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err)
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
{ {
int err; u32 lmask = mask;
u32 hmask = mask >> 32;
int err = 0;
WARN_ON(system_state != SYSTEM_BOOTING);
if (boot_cpu_has(X86_FEATURE_XSAVES))
asm volatile("1:"XSAVES"\n\t"
"2:\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
else
asm volatile("1:"XSAVE"\n\t"
"2:\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
asm volatile(xstate_fault
: "0" (0)
: "memory");
return err;
}
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
int err = 0;
WARN_ON(system_state != SYSTEM_BOOTING);
asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" if (boot_cpu_has(X86_FEATURE_XSAVES))
"2:\n" asm volatile("1:"XRSTORS"\n\t"
".section .fixup,\"ax\"\n" "2:\n\t"
"3: movl $-1,%[err]\n" : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
" jmp 2b\n" : "memory");
".previous\n" else
_ASM_EXTABLE(1b, 3b) asm volatile("1:"XRSTOR"\n\t"
: [err] "=r" (err) "2:\n\t"
: "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
asm volatile(xstate_fault
: "0" (0)
: "memory");
return err;
}
/*
* Save processor xstate to xsave area.
*/
static inline int xsave_state(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
int err = 0;
/*
* If xsaves is enabled, xsaves replaces xsaveopt because
* it supports compact format and supervisor states in addition to
* modified optimization in xsaveopt.
*
* Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
* because xsaveopt supports modified optimization which is not
* supported by xsave.
*
* If none of xsaves and xsaveopt is enabled, use xsave.
*/
alternative_input_2(
"1:"XSAVE,
"1:"XSAVEOPT,
X86_FEATURE_XSAVEOPT,
"1:"XSAVES,
X86_FEATURE_XSAVES,
[fx] "D" (fx), "a" (lmask), "d" (hmask) :
"memory");
asm volatile("2:\n\t"
xstate_fault
: "0" (0)
: "memory"); : "memory");
return err; return err;
} }
/*
 * Restore processor xstate from xsave area.
 *
 * @fx:   xsave area to restore from; passed in (%edi)/(%rdi).
 * @mask: requested-feature bitmap; the low and high 32 bits are passed in
 *        EAX and EDX respectively.
 *
 * Returns 0 on success, or -1 if the restore instruction faulted and the
 * fixup stub emitted by xstate_fault ran.
 */
static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
{
	int err = 0;
	u32 lmask = mask;
	u32 hmask = mask >> 32;

	/*
	 * Use xrstors to restore context if it is enabled. xrstors supports
	 * compacted format of xsave area which is not supported by xrstor.
	 *
	 * Only the original instruction carries the "1:" label. The
	 * alternatives machinery assembles the replacement into its own
	 * section after the original, so a second "1:" on the replacement
	 * would be the nearest backward label when xstate_fault emits
	 * _ASM_EXTABLE(1b, 3b). The exception table would then reference the
	 * replacement copy rather than the address in .text that actually
	 * executes, and a fault there would not be fixed up.
	 */
	alternative_input(
		"1: " XRSTOR,
		XRSTORS,
		X86_FEATURE_XSAVES,
		"D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
		: "memory");

	/* Resume point for the fixup stub, followed by the stub itself. */
	asm volatile("2:\n"
		     xstate_fault
		     : "0" (0)
		     : "memory");

	return err;
}
/*
 * Context-switch save path: store every xstate component of the outgoing
 * task into the xsave area embedded in its fpu state. The result of
 * xsave_state() is not propagated.
 */
static inline void fpu_xsave(struct fpu *fpu)
{
	struct xsave_struct *area = &fpu->state->xsave;

	xsave_state(area, -1);
}
/*
 * Context-switch restore path: load every xstate component of the incoming
 * task from @fx and hand back xrstor_state()'s status unchanged.
 */
static inline int fpu_xrstor_checking(struct xsave_struct *fx)
{
	int status = xrstor_state(fx, -1);

	return status;
}
/*
* Save xstate to user space xsave area.
*
* We don't use modified optimization because xrstor/xrstors might track
* a different application.
*
* We don't use compacted format xsave area for
* backward compatibility for old applications which don't understand
* compacted format of xsave area.
*/
static inline int xsave_user(struct xsave_struct __user *buf) static inline int xsave_user(struct xsave_struct __user *buf)
{ {
int err; int err;
...@@ -83,69 +229,34 @@ static inline int xsave_user(struct xsave_struct __user *buf) ...@@ -83,69 +229,34 @@ static inline int xsave_user(struct xsave_struct __user *buf)
return -EFAULT; return -EFAULT;
__asm__ __volatile__(ASM_STAC "\n" __asm__ __volatile__(ASM_STAC "\n"
"1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" "1:"XSAVE"\n"
"2: " ASM_CLAC "\n" "2: " ASM_CLAC "\n"
".section .fixup,\"ax\"\n" xstate_fault
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b,3b)
: [err] "=r" (err)
: "D" (buf), "a" (-1), "d" (-1), "0" (0) : "D" (buf), "a" (-1), "d" (-1), "0" (0)
: "memory"); : "memory");
return err; return err;
} }
/*
* Restore xstate from user space xsave area.
*/
static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
{ {
int err; int err = 0;
struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
u32 lmask = mask; u32 lmask = mask;
u32 hmask = mask >> 32; u32 hmask = mask >> 32;
__asm__ __volatile__(ASM_STAC "\n" __asm__ __volatile__(ASM_STAC "\n"
"1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" "1:"XRSTOR"\n"
"2: " ASM_CLAC "\n" "2: " ASM_CLAC "\n"
".section .fixup,\"ax\"\n" xstate_fault
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b,3b)
: [err] "=r" (err)
: "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
: "memory"); /* memory required? */ : "memory"); /* memory required? */
return err; return err;
} }
static inline void xrstor_state(struct xsave_struct *fx, u64 mask) void *get_xsave_addr(struct xsave_struct *xsave, int xstate);
{ void setup_xstate_comp(void);
u32 lmask = mask;
u32 hmask = mask >> 32;
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
static inline void xsave_state(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
static inline void fpu_xsave(struct fpu *fpu)
{
/* This, however, we can work around by forcing the compiler to select
an addressing mode that doesn't require extended registers. */
alternative_input(
".byte " REX_PREFIX "0x0f,0xae,0x27",
".byte " REX_PREFIX "0x0f,0xae,0x37",
X86_FEATURE_XSAVEOPT,
[fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) :
"memory");
}
#endif #endif
...@@ -148,6 +148,7 @@ static int __init x86_xsave_setup(char *s) ...@@ -148,6 +148,7 @@ static int __init x86_xsave_setup(char *s)
{ {
setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
setup_clear_cpu_cap(X86_FEATURE_AVX); setup_clear_cpu_cap(X86_FEATURE_AVX);
setup_clear_cpu_cap(X86_FEATURE_AVX2); setup_clear_cpu_cap(X86_FEATURE_AVX2);
return 1; return 1;
...@@ -161,6 +162,13 @@ static int __init x86_xsaveopt_setup(char *s) ...@@ -161,6 +162,13 @@ static int __init x86_xsaveopt_setup(char *s)
} }
__setup("noxsaveopt", x86_xsaveopt_setup); __setup("noxsaveopt", x86_xsaveopt_setup);
/*
 * Handler for the "noxsaves" boot parameter: clears the XSAVES CPU
 * capability. The parameter string itself is not inspected.
 */
static int __init x86_xsaves_setup(char *s)
{
	setup_clear_cpu_cap(X86_FEATURE_XSAVES);

	return 1;
}
__setup("noxsaves", x86_xsaves_setup);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
static int cachesize_override = -1; static int cachesize_override = -1;
static int disable_x86_serial_nr = 1; static int disable_x86_serial_nr = 1;
......
...@@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, ...@@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
/* /*
* These bits must be zero. * These bits must be zero.
*/ */
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; memset(xsave_hdr->reserved, 0, 48);
return ret; return ret;
} }
......
...@@ -93,6 +93,7 @@ void arch_task_cache_init(void) ...@@ -93,6 +93,7 @@ void arch_task_cache_init(void)
kmem_cache_create("task_xstate", xstate_size, kmem_cache_create("task_xstate", xstate_size,
__alignof__(union thread_xstate), __alignof__(union thread_xstate),
SLAB_PANIC | SLAB_NOTRACK, NULL); SLAB_PANIC | SLAB_NOTRACK, NULL);
setup_xstate_comp();
} }
/* /*
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/cpu.h>
#include <asm/i387.h> #include <asm/i387.h>
#include <asm/fpu-internal.h> #include <asm/fpu-internal.h>
#include <asm/sigframe.h> #include <asm/sigframe.h>
...@@ -24,7 +25,9 @@ u64 pcntxt_mask; ...@@ -24,7 +25,9 @@ u64 pcntxt_mask;
struct xsave_struct *init_xstate_buf; struct xsave_struct *init_xstate_buf;
static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; static unsigned int *xstate_offsets, *xstate_sizes;
static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
static unsigned int xstate_features;
/* /*
* If a processor implementation discern that a processor state component is * If a processor implementation discern that a processor state component is
...@@ -283,7 +286,7 @@ sanitize_restored_xstate(struct task_struct *tsk, ...@@ -283,7 +286,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
if (use_xsave()) { if (use_xsave()) {
/* These bits must be zero. */ /* These bits must be zero. */
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; memset(xsave_hdr->reserved, 0, 48);
/* /*
* Init the state that is not present in the memory * Init the state that is not present in the memory
...@@ -478,6 +481,52 @@ static void __init setup_xstate_features(void) ...@@ -478,6 +481,52 @@ static void __init setup_xstate_features(void)
} while (1); } while (1);
} }
/*
* This function sets up offsets and sizes of all extended states in
* xsave area. This supports both standard format and compacted format
* of the xsave aread.
*
* Input: void
* Output: void
*/
void setup_xstate_comp(void)
{
unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8];
int i;
/*
* The FP xstates and SSE xstates are legacy states. They are always
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
xstate_comp_offsets[0] = 0;
xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space);
if (!cpu_has_xsaves) {
for (i = 2; i < xstate_features; i++) {
if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
xstate_comp_offsets[i] = xstate_offsets[i];
xstate_comp_sizes[i] = xstate_sizes[i];
}
}
return;
}
xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = 2; i < xstate_features; i++) {
if (test_bit(i, (unsigned long *)&pcntxt_mask))
xstate_comp_sizes[i] = xstate_sizes[i];
else
xstate_comp_sizes[i] = 0;
if (i > 2)
xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
+ xstate_comp_sizes[i-1];
}
}
/* /*
* setup the xstate image representing the init state * setup the xstate image representing the init state
*/ */
...@@ -496,15 +545,21 @@ static void __init setup_init_fpu_buf(void) ...@@ -496,15 +545,21 @@ static void __init setup_init_fpu_buf(void)
setup_xstate_features(); setup_xstate_features();
if (cpu_has_xsaves) {
init_xstate_buf->xsave_hdr.xcomp_bv =
(u64)1 << 63 | pcntxt_mask;
init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
}
/* /*
* Init all the features state with header_bv being 0x0 * Init all the features state with header_bv being 0x0
*/ */
xrstor_state(init_xstate_buf, -1); xrstor_state_booting(init_xstate_buf, -1);
/* /*
* Dump the init state again. This is to identify the init state * Dump the init state again. This is to identify the init state
* of any feature which is not represented by all zero's. * of any feature which is not represented by all zero's.
*/ */
xsave_state(init_xstate_buf, -1); xsave_state_booting(init_xstate_buf, -1);
} }
static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
...@@ -520,6 +575,30 @@ static int __init eager_fpu_setup(char *s) ...@@ -520,6 +575,30 @@ static int __init eager_fpu_setup(char *s)
} }
__setup("eagerfpu=", eager_fpu_setup); __setup("eagerfpu=", eager_fpu_setup);
/*
 * Work out xstate_size for the states enabled in XCR0/pcntxt_mask.
 *
 * With xsaves, the size is the legacy area plus the xsave header plus,
 * for every feature bit from 2 to 63 that is set in pcntxt_mask, the EAX
 * value of the matching XSTATE_CPUID sub-leaf. Without xsaves, the size is
 * taken directly from EBX of XSTATE_CPUID sub-leaf 0.
 */
static void __init init_xstate_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	int nr;

	if (cpu_has_xsaves) {
		xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;

		for (nr = 2; nr < 64; nr++) {
			if (!test_bit(nr, (unsigned long *)&pcntxt_mask))
				continue;

			cpuid_count(XSTATE_CPUID, nr, &eax, &ebx, &ecx, &edx);
			xstate_size += eax;
		}
		return;
	}

	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	xstate_size = ebx;
}
/* /*
* Enable and initialize the xsave feature. * Enable and initialize the xsave feature.
*/ */
...@@ -551,8 +630,7 @@ static void __init xstate_enable_boot_cpu(void) ...@@ -551,8 +630,7 @@ static void __init xstate_enable_boot_cpu(void)
/* /*
* Recompute the context size for enabled features * Recompute the context size for enabled features
*/ */
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); init_xstate_size();
xstate_size = ebx;
update_regset_xstate_info(xstate_size, pcntxt_mask); update_regset_xstate_info(xstate_size, pcntxt_mask);
prepare_fx_sw_frame(); prepare_fx_sw_frame();
...@@ -572,8 +650,9 @@ static void __init xstate_enable_boot_cpu(void) ...@@ -572,8 +650,9 @@ static void __init xstate_enable_boot_cpu(void)
} }
} }
pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n",
pcntxt_mask, xstate_size); pcntxt_mask, xstate_size,
cpu_has_xsaves ? "compacted form" : "standard form");
} }
/* /*
...@@ -635,3 +714,26 @@ void eager_fpu_init(void) ...@@ -635,3 +714,26 @@ void eager_fpu_init(void)
else else
fxrstor_checking(&init_xstate_buf->i387); fxrstor_checking(&init_xstate_buf->i387);
} }
/*
 * Given the xsave area and a state inside, this function returns the
 * address of the state.
 *
 * This is the API that is called to get xstate address in either
 * standard format or compacted format of xsave area.
 *
 * Inputs:
 *	xsave: base address of the xsave area;
 *	xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE,
 *	etc.). If several bits are set, the highest one selects the state.
 * Output:
 *	address of the state in the xsave area, or NULL if @xstate is 0 or
 *	the selected state is not set in pcntxt_mask.
 */
void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
{
	int feature;

	/*
	 * fls64(0) is 0, which would turn into bit index -1 below and be
	 * used to index both pcntxt_mask and xstate_comp_offsets[].
	 */
	if (!xstate)
		return NULL;

	feature = fls64(xstate) - 1;
	if (!test_bit(feature, (unsigned long *)&pcntxt_mask))
		return NULL;

	return (void *)xsave + xstate_comp_offsets[feature];
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册